package org.molgenis.data.annotation.impl;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.lang.ProcessBuilder;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.UUID;
import java.util.regex.Pattern;
import org.molgenis.MolgenisFieldTypes;
import org.molgenis.data.AttributeMetaData;
import org.molgenis.data.DataService;
import org.molgenis.data.Entity;
import org.molgenis.data.EntityMetaData;
import org.molgenis.data.QueryRule;
import org.molgenis.data.annotation.AnnotationService;
import org.molgenis.data.annotation.RepositoryAnnotator;
import org.molgenis.data.support.AnnotationServiceImpl;
import org.molgenis.data.support.DefaultAttributeMetaData;
import org.molgenis.data.support.DefaultEntityMetaData;
import org.molgenis.data.support.MapEntity;
import org.molgenis.data.support.QueryImpl;
import org.molgenis.data.vcf.VcfRepository;
import org.molgenis.framework.server.MolgenisSettings;
import org.molgenis.framework.server.MolgenisSimpleSettings;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.context.ApplicationListener;
import org.springframework.context.event.ContextRefreshedEvent;
import org.springframework.stereotype.Component;

/**
 * Repository annotator that runs the external SnpEff command line tool on a set of variants and copies
 * the fields of the resulting {@code ANN}, {@code LOF} and {@code NMD} INFO entries onto the entities.
 *
 * <p>The location of the SnpEff jar is read from the {@link #SNPEFF_JAR_LOCATION_PROPERTY} setting. The
 * annotator registers itself with the {@link AnnotationService} when the application context is refreshed.
 *
 * <p>NOTE(review): the resolved jar location is kept in the public static field {@link #snpEffPath}, so it
 * is shared by all instances; it is left public and mutable only for backward compatibility.
 */
@Component("SnpEffServiceAnnotator")
public class SnpEffServiceAnnotator implements RepositoryAnnotator, ApplicationListener<ContextRefreshedEvent> {
    /** Name of the settings key that holds the absolute path of the SnpEff jar. */
    public static final String SNPEFF_JAR_LOCATION_PROPERTY = "snpeff_jar_location";
    private final MolgenisSettings molgenisSettings;
    private final AnnotationService annotatorService;
    private static final String NAME = "SnpEff";
    public static final String ANNOTATION = "Annotation";
    public static final String PUTATIVE_IMPACT = "Putative_impact";
    public static final String GENE_NAME = "Gene_Name";
    public static final String GENE_ID = "Gene_ID";
    public static final String FEATURE_TYPE = "Feature_type";
    public static final String FEATURE_ID = "Feature_ID";
    public static final String TRANSCRIPT_BIOTYPE = "Transcript_biotype";
    public static final String RANK_TOTAL = "Rank_total";
    public static final String HGVS_C = "HGVS_c";
    public static final String HGVS_P = "HGVS_p";
    public static final String C_DNA_POSITION = "cDNA_position";
    public static final String CDS_POSITION = "CDS_position";
    public static final String PROTEIN_POSITION = "Protein_position";
    public static final String DISTANCE_TO_FEATURE = "Distance_to_feature";
    public static final String ERRORS = "Errors";
    public static final String LOF = "LOF";
    public static final String NMD = "NMD";
    /** Used to look up the entity that belongs to an output line; {@code null} in stand-alone mode. */
    private final DataService dataService;
    private static final Logger LOG = LoggerFactory.getLogger(SnpEffServiceAnnotator.class);
    /** Last jar location read from the settings by {@link #checkSnpEffPath()}; empty until then. */
    public static String snpEffPath = "";

    /** Splits an annotation on the literal pipe character; compiled once instead of once per line. */
    private static final Pattern PIPE = Pattern.compile(Pattern.quote("|"));
    /** Number of pipe separated fields this class reads from one annotation (allele + 15 fields). */
    private static final int ANNOTATION_FIELD_COUNT = 16;
    /** Minimum number of tab separated columns in an output line; the last one is INFO. */
    private static final int VCF_COLUMN_COUNT = 8;

    /** Putative impact categories; mirrors the values listed in the Putative_impact description. */
    public enum impact {
        MODIFIER,
        LOW,
        MODERATE,
        HIGH
    }

    /** Registers this annotator with the annotation service once the application context is ready. */
    @Override // org.springframework.context.ApplicationListener
    public void onApplicationEvent(ContextRefreshedEvent contextRefreshedEvent) {
        this.annotatorService.addAnnotator(this);
    }

    /**
     * Creates the annotator for use inside the application.
     *
     * @param molgenisSettings settings that hold the SnpEff jar location
     * @param annotationService service this annotator registers itself with
     * @param dataService service used to find the entity that belongs to an annotated line
     * @throws IOException declared for backward compatibility; not thrown by this constructor
     */
    @Autowired
    public SnpEffServiceAnnotator(MolgenisSettings molgenisSettings, AnnotationService annotationService, DataService dataService) throws IOException {
        this.molgenisSettings = molgenisSettings;
        this.annotatorService = annotationService;
        this.dataService = dataService;
    }

    /**
     * Stand-alone mode: runs SnpEff once on a VCF file and writes the annotated VCF to another file.
     *
     * @param file the SnpEff jar
     * @param file2 the VCF file to annotate
     * @param file3 the file that receives the SnpEff output
     * @throws Exception if the jar cannot be found or the SnpEff run fails
     */
    public SnpEffServiceAnnotator(File file, File file2, File file3) throws Exception {
        this.dataService = null;
        this.molgenisSettings = new MolgenisSimpleSettings();
        this.molgenisSettings.setProperty(SNPEFF_JAR_LOCATION_PROPERTY, file.getAbsolutePath());
        this.annotatorService = new AnnotationServiceImpl();
        // Fail early with a clear message instead of starting a JVM on a jar that is not there.
        if (!checkSnpEffPath()) {
            throw new IOException("SnpEff not found at: " + file.getAbsolutePath());
        }
        runSnpEff(file2, file3);
        System.out.println("All done!");
    }

    @Override // org.molgenis.data.annotation.RepositoryAnnotator
    public String getSimpleName() {
        return NAME;
    }

    @Override // org.molgenis.data.annotation.RepositoryAnnotator
    public String getFullName() {
        return getSimpleName();
    }

    @Override // org.molgenis.data.annotation.RepositoryAnnotator
    public String getDescription() {
        return "SnpEff is a variant annotation and effect prediction tool. It annotates and predicts the effects of genetic variants (such as amino acid changes).(source:http://snpeff.sourceforge.net)";
    }

    /**
     * Reads the jar location from the settings into {@link #snpEffPath} and checks that it is a file.
     *
     * @return {@code true} if the configured location exists and is a regular file
     */
    private boolean checkSnpEffPath() {
        snpEffPath = this.molgenisSettings.getProperty(SNPEFF_JAR_LOCATION_PROPERTY);
        if (snpEffPath == null) {
            LOG.error("SnpEff jar location is not configured (setting '{}')", SNPEFF_JAR_LOCATION_PROPERTY);
            return false;
        }
        File jar = new File(snpEffPath);
        if (jar.exists() && jar.isFile()) {
            LOG.info("SnpEff found at: {}", jar.getAbsolutePath());
            return true;
        }
        LOG.error("SnpEff not found at: {}", jar.getAbsolutePath());
        return false;
    }

    /**
     * Annotates the given entities by writing them to a temporary VCF file, running SnpEff on it and
     * parsing every variant line of the output.
     *
     * <p>The result is fully materialised before this method returns, so both temporary files are
     * deleted here.
     *
     * @param iterable the entities to annotate; iterated twice (once to write, once to pair with output)
     * @return iterator over the annotated entities, in SnpEff output order
     * @throws RuntimeException if a temporary file cannot be created or read, if SnpEff fails, or if the
     *         thread is interrupted while waiting for SnpEff
     */
    @Override // org.molgenis.data.annotation.RepositoryAnnotator
    public Iterator<Entity> annotate(Iterable<Entity> iterable) {
        ArrayList<Entity> annotated = new ArrayList<>();
        File inputFile = null;
        File outputFile = null;
        try {
            outputFile = File.createTempFile(UUID.randomUUID().toString(), ".vcf");
            inputFile = getInputTempFile(iterable, UUID.randomUUID().toString());
            runSnpEff(inputFile, outputFile);
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(outputFile), "UTF-8"))) {
                Iterator<Entity> sources = iterable.iterator();
                String line;
                while ((line = reader.readLine()) != null) {
                    // Header lines ("##meta" and "#CHROM ...") and blank lines are not variants and
                    // must not consume an input entity.
                    if (line.isEmpty() || line.startsWith("#")) {
                        continue;
                    }
                    if (!sources.hasNext()) {
                        throw new IllegalStateException("SnpEff produced more variant lines than there are input entities");
                    }
                    annotated.add(parseOutputLineToEntity(line, sources.next().getEntityMetaData().getName()));
                }
            }
            return annotated.iterator();
        } catch (IOException e) {
            throw new RuntimeException("Could not read or create an intermediate file during annotation", e);
        } catch (InterruptedException e) {
            // Restore the flag so code further up the stack can still see the interruption.
            Thread.currentThread().interrupt();
            throw new RuntimeException("Exception during annotation", e);
        } finally {
            deleteTempFile(inputFile);
            deleteTempFile(outputFile);
        }
    }

    /** Deletes a temporary file if it exists and logs (does not throw) when that fails. */
    private static void deleteTempFile(File tempFile) {
        if (tempFile != null && tempFile.exists() && !tempFile.delete()) {
            LOG.warn("Could not delete temporary file: {}", tempFile.getAbsolutePath());
        }
    }

    /**
     * Runs SnpEff in a separate JVM and waits for it to finish.
     *
     * @param file the VCF file to annotate
     * @param file2 the file that receives SnpEff's standard output (the annotated VCF)
     * @throws IOException if the jar location is unknown, the process cannot be started, or SnpEff
     *         exits with a non-zero code
     * @throws InterruptedException if the thread is interrupted while waiting for the process
     */
    public void runSnpEff(File file, File file2) throws IOException, InterruptedException {
        String jarPath = snpEffPath;
        if (jarPath == null || jarPath.isEmpty()) {
            throw new IOException("SnpEff jar location is not configured (setting '" + SNPEFF_JAR_LOCATION_PROPERTY + "')");
        }
        ProcessBuilder processBuilder = new ProcessBuilder("java", "-jar", "-Xmx2g", jarPath, "hg19", "-noStats", "-lof", "-canon", "-ud", "0", "-spliceSiteSize", "5", file.getAbsolutePath());
        processBuilder.redirectOutput(file2);
        processBuilder.redirectError(ProcessBuilder.Redirect.INHERIT);
        int exitCode = processBuilder.start().waitFor();
        // A failed run leaves an empty or truncated output file; report it instead of parsing that.
        if (exitCode != 0) {
            throw new IOException("SnpEff exited with code " + exitCode + " while annotating " + file.getAbsolutePath());
        }
    }

    /**
     * Writes the entities to a new temporary file, one line per entity with the tab separated columns
     * chromosome, position, ".", reference allele and alternative allele. No header is written.
     *
     * @param iterable the entities to write
     * @param str prefix for the temporary file name
     * @return the temporary file; the caller is responsible for deleting it
     * @throws IOException if the file cannot be created or written
     */
    public File getInputTempFile(Iterable<Entity> iterable, String str) throws IOException {
        File tempFile = File.createTempFile(str, ".vcf");
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(tempFile), "UTF-8"))) {
            for (Entity entity : iterable) {
                writer.write(entity.getString("#CHROM") + "\t" + entity.getString("POS") + "\t.\t" + entity.getString("REF") + "\t" + entity.getString("ALT") + "\n");
            }
        }
        return tempFile;
    }

    /**
     * Parses one variant line of SnpEff output, looks up the matching entity by chromosome and position
     * and sets the annotation attributes on it.
     *
     * <p>Only the first annotation of the first INFO entry is used. {@code LOF=} and {@code NMD=}
     * entries are taken from the remaining INFO entries and stored without their key; both default to
     * the empty string.
     *
     * @param str a tab separated output line with at least 8 columns
     * @param str2 name of the entity (repository) in which to look up the variant
     * @return the entity found in the data service, with the annotation attributes set
     * @throws IllegalArgumentException if the line has too few columns or annotation fields
     * @throws IllegalStateException if no entity exists for the chromosome and position of the line
     */
    public Entity parseOutputLineToEntity(String str, String str2) {
        String[] columns = str.split("\t");
        if (columns.length < VCF_COLUMN_COUNT) {
            throw new IllegalArgumentException("Expected at least " + VCF_COLUMN_COUNT + " tab separated columns in SnpEff output line: " + str);
        }
        String[] infoEntries = columns[7].split(";");

        // Multiple annotations are separated by commas; keep only the first one so that its last
        // field (Errors) does not pick up the start of the next annotation.
        String firstAnnotation = infoEntries[0];
        int nextAnnotation = firstAnnotation.indexOf(',');
        if (nextAnnotation >= 0) {
            firstAnnotation = firstAnnotation.substring(0, nextAnnotation);
        }
        // Limit -1 keeps trailing empty fields so the indices below stay stable.
        String[] fields = PIPE.split(firstAnnotation, -1);
        if (fields.length < ANNOTATION_FIELD_COUNT) {
            throw new IllegalArgumentException("Expected at least " + ANNOTATION_FIELD_COUNT + " annotation fields in SnpEff output line: " + str);
        }

        Entity entity = this.dataService.findOne(str2, new QueryImpl(new QueryRule("#CHROM", QueryRule.Operator.EQUALS, columns[0])).and().eq("POS", columns[1]));
        if (entity == null) {
            throw new IllegalStateException("No entity found in '" + str2 + "' for #CHROM=" + columns[0] + " and POS=" + columns[1]);
        }

        String lof = "";
        String nmd = "";
        for (int i = 1; i < infoEntries.length; i++) {
            if (infoEntries[i].startsWith("LOF=")) {
                lof = infoEntries[i];
            } else if (infoEntries[i].startsWith("NMD=")) {
                nmd = infoEntries[i];
            }
        }

        // fields[0] is "ANN=<allele>" and is not stored.
        entity.set(ANNOTATION, fields[1]);
        entity.set(PUTATIVE_IMPACT, fields[2]);
        entity.set(GENE_NAME, fields[3]);
        entity.set(GENE_ID, fields[4]);
        entity.set(FEATURE_TYPE, fields[5]);
        entity.set(FEATURE_ID, fields[6]);
        entity.set(TRANSCRIPT_BIOTYPE, fields[7]);
        entity.set(RANK_TOTAL, fields[8]);
        entity.set(HGVS_C, fields[9]);
        entity.set(HGVS_P, fields[10]);
        entity.set(C_DNA_POSITION, fields[11]);
        entity.set(CDS_POSITION, fields[12]);
        entity.set(PROTEIN_POSITION, fields[13]);
        entity.set(DISTANCE_TO_FEATURE, fields[14]);
        entity.set(ERRORS, fields[15]);
        entity.set(LOF, lof.replace("LOF=", ""));
        entity.set(NMD, nmd.replace("NMD=", ""));
        return entity;
    }

    /**
     * Describes the attributes this annotator adds; all of them are of type STRING.
     *
     * @return new meta data object named after this class
     */
    @Override // org.molgenis.data.annotation.RepositoryAnnotator
    public EntityMetaData getOutputMetaData() {
        DefaultEntityMetaData metaData = new DefaultEntityMetaData(getClass().getName(), MapEntity.class);
        addStringAttribute(metaData, ANNOTATION, "Annotated using Sequence Ontology terms. Multiple effects can be concatenated using ‘&’ (source:http://snpeff.sourceforge.net)");
        addStringAttribute(metaData, PUTATIVE_IMPACT, " A simple estimation of putative impact / deleteriousness : {HIGH, MODERATE, LOW, MODIFIER}(source:http://snpeff.sourceforge.net)");
        addStringAttribute(metaData, GENE_NAME, "Common gene name (HGNC). Optional: use closest gene when the variant is “intergenic”(source:http://snpeff.sourceforge.net)");
        addStringAttribute(metaData, GENE_ID, "Gene ID");
        addStringAttribute(metaData, FEATURE_TYPE, "Which type of feature is in the next field (e.g. transcript, motif, miRNA, etc.). It is preferred to use Sequence Ontology (SO) terms, but ‘custom’ (user defined) are allowed. ANN=A|stop_gained|HIGH|||transcript|... Tissue specific features may include cell type / tissue information separated by semicolon e.g.: ANN=A|histone_binding_site|LOW|||H3K4me3:HeLa-S3|...\nFeature ID: Depending on the annotation, this may be: Transcript ID (preferably using version number), Motif ID, miRNA, ChipSeq peak, Histone mark, etc. Note: Some features may not have ID (e.g. histone marks from custom Chip-Seq experiments may not have a unique ID). (source:http://snpeff.sourceforge.net)");
        addStringAttribute(metaData, FEATURE_ID, "Depending on the annotation, this may be: Transcript ID (preferably using version number), Motif ID, miRNA, ChipSeq peak, Histone mark, etc. Note: Some features may not have ID (e.g. histone marks from custom Chip-Seq experiments may not have a unique ID).(source:http://snpeff.sourceforge.net)");
        addStringAttribute(metaData, TRANSCRIPT_BIOTYPE, "The bare minimum is at least a description on whether the transcript is {“Coding”, “Noncoding”}. Whenever possible, use ENSEMBL biotypes.(source:http://snpeff.sourceforge.net)");
        addStringAttribute(metaData, RANK_TOTAL, "Exon or Intron rank / total number of exons or introns(source:http://snpeff.sourceforge.net)");
        addStringAttribute(metaData, HGVS_C, "Variant using HGVS notation (DNA level)(source:http://snpeff.sourceforge.net)");
        addStringAttribute(metaData, HGVS_P, "If variant is coding, this field describes the variant using HGVS notation (Protein level). Since transcript ID is already mentioned in ‘feature ID’, it may be omitted here.(source:http://snpeff.sourceforge.net)");
        addStringAttribute(metaData, C_DNA_POSITION, "Position in cDNA and trancript’s cDNA length (one based)(source:http://snpeff.sourceforge.net)");
        addStringAttribute(metaData, CDS_POSITION, "Position and number of coding bases (one based includes START and STOP codons)(source:http://snpeff.sourceforge.net)");
        addStringAttribute(metaData, PROTEIN_POSITION, "Position and number of AA (one based, including START, but not STOP)");
        addStringAttribute(metaData, DISTANCE_TO_FEATURE, "All items in this field are options, so the field could be empty. Up/Downstream: Distance to first / last codon Intergenic: Distance to closest gene Distance to closest Intron boundary in exon (+/- up/downstream). If same, use positive number. Distance to closest exon boundary in Intron (+/- up/downstream) Distance to first base in MOTIF Distance to first base in miRNA Distance to exon-intron boundary in splice_site or splice _region ChipSeq peak: Distance to summit (or peak center) Histone mark / Histone state: Distance to summit (or peak center)(source:http://snpeff.sourceforge.net)");
        addStringAttribute(metaData, ERRORS, "Add errors, warnings oErrors, Warnings or Information messages: Add errors, warnings or r informative message that can affect annotation accuracy. It can be added using either ‘codes’ (as shown in column 1, e.g. W1) or ‘message types’ (as shown in column 2, e.g. WARNING_REF_DOES_NOT_MATCH_GENOME). All these errors, warnings or information messages messages are optional.(source:http://snpeff.sourceforge.net)");
        addStringAttribute(metaData, LOF, "snpEff can estimate if a variant is deemed to have a loss of function on the protein.(source:http://snpeff.sourceforge.net)");
        addStringAttribute(metaData, NMD, "Nonsense mediate decay assessment. Some mutations may cause mRNA to be degraded thus not translated into a protein. NMD analysis marks mutations that are estimated to trigger nonsense mediated decay.(source:http://snpeff.sourceforge.net)");
        return metaData;
    }

    /** Adds a STRING attribute with the given name and description to the meta data. */
    private static void addStringAttribute(DefaultEntityMetaData metaData, String name, String description) {
        DefaultAttributeMetaData attribute = new DefaultAttributeMetaData(name, MolgenisFieldTypes.FieldTypeEnum.STRING);
        attribute.setDescription(description);
        metaData.addAttributeMetaData(attribute);
    }

    /**
     * Describes the attributes an entity must have to be annotated: the VCF chromosome, position,
     * reference allele and alternative allele attributes.
     *
     * @return new meta data object named after this class
     */
    @Override // org.molgenis.data.annotation.RepositoryAnnotator
    public EntityMetaData getInputMetaData() {
        DefaultEntityMetaData metaData = new DefaultEntityMetaData(getClass().getName(), MapEntity.class);
        metaData.addAttributeMetaData(VcfRepository.CHROM_META);
        metaData.addAttributeMetaData(VcfRepository.POS_META);
        metaData.addAttributeMetaData(VcfRepository.REF_META);
        metaData.addAttributeMetaData(VcfRepository.ALT_META);
        return metaData;
    }

    /**
     * Checks whether entities with the given meta data can be annotated.
     *
     * <p>All required attributes are checked first; the SnpEff jar is then checked once, so the file
     * system is touched (and the result logged) a single time per call rather than once per attribute.
     *
     * @param entityMetaData meta data of the entities to annotate
     * @return the string {@code "true"} if annotation is possible, otherwise the reason why it is not
     */
    @Override // org.molgenis.data.annotation.RepositoryAnnotator
    public String canAnnotate(EntityMetaData entityMetaData) {
        for (AttributeMetaData required : getInputMetaData().getAttributes()) {
            AttributeMetaData actual = entityMetaData.getAttribute(required.getName());
            if (actual == null) {
                return "missing required attribute";
            }
            if (!actual.getDataType().equals(required.getDataType())) {
                return "a required attribute has the wrong datatype";
            }
        }
        if (!checkSnpEffPath()) {
            return "SnpEff not found";
        }
        return "true";
    }
}
