package org.molgenis.calibratecadd;

import java.io.File;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.molgenis.calibratecadd.structs.GeneCalibResult;
import org.molgenis.calibratecadd.support.EntityPlus;
import org.molgenis.calibratecadd.support.GavinUtils;
import org.molgenis.calibratecadd.support.ImpactRatios;
import org.molgenis.calibratecadd.support.Step4_Helper;
import org.molgenis.calibratecadd.support.VariantIntersectResult;
import org.molgenis.data.Entity;
import org.molgenis.data.annotation.entity.impl.ExacAnnotator;
import org.molgenis.data.annotation.entity.impl.gavin.GavinEntry;
import org.molgenis.data.annotation.entity.impl.snpEff.SnpEffRunner;
import org.molgenis.data.annotator.tabix.TabixVcfRepository;
import org.molgenis.data.vcf.VcfRepository;

/* loaded from: input_file:org/molgenis/calibratecadd/Step4_MatchingVariantsFromExAC.class */
/**
 * Step 4 of the CADD calibration pipeline: for every gene that has ClinVar (likely) pathogenic
 * variants, looks up the ExAC variants in the same region, filters and shapes them to match the
 * pathogenic set, and writes the outcome to three tab-separated files.
 *
 * <p>Output files, all derived from the output location passed to the constructor:
 * <ul>
 *   <li>{@code <out>.variants.tsv} - one row per pathogenic / matched population variant;</li>
 *   <li>{@code <out>.cadd.tsv} - the same variants as chr, pos, ".", ref, alt rows;</li>
 *   <li>{@code <out>.genes.tsv} - one row per gene, including genes that were dropped.</li>
 * </ul>
 */
public class Step4_MatchingVariantsFromExAC {
    /** Number of positions added on each side of a gene's pathogenic-variant span when querying ExAC. */
    private static final long REGION_PADDING = 100;

    /** A gene is dropped when it has fewer ClinVar variants, or fewer ExAC sites, than this. */
    private static final int MIN_VARIANTS = 2;

    File clinvarPathoLoc;
    File exacFile;
    File outFile;

    /**
     * Command-line entry point.
     *
     * @param strArr three arguments: ClinVar pathogenic VCF, ExAC file, output file prefix
     * @throws IllegalArgumentException when fewer than three arguments are given
     * @throws Exception when loading the input or writing the output fails
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr == null || strArr.length < 3) {
            throw new IllegalArgumentException(
                    "Expected 3 arguments: <clinvar pathogenic VCF> <ExAC file> <output file prefix>");
        }
        new Step4_MatchingVariantsFromExAC(new File(strArr[0]), new File(strArr[1]), new File(strArr[2])).go();
    }

    /**
     * Loads the pathogenic variants per gene and writes the matching ExAC sets.
     *
     * @throws Exception when loading the input or writing the output fails
     */
    public void go() throws Exception {
        HashMap<String, List<Entity>> pathoByGene = loadClinvarPatho(this.clinvarPathoLoc);
        System.out.println("loaded LP/P variants for " + pathoByGene.size() + " genes");
        createMatchingExACsets(this.exacFile, pathoByGene, this.outFile);
    }

    /**
     * @param file  VCF with ClinVar pathogenic variants (must carry the snpEff ANN field and the
     *              CLINVAR info field, see {@code loadClinvarPatho})
     * @param file2 ExAC file that is queried by region
     * @param file3 output location; used as a prefix for the three result files
     */
    public Step4_MatchingVariantsFromExAC(File file, File file2, File file3) throws Exception {
        this.clinvarPathoLoc = file;
        this.exacFile = file2;
        this.outFile = file3;
    }

    /**
     * Writes the headers of the three output files and then processes every gene in turn.
     *
     * @param file    ExAC file to query
     * @param hashMap pathogenic variants keyed by gene
     * @param file2   output prefix
     * @throws Exception when an output file cannot be created or a gene cannot be processed
     */
    private void createMatchingExACsets(File file, HashMap<String, List<Entity>> hashMap, File file2) throws Exception {
        // try-with-resources so the writers are flushed and closed even when a gene fails.
        try (PrintWriter variantsOut = new PrintWriter(file2 + ".variants.tsv");
                PrintWriter caddOut = new PrintWriter(file2 + ".cadd.tsv");
                PrintWriter genesOut = new PrintWriter(file2 + ".genes.tsv")) {
            // Rows written by printVariantsToFile carry 8 columns, so the header names all 8.
            variantsOut.println("gene\tchr\tpos\tref\talt\tgroup\teffect\timpact");
            genesOut.println("Gene\tCategory\tChr\tStart\tEnd\tNrOfPopulationVariants\tNrOfPathogenicVariants\tNrOfOverlappingVariants\tNrOfFilteredPopVariants\tPathoMAFThreshold\tPopImpactHighPerc\tPopImpactModeratePerc\tPopImpactLowPerc\tPopImpactModifierPerc\tPathoImpactHighPerc\tPathoImpactModeratePerc\tPathoImpactLowPerc\tPathoImpactModifierPerc\tPopImpactHighEq\tPopImpactModerateEq\tPopImpactLowEq\tPopImpactModifierEq");

            Step4_Helper helper = new Step4_Helper();
            System.out.println("loading matching exac variants..");

            int geneTotal = hashMap.size();
            int geneIndex = 0;
            for (String gene : hashMap.keySet()) {
                geneIndex++;
                calibrateGene(file, helper, gene, hashMap.get(gene), geneIndex, geneTotal, variantsOut, caddOut, genesOut);
            }
        }
        System.out.println();
        System.out.println("#### done ####");
        System.out.println();
    }

    /**
     * Processes a single gene: determines its region, queries ExAC, and either records why the gene
     * was dropped or writes the calibrated result plus its variants.
     *
     * @param exacLocation  ExAC file to query
     * @param helper        helper that performs intersecting, MAF filtering and impact shaping
     * @param gene          gene name
     * @param pathoVariants pathogenic variants of this gene
     * @param geneIndex     1-based position of this gene, for progress output only
     * @param geneTotal     total number of genes, for progress output only
     * @param variantsOut   writer for the variants file
     * @param caddOut       writer for the CADD input file
     * @param genesOut      writer for the genes file
     */
    private void calibrateGene(File exacLocation, Step4_Helper helper, String gene, List<Entity> pathoVariants,
            int geneIndex, int geneTotal, PrintWriter variantsOut, PrintWriter caddOut, PrintWriter genesOut)
            throws Exception {
        // Span of the pathogenic variants; the chromosome is taken from the first variant that has one.
        String chrom = null;
        long minPos = -1;
        long maxPos = -1;
        for (Entity variant : pathoVariants) {
            long pos = variant.getLong(VcfRepository.POS).longValue();
            if (chrom == null) {
                chrom = variant.getString(VcfRepository.CHROM);
            }
            if (minPos == -1 || pos < minPos) {
                minPos = pos;
            }
            if (pos > maxPos) {
                maxPos = pos;
            }
        }
        long start = minPos - REGION_PADDING;
        long end = maxPos + REGION_PADDING;

        System.out.println("\n#####\n");

        // NOTE(review): a repository is opened for every gene and never closed here. If
        // TabixVcfRepository supports repeated queries and close(), open it once per run instead.
        TabixVcfRepository exacRepository = new TabixVcfRepository(exacLocation, ExacAnnotator.NAME);
        List<Entity> exacVariants = new ArrayList<>();
        try {
            exacVariants = exacRepository.query(chrom, start, end);
        } catch (ArrayIndexOutOfBoundsException e) {
            // Best effort: a failed lookup is treated as "no ExAC sites" so the gene is reported
            // as dropped below, but the failure is no longer silent.
            System.out.println("WARN - ExAC query for " + chrom + ":" + start + "-" + end + " failed (" + e
                    + "), continuing with 0 exac sites");
        }

        System.out.println(gene + " (" + geneIndex + " of " + geneTotal + ") at " + chrom + ":" + start + "-" + end + "  has " + pathoVariants.size() + " patho variants, and " + exacVariants.size() + " exac sites (possibly multi-allelic)");

        if (pathoVariants.size() < MIN_VARIANTS) {
            System.out.println("DROPPED - Too few clinvar variants");
            printGeneToFile(new GeneCalibResult(new GavinEntry(gene, GavinEntry.Category.N1, chrom, start, end, null), null), genesOut);
            return;
        }
        if (exacVariants.size() < MIN_VARIANTS) {
            System.out.println("DROPPED - Too few exac variants");
            printGeneToFile(new GeneCalibResult(new GavinEntry(gene, GavinEntry.Category.N2, chrom, start, end, null), null), genesOut);
            return;
        }

        VariantIntersectResult intersect = helper.intersectVariants(exacVariants, pathoVariants, gene);
        System.out.println("INFO - VariantIntersectResult for '" + gene + "' unique variants, clinvaronly: " + intersect.inClinVarOnly.size() + ", exaconly: " + intersect.inExACOnly.size() + ", both: " + intersect.inBoth_exac.size());

        double pathoMaf = helper.calculatePathogenicMAF(intersect.inBoth_exac, intersect.inClinVarOnly.size());
        List<EntityPlus> mafFiltered = helper.filterExACvariantsByMAF(intersect.inExACOnly, pathoMaf);
        if (mafFiltered.isEmpty()) {
            System.out.println("DROPPED - Too few exac variants after filtering with pathogenic MAF" + pathoMaf);
            printGeneToFile(new GeneCalibResult(new GavinEntry(gene, GavinEntry.Category.T1, chrom, start, end, Double.valueOf(pathoMaf)), null), genesOut);
            return;
        }

        System.out.println("INFO - Using pathogenic MAF, ExAC-only filtered down to " + mafFiltered.size() + " variants " + pathoMaf);
        // Impact profile of all pathogenic variants (ClinVar-only plus those also seen in ExAC).
        // The raw cast is kept as in the original; the element types of these lists are not visible here.
        ImpactRatios pathoRatios = helper.calculateImpactRatios((List) Stream.concat(intersect.inClinVarOnly.stream(), intersect.inBoth_clinvar.stream()).collect(Collectors.toList()), gene);
        System.out.println("INFO - ExAC-only impact ratio: " + helper.calculateImpactRatios(intersect.inExACOnly, gene));
        List<EntityPlus> shaped = helper.shapeExACvariantsByImpactRatios(mafFiltered, pathoRatios);
        System.out.println("INFO - Using Pathogenic impact ratio " + pathoRatios + ", ExAC-only filtered down to " + shaped.size() + " variants");

        if (shaped.isEmpty()) {
            System.out.println("DROPPED - No match for impact profile, but we did learn something");
            printGeneToFile(new GeneCalibResult(new GavinEntry(gene, helper.determineImpactFilterCat(helper.calculateImpactRatios(mafFiltered, gene), pathoRatios), chrom, start, end, Double.valueOf(pathoMaf)), null), genesOut);
            return;
        }

        GeneCalibResult calibrated = new GeneCalibResult(new GavinEntry(gene, GavinEntry.Category.Cx, chrom, start, end, Double.valueOf(pathoMaf)), shaped);
        System.out.println("INFO - calibrated gene ! impact ratio of population reference: " + helper.calculateImpactRatios(shaped, gene));
        printGeneToFile(calibrated, genesOut);
        printVariantsToFile(gene, pathoVariants, calibrated, variantsOut, caddOut);
        // Flush after every calibrated gene so partial results survive an aborted run.
        variantsOut.flush();
        genesOut.flush();
        caddOut.flush();
    }

    /** Writes the string form of a gene result as one line of the genes file. */
    private void printGeneToFile(GeneCalibResult geneCalibResult, PrintWriter printWriter) {
        printWriter.println(geneCalibResult.toString());
    }

    /**
     * Writes the pathogenic variants of a gene followed by its matched population variants.
     *
     * @param str             gene name
     * @param list            pathogenic variants, written with group PATHOGENIC
     * @param geneCalibResult result whose matched variants are written with group POPULATION
     * @param printWriter     writer for the variants file (gene, chr, pos, ref, alt, group, effect, impact)
     * @param printWriter2    writer for the CADD input file (chr, pos, ".", ref, alt)
     */
    private void printVariantsToFile(String str, List<Entity> list, GeneCalibResult geneCalibResult, PrintWriter printWriter, PrintWriter printWriter2) throws Exception {
        for (Entity patho : list) {
            String chrom = patho.getString(VcfRepository.CHROM);
            String pos = patho.getString(VcfRepository.POS);
            String ref = patho.getString(VcfRepository.REF);
            String alt = patho.getString(VcfRepository.ALT);
            String ann = patho.getString(SnpEffRunner.ANN);
            printWriter2.println(chrom + "\t" + pos + "\t.\t" + ref + "\t" + alt);
            printWriter.println(str + "\t" + chrom + "\t" + pos + "\t" + ref + "\t" + alt + "\tPATHOGENIC\t" + GavinUtils.getEffect(ann, str, alt) + "\t" + GavinUtils.getImpact(ann, str, alt));
        }
        for (EntityPlus matched : geneCalibResult.matchedVariants) {
            String chrom = matched.getE().getString(VcfRepository.CHROM);
            String pos = matched.getE().getString(VcfRepository.POS);
            String ref = matched.getE().getString(VcfRepository.REF);
            // ALT, EFFECT and IMPACT are read from the key/value map rather than from the entity itself.
            String alt = matched.getKeyVal().get(VcfRepository.ALT).toString();
            printWriter2.println(chrom + "\t" + pos + "\t.\t" + ref + "\t" + alt);
            printWriter.println(str + "\t" + chrom + "\t" + pos + "\t" + ref + "\t" + alt + "\tPOPULATION\t" + matched.getKeyVal().get("EFFECT").toString() + "\t" + matched.getKeyVal().get("IMPACT").toString());
        }
    }

    /**
     * Reads the ClinVar pathogenic VCF and groups its variants by the gene names found in the
     * snpEff ANN field. Records whose ALT contains a comma are skipped; a variant annotated for
     * several genes is listed under each of them.
     *
     * @param file VCF to read
     * @return pathogenic variants keyed by gene name
     * @throws Exception when a record lacks the ANN field, lacks or has a malformed CLINVAR info
     *                   field, or has an empty second CLINVAR sub-field
     */
    private HashMap<String, List<Entity>> loadClinvarPatho(File file) throws Exception {
        System.out.println("loading clinvar pathogenic variants from " + file + " ..");
        VcfRepository vcfRepository = new VcfRepository(file, "clinvar");
        HashMap<String, List<Entity>> variantsByGene = new HashMap<>();
        try {
            Iterator<Entity> it = vcfRepository.iterator();
            while (it.hasNext()) {
                Entity record = it.next();
                String ann = record.getString(SnpEffRunner.ANN);
                if (ann == null) {
                    throw new Exception("Please annotate the VCF with a recent snpEff version! ANN field not found for " + record.toString());
                }
                if (record.getString(VcfRepository.ALT).contains(",")) {
                    continue;
                }
                String clinvarInfo = record.getString(Step1_GetClinVarPathogenic.CLINVAR_INFO);
                if (clinvarInfo == null) {
                    throw new Exception("Did you create this VCF running Step1? " + Step1_GetClinVarPathogenic.CLINVAR_INFO + " field not found for " + record.toString());
                }
                String[] clinvarParts = clinvarInfo.split("\\|", -1);
                if (clinvarParts.length < 2) {
                    // Previously surfaced as a bare ArrayIndexOutOfBoundsException.
                    throw new Exception(Step1_GetClinVarPathogenic.CLINVAR_INFO + " field has no '|' separated gene for " + record.toString());
                }
                if (clinvarParts[1].length() == 0) {
                    throw new Exception("geneAccordingToClinVar length 0");
                }
                for (String gene : GavinUtils.getGenesFromAnn(ann)) {
                    if (!gene.isEmpty()) {
                        variantsByGene.computeIfAbsent(gene, key -> new ArrayList<>()).add(record);
                    }
                }
            }
        } finally {
            // Single close point, reached on success and on every failure path.
            vcfRepository.close();
        }
        return variantsByGene;
    }
}
