package org.molgenis.calibratecadd;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.commons.lang3.StringUtils;
import org.molgenis.calibratecadd.support.EntityPlus;
import org.molgenis.calibratecadd.support.ImpactRatios;
import org.molgenis.calibratecadd.support.Step4_Helper;
import org.molgenis.calibratecadd.support.VariantIntersectResult;
import org.molgenis.data.Entity;
import org.molgenis.data.annotation.entity.impl.ExacAnnotator;
import org.molgenis.data.annotation.entity.impl.snpEff.SnpEffRunner;
import org.molgenis.data.annotator.tabix.TabixVcfRepository;
import org.molgenis.data.vcf.VcfRepository;

/* loaded from: input_file:org/molgenis/calibratecadd/Step4_MatchingVariantsFromExAC.class */
/**
 * Groups ClinVar pathogenic variants per gene, selects ExAC variants from the
 * genomic interval around each gene's pathogenic variants, and writes the
 * result as three TSV files.
 *
 * <p>Usage: {@code main(<clinvar VCF> <ExAC tabix VCF> <output prefix>)}. The
 * ClinVar VCF must carry the SnpEff {@code ANN} field and the
 * {@code CLINVAR_INFO} field written by {@link Step1_GetClinVarPathogenic}.
 */
public class Step4_MatchingVariantsFromExAC {
    /** Placeholder written to output columns for which no value is available. */
    public static String NA = "";

    /** Gene key -> ClinVar pathogenic variants assigned to that gene. */
    HashMap<String, List<Entity>> clinvarPatho = new HashMap<>();

    /** Gene key -> ExAC variants that survived MAF filtering and impact shaping. */
    HashMap<String, List<EntityPlus>> matchedExACvariants = new HashMap<>();

    /** Gene key -> tab-separated summary columns for the genes output file. */
    HashMap<String, String> geneInfo = new HashMap<>();

    /**
     * Variant key ({@code chrom_pos_ref_alt}) -> position, within the variant's list of
     * non-empty SnpEff gene symbols, of the symbol that matched the ClinVar gene.
     * Only variants whose match is not at position 0 are stored.
     */
    HashMap<String, Integer> variantToNonZeroSnpEffGeneIndex = new HashMap<>();

    /**
     * Runs the three stages in order: load ClinVar variants, build matching ExAC sets,
     * write the output files.
     *
     * @param strArr {@code [0]} ClinVar pathogenic VCF, {@code [1]} ExAC tabix VCF,
     *               {@code [2]} output file prefix
     * @throws IllegalArgumentException if fewer than three arguments are supplied
     * @throws Exception if loading, matching or writing fails
     */
    public static void main(String[] strArr) throws Exception {
        if (strArr == null || strArr.length < 3) {
            throw new IllegalArgumentException("Expected 3 arguments: <clinvar pathogenic VCF> <ExAC tabix VCF> <output file prefix>");
        }
        Step4_MatchingVariantsFromExAC step4 = new Step4_MatchingVariantsFromExAC();
        step4.loadClinvarPatho(strArr[0]);
        step4.createMatchingExACsets(strArr[1]);
        step4.printVariantsToFile(strArr[2]);
    }

    /**
     * Reads every variant from the ClinVar VCF and files it under a gene key in
     * {@link #clinvarPatho}.
     *
     * <p>The gene key is the ClinVar gene symbol (second {@code |}-separated value of
     * {@code CLINVAR_INFO}) reconciled with the SnpEff gene symbols (fourth
     * {@code |}-separated value of each comma-separated {@code ANN} entry). When no
     * SnpEff symbol matches, the key is the ClinVar symbol followed by all SnpEff
     * symbols, joined with underscores.
     *
     * @param clinvarVcfPath path of the ClinVar pathogenic VCF
     * @throws Exception if a variant lacks the ANN or CLINVAR_INFO field, or has no
     *                   usable ClinVar gene symbol
     */
    private void loadClinvarPatho(String clinvarVcfPath) throws Exception {
        System.out.println("loading clinvar pathogenic variants from " + clinvarVcfPath + " ..");
        VcfRepository clinvarRepo = new VcfRepository(new File(clinvarVcfPath), "clinvar");
        // try/finally so the repository is released on every exit path, including
        // unexpected runtime exceptions.
        try {
            Iterator<Entity> variants = clinvarRepo.iterator();
            while (variants.hasNext()) {
                Entity variant = variants.next();

                String ann = variant.getString(SnpEffRunner.ANN);
                if (ann == null) {
                    throw new Exception("Please annotate the VCF with a recent snpEff version! ANN field not found for " + variant.toString());
                }
                String clinvarInfo = variant.getString(Step1_GetClinVarPathogenic.CLINVAR_INFO);
                if (clinvarInfo == null) {
                    throw new Exception("Did you create this VCF running Step1? " + Step1_GetClinVarPathogenic.CLINVAR_INFO + " field not found for " + variant.toString());
                }
                String[] clinvarFields = clinvarInfo.split("\\|", -1);
                if (clinvarFields.length < 2) {
                    throw new Exception(Step1_GetClinVarPathogenic.CLINVAR_INFO + " field has fewer than 2 '|'-separated values for " + variant.toString());
                }
                String clinvarGene = clinvarFields[1];
                if (clinvarGene.length() == 0) {
                    throw new Exception("geneAccordingToClinVar length 0");
                }

                List<String> snpEffGenes = extractSnpEffGenes(ann);
                String variantKey = variant.getString(VcfRepository.CHROM) + "_" + variant.getString(VcfRepository.POS) + "_" + variant.getString(VcfRepository.REF) + "_" + variant.getString(VcfRepository.ALT);

                // Without any SnpEff symbol the ClinVar symbol is used as-is.
                String gene = snpEffGenes.isEmpty() ? clinvarGene : null;
                if (gene == null) {
                    int geneIndex = 0;
                    for (String snpEffGene : snpEffGenes) {
                        // equals() implies contains(), so one test covers both cases.
                        if (snpEffGene.contains(clinvarGene)) {
                            gene = clinvarGene;
                        } else if (clinvarGene.contains(snpEffGene)) {
                            gene = snpEffGene;
                        }
                        if (gene != null) {
                            if (geneIndex > 0) {
                                this.variantToNonZeroSnpEffGeneIndex.put(variantKey, Integer.valueOf(geneIndex));
                            }
                            // Stop at the first match so the chosen symbol and the recorded
                            // index refer to the same SnpEff entry; continuing would let a
                            // later substring match overwrite an earlier (even exact) match.
                            break;
                        }
                        geneIndex++;
                    }
                }

                if (gene == null) {
                    if (snpEffGenes.size() != 1 && clinvarGene.equals("p.p.Arg801His")) {
                        // Hard-coded correction for one record whose ClinVar "gene" value is
                        // this literal string.
                        gene = "RTEL1";
                    } else {
                        // No symbol matched: build a composite key from all symbols.
                        gene = clinvarGene + "_" + String.join("_", snpEffGenes);
                    }
                }

                this.clinvarPatho.computeIfAbsent(gene, key -> new ArrayList<>()).add(variant);
            }
            System.out.println("there are " + this.variantToNonZeroSnpEffGeneIndex.size() + " ClinVar variants for which the first SnpEff gene symbol is not the one matched to the ClinVar gene symbol");
        } finally {
            clinvarRepo.close();
        }
    }

    /**
     * Collects the non-empty gene symbols (fourth {@code |}-separated value) from the
     * comma-separated entries of an ANN value, in order of appearance. Entries with
     * fewer than four values are skipped.
     */
    private static List<String> extractSnpEffGenes(String ann) {
        List<String> genes = new ArrayList<>();
        for (String annotation : ann.split(",", -1)) {
            // Limit -1 keeps trailing empty values so the column positions stay stable.
            String[] fields = annotation.split("\\|", -1);
            if (fields.length > 3 && !fields[3].isEmpty()) {
                genes.add(fields[3]);
            }
        }
        return genes;
    }

    /** Returns four tab-prefixed {@link #NA} values, standing in for one absent group of four columns. */
    private static String naColumns() {
        return StringUtils.repeat("\t" + NA, 4);
    }

    /**
     * For every gene in {@link #clinvarPatho}, queries the ExAC file for the interval
     * spanned by the gene's pathogenic variants (padded by 100 on each side), derives the
     * matched ExAC variant set, and records a summary row in {@link #geneInfo}. Genes that
     * end up with at least one matched variant are stored in {@link #matchedExACvariants}.
     *
     * <p>Summary category codes written as the first column: {@code N1} (fewer than two
     * ClinVar variants), {@code N2} (no ExAC variants in the interval), {@code T1} (no ExAC
     * variants left after MAF filtering), {@code Cx} (matched set found), or the value of
     * {@code Step4_Helper.determineImpactFilterCat} when impact shaping leaves nothing.
     *
     * @param exacPath path of the tabix-indexed ExAC VCF
     * @throws Exception if the ExAC file cannot be opened, queried or closed, or a helper fails
     */
    private void createMatchingExACsets(String exacPath) throws Exception {
        Step4_Helper helper = new Step4_Helper(this.variantToNonZeroSnpEffGeneIndex);
        System.out.println("loading matching exac variants..");
        int passedGenes = 0;
        int matchedVariantTotal = 0;
        int droppedTooFewClinvar = 0;
        int droppedNoExac = 0;
        int droppedAfterFiltering = 0;
        int geneCounter = 0;
        File exacFile = new File(exacPath);

        for (String gene : this.clinvarPatho.keySet()) {
            geneCounter++;
            List<Entity> pathoVariants = this.clinvarPatho.get(gene);

            // Interval = [lowest position, highest position]; chromosome is taken from the
            // first variant that has one.
            String chrom = null;
            long minPos = -1;
            long maxPos = -1;
            for (Entity pathoVariant : pathoVariants) {
                long pos = pathoVariant.getLong(VcfRepository.POS).longValue();
                if (pos > maxPos) {
                    maxPos = pos;
                }
                if (pos < minPos || minPos == -1) {
                    minPos = pos;
                }
                if (chrom == null) {
                    chrom = pathoVariant.getString(VcfRepository.CHROM);
                }
            }
            long start = minPos - 100;
            long end = maxPos + 100;

            TabixVcfRepository exacRepo = new TabixVcfRepository(exacFile, ExacAnnotator.NAME);
            // try/finally: the repository was previously closed only on the "2+ ClinVar
            // variants" path and leaked for single-variant genes and on exceptions.
            try {
                List<Entity> exacVariants = new ArrayList<>();
                try {
                    exacVariants = exacRepo.query(chrom, start, end);
                } catch (ArrayIndexOutOfBoundsException ignored) {
                    // Deliberate best-effort: treated as "no ExAC variants in this interval".
                    // NOTE(review): presumably raised by the tabix reader for regions it
                    // cannot resolve (e.g. a chromosome missing from the index) - confirm.
                }

                if (pathoVariants.size() < 2) {
                    droppedTooFewClinvar++;
                    Entity onlyVariant = pathoVariants.get(0);
                    String popImpactColumns = exacVariants.size() > 0 ? "\t" + helper.calculateImpactRatiosFromUnprocessedVariants(exacVariants).toString() : naColumns();
                    String exacMaf = exacVariants.size() > 0 ? helper.getExACMAFforUnprocessedClinvarVariant(onlyVariant, exacVariants) : NA;
                    String position = onlyVariant.getString(VcfRepository.POS);
                    this.geneInfo.put(gene, "N1\t" + onlyVariant.getString(VcfRepository.CHROM) + "\t" + position + "\t" + position + "\t" + exacVariants.size() + "\t" + pathoVariants.size() + "\t" + (exacMaf.equals(NA) ? 0 : 1) + "\t0\t" + exacMaf + popImpactColumns + "\t" + helper.calculateImpactRatiosFromUnprocessedVariants(pathoVariants) + naColumns());
                    continue;
                }

                System.out.println("\n#####\n");
                System.out.println(gene + " (" + geneCounter + " of " + this.clinvarPatho.size() + ") " + start + " " + end + "  has " + exacVariants.size());

                // Columns shared by all remaining categories: Chr, Start, End,
                // NrOfPopulationVariants, NrOfPathogenicVariants.
                String intervalColumns = chrom + "\t" + start + "\t" + end + "\t" + exacVariants.size() + "\t" + pathoVariants.size();

                if (exacVariants.size() == 0) {
                    droppedNoExac++;
                    this.geneInfo.put(gene, "N2\t" + intervalColumns + "\t0\t0\t0" + naColumns() + "\t" + helper.calculateImpactRatiosFromUnprocessedVariants(pathoVariants) + naColumns());
                    continue;
                }

                VariantIntersectResult intersect = helper.intersectVariants(exacVariants, pathoVariants);
                System.out.println("VariantIntersectResult for '" + gene + "', clinvaronly: " + intersect.inClinVarOnly.size() + ", exaconly: " + intersect.inExACOnly.size() + ", both: " + intersect.inBoth_exac.size());
                double pathoMaf = helper.calculatePathogenicMAF(intersect.inBoth_exac, intersect.inClinVarOnly.size());
                List<EntityPlus> mafFiltered = helper.filterExACvariantsByMAF(intersect.inExACOnly, pathoMaf);
                System.out.println("exaconly filtered down to " + mafFiltered.size() + " variants using pathogenic MAF " + pathoMaf);
                // Raw cast kept from the original: the element types of the intersect lists
                // are declared outside this file.
                ImpactRatios pathoImpact = helper.calculateImpactRatios((List) Stream.concat(intersect.inClinVarOnly.stream(), intersect.inBoth_clinvar.stream()).collect(Collectors.toList()));
                String popImpactColumns = intersect.inExACOnly.size() > 0 ? "\t" + helper.calculateImpactRatios(intersect.inExACOnly).toString() : naColumns();

                if (mafFiltered.size() == 0) {
                    this.geneInfo.put(gene, "T1\t" + intervalColumns + "\t" + intersect.inBoth_clinvar.size() + "\t0\t" + pathoMaf + popImpactColumns + "\t" + pathoImpact.toString() + naColumns());
                    continue;
                }

                List<EntityPlus> shaped = helper.shapeExACvariantsByImpactRatios(mafFiltered, pathoImpact);
                System.out.println("exaconly filtered down to " + shaped.size() + " variants");
                if (shaped.size() > 0) {
                    passedGenes++;
                    this.matchedExACvariants.put(gene, shaped);
                    matchedVariantTotal += shaped.size();
                    this.geneInfo.put(gene, "Cx\t" + intervalColumns + "\t" + intersect.inBoth_clinvar.size() + "\t" + shaped.size() + "\t" + pathoMaf + popImpactColumns + "\t" + pathoImpact.toString() + "\t" + helper.calculateImpactRatios(shaped).toString());
                } else {
                    ImpactRatios filteredImpact = helper.calculateImpactRatios(mafFiltered);
                    this.geneInfo.put(gene, helper.determineImpactFilterCat(filteredImpact, pathoImpact, pathoMaf) + "\t" + intervalColumns + "\t" + intersect.inBoth_clinvar.size() + "\t" + mafFiltered.size() + "\t" + pathoMaf + popImpactColumns + "\t" + pathoImpact.toString() + "\t" + filteredImpact.toString());
                    droppedAfterFiltering++;
                }
            } finally {
                exacRepo.close();
            }
        }

        System.out.println();
        System.out.println("#### done ####");
        System.out.println();
        System.out.println("passed genes (>0 properly matched interval exac variants): " + passedGenes);
        System.out.println("matched variants (total variants used for final calibration): " + matchedVariantTotal);
        System.out.println("dropped genes (less than 2 clinvar variants): " + droppedTooFewClinvar);
        System.out.println("dropped genes (2+ clinvar, but 0 interval exac variants): " + droppedNoExac);
        System.out.println("dropped genes (2+ clinvar, but >0 interval exac variants, but 0 matched variants left after filtering): " + droppedAfterFiltering);
    }

    /**
     * Writes three files next to the given prefix:
     * <ul>
     *   <li>{@code <prefix>.variants.tsv} - gene, chr, pos, ref, alt and group
     *       ({@code PATHOGENIC} or {@code POPULATION}) for genes with a matched ExAC set;</li>
     *   <li>{@code <prefix>.cadd.tsv} - the same variants as chr, pos, {@code .}, ref, alt,
     *       without a header;</li>
     *   <li>{@code <prefix>.genes.tsv} - one summary row per gene from {@link #geneInfo},
     *       with {@code /} in the gene key replaced by {@code _}.</li>
     * </ul>
     *
     * @param str output file prefix
     * @throws FileNotFoundException if one of the output files cannot be created
     */
    private void printVariantsToFile(String str) throws FileNotFoundException {
        // try-with-resources flushes and closes all writers on every exit path.
        try (PrintWriter variantsOut = new PrintWriter(str + ".variants.tsv");
                PrintWriter caddOut = new PrintWriter(str + ".cadd.tsv");
                PrintWriter genesOut = new PrintWriter(str + ".genes.tsv")) {
            variantsOut.println("gene\tchr\tpos\tref\talt\tgroup");
            genesOut.println("Gene\tCategory\tChr\tStart\tEnd\tNrOfPopulationVariants\tNrOfPathogenicVariants\tNrOfOverlappingVariants\tNrOfFilteredPopVariants\tPathoMAFThreshold\tPopImpactHighPerc\tPopImpactModeratePerc\tPopImpactLowPerc\tPopImpactModifierPerc\tPathoImpactHighPerc\tPathoImpactModeratePerc\tPathoImpactLowPerc\tPathoImpactModifierPerc\tPopImpactHighEq\tPopImpactModerateEq\tPopImpactLowEq\tPopImpactModifierEq");
            for (String gene : this.clinvarPatho.keySet()) {
                // Variants are only written for genes that ended up with a matched ExAC set.
                if (this.matchedExACvariants.containsKey(gene)) {
                    for (Entity patho : this.clinvarPatho.get(gene)) {
                        writeVariant(variantsOut, caddOut, gene,
                                patho.getString(VcfRepository.CHROM),
                                patho.getString(VcfRepository.POS),
                                patho.getString(VcfRepository.REF),
                                patho.getString(VcfRepository.ALT),
                                "PATHOGENIC");
                    }
                    for (EntityPlus population : this.matchedExACvariants.get(gene)) {
                        // For matched ExAC variants the ALT allele is read from the key/value
                        // map of the EntityPlus rather than from the wrapped entity.
                        writeVariant(variantsOut, caddOut, gene,
                                population.getE().getString(VcfRepository.CHROM),
                                population.getE().getString(VcfRepository.POS),
                                population.getE().getString(VcfRepository.REF),
                                population.getKeyVal().get(VcfRepository.ALT).toString(),
                                "POPULATION");
                    }
                }
                genesOut.println(gene.replace("/", "_") + "\t" + this.geneInfo.get(gene));
            }
        }
    }

    /** Writes one variant to the CADD input file and to the per-gene variants file. */
    private static void writeVariant(PrintWriter variantsOut, PrintWriter caddOut, String gene, String chrom, String pos, String ref, String alt, String group) {
        caddOut.println(chrom + "\t" + pos + "\t.\t" + ref + "\t" + alt);
        variantsOut.println(gene + "\t" + chrom + "\t" + pos + "\t" + ref + "\t" + alt + "\t" + group);
    }
}
