package org.molgenis.ontology.sorta;

import com.google.common.base.Function;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Sets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.elasticsearch.common.collect.Iterables;
import org.molgenis.data.DataService;
import org.molgenis.data.Entity;
import org.molgenis.data.QueryRule;
import org.molgenis.data.support.MapEntity;
import org.molgenis.data.support.QueryImpl;
import org.molgenis.ontology.core.meta.OntologyMetaData;
import org.molgenis.ontology.core.meta.OntologyTermDynamicAnnotationMetaData;
import org.molgenis.ontology.core.meta.OntologyTermMetaData;
import org.molgenis.ontology.roc.InformationContentService;
import org.molgenis.ontology.utils.NGramMatchingModel;
import org.springframework.beans.factory.annotation.Autowired;
import org.tartarus.snowball.ext.PorterStemmer;

/* loaded from: input_file:WEB-INF/lib/molgenis-ontology-1.5.0-SNAPSHOT.jar:org/molgenis/ontology/sorta/SortaServiceImpl.class */
public class SortaServiceImpl implements SortaService {
    private static final String NON_WORD_SEPARATOR = "[^a-zA-Z0-9]";
    private static final String ILLEGAL_CHARACTERS_PATTERN = "[^a-zA-Z0-9 ]";
    private static final String FUZZY_MATCH_SIMILARITY = "~0.8";
    private static final String SINGLE_WHITESPACE = " ";
    private static final int MAX_NUMBER_MATCHES = 50;
    private static final int NUMBER_NGRAM_MATCHES = 10;
    public static final String SIGNIFICANT_VALUE = "Significant";
    public static final String DEFAULT_MATCHING_NAME_FIELD = "Name";
    public static final String DEFAULT_MATCHING_SYNONYM_PREFIX_FIELD = "Synonym";
    public static final String DEFAULT_MATCHING_IDENTIFIER = "Identifier";
    public static final String SCORE = "Score";
    public static final String COMBINED_SCORE = "Combined_Score";
    private final PorterStemmer stemmer = new PorterStemmer();
    private final DataService dataService;
    private final InformationContentService informationContentService;
    private static final Set<String> ELASTICSEARCH_RESERVED_WORDS = Sets.newHashSet("or", "and", "if");
    public static final Character DEFAULT_SEPARATOR = ';';

    @Autowired
    public SortaServiceImpl(DataService dataService, InformationContentService informationContentService) {
        if (dataService == null) {
            throw new IllegalArgumentException("DataService is null");
        }
        if (informationContentService == null) {
            throw new IllegalArgumentException("InformationContentService is null");
        }
        this.dataService = dataService;
        this.informationContentService = informationContentService;
    }

    @Override // org.molgenis.ontology.sorta.SortaService
    public Iterable<Entity> getAllOntologyEntities() {
        return this.dataService.findAll(OntologyMetaData.ENTITY_NAME);
    }

    @Override // org.molgenis.ontology.sorta.SortaService
    public Entity getOntologyEntity(String str) {
        return this.dataService.findOne(OntologyMetaData.ENTITY_NAME, new QueryImpl().eq(OntologyMetaData.ONTOLOGY_IRI, str));
    }

    @Override // org.molgenis.ontology.sorta.SortaService
    public Entity getOntologyTermEntity(String str, String str2) {
        Entity ontologyEntity = getOntologyEntity(str2);
        if (ontologyEntity != null) {
            return this.dataService.findOne(OntologyTermMetaData.ENTITY_NAME, new QueryImpl().eq(OntologyTermMetaData.ONTOLOGY_TERM_IRI, str).and().eq(OntologyTermMetaData.ONTOLOGY, ontologyEntity));
        }
        return null;
    }

    @Override // org.molgenis.ontology.sorta.SortaService
    public Iterable<Entity> findOntologyTermEntities(String str, String str2) {
        return findOntologyTermEntities(str, new MapEntity((Map<String, Object>) Collections.singletonMap("Name", str2)));
    }

    @Override // org.molgenis.ontology.sorta.SortaService
    public Iterable<Entity> findOntologyTermEntities(String str, Entity entity) {
        Entity ontologyEntity = getOntologyEntity(str);
        if (ontologyEntity == null) {
            throw new IllegalArgumentException("Ontology IRI " + str + " does not exist in the database!");
        }
        ArrayList arrayList = new ArrayList();
        ArrayList arrayList2 = new ArrayList();
        ArrayList arrayList3 = new ArrayList();
        ArrayList arrayList4 = new ArrayList();
        for (String str2 : entity.getAttributeNames()) {
            if (StringUtils.isNotEmpty(entity.getString(str2)) && !str2.equalsIgnoreCase("Identifier")) {
                if (isAttrNameValidForLexicalMatch(str2)) {
                    String stemQuery = stemQuery(entity.getString(str2));
                    if (StringUtils.isNotEmpty(stemQuery)) {
                        arrayList3.add(new QueryRule("ontologyTermSynonym", QueryRule.Operator.FUZZY_MATCH, fuzzyMatchQuerySyntax(stemQuery)));
                        arrayList4.add(new QueryRule("ontologyTermSynonym", QueryRule.Operator.FUZZY_MATCH_NGRAM, stemQuery));
                    }
                } else {
                    QueryRule queryRule = new QueryRule("name", QueryRule.Operator.EQUALS, str2);
                    QueryRule queryRule2 = new QueryRule("value", QueryRule.Operator.EQUALS, entity.getString(str2));
                    if (arrayList2.size() > 0) {
                        arrayList2.add(new QueryRule(QueryRule.Operator.OR));
                    }
                    arrayList2.add(new QueryRule((List<QueryRule>) Arrays.asList(queryRule, new QueryRule(QueryRule.Operator.AND), queryRule2)));
                }
            }
        }
        if (arrayList2.size() > 0) {
            annotationMatchOntologyTerms(entity, ontologyEntity, arrayList, arrayList2);
        }
        if (arrayList3.size() > 0) {
            lexicalMatchOntologyTerms(str, entity, ontologyEntity, 50 - arrayList.size(), arrayList3, arrayList);
        }
        if (arrayList4.size() > 0) {
            lexicalMatchOntologyTerms(str, entity, ontologyEntity, 10, arrayList4, arrayList);
        }
        Collections.sort(arrayList, new Comparator<Entity>() { // from class: org.molgenis.ontology.sorta.SortaServiceImpl.1
            @Override // java.util.Comparator
            public int compare(Entity entity2, Entity entity3) {
                return entity3.getDouble(SortaServiceImpl.COMBINED_SCORE).compareTo(entity2.getDouble(SortaServiceImpl.COMBINED_SCORE));
            }
        });
        return arrayList;
    }

    private void annotationMatchOntologyTerms(final Entity entity, Entity entity2, List<Entity> list, List<QueryRule> list2) {
        Iterable<Entity> findAll = this.dataService.findAll(OntologyTermDynamicAnnotationMetaData.ENTITY_NAME, new QueryImpl(list2).pageSize(Integer.MAX_VALUE));
        if (Iterables.size(findAll) > 0) {
            list.addAll(ImmutableList.copyOf(FluentIterable.from(this.dataService.findAll(OntologyTermMetaData.ENTITY_NAME, new QueryImpl((List<QueryRule>) Arrays.asList(new QueryRule(OntologyTermMetaData.ONTOLOGY, QueryRule.Operator.EQUALS, entity2), new QueryRule(QueryRule.Operator.AND), new QueryRule(OntologyTermMetaData.ONTOLOGY_TERM_DYNAMIC_ANNOTATION, QueryRule.Operator.IN, findAll))).pageSize(Integer.MAX_VALUE))).transform(new Function<Entity, Entity>() { // from class: org.molgenis.ontology.sorta.SortaServiceImpl.2
                @Override // com.google.common.base.Function
                public Entity apply(Entity entity3) {
                    return SortaServiceImpl.this.calculateNGromOTAnnotations(entity, entity3);
                }
            })));
        }
    }

    private void lexicalMatchOntologyTerms(final String str, final Entity entity, Entity entity2, int i, List<QueryRule> list, List<Entity> list2) {
        QueryRule queryRule = new QueryRule(list);
        queryRule.setOperator(QueryRule.Operator.DIS_MAX);
        for (Entity entity3 : FluentIterable.from(this.dataService.findAll(OntologyTermMetaData.ENTITY_NAME, new QueryImpl((List<QueryRule>) Arrays.asList(new QueryRule(OntologyTermMetaData.ONTOLOGY, QueryRule.Operator.EQUALS, entity2), new QueryRule(QueryRule.Operator.AND), queryRule)).pageSize(i))).transform(new Function<Entity, Entity>() { // from class: org.molgenis.ontology.sorta.SortaServiceImpl.3
            @Override // com.google.common.base.Function
            public Entity apply(Entity entity4) {
                double d = 0.0d;
                double d2 = 0.0d;
                for (String str2 : entity.getAttributeNames()) {
                    String string = entity.getString(str2);
                    if (StringUtils.isNotEmpty(string) && SortaServiceImpl.this.isAttrNameValidForLexicalMatch(str2)) {
                        Entity calculateNGramOTSynonyms = SortaServiceImpl.this.calculateNGramOTSynonyms(str, string, entity4);
                        if (d < calculateNGramOTSynonyms.getDouble("Score").doubleValue()) {
                            d = calculateNGramOTSynonyms.getDouble("Score").doubleValue();
                        }
                        if (d2 < calculateNGramOTSynonyms.getDouble(SortaServiceImpl.COMBINED_SCORE).doubleValue()) {
                            d2 = calculateNGramOTSynonyms.getDouble(SortaServiceImpl.COMBINED_SCORE).doubleValue();
                        }
                    }
                }
                MapEntity mapEntity = new MapEntity(entity4);
                mapEntity.set("Score", Double.valueOf(d));
                mapEntity.set(SortaServiceImpl.COMBINED_SCORE, Double.valueOf(d2));
                return mapEntity;
            }
        })) {
            if (!list2.contains(entity3)) {
                list2.add(entity3);
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    public Entity calculateNGromOTAnnotations(Entity entity, Entity entity2) {
        MapEntity mapEntity = new MapEntity(entity2);
        for (Entity entity3 : entity2.getEntities(OntologyTermMetaData.ONTOLOGY_TERM_DYNAMIC_ANNOTATION)) {
            String string = entity3.getString("name");
            String string2 = entity3.getString("value");
            for (String str : entity.getAttributeNames()) {
                if (StringUtils.isNotEmpty(entity.getString(str)) && StringUtils.equalsIgnoreCase(str, string) && StringUtils.equalsIgnoreCase(entity.getString(str), string2)) {
                    mapEntity.set("Score", (Object) 100);
                    mapEntity.set(COMBINED_SCORE, (Object) 100);
                    return mapEntity;
                }
            }
        }
        return mapEntity;
    }

    /* JADX INFO: Access modifiers changed from: private */
    public Entity calculateNGramOTSynonyms(String str, String str2, Entity entity) {
        Iterable<Entity> entities = entity.getEntities("ontologyTermSynonym");
        if (Iterables.size(entities) <= 0) {
            return null;
        }
        final String removeIllegalCharWithSingleWhiteSpace = removeIllegalCharWithSingleWhiteSpace(str2);
        ImmutableList sortedList = FluentIterable.from(entities).transform(new Function<Entity, MapEntity>() { // from class: org.molgenis.ontology.sorta.SortaServiceImpl.5
            @Override // com.google.common.base.Function
            public MapEntity apply(Entity entity2) {
                MapEntity mapEntity = new MapEntity(entity2);
                mapEntity.set("Score", Double.valueOf(NGramMatchingModel.stringMatching(removeIllegalCharWithSingleWhiteSpace, SortaServiceImpl.this.removeIllegalCharWithSingleWhiteSpace(entity2.getString("ontologyTermSynonym")))));
                return mapEntity;
            }
        }).toSortedList(new Comparator<MapEntity>() { // from class: org.molgenis.ontology.sorta.SortaServiceImpl.4
            @Override // java.util.Comparator
            public int compare(MapEntity mapEntity, MapEntity mapEntity2) {
                return mapEntity2.getDouble("Score").compareTo(mapEntity.getDouble("Score"));
            }
        });
        MapEntity mapEntity = (MapEntity) Iterables.getFirst(sortedList, new MapEntity());
        double doubleValue = mapEntity.getDouble("Score").doubleValue();
        String string = mapEntity.getString("ontologyTermSynonym");
        Iterator it = Iterables.skip(sortedList, 1).iterator();
        while (it.hasNext()) {
            String string2 = ((Entity) it.next()).getString("ontologyTermSynonym");
            StringBuilder sb = new StringBuilder();
            sb.append(string).append(" ").append(string2);
            double stringMatching = NGramMatchingModel.stringMatching(removeIllegalCharWithSingleWhiteSpace, removeIllegalCharWithSingleWhiteSpace(sb.toString()));
            if (stringMatching > doubleValue) {
                doubleValue = stringMatching;
                string = sb.toString();
            }
        }
        mapEntity.set("ontologyTermSynonym", string);
        mapEntity.set("Score", Double.valueOf(doubleValue));
        mapEntity.set(COMBINED_SCORE, Double.valueOf(doubleValue));
        Map<String, Double> redistributedNGramScore = this.informationContentService.redistributedNGramScore(removeIllegalCharWithSingleWhiteSpace, str);
        Set<String> createStemmedWordSet = this.informationContentService.createStemmedWordSet(string);
        this.informationContentService.createStemmedWordSet(removeIllegalCharWithSingleWhiteSpace).stream().filter(str3 -> {
            return Iterables.contains(createStemmedWordSet, str3) && redistributedNGramScore.containsKey(str3);
        }).forEach(str4 -> {
            mapEntity.set(COMBINED_SCORE, Double.valueOf(mapEntity.getDouble(COMBINED_SCORE).doubleValue() + ((Double) redistributedNGramScore.get(str4)).doubleValue()));
        });
        return mapEntity;
    }

    private String stemQuery(String str) {
        StringBuilder sb = new StringBuilder();
        HashSet<String> newHashSet = Sets.newHashSet(str.toLowerCase().trim().split(NON_WORD_SEPARATOR));
        newHashSet.removeAll(NGramMatchingModel.STOPWORDSLIST);
        for (String str2 : newHashSet) {
            if (StringUtils.isNotEmpty(str2.trim()) && !ELASTICSEARCH_RESERVED_WORDS.contains(str2)) {
                this.stemmer.setCurrent(removeIllegalCharWithEmptyString(str2));
                this.stemmer.stem();
                String current = this.stemmer.getCurrent();
                if (StringUtils.isNotEmpty(current)) {
                    sb.append(current).append(" ");
                }
            }
        }
        return sb.toString().trim();
    }

    private String fuzzyMatchQuerySyntax(String str) {
        StringBuilder sb = new StringBuilder();
        for (String str2 : str.split(" ")) {
            sb.append(str2).append(FUZZY_MATCH_SIMILARITY).append(" ");
        }
        return sb.toString().trim();
    }

    public String removeIllegalCharWithSingleWhiteSpace(String str) {
        return str.replaceAll(ILLEGAL_CHARACTERS_PATTERN, " ");
    }

    public String removeIllegalCharWithEmptyString(String str) {
        return str.replaceAll(ILLEGAL_CHARACTERS_PATTERN, "");
    }

    /* JADX INFO: Access modifiers changed from: private */
    public boolean isAttrNameValidForLexicalMatch(String str) {
        return StringUtils.equalsIgnoreCase(str, "Name") || StringUtils.containsIgnoreCase(str, DEFAULT_MATCHING_SYNONYM_PREFIX_FIELD);
    }
}
