Lexical matching pipeline #46

Open
wants to merge 1 commit into base: master
63 changes: 63 additions & 0 deletions src/ontology/config/match-rules.yaml
@@ -0,0 +1,63 @@
rules:
- description: default
postconditions:
predicate_id: skos:closeMatch
weight: 0.0

- description: exact to exact
preconditions:
subject_match_field_one_of:
- oio:hasExactSynonym
- rdfs:label
- skos:prefLabel
object_match_field_one_of:
- oio:hasExactSynonym
- rdfs:label
- skos:prefLabel
postconditions:
predicate_id: skos:exactMatch
weight: 2.0

- description: >-
label to label; note this is additive with the exact to exact rule,
so the score just represents an additional small boost
preconditions:
subject_match_field_one_of:
- rdfs:label
object_match_field_one_of:
- rdfs:label
postconditions:
predicate_id: skos:exactMatch
weight: 0.5

# - description: xref match
# preconditions:
# subject_match_field_one_of:
# - oio:hasDbXref
# - skos:exactMatch
# object_match_field_one_of:
# - oio:hasDbXref
# - skos:exactMatch
# postconditions:
# predicate_id: skos:exactMatch
# weight: 4.0

- preconditions:
subject_match_field_one_of:
- oio:hasExactSynonym
- rdfs:label
object_match_field_one_of:
- oio:hasBroadSynonym
postconditions:
predicate_id: skos:broadMatch
weight: 2.0

- preconditions:
subject_match_field_one_of:
- oio:hasExactSynonym
- rdfs:label
object_match_field_one_of:
- oio:hasNarrowSynonym
postconditions:
predicate_id: skos:narrowMatch
weight: 2.0
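The rule weights above are additive: every rule whose preconditions hold contributes its weight to the same candidate pair, which is why the label-to-label rule only adds a small boost on top of the exact-to-exact rule. The snippet below is an illustration only, not how OAK's lexmatch is actually implemented; it assumes weights are simply summed and that the predicate of the last firing rule wins.

# Illustration only: additive combination of rule weights for one candidate pair.
# Mirrors a subset of config/match-rules.yaml; OAK's real scoring may differ.
RULES = [
    {"desc": "default", "pre": None,
     "predicate": "skos:closeMatch", "weight": 0.0},
    {"desc": "exact to exact",
     "pre": ({"oio:hasExactSynonym", "rdfs:label", "skos:prefLabel"},
             {"oio:hasExactSynonym", "rdfs:label", "skos:prefLabel"}),
     "predicate": "skos:exactMatch", "weight": 2.0},
    {"desc": "label to label",
     "pre": ({"rdfs:label"}, {"rdfs:label"}),
     "predicate": "skos:exactMatch", "weight": 0.5},
]

def score(subject_field, object_field):
    """Sum the weights of all firing rules; take the predicate of the last one."""
    total, predicate = 0.0, None
    for rule in RULES:
        pre = rule["pre"]
        if pre is None or (subject_field in pre[0] and object_field in pre[1]):
            total += rule["weight"]
            predicate = rule["predicate"]
    return predicate, total

print(score("rdfs:label", "rdfs:label"))             # ('skos:exactMatch', 2.5)
print(score("oio:hasExactSynonym", "rdfs:label"))    # ('skos:exactMatch', 2.0)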
5 changes: 5 additions & 0 deletions src/ontology/config/upheno.sssom.config.yml
@@ -0,0 +1,5 @@
curie_map:
MGPO: http://purl.obolibrary.org/obo/MGPO_
mapping_set_id: http://purl.obolibrary.org/obo/upheno/mappings/upheno-logical.sssom.tsv
license: https://creativecommons.org/publicdomain/zero/1.0/
mapping_set_description: The uPheno logical matches
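The curie_map entry simply tells the SSSOM tooling how to expand MGPO CURIEs into full IRIs. A minimal illustration of that expansion (MGPO:0000123 is a made-up identifier used only for the example):

# Minimal illustration of what the curie_map above declares.
curie_map = {"MGPO": "http://purl.obolibrary.org/obo/MGPO_"}

def expand(curie: str) -> str:
    prefix, local_id = curie.split(":", 1)
    return curie_map[prefix] + local_id

print(expand("MGPO:0000123"))  # http://purl.obolibrary.org/obo/MGPO_0000123 (hypothetical ID)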
5,820 changes: 5,820 additions & 0 deletions src/ontology/mappings/upheno-lexical.oak.sssom.tsv

Large diffs are not rendered by default.

12 changes: 12 additions & 0 deletions src/ontology/mappings/upheno-logical.sssom.tsv
@@ -0,0 +1,12 @@
# curie_map:
# owl: http://www.w3.org/2002/07/owl#
# rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
# rdfs: http://www.w3.org/2000/01/rdf-schema#
# semapv: https://w3id.org/semapv/
# skos: http://www.w3.org/2004/02/skos/core#
# sssom: https://w3id.org/sssom/
# license: https://creativecommons.org/publicdomain/zero/1.0/
# mapping_set_description: The uPheno logical matches
# mapping_set_id: http://purl.obolibrary.org/obo/upheno/mappings/upheno-logical.sssom.tsv


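An SSSOM TSV carries its metadata (the block above) as '#'-prefixed YAML lines ahead of the tab-separated mapping rows. The sssom-py toolkit parses this layout natively; the sketch below only illustrates how the two parts fit together and is not part of the pipeline.

# Sketch only: split an SSSOM TSV into its commented YAML metadata and its mapping rows.
# Assumes Python 3.9+ (str.removeprefix) and that PyYAML is installed.
import io
import pandas as pd
import yaml

def read_sssom_tsv(path):
    meta_lines, data_lines = [], []
    with open(path) as fh:
        for line in fh:
            (meta_lines if line.startswith("#") else data_lines).append(line)
    meta_text = "".join(l.removeprefix("#").removeprefix(" ") for l in meta_lines)
    metadata = yaml.safe_load(meta_text) or {}
    body = "".join(data_lines)
    mappings = pd.read_csv(io.StringIO(body), sep="\t") if body.strip() else pd.DataFrame()
    return metadata, mappings

meta, df = read_sssom_tsv("src/ontology/mappings/upheno-logical.sssom.tsv")
print(meta.get("mapping_set_id"), len(df))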
28 changes: 28 additions & 0 deletions src/ontology/upheno.Makefile
@@ -56,6 +56,34 @@ upheno_mapping_lexical_all: ../curation/upheno-release/all/upheno_species_lexica
python3 ../scripts/lexical_mapping.py all
#echo "SKIP upheno_mapping_lexical_"

tmp/sspo-merged.owl: config/sspo-importer.owl
$(ROBOT) merge -i $< -o $@

tmp/sspo-base-merged.owl: config/sspo-base-importer.owl
$(ROBOT) merge -i $< -o $@

tmp/sspo-filtered.owl: tmp/sspo-merged.owl
$(ROBOT) filter -i $< --term http://purl.obolibrary.org/obo/UPHENO_0001001 --term rdfs:label --term oboInOwl:hasExactSynonym --select "self descendants" \
query --update ../sparql/upheno-synonyms.ru -o $@

tmp/sspo-base-reasoned.json: tmp/sspo-base-merged.owl
$(ROBOT) reason -i $< --exclude-tautologies structural --axiom-generators EquivalentClass \
remove --term owl:Nothing convert -f json -o $@

tmp/sspo-merged.db: tmp/sspo-filtered.owl
semsql make $@

tmp/sspo-filtered.obo: tmp/sspo-filtered.owl
$(ROBOT) convert -i $< -f obo --check false -o $@

mappings/upheno-logical.sssom.tsv: tmp/sspo-base-reasoned.json
sssom parse tmp/sspo-base-reasoned.json -I obographs-json -m config/upheno.sssom.config.yml --prefix-map-mode merged --mapping-predicate-filter owl:equivalentClass -o $@

mappings/upheno-lexical.oak.sssom.tsv: tmp/sspo-filtered.obo
runoak -i tmp/sspo-filtered.obo lexmatch -R config/match-rules.yaml -L tmp/upheno_lexical.index.yaml -o $@

upheno_mappings: mappings/upheno-lexical.oak.sssom.tsv mappings/upheno-logical.sssom.tsv

#.SECONDEXPANSION:
../curation/upheno-release/all/upheno_mapping_logical.csv: ../curation/upheno-release/all/upheno_all_with_relations.owl
$(ROBOT) query -f csv -i $< --query ../sparql/cross-species-mappings.sparql $@
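Together these rules let `make upheno_mappings` drive the whole chain: ROBOT merges and filters the source ontologies, OAK's lexmatch produces the lexical SSSOM file from the filtered OBO, and `sssom parse` derives the logical SSSOM file from the reasoned obographs JSON. A hypothetical downstream check, not part of this PR, could verify that both outputs expose the core SSSOM columns:

# Hypothetical QC helper (not in the Makefile): check the core SSSOM columns.
# Assumes both files have been rebuilt and contain data rows, and that no data
# row contains a literal '#' (comment="#" skips the embedded metadata block).
import pandas as pd

REQUIRED = {"subject_id", "predicate_id", "object_id", "mapping_justification"}

for path in ("mappings/upheno-lexical.oak.sssom.tsv",
             "mappings/upheno-logical.sssom.tsv"):
    df = pd.read_csv(path, sep="\t", comment="#")
    missing = REQUIRED - set(df.columns)
    print(f"{path}: {len(df)} rows, missing columns: {sorted(missing) or 'none'}")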
740 changes: 453 additions & 287 deletions src/scripts/lexical_mapping.ipynb

Large diffs are not rendered by default.

44 changes: 22 additions & 22 deletions src/scripts/lexical_mapping.py
@@ -31,12 +31,12 @@
df.columns = ['iri','p','label']

## Load logical mappings
dfl1 = pd.read_csv(upheno_mapping_logical)[['p1','p2']]
dfl1 = pd.read_csv(upheno_mapping_logical)[['subject_id','object_id']]
dfl2 = dfl1.copy()
dfl2.columns = ['p2','p1']
dfl2.columns = ['object_id','subject_id']
dfl = pd.concat([dfl1, dfl2], ignore_index=True, sort =False)
dfl = dfl.drop_duplicates()
dfl['cat']="logical"
dfl['mapping_justification']="semapv:LogicalReasoning"

## Prepare dataframe for labels
df_label = df[df['p']=="http://www.w3.org/2000/01/rdf-schema#label"][['iri','label']]
@@ -249,14 +249,14 @@ def compute_mappings(dd,l):
data.append([pair[1], pair[0]])
df_mappings = pd.DataFrame.from_records(data)
df_mappings = df_mappings.drop_duplicates()
df_mappings['cat'] = 'lexical'
df_mappings.columns = ['p1','p2','cat']
df_maps = pd.merge(df_mappings,l, how='left', left_on=['p1'], right_on=['iri'])
df_mappings['mapping_justification'] = 'semapv:LexicalMatching'
df_mappings.columns = ['subject_id','object_id','mapping_justification']
df_maps = pd.merge(df_mappings,l, how='left', left_on=['subject_id'], right_on=['iri'])
df_maps=df_maps.drop('iri',1)
df_maps = pd.merge(df_maps, l, how='left', left_on=['p2'], right_on=['iri'])
df_maps = pd.merge(df_maps, l, how='left', left_on=['object_id'], right_on=['iri'])
df_maps=df_maps.drop('iri',1)
df_maps['o1']=[re.sub('[_][0-9]+', '', iri.replace("http://purl.obolibrary.org/obo/","")) for iri in df_maps['p1'].values]
df_maps['o2']=[re.sub('[_][0-9]+', '', iri.replace("http://purl.obolibrary.org/obo/","")) for iri in df_maps['p2'].values]
df_maps['subject_source']=["obo:"+re.sub('[_][0-9]+', '', iri.replace("http://purl.obolibrary.org/obo/","")).lower() for iri in df_maps['subject_id'].values]
df_maps['object_source']=["obo:"+re.sub('[_][0-9]+', '', iri.replace("http://purl.obolibrary.org/obo/","")).lower() for iri in df_maps['object_id'].values]
return df_maps

df_mapping = compute_mappings(dd,l)
@@ -276,8 +276,8 @@ def compute_mappings(dd,l):
print(len(w))
w.to_csv(upheno_mapping_problematic,index=False)
#df_maps
# print(df_mapping[df_mapping['p1']=="http://purl.obolibrary.org/obo/ZP_0006897"])
df_mapping_template = df_mapping[['p1','p2']].copy()
# print(df_mapping[df_mapping['subject_id']=="http://purl.obolibrary.org/obo/ZP_0006897"])
df_mapping_template = df_mapping[['subject_id','object_id']].copy()
df_mapping_template.columns = ['Ontology ID','EquivalentClasses']

df_mapping_template.loc[-1] = ['ID', 'AI obo:UPHENO_0000002'] # adding a row
@@ -292,19 +292,19 @@ def compute_mappings(dd,l):

# Merging the logical mappings with the lexical ones for comparison
print(df_maps.head())
df_m = pd.merge(df_maps[['p1','p2','cat']], dfl, how='outer', on=['p1','p2'])
df_m = pd.merge(df_m,l, how='left', left_on=['p1'], right_on=['iri'])
df_m = pd.merge(df_maps[['subject_id','object_id','mapping_justification']], dfl, how='outer', on=['subject_id','object_id'])
df_m = pd.merge(df_m,l, how='left', left_on=['subject_id'], right_on=['iri'])
df_m=df_m.drop('iri',1)
df_m = pd.merge(df_m, l, how='left', left_on=['p2'], right_on=['iri'])
df_m = pd.merge(df_m, l, how='left', left_on=['object_id'], right_on=['iri'])
df_m=df_m.drop('iri',1)
df_m['cat'] = df_m["cat_x"].astype(str)+"-" + df_m["cat_y"].astype(str)
df_m['cat'] = df_m['cat'].str.replace("-nan", "")
df_m['cat'] = df_m['cat'].str.replace("nan-", "")
df_m=df_m.drop('cat_x',1)
df_m=df_m.drop('cat_y',1)

print(df_m['cat'].value_counts(normalize=True))
print(df_m['cat'].value_counts())
df_m['mapping_justification'] = df_m["mapping_justification_x"].astype(str)+"-" + df_m["mapping_justification_y"].astype(str)
df_m['mapping_justification'] = df_m['mapping_justification'].str.replace("-nan", "")
df_m['mapping_justification'] = df_m['mapping_justification'].str.replace("nan-", "")
df_m=df_m.drop('mapping_justification_x',1)
df_m=df_m.drop('mapping_justification_y',1)

print(df_m['mapping_justification'].value_counts(normalize=True))
print(df_m['mapping_justification'].value_counts())

df_m.to_csv(upheno_mapping_all,index=False)
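With the columns renamed to SSSOM's subject_id / object_id / mapping_justification, the outer merge at the end classifies every pair as lexical-only, logical-only, or both: the concatenated justification keeps both values when a pair is found by both routes. A self-contained toy version of that pattern, for illustration only:

# Toy reproduction (not part of the script) of the outer-merge classification above.
import pandas as pd

lexical = pd.DataFrame({"subject_id": ["A", "B"], "object_id": ["X", "Y"],
                        "mapping_justification": "semapv:LexicalMatching"})
logical = pd.DataFrame({"subject_id": ["B", "C"], "object_id": ["Y", "Z"],
                        "mapping_justification": "semapv:LogicalReasoning"})

m = pd.merge(lexical, logical, how="outer", on=["subject_id", "object_id"])
m["mapping_justification"] = (m["mapping_justification_x"].astype(str) + "-"
                              + m["mapping_justification_y"].astype(str))
m["mapping_justification"] = (m["mapping_justification"]
                              .str.replace("-nan", "").str.replace("nan-", ""))
print(m[["subject_id", "object_id", "mapping_justification"]])
# A-X -> semapv:LexicalMatching (lexical only)
# B-Y -> semapv:LexicalMatching-semapv:LogicalReasoning (both)
# C-Z -> semapv:LogicalReasoning (logical only)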
