From 05977de815fb15fa06e8bb7bd48791c45a8167a3 Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Wed, 4 May 2022 23:59:31 -0400 Subject: [PATCH 1/3] Fix OBO xref handling --- indra/ontology/bio/ontology.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/indra/ontology/bio/ontology.py b/indra/ontology/bio/ontology.py index ee0bd496bc..f3ec39245a 100644 --- a/indra/ontology/bio/ontology.py +++ b/indra/ontology/bio/ontology.py @@ -6,6 +6,7 @@ from indra.util import read_unicode_csv from indra.statements import modtype_conditions from indra.resources import get_resource_path +from indra.databases.identifiers import ensure_prefix_if_needed from indra.statements.validate import assert_valid_db_refs @@ -19,7 +20,7 @@ class BioOntology(IndraOntology): # should be incremented to "force" rebuilding the ontology to be consistent # with the underlying resource files. name = 'bio' - version = '1.25' + version = '1.26' ontology_namespaces = [ 'go', 'efo', 'hp', 'doid', 'chebi', 'ido', 'mondo', 'eccode', ] @@ -77,6 +78,7 @@ def _build(self): self.add_hms_lincs_xrefs() self.add_pubchem_xrefs() self.add_biomappings() + self.add_obo_xrefs() # Add hierarchies logger.info('Adding hierarchy...') self.add_famplex_hierarchy() @@ -256,13 +258,28 @@ def add_obo_replacements(self): {'type': 'replaced_by'})) self.add_edges_from(edges) + def add_obo_xrefs(self): + from indra.databases import obo_client + edges = [] + xref_namespaces = {ns.upper() for ns in self.ontology_namespaces} + for ns in self.ontology_namespaces: + oc = obo_client.OntologyClient(prefix=ns) + for db_id, entry in oc.entries.items(): + label = self.label(ns.upper(), db_id) + for xref_ns, xref_ids in entry.get('xrefs', {}).items(): + if xref_ns not in xref_namespaces or len(xref_ids) > 1: + continue + xref_id = ensure_prefix_if_needed(xref_ns, xref_ids[0]) + xref_label = self.label(xref_ns, xref_id) + edges.append((label, xref_label, {'type': 'xref'})) + self.add_edges_from(edges) + def add_obo_hierarchies(self): from indra.databases import obo_client edges = [] # Mapping various source relation types to standardized ones # in this ontology graph rel_mappings = { - 'xref': 'xref', 'isa': 'isa', 'partof': 'partof', 'is_a': 'isa', From 3ec07102d3326693520b4083a74a1cb9d234e919 Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Thu, 5 May 2022 00:19:52 -0400 Subject: [PATCH 2/3] Add MeSH to OBO xrefs --- indra/ontology/bio/ontology.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/indra/ontology/bio/ontology.py b/indra/ontology/bio/ontology.py index f3ec39245a..7c250c3219 100644 --- a/indra/ontology/bio/ontology.py +++ b/indra/ontology/bio/ontology.py @@ -261,7 +261,8 @@ def add_obo_replacements(self): def add_obo_xrefs(self): from indra.databases import obo_client edges = [] - xref_namespaces = {ns.upper() for ns in self.ontology_namespaces} + xref_namespaces = {ns.upper() for ns in self.ontology_namespaces} | \ + {'MESH'} for ns in self.ontology_namespaces: oc = obo_client.OntologyClient(prefix=ns) for db_id, entry in oc.entries.items(): From d44bf7f42e47e194171d3014c69702e615dd1604 Mon Sep 17 00:00:00 2001 From: Ben Gyori Date: Thu, 5 May 2022 20:20:28 -0400 Subject: [PATCH 3/3] Remove some invalid xrefs --- indra/ontology/bio/ontology.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/indra/ontology/bio/ontology.py b/indra/ontology/bio/ontology.py index 7c250c3219..72e4e600c2 100644 --- a/indra/ontology/bio/ontology.py +++ b/indra/ontology/bio/ontology.py @@ -268,10 +268,23 @@ def add_obo_xrefs(self): for db_id, entry in oc.entries.items(): label = self.label(ns.upper(), db_id) for xref_ns, xref_ids in entry.get('xrefs', {}).items(): - if xref_ns not in xref_namespaces or len(xref_ids) > 1: + # If the namespace is not in our OBO set, we don't + # consider it. If the set of IDs for a given namespace + # is > 1 in size, it's not a one-to-one mapping so we + # don't take it. Finally, there are sometimes xrefs to the + # namespace itself which we don't consider here. + if xref_ns not in xref_namespaces or len(xref_ids) > 1 or \ + xref_ns == ns: continue xref_id = ensure_prefix_if_needed(xref_ns, xref_ids[0]) xref_label = self.label(xref_ns, xref_id) + # In many cases, obsolete nodes are referred to in + # xrefs, and a simple way to control for this is to + # check if the given node is in the graph. If it is, + # then it is either non-obsolete, or there is an explicit + # edge from it to a non-obsolete node. + if xref_label not in self: + continue edges.append((label, xref_label, {'type': 'xref'})) self.add_edges_from(edges)