FIt, just save what has changed between old version and less old version

dvklopfenstein · May 23, 2022 · 8d289e1 · 8d289e1
1 parent 3c3c3f8
commit 8d289e1
Show file tree

Hide file tree

Showing 21 changed files with 52,168 additions and 40,243 deletions.
diff --git a/src/reactomepy/code/neo4jnode.py b/src/reactomepy/code/neo4jnode.py
@@ -1,6 +1,6 @@
 """Holds information for one data schema item."""
 
-__copyright__ = "Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved."
+__copyright__ = "Copyright (C) 2018-present, DV Klopfenstein. All rights reserved."
 __author__ = "DV Klopfenstein"
 
 # import os
@@ -75,7 +75,7 @@ def get_rel2nodes(self):
             qry = self.qrypat.format(DBID=self.item_id)
             #print('\n{Q}'.format(Q=qry))
             with self.kws['gdbdr'].session() as session:
-                for rec in session.run(qry).records():
+                for rec in session.run(qry):
                     rel = rec['rel'].type
                     if rel not in self.rel_excl:
                         #print('{I} {R:19} {NOD}'.format(I=idx, R=rec['rel'].type, NOD=rec['dst']))
@@ -94,4 +94,4 @@ def _get_abc(self, abc_param, species_nodes, objsch):
         return 'XXX'
 
 
-# Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved.
+# Copyright (C) 2018-present, DV Klopfenstein. All rights reserved.
diff --git a/src/reactomepy/code/neo4jnodebasic.py b/src/reactomepy/code/neo4jnodebasic.py
@@ -31,6 +31,7 @@ def __str__(self):
         # Parameters on all Nodes
         try:
             #### msg = [self.objsch.prtfmt.format(**self.ntp._asdict())]
+            print('DDDDDDDD src/reactomepy/code/neo4jnodebasic.py', self.item_id, dir(self.objsch))
             msg = [self.objsch.prtfmt.format(**self.ntp._asdict(), **self.objsch.get_optstr(self.ntp.optional))]
             # for rel, dsts in self.relationship.items():
             #     msg.append('{REL} dbIds[{N}]: {IDs}'.format(REL=rel, N=len(dsts), IDs=' '.join(str(o.item_id) for o in dsts)))

diff --git a/src/reactomepy/code/query/functions.py b/src/reactomepy/code/query/functions.py
@@ -2,7 +2,7 @@
 
 from __future__ import print_function
 
-__copyright__ = "Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved."
+__copyright__ = "Copyright (C) 2018-present, DV Klopfenstein. All rights reserved."
 __author__ = "DV Klopfenstein"
 
 import sys
@@ -93,6 +93,10 @@ def get_dbid2node(self, paramvalstr, rels, exact=True):
         print(query)
         print('GET ALL dbIds from QUERY AND LOWER')
         dbid2node, src_dbids = self.ses_dbid2nodebasic_srcdst(query, prt=sys.stdout)
+        for a, b in dbid2node.items():
+            print('FFFFFFFFFFFFFFFFFFFFFF ../ReactomePy/src/reactomepy/code/query/functions.py', a)
+            print('FFFFFFFFFFFFFFFFFFFFFF ../ReactomePy/src/reactomepy/code/query/functions.py', b)
+        print('GGGGGGGGGGGGGGGGGGGGGG ../ReactomePy/src/reactomepy/code/query/functions.py')
         dbid2dct = self.get_dbid2dct_g_dbid2nodeb(dbid2node, exact)
         print('FILL DICT WITH PARAMETER VALUES AND RELATIONSHIP DESTINATION NODES')
         return {'dbid2node':dbid2node, 'dbid2dct':dbid2dct, 'relationships':rels,
@@ -148,7 +152,10 @@ def ses_relationship_dcts(self, dbid2nodebasic):
     def get_dbid2nodedct(self, dbids):
         """Get Neo4jNodes with neo4j params in a dict and direct children relationships loaded."""
         # Run 1: MATCH (src:DatabaseObject{dbId:DBID}) RETURN src.schemaClass AS schemaClass
+        print('DDDDDDDDDDDDDD ../ReactomePy/src/reactomepy/code/query/functions.py', dbids)
         dbid2sch = self.ses_dbid2sch(dbids)
+        for a, b in dbid2sch.items():
+            print('DDDDDDDDDDDDDD ../ReactomePy/src/reactomepy/code/query/functions.py', a, b)
         assert set(dbids) == set(dbid2sch), "TBD: Report dbIds NOT FOUND"
         dbid2nodebasic = {dbid:Neo4jNodeBasic(dbid, sch) for dbid, sch in dbid2sch.items()}
         # Run 2a: MATCH (s:DatabaseObject{dbId:ID})-[r]->(d) RETURN s, r, d.dbId AS d_Id
@@ -165,7 +172,8 @@ def ses_dbid2sch(self, dbids):
         with self.gdbdr.session() as session:
             for dbid in dbids:
                 query = pre + str(dbid) + post
-                for rec in session.run(query).records():
+                print('QQQQQQQQQQ ../ReactomePy/src/reactomepy/code/query/functions.py', query)
+                for rec in session.run(query):
                     dbid2sch[dbid] = rec['schemaClass']
         return dbid2sch
 
@@ -175,10 +183,10 @@ def ses_dbid2nodebasic_srcdst(self, query, prt=sys.stdout, idxmod=100000):
         dbid2nodebasic = {}
         src_dbids = set()
         tic = timeit.default_timer()
-        print('HELLO')
+        ## print('HELLO')
         with self.gdbdr.session() as session:
-            for idx, rec in enumerate(session.run(query).records()):
-                print('REC:', rec)
+            for idx, rec in enumerate(session.run(query)):
+                ## print('REC:', rec)
                 src_dbid = rec['src_dbId']
                 dst_dbid = rec['dst_dbId']
                 src_dbids.add(src_dbid)
@@ -200,7 +208,7 @@ def ses_dbid2nodebasic_src(self, query):
         src_dbids = set()
         tic = timeit.default_timer()
         with self.gdbdr.session() as session:
-            for idx, rec in enumerate(session.run(query).records()):
+            for idx, rec in enumerate(session.run(query)):
                 src_dbid = rec['src_dbId']
                 src_dbids.add(src_dbid)
                 if src_dbid not in dbid2nodebasic:
@@ -226,7 +234,7 @@ def _addval_src_norel(pat, dbid2node_missing, session):
         tic = timeit.default_timer()
         for dbid, nodebasic in dbid2node_missing.items():
             qry = pat.format(DBID=str(dbid))
-            for rec in session.run(qry).records():
+            for rec in session.run(qry):
                 dbid2dct[dbid] = nodebasic.objsch.get_dict(rec['src'])
         print('  HMS: {HMS} {N:6,} dbIds: {Q}'.format(
             HMS=get_hms(tic), N=len(dbid2node_missing,), Q=qry))
@@ -240,7 +248,7 @@ def _addval_src_rel_dst(self, pat, dbid2nodebasic, session):
         tic = timeit.default_timer()
         for dbid, nodebasic in dbid2nodebasic.items():
             qry = pat.replace('ID', str(dbid))
-            for rec in session.run(qry).records():
+            for rec in session.run(qry):
                 #### nodebasic.dct = nodebasic.objsch.get_dict(rec['s'])
                 dbid2dct[dbid] = nodebasic.objsch.get_dict(rec['s'])
                 rel = rec['r'].type
@@ -290,7 +298,7 @@ def get_version(gdbdr):
     version = None
     query = 'MATCH (v:DBInfo) RETURN v'
     with gdbdr.session() as session:
-        for rec in session.run(query).records():
+        for rec in session.run(query):
             dbinfo = rec['v']
             assert dbinfo.get('name') == 'reactome'
             version = dbinfo.get('version')
@@ -333,9 +341,9 @@ def get_dbids(gdbdr, nodestr='Complex{stId:"R-HSA-167199"}'):
     dbids = set()
     query = 'MATCH (n:NODESTR) RETURN n'.format(NODESTR=nodestr)
     with gdbdr.session() as session:
-        for rec in session.run(query).records():
+        for rec in session.run(query):
             dbids.add(rec['n']['dbId'])
     return dbids
 
 
-# Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved.
+# Copyright (C) 2018-present, DV Klopfenstein. All rights reserved.
diff --git a/src/reactomepy/code/query/get_nodes.py b/src/reactomepy/code/query/get_nodes.py
@@ -2,7 +2,7 @@
 
 from __future__ import print_function
 
-__copyright__ = "Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved."
+__copyright__ = "Copyright (C) 2018-present, DV Klopfenstein. All rights reserved."
 __author__ = "DV Klopfenstein"
 
 import sys
@@ -29,7 +29,7 @@ def get_dbids(self, qry, prt=sys.stdout):
         # Example: 'MATCH (s:Figure) RETURN s.dbId AS dbId'
         dbids = set()
         with self.gdbdr.session() as session:
-            for idx, rec in enumerate(session.run(qry).records()):
+            for idx, rec in enumerate(session.run(qry)):
                 dbids.add(rec['dbId'])
                 if prt and idx%10000 == 0:
                     prt.write('{HMS} {IDX} {DBID}'.format(
@@ -41,7 +41,7 @@ def get_dbid2val(self, qry, prt=sys.stdout):
         dbid2val = {}
         # Example: 'MATCH (f:Figure) RETURN f.dbId AS dbId, f.url AS val'
         with self.gdbdr.session() as session:
-            for rec in session.run(qry).records():
+            for rec in session.run(qry):
                 dbid2val[rec['dbId']] = rec['val']
         if prt:
             prt.write('  {HMS} {N:,} dbIds: {Q}\n'.format(
@@ -53,7 +53,7 @@ def get_dbid2set(self, qry, prt=sys.stdout):
         dbid2set = cx.defaultdict(set)
         # Example: MATCH (s:InstanceEdit)-[r]->(f:Figure) RETURN s.dbId AS dbId, f.dbId AS val
         with self.gdbdr.session() as session:
-            for rec in session.run(qry).records():
+            for rec in session.run(qry):
                 dbid2set[rec['dbId']].add(rec['val'])
         if prt:
             prt.write('  {HMS} {N:,} dbIds: {Q}\n'.format(
@@ -67,7 +67,7 @@ def get_dbid2ntset(self, qry, prt=sys.stdout):
         #     RETURN f.dbId AS key_dbId, type(r) AS rtyp, e.dbId AS val_dbId
         ntobj = cx.namedtuple('NtIdRel', 'dbId rel')
         with self.gdbdr.session() as session:
-            for rec in session.run(qry).records():
+            for rec in session.run(qry):
                 dbid2ntset[rec['key_dbId']].add(ntobj(dbId=rec['val_dbId'], rel=rec['rtyp']))
         if prt:
             prt.write('  {HMS} {N:,} rel-dbIds: {Q}\n'.format(
@@ -81,7 +81,7 @@ def get_dbid2ntnodes(self, qry, prt=sys.stdout):
         ntobjkey = cx.namedtuple('NtIdRel', 'dbId rel')
         ntobjnode = cx.namedtuple('NtSRD', 'src rel dst')
         with self.gdbdr.session() as session:
-            for rec in session.run(qry).records():
+            for rec in session.run(qry):
                 src = rec['s']
                 rel = rec['rtyp']
                 dst = rec['d']
@@ -98,7 +98,7 @@ def get_nodes_query(self, query, msg=None):
         nodes = []
         tic = timeit.default_timer()
         with self.gdbdr.session() as session:
-            for rec in session.run(query).records():
+            for rec in session.run(query):
                 nodes.append(Neo4jNode(rec['s']))
         print('  {HMS} {N:,} {MSG}'.format(
             HMS=get_hms(tic), N=len(nodes), MSG=msg if msg else query))
@@ -110,7 +110,7 @@ def get_nodes_sch(self, srchstr, msg=None):
         tic = timeit.default_timer()
         qry = 'MATCH (s:{SRCHSTR}) RETURN s'.format(SRCHSTR=srchstr)
         with self.gdbdr.session() as session:
-            for rec in session.run(qry).records():
+            for rec in session.run(qry):
                 nodes.append(Neo4jNode(rec['s']))
         print('  {HMS} {N:,} {MSG}'.format(
             HMS=get_hms(tic), N=len(nodes), MSG=msg if msg else srchstr))
@@ -124,7 +124,7 @@ def get_dbid2node(self, dbids, msg='nodes found'):
         with self.gdbdr.session() as session:
             for dbid in dbids:
                 query = qupat.format(DBID=dbid)
-                for rec in session.run(query).records():
+                for rec in session.run(query):
                     dbid2node[dbid] = Neo4jNode(rec['s'])
         print('FASTISH  {HMS} {N:,} {MSG}'.format(HMS=get_hms(tic), N=len(dbid2node), MSG=msg))
         return dbid2node
@@ -140,4 +140,4 @@ def _shorten_queryprt(qry):
         return qry
 
 
-# Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved.
+# Copyright (C) 2018-present, DV Klopfenstein. All rights reserved.
diff --git a/src/reactomepy/code/utils.py b/src/reactomepy/code/utils.py
@@ -14,12 +14,14 @@ def chk_unique(dcts, fld2expunique):
     """Ex: Expect that taxIds and displayNames are unique while abbreviation is not."""
     num_dcts = len(dcts)
     fld2sets = cx.OrderedDict([(f, set()) for f in fld2expunique])
-    for dct in dcts:
-        for fld in fld2expunique:
+    for fld in fld2expunique:
+        # print('FLD', fld)
+        for dct in dcts:
             fld2sets[fld].add(dct[fld])
     for fld, items in fld2sets.items():
         actually_unique = len(items) == num_dcts
-        assert actually_unique == fld2expunique[fld]
+        assert actually_unique == fld2expunique[fld], "{}:{}!={} {}".format(
+            fld, len(items), num_dcts, fld2expunique[fld])
 
 def get_gdbdr():
     """Return GraphDatabase driver given user args."""

diff --git a/src/reactomepy/code/wrpy/disease.py b/src/reactomepy/code/wrpy/disease.py
@@ -2,7 +2,7 @@
 
 from __future__ import print_function
 
-__copyright__ = "Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved."
+__copyright__ = "Copyright (C) 2018-present, DV Klopfenstein. All rights reserved."
 __author__ = "DV Klopfenstein"
 
 import os
@@ -11,7 +11,7 @@
 # from reactomepy.code.wrpy.utils import prt_namedtuple
 from reactomepy.code.wrpy.utils import prt_dict
 from reactomepy.code.wrpy.utils import prt_copyright_comment
-from reactomepy.code.utils import chk_unique
+from reactomepy.code.neo4jnode import Neo4jNode
 
 
 class Diseases(object):
@@ -23,7 +23,6 @@ class Diseases(object):
     def __init__(self, gdbdr):
         self.gdr = gdbdr  # GraphDatabase.driver
         self.diseases = self._init_disease()
-        chk_unique(self.diseases, {'displayName':True})
         self.num_dis = len(self.diseases)
 
     def wrpy_disease2fld(self, fout_py, field, varname):
@@ -53,17 +52,18 @@ def _init_disease(self):
         """Query for all diseases."""
         disease = []
         # fields_exp = set(['name', 'schemaClass', 'abbreviation', 'displayName', 'taxId', 'dbId'])
+        passed = True
         with self.gdr.session() as session:
-            res = session.run(self.dis_qry)
-            for rec in res.records():
+            for rec in session.run(self.dis_qry):
                 node = rec['node']
-                # assert node.keys() == fields_exp
                 assert node.get('schemaClass') == 'Disease'
                 assert node.get('databaseName') == 'DOID'
                 key2val = {f:node.get(f) for f in self.dis_keep}
                 disease.append(key2val)
         disease = sorted(disease, key=lambda d: d['displayName'])
+        if not passed:
+            raise RuntimeError('**FATAL: NOT UNIQUE: displayName')
         return disease
 
 
-# Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved.
+# Copyright (C) 2018-present, DV Klopfenstein. All rights reserved.
diff --git a/src/reactomepy/code/wrpy/inferredfrom.py b/src/reactomepy/code/wrpy/inferredfrom.py
@@ -69,8 +69,7 @@ def _init_dcts(self, fields_keep):
         ctr_hi = cx.Counter()
         ctr_lo = cx.Counter()
         with self.gdbdr.session() as session:
-            res = session.run(self.QUERY)
-            for rec in res.records():
+            for rec in session.run(self.QUERY):
                 ctr_hi[rec['hi']['speciesName']] += 1
                 ctr_lo[rec['lo']['speciesName']] += 1
                 #       MATCH (hi:Pathway)<-[inferredTo]-(lo:Pathway) RETURN hi, lo
@@ -83,6 +82,7 @@ def _init_dcts(self, fields_keep):
                 # print(rec['hi'])
                 pidlo = rec['lo']['stId']
                 abc = pidlo.split('-')[1].lower()
+                print('EEEEEEEEEEEEEEEEE', abc)
                 org2plo2phi[abc][pidlo].add(rec['hi']['stId'])
         print('HI', ctr_hi.most_common())
         print('LO', ctr_lo.most_common())

diff --git a/src/reactomepy/code/wrpy/pathway_molecules.py b/src/reactomepy/code/wrpy/pathway_molecules.py
@@ -2,7 +2,7 @@
 
 from __future__ import print_function
 
-__copyright__ = "Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved."
+__copyright__ = "Copyright (C) 2018-present, DV Klopfenstein. All rights reserved."
 __author__ = "DV Klopfenstein"
 
 import os
@@ -59,7 +59,7 @@ def get_pw2molecules(self, database='UniProt'):
         with self.gdbdr.session() as session:
             query = self.get_query(database)
             # print("QUERY: {Q}".format(Q=query))
-            for rec in session.run(query).records():
+            for rec in session.run(query):
                 # print(rec)
                 pwid2molecules[rec['pwid']].add(rec['mol_id'])
         return pwid2molecules
@@ -86,4 +86,4 @@ def wrpy_pw2molecules(self, fout_py, database='UniProt'):
         print("  {HMS} {MB} Mbytes {MSG} WROTE: {PY}".format(HMS=hms, MB=filesize, MSG=msg, PY=fout_py))
 
 
-# Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved.
+# Copyright (C) 2018-present, DV Klopfenstein. All rights reserved.
diff --git a/src/reactomepy/code/wrpy/pathway_query.py b/src/reactomepy/code/wrpy/pathway_query.py
@@ -2,7 +2,7 @@
 
 from __future__ import print_function
 
-__copyright__ = "Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved."
+__copyright__ = "Copyright (C) 2018-present, DV Klopfenstein. All rights reserved."
 __author__ = "DV Klopfenstein"
 
 import sys
@@ -113,7 +113,7 @@ def get_pw2dcts(self, prt=sys.stdout):
         # reltypes = cx.Counter()
         missing = cx.Counter()
         with self.gdr.session() as session:
-            for rec in session.run(qry).records():
+            for rec in session.run(qry):
                 rel = rec['r']
                 typ = rel.type
                 # reltypes[rel.type] += 1
@@ -364,7 +364,7 @@ def _get_relationship_typecnt(session, pw_stid):
         ctr = cx.Counter()
         qry = 'MATCH (Pathway{{stId:"{ID}"}})-[r]-() RETURN r'.format(ID=pw_stid)
         res = session.run(qry)
-        for rec in res.records():
+        for rec in res.data():
             ctr[rec['r'].type] += 1
         return ctr
 
@@ -382,4 +382,4 @@ def prt_cnts(ctr, prt=sys.stdout):
             prt.write("  {CNT:6} {FLD}\n".format(CNT=cnt, FLD=fld))
 
 
-# Copyright (C) 2018-2019, DV Klopfenstein. All rights reservedsEvent
+# Copyright (C) 2018-present, DV Klopfenstein. All rights reserved.
diff --git a/src/reactomepy/code/wrpy/referencedatabase.py b/src/reactomepy/code/wrpy/referencedatabase.py
@@ -2,7 +2,7 @@
 
 from __future__ import print_function
 
-__copyright__ = "Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved."
+__copyright__ = "Copyright (C) 2018-present, DV Klopfenstein. All rights reserved."
 __author__ = "DV Klopfenstein"
 
 import os
@@ -45,7 +45,7 @@ def _init_data(self):
         id2nt = {}
         ntobj = cx.namedtuple('NtObj', 'displayName accessUrl url')
         with self.gdr.session() as session:
-            for rec in session.run(self.qry).records():
+            for rec in session.run(self.qry):
                 node = rec['node']
                 assert set(node.keys()) == set(ReferenceDatabase.params_req), \
                     'EXP({})\nACT({})'.format(
@@ -61,4 +61,4 @@ def _init_data(self):
         return id2nt
 
 
-# Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved.
+# Copyright (C) 2018-present, DV Klopfenstein. All rights reserved.