Skip to content

Commit

Permalink
FIt, just save what has changed between old version and less old version
Browse files Browse the repository at this point in the history
  • Loading branch information
dvklopfenstein committed May 23, 2022
1 parent 3c3c3f8 commit 8d289e1
Show file tree
Hide file tree
Showing 21 changed files with 52,168 additions and 40,243 deletions.
6 changes: 3 additions & 3 deletions src/reactomepy/code/neo4jnode.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
"""Holds information for one data schema item."""

__copyright__ = "Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved."
__copyright__ = "Copyright (C) 2018-present, DV Klopfenstein. All rights reserved."
__author__ = "DV Klopfenstein"

# import os
Expand Down Expand Up @@ -75,7 +75,7 @@ def get_rel2nodes(self):
qry = self.qrypat.format(DBID=self.item_id)
#print('\n{Q}'.format(Q=qry))
with self.kws['gdbdr'].session() as session:
for rec in session.run(qry).records():
for rec in session.run(qry):
rel = rec['rel'].type
if rel not in self.rel_excl:
#print('{I} {R:19} {NOD}'.format(I=idx, R=rec['rel'].type, NOD=rec['dst']))
Expand All @@ -94,4 +94,4 @@ def _get_abc(self, abc_param, species_nodes, objsch):
return 'XXX'


# Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved.
# Copyright (C) 2018-present, DV Klopfenstein. All rights reserved.
1 change: 1 addition & 0 deletions src/reactomepy/code/neo4jnodebasic.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def __str__(self):
# Parameters on all Nodes
try:
#### msg = [self.objsch.prtfmt.format(**self.ntp._asdict())]
print('DDDDDDDD src/reactomepy/code/neo4jnodebasic.py', self.item_id, dir(self.objsch))
msg = [self.objsch.prtfmt.format(**self.ntp._asdict(), **self.objsch.get_optstr(self.ntp.optional))]
# for rel, dsts in self.relationship.items():
# msg.append('{REL} dbIds[{N}]: {IDs}'.format(REL=rel, N=len(dsts), IDs=' '.join(str(o.item_id) for o in dsts)))
Expand Down
30 changes: 19 additions & 11 deletions src/reactomepy/code/query/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from __future__ import print_function

__copyright__ = "Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved."
__copyright__ = "Copyright (C) 2018-present, DV Klopfenstein. All rights reserved."
__author__ = "DV Klopfenstein"

import sys
Expand Down Expand Up @@ -93,6 +93,10 @@ def get_dbid2node(self, paramvalstr, rels, exact=True):
print(query)
print('GET ALL dbIds from QUERY AND LOWER')
dbid2node, src_dbids = self.ses_dbid2nodebasic_srcdst(query, prt=sys.stdout)
for a, b in dbid2node.items():
print('FFFFFFFFFFFFFFFFFFFFFF ../ReactomePy/src/reactomepy/code/query/functions.py', a)
print('FFFFFFFFFFFFFFFFFFFFFF ../ReactomePy/src/reactomepy/code/query/functions.py', b)
print('GGGGGGGGGGGGGGGGGGGGGG ../ReactomePy/src/reactomepy/code/query/functions.py')
dbid2dct = self.get_dbid2dct_g_dbid2nodeb(dbid2node, exact)
print('FILL DICT WITH PARAMETER VALUES AND RELATIONSHIP DESTINATION NODES')
return {'dbid2node':dbid2node, 'dbid2dct':dbid2dct, 'relationships':rels,
Expand Down Expand Up @@ -148,7 +152,10 @@ def ses_relationship_dcts(self, dbid2nodebasic):
def get_dbid2nodedct(self, dbids):
"""Get Neo4jNodes with neo4j params in a dict and direct children relationships loaded."""
# Run 1: MATCH (src:DatabaseObject{dbId:DBID}) RETURN src.schemaClass AS schemaClass
print('DDDDDDDDDDDDDD ../ReactomePy/src/reactomepy/code/query/functions.py', dbids)
dbid2sch = self.ses_dbid2sch(dbids)
for a, b in dbid2sch.items():
print('DDDDDDDDDDDDDD ../ReactomePy/src/reactomepy/code/query/functions.py', a, b)
assert set(dbids) == set(dbid2sch), "TBD: Report dbIds NOT FOUND"
dbid2nodebasic = {dbid:Neo4jNodeBasic(dbid, sch) for dbid, sch in dbid2sch.items()}
# Run 2a: MATCH (s:DatabaseObject{dbId:ID})-[r]->(d) RETURN s, r, d.dbId AS d_Id
Expand All @@ -165,7 +172,8 @@ def ses_dbid2sch(self, dbids):
with self.gdbdr.session() as session:
for dbid in dbids:
query = pre + str(dbid) + post
for rec in session.run(query).records():
print('QQQQQQQQQQ ../ReactomePy/src/reactomepy/code/query/functions.py', query)
for rec in session.run(query):
dbid2sch[dbid] = rec['schemaClass']
return dbid2sch

Expand All @@ -175,10 +183,10 @@ def ses_dbid2nodebasic_srcdst(self, query, prt=sys.stdout, idxmod=100000):
dbid2nodebasic = {}
src_dbids = set()
tic = timeit.default_timer()
print('HELLO')
## print('HELLO')
with self.gdbdr.session() as session:
for idx, rec in enumerate(session.run(query).records()):
print('REC:', rec)
for idx, rec in enumerate(session.run(query)):
## print('REC:', rec)
src_dbid = rec['src_dbId']
dst_dbid = rec['dst_dbId']
src_dbids.add(src_dbid)
Expand All @@ -200,7 +208,7 @@ def ses_dbid2nodebasic_src(self, query):
src_dbids = set()
tic = timeit.default_timer()
with self.gdbdr.session() as session:
for idx, rec in enumerate(session.run(query).records()):
for idx, rec in enumerate(session.run(query)):
src_dbid = rec['src_dbId']
src_dbids.add(src_dbid)
if src_dbid not in dbid2nodebasic:
Expand All @@ -226,7 +234,7 @@ def _addval_src_norel(pat, dbid2node_missing, session):
tic = timeit.default_timer()
for dbid, nodebasic in dbid2node_missing.items():
qry = pat.format(DBID=str(dbid))
for rec in session.run(qry).records():
for rec in session.run(qry):
dbid2dct[dbid] = nodebasic.objsch.get_dict(rec['src'])
print(' HMS: {HMS} {N:6,} dbIds: {Q}'.format(
HMS=get_hms(tic), N=len(dbid2node_missing,), Q=qry))
Expand All @@ -240,7 +248,7 @@ def _addval_src_rel_dst(self, pat, dbid2nodebasic, session):
tic = timeit.default_timer()
for dbid, nodebasic in dbid2nodebasic.items():
qry = pat.replace('ID', str(dbid))
for rec in session.run(qry).records():
for rec in session.run(qry):
#### nodebasic.dct = nodebasic.objsch.get_dict(rec['s'])
dbid2dct[dbid] = nodebasic.objsch.get_dict(rec['s'])
rel = rec['r'].type
Expand Down Expand Up @@ -290,7 +298,7 @@ def get_version(gdbdr):
version = None
query = 'MATCH (v:DBInfo) RETURN v'
with gdbdr.session() as session:
for rec in session.run(query).records():
for rec in session.run(query):
dbinfo = rec['v']
assert dbinfo.get('name') == 'reactome'
version = dbinfo.get('version')
Expand Down Expand Up @@ -333,9 +341,9 @@ def get_dbids(gdbdr, nodestr='Complex{stId:"R-HSA-167199"}'):
dbids = set()
query = 'MATCH (n:NODESTR) RETURN n'.format(NODESTR=nodestr)
with gdbdr.session() as session:
for rec in session.run(query).records():
for rec in session.run(query):
dbids.add(rec['n']['dbId'])
return dbids


# Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved.
# Copyright (C) 2018-present, DV Klopfenstein. All rights reserved.
20 changes: 10 additions & 10 deletions src/reactomepy/code/query/get_nodes.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from __future__ import print_function

__copyright__ = "Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved."
__copyright__ = "Copyright (C) 2018-present, DV Klopfenstein. All rights reserved."
__author__ = "DV Klopfenstein"

import sys
Expand All @@ -29,7 +29,7 @@ def get_dbids(self, qry, prt=sys.stdout):
# Example: 'MATCH (s:Figure) RETURN s.dbId AS dbId'
dbids = set()
with self.gdbdr.session() as session:
for idx, rec in enumerate(session.run(qry).records()):
for idx, rec in enumerate(session.run(qry)):
dbids.add(rec['dbId'])
if prt and idx%10000 == 0:
prt.write('{HMS} {IDX} {DBID}'.format(
Expand All @@ -41,7 +41,7 @@ def get_dbid2val(self, qry, prt=sys.stdout):
dbid2val = {}
# Example: 'MATCH (f:Figure) RETURN f.dbId AS dbId, f.url AS val'
with self.gdbdr.session() as session:
for rec in session.run(qry).records():
for rec in session.run(qry):
dbid2val[rec['dbId']] = rec['val']
if prt:
prt.write(' {HMS} {N:,} dbIds: {Q}\n'.format(
Expand All @@ -53,7 +53,7 @@ def get_dbid2set(self, qry, prt=sys.stdout):
dbid2set = cx.defaultdict(set)
# Example: MATCH (s:InstanceEdit)-[r]->(f:Figure) RETURN s.dbId AS dbId, f.dbId AS val
with self.gdbdr.session() as session:
for rec in session.run(qry).records():
for rec in session.run(qry):
dbid2set[rec['dbId']].add(rec['val'])
if prt:
prt.write(' {HMS} {N:,} dbIds: {Q}\n'.format(
Expand All @@ -67,7 +67,7 @@ def get_dbid2ntset(self, qry, prt=sys.stdout):
# RETURN f.dbId AS key_dbId, type(r) AS rtyp, e.dbId AS val_dbId
ntobj = cx.namedtuple('NtIdRel', 'dbId rel')
with self.gdbdr.session() as session:
for rec in session.run(qry).records():
for rec in session.run(qry):
dbid2ntset[rec['key_dbId']].add(ntobj(dbId=rec['val_dbId'], rel=rec['rtyp']))
if prt:
prt.write(' {HMS} {N:,} rel-dbIds: {Q}\n'.format(
Expand All @@ -81,7 +81,7 @@ def get_dbid2ntnodes(self, qry, prt=sys.stdout):
ntobjkey = cx.namedtuple('NtIdRel', 'dbId rel')
ntobjnode = cx.namedtuple('NtSRD', 'src rel dst')
with self.gdbdr.session() as session:
for rec in session.run(qry).records():
for rec in session.run(qry):
src = rec['s']
rel = rec['rtyp']
dst = rec['d']
Expand All @@ -98,7 +98,7 @@ def get_nodes_query(self, query, msg=None):
nodes = []
tic = timeit.default_timer()
with self.gdbdr.session() as session:
for rec in session.run(query).records():
for rec in session.run(query):
nodes.append(Neo4jNode(rec['s']))
print(' {HMS} {N:,} {MSG}'.format(
HMS=get_hms(tic), N=len(nodes), MSG=msg if msg else query))
Expand All @@ -110,7 +110,7 @@ def get_nodes_sch(self, srchstr, msg=None):
tic = timeit.default_timer()
qry = 'MATCH (s:{SRCHSTR}) RETURN s'.format(SRCHSTR=srchstr)
with self.gdbdr.session() as session:
for rec in session.run(qry).records():
for rec in session.run(qry):
nodes.append(Neo4jNode(rec['s']))
print(' {HMS} {N:,} {MSG}'.format(
HMS=get_hms(tic), N=len(nodes), MSG=msg if msg else srchstr))
Expand All @@ -124,7 +124,7 @@ def get_dbid2node(self, dbids, msg='nodes found'):
with self.gdbdr.session() as session:
for dbid in dbids:
query = qupat.format(DBID=dbid)
for rec in session.run(query).records():
for rec in session.run(query):
dbid2node[dbid] = Neo4jNode(rec['s'])
print('FASTISH {HMS} {N:,} {MSG}'.format(HMS=get_hms(tic), N=len(dbid2node), MSG=msg))
return dbid2node
Expand All @@ -140,4 +140,4 @@ def _shorten_queryprt(qry):
return qry


# Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved.
# Copyright (C) 2018-present, DV Klopfenstein. All rights reserved.
8 changes: 5 additions & 3 deletions src/reactomepy/code/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,12 +14,14 @@ def chk_unique(dcts, fld2expunique):
"""Ex: Expect that taxIds and displayNames are unique while abbreviation is not."""
num_dcts = len(dcts)
fld2sets = cx.OrderedDict([(f, set()) for f in fld2expunique])
for dct in dcts:
for fld in fld2expunique:
for fld in fld2expunique:
# print('FLD', fld)
for dct in dcts:
fld2sets[fld].add(dct[fld])
for fld, items in fld2sets.items():
actually_unique = len(items) == num_dcts
assert actually_unique == fld2expunique[fld]
assert actually_unique == fld2expunique[fld], "{}:{}!={} {}".format(
fld, len(items), num_dcts, fld2expunique[fld])

def get_gdbdr():
"""Return GraphDatabase driver given user args."""
Expand Down
14 changes: 7 additions & 7 deletions src/reactomepy/code/wrpy/disease.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from __future__ import print_function

__copyright__ = "Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved."
__copyright__ = "Copyright (C) 2018-present, DV Klopfenstein. All rights reserved."
__author__ = "DV Klopfenstein"

import os
Expand All @@ -11,7 +11,7 @@
# from reactomepy.code.wrpy.utils import prt_namedtuple
from reactomepy.code.wrpy.utils import prt_dict
from reactomepy.code.wrpy.utils import prt_copyright_comment
from reactomepy.code.utils import chk_unique
from reactomepy.code.neo4jnode import Neo4jNode


class Diseases(object):
Expand All @@ -23,7 +23,6 @@ class Diseases(object):
def __init__(self, gdbdr):
self.gdr = gdbdr # GraphDatabase.driver
self.diseases = self._init_disease()
chk_unique(self.diseases, {'displayName':True})
self.num_dis = len(self.diseases)

def wrpy_disease2fld(self, fout_py, field, varname):
Expand Down Expand Up @@ -53,17 +52,18 @@ def _init_disease(self):
"""Query for all diseases."""
disease = []
# fields_exp = set(['name', 'schemaClass', 'abbreviation', 'displayName', 'taxId', 'dbId'])
passed = True
with self.gdr.session() as session:
res = session.run(self.dis_qry)
for rec in res.records():
for rec in session.run(self.dis_qry):
node = rec['node']
# assert node.keys() == fields_exp
assert node.get('schemaClass') == 'Disease'
assert node.get('databaseName') == 'DOID'
key2val = {f:node.get(f) for f in self.dis_keep}
disease.append(key2val)
disease = sorted(disease, key=lambda d: d['displayName'])
if not passed:
raise RuntimeError('**FATAL: NOT UNIQUE: displayName')
return disease


# Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved.
# Copyright (C) 2018-present, DV Klopfenstein. All rights reserved.
4 changes: 2 additions & 2 deletions src/reactomepy/code/wrpy/inferredfrom.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,8 +69,7 @@ def _init_dcts(self, fields_keep):
ctr_hi = cx.Counter()
ctr_lo = cx.Counter()
with self.gdbdr.session() as session:
res = session.run(self.QUERY)
for rec in res.records():
for rec in session.run(self.QUERY):
ctr_hi[rec['hi']['speciesName']] += 1
ctr_lo[rec['lo']['speciesName']] += 1
# MATCH (hi:Pathway)<-[inferredTo]-(lo:Pathway) RETURN hi, lo
Expand All @@ -83,6 +82,7 @@ def _init_dcts(self, fields_keep):
# print(rec['hi'])
pidlo = rec['lo']['stId']
abc = pidlo.split('-')[1].lower()
print('EEEEEEEEEEEEEEEEE', abc)
org2plo2phi[abc][pidlo].add(rec['hi']['stId'])
print('HI', ctr_hi.most_common())
print('LO', ctr_lo.most_common())
Expand Down
6 changes: 3 additions & 3 deletions src/reactomepy/code/wrpy/pathway_molecules.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from __future__ import print_function

__copyright__ = "Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved."
__copyright__ = "Copyright (C) 2018-present, DV Klopfenstein. All rights reserved."
__author__ = "DV Klopfenstein"

import os
Expand Down Expand Up @@ -59,7 +59,7 @@ def get_pw2molecules(self, database='UniProt'):
with self.gdbdr.session() as session:
query = self.get_query(database)
# print("QUERY: {Q}".format(Q=query))
for rec in session.run(query).records():
for rec in session.run(query):
# print(rec)
pwid2molecules[rec['pwid']].add(rec['mol_id'])
return pwid2molecules
Expand All @@ -86,4 +86,4 @@ def wrpy_pw2molecules(self, fout_py, database='UniProt'):
print(" {HMS} {MB} Mbytes {MSG} WROTE: {PY}".format(HMS=hms, MB=filesize, MSG=msg, PY=fout_py))


# Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved.
# Copyright (C) 2018-present, DV Klopfenstein. All rights reserved.
8 changes: 4 additions & 4 deletions src/reactomepy/code/wrpy/pathway_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from __future__ import print_function

__copyright__ = "Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved."
__copyright__ = "Copyright (C) 2018-present, DV Klopfenstein. All rights reserved."
__author__ = "DV Klopfenstein"

import sys
Expand Down Expand Up @@ -113,7 +113,7 @@ def get_pw2dcts(self, prt=sys.stdout):
# reltypes = cx.Counter()
missing = cx.Counter()
with self.gdr.session() as session:
for rec in session.run(qry).records():
for rec in session.run(qry):
rel = rec['r']
typ = rel.type
# reltypes[rel.type] += 1
Expand Down Expand Up @@ -364,7 +364,7 @@ def _get_relationship_typecnt(session, pw_stid):
ctr = cx.Counter()
qry = 'MATCH (Pathway{{stId:"{ID}"}})-[r]-() RETURN r'.format(ID=pw_stid)
res = session.run(qry)
for rec in res.records():
for rec in res.data():
ctr[rec['r'].type] += 1
return ctr

Expand All @@ -382,4 +382,4 @@ def prt_cnts(ctr, prt=sys.stdout):
prt.write(" {CNT:6} {FLD}\n".format(CNT=cnt, FLD=fld))


# Copyright (C) 2018-2019, DV Klopfenstein. All rights reservedsEvent
# Copyright (C) 2018-present, DV Klopfenstein. All rights reserved.
6 changes: 3 additions & 3 deletions src/reactomepy/code/wrpy/referencedatabase.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

from __future__ import print_function

__copyright__ = "Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved."
__copyright__ = "Copyright (C) 2018-present, DV Klopfenstein. All rights reserved."
__author__ = "DV Klopfenstein"

import os
Expand Down Expand Up @@ -45,7 +45,7 @@ def _init_data(self):
id2nt = {}
ntobj = cx.namedtuple('NtObj', 'displayName accessUrl url')
with self.gdr.session() as session:
for rec in session.run(self.qry).records():
for rec in session.run(self.qry):
node = rec['node']
assert set(node.keys()) == set(ReferenceDatabase.params_req), \
'EXP({})\nACT({})'.format(
Expand All @@ -61,4 +61,4 @@ def _init_data(self):
return id2nt


# Copyright (C) 2018-2019, DV Klopfenstein. All rights reserved.
# Copyright (C) 2018-present, DV Klopfenstein. All rights reserved.
Loading

0 comments on commit 8d289e1

Please sign in to comment.