Skip to content

Commit

Permalink
Added to post-merge to export.sh script a command that generates gene…
Browse files Browse the repository at this point in the history
…ric-physical-entity-map.json (required for PC app-ui web app).

Changed premerge, Converter to use datasource's id as the xmlbase instead of pc2 base.
Started DrugbankCleaner - todo...
  • Loading branch information
IgorRodchenkov committed Apr 5, 2024
1 parent 594726e commit 506c9a9
Show file tree
Hide file tree
Showing 5 changed files with 47 additions and 6 deletions.
3 changes: 2 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ To see available commands and options, run:
bash cpath2.sh

In order to create a new cpath2 instance, define or update the metadata.json,
prepare input data archives (see below how), and run
prepare input data archives (see below how), make sure `jq` and `gunzip`
are installed, and run:

bash cpath2.sh --build 2>&1 >build.log &

Expand Down
27 changes: 27 additions & 0 deletions src/main/java/cpath/cleaner/DrugbankCleaner.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package cpath.cleaner;

import cpath.service.api.Cleaner;

import java.io.InputStream;
import java.io.OutputStream;

/**
 * Cleaner for the DrugBank BioPAX data (work in progress).
 *
 * <p>The xref.db normalization described in the comment inside this class is not
 * implemented yet. Until it is, {@link #clean(InputStream, OutputStream)} copies the
 * input to the output unchanged, so that the cleaned output is never left empty.
 */
public class DrugbankCleaner implements Cleaner {

  /*
   * DrugBank BioPAX data uses the following values in xref.db properties,
   * which the BioPAX validator reports as "Unknown"; so we need to replace them
   * with the corresponding standard prefix/name from bioregistry.io (or identifiers.org):
   *
   *   GenBank Gene Database                    -> "genbank"
   *   GenBank Protein Database                 -> "genbank" (numeric IDs, not like it's in ncbiprotein)
   *   Therapeutic Targets Database             -> (todo)
   *   Guide to Pharmacology                    -> (todo)
   *   HUGO Gene Nomenclature Committee (HGNC)  -> "hgnc.symbol" (or "HGNC Symbol")
   *   IUPHAR                                   -> (todo)
   *   Drugs Product Database (DPD)             -> (todo)
   */

  /**
   * Writes the given data to the output stream.
   *
   * <p>Currently a pass-through: the bytes are copied as is. Neither stream is closed here.
   *
   * @param data input stream to read the original data from
   * @param cleanedData output stream to write the result to
   * @throws java.io.UncheckedIOException if reading or writing fails
   */
  @Override
  public void clean(InputStream data, OutputStream cleanedData) {
    //TODO: implement the xref.db replacements listed above (for now, copy the data as is)
    try {
      data.transferTo(cleanedData);
      cleanedData.flush();
    } catch (java.io.IOException e) {
      // fully-qualified names are used to keep the file's import section unchanged
      throw new java.io.UncheckedIOException("DrugbankCleaner: failed to copy the data", e);
    }
  }
}
12 changes: 8 additions & 4 deletions src/main/java/cpath/service/ConsoleApplication.java
Original file line number Diff line number Diff line change
Expand Up @@ -351,9 +351,9 @@ private void postmerge() throws IOException {
// generate the "Detailed" pathway data file:
createDetailedBiopax(mainModel, index);

// generate the export.sh script (to run Paxtools commands for exporting the BioPAX files to other formats)
// Generate export.sh script (to convert the data/model to other formats)
LOG.info("writing 'export.sh' script to convert the BioPAX models to SIF, GSEA, SBGN...");
final String commonPrefix = service.settings().exportArchivePrefix(); //e.g., PathwayCommons13
final String commonPrefix = service.settings().exportArchivePrefix();
writer = new PrintWriter(new OutputStreamWriter(Files.newOutputStream(
Paths.get(service.settings().exportScriptFile())), StandardCharsets.UTF_8));
writer.println("#!/bin/sh");
Expand All @@ -364,7 +364,7 @@ private void postmerge() throws IOException {
writer.println("# sh export.sh &");

//write commands to the script file for 'All' and 'Detailed' BioPAX input files:
// writeScriptCommands(service.settings().biopaxFileName("Detailed"), writer, true);
//writeScriptCommands(service.settings().biopaxFileName("Detailed"), writer, true);
writeScriptCommands(service.settings().biopaxFileName("All"), writer, true);

//rename SIF files that were cut from corresponding extended SIF (.txt) ones
Expand All @@ -379,7 +379,11 @@ private void postmerge() throws IOException {
writer.println(String.format("%s %s '%s' '%s' %s 2>&1 &", javaRunPaxtools, "summarize",
service.settings().biopaxFileName("All"), "physical_entities.json", "--uri-ids"));
writer.println("wait");
writer.println("gzip pathways.txt *.json");
writer.println("""
gunzip -c physical_entities.json.gz | jq -cS 'map(select(.generic)) | reduce .[] as $o ({}; . + {($o.uri): {name: $o.name, label:$o.label, synonyms:$o."hgnc.symbol"}})' > generic-physical-entity-map.json
""");
writer.println("wait");
writer.println("gzip pathways.txt physical_entities.json");
writer.println("echo \"All done.\"");
writer.close();
LOG.info("postmerge: done.");
Expand Down
3 changes: 2 additions & 1 deletion src/main/java/cpath/service/PreMerger.java
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@ void premerge() {
+ "; skipping for this data source...");
return; // skip due to the error
}
converter.setXmlBase(xmlBase);
// converter.setXmlBase(xmlBase);
converter.setXmlBase(datasource.getIdentifier()+":"); //todo: test if it works/looks better than with instance's xmlBase...
} else {
log.info("premerge(), Converter class is not defined for " + mid);
}
Expand Down
8 changes: 8 additions & 0 deletions work/make_generic_pe_map.bash
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
#!/usr/bin/env bash
# After the PC data build is done,
# generates generic-physical-entity-map.json (for PC app-ui webapp)
# from physical_entities.json.gz.
#
# Both paths below are relative to the current working directory.
#
# Prerequisites:
# - gunzip
# - jq (https://stedolan.github.io/jq/; install it with the system package manager,
#   e.g. "apt-get install jq" or "brew install jq")

# Stop on the first error (including a failure of any command in the pipeline),
# so that the success message below is printed only when the file was really generated.
set -euo pipefail

input="downloads/physical_entities.json.gz"
output="downloads/generic-physical-entity-map.json"

# Keep only the entries flagged as generic and build a single JSON object keyed by uri.
gunzip -c "$input" | jq -cS 'map(select(.generic)) | reduce .[] as $o ({}; . + {($o.uri): {name: $o.name, label:$o.label, synonyms:$o."hgnc.symbol"}})' > "$output"
echo "Generated $output"

0 comments on commit 506c9a9

Please sign in to comment.