Skip to content

Commit

Permalink
Fix fillers pipeline
Browse files (browse the repository at this point in the history)
  • Loading branch information
matentzn committed Jun 23, 2024
1 parent c6cb03a commit fe24d2e
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 17 deletions.
24 changes: 13 additions & 11 deletions src/scripts/lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -1058,7 +1058,7 @@ def add_upheno_id(df, pattern, upheno_map, blacklisted_upheno_ids, startid, maxi
generate_id(i=i, id_gen=id_gen, upheno_prefix=upheno_prefix) for i in df["defined_class"]
]

upheno_map = pd.concat([upheno_map, df[["id", "defined_class"]]], ignore_index=True)
upheno_map = pd.concat([upheno_map, df[["defined_class", "id"]]], ignore_index=True)
df = df.drop(["pattern", "id"], axis=1)
df = df.drop_duplicates()
return df, upheno_map
Expand Down Expand Up @@ -1139,12 +1139,14 @@ def extract_upheno_fillers_for_all_ontologies(oids, ontology_for_matching_dir,
)


def add_upheno_ids_to_fillers_and_filter_out_bfo(pattern_dir,
upheno_map,
blacklisted_upheno_ids,
upheno_fillers_dir,
upheno_config,
upheno_prefix):
def add_upheno_ids_to_fillers_and_filter_out_bfo(
pattern_dir,
upheno_map,
blacklisted_upheno_ids,
upheno_fillers_dir,
upheno_config,
upheno_prefix
):
minid = upheno_config.get_min_upheno_id()
maxid = upheno_config.get_max_upheno_id()

Expand All @@ -1161,7 +1163,7 @@ def add_upheno_ids_to_fillers_and_filter_out_bfo(pattern_dir,

# Update the highest id from the last runs
startid = get_highest_id(upheno_map["defined_class"], upheno_prefix)

if startid < minid:
startid = minid
df, upheno_map = add_upheno_id(
Expand All @@ -1170,16 +1172,16 @@ def add_upheno_ids_to_fillers_and_filter_out_bfo(pattern_dir,
upheno_map=upheno_map,
blacklisted_upheno_ids=blacklisted_upheno_ids,
startid=startid,
maxid=maxid, upheno_prefix=upheno_prefix
maxid=maxid,
upheno_prefix=upheno_prefix
)

# filter out "independent continuant" locations
if 'location' in df.columns:
df = df[~df["location"].str.startswith("http://purl.obolibrary.org/obo/BFO_")]
# noinspection PyTypeChecker
df.to_csv(tsv, sep="\t", index=False)

return upheno_map


def replace_owl_thing_in_tsvs(pattern_dir, upheno_config, upheno_fillers_dir):
for pattern in os.listdir(pattern_dir):
Expand Down
17 changes: 11 additions & 6 deletions src/scripts/upheno_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,12 +49,17 @@ def add_upheno_ids_to_fillers(patterns_directory, fillers_directory, tmp_directo
with open(blacklisted_upheno_ids_path) as f:
blacklisted_upheno_ids = f.read().splitlines()

add_upheno_ids_to_fillers_and_filter_out_bfo(pattern_dir=patterns_directory,
upheno_map=upheno_map,
blacklisted_upheno_ids=blacklisted_upheno_ids,
upheno_config=config,
upheno_fillers_dir=fillers_directory,
upheno_prefix=upheno_prefix)
add_upheno_ids_to_fillers_and_filter_out_bfo(
pattern_dir=patterns_directory,
upheno_map=upheno_map,
blacklisted_upheno_ids=blacklisted_upheno_ids,
upheno_config=config,
upheno_fillers_dir=fillers_directory,
upheno_prefix=upheno_prefix)

upheno_map = upheno_map.drop_duplicates()
upheno_map.sort_values("defined_class", inplace=True)
upheno_map.to_csv(config.get_upheno_id_map(), sep="\t", index=False)


# Subcommand: create_sssom
Expand Down

0 comments on commit fe24d2e

Please sign in to comment.