Skip to content

Commit 528945d

Browse files
committed
include fix_outdated script
authored by Matthias Bernt
1 parent 163f60d commit 528945d

File tree

3 files changed

+201
-2
lines changed

3 files changed

+201
-2
lines changed

Diff for: Makefile

+1-2
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,9 @@ lint: ## Lint all yaml files for a given INSTANCE
1313
find ./$(INSTANCE) -name '*.yml' | grep '^\./[^/]*/' | xargs -n 1 -P $(NPROC) python3 scripts/identify_unpinned.py
1414

1515
fix: ## For a given INSTANCE fix all lockfiles and add the latest revision to every repo that has no revision
16-
@# Generates the lockfile or updates it if it is missing tools
1716
find ./$(INSTANCE) -name '*.yml' | grep '^\./[^/]*/' | xargs -n 1 -P $(NPROC) python3 scripts/fix_lockfile.py
18-
@# --without says to add the latest revision to every entry missing one (i.e. update all)
1917
find ./$(INSTANCE) -name '*.yml' | grep '^\./[^/]*/' | xargs -n 1 -P $(NPROC) python3 scripts/update_tool.py --without --log debug
18+
find ./$(INSTANCE) -name '*.yml.lock' | grep '^\./[^/]*/' | grep -v '_test\.yml\.lock$' | xargs -n 1 -P $(NPROC) python3 scripts/fix_outdated.py
2019

2120
update-owner: ## Run the update script for a subset of repos defined by the OWNER var
2221
find ./$(INSTANCE) -name '*.yml' | grep '^\./[^/]*/' | xargs -n 1 -P $(NPROC) python scripts/update_tool.py --owner $(OWNER)

Diff for: requirements.txt

+4
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,7 @@ bioblend
22
ephemeris
33
pykwalify
44
PyYAML
5+
6+
# for fix_outdated.py
7+
mercurial
8+
galaxy-tool-util

Diff for: scripts/fix_outdated.py

+196
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,196 @@
1+
# Check all revisions in the lockfile if they are installable.
2+
# Remove if not and add the next installable revision.
3+
#
4+
# Only updates the lock file and does not install
5+
# or uninstall any tools from a Galaxy instance.
6+
#
7+
# Background: for each tool version there can be only one revision installed
8+
# (multiple revisions with the same version happen e.g. if the version
9+
# is not bumped but some files are updated)
10+
#
11+
# Revisions that became not-installable are treated as a safe update
12+
# because the author claims the tool did not change its behavior from
13+
# the reproducibility perspective.
14+
#
15+
# The script queries the TS to get_ordered_installable_revisions
16+
# and clones (to /tmp/) the mercurial repos to get all revisions
17+
# (the latter is only done for tools with revisions that are not
18+
# installable)
19+
#
20+
# For each revision cur that has been replaced by nxt
21+
# - check that the tool versions of the revisions are really the same
22+
# - if cur and nxt are in the lock file cur is removed
23+
# - if a Galaxy URL is given it is checked that cur is not installed
24+
# - if only cur is in the list then cur is removed and nxt is added
25+
26+
import argparse
27+
import logging
28+
import subprocess
29+
import os.path
30+
import yaml
31+
from typing import (
32+
Dict,
33+
List,
34+
Optional,
35+
Set,
36+
Tuple,
37+
)
38+
39+
import bioblend
40+
from bioblend import galaxy, toolshed
41+
from galaxy.tool_util.loader_directory import load_tool_sources_from_path
42+
43+
44+
# Root logger (getLogger() without a name); its level and handler are set
# in the `if __name__ == "__main__":` block at the bottom of this script.
logger = logging.getLogger()
45+
46+
47+
def clone(toolshed_url: str, name: str, owner: str, repo_path: str) -> None:
    """Make sure an up-to-date Mercurial checkout exists at ``repo_path``.

    If ``repo_path`` does not exist yet, the repository
    ``{toolshed_url}/repos/{owner}/{name}`` is cloned into it; otherwise
    ``hg pull -u`` is run inside the existing checkout.

    Raises:
        AssertionError: if the ``hg`` command exits with a non-zero status.
    """
    if os.path.exists(repo_path):
        # Existing checkout: fetch new changesets and update the working copy.
        cmd = ["hg", "pull", "-u"]
        workdir = repo_path
    else:
        # Fresh clone; run from the current directory (cwd=None).
        cmd = ["hg", "clone", f"{toolshed_url}/repos/{owner}/{name}", repo_path]
        workdir = None

    proc = subprocess.run(
        cmd,
        cwd=workdir,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    assert proc.returncode == 0, f"failed {' '.join(cmd)} in {repo_path}"
61+
62+
def get_all_revisions(toolshed_url: str, name: str, owner: str) -> List[str]:
    """Return the short node ids of all changesets of a Tool Shed repository.

    The repository is cloned (or refreshed) below ``/tmp/repos/`` via
    ``clone``, updated to ``tip``, and its history is read with ``hg log``.

    Returns:
        The short changeset hashes in reversed ``hg log`` output order.
        ``hg log`` prints the newest changeset first by default, so the
        result runs from oldest to newest.

    Raises:
        AssertionError: if ``hg update`` or ``hg log`` exits non-zero.
    """
    repo_path = f"/tmp/repos/{os.path.basename(toolshed_url)}-{owner}-{name}"
    clone(toolshed_url, name, owner, repo_path)

    cmd = ["hg", "update", "tip"]
    proc = subprocess.run(cmd, cwd=repo_path, capture_output=True, text=True)
    assert proc.returncode == 0, f"failed {' '.join(cmd)} in {repo_path}"

    cmd = ["hg", "log", "--template", "{node|short}\n"]
    proc = subprocess.run(cmd, cwd=repo_path, capture_output=True, text=True)
    # Check the result of `hg log` itself (previously the stale result of
    # `hg update` was re-checked before `hg log` had even been run).
    assert proc.returncode == 0, f"failed {' '.join(cmd)} in {repo_path}"

    return list(reversed(proc.stdout.splitlines()))
72+
73+
74+
def get_all_versions(
    toolshed_url: str, name: str, owner: str, revisions: List[str]
) -> Dict[str, Set[Tuple[str, str]]]:
    """Collect the ``(tool id, tool version)`` pairs present at each revision.

    The repository checkout below ``/tmp/repos/`` is refreshed via ``clone``;
    then, for every entry of ``revisions``, the working copy is updated to
    that revision and all tool sources found in it are parsed.

    Returns:
        A dict mapping each revision to the set of ``(id, version)`` tuples
        of the tools loaded from the checkout at that revision.

    Raises:
        AssertionError: if ``hg update`` fails for a revision, or if no tool
            could be loaded at a revision.
    """
    repo_path = f"/tmp/repos/{os.path.basename(toolshed_url)}-{owner}-{name}"
    clone(toolshed_url, name, owner, repo_path)

    versions: Dict[str, Set[Tuple[str, str]]] = {}
    for r in revisions:
        cmd = ["hg", "update", r]
        proc = subprocess.run(cmd, cwd=repo_path, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        # Without this check a failed update would leave the previous
        # revision checked out and its versions would be recorded under `r`.
        assert proc.returncode == 0, f"failed {' '.join(cmd)} in {repo_path}"

        versions[r] = {
            (tool.parse_id(), tool.parse_version())
            for _, tool in load_tool_sources_from_path(repo_path)
        }
        assert len(versions[r]) > 0, f"no tools found at revision {r} in {repo_path}"

    return versions
91+
92+
93+
def fix_outdated(lockfile_name: str, toolshed_url: str, galaxy_url: Optional[str] = None) -> None:
    """Replace no-longer-installable revisions in a lockfile by their successors.

    For every tool in the lockfile the Tool Shed is asked for its ordered
    installable revisions. For each locked revision ``cur`` that is not
    installable, the first installable revision ``nxt`` at or after ``cur``
    in the repository history is looked up. If ``cur`` and ``nxt`` contain
    the same set of (tool id, tool version) pairs:

    - if ``nxt`` is not yet locked, it is added and ``cur`` is removed;
    - if ``nxt`` is already locked, ``cur`` is removed, unless
      ``galaxy_url`` is given and ``cur`` is still installed there.

    The lockfile is rewritten in place at the end. Nothing is installed on
    or uninstalled from any Galaxy instance.

    Args:
        lockfile_name: path of the YAML lockfile; must contain a ``tools``
            list whose entries have ``name``, ``owner`` and ``revisions``.
        toolshed_url: base URL of the Tool Shed to query and clone from.
        galaxy_url: optional Galaxy URL whose installed repositories are
            consulted before dropping a revision.

    Raises:
        AssertionError: if a locked revision is unknown to the repository,
            if a tool is missing from the Galaxy repository list while
            ``galaxy_url`` is given, or if a Mercurial command run by
            ``get_all_revisions`` fails.
    """
    ts = toolshed.ToolShedInstance(url=toolshed_url)

    # (name, owner) -> changeset revisions currently installed on Galaxy
    installed_tools: Dict[Tuple[str, str], Set[str]] = {}
    if galaxy_url:
        gi = galaxy.GalaxyInstance(url=galaxy_url, key=None)
        for t in gi.toolshed.get_repositories():
            # Register the repository even if all its revisions turn out
            # to be deleted/uninstalled.
            installed = installed_tools.setdefault((t['name'], t['owner']), set())
            # TODO? could also check for 'status': 'Installed'
            if t['deleted'] or t['uninstalled']:
                continue
            installed.add(t['changeset_revision'])

    with open(lockfile_name) as f:
        lockfile = yaml.safe_load(f)
        locked_tools = lockfile["tools"]

    for locked_tool in locked_tools:
        name = locked_tool["name"]
        owner = locked_tool["owner"]
        logger.info(f"Checking {toolshed_url} {owner} {name} ")
        # get ordered_installable_revisions from oldest to newest
        try:
            ordered_installable_revisions = (
                ts.repositories.get_ordered_installable_revisions(name, owner)
            )
        except bioblend.ConnectionError:
            logger.warning(f"Could not determine intstallable revisions for {name} {owner}")
            continue

        # Set for O(1) membership tests below.
        installable = set(ordered_installable_revisions)
        if not set(locked_tool["revisions"]) - installable:
            # Every locked revision is still installable: nothing to fix and
            # no need to clone the repository.
            continue

        all_revisions = get_all_revisions(toolshed_url, name, owner)
        try:
            all_versions = get_all_versions(toolshed_url, name, owner, all_revisions)
        except Exception:
            # Best effort: skip this tool. `Exception` (not a bare except)
            # so that KeyboardInterrupt/SystemExit still stop the script.
            logger.warning(f"Could not determine versions for {name} {owner}")
            continue

        to_remove = []
        to_append = []
        for cur in locked_tool["revisions"]:
            if cur in installable:
                continue
            assert cur in all_revisions, f"{cur} is not a valid revision of {name} {owner}"
            start = all_revisions.index(cur)
            # First installable revision at or after `cur` in history order.
            nxt = next(
                (rev for rev in all_revisions[start:] if rev in installable),
                None,
            )

            if not nxt:
                logger.warning(f"Could not determine the next revision for {cur} {name} {owner}")
                continue

            if all_versions[cur] != all_versions[nxt]:
                logger.warning(f"{name},{owner} {cur} {nxt} have unequal versions")
                continue

            if nxt not in locked_tool["revisions"]:
                # Several outdated revisions may resolve to the same
                # successor; add it only once to avoid duplicate entries.
                if nxt not in to_append:
                    logger.info(f"Adding {nxt} which was absent so far {name} {owner}")
                    to_append.append(nxt)
            elif galaxy_url:
                assert (name, owner) in installed_tools, (
                    f"{name} {owner} not found on {galaxy_url}"
                )
                if cur in installed_tools[(name, owner)]:
                    logger.warning(f"{name},{owner} {cur} still installed on {galaxy_url}")
                    continue
            logger.info(f"remove {cur} in favor of {nxt} {name} {owner}")
            to_remove.append(cur)

        for r in to_remove:
            locked_tool["revisions"].remove(r)
        locked_tool["revisions"].extend(to_append)

    with open(lockfile_name, "w") as handle:
        yaml.dump(lockfile, handle, default_flow_style=False)
169+
170+
171+
if __name__ == "__main__":
    # Command line entry point: parse arguments, configure logging and run
    # fix_outdated on the given lockfile.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "lockfile", type=argparse.FileType("r"), help="Tool.yaml.lock file"
    )
    parser.add_argument(
        "--toolshed",
        default="https://toolshed.g2.bx.psu.edu",
        help="Toolshed to test against",
    )
    parser.add_argument('--galaxy_url', default=None, required=False, help="Galaxy instance to check")
    args = parser.parse_args()

    # argparse.FileType already opened the lockfile (which validates that it
    # is readable), but only its name is needed: fix_outdated re-opens the
    # file itself. Close the handle instead of leaking it.
    lockfile_name = args.lockfile.name
    args.lockfile.close()

    logger.setLevel(logging.DEBUG)
    logging.getLogger('urllib3').setLevel(logging.WARNING)
    logging.getLogger('bioblend').setLevel(logging.WARNING)
    logging.getLogger('PIL.Image').setLevel(logging.WARNING)
    # otherwise tool loading errors (if there are other xml files that can't be parsed?) are reported
    logging.getLogger('galaxy.tool_util.loader_directory').disabled = True
    handler = logging.StreamHandler()
    logger.addHandler(handler)
    formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
    handler.setFormatter(formatter)

    fix_outdated(lockfile_name, args.toolshed, args.galaxy_url)

0 commit comments

Comments
 (0)