Skip to content

Commit 36d1c68

Browse files
committed
done with two file save tests and code
1 parent 8fa8ac0 commit 36d1c68

File tree

2 files changed

+94
-15
lines changed

2 files changed

+94
-15
lines changed

util/tests/unittest_workspace.py

+84-9
Original file line number | Diff line number | Diff line change
@@ -301,7 +301,7 @@ def test_save_file_same_dependency_twice(self) -> None:
301301
self.init_workspace_helper()
302302
assert self.workspace is not None and self.expected_structure is not None
303303
prev_run_name = self.workspace.dbgym_this_run_path.name
304-
result_path = self.make_result_helper()
304+
result_path = self.make_result_helper(file_obj=("file",))
305305
self.init_workspace_helper()
306306
self.workspace.save_file(result_path)
307307
self.workspace.save_file(result_path)
@@ -315,10 +315,64 @@ def test_save_file_same_dependency_twice(self) -> None:
315315
verify_structure(self.scratchspace_path, self.expected_structure)
316316
)
317317

318-
def test_save_file_two_different_dependencies_with_same_name(self) -> None:
319-
# TODO
320-
# TODO: also do the config version
321-
pass
318+
def test_save_file_two_different_dependencies_with_same_filename_both_directly_inside_run(
319+
self,
320+
) -> None:
321+
self.init_workspace_helper()
322+
assert self.workspace is not None and self.expected_structure is not None
323+
prev_run_names = []
324+
prev_run_names.append(self.workspace.dbgym_this_run_path.name)
325+
result1_path = self.make_result_helper(file_obj=("file",))
326+
self.init_workspace_helper()
327+
prev_run_names.append(self.workspace.dbgym_this_run_path.name)
328+
result2_path = self.make_result_helper(file_obj=("file",))
329+
filename = result1_path.name
330+
assert filename == result2_path.name
331+
332+
self.init_workspace_helper()
333+
self.workspace.save_file(result1_path)
334+
self.workspace.save_file(result2_path)
335+
# The second save_file() should have overwritten the first one.
336+
self.expected_structure["dbgym_workspace"]["task_runs"][
337+
self.workspace.dbgym_this_run_path.name
338+
][f"{filename}.link"] = (
339+
"symlink",
340+
f"dbgym_workspace/task_runs/{prev_run_names[-1]}/{filename}",
341+
)
342+
self.assertTrue(
343+
verify_structure(self.scratchspace_path, self.expected_structure)
344+
)
345+
346+
def test_save_file_two_different_dependencies_with_same_filename_but_different_outermost_dirs(
347+
self,
348+
) -> None:
349+
self.init_workspace_helper()
350+
assert self.workspace is not None and self.expected_structure is not None
351+
prev_run_name = self.workspace.dbgym_this_run_path.name
352+
result1_path = self.make_result_helper("dir1/result.txt", file_obj=("file",))
353+
result2_path = self.make_result_helper("result.txt", file_obj=("file",))
354+
filename = result1_path.name
355+
assert filename == result2_path.name
356+
357+
self.init_workspace_helper()
358+
self.workspace.save_file(result1_path)
359+
self.workspace.save_file(result2_path)
360+
# The second save_file() should not overwrite the first one: result1 is linked via its outermost dir ("dir1.link") while result2 is linked directly by filename, so the link names differ.
361+
self.expected_structure["dbgym_workspace"]["task_runs"][
362+
self.workspace.dbgym_this_run_path.name
363+
][f"{filename}.link"] = (
364+
"symlink",
365+
f"dbgym_workspace/task_runs/{prev_run_name}/{filename}",
366+
)
367+
self.expected_structure["dbgym_workspace"]["task_runs"][
368+
self.workspace.dbgym_this_run_path.name
369+
]["dir1.link"] = (
370+
"symlink",
371+
f"dbgym_workspace/task_runs/{prev_run_name}/dir1",
372+
)
373+
self.assertTrue(
374+
verify_structure(self.scratchspace_path, self.expected_structure)
375+
)
322376

323377
def test_save_file_config(self) -> None:
324378
"""
@@ -340,12 +394,35 @@ def test_save_file_config(self) -> None:
340394
def test_save_file_same_config_twice(self) -> None:
341395
self.init_workspace_helper()
342396
assert self.workspace is not None and self.expected_structure is not None
343-
result_path = self.make_file_helper("external/result.txt")
397+
result_path = self.make_file_helper(
398+
"external/result.txt", file_obj=("file", "contents")
399+
)
344400
self.workspace.save_file(result_path)
345401
self.workspace.save_file(result_path)
346402
self.expected_structure["dbgym_workspace"]["task_runs"][
347403
self.workspace.dbgym_this_run_path.name
348-
][f"{result_path.name}"] = ("file",)
404+
][f"{result_path.name}"] = ("file", "contents")
405+
self.assertTrue(
406+
verify_structure(self.scratchspace_path, self.expected_structure)
407+
)
408+
409+
def test_save_file_two_different_configs_with_same_filename(self) -> None:
410+
self.init_workspace_helper()
411+
assert self.workspace is not None and self.expected_structure is not None
412+
result1_path = self.make_file_helper(
413+
"external/result.txt", file_obj=("file", "contents1")
414+
)
415+
result2_path = self.make_file_helper(
416+
"external/dir1/result.txt", file_obj=("file", "contents2")
417+
)
418+
filename = result1_path.name
419+
assert filename == result2_path.name
420+
421+
self.workspace.save_file(result1_path)
422+
self.workspace.save_file(result2_path)
423+
self.expected_structure["dbgym_workspace"]["task_runs"][
424+
self.workspace.dbgym_this_run_path.name
425+
][f"{filename}"] = ("file", "contents2")
349426
self.assertTrue(
350427
verify_structure(self.scratchspace_path, self.expected_structure)
351428
)
@@ -379,8 +456,6 @@ def test_save_file_generated_this_run_raises_error(self) -> None:
379456
):
380457
self.workspace.save_file(result_path)
381458

382-
# TODO: test saving different configs/dependencies with the same name
383-
384459

385460
if __name__ == "__main__":
386461
unittest.main()

util/workspace.py

+10-6
Original file line number | Diff line number | Diff line change
@@ -291,6 +291,14 @@ def save_file(self, fpath: Path) -> None:
291291
We copy the file if it is a "config", meaning it just exists without having been generated
292292
We create a symlink if it is a "dependency", meaning a task.py command was run to generate it
293293
In these cases we create a symlink so we have full provenance for how the dependency was created
294+
295+
**Notable Behavior**
296+
- When you save a dependency, it actually creates a symlink to the outermost directory containing it that is still inside run_*/ (or to the file itself if it is directly inside run_*/).
297+
- When two saves would produce the same name in this run's directory, the second save will overwrite the first. Specifically:
298+
- If you save the same file twice in the same run, the second save will overwrite the first.
299+
- If you save two configs with the same name, the second save will overwrite the first.
300+
- If you save two dependencies whose *outermost* directories have the same name, or two dependencies with the same filename
301+
both directly inside run_*/, the second save will overwrite the first.
294302
"""
295303
# validate fpath
296304
assert isinstance(fpath, Path)
@@ -312,6 +320,7 @@ def save_file(self, fpath: Path) -> None:
312320
if parent_dpath.samefile(run_dpath):
313321
fname = basename_of_path(fpath)
314322
symlink_fpath = self.dbgym_this_run_path / (fname + ".link")
323+
try_remove_file(symlink_fpath)
315324
try_create_symlink(fpath, symlink_fpath)
316325
# Otherwise, we know the fpath file is _not_ directly inside run_dpath dir.
317326
# We go as far back as we can while still staying in run_dpath and symlink that "base" dir.
@@ -326,6 +335,7 @@ def save_file(self, fpath: Path) -> None:
326335
# Create symlink
327336
open_base_dname = basename_of_path(base_dpath)
328337
symlink_dpath = self.dbgym_this_run_path / (open_base_dname + ".link")
338+
try_remove_file(symlink_dpath)
329339
try_create_symlink(base_dpath, symlink_dpath)
330340
# If the file wasn't generated by a run, we can't just symlink it because we don't know that it's immutable.
331341
else:
@@ -553,12 +563,6 @@ def open_and_save(
553563
If you are generating a "result" for the run, _do not_ use this. Just use the normal open().
554564
This shouldn't be too hard to remember because this function crashes if open_fpath doesn't exist,
555565
and when you write results you're usually opening open_fpaths which do not exist.
556-
557-
**Notable Behavior**
558-
- If you open the same "config" file twice in the same run, it'll only be saved the first time (even if the file has changed in between).
559-
- "Dependency" files should be immutable so there's no problem here.
560-
- If you open two "config" files of the same name but different paths, only the first open will be saved.
561-
- Opening two "dependency" files of the same name but different paths will lead to two different "base dirs" being symlinked.
562566
"""
563567
# validate open_fpath
564568
assert isinstance(open_fpath, Path)

0 commit comments

Comments
 (0)