Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 24 additions & 9 deletions mokelumne/util/file_transfer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,34 @@ def sha256_for_file(path: Path) -> str:
return sha256.hexdigest()


def _should_include_manifest_entry(relative_path: Path, item: Path) -> bool:
"""Return True when a path should be added to the manifest.

A path is rejected when any component of ``relative_path`` starts with
"." (a hidden file, or any file located under a hidden directory), or
when the file name of ``item`` equals "thumbs.db" after ``casefold()``,
i.e. compared case-insensitively. Every other path is accepted.
"""
# Checking every path component (not just the last one) also rejects
# files nested inside dot-directories.
if any(part.startswith(".") for part in relative_path.parts):
return False

# casefold() makes the name comparison case-insensitive ("Thumbs.db", "THUMBS.DB", ...).
if item.name.casefold() in {"thumbs.db"}:

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought we wanted to make it configurable. Otherwise this seems like a decent enough implementation.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I didn't see that in the ticket. Other than "." files and "Thumbs.db" I don't recall seeing other files we'd want to routinely exclude. Making it configurable wouldn't be hard but I won't have time to put that in along with test changes today. I'll be back Monday.

Could always be added later if needed though. It's only called by build_manifest though so probably not a big deal if other changes are merged first

Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's break out the configurability into its own ticket; we can merge this PR and i'll pick that up next.

return False

return True

def build_file_manifest(source_path: Path) -> Manifest:
files = []

for item in source_path.rglob("*"):
if item.is_file():
relative_path = item.relative_to(source_path)
files.append(
{
"path": str(relative_path),
"size": item.stat().st_size,
"sha256": sha256_for_file(item),
}
)
if not item.is_file():
continue

relative_path = item.relative_to(source_path)
if not _should_include_manifest_entry(relative_path, item):
continue

files.append(
{
"path": str(relative_path),
"size": item.stat().st_size,
"sha256": sha256_for_file(item),
}
)

return {
"source_root": str(source_path),
Expand Down
26 changes: 26 additions & 0 deletions test/unit/test_file_transfer.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,32 @@ def test_build_manifest_includes_nested_files(self, tmp_path: Path):
}
assert len(result) == 2

def test_build_manifest_excludes_hidden_and_excluded_files(self, tmp_path: Path):
    """Check that dot-files, files under dot-directories, and Thumbs.db are left out.

    Only the single ordinary file created below is expected in the
    manifest's "files" list.
    """
    dot_directory = tmp_path / ".hidden"
    dot_directory.mkdir()

    # (location, text content) for every file placed in the source tree;
    # only the first entry is expected to appear in the manifest.
    fixtures = [
        (tmp_path / "visible.tif", "hello"),
        (tmp_path / ".hidden.txt", "secret"),
        (dot_directory / "nested.txt", "nested"),
        (tmp_path / ".DS_Store", "folder metadata"),
        (tmp_path / "Thumbs.db", "thumbnail images db"),
    ]
    for location, content in fixtures:
        location.write_text(content, encoding="utf-8")

    manifest = file_transfer.build_file_manifest(tmp_path)
    listed = manifest["files"]

    assert {record["path"] for record in listed} == {"visible.tif"}
    # The set comparison above would hide duplicates, so pin the count too.
    assert len(listed) == 1

def test_verify_manifest_success(self, tmp_path: Path):
"""Ensure verify_manifest succeeds for valid files."""

Expand Down
Loading