diff --git a/mokelumne/util/file_transfer.py b/mokelumne/util/file_transfer.py index d8b4f5b..e26604c 100644 --- a/mokelumne/util/file_transfer.py +++ b/mokelumne/util/file_transfer.py @@ -20,19 +20,34 @@ def sha256_for_file(path: Path) -> str: return sha256.hexdigest() +def _should_include_manifest_entry(relative_path: Path, item: Path) -> bool: + """Return True when a path should be added to the manifest.""" + if any(part.startswith(".") for part in relative_path.parts): + return False + + if item.name.casefold() in {"thumbs.db"}: + return False + + return True + def build_file_manifest(source_path: Path) -> Manifest: files = [] for item in source_path.rglob("*"): - if item.is_file(): - relative_path = item.relative_to(source_path) - files.append( - { - "path": str(relative_path), - "size": item.stat().st_size, - "sha256": sha256_for_file(item), - } - ) + if not item.is_file(): + continue + + relative_path = item.relative_to(source_path) + if not _should_include_manifest_entry(relative_path, item): + continue + + files.append( + { + "path": str(relative_path), + "size": item.stat().st_size, + "sha256": sha256_for_file(item), + } + ) return { "source_root": str(source_path), diff --git a/test/unit/test_file_transfer.py b/test/unit/test_file_transfer.py index dd6b40b..53be53c 100644 --- a/test/unit/test_file_transfer.py +++ b/test/unit/test_file_transfer.py @@ -69,6 +69,32 @@ def test_build_manifest_includes_nested_files(self, tmp_path: Path): } assert len(result) == 2 + def test_build_manifest_excludes_hidden_and_excluded_files(self, tmp_path: Path): + """Ensure that build_manifest skips hidden and system junk files.""" + visible_file = tmp_path / "visible.tif" + visible_file.write_text("hello", encoding="utf-8") + + hidden_file = tmp_path / ".hidden.txt" + hidden_file.write_text("secret", encoding="utf-8") + + hidden_dir = tmp_path / ".hidden" + hidden_dir.mkdir() + hidden_dir_file = hidden_dir / "nested.txt" + hidden_dir_file.write_text("nested", encoding="utf-8") + + ds_store = tmp_path / ".DS_Store" + ds_store.write_text("folder metadata", encoding="utf-8") + + thumbs_db = tmp_path / "Thumbs.db" + thumbs_db.write_text("thumbnail images db", encoding="utf-8") + + result = file_transfer.build_file_manifest(tmp_path) + + paths = {entry["path"] for entry in result["files"]} + + assert paths == {"visible.tif"} + assert len(result["files"]) == 1 + def test_verify_manifest_success(self, tmp_path: Path): """Ensure verify_manifest succeeds for valid files."""