Skip to content

Commit f5e4c06

Browse files
Merge pull request #301 from adamtheturtle/block-equal-sha
Download files during upload to check for equality
2 parents (56077f4 + cbfcbaf), commit f5e4c06

File tree

5 files changed: 43 additions and 108 deletions.

5 files changed: 43 additions and 108 deletions.

.github/workflows/publish-notion.yml

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,6 @@ jobs:
3434
--parent-type "page" \
3535
--file "./build-sample/index.json" \
3636
--title "Sphinx-Notionbuilder Sample" \
37-
--sha-mapping "./sample/notion-sha-mapping.json" \
3837
--icon "🐍"
3938
env:
4039
NOTION_TOKEN: ${{ secrets.NOTION_TOKEN }}

README.rst

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -299,7 +299,6 @@ Arguments:
299299
- ``--parent-id``: The ID of the parent page or database in Notion (must be shared with your integration)
300300
- ``--parent-type``: "page" or "database"
301301
- ``--title``: Title for the new page in Notion
302-
- ``--sha-mapping``: Optional JSON file mapping file SHAs to Notion block IDs for efficient file re-uploads (use one file per document). The command will update this file.
303302

304303
The command will create a new page if one with the given title doesn't exist, or update the existing page if one with the given title already exists.
305304

pyproject.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ dependencies = [
3535
"beartype>=0.21.0",
3636
"click>=8.0.0",
3737
"docutils>=0.21",
38-
"notion-client>=2.5.0",
38+
"requests>=2.32.5",
3939
"sphinx>=8.2.3",
4040
"sphinx-immaterial>=0.13.7",
4141
"sphinx-simplepdf>=1.6.0",
@@ -80,6 +80,7 @@ optional-dependencies.dev = [
8080
"sphinxcontrib-spelling==8.0.1",
8181
"sphinxcontrib-text-styles==0.2.0",
8282
"types-docutils==0.22.2.20250924",
83+
"types-requests==2.32.4.20250913",
8384
"vulture==2.14",
8485
"yamlfix==1.18.0",
8586
]

sample/notion-sha-mapping.json

Lines changed: 0 additions & 6 deletions
This file was deleted.

src/_notion_scripts/upload.py

Lines changed: 41 additions & 99 deletions
Original file line numberDiff line numberDiff line change
@@ -12,12 +12,11 @@
1212
from typing import TYPE_CHECKING, Any
1313
from urllib.parse import urlparse
1414
from urllib.request import url2pathname
15-
from uuid import UUID
1615

1716
import click
17+
import requests
1818
from beartype import beartype
19-
from notion_client.errors import APIResponseError
20-
from ultimate_notion import Emoji, Session
19+
from ultimate_notion import Emoji, NotionFile, Session
2120
from ultimate_notion.blocks import PDF as UnoPDF # noqa: N811
2221
from ultimate_notion.blocks import Audio as UnoAudio
2322
from ultimate_notion.blocks import Block
@@ -47,39 +46,25 @@ def _calculate_file_sha(*, file_path: Path) -> str:
4746

4847

4948
@beartype
50-
def _clean_deleted_blocks_from_mapping(
51-
*,
52-
sha_to_block_id: dict[str, str],
53-
session: Session,
54-
) -> dict[str, str]:
55-
"""Remove deleted blocks from ``SHA`` mapping.
56-
57-
Returns a new dictionary with only existing blocks.
49+
@cache
50+
def _calculate_file_sha_from_url(*, file_url: str) -> str:
5851
"""
59-
cleaned_mapping = sha_to_block_id.copy()
60-
deleted_block_shas: set[str] = set()
61-
62-
for sha, block_id_str in sha_to_block_id.items():
63-
block_id = UUID(hex=block_id_str)
64-
try:
65-
session.api.blocks.retrieve(block=block_id)
66-
except APIResponseError:
67-
deleted_block_shas.add(sha)
68-
msg = f"Block {block_id} does not exist, removing from SHA mapping"
69-
click.echo(message=msg)
70-
71-
for deleted_block_sha in deleted_block_shas:
72-
del cleaned_mapping[deleted_block_sha]
73-
74-
return cleaned_mapping
52+
Calculate SHA-256 hash of a file from a URL.
53+
"""
54+
sha256_hash = hashlib.sha256()
55+
with requests.get(url=file_url, stream=True, timeout=10) as response:
56+
response.raise_for_status()
57+
for chunk in response.iter_content(chunk_size=4096):
58+
if chunk:
59+
sha256_hash.update(chunk)
60+
return sha256_hash.hexdigest()
7561

7662

7763
@beartype
7864
def _find_last_matching_block_index(
7965
*,
8066
existing_blocks: list[Block] | tuple[Block, ...],
8167
local_blocks: list[Block],
82-
sha_to_block_id: dict[str, str],
8368
) -> int | None:
8469
"""Find the last index where existing blocks match local blocks.
8570
@@ -88,11 +73,16 @@ def _find_last_matching_block_index(
8873
"""
8974
last_matching_index: int | None = None
9075
for index, existing_page_block in enumerate(iterable=existing_blocks):
76+
click.echo(
77+
message=(
78+
f"Checking block {index + 1} of {len(existing_blocks)} for "
79+
"equivalence"
80+
),
81+
)
9182
if index < len(local_blocks) and (
9283
_is_existing_equivalent(
9384
existing_page_block=existing_page_block,
9485
local_block=local_blocks[index],
95-
sha_to_block_id=sha_to_block_id,
9686
)
9787
):
9888
last_matching_index = index
@@ -106,31 +96,41 @@ def _is_existing_equivalent(
10696
*,
10797
existing_page_block: Block,
10898
local_block: Block,
109-
sha_to_block_id: dict[str, str],
11099
) -> bool:
111100
"""
112101
Check if a local block is equivalent to an existing page block.
113102
"""
114-
if existing_page_block == local_block:
115-
return True
103+
if type(existing_page_block) is not type(local_block):
104+
return False
116105

117106
if isinstance(local_block, _FILE_BLOCK_TYPES):
118107
parsed = urlparse(url=local_block.url)
119108
if parsed.scheme == "file":
120-
file_path = Path(url2pathname(parsed.path)) # type: ignore[misc]
121-
file_sha = _calculate_file_sha(file_path=file_path)
122-
existing_page_block_id_with_file_sha = sha_to_block_id.get(
123-
file_sha
124-
)
125-
if not existing_page_block_id_with_file_sha:
109+
assert isinstance(existing_page_block, _FILE_BLOCK_TYPES)
110+
if not isinstance(existing_page_block.file_info, NotionFile):
126111
return False
112+
127113
if (
128-
UUID(hex=existing_page_block_id_with_file_sha)
129-
== existing_page_block.id
114+
existing_page_block.file_info.name
115+
!= local_block.file_info.name
130116
):
117+
return False
118+
119+
if (
120+
existing_page_block.file_info.caption
121+
!= local_block.file_info.caption
122+
):
123+
return False
124+
125+
local_file_path = Path(url2pathname(parsed.path)) # type: ignore[misc]
126+
local_file_sha = _calculate_file_sha(file_path=local_file_path)
127+
existing_file_sha = _calculate_file_sha_from_url(
128+
file_url=existing_page_block.file_info.url,
129+
)
130+
if local_file_sha != existing_file_sha:
131131
return True
132132

133-
return False
133+
return existing_page_block == local_block
134134

135135

136136
@beartype
@@ -215,20 +215,6 @@ class _ParentType(Enum):
215215
help="Icon of the page",
216216
required=False,
217217
)
218-
@click.option(
219-
"--sha-mapping",
220-
help=(
221-
"JSON file mapping file SHAs to Notion block IDs "
222-
"(use one file per document)"
223-
),
224-
required=False,
225-
type=click.Path(
226-
exists=True,
227-
path_type=Path,
228-
file_okay=True,
229-
dir_okay=False,
230-
),
231-
)
232218
@beartype
233219
def main(
234220
*,
@@ -237,23 +223,12 @@ def main(
237223
parent_type: _ParentType,
238224
title: str,
239225
icon: str | None = None,
240-
sha_mapping: Path | None = None,
241226
) -> None:
242227
"""
243228
Upload documentation to Notion.
244229
"""
245230
session = Session()
246231

247-
sha_mapping_content = (
248-
sha_mapping.read_text(encoding="utf-8") if sha_mapping else "{}"
249-
)
250-
sha_to_block_id: dict[str, str] = dict(json.loads(s=sha_mapping_content))
251-
252-
sha_to_block_id = _clean_deleted_blocks_from_mapping(
253-
sha_to_block_id=sha_to_block_id,
254-
session=session,
255-
)
256-
257232
blocks = json.loads(s=file.read_text(encoding="utf-8"))
258233

259234
parent: Page | Database
@@ -291,7 +266,6 @@ def main(
291266
last_matching_index = _find_last_matching_block_index(
292267
existing_blocks=page.children,
293268
local_blocks=block_objs,
294-
sha_to_block_id=sha_to_block_id,
295269
)
296270

297271
click.echo(
@@ -310,36 +284,4 @@ def main(
310284
]
311285
page.append(blocks=block_objs_to_upload)
312286

313-
if sha_mapping:
314-
for uploaded_block_index, uploaded_block in enumerate(
315-
iterable=block_objs_to_upload
316-
):
317-
if isinstance(uploaded_block, _FILE_BLOCK_TYPES):
318-
pre_uploaded_block = block_objs[
319-
delete_start_index + uploaded_block_index
320-
]
321-
assert isinstance(pre_uploaded_block, _FILE_BLOCK_TYPES)
322-
parsed = urlparse(url=pre_uploaded_block.url)
323-
if parsed.scheme == "file":
324-
# Ignore ``mypy`` error as the keyword arguments are
325-
# different across Python versions and platforms.
326-
file_path = Path(url2pathname(parsed.path)) # type: ignore[misc]
327-
file_sha = _calculate_file_sha(file_path=file_path)
328-
sha_to_block_id[file_sha] = str(object=uploaded_block.id)
329-
330-
sha_mapping.write_text(
331-
data=json.dumps(
332-
obj=sha_to_block_id, indent=2, sort_keys=True
333-
)
334-
+ "\n",
335-
encoding="utf-8",
336-
)
337-
338-
click.echo(
339-
message=(
340-
f"Updated SHA mapping for {file_path.name}:"
341-
f"{uploaded_block.id}"
342-
)
343-
)
344-
345287
click.echo(message=f"Updated existing page: '{title}' ({page.url})")

0 commit comments

Comments
 (0)