Skip to content

Commit 7f706ca

Browse files
Merge pull request #281 from adamtheturtle/no-reupload-files
Make it possible to not change a page if not needed, even if there are files on the page
2 parents 6b07f34 + bcc0165 commit 7f706ca

File tree

7 files changed

+198
-37
lines changed

7 files changed

+198
-37
lines changed

.github/workflows/publish-notion.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ jobs:
3434
--parent-type "page" \
3535
--file "./build-sample/index.json" \
3636
--title "Sphinx-Notionbuilder Sample" \
37+
--sha-mapping "./sample/notion-sha-mapping.json" \
3738
--icon "🐍"
3839
env:
3940
NOTION_TOKEN: ${{ secrets.NOTION_TOKEN }}

README.rst

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -214,14 +214,15 @@ Usage
214214

215215
.. code-block:: console
216216
217-
$ notion-upload -f path/to/output.json -p parent_page_id -t "Page Title"
217+
$ notion-upload --file path/to/output.json --parent-id parent_page_id --parent-type page --title "Page Title" --sha-mapping notion-sha-mapping.json
218218
219219
Arguments:
220220

221221
- ``--file``: Path to the JSON file generated by the Notion builder
222222
- ``--parent-id``: The ID of the parent page or database in Notion (must be shared with your integration)
223223
- ``--parent-type``: "page" or "database"
224224
- ``--title``: Title for the new page in Notion
225+
- ``--sha-mapping``: Optional JSON file mapping file SHAs to Notion block IDs for efficient file re-uploads (use one file per document). The command will update this file.
225226

226227
The command will create a new page if one with the given title doesn't exist, or update the existing page if one with the given title already exists.
227228

pyproject.toml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ dependencies = [
3535
"beartype>=0.21.0",
3636
"click>=8.0.0",
3737
"docutils>=0.21",
38+
"notion-client>=2.5.0",
3839
"sphinx>=8.2.3",
3940
"sphinx-immaterial>=0.13.7",
4041
"sphinx-simplepdf>=1.6.0",
@@ -89,6 +90,7 @@ optional-dependencies.sample = [
8990
urls.Source = "https://github.com/adamtheturtle/sphinx-notionbuilder"
9091

9192
scripts.notion-upload = "_notion_scripts.upload:main"
93+
scripts.upload-files = "_notion_scripts.upload_files:main"
9294

9395
[tool.setuptools]
9496
zip-safe = false

sample/index.rst

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -289,8 +289,7 @@ The builder supports task lists with checkboxes:
289289
* [x] Task B2
290290
* [] Task B3
291291

292-
A rogue paragraph with a reference to
293-
the `parent task_list <task_list_example>`.
292+
A rogue paragraph.
294293

295294
- A list item without a checkbox.
296295
- [ ] Another bullet point.
@@ -1562,8 +1561,7 @@ The builder supports task lists with checkboxes:
15621561
* [x] Task B2
15631562
* [] Task B3
15641563

1565-
A rogue paragraph with a reference to
1566-
the `parent task_list <task_list_example>`.
1564+
A rogue paragraph.
15671565

15681566
- A list item without a checkbox.
15691567
- [ ] Another bullet point.

sample/notion-sha-mapping.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"10be79a09f2b92602ce383a0455f9fe6268f6792decba9383ddce53fded08137": "2819ce7b-60a4-8160-8351-de0105b99d14",
3+
"822e7aef5ac028d8cf5565420b59a1f3286a0af6fa17d6f0c617151a1f386507": "2819ce7b-60a4-815a-98ed-ddd4664d67ba",
4+
"cd765602a4632f5abb325a66ce59aef506e84fdb6e4fe5590c634f4db567ef2c": "2819ce7b-60a4-816d-b91a-d389313c703c",
5+
"eeeca6a7b26d982c4de616bc3bf36b9f40fb090938081cb828709920613dae72": "2819ce7b-60a4-8188-8372-cde9d4d513b3"
6+
}

spelling_private_dict.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
CLI
12
backtick
23
backticks
34
callout

src/_notion_scripts/upload.py

Lines changed: 184 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -3,53 +3,133 @@
33
Inspired by https://github.com/ftnext/sphinx-notion/blob/main/upload.py.
44
"""
55

6+
import hashlib
67
import json
78
from enum import Enum
9+
from functools import cache
810
from pathlib import Path
911
from typing import TYPE_CHECKING, Any
1012
from urllib.parse import urlparse
1113
from urllib.request import url2pathname
14+
from uuid import UUID
1215

1316
import click
1417
from beartype import beartype
18+
from notion_client.errors import APIResponseError
1519
from ultimate_notion import Emoji, Session
1620
from ultimate_notion.blocks import PDF as UnoPDF # noqa: N811
1721
from ultimate_notion.blocks import Audio as UnoAudio
1822
from ultimate_notion.blocks import Block
1923
from ultimate_notion.blocks import Image as UnoImage
2024
from ultimate_notion.blocks import Video as UnoVideo
21-
from ultimate_notion.file import UploadedFile
2225
from ultimate_notion.obj_api.blocks import Block as UnoObjAPIBlock
2326

2427
if TYPE_CHECKING:
2528
from ultimate_notion.database import Database
2629
from ultimate_notion.page import Page
2730

31+
_FILE_BLOCK_TYPES = (UnoImage, UnoVideo, UnoAudio, UnoPDF)
32+
_FileBlock = UnoImage | UnoVideo | UnoAudio | UnoPDF
33+
34+
35+
@beartype
@cache
def _calculate_file_sha(*, file_path: Path) -> str:
    """
    Return the hex-encoded SHA-256 digest of the file at ``file_path``.

    The file is read in binary mode in 4096-byte chunks, so it is never
    loaded into memory in one piece. Results are memoized per path by
    ``functools.cache``, so a path is hashed at most once per process.
    """
    digest = hashlib.sha256()
    with file_path.open(mode="rb") as file_stream:
        # ``read`` returns ``b""`` at end of file, which ends the loop.
        while chunk := file_stream.read(4096):
            digest.update(chunk)
    return digest.hexdigest()
46+
2847

2948
@beartype
def _clean_deleted_blocks_from_mapping(
    *,
    sha_to_block_id: dict[str, str],
    session: Session,
) -> dict[str, str]:
    """Remove deleted blocks from SHA mapping.

    Every block ID in ``sha_to_block_id`` is retrieved through the Notion
    API. Entries whose block is reported as not found are dropped, and a
    message is printed for each one. All other entries are kept.

    Returns a new dictionary with only existing blocks; the input mapping
    is not modified.

    Raises ``APIResponseError`` for any API failure other than "object not
    found", and ``ValueError`` if a stored block ID is not a valid UUID.
    """
    cleaned_mapping: dict[str, str] = {}

    for sha, block_id_str in sha_to_block_id.items():
        block_id = UUID(hex=block_id_str)
        try:
            session.api.blocks.retrieve(block=block_id)
        except APIResponseError as exc:
            # Only a "not found" response shows that the block is gone.
            # Rate limiting, authentication problems or server errors must
            # not silently discard mapping entries, as that would force
            # every affected file to be uploaded again.
            if exc.code != "object_not_found":
                raise
            msg = f"Block {block_id} does not exist, removing from SHA mapping"
            click.echo(message=msg)
        else:
            cleaned_mapping[sha] = block_id_str

    return cleaned_mapping
74+
75+
76+
@beartype
def _find_last_matching_block_index(
    *,
    existing_blocks: list[Block] | tuple[Block, ...],
    local_blocks: list[Block],
    sha_to_block_id: dict[str, str],
) -> int | None:
    """Find the end of the common prefix of existing and local blocks.

    Blocks are compared pairwise, position by position, using
    ``_is_existing_equivalent``. The scan stops at the first pair which is
    not equivalent, or when either sequence runs out.

    Returns the index of the last equivalent pair, or None if the first
    pair already differs or either sequence is empty.
    """
    matched_up_to: int | None = None
    # ``zip`` ends with the shorter sequence, so no explicit bounds check
    # against ``local_blocks`` is needed.
    for position, (page_block, candidate) in enumerate(
        zip(existing_blocks, local_blocks)
    ):
        if not _is_existing_equivalent(
            existing_page_block=page_block,
            local_block=candidate,
            sha_to_block_id=sha_to_block_id,
        ):
            break
        matched_up_to = position
    return matched_up_to
101+
102+
103+
@beartype
def _is_existing_equivalent(
    *,
    existing_page_block: Block,
    local_block: Block,
    sha_to_block_id: dict[str, str],
) -> bool:
    """
    Decide whether an existing page block can stand in for a local block.

    The blocks are equivalent if they compare equal. Otherwise they are
    only equivalent when the local block is a file block with a ``file``
    URL, and ``sha_to_block_id`` maps the SHA-256 of that local file to the
    ID of the existing page block.
    """
    if existing_page_block == local_block:
        return True

    if not isinstance(local_block, _FILE_BLOCK_TYPES):
        return False

    url_parts = urlparse(url=local_block.url)
    if url_parts.scheme != "file":
        return False

    # Ignore ``mypy`` error as the keyword arguments are different across
    # Python versions and platforms.
    local_path = Path(url2pathname(url_parts.path))  # type: ignore[misc]
    recorded_block_id = sha_to_block_id.get(
        _calculate_file_sha(file_path=local_path)
    )
    if not recorded_block_id:
        return False

    return UUID(hex=recorded_block_id) == existing_page_block.id
53133

54134

55135
@beartype
@@ -58,15 +138,26 @@ def _block_from_details(
58138
details: dict[str, Any],
59139
session: Session,
60140
) -> Block:
61-
"""Create a Block from a serialized block details.
62-
63-
Upload any required local files.
141+
"""
142+
Create a Block from a serialized block details.
64143
"""
65144
block = Block.wrap_obj_ref(UnoObjAPIBlock.model_validate(obj=details))
66145

67-
if isinstance(block, (UnoImage, UnoVideo, UnoAudio, UnoPDF)):
68-
uploaded_file = _upload_local_file(url=block.url, session=session)
69-
if uploaded_file is not None:
146+
if isinstance(block, _FILE_BLOCK_TYPES):
147+
parsed = urlparse(url=block.url)
148+
if parsed.scheme == "file":
149+
# Ignore ``mypy`` error as the keyword arguments are different
150+
# across Python versions and platforms.
151+
file_path = Path(url2pathname(parsed.path)) # type: ignore[misc]
152+
153+
with file_path.open(mode="rb") as file_stream:
154+
uploaded_file = session.upload(
155+
file=file_stream,
156+
file_name=file_path.name,
157+
)
158+
159+
uploaded_file.wait_until_uploaded()
160+
70161
return block.__class__(file=uploaded_file, caption=block.caption)
71162

72163
return block
@@ -115,6 +206,20 @@ class _ParentType(Enum):
115206
help="Icon of the page",
116207
required=False,
117208
)
209+
@click.option(
210+
"--sha-mapping",
211+
help=(
212+
"JSON file mapping file SHAs to Notion block IDs "
213+
"(use one file per document)",
214+
),
215+
required=False,
216+
type=click.Path(
217+
exists=True,
218+
path_type=Path,
219+
file_okay=True,
220+
dir_okay=False,
221+
),
222+
)
118223
@beartype
119224
def main(
120225
*,
@@ -123,12 +228,23 @@ def main(
123228
parent_type: _ParentType,
124229
title: str,
125230
icon: str | None = None,
231+
sha_mapping: Path | None = None,
126232
) -> None:
127233
"""
128234
Upload documentation to Notion.
129235
"""
130236
session = Session()
131237

238+
sha_mapping_content = (
239+
sha_mapping.read_text(encoding="utf-8") if sha_mapping else "{}"
240+
)
241+
sha_to_block_id: dict[str, str] = dict(json.loads(s=sha_mapping_content))
242+
243+
sha_to_block_id = _clean_deleted_blocks_from_mapping(
244+
sha_to_block_id=sha_to_block_id,
245+
session=session,
246+
)
247+
132248
blocks = json.loads(s=file.read_text(encoding="utf-8"))
133249

134250
parent: Page | Database
@@ -159,16 +275,15 @@ def main(
159275
page.icon = Emoji(emoji=icon)
160276

161277
block_objs = [
162-
_block_from_details(details=details, session=session)
278+
Block.wrap_obj_ref(UnoObjAPIBlock.model_validate(obj=details))
163279
for details in blocks
164280
]
165281

166-
last_matching_index: int | None = None
167-
for index, existing_page_block in enumerate(iterable=page.children):
168-
if index < len(blocks) and existing_page_block == block_objs[index]:
169-
last_matching_index = index
170-
else:
171-
break
282+
last_matching_index = _find_last_matching_block_index(
283+
existing_blocks=page.children,
284+
local_blocks=block_objs,
285+
sha_to_block_id=sha_to_block_id,
286+
)
172287

173288
click.echo(
174289
message=(
@@ -180,5 +295,42 @@ def main(
180295
for existing_page_block in page.children[delete_start_index:]:
181296
existing_page_block.delete()
182297

183-
page.append(blocks=block_objs[delete_start_index:])
298+
block_objs_to_upload = [
299+
_block_from_details(details=details, session=session)
300+
for details in blocks[delete_start_index:]
301+
]
302+
page.append(blocks=block_objs_to_upload)
303+
304+
if sha_mapping:
305+
for uploaded_block_index, uploaded_block in enumerate(
306+
iterable=block_objs_to_upload
307+
):
308+
if isinstance(uploaded_block, _FILE_BLOCK_TYPES):
309+
pre_uploaded_block = block_objs[
310+
delete_start_index + uploaded_block_index
311+
]
312+
assert isinstance(pre_uploaded_block, _FILE_BLOCK_TYPES)
313+
parsed = urlparse(url=pre_uploaded_block.url)
314+
if parsed.scheme == "file":
315+
# Ignore ``mypy`` error as the keyword arguments are
316+
# different across Python versions and platforms.
317+
file_path = Path(url2pathname(parsed.path)) # type: ignore[misc]
318+
file_sha = _calculate_file_sha(file_path=file_path)
319+
sha_to_block_id[file_sha] = str(object=uploaded_block.id)
320+
321+
sha_mapping.write_text(
322+
data=json.dumps(
323+
obj=sha_to_block_id, indent=2, sort_keys=True
324+
)
325+
+ "\n",
326+
encoding="utf-8",
327+
)
328+
329+
click.echo(
330+
message=(
331+
f"Updated SHA mapping for {file_path.name}:"
332+
f"{uploaded_block.id}"
333+
)
334+
)
335+
184336
click.echo(message=f"Updated existing page: '{title}' ({page.url})")

0 commit comments

Comments
 (0)