From a397689ff1dcf35f3380ae38499d0e90fdd1a321 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Mon, 2 Jun 2025 17:00:45 +0200 Subject: [PATCH 01/63] Add basic oauth flow --- src/backend/core/api/viewsets.py | 44 +++++++++++++++++++++++++++++++- src/backend/core/urls.py | 5 ++++ 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 3230bd3ce7..3ed0fff6c2 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -5,7 +5,7 @@ import logging import uuid from collections import defaultdict -from urllib.parse import unquote, urlparse +from urllib.parse import unquote, urlparse, urlencode from django.conf import settings from django.contrib.postgres.aggregates import ArrayAgg @@ -19,6 +19,7 @@ from django.db.models.functions import Left, Length from django.http import Http404, StreamingHttpResponse from django.utils.functional import cached_property +from django.shortcuts import redirect from django.utils.text import capfirst, slugify from django.utils.translation import gettext_lazy as _ @@ -1817,3 +1818,44 @@ def _load_theme_customization(self): ) return theme_customization + +notion_client_id = "206d872b-594c-80de-94ff-003760c352e4" +notion_client_secret = "XXX" +notion_redirect_uri = "https://emersion.fr/notion-redirect" + +@drf.decorators.api_view() +def notion_import_redirect(request): + if "notion_token" in request.session: + return redirect("/api/v1.0/notion_import/run") + query = urlencode({ + "client_id": notion_client_id, + "response_type": "code", + "owner": "user", + "redirect_uri": notion_redirect_uri, + }) + return redirect("https://api.notion.com/v1/oauth/authorize?" + query) + +@drf.decorators.api_view() +def notion_import_callback(request): + code = request.GET.get("code") + resp = requests.post( + "https://api.notion.com/v1/oauth/token", + auth=requests.auth.HTTPBasicAuth(notion_client_id, notion_client_secret), + headers={"Accept": "application/json"}, + data={ + "grant_type": "authorization_code", + "code": code, + "redirect_uri": notion_redirect_uri, + }, + ) + resp.raise_for_status() + data = resp.json() + request.session["notion_token"] = data["access_token"] + return redirect("/api/v1.0/notion_import/run") + +#@drf.decorators.api_view(["POST"]) +@drf.decorators.api_view() +def notion_import_run(request): + if "notion_token" not in request.session: + raise drf.exceptions.PermissionDenied() + return drf.response.Response({"sava": "oui et toi ?"}) diff --git a/src/backend/core/urls.py b/src/backend/core/urls.py index 0544189547..4233cf3154 100644 --- a/src/backend/core/urls.py +++ b/src/backend/core/urls.py @@ -52,6 +52,11 @@ r"^templates/(?P[0-9a-z-]*)/", include(template_related_router.urls), ), + path("notion_import/", include([ + path("redirect", viewsets.notion_import_redirect), + path("callback", viewsets.notion_import_callback), + path("run", viewsets.notion_import_run), + ])) ] ), ), From 252b87aeda140b9084dca34a34685a1ff9547d21 Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Mon, 2 Jun 2025 14:45:21 +0200 Subject: [PATCH 02/63] notion-schemas: add a framework for some schemas of the notion api. Warning : there's a gotcha in the framework, see READMEs in code Signed-off-by: Baptiste Prevot --- .../core/notion_schemas/notion_block.py | 104 ++++++++++++++++++ .../core/notion_schemas/notion_color.py | 21 ++++ .../core/notion_schemas/notion_page.py | 15 +++ .../core/notion_schemas/notion_rich_text.py | 65 +++++++++++ 4 files changed, 205 insertions(+) create mode 100644 src/backend/core/notion_schemas/notion_block.py create mode 100644 src/backend/core/notion_schemas/notion_color.py create mode 100644 src/backend/core/notion_schemas/notion_page.py create mode 100644 src/backend/core/notion_schemas/notion_rich_text.py diff --git a/src/backend/core/notion_schemas/notion_block.py b/src/backend/core/notion_schemas/notion_block.py new file mode 100644 index 0000000000..0915757d58 --- /dev/null +++ b/src/backend/core/notion_schemas/notion_block.py @@ -0,0 +1,104 @@ +from datetime import datetime +from enum import StrEnum +from typing import Annotated, Any, Literal + +from pydantic import BaseModel, Discriminator, Field, model_validator + +from .notion_color import NotionColor +from .notion_rich_text import NotionRichText + +"""Usage: NotionBlock.model_validate(response.json())""" + + +class NotionBlock(BaseModel): + created_time: datetime + last_edited_time: datetime + archived: bool + specific: "NotionBlockSpecifics" + + @model_validator(mode="before") + @classmethod + def move_type_inward_and_rename(cls, data: Any) -> Any: + if not isinstance(data, dict): + return data + + assert "type" in data, "Type must be specified" + data_type = data.pop("type") + data["specific"] = data.pop(data_type) + data["specific"]["type"] = data_type + + return data + + +class NotionBlockType(StrEnum): + """https://developers.notion.com/reference/block""" + + BOOKMARK = "bookmark" + BREADCRUMB = "breadcrumb" + BULLETED_LIST_ITEM = "bulleted_list_item" + CALLOUT = "callout" + CHILD_DATABASE = "child_database" + CHILD_PAGE = "child_page" + COLUMN = "column" + COLUMN_LIST = "column_list" + DIVIDER = "divider" + EMBED = "embed" + EQUATION = "equation" + FILE = "file" + HEADING_1 = "heading_1" + HEADING_2 = "heading_2" + HEADING_3 = "heading_3" + IMAGE = "image" + LINK_PREVIEW = "link_preview" + LINK_TO_PAGE = "link_to_page" + NUMBERED_LIST_ITEM = "numbered_list_item" + PARAGRAPH = "paragraph" + PDF = "pdf" + QUOTE = "quote" + SYNCED_BLOCK = "synced_block" + TABLE = "table" + TABLE_OF_CONTENTS = "table_of_contents" + TABLE_ROW = "table_row" + TEMPLATE = "template" + TO_DO = "to_do" + TOGGLE = "toggle" + UNSUPPORTED = "unsupported" + VIDEO = "video" + + +class NotionBlockHeadingBase(BaseModel): + """https://developers.notion.com/reference/block#headings""" + + type: Literal[ + NotionBlockType.HEADING_1, NotionBlockType.HEADING_2, NotionBlockType.HEADING_3 + ] + rich_text: list[NotionRichText] + color: NotionColor + is_toggleable: bool = False + + +class NotionBlockHeading1(NotionBlockHeadingBase): + type: Literal[NotionBlockType.HEADING_1] = NotionBlockType.HEADING_1 + + +class NotionBlockHeading2(NotionBlockHeadingBase): + type: Literal[NotionBlockType.HEADING_2] = NotionBlockType.HEADING_2 + + +class NotionBlockHeading3(NotionBlockHeadingBase): + type: Literal[NotionBlockType.HEADING_3] = NotionBlockType.HEADING_3 + + +class NotionParagraph(BaseModel): + """https://developers.notion.com/reference/block#paragraph""" + + type: Literal[NotionBlockType.PARAGRAPH] = NotionBlockType.PARAGRAPH + rich_text: list[NotionRichText] + color: NotionColor + children: list["NotionBlock"] = Field(default_factory=list) + + +NotionBlockSpecifics = Annotated[ + NotionBlockHeading1 | NotionBlockHeading2 | NotionBlockHeading3, + Discriminator(discriminator="type"), +] diff --git a/src/backend/core/notion_schemas/notion_color.py b/src/backend/core/notion_schemas/notion_color.py new file mode 100644 index 0000000000..4e65a88096 --- /dev/null +++ b/src/backend/core/notion_schemas/notion_color.py @@ -0,0 +1,21 @@ +from enum import StrEnum + + +class NotionColor(StrEnum): + DEFAULT = "default" + BLUE = "blue" + BLUE_BACKGROUND = "blue_background" + BROWN = "brown" + BROWN_BACKGROUND = "brown_background" + GRAY = "gray" + GRAY_BACKGROUND = "gray_background" + GREEN = "green" + GREEN_BACKGROUND = "green_background" + ORANGE = "orange" + ORANGE_BACKGROUND = "orange_background" + YELLOW = "yellow" + YELLOW_BACKGROUND = "yellow_background" + PINK = "pink" + PINK_BACKGROUND = "pink_background" + PURPLE = "purple" + PURPLE_BACKGROUND = "purple_background" diff --git a/src/backend/core/notion_schemas/notion_page.py b/src/backend/core/notion_schemas/notion_page.py new file mode 100644 index 0000000000..741c689def --- /dev/null +++ b/src/backend/core/notion_schemas/notion_page.py @@ -0,0 +1,15 @@ +from datetime import datetime + +from pydantic import BaseModel + + +class NotionFile(BaseModel): ... + + +class NotionPage(BaseModel): + id: str + created_time: datetime + last_edited_time: datetime + archived: bool + icon: NotionFile + cover: NotionFile diff --git a/src/backend/core/notion_schemas/notion_rich_text.py b/src/backend/core/notion_schemas/notion_rich_text.py new file mode 100644 index 0000000000..c22e67a81d --- /dev/null +++ b/src/backend/core/notion_schemas/notion_rich_text.py @@ -0,0 +1,65 @@ +from enum import StrEnum +from typing import Annotated, Any, Literal + +from pydantic import BaseModel, Discriminator, model_validator + +from .notion_color import NotionColor + + +class NotionRichTextAnnotation(BaseModel): + """https://developers.notion.com/reference/rich-text#the-annotation-object""" + + bold: bool = False + italic: bool = False + strikethrough: bool = False + underline: bool = False + code: bool = False + color: NotionColor = NotionColor.DEFAULT + + +class NotionRichText(BaseModel): + """https://developers.notion.com/reference/rich-text, not a block""" + + annotations: NotionRichTextAnnotation + plain_text: str + href: str | None = None + specific: "NotionRichTextSpecifics" + + @model_validator(mode="before") + @classmethod + def move_type_inward_and_rename(cls, data: Any) -> Any: + if not isinstance(data, dict): + return data + + assert "type" in data, "Type must be specified" + data_type = data.pop("type") + data["specific"] = data.pop(data_type) + data["specific"]["type"] = data_type + + return data + + +class NotionRichTextType(StrEnum): + TEXT = "text" + MENTION = "mention" + EQUATION = "equation" + + +class NotionRichTextText(BaseModel): + type: Literal[NotionRichTextType.TEXT] = NotionRichTextType.TEXT + + +class NotionRichTextMention(BaseModel): + type: Literal[NotionRichTextType.MENTION] = NotionRichTextType.MENTION + # Mention + + +class NotionRichTextEquation(BaseModel): + type: Literal[NotionRichTextType.EQUATION] = NotionRichTextType.EQUATION + expression: str # LaTeX expression + + +NotionRichTextSpecifics = Annotated[ + NotionRichTextText | NotionRichTextMention | NotionRichTextEquation, + Discriminator(discriminator="type"), +] From e3523e7e70e9176aee2b0d7aef4a2a3665f87cc0 Mon Sep 17 00:00:00 2001 From: Clara Ni Date: Mon, 2 Jun 2025 18:06:21 +0200 Subject: [PATCH 03/63] Add import_notion service --- src/backend/core/api/viewsets.py | 4 +- src/backend/core/services/notion_import.py | 128 +++++++++++++++++++++ 2 files changed, 131 insertions(+), 1 deletion(-) create mode 100644 src/backend/core/services/notion_import.py diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 3ed0fff6c2..1c3524a265 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -35,6 +35,7 @@ from core import authentication, choices, enums, models from core.services.ai_services import AIService from core.services.collaboration_services import CollaborationService +from core.services.notion_import import import_notion from core.utils import extract_attachments, filter_descendants from . import permissions, serializers, utils @@ -1853,9 +1854,10 @@ def notion_import_callback(request): request.session["notion_token"] = data["access_token"] return redirect("/api/v1.0/notion_import/run") -#@drf.decorators.api_view(["POST"]) +# @drf.decorators.api_view(["POST"]) @drf.decorators.api_view() def notion_import_run(request): if "notion_token" not in request.session: raise drf.exceptions.PermissionDenied() + import_notion(request.session["notion_token"]) return drf.response.Response({"sava": "oui et toi ?"}) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py new file mode 100644 index 0000000000..89a905a899 --- /dev/null +++ b/src/backend/core/services/notion_import.py @@ -0,0 +1,128 @@ +from enum import Enum +import requests + + +class PageType(Enum): + PAGE = "page" + DATABASE = "database" + + +class Page: + def __init__(self, type, id, name): + self.type = type + self.id = id + self.name = name + + def __repr__(self): + return f"\n Page(type={self.type}, id='{self.id}', name='{self.name}')" + + +def search_notion(token: str, start_cursor: str): + response = requests.post( + "https://api.notion.com/v1/search", + headers={ + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + "Notion-Version": "2022-06-28", + "start_cursor": start_cursor if start_cursor else None, + "value": "page", + }, + ) + + if response.status_code == 200: + print("✅ Requête réussie !") + return response.json() + else: + print(f"❌ Erreur lors de la requête : {response.status_code}") + print(response.text) + + +def fetch_root_pages(token: str): + pages = [] + cursor = None + has_more = True + + while has_more: + response = search_notion(token, start_cursor=cursor) + + for item in response["results"]: + if item.get("parent", {}).get("type") == "workspace": + obj_type = item["object"] + if obj_type == "page": + page_type = PageType.PAGE + rich_texts = next( + ( + prop["title"] + for prop in item["properties"].values() + if prop["type"] == "title" + ), + [], + ) + else: + page_type = PageType.DATABASE + rich_texts = item.title + + pages.append( + Page( + type=page_type, + id=item["id"], + name="".join( + rich_text["plain_text"] for rich_text in rich_texts + ), + ) + ) + + has_more = response.get("has_more", False) + cursor = response.get("next_cursor") + + return pages + + +def fetch_blocks(token: str, block_id: str): + response = requests.get( + f"https://api.notion.com/v1/blocks/{block_id}/children", + headers={ + "Authorization": f"Bearer {token}", + "Content-Type": "application/json", + "Notion-Version": "2022-06-28", + }, + ) + + if response.status_code == 200: + return response.json() + else: + print(f"❌ Erreur lors de la requête : {response.status_code}") + print(response.text) + + +def fetch_block_children(token: str, block_id: str): + blocks = [] + cursor = None + has_more = True + + while has_more: + response = fetch_blocks(token, block_id) + + blocks.extend(response["results"]) + + has_more = response.get("has_more", False) + cursor = response.get("next_cursor") + + children = [] + for block in blocks: + if block["has_children"]: + response = fetch_block_children(token, block["id"]) + children.extend(response) + + blocks.extend(children) + return blocks + + +def import_notion(token: str): + """Recursively imports all Notion pages and blocks accessible using the given token.""" + root_pages = fetch_root_pages(token) + for root_page in root_pages: + page_content = fetch_block_children(token, root_page.id) + print(f"Page {root_page.id}") + print(page_content) + print() From 01544f62639699e147777f1e899cc40820a2662f Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Mon, 2 Jun 2025 18:11:48 +0200 Subject: [PATCH 04/63] notion-schemas: add some more schemas --- .../core/notion_schemas/notion_block.py | 115 +++++++++++++++--- .../core/notion_schemas/notion_rich_text.py | 5 +- 2 files changed, 104 insertions(+), 16 deletions(-) diff --git a/src/backend/core/notion_schemas/notion_block.py b/src/backend/core/notion_schemas/notion_block.py index 0915757d58..64577d69e2 100644 --- a/src/backend/core/notion_schemas/notion_block.py +++ b/src/backend/core/notion_schemas/notion_block.py @@ -2,7 +2,7 @@ from enum import StrEnum from typing import Annotated, Any, Literal -from pydantic import BaseModel, Discriminator, Field, model_validator +from pydantic import BaseModel, Discriminator, Field, ValidationError, model_validator from .notion_color import NotionColor from .notion_rich_text import NotionRichText @@ -22,10 +22,12 @@ def move_type_inward_and_rename(cls, data: Any) -> Any: if not isinstance(data, dict): return data - assert "type" in data, "Type must be specified" + if "type" not in data: + raise ValidationError("Type must be specified") + data_type = data.pop("type") data["specific"] = data.pop(data_type) - data["specific"]["type"] = data_type + data["specific"]["block_type"] = data_type return data @@ -39,6 +41,7 @@ class NotionBlockType(StrEnum): CALLOUT = "callout" CHILD_DATABASE = "child_database" CHILD_PAGE = "child_page" + CODE = "code" COLUMN = "column" COLUMN_LIST = "column_list" DIVIDER = "divider" @@ -66,10 +69,10 @@ class NotionBlockType(StrEnum): VIDEO = "video" -class NotionBlockHeadingBase(BaseModel): +class NotionHeadingBase(BaseModel): """https://developers.notion.com/reference/block#headings""" - type: Literal[ + block_type: Literal[ NotionBlockType.HEADING_1, NotionBlockType.HEADING_2, NotionBlockType.HEADING_3 ] rich_text: list[NotionRichText] @@ -77,28 +80,112 @@ class NotionBlockHeadingBase(BaseModel): is_toggleable: bool = False -class NotionBlockHeading1(NotionBlockHeadingBase): - type: Literal[NotionBlockType.HEADING_1] = NotionBlockType.HEADING_1 +class NotionHeading1(NotionHeadingBase): + block_type: Literal[NotionBlockType.HEADING_1] = NotionBlockType.HEADING_1 -class NotionBlockHeading2(NotionBlockHeadingBase): - type: Literal[NotionBlockType.HEADING_2] = NotionBlockType.HEADING_2 +class NotionHeading2(NotionHeadingBase): + block_type: Literal[NotionBlockType.HEADING_2] = NotionBlockType.HEADING_2 -class NotionBlockHeading3(NotionBlockHeadingBase): - type: Literal[NotionBlockType.HEADING_3] = NotionBlockType.HEADING_3 +class NotionHeading3(NotionHeadingBase): + block_type: Literal[NotionBlockType.HEADING_3] = NotionBlockType.HEADING_3 class NotionParagraph(BaseModel): """https://developers.notion.com/reference/block#paragraph""" - type: Literal[NotionBlockType.PARAGRAPH] = NotionBlockType.PARAGRAPH + block_type: Literal[NotionBlockType.PARAGRAPH] = NotionBlockType.PARAGRAPH + rich_text: list[NotionRichText] + color: NotionColor + children: list["NotionBlock"] = Field(default_factory=list) + + +class NotionBulletedListItem(BaseModel): + """https://developers.notion.com/reference/block#bulleted-list-item""" + + block_type: Literal[NotionBlockType.BULLETED_LIST_ITEM] = ( + NotionBlockType.BULLETED_LIST_ITEM + ) + rich_text: list[NotionRichText] + color: NotionColor + children: list["NotionBlock"] = Field(default_factory=list) + + +class NotionNumberedListItem(BaseModel): + """https://developers.notion.com/reference/block#numbered-list-item""" + + block_type: Literal[NotionBlockType.NUMBERED_LIST_ITEM] = ( + NotionBlockType.NUMBERED_LIST_ITEM + ) rich_text: list[NotionRichText] color: NotionColor children: list["NotionBlock"] = Field(default_factory=list) +class NotionCode(BaseModel): + """https://developers.notion.com/reference/block#code""" + + block_type: Literal[NotionBlockType.CODE] = NotionBlockType.CODE + caption: list[NotionRichText] + rich_text: list[NotionRichText] + language: str # Actually an enum + + +class NotionDivider(BaseModel): + """https://developers.notion.com/reference/block#divider""" + + block_type: Literal[NotionBlockType.DIVIDER] = NotionBlockType.DIVIDER + + +class NotionEmbed(BaseModel): + """https://developers.notion.com/reference/block#embed""" + + block_type: Literal[NotionBlockType.EMBED] = NotionBlockType.EMBED + url: str + + +class NotionFileType(StrEnum): + FILE = "file" + EXTERNAL = "external" + FILE_UPLOAD = "file_upload" + + +class NotionFile(BaseModel): + # FIXME: this is actually another occurrence of type discriminating + """https://developers.notion.com/reference/block#file""" + + block_type: Literal[NotionBlockType.FILE] = NotionBlockType.FILE + caption: list[NotionRichText] + type: NotionFileType + ... + + +class NotionImage(BaseModel): + """https://developers.notion.com/reference/block#image""" + + block_type: Literal[NotionBlockType.IMAGE] = NotionBlockType.IMAGE + # FIXME: this actually contains a file reference which will be defined for the above, but with the "image" attribute + + +class NotionLinkPreview(BaseModel): + """https://developers.notion.com/reference/block#link-preview""" + + block_type: Literal[NotionBlockType.LINK_PREVIEW] = NotionBlockType.LINK_PREVIEW + url: str + + NotionBlockSpecifics = Annotated[ - NotionBlockHeading1 | NotionBlockHeading2 | NotionBlockHeading3, - Discriminator(discriminator="type"), + NotionHeading1 + | NotionHeading2 + | NotionHeading3 + | NotionParagraph + | NotionNumberedListItem + | NotionBulletedListItem + | NotionCode + | NotionDivider + | NotionEmbed + | NotionFile + | NotionImage, + Discriminator(discriminator="block_type"), ] diff --git a/src/backend/core/notion_schemas/notion_rich_text.py b/src/backend/core/notion_schemas/notion_rich_text.py index c22e67a81d..0e777e456b 100644 --- a/src/backend/core/notion_schemas/notion_rich_text.py +++ b/src/backend/core/notion_schemas/notion_rich_text.py @@ -1,7 +1,7 @@ from enum import StrEnum from typing import Annotated, Any, Literal -from pydantic import BaseModel, Discriminator, model_validator +from pydantic import BaseModel, Discriminator, ValidationError, model_validator from .notion_color import NotionColor @@ -31,7 +31,8 @@ def move_type_inward_and_rename(cls, data: Any) -> Any: if not isinstance(data, dict): return data - assert "type" in data, "Type must be specified" + if "type" not in data: + raise ValidationError("Type must be specified") data_type = data.pop("type") data["specific"] = data.pop(data_type) data["specific"]["type"] = data_type From 974bb86a6877fe8a5a31a3dfed3849cc6f0ab40f Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Mon, 2 Jun 2025 19:36:33 +0200 Subject: [PATCH 05/63] Add blocks converter to y-provider --- .../core/services/converter_services.py | 42 +++++++++++++++++ src/backend/impress/settings.py | 5 ++ .../src/handlers/convertBlocksHandler.ts | 47 +++++++++++++++++++ .../servers/y-provider/src/handlers/index.ts | 1 + src/frontend/servers/y-provider/src/routes.ts | 1 + .../y-provider/src/servers/appServer.ts | 3 ++ 6 files changed, 99 insertions(+) create mode 100644 src/frontend/servers/y-provider/src/handlers/convertBlocksHandler.ts diff --git a/src/backend/core/services/converter_services.py b/src/backend/core/services/converter_services.py index 5213bac86c..633c0ba2ed 100644 --- a/src/backend/core/services/converter_services.py +++ b/src/backend/core/services/converter_services.py @@ -76,3 +76,45 @@ def convert_markdown(self, text): ) from err return document_content + + def convert_blocks(self, blocks): + """Convert a list of blocks into our internal format using an external microservice.""" + + print('BONJOUR') + print(settings.Y_PROVIDER_API_BASE_URL) + try: + response = requests.post( + f"{settings.Y_PROVIDER_API_BASE_URL}{settings.BLOCKS_CONVERSION_API_ENDPOINT}/", + json={ + "blocks": blocks, + }, + headers={ + "Authorization": self.auth_header, + "Content-Type": "application/json", + }, + timeout=settings.CONVERSION_API_TIMEOUT, + verify=settings.CONVERSION_API_SECURE, + ) + response.raise_for_status() + conversion_response = response.json() + + except requests.RequestException as err: + raise ServiceUnavailableError( + "Failed to connect to conversion service", + ) from err + + except ValueError as err: + raise InvalidResponseError( + "Could not parse conversion service response" + ) from err + + try: + document_content = conversion_response[ + settings.CONVERSION_API_CONTENT_FIELD + ] + except KeyError as err: + raise MissingContentError( + f"Response missing required field: {settings.CONVERSION_API_CONTENT_FIELD}" + ) from err + + return document_content diff --git a/src/backend/impress/settings.py b/src/backend/impress/settings.py index 571d7052d8..7093aad6db 100755 --- a/src/backend/impress/settings.py +++ b/src/backend/impress/settings.py @@ -628,6 +628,11 @@ class Base(Configuration): environ_name="CONVERSION_API_ENDPOINT", environ_prefix=None, ) + BLOCKS_CONVERSION_API_ENDPOINT = values.Value( + default="convert-blocks", + environ_name="BLOCKS_CONVERSION_API_ENDPOINT", + environ_prefix=None, + ) CONVERSION_API_CONTENT_FIELD = values.Value( default="content", environ_name="CONVERSION_API_CONTENT_FIELD", diff --git a/src/frontend/servers/y-provider/src/handlers/convertBlocksHandler.ts b/src/frontend/servers/y-provider/src/handlers/convertBlocksHandler.ts new file mode 100644 index 0000000000..38af609342 --- /dev/null +++ b/src/frontend/servers/y-provider/src/handlers/convertBlocksHandler.ts @@ -0,0 +1,47 @@ +//import { PartialBlock } from '@blocknote/core'; +import { ServerBlockNoteEditor } from '@blocknote/server-util'; +import { Request, Response } from 'express'; +import * as Y from 'yjs'; + +import { logger, toBase64 } from '@/utils'; + +interface ConversionRequest { + blocks: any; // TODO: PartialBlock +} + +interface ConversionResponse { + content: string; +} + +interface ErrorResponse { + error: string; +} + +export const convertBlocksHandler = async ( + req: Request< + object, + ConversionResponse | ErrorResponse, + ConversionRequest, + object + >, + res: Response, +) => { + const blocks = req.body?.blocks; + if (!blocks) { + res.status(400).json({ error: 'Invalid request: missing content' }); + return; + } + + try { + const editor = ServerBlockNoteEditor.create(); + + // Create a Yjs Document from blocks, and encode it as a base64 string + const yDocument = editor.blocksToYDoc(blocks, 'document-store'); + const content = toBase64(Y.encodeStateAsUpdate(yDocument)); + + res.status(200).json({ content }); + } catch (e) { + logger('conversion failed:', e); + res.status(500).json({ error: 'An error occurred' }); + } +}; diff --git a/src/frontend/servers/y-provider/src/handlers/index.ts b/src/frontend/servers/y-provider/src/handlers/index.ts index 75bd7f7bbf..167493a306 100644 --- a/src/frontend/servers/y-provider/src/handlers/index.ts +++ b/src/frontend/servers/y-provider/src/handlers/index.ts @@ -1,3 +1,4 @@ export * from './collaborationResetConnectionsHandler'; export * from './collaborationWSHandler'; export * from './convertMarkdownHandler'; +export * from './convertBlocksHandler'; diff --git a/src/frontend/servers/y-provider/src/routes.ts b/src/frontend/servers/y-provider/src/routes.ts index 98803b87f6..7b8d289bb8 100644 --- a/src/frontend/servers/y-provider/src/routes.ts +++ b/src/frontend/servers/y-provider/src/routes.ts @@ -2,4 +2,5 @@ export const routes = { COLLABORATION_WS: '/collaboration/ws/', COLLABORATION_RESET_CONNECTIONS: '/collaboration/api/reset-connections/', CONVERT_MARKDOWN: '/api/convert-markdown/', + CONVERT_BLOCKS: '/api/convert-blocks/', }; diff --git a/src/frontend/servers/y-provider/src/servers/appServer.ts b/src/frontend/servers/y-provider/src/servers/appServer.ts index 5c035db799..2f99db5b1e 100644 --- a/src/frontend/servers/y-provider/src/servers/appServer.ts +++ b/src/frontend/servers/y-provider/src/servers/appServer.ts @@ -9,6 +9,7 @@ import { collaborationResetConnectionsHandler, collaborationWSHandler, convertMarkdownHandler, + convertBlocksHandler, } from '../handlers'; import { corsMiddleware, httpSecurity, wsSecurity } from '../middlewares'; import { routes } from '../routes'; @@ -51,6 +52,8 @@ export const initServer = () => { */ app.post(routes.CONVERT_MARKDOWN, httpSecurity, convertMarkdownHandler); + app.post(routes.CONVERT_BLOCKS, httpSecurity, convertBlocksHandler); + Sentry.setupExpressErrorHandler(app); app.get('/ping', (req, res) => { From f2c575443b1e465aa7d059aef59b48a8bccd0732 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Mon, 2 Jun 2025 19:38:00 +0200 Subject: [PATCH 06/63] wip: add document creation code --- src/backend/core/api/viewsets.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 1c3524a265..55ba6a858b 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -35,6 +35,7 @@ from core import authentication, choices, enums, models from core.services.ai_services import AIService from core.services.collaboration_services import CollaborationService +from core.services.converter_services import YdocConverter from core.services.notion_import import import_notion from core.utils import extract_attachments, filter_descendants @@ -1859,5 +1860,28 @@ def notion_import_callback(request): def notion_import_run(request): if "notion_token" not in request.session: raise drf.exceptions.PermissionDenied() - import_notion(request.session["notion_token"]) + + import_notion(request.session['notion_token']) + + #document_content = YdocConverter().convert_blocks([ + # { + # "type": "paragraph", + # "content": "Bonjour à toustes zé à toussent", + # }, + #]) + + #obj = models.Document.add_root( + # depth=1, + # creator=request.user, + # title="J'aime les courgettes", + # link_reach=models.LinkReachChoices.RESTRICTED, + # content=document_content, + #) + + #models.DocumentAccess.objects.create( + # document=obj, + # user=request.user, + # role=models.RoleChoices.OWNER, + #) + return drf.response.Response({"sava": "oui et toi ?"}) From 53e41bd61e8626910327619c073fc3703ea12468 Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Mon, 2 Jun 2025 19:13:16 +0200 Subject: [PATCH 07/63] notion-import: use schemas --- .../core/notion_schemas/notion_block.py | 6 +- .../core/notion_schemas/notion_page.py | 21 ++- .../core/notion_schemas/notion_rich_text.py | 2 + src/backend/core/services/notion_import.py | 129 ++++++++---------- 4 files changed, 77 insertions(+), 81 deletions(-) diff --git a/src/backend/core/notion_schemas/notion_block.py b/src/backend/core/notion_schemas/notion_block.py index 64577d69e2..2afb51b255 100644 --- a/src/backend/core/notion_schemas/notion_block.py +++ b/src/backend/core/notion_schemas/notion_block.py @@ -11,10 +11,13 @@ class NotionBlock(BaseModel): + id: str created_time: datetime last_edited_time: datetime archived: bool specific: "NotionBlockSpecifics" + has_children: bool + children: list["NotionBlock"] = Field(init=False, default_factory=list) @model_validator(mode="before") @classmethod @@ -72,9 +75,6 @@ class NotionBlockType(StrEnum): class NotionHeadingBase(BaseModel): """https://developers.notion.com/reference/block#headings""" - block_type: Literal[ - NotionBlockType.HEADING_1, NotionBlockType.HEADING_2, NotionBlockType.HEADING_3 - ] rich_text: list[NotionRichText] color: NotionColor is_toggleable: bool = False diff --git a/src/backend/core/notion_schemas/notion_page.py b/src/backend/core/notion_schemas/notion_page.py index 741c689def..47a01258cf 100644 --- a/src/backend/core/notion_schemas/notion_page.py +++ b/src/backend/core/notion_schemas/notion_page.py @@ -2,14 +2,27 @@ from pydantic import BaseModel +from .notion_rich_text import NotionRichText + class NotionFile(BaseModel): ... class NotionPage(BaseModel): id: str - created_time: datetime - last_edited_time: datetime archived: bool - icon: NotionFile - cover: NotionFile + + # created_time: datetime + # last_edited_time: datetime + # icon: NotionFile + # cover: NotionFile + + properties: dict # This is a very messy dict, with some RichText somewhere + + def get_title(self) -> str | None: + title_property: dict | None = self.properties.get("title") + if title_property is None: + return None + + rich_text = title_property["title"] # This could be parsed using NotionRichText + return rich_text["plain_text"] diff --git a/src/backend/core/notion_schemas/notion_rich_text.py b/src/backend/core/notion_schemas/notion_rich_text.py index 0e777e456b..4d209d6656 100644 --- a/src/backend/core/notion_schemas/notion_rich_text.py +++ b/src/backend/core/notion_schemas/notion_rich_text.py @@ -48,6 +48,8 @@ class NotionRichTextType(StrEnum): class NotionRichTextText(BaseModel): type: Literal[NotionRichTextType.TEXT] = NotionRichTextType.TEXT + content: str + link: str | None class NotionRichTextMention(BaseModel): diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 89a905a899..d7873bd66e 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -1,128 +1,109 @@ +import logging +from dataclasses import dataclass from enum import Enum -import requests +from typing import Any +import requests +from pydantic import TypeAdapter +from requests import Session -class PageType(Enum): - PAGE = "page" - DATABASE = "database" +from ..notion_schemas.notion_block import NotionBlock +from ..notion_schemas.notion_page import NotionPage +logger = logging.getLogger(__name__) -class Page: - def __init__(self, type, id, name): - self.type = type - self.id = id - self.name = name - def __repr__(self): - return f"\n Page(type={self.type}, id='{self.id}', name='{self.name}')" +def build_notion_session(token: str) -> Session: + session = Session() + session.headers = { + "Authorization": f"Bearer {token}", + "Notion-Version": "2022-06-28", + } + return session -def search_notion(token: str, start_cursor: str): - response = requests.post( +def search_notion(session: Session, start_cursor: str) -> dict[str, Any]: + response = session.post( "https://api.notion.com/v1/search", - headers={ - "Authorization": f"Bearer {token}", - "Content-Type": "application/json", - "Notion-Version": "2022-06-28", + json={ "start_cursor": start_cursor if start_cursor else None, "value": "page", }, ) if response.status_code == 200: - print("✅ Requête réussie !") + logger.info("✅ Requête réussie !") return response.json() else: - print(f"❌ Erreur lors de la requête : {response.status_code}") - print(response.text) + logger.error(f"❌ Erreur lors de la requête : {response.status_code}") + logger.debug(response.text) + raise ValueError -def fetch_root_pages(token: str): +def fetch_root_pages(session: Session) -> list[NotionPage]: pages = [] - cursor = None + cursor = "" has_more = True while has_more: - response = search_notion(token, start_cursor=cursor) + response = search_notion(session, start_cursor=cursor) for item in response["results"]: - if item.get("parent", {}).get("type") == "workspace": - obj_type = item["object"] - if obj_type == "page": - page_type = PageType.PAGE - rich_texts = next( - ( - prop["title"] - for prop in item["properties"].values() - if prop["type"] == "title" - ), - [], - ) - else: - page_type = PageType.DATABASE - rich_texts = item.title - - pages.append( - Page( - type=page_type, - id=item["id"], - name="".join( - rich_text["plain_text"] for rich_text in rich_texts - ), - ) - ) + if item.get("parent", {}).get("type") != "workspace": + continue + + assert item["object"] == "page" + + pages.append(NotionPage.model_validate(item)) has_more = response.get("has_more", False) - cursor = response.get("next_cursor") + cursor = response.get("next_cursor", "") return pages -def fetch_blocks(token: str, block_id: str): - response = requests.get( +def fetch_blocks(session: Session, block_id: str, start_cursor: str) -> dict[str, Any]: + response = session.get( f"https://api.notion.com/v1/blocks/{block_id}/children", - headers={ - "Authorization": f"Bearer {token}", - "Content-Type": "application/json", - "Notion-Version": "2022-06-28", + params={ + "start_cursor": start_cursor if start_cursor else None, }, ) if response.status_code == 200: return response.json() else: - print(f"❌ Erreur lors de la requête : {response.status_code}") - print(response.text) + logger.debug(response.text) + raise ValueError(f"❌ Erreur lors de la requête : {response.status_code}") -def fetch_block_children(token: str, block_id: str): - blocks = [] - cursor = None +def fetch_block_children(session: Session, block_id: str) -> list[NotionBlock]: + blocks: list[NotionBlock] = [] + cursor = "" has_more = True while has_more: - response = fetch_blocks(token, block_id) + response = fetch_blocks(session, block_id, cursor) - blocks.extend(response["results"]) + blocks.extend( + TypeAdapter(list[NotionBlock]).validate_python(response["results"]) + ) has_more = response.get("has_more", False) - cursor = response.get("next_cursor") + cursor = response.get("next_cursor", "") - children = [] for block in blocks: - if block["has_children"]: - response = fetch_block_children(token, block["id"]) - children.extend(response) + if block.has_children: + block.children = fetch_block_children(session, block.id) - blocks.extend(children) return blocks def import_notion(token: str): """Recursively imports all Notion pages and blocks accessible using the given token.""" - root_pages = fetch_root_pages(token) - for root_page in root_pages: - page_content = fetch_block_children(token, root_page.id) - print(f"Page {root_page.id}") - print(page_content) - print() + session = build_notion_session(token) + root_pages = fetch_root_pages(session) + for page in root_pages: + blocks = fetch_block_children(session, page.id) + logger.info(f"Page {page.get_title()} (id {page.id})") + logger.info(blocks) From 63039bee4433f980e2e4432d65fec37b9b22bc63 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 3 Jun 2025 00:25:25 +0200 Subject: [PATCH 08/63] Fix ValueError in NotionPage.get_title() --- src/backend/core/notion_schemas/notion_page.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/core/notion_schemas/notion_page.py b/src/backend/core/notion_schemas/notion_page.py index 47a01258cf..eec379f1fe 100644 --- a/src/backend/core/notion_schemas/notion_page.py +++ b/src/backend/core/notion_schemas/notion_page.py @@ -24,5 +24,5 @@ def get_title(self) -> str | None: if title_property is None: return None - rich_text = title_property["title"] # This could be parsed using NotionRichText + rich_text = title_property["title"][0] # This could be parsed using NotionRichText return rich_text["plain_text"] From 7fad79f1100d67aa5030be8d3a05371522123e72 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 3 Jun 2025 00:26:06 +0200 Subject: [PATCH 09/63] Remove awkward debugging log --- src/backend/core/services/converter_services.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/backend/core/services/converter_services.py b/src/backend/core/services/converter_services.py index 633c0ba2ed..47d9cf8ace 100644 --- a/src/backend/core/services/converter_services.py +++ b/src/backend/core/services/converter_services.py @@ -80,8 +80,6 @@ def convert_markdown(self, text): def convert_blocks(self, blocks): """Convert a list of blocks into our internal format using an external microservice.""" - print('BONJOUR') - print(settings.Y_PROVIDER_API_BASE_URL) try: response = requests.post( f"{settings.Y_PROVIDER_API_BASE_URL}{settings.BLOCKS_CONVERSION_API_ENDPOINT}/", From 6bebe672d1222b0ef1160086d99bfb5e1d5e1407 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 3 Jun 2025 00:26:38 +0200 Subject: [PATCH 10/63] Fix 400 in Notion search --- src/backend/core/services/notion_import.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index d7873bd66e..c7f0d28208 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -23,12 +23,16 @@ def build_notion_session(token: str) -> Session: def search_notion(session: Session, start_cursor: str) -> dict[str, Any]: + req_data = {} + if start_cursor: + req_data = { + "start_cursor": start_cursor, + "value": "page", + } + response = session.post( "https://api.notion.com/v1/search", - json={ - "start_cursor": start_cursor if start_cursor else None, - "value": "page", - }, + json=req_data, ) if response.status_code == 200: From 3f31453a762796c080bfc1d00af3b6af03de7e15 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 3 Jun 2025 00:27:21 +0200 Subject: [PATCH 11/63] Simplify Notion API error handling --- src/backend/core/services/notion_import.py | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index c7f0d28208..ea941bcaa5 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -35,13 +35,11 @@ def search_notion(session: Session, start_cursor: str) -> dict[str, Any]: json=req_data, ) - if response.status_code == 200: - logger.info("✅ Requête réussie !") - return response.json() - else: - logger.error(f"❌ Erreur lors de la requête : {response.status_code}") - logger.debug(response.text) - raise ValueError + if response.status_code != 200: + print(response.json()) + + response.raise_for_status() + return response.json() def fetch_root_pages(session: Session) -> list[NotionPage]: @@ -74,11 +72,8 @@ def fetch_blocks(session: Session, block_id: str, start_cursor: str) -> dict[str }, ) - if response.status_code == 200: - return response.json() - else: - logger.debug(response.text) - raise ValueError(f"❌ Erreur lors de la requête : {response.status_code}") + response.raise_for_status() + return response.json() def fetch_block_children(session: Session, block_id: str) -> list[NotionBlock]: From 2f0ef4562fed19e53b2879f817d8f32c96f79e61 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 3 Jun 2025 00:28:05 +0200 Subject: [PATCH 12/63] Create one document per root Notion page --- src/backend/core/api/viewsets.py | 45 +++++++++++----------- src/backend/core/services/notion_import.py | 3 +- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 55ba6a858b..e3f5939468 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -1861,27 +1861,28 @@ def notion_import_run(request): if "notion_token" not in request.session: raise drf.exceptions.PermissionDenied() - import_notion(request.session['notion_token']) - - #document_content = YdocConverter().convert_blocks([ - # { - # "type": "paragraph", - # "content": "Bonjour à toustes zé à toussent", - # }, - #]) - - #obj = models.Document.add_root( - # depth=1, - # creator=request.user, - # title="J'aime les courgettes", - # link_reach=models.LinkReachChoices.RESTRICTED, - # content=document_content, - #) - - #models.DocumentAccess.objects.create( - # document=obj, - # user=request.user, - # role=models.RoleChoices.OWNER, - #) + pages = import_notion(request.session['notion_token']) + + document_content = YdocConverter().convert_blocks([ + { + "type": "paragraph", + "content": "Bonjour à toustes zé à toussent", + }, + ]) + + for page in pages: + obj = models.Document.add_root( + depth=1, + creator=request.user, + title=page.get_title() or "J'aime les courgettes", + link_reach=models.LinkReachChoices.RESTRICTED, + content=document_content, + ) + + models.DocumentAccess.objects.create( + document=obj, + user=request.user, + role=models.RoleChoices.OWNER, + ) return drf.response.Response({"sava": "oui et toi ?"}) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index ea941bcaa5..5f3047d00f 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -98,7 +98,7 @@ def fetch_block_children(session: Session, block_id: str) -> list[NotionBlock]: return blocks -def import_notion(token: str): +def import_notion(token: str) -> list[NotionPage]: """Recursively imports all Notion pages and blocks accessible using the given token.""" session = build_notion_session(token) root_pages = fetch_root_pages(session) @@ -106,3 +106,4 @@ def import_notion(token: str): blocks = fetch_block_children(session, page.id) logger.info(f"Page {page.get_title()} (id {page.id})") logger.info(blocks) + return root_pages From b7db0b3ae87436c99e0c26d31e86d59edf63e479 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 3 Jun 2025 00:49:06 +0200 Subject: [PATCH 13/63] Add super dumb block converter --- src/backend/core/api/viewsets.py | 9 ++------ src/backend/core/services/notion_import.py | 25 ++++++++++++++++++++-- 2 files changed, 25 insertions(+), 9 deletions(-) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index e3f5939468..8ef294cbdf 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -1863,14 +1863,9 @@ def notion_import_run(request): pages = import_notion(request.session['notion_token']) - document_content = YdocConverter().convert_blocks([ - { - "type": "paragraph", - "content": "Bonjour à toustes zé à toussent", - }, - ]) + for page, blocks in pages: + document_content = YdocConverter().convert_blocks(blocks) - for page in pages: obj = models.Document.add_root( depth=1, creator=request.user, diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 5f3047d00f..4019211eac 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -98,12 +98,33 @@ def fetch_block_children(session: Session, block_id: str) -> list[NotionBlock]: return blocks -def import_notion(token: str) -> list[NotionPage]: +def convert_block(block: NotionBlock) -> Any: + match type(block): + case NotionParagraph: + return { + "type": "paragraph", + "content": block.rich_text[0].plain_text # TODO: handle multiple + } + + +def convert_block_list(blocks: list[NotionBlock]) -> Any: + converted_blocks = [] + for block in blocks: + converted_block = convert_block(block) + if converted_block == None: + continue + converted_blocks.append(converted_block) + return converted_blocks + + +def import_notion(token: str) -> list[(NotionPage, Any)]: """Recursively imports all Notion pages and blocks accessible using the given token.""" session = build_notion_session(token) root_pages = fetch_root_pages(session) + pages_and_blocks = [] for page in root_pages: blocks = fetch_block_children(session, page.id) logger.info(f"Page {page.get_title()} (id {page.id})") logger.info(blocks) - return root_pages + pages_and_blocks.append((page, convert_blocks(blocks))) + return pages_and_blocks From 7bae379b0200a055d5848427f1db9f5db7467c2d Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 00:51:53 +0200 Subject: [PATCH 14/63] notion-schemas: add catcah-all unsupported block type --- .../core/notion_schemas/notion_block.py | 29 ++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/src/backend/core/notion_schemas/notion_block.py b/src/backend/core/notion_schemas/notion_block.py index 2afb51b255..4394098e95 100644 --- a/src/backend/core/notion_schemas/notion_block.py +++ b/src/backend/core/notion_schemas/notion_block.py @@ -175,6 +175,32 @@ class NotionLinkPreview(BaseModel): url: str +class NotionBlockUnsupported(BaseModel): + """FIXME: Maybe https://github.com/pydantic/pydantic/discussions/4928#discussioncomment-13079554 would be better""" + + block_type: Literal[ + NotionBlockType.BOOKMARK, + NotionBlockType.BREADCRUMB, + NotionBlockType.CALLOUT, + NotionBlockType.CHILD_DATABASE, + NotionBlockType.CHILD_PAGE, + NotionBlockType.COLUMN, + NotionBlockType.COLUMN_LIST, + NotionBlockType.EQUATION, + NotionBlockType.LINK_TO_PAGE, + NotionBlockType.PDF, + NotionBlockType.QUOTE, + NotionBlockType.SYNCED_BLOCK, + NotionBlockType.TABLE, + NotionBlockType.TABLE_OF_CONTENTS, + NotionBlockType.TABLE_ROW, + NotionBlockType.TEMPLATE, + NotionBlockType.TO_DO, + NotionBlockType.TOGGLE, + NotionBlockType.VIDEO, + ] + + NotionBlockSpecifics = Annotated[ NotionHeading1 | NotionHeading2 @@ -186,6 +212,7 @@ class NotionLinkPreview(BaseModel): | NotionDivider | NotionEmbed | NotionFile - | NotionImage, + | NotionImage + | NotionBlockUnsupported, Discriminator(discriminator="block_type"), ] From 4955ccf269c676e7cf2138d97e2c9226581105ab Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 00:54:56 +0200 Subject: [PATCH 15/63] just add some colors --- src/backend/core/notion_schemas/notion_color.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/backend/core/notion_schemas/notion_color.py b/src/backend/core/notion_schemas/notion_color.py index 4e65a88096..881a6de941 100644 --- a/src/backend/core/notion_schemas/notion_color.py +++ b/src/backend/core/notion_schemas/notion_color.py @@ -19,3 +19,5 @@ class NotionColor(StrEnum): PINK_BACKGROUND = "pink_background" PURPLE = "purple" PURPLE_BACKGROUND = "purple_background" + RED = "red" + RED_BACKGROUND = "red_background" From 216c55fac1688e3473019ca5852d73d336583858 Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 00:58:51 +0200 Subject: [PATCH 16/63] just add a link type --- src/backend/core/notion_schemas/notion_rich_text.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/backend/core/notion_schemas/notion_rich_text.py b/src/backend/core/notion_schemas/notion_rich_text.py index 4d209d6656..036a57d867 100644 --- a/src/backend/core/notion_schemas/notion_rich_text.py +++ b/src/backend/core/notion_schemas/notion_rich_text.py @@ -1,7 +1,7 @@ from enum import StrEnum from typing import Annotated, Any, Literal -from pydantic import BaseModel, Discriminator, ValidationError, model_validator +from pydantic import BaseModel, Discriminator, Field, ValidationError, model_validator from .notion_color import NotionColor @@ -46,10 +46,14 @@ class NotionRichTextType(StrEnum): EQUATION = "equation" +class NotionLink(BaseModel): + url: str + + class NotionRichTextText(BaseModel): type: Literal[NotionRichTextType.TEXT] = NotionRichTextType.TEXT content: str - link: str | None + link: NotionLink | None class NotionRichTextMention(BaseModel): From 74bcea77ace01b343facee96d75e1c00c93cd47b Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 3 Jun 2025 01:01:25 +0200 Subject: [PATCH 17/63] Fix typo lol --- src/backend/core/services/notion_import.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 4019211eac..40e9e7b146 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -126,5 +126,5 @@ def import_notion(token: str) -> list[(NotionPage, Any)]: blocks = fetch_block_children(session, page.id) logger.info(f"Page {page.get_title()} (id {page.id})") logger.info(blocks) - pages_and_blocks.append((page, convert_blocks(blocks))) + pages_and_blocks.append((page, convert_block_list(blocks))) return pages_and_blocks From 1c7371ac14f9ea9752a5653756a86ffeb41191a3 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 3 Jun 2025 01:06:19 +0200 Subject: [PATCH 18/63] It's not a match --- src/backend/core/services/notion_import.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 40e9e7b146..d5943a5cb2 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -7,7 +7,7 @@ from pydantic import TypeAdapter from requests import Session -from ..notion_schemas.notion_block import NotionBlock +from ..notion_schemas.notion_block import NotionBlock, NotionParagraph from ..notion_schemas.notion_page import NotionPage logger = logging.getLogger(__name__) @@ -99,12 +99,15 @@ def fetch_block_children(session: Session, block_id: str) -> list[NotionBlock]: def convert_block(block: NotionBlock) -> Any: - match type(block): - case NotionParagraph: - return { - "type": "paragraph", - "content": block.rich_text[0].plain_text # TODO: handle multiple - } + if isinstance(block.specific, NotionParagraph): + content = "" + if len(block.specific.rich_text) > 0: + # TODO: handle multiple of these + content = block.specific.rich_text[0].plain_text + return { + "type": "paragraph", + "content": content, + } def convert_block_list(blocks: list[NotionBlock]) -> Any: From c8380391a07c9a67394fe1a5adb0cb06d70bf4a8 Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 01:07:32 +0200 Subject: [PATCH 19/63] fixup --- src/backend/core/notion_schemas/notion_block.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/backend/core/notion_schemas/notion_block.py b/src/backend/core/notion_schemas/notion_block.py index 4394098e95..689d257daa 100644 --- a/src/backend/core/notion_schemas/notion_block.py +++ b/src/backend/core/notion_schemas/notion_block.py @@ -198,6 +198,7 @@ class NotionBlockUnsupported(BaseModel): NotionBlockType.TO_DO, NotionBlockType.TOGGLE, NotionBlockType.VIDEO, + NotionBlockType.UNSUPPORTED, ] From b5e3f1aac38a20af61971530d98b46b7f718cfa5 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 3 Jun 2025 01:11:08 +0200 Subject: [PATCH 20/63] Unionize all of these rich folks --- src/backend/core/services/notion_import.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index d5943a5cb2..41d1ca4a44 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -101,9 +101,8 @@ def fetch_block_children(session: Session, block_id: str) -> list[NotionBlock]: def convert_block(block: NotionBlock) -> Any: if isinstance(block.specific, NotionParagraph): content = "" - if len(block.specific.rich_text) > 0: - # TODO: handle multiple of these - content = block.specific.rich_text[0].plain_text + for rich_text in block.specific.rich_text: + content += rich_text.plain_text return { "type": "paragraph", "content": content, From 6aabba6bc26b1bd5540a3456ea57c695b613ebb8 Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 01:19:04 +0200 Subject: [PATCH 21/63] notion-import: tidy up Signed-off-by: Baptiste Prevot --- src/backend/core/api/viewsets.py | 26 +++++++++++------- .../core/notion_schemas/notion_block.py | 1 + src/backend/core/services/notion_import.py | 27 ++++++++++--------- 3 files changed, 31 insertions(+), 23 deletions(-) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 8ef294cbdf..ebc2bd6b72 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -5,7 +5,7 @@ import logging import uuid from collections import defaultdict -from urllib.parse import unquote, urlparse, urlencode +from urllib.parse import unquote, urlencode, urlparse from django.conf import settings from django.contrib.postgres.aggregates import ArrayAgg @@ -18,8 +18,8 @@ from django.db.models.expressions import RawSQL from django.db.models.functions import Left, Length from django.http import Http404, StreamingHttpResponse -from django.utils.functional import cached_property from django.shortcuts import redirect +from django.utils.functional import cached_property from django.utils.text import capfirst, slugify from django.utils.translation import gettext_lazy as _ @@ -1821,22 +1821,27 @@ def _load_theme_customization(self): return theme_customization + notion_client_id = "206d872b-594c-80de-94ff-003760c352e4" notion_client_secret = "XXX" notion_redirect_uri = "https://emersion.fr/notion-redirect" + @drf.decorators.api_view() def notion_import_redirect(request): if "notion_token" in request.session: return redirect("/api/v1.0/notion_import/run") - query = urlencode({ - "client_id": notion_client_id, - "response_type": "code", - "owner": "user", - "redirect_uri": notion_redirect_uri, - }) + query = urlencode( + { + "client_id": notion_client_id, + "response_type": "code", + "owner": "user", + "redirect_uri": notion_redirect_uri, + } + ) return redirect("https://api.notion.com/v1/oauth/authorize?" + query) + @drf.decorators.api_view() def notion_import_callback(request): code = request.GET.get("code") @@ -1855,15 +1860,16 @@ def notion_import_callback(request): request.session["notion_token"] = data["access_token"] return redirect("/api/v1.0/notion_import/run") + # @drf.decorators.api_view(["POST"]) @drf.decorators.api_view() def notion_import_run(request): if "notion_token" not in request.session: raise drf.exceptions.PermissionDenied() - pages = import_notion(request.session['notion_token']) + pages_and_blocks = import_notion(request.session["notion_token"]) - for page, blocks in pages: + for page, blocks in pages_and_blocks: document_content = YdocConverter().convert_blocks(blocks) obj = models.Document.add_root( diff --git a/src/backend/core/notion_schemas/notion_block.py b/src/backend/core/notion_schemas/notion_block.py index 689d257daa..dcb14e5768 100644 --- a/src/backend/core/notion_schemas/notion_block.py +++ b/src/backend/core/notion_schemas/notion_block.py @@ -18,6 +18,7 @@ class NotionBlock(BaseModel): specific: "NotionBlockSpecifics" has_children: bool children: list["NotionBlock"] = Field(init=False, default_factory=list) + # This is not part of the API response, but is used to store children blocks @model_validator(mode="before") @classmethod diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 41d1ca4a44..1b73a4719b 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -98,18 +98,19 @@ def fetch_block_children(session: Session, block_id: str) -> list[NotionBlock]: return blocks -def convert_block(block: NotionBlock) -> Any: - if isinstance(block.specific, NotionParagraph): - content = "" - for rich_text in block.specific.rich_text: - content += rich_text.plain_text - return { - "type": "paragraph", - "content": content, - } - - -def convert_block_list(blocks: list[NotionBlock]) -> Any: +def convert_block(block: NotionBlock) -> dict[str, Any] | None: + match block.specific: + case NotionParagraph(): + content = "" + for rich_text in block.specific.rich_text: + content += rich_text.plain_text + return { + "type": "paragraph", + "content": content, + } + + +def convert_block_list(blocks: list[NotionBlock]) -> list[dict[str, Any]]: converted_blocks = [] for block in blocks: converted_block = convert_block(block) @@ -119,7 +120,7 @@ def convert_block_list(blocks: list[NotionBlock]) -> Any: return converted_blocks -def import_notion(token: str) -> list[(NotionPage, Any)]: +def import_notion(token: str) -> list[tuple[NotionPage, list[dict[str, Any]]]]: """Recursively imports all Notion pages and blocks accessible using the given token.""" session = build_notion_session(token) root_pages = fetch_root_pages(session) From c8d44e846a48e86a2b66c466dd457d62afc3b976 Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 01:24:36 +0200 Subject: [PATCH 22/63] notion-schemas: better unsupported objects --- .../core/notion_schemas/notion_block.py | 62 +++++++++---------- 1 file changed, 28 insertions(+), 34 deletions(-) diff --git a/src/backend/core/notion_schemas/notion_block.py b/src/backend/core/notion_schemas/notion_block.py index dcb14e5768..04d2ab746f 100644 --- a/src/backend/core/notion_schemas/notion_block.py +++ b/src/backend/core/notion_schemas/notion_block.py @@ -179,42 +179,36 @@ class NotionLinkPreview(BaseModel): class NotionBlockUnsupported(BaseModel): """FIXME: Maybe https://github.com/pydantic/pydantic/discussions/4928#discussioncomment-13079554 would be better""" - block_type: Literal[ - NotionBlockType.BOOKMARK, - NotionBlockType.BREADCRUMB, - NotionBlockType.CALLOUT, - NotionBlockType.CHILD_DATABASE, - NotionBlockType.CHILD_PAGE, - NotionBlockType.COLUMN, - NotionBlockType.COLUMN_LIST, - NotionBlockType.EQUATION, - NotionBlockType.LINK_TO_PAGE, - NotionBlockType.PDF, - NotionBlockType.QUOTE, - NotionBlockType.SYNCED_BLOCK, - NotionBlockType.TABLE, - NotionBlockType.TABLE_OF_CONTENTS, - NotionBlockType.TABLE_ROW, - NotionBlockType.TEMPLATE, - NotionBlockType.TO_DO, - NotionBlockType.TOGGLE, - NotionBlockType.VIDEO, - NotionBlockType.UNSUPPORTED, - ] + block_type: str + raw: dict[str, Any] | None = None + + @model_validator(mode="before") + @classmethod + def put_all_in_raw(cls, data: Any) -> Any: + if not isinstance(data, dict): + return data + + if "raw" not in data: + data["raw"] = data.copy() + + return data NotionBlockSpecifics = Annotated[ - NotionHeading1 - | NotionHeading2 - | NotionHeading3 - | NotionParagraph - | NotionNumberedListItem - | NotionBulletedListItem - | NotionCode - | NotionDivider - | NotionEmbed - | NotionFile - | NotionImage + Annotated[ + NotionHeading1 + | NotionHeading2 + | NotionHeading3 + | NotionParagraph + | NotionNumberedListItem + | NotionBulletedListItem + | NotionCode + | NotionDivider + | NotionEmbed + | NotionFile + | NotionImage, + Discriminator(discriminator="block_type"), + ] | NotionBlockUnsupported, - Discriminator(discriminator="block_type"), + Field(union_mode="left_to_right"), ] From 0c86a9b09ab6db9175dbdaf95f0882f8efc658cf Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Sun, 22 Jun 2025 10:58:54 +0200 Subject: [PATCH 23/63] Add import button --- .../impress/src/components/DropdownMenu.tsx | 5 +- .../left-panel/components/LeftPanelHeader.tsx | 67 ++++++++++++++++--- 2 files changed, 63 insertions(+), 9 deletions(-) diff --git a/src/frontend/apps/impress/src/components/DropdownMenu.tsx b/src/frontend/apps/impress/src/components/DropdownMenu.tsx index 5513ccb78b..d2a7223162 100644 --- a/src/frontend/apps/impress/src/components/DropdownMenu.tsx +++ b/src/frontend/apps/impress/src/components/DropdownMenu.tsx @@ -13,6 +13,7 @@ export type DropdownMenuOption = { danger?: boolean; isSelected?: boolean; disabled?: boolean; + padding?: BoxProps['$padding']; show?: boolean; }; @@ -129,7 +130,9 @@ export const DropdownMenu = ({ $justify="space-between" $background={colorsTokens['greyscale-000']} $color={colorsTokens['primary-600']} - $padding={{ vertical: 'xs', horizontal: 'base' }} + $padding={ + option.padding ?? { vertical: 'xs', horizontal: 'base' } + } $width="100%" $gap={spacingsTokens['base']} $css={css` diff --git a/src/frontend/apps/impress/src/features/left-panel/components/LeftPanelHeader.tsx b/src/frontend/apps/impress/src/features/left-panel/components/LeftPanelHeader.tsx index 5733b0dff0..b13f690bcc 100644 --- a/src/frontend/apps/impress/src/features/left-panel/components/LeftPanelHeader.tsx +++ b/src/frontend/apps/impress/src/features/left-panel/components/LeftPanelHeader.tsx @@ -1,16 +1,16 @@ import { Button } from '@openfun/cunningham-react'; -import { useRouter } from 'next/router'; +import { t } from 'i18next'; +import { useRouter } from 'next/navigation'; import { PropsWithChildren, useCallback, useState } from 'react'; -import { Box, Icon, SeparatedSection } from '@/components'; -import { DocSearchModal, DocSearchTarget } from '@/docs/doc-search/'; +import { Box, DropdownMenu, Icon, SeparatedSection } from '@/components'; +import { useCreateDoc } from '@/docs/doc-management'; +import { DocSearchModal } from '@/docs/doc-search'; import { useAuth } from '@/features/auth'; import { useCmdK } from '@/hook/useCmdK'; import { useLeftPanelStore } from '../stores'; -import { LeftPanelHeaderButton } from './LeftPanelHeaderButton'; - export const LeftPanelHeader = ({ children }: PropsWithChildren) => { const router = useRouter(); const { authenticated } = useAuth(); @@ -35,11 +35,32 @@ export const LeftPanelHeader = ({ children }: PropsWithChildren) => { useCmdK(openSearchModal); const { togglePanel } = useLeftPanelStore(); + const { mutate: createDoc, isPending: isCreatingDoc } = useCreateDoc({ + onSuccess: (doc) => { + router.push(`/docs/${doc.id}`); + togglePanel(); + }, + }); + const goToHome = () => { - void router.push('/'); + router.push('/'); togglePanel(); }; + const createNewDoc = () => { + createDoc(); + }; + + const handleImportFilesystem = () => { + // TODO: Implement filesystem import + }; + + const handleImportNotion = () => { + const baseApiUrl = process.env.NEXT_PUBLIC_API_ORIGIN; + const notionAuthUrl = `${baseApiUrl}/api/v1.0/notion_import/redirect`; + window.location.href = notionAuthUrl; + }; + return ( <> @@ -71,8 +92,38 @@ export const LeftPanelHeader = ({ children }: PropsWithChildren) => { /> )} - - {authenticated && } + {authenticated && ( + + + + )} {children} From 7eea4818454b25ac5eda9ce06bc27a88cf7daf69 Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 01:37:56 +0200 Subject: [PATCH 24/63] notion-schemas: add tables --- .../core/notion_schemas/notion_block.py | 25 ++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/src/backend/core/notion_schemas/notion_block.py b/src/backend/core/notion_schemas/notion_block.py index 04d2ab746f..b22258528d 100644 --- a/src/backend/core/notion_schemas/notion_block.py +++ b/src/backend/core/notion_schemas/notion_block.py @@ -176,9 +176,25 @@ class NotionLinkPreview(BaseModel): url: str -class NotionBlockUnsupported(BaseModel): - """FIXME: Maybe https://github.com/pydantic/pydantic/discussions/4928#discussioncomment-13079554 would be better""" +class NotionTable(BaseModel): + """https://developers.notion.com/reference/block#table + + The children of this block are NotionTableRow blocks.""" + + block_type: Literal[NotionBlockType.TABLE] = NotionBlockType.TABLE + table_width: int + has_column_header: bool + has_row_header: bool + +class NotionTableRow(BaseModel): + """https://developers.notion.com/reference/block#table-row""" + + block_type: Literal[NotionBlockType.TABLE_ROW] = NotionBlockType.TABLE_ROW + cells: list[list[NotionRichText]] # Each cell is a list of rich text objects + + +class NotionBlockUnsupported(BaseModel): block_type: str raw: dict[str, Any] | None = None @@ -206,7 +222,10 @@ def put_all_in_raw(cls, data: Any) -> Any: | NotionDivider | NotionEmbed | NotionFile - | NotionImage, + | NotionImage + | NotionLinkPreview + | NotionTable + | NotionTableRow, Discriminator(discriminator="block_type"), ] | NotionBlockUnsupported, From 04965202a67f1ea84398bc8fd6c04f8f0195f292 Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 01:43:57 +0200 Subject: [PATCH 25/63] notion-schemas: blocks: add child-page and video --- .../core/notion_schemas/notion_block.py | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/src/backend/core/notion_schemas/notion_block.py b/src/backend/core/notion_schemas/notion_block.py index b22258528d..de39f1a7d7 100644 --- a/src/backend/core/notion_schemas/notion_block.py +++ b/src/backend/core/notion_schemas/notion_block.py @@ -169,6 +169,13 @@ class NotionImage(BaseModel): # FIXME: this actually contains a file reference which will be defined for the above, but with the "image" attribute +class NotionVideo(BaseModel): + """https://developers.notion.com/reference/block#video""" + + block_type: Literal[NotionBlockType.VIDEO] = NotionBlockType.VIDEO + # FIXME: this actually contains a file reference which will be defined for the above, but with the "video" attribute + + class NotionLinkPreview(BaseModel): """https://developers.notion.com/reference/block#link-preview""" @@ -194,6 +201,15 @@ class NotionTableRow(BaseModel): cells: list[list[NotionRichText]] # Each cell is a list of rich text objects +class NotionChildPage(BaseModel): + """https://developers.notion.com/reference/block#child-page + + My guess is that the actual child page is a child of this block ? We don't have the id...""" + + block_type: Literal[NotionBlockType.CHILD_PAGE] = NotionBlockType.CHILD_PAGE + title: str + + class NotionBlockUnsupported(BaseModel): block_type: str raw: dict[str, Any] | None = None @@ -223,9 +239,11 @@ def put_all_in_raw(cls, data: Any) -> Any: | NotionEmbed | NotionFile | NotionImage + | NotionVideo | NotionLinkPreview | NotionTable - | NotionTableRow, + | NotionTableRow + | NotionChildPage, Discriminator(discriminator="block_type"), ] | NotionBlockUnsupported, From 70c283d4c96d0804fce3b1cdf66600b0efa511c6 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 3 Jun 2025 09:44:00 +0200 Subject: [PATCH 26/63] Don't reuse token in redirect endpoint --- src/backend/core/api/viewsets.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index ebc2bd6b72..072488fd41 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -1829,8 +1829,6 @@ def _load_theme_customization(self): @drf.decorators.api_view() def notion_import_redirect(request): - if "notion_token" in request.session: - return redirect("/api/v1.0/notion_import/run") query = urlencode( { "client_id": notion_client_id, From 4699870e688a146226d2f6971ee53b2f5f7bfda4 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 3 Jun 2025 09:51:32 +0200 Subject: [PATCH 27/63] Move Notion API details to settings --- src/backend/core/api/viewsets.py | 13 ++++--------- src/backend/impress/settings.py | 16 ++++++++++++++++ 2 files changed, 20 insertions(+), 9 deletions(-) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 072488fd41..2f3ffebe88 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -1822,19 +1822,14 @@ def _load_theme_customization(self): return theme_customization -notion_client_id = "206d872b-594c-80de-94ff-003760c352e4" -notion_client_secret = "XXX" -notion_redirect_uri = "https://emersion.fr/notion-redirect" - - @drf.decorators.api_view() def notion_import_redirect(request): query = urlencode( { - "client_id": notion_client_id, + "client_id": settings.NOTION_CLIENT_ID, "response_type": "code", "owner": "user", - "redirect_uri": notion_redirect_uri, + "redirect_uri": settings.NOTION_REDIRECT_URI, } ) return redirect("https://api.notion.com/v1/oauth/authorize?" + query) @@ -1845,12 +1840,12 @@ def notion_import_callback(request): code = request.GET.get("code") resp = requests.post( "https://api.notion.com/v1/oauth/token", - auth=requests.auth.HTTPBasicAuth(notion_client_id, notion_client_secret), + auth=requests.auth.HTTPBasicAuth(settings.NOTION_CLIENT_ID, settings.NOTION_CLIENT_SECRET), headers={"Accept": "application/json"}, data={ "grant_type": "authorization_code", "code": code, - "redirect_uri": notion_redirect_uri, + "redirect_uri": settings.NOTION_REDIRECT_URI, }, ) resp.raise_for_status() diff --git a/src/backend/impress/settings.py b/src/backend/impress/settings.py index 7093aad6db..12c12cc3aa 100755 --- a/src/backend/impress/settings.py +++ b/src/backend/impress/settings.py @@ -649,6 +649,22 @@ class Base(Configuration): environ_prefix=None, ) + NOTION_CLIENT_ID = values.Value( + default=None, + environ_name="NOTION_CLIENT_ID", + environ_prefix=None, + ) + NOTION_CLIENT_SECRET = values.Value( + default=None, + environ_name="NOTION_CLIENT_SECRET", + environ_prefix=None, + ) + NOTION_REDIRECT_URI = values.Value( + default=None, + environ_name="NOTION_REDIRECT_URI", + environ_prefix=None, + ) + # Logging # We want to make it easy to log to console but by default we log production # to Sentry and don't want to log to console. From 979bc07383209e1d23309ed01b5536b813ea482a Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 3 Jun 2025 10:08:04 +0200 Subject: [PATCH 28/63] Introduce ImportedDocument --- src/backend/core/api/viewsets.py | 8 ++++---- src/backend/core/services/notion_import.py | 15 ++++++++++----- 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 2f3ffebe88..66f5f9e4a3 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -1860,15 +1860,15 @@ def notion_import_run(request): if "notion_token" not in request.session: raise drf.exceptions.PermissionDenied() - pages_and_blocks = import_notion(request.session["notion_token"]) + imported_docs = import_notion(request.session["notion_token"]) - for page, blocks in pages_and_blocks: - document_content = YdocConverter().convert_blocks(blocks) + for imported_doc in imported_docs: + document_content = YdocConverter().convert_blocks(imported_doc.blocks) obj = models.Document.add_root( depth=1, creator=request.user, - title=page.get_title() or "J'aime les courgettes", + title=imported_doc.page.get_title() or "J'aime les courgettes", link_reach=models.LinkReachChoices.RESTRICTED, content=document_content, ) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 1b73a4719b..f8b677d305 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -4,7 +4,7 @@ from typing import Any import requests -from pydantic import TypeAdapter +from pydantic import BaseModel, TypeAdapter from requests import Session from ..notion_schemas.notion_block import NotionBlock, NotionParagraph @@ -120,14 +120,19 @@ def convert_block_list(blocks: list[NotionBlock]) -> list[dict[str, Any]]: return converted_blocks -def import_notion(token: str) -> list[tuple[NotionPage, list[dict[str, Any]]]]: +class ImportedDocument(BaseModel): + page: NotionPage + blocks: list[dict[str, Any]] + + +def import_notion(token: str) -> list[ImportedDocument]: """Recursively imports all Notion pages and blocks accessible using the given token.""" session = build_notion_session(token) root_pages = fetch_root_pages(session) - pages_and_blocks = [] + docs = [] for page in root_pages: blocks = fetch_block_children(session, page.id) logger.info(f"Page {page.get_title()} (id {page.id})") logger.info(blocks) - pages_and_blocks.append((page, convert_block_list(blocks))) - return pages_and_blocks + docs.append(ImportedDocument(page=page, blocks=convert_block_list(blocks))) + return docs From 4b56e6c4f323aa729f05fefef6896990d724711f Mon Sep 17 00:00:00 2001 From: Clara Ni Date: Tue, 3 Jun 2025 10:39:44 +0200 Subject: [PATCH 29/63] handle heading blocks --- src/backend/core/services/notion_import.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index f8b677d305..55de1bf5a9 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -7,7 +7,13 @@ from pydantic import BaseModel, TypeAdapter from requests import Session -from ..notion_schemas.notion_block import NotionBlock, NotionParagraph +from ..notion_schemas.notion_block import ( + NotionBlock, + NotionParagraph, + NotionHeading1, + NotionHeading2, + NotionHeading3, +) from ..notion_schemas.notion_page import NotionPage logger = logging.getLogger(__name__) @@ -108,6 +114,14 @@ def convert_block(block: NotionBlock) -> dict[str, Any] | None: "type": "paragraph", "content": content, } + case NotionHeading1() | NotionHeading2() | NotionHeading3(): + content = "" + for rich_text in block.specific.rich_text: + content += rich_text.plain_text + return { + "type": "heading", + "content": content, + } def convert_block_list(blocks: list[NotionBlock]) -> list[dict[str, Any]]: From d1d85efd010e969024d2021a1fa2997bb7f7f3ee Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 01:52:56 +0200 Subject: [PATCH 30/63] notion-import: tidy parsing --- src/backend/core/services/notion_import.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 55de1bf5a9..d212dc8c2b 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -9,12 +9,13 @@ from ..notion_schemas.notion_block import ( NotionBlock, - NotionParagraph, NotionHeading1, NotionHeading2, NotionHeading3, + NotionParagraph, ) from ..notion_schemas.notion_page import NotionPage +from ..notion_schemas.notion_rich_text import NotionRichText logger = logging.getLogger(__name__) @@ -104,23 +105,21 @@ def fetch_block_children(session: Session, block_id: str) -> list[NotionBlock]: return blocks +def convert_rich_texts(rich_texts: list[NotionRichText]) -> str: + return "".join(rich_text.plain_text for rich_text in rich_texts) + + def convert_block(block: NotionBlock) -> dict[str, Any] | None: match block.specific: case NotionParagraph(): - content = "" - for rich_text in block.specific.rich_text: - content += rich_text.plain_text return { "type": "paragraph", - "content": content, + "content": convert_rich_texts(block.specific.rich_text), } case NotionHeading1() | NotionHeading2() | NotionHeading3(): - content = "" - for rich_text in block.specific.rich_text: - content += rich_text.plain_text return { "type": "heading", - "content": content, + "content": convert_rich_texts(block.specific.rich_text), } From 721a888f998c32b00d2fa36c2769253bbfe2d086 Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 02:03:11 +0200 Subject: [PATCH 31/63] notion-import: handle dividers --- src/backend/core/services/notion_import.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index d212dc8c2b..29ace550a7 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -9,6 +9,7 @@ from ..notion_schemas.notion_block import ( NotionBlock, + NotionDivider, NotionHeading1, NotionHeading2, NotionHeading3, @@ -120,6 +121,13 @@ def convert_block(block: NotionBlock) -> dict[str, Any] | None: return { "type": "heading", "content": convert_rich_texts(block.specific.rich_text), + "level": block.specific.block_type.value.split("_")[ + -1 + ], # e.g., "1", "2", or "3" + } + case NotionDivider(): + return { + "type": "divider", } From 35e8ef4dfeaf0a4ae4f861099e60222f43e5f203 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 3 Jun 2025 11:34:59 +0200 Subject: [PATCH 32/63] Add support for child pages --- src/backend/core/api/viewsets.py | 55 +++++++++++----- .../core/notion_schemas/notion_page.py | 40 +++++++++++- src/backend/core/services/notion_import.py | 65 +++++++++++++++---- 3 files changed, 132 insertions(+), 28 deletions(-) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 66f5f9e4a3..c0da159fbb 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -1854,6 +1854,44 @@ def notion_import_callback(request): return redirect("/api/v1.0/notion_import/run") +def _import_notion_child_page(imported_doc, parent_doc, user, imported_docs_by_page_id): + document_content = YdocConverter().convert_blocks(imported_doc.blocks) + + obj = parent_doc.add_child( + creator=user, + title=imported_doc.page.get_title() or "J'aime les carottes", + content=document_content, + ) + + imported_docs_by_page_id[imported_doc.page.id] = obj + + for child in imported_doc.children: + _import_notion_child_page(child, obj, user, imported_docs_by_page_id) + + +def _import_notion_root_page(imported_doc, user, imported_docs_by_page_id): + document_content = YdocConverter().convert_blocks(imported_doc.blocks) + + obj = models.Document.add_root( + depth=1, + creator=user, + title=imported_doc.page.get_title() or "J'aime les courgettes", + link_reach=models.LinkReachChoices.RESTRICTED, + content=document_content, + ) + + models.DocumentAccess.objects.create( + document=obj, + user=user, + role=models.RoleChoices.OWNER, + ) + + imported_docs_by_page_id[imported_doc.page.id] = obj + + for child in imported_doc.children: + _import_notion_child_page(child, obj, user, imported_docs_by_page_id) + + # @drf.decorators.api_view(["POST"]) @drf.decorators.api_view() def notion_import_run(request): @@ -1862,21 +1900,8 @@ def notion_import_run(request): imported_docs = import_notion(request.session["notion_token"]) + imported_docs_by_page_id = {} for imported_doc in imported_docs: - document_content = YdocConverter().convert_blocks(imported_doc.blocks) - - obj = models.Document.add_root( - depth=1, - creator=request.user, - title=imported_doc.page.get_title() or "J'aime les courgettes", - link_reach=models.LinkReachChoices.RESTRICTED, - content=document_content, - ) - - models.DocumentAccess.objects.create( - document=obj, - user=request.user, - role=models.RoleChoices.OWNER, - ) + _import_notion_root_page(imported_doc, request.user, imported_docs_by_page_id) return drf.response.Response({"sava": "oui et toi ?"}) diff --git a/src/backend/core/notion_schemas/notion_page.py b/src/backend/core/notion_schemas/notion_page.py index eec379f1fe..8493631cec 100644 --- a/src/backend/core/notion_schemas/notion_page.py +++ b/src/backend/core/notion_schemas/notion_page.py @@ -1,6 +1,8 @@ from datetime import datetime +from enum import StrEnum +from typing import Annotated, Any, Literal -from pydantic import BaseModel +from pydantic import BaseModel, Discriminator, Field, ValidationError, model_validator from .notion_rich_text import NotionRichText @@ -8,9 +10,45 @@ class NotionFile(BaseModel): ... +class NotionParentType(StrEnum): + DATABASE = "database_id" + PAGE = "page_id" + WORKSPACE = "workspace" + BLOCK = "block_id" + + +class NotionParentDatabase(BaseModel): + type: Literal[NotionParentType.DATABASE] = NotionParentType.DATABASE + database_id: str + + +class NotionParentPage(BaseModel): + type: Literal[NotionParentType.PAGE] = NotionParentType.PAGE + page_id: str + + +class NotionParentWorkspace(BaseModel): + type: Literal[NotionParentType.WORKSPACE] = NotionParentType.WORKSPACE + + +class NotionParentBlock(BaseModel): + type: Literal[NotionParentType.BLOCK] = NotionParentType.BLOCK + block_id: str + + +NotionParent = Annotated[ + NotionParentDatabase + | NotionParentPage + | NotionParentWorkspace + | NotionParentBlock, + Discriminator(discriminator="type"), +] + + class NotionPage(BaseModel): id: str archived: bool + parent: NotionParent # created_time: datetime # last_edited_time: datetime diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 29ace550a7..515bcb412f 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -14,8 +14,9 @@ NotionHeading2, NotionHeading3, NotionParagraph, + NotionChildPage, ) -from ..notion_schemas.notion_page import NotionPage +from ..notion_schemas.notion_page import NotionPage, NotionParentWorkspace, NotionParentBlock, NotionParentPage from ..notion_schemas.notion_rich_text import NotionRichText logger = logging.getLogger(__name__) @@ -50,7 +51,7 @@ def search_notion(session: Session, start_cursor: str) -> dict[str, Any]: return response.json() -def fetch_root_pages(session: Session) -> list[NotionPage]: +def fetch_all_pages(session: Session) -> list[NotionPage]: pages = [] cursor = "" has_more = True @@ -59,9 +60,6 @@ def fetch_root_pages(session: Session) -> list[NotionPage]: response = search_notion(session, start_cursor=cursor) for item in response["results"]: - if item.get("parent", {}).get("type") != "workspace": - continue - assert item["object"] == "page" pages.append(NotionPage.model_validate(item)) @@ -143,17 +141,60 @@ def convert_block_list(blocks: list[NotionBlock]) -> list[dict[str, Any]]: class ImportedDocument(BaseModel): page: NotionPage - blocks: list[dict[str, Any]] + blocks: list[dict[str, Any]] = [] + children: list["ImportedDocument"] = [] + +def find_page(id: str, pages: list[NotionPage]): + for page in all_pages: + if page.id == id: + return page + return None + +def find_block_child_page(block_id: str, all_pages: list[NotionPage]): + for page in all_pages: + if isinstance(page.parent, NotionParentBlock) and page.parent.block_id == block_id: + return page + return None + + +def convert_child_pages(session: Session, parent: NotionPage, blocks: list[NotionBlock], all_pages: list[NotionPage]) -> list[ImportedDocument]: + children = [] + + for page in all_pages: + if isinstance(page.parent, NotionParentPage) and page.parent.page_id == parent.id: + children.append(import_page(session, page, all_pages)) + + for block in blocks: + if not isinstance(block.specific, NotionChildPage): + continue + + # TODO + #parent_page = find_block_child_page(block.id, all_pages) + #if parent_page == None: + # logger.warning(f"Cannot find parent of block {block.id}") + # continue + #children.append(import_page(session, parent_page, all_pages)) + + return children + + +def import_page(session: Session, page: NotionPage, all_pages: list[NotionPage]) -> ImportedDocument: + blocks = fetch_block_children(session, page.id) + logger.info(f"Page {page.get_title()} (id {page.id})") + logger.info(blocks) + return ImportedDocument( + page=page, + blocks=convert_block_list(blocks), + children=convert_child_pages(session, page, blocks, all_pages), + ) def import_notion(token: str) -> list[ImportedDocument]: """Recursively imports all Notion pages and blocks accessible using the given token.""" session = build_notion_session(token) - root_pages = fetch_root_pages(session) + all_pages = fetch_all_pages(session) docs = [] - for page in root_pages: - blocks = fetch_block_children(session, page.id) - logger.info(f"Page {page.get_title()} (id {page.id})") - logger.info(blocks) - docs.append(ImportedDocument(page=page, blocks=convert_block_list(blocks))) + for page in all_pages: + if isinstance(page.parent, NotionParentWorkspace): + docs.append(import_page(session, page, all_pages)) return docs From cf343c6a2e15e041983262b0f32abe859f35b2ae Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 3 Jun 2025 11:41:17 +0200 Subject: [PATCH 33/63] Add DocumentAccess for child docs, just in case MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No idea if it works… --- src/backend/core/api/viewsets.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index c0da159fbb..040d93d884 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -1863,6 +1863,12 @@ def _import_notion_child_page(imported_doc, parent_doc, user, imported_docs_by_p content=document_content, ) + models.DocumentAccess.objects.create( + document=obj, + user=user, + role=models.RoleChoices.OWNER, + ) + imported_docs_by_page_id[imported_doc.page.id] = obj for child in imported_doc.children: From 48ba52bb5939d72036b30cb56021d6dc996699c6 Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 3 Jun 2025 11:51:50 +0200 Subject: [PATCH 34/63] Introduce NotionFile --- .../core/notion_schemas/notion_block.py | 3 +- .../core/notion_schemas/notion_file.py | 28 +++++++++++++++++++ .../core/notion_schemas/notion_page.py | 4 +-- 3 files changed, 31 insertions(+), 4 deletions(-) create mode 100644 src/backend/core/notion_schemas/notion_file.py diff --git a/src/backend/core/notion_schemas/notion_block.py b/src/backend/core/notion_schemas/notion_block.py index de39f1a7d7..9942364e94 100644 --- a/src/backend/core/notion_schemas/notion_block.py +++ b/src/backend/core/notion_schemas/notion_block.py @@ -6,6 +6,7 @@ from .notion_color import NotionColor from .notion_rich_text import NotionRichText +from .notion_file import NotionFile """Usage: NotionBlock.model_validate(response.json())""" @@ -166,7 +167,7 @@ class NotionImage(BaseModel): """https://developers.notion.com/reference/block#image""" block_type: Literal[NotionBlockType.IMAGE] = NotionBlockType.IMAGE - # FIXME: this actually contains a file reference which will be defined for the above, but with the "image" attribute + file: NotionFile class NotionVideo(BaseModel): diff --git a/src/backend/core/notion_schemas/notion_file.py b/src/backend/core/notion_schemas/notion_file.py new file mode 100644 index 0000000000..59b4a9737b --- /dev/null +++ b/src/backend/core/notion_schemas/notion_file.py @@ -0,0 +1,28 @@ +from enum import StrEnum +from typing import Annotated, Literal + +from pydantic import BaseModel, Discriminator + +class NotionFileType(StrEnum): + HOSTED = "file" + UPLOAD = "file_upload" + EXTERNAL = "external" + +class NotionFileHosted(BaseModel): + type: Literal[NotionFileType.HOSTED] = NotionFileType.HOSTED + file: dict # TODO + +class NotionFileUpload(BaseModel): + type: Literal[NotionFileType.UPLOAD] = NotionFileType.UPLOAD + file_upload: dict # TODO + +class NotionFileExternal(BaseModel): + type: Literal[NotionFileType.EXTERNAL] = NotionFileType.EXTERNAL + external: dict # TODO + +NotionFile = Annotated[ + NotionFileHosted + | NotionFileUpload + | NotionFileExternal, + Discriminator(discriminator="type"), +] diff --git a/src/backend/core/notion_schemas/notion_page.py b/src/backend/core/notion_schemas/notion_page.py index 8493631cec..2aa0b0e58d 100644 --- a/src/backend/core/notion_schemas/notion_page.py +++ b/src/backend/core/notion_schemas/notion_page.py @@ -5,9 +5,7 @@ from pydantic import BaseModel, Discriminator, Field, ValidationError, model_validator from .notion_rich_text import NotionRichText - - -class NotionFile(BaseModel): ... +from .notion_file import NotionFile class NotionParentType(StrEnum): From fb277088ee53b696f8ec6d94bd95e5c2d8d084d7 Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 03:51:03 +0200 Subject: [PATCH 35/63] notion-import: add table & fix converter error message --- .../core/notion_schemas/notion_block.py | 4 +- .../core/services/converter_services.py | 5 +- src/backend/core/services/notion_import.py | 113 ++++++++++++++++-- .../src/handlers/convertBlocksHandler.ts | 2 +- 4 files changed, 108 insertions(+), 16 deletions(-) diff --git a/src/backend/core/notion_schemas/notion_block.py b/src/backend/core/notion_schemas/notion_block.py index 9942364e94..bbc3caaa09 100644 --- a/src/backend/core/notion_schemas/notion_block.py +++ b/src/backend/core/notion_schemas/notion_block.py @@ -211,7 +211,7 @@ class NotionChildPage(BaseModel): title: str -class NotionBlockUnsupported(BaseModel): +class NotionUnsupported(BaseModel): block_type: str raw: dict[str, Any] | None = None @@ -247,6 +247,6 @@ def put_all_in_raw(cls, data: Any) -> Any: | NotionChildPage, Discriminator(discriminator="block_type"), ] - | NotionBlockUnsupported, + | NotionUnsupported, Field(union_mode="left_to_right"), ] diff --git a/src/backend/core/services/converter_services.py b/src/backend/core/services/converter_services.py index 47d9cf8ace..7fa603a118 100644 --- a/src/backend/core/services/converter_services.py +++ b/src/backend/core/services/converter_services.py @@ -93,7 +93,10 @@ def convert_blocks(self, blocks): timeout=settings.CONVERSION_API_TIMEOUT, verify=settings.CONVERSION_API_SECURE, ) - response.raise_for_status() + if not response.ok: + raise ValueError( + f"Conversion service returned an error: {response.status_code} - {response.text}" + ) conversion_response = response.json() except requests.RequestException as err: diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 515bcb412f..5ce4d5741a 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -1,3 +1,4 @@ +import json import logging from dataclasses import dataclass from enum import Enum @@ -9,14 +10,22 @@ from ..notion_schemas.notion_block import ( NotionBlock, + NotionChildPage, NotionDivider, NotionHeading1, NotionHeading2, NotionHeading3, NotionParagraph, - NotionChildPage, + NotionTable, + NotionTableRow, + NotionUnsupported, +) +from ..notion_schemas.notion_page import ( + NotionPage, + NotionParentBlock, + NotionParentPage, + NotionParentWorkspace, ) -from ..notion_schemas.notion_page import NotionPage, NotionParentWorkspace, NotionParentBlock, NotionParentPage from ..notion_schemas.notion_rich_text import NotionRichText logger = logging.getLogger(__name__) @@ -60,7 +69,9 @@ def fetch_all_pages(session: Session) -> list[NotionPage]: response = search_notion(session, start_cursor=cursor) for item in response["results"]: - assert item["object"] == "page" + if item["object"] != "page": + logger.warning(f"Skipping non-page object: {item['object']}") + continue pages.append(NotionPage.model_validate(item)) @@ -123,9 +134,72 @@ def convert_block(block: NotionBlock) -> dict[str, Any] | None: -1 ], # e.g., "1", "2", or "3" } - case NotionDivider(): + # case NotionDivider(): + # return { + # "type": "divider", + # } + case NotionTable(): + rows: list[NotionTableRow] = [child.specific for child in block.children] # type: ignore # I don't know how to assert properly + if len(rows) == 0: + return { + "type": "paragraph", + "content": "Empty table ?!", + } + + n_columns = len( + rows[0].cells + ) # I'll assume all rows have the same number of cells + if n_columns == 0: + return { + "type": "paragraph", + "content": "Empty row ?!", + } + if not all(len(row.cells) == n_columns for row in rows): + return { + "type": "paragraph", + "content": "Rows have different number of cells ?!", + } + return { + "type": "table", + "content": { + "type": "tableContent", + "columnWidths": [1000 / n_columns for _ in range(n_columns)], + "headerRows": int(block.specific.has_column_header), + "headerColumns": int(block.specific.has_row_header), + "props": { + "textColor": "default", + }, + "rows": [ + { + "cells": [ + { + "type": "tableCell", + "content": [ + { + "type": "text", + "text": convert_rich_texts(cell), + "styles": {}, + } + ], + } + for cell in row.cells + ] + } + for row in rows + ], + }, + } + + case NotionUnsupported(): + str_raw = json.dumps(block.specific.raw, indent=2) + return { + "type": "paragraph", + "content": f"This should be a {block.specific.block_type}, not yet supported in docs", + } + case _: return { - "type": "divider", + "type": "paragraph", + "content": f"This should be a {block.specific.block_type}, not yet handled by the importer", } @@ -144,24 +218,37 @@ class ImportedDocument(BaseModel): blocks: list[dict[str, Any]] = [] children: list["ImportedDocument"] = [] + def find_page(id: str, pages: list[NotionPage]): for page in all_pages: if page.id == id: return page return None + def find_block_child_page(block_id: str, all_pages: list[NotionPage]): for page in all_pages: - if isinstance(page.parent, NotionParentBlock) and page.parent.block_id == block_id: + if ( + isinstance(page.parent, NotionParentBlock) + and page.parent.block_id == block_id + ): return page return None -def convert_child_pages(session: Session, parent: NotionPage, blocks: list[NotionBlock], all_pages: list[NotionPage]) -> list[ImportedDocument]: +def convert_child_pages( + session: Session, + parent: NotionPage, + blocks: list[NotionBlock], + all_pages: list[NotionPage], +) -> list[ImportedDocument]: children = [] for page in all_pages: - if isinstance(page.parent, NotionParentPage) and page.parent.page_id == parent.id: + if ( + isinstance(page.parent, NotionParentPage) + and page.parent.page_id == parent.id + ): children.append(import_page(session, page, all_pages)) for block in blocks: @@ -169,16 +256,18 @@ def convert_child_pages(session: Session, parent: NotionPage, blocks: list[Notio continue # TODO - #parent_page = find_block_child_page(block.id, all_pages) - #if parent_page == None: + # parent_page = find_block_child_page(block.id, all_pages) + # if parent_page == None: # logger.warning(f"Cannot find parent of block {block.id}") # continue - #children.append(import_page(session, parent_page, all_pages)) + # children.append(import_page(session, parent_page, all_pages)) return children -def import_page(session: Session, page: NotionPage, all_pages: list[NotionPage]) -> ImportedDocument: +def import_page( + session: Session, page: NotionPage, all_pages: list[NotionPage] +) -> ImportedDocument: blocks = fetch_block_children(session, page.id) logger.info(f"Page {page.get_title()} (id {page.id})") logger.info(blocks) diff --git a/src/frontend/servers/y-provider/src/handlers/convertBlocksHandler.ts b/src/frontend/servers/y-provider/src/handlers/convertBlocksHandler.ts index 38af609342..05665c3b60 100644 --- a/src/frontend/servers/y-provider/src/handlers/convertBlocksHandler.ts +++ b/src/frontend/servers/y-provider/src/handlers/convertBlocksHandler.ts @@ -42,6 +42,6 @@ export const convertBlocksHandler = async ( res.status(200).json({ content }); } catch (e) { logger('conversion failed:', e); - res.status(500).json({ error: 'An error occurred' }); + res.status(500).json({ error: String(e) }); } }; From f2248ccbdbf6ca3f8319d8f2560d8b4df1172696 Mon Sep 17 00:00:00 2001 From: Clara Ni Date: Tue, 3 Jun 2025 12:22:49 +0200 Subject: [PATCH 36/63] add FRONTEND_URL to env settings --- env.d/development/common.dist | 1 + src/backend/impress/settings.py | 3 +++ 2 files changed, 4 insertions(+) diff --git a/env.d/development/common.dist b/env.d/development/common.dist index 4b1389bf44..75e7460fa7 100644 --- a/env.d/development/common.dist +++ b/env.d/development/common.dist @@ -64,3 +64,4 @@ COLLABORATION_WS_URL=ws://localhost:4444/collaboration/ws/ # Frontend FRONTEND_THEME=default +FRONTEND_URL=http://localhost:3000 diff --git a/src/backend/impress/settings.py b/src/backend/impress/settings.py index 12c12cc3aa..2bb3b6f181 100755 --- a/src/backend/impress/settings.py +++ b/src/backend/impress/settings.py @@ -415,6 +415,9 @@ class Base(Configuration): ) # Frontend + FRONTEND_URL = values.Value( + None, environ_name="FRONTEND_URL", environ_prefix=None + ) FRONTEND_THEME = values.Value( None, environ_name="FRONTEND_THEME", environ_prefix=None ) From 2f6880959a828ec83e8456daafbbe748a1d7df8e Mon Sep 17 00:00:00 2001 From: Clara Ni Date: Tue, 3 Jun 2025 12:49:28 +0200 Subject: [PATCH 37/63] Add a loading page during import --- src/backend/core/api/viewsets.py | 5 +-- .../doc-management/api/useImportNotion.tsx | 33 +++++++++++++++ .../features/service-worker/service-worker.ts | 1 + .../apps/impress/src/i18n/translations.json | 10 +++++ .../impress/src/pages/import-notion/index.tsx | 42 +++++++++++++++++++ 5 files changed, 88 insertions(+), 3 deletions(-) create mode 100644 src/frontend/apps/impress/src/features/docs/doc-management/api/useImportNotion.tsx create mode 100644 src/frontend/apps/impress/src/pages/import-notion/index.tsx diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 040d93d884..ebca6bae84 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -1851,7 +1851,7 @@ def notion_import_callback(request): resp.raise_for_status() data = resp.json() request.session["notion_token"] = data["access_token"] - return redirect("/api/v1.0/notion_import/run") + return redirect(f"{settings.FRONTEND_URL}/import-notion/") def _import_notion_child_page(imported_doc, parent_doc, user, imported_docs_by_page_id): @@ -1898,8 +1898,7 @@ def _import_notion_root_page(imported_doc, user, imported_docs_by_page_id): _import_notion_child_page(child, obj, user, imported_docs_by_page_id) -# @drf.decorators.api_view(["POST"]) -@drf.decorators.api_view() +@drf.decorators.api_view(["POST"]) def notion_import_run(request): if "notion_token" not in request.session: raise drf.exceptions.PermissionDenied() diff --git a/src/frontend/apps/impress/src/features/docs/doc-management/api/useImportNotion.tsx b/src/frontend/apps/impress/src/features/docs/doc-management/api/useImportNotion.tsx new file mode 100644 index 0000000000..a7234e9609 --- /dev/null +++ b/src/frontend/apps/impress/src/features/docs/doc-management/api/useImportNotion.tsx @@ -0,0 +1,33 @@ +import { useMutation, useQueryClient } from '@tanstack/react-query'; +import { useRouter } from 'next/navigation'; + +import { APIError, errorCauses, fetchAPI } from '@/api'; + +import { KEY_LIST_DOC } from './useDocs'; + +export const importNotion = async (): Promise => { + const response = await fetchAPI('notion_import/run', { + method: 'POST', + }); + + if (!response.ok) { + throw new APIError( + 'Failed to import the Notion', + await errorCauses(response), + ); + } +}; + +export function useImportNotion() { + const router = useRouter(); + const queryClient = useQueryClient(); + return useMutation({ + mutationFn: importNotion, + onSuccess: () => { + void queryClient.resetQueries({ + queryKey: [KEY_LIST_DOC], + }); + router.push('/'); + }, + }); +} diff --git a/src/frontend/apps/impress/src/features/service-worker/service-worker.ts b/src/frontend/apps/impress/src/features/service-worker/service-worker.ts index b4db83f6d2..42772df3db 100644 --- a/src/frontend/apps/impress/src/features/service-worker/service-worker.ts +++ b/src/frontend/apps/impress/src/features/service-worker/service-worker.ts @@ -112,6 +112,7 @@ const precacheResources = [ '/accessibility/', '/legal-notice/', '/personal-data-cookies/', + '/import-notion', FALLBACK.offline, FALLBACK.images, FALLBACK.docs, diff --git a/src/frontend/apps/impress/src/i18n/translations.json b/src/frontend/apps/impress/src/i18n/translations.json index fb906dea0d..1e04f59a7c 100644 --- a/src/frontend/apps/impress/src/i18n/translations.json +++ b/src/frontend/apps/impress/src/i18n/translations.json @@ -166,6 +166,7 @@ "No text selected": "Kein Text ausgewählt", "No versions": "Keine Versionen", "Nothing exceptional, no special privileges related to a .gouv.fr.": "Nichts Außergewöhnliches, keine besonderen Privilegien im Zusammenhang mit .gouv.fr.", + "Notion import in progress...": "Notion-Import in Arbeit...", "OK": "OK", "Offline ?!": "Offline?!", "Only invited people can access": "Nur eingeladene Personen haben Zugriff", @@ -182,6 +183,7 @@ "Pin document icon": "Pinne das Dokumentenlogo an", "Pinned documents": "Angepinnte Dokumente", "Please download it only if it comes from a trusted source.": "Bitte laden Sie es nur herunter, wenn es von einer vertrauenswürdigen Quelle stammt.", + "Please stay on this page and be patient": "Bitte bleiben Sie auf dieser Seite und haben Sie Geduld", "Private": "Privat", "Proconnect Login": "Proconnect-Anmeldung", "Public": "Öffentlich", @@ -399,6 +401,7 @@ "No text selected": "No hay texto seleccionado", "No versions": "No hay versiones", "Nothing exceptional, no special privileges related to a .gouv.fr.": "Nada excepcional, no hay privilegios especiales relacionados con un .gouv.fr.", + "Notion import in progress...": "Importación de Notion en curso...", "OK": "Ok", "Offline ?!": "¿¡Sin conexión!?", "Only invited people can access": "Solo las personas invitadas pueden acceder", @@ -415,6 +418,7 @@ "Pin document icon": "Icono para marcar el documento como favorito", "Pinned documents": "Documentos favoritos", "Please download it only if it comes from a trusted source.": "Por favor, descárguelo solo si viene de una fuente de confianza.", + "Please stay on this page and be patient": "Rimanete su questa pagina e siate pazienti", "Private": "Privado", "Proconnect Login": "Iniciar sesión ProConnect", "Public": "Público", @@ -624,6 +628,7 @@ "No text selected": "Aucun texte sélectionné", "No versions": "Aucune version", "Nothing exceptional, no special privileges related to a .gouv.fr.": "Rien d'exceptionnel, pas de privilèges spéciaux liés à un .gouv.fr.", + "Notion import in progress...": "Import Notion en cours...", "OK": "OK", "Offline ?!": "Hors-ligne ?!", "Only invited people can access": "Seules les personnes invitées peuvent accéder", @@ -640,6 +645,7 @@ "Pin document icon": "Icône épingler un document", "Pinned documents": "Documents épinglés", "Please download it only if it comes from a trusted source.": "Veuillez le télécharger uniquement s'il provient d'une source fiable.", + "Please stay on this page and be patient": "Merci de rester sur cette page et de patienter un peu", "Private": "Privé", "Proconnect Login": "Login Proconnect", "Public": "Public", @@ -828,6 +834,7 @@ "No text selected": "Non è stato selezionato nessun testo", "No versions": "Nessuna versione", "Nothing exceptional, no special privileges related to a .gouv.fr.": "Niente di eccezionale, nessun privilegio speciale legato a un .gouv.fr.", + "Notion import in progress...": "Importazione di nozioni in corso...", "OK": "OK", "Offline ?!": "Offline ?!", "Only invited people can access": "Solo le persone invitate possono accedere", @@ -844,6 +851,7 @@ "Pin document icon": "Icona \"fissa documento\"", "Pinned documents": "Documenti fissati", "Please download it only if it comes from a trusted source.": "Per favore scaricalo solo se proviene da una fonte attendibile", + "Please stay on this page and be patient": "Rimanete su questa pagina e siate pazienti", "Private": "Privato", "Public": "Pubblico", "Public document": "Documento pubblico", @@ -1033,6 +1041,7 @@ "No text selected": "Geen tekst geselecteerd", "No versions": "Geen versies", "Nothing exceptional, no special privileges related to a .gouv.fr.": "Niets uitzonderlijk, geen speciale privileges gerelateerd aan een .gouv.fr.", + "Notion import in progress...": "Notion import bezig...", "OK": "Ok", "Offline ?!": "Offline ?!", "Only invited people can access": "Alleen uitgenodigde gebruikers hebben toegang", @@ -1049,6 +1058,7 @@ "Pin document icon": "Document icoon vastzetten", "Pinned documents": "Vastgepinde documenten", "Please download it only if it comes from a trusted source.": "Alleen downloaden als het van een vertrouwde bron komt.", + "Please stay on this page and be patient": "Blijf op deze pagina en heb geduld", "Private": "Privé", "Proconnect Login": "Login", "Public": "Publiek", diff --git a/src/frontend/apps/impress/src/pages/import-notion/index.tsx b/src/frontend/apps/impress/src/pages/import-notion/index.tsx new file mode 100644 index 0000000000..098cb30684 --- /dev/null +++ b/src/frontend/apps/impress/src/pages/import-notion/index.tsx @@ -0,0 +1,42 @@ +import { Loader } from '@openfun/cunningham-react'; +import { ReactElement, useEffect } from 'react'; +import { useTranslation } from 'react-i18next'; + +import { Box, Text } from '@/components'; +import { useImportNotion } from '@/features/docs/doc-management/api/useImportNotion'; +import { MainLayout } from '@/layouts'; +import { NextPageWithLayout } from '@/types/next'; + +const Page: NextPageWithLayout = () => { + const { t } = useTranslation(); + + const { mutate: importNotion } = useImportNotion(); + + useEffect(() => { + importNotion(); + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + return ( + + + {t('Notion import in progress...')} + + + {t('Please stay on this page and be patient')} + + + + ); +}; + +Page.getLayout = function getLayout(page: ReactElement) { + return {page}; +}; + +export default Page; From a9ed9170cb5eb42dd65e886fb69574ed20d46171 Mon Sep 17 00:00:00 2001 From: Nicolas Ritouet Date: Tue, 3 Jun 2025 12:57:43 +0200 Subject: [PATCH 38/63] Ajout support Bullet list and Number list --- src/backend/core/services/notion_import.py | 30 ++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 5ce4d5741a..3a6cc74c14 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -19,6 +19,8 @@ NotionTable, NotionTableRow, NotionUnsupported, + NotionBulletedListItem, + NotionNumberedListItem, ) from ..notion_schemas.notion_page import ( NotionPage, @@ -41,11 +43,19 @@ def build_notion_session(token: str) -> Session: def search_notion(session: Session, start_cursor: str) -> dict[str, Any]: - req_data = {} + req_data = { + "filter": { + "value": "page", + "property": "object", + }, + } if start_cursor: req_data = { "start_cursor": start_cursor, - "value": "page", + "filter": { + "value": "page", + "property": "object", + }, } response = session.post( @@ -189,6 +199,22 @@ def convert_block(block: NotionBlock) -> dict[str, Any] | None: ], }, } + case NotionBulletedListItem(): + content = "" + for rich_text in block.specific.rich_text: + content += rich_text.plain_text + return { + "type": "bulletListItem", + "content": content, + } + case NotionNumberedListItem(): + content = "" + for rich_text in block.specific.rich_text: + content += rich_text.plain_text + return { + "type": "numberedListItem", + "content": content, + } case NotionUnsupported(): str_raw = json.dumps(block.specific.raw, indent=2) From 9284879db28349124fc4726582ed9d4c3cac606a Mon Sep 17 00:00:00 2001 From: Thibault Guisnet Date: Tue, 3 Jun 2025 13:50:19 +0200 Subject: [PATCH 39/63] add format text --- src/backend/core/services/notion_import.py | 40 +++++++++++++++++++--- 1 file changed, 36 insertions(+), 4 deletions(-) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 3a6cc74c14..43b4d65f1a 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -28,7 +28,7 @@ NotionParentPage, NotionParentWorkspace, ) -from ..notion_schemas.notion_rich_text import NotionRichText +from ..notion_schemas.notion_rich_text import NotionRichText, NotionRichTextAnnotation logger = logging.getLogger(__name__) @@ -125,16 +125,27 @@ def fetch_block_children(session: Session, block_id: str) -> list[NotionBlock]: return blocks -def convert_rich_texts(rich_texts: list[NotionRichText]) -> str: - return "".join(rich_text.plain_text for rich_text in rich_texts) +def convert_rich_texts(rich_texts: list[NotionRichText]) -> list[dict[str, Any]]: + content = [] + for rich_text in rich_texts: + stylestab = convert_annotations(rich_text.annotations) + content.append( + { + "type" : "text", + "text" : rich_text.plain_text, + "styles" : stylestab, + } + ) + return content def convert_block(block: NotionBlock) -> dict[str, Any] | None: match block.specific: case NotionParagraph(): + content = convert_rich_texts(block.specific.rich_text) return { "type": "paragraph", - "content": convert_rich_texts(block.specific.rich_text), + "content": content, } case NotionHeading1() | NotionHeading2() | NotionHeading3(): return { @@ -227,6 +238,27 @@ def convert_block(block: NotionBlock) -> dict[str, Any] | None: "type": "paragraph", "content": f"This should be a {block.specific.block_type}, not yet handled by the importer", } + + +def convert_annotations(annotations: NotionRichTextAnnotation) -> dict[str, str]: + res = {} + if annotations.bold: + res["bold"] = "true" + if annotations.italic: + res["italic"] = "true" + if annotations.underline: + res["underline"] = "true" + if annotations.strikethrough: + res["strike"] = "true" + if annotations.color: + if '_' in str(annotations.color): + tmp = str(annotations.color) + res["backgroundColor"] = tmp[:tmp.rfind("_")].lower() + else: + res["textColor"] = str(annotations.color).lower() + return res + + def convert_block_list(blocks: list[NotionBlock]) -> list[dict[str, Any]]: From 41f44bed96249ad73f392138ee1d996eb0f90b45 Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 05:16:00 +0200 Subject: [PATCH 40/63] tidy --- .../core/notion_schemas/notion_file.py | 15 +++-- .../core/notion_schemas/notion_page.py | 16 ++--- src/backend/core/services/notion_import.py | 67 ++++++------------- 3 files changed, 36 insertions(+), 62 deletions(-) diff --git a/src/backend/core/notion_schemas/notion_file.py b/src/backend/core/notion_schemas/notion_file.py index 59b4a9737b..7b1a11f397 100644 --- a/src/backend/core/notion_schemas/notion_file.py +++ b/src/backend/core/notion_schemas/notion_file.py @@ -3,26 +3,29 @@ from pydantic import BaseModel, Discriminator + class NotionFileType(StrEnum): HOSTED = "file" UPLOAD = "file_upload" EXTERNAL = "external" + class NotionFileHosted(BaseModel): type: Literal[NotionFileType.HOSTED] = NotionFileType.HOSTED - file: dict # TODO + file: dict # TODO + class NotionFileUpload(BaseModel): type: Literal[NotionFileType.UPLOAD] = NotionFileType.UPLOAD - file_upload: dict # TODO + file_upload: dict # TODO + class NotionFileExternal(BaseModel): type: Literal[NotionFileType.EXTERNAL] = NotionFileType.EXTERNAL - external: dict # TODO + external: dict # TODO + NotionFile = Annotated[ - NotionFileHosted - | NotionFileUpload - | NotionFileExternal, + NotionFileHosted | NotionFileUpload | NotionFileExternal, Discriminator(discriminator="type"), ] diff --git a/src/backend/core/notion_schemas/notion_page.py b/src/backend/core/notion_schemas/notion_page.py index 2aa0b0e58d..b014b34234 100644 --- a/src/backend/core/notion_schemas/notion_page.py +++ b/src/backend/core/notion_schemas/notion_page.py @@ -1,11 +1,7 @@ -from datetime import datetime from enum import StrEnum -from typing import Annotated, Any, Literal +from typing import Annotated, Literal -from pydantic import BaseModel, Discriminator, Field, ValidationError, model_validator - -from .notion_rich_text import NotionRichText -from .notion_file import NotionFile +from pydantic import BaseModel, Discriminator class NotionParentType(StrEnum): @@ -35,10 +31,7 @@ class NotionParentBlock(BaseModel): NotionParent = Annotated[ - NotionParentDatabase - | NotionParentPage - | NotionParentWorkspace - | NotionParentBlock, + NotionParentDatabase | NotionParentPage | NotionParentWorkspace | NotionParentBlock, Discriminator(discriminator="type"), ] @@ -60,5 +53,6 @@ def get_title(self) -> str | None: if title_property is None: return None - rich_text = title_property["title"][0] # This could be parsed using NotionRichText + # This could be parsed using NotionRichText + rich_text = title_property["title"][0] return rich_text["plain_text"] diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 43b4d65f1a..448c6d122a 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -1,26 +1,23 @@ import json import logging -from dataclasses import dataclass -from enum import Enum from typing import Any -import requests from pydantic import BaseModel, TypeAdapter from requests import Session from ..notion_schemas.notion_block import ( NotionBlock, + NotionBulletedListItem, NotionChildPage, NotionDivider, NotionHeading1, NotionHeading2, NotionHeading3, + NotionNumberedListItem, NotionParagraph, NotionTable, NotionTableRow, NotionUnsupported, - NotionBulletedListItem, - NotionNumberedListItem, ) from ..notion_schemas.notion_page import ( NotionPage, @@ -44,10 +41,10 @@ def build_notion_session(token: str) -> Session: def search_notion(session: Session, start_cursor: str) -> dict[str, Any]: req_data = { - "filter": { - "value": "page", - "property": "object", - }, + "filter": { + "value": "page", + "property": "object", + }, } if start_cursor: req_data = { @@ -131,9 +128,9 @@ def convert_rich_texts(rich_texts: list[NotionRichText]) -> list[dict[str, Any]] stylestab = convert_annotations(rich_text.annotations) content.append( { - "type" : "text", - "text" : rich_text.plain_text, - "styles" : stylestab, + "type": "text", + "text": rich_text.plain_text, + "styles": stylestab, } ) return content @@ -184,24 +181,20 @@ def convert_block(block: NotionBlock) -> dict[str, Any] | None: "type": "table", "content": { "type": "tableContent", - "columnWidths": [1000 / n_columns for _ in range(n_columns)], + "columnWidths": [ + 1000 / n_columns for _ in range(n_columns) + ], # TODO "headerRows": int(block.specific.has_column_header), "headerColumns": int(block.specific.has_row_header), "props": { - "textColor": "default", + "textColor": "default", # TODO }, "rows": [ { "cells": [ { "type": "tableCell", - "content": [ - { - "type": "text", - "text": convert_rich_texts(cell), - "styles": {}, - } - ], + "content": convert_rich_texts(cell), } for cell in row.cells ] @@ -211,20 +204,14 @@ def convert_block(block: NotionBlock) -> dict[str, Any] | None: }, } case NotionBulletedListItem(): - content = "" - for rich_text in block.specific.rich_text: - content += rich_text.plain_text return { "type": "bulletListItem", - "content": content, + "content": convert_rich_texts(block.specific.rich_text), } case NotionNumberedListItem(): - content = "" - for rich_text in block.specific.rich_text: - content += rich_text.plain_text return { "type": "numberedListItem", - "content": content, + "content": convert_rich_texts(block.specific.rich_text), } case NotionUnsupported(): @@ -238,7 +225,7 @@ def convert_block(block: NotionBlock) -> dict[str, Any] | None: "type": "paragraph", "content": f"This should be a {block.specific.block_type}, not yet handled by the importer", } - + def convert_annotations(annotations: NotionRichTextAnnotation) -> dict[str, str]: res = {} @@ -250,15 +237,12 @@ def convert_annotations(annotations: NotionRichTextAnnotation) -> dict[str, str] res["underline"] = "true" if annotations.strikethrough: res["strike"] = "true" - if annotations.color: - if '_' in str(annotations.color): - tmp = str(annotations.color) - res["backgroundColor"] = tmp[:tmp.rfind("_")].lower() - else: - res["textColor"] = str(annotations.color).lower() - return res - + if "_" in annotations.color: + res["backgroundColor"] = annotations.color.split("_")[0].lower() + else: + res["textColor"] = annotations.color.lower() + return res def convert_block_list(blocks: list[NotionBlock]) -> list[dict[str, Any]]: @@ -277,13 +261,6 @@ class ImportedDocument(BaseModel): children: list["ImportedDocument"] = [] -def find_page(id: str, pages: list[NotionPage]): - for page in all_pages: - if page.id == id: - return page - return None - - def find_block_child_page(block_id: str, all_pages: list[NotionPage]): for page in all_pages: if ( From 10b85ecad3499269fcac7b9ced424274e6266e5d Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 05:31:26 +0200 Subject: [PATCH 41/63] notion-import: handle sub list items --- src/backend/core/notion_schemas/notion_block.py | 13 ++++++++++++- src/backend/core/services/notion_import.py | 2 ++ 2 files changed, 14 insertions(+), 1 deletion(-) diff --git a/src/backend/core/notion_schemas/notion_block.py b/src/backend/core/notion_schemas/notion_block.py index bbc3caaa09..3f265ad79f 100644 --- a/src/backend/core/notion_schemas/notion_block.py +++ b/src/backend/core/notion_schemas/notion_block.py @@ -5,8 +5,8 @@ from pydantic import BaseModel, Discriminator, Field, ValidationError, model_validator from .notion_color import NotionColor -from .notion_rich_text import NotionRichText from .notion_file import NotionFile +from .notion_rich_text import NotionRichText """Usage: NotionBlock.model_validate(response.json())""" @@ -125,6 +125,16 @@ class NotionNumberedListItem(BaseModel): children: list["NotionBlock"] = Field(default_factory=list) +class NotionToDo(BaseModel): + """https://developers.notion.com/reference/block#to-do""" + + block_type: Literal[NotionBlockType.TO_DO] = NotionBlockType.TO_DO + rich_text: list[NotionRichText] + checked: bool + color: NotionColor + children: list["NotionBlock"] = Field(default_factory=list) + + class NotionCode(BaseModel): """https://developers.notion.com/reference/block#code""" @@ -235,6 +245,7 @@ def put_all_in_raw(cls, data: Any) -> Any: | NotionParagraph | NotionNumberedListItem | NotionBulletedListItem + | NotionToDo | NotionCode | NotionDivider | NotionEmbed diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 448c6d122a..fc373b99ee 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -207,11 +207,13 @@ def convert_block(block: NotionBlock) -> dict[str, Any] | None: return { "type": "bulletListItem", "content": convert_rich_texts(block.specific.rich_text), + "children": convert_block_list(block.children), } case NotionNumberedListItem(): return { "type": "numberedListItem", "content": convert_rich_texts(block.specific.rich_text), + "children": convert_block_list(block.children), } case NotionUnsupported(): From 36fbf65d122586fe56fcb5662fb53c8c5be039f0 Mon Sep 17 00:00:00 2001 From: Clara Ni Date: Tue, 3 Jun 2025 15:07:10 +0200 Subject: [PATCH 42/63] convert_block returns now list of dict --- src/backend/core/services/notion_import.py | 76 ++++++++++++---------- 1 file changed, 42 insertions(+), 34 deletions(-) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index fc373b99ee..416e6ce5e6 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -136,22 +136,26 @@ def convert_rich_texts(rich_texts: list[NotionRichText]) -> list[dict[str, Any]] return content -def convert_block(block: NotionBlock) -> dict[str, Any] | None: +def convert_block(block: NotionBlock) -> list[dict[str, Any]] | None: match block.specific: case NotionParagraph(): content = convert_rich_texts(block.specific.rich_text) - return { - "type": "paragraph", - "content": content, - } + return [ + { + "type": "paragraph", + "content": content, + } + ] case NotionHeading1() | NotionHeading2() | NotionHeading3(): - return { - "type": "heading", - "content": convert_rich_texts(block.specific.rich_text), - "level": block.specific.block_type.value.split("_")[ - -1 - ], # e.g., "1", "2", or "3" - } + return [ + { + "type": "heading", + "content": convert_rich_texts(block.specific.rich_text), + "level": block.specific.block_type.value.split("_")[ + -1 + ], # e.g., "1", "2", or "3" + } + ] # case NotionDivider(): # return { # "type": "divider", @@ -159,25 +163,27 @@ def convert_block(block: NotionBlock) -> dict[str, Any] | None: case NotionTable(): rows: list[NotionTableRow] = [child.specific for child in block.children] # type: ignore # I don't know how to assert properly if len(rows) == 0: - return { - "type": "paragraph", - "content": "Empty table ?!", - } + return [ + { + "type": "paragraph", + "content": "Empty table ?!", + } + ] n_columns = len( rows[0].cells ) # I'll assume all rows have the same number of cells if n_columns == 0: - return { + return [{ "type": "paragraph", "content": "Empty row ?!", - } + }] if not all(len(row.cells) == n_columns for row in rows): - return { + return [{ "type": "paragraph", "content": "Rows have different number of cells ?!", - } - return { + }] + return [{ "type": "table", "content": { "type": "tableContent", @@ -202,31 +208,33 @@ def convert_block(block: NotionBlock) -> dict[str, Any] | None: for row in rows ], }, - } + }] case NotionBulletedListItem(): - return { + return [{ "type": "bulletListItem", "content": convert_rich_texts(block.specific.rich_text), "children": convert_block_list(block.children), - } + }] case NotionNumberedListItem(): - return { + return [{ "type": "numberedListItem", "content": convert_rich_texts(block.specific.rich_text), "children": convert_block_list(block.children), - } + }] case NotionUnsupported(): str_raw = json.dumps(block.specific.raw, indent=2) - return { - "type": "paragraph", - "content": f"This should be a {block.specific.block_type}, not yet supported in docs", - } + return [ + { + "type": "paragraph", + "content": f"This should be a {block.specific.block_type}, not yet supported in docs", + } + ] case _: - return { + return [{ "type": "paragraph", "content": f"This should be a {block.specific.block_type}, not yet handled by the importer", - } + }] def convert_annotations(annotations: NotionRichTextAnnotation) -> dict[str, str]: @@ -251,9 +259,9 @@ def convert_block_list(blocks: list[NotionBlock]) -> list[dict[str, Any]]: converted_blocks = [] for block in blocks: converted_block = convert_block(block) - if converted_block == None: + if len(converted_block) == 0: continue - converted_blocks.append(converted_block) + converted_blocks.extend(converted_block) return converted_blocks From 0c815f2de0c4d12cb251434f9a459acb2ff7d272 Mon Sep 17 00:00:00 2001 From: Clara Ni Date: Tue, 3 Jun 2025 15:07:58 +0200 Subject: [PATCH 43/63] handle columns and columns list --- src/backend/core/notion_schemas/notion_block.py | 14 ++++++++++++++ src/backend/core/services/notion_import.py | 10 ++++++++++ 2 files changed, 24 insertions(+) diff --git a/src/backend/core/notion_schemas/notion_block.py b/src/backend/core/notion_schemas/notion_block.py index 3f265ad79f..4ac1f67d01 100644 --- a/src/backend/core/notion_schemas/notion_block.py +++ b/src/backend/core/notion_schemas/notion_block.py @@ -212,6 +212,18 @@ class NotionTableRow(BaseModel): cells: list[list[NotionRichText]] # Each cell is a list of rich text objects +class NotionColumnList(BaseModel): + """https://developers.notion.com/reference/block#column-list-and-column""" + + block_type: Literal[NotionBlockType.COLUMN_LIST] = NotionBlockType.COLUMN_LIST + + +class NotionColumn(BaseModel): + """https://developers.notion.com/reference/block#column-list-and-column""" + + block_type: Literal[NotionBlockType.COLUMN] = NotionBlockType.COLUMN + + class NotionChildPage(BaseModel): """https://developers.notion.com/reference/block#child-page @@ -247,6 +259,8 @@ def put_all_in_raw(cls, data: Any) -> Any: | NotionBulletedListItem | NotionToDo | NotionCode + | NotionColumn + | NotionColumnList | NotionDivider | NotionEmbed | NotionFile diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 416e6ce5e6..581d6e6de6 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -9,6 +9,8 @@ NotionBlock, NotionBulletedListItem, NotionChildPage, + NotionColumn, + NotionColumnList, NotionDivider, NotionHeading1, NotionHeading2, @@ -138,6 +140,14 @@ def convert_rich_texts(rich_texts: list[NotionRichText]) -> list[dict[str, Any]] def convert_block(block: NotionBlock) -> list[dict[str, Any]] | None: match block.specific: + case NotionColumnList(): + columns_content = [] + for column in block.children: + columns_content.extend(convert_block(column)) + return columns_content + case NotionColumn(): + return [convert_block(child_content)[0] for child_content in block.children] + case NotionParagraph(): content = convert_rich_texts(block.specific.rich_text) return [ From 15c3ca8b1ee4303a0b051a3a7f268ccfdff02ff1 Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 05:44:38 +0200 Subject: [PATCH 44/63] notion-import: handle notion todos --- src/backend/core/services/notion_import.py | 123 +++++++++++---------- 1 file changed, 67 insertions(+), 56 deletions(-) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 581d6e6de6..60234e224a 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -19,6 +19,7 @@ NotionParagraph, NotionTable, NotionTableRow, + NotionToDo, NotionUnsupported, ) from ..notion_schemas.notion_page import ( @@ -138,7 +139,7 @@ def convert_rich_texts(rich_texts: list[NotionRichText]) -> list[dict[str, Any]] return content -def convert_block(block: NotionBlock) -> list[dict[str, Any]] | None: +def convert_block(block: NotionBlock) -> list[dict[str, Any]]: match block.specific: case NotionColumnList(): columns_content = [] @@ -167,9 +168,7 @@ def convert_block(block: NotionBlock) -> list[dict[str, Any]] | None: } ] # case NotionDivider(): - # return { - # "type": "divider", - # } + # return {"type": "divider", "properties": {}} case NotionTable(): rows: list[NotionTableRow] = [child.specific for child in block.children] # type: ignore # I don't know how to assert properly if len(rows) == 0: @@ -184,54 +183,67 @@ def convert_block(block: NotionBlock) -> list[dict[str, Any]] | None: rows[0].cells ) # I'll assume all rows have the same number of cells if n_columns == 0: - return [{ - "type": "paragraph", - "content": "Empty row ?!", - }] + return [{"type": "paragraph", "content": "Empty row ?!"}] if not all(len(row.cells) == n_columns for row in rows): - return [{ - "type": "paragraph", - "content": "Rows have different number of cells ?!", - }] - return [{ - "type": "table", - "content": { - "type": "tableContent", - "columnWidths": [ - 1000 / n_columns for _ in range(n_columns) - ], # TODO - "headerRows": int(block.specific.has_column_header), - "headerColumns": int(block.specific.has_row_header), - "props": { - "textColor": "default", # TODO + return [ + { + "type": "paragraph", + "content": "Rows have different number of cells ?!", + } + ] + return [ + { + "type": "table", + "content": { + "type": "tableContent", + "columnWidths": [ + 1000 / n_columns for _ in range(n_columns) + ], # TODO + "headerRows": int(block.specific.has_column_header), + "headerColumns": int(block.specific.has_row_header), + "props": { + "textColor": "default", # TODO + }, + "rows": [ + { + "cells": [ + { + "type": "tableCell", + "content": convert_rich_texts(cell), + } + for cell in row.cells + ] + } + for row in rows + ], }, - "rows": [ - { - "cells": [ - { - "type": "tableCell", - "content": convert_rich_texts(cell), - } - for cell in row.cells - ] - } - for row in rows - ], - }, - }] + } + ] case NotionBulletedListItem(): - return [{ - "type": "bulletListItem", - "content": convert_rich_texts(block.specific.rich_text), - "children": convert_block_list(block.children), - }] + return [ + { + "type": "bulletListItem", + "content": convert_rich_texts(block.specific.rich_text), + "children": convert_block_list(block.children), + } + ] case NotionNumberedListItem(): - return [{ - "type": "numberedListItem", - "content": convert_rich_texts(block.specific.rich_text), - "children": convert_block_list(block.children), - }] - + return [ + { + "type": "numberedListItem", + "content": convert_rich_texts(block.specific.rich_text), + "children": convert_block_list(block.children), + } + ] + case NotionToDo(): + return [ + { + "type": "checkListItem", + "content": convert_rich_texts(block.specific.rich_text), + "checked": block.specific.checked, + "children": convert_block_list(block.children), + } + ] case NotionUnsupported(): str_raw = json.dumps(block.specific.raw, indent=2) return [ @@ -241,10 +253,12 @@ def convert_block(block: NotionBlock) -> list[dict[str, Any]] | None: } ] case _: - return [{ - "type": "paragraph", - "content": f"This should be a {block.specific.block_type}, not yet handled by the importer", - }] + return [ + { + "type": "paragraph", + "content": f"This should be a {block.specific.block_type}, not yet handled by the importer", + } + ] def convert_annotations(annotations: NotionRichTextAnnotation) -> dict[str, str]: @@ -268,10 +282,7 @@ def convert_annotations(annotations: NotionRichTextAnnotation) -> dict[str, str] def convert_block_list(blocks: list[NotionBlock]) -> list[dict[str, Any]]: converted_blocks = [] for block in blocks: - converted_block = convert_block(block) - if len(converted_block) == 0: - continue - converted_blocks.extend(converted_block) + converted_blocks.extend(convert_block(block)) return converted_blocks From 578409e34ed0c6cc0488a49973a440a57a165de8 Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 05:51:22 +0200 Subject: [PATCH 45/63] fixup --- src/backend/core/services/notion_import.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 60234e224a..e46c244177 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -245,12 +245,15 @@ def convert_block(block: NotionBlock) -> list[dict[str, Any]]: } ] case NotionUnsupported(): - str_raw = json.dumps(block.specific.raw, indent=2) return [ { "type": "paragraph", "content": f"This should be a {block.specific.block_type}, not yet supported in docs", - } + }, + # { + # "type": "quote", + # "content": json.dumps(block.specific.raw, indent=2), + # }, ] case _: return [ From 3c938ae6f7e49ece12cd6fdcc6df7cb96baf514a Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 3 Jun 2025 15:21:58 +0200 Subject: [PATCH 46/63] Handle uploaded images --- src/backend/core/api/viewsets.py | 32 +++++++++++--- .../core/notion_schemas/notion_block.py | 21 +++++---- src/backend/core/services/notion_import.py | 44 +++++++++++++++++-- 3 files changed, 75 insertions(+), 22 deletions(-) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index ebca6bae84..79594694cf 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -1854,13 +1854,30 @@ def notion_import_callback(request): return redirect(f"{settings.FRONTEND_URL}/import-notion/") -def _import_notion_child_page(imported_doc, parent_doc, user, imported_docs_by_page_id): - document_content = YdocConverter().convert_blocks(imported_doc.blocks) +def _import_notion_doc_content(imported_doc, obj, user): + for att in imported_doc.attachments: + extra_args = { + "Metadata": { + "owner": str(user.id), + "status": enums.DocumentAttachmentStatus.READY, # TODO + }, + } + file_id = uuid.uuid4() + key = f"{obj.key_base}/{enums.ATTACHMENTS_FOLDER:s}/{file_id!s}.raw" + with requests.get(att.file.file["url"], stream=True) as resp: + default_storage.connection.meta.client.upload_fileobj( + resp.raw, default_storage.bucket_name, key + ) + obj.attachments.append(key) + att.block["props"]["url"] = f"{settings.MEDIA_BASE_URL}{settings.MEDIA_URL}{key}" + + obj.content = YdocConverter().convert_blocks(imported_doc.blocks) + obj.save() +def _import_notion_child_page(imported_doc, parent_doc, user, imported_docs_by_page_id): obj = parent_doc.add_child( creator=user, title=imported_doc.page.get_title() or "J'aime les carottes", - content=document_content, ) models.DocumentAccess.objects.create( @@ -1869,6 +1886,8 @@ def _import_notion_child_page(imported_doc, parent_doc, user, imported_docs_by_p role=models.RoleChoices.OWNER, ) + _import_notion_doc_content(imported_doc, obj, user) + imported_docs_by_page_id[imported_doc.page.id] = obj for child in imported_doc.children: @@ -1876,14 +1895,11 @@ def _import_notion_child_page(imported_doc, parent_doc, user, imported_docs_by_p def _import_notion_root_page(imported_doc, user, imported_docs_by_page_id): - document_content = YdocConverter().convert_blocks(imported_doc.blocks) - obj = models.Document.add_root( depth=1, creator=user, title=imported_doc.page.get_title() or "J'aime les courgettes", link_reach=models.LinkReachChoices.RESTRICTED, - content=document_content, ) models.DocumentAccess.objects.create( @@ -1892,13 +1908,15 @@ def _import_notion_root_page(imported_doc, user, imported_docs_by_page_id): role=models.RoleChoices.OWNER, ) + _import_notion_doc_content(imported_doc, obj, user) + imported_docs_by_page_id[imported_doc.page.id] = obj for child in imported_doc.children: _import_notion_child_page(child, obj, user, imported_docs_by_page_id) -@drf.decorators.api_view(["POST"]) +@drf.decorators.api_view(["GET", "POST"]) # TODO: drop GET (used for testing) def notion_import_run(request): if "notion_token" not in request.session: raise drf.exceptions.PermissionDenied() diff --git a/src/backend/core/notion_schemas/notion_block.py b/src/backend/core/notion_schemas/notion_block.py index 4ac1f67d01..30b710ae8b 100644 --- a/src/backend/core/notion_schemas/notion_block.py +++ b/src/backend/core/notion_schemas/notion_block.py @@ -157,20 +157,12 @@ class NotionEmbed(BaseModel): url: str -class NotionFileType(StrEnum): - FILE = "file" - EXTERNAL = "external" - FILE_UPLOAD = "file_upload" - - -class NotionFile(BaseModel): +class NotionBlockFile(BaseModel): # FIXME: this is actually another occurrence of type discriminating """https://developers.notion.com/reference/block#file""" block_type: Literal[NotionBlockType.FILE] = NotionBlockType.FILE - caption: list[NotionRichText] - type: NotionFileType - ... + # TODO: NotionFile class NotionImage(BaseModel): @@ -179,6 +171,13 @@ class NotionImage(BaseModel): block_type: Literal[NotionBlockType.IMAGE] = NotionBlockType.IMAGE file: NotionFile + @model_validator(mode="before") + @classmethod + def move_type_inward_and_rename(cls, data: Any) -> Any: + if not isinstance(data, dict): + return data + return { "block_type": "image", "file": data } + class NotionVideo(BaseModel): """https://developers.notion.com/reference/block#video""" @@ -263,7 +262,7 @@ def put_all_in_raw(cls, data: Any) -> Any: | NotionColumnList | NotionDivider | NotionEmbed - | NotionFile + | NotionBlockFile | NotionImage | NotionVideo | NotionLinkPreview diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index e46c244177..a85539e589 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -16,6 +16,7 @@ NotionHeading2, NotionHeading3, NotionNumberedListItem, + NotionImage, NotionParagraph, NotionTable, NotionTableRow, @@ -28,7 +29,9 @@ NotionParentPage, NotionParentWorkspace, ) +from ..notion_schemas.notion_page import NotionPage, NotionParentWorkspace, NotionParentBlock, NotionParentPage from ..notion_schemas.notion_rich_text import NotionRichText, NotionRichTextAnnotation +from ..notion_schemas.notion_file import NotionFileHosted, NotionFileExternal logger = logging.getLogger(__name__) @@ -139,7 +142,34 @@ def convert_rich_texts(rich_texts: list[NotionRichText]) -> list[dict[str, Any]] return content -def convert_block(block: NotionBlock) -> list[dict[str, Any]]: +class ImportedAttachment(BaseModel): + block: Any + file: NotionFileHosted + + +def convert_image(image: NotionImage, attachments: list[ImportedAttachment]): + # TODO: NotionFileUpload + match image.file: + case NotionFileExternal(): + return [{ + "type": "image", + "props": { + "url": image.file.external["url"], + }, + }] + case NotionFileHosted(): + block = { + "type": "image", + "props": { + "url": "about:blank", # populated later on + }, + } + attachments.append(ImportedAttachment(block=block, file=image.file)) + + return [block] + + +def convert_block(block: NotionBlock, attachments: list[ImportedAttachment]) -> list[dict[str, Any]]: match block.specific: case NotionColumnList(): columns_content = [] @@ -157,6 +187,8 @@ def convert_block(block: NotionBlock) -> list[dict[str, Any]]: "content": content, } ] + case NotionImage(): + return convert_image(block.specific, attachments) case NotionHeading1() | NotionHeading2() | NotionHeading3(): return [ { @@ -282,10 +314,10 @@ def convert_annotations(annotations: NotionRichTextAnnotation) -> dict[str, str] return res -def convert_block_list(blocks: list[NotionBlock]) -> list[dict[str, Any]]: +def convert_block_list(blocks: list[NotionBlock], attachments: list[ImportedAttachment]) -> list[dict[str, Any]]: converted_blocks = [] for block in blocks: - converted_blocks.extend(convert_block(block)) + converted_blocks.extend(convert_block(block, attachments)) return converted_blocks @@ -293,6 +325,7 @@ class ImportedDocument(BaseModel): page: NotionPage blocks: list[dict[str, Any]] = [] children: list["ImportedDocument"] = [] + attachments: list[ImportedAttachment] = [] def find_block_child_page(block_id: str, all_pages: list[NotionPage]): @@ -340,10 +373,13 @@ def import_page( blocks = fetch_block_children(session, page.id) logger.info(f"Page {page.get_title()} (id {page.id})") logger.info(blocks) + attachments = [] + converted_blocks = convert_block_list(blocks, attachments) return ImportedDocument( page=page, - blocks=convert_block_list(blocks), + blocks=converted_blocks, children=convert_child_pages(session, page, blocks, all_pages), + attachments=attachments, ) From b3eb0ff948e4beec1c55d64b60d34dbd50a5e8cd Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 3 Jun 2025 15:30:16 +0200 Subject: [PATCH 47/63] Fix missing arg in convert_block() --- src/backend/core/services/notion_import.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index a85539e589..8a107f8406 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -174,10 +174,10 @@ def convert_block(block: NotionBlock, attachments: list[ImportedAttachment]) -> case NotionColumnList(): columns_content = [] for column in block.children: - columns_content.extend(convert_block(column)) + columns_content.extend(convert_block(column, attachments)) return columns_content case NotionColumn(): - return [convert_block(child_content)[0] for child_content in block.children] + return [convert_block(child_content, attachments)[0] for child_content in block.children] case NotionParagraph(): content = convert_rich_texts(block.specific.rich_text) From 33f21ff02d74b9edb261299dd44163d02b35a5e5 Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 06:06:25 +0200 Subject: [PATCH 48/63] tidy --- src/backend/core/services/notion_import.py | 40 ++++++++++++++-------- 1 file changed, 26 insertions(+), 14 deletions(-) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 8a107f8406..f1b823cfd4 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -15,23 +15,22 @@ NotionHeading1, NotionHeading2, NotionHeading3, - NotionNumberedListItem, NotionImage, + NotionNumberedListItem, NotionParagraph, NotionTable, NotionTableRow, NotionToDo, NotionUnsupported, ) +from ..notion_schemas.notion_file import NotionFileExternal, NotionFileHosted from ..notion_schemas.notion_page import ( NotionPage, NotionParentBlock, NotionParentPage, NotionParentWorkspace, ) -from ..notion_schemas.notion_page import NotionPage, NotionParentWorkspace, NotionParentBlock, NotionParentPage from ..notion_schemas.notion_rich_text import NotionRichText, NotionRichTextAnnotation -from ..notion_schemas.notion_file import NotionFileHosted, NotionFileExternal logger = logging.getLogger(__name__) @@ -147,29 +146,37 @@ class ImportedAttachment(BaseModel): file: NotionFileHosted -def convert_image(image: NotionImage, attachments: list[ImportedAttachment]): +def convert_image( + image: NotionImage, attachments: list[ImportedAttachment] +) -> list[dict[str, Any]]: # TODO: NotionFileUpload match image.file: case NotionFileExternal(): - return [{ - "type": "image", - "props": { - "url": image.file.external["url"], - }, - }] + return [ + { + "type": "image", + "props": { + "url": image.file.external["url"], + }, + } + ] case NotionFileHosted(): block = { "type": "image", "props": { - "url": "about:blank", # populated later on + "url": "about:blank", # populated later on }, } attachments.append(ImportedAttachment(block=block, file=image.file)) return [block] + case _: + return [{"paragraph": {"content": "Unsupported image type"}}] -def convert_block(block: NotionBlock, attachments: list[ImportedAttachment]) -> list[dict[str, Any]]: +def convert_block( + block: NotionBlock, attachments: list[ImportedAttachment] +) -> list[dict[str, Any]]: match block.specific: case NotionColumnList(): columns_content = [] @@ -177,7 +184,10 @@ def convert_block(block: NotionBlock, attachments: list[ImportedAttachment]) -> columns_content.extend(convert_block(column, attachments)) return columns_content case NotionColumn(): - return [convert_block(child_content, attachments)[0] for child_content in block.children] + return [ + convert_block(child_content, attachments)[0] + for child_content in block.children + ] case NotionParagraph(): content = convert_rich_texts(block.specific.rich_text) @@ -314,7 +324,9 @@ def convert_annotations(annotations: NotionRichTextAnnotation) -> dict[str, str] return res -def convert_block_list(blocks: list[NotionBlock], attachments: list[ImportedAttachment]) -> list[dict[str, Any]]: +def convert_block_list( + blocks: list[NotionBlock], attachments: list[ImportedAttachment] +) -> list[dict[str, Any]]: converted_blocks = [] for block in blocks: converted_blocks.extend(convert_block(block, attachments)) From 9337e4262d6d41c580dff156b60ea9387d85ce4e Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 3 Jun 2025 15:38:48 +0200 Subject: [PATCH 49/63] Struggle update --- src/backend/core/services/notion_import.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index f1b823cfd4..e80ac3d7b4 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -369,12 +369,12 @@ def convert_child_pages( if not isinstance(block.specific, NotionChildPage): continue - # TODO - # parent_page = find_block_child_page(block.id, all_pages) - # if parent_page == None: - # logger.warning(f"Cannot find parent of block {block.id}") - # continue - # children.append(import_page(session, parent_page, all_pages)) + # TODO: doesn't work, never finds the child + child_page = find_block_child_page(block.id, all_pages) + if child_page == None: + logger.warning(f"Cannot find child page of block {block.id}") + continue + children.append(import_page(session, child_page, all_pages)) return children From 6c276c27ec598cf9b1d61a2b0ff1c2b6dd822861 Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 06:09:50 +0200 Subject: [PATCH 50/63] fix --- src/backend/core/notion_schemas/notion_block.py | 11 ++++++++++- src/backend/core/services/notion_import.py | 14 +++++++++++--- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/src/backend/core/notion_schemas/notion_block.py b/src/backend/core/notion_schemas/notion_block.py index 30b710ae8b..490f2c634d 100644 --- a/src/backend/core/notion_schemas/notion_block.py +++ b/src/backend/core/notion_schemas/notion_block.py @@ -144,6 +144,15 @@ class NotionCode(BaseModel): language: str # Actually an enum +class NotionCallout(BaseModel): + """https://developers.notion.com/reference/block#callout""" + + block_type: Literal[NotionBlockType.CALLOUT] = NotionBlockType.CALLOUT + rich_text: list[NotionRichText] + # icon: Any # could be an emoji or an image + color: NotionColor + + class NotionDivider(BaseModel): """https://developers.notion.com/reference/block#divider""" @@ -176,7 +185,7 @@ class NotionImage(BaseModel): def move_type_inward_and_rename(cls, data: Any) -> Any: if not isinstance(data, dict): return data - return { "block_type": "image", "file": data } + return {"block_type": "image", "file": data} class NotionVideo(BaseModel): diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index e80ac3d7b4..26eaab502a 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -8,6 +8,7 @@ from ..notion_schemas.notion_block import ( NotionBlock, NotionBulletedListItem, + NotionCallout, NotionChildPage, NotionColumn, NotionColumnList, @@ -211,6 +212,13 @@ def convert_block( ] # case NotionDivider(): # return {"type": "divider", "properties": {}} + case NotionCallout(): + return [ + { + "type": "comment", + "content": convert_rich_texts(block.specific.rich_text), + } + ] case NotionTable(): rows: list[NotionTableRow] = [child.specific for child in block.children] # type: ignore # I don't know how to assert properly if len(rows) == 0: @@ -266,7 +274,7 @@ def convert_block( { "type": "bulletListItem", "content": convert_rich_texts(block.specific.rich_text), - "children": convert_block_list(block.children), + "children": convert_block_list(block.children, attachments), } ] case NotionNumberedListItem(): @@ -274,7 +282,7 @@ def convert_block( { "type": "numberedListItem", "content": convert_rich_texts(block.specific.rich_text), - "children": convert_block_list(block.children), + "children": convert_block_list(block.children, attachments), } ] case NotionToDo(): @@ -283,7 +291,7 @@ def convert_block( "type": "checkListItem", "content": convert_rich_texts(block.specific.rich_text), "checked": block.specific.checked, - "children": convert_block_list(block.children), + "children": convert_block_list(block.children, attachments), } ] case NotionUnsupported(): From ac6742d82729d0cf76955873a80650b3aef409cb Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 06:14:53 +0200 Subject: [PATCH 51/63] notion-schemas: handle callouts --- src/backend/core/notion_schemas/notion_block.py | 3 ++- src/backend/core/services/notion_import.py | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/backend/core/notion_schemas/notion_block.py b/src/backend/core/notion_schemas/notion_block.py index 490f2c634d..9ba1312fbd 100644 --- a/src/backend/core/notion_schemas/notion_block.py +++ b/src/backend/core/notion_schemas/notion_block.py @@ -277,7 +277,8 @@ def put_all_in_raw(cls, data: Any) -> Any: | NotionLinkPreview | NotionTable | NotionTableRow - | NotionChildPage, + | NotionChildPage + | NotionCallout, Discriminator(discriminator="block_type"), ] | NotionUnsupported, diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 26eaab502a..974bb22005 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -215,8 +215,11 @@ def convert_block( case NotionCallout(): return [ { - "type": "comment", + "type": "quote", "content": convert_rich_texts(block.specific.rich_text), + "props": { + "backgroundColor": "yellow", # TODO: use the callout color + }, } ] case NotionTable(): @@ -380,8 +383,8 @@ def convert_child_pages( # TODO: doesn't work, never finds the child child_page = find_block_child_page(block.id, all_pages) if child_page == None: - logger.warning(f"Cannot find child page of block {block.id}") - continue + logger.warning(f"Cannot find child page of block {block.id}") + continue children.append(import_page(session, child_page, all_pages)) return children From da02423dbf31e818d8f27cb6faf1a35447dd7015 Mon Sep 17 00:00:00 2001 From: Thibault Guisnet Date: Tue, 3 Jun 2025 15:46:51 +0200 Subject: [PATCH 52/63] add partial links --- src/backend/core/services/notion_import.py | 25 +++++++++++++++------- 1 file changed, 17 insertions(+), 8 deletions(-) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 974bb22005..6fd525ba4f 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -131,14 +131,23 @@ def fetch_block_children(session: Session, block_id: str) -> list[NotionBlock]: def convert_rich_texts(rich_texts: list[NotionRichText]) -> list[dict[str, Any]]: content = [] for rich_text in rich_texts: - stylestab = convert_annotations(rich_text.annotations) - content.append( - { - "type": "text", - "text": rich_text.plain_text, - "styles": stylestab, - } - ) + if rich_text.href: + content.append( + { + "type" : "link", + "content" : rich_text.plain_text, + "href" : rich_text.href, + } + ) + else : + stylestab = convert_annotations(rich_text.annotations) + content.append( + { + "type" : "text", + "text" : rich_text.plain_text, + "styles" : stylestab, + } + ) return content From adc60295863f6756fdfe20383d58503612c722e5 Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 06:32:19 +0200 Subject: [PATCH 53/63] notion-schemas: handle code blocks --- src/backend/core/services/notion_import.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 6fd525ba4f..98db9d75aa 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -10,6 +10,7 @@ NotionBulletedListItem, NotionCallout, NotionChildPage, + NotionCode, NotionColumn, NotionColumnList, NotionDivider, @@ -306,6 +307,16 @@ def convert_block( "children": convert_block_list(block.children, attachments), } ] + case NotionCode(): + return [ + { + "type": "codeBlock", + "content": "".join( + rich_text.plain_text for rich_text in block.specific.rich_text + ), + "props": {"language": block.specific.language}, + } + ] case NotionUnsupported(): return [ { From 81ef2e717041fc8682135b73c0ad4b76705eb498 Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 06:51:11 +0200 Subject: [PATCH 54/63] notion-schemas: handle bookmarks --- .../core/notion_schemas/notion_block.py | 14 +++++++-- src/backend/core/services/notion_import.py | 29 ++++++++++++++----- 2 files changed, 34 insertions(+), 9 deletions(-) diff --git a/src/backend/core/notion_schemas/notion_block.py b/src/backend/core/notion_schemas/notion_block.py index 9ba1312fbd..74e7ea4896 100644 --- a/src/backend/core/notion_schemas/notion_block.py +++ b/src/backend/core/notion_schemas/notion_block.py @@ -182,7 +182,7 @@ class NotionImage(BaseModel): @model_validator(mode="before") @classmethod - def move_type_inward_and_rename(cls, data: Any) -> Any: + def move_file_type_inward_and_rename(cls, data: Any) -> Any: if not isinstance(data, dict): return data return {"block_type": "image", "file": data} @@ -202,6 +202,14 @@ class NotionLinkPreview(BaseModel): url: str +class NotionBookmark(BaseModel): + """https://developers.notion.com/reference/block#bookmark""" + + block_type: Literal[NotionBlockType.BOOKMARK] = NotionBlockType.BOOKMARK + url: str + caption: list[NotionRichText] = Field(default_factory=list) + + class NotionTable(BaseModel): """https://developers.notion.com/reference/block#table @@ -278,7 +286,9 @@ def put_all_in_raw(cls, data: Any) -> Any: | NotionTable | NotionTableRow | NotionChildPage - | NotionCallout, + | NotionCallout + | NotionLinkPreview + | NotionBookmark, Discriminator(discriminator="block_type"), ] | NotionUnsupported, diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 98db9d75aa..3a32300c5e 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -7,6 +7,7 @@ from ..notion_schemas.notion_block import ( NotionBlock, + NotionBookmark, NotionBulletedListItem, NotionCallout, NotionChildPage, @@ -135,18 +136,18 @@ def convert_rich_texts(rich_texts: list[NotionRichText]) -> list[dict[str, Any]] if rich_text.href: content.append( { - "type" : "link", - "content" : rich_text.plain_text, - "href" : rich_text.href, + "type": "link", + "content": rich_text.plain_text, + "href": rich_text.href, } ) - else : + else: stylestab = convert_annotations(rich_text.annotations) content.append( { - "type" : "text", - "text" : rich_text.plain_text, - "styles" : stylestab, + "type": "text", + "text": rich_text.plain_text, + "styles": stylestab, } ) return content @@ -317,6 +318,20 @@ def convert_block( "props": {"language": block.specific.language}, } ] + case NotionBookmark(): + caption = convert_rich_texts(block.specific.caption) or block.specific.url + return [ + { + "type": "paragraph", + "content": [ + { + "type": "link", + "content": caption, + "href": block.specific.url, + }, + ], + } + ] case NotionUnsupported(): return [ { From 70451bae1a895b1c8ad8819c004ef08b87870c49 Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 06:55:06 +0200 Subject: [PATCH 55/63] notion-schemas: fix heading handling --- src/backend/core/services/notion_import.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 3a32300c5e..a3db40f929 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -216,9 +216,11 @@ def convert_block( { "type": "heading", "content": convert_rich_texts(block.specific.rich_text), - "level": block.specific.block_type.value.split("_")[ - -1 - ], # e.g., "1", "2", or "3" + "props": { + "level": block.specific.block_type.value.split("_")[ + -1 + ], # e.g., "1", "2", or "3" + }, } ] # case NotionDivider(): From 931055009726eaaa599e21e622998a6c1981a266 Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 07:01:05 +0200 Subject: [PATCH 56/63] notion-schemas: fix default table width --- src/backend/core/services/notion_import.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index a3db40f929..062e5d88f3 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -257,14 +257,15 @@ def convert_block( "content": "Rows have different number of cells ?!", } ] + SEEMINGLY_DEFAULT_WIDTH = 128 return [ { "type": "table", "content": { "type": "tableContent", "columnWidths": [ - 1000 / n_columns for _ in range(n_columns) - ], # TODO + SEEMINGLY_DEFAULT_WIDTH for _ in range(n_columns) + ], "headerRows": int(block.specific.has_column_header), "headerColumns": int(block.specific.has_row_header), "props": { From e69ce24a6cdd10511bc399c43436bc1aeb4885fb Mon Sep 17 00:00:00 2001 From: Thibault Guisnet Date: Tue, 3 Jun 2025 17:26:25 +0200 Subject: [PATCH 57/63] improve links --- src/backend/core/services/notion_import.py | 25 +++++++++++----------- 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 062e5d88f3..6951621c10 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -136,23 +136,24 @@ def convert_rich_texts(rich_texts: list[NotionRichText]) -> list[dict[str, Any]] if rich_text.href: content.append( { - "type": "link", - "content": rich_text.plain_text, - "href": rich_text.href, - } - ) - else: - stylestab = convert_annotations(rich_text.annotations) - content.append( - { - "type": "text", - "text": rich_text.plain_text, - "styles": stylestab, + "type" : "link", + "content" : [convert_rich_text(rich_text)], + "href" : rich_text.href, } ) + else : + content.append(convert_rich_text(rich_text)) return content +def convert_rich_text(rich_text: NotionRichText) -> dict[str, Any]: + return { + "type" : "text", + "text" : rich_text.plain_text, + "styles" : convert_annotations(rich_text.annotations), + } + + class ImportedAttachment(BaseModel): block: Any file: NotionFileHosted From 3d9547d6fbcf99630c56f9c635d4838e40d6646d Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 3 Jun 2025 16:45:24 +0200 Subject: [PATCH 58/63] C'est le WIP maintenant --- src/backend/core/api/viewsets.py | 40 +++++++++++++++---- .../core/notion_schemas/notion_page.py | 3 ++ 2 files changed, 36 insertions(+), 7 deletions(-) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 79594694cf..b165a1d695 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -36,7 +36,7 @@ from core.services.ai_services import AIService from core.services.collaboration_services import CollaborationService from core.services.converter_services import YdocConverter -from core.services.notion_import import import_notion +from core.services.notion_import import build_notion_session, fetch_all_pages, import_page from core.utils import extract_attachments, filter_descendants from . import permissions, serializers, utils @@ -1916,15 +1916,41 @@ def _import_notion_root_page(imported_doc, user, imported_docs_by_page_id): _import_notion_child_page(child, obj, user, imported_docs_by_page_id) -@drf.decorators.api_view(["GET", "POST"]) # TODO: drop GET (used for testing) -def notion_import_run(request): - if "notion_token" not in request.session: - raise drf.exceptions.PermissionDenied() +def _generate_notion_progress(root_pages, page_statuses): + raw = json.dumps([{ + "title": page.get_title(), + "status": page_statuses[page.id], + } for page in root_pages]) + return f"data: {raw}\n\n" + + +def _notion_import_event_stream(request): + session = build_notion_session(request.session["notion_token"]) + all_pages = fetch_all_pages(session) + root_pages = [page for page in all_pages if page.is_root()] + + page_statuses = {} + for page in root_pages: + page_statuses[page.id] = "pending" - imported_docs = import_notion(request.session["notion_token"]) + yield _generate_notion_progress(root_pages, page_statuses) + + imported_docs = [] + for page in root_pages: + imported_docs.append(import_page(session, page, all_pages)) + page_statuses[page.id] = "fetched" + yield _generate_notion_progress(root_pages, page_statuses) imported_docs_by_page_id = {} for imported_doc in imported_docs: _import_notion_root_page(imported_doc, request.user, imported_docs_by_page_id) + page_statuses[imported_doc.page.id] = "imported" + yield _generate_notion_progress(root_pages, page_statuses) + +@drf.decorators.api_view(["GET", "POST"]) # TODO: drop GET (used for testing) +def notion_import_run(request): + if "notion_token" not in request.session: + raise drf.exceptions.PermissionDenied() - return drf.response.Response({"sava": "oui et toi ?"}) + #return drf.response.Response({"sava": "oui et toi ?"}) + return StreamingHttpResponse(_notion_import_event_stream(request), content_type='text/event-stream') diff --git a/src/backend/core/notion_schemas/notion_page.py b/src/backend/core/notion_schemas/notion_page.py index b014b34234..4d98856c58 100644 --- a/src/backend/core/notion_schemas/notion_page.py +++ b/src/backend/core/notion_schemas/notion_page.py @@ -56,3 +56,6 @@ def get_title(self) -> str | None: # This could be parsed using NotionRichText rich_text = title_property["title"][0] return rich_text["plain_text"] + + def is_root(self): + return isinstance(self.parent, NotionParentWorkspace) From 42f42fc37f16dd90bf1f5b2ef2047ee74d4c361f Mon Sep 17 00:00:00 2001 From: Simon Ser Date: Tue, 3 Jun 2025 17:03:39 +0200 Subject: [PATCH 59/63] Disable content negotiation --- src/backend/core/api/viewsets.py | 22 ++++++++++++++++------ src/backend/core/urls.py | 2 +- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index b165a1d695..323f5590c7 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -1947,10 +1947,20 @@ def _notion_import_event_stream(request): page_statuses[imported_doc.page.id] = "imported" yield _generate_notion_progress(root_pages, page_statuses) -@drf.decorators.api_view(["GET", "POST"]) # TODO: drop GET (used for testing) -def notion_import_run(request): - if "notion_token" not in request.session: - raise drf.exceptions.PermissionDenied() - #return drf.response.Response({"sava": "oui et toi ?"}) - return StreamingHttpResponse(_notion_import_event_stream(request), content_type='text/event-stream') +class IgnoreClientContentNegotiation(drf.negotiation.BaseContentNegotiation): + def select_parser(self, request, parsers): + return parsers[0] + + def select_renderer(self, request, renderers, format_suffix): + return (renderers[0], renderers[0].media_type) + +class NotionImportRunView(drf.views.APIView): + content_negotiation_class = IgnoreClientContentNegotiation + + def get(self, request, format=None): + if "notion_token" not in request.session: + raise drf.exceptions.PermissionDenied() + + #return drf.response.Response({"sava": "oui et toi ?"}) + return StreamingHttpResponse(_notion_import_event_stream(request), content_type='text/event-stream') diff --git a/src/backend/core/urls.py b/src/backend/core/urls.py index 4233cf3154..7c0f25943b 100644 --- a/src/backend/core/urls.py +++ b/src/backend/core/urls.py @@ -55,7 +55,7 @@ path("notion_import/", include([ path("redirect", viewsets.notion_import_redirect), path("callback", viewsets.notion_import_callback), - path("run", viewsets.notion_import_run), + path("run", viewsets.NotionImportRunView.as_view()), ])) ] ), From 116a7e35c52d9d0d4d321a79074b5055715b6bf8 Mon Sep 17 00:00:00 2001 From: Clara Ni Date: Tue, 3 Jun 2025 17:11:59 +0200 Subject: [PATCH 60/63] add eventSource in useImportNotion --- .../doc-management/api/useImportNotion.tsx | 85 ++++++++++++++----- .../impress/src/pages/import-notion/index.tsx | 12 ++- 2 files changed, 68 insertions(+), 29 deletions(-) diff --git a/src/frontend/apps/impress/src/features/docs/doc-management/api/useImportNotion.tsx b/src/frontend/apps/impress/src/features/docs/doc-management/api/useImportNotion.tsx index a7234e9609..e24479d0c5 100644 --- a/src/frontend/apps/impress/src/features/docs/doc-management/api/useImportNotion.tsx +++ b/src/frontend/apps/impress/src/features/docs/doc-management/api/useImportNotion.tsx @@ -1,33 +1,74 @@ -import { useMutation, useQueryClient } from '@tanstack/react-query'; import { useRouter } from 'next/navigation'; +import { useEffect, useState } from 'react'; -import { APIError, errorCauses, fetchAPI } from '@/api'; +import { baseApiUrl } from '@/api'; -import { KEY_LIST_DOC } from './useDocs'; +type ImportState = { + title: string; + status: 'pending' | 'fetched' | 'imported'; +}[]; -export const importNotion = async (): Promise => { - const response = await fetchAPI('notion_import/run', { - method: 'POST', - }); +const computeSuccessPercentage = (importState?: ImportState) => { + if (!importState) { + return 0; + } + if (!importState.length) { + return 100; + } - if (!response.ok) { - throw new APIError( - 'Failed to import the Notion', - await errorCauses(response), - ); + let fetchedFiles = 0; + let importedFiles = 0; + + for (const file of importState) { + if (file.status === 'fetched') { + fetchedFiles += 1; + } else if (file.status === 'imported') { + fetchedFiles += 1; + importedFiles += 1; + } } + + const filesNb = importState.length; + + return Math.round(((fetchedFiles + importedFiles) / (2 * filesNb)) * 100); }; export function useImportNotion() { const router = useRouter(); - const queryClient = useQueryClient(); - return useMutation({ - mutationFn: importNotion, - onSuccess: () => { - void queryClient.resetQueries({ - queryKey: [KEY_LIST_DOC], - }); - router.push('/'); - }, - }); + + const [importState, setImportState] = useState(); + + useEffect(() => { + // send the request with an Event Source + const eventSource = new EventSource( + `${baseApiUrl('1.0')}notion_import/run`, + { + withCredentials: true, + }, + ); + + eventSource.onmessage = (event) => { + console.log('hello', event.data); + const files = JSON.parse(event.data as string) as ImportState; + + // si tous les fichiers sont chargés, rediriger vers la home page + if (files.some((file) => file.status === 'imported')) { + eventSource.close(); + router.push('/'); + } + + // mettre à jour le state d'import + setImportState(files); + }; + + return () => { + eventSource.close(); + }; + // eslint-disable-next-line react-hooks/exhaustive-deps + }, []); + + return { + importState, + percentageValue: computeSuccessPercentage(importState), + }; } diff --git a/src/frontend/apps/impress/src/pages/import-notion/index.tsx b/src/frontend/apps/impress/src/pages/import-notion/index.tsx index 098cb30684..a0301b1f6e 100644 --- a/src/frontend/apps/impress/src/pages/import-notion/index.tsx +++ b/src/frontend/apps/impress/src/pages/import-notion/index.tsx @@ -1,5 +1,5 @@ import { Loader } from '@openfun/cunningham-react'; -import { ReactElement, useEffect } from 'react'; +import { ReactElement } from 'react'; import { useTranslation } from 'react-i18next'; import { Box, Text } from '@/components'; @@ -10,12 +10,7 @@ import { NextPageWithLayout } from '@/types/next'; const Page: NextPageWithLayout = () => { const { t } = useTranslation(); - const { mutate: importNotion } = useImportNotion(); - - useEffect(() => { - importNotion(); - // eslint-disable-next-line react-hooks/exhaustive-deps - }, []); + const { percentageValue } = useImportNotion(); return ( { {t('Please stay on this page and be patient')} + + {percentageValue}% + ); }; From 96e36e615106190b47e77788aebd3ef1f77942bc Mon Sep 17 00:00:00 2001 From: Clara Ni Date: Tue, 3 Jun 2025 17:40:48 +0200 Subject: [PATCH 61/63] fancy progress bar --- src/frontend/apps/impress/src/i18n/translations.json | 3 +++ .../apps/impress/src/pages/import-notion/index.tsx | 9 ++++++++- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/src/frontend/apps/impress/src/i18n/translations.json b/src/frontend/apps/impress/src/i18n/translations.json index 1e04f59a7c..9860fae69c 100644 --- a/src/frontend/apps/impress/src/i18n/translations.json +++ b/src/frontend/apps/impress/src/i18n/translations.json @@ -629,6 +629,9 @@ "No versions": "Aucune version", "Nothing exceptional, no special privileges related to a .gouv.fr.": "Rien d'exceptionnel, pas de privilèges spéciaux liés à un .gouv.fr.", "Notion import in progress...": "Import Notion en cours...", + "Notion import fetched": "🔄 Page Notion récupérée", + "Notion import imported": "✅️ Importé", + "Notion import pending": "⏸️ En attente", "OK": "OK", "Offline ?!": "Hors-ligne ?!", "Only invited people can access": "Seules les personnes invitées peuvent accéder", diff --git a/src/frontend/apps/impress/src/pages/import-notion/index.tsx b/src/frontend/apps/impress/src/pages/import-notion/index.tsx index a0301b1f6e..1685f1a8cf 100644 --- a/src/frontend/apps/impress/src/pages/import-notion/index.tsx +++ b/src/frontend/apps/impress/src/pages/import-notion/index.tsx @@ -10,7 +10,7 @@ import { NextPageWithLayout } from '@/types/next'; const Page: NextPageWithLayout = () => { const { t } = useTranslation(); - const { percentageValue } = useImportNotion(); + const { importState, percentageValue } = useImportNotion(); return ( { {percentageValue}% + + {importState?.map((page) => ( + {`${page.title} - ${t(`Notion import ${page.status}`)}`} + ))} + ); }; From b1d52cc5e6ac95ac4c0b35638b5463e986ea4e76 Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 08:24:59 +0200 Subject: [PATCH 62/63] notion-import: handle child page blocks --- src/backend/core/services/notion_import.py | 159 +++++++++++++-------- 1 file changed, 102 insertions(+), 57 deletions(-) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index 6951621c10..e99b19ce08 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -2,7 +2,7 @@ import logging from typing import Any -from pydantic import BaseModel, TypeAdapter +from pydantic import BaseModel, Field, TypeAdapter from requests import Session from ..notion_schemas.notion_block import ( @@ -136,21 +136,21 @@ def convert_rich_texts(rich_texts: list[NotionRichText]) -> list[dict[str, Any]] if rich_text.href: content.append( { - "type" : "link", - "content" : [convert_rich_text(rich_text)], - "href" : rich_text.href, + "type": "link", + "content": [convert_rich_text(rich_text)], + "href": rich_text.href, # FIXME: if it was a notion link, we should convert it to a link to the document } ) - else : + else: content.append(convert_rich_text(rich_text)) return content def convert_rich_text(rich_text: NotionRichText) -> dict[str, Any]: return { - "type" : "text", - "text" : rich_text.plain_text, - "styles" : convert_annotations(rich_text.annotations), + "type": "text", + "text": rich_text.plain_text, + "styles": convert_annotations(rich_text.annotations), } @@ -159,6 +159,11 @@ class ImportedAttachment(BaseModel): file: NotionFileHosted +class ImportedChildPage(BaseModel): + child_page_block: NotionBlock + block_to_update: Any + + def convert_image( image: NotionImage, attachments: list[ImportedAttachment] ) -> list[dict[str, Any]]: @@ -188,17 +193,21 @@ def convert_image( def convert_block( - block: NotionBlock, attachments: list[ImportedAttachment] + block: NotionBlock, + attachments: list[ImportedAttachment], + child_page_blocks: list[ImportedChildPage], ) -> list[dict[str, Any]]: match block.specific: case NotionColumnList(): columns_content = [] for column in block.children: - columns_content.extend(convert_block(column, attachments)) + columns_content.extend( + convert_block(column, attachments, child_page_blocks) + ) return columns_content case NotionColumn(): return [ - convert_block(child_content, attachments)[0] + convert_block(child_content, attachments, child_page_blocks)[0] for child_content in block.children ] @@ -225,7 +234,7 @@ def convert_block( } ] # case NotionDivider(): - # return {"type": "divider", "properties": {}} + # return [{"type": "divider"}] case NotionCallout(): return [ { @@ -292,7 +301,11 @@ def convert_block( { "type": "bulletListItem", "content": convert_rich_texts(block.specific.rich_text), - "children": convert_block_list(block.children, attachments), + "children": convert_block_list( + block.children, + attachments, + child_page_blocks, + ), } ] case NotionNumberedListItem(): @@ -300,7 +313,11 @@ def convert_block( { "type": "numberedListItem", "content": convert_rich_texts(block.specific.rich_text), - "children": convert_block_list(block.children, attachments), + "children": convert_block_list( + block.children, + attachments, + child_page_blocks, + ), } ] case NotionToDo(): @@ -309,7 +326,11 @@ def convert_block( "type": "checkListItem", "content": convert_rich_texts(block.specific.rich_text), "checked": block.specific.checked, - "children": convert_block_list(block.children, attachments), + "children": convert_block_list( + block.children, + attachments, + child_page_blocks, + ), } ] case NotionCode(): @@ -336,6 +357,22 @@ def convert_block( ], } ] + case NotionChildPage(): + # TODO: convert to a link + res = { + "type": "paragraph", + "content": [ + { + "type": "link", + "content": f"Child page: {block.specific.title}", + "href": "about:blank", # populated later on + }, + ], + } + child_page_blocks.append( + ImportedChildPage(child_page_block=block, block_to_update=res) + ) + return [res] case NotionUnsupported(): return [ { @@ -375,19 +412,22 @@ def convert_annotations(annotations: NotionRichTextAnnotation) -> dict[str, str] def convert_block_list( - blocks: list[NotionBlock], attachments: list[ImportedAttachment] + blocks: list[NotionBlock], + attachments: list[ImportedAttachment], + child_page_blocks: list[ImportedChildPage], ) -> list[dict[str, Any]]: converted_blocks = [] for block in blocks: - converted_blocks.extend(convert_block(block, attachments)) + converted_blocks.extend(convert_block(block, attachments, child_page_blocks)) return converted_blocks class ImportedDocument(BaseModel): page: NotionPage - blocks: list[dict[str, Any]] = [] - children: list["ImportedDocument"] = [] - attachments: list[ImportedAttachment] = [] + blocks: list[dict[str, Any]] = Field(default_factory=list) + children: list["ImportedDocument"] = Field(default_factory=list) + attachments: list[ImportedAttachment] = Field(default_factory=list) + child_page_blocks: list[ImportedChildPage] = Field(default_factory=list) def find_block_child_page(block_id: str, all_pages: list[NotionPage]): @@ -400,48 +440,30 @@ def find_block_child_page(block_id: str, all_pages: list[NotionPage]): return None -def convert_child_pages( - session: Session, - parent: NotionPage, - blocks: list[NotionBlock], - all_pages: list[NotionPage], -) -> list[ImportedDocument]: - children = [] - - for page in all_pages: - if ( - isinstance(page.parent, NotionParentPage) - and page.parent.page_id == parent.id - ): - children.append(import_page(session, page, all_pages)) - - for block in blocks: - if not isinstance(block.specific, NotionChildPage): - continue - - # TODO: doesn't work, never finds the child - child_page = find_block_child_page(block.id, all_pages) - if child_page == None: - logger.warning(f"Cannot find child page of block {block.id}") - continue - children.append(import_page(session, child_page, all_pages)) - - return children - - def import_page( - session: Session, page: NotionPage, all_pages: list[NotionPage] + session: Session, + page: NotionPage, + child_page_blocs_ids_to_parent_page_ids: dict[str, str], ) -> ImportedDocument: blocks = fetch_block_children(session, page.id) logger.info(f"Page {page.get_title()} (id {page.id})") logger.info(blocks) - attachments = [] - converted_blocks = convert_block_list(blocks, attachments) + attachments: list[ImportedAttachment] = [] + + child_page_blocks: list[ImportedChildPage] = [] + + converted_blocks = convert_block_list(blocks, attachments, child_page_blocks) + + for child_page_block in child_page_blocks: + child_page_blocs_ids_to_parent_page_ids[ + child_page_block.child_page_block.id + ] = page.id + return ImportedDocument( page=page, blocks=converted_blocks, - children=convert_child_pages(session, page, blocks, all_pages), attachments=attachments, + child_page_blocks=child_page_blocks, ) @@ -449,8 +471,31 @@ def import_notion(token: str) -> list[ImportedDocument]: """Recursively imports all Notion pages and blocks accessible using the given token.""" session = build_notion_session(token) all_pages = fetch_all_pages(session) - docs = [] + docs_by_page_id: dict[str, ImportedDocument] = {} + child_page_blocs_ids_to_parent_page_ids: dict[str, str] = {} for page in all_pages: - if isinstance(page.parent, NotionParentWorkspace): - docs.append(import_page(session, page, all_pages)) - return docs + docs_by_page_id[page.id] = import_page( + session, page, child_page_blocs_ids_to_parent_page_ids + ) + + root_pages = [] + for page in all_pages: + if isinstance(page.parent, NotionParentPage): + docs_by_page_id[page.parent.page_id].children.append( + docs_by_page_id[page.id] + ) + elif isinstance(page.parent, NotionParentBlock): + parent_page_id = child_page_blocs_ids_to_parent_page_ids.get(page.id) + if parent_page_id: + docs_by_page_id[parent_page_id].children.append( + docs_by_page_id[page.id] + ) + else: + logger.warning( + f"Page {page.id} has a parent block, but no parent page found." + ) + elif isinstance(page.parent, NotionParentWorkspace): + # This is a root page, not a child of another page + root_pages.append(docs_by_page_id[page.id]) + + return root_pages From 7af6e8dd9ac9fea38a82c6b698b95dc765a0ff46 Mon Sep 17 00:00:00 2001 From: Baptiste Prevot Date: Tue, 3 Jun 2025 09:10:37 +0200 Subject: [PATCH 63/63] notion-import: adapt child page block to progress stream --- src/backend/core/api/viewsets.py | 95 +++++++++++++++------- src/backend/core/services/notion_import.py | 45 ++++------ 2 files changed, 79 insertions(+), 61 deletions(-) diff --git a/src/backend/core/api/viewsets.py b/src/backend/core/api/viewsets.py index 323f5590c7..ae02231881 100644 --- a/src/backend/core/api/viewsets.py +++ b/src/backend/core/api/viewsets.py @@ -36,9 +36,16 @@ from core.services.ai_services import AIService from core.services.collaboration_services import CollaborationService from core.services.converter_services import YdocConverter -from core.services.notion_import import build_notion_session, fetch_all_pages, import_page +from core.services.notion_import import ( + ImportedDocument, + build_notion_session, + fetch_all_pages, + import_page, + link_child_page_to_parent, +) from core.utils import extract_attachments, filter_descendants +from ..notion_schemas.notion_page import NotionPage from . import permissions, serializers, utils from .filters import DocumentFilter, ListDocumentFilter @@ -1840,7 +1847,9 @@ def notion_import_callback(request): code = request.GET.get("code") resp = requests.post( "https://api.notion.com/v1/oauth/token", - auth=requests.auth.HTTPBasicAuth(settings.NOTION_CLIENT_ID, settings.NOTION_CLIENT_SECRET), + auth=requests.auth.HTTPBasicAuth( + settings.NOTION_CLIENT_ID, settings.NOTION_CLIENT_SECRET + ), headers={"Accept": "application/json"}, data={ "grant_type": "authorization_code", @@ -1859,7 +1868,7 @@ def _import_notion_doc_content(imported_doc, obj, user): extra_args = { "Metadata": { "owner": str(user.id), - "status": enums.DocumentAttachmentStatus.READY, # TODO + "status": enums.DocumentAttachmentStatus.READY, # TODO }, } file_id = uuid.uuid4() @@ -1869,12 +1878,15 @@ def _import_notion_doc_content(imported_doc, obj, user): resp.raw, default_storage.bucket_name, key ) obj.attachments.append(key) - att.block["props"]["url"] = f"{settings.MEDIA_BASE_URL}{settings.MEDIA_URL}{key}" + att.block["props"]["url"] = ( + f"{settings.MEDIA_BASE_URL}{settings.MEDIA_URL}{key}" + ) obj.content = YdocConverter().convert_blocks(imported_doc.blocks) obj.save() -def _import_notion_child_page(imported_doc, parent_doc, user, imported_docs_by_page_id): + +def _import_notion_child_page(imported_doc, parent_doc, user, imported_ids): obj = parent_doc.add_child( creator=user, title=imported_doc.page.get_title() or "J'aime les carottes", @@ -1888,13 +1900,13 @@ def _import_notion_child_page(imported_doc, parent_doc, user, imported_docs_by_p _import_notion_doc_content(imported_doc, obj, user) - imported_docs_by_page_id[imported_doc.page.id] = obj + imported_ids.append(imported_doc.page.id) for child in imported_doc.children: - _import_notion_child_page(child, obj, user, imported_docs_by_page_id) + _import_notion_child_page(child, obj, user, imported_ids) -def _import_notion_root_page(imported_doc, user, imported_docs_by_page_id): +def _import_notion_root_page(imported_doc, user) -> list[str]: obj = models.Document.add_root( depth=1, creator=user, @@ -1908,44 +1920,64 @@ def _import_notion_root_page(imported_doc, user, imported_docs_by_page_id): role=models.RoleChoices.OWNER, ) - _import_notion_doc_content(imported_doc, obj, user) + imported_ids = [imported_doc.page.id] - imported_docs_by_page_id[imported_doc.page.id] = obj + _import_notion_doc_content(imported_doc, obj, user) for child in imported_doc.children: - _import_notion_child_page(child, obj, user, imported_docs_by_page_id) + _import_notion_child_page(child, obj, user, imported_ids) + + return imported_ids -def _generate_notion_progress(root_pages, page_statuses): - raw = json.dumps([{ - "title": page.get_title(), - "status": page_statuses[page.id], - } for page in root_pages]) +def _generate_notion_progress( + all_pages: list[NotionPage], page_statuses: dict[str, str] +) -> str: + raw = json.dumps( + [ + { + "title": page.get_title(), + "status": page_statuses[page.id], + } + for page in all_pages + ] + ) return f"data: {raw}\n\n" def _notion_import_event_stream(request): session = build_notion_session(request.session["notion_token"]) all_pages = fetch_all_pages(session) - root_pages = [page for page in all_pages if page.is_root()] page_statuses = {} - for page in root_pages: + for page in all_pages: page_statuses[page.id] = "pending" - yield _generate_notion_progress(root_pages, page_statuses) + yield _generate_notion_progress(all_pages, page_statuses) - imported_docs = [] - for page in root_pages: - imported_docs.append(import_page(session, page, all_pages)) + docs_by_page_id: dict[str, ImportedDocument] = {} + child_page_blocs_ids_to_parent_page_ids: dict[str, str] = {} + + for page in all_pages: + docs_by_page_id[page.id] = import_page( + session, page, child_page_blocs_ids_to_parent_page_ids + ) page_statuses[page.id] = "fetched" - yield _generate_notion_progress(root_pages, page_statuses) + yield _generate_notion_progress(all_pages, page_statuses) + + for page in all_pages: + link_child_page_to_parent( + page, docs_by_page_id, child_page_blocs_ids_to_parent_page_ids + ) - imported_docs_by_page_id = {} - for imported_doc in imported_docs: - _import_notion_root_page(imported_doc, request.user, imported_docs_by_page_id) - page_statuses[imported_doc.page.id] = "imported" - yield _generate_notion_progress(root_pages, page_statuses) + root_docs = [doc for doc in docs_by_page_id.values() if doc.page.is_root()] + + for root_doc in root_docs: + imported_ids = _import_notion_root_page(root_doc, request.user) + for imported_id in imported_ids: + page_statuses[imported_id] = "imported" + + yield _generate_notion_progress(all_pages, page_statuses) class IgnoreClientContentNegotiation(drf.negotiation.BaseContentNegotiation): @@ -1955,6 +1987,7 @@ def select_parser(self, request, parsers): def select_renderer(self, request, renderers, format_suffix): return (renderers[0], renderers[0].media_type) + class NotionImportRunView(drf.views.APIView): content_negotiation_class = IgnoreClientContentNegotiation @@ -1962,5 +1995,7 @@ def get(self, request, format=None): if "notion_token" not in request.session: raise drf.exceptions.PermissionDenied() - #return drf.response.Response({"sava": "oui et toi ?"}) - return StreamingHttpResponse(_notion_import_event_stream(request), content_type='text/event-stream') + # return drf.response.Response({"sava": "oui et toi ?"}) + return StreamingHttpResponse( + _notion_import_event_stream(request), content_type="text/event-stream" + ) diff --git a/src/backend/core/services/notion_import.py b/src/backend/core/services/notion_import.py index e99b19ce08..7af2c26f67 100644 --- a/src/backend/core/services/notion_import.py +++ b/src/backend/core/services/notion_import.py @@ -467,35 +467,18 @@ def import_page( ) -def import_notion(token: str) -> list[ImportedDocument]: - """Recursively imports all Notion pages and blocks accessible using the given token.""" - session = build_notion_session(token) - all_pages = fetch_all_pages(session) - docs_by_page_id: dict[str, ImportedDocument] = {} - child_page_blocs_ids_to_parent_page_ids: dict[str, str] = {} - for page in all_pages: - docs_by_page_id[page.id] = import_page( - session, page, child_page_blocs_ids_to_parent_page_ids - ) - - root_pages = [] - for page in all_pages: - if isinstance(page.parent, NotionParentPage): - docs_by_page_id[page.parent.page_id].children.append( - docs_by_page_id[page.id] +def link_child_page_to_parent( + page: NotionPage, + docs_by_page_id: dict[str, ImportedDocument], + child_page_blocs_ids_to_parent_page_ids: dict[str, str], +): + if isinstance(page.parent, NotionParentPage): + docs_by_page_id[page.parent.page_id].children.append(docs_by_page_id[page.id]) + elif isinstance(page.parent, NotionParentBlock): + parent_page_id = child_page_blocs_ids_to_parent_page_ids.get(page.id) + if parent_page_id: + docs_by_page_id[parent_page_id].children.append(docs_by_page_id[page.id]) + else: + logger.warning( + f"Page {page.id} has a parent block, but no parent page found." ) - elif isinstance(page.parent, NotionParentBlock): - parent_page_id = child_page_blocs_ids_to_parent_page_ids.get(page.id) - if parent_page_id: - docs_by_page_id[parent_page_id].children.append( - docs_by_page_id[page.id] - ) - else: - logger.warning( - f"Page {page.id} has a parent block, but no parent page found." - ) - elif isinstance(page.parent, NotionParentWorkspace): - # This is a root page, not a child of another page - root_pages.append(docs_by_page_id[page.id]) - - return root_pages