Skip to content

Add Notion import #1050

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 63 commits into
base: feature/doc-dnd
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
63 commits
Select commit Hold shift + click to select a range
a397689
Add basic oauth flow
emersion Jun 2, 2025
252b87a
notion-schemas: add a framework for some schemas of the notion api.
Castavo Jun 2, 2025
e3523e7
Add import_notion service
clarani Jun 2, 2025
01544f6
notion-schemas: add some more schemas
Castavo Jun 2, 2025
974bb86
Add blocks converter to y-provider
emersion Jun 2, 2025
f2c5754
wip: add document creation code
emersion Jun 2, 2025
53e41bd
notion-import: use schemas
Castavo Jun 2, 2025
63039be
Fix ValueError in NotionPage.get_title()
emersion Jun 2, 2025
7fad79f
Remove awkward debugging log
emersion Jun 2, 2025
6bebe67
Fix 400 in Notion search
emersion Jun 2, 2025
3f31453
Simplify Notion API error handling
emersion Jun 2, 2025
2f0ef45
Create one document per root Notion page
emersion Jun 2, 2025
b7db0b3
Add super dumb block converter
emersion Jun 2, 2025
7bae379
notion-schemas: add catcah-all unsupported block type
Castavo Jun 2, 2025
4955ccf
just add some colors
Castavo Jun 2, 2025
216c55f
just add a link type
Castavo Jun 2, 2025
74bcea7
Fix typo lol
emersion Jun 2, 2025
1c7371a
It's not a match
emersion Jun 2, 2025
c838039
fixup
Castavo Jun 2, 2025
b5e3f1a
Unionize all of these rich folks
emersion Jun 2, 2025
6aabba6
notion-import: tidy up
Castavo Jun 2, 2025
c8d44e8
notion-schemas: better unsupported objects
Castavo Jun 2, 2025
0c86a9b
Add import button
NicolasRitouet Jun 22, 2025
7eea481
notion-schemas: add tables
Castavo Jun 2, 2025
0496520
notion-schemas: blocks: add child-page and video
Castavo Jun 2, 2025
70c283d
Don't reuse token in redirect endpoint
emersion Jun 3, 2025
4699870
Move Notion API details to settings
emersion Jun 3, 2025
979bc07
Introduce ImportedDocument
emersion Jun 3, 2025
4b56e6c
handle heading blocks
clarani Jun 3, 2025
d1d85ef
notion-import: tidy parsing
Castavo Jun 2, 2025
721a888
notion-import: handle dividers
Castavo Jun 3, 2025
35e8ef4
Add support for child pages
emersion Jun 3, 2025
cf343c6
Add DocumentAccess for child docs, just in case
emersion Jun 3, 2025
48ba52b
Introduce NotionFile
emersion Jun 3, 2025
fb27708
notion-import: add table & fix converter error message
Castavo Jun 3, 2025
f2248cc
add FRONTEND_URL to env settings
clarani Jun 3, 2025
2f68809
Add a loading page during import
clarani Jun 3, 2025
a9ed917
Ajout support Bullet list and Number list
NicolasRitouet Jun 3, 2025
9284879
add format text
Tguisnet Jun 3, 2025
41f44be
tidy
Castavo Jun 3, 2025
10b85ec
notion-import: handle sub list items
Castavo Jun 3, 2025
36fbf65
convert_block returns now list of dict
clarani Jun 3, 2025
0c815f2
handle columns and columns list
clarani Jun 3, 2025
15c3ca8
notion-import: handle notion todos
Castavo Jun 3, 2025
578409e
fixup
Castavo Jun 3, 2025
3c938ae
Handle uploaded images
emersion Jun 3, 2025
b3eb0ff
Fix missing arg in convert_block()
emersion Jun 3, 2025
33f21ff
tidy
Castavo Jun 3, 2025
9337e42
Struggle update
emersion Jun 3, 2025
6c276c2
fix
Castavo Jun 3, 2025
ac6742d
notion-schemas: handle callouts
Castavo Jun 3, 2025
da02423
add partial links
Tguisnet Jun 3, 2025
adc6029
notion-schemas: handle code blocks
Castavo Jun 3, 2025
81ef2e7
notion-schemas: handle bookmarks
Castavo Jun 3, 2025
70451ba
notion-schemas: fix heading handling
Castavo Jun 3, 2025
9310550
notion-schemas: fix default table width
Castavo Jun 3, 2025
e69ce24
improve links
Tguisnet Jun 3, 2025
3d9547d
C'est le WIP maintenant
emersion Jun 3, 2025
42f42fc
Disable content negotiation
emersion Jun 3, 2025
116a7e3
add eventSource in useImportNotion
clarani Jun 3, 2025
96e36e6
fancy progress bar
clarani Jun 3, 2025
b1d52cc
notion-import: handle child page blocks
Castavo Jun 3, 2025
7af6e8d
notion-import: adapt child page block to progress stream
Castavo Jun 3, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions env.d/development/common.dist
Original file line number Diff line number Diff line change
Expand Up @@ -64,3 +64,4 @@ COLLABORATION_WS_URL=ws://localhost:4444/collaboration/ws/

# Frontend
FRONTEND_THEME=default
FRONTEND_URL=http://localhost:3000
184 changes: 183 additions & 1 deletion src/backend/core/api/viewsets.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import logging
import uuid
from collections import defaultdict
from urllib.parse import unquote, urlparse
from urllib.parse import unquote, urlencode, urlparse

from django.conf import settings
from django.contrib.postgres.aggregates import ArrayAgg
Expand All @@ -18,6 +18,7 @@
from django.db.models.expressions import RawSQL
from django.db.models.functions import Left, Length
from django.http import Http404, StreamingHttpResponse
from django.shortcuts import redirect
from django.utils.functional import cached_property
from django.utils.text import capfirst, slugify
from django.utils.translation import gettext_lazy as _
Expand All @@ -34,8 +35,17 @@
from core import authentication, choices, enums, models
from core.services.ai_services import AIService
from core.services.collaboration_services import CollaborationService
from core.services.converter_services import YdocConverter
from core.services.notion_import import (
ImportedDocument,
build_notion_session,
fetch_all_pages,
import_page,
link_child_page_to_parent,
)
from core.utils import extract_attachments, filter_descendants

from ..notion_schemas.notion_page import NotionPage
from . import permissions, serializers, utils
from .filters import DocumentFilter, ListDocumentFilter

Expand Down Expand Up @@ -1817,3 +1827,175 @@ def _load_theme_customization(self):
)

return theme_customization


@drf.decorators.api_view()
def notion_import_redirect(request):
    """Redirect the caller to Notion's OAuth authorization page.

    The authorization URL is built from the Notion client id and redirect
    URI found in the Django settings, with ``response_type=code`` and
    ``owner=user``.
    """
    params = {
        "client_id": settings.NOTION_CLIENT_ID,
        "response_type": "code",
        "owner": "user",
        "redirect_uri": settings.NOTION_REDIRECT_URI,
    }
    authorize_endpoint = "https://api.notion.com/v1/oauth/authorize?"
    return redirect(authorize_endpoint + urlencode(params))


@drf.decorators.api_view()
def notion_import_callback(request):
    """Finish the Notion OAuth flow and keep the access token in the session.

    Exchanges the ``code`` query parameter for an access token at Notion's
    token endpoint, stores the token under ``request.session["notion_token"]``
    and redirects to the frontend import page.

    Raises:
        drf.exceptions.ValidationError: if the request carries no ``code``
            query parameter.
        requests.HTTPError: if the token endpoint answers with an error status.
    """
    code = request.GET.get("code")
    if not code:
        # Without a code there is nothing to exchange; answer with a client
        # error instead of forwarding an empty value to Notion.
        raise drf.exceptions.ValidationError(
            {"code": "Missing authorization code."}
        )

    # NOTE(review): no OAuth ``state`` value is generated by the redirect view
    # or verified here — consider adding one to protect the flow against CSRF.
    resp = requests.post(
        "https://api.notion.com/v1/oauth/token",
        auth=requests.auth.HTTPBasicAuth(
            settings.NOTION_CLIENT_ID, settings.NOTION_CLIENT_SECRET
        ),
        headers={"Accept": "application/json"},
        data={
            "grant_type": "authorization_code",
            "code": code,
            "redirect_uri": settings.NOTION_REDIRECT_URI,
        },
        # requests never times out by default; bound the wait so a stalled
        # upstream cannot block the worker indefinitely.
        timeout=10,
    )
    resp.raise_for_status()
    data = resp.json()
    request.session["notion_token"] = data["access_token"]
    return redirect(f"{settings.FRONTEND_URL}/import-notion/")


def _import_notion_doc_content(imported_doc, obj, user):
    """Upload the attachments of an imported page and store its content.

    For each attachment of ``imported_doc`` the remote file is streamed into
    the default storage bucket under a freshly generated key, the key is
    appended to ``obj.attachments`` and the ``url`` prop of the matching block
    is rewritten to point at the stored copy. The blocks are then converted
    with ``YdocConverter`` and saved as ``obj.content``.

    Raises:
        requests.HTTPError: if an attachment download returns an error status.
    """
    for att in imported_doc.attachments:
        extra_args = {
            "Metadata": {
                "owner": str(user.id),
                "status": enums.DocumentAttachmentStatus.READY,  # TODO
            },
        }
        file_id = uuid.uuid4()
        key = f"{obj.key_base}/{enums.ATTACHMENTS_FOLDER:s}/{file_id!s}.raw"
        # Bound the wait on the remote host; requests has no default timeout.
        with requests.get(att.file.file["url"], stream=True, timeout=30) as resp:
            # Do not store an error page in place of the attachment.
            resp.raise_for_status()
            # The metadata dict was previously built but never handed to the
            # upload call, so objects were stored without owner/status.
            default_storage.connection.meta.client.upload_fileobj(
                resp.raw,
                default_storage.bucket_name,
                key,
                ExtraArgs=extra_args,
            )
        obj.attachments.append(key)
        att.block["props"]["url"] = (
            f"{settings.MEDIA_BASE_URL}{settings.MEDIA_URL}{key}"
        )

    obj.content = YdocConverter().convert_blocks(imported_doc.blocks)
    obj.save()


def _import_notion_child_page(imported_doc, parent_doc, user, imported_ids):
    """Create a child document for an imported page, then recurse.

    The new document is added under ``parent_doc``, ``user`` is given the
    owner role on it, its content is imported, and the Notion page id is
    appended to ``imported_ids`` (mutated in place) before the page's own
    children are processed the same way.
    """
    title = imported_doc.page.get_title() or "J'aime les carottes"
    child_doc = parent_doc.add_child(creator=user, title=title)

    models.DocumentAccess.objects.create(
        document=child_doc,
        user=user,
        role=models.RoleChoices.OWNER,
    )

    _import_notion_doc_content(imported_doc, child_doc, user)
    imported_ids.append(imported_doc.page.id)

    for sub_doc in imported_doc.children:
        _import_notion_child_page(sub_doc, child_doc, user, imported_ids)


def _import_notion_root_page(imported_doc, user) -> list[str]:
    """Create a root document for an imported page and import its subtree.

    The root document is created with restricted link reach and ``user`` as
    owner. Returns the ids of every Notion page imported along the way,
    starting with the root page itself.
    """
    title = imported_doc.page.get_title() or "J'aime les courgettes"
    root_doc = models.Document.add_root(
        depth=1,
        creator=user,
        title=title,
        link_reach=models.LinkReachChoices.RESTRICTED,
    )

    models.DocumentAccess.objects.create(
        document=root_doc,
        user=user,
        role=models.RoleChoices.OWNER,
    )

    # Child imports append their own page ids to this list.
    imported_ids = [imported_doc.page.id]

    _import_notion_doc_content(imported_doc, root_doc, user)

    for sub_doc in imported_doc.children:
        _import_notion_child_page(sub_doc, root_doc, user, imported_ids)

    return imported_ids


def _generate_notion_progress(
all_pages: list[NotionPage], page_statuses: dict[str, str]
) -> str:
raw = json.dumps(
[
{
"title": page.get_title(),
"status": page_statuses[page.id],
}
for page in all_pages
]
)
return f"data: {raw}\n\n"


def _notion_import_event_stream(request):
    """Run a Notion import and yield progress as server-sent-event strings.

    Generator. Reads the Notion token from ``request.session``, fetches all
    pages, imports each one, links child pages to their parents, then creates
    documents for the root pages on behalf of ``request.user``. A progress
    message built by ``_generate_notion_progress`` is yielded after the
    initial listing, after each page is fetched, and as pages are imported.
    """
    session = build_notion_session(request.session["notion_token"])
    all_pages = fetch_all_pages(session)

    # Every page starts out as "pending".
    page_statuses = {}
    for page in all_pages:
        page_statuses[page.id] = "pending"

    yield _generate_notion_progress(all_pages, page_statuses)

    docs_by_page_id: dict[str, ImportedDocument] = {}
    # Filled in by import_page and consumed by link_child_page_to_parent below.
    child_page_blocs_ids_to_parent_page_ids: dict[str, str] = {}

    for page in all_pages:
        docs_by_page_id[page.id] = import_page(
            session, page, child_page_blocs_ids_to_parent_page_ids
        )
        page_statuses[page.id] = "fetched"
        yield _generate_notion_progress(all_pages, page_statuses)

    # Linking runs only once every page has been imported.
    for page in all_pages:
        link_child_page_to_parent(
            page, docs_by_page_id, child_page_blocs_ids_to_parent_page_ids
        )

    root_docs = [doc for doc in docs_by_page_id.values() if doc.page.is_root()]

    for root_doc in root_docs:
        imported_ids = _import_notion_root_page(root_doc, request.user)
        for imported_id in imported_ids:
            page_statuses[imported_id] = "imported"

        # NOTE(review): indentation was lost in the reviewed copy; this yield
        # is assumed to sit inside the root-document loop (one progress
        # message per imported root) — confirm against the original file.
        yield _generate_notion_progress(all_pages, page_statuses)


class IgnoreClientContentNegotiation(drf.negotiation.BaseContentNegotiation):
    """Content negotiation that always picks the first parser and renderer."""

    def select_parser(self, request, parsers):
        """Return the first configured parser, whatever the request says."""
        first_parser = parsers[0]
        return first_parser

    def select_renderer(self, request, renderers, format_suffix):
        """Return the first configured renderer together with its media type."""
        first_renderer = renderers[0]
        return (first_renderer, first_renderer.media_type)


class NotionImportRunView(drf.views.APIView):
    """Stream the progress of a Notion import as server-sent events."""

    content_negotiation_class = IgnoreClientContentNegotiation

    def get(self, request, format=None):
        """Start the import and return the streaming response.

        Raises ``PermissionDenied`` when the session holds no Notion token.
        """
        if "notion_token" in request.session:
            event_stream = _notion_import_event_stream(request)
            return StreamingHttpResponse(
                event_stream, content_type="text/event-stream"
            )

        raise drf.exceptions.PermissionDenied()
Loading