From ce14e3e003ffbadcee540289e23cac61eb807d54 Mon Sep 17 00:00:00 2001 From: Xia Dong Date: Fri, 3 Jan 2025 18:48:45 +0800 Subject: [PATCH 1/3] feat(document-reader): add Notion document reader support - Add NotionResource class to handle Notion API interactions - Add NotionDocumentReader to implement DocumentReader interface - Support both page and database content types - Add comprehensive metadata handling - Add integration tests Key features: 1. Support reading Notion pages and databases 2. Extract rich metadata (creation time, authors, etc.) 3. Handle null values safely in metadata 4. Follow Spring AI document reader patterns 5. Add proper error handling and logging Related PR: ##274 --- .../notion-document-reader/pom.xml | 94 +++++ .../reader/notion/NotionDocumentReader.java | 181 +++++++++ .../ai/reader/notion/NotionResource.java | 369 ++++++++++++++++++ .../reader/notion/NotionDocumentReaderIT.java | 112 ++++++ 4 files changed, 756 insertions(+) create mode 100644 community/document-readers/notion-document-reader/pom.xml create mode 100644 community/document-readers/notion-document-reader/src/main/java/com/alibaba/cloud/ai/reader/notion/NotionDocumentReader.java create mode 100644 community/document-readers/notion-document-reader/src/main/java/com/alibaba/cloud/ai/reader/notion/NotionResource.java create mode 100644 community/document-readers/notion-document-reader/src/test/java/com/alibaba/cloud/ai/reader/notion/NotionDocumentReaderIT.java diff --git a/community/document-readers/notion-document-reader/pom.xml b/community/document-readers/notion-document-reader/pom.xml new file mode 100644 index 00000000..594255fd --- /dev/null +++ b/community/document-readers/notion-document-reader/pom.xml @@ -0,0 +1,94 @@ + + + 4.0.0 + + com.alibaba.cloud.ai + spring-ai-alibaba + ${revision} + ../../../pom.xml + + + notion-document-reader + notion-document-reader + notion reader for Spring AI Alibaba + jar + https://github.com/alibaba/spring-ai-alibaba + + https://github.com/alibaba/spring-ai-alibaba + git://github.com/alibaba/spring-ai-alibaba.git + git@github.com:alibaba/spring-ai-alibaba.git + + + + 17 + 17 + UTF-8 + 3.1.1 + + + + + + com.alibaba.cloud.ai + spring-ai-alibaba-core + ${project.parent.version} + + + + + org.springframework.ai + spring-ai-test + test + + + + org.springframework.boot + spring-boot-starter-test + test + + + + io.projectreactor + reactor-test + test + + + + io.micrometer + micrometer-observation-test + test + + + + + + + + org.springframework.boot + spring-boot-maven-plugin + + + org.apache.maven.plugins + maven-deploy-plugin + ${maven-deploy-plugin.version} + + true + + + + + + + + spring-milestones + Spring Milestones + https://repo.spring.io/milestone + + false + + + + + \ No newline at end of file diff --git a/community/document-readers/notion-document-reader/src/main/java/com/alibaba/cloud/ai/reader/notion/NotionDocumentReader.java b/community/document-readers/notion-document-reader/src/main/java/com/alibaba/cloud/ai/reader/notion/NotionDocumentReader.java new file mode 100644 index 00000000..245f091f --- /dev/null +++ b/community/document-readers/notion-document-reader/src/main/java/com/alibaba/cloud/ai/reader/notion/NotionDocumentReader.java @@ -0,0 +1,181 @@ +/* + * Copyright 2024-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.alibaba.cloud.ai.reader.notion; + +import com.alibaba.fastjson.JSONObject; +import org.springframework.ai.document.Document; +import org.springframework.ai.document.DocumentReader; +import org.springframework.util.StringUtils; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStreamReader; +import java.time.Instant; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * Notion Document Reader + * Implements DocumentReader interface to read content from Notion + * + * @author xiadong + * @since 2024-01-06 + */ +public class NotionDocumentReader implements DocumentReader { + + private final NotionResource notionResource; + private final JSONObject pageMetadata; + + /** + * Constructor + * @param notionResource Notion resource + */ + public NotionDocumentReader(NotionResource notionResource) { + this.notionResource = notionResource; + this.pageMetadata = notionResource.getMetadata(); + } + + @Override + public List get() { + try { + // Read content from input stream + String content = readContent(); + + // Create metadata map + Map metadata = buildMetadata(); + + // Create and return document + return Collections.singletonList(new Document(content, metadata)); + } + catch (IOException e) { + throw new RuntimeException("Failed to load document from Notion: " + e.getMessage(), e); + } + } + + /** + * Build metadata map from Notion API response + */ + private Map buildMetadata() { + Map metadata = new HashMap<>(); + + // Add basic metadata + metadata.put(NotionResource.SOURCE, notionResource.getSource()); + metadata.put("resourceType", notionResource.getResourceType().name()); + metadata.put("resourceId", notionResource.getResourceId()); + + // Add metadata from Notion API + if (pageMetadata != null) { + // Creation and update times + String createdTime = pageMetadata.getString("created_time"); + if (StringUtils.hasText(createdTime)) { + metadata.put("createdTime", Instant.parse(createdTime).toEpochMilli()); + } + + String lastEditedTime = pageMetadata.getString("last_edited_time"); + if (StringUtils.hasText(lastEditedTime)) { + metadata.put("lastEditedTime", Instant.parse(lastEditedTime).toEpochMilli()); + } + + // Creator and last editor + JSONObject createdBy = pageMetadata.getJSONObject("created_by"); + if (createdBy != null) { + String creatorName = createdBy.getString("name"); + String creatorId = createdBy.getString("id"); + if (StringUtils.hasText(creatorName)) { + metadata.put("createdBy", creatorName); + } + if (StringUtils.hasText(creatorId)) { + metadata.put("createdById", creatorId); + } + } + + JSONObject lastEditedBy = pageMetadata.getJSONObject("last_edited_by"); + if (lastEditedBy != null) { + String editorName = lastEditedBy.getString("name"); + String editorId = lastEditedBy.getString("id"); + if (StringUtils.hasText(editorName)) { + metadata.put("lastEditedBy", editorName); + } + if (StringUtils.hasText(editorId)) { + metadata.put("lastEditedById", editorId); + } + } + + // URL + String url = pageMetadata.getString("url"); + if (StringUtils.hasText(url)) { + metadata.put("url", url); + } + + // Parent information + JSONObject parent = pageMetadata.getJSONObject("parent"); + if (parent != null) { + String parentType = parent.getString("type"); + if (StringUtils.hasText(parentType)) { + metadata.put("parentType", parentType); + String parentId = parent.getString(parentType + "_id"); + if (StringUtils.hasText(parentId)) { + metadata.put("parentId", parentId); + } + } + } + + // Icon + JSONObject icon = pageMetadata.getJSONObject("icon"); + if (icon != null) { + String iconType = icon.getString("type"); + String iconUrl = icon.getString("url"); + if (StringUtils.hasText(iconType)) { + metadata.put("iconType", iconType); + } + if (StringUtils.hasText(iconUrl)) { + metadata.put("iconUrl", iconUrl); + } + } + + // Cover + JSONObject cover = pageMetadata.getJSONObject("cover"); + if (cover != null) { + String coverType = cover.getString("type"); + String coverUrl = cover.getString("url"); + if (StringUtils.hasText(coverType)) { + metadata.put("coverType", coverType); + } + if (StringUtils.hasText(coverUrl)) { + metadata.put("coverUrl", coverUrl); + } + } + } + + return metadata; + } + + /** + * Read content from input stream + */ + private String readContent() throws IOException { + StringBuilder content = new StringBuilder(); + try (BufferedReader reader = new BufferedReader(new InputStreamReader(notionResource.getInputStream()))) { + String line; + while ((line = reader.readLine()) != null) { + content.append(line).append("\n"); + } + } + return content.toString(); + } +} \ No newline at end of file diff --git a/community/document-readers/notion-document-reader/src/main/java/com/alibaba/cloud/ai/reader/notion/NotionResource.java b/community/document-readers/notion-document-reader/src/main/java/com/alibaba/cloud/ai/reader/notion/NotionResource.java new file mode 100644 index 00000000..dc175629 --- /dev/null +++ b/community/document-readers/notion-document-reader/src/main/java/com/alibaba/cloud/ai/reader/notion/NotionResource.java @@ -0,0 +1,369 @@ +/* + * Copyright 2024-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.alibaba.cloud.ai.reader.notion; + +import com.alibaba.fastjson.JSON; +import com.alibaba.fastjson.JSONArray; +import com.alibaba.fastjson.JSONObject; +import org.springframework.core.io.Resource; +import org.springframework.util.Assert; + +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.net.URI; +import java.net.URL; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.util.Objects; + +/** + * Notion Resource class + * Supports accessing Notion pages and databases + * + * @author xiadong + * @since 2024-01-06 + */ +public class NotionResource implements Resource { + + public static final String SOURCE = "source"; + + private static final String BASE_URL = "https://api.notion.com/v1"; + private static final String API_VERSION = "2022-06-28"; + + // Resource types + public enum ResourceType { + PAGE, + DATABASE + } + + private final HttpClient httpClient; + private final InputStream inputStream; + private final URI uri; + private final ResourceType resourceType; + private final String resourceId; + private JSONObject metadata; + + /** + * Constructor + * @param notionToken Notion API Token + * @param resourceId Notion resource ID + * @param resourceType Resource type (PAGE or DATABASE) + */ + public NotionResource(String notionToken, String resourceId, ResourceType resourceType) { + Assert.hasText(resourceId, "ResourceId must not be empty"); + Assert.notNull(resourceType, "ResourceType must not be null"); + + this.resourceId = resourceId; + this.resourceType = resourceType; + this.httpClient = HttpClient.newBuilder().version(HttpClient.Version.HTTP_2).build(); + + validateToken(notionToken); + + // Get resource metadata + this.metadata = getResourceMetadata(notionToken, resourceId, resourceType); + + // Get content based on resource type + String content = switch (resourceType) { + case PAGE -> getPageContent(notionToken, resourceId); + case DATABASE -> getDatabaseContent(notionToken, resourceId); + }; + + this.inputStream = new ByteArrayInputStream(content.getBytes()); + this.uri = URI.create(String.format("notion://%s/%s", resourceType.name().toLowerCase(), resourceId)); + } + + /** + * Validate Notion API token + */ + private void validateToken(String notionToken) { + URI uri = URI.create(BASE_URL + "/users/me"); + HttpRequest request = HttpRequest.newBuilder() + .header("Authorization", "Bearer " + notionToken) + .header("Notion-Version", API_VERSION) + .uri(uri) + .GET() + .build(); + + try { + HttpResponse response = this.httpClient.send(request, HttpResponse.BodyHandlers.ofString()); + Assert.isTrue(response.statusCode() == 200, "Failed to authenticate Notion token"); + } + catch (Exception e) { + throw new RuntimeException("Failed to validate Notion token", e); + } + } + + /** + * Get page content + */ + private String getPageContent(String notionToken, String pageId) { + try { + // 1. Get page content + URI pageUri = URI.create(BASE_URL + "/pages/" + pageId); + HttpRequest pageRequest = HttpRequest.newBuilder() + .header("Authorization", "Bearer " + notionToken) + .header("Notion-Version", API_VERSION) + .uri(pageUri) + .GET() + .build(); + + HttpResponse pageResponse = this.httpClient.send(pageRequest, HttpResponse.BodyHandlers.ofString()); + Assert.isTrue(pageResponse.statusCode() == 200, "Failed to fetch page content"); + + // 2. Parse page content + JSONObject pageJson = JSON.parseObject(pageResponse.body()); + StringBuilder content = new StringBuilder(); + + // Extract page title + JSONObject properties = pageJson.getJSONObject("properties"); + if (properties != null && properties.containsKey("title")) { + JSONObject titleProp = properties.getJSONObject("title"); + JSONArray titleArray = titleProp.getJSONArray("title"); + if (titleArray != null && !titleArray.isEmpty()) { + for (int i = 0; i < titleArray.size(); i++) { + content.append(titleArray.getJSONObject(i).getString("plain_text")); + } + content.append("\n\n"); + } + } + + // 3. Get page blocks + URI blocksUri = URI.create(BASE_URL + "/blocks/" + pageId + "/children"); + HttpRequest blocksRequest = HttpRequest.newBuilder() + .header("Authorization", "Bearer " + notionToken) + .header("Notion-Version", API_VERSION) + .uri(blocksUri) + .GET() + .build(); + + HttpResponse blocksResponse = this.httpClient.send(blocksRequest, HttpResponse.BodyHandlers.ofString()); + Assert.isTrue(blocksResponse.statusCode() == 200, "Failed to fetch page blocks"); + + // 4. Parse block content + JSONObject blocksJson = JSON.parseObject(blocksResponse.body()); + JSONArray blocks = blocksJson.getJSONArray("results"); + + // 5. Extract text content + for (int i = 0; i < blocks.size(); i++) { + JSONObject block = blocks.getJSONObject(i); + String type = block.getString("type"); + if (block.containsKey(type)) { + JSONObject typeObj = block.getJSONObject(type); + if (typeObj.containsKey("rich_text")) { + JSONArray richText = typeObj.getJSONArray("rich_text"); + for (int j = 0; j < richText.size(); j++) { + content.append(richText.getJSONObject(j).getString("plain_text")); + } + content.append("\n"); + } + } + } + return content.toString(); + } + catch (Exception e) { + throw new RuntimeException("Failed to get page content", e); + } + } + + /** + * Get database content + */ + private String getDatabaseContent(String notionToken, String databaseId) { + try { + // 1. Query database + URI uri = URI.create(BASE_URL + "/databases/" + databaseId + "/query"); + HttpRequest request = HttpRequest.newBuilder() + .header("Authorization", "Bearer " + notionToken) + .header("Notion-Version", API_VERSION) + .header("Content-Type", "application/json") + .uri(uri) + .POST(HttpRequest.BodyPublishers.ofString("{}")) + .build(); + + HttpResponse response = this.httpClient.send(request, HttpResponse.BodyHandlers.ofString()); + Assert.isTrue(response.statusCode() == 200, "Failed to fetch database content"); + + // 2. Parse database content + JSONObject jsonResponse = JSON.parseObject(response.body()); + JSONArray results = jsonResponse.getJSONArray("results"); + + // 3. Extract property values + StringBuilder content = new StringBuilder(); + for (int i = 0; i < results.size(); i++) { + JSONObject row = results.getJSONObject(i); + JSONObject properties = row.getJSONObject("properties"); + + for (String propertyName : properties.keySet()) { + JSONObject property = properties.getJSONObject(propertyName); + String type = property.getString("type"); + + if (property.containsKey(type)) { + Object value = property.get(type); + if (value instanceof JSONArray) { + JSONArray array = (JSONArray) value; + for (int j = 0; j < array.size(); j++) { + JSONObject item = array.getJSONObject(j); + if (item.containsKey("plain_text")) { + content.append(propertyName) + .append(": ") + .append(item.getString("plain_text")) + .append("\n"); + } + } + } + } + } + content.append("---\n"); + } + return content.toString(); + } + catch (Exception e) { + throw new RuntimeException("Failed to get database content", e); + } + } + + /** + * Get resource metadata + */ + private JSONObject getResourceMetadata(String notionToken, String resourceId, ResourceType resourceType) { + try { + String endpoint = switch (resourceType) { + case PAGE -> "/pages/"; + case DATABASE -> "/databases/"; + }; + + URI uri = URI.create(BASE_URL + endpoint + resourceId); + HttpRequest request = HttpRequest.newBuilder() + .header("Authorization", "Bearer " + notionToken) + .header("Notion-Version", API_VERSION) + .uri(uri) + .GET() + .build(); + + HttpResponse response = this.httpClient.send(request, HttpResponse.BodyHandlers.ofString()); + Assert.isTrue(response.statusCode() == 200, "Failed to fetch resource metadata"); + + return JSON.parseObject(response.body()); + } + catch (Exception e) { + throw new RuntimeException("Failed to get resource metadata", e); + } + } + + /** + * Get resource metadata + */ + public JSONObject getMetadata() { + return metadata; + } + + public static Builder builder() { + return new Builder(); + } + + public static class Builder { + private String notionToken; + private String resourceId; + private ResourceType resourceType; + + public Builder notionToken(String notionToken) { + this.notionToken = notionToken; + return this; + } + + public Builder resourceId(String resourceId) { + this.resourceId = resourceId; + return this; + } + + public Builder resourceType(ResourceType resourceType) { + this.resourceType = resourceType; + return this; + } + + public NotionResource build() { + Assert.notNull(notionToken, "NotionToken must not be null"); + Assert.notNull(resourceId, "ResourceId must not be null"); + Assert.notNull(resourceType, "ResourceType must not be null"); + return new NotionResource(notionToken, resourceId, resourceType); + } + } + + @Override + public boolean exists() { + return true; + } + + @Override + public URL getURL() throws IOException { + return null; + } + + @Override + public URI getURI() throws IOException { + return uri; + } + + @Override + public File getFile() throws IOException { + return null; + } + + @Override + public long contentLength() throws IOException { + return 0; + } + + @Override + public long lastModified() throws IOException { + return 0; + } + + @Override + public Resource createRelative(String relativePath) throws IOException { + return null; + } + + @Override + public String getFilename() { + return resourceId; + } + + @Override + public String getDescription() { + return String.format("Notion %s resource [id=%s]", resourceType, resourceId); + } + + @Override + public InputStream getInputStream() throws IOException { + return inputStream; + } + + public ResourceType getResourceType() { + return resourceType; + } + + public String getResourceId() { + return resourceId; + } + + public String getSource() { + return uri.toString(); + } +} \ No newline at end of file diff --git a/community/document-readers/notion-document-reader/src/test/java/com/alibaba/cloud/ai/reader/notion/NotionDocumentReaderIT.java b/community/document-readers/notion-document-reader/src/test/java/com/alibaba/cloud/ai/reader/notion/NotionDocumentReaderIT.java new file mode 100644 index 00000000..97bcb8e7 --- /dev/null +++ b/community/document-readers/notion-document-reader/src/test/java/com/alibaba/cloud/ai/reader/notion/NotionDocumentReaderIT.java @@ -0,0 +1,112 @@ +/* + * Copyright 2024-2025 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package com.alibaba.cloud.ai.reader.notion; + +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; +import org.springframework.ai.document.Document; + +import java.util.List; + +import static org.assertj.core.api.Assertions.assertThat; + +/** + * Integration tests for Notion Document Reader + * + * @author xiadong + * @since 2024-01-06 + */ +class NotionDocumentReaderIT { + + private static final String NOTION_TOKEN = System.getenv("NOTION_TOKEN"); + + // Test page ID + private static final String TEST_PAGE_ID = "${pageId}"; + + // Test database ID + private static final String TEST_DATABASE_ID = "${databaseId}"; + + NotionDocumentReader pageReader; + NotionDocumentReader databaseReader; + + @BeforeEach + public void beforeEach() { + // Create page reader + NotionResource pageResource = NotionResource.builder() + .notionToken(NOTION_TOKEN) + .resourceId(TEST_PAGE_ID) + .resourceType(NotionResource.ResourceType.PAGE) + .build(); + pageReader = new NotionDocumentReader(pageResource); + + // Create database reader + NotionResource databaseResource = NotionResource.builder() + .notionToken(NOTION_TOKEN) + .resourceId(TEST_DATABASE_ID) + .resourceType(NotionResource.ResourceType.DATABASE) + .build(); + databaseReader = new NotionDocumentReader(databaseResource); + } + + @Test + void should_load_page() { + // when + List documents = pageReader.get(); + + // then + assertThat(documents).isNotEmpty(); + Document document = documents.get(0); + + // Verify metadata + assertThat(document.getMetadata()).containsKey(NotionResource.SOURCE); + assertThat(document.getMetadata().get(NotionResource.SOURCE)) + .isEqualTo("notion://page/" + TEST_PAGE_ID); + assertThat(document.getMetadata().get("resourceType")) + .isEqualTo(NotionResource.ResourceType.PAGE.name()); + assertThat(document.getMetadata().get("resourceId")) + .isEqualTo(TEST_PAGE_ID); + + // Verify content + String content = document.getContent(); + assertThat(content).isNotEmpty(); + System.out.println("Page content: " + content); + } + + @Test + void should_load_database() { + // when + List documents = databaseReader.get(); + + // then + assertThat(documents).isNotEmpty(); + Document document = documents.get(0); + + // Verify metadata + assertThat(document.getMetadata()).containsKey(NotionResource.SOURCE); + assertThat(document.getMetadata().get(NotionResource.SOURCE)) + .isEqualTo("notion://database/" + TEST_DATABASE_ID); + assertThat(document.getMetadata().get("resourceType")) + .isEqualTo(NotionResource.ResourceType.DATABASE.name()); + assertThat(document.getMetadata().get("resourceId")) + .isEqualTo(TEST_DATABASE_ID); + + // Verify content + String content = document.getContent(); + assertThat(content).isNotEmpty(); + System.out.println("Database content: " + content); + } +} \ No newline at end of file From 5b055467f8b556536641b95592d9181db54c7554 Mon Sep 17 00:00:00 2001 From: Xia Dong Date: Mon, 6 Jan 2025 13:57:05 +0800 Subject: [PATCH 2/3] feat(document-reader): add notion-document-reader to parent pom --- pom.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/pom.xml b/pom.xml index 312be6d0..c3b65f50 100644 --- a/pom.xml +++ b/pom.xml @@ -61,6 +61,7 @@ community/document-readers/tencent-cos-document-reader community/document-readers/feishu-document-reader community/document-readers/yuque-document-reader + community/document-readers/notion-document-reader community/document-parsers/document-parser-apache-pdfbox community/document-parsers/document-parser-markdown From 431a1226b5f4e9d8c9080d19ee033c0790fa5dcf Mon Sep 17 00:00:00 2001 From: Xia Dong Date: Mon, 6 Jan 2025 14:03:38 +0800 Subject: [PATCH 3/3] feat(document-reader): notion-document-reader javaformat apply --- .../reader/notion/NotionDocumentReader.java | 287 ++++---- .../ai/reader/notion/NotionResource.java | 668 +++++++++--------- .../reader/notion/NotionDocumentReaderIT.java | 153 ++-- 3 files changed, 559 insertions(+), 549 deletions(-) diff --git a/community/document-readers/notion-document-reader/src/main/java/com/alibaba/cloud/ai/reader/notion/NotionDocumentReader.java b/community/document-readers/notion-document-reader/src/main/java/com/alibaba/cloud/ai/reader/notion/NotionDocumentReader.java index 245f091f..05fdffc6 100644 --- a/community/document-readers/notion-document-reader/src/main/java/com/alibaba/cloud/ai/reader/notion/NotionDocumentReader.java +++ b/community/document-readers/notion-document-reader/src/main/java/com/alibaba/cloud/ai/reader/notion/NotionDocumentReader.java @@ -30,152 +30,153 @@ import java.util.Map; /** - * Notion Document Reader - * Implements DocumentReader interface to read content from Notion + * Notion Document Reader Implements DocumentReader interface to read content from Notion * * @author xiadong * @since 2024-01-06 */ public class NotionDocumentReader implements DocumentReader { - private final NotionResource notionResource; - private final JSONObject pageMetadata; - - /** - * Constructor - * @param notionResource Notion resource - */ - public NotionDocumentReader(NotionResource notionResource) { - this.notionResource = notionResource; - this.pageMetadata = notionResource.getMetadata(); - } - - @Override - public List get() { - try { - // Read content from input stream - String content = readContent(); - - // Create metadata map - Map metadata = buildMetadata(); - - // Create and return document - return Collections.singletonList(new Document(content, metadata)); - } - catch (IOException e) { - throw new RuntimeException("Failed to load document from Notion: " + e.getMessage(), e); - } - } - - /** - * Build metadata map from Notion API response - */ - private Map buildMetadata() { - Map metadata = new HashMap<>(); - - // Add basic metadata - metadata.put(NotionResource.SOURCE, notionResource.getSource()); - metadata.put("resourceType", notionResource.getResourceType().name()); - metadata.put("resourceId", notionResource.getResourceId()); - - // Add metadata from Notion API - if (pageMetadata != null) { - // Creation and update times - String createdTime = pageMetadata.getString("created_time"); - if (StringUtils.hasText(createdTime)) { - metadata.put("createdTime", Instant.parse(createdTime).toEpochMilli()); - } - - String lastEditedTime = pageMetadata.getString("last_edited_time"); - if (StringUtils.hasText(lastEditedTime)) { - metadata.put("lastEditedTime", Instant.parse(lastEditedTime).toEpochMilli()); - } - - // Creator and last editor - JSONObject createdBy = pageMetadata.getJSONObject("created_by"); - if (createdBy != null) { - String creatorName = createdBy.getString("name"); - String creatorId = createdBy.getString("id"); - if (StringUtils.hasText(creatorName)) { - metadata.put("createdBy", creatorName); - } - if (StringUtils.hasText(creatorId)) { - metadata.put("createdById", creatorId); - } - } - - JSONObject lastEditedBy = pageMetadata.getJSONObject("last_edited_by"); - if (lastEditedBy != null) { - String editorName = lastEditedBy.getString("name"); - String editorId = lastEditedBy.getString("id"); - if (StringUtils.hasText(editorName)) { - metadata.put("lastEditedBy", editorName); - } - if (StringUtils.hasText(editorId)) { - metadata.put("lastEditedById", editorId); - } - } - - // URL - String url = pageMetadata.getString("url"); - if (StringUtils.hasText(url)) { - metadata.put("url", url); - } - - // Parent information - JSONObject parent = pageMetadata.getJSONObject("parent"); - if (parent != null) { - String parentType = parent.getString("type"); - if (StringUtils.hasText(parentType)) { - metadata.put("parentType", parentType); - String parentId = parent.getString(parentType + "_id"); - if (StringUtils.hasText(parentId)) { - metadata.put("parentId", parentId); - } - } - } - - // Icon - JSONObject icon = pageMetadata.getJSONObject("icon"); - if (icon != null) { - String iconType = icon.getString("type"); - String iconUrl = icon.getString("url"); - if (StringUtils.hasText(iconType)) { - metadata.put("iconType", iconType); - } - if (StringUtils.hasText(iconUrl)) { - metadata.put("iconUrl", iconUrl); - } - } - - // Cover - JSONObject cover = pageMetadata.getJSONObject("cover"); - if (cover != null) { - String coverType = cover.getString("type"); - String coverUrl = cover.getString("url"); - if (StringUtils.hasText(coverType)) { - metadata.put("coverType", coverType); - } - if (StringUtils.hasText(coverUrl)) { - metadata.put("coverUrl", coverUrl); - } - } - } - - return metadata; - } - - /** - * Read content from input stream - */ - private String readContent() throws IOException { - StringBuilder content = new StringBuilder(); - try (BufferedReader reader = new BufferedReader(new InputStreamReader(notionResource.getInputStream()))) { - String line; - while ((line = reader.readLine()) != null) { - content.append(line).append("\n"); - } - } - return content.toString(); - } -} \ No newline at end of file + private final NotionResource notionResource; + + private final JSONObject pageMetadata; + + /** + * Constructor + * @param notionResource Notion resource + */ + public NotionDocumentReader(NotionResource notionResource) { + this.notionResource = notionResource; + this.pageMetadata = notionResource.getMetadata(); + } + + @Override + public List get() { + try { + // Read content from input stream + String content = readContent(); + + // Create metadata map + Map metadata = buildMetadata(); + + // Create and return document + return Collections.singletonList(new Document(content, metadata)); + } + catch (IOException e) { + throw new RuntimeException("Failed to load document from Notion: " + e.getMessage(), e); + } + } + + /** + * Build metadata map from Notion API response + */ + private Map buildMetadata() { + Map metadata = new HashMap<>(); + + // Add basic metadata + metadata.put(NotionResource.SOURCE, notionResource.getSource()); + metadata.put("resourceType", notionResource.getResourceType().name()); + metadata.put("resourceId", notionResource.getResourceId()); + + // Add metadata from Notion API + if (pageMetadata != null) { + // Creation and update times + String createdTime = pageMetadata.getString("created_time"); + if (StringUtils.hasText(createdTime)) { + metadata.put("createdTime", Instant.parse(createdTime).toEpochMilli()); + } + + String lastEditedTime = pageMetadata.getString("last_edited_time"); + if (StringUtils.hasText(lastEditedTime)) { + metadata.put("lastEditedTime", Instant.parse(lastEditedTime).toEpochMilli()); + } + + // Creator and last editor + JSONObject createdBy = pageMetadata.getJSONObject("created_by"); + if (createdBy != null) { + String creatorName = createdBy.getString("name"); + String creatorId = createdBy.getString("id"); + if (StringUtils.hasText(creatorName)) { + metadata.put("createdBy", creatorName); + } + if (StringUtils.hasText(creatorId)) { + metadata.put("createdById", creatorId); + } + } + + JSONObject lastEditedBy = pageMetadata.getJSONObject("last_edited_by"); + if (lastEditedBy != null) { + String editorName = lastEditedBy.getString("name"); + String editorId = lastEditedBy.getString("id"); + if (StringUtils.hasText(editorName)) { + metadata.put("lastEditedBy", editorName); + } + if (StringUtils.hasText(editorId)) { + metadata.put("lastEditedById", editorId); + } + } + + // URL + String url = pageMetadata.getString("url"); + if (StringUtils.hasText(url)) { + metadata.put("url", url); + } + + // Parent information + JSONObject parent = pageMetadata.getJSONObject("parent"); + if (parent != null) { + String parentType = parent.getString("type"); + if (StringUtils.hasText(parentType)) { + metadata.put("parentType", parentType); + String parentId = parent.getString(parentType + "_id"); + if (StringUtils.hasText(parentId)) { + metadata.put("parentId", parentId); + } + } + } + + // Icon + JSONObject icon = pageMetadata.getJSONObject("icon"); + if (icon != null) { + String iconType = icon.getString("type"); + String iconUrl = icon.getString("url"); + if (StringUtils.hasText(iconType)) { + metadata.put("iconType", iconType); + } + if (StringUtils.hasText(iconUrl)) { + metadata.put("iconUrl", iconUrl); + } + } + + // Cover + JSONObject cover = pageMetadata.getJSONObject("cover"); + if (cover != null) { + String coverType = cover.getString("type"); + String coverUrl = cover.getString("url"); + if (StringUtils.hasText(coverType)) { + metadata.put("coverType", coverType); + } + if (StringUtils.hasText(coverUrl)) { + metadata.put("coverUrl", coverUrl); + } + } + } + + return metadata; + } + + /** + * Read content from input stream + */ + private String readContent() throws IOException { + StringBuilder content = new StringBuilder(); + try (BufferedReader reader = new BufferedReader(new InputStreamReader(notionResource.getInputStream()))) { + String line; + while ((line = reader.readLine()) != null) { + content.append(line).append("\n"); + } + } + return content.toString(); + } + +} \ No newline at end of file diff --git a/community/document-readers/notion-document-reader/src/main/java/com/alibaba/cloud/ai/reader/notion/NotionResource.java b/community/document-readers/notion-document-reader/src/main/java/com/alibaba/cloud/ai/reader/notion/NotionResource.java index dc175629..fe76a24b 100644 --- a/community/document-readers/notion-document-reader/src/main/java/com/alibaba/cloud/ai/reader/notion/NotionResource.java +++ b/community/document-readers/notion-document-reader/src/main/java/com/alibaba/cloud/ai/reader/notion/NotionResource.java @@ -33,337 +33,349 @@ import java.util.Objects; /** - * Notion Resource class - * Supports accessing Notion pages and databases + * Notion Resource class Supports accessing Notion pages and databases * * @author xiadong * @since 2024-01-06 */ public class NotionResource implements Resource { - public static final String SOURCE = "source"; - - private static final String BASE_URL = "https://api.notion.com/v1"; - private static final String API_VERSION = "2022-06-28"; - - // Resource types - public enum ResourceType { - PAGE, - DATABASE - } - - private final HttpClient httpClient; - private final InputStream inputStream; - private final URI uri; - private final ResourceType resourceType; - private final String resourceId; - private JSONObject metadata; - - /** - * Constructor - * @param notionToken Notion API Token - * @param resourceId Notion resource ID - * @param resourceType Resource type (PAGE or DATABASE) - */ - public NotionResource(String notionToken, String resourceId, ResourceType resourceType) { - Assert.hasText(resourceId, "ResourceId must not be empty"); - Assert.notNull(resourceType, "ResourceType must not be null"); - - this.resourceId = resourceId; - this.resourceType = resourceType; - this.httpClient = HttpClient.newBuilder().version(HttpClient.Version.HTTP_2).build(); - - validateToken(notionToken); - - // Get resource metadata - this.metadata = getResourceMetadata(notionToken, resourceId, resourceType); - - // Get content based on resource type - String content = switch (resourceType) { - case PAGE -> getPageContent(notionToken, resourceId); - case DATABASE -> getDatabaseContent(notionToken, resourceId); - }; - - this.inputStream = new ByteArrayInputStream(content.getBytes()); - this.uri = URI.create(String.format("notion://%s/%s", resourceType.name().toLowerCase(), resourceId)); - } - - /** - * Validate Notion API token - */ - private void validateToken(String notionToken) { - URI uri = URI.create(BASE_URL + "/users/me"); - HttpRequest request = HttpRequest.newBuilder() - .header("Authorization", "Bearer " + notionToken) - .header("Notion-Version", API_VERSION) - .uri(uri) - .GET() - .build(); - - try { - HttpResponse response = this.httpClient.send(request, HttpResponse.BodyHandlers.ofString()); - Assert.isTrue(response.statusCode() == 200, "Failed to authenticate Notion token"); - } - catch (Exception e) { - throw new RuntimeException("Failed to validate Notion token", e); - } - } - - /** - * Get page content - */ - private String getPageContent(String notionToken, String pageId) { - try { - // 1. Get page content - URI pageUri = URI.create(BASE_URL + "/pages/" + pageId); - HttpRequest pageRequest = HttpRequest.newBuilder() - .header("Authorization", "Bearer " + notionToken) - .header("Notion-Version", API_VERSION) - .uri(pageUri) - .GET() - .build(); - - HttpResponse pageResponse = this.httpClient.send(pageRequest, HttpResponse.BodyHandlers.ofString()); - Assert.isTrue(pageResponse.statusCode() == 200, "Failed to fetch page content"); - - // 2. Parse page content - JSONObject pageJson = JSON.parseObject(pageResponse.body()); - StringBuilder content = new StringBuilder(); - - // Extract page title - JSONObject properties = pageJson.getJSONObject("properties"); - if (properties != null && properties.containsKey("title")) { - JSONObject titleProp = properties.getJSONObject("title"); - JSONArray titleArray = titleProp.getJSONArray("title"); - if (titleArray != null && !titleArray.isEmpty()) { - for (int i = 0; i < titleArray.size(); i++) { - content.append(titleArray.getJSONObject(i).getString("plain_text")); - } - content.append("\n\n"); - } - } - - // 3. Get page blocks - URI blocksUri = URI.create(BASE_URL + "/blocks/" + pageId + "/children"); - HttpRequest blocksRequest = HttpRequest.newBuilder() - .header("Authorization", "Bearer " + notionToken) - .header("Notion-Version", API_VERSION) - .uri(blocksUri) - .GET() - .build(); - - HttpResponse blocksResponse = this.httpClient.send(blocksRequest, HttpResponse.BodyHandlers.ofString()); - Assert.isTrue(blocksResponse.statusCode() == 200, "Failed to fetch page blocks"); - - // 4. Parse block content - JSONObject blocksJson = JSON.parseObject(blocksResponse.body()); - JSONArray blocks = blocksJson.getJSONArray("results"); - - // 5. Extract text content - for (int i = 0; i < blocks.size(); i++) { - JSONObject block = blocks.getJSONObject(i); - String type = block.getString("type"); - if (block.containsKey(type)) { - JSONObject typeObj = block.getJSONObject(type); - if (typeObj.containsKey("rich_text")) { - JSONArray richText = typeObj.getJSONArray("rich_text"); - for (int j = 0; j < richText.size(); j++) { - content.append(richText.getJSONObject(j).getString("plain_text")); - } - content.append("\n"); - } - } - } - return content.toString(); - } - catch (Exception e) { - throw new RuntimeException("Failed to get page content", e); - } - } - - /** - * Get database content - */ - private String getDatabaseContent(String notionToken, String databaseId) { - try { - // 1. Query database - URI uri = URI.create(BASE_URL + "/databases/" + databaseId + "/query"); - HttpRequest request = HttpRequest.newBuilder() - .header("Authorization", "Bearer " + notionToken) - .header("Notion-Version", API_VERSION) - .header("Content-Type", "application/json") - .uri(uri) - .POST(HttpRequest.BodyPublishers.ofString("{}")) - .build(); - - HttpResponse response = this.httpClient.send(request, HttpResponse.BodyHandlers.ofString()); - Assert.isTrue(response.statusCode() == 200, "Failed to fetch database content"); - - // 2. Parse database content - JSONObject jsonResponse = JSON.parseObject(response.body()); - JSONArray results = jsonResponse.getJSONArray("results"); - - // 3. Extract property values - StringBuilder content = new StringBuilder(); - for (int i = 0; i < results.size(); i++) { - JSONObject row = results.getJSONObject(i); - JSONObject properties = row.getJSONObject("properties"); - - for (String propertyName : properties.keySet()) { - JSONObject property = properties.getJSONObject(propertyName); - String type = property.getString("type"); - - if (property.containsKey(type)) { - Object value = property.get(type); - if (value instanceof JSONArray) { - JSONArray array = (JSONArray) value; - for (int j = 0; j < array.size(); j++) { - JSONObject item = array.getJSONObject(j); - if (item.containsKey("plain_text")) { - content.append(propertyName) - .append(": ") - .append(item.getString("plain_text")) - .append("\n"); - } - } - } - } - } - content.append("---\n"); - } - return content.toString(); - } - catch (Exception e) { - throw new RuntimeException("Failed to get database content", e); - } - } - - /** - * Get resource metadata - */ - private JSONObject getResourceMetadata(String notionToken, String resourceId, ResourceType resourceType) { - try { - String endpoint = switch (resourceType) { - case PAGE -> "/pages/"; - case DATABASE -> "/databases/"; - }; - - URI uri = URI.create(BASE_URL + endpoint + resourceId); - HttpRequest request = HttpRequest.newBuilder() - .header("Authorization", "Bearer " + notionToken) - .header("Notion-Version", API_VERSION) - .uri(uri) - .GET() - .build(); - - HttpResponse response = this.httpClient.send(request, HttpResponse.BodyHandlers.ofString()); - Assert.isTrue(response.statusCode() == 200, "Failed to fetch resource metadata"); - - return JSON.parseObject(response.body()); - } - catch (Exception e) { - throw new RuntimeException("Failed to get resource metadata", e); - } - } - - /** - * Get resource metadata - */ - public JSONObject getMetadata() { - return metadata; - } - - public static Builder builder() { - return new Builder(); - } - - public static class Builder { - private String notionToken; - private String resourceId; - private ResourceType resourceType; - - public Builder notionToken(String notionToken) { - this.notionToken = notionToken; - return this; - } - - public Builder resourceId(String resourceId) { - this.resourceId = resourceId; - return this; - } - - public Builder resourceType(ResourceType resourceType) { - this.resourceType = resourceType; - return this; - } - - public NotionResource build() { - Assert.notNull(notionToken, "NotionToken must not be null"); - Assert.notNull(resourceId, "ResourceId must not be null"); - Assert.notNull(resourceType, "ResourceType must not be null"); - return new NotionResource(notionToken, resourceId, resourceType); - } - } - - @Override - public boolean exists() { - return true; - } - - @Override - public URL getURL() throws IOException { - return null; - } - - @Override - public URI getURI() throws IOException { - return uri; - } - - @Override - public File getFile() throws IOException { - return null; - } - - @Override - public long contentLength() throws IOException { - return 0; - } - - @Override - public long lastModified() throws IOException { - return 0; - } - - @Override - public Resource createRelative(String relativePath) throws IOException { - return null; - } - - @Override - public String getFilename() { - return resourceId; - } - - @Override - public String getDescription() { - return String.format("Notion %s resource [id=%s]", resourceType, resourceId); - } - - @Override - public InputStream getInputStream() throws IOException { - return inputStream; - } - - public ResourceType getResourceType() { - return resourceType; - } - - public String getResourceId() { - return resourceId; - } - - public String getSource() { - return uri.toString(); - } -} \ No newline at end of file + public static final String SOURCE = "source"; + + private static final String BASE_URL = "https://api.notion.com/v1"; + + private static final String API_VERSION = "2022-06-28"; + + // Resource types + public enum ResourceType { + + PAGE, DATABASE + + } + + private final HttpClient httpClient; + + private final InputStream inputStream; + + private final URI uri; + + private final ResourceType resourceType; + + private final String resourceId; + + private JSONObject metadata; + + /** + * Constructor + * @param notionToken Notion API Token + * @param resourceId Notion resource ID + * @param resourceType Resource type (PAGE or DATABASE) + */ + public NotionResource(String notionToken, String resourceId, ResourceType resourceType) { + Assert.hasText(resourceId, "ResourceId must not be empty"); + Assert.notNull(resourceType, "ResourceType must not be null"); + + this.resourceId = resourceId; + this.resourceType = resourceType; + this.httpClient = HttpClient.newBuilder().version(HttpClient.Version.HTTP_2).build(); + + validateToken(notionToken); + + // Get resource metadata + this.metadata = getResourceMetadata(notionToken, resourceId, resourceType); + + // Get content based on resource type + String content = switch (resourceType) { + case PAGE -> getPageContent(notionToken, resourceId); + case DATABASE -> getDatabaseContent(notionToken, resourceId); + }; + + this.inputStream = new ByteArrayInputStream(content.getBytes()); + this.uri = URI.create(String.format("notion://%s/%s", resourceType.name().toLowerCase(), resourceId)); + } + + /** + * Validate Notion API token + */ + private void validateToken(String notionToken) { + URI uri = URI.create(BASE_URL + "/users/me"); + HttpRequest request = HttpRequest.newBuilder() + .header("Authorization", "Bearer " + notionToken) + .header("Notion-Version", API_VERSION) + .uri(uri) + .GET() + .build(); + + try { + HttpResponse response = this.httpClient.send(request, HttpResponse.BodyHandlers.ofString()); + Assert.isTrue(response.statusCode() == 200, "Failed to authenticate Notion token"); + } + catch (Exception e) { + throw new RuntimeException("Failed to validate Notion token", e); + } + } + + /** + * Get page content + */ + private String getPageContent(String notionToken, String pageId) { + try { + // 1. Get page content + URI pageUri = URI.create(BASE_URL + "/pages/" + pageId); + HttpRequest pageRequest = HttpRequest.newBuilder() + .header("Authorization", "Bearer " + notionToken) + .header("Notion-Version", API_VERSION) + .uri(pageUri) + .GET() + .build(); + + HttpResponse pageResponse = this.httpClient.send(pageRequest, HttpResponse.BodyHandlers.ofString()); + Assert.isTrue(pageResponse.statusCode() == 200, "Failed to fetch page content"); + + // 2. Parse page content + JSONObject pageJson = JSON.parseObject(pageResponse.body()); + StringBuilder content = new StringBuilder(); + + // Extract page title + JSONObject properties = pageJson.getJSONObject("properties"); + if (properties != null && properties.containsKey("title")) { + JSONObject titleProp = properties.getJSONObject("title"); + JSONArray titleArray = titleProp.getJSONArray("title"); + if (titleArray != null && !titleArray.isEmpty()) { + for (int i = 0; i < titleArray.size(); i++) { + content.append(titleArray.getJSONObject(i).getString("plain_text")); + } + content.append("\n\n"); + } + } + + // 3. Get page blocks + URI blocksUri = URI.create(BASE_URL + "/blocks/" + pageId + "/children"); + HttpRequest blocksRequest = HttpRequest.newBuilder() + .header("Authorization", "Bearer " + notionToken) + .header("Notion-Version", API_VERSION) + .uri(blocksUri) + .GET() + .build(); + + HttpResponse blocksResponse = this.httpClient.send(blocksRequest, + HttpResponse.BodyHandlers.ofString()); + Assert.isTrue(blocksResponse.statusCode() == 200, "Failed to fetch page blocks"); + + // 4. Parse block content + JSONObject blocksJson = JSON.parseObject(blocksResponse.body()); + JSONArray blocks = blocksJson.getJSONArray("results"); + + // 5. Extract text content + for (int i = 0; i < blocks.size(); i++) { + JSONObject block = blocks.getJSONObject(i); + String type = block.getString("type"); + if (block.containsKey(type)) { + JSONObject typeObj = block.getJSONObject(type); + if (typeObj.containsKey("rich_text")) { + JSONArray richText = typeObj.getJSONArray("rich_text"); + for (int j = 0; j < richText.size(); j++) { + content.append(richText.getJSONObject(j).getString("plain_text")); + } + content.append("\n"); + } + } + } + return content.toString(); + } + catch (Exception e) { + throw new RuntimeException("Failed to get page content", e); + } + } + + /** + * Get database content + */ + private String getDatabaseContent(String notionToken, String databaseId) { + try { + // 1. Query database + URI uri = URI.create(BASE_URL + "/databases/" + databaseId + "/query"); + HttpRequest request = HttpRequest.newBuilder() + .header("Authorization", "Bearer " + notionToken) + .header("Notion-Version", API_VERSION) + .header("Content-Type", "application/json") + .uri(uri) + .POST(HttpRequest.BodyPublishers.ofString("{}")) + .build(); + + HttpResponse response = this.httpClient.send(request, HttpResponse.BodyHandlers.ofString()); + Assert.isTrue(response.statusCode() == 200, "Failed to fetch database content"); + + // 2. Parse database content + JSONObject jsonResponse = JSON.parseObject(response.body()); + JSONArray results = jsonResponse.getJSONArray("results"); + + // 3. Extract property values + StringBuilder content = new StringBuilder(); + for (int i = 0; i < results.size(); i++) { + JSONObject row = results.getJSONObject(i); + JSONObject properties = row.getJSONObject("properties"); + + for (String propertyName : properties.keySet()) { + JSONObject property = properties.getJSONObject(propertyName); + String type = property.getString("type"); + + if (property.containsKey(type)) { + Object value = property.get(type); + if (value instanceof JSONArray) { + JSONArray array = (JSONArray) value; + for (int j = 0; j < array.size(); j++) { + JSONObject item = array.getJSONObject(j); + if (item.containsKey("plain_text")) { + content.append(propertyName) + .append(": ") + .append(item.getString("plain_text")) + .append("\n"); + } + } + } + } + } + content.append("---\n"); + } + return content.toString(); + } + catch (Exception e) { + throw new RuntimeException("Failed to get database content", e); + } + } + + /** + * Get resource metadata + */ + private JSONObject getResourceMetadata(String notionToken, String resourceId, ResourceType resourceType) { + try { + String endpoint = switch (resourceType) { + case PAGE -> "/pages/"; + case DATABASE -> "/databases/"; + }; + + URI uri = URI.create(BASE_URL + endpoint + resourceId); + HttpRequest request = HttpRequest.newBuilder() + .header("Authorization", "Bearer " + notionToken) + .header("Notion-Version", API_VERSION) + .uri(uri) + .GET() + .build(); + + HttpResponse response = this.httpClient.send(request, HttpResponse.BodyHandlers.ofString()); + Assert.isTrue(response.statusCode() == 200, "Failed to fetch resource metadata"); + + return JSON.parseObject(response.body()); + } + catch (Exception e) { + throw new RuntimeException("Failed to get resource metadata", e); + } + } + + /** + * Get resource metadata + */ + public JSONObject getMetadata() { + return metadata; + } + + public static Builder builder() { + return new Builder(); + } + + public static class Builder { + + private String notionToken; + + private String resourceId; + + private ResourceType resourceType; + + public Builder notionToken(String notionToken) { + this.notionToken = notionToken; + return this; + } + + public Builder resourceId(String resourceId) { + this.resourceId = resourceId; + return this; + } + + public Builder resourceType(ResourceType resourceType) { + this.resourceType = resourceType; + return this; + } + + public NotionResource build() { + Assert.notNull(notionToken, "NotionToken must not be null"); + Assert.notNull(resourceId, "ResourceId must not be null"); + Assert.notNull(resourceType, "ResourceType must not be null"); + return new NotionResource(notionToken, resourceId, resourceType); + } + + } + + @Override + public boolean exists() { + return true; + } + + @Override + public URL getURL() throws IOException { + return null; + } + + @Override + public URI getURI() throws IOException { + return uri; + } + + @Override + public File getFile() throws IOException { + return null; + } + + @Override + public long contentLength() throws IOException { + return 0; + } + + @Override + public long lastModified() throws IOException { + return 0; + } + + @Override + public Resource createRelative(String relativePath) throws IOException { + return null; + } + + @Override + public String getFilename() { + return resourceId; + } + + @Override + public String getDescription() { + return String.format("Notion %s resource [id=%s]", resourceType, resourceId); + } + + @Override + public InputStream getInputStream() throws IOException { + return inputStream; + } + + public ResourceType getResourceType() { + return resourceType; + } + + public String getResourceId() { + return resourceId; + } + + public String getSource() { + return uri.toString(); + } + +} \ No newline at end of file diff --git a/community/document-readers/notion-document-reader/src/test/java/com/alibaba/cloud/ai/reader/notion/NotionDocumentReaderIT.java b/community/document-readers/notion-document-reader/src/test/java/com/alibaba/cloud/ai/reader/notion/NotionDocumentReaderIT.java index 97bcb8e7..3e444670 100644 --- a/community/document-readers/notion-document-reader/src/test/java/com/alibaba/cloud/ai/reader/notion/NotionDocumentReaderIT.java +++ b/community/document-readers/notion-document-reader/src/test/java/com/alibaba/cloud/ai/reader/notion/NotionDocumentReaderIT.java @@ -32,81 +32,78 @@ */ class NotionDocumentReaderIT { - private static final String NOTION_TOKEN = System.getenv("NOTION_TOKEN"); - - // Test page ID - private static final String TEST_PAGE_ID = "${pageId}"; - - // Test database ID - private static final String TEST_DATABASE_ID = "${databaseId}"; - - NotionDocumentReader pageReader; - NotionDocumentReader databaseReader; - - @BeforeEach - public void beforeEach() { - // Create page reader - NotionResource pageResource = NotionResource.builder() - .notionToken(NOTION_TOKEN) - .resourceId(TEST_PAGE_ID) - .resourceType(NotionResource.ResourceType.PAGE) - .build(); - pageReader = new NotionDocumentReader(pageResource); - - // Create database reader - NotionResource databaseResource = NotionResource.builder() - .notionToken(NOTION_TOKEN) - .resourceId(TEST_DATABASE_ID) - .resourceType(NotionResource.ResourceType.DATABASE) - .build(); - databaseReader = new NotionDocumentReader(databaseResource); - } - - @Test - void should_load_page() { - // when - List documents = pageReader.get(); - - // then - assertThat(documents).isNotEmpty(); - Document document = documents.get(0); - - // Verify metadata - assertThat(document.getMetadata()).containsKey(NotionResource.SOURCE); - assertThat(document.getMetadata().get(NotionResource.SOURCE)) - .isEqualTo("notion://page/" + TEST_PAGE_ID); - assertThat(document.getMetadata().get("resourceType")) - .isEqualTo(NotionResource.ResourceType.PAGE.name()); - assertThat(document.getMetadata().get("resourceId")) - .isEqualTo(TEST_PAGE_ID); - - // Verify content - String content = document.getContent(); - assertThat(content).isNotEmpty(); - System.out.println("Page content: " + content); - } - - @Test - void should_load_database() { - // when - List documents = databaseReader.get(); - - // then - assertThat(documents).isNotEmpty(); - Document document = documents.get(0); - - // Verify metadata - assertThat(document.getMetadata()).containsKey(NotionResource.SOURCE); - assertThat(document.getMetadata().get(NotionResource.SOURCE)) - .isEqualTo("notion://database/" + TEST_DATABASE_ID); - assertThat(document.getMetadata().get("resourceType")) - .isEqualTo(NotionResource.ResourceType.DATABASE.name()); - assertThat(document.getMetadata().get("resourceId")) - .isEqualTo(TEST_DATABASE_ID); - - // Verify content - String content = document.getContent(); - assertThat(content).isNotEmpty(); - System.out.println("Database content: " + content); - } -} \ No newline at end of file + private static final String NOTION_TOKEN = System.getenv("NOTION_TOKEN"); + + // Test page ID + private static final String TEST_PAGE_ID = "${pageId}"; + + // Test database ID + private static final String TEST_DATABASE_ID = "${databaseId}"; + + NotionDocumentReader pageReader; + + NotionDocumentReader databaseReader; + + @BeforeEach + public void beforeEach() { + // Create page reader + NotionResource pageResource = NotionResource.builder() + .notionToken(NOTION_TOKEN) + .resourceId(TEST_PAGE_ID) + .resourceType(NotionResource.ResourceType.PAGE) + .build(); + pageReader = new NotionDocumentReader(pageResource); + + // Create database reader + NotionResource databaseResource = NotionResource.builder() + .notionToken(NOTION_TOKEN) + .resourceId(TEST_DATABASE_ID) + .resourceType(NotionResource.ResourceType.DATABASE) + .build(); + databaseReader = new NotionDocumentReader(databaseResource); + } + + @Test + void should_load_page() { + // when + List documents = pageReader.get(); + + // then + assertThat(documents).isNotEmpty(); + Document document = documents.get(0); + + // Verify metadata + assertThat(document.getMetadata()).containsKey(NotionResource.SOURCE); + assertThat(document.getMetadata().get(NotionResource.SOURCE)).isEqualTo("notion://page/" + TEST_PAGE_ID); + assertThat(document.getMetadata().get("resourceType")).isEqualTo(NotionResource.ResourceType.PAGE.name()); + assertThat(document.getMetadata().get("resourceId")).isEqualTo(TEST_PAGE_ID); + + // Verify content + String content = document.getContent(); + assertThat(content).isNotEmpty(); + System.out.println("Page content: " + content); + } + + @Test + void should_load_database() { + // when + List documents = databaseReader.get(); + + // then + assertThat(documents).isNotEmpty(); + Document document = documents.get(0); + + // Verify metadata + assertThat(document.getMetadata()).containsKey(NotionResource.SOURCE); + assertThat(document.getMetadata().get(NotionResource.SOURCE)) + .isEqualTo("notion://database/" + TEST_DATABASE_ID); + assertThat(document.getMetadata().get("resourceType")).isEqualTo(NotionResource.ResourceType.DATABASE.name()); + assertThat(document.getMetadata().get("resourceId")).isEqualTo(TEST_DATABASE_ID); + + // Verify content + String content = document.getContent(); + assertThat(content).isNotEmpty(); + System.out.println("Database content: " + content); + } + +} \ No newline at end of file