Skip to content

Commit

Permalink
Merge pull request #399 from alibaba/0211-yuluo/update-saa-core
Browse files Browse the repository at this point in the history
feat: optimize spring-ai-alibaba-core
  • Loading branch information
yuluo-yx authored Feb 15, 2025
2 parents dfe329f + 25010ea commit c181648
Show file tree
Hide file tree
Showing 42 changed files with 341 additions and 2,860 deletions.
2 changes: 2 additions & 0 deletions .licenserc.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,5 +72,7 @@ header:
- '**/*.st'
- '**/*.eml'
- '**/*.mbox'
- '**/resource/**'
- '**/pom.xml'

comment: never
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,6 @@
*/
package com.alibaba.cloud.ai.reader.chatgpt.data;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import org.springframework.ai.document.Document;
import org.springframework.ai.document.DocumentReader;
import org.springframework.core.io.Resource;
import org.springframework.util.StreamUtils;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
Expand All @@ -32,12 +23,20 @@
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.StreamSupport;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;

import org.springframework.ai.document.Document;
import org.springframework.ai.document.DocumentReader;

/**
* Document reader for loading exported ChatGPT conversation data
Expand All @@ -50,46 +49,30 @@ public class ChatGptDataDocumentReader implements DocumentReader {

private final int numLogs;

// Jackson mapper used to parse the exported conversation JSON file.
// NOTE(review): ObjectMapper is thread-safe once configured and is safely reused
// across calls to get(); kept as an instance field to preserve the existing layout.
private final ObjectMapper objectMapper = new ObjectMapper();

/**
 * Initialize ChatGPT data loader with file path.
 * @param logFilePath The path to the log file
 * @param numLogs Number of logs to load, load all if 0
 */
public ChatGptDataDocumentReader(String logFilePath, int numLogs) {
this.logFilePath = logFilePath;
this.numLogs = numLogs;
}

/**
 * Convenience constructor that loads all logs from the given file.
 * Delegates to the two-argument constructor with {@code numLogs = 0},
 * which means "no limit" when entries are read.
 * @param logFilePath The path to the log file
 */
public ChatGptDataDocumentReader(String logFilePath) {
this(logFilePath, 0);
}

/**
* Format message content into readable string
* @param message Message object to format
* @param title Conversation title
* @return Formatted message string
*/
private String concatenateRows(JSONObject message, String title) {
private String concatenateRows(JsonNode message, String title) {
if (message == null || message.isEmpty()) {
return "";
}

// Get sender role
JSONObject author = message.getJSONObject("author");
String sender = author != null ? author.getString("role") : "unknown";
JsonNode author = message.get("author");
String sender = author != null ? author.get("role").asText() : "unknown";

// Get message content
JSONObject content = message.getJSONObject("content");
JSONArray parts = content.getJSONArray("parts");
String text = parts.getString(0);
JsonNode content = message.get("content");
JsonNode parts = content.get("parts");
String text = parts.get(0).asText();

// Get and format timestamp
long createTime = message.getLongValue("create_time");
long createTime = message.get("create_time").asLong();
LocalDateTime dateTime = LocalDateTime.ofInstant(Instant.ofEpochSecond(createTime), ZoneId.systemDefault());
String formattedDate = dateTime.format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"));

Expand All @@ -99,37 +82,34 @@ private String concatenateRows(JSONObject message, String title) {
@Override
public List<Document> get() {
try {
// Read JSON file content
String jsonContent = Files.readString(Paths.get(logFilePath), StandardCharsets.UTF_8);
JSONArray data = JSON.parseArray(jsonContent);
List<Document> documents = new ArrayList<>();
JsonNode data = objectMapper.readTree(jsonContent);
List<Document> documents;

// Limit number of logs if specified
int limit = numLogs > 0 ? Math.min(numLogs, data.size()) : data.size();

// Process first limit entries using Stream API
documents = IntStream.range(0, limit).mapToObj(i -> {
// Get conversation data
JSONObject conversation = data.getJSONObject(i);
String title = conversation.getString("title");
JSONObject messages = conversation.getJSONObject("mapping");

// Process messages using Stream
String text = messages.keySet().stream().map(key -> {
JSONObject messageWrapper = messages.getJSONObject(key);
JSONObject message = messageWrapper.getJSONObject("message");

// Skip first system role message
if ("0".equals(key) && "system".equals(message.getJSONObject("author").getString("role"))) {
return "";
}
return concatenateRows(message, title);
}).filter(s -> !s.isEmpty()).collect(Collectors.joining());
JsonNode conversation = data.get(i);
String title = conversation.get("title").asText();
JsonNode messages = conversation.get("mapping");

String text = StreamSupport
.stream(Spliterators.spliteratorUnknownSize(messages.fieldNames(), Spliterator.ORDERED), false)
.map(key -> {
JsonNode messageWrapper = messages.get(key);
JsonNode message = messageWrapper.get("message");

if ("0".equals(key) && "system".equals(message.get("author").get("role").asText())) {
return "";
}
return concatenateRows(message, title);
})
.filter(s -> !s.isEmpty())
.collect(Collectors.joining());

// Create document metadata
Map<String, Object> metadata = new HashMap<>();
metadata.put("source", logFilePath);

// Return new Document object
return new Document(text, metadata);
}).collect(Collectors.toList());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,11 +15,6 @@
*/
package com.alibaba.cloud.ai.reader.notion;

import com.alibaba.fastjson.JSONObject;
import org.springframework.ai.document.Document;
import org.springframework.ai.document.DocumentReader;
import org.springframework.util.StringUtils;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
Expand All @@ -29,17 +24,24 @@
import java.util.List;
import java.util.Map;

import com.fasterxml.jackson.databind.JsonNode;

import org.springframework.ai.document.Document;
import org.springframework.ai.document.DocumentReader;
import org.springframework.util.StringUtils;

/**
* Notion Document Reader Implements DocumentReader interface to read content from Notion
*
* @author xiadong
* @since 2024-01-06
*/

public class NotionDocumentReader implements DocumentReader {

private final NotionResource notionResource;

private final JSONObject pageMetadata;
private final JsonNode pageMetadata;

/**
* Constructor
Expand Down Expand Up @@ -81,21 +83,21 @@ private Map<String, Object> buildMetadata() {
// Add metadata from Notion API
if (pageMetadata != null) {
// Creation and update times
String createdTime = pageMetadata.getString("created_time");
String createdTime = pageMetadata.get("created_time").asText();
if (StringUtils.hasText(createdTime)) {
metadata.put("createdTime", Instant.parse(createdTime).toEpochMilli());
}

String lastEditedTime = pageMetadata.getString("last_edited_time");
String lastEditedTime = pageMetadata.get("last_edited_time").asText();
if (StringUtils.hasText(lastEditedTime)) {
metadata.put("lastEditedTime", Instant.parse(lastEditedTime).toEpochMilli());
}

// Creator and last editor
JSONObject createdBy = pageMetadata.getJSONObject("created_by");
JsonNode createdBy = pageMetadata.get("created_by");
if (createdBy != null) {
String creatorName = createdBy.getString("name");
String creatorId = createdBy.getString("id");
String creatorName = createdBy.get("name").asText();
String creatorId = createdBy.get("id").asText();
if (StringUtils.hasText(creatorName)) {
metadata.put("createdBy", creatorName);
}
Expand All @@ -104,10 +106,10 @@ private Map<String, Object> buildMetadata() {
}
}

JSONObject lastEditedBy = pageMetadata.getJSONObject("last_edited_by");
JsonNode lastEditedBy = pageMetadata.get("last_edited_by");
if (lastEditedBy != null) {
String editorName = lastEditedBy.getString("name");
String editorId = lastEditedBy.getString("id");
String editorName = lastEditedBy.get("name").asText();
String editorId = lastEditedBy.get("id").asText();
if (StringUtils.hasText(editorName)) {
metadata.put("lastEditedBy", editorName);
}
Expand All @@ -117,29 +119,29 @@ private Map<String, Object> buildMetadata() {
}

// URL
String url = pageMetadata.getString("url");
String url = pageMetadata.get("url").asText();
if (StringUtils.hasText(url)) {
metadata.put("url", url);
}

// Parent information
JSONObject parent = pageMetadata.getJSONObject("parent");
JsonNode parent = pageMetadata.get("parent");
if (parent != null) {
String parentType = parent.getString("type");
String parentType = parent.get("type").asText();
if (StringUtils.hasText(parentType)) {
metadata.put("parentType", parentType);
String parentId = parent.getString(parentType + "_id");
String parentId = parent.get(parentType + "_id").asText();
if (StringUtils.hasText(parentId)) {
metadata.put("parentId", parentId);
}
}
}

// Icon
JSONObject icon = pageMetadata.getJSONObject("icon");
JsonNode icon = pageMetadata.get("icon");
if (icon != null) {
String iconType = icon.getString("type");
String iconUrl = icon.getString("url");
String iconType = icon.get("type").asText();
String iconUrl = icon.get("url").asText();
if (StringUtils.hasText(iconType)) {
metadata.put("iconType", iconType);
}
Expand All @@ -149,10 +151,10 @@ private Map<String, Object> buildMetadata() {
}

// Cover
JSONObject cover = pageMetadata.getJSONObject("cover");
JsonNode cover = pageMetadata.get("cover");
if (cover != null) {
String coverType = cover.getString("type");
String coverUrl = cover.getString("url");
String coverType = cover.get("type").asText();
String coverUrl = cover.get("url").asText();
if (StringUtils.hasText(coverType)) {
metadata.put("coverType", coverType);
}
Expand All @@ -179,4 +181,4 @@ private String readContent() throws IOException {
return content.toString();
}

}
}
Loading

0 comments on commit c181648

Please sign in to comment.