diff --git a/build.gradle.kts b/build.gradle.kts index 6bedbc75..55af9136 100644 --- a/build.gradle.kts +++ b/build.gradle.kts @@ -146,6 +146,8 @@ dependencies { jacocoAggregation(project(":chapi-ast-kotlin")) jacocoAggregation(project(":chapi-ast-scala")) jacocoAggregation(project(":chapi-ast-cpp")) + jacocoAggregation(project(":chapi-ast-protobuf")) + jacocoAggregation(project(":chapi-parser-toml")) jacocoAggregation(project(":chapi-parser-cmake")) } diff --git a/chapi-ast-protobuf/build.gradle.kts b/chapi-ast-protobuf/build.gradle.kts new file mode 100644 index 00000000..bc85b1cd --- /dev/null +++ b/chapi-ast-protobuf/build.gradle.kts @@ -0,0 +1,60 @@ +plugins { + id("antlr") + java + kotlin("jvm") + kotlin("plugin.serialization") version "1.6.10" + + `jacoco-conventions` +} + +repositories { + mavenCentral() + mavenLocal() +} + +dependencies { + antlr("org.antlr:antlr4:4.13.1") + + // project deps + implementation(project(":chapi-domain")) + + implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.3.2") + + implementation(kotlin("stdlib-jdk8")) + implementation(kotlin("reflect")) + // Kotlin reflection. 
+ testImplementation(kotlin("test")) + + // JUnit 5 + testImplementation("org.junit.jupiter:junit-jupiter-api:5.6.0") + testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine:5.6.0") + testRuntimeOnly("org.junit.platform:junit-platform-console:1.6.0") + + implementation("org.antlr:antlr4:4.13.1") + implementation("org.antlr:antlr4-runtime:4.13.1") +} + +sourceSets.main { + java.srcDirs("${project.buildDir}/generated-src") +} + +tasks.generateGrammarSource { + maxHeapSize = "64m" + arguments = arguments + listOf("-package", "chapi.ast.antlr") + listOf("-visitor", "-long-messages") + outputDirectory = file("${project.buildDir}/generated-src/chapi/ast/antlr") +} + +tasks.withType<AntlrTask> { + // (no additional AntlrTask configuration) +} +// Generated grammar sources must exist before Kotlin compiles against them. +tasks.named("compileKotlin") { + dependsOn(tasks.withType<AntlrTask>()) +} + +tasks.withType<Test> { + useJUnitPlatform() + testLogging { + events("passed", "skipped", "failed") + } +} diff --git a/chapi-ast-protobuf/src/main/antlr/Protobuf3.g4 b/chapi-ast-protobuf/src/main/antlr/Protobuf3.g4 new file mode 100644 index 00000000..bb7302d1 --- /dev/null +++ b/chapi-ast-protobuf/src/main/antlr/Protobuf3.g4 @@ -0,0 +1,662 @@ +// SPDX-License-Identifier: Apache-2.0 +/** + * A Protocol Buffers 3 grammar + * + * Original source: https://developers.google.com/protocol-buffers/docs/reference/proto3-spec + * Original source is published under Apache License 2.0. + * + * Changes from the source above: + * - rewrite to antlr + * - extract some group to rule. 
+ * + * @author anatawa12 + */ + +// $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false +// $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging + +grammar Protobuf3; + +proto + : syntax (importStatement | packageStatement | optionStatement | topLevelDef | emptyStatement_)* EOF + ; + +// Syntax + +syntax + : SYNTAX EQ (PROTO3_LIT_SINGLE | PROTO3_LIT_DOBULE) SEMI + ; + +// Import Statement + +importStatement + : IMPORT (WEAK | PUBLIC)? strLit SEMI + ; + +// Package + +packageStatement + : PACKAGE fullIdent SEMI + ; + +// Option + +optionStatement + : OPTION optionName EQ constant SEMI + ; + +optionName + : fullIdent + | LP fullIdent RP ( DOT fullIdent)? + ; + +// Normal Field +fieldLabel + : OPTIONAL + | REPEATED + ; + +field + : fieldLabel? type_ fieldName EQ fieldNumber (LB fieldOptions RB)? SEMI + ; + +fieldOptions + : fieldOption (COMMA fieldOption)* + ; + +fieldOption + : optionName EQ constant + ; + +fieldNumber + : intLit + ; + +// Oneof and oneof field + +oneof + : ONEOF oneofName LC (optionStatement | oneofField | emptyStatement_)* RC + ; + +oneofField + : type_ fieldName EQ fieldNumber (LB fieldOptions RB)? SEMI + ; + +// Map field + +mapField + : MAP LT keyType COMMA type_ GT mapName EQ fieldNumber (LB fieldOptions RB)? SEMI + ; + +keyType + : INT32 + | INT64 + | UINT32 + | UINT64 + | SINT32 + | SINT64 + | FIXED32 + | FIXED64 + | SFIXED32 + | SFIXED64 + | BOOL + | STRING + ; + +// field types + +type_ + : DOUBLE + | FLOAT + | INT32 + | INT64 + | UINT32 + | UINT64 + | SINT32 + | SINT64 + | FIXED32 + | FIXED64 + | SFIXED32 + | SFIXED64 + | BOOL + | STRING + | BYTES + | messageType + | enumType + ; + +// Reserved + +reserved + : RESERVED (ranges | reservedFieldNames) SEMI + ; + +ranges + : range_ (COMMA range_)* + ; + +range_ + : intLit (TO ( intLit | MAX))? 
+ ; + +reservedFieldNames + : strLit (COMMA strLit)* + ; + +// Top Level definitions + +topLevelDef + : messageDef + | enumDef + | extendDef + | serviceDef + ; + +// enum + +enumDef + : ENUM enumName enumBody + ; + +enumBody + : LC enumElement* RC + ; + +enumElement + : optionStatement + | enumField + | emptyStatement_ + ; + +enumField + : ident EQ (MINUS)? intLit enumValueOptions? SEMI + ; + +enumValueOptions + : LB enumValueOption (COMMA enumValueOption)* RB + ; + +enumValueOption + : optionName EQ constant + ; + +// message + +messageDef + : MESSAGE messageName messageBody + ; + +messageBody + : LC messageElement* RC + ; + +messageElement + : field + | enumDef + | messageDef + | extendDef + | optionStatement + | oneof + | mapField + | reserved + | emptyStatement_ + ; + +// Extend definition +// +// NB: not defined in the spec but supported by protoc and covered by protobuf3 tests +// see e.g. php/tests/proto/test_import_descriptor_proto.proto +// of https://github.com/protocolbuffers/protobuf +// it also was discussed here: https://github.com/protocolbuffers/protobuf/issues/4610 + +extendDef + : EXTEND messageType LC (field | emptyStatement_)* RC + ; + +// service + +serviceDef + : SERVICE serviceName LC serviceElement* RC + ; + +serviceElement + : optionStatement + | rpc + | emptyStatement_ + ; + +rpc + : RPC rpcName LP (STREAM)? messageType RP RETURNS LP (STREAM)? messageType RP ( + LC ( optionStatement | emptyStatement_)* RC + | SEMI + ) + ; + +// lexical + +constant + : fullIdent + | (MINUS | PLUS)? intLit + | ( MINUS | PLUS)? 
floatLit + | strLit + | boolLit + | blockLit + ; + +// not specified in specification but used in tests +blockLit + : LC (ident COLON constant)* RC + ; + +emptyStatement_ + : SEMI + ; + +// Lexical elements + +ident + : IDENTIFIER + | keywords + ; + +fullIdent + : ident (DOT ident)* + ; + +messageName + : ident + ; + +enumName + : ident + ; + +fieldName + : ident + ; + +oneofName + : ident + ; + +mapName + : ident + ; + +serviceName + : ident + ; + +rpcName + : ident + ; + +messageType + : (DOT)? (ident DOT)* messageName + ; + +enumType + : (DOT)? (ident DOT)* enumName + ; + +intLit + : INT_LIT + ; + +strLit + : STR_LIT + | PROTO3_LIT_SINGLE + | PROTO3_LIT_DOBULE + ; + +boolLit + : BOOL_LIT + ; + +floatLit + : FLOAT_LIT + ; + +// keywords +SYNTAX + : 'syntax' + ; + +IMPORT + : 'import' + ; + +WEAK + : 'weak' + ; + +PUBLIC + : 'public' + ; + +PACKAGE + : 'package' + ; + +OPTION + : 'option' + ; + +OPTIONAL + : 'optional' + ; + +REPEATED + : 'repeated' + ; + +ONEOF + : 'oneof' + ; + +MAP + : 'map' + ; + +INT32 + : 'int32' + ; + +INT64 + : 'int64' + ; + +UINT32 + : 'uint32' + ; + +UINT64 + : 'uint64' + ; + +SINT32 + : 'sint32' + ; + +SINT64 + : 'sint64' + ; + +FIXED32 + : 'fixed32' + ; + +FIXED64 + : 'fixed64' + ; + +SFIXED32 + : 'sfixed32' + ; + +SFIXED64 + : 'sfixed64' + ; + +BOOL + : 'bool' + ; + +STRING + : 'string' + ; + +DOUBLE + : 'double' + ; + +FLOAT + : 'float' + ; + +BYTES + : 'bytes' + ; + +RESERVED + : 'reserved' + ; + +TO + : 'to' + ; + +MAX + : 'max' + ; + +ENUM + : 'enum' + ; + +MESSAGE + : 'message' + ; + +SERVICE + : 'service' + ; + +EXTEND + : 'extend' + ; + +RPC + : 'rpc' + ; + +STREAM + : 'stream' + ; + +RETURNS + : 'returns' + ; + +PROTO3_LIT_SINGLE + : '"proto3"' + ; + +PROTO3_LIT_DOBULE + : '\'proto3\'' + ; + +// symbols + +SEMI + : ';' + ; + +EQ + : '=' + ; + +LP + : '(' + ; + +RP + : ')' + ; + +LB + : '[' + ; + +RB + : ']' + ; + +LC + : '{' + ; + +RC + : '}' + ; + +LT + : '<' + ; + +GT + : '>' + ; + +DOT + : '.' 
+ ; + +COMMA + : ',' + ; + +COLON + : ':' + ; + +PLUS + : '+' + ; + +MINUS + : '-' + ; + +STR_LIT + : ('\'' ( CHAR_VALUE)*? '\'') + | ( '"' ( CHAR_VALUE)*? '"') + ; + +fragment CHAR_VALUE + : HEX_ESCAPE + | OCT_ESCAPE + | CHAR_ESCAPE + | ~[\u0000\n\\] + ; + +fragment HEX_ESCAPE + : '\\' ('x' | 'X') HEX_DIGIT HEX_DIGIT + ; + +fragment OCT_ESCAPE + : '\\' OCTAL_DIGIT OCTAL_DIGIT OCTAL_DIGIT + ; + +fragment CHAR_ESCAPE + : '\\' ('a' | 'b' | 'f' | 'n' | 'r' | 't' | 'v' | '\\' | '\'' | '"') + ; + +BOOL_LIT + : 'true' + | 'false' + ; + +FLOAT_LIT + : (DECIMALS DOT DECIMALS? EXPONENT? | DECIMALS EXPONENT | DOT DECIMALS EXPONENT?) + | 'inf' + | 'nan' + ; + +fragment EXPONENT + : ('e' | 'E') (PLUS | MINUS)? DECIMALS + ; + +fragment DECIMALS + : DECIMAL_DIGIT+ + ; + +INT_LIT + : DECIMAL_LIT + | OCTAL_LIT + | HEX_LIT + ; + +fragment DECIMAL_LIT + : ([1-9]) DECIMAL_DIGIT* + ; + +fragment OCTAL_LIT + : '0' OCTAL_DIGIT* + ; + +fragment HEX_LIT + : '0' ('x' | 'X') HEX_DIGIT+ + ; + +IDENTIFIER + : LETTER (LETTER | DECIMAL_DIGIT)* + ; + +fragment LETTER + : [A-Za-z_] + ; + +fragment DECIMAL_DIGIT + : [0-9] + ; + +fragment OCTAL_DIGIT + : [0-7] + ; + +fragment HEX_DIGIT + : [0-9A-Fa-f] + ; + +// comments +WS + : [ \t\r\n\u000C]+ -> skip + ; + +LINE_COMMENT + : '//' ~[\r\n]* -> channel(HIDDEN) + ; + +COMMENT + : '/*' .*? 
'*/' -> channel(HIDDEN) + ; + +keywords + : SYNTAX + | IMPORT + | WEAK + | PUBLIC + | PACKAGE + | OPTION + | OPTIONAL + | REPEATED + | ONEOF + | MAP + | INT32 + | INT64 + | UINT32 + | UINT64 + | SINT32 + | SINT64 + | FIXED32 + | FIXED64 + | SFIXED32 + | SFIXED64 + | BOOL + | STRING + | DOUBLE + | FLOAT + | BYTES + | RESERVED + | TO + | MAX + | ENUM + | MESSAGE + | SERVICE + | EXTEND + | RPC + | STREAM + | RETURNS + | BOOL_LIT + ; \ No newline at end of file diff --git a/chapi-ast-protobuf/src/main/kotlin/chapi/ast/protobuf/ProtobufAnalyser.kt b/chapi-ast-protobuf/src/main/kotlin/chapi/ast/protobuf/ProtobufAnalyser.kt new file mode 100644 index 00000000..462bf207 --- /dev/null +++ b/chapi-ast-protobuf/src/main/kotlin/chapi/ast/protobuf/ProtobufAnalyser.kt @@ -0,0 +1,27 @@ +package chapi.ast.protobuf + +import chapi.ast.antlr.Protobuf3Lexer +import chapi.ast.antlr.Protobuf3Parser +import chapi.domain.core.CodeContainer +import chapi.parser.Analyser +import org.antlr.v4.runtime.CharStreams +import org.antlr.v4.runtime.CommonTokenStream +import org.antlr.v4.runtime.tree.ParseTreeWalker + +class ProtobufAnalyser: Analyser { + /** Parses proto3 [code] and returns the container the listener collected for [filePath]. */ + override fun analysis(code: String, filePath: String): CodeContainer { + val collector = ProtobufFullIdentListener(fileName = filePath) + // Walk the tree rooted at the grammar's `proto` entry rule. + ParseTreeWalker().walk(collector, parse(code).proto()) + return collector.getNodeInfo() + } + + /** Wires char stream -> lexer -> token stream -> parser for [str]. */ + private fun parse(str: String): Protobuf3Parser { + val lexer = Protobuf3Lexer(CharStreams.fromString(str)) + val tokens = CommonTokenStream(lexer) + + return Protobuf3Parser(tokens) + } +} diff --git a/chapi-ast-protobuf/src/main/kotlin/chapi/ast/protobuf/ProtobufFullIdentListener.kt b/chapi-ast-protobuf/src/main/kotlin/chapi/ast/protobuf/ProtobufFullIdentListener.kt new file mode 100644 index 00000000..2d91a3de --- /dev/null +++ b/chapi-ast-protobuf/src/main/kotlin/chapi/ast/protobuf/ProtobufFullIdentListener.kt @@ -0,0 +1,29 @@ 
+package chapi.ast.protobuf + +import chapi.ast.antlr.Protobuf3BaseListener +import chapi.ast.antlr.Protobuf3Parser +import chapi.domain.core.CodeContainer +import chapi.domain.core.CodeDataStruct +// Parse-tree listener that fills a CodeContainer with the package name and one CodeDataStruct per message. +class ProtobufFullIdentListener(var fileName: String) : Protobuf3BaseListener() { + private var codeContainer: CodeContainer = CodeContainer(FullName = fileName) + // Stores the text of the `package` statement as the container's package name. + override fun enterPackageStatement(ctx: Protobuf3Parser.PackageStatementContext) { + val packageName = ctx.fullIdent().text + codeContainer.PackageName = packageName + } + // Appends a data structure for each message definition entered (nested ones included), tagged with the package name seen so far. + override fun enterMessageDef(ctx: Protobuf3Parser.MessageDefContext) { + val messageName = ctx.messageName().text + codeContainer.DataStructures += CodeDataStruct( + NodeName = messageName, + Module = codeContainer.PackageName, + FilePath = codeContainer.FullName, + Package = codeContainer.PackageName + ) + } + // Returns the container accumulated so far. + fun getNodeInfo(): CodeContainer { + return codeContainer + } +} diff --git a/chapi-ast-protobuf/src/test/kotlin/chapi/ast/protobuf/ProtobufAnalyserTest.kt b/chapi-ast-protobuf/src/test/kotlin/chapi/ast/protobuf/ProtobufAnalyserTest.kt new file mode 100644 index 00000000..2f2ba616 --- /dev/null +++ b/chapi-ast-protobuf/src/test/kotlin/chapi/ast/protobuf/ProtobufAnalyserTest.kt @@ -0,0 +1,25 @@ +// Smoke test for ProtobufAnalyser: parses an inline proto3 snippet and checks the resulting CodeContainer. + +package chapi.ast.protobuf + +import org.junit.jupiter.api.Test +import org.junit.jupiter.api.Assertions.* + +class ProtobufAnalyserTest { + + @Test + fun `should parse valid protobuf code and return a CodeContainer`() { + // Given + val protobufCode = "syntax = \"proto3\";\npackage example;\n\nmessage Person {\n string name = 1;\n int32 id = 2;\n}" + val filePath = "path/to/file.proto" + val analyser = ProtobufAnalyser() + + // When + val codeContainer = analyser.analysis(protobufCode, filePath) + + // Then + assertNotNull(codeContainer) + assertEquals("example", codeContainer.PackageName) + assertTrue(codeContainer.DataStructures.isNotEmpty()) + } +} 
diff --git a/chapi-ast-protobuf/src/test/resources/common.proto b/chapi-ast-protobuf/src/test/resources/common.proto new file mode 100644 index 00000000..bb82e992 --- /dev/null +++ b/chapi-ast-protobuf/src/test/resources/common.proto @@ -0,0 +1,9 @@ +syntax = "proto3"; + +package common; + +enum Category { + ELECTRONICS = 0; + CLOTHING = 1; + FOOD = 2; +} diff --git a/chapi-ast-protobuf/src/test/resources/gpt-4o-user.proto b/chapi-ast-protobuf/src/test/resources/gpt-4o-user.proto new file mode 100644 index 00000000..f0d00ea3 --- /dev/null +++ b/chapi-ast-protobuf/src/test/resources/gpt-4o-user.proto @@ -0,0 +1,71 @@ +syntax = "proto3"; + +package api.v1; + +// 定义 API DTO 对象,通常反映领域模型的数据 +message UserDTO { + string id = 1; // 用户唯一标识符,通常使用 string 而非 int32 + string name = 2; // 用户名称 + repeated string roles = 3; // 用户角色列表 + map<string, string> metadata = 4; // 用于存储键值对的附加元数据 + Status status = 5; // 枚举类型,用户状态 + ContactInfo contact_info = 6; // 嵌套消息,用于联系信息 + + // 嵌套的联系信息(作为 DTO 的一部分) + message ContactInfo { + string email = 1; + string phone = 2; + } +} + +// 用户状态枚举,符合常见 API 模型中状态设计 +enum Status { + ACTIVE = 0; // 用户激活 + INACTIVE = 1; // 用户非活跃 + DELETED = 2; // 用户被删除 +} + +// 定义标准的 CRUD API 接口 +service UserService { + // 获取用户详细信息 + rpc GetUser(GetUserRequest) returns (GetUserResponse); + + // 创建新用户 + rpc CreateUser(CreateUserRequest) returns (UserResponse); + + // 更新现有用户 + rpc UpdateUser(UpdateUserRequest) returns (UserResponse); + + // 删除用户 + rpc DeleteUser(DeleteUserRequest) returns (DeleteUserResponse); +} + +// 请求和响应对象设计 +message GetUserRequest { + string user_id = 1; // 通过用户 ID 来获取用户信息 +} + +message GetUserResponse { + UserDTO user = 1; // 返回用户 DTO 对象 +} + +message CreateUserRequest { + UserDTO user = 1; // 传递完整的用户信息来创建用户 +} + +message UpdateUserRequest { + UserDTO user = 1; // 更新时传递完整的用户信息 +} + +message DeleteUserRequest { + string user_id = 1; // 通过用户 ID 来删除用户 +} + +message DeleteUserResponse { + bool success = 1; // 是否成功删除用户 +} + +// 通用响应消息 +message UserResponse { + UserDTO user = 
1; // 返回用户 DTO 对象 +} diff --git a/chapi-ast-protobuf/src/test/resources/gpt-o1-user.proto b/chapi-ast-protobuf/src/test/resources/gpt-o1-user.proto new file mode 100644 index 00000000..4d3fc5dc --- /dev/null +++ b/chapi-ast-protobuf/src/test/resources/gpt-o1-user.proto @@ -0,0 +1,138 @@ +syntax = "proto3"; + +package example; + +import "google/protobuf/descriptor.proto"; +import "common.proto"; // 假设存在此文件 + +// 基本的用户消息 +message User { + int32 id = 1; + string name = 2; + string email = 3; +} + +// 包含枚举和嵌套消息的订单消息 +message Order { + int32 order_id = 1; + Status status = 2; + repeated Item items = 3; + + enum Status { + PENDING = 0; + PROCESSING = 1; + COMPLETED = 2; + CANCELLED = 3; + } + + message Item { + int32 item_id = 1; + string description = 2; + float price = 3; + } +} + +// 包含Oneof和Map类型的响应消息 +message Response { + oneof result { + Success success = 1; + Error error = 2; + } +} + +message Success { + string message = 1; + map<string, string> data = 2; +} + +message Error { + int32 code = 1; + string message = 2; +} + +// 包含默认值和扩展的人员消息(Proto3中默认值已被移除,这里仅作为示例) +message Person { + string name = 1; + int32 age = 2; + repeated string emails = 3; +} + +// 扩展字段选项 +extend google.protobuf.FieldOptions { + string custom_option = 50001; +} + +message Employee { + string position = 1 [(custom_option) = "Manager"]; + Person person = 2; +} + +// 产品消息,包含外部引用和包名冲突的处理 +message Product { + int32 id = 1; + string name = 2; + common.Category category = 3; // 引用外部定义的枚举 +} + +// 包含Reserved关键字和Deprecated字段的旧消息 +message LegacyMessage { + int32 id = 1; + string old_field = 2 [deprecated = true]; + reserved 3, 4; // 以前使用过的字段号码 + reserved "old_name"; // 以前使用过的字段名称 + + string new_field = 5; +} + +// 基础消息 +message BaseMessage { + int32 id = 1; +} + +// 由于Proto3不支持继承(extends),这里使用组合的方式 +message DerivedMessage { + BaseMessage base = 1; + string info = 2; +} + +// 包含基础消息的容器消息 +message ContainerMessage { + repeated BaseMessage messages = 1; +} + +// 服务定义和流式RPC +service ChatService { + rpc Chat(stream 
ChatMessage) returns (stream ChatMessage); +} + +// 聊天消息 +message ChatMessage { + string user = 1; + string message = 2; + int64 timestamp = 3; +} + +// 自引用的节点消息,实现循环依赖 +message Node { + int32 id = 1; + repeated Node children = 2; +} + +// 包含所有字段类型的消息 +message AllTypes { + double a_double = 1; + float a_float = 2; + int32 an_int32 = 3; + int64 an_int64 = 4; + uint32 a_uint32 = 5; + uint64 a_uint64 = 6; + sint32 a_sint32 = 7; + sint64 a_sint64 = 8; + fixed32 a_fixed32 = 9; + fixed64 a_fixed64 = 10; + sfixed32 a_sfixed32 = 11; + sfixed64 a_sfixed64 = 12; + bool a_bool = 13; + string a_string = 14; + bytes a_bytes = 15; +} diff --git a/settings.gradle.kts b/settings.gradle.kts index c1ba7941..58ec72b1 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -6,6 +6,9 @@ include( // tier 1 languages ":chapi-ast-java", ":chapi-ast-typescript", + + // tier 1 model language + ":chapi-ast-protobuf", // tier 2 languages ":chapi-ast-kotlin",