From 534d7f8c3ea03b5965b879ceececc9158c7d6199 Mon Sep 17 00:00:00 2001 From: Tom Yu Date: Sat, 30 Dec 2023 00:37:16 +0800 Subject: [PATCH] Fix log content duplication in ParseJsonNativeProcessor (#1295) This commit resolves an issue in ParseJsonNativeProcessor where the original log content was unintentionally retained in the "content" field due to improper state management. The processor incorrectly maintained a class-level state indicating whether the "content" key was overwritten during JSON parsing. While this state should be reset for each new log, it was persistently kept across logs. Consequently, if a JSON log contained a "content" key, the processor would mark the state as overwritten and not drop the "content" field in subsequent logs, leading to duplicated content. To address this, the state tracking the "content" key overwrite is moved from class level to local level, ensuring it is reset at the start of each log parsing operation. --- core/processor/ProcessorParseJsonNative.cpp | 17 +++--- core/processor/ProcessorParseJsonNative.h | 7 ++- .../ProcessorParseJsonNativeUnittest.cpp | 58 +++++++++++++++++++ 3 files changed, 73 insertions(+), 9 deletions(-) diff --git a/core/processor/ProcessorParseJsonNative.cpp b/core/processor/ProcessorParseJsonNative.cpp index d993e79be9..6241b1faa9 100644 --- a/core/processor/ProcessorParseJsonNative.cpp +++ b/core/processor/ProcessorParseJsonNative.cpp @@ -77,8 +77,9 @@ bool ProcessorParseJsonNative::ProcessEvent(const StringView& logPath, PipelineE auto rawContent = sourceEvent.GetContent(mSourceKey); - bool res = true; - res = JsonLogLineParser(sourceEvent, logPath, e); + bool sourceKeyOverwritten = mSourceKeyOverwritten; + bool rawLogTagOverwritten = false; + bool res = JsonLogLineParser(sourceEvent, logPath, e, sourceKeyOverwritten, rawLogTagOverwritten); if (!res && !mDiscardUnmatch) { AddLog(LogParser::UNMATCH_LOG_KEY, // __raw_log__ @@ -86,10 +87,10 @@ bool ProcessorParseJsonNative::ProcessEvent(const 
StringView& logPath, PipelineE sourceEvent); // legacy behavior, should use sourceKey } if (res || !mDiscardUnmatch) { - if (mUploadRawLog && (!res || !mRawLogTagOverwritten)) { + if (mUploadRawLog && (!res || !rawLogTagOverwritten)) { AddLog(mRawLogTag, rawContent, sourceEvent); // __raw__ } - if (res && !mSourceKeyOverwritten) { + if (res && !sourceKeyOverwritten) { sourceEvent.DelContent(mSourceKey); } return true; @@ -100,7 +101,9 @@ bool ProcessorParseJsonNative::ProcessEvent(const StringView& logPath, PipelineE bool ProcessorParseJsonNative::JsonLogLineParser(LogEvent& sourceEvent, const StringView& logPath, - PipelineEventPtr& e) { + PipelineEventPtr& e, + bool& sourceKeyOverwritten, + bool& rawLogTagOverwritten) { StringView buffer = sourceEvent.GetContent(mSourceKey); if (buffer.empty()) @@ -153,10 +156,10 @@ bool ProcessorParseJsonNative::JsonLogLineParser(LogEvent& sourceEvent, StringBuffer contentValueBuffer = sourceEvent.GetSourceBuffer()->CopyString(contentValue); if (contentKey.c_str() == mSourceKey) { - mSourceKeyOverwritten = true; + sourceKeyOverwritten = true; } if (contentKey.c_str() == mRawLogTag) { - mRawLogTagOverwritten = true; + rawLogTagOverwritten = true; } AddLog(StringView(contentKeyBuffer.data, contentKeyBuffer.size), diff --git a/core/processor/ProcessorParseJsonNative.h b/core/processor/ProcessorParseJsonNative.h index 113e7ed786..3bfee8890b 100644 --- a/core/processor/ProcessorParseJsonNative.h +++ b/core/processor/ProcessorParseJsonNative.h @@ -39,7 +39,6 @@ class ProcessorParseJsonNative : public Processor { bool mUploadRawLog = false; bool mSourceKeyOverwritten = false; std::string mRawLogTag; - bool mRawLogTagOverwritten = false; int* mParseFailures = nullptr; int* mLogGroupSize = nullptr; @@ -49,7 +48,11 @@ class ProcessorParseJsonNative : public Processor { CounterPtr mProcDiscardRecordsTotal; CounterPtr mProcParseErrorTotal; - bool JsonLogLineParser(LogEvent& sourceEvent, const StringView& logPath, PipelineEventPtr& e); + 
bool JsonLogLineParser(LogEvent& sourceEvent, + const StringView& logPath, + PipelineEventPtr& e, + bool& sourceKeyOverwritten, + bool& rawLogTagOverwritten); void AddLog(const StringView& key, const StringView& value, LogEvent& targetEvent); bool ProcessEvent(const StringView& logPath, PipelineEventPtr& e); static std::string RapidjsonValueToString(const rapidjson::Value& value); diff --git a/core/unittest/processor/ProcessorParseJsonNativeUnittest.cpp b/core/unittest/processor/ProcessorParseJsonNativeUnittest.cpp index b58f35268e..bbf66c5f58 100644 --- a/core/unittest/processor/ProcessorParseJsonNativeUnittest.cpp +++ b/core/unittest/processor/ProcessorParseJsonNativeUnittest.cpp @@ -352,6 +352,10 @@ void ProcessorParseJsonNativeUnittest::TestProcessJsonContent() { config.mAdvancedConfig.mRawLogTag = "__raw__"; // make events + // the first event has key "content" in its json, which overwrites sourceKey "content" + // the second event doesn't have key "content" in json + // after parsing, the first event's content should be the value in json, the original content should be the value of + // "__raw__"; the second event's content should be dropped, the original content should be the value of "__raw__" auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::string inJson = R"({ @@ -366,6 +370,15 @@ void ProcessorParseJsonNativeUnittest::TestProcessJsonContent() { "timestampNanosecond" : 0, "timestamp" : 12345678901, "type" : 1 + }, + { + "contents" : + { + "content" : "{\"name\":\"Mike\",\"age\":25,\"is_student\":false,\"address\":{\"city\":\"Hangzhou\",\"postal_code\":\"100000\"},\"courses\":[\"Math\",\"English\",\"Science\"],\"scores\":{\"Math\":90,\"English\":85,\"Science\":95}}" + }, + "timestampNanosecond" : 0, + "timestamp" : 12345678902, + "type" : 1 } ] })"; @@ -397,6 +410,21 @@ void ProcessorParseJsonNativeUnittest::TestProcessJsonContent() { "timestamp" : 12345678901, "timestampNanosecond" : 0, "type" : 1 + }, + { + 
"contents" : + { + "__raw__" : "{\"name\":\"Mike\",\"age\":25,\"is_student\":false,\"address\":{\"city\":\"Hangzhou\",\"postal_code\":\"100000\"},\"courses\":[\"Math\",\"English\",\"Science\"],\"scores\":{\"Math\":90,\"English\":85,\"Science\":95}}", + "address" : "{\"city\":\"Hangzhou\",\"postal_code\":\"100000\"}", + "age":"25", + "courses":"[\"Math\",\"English\",\"Science\"]", + "is_student":"false", + "name":"Mike", + "scores":"{\"Math\":90,\"English\":85,\"Science\":95}" + }, + "timestamp" : 12345678902, + "timestampNanosecond" : 0, + "type" : 1 } ] })"; @@ -413,6 +441,10 @@ void ProcessorParseJsonNativeUnittest::TestProcessJsonRaw() { config.mAdvancedConfig.mRawLogTag = "__raw__"; // make events + // the first event has key "__raw__" in its json, which overwrites rawLogTag "__raw__" + // the second event doesn't have key "__raw__" in json + // after parsing, the first event's __raw__ should be the value in json, the original content should be discarded; + // the second event's __raw__ should be the original content auto sourceBuffer = std::make_shared(); PipelineEventGroup eventGroup(sourceBuffer); std::string inJson = R"({ @@ -427,6 +459,16 @@ void ProcessorParseJsonNativeUnittest::TestProcessJsonRaw() { "timestampNanosecond" : 0, "timestamp" : 12345678901, "type" : 1 + }, + { + "contents" : + { + "content" : "{\"name\":\"Mike\",\"age\":25,\"is_student\":false,\"address\":{\"city\":\"Hangzhou\",\"postal_code\":\"100000\"},\"courses\":[\"Math\",\"English\",\"Science\"],\"scores\":{\"Math\":90,\"English\":85,\"Science\":95}}", + "log.file.offset": "0" + }, + "timestampNanosecond" : 0, + "timestamp" : 12345678902, + "type" : 1 } ] })"; @@ -457,6 +499,22 @@ void ProcessorParseJsonNativeUnittest::TestProcessJsonRaw() { "timestamp" : 12345678901, "timestampNanosecond" : 0, "type" : 1 + }, + { + "contents" : + { + "__raw__" : 
"{\"name\":\"Mike\",\"age\":25,\"is_student\":false,\"address\":{\"city\":\"Hangzhou\",\"postal_code\":\"100000\"},\"courses\":[\"Math\",\"English\",\"Science\"],\"scores\":{\"Math\":90,\"English\":85,\"Science\":95}}", + "address" : "{\"city\":\"Hangzhou\",\"postal_code\":\"100000\"}", + "age":"25", + "courses":"[\"Math\",\"English\",\"Science\"]", + "is_student":"false", + "log.file.offset":"0", + "name":"Mike", + "scores":"{\"Math\":90,\"English\":85,\"Science\":95}" + }, + "timestamp" : 12345678902, + "timestampNanosecond" : 0, + "type" : 1 } ] })";