Skip to content

Commit

Permalink
Fix log content duplication in ProcessorParseJsonNative (#1295)
Browse files Browse the repository at this point in the history
This commit resolves an issue in ProcessorParseJsonNative
where the original log content was unintentionally retained in the
"content" field due to improper state management.

The processor incorrectly maintained a class-level state indicating
whether the "content" key was overwritten during JSON parsing. While
this state should be reset for each new log, it was persistently kept
across logs. Consequently, if a JSON log contained a "content" key, the
processor would mark the state as overwritten and not drop the "content"
field in subsequent logs, leading to duplicated content.

To address this, the state tracking the "content" key overwrite is
moved from a class member to a local variable in ProcessEvent, ensuring it is re-initialized at the start of each log parsing operation.
  • Loading branch information
yyuuttaaoo authored Dec 29, 2023
1 parent 184c414 commit 534d7f8
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 9 deletions.
17 changes: 10 additions & 7 deletions core/processor/ProcessorParseJsonNative.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,19 +77,20 @@ bool ProcessorParseJsonNative::ProcessEvent(const StringView& logPath, PipelineE

auto rawContent = sourceEvent.GetContent(mSourceKey);

bool res = true;
res = JsonLogLineParser(sourceEvent, logPath, e);
bool sourceKeyOverwritten = mSourceKeyOverwritten;
bool rawLogTagOverwritten = false;
bool res = JsonLogLineParser(sourceEvent, logPath, e, sourceKeyOverwritten, rawLogTagOverwritten);

if (!res && !mDiscardUnmatch) {
AddLog(LogParser::UNMATCH_LOG_KEY, // __raw_log__
rawContent,
sourceEvent); // legacy behavior, should use sourceKey
}
if (res || !mDiscardUnmatch) {
if (mUploadRawLog && (!res || !mRawLogTagOverwritten)) {
if (mUploadRawLog && (!res || !rawLogTagOverwritten)) {
AddLog(mRawLogTag, rawContent, sourceEvent); // __raw__
}
if (res && !mSourceKeyOverwritten) {
if (res && !sourceKeyOverwritten) {
sourceEvent.DelContent(mSourceKey);
}
return true;
Expand All @@ -100,7 +101,9 @@ bool ProcessorParseJsonNative::ProcessEvent(const StringView& logPath, PipelineE

bool ProcessorParseJsonNative::JsonLogLineParser(LogEvent& sourceEvent,
const StringView& logPath,
PipelineEventPtr& e) {
PipelineEventPtr& e,
bool& sourceKeyOverwritten,
bool& rawLogTagOverwritten) {
StringView buffer = sourceEvent.GetContent(mSourceKey);

if (buffer.empty())
Expand Down Expand Up @@ -153,10 +156,10 @@ bool ProcessorParseJsonNative::JsonLogLineParser(LogEvent& sourceEvent,
StringBuffer contentValueBuffer = sourceEvent.GetSourceBuffer()->CopyString(contentValue);

if (contentKey.c_str() == mSourceKey) {
mSourceKeyOverwritten = true;
sourceKeyOverwritten = true;
}
if (contentKey.c_str() == mRawLogTag) {
mRawLogTagOverwritten = true;
rawLogTagOverwritten = true;
}

AddLog(StringView(contentKeyBuffer.data, contentKeyBuffer.size),
Expand Down
7 changes: 5 additions & 2 deletions core/processor/ProcessorParseJsonNative.h
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,6 @@ class ProcessorParseJsonNative : public Processor {
bool mUploadRawLog = false;
bool mSourceKeyOverwritten = false;
std::string mRawLogTag;
bool mRawLogTagOverwritten = false;

int* mParseFailures = nullptr;
int* mLogGroupSize = nullptr;
Expand All @@ -49,7 +48,11 @@ class ProcessorParseJsonNative : public Processor {
CounterPtr mProcDiscardRecordsTotal;
CounterPtr mProcParseErrorTotal;

bool JsonLogLineParser(LogEvent& sourceEvent, const StringView& logPath, PipelineEventPtr& e);
bool JsonLogLineParser(LogEvent& sourceEvent,
const StringView& logPath,
PipelineEventPtr& e,
bool& sourceKeyOverwritten,
bool& rawLogTagOverwritten);
void AddLog(const StringView& key, const StringView& value, LogEvent& targetEvent);
bool ProcessEvent(const StringView& logPath, PipelineEventPtr& e);
static std::string RapidjsonValueToString(const rapidjson::Value& value);
Expand Down
58 changes: 58 additions & 0 deletions core/unittest/processor/ProcessorParseJsonNativeUnittest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -352,6 +352,10 @@ void ProcessorParseJsonNativeUnittest::TestProcessJsonContent() {
config.mAdvancedConfig.mRawLogTag = "__raw__";

// make events
// the first event has key "content" in its json, which overwrites sourceKey "content"
// the second event doesn't have key "content" in its json
// after parsing, the first event's content should be the value in json, and the original content should be the
// value of "__raw__"; the second event's content should be dropped, and the original content should be the value
// of "__raw__"
auto sourceBuffer = std::make_shared<SourceBuffer>();
PipelineEventGroup eventGroup(sourceBuffer);
std::string inJson = R"({
Expand All @@ -366,6 +370,15 @@ void ProcessorParseJsonNativeUnittest::TestProcessJsonContent() {
"timestampNanosecond" : 0,
"timestamp" : 12345678901,
"type" : 1
},
{
"contents" :
{
"content" : "{\"name\":\"Mike\",\"age\":25,\"is_student\":false,\"address\":{\"city\":\"Hangzhou\",\"postal_code\":\"100000\"},\"courses\":[\"Math\",\"English\",\"Science\"],\"scores\":{\"Math\":90,\"English\":85,\"Science\":95}}"
},
"timestampNanosecond" : 0,
"timestamp" : 12345678902,
"type" : 1
}
]
})";
Expand Down Expand Up @@ -397,6 +410,21 @@ void ProcessorParseJsonNativeUnittest::TestProcessJsonContent() {
"timestamp" : 12345678901,
"timestampNanosecond" : 0,
"type" : 1
},
{
"contents" :
{
"__raw__" : "{\"name\":\"Mike\",\"age\":25,\"is_student\":false,\"address\":{\"city\":\"Hangzhou\",\"postal_code\":\"100000\"},\"courses\":[\"Math\",\"English\",\"Science\"],\"scores\":{\"Math\":90,\"English\":85,\"Science\":95}}",
"address" : "{\"city\":\"Hangzhou\",\"postal_code\":\"100000\"}",
"age":"25",
"courses":"[\"Math\",\"English\",\"Science\"]",
"is_student":"false",
"name":"Mike",
"scores":"{\"Math\":90,\"English\":85,\"Science\":95}"
},
"timestamp" : 12345678902,
"timestampNanosecond" : 0,
"type" : 1
}
]
})";
Expand All @@ -413,6 +441,10 @@ void ProcessorParseJsonNativeUnittest::TestProcessJsonRaw() {
config.mAdvancedConfig.mRawLogTag = "__raw__";

// make events
// the first event has key "__raw__" in its json, which overwrites rawLogTag "__raw__"
// the second event doesn't have key "__raw__" in its json
// after parsing, the first event's __raw__ should be the value in json, and the original content should be discarded
// the second event's __raw__ should be the original content
auto sourceBuffer = std::make_shared<SourceBuffer>();
PipelineEventGroup eventGroup(sourceBuffer);
std::string inJson = R"({
Expand All @@ -427,6 +459,16 @@ void ProcessorParseJsonNativeUnittest::TestProcessJsonRaw() {
"timestampNanosecond" : 0,
"timestamp" : 12345678901,
"type" : 1
},
{
"contents" :
{
"content" : "{\"name\":\"Mike\",\"age\":25,\"is_student\":false,\"address\":{\"city\":\"Hangzhou\",\"postal_code\":\"100000\"},\"courses\":[\"Math\",\"English\",\"Science\"],\"scores\":{\"Math\":90,\"English\":85,\"Science\":95}}",
"log.file.offset": "0"
},
"timestampNanosecond" : 0,
"timestamp" : 12345678902,
"type" : 1
}
]
})";
Expand Down Expand Up @@ -457,6 +499,22 @@ void ProcessorParseJsonNativeUnittest::TestProcessJsonRaw() {
"timestamp" : 12345678901,
"timestampNanosecond" : 0,
"type" : 1
},
{
"contents" :
{
"__raw__" : "{\"name\":\"Mike\",\"age\":25,\"is_student\":false,\"address\":{\"city\":\"Hangzhou\",\"postal_code\":\"100000\"},\"courses\":[\"Math\",\"English\",\"Science\"],\"scores\":{\"Math\":90,\"English\":85,\"Science\":95}}",
"address" : "{\"city\":\"Hangzhou\",\"postal_code\":\"100000\"}",
"age":"25",
"courses":"[\"Math\",\"English\",\"Science\"]",
"is_student":"false",
"log.file.offset":"0",
"name":"Mike",
"scores":"{\"Math\":90,\"English\":85,\"Science\":95}"
},
"timestamp" : 12345678902,
"timestampNanosecond" : 0,
"type" : 1
}
]
})";
Expand Down

0 comments on commit 534d7f8

Please # to comment.