From a6a744ccfd089cc202a39ca92461780f3c82feea Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Sun, 19 Jan 2025 23:22:10 +0100 Subject: [PATCH 1/2] filterx/func-str: use strstr() from libc instead of glib It uses a better algorithm (both glibc and musl). Signed-off-by: Balazs Scheidler --- lib/filterx/func-str.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/filterx/func-str.c b/lib/filterx/func-str.c index 02db65704..c6de63070 100644 --- a/lib/filterx/func-str.c +++ b/lib/filterx/func-str.c @@ -483,7 +483,7 @@ _function_includes_process(const gchar *haystack, gsize haystack_len, const gcha { if (needle_len > haystack_len) return FALSE; - return g_strstr_len(haystack, haystack_len, needle) != NULL; + return strstr(haystack, needle) != NULL; } FilterXExpr * From d8c0bbdb71f10d75f5058b04480ffbce963d5ecc Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Tue, 21 Jan 2025 19:08:41 +0100 Subject: [PATCH 2/2] filterx/func-str: refactor to avoid using GString/GPtrArrays internally Signed-off-by: Balazs Scheidler --- lib/filterx/func-str.c | 252 +++++++++--------- .../functional_tests/filterx/test_filterx.py | 34 --- .../filterx/test_filterx_funcs.py | 154 +++++++++++ 3 files changed, 283 insertions(+), 157 deletions(-) diff --git a/lib/filterx/func-str.c b/lib/filterx/func-str.c index c6de63070..3cfcbdf4a 100644 --- a/lib/filterx/func-str.c +++ b/lib/filterx/func-str.c @@ -66,76 +66,77 @@ typedef struct _FilterXExprAffix FilterXExprAffixProcessFunc process; } FilterXExprAffix; -static GString * -_format_str_obj(FilterXObject *obj) +static gboolean +_format_str_obj(FilterXObject *obj, const gchar **str, gsize *str_len) { - GString *str = scratch_buffers_alloc(); - - const gchar *obj_str; - gsize str_len; - if(!filterx_object_extract_string_ref(obj, &obj_str, &str_len)) - return NULL; - g_string_assign_len(str, obj_str, str_len); - - return str; + return filterx_object_extract_string_ref(obj, str, str_len); } -static inline gboolean -_do_casefold(GString *str) +static inline void +_do_casefold(const gchar **str, gsize *str_len) { - for (gsize i = 0; i < str->len; i++) - str->str[i] = ch_toupper(str->str[i]); - return TRUE; + GString *buf = scratch_buffers_alloc(); + + g_string_set_size(buf, *str_len); + for (gsize i = 0; i < (*str_len); i++) + buf->str[i] = ch_toupper((*str)[i]); + *str = buf->str; } -static GString * -_expr_format(FilterXExpr *expr, gboolean ignore_case) +static gboolean +_expr_format(FilterXExpr *expr, const gchar **str, gsize *str_len, gboolean ignore_case) { - FilterXObject *obj = filterx_expr_eval_typed(expr); + FilterXObject *obj = filterx_expr_eval(expr); + gboolean result = FALSE; + if (!obj) - return NULL; + return FALSE; - GString *result = _format_str_obj(obj); - if (!result) - filterx_eval_push_error("failed to extract string value, repr() failed", expr, obj); - filterx_object_unref(obj); + if (!_format_str_obj(obj, str, str_len)) + { + filterx_eval_push_error("failed to extract string value, repr() failed", expr, obj); + goto exit; + } - if (ignore_case && !_do_casefold(result)) - return NULL; + if (ignore_case) + _do_casefold(str, str_len); + result = TRUE; +exit: + filterx_object_unref(obj); return result; } -static GString * -_obj_format(FilterXObject *obj, gboolean ignore_case) +static gboolean +_obj_format(FilterXObject *obj, const gchar **str, gsize *str_len, gboolean ignore_case) { - GString *result = _format_str_obj(obj); - if (!result) - filterx_eval_push_error("failed to extract string value, repr() failed", NULL, obj); - - if (ignore_case && !_do_casefold(result)) - return NULL; + gboolean result = FALSE; + if (!_format_str_obj(obj, str, str_len)) + { + filterx_eval_push_error("failed to extract string value, repr() failed", NULL, obj); + goto exit; + } + if (ignore_case) + _do_casefold(str, str_len); + result = TRUE; +exit: return result; } static gboolean _string_with_cache_fill_cache(FilterXStringWithCache *self, gboolean ignore_case) { - if(!filterx_expr_is_literal(self->expr)) + if (!filterx_expr_is_literal(self->expr)) return TRUE; - ScratchBuffersMarker marker; - scratch_buffers_mark(&marker); - - GString *res = _expr_format(self->expr, ignore_case); - if (!res) + const gchar *str; + gsize str_len; + if (!_expr_format(self->expr, &str, &str_len, ignore_case)) return FALSE; - self->str_value = g_strdup(res->str); - self->str_len = res->len; - - scratch_buffers_reclaim_marked(marker); + self->str_value = g_strndup(str, str_len); + self->str_len = str_len; return TRUE; } @@ -164,19 +165,20 @@ static gboolean _string_with_cache_get_string_value(FilterXStringWithCache *self, gboolean ignore_case, const gchar **dst, gsize *len) { - if(self->str_value) + if (self->str_value) { *dst = self->str_value; *len = self->str_len; return TRUE; } - GString *res = _expr_format(self->expr, ignore_case); - if (!res) + const gchar *str; + gsize str_len; + if (!_expr_format(self->expr, &str, &str_len, ignore_case)) return FALSE; - *dst = res->str; - *len = res->len; + *dst = str; + *len = str_len; return TRUE; } @@ -194,7 +196,6 @@ _cache_needle(gsize index, FilterXExpr *value, gpointer user_data) return TRUE; } - static gboolean _expr_affix_cache_needle(FilterXExprAffix *self) { @@ -257,59 +258,89 @@ _expr_affix_free(FilterXExpr *s) filterx_function_free_method(&self->super); } -static GPtrArray * -_expr_affix_eval_needle(FilterXExprAffix *self) +static FilterXObject * +_eval_against_literal_needles(FilterXExprAffix *self, const gchar *haystack, gsize haystack_len) { - FilterXObject *needle_obj = filterx_expr_eval(self->needle.expr); - if (!needle_obj) - return NULL; - GPtrArray *needles = NULL; - - if (filterx_object_is_type(needle_obj, &FILTERX_TYPE_NAME(string)) - || filterx_object_is_type(needle_obj, &FILTERX_TYPE_NAME(message_value))) + gboolean matches = FALSE; + for (gsize i = 0; i < self->needle.cached_strings->len && !matches; i++) { - GString *str_value = _obj_format(needle_obj, self->ignore_case); - filterx_object_unref(needle_obj); - if (!str_value) + FilterXStringWithCache *current_needle = g_ptr_array_index(self->needle.cached_strings, i); + const gchar *needle_str; + gsize needle_len; + + if (!_string_with_cache_get_string_value(current_needle, self->ignore_case, &needle_str, &needle_len)) return NULL; - needles = g_ptr_array_sized_new(1); - g_ptr_array_add(needles, str_value); - return needles; + + matches = self->process(haystack, haystack_len, needle_str, needle_len); } + return filterx_boolean_new(matches); +} +static FilterXObject * +_eval_against_needle_expr_list(FilterXExprAffix *self, const gchar *haystack, gsize haystack_len, + FilterXObject *needle_obj) +{ FilterXObject *list_needle = filterx_ref_unwrap_ro(needle_obj); - if (filterx_object_is_type(list_needle, &FILTERX_TYPE_NAME(list))) + + if (!filterx_object_is_type(list_needle, &FILTERX_TYPE_NAME(list))) + return NULL; + + guint64 len; + filterx_object_len(needle_obj, &len); + + if (len == 0) + return NULL; + + gboolean matches = FALSE; + for (gsize i = 0; i < len && !matches; i++) { - guint64 len; - filterx_object_len(needle_obj, &len); - - if (len == 0) - { - filterx_object_unref(needle_obj); - return NULL; - } - needles = g_ptr_array_sized_new(len); - - for (guint64 i = 0; i < len; i++) - { - FilterXObject *elem = filterx_list_get_subscript(list_needle, i); - - GString *str_value = _obj_format(elem, self->ignore_case); - filterx_object_unref(elem); - if (!str_value) - goto error; - g_ptr_array_add(needles, str_value); - } - filterx_object_unref(needle_obj); - return needles; + FilterXObject *elem = filterx_list_get_subscript(list_needle, i); + const gchar *current_needle; + gsize current_needle_len; + + gboolean res = _obj_format(elem, ¤t_needle, ¤t_needle_len, self->ignore_case); + filterx_object_unref(elem); + + if (!res) + return NULL; + + matches = self->process(haystack, haystack_len, current_needle, current_needle_len); } + return filterx_boolean_new(matches); +} -error: - if(needles) - g_ptr_array_free(needles, TRUE); +static FilterXObject * +_eval_against_needle_expr_single(FilterXExprAffix *self, const gchar *haystack, gsize haystack_len, + FilterXObject *needle_obj) +{ + const gchar *needle; + gsize needle_len; + + if (_obj_format(needle_obj, &needle, &needle_len, self->ignore_case)) + { + return filterx_boolean_new(self->process(haystack, haystack_len, needle, needle_len)); + } return NULL; } +static FilterXObject * +_eval_against_needle_expr(FilterXExprAffix *self, const gchar *haystack, gsize haystack_len) +{ + FilterXObject *needle_obj = filterx_expr_eval(self->needle.expr); + if (!needle_obj) + return NULL; + + FilterXObject *result = _eval_against_needle_expr_single(self, haystack, haystack_len, needle_obj); + if (!result) + result = _eval_against_needle_expr_list(self, haystack, haystack_len, needle_obj); + if (!result) + filterx_eval_push_error("failed to evaluate needle, expects a string value or a list of strings", self->needle.expr, + needle_obj); + + filterx_object_unref(needle_obj); + return result; +} + static FilterXObject * _expr_affix_eval(FilterXExpr *s) { @@ -318,41 +349,16 @@ _expr_affix_eval(FilterXExpr *s) ScratchBuffersMarker marker; scratch_buffers_mark(&marker); - GString *haystack_str = _expr_format(self->haystack, self->ignore_case); - if (!haystack_str) - goto exit; - - gboolean matches = FALSE; - const gchar *needle_str; - gsize needle_len; - if(self->needle.cached_strings->len != 0) - { - for(guint64 i = 0; i < self->needle.cached_strings->len && !matches; i++) - { - FilterXStringWithCache *current_needle = g_ptr_array_index(self->needle.cached_strings, i); - if (!_string_with_cache_get_string_value(current_needle, self->ignore_case, &needle_str, &needle_len)) - goto exit; - matches = self->process(haystack_str->str, haystack_str->len, needle_str, needle_len); - } - result = filterx_boolean_new(matches); - goto exit; - } - - GPtrArray *needle_list = _expr_affix_eval_needle(self); - if (!needle_list) + const gchar *haystack; + gsize haystack_len; + if (!_expr_format(self->haystack, &haystack, &haystack_len, self->ignore_case)) goto exit; - if(needle_list->len != 0) - { - for(guint64 i = 0; i < needle_list->len && !matches; i++) - { - GString *current_needle = g_ptr_array_index(needle_list, i); - matches = self->process(haystack_str->str, haystack_str->len, current_needle->str, current_needle->len); - } - } - g_ptr_array_free(needle_list, TRUE); - result = filterx_boolean_new(matches); + if (self->needle.cached_strings->len != 0) + result = _eval_against_literal_needles(self, haystack, haystack_len); + else + result = _eval_against_needle_expr(self, haystack, haystack_len); exit: scratch_buffers_reclaim_marked(marker); diff --git a/tests/light/functional_tests/filterx/test_filterx.py b/tests/light/functional_tests/filterx/test_filterx.py index 2953751f3..d17ccb631 100644 --- a/tests/light/functional_tests/filterx/test_filterx.py +++ b/tests/light/functional_tests/filterx/test_filterx.py @@ -2432,40 +2432,6 @@ def test_parse_windows_eventlog_xml(config, syslog_ng): } -def test_startswith_endswith_includes(config, syslog_ng): - (file_true, file_false) = create_config( - config, r""" - result = json(); - if (startswith($MSG, ["dummy_prefix", "foo"])) - { - result.startswith_foo = true; - }; - bar_var = "bar"; - if (includes($MSG, bar_var, ignorecase=true)) - { - result.contains_bar = true; - }; - baz_var = "baz"; - baz_list = ["dummy_suffix", baz_var]; - if (endswith($MSG, baz_list, ignorecase=true)) - { - result.endswith_baz = true; - }; - log_msg_value = ${values.str}; - if (includes(log_msg_value, log_msg_value)) - { - result.works_with_message_value = true; - }; - $MSG = json(); - $MSG = result; - """, msg="fooBARbAz", - ) - syslog_ng.start(config) - - assert "processed" not in file_false.get_stats() - assert file_true.read_log() == '{"startswith_foo":true,"contains_bar":true,"endswith_baz":true,"works_with_message_value":true}\n' - - def test_parse_cef(config, syslog_ng): (file_true, file_false) = create_config( config, r""" diff --git a/tests/light/functional_tests/filterx/test_filterx_funcs.py b/tests/light/functional_tests/filterx/test_filterx_funcs.py index eaab903d9..7574681fe 100644 --- a/tests/light/functional_tests/filterx/test_filterx_funcs.py +++ b/tests/light/functional_tests/filterx/test_filterx_funcs.py @@ -105,3 +105,157 @@ def test_lower(config, syslog_ng): assert file_final.get_stats()["processed"] == 1 assert file_final.read_log() == """{"literal":"abc","variable":"foobar","host":"hostname"}\n""" + + +def test_startswith_with_various_arguments(config, syslog_ng): + (file_final,) = create_config( + config, r""" + result = json(); + foo = "foo"; + bar = "bar"; + # literal string + if (startswith($MSG, "foo")) + { + result.startswith_foo1 = true; + }; + # literal array + if (startswith($MSG, ["foo"])) + { + result.startswith_foo2 = true; + }; + # literal array, not the first element + if (startswith($MSG, ["bar", "foo"])) + { + result.startswith_foo3 = true; + }; + # non-literal + if (startswith($MSG, foo)) + { + result.startswith_foo4 = true; + }; + # non-literal + if (startswith($MSG, [bar, foo])) + { + result.startswith_foo4 = true; + }; + $MSG = result; + """, msg="fooBARbAz", + ) + syslog_ng.start(config) + + assert file_final.get_stats()["processed"] == 1 + assert file_final.read_log() == '{"startswith_foo1":true,"startswith_foo2":true,"startswith_foo3":true,"startswith_foo4":true}\n' + + +def test_endswith_with_various_arguments(config, syslog_ng): + (file_final,) = create_config( + config, r""" + result = json(); + foo = "foo"; + bar = "bar"; + # literal string + if (endswith($MSG, "foo")) + { + result.endswith_foo1 = true; + }; + # literal array + if (endswith($MSG, ["foo"])) + { + result.endswith_foo2 = true; + }; + # literal array, not the first element + if (endswith($MSG, ["bar", "foo"])) + { + result.endswith_foo3 = true; + }; + # non-literal + if (endswith($MSG, foo)) + { + result.endswith_foo4 = true; + }; + # non-literal + if (endswith($MSG, [bar, foo])) + { + result.endswith_foo4 = true; + }; + $MSG = result; + """, msg="bAzBARfoo", + ) + syslog_ng.start(config) + + assert file_final.get_stats()["processed"] == 1 + assert file_final.read_log() == '{"endswith_foo1":true,"endswith_foo2":true,"endswith_foo3":true,"endswith_foo4":true}\n' + + +def test_includes_with_various_arguments(config, syslog_ng): + (file_final,) = create_config( + config, r""" + result = json(); + foo = "foo"; + bar = "bar"; + # literal string + if (includes($MSG, "foo")) + { + result.includes_foo1 = true; + }; + # literal array + if (includes($MSG, ["foo"])) + { + result.includes_foo2 = true; + }; + # literal array, not the first element + if (includes($MSG, ["bar", "foo"])) + { + result.includes_foo3 = true; + }; + # non-literal + if (includes($MSG, foo)) + { + result.includes_foo4 = true; + }; + # non-literal + if (includes($MSG, [bar, foo])) + { + result.includes_foo4 = true; + }; + $MSG = result; + """, msg="bAzBARfoo", + ) + syslog_ng.start(config) + + assert file_final.get_stats()["processed"] == 1 + assert file_final.read_log() == '{"includes_foo1":true,"includes_foo2":true,"includes_foo3":true,"includes_foo4":true}\n' + + +def test_startswith_endswith_includes(config, syslog_ng): + (file_final,) = create_config( + config, r""" + result = json(); + if (startswith($MSG, ["dummy_prefix", "foo"])) + { + result.startswith_foo = true; + }; + bar_var = "bar"; + if (includes($MSG, bar_var, ignorecase=true)) + { + result.contains_bar = true; + }; + baz_var = "baz"; + baz_list = ["dummy_suffix", baz_var]; + if (endswith($MSG, baz_list, ignorecase=true)) + { + result.endswith_baz = true; + }; + log_msg_value = ${values.str}; + if (includes(log_msg_value, log_msg_value)) + { + result.works_with_message_value = true; + }; + $MSG = json(); + $MSG = result; + """, msg="fooBARbAz", + ) + syslog_ng.start(config) + + assert file_final.get_stats()["processed"] == 1 + assert file_final.read_log() == '{"startswith_foo":true,"contains_bar":true,"endswith_baz":true,"works_with_message_value":true}\n'