From 4de2643931520bbd9d8602e29d7609ca80a027e4 Mon Sep 17 00:00:00 2001 From: ptmcg Date: Sat, 28 Sep 2024 04:10:57 -0500 Subject: [PATCH] Fix failing timestamp conversion tests --- logmerger/timestamp_wrapper.py | 20 +++--- tests/test_timestamp_formats.py | 105 +++++++++++++++++++++++++++++--- 2 files changed, 107 insertions(+), 18 deletions(-) diff --git a/logmerger/timestamp_wrapper.py b/logmerger/timestamp_wrapper.py index 8184ec8..817e4b9 100644 --- a/logmerger/timestamp_wrapper.py +++ b/logmerger/timestamp_wrapper.py @@ -159,11 +159,11 @@ def __call__(self, obj: T) -> tuple[datetime | None, T]: return ret[0], strip_escape_sequences(ret[1]).rstrip() -class YMDHMScommaFTZ(TimestampedLineTransformer): +class YMDHMScommaFZ(TimestampedLineTransformer): # log files with timestamp "YYYY-MM-DD HH:MM:SS,SSS" timestamp_pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3,}\s?(?:Z|[+-]\d{4})" pattern = fr"(({timestamp_pattern})\s)" - strptime_format = datetime.fromisoformat + strptime_format = lambda _, s: datetime.strptime(s, "%Y-%m-%d %H:%M:%S,%f%z") has_timezone = True def __init__(self): @@ -174,7 +174,7 @@ class YMDHMScommaF(TimestampedLineTransformer): # log files with timestamp "YYYY-MM-DD HH:MM:SS,SSS" timestamp_pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3,}" pattern = fr"(({timestamp_pattern})\s)" - strptime_format = datetime.fromisoformat + strptime_format = lambda _, s: datetime.strptime(s, "%Y-%m-%d %H:%M:%S,%f") def __init__(self): super().__init__(self.pattern, self.strptime_format) @@ -184,7 +184,7 @@ class YMDHMSdotFZ(TimestampedLineTransformer): # log files with timestamp "YYYY-MM-DD HH:MM:SS.SSS" timestamp_pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3,}\s?(?:Z|[+-]\d{4})" pattern = fr"(({timestamp_pattern})\s)" - strptime_format = datetime.fromisoformat + strptime_format = lambda _, s: datetime.strptime(s, "%Y-%m-%d %H:%M:%S.%f%z") has_timezone = True def __init__(self): @@ -195,7 +195,7 @@ class YMDHMSdotF(TimestampedLineTransformer): # log files with timestamp "YYYY-MM-DD HH:MM:SS.SSS" timestamp_pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3,}" pattern = fr"(({timestamp_pattern})\s)" - strptime_format = datetime.fromisoformat + strptime_format = lambda _, s: datetime.strptime(s, "%Y-%m-%d %H:%M:%S.%f") def __init__(self): super().__init__(self.pattern, self.strptime_format) @@ -226,7 +226,7 @@ class YMDTHMScommaFZ(TimestampedLineTransformer): # log files with timestamp "YYYY-MM-DDTHH:MM:SS,SSS" timestamp_pattern = r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2},\d{3,}\s?(?:Z|[+-]\d{4})" pattern = fr"(({timestamp_pattern})\s)" - strptime_format = datetime.fromisoformat + strptime_format = lambda _, s: datetime.strptime(s, "%Y-%m-%dT%H:%M:%S,%f%z") has_timezone = True def __init__(self): @@ -237,7 +237,7 @@ class YMDTHMScommaF(TimestampedLineTransformer): # log files with timestamp "YYYY-MM-DDTHH:MM:SS,SSS" timestamp_pattern = r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2},\d{3,}" pattern = fr"(({timestamp_pattern})\s)" - strptime_format = datetime.fromisoformat + strptime_format = lambda _, s: datetime.strptime(s, "%Y-%m-%dT%H:%M:%S,%f") def __init__(self): super().__init__(self.pattern, self.strptime_format) @@ -247,7 +247,7 @@ class YMDTHMSdotFZ(TimestampedLineTransformer): # log files with timestamp "YYYY-MM-DDTHH:MM:SS.SSS" timestamp_pattern = r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3,}\s?(?:Z|[+-]\d{4}Z?)" pattern = fr"(({timestamp_pattern})\s)" - strptime_format = datetime.fromisoformat + strptime_format = lambda _, s: datetime.strptime(s, "%Y-%m-%dT%H:%M:%S.%f%z") has_timezone = True def __init__(self): @@ -258,7 +258,7 @@ class YMDTHMSdotF(TimestampedLineTransformer): # log files with timestamp "YYYY-MM-DDTHH:MM:SS.SSS" timestamp_pattern = r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3,}" pattern = fr"(({timestamp_pattern})\s)" - strptime_format = datetime.fromisoformat + strptime_format = lambda _, s: datetime.strptime(s, "%Y-%m-%dT%H:%M:%S.%f") def __init__(self): super().__init__(self.pattern, self.strptime_format) @@ -268,7 +268,7 @@ class YMDTHMSZ(TimestampedLineTransformer): # log files with timestamp "YYYY-MM-DDTHH:MM:SS" timestamp_pattern = r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\s?(?:Z|[+-]\d{4})" pattern = fr"(({timestamp_pattern})\s)" - strptime_format = datetime.fromisoformat + strptime_format = lambda _, s: datetime.strptime(s, "%Y-%m-%dT%H:%M:%S%z") has_timezone = True def __init__(self): diff --git a/tests/test_timestamp_formats.py b/tests/test_timestamp_formats.py index bbd537c..26c8f95 100644 --- a/tests/test_timestamp_formats.py +++ b/tests/test_timestamp_formats.py @@ -12,57 +12,146 @@ def _test_timestamp_format_parsing(string_date: str, expected_datetime: datetime string_date ) - parsed_datetime, _ = transformer(string_date) + try: + parsed_datetime, _ = transformer(string_date) + except ValueError as ve: + raise AssertionError( + f"failed to parse {string_date!r} with transformer {type(transformer).__name__}" + ) from ve + parsed_datetime = parsed_datetime.astimezone(timezone.utc) print(repr(string_date)) + print(type(transformer).__name__) print("Parsed time :", parsed_datetime) print("Expected time:", expected_datetime) - assert parsed_datetime == expected_datetime + assert parsed_datetime == expected_datetime, f"failed to convert {string_date!r} with transformer {type(transformer).__name__}" @pytest.mark.parametrize( - "string_date, expected_datetime", + "tz_class, string_date, expected_datetime", [ ( + "YMDHMScommaFZ", "2023-07-14 08:00:01,000Z Log", datetime(2023, 7, 14, 8, 0, 1, tzinfo=timezone.utc), ), ( + "YMDHMScommaFZ", "2023-07-14 08:00:01,123+0200 Log", datetime(2023, 7, 14, 8, 0, 1, 123000, tzinfo=timezone(timedelta(hours=2))), ), ( - "2023-07-14 08:00:01.000Z Log", - datetime(2023, 7, 14, 8, 0, 1, tzinfo=timezone.utc), + "YMDHMScommaF", + "2023-07-14 08:00:01,123 Log", + datetime(2023, 7, 14, 8, 0, 1, 123000, tzinfo=local_tz), ), ( + "YMDHMSdotFZ", + "2023-07-14 08:00:01.123Z Log", + datetime(2023, 7, 14, 8, 0, 1, 123000, tzinfo=timezone.utc), + ), + ( + "YMDHMSdotFZ", "2023-07-14 08:00:01.123+0200 Log", datetime(2023, 7, 14, 8, 0, 1, 123000, tzinfo=timezone(timedelta(hours=2))), ), ( - "2023-07-14 08:00:01 Log", - datetime(2023, 7, 14, 8, 0, 1, tzinfo=local_tz)), + "YMDHMSdotF", + "2023-07-14 08:00:01.123 Log", + datetime(2023, 7, 14, 8, 0, 1, 123000, tzinfo=local_tz) + ), ( + "YMDHMSZ", + "2023-07-14T08:00:01Z Log", + datetime(2023, 7, 14, 8, 0, 1, tzinfo=timezone.utc), + ), + ( + "YMDHMSZ", + "2023-07-14T08:00:01+0200 Log", + datetime(2023, 7, 14, 8, 0, 1, tzinfo=timezone(timedelta(hours=2))), + ), + ( + "YMDHMS", + "2023-07-14 08:00:01 Log", + datetime(2023, 7, 14, 8, 0, 1, tzinfo=local_tz) + ), + ( + "YMDTHMScommaFZ", "2023-07-14T08:00:01,000Z Log", datetime(2023, 7, 14, 8, 0, 1, tzinfo=timezone.utc), ), ( + "YMDTHMScommaFZ", + "2023-07-14T08:00:01,000+0200 Log", + datetime(2023, 7, 14, 8, 0, 1, tzinfo=timezone(timedelta(hours=2))), + ), + ( + "YMDTHMScommaF", + "2023-07-14T08:00:01,000 Log", + datetime(2023, 7, 14, 8, 0, 1, tzinfo=local_tz), + ), + ( + "YMDTHMSdotFZ", + "2023-07-14T08:00:01.000Z Log", + datetime(2023, 7, 14, 8, 0, 1, tzinfo=timezone.utc), + ), + ( + "YMDTHMSdotFZ", + "2023-07-14T08:00:01.000+0200 Log", + datetime(2023, 7, 14, 8, 0, 1, tzinfo=timezone(timedelta(hours=2))), + ), + ( + "YMDTHMSdotF", + "2023-07-14T08:00:01.000 Log", + datetime(2023, 7, 14, 8, 0, 1, tzinfo=local_tz), + ), + ( + "YMDTHMSZ", + "2023-07-14T08:00:01Z Log", + datetime(2023, 7, 14, 8, 0, 1, tzinfo=timezone.utc), + ), + ( + "YMDTHMSZ", + "2023-07-14T08:00:01+0200 Log", + datetime(2023, 7, 14, 8, 0, 1, tzinfo=timezone(timedelta(hours=2))), + ), + ( + "YMDTHMS", + "2023-07-14T08:00:01 Log", + datetime(2023, 7, 14, 8, 0, 1, tzinfo=local_tz) + ), + ( + "BDHMS", "Jul 14 08:00:01 Log", datetime(datetime.now().year, 7, 14, 8, 0, 1, tzinfo=local_tz), ), + # HMSdot - TODO - gets date from file timestamp + ( + "PythonHttpServerLog", + '''::1 - - [22/Sep/2023 21:58:40] "GET /log1.txt HTTP/1.1" 200 -''', + datetime(2023, 9, 22, 21, 58, 40, tzinfo=local_tz), + ), + ( + "HttpServerAccessLog", + '''91.194.60.14 - - [16/Sep/2023:19:05:06 +0000] "GET /python_nutshell_app_a_search HTTP/1.1" 200 1027 "-"''', + datetime(2023, 9, 16, 19, 5, 6, tzinfo=timezone.utc), + ), ( + "FloatSecondsSinceEpoch", "1694561169.550987 Log", datetime.fromtimestamp(1694561169.550987, tz=timezone.utc), ), ( + "MilliSecondsSinceEpoch", "1694561169550 Log", datetime.fromtimestamp(1694561169550 / 1000, tz=timezone.utc), ), ( + "SecondsSinceEpoch", "1694561169 Log", datetime.fromtimestamp(1694561169, tz=timezone.utc), ), ], ) -def test_timestamp_format_parsing(string_date: str, expected_datetime: datetime): +def test_timestamp_format_parsing(tz_class: str, string_date: str, expected_datetime: datetime): _test_timestamp_format_parsing(string_date, expected_datetime)