Skip to content

Commit

Permalink
Fix failing timestamp conversion tests
Browse files Browse the repository at this point in the history
  • Loading branch information
ptmcg committed Sep 28, 2024
1 parent 297fc54 commit 4de2643
Show file tree
Hide file tree
Showing 2 changed files with 107 additions and 18 deletions.
20 changes: 10 additions & 10 deletions logmerger/timestamp_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -159,11 +159,11 @@ def __call__(self, obj: T) -> tuple[datetime | None, T]:
return ret[0], strip_escape_sequences(ret[1]).rstrip()


class YMDHMScommaFTZ(TimestampedLineTransformer):
class YMDHMScommaFZ(TimestampedLineTransformer):
# log files with timestamp "YYYY-MM-DD HH:MM:SS,SSS<timezone>"
timestamp_pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3,}\s?(?:Z|[+-]\d{4})"
pattern = fr"(({timestamp_pattern})\s)"
strptime_format = datetime.fromisoformat
strptime_format = lambda _, s: datetime.strptime(s, "%Y-%m-%d %H:%M:%S,%f%z")
has_timezone = True

def __init__(self):
Expand All @@ -174,7 +174,7 @@ class YMDHMScommaF(TimestampedLineTransformer):
# log files with timestamp "YYYY-MM-DD HH:MM:SS,SSS"
timestamp_pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3,}"
pattern = fr"(({timestamp_pattern})\s)"
strptime_format = datetime.fromisoformat
strptime_format = lambda _, s: datetime.strptime(s, "%Y-%m-%d %H:%M:%S,%f")

def __init__(self):
super().__init__(self.pattern, self.strptime_format)
Expand All @@ -184,7 +184,7 @@ class YMDHMSdotFZ(TimestampedLineTransformer):
# log files with timestamp "YYYY-MM-DD HH:MM:SS.SSS<timezone>"
timestamp_pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3,}\s?(?:Z|[+-]\d{4})"
pattern = fr"(({timestamp_pattern})\s)"
strptime_format = datetime.fromisoformat
strptime_format = lambda _, s: datetime.strptime(s, "%Y-%m-%d %H:%M:%S.%f%z")
has_timezone = True

def __init__(self):
Expand All @@ -195,7 +195,7 @@ class YMDHMSdotF(TimestampedLineTransformer):
# log files with timestamp "YYYY-MM-DD HH:MM:SS.SSS"
timestamp_pattern = r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}\.\d{3,}"
pattern = fr"(({timestamp_pattern})\s)"
strptime_format = datetime.fromisoformat
strptime_format = lambda _, s: datetime.strptime(s, "%Y-%m-%d %H:%M:%S.%f")

def __init__(self):
super().__init__(self.pattern, self.strptime_format)
Expand Down Expand Up @@ -226,7 +226,7 @@ class YMDTHMScommaFZ(TimestampedLineTransformer):
# log files with timestamp "YYYY-MM-DDTHH:MM:SS,SSS<timezone>"
timestamp_pattern = r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2},\d{3,}\s?(?:Z|[+-]\d{4})"
pattern = fr"(({timestamp_pattern})\s)"
strptime_format = datetime.fromisoformat
strptime_format = lambda _, s: datetime.strptime(s, "%Y-%m-%dT%H:%M:%S,%f%z")
has_timezone = True

def __init__(self):
Expand All @@ -237,7 +237,7 @@ class YMDTHMScommaF(TimestampedLineTransformer):
# log files with timestamp "YYYY-MM-DDTHH:MM:SS,SSS"
timestamp_pattern = r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2},\d{3,}"
pattern = fr"(({timestamp_pattern})\s)"
strptime_format = datetime.fromisoformat
strptime_format = lambda _, s: datetime.strptime(s, "%Y-%m-%dT%H:%M:%S,%f")

def __init__(self):
super().__init__(self.pattern, self.strptime_format)
Expand All @@ -247,7 +247,7 @@ class YMDTHMSdotFZ(TimestampedLineTransformer):
# log files with timestamp "YYYY-MM-DDTHH:MM:SS.SSS<timezone>"
timestamp_pattern = r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3,}\s?(?:Z|[+-]\d{4}Z?)"
pattern = fr"(({timestamp_pattern})\s)"
strptime_format = datetime.fromisoformat
strptime_format = lambda _, s: datetime.strptime(s, "%Y-%m-%dT%H:%M:%S.%f%z")
has_timezone = True

def __init__(self):
Expand All @@ -258,7 +258,7 @@ class YMDTHMSdotF(TimestampedLineTransformer):
# log files with timestamp "YYYY-MM-DDTHH:MM:SS.SSS"
timestamp_pattern = r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\.\d{3,}"
pattern = fr"(({timestamp_pattern})\s)"
strptime_format = datetime.fromisoformat
strptime_format = lambda _, s: datetime.strptime(s, "%Y-%m-%dT%H:%M:%S.%f")

def __init__(self):
super().__init__(self.pattern, self.strptime_format)
Expand All @@ -268,7 +268,7 @@ class YMDTHMSZ(TimestampedLineTransformer):
# log files with timestamp "YYYY-MM-DDTHH:MM:SS<timezone>"
timestamp_pattern = r"\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}\s?(?:Z|[+-]\d{4})"
pattern = fr"(({timestamp_pattern})\s)"
strptime_format = datetime.fromisoformat
strptime_format = lambda _, s: datetime.strptime(s, "%Y-%m-%dT%H:%M:%S%z")
has_timezone = True

def __init__(self):
Expand Down
105 changes: 97 additions & 8 deletions tests/test_timestamp_formats.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,57 +12,146 @@ def _test_timestamp_format_parsing(string_date: str, expected_datetime: datetime
string_date
)

parsed_datetime, _ = transformer(string_date)
try:
parsed_datetime, _ = transformer(string_date)
except ValueError as ve:
raise AssertionError(
f"failed to parse {string_date!r} with transformer {type(transformer).__name__}"
) from ve

parsed_datetime = parsed_datetime.astimezone(timezone.utc)
print(repr(string_date))
print(type(transformer).__name__)
print("Parsed time :", parsed_datetime)
print("Expected time:", expected_datetime)
assert parsed_datetime == expected_datetime
assert parsed_datetime == expected_datetime, f"failed to convert {string_date!r} with transformer {type(transformer).__name__}"


@pytest.mark.parametrize(
"string_date, expected_datetime",
"tz_class, string_date, expected_datetime",
[
(
"YMDHMScommaFZ",
"2023-07-14 08:00:01,000Z Log",
datetime(2023, 7, 14, 8, 0, 1, tzinfo=timezone.utc),
),
(
"YMDHMScommaFZ",
"2023-07-14 08:00:01,123+0200 Log",
datetime(2023, 7, 14, 8, 0, 1, 123000, tzinfo=timezone(timedelta(hours=2))),
),
(
"2023-07-14 08:00:01.000Z Log",
datetime(2023, 7, 14, 8, 0, 1, tzinfo=timezone.utc),
"YMDHMScommaF",
"2023-07-14 08:00:01,123 Log",
datetime(2023, 7, 14, 8, 0, 1, 123000, tzinfo=local_tz),
),
(
"YMDHMSdotFZ",
"2023-07-14 08:00:01.123Z Log",
datetime(2023, 7, 14, 8, 0, 1, 123000, tzinfo=timezone.utc),
),
(
"YMDHMSdotFZ",
"2023-07-14 08:00:01.123+0200 Log",
datetime(2023, 7, 14, 8, 0, 1, 123000, tzinfo=timezone(timedelta(hours=2))),
),
(
"2023-07-14 08:00:01 Log",
datetime(2023, 7, 14, 8, 0, 1, tzinfo=local_tz)),
"YMDHMSdotF",
"2023-07-14 08:00:01.123 Log",
datetime(2023, 7, 14, 8, 0, 1, 123000, tzinfo=local_tz)
),
(
"YMDHMSZ",
"2023-07-14T08:00:01Z Log",
datetime(2023, 7, 14, 8, 0, 1, tzinfo=timezone.utc),
),
(
"YMDHMSZ",
"2023-07-14T08:00:01+0200 Log",
datetime(2023, 7, 14, 8, 0, 1, tzinfo=timezone(timedelta(hours=2))),
),
(
"YMDHMS",
"2023-07-14 08:00:01 Log",
datetime(2023, 7, 14, 8, 0, 1, tzinfo=local_tz)
),
(
"YMDTHMScommaFZ",
"2023-07-14T08:00:01,000Z Log",
datetime(2023, 7, 14, 8, 0, 1, tzinfo=timezone.utc),
),
(
"YMDTHMScommaFZ",
"2023-07-14T08:00:01,000+0200 Log",
datetime(2023, 7, 14, 8, 0, 1, tzinfo=timezone(timedelta(hours=2))),
),
(
"YMDTHMScommaF",
"2023-07-14T08:00:01,000 Log",
datetime(2023, 7, 14, 8, 0, 1, tzinfo=local_tz),
),
(
"YMDTHMSdotFZ",
"2023-07-14T08:00:01.000Z Log",
datetime(2023, 7, 14, 8, 0, 1, tzinfo=timezone.utc),
),
(
"YMDTHMSdotFZ",
"2023-07-14T08:00:01.000+0200 Log",
datetime(2023, 7, 14, 8, 0, 1, tzinfo=timezone(timedelta(hours=2))),
),
(
"YMDTHMSdotF",
"2023-07-14T08:00:01.000 Log",
datetime(2023, 7, 14, 8, 0, 1, tzinfo=local_tz),
),
(
"YMDTHMSZ",
"2023-07-14T08:00:01Z Log",
datetime(2023, 7, 14, 8, 0, 1, tzinfo=timezone.utc),
),
(
"YMDTHMSZ",
"2023-07-14T08:00:01+0200 Log",
datetime(2023, 7, 14, 8, 0, 1, tzinfo=timezone(timedelta(hours=2))),
),
(
"YMDTHMS",
"2023-07-14T08:00:01 Log",
datetime(2023, 7, 14, 8, 0, 1, tzinfo=local_tz)
),
(
"BDHMS",
"Jul 14 08:00:01 Log",
datetime(datetime.now().year, 7, 14, 8, 0, 1, tzinfo=local_tz),
),
# HMSdot - TODO - gets date from file timestamp
(
"PythonHttpServerLog",
'''::1 - - [22/Sep/2023 21:58:40] "GET /log1.txt HTTP/1.1" 200 -''',
datetime(2023, 9, 22, 21, 58, 40, tzinfo=local_tz),
),
(
"HttpServerAccessLog",
'''91.194.60.14 - - [16/Sep/2023:19:05:06 +0000] "GET /python_nutshell_app_a_search HTTP/1.1" 200 1027 "-"''',
datetime(2023, 9, 16, 19, 5, 6, tzinfo=timezone.utc),
),
(
"FloatSecondsSinceEpoch",
"1694561169.550987 Log",
datetime.fromtimestamp(1694561169.550987, tz=timezone.utc),
),
(
"MilliSecondsSinceEpoch",
"1694561169550 Log",
datetime.fromtimestamp(1694561169550 / 1000, tz=timezone.utc),
),
(
"SecondsSinceEpoch",
"1694561169 Log",
datetime.fromtimestamp(1694561169, tz=timezone.utc),
),
],
)
def test_timestamp_format_parsing(string_date: str, expected_datetime: datetime):
def test_timestamp_format_parsing(tz_class: str, string_date: str, expected_datetime: datetime):
_test_timestamp_format_parsing(string_date, expected_datetime)

0 comments on commit 4de2643

Please sign in to comment.