From 5539c51646d3d2150df7476aa5ac9d075998b235 Mon Sep 17 00:00:00 2001 From: Kevin Zhang Date: Fri, 15 Apr 2022 06:58:22 -0700 Subject: [PATCH] fix: Addresses ZeroDivisionError when materializing file source with same timestamps (#2551) * Add docs for Go feature server Signed-off-by: Felix Wang * Update go feature server docs Signed-off-by: Kevin Zhang * Address review components Signed-off-by: Kevin Zhang * Fix Signed-off-by: Kevin Zhang * Revert Signed-off-by: Kevin Zhang * Fix Signed-off-by: Kevin Zhang * Update comment Signed-off-by: Kevin Zhang * Fix Signed-off-by: Kevin Zhang * Fix Signed-off-by: Kevin Zhang * Revert indent Signed-off-by: Kevin Zhang * fix comment Signed-off-by: Kevin Zhang Co-authored-by: Felix Wang --- .../feature-servers/go-feature-retrieval.md | 2 +- sdk/python/feast/infra/offline_stores/file.py | 22 +++++++++++++++---- 2 files changed, 19 insertions(+), 5 deletions(-) diff --git a/docs/reference/feature-servers/go-feature-retrieval.md b/docs/reference/feature-servers/go-feature-retrieval.md index 999a142c07d..415817dd85e 100644 --- a/docs/reference/feature-servers/go-feature-retrieval.md +++ b/docs/reference/feature-servers/go-feature-retrieval.md @@ -10,7 +10,7 @@ The Go Feature Retrieval component currently only supports Redis and Sqlite as o ## Installation -As long as you are running macOS or linux x86 with python version 3.7-3.10, the go component comes pre-compiled when you run install feast. +As long as you are running macOS or linux, on x86, with python version 3.7-3.10, the go component comes pre-compiled when you install feast. For developers, if you want to build from source, run `make compile-go-lib` to build and compile the go server. 
diff --git a/sdk/python/feast/infra/offline_stores/file.py b/sdk/python/feast/infra/offline_stores/file.py index cb6e874f8a9..a7d8b25abfb 100644 --- a/sdk/python/feast/infra/offline_stores/file.py +++ b/sdk/python/feast/infra/offline_stores/file.py @@ -299,11 +299,25 @@ def evaluate_offline_job(): if created_timestamp_column else [event_timestamp_column] ) + # The try/except block works around this Dask issue: https://github.com/dask/dask/issues/8939. + # TODO(kevjumba): remove the try/except once the fix is merged upstream in Dask. + try: + if created_timestamp_column: + source_df = source_df.sort_values(by=created_timestamp_column,) + + source_df = source_df.sort_values(by=event_timestamp_column) + + except ZeroDivisionError: + # Fall back to a single partition when every value in the timestamp column is identical, so that the partitioning + # algorithm does not try to divide by zero. + if created_timestamp_column: + source_df = source_df.sort_values( + by=created_timestamp_column, npartitions=1 + ) - if created_timestamp_column: - source_df = source_df.sort_values(by=created_timestamp_column) - - source_df = source_df.sort_values(by=event_timestamp_column) + source_df = source_df.sort_values( + by=event_timestamp_column, npartitions=1 + ) source_df = source_df[ (source_df[event_timestamp_column] >= start_date)