From 30ffc0e7d9947eff3da8f10a931a84b430d9f449 Mon Sep 17 00:00:00 2001
From: Andy Grove
Date: Tue, 12 Apr 2022 17:09:29 -0600
Subject: [PATCH] Update csv float tests

Signed-off-by: Andy Grove
---
 integration_tests/src/main/python/csv_test.py | 17 ++++++++++++++++-
 .../test/resources/simple_float_values.csv    |  1 -
 .../src/test/resources/small_float_values.csv | 21 +++++++++++++++++++
 3 files changed, 37 insertions(+), 2 deletions(-)
 create mode 100644 integration_tests/src/test/resources/small_float_values.csv

diff --git a/integration_tests/src/main/python/csv_test.py b/integration_tests/src/main/python/csv_test.py
index 5a65efa56bd..f7297e1e5d3 100644
--- a/integration_tests/src/main/python/csv_test.py
+++ b/integration_tests/src/main/python/csv_test.py
@@ -239,7 +239,7 @@ def read_impl(spark):
     pytest.param('simple_float_values.csv', _int_schema, {'header': 'true'}),
     pytest.param('simple_float_values.csv', _long_schema, {'header': 'true'}),
     pytest.param('simple_float_values.csv', _float_schema, {'header': 'true'}),
-    pytest.param('simple_float_values.csv', _double_schema, {'header': 'true'}, marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/5211')),
+    pytest.param('simple_float_values.csv', _double_schema, {'header': 'true'}),
     pytest.param('simple_float_values.csv', _decimal_10_2_schema, {'header': 'true'}),
     pytest.param('simple_float_values.csv', _decimal_10_3_schema, {'header': 'true'}),
     pytest.param('simple_boolean_values.csv', _bool_schema, {'header': 'true'}),
@@ -257,6 +257,21 @@ def test_basic_csv_read(std_input_path, name, schema, options, read_func, v1_ena
     assert_gpu_and_cpu_are_equal_collect(read_func(std_input_path + '/' + name, schema, spark_tmp_table_factory, options),
             conf=updated_conf)
 
+@pytest.mark.parametrize('name,schema,options', [
+    pytest.param('small_float_values.csv', _float_schema, {'header': 'true'}),
+    pytest.param('small_float_values.csv', _double_schema, {'header': 'true'}),
+], ids=idfn)
+@pytest.mark.parametrize('read_func', [read_csv_df, read_csv_sql])
+@pytest.mark.parametrize('v1_enabled_list', ["", "csv"])
+@pytest.mark.parametrize('ansi_enabled', ["true", "false"])
+def test_csv_read_small_floats(std_input_path, name, schema, options, read_func, v1_enabled_list, ansi_enabled, spark_tmp_table_factory):
+    updated_conf=copy_and_update(_enable_all_types_conf, {
+        'spark.sql.sources.useV1SourceList': v1_enabled_list,
+        'spark.sql.ansi.enabled': ansi_enabled
+    })
+    assert_gpu_and_cpu_are_equal_collect(read_func(std_input_path + '/' + name, schema, spark_tmp_table_factory, options),
+            conf=updated_conf)
+
 csv_supported_gens = [
     # Spark does not escape '\r' or '\n' even though it uses it to mark end of record
     # This would require multiLine reads to work correctly so we avoid these chars
diff --git a/integration_tests/src/test/resources/simple_float_values.csv b/integration_tests/src/test/resources/simple_float_values.csv
index 1fdc6e048b5..0d9cfc6fc2c 100644
--- a/integration_tests/src/test/resources/simple_float_values.csv
+++ b/integration_tests/src/test/resources/simple_float_values.csv
@@ -15,5 +15,4 @@ bad
 3.4028236e+38
 1.7976931348623157E308
 1.7976931348623157e+308
-1.7976931348623158E308
 1.2e-234
\ No newline at end of file
diff --git a/integration_tests/src/test/resources/small_float_values.csv b/integration_tests/src/test/resources/small_float_values.csv
new file mode 100644
index 00000000000..58562fbe5cc
--- /dev/null
+++ b/integration_tests/src/test/resources/small_float_values.csv
@@ -0,0 +1,21 @@
+"number"
+0.001
+0.01
+0.1
+0.2
+0.3
+0.4
+1.0
+2.0
+3.0
+4.0
+-0.001
+-0.01
+-0.1
+-0.2
+-0.3
+-0.4
+-1.0
+-2.0
+-3.0
+-4.0
\ No newline at end of file