From 30ffc0e7d9947eff3da8f10a931a84b430d9f449 Mon Sep 17 00:00:00 2001
From: Andy Grove
Date: Tue, 12 Apr 2022 17:09:29 -0600
Subject: [PATCH] Update csv float tests

Signed-off-by: Andy Grove
---
 integration_tests/src/main/python/csv_test.py | 17 ++++++++++++++++-
 .../test/resources/simple_float_values.csv    |  1 -
 .../src/test/resources/small_float_values.csv | 21 +++++++++++++++++++
 3 files changed, 37 insertions(+), 2 deletions(-)
 create mode 100644 integration_tests/src/test/resources/small_float_values.csv

diff --git a/integration_tests/src/main/python/csv_test.py b/integration_tests/src/main/python/csv_test.py
index 5a65efa56bd..f7297e1e5d3 100644
--- a/integration_tests/src/main/python/csv_test.py
+++ b/integration_tests/src/main/python/csv_test.py
@@ -239,7 +239,7 @@ def read_impl(spark):
     pytest.param('simple_float_values.csv', _int_schema, {'header': 'true'}),
     pytest.param('simple_float_values.csv', _long_schema, {'header': 'true'}),
     pytest.param('simple_float_values.csv', _float_schema, {'header': 'true'}),
-    pytest.param('simple_float_values.csv', _double_schema, {'header': 'true'}, marks=pytest.mark.xfail(reason='https://github.com/NVIDIA/spark-rapids/issues/5211')),
+    pytest.param('simple_float_values.csv', _double_schema, {'header': 'true'}),
     pytest.param('simple_float_values.csv', _decimal_10_2_schema, {'header': 'true'}),
     pytest.param('simple_float_values.csv', _decimal_10_3_schema, {'header': 'true'}),
     pytest.param('simple_boolean_values.csv', _bool_schema, {'header': 'true'}),
@@ -257,6 +257,21 @@ def test_basic_csv_read(std_input_path, name, schema, options, read_func, v1_ena
     assert_gpu_and_cpu_are_equal_collect(read_func(std_input_path + '/' + name, schema, spark_tmp_table_factory, options),
             conf=updated_conf)
 
+@pytest.mark.parametrize('name,schema,options', [
+    pytest.param('small_float_values.csv', _float_schema, {'header': 'true'}),
+    pytest.param('small_float_values.csv', _double_schema, {'header': 'true'}),
+], ids=idfn)
+@pytest.mark.parametrize('read_func', [read_csv_df, read_csv_sql])
+@pytest.mark.parametrize('v1_enabled_list', ["", "csv"])
+@pytest.mark.parametrize('ansi_enabled', ["true", "false"])
+def test_csv_read_small_floats(std_input_path, name, schema, options, read_func, v1_enabled_list, ansi_enabled, spark_tmp_table_factory):
+    updated_conf=copy_and_update(_enable_all_types_conf, {
+        'spark.sql.sources.useV1SourceList': v1_enabled_list,
+        'spark.sql.ansi.enabled': ansi_enabled
+    })
+    assert_gpu_and_cpu_are_equal_collect(read_func(std_input_path + '/' + name, schema, spark_tmp_table_factory, options),
+            conf=updated_conf)
+
 csv_supported_gens = [
     # Spark does not escape '\r' or '\n' even though it uses it to mark end of record
     # This would require multiLine reads to work correctly so we avoid these chars
diff --git a/integration_tests/src/test/resources/simple_float_values.csv b/integration_tests/src/test/resources/simple_float_values.csv
index 1fdc6e048b5..0d9cfc6fc2c 100644
--- a/integration_tests/src/test/resources/simple_float_values.csv
+++ b/integration_tests/src/test/resources/simple_float_values.csv
@@ -15,5 +15,4 @@ bad
 3.4028236e+38
 1.7976931348623157E308
 1.7976931348623157e+308
-1.7976931348623158E308
 1.2e-234
\ No newline at end of file
diff --git a/integration_tests/src/test/resources/small_float_values.csv b/integration_tests/src/test/resources/small_float_values.csv
new file mode 100644
index 00000000000..58562fbe5cc
--- /dev/null
+++ b/integration_tests/src/test/resources/small_float_values.csv
@@ -0,0 +1,21 @@
+"number"
+0.001
+0.01
+0.1
+0.2
+0.3
+0.4
+1.0
+2.0
+3.0
+4.0
+-0.001
+-0.01
+-0.1
+-0.2
+-0.3
+-0.4
+-1.0
+-2.0
+-3.0
+-4.0
\ No newline at end of file