diff --git a/python/docs/source/getting_started/install.rst b/python/docs/source/getting_started/install.rst index 13c6f8f3a28e2..601b45d00a7cf 100644 --- a/python/docs/source/getting_started/install.rst +++ b/python/docs/source/getting_started/install.rst @@ -154,11 +154,11 @@ Dependencies ============= ========================= ====================================== Package Minimum supported version Note ============= ========================= ====================================== -`pandas` 0.23.2 Optional for Spark SQL +`pandas` 1.0.5 Optional for Spark SQL `NumPy` 1.7 Required for MLlib DataFrame-based API `pyarrow` 1.0.0 Optional for Spark SQL `Py4J` 0.10.9.2 Required -`pandas` 0.23.2 Required for pandas API on Spark +`pandas` 1.0.5 Required for pandas API on Spark `pyarrow` 1.0.0 Required for pandas API on Spark `Numpy` 1.14 Required for pandas API on Spark ============= ========================= ====================================== diff --git a/python/docs/source/migration_guide/pyspark_3.2_to_3.3.rst b/python/docs/source/migration_guide/pyspark_3.2_to_3.3.rst index 90651a9e77055..f2701d4fb7216 100644 --- a/python/docs/source/migration_guide/pyspark_3.2_to_3.3.rst +++ b/python/docs/source/migration_guide/pyspark_3.2_to_3.3.rst @@ -22,3 +22,4 @@ Upgrading from PySpark 3.2 to 3.3 * In Spark 3.3, the ``pyspark.pandas.sql`` method follows [the standard Python string formatter](https://docs.python.org/3/library/string.html#format-string-syntax). To restore the previous behavior, set ``PYSPARK_PANDAS_SQL_LEGACY`` environment variable to ``1``. * In Spark 3.3, the ``drop`` method of pandas API on Spark DataFrame supports dropping rows by ``index``, and sets dropping by index instead of column by default. +* In Spark 3.3, PySpark raises the minimum required pandas version from 0.23.2 to 1.0.5. 
diff --git a/python/docs/source/user_guide/sql/arrow_pandas.rst b/python/docs/source/user_guide/sql/arrow_pandas.rst index 78d3e7ad84e3f..20a9f935d586f 100644 --- a/python/docs/source/user_guide/sql/arrow_pandas.rst +++ b/python/docs/source/user_guide/sql/arrow_pandas.rst @@ -387,7 +387,7 @@ working with timestamps in ``pandas_udf``\s to get the best performance, see Recommended Pandas and PyArrow Versions ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -For usage with pyspark.sql, the minimum supported versions of Pandas is 0.23.2 and PyArrow is 1.0.0. +For usage with pyspark.sql, the minimum supported versions are Pandas 1.0.5 and PyArrow 1.0.0. Higher versions may be used, however, compatibility and data correctness can not be guaranteed and should be verified by the user. diff --git a/python/pyspark/pandas/tests/test_series.py b/python/pyspark/pandas/tests/test_series.py index 0fc5d3d0aa005..51c26ad8301b0 100644 --- a/python/pyspark/pandas/tests/test_series.py +++ b/python/pyspark/pandas/tests/test_series.py @@ -2245,7 +2245,9 @@ def test_mad(self): pser.index = pmidx psser = ps.from_pandas(pser) - self.assert_eq(pser.mad(), psser.mad()) + # Mark almost as True to avoid precision issue like: + # "21.555555555555554 != 21.555555555555557" + self.assert_eq(pser.mad(), psser.mad(), almost=True) def test_to_frame(self): pser = pd.Series(["a", "b", "c"]) diff --git a/python/pyspark/sql/pandas/utils.py b/python/pyspark/sql/pandas/utils.py index cc0db017c301f..bc6202f854639 100644 --- a/python/pyspark/sql/pandas/utils.py +++ b/python/pyspark/sql/pandas/utils.py @@ -19,7 +19,7 @@ def require_minimum_pandas_version() -> None: """Raise ImportError if minimum version of Pandas is not installed""" # TODO(HyukjinKwon): Relocate and deduplicate the version specification. 
- minimum_pandas_version = "0.23.2" + minimum_pandas_version = "1.0.5" from distutils.version import LooseVersion diff --git a/python/setup.py b/python/setup.py index 4507a2686e2c5..174995d4aec49 100755 --- a/python/setup.py +++ b/python/setup.py @@ -111,7 +111,7 @@ def _supports_symlinks(): # For Arrow, you should also check ./pom.xml and ensure there are no breaking changes in the # binary format protocol with the Java version, see ARROW_HOME/format/* for specifications. # Also don't forget to update python/docs/source/getting_started/install.rst. -_minimum_pandas_version = "0.23.2" +_minimum_pandas_version = "1.0.5" _minimum_pyarrow_version = "1.0.0"