From 02126ddc21ca510e8a0652431abb6df7028ca12e Mon Sep 17 00:00:00 2001 From: pbrochart Date: Thu, 19 Jun 2025 18:12:59 +0200 Subject: [PATCH 1/5] BUG: Fix implicit conversion to float64 with isin() --- doc/source/reference/arrays.rst | 1 + doc/source/whatsnew/v3.0.0.rst | 1 + pandas/core/algorithms.py | 6 +-- pandas/core/dtypes/api.py | 2 + pandas/core/dtypes/common.py | 75 +++++++++++++++++++++++++++++++++ pandas/tests/api/test_api.py | 1 + pandas/tests/api/test_types.py | 1 + pandas/tests/test_algos.py | 7 +++ 8 files changed, 90 insertions(+), 4 deletions(-) diff --git a/doc/source/reference/arrays.rst b/doc/source/reference/arrays.rst index d37eebef5c0c0..fd3bbff1843a3 100644 --- a/doc/source/reference/arrays.rst +++ b/doc/source/reference/arrays.rst @@ -667,6 +667,7 @@ Data type introspection api.types.is_dtype_equal api.types.is_extension_array_dtype api.types.is_float_dtype + api.types.is_implicit_conversion_to_float64 api.types.is_int64_dtype api.types.is_integer_dtype api.types.is_interval_dtype diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 5c53267158eab..f3cbe9126a443 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -404,6 +404,7 @@ Other API changes - Index set operations (like union or intersection) will now ignore the dtype of an empty ``RangeIndex`` or empty ``Index`` with object dtype when determining the dtype of the resulting Index (:issue:`60797`) +- Added :func:`pandas.api.types.is_implicit_conversion_to_float64` to check if there is a silent conversion to float64 between two dtypes (:issue:`61676`) .. --------------------------------------------------------------------------- .. 
_whatsnew_300.deprecations: diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index 7fc391d3ffb51..d3b276e55cd42 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -47,15 +47,14 @@ is_bool_dtype, is_complex_dtype, is_dict_like, - is_dtype_equal, is_extension_array_dtype, is_float, is_float_dtype, + is_implicit_conversion_to_float64, is_integer, is_integer_dtype, is_list_like, is_object_dtype, - is_signed_integer_dtype, needs_i8_conversion, ) from pandas.core.dtypes.concat import concat_compat @@ -511,8 +510,7 @@ def isin(comps: ListLike, values: ListLike) -> npt.NDArray[np.bool_]: if ( len(values) > 0 and values.dtype.kind in "iufcb" - and not is_signed_integer_dtype(comps) - and not is_dtype_equal(values, comps) + and is_implicit_conversion_to_float64(values, comps) ): # GH#46485 Use object to avoid upcast to float64 later # TODO: Share with _find_common_type_compat diff --git a/pandas/core/dtypes/api.py b/pandas/core/dtypes/api.py index e66104d6afcd9..3f504228b15e7 100644 --- a/pandas/core/dtypes/api.py +++ b/pandas/core/dtypes/api.py @@ -17,6 +17,7 @@ is_float, is_float_dtype, is_hashable, + is_implicit_conversion_to_float64, is_int64_dtype, is_integer, is_integer_dtype, @@ -59,6 +60,7 @@ "is_float", "is_float_dtype", "is_hashable", + "is_implicit_conversion_to_float64", "is_int64_dtype", "is_integer", "is_integer_dtype", diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 68d99937f728c..595dcddea463e 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -713,6 +713,80 @@ def is_dtype_equal(source, target) -> bool: return False +def is_implicit_conversion_to_float64(source, target) -> bool: + """ + Check if there is an implicit conversion to float64 with both dtypes. + + Parameters + ---------- + source : type or str + The first dtype to compare. + target : type or str + The second dtype to compare. 
+ + Returns + ------- + boolean + Whether or not there is an implicit conversion to float64. + + See AlsoMore actions + -------- + api.types.is_categorical_dtype : Check whether the provided array or dtype + is of the Categorical dtype. + api.types.is_string_dtype : Check whether the provided array or dtype + is of the string dtype. + api.types.is_object_dtype : Check whether an array-like or dtype is of the + object dtype. + + Examples + -------- + >>> from pandas.api.types import is_implicit_conversion_to_float64 + >>> is_implicit_conversion_to_float64(int, float) + False + >>> is_implicit_conversion_to_float64("int", int) + False + >>> import numpy as np + >>> is_implicit_conversion_to_float64(int, np.int64) + False + >>> is_implicit_conversion_to_float64(np.uint64, np.int64) + True + >>> is_implicit_conversion_to_float64(np.uint64, np.float64) + False + >>> is_implicit_conversion_to_float64(np.uint64, np.uint64) + False + >>> is_implicit_conversion_to_float64(np.uint32, np.uint32) + False + >>> is_implicit_conversion_to_float64(np.uint32, np.int32) + False + >>> is_implicit_conversion_to_float64(np.int32, np.int32) + False + >>> is_implicit_conversion_to_float64(object, "category") + False + import pandas as pd + >>> is_implicit_conversion_to_float64(np.int64, pd.UInt64Dtype()) + True + >>> from pandas.core.dtypes.dtypes import CategoricalDtype + >>> is_implicit_conversion_to_float64(CategoricalDtype(), "category") + False + """ + try: + src = _get_dtype(source) + tar = _get_dtype(target) + # check only valid dtypes related to implicit conversion to float64 + # other data types derived from 64-bit integers such as U/Int64Dtype + # should also work + if ( + src.kind in "iu" and src.itemsize == 8 + and tar.kind in "iu" and tar.itemsize == 8 + ): + return src != tar + else: + return False + except (TypeError, AttributeError, ImportError): + # invalid comparison + return False + + def is_integer_dtype(arr_or_dtype) -> bool: """ Check whether the provided array or 
dtype is of an integer dtype. @@ -1934,6 +2008,7 @@ def is_all_strings(value: ArrayLike) -> bool: "is_extension_array_dtype", "is_file_like", "is_float_dtype", + "is_implicit_conversion_to_float64", "is_int64_dtype", "is_integer_dtype", "is_interval_dtype", diff --git a/pandas/tests/api/test_api.py b/pandas/tests/api/test_api.py index 871e977cbe2f8..24019b8e036a1 100644 --- a/pandas/tests/api/test_api.py +++ b/pandas/tests/api/test_api.py @@ -295,6 +295,7 @@ class TestApi(Base): "is_float", "is_float_dtype", "is_hashable", + "is_implicit_conversion_to_float64", "is_int64_dtype", "is_integer", "is_integer_dtype", diff --git a/pandas/tests/api/test_types.py b/pandas/tests/api/test_types.py index bf39370c49d76..659b81a417cb6 100644 --- a/pandas/tests/api/test_types.py +++ b/pandas/tests/api/test_types.py @@ -20,6 +20,7 @@ class TestTypes(Base): "is_dtype_equal", "is_float", "is_float_dtype", + "is_implicit_conversion_to_float64", "is_int64_dtype", "is_integer", "is_integer_dtype", diff --git a/pandas/tests/test_algos.py b/pandas/tests/test_algos.py index 7fb421e27bb40..ec87441e3941a 100644 --- a/pandas/tests/test_algos.py +++ b/pandas/tests/test_algos.py @@ -1197,6 +1197,13 @@ def test_isin_unsigned_dtype(self): expected = Series(False) tm.assert_series_equal(result, expected) + def test_isin_unsigned_dtype_other_side(self): + # GH#46485 + ser = Series([1378774140726870442], dtype=np.int64) + result = ser.isin([np.uint64(1378774140726870528)]) + expected = Series(False) + tm.assert_series_equal(result, expected) + class TestValueCounts: def test_value_counts(self): From 73ad0dc86f9a279f49254c819889a66dafd052e2 Mon Sep 17 00:00:00 2001 From: pbrochart Date: Thu, 19 Jun 2025 19:48:49 +0200 Subject: [PATCH 2/5] fix pre-commit --- pandas/core/dtypes/common.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 595dcddea463e..42224b4c528fc 100644 --- a/pandas/core/dtypes/common.py +++ 
b/pandas/core/dtypes/common.py @@ -776,8 +776,10 @@ def is_implicit_conversion_to_float64(source, target) -> bool: # other data types derived from 64-bit integers such as U/Int64Dtype # should also work if ( - src.kind in "iu" and src.itemsize == 8 - and tar.kind in "iu" and tar.itemsize == 8 + src.kind in "iu" + and src.itemsize == 8 + and tar.kind in "iu" + and tar.itemsize == 8 ): return src != tar else: From e0afe0bde584a59e5c107d7fd5886b311d65ab8f Mon Sep 17 00:00:00 2001 From: pbrochart Date: Thu, 19 Jun 2025 20:44:18 +0200 Subject: [PATCH 3/5] fix type --- pandas/core/dtypes/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index 42224b4c528fc..ea152a98a78cc 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -777,9 +777,9 @@ def is_implicit_conversion_to_float64(source, target) -> bool: # should also work if ( src.kind in "iu" - and src.itemsize == 8 + and src.itemsize == 8 # type: ignore[union-attr] and tar.kind in "iu" - and tar.itemsize == 8 + and tar.itemsize == 8 # type: ignore[union-attr] ): return src != tar else: From 802ac84887f9d80c3d912e9d6f51ed0f1ba5a6d0 Mon Sep 17 00:00:00 2001 From: pbrochart Date: Thu, 19 Jun 2025 21:17:14 +0200 Subject: [PATCH 4/5] fix typo --- pandas/core/dtypes/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index ea152a98a78cc..dc0f969c2a741 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -729,7 +729,7 @@ def is_implicit_conversion_to_float64(source, target) -> bool: boolean Whether or not there is an implicit conversion to float64. - See AlsoMore actions + See Also -------- api.types.is_categorical_dtype : Check whether the provided array or dtype is of the Categorical dtype. 
From 670a8f9f38b9b0f316e74cff9a6e16bfe95577bd Mon Sep 17 00:00:00 2001 From: pbrochart Date: Thu, 19 Jun 2025 21:40:30 +0200 Subject: [PATCH 5/5] remove import of numpy and pandas --- pandas/core/dtypes/common.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/dtypes/common.py b/pandas/core/dtypes/common.py index dc0f969c2a741..9bc3daa151bcb 100644 --- a/pandas/core/dtypes/common.py +++ b/pandas/core/dtypes/common.py @@ -745,7 +745,6 @@ def is_implicit_conversion_to_float64(source, target) -> bool: False >>> is_implicit_conversion_to_float64("int", int) False - >>> import numpy as np >>> is_implicit_conversion_to_float64(int, np.int64) False >>> is_implicit_conversion_to_float64(np.uint64, np.int64) @@ -762,7 +761,6 @@ def is_implicit_conversion_to_float64(source, target) -> bool: False >>> is_implicit_conversion_to_float64(object, "category") False - import pandas as pd >>> is_implicit_conversion_to_float64(np.int64, pd.UInt64Dtype()) True >>> from pandas.core.dtypes.dtypes import CategoricalDtype