Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Supports list-like Python objects for Series comparison. #2022

Open
wants to merge 13 commits into
base: master
Choose a base branch
from
Next Next commit
Series.eq supports list-like python objects
  • Loading branch information
itholic committed Jan 27, 2021
commit 708968a71aa9e8caee907e05e9591d2bc54def45
8 changes: 8 additions & 0 deletions databricks/koalas/series.py
Original file line number Diff line number Diff line change
@@ -658,6 +658,14 @@ def eq(self, other) -> bool:
d False
Name: b, dtype: bool
"""
if isinstance(other, (list, tuple)):
if len(self) == len(other):
other = ks.Series(other)
else:
raise ValueError("Lengths must be equal")
# pandas always returns False for all items with dict and set.
elif isinstance(other, (dict, set)):
return self != self
return self == other

equals = eq
36 changes: 36 additions & 0 deletions databricks/koalas/tests/test_ops_on_diff_frames.py
Original file line number Diff line number Diff line change
@@ -1370,6 +1370,28 @@ def test_index_ops(self):
else:
self.assert_eq(kidx1 * 10 + kidx3, (pidx1 * 10 + pidx3).rename(None))

def test_series_eq(self):
    """Compare ``Series.eq`` against pandas for Series, Index, list and tuple operands.

    Each case computes the pandas result first, then the Koalas result
    (index-sorted), and checks them with ``assert_eq``.
    """
    values = [np.nan, 1, 3, 4, np.nan, 6]
    pser = pd.Series([1, 2, 3, 4, 5, 6], name="x")
    kser = ks.from_pandas(pser)

    # other = Series, then other = Index: the pandas operand is converted to
    # its Koalas counterpart before being handed to the Koalas series.
    for build_pandas_other in (pd.Series, pd.Index):
        pandas_other = build_pandas_other(values, name="x")
        koalas_other = ks.from_pandas(pandas_other)
        expected = pser.eq(pandas_other)
        actual = kser.eq(koalas_other).sort_index()
        self.assert_eq(expected, actual)

    # other = list, then other = tuple: the same plain Python sequence is
    # passed to both libraries unchanged.
    for other in (list(values), tuple(values)):
        expected = pser.eq(other)
        actual = kser.eq(other).sort_index()
        self.assert_eq(expected, actual)


class OpsOnDiffFramesDisabledTest(ReusedSQLTestCase, SQLTestUtils):
@classmethod
@@ -1511,3 +1533,17 @@ def test_mask(self):

with self.assertRaisesRegex(ValueError, "Cannot combine the series or dataframe"):
kdf1.mask(kdf2 > -250)

def test_series_eq(self):
    """Expect ``Series.eq`` to raise ``ValueError`` for Series, Index, list and tuple operands.

    Every operand must be rejected with the message
    "Cannot combine the series or dataframe".
    """
    kser = ks.from_pandas(pd.Series([1, 2, 3, 4, 5, 6], name="x"))

    values = [np.nan, 1, 3, 4, np.nan, 6]
    # All operands are built up front, before any assertion runs.
    others = [
        ks.Series(values, name="x"),
        ks.Index(values, name="x"),
        list(values),
        tuple(values),
    ]

    expected_message = "Cannot combine the series or dataframe"
    for other in others:
        # Callable form of assertRaisesRegex: invokes kser.eq(other) and
        # checks the raised exception type and message.
        self.assertRaisesRegex(ValueError, expected_message, kser.eq, other)
25 changes: 25 additions & 0 deletions databricks/koalas/tests/test_series.py
Original file line number Diff line number Diff line change
@@ -2639,3 +2639,28 @@ def test_backfill(self):
# Test `inplace=True`
kser.backfill(inplace=True)
self.assert_eq(expected, kser)

def test_eq(self):
    """Check ``Series.eq`` against pandas and verify its length validation.

    Series, dict and set operands are compared with the pandas result via
    ``assert_eq``. List and tuple operands whose length differs from the
    series must make the Koalas call raise
    ``ValueError("Lengths must be equal")``.
    """
    pser = pd.Series([1, 2, 3, 4, 5, 6], name="x")
    kser = ks.from_pandas(pser)

    # other = Series
    self.assert_eq(pser.eq(pser), kser.eq(kser))

    # other = dict
    other = {1: None, 2: None, 3: None, 4: None, np.nan: None, 6: None}
    self.assert_eq(pser.eq(other), kser.eq(other))

    # other = set
    other = {1, 2, 3, 4, np.nan, 6}
    self.assert_eq(pser.eq(other), kser.eq(other))

    # other = list / tuple with a different length.
    # Only the Koalas call goes inside the context manager. Wrapping
    # ``self.assert_eq(pser.eq(other), kser.eq(other))`` would evaluate the
    # pandas call first; pandas is expected to raise the same message, which
    # would satisfy the assertion before ``kser.eq(other)`` ever ran.
    mismatched_others = (
        [np.nan, 1, 3, 4, np.nan],
        (np.nan, 1, 3, 4, np.nan),
    )
    for other in mismatched_others:
        with self.assertRaisesRegex(ValueError, "Lengths must be equal"):
            kser.eq(other)