Skip to content

Add __setitem__ to tables. #1600

New issue

Have a question about this project? # for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “#”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? # to your account

Merged
merged 1 commit into from
Aug 3, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 9 additions & 1 deletion c/CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
----------------------
[0.99.13] - 2021-07-08
[0.99.14] - 2021-0X-XX
----------------------

**Features**

- Add `tsk_X_table_update_row` methods which allow modifying single rows of tables
(:user:`jeromekelleher`, :issue:`1545`, :pr:`1552`).

----------------------
[0.99.13] - 2021-07-08
----------------------
**Fixes**

- Fix segfault when very large columns overflow
Expand Down
10 changes: 10 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
@@ -1,3 +1,13 @@
--------------------
[0.3.8] - 2021-0X-XX
--------------------

**Features**

- Add `__setitem__` to all tables allowing single rows to be updated. For example
`tables.nodes[0] = tables.nodes[0].replace(flags=tskit.NODE_IS_SAMPLE)`
(:user:`jeromekelleher`, :user:`benjeffery`, :issue:`1545`, :pr:`1600`).

--------------------
[0.3.7] - 2021-07-08
--------------------
Expand Down
75 changes: 69 additions & 6 deletions python/tests/test_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -448,6 +448,64 @@ class NotADuck:
with pytest.raises(AttributeError, match="'NotADuck' object has no attribute"):
self.table_class().append(NotADuck())

def test_setitem(self):
table = self.table_class()
for row in self.make_transposed_input_data(10):
table.append(table.row_class(**row))
table2 = self.table_class()
for row in self.make_transposed_input_data(20)[10:]:
table2.append(table.row_class(**row))
assert table != table2

copy = table.copy()
for j in range(10):
table[j] = table[j]
table.assert_equals(copy)

for j in range(10):
table[j] = table2[j]
table.assert_equals(table2)

def test_setitem_duck_type(self):
class Duck:
pass

table = self.table_class()
for row in self.make_transposed_input_data(10):
table.append(table.row_class(**row))
table2 = self.table_class()
for row in self.make_transposed_input_data(20)[10:]:
table2.append(table.row_class(**row))
assert table != table2

for j in range(10):
duck = Duck()
for k, v in dataclasses.asdict(table2[j]).items():
setattr(duck, k, v)
table[j] = duck
table.assert_equals(table2)

def test_setitem_error(self):
class NotADuck:
pass

table = self.table_class()
table.append(table.row_class(**self.make_transposed_input_data(1)[0]))
with pytest.raises(AttributeError, match="'NotADuck' object has no attribute"):
table[0] = NotADuck()

with pytest.raises(IndexError, match="Index out of bounds"):
self.table_class()[0] = table[0]
with pytest.raises(IndexError, match="Index out of bounds"):
self.table_class()[-1] = table[0]

with pytest.raises(TypeError, match="Index must be integer"):
self.table_class()[0.5] = table[0]
with pytest.raises(TypeError, match="Index must be integer"):
self.table_class()[None] = table[0]
with pytest.raises(TypeError, match="Index must be integer"):
self.table_class()[[1]] = table[0]

def test_set_columns_data(self):
for num_rows in [0, 10, 100, 1000]:
input_data = {col.name: col.get_input(num_rows) for col in self.columns}
Expand Down Expand Up @@ -1347,12 +1405,6 @@ def test_bad_indexes(self, table):
with pytest.raises(TypeError, match="not supported between instances"):
table["foobar"]

def test_not_writable(self, table):
with pytest.raises(TypeError, match="object does not support item assignment"):
table[5] = 5
with pytest.raises(TypeError, match="object does not support item assignment"):
table[[5]] = 5


common_tests = [
CommonTestsMixin,
Expand Down Expand Up @@ -4665,3 +4717,14 @@ def test_with_mutation_times(self):
tables.compute_mutation_times()
ts = tables.tree_sequence()
self.verify_subset_union(ts)


class TestTableSetitemMetadata:
@pytest.mark.parametrize("table_name", tskit.TABLE_NAMES)
def test_setitem_metadata(self, ts_fixture, table_name):
table = getattr(ts_fixture.tables, table_name)
if hasattr(table, "metadata_schema"):
assert table.metadata_schema == tskit.MetadataSchema({"codec": "json"})
assert table[0].metadata != table[1].metadata
table[0] = table[1]
assert table[0] == table[1]
39 changes: 38 additions & 1 deletion python/tskit/tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ def __getitem__(self, index):
is True. Note that as the result is a new table, the row ids will change as tskit
row ids are row indexes.

:param index: the zero-index of a desired row, a slice of the desired rows, an
:param index: the index of a desired row, a slice of the desired rows, an
iterable or array of the desired row numbers, or a boolean array to use as
a mask.
"""
Expand Down Expand Up @@ -374,6 +374,43 @@ def __getitem__(self, index):

return ret

def __setitem__(self, index, new_row):
"""
Replaces a row of this table at the specified index with information from a
row-like object. Metadata, will be validated and encoded according to the table's
:attr:`metadata_schema<tskit.IndividualTable.metadata_schema>`.

:param index: the index of the row to change
:param row-like new_row: An object that has attributes corresponding to the
properties of the new row. Both the objects returned from ``table[i]`` and
e.g. ``ts.individual(i)`` work for this purpose, along with any other
object with the correct attributes.
"""
if isinstance(index, numbers.Integral):
# Single row by integer
if index < 0:
index += len(self)
if index < 0 or index >= len(self):
raise IndexError("Index out of bounds")
else:
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Let's just think ahead here to make sure we're not painting ourselves into a corner. What would the semantics be for other types of updates? I guess we could support other types of assignment here.

If the index argument is a slice/boolean array, then the new_row argument should be a Sequence of row_like values (of the same length as the index argument), and then we sequentially call update_row on these? Wouldn't be particularly efficient, but we could push the implementation down into C if we wanted to, and it would allow us to do things like

tables.edges[:] = tables.edges[::-1]

which I'm sure would be handy.

I'm not suggesting we do this now, just making sure we leave the door open for it in the future. I think TypeError on anything that's not an Integral for the index value does this?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yep, that all makes sense. The code as-is raises TypeError on anything non-Integral.

raise TypeError("Index must be integer")

row_data = {
column: getattr(new_row, column)
for column in self.column_names
if "_offset" not in column
}

# Encode the metadata - note that if this becomes a perf bottleneck it is
# possible to use the cached, encoded metadata in the row object, rather than
# decode and reencode
if "metadata" in row_data:
row_data["metadata"] = self.metadata_schema.validate_and_encode_row(
row_data["metadata"]
)

self.ll_table.update_row(row_index=index, **row_data)

def append(self, row):
"""
Adds a new row to this table and returns the ID of the new row. Metadata, if
Expand Down