From fd5d3fce133d03be2c0a55dee0756a754a9b289c Mon Sep 17 00:00:00 2001 From: edknv <109497216+edknv@users.noreply.github.com> Date: Tue, 27 Dec 2022 11:03:04 -0800 Subject: [PATCH] Use tf.function for list column operations (#89) --- tests/unit/dataloader/test_tf_dataloader.py | 31 ++++++++++++++------- 1 file changed, 21 insertions(+), 10 deletions(-) diff --git a/tests/unit/dataloader/test_tf_dataloader.py b/tests/unit/dataloader/test_tf_dataloader.py index 06d70d44..b395a2d5 100644 --- a/tests/unit/dataloader/test_tf_dataloader.py +++ b/tests/unit/dataloader/test_tf_dataloader.py @@ -94,25 +94,36 @@ def test_nested_list(): schema = ds.schema schema["label"] = schema["label"].with_tags([Tags.TARGET]) ds.schema = schema - train_dataset = tf_dataloader.Loader( + loader = tf_dataloader.Loader( ds, batch_size=batch_size, shuffle=False, ) - batch = next(train_dataset) + batch = next(loader) + # [[1,2,3],[3,1],[...],[]] - nested_data_col = tf.RaggedTensor.from_row_lengths( - batch[0]["data"][0][:, 0], tf.cast(batch[0]["data"][1][:, 0], tf.int32) - ).to_tensor() + @tf.function + def _ragged_for_nested_data_col(): + nested_data_col = tf.RaggedTensor.from_row_lengths( + batch[0]["data"][0][:, 0], tf.cast(batch[0]["data"][1][:, 0], tf.int32) + ).to_tensor() + return nested_data_col + + nested_data_col = _ragged_for_nested_data_col() true_data_col = tf.reshape( - tf.ragged.constant(df.iloc[:batch_size, 0].tolist()).to_tensor(), - [batch_size, -1], + tf.ragged.constant(df.iloc[:batch_size, 0].tolist()).to_tensor(), [batch_size, -1] ) + # [1,2,3] - multihot_data2_col = tf.RaggedTensor.from_row_lengths( - batch[0]["data2"][0][:, 0], tf.cast(batch[0]["data2"][1][:, 0], tf.int32) - ).to_tensor() + @tf.function + def _ragged_for_multihot_data_col(): + multihot_data2_col = tf.RaggedTensor.from_row_lengths( + batch[0]["data2"][0][:, 0], tf.cast(batch[0]["data2"][1][:, 0], tf.int32) + ).to_tensor() + return multihot_data2_col + + multihot_data2_col = _ragged_for_multihot_data_col() true_data2_col = tf.reshape( tf.ragged.constant(df.iloc[:batch_size, 1].tolist()).to_tensor(), [batch_size, -1],