From 8c6145a889d4e612ae32c1708341b79bab6d019e Mon Sep 17 00:00:00 2001 From: Riddhi Battu Date: Thu, 11 Apr 2024 17:14:56 -0700 Subject: [PATCH] removing functions not used --- tests/test_data_preprocessing.py | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) diff --git a/tests/test_data_preprocessing.py b/tests/test_data_preprocessing.py index 83f2171..0d7af3f 100644 --- a/tests/test_data_preprocessing.py +++ b/tests/test_data_preprocessing.py @@ -49,14 +49,6 @@ def mock_data(): 'price_category': ['0-50', '50-100', '100-150', '150-200', '200-250', '250-300', '300-350', '350+', '350+', '350+'] }) -# def test_preprocess_data(mock_data, tmpdir): -# csv_path = os.path.join(tmpdir, "mock_data.csv") -# mock_data.to_csv(csv_path, index=False) -# processed_data = data_preprocessing(csv_path, tmpdir) -# assert all(processed_data['id'].apply(lambda x: isinstance(x, str))) -# assert all(processed_data['host_id'].apply(lambda x: isinstance(x, str))) -# assert 'reviews_per_month' in processed_data.columns - def test_convert_missing_values(): """ Test the conversion of missing values in a DataFrame, including the transformation of numerical IDs to string type and the handling of NaNs. @@ -258,25 +250,4 @@ def test_data_splitting_proportions(mock_data): test_len = len(test_df) # Check if the proportions approximately match the expected 80-20 split assert train_len / total_len == pytest.approx(0.8, 0.05) - assert test_len / total_len == pytest.approx(0.2, 0.05) - -# def test_data_preprocessing(tmpdir): -# """ -# Test the end-to-end data preprocessing functionality. -# """ -# # Setup - Create a sample CSV file in tmpdir -# sample_data = pd.DataFrame({'price': [25, 75, 375], 'reviews_per_month': [1, 2, None]}) -# sample_data_path = tmpdir.join("sample_data.csv") -# sample_data.to_csv(str(sample_data_path), index=False) - -# processed_data_dir = tmpdir -# input_path = str(sample_data_path) -# out_dir = str(processed_data_dir) - -# # Execute -# data_preprocessing(input_path, out_dir) - -# # Validate - Check if processed files exist -# for filename in ['train_df.csv', 'test_df.csv', 'X_train.csv', 'y_train.csv', 'X_test.csv', 'y_test.csv']: -# saved_file = os.path.join(out_dir, filename) -# assert os.path.exists(saved_file), f"{filename} was not processed or saved correctly" + assert test_len / total_len == pytest.approx(0.2, 0.05) \ No newline at end of file