From 7c7fc4a2849335961de829fa5bde63357f9b13aa Mon Sep 17 00:00:00 2001 From: Akhilesh Date: Fri, 31 Jan 2025 20:37:36 +0530 Subject: [PATCH] [fix] Handle importing UTF-16 encoded CSV files #550 This change ensures proper handling of both UTF-8 and UTF-16 encoded CSV files. Fixes #550 --------- Co-authored-by: Federico Capoano --- openwisp_radius/base/models.py | 3 ++- openwisp_radius/receivers.py | 1 + .../tests/static/test_batch_utf16_file1.csv | Bin 0 -> 132 bytes .../tests/static/test_batch_utf16_file2.csv | Bin 0 -> 134 bytes .../tests/static/test_batch_utf8Sig_file2.csv | 1 + openwisp_radius/tests/test_utils.py | 13 +++++++++ openwisp_radius/utils.py | 25 +++++++++++++++++- 7 files changed, 41 insertions(+), 2 deletions(-) create mode 100644 openwisp_radius/tests/static/test_batch_utf16_file1.csv create mode 100644 openwisp_radius/tests/static/test_batch_utf16_file2.csv create mode 100644 openwisp_radius/tests/static/test_batch_utf8Sig_file2.csv diff --git a/openwisp_radius/base/models.py b/openwisp_radius/base/models.py index df7d62ee..fd7b04ff 100644 --- a/openwisp_radius/base/models.py +++ b/openwisp_radius/base/models.py @@ -47,6 +47,7 @@ ) from ..utils import ( SmsMessage, + decode_byte_data, find_available_username, generate_sms_token, get_sms_default_valid_until, @@ -951,7 +952,7 @@ def csvfile_upload( if not csvfile: csvfile = self.csvfile csv_data = csvfile.read() - csv_data = csv_data.decode('utf-8') if isinstance(csv_data, bytes) else csv_data + csv_data = decode_byte_data(csv_data) reader = csv.reader(StringIO(csv_data), delimiter=',') self.full_clean() self.save() diff --git a/openwisp_radius/receivers.py b/openwisp_radius/receivers.py index a3a999be..c19299d9 100644 --- a/openwisp_radius/receivers.py +++ b/openwisp_radius/receivers.py @@ -1,6 +1,7 @@ """ Receiver functions for django signals (eg: post_save) """ + import logging from celery.exceptions import OperationalError diff --git a/openwisp_radius/tests/static/test_batch_utf16_file1.csv b/openwisp_radius/tests/static/test_batch_utf16_file1.csv new file mode 100644 index 0000000000000000000000000000000000000000..4ea3e163568ae3181f58102da05abf22210da459 GIT binary patch literal 132 zcmZQDWiVxMVK8EFV{io0E)3=j#z1Vzpu>>Nki(G5kjPNPPy(bYfTRjg-hiPT$n#+^ q17ZuHDia`dVKBtd?*P_S3?%h{Cgn5a0@a2x1T(lY1OZhev$+7@WD?x~ literal 0 HcmV?d00001 diff --git a/openwisp_radius/tests/static/test_batch_utf16_file2.csv b/openwisp_radius/tests/static/test_batch_utf16_file2.csv new file mode 100644 index 0000000000000000000000000000000000000000..0907bf990449542650bd069e21375e830a9531d6 GIT binary patch literal 134 zcmezW&y>NG!G*zy!HvNYNV_nYGZ+K0C4&w_GD8kSDnlYe5km=(t^krMKzRd(av;x# t!3>BkfT~P@(1pPeL%#!9S22*(1Dcf2kPB2B$`H)p$`Axpjm+j^008^J6xsj) literal 0 HcmV?d00001 diff --git a/openwisp_radius/tests/static/test_batch_utf8Sig_file2.csv b/openwisp_radius/tests/static/test_batch_utf8Sig_file2.csv new file mode 100644 index 00000000..dcd4a3fc --- /dev/null +++ b/openwisp_radius/tests/static/test_batch_utf8Sig_file2.csv @@ -0,0 +1 @@ +44D1FADD7379,cleartext$D0weL6L8,44D1FADD7379@umoja.com,EAPUSER1,USER1 diff --git a/openwisp_radius/tests/test_utils.py b/openwisp_radius/tests/test_utils.py index ce4d84d7..1503a4d7 100644 --- a/openwisp_radius/tests/test_utils.py +++ b/openwisp_radius/tests/test_utils.py @@ -24,6 +24,19 @@ def test_validate_file_format(self): in error.exception.message ) + def test_validate_utf16_file_format(self): + utf_16_file_1_format_path = self._get_path('static/test_batch_utf16_file1.csv') + assert validate_csvfile(open(utf_16_file_1_format_path, 'rb')) is None + + utf_16_file_2_format_path = self._get_path('static/test_batch_utf16_file2.csv') + assert validate_csvfile(open(utf_16_file_2_format_path, 'rb')) is None + + def test_validate_utf8Sig_file_format(self): + utf_16_file_2_format_path = self._get_path( + 'static/test_batch_utf8Sig_file2.csv' + ) + assert validate_csvfile(open(utf_16_file_2_format_path, 'rb')) is None + def test_validate_csvfile(self): invalid_csv_path = self._get_path('static/test_batch_invalid.csv') improper_csv_path = self._get_path('static/test_batch_improper.csv') diff --git a/openwisp_radius/utils.py b/openwisp_radius/utils.py index 499059fb..1e79df9a 100644 --- a/openwisp_radius/utils.py +++ b/openwisp_radius/utils.py @@ -129,10 +129,32 @@ def find_available_username(username, users_list, prefix=False): return tmp +def get_encoding_format(byte_data): + # Explicitly handle some common encodings, including utf-16le + common_encodings = ['utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le', 'ascii'] + + for enc in common_encodings: + try: + byte_data.decode(enc) + return enc + except (UnicodeDecodeError, TypeError): + continue + + return 'utf-8' + + +def decode_byte_data(data): + if isinstance(data, bytes): + data = data.decode(get_encoding_format(data)) + data = data.replace('\x00', '') # Removing null bytes + return data + + def validate_csvfile(csvfile): csv_data = csvfile.read() + try: - csv_data = csv_data.decode('utf-8') if isinstance(csv_data, bytes) else csv_data + csv_data = decode_byte_data(csv_data) except UnicodeDecodeError: raise ValidationError( _( @@ -140,6 +162,7 @@ def validate_csvfile(csvfile): 'does not look like a CSV file.' ) ) + reader = csv.reader(StringIO(csv_data), delimiter=',') error_message = 'The CSV contains a line with invalid data,\ line number {} triggered the following error: {}'