Skip to content

Commit

Permalink
[fix] Handle importing UTF-16 encoded CSV files #550
Browse files Browse the repository at this point in the history
This change ensures proper handling of both UTF-8 and UTF-16 encoded CSV files.

Fixes #550

---------

Co-authored-by: Federico Capoano <f.capoano@openwisp.io>
  • Loading branch information
akhilsharmaa and nemesifier authored Jan 31, 2025
1 parent 52d5bef commit 7c7fc4a
Show file tree
Hide file tree
Showing 7 changed files with 41 additions and 2 deletions.
3 changes: 2 additions & 1 deletion openwisp_radius/base/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@
)
from ..utils import (
SmsMessage,
decode_byte_data,
find_available_username,
generate_sms_token,
get_sms_default_valid_until,
Expand Down Expand Up @@ -951,7 +952,7 @@ def csvfile_upload(
if not csvfile:
csvfile = self.csvfile
csv_data = csvfile.read()
csv_data = csv_data.decode('utf-8') if isinstance(csv_data, bytes) else csv_data
csv_data = decode_byte_data(csv_data)
reader = csv.reader(StringIO(csv_data), delimiter=',')
self.full_clean()
self.save()
Expand Down
1 change: 1 addition & 0 deletions openwisp_radius/receivers.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Receiver functions for django signals (eg: post_save)
"""

import logging

from celery.exceptions import OperationalError
Expand Down
Binary file not shown.
Binary file not shown.
1 change: 1 addition & 0 deletions openwisp_radius/tests/static/test_batch_utf8Sig_file2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
44D1FADD7379,cleartext$D0weL6L8,44D1FADD7379@umoja.com,EAPUSER1,USER1
13 changes: 13 additions & 0 deletions openwisp_radius/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,19 @@ def test_validate_file_format(self):
in error.exception.message
)

def test_validate_utf16_file_format(self):
    """
    ``validate_csvfile`` returns ``None`` (i.e. raises nothing) for both
    ``test_batch_utf16_*`` fixture files when they are opened in binary mode.
    """
    fixtures = (
        'static/test_batch_utf16_file1.csv',
        'static/test_batch_utf16_file2.csv',
    )
    for fixture in fixtures:
        # subTest keeps the second fixture running (and reported separately)
        # even when the first one fails.
        with self.subTest(fixture=fixture):
            # The context manager closes the handle deterministically instead
            # of leaving it to the garbage collector (avoids ResourceWarning).
            with open(self._get_path(fixture), 'rb') as csvfile:
                self.assertIsNone(validate_csvfile(csvfile))

def test_validate_utf8Sig_file_format(self):
    """
    ``validate_csvfile`` returns ``None`` (i.e. raises nothing) for the
    ``test_batch_utf8Sig_file2.csv`` fixture when it is opened in binary mode.
    """
    utf8_sig_file_path = self._get_path(
        'static/test_batch_utf8Sig_file2.csv'
    )
    # The context manager closes the handle deterministically instead of
    # leaving it to the garbage collector (avoids ResourceWarning).
    with open(utf8_sig_file_path, 'rb') as csvfile:
        self.assertIsNone(validate_csvfile(csvfile))

def test_validate_csvfile(self):
invalid_csv_path = self._get_path('static/test_batch_invalid.csv')
improper_csv_path = self._get_path('static/test_batch_improper.csv')
Expand Down
25 changes: 24 additions & 1 deletion openwisp_radius/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,17 +129,40 @@ def find_available_username(username, users_list, prefix=False):
return tmp


def get_encoding_format(byte_data):
    """
    Guess which codec should be used to decode ``byte_data``.

    Candidates are collected in priority order and the first one that
    decodes the data without error is returned:

    1. A byte order mark, when present, selects the codec directly.
    2. Without a BOM, NUL bytes that sit mostly at even offsets suggest
       BOM-less UTF-16 big-endian, mostly at odd offsets little-endian.
       This is a heuristic: NUL bytes are also valid UTF-8, so trying
       ``utf-8-sig`` first would wrongly claim such data.
    3. The generic fallbacks ``utf-8-sig``, ``utf-16``, ``utf-16be`` and
       ``utf-16le`` are tried last.

    :param byte_data: raw ``bytes`` content to inspect
    :returns: a codec name usable with ``bytes.decode``; ``'utf-8'`` when
        no candidate decodes the data, in which case decoding with the
        returned name is expected to raise ``UnicodeDecodeError``
    """
    import codecs

    candidates = []
    if byte_data.startswith(codecs.BOM_UTF8):
        candidates.append('utf-8-sig')
    elif byte_data.startswith((codecs.BOM_UTF16_LE, codecs.BOM_UTF16_BE)):
        # The generic codec consumes the BOM and takes the byte order from it.
        candidates.append('utf-16')
    elif b'\x00' in byte_data:
        # In UTF-16 the high byte of every ASCII-range character is NUL, so
        # the offset parity holding most NULs reveals the byte order.
        nulls_at_even = byte_data[0::2].count(0)
        nulls_at_odd = byte_data[1::2].count(0)
        if nulls_at_even > nulls_at_odd:
            candidates.append('utf-16be')
        elif nulls_at_odd > nulls_at_even:
            candidates.append('utf-16le')

    # 'ascii' is intentionally not listed: anything ASCII-decodable is
    # already accepted by 'utf-8-sig'.
    candidates.extend(['utf-8-sig', 'utf-16', 'utf-16be', 'utf-16le'])

    for enc in candidates:
        try:
            byte_data.decode(enc)
        except (UnicodeDecodeError, TypeError):
            continue
        return enc

    return 'utf-8'


def decode_byte_data(data):
    """
    Return ``data`` as text.

    ``bytes`` input is decoded with the codec chosen by
    ``get_encoding_format`` and NUL characters are then removed from the
    decoded text; any other input is handed back untouched.

    :param data: ``bytes`` to decode, or an already-decoded value
    :returns: the decoded ``str`` for ``bytes`` input, otherwise ``data``
    :raises UnicodeDecodeError: if the chosen codec cannot decode ``data``
    """
    if not isinstance(data, bytes):
        return data
    text = data.decode(get_encoding_format(data))
    # Removing null bytes
    return text.replace('\x00', '')


def validate_csvfile(csvfile):
csv_data = csvfile.read()

try:
csv_data = csv_data.decode('utf-8') if isinstance(csv_data, bytes) else csv_data
csv_data = decode_byte_data(csv_data)
except UnicodeDecodeError:
raise ValidationError(
_(
'Unrecognized file format, the supplied file '
'does not look like a CSV file.'
)
)

reader = csv.reader(StringIO(csv_data), delimiter=',')
error_message = 'The CSV contains a line with invalid data,\
line number {} triggered the following error: {}'
Expand Down

0 comments on commit 7c7fc4a

Please sign in to comment.