Skip to content

Commit

Permalink
Allow '=' in URLs, which is a valid character in HDFS path (#1428)
Browse files Browse the repository at this point in the history
  • Loading branch information
markhatch authored Nov 20, 2023
1 parent 405406f commit 23b275e
Show file tree
Hide file tree
Showing 2 changed files with 61 additions and 1 deletion.
60 changes: 60 additions & 0 deletions fsspec/implementations/tests/test_webhdfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,3 +135,63 @@ def test_webhdfs_cp_file(hdfs_cluster):
assert fs.exists(src)
assert fs.exists(dst)
assert fs.cat(src) == fs.cat(dst)


def test_path_with_equals(hdfs_cluster):
    """A directory whose name contains '=' can be created, listed and found."""
    proxy_map = {"worker.example.com": "localhost"}
    webhdfs = WebHDFS(hdfs_cluster, user="testuser", data_proxy=proxy_map)
    partition_dir = "/user/testuser/some_table/datestamp=2023-11-11"

    webhdfs.mkdir(partition_dir)

    # Listing must succeed (not raise) and the directory must be visible.
    listing = webhdfs.ls(partition_dir)
    assert listing is not None
    assert webhdfs.exists(partition_dir)


def test_error_handling_with_equals_in_path(hdfs_cluster):
    """Listing a path with '=' that this test never creates raises FileNotFoundError."""
    missing_dir = "/user/testuser/some_table/invalid_path=datestamp=2023-11-11"
    webhdfs = WebHDFS(hdfs_cluster, user="testuser")

    with pytest.raises(FileNotFoundError):
        webhdfs.ls(missing_dir)


def test_create_and_touch_file_with_equals(hdfs_cluster):
    """Touch a file inside a directory whose name contains '=' and check it exists."""
    fs = WebHDFS(
        hdfs_cluster,
        user="testuser",
        data_proxy={"worker.example.com": "localhost"},
    )
    base_path = "/user/testuser/some_table/datestamp=2023-11-11"
    file_path = f"{base_path}/testfile.txt"

    fs.mkdir(base_path)
    # The second positional parameter of fsspec's ``touch`` is the boolean
    # ``truncate`` flag, not a file mode. The previous ``"wb"`` only worked
    # because a non-empty string is truthy; spell the intent out instead.
    fs.touch(file_path, truncate=True)
    assert fs.exists(file_path)


def test_write_read_verify_file_with_equals(hdfs_cluster):
    """Round-trip a file under a directory containing '=' and check its listing."""
    proxy_map = {"worker.example.com": "localhost"}
    webhdfs = WebHDFS(hdfs_cluster, user="testuser", data_proxy=proxy_map)
    payload = b"This is some content!"
    parent_dir = "/user/testuser/some_table/datestamp=2023-11-11"
    target = f"{parent_dir}/testfile.txt"

    webhdfs.mkdir(parent_dir)
    with webhdfs.open(target, "wb") as writer:
        writer.write(payload)

    # Read the bytes back and compare them with what was written.
    with webhdfs.open(target, "rb") as reader:
        assert reader.read() == payload

    # The detailed listing should hold exactly one entry describing the file.
    entries = webhdfs.ls(parent_dir, detail=True)
    assert len(entries) == 1
    entry = entries[0]
    assert entry["name"] == target
    assert entry["size"] == len(payload)
2 changes: 1 addition & 1 deletion fsspec/implementations/webhdfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,7 @@ def _connect(self):
self.session.auth = HTTPBasicAuth(self.user, self.password)

def _call(self, op, method="get", path=None, data=None, redirect=True, **kwargs):
url = self._apply_proxy(self.url + quote(path or ""))
url = self._apply_proxy(self.url + quote(path or "", safe="/="))
args = kwargs.copy()
args.update(self.pars)
args["op"] = op.upper()
Expand Down

0 comments on commit 23b275e

Please sign in to comment.