From 429b7c2185a4e50d91b82f99f42e5f7d805c17a8 Mon Sep 17 00:00:00 2001 From: David Lakin Date: Wed, 27 Mar 2024 00:38:47 -0400 Subject: [PATCH] [requests] Fix missing dependency breaking the build (#11730) Fixes the fuzz target build for the `requests` package that has been broken since shortly after its initial integration in 2022 (see [ClusterFuzz Issue 50582](https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=50582&q=requests&can=2)). The build was failing because of missing dependencies in the PyInstaller bundle; upgrading setuptools to version 42 or later enables PyInstaller to load `pyinstaller-hooks-contrib`, which enables proper dependency resolution. This also replaces the existing threading-based harness with a mock-adapter-based approach that: - enables the target to be run with multiple jobs at once - increases the execution speed of the target significantly - increases the requests API surface area covered by the fuzzer - removes false-positive failure cases (see [ClusterFuzz Issue 62711](https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=62711&q=requests&can=2)) - includes a seed corpus and dictionary file hosted in a separate repository --- projects/requests/Dockerfile | 17 +++- projects/requests/build.sh | 28 ++++-- projects/requests/fuzz_requests.py | 134 +++++++++++++++++++++++++++++ projects/requests/fuzz_server.py | 77 ----------------- 4 files changed, 171 insertions(+), 85 deletions(-) create mode 100644 projects/requests/fuzz_requests.py delete mode 100644 projects/requests/fuzz_server.py diff --git a/projects/requests/Dockerfile b/projects/requests/Dockerfile index 5a2c704d2c5f..4e463ce078b2 100644 --- a/projects/requests/Dockerfile +++ b/projects/requests/Dockerfile @@ -16,7 +16,20 @@ FROM gcr.io/oss-fuzz-base/base-builder-python -RUN git clone --depth 1 --branch main https://github.com/psf/requests.git +RUN git clone --depth 1 --branch main https://github.com/psf/requests.git requests \ + && python3 -m pip install --upgrade pip \ + && 
python3 -m pip install 'setuptools~=69.0' 'pyinstaller~=6.0' 'requests-mock~=1.0'; +RUN mkdir -p $SRC/seed_data \ + && git clone --depth 1 https://github.com/DaveLak/oss-fuzz-inputs.git oss-fuzz-inputs \ + && rsync -avc oss-fuzz-inputs/requests/ $SRC/seed_data/ \ + && rm -rf oss-fuzz-inputs; + +RUN git clone --depth 1 https://github.com/google/fuzzing fuzzing \ + && cat fuzzing/dictionaries/http.dict \ + fuzzing/dictionaries/url.dict \ + >> $SRC/seed_data/__base.dict \ + && rm -rf fuzzing; + +COPY build.sh *.py $SRC/ WORKDIR $SRC/requests -COPY build.sh fuzz_server.py $SRC/ diff --git a/projects/requests/build.sh b/projects/requests/build.sh index 7b3b95c7f02d..ef63632cef7c 100644 --- a/projects/requests/build.sh +++ b/projects/requests/build.sh @@ -14,14 +14,30 @@ # limitations under the License. # ################################################################################ +# Directory to look in for dictionaries, options files, and seed corpa: +SEED_DATA_DIR="$SRC/seed_data" + +# Help Fuzz Introspector find the package entrypoint. +# See https://github.com/ossf/fuzz-introspector/issues/1010 +export PYFUZZPACKAGE="$SRC/requests/src/requests" # Build and install project (using current CFLAGS, CXXFLAGS). -pip3 install . +python3 -m pip install . + +find $SEED_DATA_DIR \( -name '*_seed_corpus.zip' -o -name '*.options' -o -name '*.dict' \) \ + ! \( -name '__base.*' \) -exec printf 'Copying: %s\n' {} \; \ + -exec chmod a-x {} \; \ + -exec cp {} "$OUT" \; + +find "$SRC" -maxdepth 1 -name 'fuzz_*.py' -print0 | while IFS= read -r -d $'\0' fuzz_harness; do + compile_python_fuzzer "$fuzz_harness" -mkdir tests/fuzz/ -cp ../fuzz_server.py tests/fuzz/ + common_base_dictionary_filename="$SEED_DATA_DIR/__base.dict" + if [[ -r "$common_base_dictionary_filename" ]]; then + # Strip the `.py` extension from the filename and replace it with `.dict`. + fuzz_harness_dictionary_filename="$(basename "$fuzz_harness" .py).dict" -# Build fuzzers in $OUT. 
-for fuzzer in $(ls tests/fuzz/fuzz*.py); do - compile_python_fuzzer $fuzzer + printf 'Appending %s to %s\n' "$common_base_dictionary_filename" "$OUT/$fuzz_harness_dictionary_filename" + cat "$common_base_dictionary_filename" >> "$OUT/$fuzz_harness_dictionary_filename" + fi done diff --git a/projects/requests/fuzz_requests.py b/projects/requests/fuzz_requests.py new file mode 100644 index 000000000000..22d978b67c86 --- /dev/null +++ b/projects/requests/fuzz_requests.py @@ -0,0 +1,134 @@ +#!/usr/bin/python3 +# +# Copyright 2024 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +################################################################################ + +import atheris +import sys + +# urllib3 slows down the initial startup and analysis phases of fuzz target runs +# because of how it is imported in requests.compat so it is excluded here. 
+with atheris.instrument_imports( + exclude=['urllib3', 'urllib3.util', 'urllib.parse', 'urllib.request']): + import requests_mock + import requests + from requests.auth import HTTPDigestAuth + from requests.cookies import cookiejar_from_dict, CookieConflictError + from requests.exceptions import RequestException + + +def is_expected_error(error_content_list, error_msg): + for error in error_content_list: + if error in error_msg: + return True + return False + + +def TestOneInput(data): + fdp = atheris.FuzzedDataProvider(data) + http_methods = ['GET', 'POST', 'PUT', 'DELETE', 'HEAD', 'OPTIONS', 'PATCH'] + + try: + cookie_jar = cookiejar_from_dict({ + fdp.ConsumeString(10): fdp.ConsumeString(20) + for _ in range(fdp.ConsumeIntInRange(1, 3)) + }) + except CookieConflictError: + return -1 + + try: + with requests_mock.Mocker() as global_mock: + global_mock.request(method=requests_mock.ANY, + url=requests_mock.ANY, + status_code=fdp.ConsumeIntInRange(0, 599), + reason=fdp.ConsumeString(fdp.ConsumeIntInRange( + 0, 100)), + text=fdp.ConsumeString(fdp.ConsumeIntInRange(0, 100)), + headers={ + fdp.ConsumeString(10): fdp.ConsumeString(20) + for _ in range(fdp.ConsumeIntInRange(1, 3)) + }, + cookies={ + fdp.ConsumeString(10): fdp.ConsumeString(20) + for _ in range(fdp.ConsumeIntInRange(1, 3)) + }) + + r1 = requests.request( + fdp.PickValueInList(http_methods), + url=fdp.ConsumeString(fdp.ConsumeIntInRange(0, 100)), + allow_redirects=fdp.ConsumeBool(), + auth=HTTPDigestAuth(fdp.ConsumeString(fdp.ConsumeIntInRange(0, 100)), + fdp.ConsumeString(fdp.ConsumeIntInRange(0, 100))), + params=fdp.ConsumeBytes(fdp.ConsumeIntInRange(1, 512)), + timeout=fdp.ConsumeFloatInRange(0.1, 5.0), + headers={ + fdp.ConsumeString(10): fdp.ConsumeString(20) + for _ in range(fdp.ConsumeIntInRange(1, 3)) + }, + cookies=cookie_jar) + _ = r1.status_code + _ = r1.reason + _ = r1.headers + _ = r1.cookies + _ = r1.encoding + _ = r1.text + r1.close() + + s = requests.Session() + s.auth = 
(fdp.ConsumeString(fdp.ConsumeIntInRange(0, 100)), + fdp.ConsumeString(fdp.ConsumeIntInRange(0, 100))) + s.headers.update({ + fdp.ConsumeString(10): fdp.ConsumeString(20) + for _ in range(fdp.ConsumeIntInRange(1, 5)) + }) + + proxies = { + 'http': fdp.ConsumeString(fdp.ConsumeIntInRange(0, 100)), + 'https': fdp.ConsumeString(fdp.ConsumeIntInRange(0, 100)), + } + s.proxies.update(proxies) + + custom_method = fdp.ConsumeString(fdp.ConsumeIntInRange(0, 20)) + url_with_port = f"'https://'{fdp.ConsumeString(fdp.ConsumeIntInRange(0, 100))}:{fdp.ConsumeIntInRange(0, 10000)}/" + req = requests.Request(custom_method, + url=url_with_port, + data=fdp.ConsumeBytes( + fdp.ConsumeIntInRange(1, 1024))) + prepped_request = req.prepare() + + with requests_mock.Mocker(session=s) as session_mock: + session_mock.request(method=requests_mock.ANY, + url=requests_mock.ANY, + status_code=fdp.ConsumeIntInRange(0, 599), + content=fdp.ConsumeBytes( + fdp.ConsumeIntInRange(0, sys.maxsize))) + r2 = s.send(prepped_request) + _ = r2.content + r2.close() + except (RequestException, ValueError) as e: + expected_error_message_content = ["Invalid IPV4 URL", "Invalid IPV6 URL"] + if (isinstance(e, RequestException) or (isinstance(e, ValueError)) and + is_expected_error(expected_error_message_content, str(e))): + return -1 + + +def main(): + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() + + +if __name__ == "__main__": + main() diff --git a/projects/requests/fuzz_server.py b/projects/requests/fuzz_server.py deleted file mode 100644 index e66f511638b2..000000000000 --- a/projects/requests/fuzz_server.py +++ /dev/null @@ -1,77 +0,0 @@ -#!/usr/bin/python3 - -# Copyright 2021 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import atheris - -import socket -import sys -import time -import threading - -with atheris.instrument_imports(): - import requests - -fuzzed_input = b"" - -# somehow ugly as fuzzing cannot be run in parallel -def SetFuzzedInput(input_bytes): - global fuzzed_input - fuzzed_input = input_bytes - -class ServerThread(threading.Thread): - - def __init__(self): - self.s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self.s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1) - self.s.bind(("127.0.0.1", 8001)) - self.s.listen(1) - - threading.Thread.__init__(self) - - def run(self): - global fuzzed_input - conn, addr = self.s.accept() - conn.recv(1024) - conn.send(fuzzed_input) - time.sleep(0.005) - conn.close() - self.s.shutdown(1) - self.s.close() - time.sleep(0.01) - -def TestOneInput(input_bytes): - t1 = ServerThread() - # Launch threads - t1.start() - SetFuzzedInput(input_bytes) - try: - r = requests.get('http://127.0.0.1:8001/', timeout=2.0) - r.status_code - r.headers - r.text - except requests.exceptions.RequestException as e: - pass - t1.join() - - -def main(): - atheris.instrument_all() - atheris.Setup(sys.argv, TestOneInput, enable_python_coverage=True) - atheris.Fuzz() - - -if __name__ == "__main__": - main()