Skip to content

Commit 7aced45

Browse files
committed
Add hypothesis harness, tarfile strategies
1 parent 0b96a17 commit 7aced45

7 files changed

Lines changed: 178 additions & 1 deletion

File tree

Makefile

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
all : fuzzer-html fuzzer-email fuzzer-httpclient fuzzer-json fuzzer-difflib fuzzer-csv fuzzer-decode fuzzer-ast fuzzer-tarfile fuzzer-zipfile fuzzer-re fuzzer-configparser fuzzer-tomllib fuzzer-plistlib fuzzer-xml
1+
all : fuzzer-html fuzzer-email fuzzer-httpclient fuzzer-json fuzzer-difflib fuzzer-csv fuzzer-decode fuzzer-ast fuzzer-tarfile fuzzer-tarfile-hypothesis fuzzer-zipfile fuzzer-re fuzzer-configparser fuzzer-tomllib fuzzer-plistlib fuzzer-xml
22

33
PYTHON_CONFIG_PATH=$(CPYTHON_INSTALL_PATH)/bin/python3-config
44
CXXFLAGS += $(shell $(PYTHON_CONFIG_PATH) --cflags)
@@ -26,6 +26,8 @@ fuzzer-zipfile:
2626
clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"zipfile.py\"" -ldl $(LDFLAGS) -o fuzzer-zipfile
2727
fuzzer-tarfile:
2828
clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"tarfile.py\"" -ldl $(LDFLAGS) -o fuzzer-tarfile
29+
# Build fuzzer.cpp with PYTHON_HARNESS_PATH set to "tarfile_hypothesis.py",
# producing the fuzzer-tarfile-hypothesis binary.
fuzzer-tarfile-hypothesis:
30+
clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"tarfile_hypothesis.py\"" -ldl $(LDFLAGS) -o fuzzer-tarfile-hypothesis
2931
fuzzer-configparser:
3032
clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"configparser.py\"" -ldl $(LDFLAGS) -o fuzzer-configparser
3133
fuzzer-tomllib:

hypothesis_strategies/__init__.py

Whitespace-only changes.

hypothesis_strategies/fspaths.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
# Copyright 2017 Christoph Reiter
2+
#
3+
# Permission is hereby granted, free of charge, to any person obtaining
4+
# a copy of this software and associated documentation files (the
5+
# "Software"), to deal in the Software without restriction, including
6+
# without limitation the rights to use, copy, modify, merge, publish,
7+
# distribute, sublicense, and/or sell copies of the Software, and to
8+
# permit persons to whom the Software is furnished to do so, subject to
9+
# the following conditions:
10+
#
11+
# The above copyright notice and this permission notice shall be included
12+
# in all copies or substantial portions of the Software.
13+
#
14+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15+
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16+
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17+
# IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18+
# CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19+
# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20+
# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21+
22+
import os
23+
import sys
24+
25+
from hypothesis.strategies import (
26+
composite,
27+
sampled_from,
28+
lists,
29+
integers,
30+
binary,
31+
randoms,
32+
)
33+
34+
35+
@composite
def fspaths(draw, max_size: int | None = None) -> str:
    """Hypothesis strategy producing strings usable as file system paths.

    A generated value never makes open() raise ValueError or TypeError.
    OSError remains possible because of file system or operating system
    restrictions.

    ``max_size`` is forwarded to the underlying ``lists`` strategy when
    ``os.name == "nt"`` and to the underlying ``binary`` strategy otherwise.
    """

    def code_points(lowest, highest):
        # Strategy for a single character within an inclusive code point range.
        return integers(min_value=lowest, max_value=highest).map(chr)

    if os.name == "nt":
        # Draw one arbitrary non-NUL character, one high surrogate and one
        # low surrogate; the text is then assembled from these three only.
        alphabet = [
            draw(code_points(1, sys.maxunicode)),
            draw(code_points(0xD800, 0xDBFF)),
            draw(code_points(0xDC00, 0xDFFF)),
        ]
        candidate = lists(sampled_from(alphabet), max_size=max_size).map("".join)
    else:

        def decode_path(raw):
            # NUL bytes are swapped for spaces before decoding with the
            # file system encoding; "surrogateescape" keeps undecodable bytes.
            cleaned = raw.replace(b"\x00", b" ")
            return cleaned.decode(sys.getfilesystemencoding(), "surrogateescape")

        candidate = binary(max_size=max_size).map(decode_path)

    rng = draw(randoms())

    def shuffled(text):
        # Reorder the characters using the hypothesis-provided random source.
        chars = list(text)
        rng.shuffle(chars)
        return "".join(chars)

    return draw(candidate.map(shuffled))

hypothesis_strategies/tar.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
from hypothesis import strategies as st, given
2+
3+
4+
import io
5+
import tarfile
6+
7+
import hypothesis.strategies as st
8+
from .fspaths import fspaths
9+
10+
11+
def tar_integers(
    format: int, digits: int = 1, allow_negative: bool = False
) -> st.SearchStrategy[int]:
    """Return a strategy for integers that fit a tar numeric header field.

    tar encodes integers in a format-dependent way that is bounded by the
    number of "digits" (bytes) reserved for the field.

    Args:
        format: One of the ``tarfile`` format constants.
        digits: Width of the header field; must be at least 1.
        allow_negative: Only honoured for ``tarfile.GNU_FORMAT``, whose
            base-256 encoding can represent negative numbers.

    Raises:
        ValueError: If ``digits`` is zero or negative.
    """
    if digits <= 0:
        raise ValueError("Digits must be greater than zero.")
    if format == tarfile.GNU_FORMAT:
        # GNU base-256 encoding: one marker byte plus (digits - 1) data bytes.
        limit = 256 ** (digits - 1)
        min_value = -limit if allow_negative else 0
        max_value = limit - 1
    else:
        # Octal encoding: (digits - 1) octal digits followed by a NUL
        # terminator, so the largest value is 8 ** (digits - 1) - 1.
        min_value = 0
        max_value = 8 ** (digits - 1) - 1
    return st.integers(min_value=min_value, max_value=max_value)
26+
27+
28+
@st.composite
def tar_archives(draw):
    """Hypothesis strategy that builds an in-memory tar archive as bytes.

    The archive format is drawn from GNU, PAX and USTAR. Between 1 and 10
    members are added; each gets a generated name, a type drawn from
    ``tarfile.REGULAR_TYPES``, generated numeric header fields and,
    optionally, a link name. For PAX archives, a random subset of the
    ``uname``, ``gname``, ``path`` and ``linkpath`` pax headers is set too.
    No file object is passed to ``addfile``, so members carry headers only.

    Returns:
        The complete archive, including the end-of-archive blocks written
        when the ``TarFile`` is closed.
    """
    buf = io.BytesIO()
    tar_format = draw(
        st.sampled_from((tarfile.GNU_FORMAT, tarfile.PAX_FORMAT, tarfile.USTAR_FORMAT))
    )
    types = list(tarfile.REGULAR_TYPES)

    def maybe_set_pax_header(obj, name, value):
        # Flip a drawn coin so each pax header is present only sometimes.
        if draw(st.booleans()):
            obj.pax_headers[name] = value

    # The context manager closes the archive on success, which appends the
    # finishing zero blocks; without it the returned bytes are unterminated.
    with tarfile.TarFile(fileobj=buf, format=tar_format, mode="w") as tar:
        for _ in range(draw(st.integers(min_value=1, max_value=10))):
            info = tarfile.TarInfo(
                name=draw(fspaths(max_size=tarfile.LENGTH_NAME))
            )

            info.type = draw(st.sampled_from(types))
            info.mode = draw(tar_integers(format=tar_format, digits=8))
            info.uid = draw(tar_integers(format=tar_format, digits=8))
            info.gid = draw(tar_integers(format=tar_format, digits=8))
            info.mtime = draw(tar_integers(format=tar_format, digits=12))
            info.devmajor = draw(tar_integers(format=tar_format, digits=8))
            info.devminor = draw(tar_integers(format=tar_format, digits=8))

            if draw(st.booleans()):
                info.linkname = draw(fspaths(max_size=tarfile.LENGTH_LINK))

            if tar_format == tarfile.PAX_FORMAT:
                maybe_set_pax_header(info, "uname", draw(st.text(max_size=32)))
                maybe_set_pax_header(info, "gname", draw(st.text(max_size=32)))
                maybe_set_pax_header(
                    info,
                    "path",
                    draw(fspaths(max_size=tarfile.LENGTH_NAME)),
                )
                maybe_set_pax_header(
                    info,
                    "linkpath",
                    draw(fspaths(max_size=tarfile.LENGTH_LINK)),
                )

            tar.addfile(info)

    return buf.getvalue()

requirements.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
hypothesis

requirements.txt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
#
2+
# This file is autogenerated by pip-compile with Python 3.13
3+
# by the following command:
4+
#
5+
# pip-compile --generate-hashes --output-file=requirements.txt requirements.in
6+
#
7+
hypothesis==6.148.8 \
8+
--hash=sha256:c1842f47f974d74661b3779a26032f8b91bc1eb30d84741714d3712d7f43e85e \
9+
--hash=sha256:fa6b2ae029bc02f9d2d6c2257b0cbf2dc3782362457d2027a038ad7f4209c385
10+
# via -r requirements.in
11+
sortedcontainers==2.4.0 \
12+
--hash=sha256:25caa5a06cc30b6b83d11423433f65d1f9d76c4c6a0c90e3379eaa43b9bfdb88 \
13+
--hash=sha256:a163dcaede0f1c021485e957a39245190e74249897e2ae4b2aa38595db237ee0
14+
# via hypothesis

tarfile_hypothesis.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
import io
2+
import tarfile
3+
4+
from hypothesis import given
5+
from hypothesis_strategies import tar
6+
7+
8+
9+
@given(tar.tar_archives())
def tar_archive_fuzz_target(tar_archive: bytes) -> None:
    """Open a generated tar archive with ``tarfile.TarFile`` from memory."""
    stream = io.BytesIO(tar_archive)
    tarfile.TarFile(fileobj=stream)


# OSS-Fuzz integration point: expose the Hypothesis fuzz target's
# fuzz_one_input hook under the name FuzzerRunOne.
FuzzerRunOne = tar_archive_fuzz_target.hypothesis.fuzz_one_input

0 commit comments

Comments
 (0)