diff --git a/Makefile b/Makefile
index 9103a1c..7b3d9c8 100644
--- a/Makefile
+++ b/Makefile
@@ -1,8 +1,8 @@
-all : fuzzer-html fuzzer-email fuzzer-httpclient fuzzer-json fuzzer-difflib fuzzer-csv fuzzer-decode fuzzer-ast fuzzer-tarfile fuzzer-tarfile-hypothesis fuzzer-zipfile fuzzer-zipfile-hypothesis fuzzer-re fuzzer-configparser fuzzer-tomllib fuzzer-plistlib fuzzer-xml fuzzer-zoneinfo
+all : fuzzer-html fuzzer-email fuzzer-httpclient fuzzer-json fuzzer-difflib fuzzer-csv fuzzer-decode fuzzer-ast fuzzer-tarfile fuzzer-tarfile-hypothesis fuzzer-zipfile fuzzer-zipfile-hypothesis fuzzer-re fuzzer-configparser fuzzer-tomllib fuzzer-plistlib fuzzer-xml fuzzer-zoneinfo fuzzer-array fuzzer-binascii fuzzer-codecs fuzzer-collections fuzzer-compression fuzzer-crypto fuzzer-datetime fuzzer-dbm fuzzer-expat fuzzer-ioops fuzzer-json-decode fuzzer-json-encode fuzzer-locale fuzzer-mmap fuzzer-operator fuzzer-pickle fuzzer-ssl fuzzer-time fuzzer-unicodedata
 
 PYTHON_CONFIG_PATH=$(CPYTHON_INSTALL_PATH)/bin/python3-config
 CXXFLAGS += $(shell $(PYTHON_CONFIG_PATH) --cflags)
-LDFLAGS += -rdynamic $(shell $(PYTHON_CONFIG_PATH) --ldflags --embed)
+LDFLAGS += -rdynamic $(shell $(PYTHON_CONFIG_PATH) --ldflags --embed) $(CPYTHON_MODLIBS) $(CPYTHON_HACL_LIBS) -Wl,--allow-multiple-definition
 
 fuzzer-html:
 	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"html.py\"" -ldl $(LDFLAGS) -o fuzzer-html
@@ -40,3 +40,41 @@ fuzzer-xml:
 	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"xml.py\"" -ldl $(LDFLAGS) -o fuzzer-xml
 fuzzer-zoneinfo:
 	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"zoneinfo.py\"" -ldl $(LDFLAGS) -o fuzzer-zoneinfo
+# One binary per Python harness: the same fuzzer.cpp is compiled each time with
+# PYTHON_HARNESS_PATH set to the harness script that the binary should load.
+# NOTE(review): these rules list no prerequisites, so an existing fuzzer-* file
+# is never rebuilt, and no .PHONY declaration for `all` is visible in these
+# hunks -- confirm against the full Makefile.
+fuzzer-array:
+	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"fuzz_array.py\"" -ldl $(LDFLAGS) -o fuzzer-array
+fuzzer-binascii:
+	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"fuzz_binascii.py\"" -ldl $(LDFLAGS) -o fuzzer-binascii
+fuzzer-codecs:
+	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"fuzz_codecs.py\"" -ldl $(LDFLAGS) -o fuzzer-codecs
+fuzzer-collections:
+	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"fuzz_collections.py\"" -ldl $(LDFLAGS) -o fuzzer-collections
+fuzzer-compression:
+	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"fuzz_compression.py\"" -ldl $(LDFLAGS) -o fuzzer-compression
+fuzzer-crypto:
+	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"fuzz_crypto.py\"" -ldl $(LDFLAGS) -o fuzzer-crypto
+fuzzer-datetime:
+	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"fuzz_datetime.py\"" -ldl $(LDFLAGS) -o fuzzer-datetime
+fuzzer-dbm:
+	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"fuzz_dbm.py\"" -ldl $(LDFLAGS) -o fuzzer-dbm
+fuzzer-expat:
+	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"fuzz_expat.py\"" -ldl $(LDFLAGS) -o fuzzer-expat
+fuzzer-ioops:
+	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"fuzz_ioops.py\"" -ldl $(LDFLAGS) -o fuzzer-ioops
+fuzzer-json-decode:
+	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"fuzz_json_decode.py\"" -ldl $(LDFLAGS) -o fuzzer-json-decode
+fuzzer-json-encode:
+	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"fuzz_json_encode.py\"" -ldl $(LDFLAGS) -o fuzzer-json-encode
+fuzzer-locale:
+	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"fuzz_locale.py\"" -ldl $(LDFLAGS) -o fuzzer-locale
+fuzzer-mmap:
+	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"fuzz_mmap.py\"" -ldl $(LDFLAGS) -o fuzzer-mmap
+fuzzer-operator:
+	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"fuzz_operator.py\"" -ldl $(LDFLAGS) -o fuzzer-operator
+fuzzer-pickle:
+	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"fuzz_pickle.py\"" -ldl $(LDFLAGS) -o fuzzer-pickle
+fuzzer-ssl:
+	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"fuzz_ssl.py\"" -ldl $(LDFLAGS) -o fuzzer-ssl
+fuzzer-time:
+	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"fuzz_time.py\"" -ldl $(LDFLAGS) -o fuzzer-time
+fuzzer-unicodedata:
+	clang++ $(CXXFLAGS) $(LIB_FUZZING_ENGINE) -std=c++17 fuzzer.cpp -DPYTHON_HARNESS_PATH="\"fuzz_unicodedata.py\"" -ldl $(LDFLAGS) -o fuzzer-unicodedata
diff --git a/fuzz_array.py b/fuzz_array.py
new file mode 100644
index 0000000..6a453dd
--- /dev/null
+++ b/fuzz_array.py
@@ -0,0 +1,100 @@
+from fuzz_dp import FuzzedDataProvider
+import array
+
+TYPECODES = list('bBhHiIlLqQfd')
+
+# Top-level operation constants for FuzzerRunOne
+OP_FROMBYTES = 0
+OP_METHODS = 1
+OP_SLICE = 2
+
+# Array method operation constants for op_array_methods
+METHOD_REVERSE = 0
+METHOD_BYTESWAP = 1
+METHOD_POP = 2
+METHOD_COUNT = 3
+METHOD_INDEX = 4
+METHOD_INSERT = 5
+METHOD_REMOVE = 6
+METHOD_TOBYTES = 7
+
+# Picks a typecode, then builds an array of that type from raw fuzz bytes via
+# frombytes(). The byte count is a whole multiple of the item size and covers
+# at most 200 items. Returns the pair (array, typecode).
+def _consume_array(fdp):
+    tc = fdp.PickValueInList(TYPECODES)
+    itemsize = array.array(tc).itemsize
+    n_items = fdp.ConsumeIntInRange(0, min(fdp.remaining_bytes() // itemsize, 200))
+    data = fdp.ConsumeBytes(n_items * itemsize)
+    a = array.array(tc)
+    a.frombytes(data)
+    return a, tc
+
+
+# Builds an array from fuzz bytes and converts it back with tobytes()/tolist().
+def op_array_frombytes(fdp):
+    a, tc = _consume_array(fdp)
+    a.tobytes()
+    a.tolist()
+
+# Applies up to 20 fuzz-selected methods to a non-empty array. ValueError from
+# index()/remove() is swallowed here; any other exception propagates to the
+# caller and ends the sequence.
+def op_array_methods(fdp):
+    a, tc = _consume_array(fdp)
+    if len(a) == 0:
+        return
+    num_ops = fdp.ConsumeIntInRange(1, 20)
+    for _ in range(num_ops):
+        if fdp.remaining_bytes() == 0:
+            break
+        op = fdp.ConsumeIntInRange(METHOD_REVERSE, METHOD_TOBYTES)
+        if op == METHOD_REVERSE:
+            a.reverse()
+        elif op == METHOD_BYTESWAP:
+            a.byteswap()
+        elif op == METHOD_POP and len(a) > 0:
+            a.pop()
+        elif op == METHOD_COUNT and len(a) > 0:
+            val = fdp.ConsumeRandomValue()
+            a.count(val)
+        elif op == METHOD_INDEX and len(a) > 0:
+            val = fdp.ConsumeRandomValue()
+            try:
+                a.index(val)
+            except ValueError:
+                pass
+        elif op == METHOD_INSERT and len(a) > 0:
+            idx = fdp.ConsumeIntInRange(0, len(a) - 1)
+            val = fdp.ConsumeRandomValue()
+            a.insert(idx, val)
+        elif op == METHOD_REMOVE and len(a) > 0:
+            val = fdp.ConsumeRandomValue()
+            try:
+                a.remove(val)
+            except ValueError:
+                pass
+        elif op == METHOD_TOBYTES:
+            a.tobytes()
+
+# Reads a fuzz-chosen slice, copies it into a new array of the same typecode,
+# and assigns that copy back over the same slice. Needs at least two items.
+def op_array_slice(fdp):
+    a, tc = _consume_array(fdp)
+    if len(a) < 2:
+        return
+    start = fdp.ConsumeIntInRange(0, len(a) - 1)
+    end = fdp.ConsumeIntInRange(start, len(a))
+    _ = a[start:end]
+    b = array.array(tc, a[start:end])
+    a[start:end] = b
+
+# Fuzzes the array module's C implementation (Modules/arraymodule.c).
+# Exercises array construction from raw bytes via frombytes(), element-level
+# operations (reverse, byteswap, pop, count, index, insert, remove), and
+# slice read/write across all 12 typecodes (b/B/h/H/i/I/l/L/q/Q/f/d).
+def FuzzerRunOne(FuzzerInput):
+    # Ignore empty inputs and anything larger than 0x10000 (64 KiB).
+    if len(FuzzerInput) < 1 or len(FuzzerInput) > 0x10000:
+        return
+    fdp = FuzzedDataProvider(FuzzerInput)
+    op = fdp.ConsumeIntInRange(OP_FROMBYTES, OP_SLICE)
+    # Exceptions raised by the selected operation are deliberately discarded.
+    try:
+        if op == OP_FROMBYTES:
+            op_array_frombytes(fdp)
+        elif op == OP_METHODS:
+            op_array_methods(fdp)
+        elif op == OP_SLICE:
+            op_array_slice(fdp)
+    except Exception:
+        pass
diff --git a/fuzz_binascii.py b/fuzz_binascii.py
new file mode 100644
index 0000000..c3ac426
--- /dev/null
+++ b/fuzz_binascii.py
@@ -0,0 +1,90 @@
+from fuzz_dp import FuzzedDataProvider
+import binascii
+
+# Top-level operation constants for FuzzerRunOne
+OP_DECODE = 0
+OP_ENCODE = 1
+OP_CHECKSUM = 2
+OP_ROUNDTRIP = 3
+
+# Decode/encode sub-operation constants
+CODEC_BASE64_STRICT = 0
+CODEC_HEX = 1
+CODEC_UU = 2
+CODEC_QP = 3
+CODEC_BASE64 = 4
+CODEC_BASE64_ALT = 5
+
+# Feeds all remaining fuzz bytes to one a2b_* decoder. The `strict` flag only
+# takes effect for CODEC_BASE64_STRICT.
+# NOTE(review): CODEC_BASE64 and CODEC_BASE64_ALT both call a2b_base64(data)
+# with default arguments -- confirm whether a distinct variant was intended.
+def op_decode(fdp):
+    which = fdp.ConsumeIntInRange(CODEC_BASE64_STRICT, CODEC_BASE64_ALT)
+    strict = fdp.ConsumeBool()
+    data = fdp.ConsumeBytes(fdp.remaining_bytes())
+    if which == CODEC_BASE64_STRICT:
+        if strict:
+            binascii.a2b_base64(data, strict_mode=True)
+        else:
+            binascii.a2b_base64(data)
+    elif which == CODEC_HEX:
+        binascii.a2b_hex(data)
+    elif which == CODEC_UU:
+        binascii.a2b_uu(data)
+    elif which == CODEC_QP:
+        binascii.a2b_qp(data)
+    elif which == CODEC_BASE64:
+        binascii.a2b_base64(data)
+    elif which == CODEC_BASE64_ALT:
+        binascii.a2b_base64(data)
+
+# Encodes up to 10000 fuzz bytes with one b2a_* encoder; returns early when no
+# bytes were obtained. The UU branch ignores `data` and draws its own chunk of
+# at most 45 bytes (presumably the longest input b2a_uu accepts -- confirm).
+def op_encode(fdp):
+    which = fdp.ConsumeIntInRange(CODEC_BASE64_STRICT, CODEC_BASE64_ALT)
+    newline = fdp.ConsumeBool()
+    data = fdp.ConsumeBytes(fdp.ConsumeIntInRange(0, 10000))
+    if not data:
+        return
+    if which == CODEC_BASE64_STRICT:
+        binascii.b2a_base64(data, newline=newline)
+    elif which == CODEC_HEX:
+        binascii.b2a_hex(data)
+    elif which == CODEC_UU:
+        uu_data = fdp.ConsumeBytes(fdp.ConsumeIntInRange(0, 45))
+        binascii.b2a_uu(uu_data)
+    elif which == CODEC_QP:
+        binascii.b2a_qp(data)
+    elif which == CODEC_BASE64:
+        binascii.b2a_base64(data)
+    elif which == CODEC_BASE64_ALT:
+        binascii.b2a_base64(data)
+
+# Runs either crc32() or crc_hqx() (initial value 0) over all remaining bytes.
+def op_checksum(fdp):
+    use_crc32 = fdp.ConsumeBool()
+    data = fdp.ConsumeBytes(fdp.remaining_bytes())
+    if use_crc32:
+        binascii.crc32(data)
+    else:
+        binascii.crc_hqx(data, 0)
+
+# Hex-encodes all remaining bytes and decodes the result again; the decoded
+# value is not compared with the input.
+def op_roundtrip(fdp):
+    data = fdp.ConsumeBytes(fdp.remaining_bytes())
+    hexed = binascii.hexlify(data)
+    binascii.unhexlify(hexed)
+
+# Fuzzes the binascii module's C implementation (Modules/binascii.c).
+# Exercises binary-to-ASCII and ASCII-to-binary conversions for base64,
+# hex, UU-encoding, and quoted-printable codecs. Also tests CRC32,
+# CRC-HQX checksums, and hexlify/unhexlify roundtrips.
+def FuzzerRunOne(FuzzerInput):
+    # Ignore empty inputs and anything larger than 0x100000 (1 MiB).
+    if len(FuzzerInput) < 1 or len(FuzzerInput) > 0x100000:
+        return
+    fdp = FuzzedDataProvider(FuzzerInput)
+    op = fdp.ConsumeIntInRange(OP_DECODE, OP_ROUNDTRIP)
+    # Exceptions raised by the selected operation are deliberately discarded.
+    try:
+        if op == OP_DECODE:
+            op_decode(fdp)
+        elif op == OP_ENCODE:
+            op_encode(fdp)
+        elif op == OP_CHECKSUM:
+            op_checksum(fdp)
+        else:
+            op_roundtrip(fdp)
+    except Exception:
+        pass
diff --git a/fuzz_codecs.py b/fuzz_codecs.py
new file mode 100644
index 0000000..8ff6879
--- /dev/null
+++ b/fuzz_codecs.py
@@ -0,0 +1,92 @@
+from fuzz_dp import FuzzedDataProvider
+import codecs
+import io
+
+DECODERS = [
+    "utf-7", "shift_jis", "euc-jp", "gb2312", "big5", "iso-2022-jp",
+    "euc-kr", "gb18030", "big5hkscs", "charmap", "ascii", "latin-1",
+    "cp1252", "unicode_escape", "raw_unicode_escape", "utf-16", "utf-32",
+]
+
+ENCODERS = [
+    "shift_jis", "euc-jp", "gb2312", "big5", "iso-2022-jp", "euc-kr",
+    "gb18030", "big5hkscs", "unicode_escape", "raw_unicode_escape",
+    "utf-7", "utf-8", "utf-16", "utf-16-le", "utf-16-be", "utf-32",
+    "latin-1", "ascii", "charmap",
+]
+
+INC_DEC_CODECS = ["shift_jis", "gb18030", "utf-16"]
+INC_ENC_CODECS = ["shift_jis", "utf-8"]
+
+OP_DECODE = 0
+OP_ENCODE = 1
+OP_INCREMENTAL_DECODE = 2
+OP_INCREMENTAL_ENCODE = 3
+OP_STREAM_READ = 4
+
+# Decodes all remaining bytes with a codec picked from DECODERS, using the
+# 'replace' error handler.
+def op_decode(fdp):
+    codec = fdp.PickValueInList(DECODERS)
+    data = fdp.ConsumeBytes(fdp.remaining_bytes())
+    codecs.decode(data, codec, 'replace')
+
+# Encodes a fuzz-built string (length request capped at 10000) with a codec
+# picked from ENCODERS, using the 'replace' error handler. Does nothing when
+# no input is left.
+def op_encode(fdp):
+    codec = fdp.PickValueInList(ENCODERS)
+    n = fdp.ConsumeIntInRange(1, min(fdp.remaining_bytes(), 10000)) if fdp.remaining_bytes() > 0 else 0
+    if n == 0:
+        return
+    s = fdp.ConsumeUnicode(n)
+    codecs.encode(s, codec, 'replace')
+
+# Splits the input into two chunks and feeds them to an incremental decoder,
+# marking the second chunk as final, then calls getstate() and reset().
+def op_incremental_decode(fdp):
+    codec = fdp.PickValueInList(INC_DEC_CODECS)
+    chunk1_size = fdp.ConsumeIntInRange(0, 10000)
+    chunk1 = fdp.ConsumeBytes(chunk1_size)
+    chunk2 = fdp.ConsumeBytes(fdp.remaining_bytes())
+    decoder = codecs.getincrementaldecoder(codec)('replace')
+    decoder.decode(chunk1)
+    decoder.decode(chunk2, True)
+    decoder.getstate()
+    decoder.reset()
+
+# Encodes a fuzz-built string in two pieces with an incremental encoder,
+# calling reset() between the pieces and getstate() at the end.
+def op_incremental_encode(fdp):
+    codec = fdp.PickValueInList(INC_ENC_CODECS)
+    n = fdp.ConsumeIntInRange(1, min(fdp.remaining_bytes(), 10000)) if fdp.remaining_bytes() > 0 else 0
+    if n == 0:
+        return
+    s = fdp.ConsumeUnicode(n)
+    split = fdp.ConsumeIntInRange(0, len(s))
+    encoder = codecs.getincrementalencoder(codec)('replace')
+    encoder.encode(s[:split])
+    encoder.reset()
+    encoder.encode(s[split:])
+    encoder.getstate()
+
+# Reads all remaining bytes through a UTF-8 StreamReader ('replace' errors)
+# wrapped around an in-memory BytesIO.
+def op_stream(fdp):
+    data = fdp.ConsumeBytes(fdp.remaining_bytes())
+    bio = io.BytesIO(data)
+    reader = codecs.getreader('utf-8')(bio, 'replace')
+    reader.read()
+
+# Fuzzes CPython's codec infrastructure (Modules/cjkcodecs/, Python/codecs.c).
+# Exercises full and incremental encode/decode for CJK codecs (Shift-JIS,
+# EUC-JP, GB2312, Big5, ISO-2022-JP, EUC-KR, GB18030, Big5-HKSCS) and
+# Western/Unicode codecs (UTF-7/16/32, charmap, unicode_escape, latin-1).
+# Also tests stream-based reading via codecs.getreader().
+def FuzzerRunOne(FuzzerInput):
+    # Ignore empty inputs and anything larger than 0x100000 (1 MiB).
+    if len(FuzzerInput) < 1 or len(FuzzerInput) > 0x100000:
+        return
+    fdp = FuzzedDataProvider(FuzzerInput)
+    op = fdp.ConsumeIntInRange(OP_DECODE, OP_STREAM_READ)
+    # Exceptions raised by the selected operation are deliberately discarded.
+    try:
+        if op == OP_DECODE:
+            op_decode(fdp)
+        elif op == OP_ENCODE:
+            op_encode(fdp)
+        elif op == OP_INCREMENTAL_DECODE:
+            op_incremental_decode(fdp)
+        elif op == OP_INCREMENTAL_ENCODE:
+            op_incremental_encode(fdp)
+        else:
+            op_stream(fdp)
+    except Exception:
+        pass
diff --git a/fuzz_collections.py b/fuzz_collections.py
new file mode 100644
index 0000000..ede34c3
--- /dev/null
+++ b/fuzz_collections.py
@@ -0,0 +1,182 @@
+from fuzz_dp import FuzzedDataProvider
+import collections
+
+# Top-level fuzzer dispatch operations
+OP_FUZZER_COUNT_ELEMENTS = 0
+OP_FUZZER_DEQUE = 1
+OP_FUZZER_DEFAULTDICT = 2
+OP_FUZZER_ORDERED_DICT = 3
+
+# Deque operations
+OP_DEQUE_APPEND = 0
+OP_DEQUE_APPENDLEFT = 1
+OP_DEQUE_POP = 2
+OP_DEQUE_POPLEFT = 3
+OP_DEQUE_EXTEND = 4
+OP_DEQUE_EXTENDLEFT = 5
+OP_DEQUE_ROTATE = 6
+OP_DEQUE_REVERSE = 7
+OP_DEQUE_COUNT = 8
+OP_DEQUE_INDEX = 9
+OP_DEQUE_REMOVE = 10
+OP_DEQUE_CLEAR = 11
+OP_DEQUE_COPY = 12
+OP_DEQUE_COMPARE = 13
+OP_DEQUE_ITERATE = 14
+
+# Defaultdict operations
+OP_DDICT_INCREMENT = 0
+OP_DDICT_ACCESS = 1
+OP_DDICT_CONTAINS = 2
+OP_DDICT_POP = 3
+
+# OrderedDict operations
+OP_ODICT_SET = 0
+OP_ODICT_POP = 1
+OP_ODICT_MOVE_TO_END = 2
+OP_ODICT_LIST_KEYS = 3
+OP_ODICT_REVERSED = 4
+OP_ODICT_POPITEM = 5
+
+# Exercises collections._count_elements(), an internal C helper that counts
+# occurrences of each character in a string into a dict. Targets the
+# _count_elements C function which has fast-path logic for exact-dict types
+# vs dict subclasses.
# Passes an empty dict and a latin-1 decoded string (length request capped at
# 10000) to collections._count_elements(). Returns without calling it when the
# provider has no input left or yields a zero length.
def op_count_elements(fdp):
    available = fdp.remaining_bytes()
    if available <= 0:
        return
    n = fdp.ConsumeIntInRange(1, min(available, 10000))
    if n == 0:
        return
    text = fdp.ConsumeBytes(n).decode('latin-1')
    tally = {}
    collections._count_elements(tally, text)

# Builds a collections.deque (optionally bounded by a fuzz-chosen maxlen of
# 0..100) from up to 50 one-byte ints, then applies up to 30 fuzz-selected
# operations: append/appendleft, pop/popleft, extend/extendleft, rotate,
# reverse, count/index/remove with random-typed values, clear, copy, == and <
# against a second deque, and list()/len()/bool(). Operations that need a
# non-empty deque are skipped when it is empty; ValueError from index() and
# remove() is ignored.
def op_deque(fdp):
    bounded = fdp.ConsumeBool()
    maxlen = fdp.ConsumeIntInRange(0, 100) if bounded else None
    seed_len = fdp.ConsumeIntInRange(0, min(fdp.remaining_bytes(), 50))
    dq = collections.deque(fdp.ConsumeIntList(seed_len, 1), maxlen=maxlen)
    steps = fdp.ConsumeIntInRange(1, 30)
    for _ in range(steps):
        if not fdp.remaining_bytes():
            break
        op = fdp.ConsumeIntInRange(OP_DEQUE_APPEND, OP_DEQUE_ITERATE)
        if op == OP_DEQUE_APPEND:
            dq.append(fdp.ConsumeRandomValue())
        elif op == OP_DEQUE_APPENDLEFT:
            dq.appendleft(fdp.ConsumeRandomValue())
        elif op == OP_DEQUE_POP:
            if dq:
                dq.pop()
        elif op == OP_DEQUE_POPLEFT:
            if dq:
                dq.popleft()
        elif op == OP_DEQUE_EXTEND:
            count = fdp.ConsumeIntInRange(1, min(fdp.remaining_bytes(), 50))
            dq.extend(fdp.ConsumeIntList(count, 1))
        elif op == OP_DEQUE_EXTENDLEFT:
            count = fdp.ConsumeIntInRange(1, min(fdp.remaining_bytes(), 50))
            dq.extendleft(fdp.ConsumeIntList(count, 1))
        elif op == OP_DEQUE_ROTATE:
            dq.rotate(fdp.ConsumeIntInRange(-10, 10))
        elif op == OP_DEQUE_REVERSE:
            dq.reverse()
        elif op == OP_DEQUE_COUNT:
            dq.count(fdp.ConsumeRandomValue())
        elif op == OP_DEQUE_INDEX:
            if dq:
                try:
                    dq.index(fdp.ConsumeRandomValue())
                except ValueError:
                    pass
        elif op == OP_DEQUE_REMOVE:
            if dq:
                try:
                    dq.remove(fdp.ConsumeRandomValue())
                except ValueError:
                    pass
        elif op == OP_DEQUE_CLEAR:
            dq.clear()
        elif op == OP_DEQUE_COPY:
            dq.copy()
        elif op == OP_DEQUE_COMPARE:
            other_len = fdp.ConsumeIntInRange(0, min(fdp.remaining_bytes(), 20))
            other = collections.deque(fdp.ConsumeIntList(other_len, 1))
            dq == other
            dq < other
        elif op == OP_DEQUE_ITERATE:
            list(dq)
            len(dq)
            bool(dq)

# Drives a collections.defaultdict(int) through up to 20 fuzz-selected steps.
# Each step draws a 1..10 byte key (decoded as latin-1) and then increments
# it by a one-byte int, reads it, tests membership, or pops it. Short keys
# make repeats likely, so both present-key and missing-key paths get hit.
def op_defaultdict(fdp):
    table = collections.defaultdict(int)
    for _ in range(fdp.ConsumeIntInRange(1, 20)):
        if not fdp.remaining_bytes():
            break
        op = fdp.ConsumeIntInRange(OP_DDICT_INCREMENT, OP_DDICT_POP)
        key_len = fdp.ConsumeIntInRange(1, 10)
        key = fdp.ConsumeBytes(key_len).decode('latin-1')
        if op == OP_DDICT_INCREMENT:
            table[key] += fdp.ConsumeInt(1)
        elif op == OP_DDICT_ACCESS:
            table[key]
        elif op == OP_DDICT_CONTAINS:
            key in table
        elif op == OP_DDICT_POP:
            table.pop(key, None)

# Exercises collections.OrderedDict's C implementation (odictobject.c).
# Runs fuzzed sequences of set (with random-typed values), pop,
# move_to_end (with fuzzed last= direction), key listing, reversed
# iteration, and popitem (with fuzzed last= direction). The key reuse
# from short fuzzed strings exercises the internal linked-list
# reordering logic.
# Drives a collections.OrderedDict through up to 20 fuzz-selected steps. Every
# step first draws a 1..10 byte key (decoded as latin-1), even for operations
# that do not use it, and then sets it to a random-typed value, pops it, moves
# it to either end (only if present), lists the keys, iterates in reverse, or
# pops an item from either end (only if the dict is non-empty).
def op_ordered_dict(fdp):
    od = collections.OrderedDict()
    for _ in range(fdp.ConsumeIntInRange(1, 20)):
        if not fdp.remaining_bytes():
            break
        op = fdp.ConsumeIntInRange(OP_ODICT_SET, OP_ODICT_POPITEM)
        key_len = fdp.ConsumeIntInRange(1, 10)
        key = fdp.ConsumeBytes(key_len).decode('latin-1')
        if op == OP_ODICT_SET:
            od[key] = fdp.ConsumeRandomValue()
        elif op == OP_ODICT_POP:
            od.pop(key, None)
        elif op == OP_ODICT_MOVE_TO_END:
            # The direction flag is only drawn when the key exists.
            if key in od:
                od.move_to_end(key, last=fdp.ConsumeBool())
        elif op == OP_ODICT_LIST_KEYS:
            list(od.keys())
        elif op == OP_ODICT_REVERSED:
            list(reversed(od))
        elif op == OP_ODICT_POPITEM:
            if od:
                od.popitem(last=fdp.ConsumeBool())

# Fuzzes the _collections C module (Modules/_collectionsmodule.c).
# Exercises _count_elements() with fuzzed iterables, deque operations
# (append, pop, extend, rotate, reverse, count, index, remove, copy),
# defaultdict key access patterns, and OrderedDict manipulation
# (set, pop, move_to_end, popitem, reversed iteration).
+def FuzzerRunOne(FuzzerInput):
+    # Ignore empty inputs and anything larger than 0x10000 (64 KiB).
+    if len(FuzzerInput) < 1 or len(FuzzerInput) > 0x10000:
+        return
+    fdp = FuzzedDataProvider(FuzzerInput)
+    op = fdp.ConsumeIntInRange(OP_FUZZER_COUNT_ELEMENTS, OP_FUZZER_ORDERED_DICT)
+    # Exceptions raised by the selected operation are deliberately discarded.
+    try:
+        if op == OP_FUZZER_COUNT_ELEMENTS:
+            op_count_elements(fdp)
+        elif op == OP_FUZZER_DEQUE:
+            op_deque(fdp)
+        elif op == OP_FUZZER_DEFAULTDICT:
+            op_defaultdict(fdp)
+        else:
+            op_ordered_dict(fdp)
+    except Exception:
+        pass
diff --git a/fuzz_compression.py b/fuzz_compression.py
new file mode 100644
index 0000000..4dec33a
--- /dev/null
+++ b/fuzz_compression.py
@@ -0,0 +1,121 @@
+from fuzz_dp import FuzzedDataProvider
+import zlib
+import bz2
+import lzma
+
+WBITS_CHOICES = [-15, 0, 15, 31, 47]
+MAX_DECOMPRESS_LEN = 1024 * 1024  # 1 MiB cap to prevent OOM from small inputs
+
+OP_ZLIB_DECOMPRESS = 0
+OP_ZLIB_COMPRESS = 1
+OP_ZLIB_CHECKSUM = 2
+OP_BZ2_COMPRESS_DECOMPRESS = 3
+OP_LZMA_DECOMPRESS = 4
+OP_LZMA_COMPRESS = 5
+NUM_OPS = 6
+
+# Decompresses all remaining bytes with a zlib decompressobj configured with a
+# fuzz-chosen wbits value and, optionally, a preset dictionary of up to 32768
+# bytes. Output of decompress() is capped at MAX_DECOMPRESS_LEN. Optionally
+# calls flush() and re-runs the same data through a copy() of the object.
+# NOTE(review): flush() is called without MAX_DECOMPRESS_LEN -- confirm its
+# output cannot grow past the cap that the decompress() calls enforce.
+def op_zlib_decompress(fdp):
+    wbits = fdp.PickValueInList(WBITS_CHOICES)
+    use_zdict = fdp.ConsumeBool()
+    do_flush = fdp.ConsumeBool()
+    do_copy = fdp.ConsumeBool()
+    zdict = b''
+    if use_zdict:
+        zdict_size = fdp.ConsumeIntInRange(1, 32768)
+        zdict = fdp.ConsumeBytes(zdict_size)
+    data = fdp.ConsumeBytes(fdp.remaining_bytes())
+    kwargs = {}
+    if zdict:
+        kwargs['zdict'] = zdict
+    dobj = zlib.decompressobj(wbits, **kwargs)
+    dobj.decompress(data, MAX_DECOMPRESS_LEN)
+    if do_flush:
+        dobj.flush()
+    if do_copy:
+        copy_obj = dobj.copy()
+        copy_obj.decompress(data, MAX_DECOMPRESS_LEN)
+
+# Compresses up to 10000 bytes at level 0..9, either with zlib.compress() or
+# with a compressobj (optionally flushing a copy() of it as well).
+# NOTE(review): indentation was lost in this patch; cobj.flush() is assumed to
+# run whether or not do_copy is set -- confirm against the original file.
+def op_zlib_compress(fdp):
+    level = fdp.ConsumeIntInRange(0, 9)
+    use_obj = fdp.ConsumeBool()
+    do_copy = fdp.ConsumeBool()
+    n = fdp.ConsumeIntInRange(1, 10000)
+    data = fdp.ConsumeBytes(n)
+    if not data:
+        return
+    if use_obj:
+        cobj = zlib.compressobj(level)
+        cobj.compress(data)
+        if do_copy:
+            copy_obj = cobj.copy()
+            copy_obj.flush()
+        cobj.flush()
+    else:
+        zlib.compress(data, level)
+
+# Runs either zlib.crc32() or zlib.adler32() over all remaining bytes.
+def op_zlib_checksum(fdp):
+    use_crc = fdp.ConsumeBool()
+    data = fdp.ConsumeBytes(fdp.remaining_bytes())
+    if use_crc:
+        zlib.crc32(data)
+    else:
+        zlib.adler32(data)
+
+# Either compresses up to 10000 bytes with bz2.compress(), or feeds them to a
+# BZ2Decompressor with output capped at MAX_DECOMPRESS_LEN.
+def op_bz2(fdp):
+    do_compress = fdp.ConsumeBool()
+    n = fdp.ConsumeIntInRange(1, min(fdp.remaining_bytes(), 10000)) if fdp.remaining_bytes() > 0 else 0
+    if n == 0:
+        return
+    data = fdp.ConsumeBytes(n)
+    if do_compress:
+        bz2.compress(data)
+    else:
+        dobj = bz2.BZ2Decompressor()
+        dobj.decompress(data, MAX_DECOMPRESS_LEN)
+
+# Feeds up to 10000 bytes to an LZMADecompressor in a fuzz-chosen container
+# format, with output capped at MAX_DECOMPRESS_LEN. A 16 MiB memlimit is passed
+# for every format except FORMAT_RAW, where it is replaced by an explicit
+# LZMA2 filter chain.
+def op_lzma_decompress(fdp):
+    formats = [lzma.FORMAT_AUTO, lzma.FORMAT_XZ, lzma.FORMAT_ALONE, lzma.FORMAT_RAW]
+    fmt = fdp.PickValueInList(formats)
+    n = fdp.ConsumeIntInRange(1, min(fdp.remaining_bytes(), 10000)) if fdp.remaining_bytes() > 0 else 0
+    if n == 0:
+        return
+    data = fdp.ConsumeBytes(n)
+    kwargs = {'format': fmt, 'memlimit': 16 * 1024 * 1024}
+    if fmt == lzma.FORMAT_RAW:
+        kwargs['filters'] = [{'id': lzma.FILTER_LZMA2}]
+        del kwargs['memlimit']
+    dobj = lzma.LZMADecompressor(**kwargs)
+    dobj.decompress(data, MAX_DECOMPRESS_LEN)
+
+# Compresses up to 10000 bytes with lzma.compress() using default settings.
+def op_lzma_compress(fdp):
+    n = fdp.ConsumeIntInRange(1, min(fdp.remaining_bytes(), 10000)) if fdp.remaining_bytes() > 0 else 0
+    if n == 0:
+        return
+    data = fdp.ConsumeBytes(n)
+    lzma.compress(data)
+
+# Fuzzes zlib, bz2, and lzma C modules (Modules/zlibmodule.c,
+# Modules/_bz2module.c, Modules/_lzmamodule.c). Exercises decompression
+# with various wbits/format settings and optional zlib dictionaries,
+# compression at different levels with compressobj/compress, CRC32/Adler32
+# checksums, and BZ2/LZMA decompressor objects with memory limits.
+def FuzzerRunOne(FuzzerInput):
+    # Ignore empty inputs and anything larger than 0x100000 (1 MiB).
+    if len(FuzzerInput) < 1 or len(FuzzerInput) > 0x100000:
+        return
+    fdp = FuzzedDataProvider(FuzzerInput)
+    op = fdp.ConsumeIntInRange(0, NUM_OPS - 1)
+    # Exceptions raised by the selected operation are deliberately discarded.
+    try:
+        if op == OP_ZLIB_DECOMPRESS:
+            op_zlib_decompress(fdp)
+        elif op == OP_ZLIB_COMPRESS:
+            op_zlib_compress(fdp)
+        elif op == OP_ZLIB_CHECKSUM:
+            op_zlib_checksum(fdp)
+        elif op == OP_BZ2_COMPRESS_DECOMPRESS:
+            op_bz2(fdp)
+        elif op == OP_LZMA_DECOMPRESS:
+            op_lzma_decompress(fdp)
+        else:
+            op_lzma_compress(fdp)
+    except Exception:
+        pass
diff --git a/fuzz_crypto.py b/fuzz_crypto.py
new file mode 100644
index 0000000..cf0bc97
--- /dev/null
+++ b/fuzz_crypto.py
@@ -0,0 +1,207 @@
+from fuzz_dp import FuzzedDataProvider
+import hashlib
+import hmac
+import io
+
+import _md5, _sha1, _sha2, _sha3, _blake2, _hmac
+
+HASH_CTORS = [
+    _md5.md5, _sha1.sha1,
+    _sha2.sha224, _sha2.sha256, _sha2.sha384, _sha2.sha512,
+    _sha3.sha3_224, _sha3.sha3_256, _sha3.sha3_384, _sha3.sha3_512,
+    _blake2.blake2b, _blake2.blake2s,
+]
+SHAKE_CTORS = [_sha3.shake_128, _sha3.shake_256]
+
+# Only the one-shot compute_* helpers that this build of _hmac actually
+# provides; the list may be empty.
+HMAC_COMPUTE_FUNCS = [
+    getattr(_hmac, name)
+    for name in ['compute_md5', 'compute_sha1', 'compute_sha256', 'compute_sha512']
+    if hasattr(_hmac, name)
+]
+
+HMAC_ALGOS = ['md5', 'sha224', 'sha256', 'sha384', 'sha512', 'sha3_256', 'blake2s']
+PBKDF2_ALGOS = ['sha1', 'sha256', 'sha512']
+HASHLIB_ALGOS = ['md5', 'sha256', 'sha3_256', 'sha512']
+
+# --- chain_hash_actions action constants ---
+HASH_ACTION_UPDATE = 0
+HASH_ACTION_DIGEST = 1
+HASH_ACTION_HEXDIGEST = 2
+HASH_ACTION_COPY_DIGEST = 3
+HASH_ACTION_READ_ATTRS = 4
+
+# --- op_shake_chain action constants ---
+SHAKE_ACTION_UPDATE = 0
+SHAKE_ACTION_DIGEST = 1
+SHAKE_ACTION_COPY_DIGEST = 2
+
+# --- FuzzerRunOne operation constants ---
+OP_HASH_CHAIN = 0
+OP_SHAKE_CHAIN = 1
+OP_BLAKE2B_KEYED = 2
+OP_BLAKE2S_KEYED = 3
+OP_BLAKE2B_VARDIGEST = 4
+OP_BLAKE2S_VARDIGEST = 5
+OP_HMAC_COMPUTE = 6
+OP_PYHMAC_CHAIN = 7
+OP_HMAC_DIGEST = 8
+OP_HMAC_COMPARE = 9
+OP_HASHLIB_CHAIN = 10
+OP_HASHLIB_FILE_DIGEST = 11
+OP_PBKDF2 = 12
+
+# Drives the hash-like object `h` through up to 100 fuzz-chosen actions:
+# update() with up to 10000 bytes, digest(), hexdigest(), digest() of a copy,
+# or reading name/digest_size/block_size. Stops early once input runs out.
+def chain_hash_actions(h, fdp):
+    for _ in range(fdp.ConsumeIntInRange(1, 100)):
+        if fdp.remaining_bytes() == 0:
+            break
+        action = fdp.ConsumeIntInRange(HASH_ACTION_UPDATE, HASH_ACTION_READ_ATTRS)
+        if action == HASH_ACTION_UPDATE:
+            n = fdp.ConsumeIntInRange(0, min(fdp.remaining_bytes(), 10000))
+            h.update(fdp.ConsumeBytes(n))
+        elif action == HASH_ACTION_DIGEST:
+            h.digest()
+        elif action == HASH_ACTION_HEXDIGEST:
+            h.hexdigest()
+        elif action == HASH_ACTION_COPY_DIGEST:
+            h.copy().digest()
+        elif action == HASH_ACTION_READ_ATTRS:
+            _ = h.name
+            _ = h.digest_size
+            _ = h.block_size
+
+# Picks a constructor from HASH_CTORS, seeds it with up to 10000 bytes and
+# hands the resulting object to chain_hash_actions().
+def op_hash_chain(fdp):
+    ctor = fdp.PickValueInList(HASH_CTORS)
+    n = fdp.ConsumeIntInRange(0, 10000)
+    init_data = fdp.ConsumeBytes(n)
+    h = ctor(init_data)
+    chain_hash_actions(h, fdp)
+
+# Same idea for the SHAKE constructors, whose digest() takes an output length:
+# up to 100 actions of update(), digest(length) or digest(length) on a copy,
+# with length drawn from 1..10000.
+def op_shake_chain(fdp):
+    ctor = fdp.PickValueInList(SHAKE_CTORS)
+    n = fdp.ConsumeIntInRange(0, 10000)
+    init_data = fdp.ConsumeBytes(n)
+    h = ctor(init_data)
+    for _ in range(fdp.ConsumeIntInRange(1, 100)):
+        if fdp.remaining_bytes() == 0:
+            break
+        action = fdp.ConsumeIntInRange(SHAKE_ACTION_UPDATE, SHAKE_ACTION_COPY_DIGEST)
+        if action == SHAKE_ACTION_UPDATE:
+            n2 = fdp.ConsumeIntInRange(0, min(fdp.remaining_bytes(), 10000))
+            h.update(fdp.ConsumeBytes(n2))
+        elif action == SHAKE_ACTION_DIGEST:
+            length = fdp.ConsumeIntInRange(1, 10000)
+            h.digest(length)
+        elif action == SHAKE_ACTION_COPY_DIGEST:
+            h2 = h.copy()
+            length = fdp.ConsumeIntInRange(1, 10000)
+            h2.digest(length)
+
+# Builds ctor(data, key=..., salt=..., person=...) where the key, salt and
+# person lengths are drawn from 0..max_key, 0..max_salt and 0..max_person,
+# then hands the object to chain_hash_actions().
+def op_blake2_keyed(fdp, ctor, max_key, max_salt, max_person):
+    key_len = fdp.ConsumeIntInRange(0, max_key)
+    key = fdp.ConsumeBytes(key_len)
+    salt_len = fdp.ConsumeIntInRange(0, max_salt)
+    salt = fdp.ConsumeBytes(salt_len)
+    person_len = fdp.ConsumeIntInRange(0, max_person)
+    person = fdp.ConsumeBytes(person_len)
+    n = fdp.ConsumeIntInRange(0, 10000)
+    data = fdp.ConsumeBytes(n)
+    h = ctor(data, key=key, salt=salt, person=person)
+    chain_hash_actions(h, fdp)
+
# Builds ctor(data, digest_size=ds) with ds drawn from 1..max_ds and up to
# 10000 bytes of initial data, then hands the object to chain_hash_actions().
def op_blake2_vardigest(fdp, ctor, max_ds):
    ds = fdp.ConsumeIntInRange(1, max_ds)
    n = fdp.ConsumeIntInRange(0, 10000)
    data = fdp.ConsumeBytes(n)
    h = ctor(data, digest_size=ds)
    chain_hash_actions(h, fdp)

# Calls one of the one-shot helpers in HMAC_COMPUTE_FUNCS with a fuzz key and
# message. Does nothing when the list is empty. The key falls back to a single
# NUL byte when the provider yields no bytes, so it is never empty.
def op_hmac_compute(fdp):
    if not HMAC_COMPUTE_FUNCS:
        return
    func = fdp.PickValueInList(HMAC_COMPUTE_FUNCS)
    key_len = fdp.ConsumeIntInRange(1, 10000)
    key = fdp.ConsumeBytes(key_len) or b'\x00'
    msg = fdp.ConsumeBytes(fdp.remaining_bytes())
    func(key, msg)

# Creates an hmac object for an algorithm from HMAC_ALGOS with a non-empty
# fuzz key and hands it to chain_hash_actions().
def op_pyhmac_chain(fdp):
    algo = fdp.PickValueInList(HMAC_ALGOS)
    key_len = fdp.ConsumeIntInRange(1, 10000)
    key = fdp.ConsumeBytes(key_len) or b'\x00'
    h = hmac.new(key, digestmod=algo)
    chain_hash_actions(h, fdp)

# One-shot hmac.digest() with SHA-256 over a non-empty fuzz key and the
# remaining bytes as the message.
def op_hmac_digest(fdp):
    key_len = fdp.ConsumeIntInRange(1, 10000)
    key = fdp.ConsumeBytes(key_len) or b'\x00'
    msg = fdp.ConsumeBytes(fdp.remaining_bytes())
    hmac.digest(key, msg, 'sha256')

# Computes an HMAC-SHA256 (fixed key b'k') over fuzz data and compares the
# digest against a fuzz-supplied candidate with hmac.compare_digest().
def op_hmac_compare(fdp):
    h = hmac.new(b'k', digestmod='sha256')
    # Draw the candidate BEFORE consuming the message. The message takes every
    # remaining byte, so drawing the candidate afterwards would always yield
    # b'' and compare_digest() would never see fuzz-controlled input.
    cmp_data = fdp.ConsumeBytes(h.digest_size)
    h.update(fdp.ConsumeBytes(fdp.remaining_bytes()))
    hmac.compare_digest(h.digest(), cmp_data)

# Creates a hash via hashlib.new() (usedforsecurity=False) for an algorithm
# from HASHLIB_ALGOS, seeded with up to 10000 bytes, and hands it to
# chain_hash_actions().
def op_hashlib_chain(fdp):
    algo = fdp.PickValueInList(HASHLIB_ALGOS)
    n = fdp.ConsumeIntInRange(0, 10000)
    init_data = fdp.ConsumeBytes(n)
    h = hashlib.new(algo, init_data, usedforsecurity=False)
    chain_hash_actions(h, fdp)

# Hashes all remaining bytes through hashlib.file_digest() reading from an
# in-memory BytesIO, then takes the hex digest.
def op_hashlib_file_digest(fdp):
    algo = fdp.PickValueInList(HASHLIB_ALGOS)
    data = fdp.ConsumeBytes(fdp.remaining_bytes())
    bio = io.BytesIO(data)
    h = hashlib.file_digest(bio, algo)
    h.hexdigest()

# Runs hashlib.pbkdf2_hmac() for a single iteration with a non-empty fuzz salt
# and the remaining bytes as the password.
def op_pbkdf2(fdp):
    algo = fdp.PickValueInList(PBKDF2_ALGOS)
    salt_len = fdp.ConsumeIntInRange(1, 10000)
    salt = fdp.ConsumeBytes(salt_len) or b'\x00'
    pw = fdp.ConsumeBytes(fdp.remaining_bytes())
    hashlib.pbkdf2_hmac(algo, pw, salt, 1)

# Fuzzes CPython's cryptographic C modules (Modules/_hashopenssl.c,
# Modules/blake2module.c, Modules/sha2module.c, Modules/sha3module.c,
# Modules/hmacmodule.c).
+# Exercises hash chains with update/digest/copy
+# for MD5, SHA-1/2/3, BLAKE2b/s (with key/salt/personalization), SHAKE
+# variable-length digests, HMAC construction and comparison, file_digest,
+# and PBKDF2 key derivation.
+def FuzzerRunOne(FuzzerInput):
+    # Ignore empty inputs and anything larger than 0x100000 (1 MiB).
+    if len(FuzzerInput) < 1 or len(FuzzerInput) > 0x100000:
+        return
+    fdp = FuzzedDataProvider(FuzzerInput)
+    op = fdp.ConsumeIntInRange(OP_HASH_CHAIN, OP_PBKDF2)
+    # Exceptions raised by the selected operation are deliberately discarded.
+    try:
+        if op == OP_HASH_CHAIN:
+            op_hash_chain(fdp)
+        elif op == OP_SHAKE_CHAIN:
+            op_shake_chain(fdp)
+        elif op == OP_BLAKE2B_KEYED:
+            # Arguments after the constructor are the maximum key, salt and
+            # person lengths drawn by op_blake2_keyed.
+            op_blake2_keyed(fdp, hashlib.blake2b, 64, 16, 16)
+        elif op == OP_BLAKE2S_KEYED:
+            op_blake2_keyed(fdp, hashlib.blake2s, 32, 8, 8)
+        elif op == OP_BLAKE2B_VARDIGEST:
+            op_blake2_vardigest(fdp, hashlib.blake2b, 64)
+        elif op == OP_BLAKE2S_VARDIGEST:
+            op_blake2_vardigest(fdp, hashlib.blake2s, 32)
+        elif op == OP_HMAC_COMPUTE:
+            op_hmac_compute(fdp)
+        elif op == OP_PYHMAC_CHAIN:
+            op_pyhmac_chain(fdp)
+        elif op == OP_HMAC_DIGEST:
+            op_hmac_digest(fdp)
+        elif op == OP_HMAC_COMPARE:
+            op_hmac_compare(fdp)
+        elif op == OP_HASHLIB_CHAIN:
+            op_hashlib_chain(fdp)
+        elif op == OP_HASHLIB_FILE_DIGEST:
+            op_hashlib_file_digest(fdp)
+        elif op == OP_PBKDF2:
+            op_pbkdf2(fdp)
+    except Exception:
+        pass
diff --git a/fuzz_datetime.py b/fuzz_datetime.py
new file mode 100644
index 0000000..f941bcb
--- /dev/null
+++ b/fuzz_datetime.py
@@ -0,0 +1,79 @@
+from fuzz_dp import FuzzedDataProvider
+from datetime import date, time, datetime, timedelta, timezone
+
+# Parse target constants (op_parse)
+PARSE_DATE_FROMISOFORMAT = 0
+PARSE_TIME_FROMISOFORMAT = 1
+PARSE_DATETIME_FROMISOFORMAT = 2
+PARSE_DATETIME_STRPTIME = 3
+
+# Format target constants (op_format)
+FORMAT_DATE = 0
+FORMAT_TIME = 1
+FORMAT_DATETIME = 2
+
+OP_PARSE = 0
+OP_FORMAT = 1
+
+STRPTIME_FORMATS = [
+    "%Y-%m-%d", "%Y-%m-%d %H:%M:%S", "%d/%m/%Y", "%m/%d/%Y",
+    "%Y%m%d", "%H:%M:%S", "%I:%M %p", "%Y-%m-%dT%H:%M:%S",
+    "%a %b %d %H:%M:%S %Y", "%c",
+]
+
+# Parses a fuzz-built string (length request 1..100) with one of
+# date/time/datetime.fromisoformat() or with datetime.strptime() using a
+# format picked from STRPTIME_FORMATS. Returns early on an empty string.
+def op_parse(fdp):
+    s = fdp.ConsumeUnicode(fdp.ConsumeIntInRange(1, 100))
+    if not s:
+        return
+    target = fdp.ConsumeIntInRange(PARSE_DATE_FROMISOFORMAT, PARSE_DATETIME_STRPTIME)
+    if target == PARSE_DATE_FROMISOFORMAT:
+        date.fromisoformat(s)
+    elif target == PARSE_TIME_FROMISOFORMAT:
+        time.fromisoformat(s)
+    elif target == PARSE_DATETIME_FROMISOFORMAT:
+        datetime.fromisoformat(s)
+    elif target == PARSE_DATETIME_STRPTIME:
+        fmt = fdp.PickValueInList(STRPTIME_FORMATS)
+        datetime.strptime(s, fmt)
+
+# Calls strftime() with a fuzz-built format string (length request 1..100) on
+# a date, time or datetime whose fields are drawn from fixed ranges. The day
+# is limited to 1..28, presumably so that every year/month/day combination is
+# a valid date -- confirm.
+def op_format(fdp):
+    fmt = fdp.ConsumeUnicode(fdp.ConsumeIntInRange(1, 100))
+    if not fmt:
+        return
+    target = fdp.ConsumeIntInRange(FORMAT_DATE, FORMAT_DATETIME)
+    if target == FORMAT_DATE:
+        year = fdp.ConsumeIntInRange(1, 9999)
+        month = fdp.ConsumeIntInRange(1, 12)
+        day = fdp.ConsumeIntInRange(1, 28)
+        date(year, month, day).strftime(fmt)
+    elif target == FORMAT_TIME:
+        hour = fdp.ConsumeIntInRange(0, 23)
+        minute = fdp.ConsumeIntInRange(0, 59)
+        second = fdp.ConsumeIntInRange(0, 59)
+        time(hour, minute, second).strftime(fmt)
+    elif target == FORMAT_DATETIME:
+        year = fdp.ConsumeIntInRange(1, 9999)
+        month = fdp.ConsumeIntInRange(1, 12)
+        day = fdp.ConsumeIntInRange(1, 28)
+        hour = fdp.ConsumeIntInRange(0, 23)
+        minute = fdp.ConsumeIntInRange(0, 59)
+        second = fdp.ConsumeIntInRange(0, 59)
+        datetime(year, month, day, hour, minute, second).strftime(fmt)
+
+# Fuzzes the _datetime C module (Modules/_datetimemodule.c).
+# Exercises ISO format parsing (date/time/datetime.fromisoformat),
+# strptime with multiple predefined format strings, and strftime with
+# fuzz-generated format strings. Only operations that pass fuzzed
+# text into the C parser are included.
OP_STORE = 0
OP_GET = 1
OP_LIST_KEYS = 2
OP_DELETE = 3
OP_ITERATE = 4

# Reads one key of 1..100 bytes: the length comes from the back of the
# input and the bytes from the front. When no input is left the key is b''.
def _consume_key(fdp):
    upper = max(1, min(fdp.remaining_bytes(), 100))
    return fdp.ConsumeBytes(fdp.ConsumeIntInRange(1, upper))

# Fuzzes key-value store operations on a temporary database opened with
# dbm.open(): store, get, key listing, deletion, and iteration with fuzzed
# keys and values.
# NOTE(review): dbm.open() picks whichever backend the interpreter build
# prefers; confirm it resolves to dbm.gnu if _gdbm
# (Modules/_gdbmmodule.c) is the intended target.
def FuzzerRunOne(FuzzerInput):
    if len(FuzzerInput) < 1 or len(FuzzerInput) > 0x10000:
        return
    fdp = FuzzedDataProvider(FuzzerInput)
    try:
        with tempfile.TemporaryDirectory() as tmpdir:
            dbpath = os.path.join(tmpdir, 'fuzzdb')
            with dbm.open(dbpath, 'c') as db:
                num_ops = fdp.ConsumeIntInRange(1, 20)
                for _ in range(num_ops):
                    if fdp.remaining_bytes() == 0:
                        break
                    op = fdp.ConsumeIntInRange(OP_STORE, OP_ITERATE)
                    if op == OP_STORE:
                        key = _consume_key(fdp)
                        if fdp.remaining_bytes() > 0:
                            n2 = fdp.ConsumeIntInRange(1, min(fdp.remaining_bytes(), 1000))
                            val = fdp.ConsumeBytes(n2) if n2 > 0 else b''
                        else:
                            val = b''
                        db[key] = val
                    elif op == OP_GET:
                        _ = db.get(_consume_key(fdp))
                    elif op == OP_LIST_KEYS:
                        _ = list(db.keys())
                    elif op == OP_DELETE:
                        key = _consume_key(fdp)
                        if key in db:
                            del db[key]
                    elif op == OP_ITERATE:
                        # Walk the keys through keys() rather than iterating
                        # the handle: keys() is the documented common API,
                        # and a handle without __iter__ would raise TypeError
                        # and end the whole run. At most 20 stores happen per
                        # input, so reading every value stays cheap.
                        for k in db.keys():
                            _ = db[k]
    except Exception:
        # Best-effort harness: Python-level errors are not findings.
        pass
0000000..e609a32 --- /dev/null +++ b/fuzz_dp.py @@ -0,0 +1,244 @@ +"""Pure-Python FuzzedDataProvider matching the atheris API. + +This is a drop-in replacement for atheris.FuzzedDataProvider that requires +no native compilation. It matches atheris's consumption semantics: + - ConsumeBytes/ConsumeInt/ConsumeFloat/ConsumeUnicode consume from the FRONT + - ConsumeIntInRange/ConsumeBool/PickValueInList consume from the BACK + +Reference: https://github.com/google/atheris +""" + +import struct + + +class FuzzedDataProvider: + def __init__(self, data): + if not isinstance(data, (bytes, bytearray)): + raise TypeError("data must be bytes or bytearray") + self._data = bytes(data) + self._front = 0 + self._back = len(self._data) + + def remaining_bytes(self): + return max(0, self._back - self._front) + + def buffer(self): + return self._data[self._front:self._back] + + # -- Front-consuming methods (ConsumeBytes, ConsumeInt, etc.) -- + + def _consume_front(self, n): + n = min(n, self.remaining_bytes()) + result = self._data[self._front:self._front + n] + self._front += n + return result + + def ConsumeBytes(self, count): + count = max(0, int(count)) + return self._consume_front(count) + + def ConsumeInt(self, byte_count): + byte_count = max(0, int(byte_count)) + raw = self._consume_front(byte_count) + if not raw: + return 0 + val = int.from_bytes(raw, 'little') + bits = len(raw) * 8 + if val >= (1 << (bits - 1)): + val -= (1 << bits) + return val + + def ConsumeUInt(self, byte_count): + byte_count = max(0, int(byte_count)) + raw = self._consume_front(byte_count) + if not raw: + return 0 + return int.from_bytes(raw, 'little') + + def ConsumeFloat(self): + raw = self._consume_front(8) + if len(raw) < 8: + raw = raw + b'\x00' * (8 - len(raw)) + return struct.unpack(' hi: + lo, hi = hi, lo + if lo == hi: + return lo + rng = hi - lo + # Match LLVM: consume ceil(bits_needed/8) bytes from back + # LLVM loops while offset < sizeof(T)*8 && (range >> offset) > 0 + nbytes = 
(rng.bit_length() + 7) // 8 + raw = self._consume_back(nbytes) + if not raw: + return lo + # LLVM reads bytes from back as big-endian accumulation: + # result = (result << 8) | next_byte_from_back + # which equals int.from_bytes(reversed_bytes, 'big') + # But since _consume_back returns bytes in memory order and + # int.from_bytes(raw, 'little') produces the same value, we use that. + val = int.from_bytes(raw, 'little') + return lo + (val % (rng + 1)) + + # Alias for LLVM naming compatibility + ConsumeIntegralInRange = ConsumeIntInRange + + def ConsumeBool(self): + # Matches LLVM: 1 & ConsumeIntegral() + # ConsumeIntegral() = ConsumeIntegralInRange(0, 255) + return (self.ConsumeIntInRange(0, 255) & 1) == 1 + + def ConsumeProbability(self): + # Matches LLVM: ConsumeIntegral() / UINT64_MAX + # ConsumeIntegral() = ConsumeIntegralInRange(0, 2^64-1) + # When range == UINT64_MAX, no modulo is applied (special case) + raw = self._consume_back(8) + if not raw: + return 0.0 + val = int.from_bytes(raw, 'little') + return val / float((1 << 64) - 1) + + def ConsumeFloatInRange(self, lo, hi): + lo, hi = float(lo), float(hi) + if lo > hi: + lo, hi = hi, lo + p = self.ConsumeProbability() + return lo + (hi - lo) * p + + def PickValueInList(self, lst): + if not lst: + raise ValueError("list must not be empty") + idx = self.ConsumeIntInRange(0, len(lst) - 1) + return lst[idx] + + # -- List methods -- + + def ConsumeIntList(self, count, byte_count): + count = max(0, int(count)) + return [self.ConsumeInt(byte_count) for _ in range(count)] + + def ConsumeIntListInRange(self, count, lo, hi): + count = max(0, int(count)) + return [self.ConsumeIntInRange(lo, hi) for _ in range(count)] + + def ConsumeFloatList(self, count): + count = max(0, int(count)) + return [self.ConsumeFloat() for _ in range(count)] + + def ConsumeFloatListInRange(self, count, lo, hi): + count = max(0, int(count)) + return [self.ConsumeFloatInRange(lo, hi) for _ in range(count)] + + def ConsumeProbabilityList(self, 
count): + count = max(0, int(count)) + return [self.ConsumeProbability() for _ in range(count)] + + def ConsumeRegularFloatList(self, count): + count = max(0, int(count)) + return [self.ConsumeRegularFloat() for _ in range(count)] + + # -- Arbitrary value -- + + _ANY_TYPE_INT = 0 + _ANY_TYPE_FLOAT = 1 + _ANY_TYPE_BOOL = 2 + _ANY_TYPE_BYTES = 3 + _ANY_TYPE_STRING = 4 + _ANY_TYPE_NONE = 5 + + def ConsumeRandomValue(self): + """Return a value of a randomly chosen primitive type.""" + t = self.ConsumeIntInRange(self._ANY_TYPE_INT, self._ANY_TYPE_NONE) + if t == self._ANY_TYPE_INT: + return self.ConsumeInt(4) + elif t == self._ANY_TYPE_FLOAT: + return self.ConsumeFloat() + elif t == self._ANY_TYPE_BOOL: + return self.ConsumeBool() + elif t == self._ANY_TYPE_BYTES: + return self.ConsumeBytes(self.ConsumeIntInRange(0, 64)) + elif t == self._ANY_TYPE_STRING: + return self.ConsumeUnicode(self.ConsumeIntInRange(0, 64)) + else: + return None diff --git a/fuzz_expat.py b/fuzz_expat.py new file mode 100644 index 0000000..ae0280f --- /dev/null +++ b/fuzz_expat.py @@ -0,0 +1,37 @@ +from fuzz_dp import FuzzedDataProvider +from xml.parsers import expat +import io + +ENCODINGS = [None, 'utf-8', 'iso-8859-1'] + +# Fuzzes the expat XML parser (Modules/expat/xmlparse.c, Modules/pyexpat.c). +# Creates a parser with a fuzzed encoding selection (None, UTF-8, +# ISO-8859-1), installs handlers for elements, character data, PIs, +# comments, and CDATA sections, then parses fuzzed bytes via Parse() +# or ParseFile(). 
def FuzzerRunOne(FuzzerInput):
    # Entry point of the expat harness: picks Parse() or ParseFile() and an
    # encoding from ENCODINGS, installs no-op handlers so the callback paths
    # run, then parses all remaining input as one final document.
    if not 1 <= len(FuzzerInput) <= 0x10000:
        return
    provider = FuzzedDataProvider(FuzzerInput)
    via_file = provider.ConsumeBool()
    codec = provider.PickValueInList(ENCODINGS)

    def _ignore(*_args):
        return None

    try:
        parser = expat.ParserCreate(codec)
        for hook in (
            "StartElementHandler",
            "EndElementHandler",
            "CharacterDataHandler",
            "ProcessingInstructionHandler",
            "CommentHandler",
            "StartCdataSectionHandler",
            "EndCdataSectionHandler",
            "DefaultHandler",
        ):
            setattr(parser, hook, _ignore)

        payload = provider.ConsumeBytes(provider.remaining_bytes())
        if via_file:
            parser.ParseFile(io.BytesIO(payload))
        else:
            parser.Parse(payload, True)
    except Exception:
        # ExpatError is a subclass of Exception; every Python-level error
        # is ignored.
        pass
# Tests BytesIO (Modules/_io/bytesio.c): writes all remaining input, then
# reads it back with read(), readline() and readinto() (scratch buffer of
# at most 100 bytes), takes a getbuffer() view, truncates at a fuzzed
# position and fetches the final value.
def op_bytesio(fdp):
    cut = fdp.ConsumeIntInRange(0, fdp.remaining_bytes())
    payload = fdp.ConsumeBytes(fdp.remaining_bytes())
    stream = io.BytesIO()
    stream.write(payload)
    for reader in (stream.read, stream.readline):
        stream.seek(0)
        reader()
    scratch = bytearray(min(len(payload), 100))
    stream.seek(0)
    stream.readinto(scratch)
    # The view is dropped immediately, so no export is held at truncate().
    stream.getbuffer()
    stream.truncate(cut)
    stream.getvalue()

# Tests TextIOWrapper (Modules/_io/textio.c): decodes all remaining input
# through a wrapper with a fuzzed encoding (utf-8, latin-1, ascii, utf-16)
# and newline mode (None, '', \n, \r, \r\n) using errors='replace', then
# calls read, seek(0), readline and detach.
def op_textiowrapper(fdp):
    codec = fdp.PickValueInList(['utf-8', 'latin-1', 'ascii', 'utf-16'])
    newline_mode = fdp.PickValueInList([None, '', '\n', '\r', '\r\n'])
    payload = fdp.ConsumeBytes(fdp.remaining_bytes())
    text_stream = io.TextIOWrapper(
        io.BytesIO(payload),
        encoding=codec,
        errors='replace',
        newline=newline_mode,
    )
    text_stream.read()
    text_stream.seek(0)
    text_stream.readline()
    text_stream.detach()

# Tests BufferedReader/Writer/Random (Modules/_io/bufferedio.c): picks
# one of the three buffered I/O types and exercises read, write, seek,
# and flush through the C buffering layer over a BytesIO raw stream.
# Tests BufferedReader/Writer/Random (Modules/_io/bufferedio.c) over a
# BytesIO raw stream. The reader reads everything; the writer writes and
# flushes everything; BufferedRandom reads read_size bytes, writes a
# separate fuzzed payload, rewinds and reads again.
def op_buffered_io(fdp):
    target = fdp.ConsumeIntInRange(BUFFERED_READER, BUFFERED_RANDOM)
    read_size = fdp.ConsumeIntInRange(0, 10000)
    if target == BUFFERED_READER:
        data = fdp.ConsumeBytes(fdp.remaining_bytes())
        raw = io.BytesIO(data)
        br = io.BufferedReader(raw)
        br.read()
    elif target == BUFFERED_WRITER:
        data = fdp.ConsumeBytes(fdp.remaining_bytes())
        raw = io.BytesIO()
        bw = io.BufferedWriter(raw)
        bw.write(data)
        bw.flush()
    else:
        # Take the write payload BEFORE draining the rest of the input.
        # Consuming it afterwards leaves nothing to read, so the payload
        # would always be b'' and the write path would never see data.
        write_len = fdp.ConsumeIntInRange(0, min(fdp.remaining_bytes(), 10000))
        write_data = fdp.ConsumeBytes(write_len)
        data = fdp.ConsumeBytes(fdp.remaining_bytes())
        raw = io.BytesIO(data)
        brw = io.BufferedRandom(raw)
        brw.read(read_size)
        brw.write(write_data)
        brw.seek(0)
        brw.read()

# Tests FileIO (Modules/_io/fileio.c): either writes the fuzzed data
# through FileIO and reads it back, or writes it through the temp-file
# handle and reads it with FileIO. The temp file is always removed.
def op_fileio(fdp):
    do_write = fdp.ConsumeBool()
    data = fdp.ConsumeBytes(fdp.remaining_bytes())
    tmpname = None
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            tmpname = tmp.name
            if do_write:
                # Context managers close the descriptor even when the
                # write or read raises.
                with io.FileIO(tmpname, 'w') as f:
                    f.write(data)
            else:
                tmp.write(data)
                tmp.flush()
            with io.FileIO(tmpname, 'r') as f:
                f.read()
    finally:
        if tmpname:
            try:
                os.unlink(tmpname)
            except Exception:
                # Cleanup is best-effort.
                pass

# Tests io.open() (Modules/_io/_iomodule.c): the high-level open function
# that selects the appropriate I/O class based on mode. Writes fuzzed data
# to a temp file then opens it in binary or text mode with error handling.
# Tests io.open() (Modules/_io/_iomodule.c): writes all remaining input to
# a temp file, then opens it in a fuzzed mode ('rb' is listed twice, 'r'
# once) and reads it. Text mode uses errors='replace'. The temp file is
# always removed.
def op_io_open(fdp):
    chosen_mode = fdp.PickValueInList(['rb', 'r', 'rb'])
    payload = fdp.ConsumeBytes(fdp.remaining_bytes())
    path = None
    try:
        with tempfile.NamedTemporaryFile(delete=False) as scratch:
            path = scratch.name
            scratch.write(payload)
            scratch.flush()
        if 'b' in chosen_mode:
            error_policy = None
        else:
            error_policy = 'replace'
        with io.open(path, chosen_mode, errors=error_policy) as handle:
            handle.read()
    finally:
        if path:
            try:
                os.unlink(path)
            except Exception:
                pass

# Tests IncrementalNewlineDecoder (Modules/_io/textio.c): decodes up to
# 10000 input bytes (mapped to text via latin-1) with a fuzzed translate
# flag, then calls getstate() and reset(). Does nothing without input.
def op_newline_decoder(fdp):
    translate = fdp.ConsumeBool()
    if not fdp.remaining_bytes() > 0:
        return
    length = fdp.ConsumeIntInRange(1, min(fdp.remaining_bytes(), 10000))
    if length == 0:
        return
    text = fdp.ConsumeBytes(length).decode('latin-1')
    newline_decoder = io.IncrementalNewlineDecoder(None, translate)
    newline_decoder.decode(text)
    newline_decoder.getstate()
    newline_decoder.reset()

# Tests StringIO (Modules/_io/stringio.c): builds a stream from up to
# 10000 input bytes (mapped to text via latin-1) and calls read, readline,
# a write from position 0, and getvalue. Does nothing without input.
def op_stringio(fdp):
    if not fdp.remaining_bytes() > 0:
        return
    length = fdp.ConsumeIntInRange(1, min(fdp.remaining_bytes(), 10000))
    if length == 0:
        return
    text = fdp.ConsumeBytes(length).decode('latin-1')
    buffer = io.StringIO(text)
    for reader in (buffer.read, buffer.readline):
        reader()
        buffer.seek(0)
    buffer.write(text)
    buffer.getvalue()

# Fuzzes CPython's I/O C modules (Modules/_io/). Exercises BytesIO
# (write, seek, read, truncate), TextIOWrapper (read, readline, detach
# with varied encodings and newline modes), BufferedReader/Writer/Random,
# FileIO (read and write modes), io.open(), IncrementalNewlineDecoder
# (decode, getstate, reset), and StringIO operations.
def FuzzerRunOne(FuzzerInput):
    # Entry point of the I/O harness: one selector value chooses which op_*
    # helper receives the rest of the input. Inputs that are empty or
    # larger than 0x100000 bytes are skipped, and exceptions raised by the
    # helper are ignored.
    if not 1 <= len(FuzzerInput) <= 0x100000:
        return
    provider = FuzzedDataProvider(FuzzerInput)
    selector = provider.ConsumeIntInRange(OP_BYTESIO, OP_STRINGIO)
    handlers = {
        OP_BYTESIO: op_bytesio,
        OP_TEXTIOWRAPPER: op_textiowrapper,
        OP_BUFFERED_IO: op_buffered_io,
        OP_FILEIO: op_fileio,
        OP_IO_OPEN: op_io_open,
        OP_NEWLINE_DECODER: op_newline_decoder,
    }
    # Any selector without an explicit entry falls through to StringIO.
    handler = handlers.get(selector, op_stringio)
    try:
        handler(provider)
    except Exception:
        pass
CONTAINER_TUPLE = 3
CONTAINER_FLOAT = 4
CONTAINER_INT = 5

# Encode operation constants for FuzzerRunOne
ENCODE_DEFAULT = 0
ENCODE_ASCII = 1
ENCODE_NON_ASCII = 2
ENCODE_SORTED = 3
ENCODE_INDENTED = 4
ENCODE_CUSTOM = 5

# Builds one Python value to encode, chosen by a fuzzed selector: a list or
# tuple of 1-byte ints (up to 200), a latin-1 decoded string (up to 1000
# bytes, "" without input), a dict with string keys and random primitive
# values (up to 50 entries), a float, or a 4-byte int.
def build_container(fdp):
    kind = fdp.ConsumeIntInRange(CONTAINER_INT_LIST, CONTAINER_INT)

    if kind == CONTAINER_INT_LIST:
        count = fdp.ConsumeIntInRange(0, min(fdp.remaining_bytes(), 200))
        return fdp.ConsumeIntList(count, 1)

    if kind == CONTAINER_STRING:
        if not fdp.remaining_bytes() > 0:
            return ""
        length = fdp.ConsumeIntInRange(1, min(fdp.remaining_bytes(), 1000))
        if not length > 0:
            return ""
        return fdp.ConsumeBytes(length).decode('latin-1')

    if kind == CONTAINER_DICT:
        budget = fdp.ConsumeIntInRange(0, min(fdp.remaining_bytes(), 50))
        mapping = {}
        for _ in range(budget):
            if fdp.remaining_bytes() == 0:
                break
            key_len = fdp.ConsumeIntInRange(1, min(fdp.remaining_bytes(), 20))
            key = fdp.ConsumeBytes(key_len).decode('latin-1')
            mapping[key] = fdp.ConsumeRandomValue()
        return mapping

    if kind == CONTAINER_TUPLE:
        count = fdp.ConsumeIntInRange(0, min(fdp.remaining_bytes(), 200))
        return tuple(fdp.ConsumeIntList(count, 1))

    if kind == CONTAINER_FLOAT:
        return fdp.ConsumeFloat()

    return fdp.ConsumeInt(4)

# Fuzzes the _json C module's encoding paths (Modules/_json.c).
# Builds Python containers (int lists, string dicts, tuples, floats)
# from fuzzed data and encodes them with json.dumps() using varied
# options (ensure_ascii, sort_keys, indent) and custom JSONEncoder
# settings (separators, allow_nan, default handler).
def FuzzerRunOne(FuzzerInput):
    # Entry point of the JSON encoding harness: builds one value with
    # build_container() and encodes it with json.dumps() under a fuzzed
    # option set, or with a JSONEncoder whose ensure_ascii, sort_keys and
    # indent are fuzzed. Inputs that are empty or larger than 0x100000
    # bytes are skipped; every exception is ignored.
    if not 1 <= len(FuzzerInput) <= 0x100000:
        return
    provider = FuzzedDataProvider(FuzzerInput)
    mode = provider.ConsumeIntInRange(ENCODE_DEFAULT, ENCODE_CUSTOM)
    # Modes whose json.dumps() keyword arguments need no further input.
    fixed_options = {
        ENCODE_DEFAULT: {},
        ENCODE_ASCII: {"ensure_ascii": True},
        ENCODE_NON_ASCII: {"ensure_ascii": False},
        ENCODE_SORTED: {"sort_keys": True},
    }
    try:
        payload = build_container(provider)
        if mode in fixed_options:
            json.dumps(payload, **fixed_options[mode])
        elif mode == ENCODE_INDENTED:
            width = provider.ConsumeIntInRange(0, 8)
            json.dumps(payload, indent=width)
        else:
            # Draw order matters for input stability: ensure_ascii,
            # sort_keys, the "use indent" flag, then the indent width.
            ascii_only = provider.ConsumeBool()
            sorted_keys = provider.ConsumeBool()
            if provider.ConsumeBool():
                indent_width = provider.ConsumeIntInRange(0, 4)
            else:
                indent_width = None
            encoder = json.JSONEncoder(
                ensure_ascii=ascii_only,
                sort_keys=sorted_keys,
                indent=indent_width,
            )
            encoder.encode(payload)
    except Exception:
        pass
def FuzzerRunOne(FuzzerInput):
    # Entry point of the locale harness: calls locale.strxfrm() on one
    # fuzzed string, or locale.strcoll() on two (the second is "" when the
    # input runs out). Inputs that are empty or larger than 0x10000 bytes
    # are skipped; exceptions from the locale calls are ignored.
    if not 1 <= len(FuzzerInput) <= 0x10000:
        return
    provider = FuzzedDataProvider(FuzzerInput)
    mode = provider.ConsumeIntInRange(OP_STRXFRM, OP_STRCOLL)
    if not provider.remaining_bytes() > 0:
        return
    first_len = provider.ConsumeIntInRange(1, min(provider.remaining_bytes(), 10000))
    if first_len == 0:
        return
    first = provider.ConsumeUnicode(first_len)
    try:
        if mode == OP_STRXFRM:
            locale.strxfrm(first)
        elif mode == OP_STRCOLL:
            second = ""
            if provider.remaining_bytes() > 0:
                second_len = provider.ConsumeIntInRange(1, min(provider.remaining_bytes(), 10000))
                if second_len > 0:
                    second = provider.ConsumeUnicode(second_len)
            locale.strcoll(first, second)
    except Exception:
        pass
def FuzzerRunOne(FuzzerInput):
    # Entry point of the mmap harness (Modules/mmapmodule.c): writes 1..4096
    # fuzzed bytes to a temp file, maps it read/write, and runs up to 10
    # fuzzed operations (find, rfind, read, readline, seek, getitem, write,
    # setitem, move, flush). Inputs that are empty or larger than 0x10000
    # bytes are skipped; exceptions are ignored and the temp file is always
    # removed.
    if len(FuzzerInput) < 1 or len(FuzzerInput) > 0x10000:
        return
    fdp = FuzzedDataProvider(FuzzerInput)
    init_size = fdp.ConsumeIntInRange(1, min(fdp.remaining_bytes(), 4096)) if fdp.remaining_bytes() > 0 else 0
    if init_size == 0:
        return
    init_data = fdp.ConsumeBytes(init_size)
    tmpname = None
    try:
        with tempfile.NamedTemporaryFile(delete=False) as tmp:
            tmpname = tmp.name
            tmp.write(init_data)
            tmp.flush()

        # The mapping is a context manager, so it is closed even when an
        # operation raises part-way through the loop.
        with open(tmpname, 'r+b') as f, mmap.mmap(f.fileno(), 0) as mm:
            num_ops = fdp.ConsumeIntInRange(1, 10)
            for _ in range(num_ops):
                if fdp.remaining_bytes() == 0:
                    break
                op = fdp.ConsumeIntInRange(0, _OP_MAX)
                if op == OP_FIND:
                    needle = fdp.ConsumeBytes(fdp.ConsumeIntInRange(1, min(fdp.remaining_bytes(), 20)))
                    mm.find(needle)
                elif op == OP_RFIND:
                    needle = fdp.ConsumeBytes(fdp.ConsumeIntInRange(1, min(fdp.remaining_bytes(), 20)))
                    mm.rfind(needle)
                elif op == OP_READ:
                    mm.seek(0)
                    mm.read(fdp.ConsumeIntInRange(0, len(mm)))
                elif op == OP_READLINE:
                    mm.seek(0)
                    mm.readline()
                elif op == OP_SEEK:
                    pos = fdp.ConsumeIntInRange(0, max(0, len(mm) - 1))
                    mm.seek(pos)
                elif op == OP_GETITEM:
                    if len(mm) > 0:
                        idx = fdp.ConsumeIntInRange(0, len(mm) - 1)
                        _ = mm[idx]
                elif op == OP_WRITE:
                    data = fdp.ConsumeBytes(fdp.ConsumeIntInRange(1, min(fdp.remaining_bytes(), 50)))
                    pos = fdp.ConsumeIntInRange(0, max(0, len(mm) - len(data)))
                    mm.seek(pos)
                    mm.write(data)
                elif op == OP_SETITEM:
                    if len(mm) > 0:
                        idx = fdp.ConsumeIntInRange(0, len(mm) - 1)
                        # Item assignment only accepts 0..255. The signed
                        # ConsumeInt(1) yields -128..127, so negative draws
                        # raised ValueError and ended the run; the unsigned
                        # read consumes the same single byte.
                        mm[idx] = fdp.ConsumeUInt(1)
                elif op == OP_MOVE:
                    if len(mm) > 1:
                        count = fdp.ConsumeIntInRange(1, len(mm) // 2)
                        src = fdp.ConsumeIntInRange(0, len(mm) - count)
                        dest = fdp.ConsumeIntInRange(0, len(mm) - count)
                        mm.move(dest, src, count)
                elif op == OP_FLUSH:
                    mm.flush()
    except Exception:
        # Best-effort harness: Python-level errors are not findings.
        pass
    finally:
        if tmpname:
            try:
                os.unlink(tmpname)
            except Exception:
                pass
MAX_LIST_SIZE = 50  # cap on generated list/sequence sizes to avoid OOM

# Top-level fuzzer operation targets
OP_COMPARISONS = 0
OP_ARITHMETIC = 1
OP_UNARY = 2
OP_SEQUENCE = 3
OP_ITEMGETTER = 4
OP_ATTRGETTER = 5
OP_METHODCALLER = 6

# Sequence operation targets
SEQ_CONTAINS = 0
SEQ_COUNT_OF = 1
SEQ_INDEX_OF = 2
SEQ_GETITEM = 3
SEQ_CONCAT = 4
SEQ_SETITEM = 5
SEQ_DELITEM = 6
SEQ_LENGTH_HINT = 7

# Applies one fuzz-selected rich comparison to two random primitive values.
def op_comparisons(fdp):
    left = fdp.ConsumeRandomValue()
    right = fdp.ConsumeRandomValue()
    compare = fdp.PickValueInList([
        operator.lt, operator.le, operator.gt, operator.ge,
        operator.eq, operator.ne,
    ])
    compare(left, right)

# Applies one fuzz-selected binary arithmetic/bitwise operator to two 4-byte
# ints. Integer exponents are reduced modulo 20 and shift counts to 0..63 to
# keep results small; division by zero propagates to the caller.
def op_arithmetic(fdp):
    lhs = fdp.ConsumeInt(4)
    rhs = fdp.ConsumeInt(4)
    chosen = fdp.PickValueInList([
        operator.add, operator.sub, operator.mul, operator.mod,
        operator.floordiv, operator.truediv, operator.pow,
        operator.lshift, operator.rshift,
        operator.and_, operator.or_, operator.xor,
    ])
    rhs_is_int = isinstance(rhs, int)
    if rhs_is_int and chosen == operator.pow:
        rhs = rhs % 20
    if rhs_is_int and chosen in (operator.lshift, operator.rshift):
        rhs = abs(rhs) % 64
    chosen(lhs, rhs)

# Applies one fuzz-selected unary operator to a random primitive value.
def op_unary(fdp):
    operand = fdp.ConsumeRandomValue()
    unary = fdp.PickValueInList([
        operator.neg, operator.pos, operator.abs, operator.invert,
        operator.index,
    ])
    unary(operand)

# Builds a list of 1-byte ints (length request 1..100) and runs one
# fuzz-selected sequence helper on it. A ValueError from indexOf (value not
# found) is swallowed here; other errors propagate to the caller.
def op_sequence(fdp):
    size = fdp.ConsumeIntInRange(1, min(fdp.remaining_bytes(), 100))
    items = fdp.ConsumeIntList(size, 1)
    action = fdp.ConsumeIntInRange(SEQ_CONTAINS, SEQ_LENGTH_HINT)
    # Highest valid index, clamped to 0 for an empty list.
    last_index = max(len(items) - 1, 0)
    if action == SEQ_CONTAINS:
        operator.contains(items, fdp.ConsumeInt(1))
    elif action == SEQ_COUNT_OF:
        operator.countOf(items, fdp.ConsumeInt(1))
    elif action == SEQ_INDEX_OF:
        try:
            operator.indexOf(items, fdp.ConsumeInt(1))
        except ValueError:
            pass
    elif action == SEQ_GETITEM:
        position = fdp.ConsumeIntInRange(0, last_index)
        operator.getitem(items, position)
    elif action == SEQ_CONCAT:
        extra_len = fdp.ConsumeIntInRange(0, MAX_LIST_SIZE)
        operator.concat(items, fdp.ConsumeIntList(extra_len, 1))
    elif action == SEQ_SETITEM:
        position = fdp.ConsumeIntInRange(0, last_index)
        operator.setitem(items, position, fdp.ConsumeInt(1))
    elif action == SEQ_DELITEM:
        position = fdp.ConsumeIntInRange(0, last_index)
        operator.delitem(items, position)
    elif action == SEQ_LENGTH_HINT:
        operator.length_hint(items)

# Builds an itemgetter over 1..len(list) fuzzed in-range indexes and applies
# it to a list of 1-byte ints.
def op_itemgetter(fdp):
    size = fdp.ConsumeIntInRange(1, MAX_LIST_SIZE)
    items = fdp.ConsumeIntList(size, 1)
    if not items:
        return
    how_many = fdp.ConsumeIntInRange(1, len(items))
    indexes = [fdp.ConsumeIntInRange(0, len(items) - 1) for _ in range(how_many)]
    if len(indexes) > 1:
        getter = operator.itemgetter(*indexes)
    else:
        getter = operator.itemgetter(indexes[0])
    getter(items)

# Builds an attrgetter over 1..4 fuzz-picked attribute names and applies it
# to an object whose x/y/z/w attributes hold fuzzed 1-byte ints.
def op_attrgetter(fdp):
    class Obj:
        pass
    holder = Obj()
    names = ['x', 'y', 'z', 'w']
    for name in names:
        setattr(holder, name, fdp.ConsumeInt(1))
    how_many = fdp.ConsumeIntInRange(1, len(names))
    picked = [fdp.PickValueInList(names) for _ in range(how_many)]
    if len(picked) > 1:
        getter = operator.attrgetter(*picked)
    else:
        getter = operator.attrgetter(picked[0])
    getter(holder)

# Calls one fuzz-picked zero-argument str method on a fuzzed string through
# operator.methodcaller.
def op_methodcaller(fdp):
    text = fdp.ConsumeUnicode(fdp.ConsumeIntInRange(1, 100))
    method_name = fdp.PickValueInList(['upper', 'lower', 'strip', 'title', 'swapcase'])
    operator.methodcaller(method_name)(text)

# Fuzzes the _operator C module (Modules/_operator.c). Exercises
# comparison operators (lt/le/gt/ge/eq/ne), arithmetic operators
# (add/sub/mul/mod/div/pow/shifts/bitwise), unary operators
# (neg/pos/abs/invert/index), sequence operations (contains/countOf/
# indexOf/getitem/concat/setitem/delitem/length_hint), and the
# itemgetter, attrgetter, and methodcaller helpers.
MAX_CONTAINER_SIZE = 200  # cap on generated container/string sizes to avoid OOM

# Top-level operation constants for FuzzerRunOne dispatch
OP_DUMPS = 0
OP_LOADS = 1
OP_PICKLER = 2
OP_ROUNDTRIP = 3

# Container type constants for build_container
CTYPE_BYTES = 0
CTYPE_STRING = 1
CTYPE_INT_LIST = 2
CTYPE_TUPLE = 3
CTYPE_SET = 4
CTYPE_FROZENSET = 5
CTYPE_BYTEARRAY = 6
CTYPE_DICT = 7

# Unpickler variant constants for op_loads
VARIANT_RESTRICTED = 0
VARIANT_PERSISTENT = 1
VARIANT_RESTRICTED_FIX_IMPORTS = 2

# Unpickler that rejects every global lookup, so fuzzed pickles cannot
# import or call anything.
class RestrictedUnpickler(pickle.Unpickler):
    def find_class(self, module, name):
        raise pickle.UnpicklingError('restricted')

# Same restriction on globals, plus persistent IDs resolved to themselves.
class PersistentUnpickler(RestrictedUnpickler):
    def persistent_load(self, pid):
        return pid

# Builds one object of the requested container type from fuzzed data. The
# element count (0..MAX_CONTAINER_SIZE, bounded by the remaining input) is
# always drawn first. Dicts hold at most 64 entries with string keys and
# random primitive values. Unknown types fall back to bytes.
def build_container(fdp, ctype):
    size = fdp.ConsumeIntInRange(0, min(fdp.remaining_bytes(), MAX_CONTAINER_SIZE))
    if ctype == CTYPE_STRING:
        return fdp.ConsumeUnicode(size)
    if ctype == CTYPE_BYTEARRAY:
        return bytearray(fdp.ConsumeBytes(size))
    # Containers built from a list of 1-byte ints differ only in the type.
    from_int_list = {
        CTYPE_INT_LIST: list,
        CTYPE_TUPLE: tuple,
        CTYPE_SET: set,
        CTYPE_FROZENSET: frozenset,
    }
    if ctype in from_int_list:
        return from_int_list[ctype](fdp.ConsumeIntList(size, 1))
    if ctype == CTYPE_DICT:
        mapping = {}
        for _ in range(fdp.ConsumeIntInRange(0, min(size, 64))):
            if fdp.remaining_bytes() == 0:
                break
            key = fdp.ConsumeUnicode(fdp.ConsumeIntInRange(1, 20))
            mapping[key] = fdp.ConsumeRandomValue()
        return mapping
    return fdp.ConsumeBytes(size)

# Pickles one fuzzed container with a fuzzed protocol (0..5) and
# fix_imports flag.
def op_dumps(fdp):
    kind = fdp.ConsumeIntInRange(CTYPE_BYTES, CTYPE_DICT)
    proto = fdp.ConsumeIntInRange(0, 5)
    legacy_names = fdp.ConsumeBool()
    payload = build_container(fdp, kind)
    pickle.dumps(payload, protocol=proto, fix_imports=legacy_names)

# Unpickles all remaining input with one of the restricted unpickler
# variants. Unpickling errors propagate to the caller.
def op_loads(fdp):
    variant = fdp.ConsumeIntInRange(VARIANT_RESTRICTED, VARIANT_RESTRICTED_FIX_IMPORTS)
    source = io.BytesIO(fdp.ConsumeBytes(fdp.remaining_bytes()))
    if variant == VARIANT_RESTRICTED:
        loader = RestrictedUnpickler(source)
    elif variant == VARIANT_PERSISTENT:
        loader = PersistentUnpickler(source)
    else:
        loader = RestrictedUnpickler(source, fix_imports=True, encoding='bytes')
    loader.load()

# Dumps an int list and then a string through one Pickler, clearing the
# memo between the two dumps. Does nothing when no input is left.
def op_pickler(fdp):
    proto = fdp.ConsumeIntInRange(0, 5)
    if not fdp.remaining_bytes() > 0:
        return
    count = fdp.ConsumeIntInRange(1, min(fdp.remaining_bytes(), MAX_CONTAINER_SIZE))
    if count == 0:
        return
    numbers = fdp.ConsumeIntList(count, 1)
    text = fdp.ConsumeUnicode(fdp.ConsumeIntInRange(0, MAX_CONTAINER_SIZE))
    sink = io.BytesIO()
    pickler = pickle.Pickler(sink, proto)
    pickler.dump(numbers)
    pickler.clear_memo()
    pickler.dump(text)
    sink.getvalue()

# Pickles one fuzzed container with the default protocol and loads it back.
def op_roundtrip(fdp):
    kind = fdp.ConsumeIntInRange(CTYPE_BYTES, CTYPE_DICT)
    pickle.loads(pickle.dumps(build_container(fdp, kind)))

# Fuzzes the _pickle C module (Modules/_pickle.c).
def FuzzerRunOne(FuzzerInput):
    # Entry point of the pickle harness: one selector value chooses between
    # op_dumps, op_loads, op_pickler and op_roundtrip. Inputs that are empty
    # or larger than 0x100000 bytes are skipped; exceptions raised by the
    # chosen operation are ignored.
    if not 1 <= len(FuzzerInput) <= 0x100000:
        return
    provider = FuzzedDataProvider(FuzzerInput)
    selector = provider.ConsumeIntInRange(OP_DUMPS, OP_ROUNDTRIP)
    handlers = {
        OP_DUMPS: op_dumps,
        OP_LOADS: op_loads,
        OP_PICKLER: op_pickler,
    }
    # Any selector without an explicit entry runs the round-trip operation.
    handler = handlers.get(selector, op_roundtrip)
    try:
        handler(provider)
    except Exception:
        pass
def FuzzerRunOne(FuzzerInput):
    # Entry point of the ssl harness: passes all remaining input either to
    # ssl.DER_cert_to_PEM_cert(), or to SSLContext.load_verify_locations()
    # through a temporary .pem file that is removed afterwards. Inputs that
    # are empty or larger than 0x100000 bytes are skipped; exceptions are
    # ignored.
    if not 1 <= len(FuzzerInput) <= 0x100000:
        return
    provider = FuzzedDataProvider(FuzzerInput)
    mode = provider.ConsumeIntInRange(DER_TO_PEM_CERT, LOAD_VERIFY_LOCATIONS)
    payload = provider.ConsumeBytes(provider.remaining_bytes())
    try:
        if mode == DER_TO_PEM_CERT:
            ssl.DER_cert_to_PEM_cert(payload)
        elif mode == LOAD_VERIFY_LOCATIONS:
            context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
            with tempfile.NamedTemporaryFile(suffix='.pem', delete=False) as pem_file:
                pem_path = pem_file.name
                pem_file.write(payload)
                pem_file.flush()
            try:
                context.load_verify_locations(pem_path)
            finally:
                os.unlink(pem_path)
    except Exception:
        pass
+def FuzzerRunOne(FuzzerInput):
+    """Run one fuzz input through time.strftime() or time.strptime()."""
+    if not 1 <= len(FuzzerInput) <= 0x10000:  # skip empty or oversized inputs
+        return
+    provider = FuzzedDataProvider(FuzzerInput)
+    mode = provider.ConsumeIntInRange(STRFTIME_FUZZED_FORMAT, STRPTIME_FUZZED_FORMAT)
+    def fuzzed_text():  # draws a size in 1..MAX_STRING_SIZE, then the string
+        return provider.ConsumeUnicode(provider.ConsumeIntInRange(1, MAX_STRING_SIZE))
+    try:
+        if mode == STRFTIME_FUZZED_FORMAT:
+            pattern = fuzzed_text()
+            if pattern:
+                time.strftime(pattern)
+        elif mode in (STRPTIME_KNOWN_FORMAT, STRPTIME_FUZZED_FORMAT):
+            # Both strptime modes obtain the format first, then the input string.
+            known = mode == STRPTIME_KNOWN_FORMAT
+            pattern = provider.PickValueInList(FORMATS) if known else fuzzed_text()
+            text = fuzzed_text()
+            if text and (known or pattern):  # a predefined format is used as-is
+                time.strptime(text, pattern)
+    except Exception:
+        pass  # best-effort harness: Python-level exceptions are ignored
diff --git a/fuzz_unicodedata.py b/fuzz_unicodedata.py
new file mode 100644
index 0000000..0346658
--- /dev/null
+++ b/fuzz_unicodedata.py
@@ -0,0 +1,101 @@
+from fuzz_dp import FuzzedDataProvider
+import unicodedata
+
+NORMALIZE_FORMS = ['NFC', 'NFD', 'NFKC', 'NFKD']
+
+MAX_CHARS = 5000  # cap on per-character iteration loops
+MAX_STRING_SIZE = 10000  # cap on strings passed to normalize/lookup
+
+OP_CATEGORY = 0
+OP_BIDIRECTIONAL = 1
+OP_NORMALIZE = 2
+OP_NUMERIC = 3
+OP_DECIMAL = 4
+OP_COMBINING = 5
+OP_EAST_ASIAN_WIDTH = 6
+OP_MIRRORED = 7
+OP_NAME = 8
+OP_DECOMPOSITION = 9
+OP_LOOKUP = 10
+OP_DIGIT = 11
+OP_IS_NORMALIZED = 12
+
+# Fuzzes the unicodedata C module (Modules/unicodedata.c). Exercises
+# character property lookups (category, bidirectional, combining,
+# east_asian_width, mirrored), normalization (NFC/NFD/NFKC/NFKD and
+# is_normalized), numeric/decimal/digit value extraction, character
+# name/decomposition queries, and unicodedata.lookup() by name.
+def FuzzerRunOne(FuzzerInput):
+    """Run one fuzz input against a single unicodedata operation.
+
+    The first consumed integer selects the operation. Per-character
+    operations walk a fuzzed string whose requested size is capped at
+    MAX_CHARS; normalize(), is_normalized() and lookup() receive one whose
+    requested size is capped at MAX_STRING_SIZE.
+
+    Args:
+        FuzzerInput: raw fuzz input; it is skipped when its len() is 0 or
+            exceeds 0x10000.
+
+    Returns:
+        None. Exceptions raised by the selected operation are swallowed.
+    """
+    if not 1 <= len(FuzzerInput) <= 0x10000:
+        return
+    provider = FuzzedDataProvider(FuzzerInput)
+    op = provider.ConsumeIntInRange(OP_CATEGORY, OP_IS_NORMALIZED)
+
+    def fuzzed_text(limit):
+        # Draw the requested size (1..limit) first, then the string itself.
+        return provider.ConsumeUnicode(provider.ConsumeIntInRange(1, limit))
+
+    # unicodedata function names, looked up with getattr() only once the
+    # operation is selected. "strict" queries let any exception end the run.
+    strict = {
+        OP_CATEGORY: "category",
+        OP_BIDIRECTIONAL: "bidirectional",
+        OP_COMBINING: "combining",
+        OP_EAST_ASIAN_WIDTH: "east_asian_width",
+        OP_MIRRORED: "mirrored",
+        OP_DECOMPOSITION: "decomposition",
+    }
+    # "lenient" queries ignore a ValueError for one character and carry on
+    # with the next one.
+    lenient = {
+        OP_NUMERIC: "numeric",
+        OP_DECIMAL: "decimal",
+        OP_NAME: "name",
+        OP_DIGIT: "digit",
+    }
+    # "by_form" operations pick a normalization form before the string.
+    by_form = {
+        OP_NORMALIZE: "normalize",
+        OP_IS_NORMALIZED: "is_normalized",
+    }
+
+    try:
+        if op in strict:
+            attr = strict[op]
+            for ch in fuzzed_text(MAX_CHARS):
+                getattr(unicodedata, attr)(ch)
+        elif op in lenient:
+            attr = lenient[op]
+            for ch in fuzzed_text(MAX_CHARS):
+                try:
+                    getattr(unicodedata, attr)(ch)
+                except ValueError:
+                    pass
+        elif op in by_form:
+            form = provider.PickValueInList(NORMALIZE_FORMS)
+            text = fuzzed_text(MAX_STRING_SIZE)
+            getattr(unicodedata, by_form[op])(form, text)
+        elif op == OP_LOOKUP:
+            name = fuzzed_text(MAX_STRING_SIZE)
+            try:
+                unicodedata.lookup(name)
+            except KeyError:
+                # lookup() raises KeyError for an unknown name; ignore it.
+                pass
+    except Exception:
+        # Best-effort harness: any other Python-level exception is ignored.
+        pass