From 3dbffe5e77fd8c356220c798880b7252ac7288fd Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Thu, 26 Mar 2026 20:20:29 +0000 Subject: [PATCH 1/2] Rewrite `decode.py` fuzzer --- decode.py | 20 ++- fuzzer-decode.dict | 324 --------------------------------------------- 2 files changed, 15 insertions(+), 329 deletions(-) delete mode 100644 fuzzer-decode.dict diff --git a/decode.py b/decode.py index f057b6f..3a646a4 100644 --- a/decode.py +++ b/decode.py @@ -1,10 +1,20 @@ +from encodings.aliases import aliases + +ALL_CODECS = sorted(set(aliases.values())) + def FuzzerRunOne(FuzzerInput): - l = int(len(FuzzerInput)/2) - A = FuzzerInput[:l] - B = FuzzerInput[l:].decode("utf-8", "replace").strip() + if len(FuzzerInput) < 2: + return + if FuzzerInput[0] & 1: + codec = ALL_CODECS[FuzzerInput[1] % len(ALL_CODECS)] + data = FuzzerInput[2:] + else: + l = len(FuzzerInput) // 2 + codec = FuzzerInput[l:].decode("utf-8", "replace").strip() + data = FuzzerInput[:l] try: - A.decode(B) + data.decode(codec) except SystemError: raise - except: + except Exception: pass diff --git a/fuzzer-decode.dict b/fuzzer-decode.dict deleted file mode 100644 index 46ad0f3..0000000 --- a/fuzzer-decode.dict +++ /dev/null @@ -1,324 +0,0 @@ -"646" -"ansi_x3.4_1968" -"ansi_x3_4_1968" -"ansi_x3.4_1986" -"cp367" -"csascii" -"ibm367" -"iso646_us" -"iso_646.irv_1991" -"iso_ir_6" -"us" -"us_ascii" -"base64" -"base_64" -"big5_tw" -"csbig5" -"big5_hkscs" -"hkscs" -"bz2" -"037" -"csibm037" -"ebcdic_cp_ca" -"ebcdic_cp_nl" -"ebcdic_cp_us" -"ebcdic_cp_wt" -"ibm037" -"ibm039" -"1026" -"csibm1026" -"ibm1026" -"1125" -"ibm1125" -"cp866u" -"ruscii" -"1140" -"ibm1140" -"1250" -"windows_1250" -"1251" -"windows_1251" -"1252" -"windows_1252" -"1253" -"windows_1253" -"1254" -"windows_1254" -"1255" -"windows_1255" -"1256" -"windows_1256" -"1257" -"windows_1257" -"1258" -"windows_1258" -"273" -"ibm273" -"csibm273" -"424" -"csibm424" -"ebcdic_cp_he" -"ibm424" -"437" -"cspc8codepage437" -"ibm437" -"500" -"csibm500" -"ebcdic_cp_be" -"ebcdic_cp_ch" -"ibm500" -"775" -"cspc775baltic" -"ibm775" -"850" -"cspc850multilingual" -"ibm850" -"852" -"cspcp852" -"ibm852" -"855" -"csibm855" -"ibm855" -"857" -"csibm857" -"ibm857" -"858" -"csibm858" -"ibm858" -"860" -"csibm860" -"ibm860" -"861" -"cp_is" -"csibm861" -"ibm861" -"862" -"cspc862latinhebrew" -"ibm862" -"863" -"csibm863" -"ibm863" -"864" -"csibm864" -"ibm864" -"865" -"csibm865" -"ibm865" -"866" -"csibm866" -"ibm866" -"869" -"cp_gr" -"csibm869" -"ibm869" -"932" -"ms932" -"mskanji" -"ms_kanji" -"949" -"ms949" -"uhc" -"950" -"ms950" -"jisx0213" -"eucjis2004" -"euc_jis2004" -"eucjisx0213" -"eucjp" -"ujis" -"u_jis" -"euckr" -"korean" -"ksc5601" -"ks_c_5601" -"ks_c_5601_1987" -"ksx1001" -"ks_x_1001" -"gb18030_2000" -"chinese" -"csiso58gb231280" -"euc_cn" -"euccn" -"eucgb2312_cn" -"gb2312_1980" -"gb2312_80" -"iso_ir_58" -"936" -"cp936" -"ms936" -"hex" -"roman8" -"r8" -"csHPRoman8" -"hzgb" -"hz_gb" -"hz_gb_2312" -"csiso2022jp" -"iso2022jp" -"iso_2022_jp" -"iso2022jp_1" -"iso_2022_jp_1" -"iso2022jp_2" -"iso_2022_jp_2" -"iso_2022_jp_2004" -"iso2022jp_2004" -"iso2022jp_3" -"iso_2022_jp_3" -"iso2022jp_ext" -"iso_2022_jp_ext" -"csiso2022kr" -"iso2022kr" -"iso_2022_kr" -"csisolatin6" -"iso_8859_10" -"iso_8859_10_1992" -"iso_ir_157" -"l6" -"latin6" -"thai" -"iso_8859_11" -"iso_8859_11_2001" -"iso_8859_13" -"l7" -"latin7" -"iso_8859_14" -"iso_8859_14_1998" -"iso_celtic" -"iso_ir_199" -"l8" -"latin8" -"iso_8859_15" -"l9" -"latin9" -"iso_8859_16" -"iso_8859_16_2001" -"iso_ir_226" -"l10" -"latin10" -"csisolatin2" -"iso_8859_2" -"iso_8859_2_1987" -"iso_ir_101" -"l2" -"latin2" -"csisolatin3" -"iso_8859_3" -"iso_8859_3_1988" -"iso_ir_109" -"l3" -"latin3" -"csisolatin4" -"iso_8859_4" -"iso_8859_4_1988" -"iso_ir_110" -"l4" -"latin4" -"csisolatincyrillic" -"cyrillic" -"iso_8859_5" -"iso_8859_5_1988" -"iso_ir_144" -"arabic" -"asmo_708" -"csisolatinarabic" -"ecma_114" -"iso_8859_6" -"iso_8859_6_1987" -"iso_ir_127" -"csisolatingreek" -"ecma_118" -"elot_928" -"greek" -"greek8" -"iso_8859_7" -"iso_8859_7_1987" -"iso_ir_126" -"csisolatinhebrew" -"hebrew" -"iso_8859_8" -"iso_8859_8_1988" -"iso_ir_138" -"csisolatin5" -"iso_8859_9" -"iso_8859_9_1989" -"iso_ir_148" -"l5" -"latin5" -"cp1361" -"ms1361" -"cskoi8r" -"kz_1048" -"rk1048" -"strk1048_2002" -"8859" -"cp819" -"csisolatin1" -"ibm819" -"iso8859" -"iso8859_1" -"iso_8859_1" -"iso_8859_1_1987" -"iso_ir_100" -"l1" -"latin" -"latin1" -"maccyrillic" -"macgreek" -"maciceland" -"maccentraleurope" -"mac_centeuro" -"maclatin2" -"macintosh" -"macroman" -"macturkish" -"ansi" -"dbcs" -"csptcp154" -"pt154" -"cp154" -"cyrillic_asian" -"quopri" -"quoted_printable" -"quotedprintable" -"rot13" -"csshiftjis" -"shiftjis" -"sjis" -"s_jis" -"shiftjis2004" -"sjis_2004" -"s_jis_2004" -"shiftjisx0213" -"sjisx0213" -"s_jisx0213" -"tis620" -"tis_620_0" -"tis_620_2529_0" -"tis_620_2529_1" -"iso_ir_166" -"u16" -"utf16" -"unicodebigunmarked" -"utf_16be" -"unicodelittleunmarked" -"utf_16le" -"u32" -"utf32" -"utf_32be" -"utf_32le" -"u7" -"utf7" -"unicode_1_1_utf_7" -"u8" -"utf" -"utf8" -"utf8_ucs2" -"utf8_ucs4" -"cp65001" -"uu" -"zip" -"zlib" -"x_mac_japanese" -"x_mac_korean" -"x_mac_simp_chinese" -"x_mac_trad_chinese" From 2fa45bf030804aa42d8a4d41b4439b8c6cf82e5c Mon Sep 17 00:00:00 2001 From: Stan Ulbrych Date: Thu, 26 Mar 2026 20:25:04 +0000 Subject: [PATCH 2/2] drop old approach altogether --- decode.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/decode.py b/decode.py index 3a646a4..c1a4595 100644 --- a/decode.py +++ b/decode.py @@ -5,13 +5,8 @@ def FuzzerRunOne(FuzzerInput): if len(FuzzerInput) < 2: return - if FuzzerInput[0] & 1: - codec = ALL_CODECS[FuzzerInput[1] % len(ALL_CODECS)] - data = FuzzerInput[2:] - else: - l = len(FuzzerInput) // 2 - codec = FuzzerInput[l:].decode("utf-8", "replace").strip() - data = FuzzerInput[:l] + codec = ALL_CODECS[FuzzerInput[0] % len(ALL_CODECS)] + data = FuzzerInput[1:] try: data.decode(codec) except SystemError: