Add a YAML-parsing benchmark (#342)

mdboom · ericsnowcurrently · hugovk · web-flow · commit bb8495cfd151 · 2025-12-17T17:24:10.000-05:00
* Add a YAML-parsing benchmark

* Fix description

* Update pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py

Co-authored-by: Eric Snow &lt;ericsnowcurrently@gmail.com&gt;

---------

Co-authored-by: Eric Snow &lt;ericsnowcurrently@gmail.com&gt;
Co-authored-by: Hugo van Kemenade &lt;1324225+hugovk@users.noreply.github.com&gt;
diff --git a/pyperformance/data-files/benchmarks/bm_yaml/pyproject.toml b/pyperformance/data-files/benchmarks/bm_yaml/pyproject.toml
@@ -0,0 +1,10 @@
+[project]
+name = "pyperformance_bm_yaml"
+requires-python = ">=3.8"
+dependencies = ["pyperf"]
+urls = {repository = "https://github.com/python/pyperformance"}
+dynamic = ["version"]
+
+[tool.pyperformance]
+name = "yaml"
+tags = "serialize"
diff --git a/pyperformance/data-files/benchmarks/bm_yaml/requirements.txt b/pyperformance/data-files/benchmarks/bm_yaml/requirements.txt
@@ -0,0 +1 @@
+pyyaml==6.0.1
diff --git a/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py b/pyperformance/data-files/benchmarks/bm_yaml/run_benchmark.py
@@ -0,0 +1,91 @@
+"""
+Script for testing the performance of YAML parsing, using yaml.
+
+This will dump/load several real world-representative objects a few thousand
+times. The methodology below was chosen  to be similar to
+real-world scenarios which operate on single objects at a time.
+
+This explicitly tests the pure Python implementation in pyyaml, not its C
+extension.
+
+The object structure is copied from the `json_load` benchmark.
+"""
+
+
+import random
+import sys
+
+
+import pyperf
+import yaml
+
+
+DICT = {
+    'ads_flags': 0,
+    'age': 18,
+    'bulletin_count': 0,
+    'comment_count': 0,
+    'country': 'BR',
+    'encrypted_id': 'G9urXXAJwjE',
+    'favorite_count': 9,
+    'first_name': '',
+    'flags': 412317970704,
+    'friend_count': 0,
+    'gender': 'm',
+    'gender_for_display': 'Male',
+    'id': 302935349,
+    'is_custom_profile_icon': 0,
+    'last_name': '',
+    'locale_preference': 'pt_BR',
+    'member': 0,
+    'tags': ['a', 'b', 'c', 'd', 'e', 'f', 'g'],
+    'profile_foo_id': 827119638,
+    'secure_encrypted_id': 'Z_xxx2dYx3t4YAdnmfgyKw',
+    'session_number': 2,
+    'signup_id': '201-19225-223',
+    'status': 'A',
+    'theme': 1,
+    'time_created': 1225237014,
+    'time_updated': 1233134493,
+    'unread_message_count': 0,
+    'user_group': '0',
+    'username': 'collinwinter',
+    'play_count': 9,
+    'view_count': 7,
+    'zip': ''}
+
+TUPLE = (
+    [265867233, 265868503, 265252341, 265243910, 265879514,
+     266219766, 266021701, 265843726, 265592821, 265246784,
+     265853180, 45526486, 265463699, 265848143, 265863062,
+     265392591, 265877490, 265823665, 265828884, 265753032], 60)
+
+
+def mutate_dict(orig_dict, random_source):
+    new_dict = dict(orig_dict)
+    for key, value in new_dict.items():
+        rand_val = random_source.random() * sys.maxsize
+        if isinstance(key, (int, bytes, str)):
+            new_dict[key] = type(key)(rand_val)
+    return new_dict
+
+
+random_source = random.Random(5)  # Fixed seed.
+DICT_GROUP = [mutate_dict(DICT, random_source) for _ in range(3)]
+
+
+def bench_yaml(objs):
+    for obj in objs:
+        yaml.load(obj, Loader=yaml.Loader)
+
+
+if __name__ == "__main__":
+    runner = pyperf.Runner()
+    runner.metadata['description'] = "Benchmark yaml.load()"
+
+    yaml_dict = yaml.dump(DICT)
+    yaml_tuple = yaml.dump(TUPLE)
+    yaml_dict_group = yaml.dump(DICT_GROUP)
+    objs = (yaml_dict, yaml_tuple, yaml_dict_group)
+
+    runner.bench_func('yaml', bench_yaml, objs, inner_loops=20)