From 5abcf3943e168277d7480e5fb64bb36d8786c3de Mon Sep 17 00:00:00 2001
From: Jiradet Ounjai <jiradet.jd@gmail.com>
Date: Tue, 3 May 2022 11:56:26 +0700
Subject: [PATCH 1/3] implement target fuzzing

---
 common/experiment_utils.py                    |   6 +
 common/target_fuzzing_utils.py                | 151 ++++++++++++++++++
 database/models.py                            |  13 ++
 experiment/dispatcher.py                      |   8 +-
 experiment/measurer/coverage_utils.py         |   4 +-
 experiment/measurer/measure_manager.py        | 110 +++++++++----
 .../runner-startup-script-template.sh         |   2 +
 experiment/run_experiment.py                  |  16 +-
 experiment/runner.py                          |  19 ++-
 experiment/scheduler.py                       |  11 +-
 10 files changed, 306 insertions(+), 34 deletions(-)
 create mode 100644 common/target_fuzzing_utils.py

diff --git a/common/experiment_utils.py b/common/experiment_utils.py
index 2bbd64837..ab6684e43 100644
--- a/common/experiment_utils.py
+++ b/common/experiment_utils.py
@@ -78,6 +78,12 @@ def get_custom_seed_corpora_filestore_path():
                           'custom_seed_corpora')
 
 
+def get_target_fuzzing_corpora_filestore_path():
+    """Returns path containing seed corpora for the target fuzzing experiment."""
+    return posixpath.join(get_experiment_filestore_path(),
+                          'target-fuzzing-corpora')
+
+
 def get_dispatcher_instance_name(experiment: str) -> str:
     """Returns a dispatcher instance name for an experiment."""
     return 'd-%s' % experiment
diff --git a/common/target_fuzzing_utils.py b/common/target_fuzzing_utils.py
new file mode 100644
index 000000000..15d5330cb
--- /dev/null
+++ b/common/target_fuzzing_utils.py
@@ -0,0 +1,151 @@
+import random
+import os
+import zipfile
+import tempfile
+import tarfile
+import multiprocessing
+import itertools
+from typing import List
+
+from common import experiment_utils
+from common import filesystem
+from experiment.measurer import coverage_utils
+from experiment.measurer import run_coverage
+from database import utils as db_utils
+from database import models
+from common import logs
+from common import benchmark_utils
+from experiment.build import build_utils
+from common import experiment_path as exp_path
+
+MAX_CORPUS_FILES = 5
+
+
+def get_covered_branches_per_function(coverage_info):
+    function_coverage_info = coverage_info["data"][0]["functions"]
+    covered_branches = set([])
+    for function in function_coverage_info:
+        function_name = function["name"]
+        for branch in function["branches"]:
+            if branch[4]:
+                coverage_key = "{} {}:{}-{}:{} T".format(
+                    function_name, branch[0], branch[1], branch[2], branch[3])
+                covered_branches.add(coverage_key)
+            if branch[5]:
+                coverage_key = "{} {}:{}-{}:{} F".format(
+                    function_name, branch[0], branch[1], branch[2], branch[3])
+                covered_branches.add(coverage_key)
+    return covered_branches
+
+
+def get_covered_branches(coverage_binary, corpus_dir):
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        profdata_file = os.path.join(tmp_dir, 'data.profdata')
+        merged_profdata_file = os.path.join(tmp_dir, 'merged.profdata')
+        merged_summary_json_file = os.path.join(tmp_dir, 'merged.json')
+        crashes_dir = os.path.join(tmp_dir, 'crashes')
+        filesystem.create_directory(crashes_dir)
+
+        run_coverage.do_coverage_run(coverage_binary, corpus_dir, profdata_file,
+                                     crashes_dir)
+        coverage_utils.merge_profdata_files([profdata_file],
+                                            merged_profdata_file)
+        coverage_utils.generate_json_summary(coverage_binary,
+                                             merged_profdata_file,
+                                             merged_summary_json_file,
+                                             summary_only=False)
+        coverage_info = coverage_utils.get_coverage_infomation(
+            merged_summary_json_file)
+        return get_covered_branches_per_function(coverage_info)
+
+
+def main_loop(benchmarks: List[str], num_trials: int):
+    pool_args = ()
+    with multiprocessing.Pool(*pool_args) as pool:
+        target_coverage_list = pool.starmap(
+            setup_fuzzing_target,
+            [(benchmark, num_trials) for benchmark in benchmarks])
+        target_coverage = list(itertools.chain(*target_coverage_list))
+        logs.info('Done Preparing target fuzzing (total %d target)',
+                  len(target_coverage))
+        db_utils.bulk_save(target_coverage)
+
+
+def setup_fuzzing_target(benchmark: str, num_trials: int):
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        coverage_binaries_dir = build_utils.get_coverage_binaries_dir()
+        archive_name = 'coverage-build-%s.tar.gz' % benchmark
+        archive_filestore_path = exp_path.filestore(coverage_binaries_dir /
+                                                    archive_name)
+        filesystem.copy(archive_filestore_path, tmp_dir)
+        archive_path = os.path.join(tmp_dir, archive_name)
+        tar = tarfile.open(archive_path, 'r:gz')
+        tar.extractall(tmp_dir)
+        os.remove(archive_path)
+        coverage_binary = os.path.join(
+            tmp_dir, benchmark_utils.get_fuzz_target(benchmark))
+        return prepare_target_fuzzing_corpus(benchmark, num_trials,
+                                             coverage_binary)
+
+
+def prepare_target_fuzzing_corpus(benchmark: str, num_trials: int,
+                                  coverage_binary: str):
+    """Prepare corpus for target fuzzing."""
+
+    target_coverage = []
+
+    # path used to store and feed seed corpus for benchmark runner
+    # each trial group will have the same seed input(s)
+    target_fuzzing_benchmark = os.path.join(
+        experiment_utils.get_target_fuzzing_corpora_filestore_path(), benchmark)
+    filesystem.create_directory(target_fuzzing_benchmark)
+
+    # randomly pick from custom seed corpus
+    corpus_archive_filename = os.path.join(
+        experiment_utils.get_custom_seed_corpora_filestore_path(),
+        f'{benchmark}.zip')
+    with tempfile.TemporaryDirectory() as tmp_dir:
+        with zipfile.ZipFile(corpus_archive_filename) as zip_file:
+            # only consider file not directory
+            corpus_files = [
+                f for f in zip_file.infolist() if not f.filename.endswith('/')
+            ]
+            for trial_group_num in range(num_trials):
+                logs.info('Preparing target fuzzing: %s, trial_group: %d',
+                          benchmark, trial_group_num)
+
+                trial_group_subdir = 'trial-group-%d' % trial_group_num
+                target_fuzzing_trial_dir = os.path.join(
+                    target_fuzzing_benchmark, trial_group_subdir)
+                src_dir = os.path.join(tmp_dir, "source")
+                dest_dir = os.path.join(tmp_dir, "dest")
+                filesystem.recreate_directory(src_dir)
+                filesystem.recreate_directory(dest_dir)
+
+                source_files = random.sample(corpus_files, MAX_CORPUS_FILES)
+                for file in source_files:
+                    zip_file.extract(file, src_dir)
+
+                dest_files = random.sample(corpus_files, MAX_CORPUS_FILES)
+                for file in dest_files:
+                    zip_file.extract(file, dest_dir)
+
+                src_branches = get_covered_branches(coverage_binary, src_dir)
+                dest_branches = get_covered_branches(coverage_binary, dest_dir)
+                target_branches = dest_branches - src_branches
+
+                if not target_branches:
+                    raise RuntimeError(
+                        'Unable to find target branches for %s.' % benchmark)
+
+                for branch in target_branches:
+                    target_cov = models.TargetCoverage()
+                    target_cov.trial_group_num = int(trial_group_num)
+                    target_cov.benchmark = benchmark
+                    target_cov.target_location = branch
+                    target_coverage.append(target_cov)
+
+                # copy only the src directory
+                filesystem.copytree(src_dir, target_fuzzing_trial_dir)
+
+    return target_coverage
diff --git a/database/models.py b/database/models.py
index 7cf902397..848849df9 100644
--- a/database/models.py
+++ b/database/models.py
@@ -50,6 +50,7 @@ class Trial(Base):
     benchmark = Column(String, nullable=False)
     time_started = Column(DateTime(), nullable=True)
     time_ended = Column(DateTime(), nullable=True)
+    trial_group_num = Column(Integer, nullable=True)
 
     # Columns used for preemptible experiments.
     preemptible = Column(Boolean, default=False, nullable=False)
@@ -71,6 +72,8 @@ class Snapshot(Base):
     trial_id = Column(Integer, ForeignKey('trial.id'), primary_key=True)
     trial = sqlalchemy.orm.relationship('Trial', back_populates='snapshots')
     edges_covered = Column(Integer, nullable=False)
+    targets_covered = Column(Integer, nullable=False)
+    trial_group_num = Column(Integer, nullable=False)
     fuzzer_stats = Column(JSON, nullable=True)
     crashes = sqlalchemy.orm.relationship(
         'Crash',
@@ -94,3 +97,13 @@ class Crash(Base):
 
     __table_args__ = (ForeignKeyConstraint(
         [time, trial_id], ['snapshot.time', 'snapshot.trial_id']),)
+
+
+class TargetCoverage(Base):
+    """Represents target branches for the target fuzzing mode."""
+    __tablename__ = 'target_coverage'
+
+    id = Column(Integer, primary_key=True)
+    benchmark = Column(String, nullable=False)
+    trial_group_num = Column(Integer, nullable=False)
+    target_location = Column(String, nullable=False)
diff --git a/experiment/dispatcher.py b/experiment/dispatcher.py
index c14d50c16..d801d5d25 100755
--- a/experiment/dispatcher.py
+++ b/experiment/dispatcher.py
@@ -24,6 +24,7 @@
 import time
 from typing import List
 
+from common import target_fuzzing_utils
 from common import experiment_path as exp_path
 from common import experiment_utils
 from common import logs
@@ -131,7 +132,8 @@ def build_images_for_trials(fuzzers: List[str],
             models.Trial(fuzzer=fuzzer,
                          experiment=experiment_name,
                          benchmark=benchmark,
-                         preemptible=preemptible) for _ in range(num_trials)
+                         preemptible=preemptible,
+                         trial_group_num=trial) for trial in range(num_trials)
         ]
         trials.extend(fuzzer_benchmark_trials)
     return trials
@@ -159,6 +161,10 @@ def dispatcher_main():
                                      experiment.config['concurrent_builds'])
     _initialize_trials_in_db(trials)
 
+    if experiment.config['target_fuzzing']:
+        target_fuzzing_utils.main_loop(experiment.benchmarks,
+                                       experiment.num_trials)
+
     create_work_subdirs(['experiment-folders', 'measurement-folders'])
 
     # Start measurer and scheduler in seperate threads/processes.
diff --git a/experiment/measurer/coverage_utils.py b/experiment/measurer/coverage_utils.py
index 0122b8454..935e77402 100644
--- a/experiment/measurer/coverage_utils.py
+++ b/experiment/measurer/coverage_utils.py
@@ -233,10 +233,12 @@ def get_coverage_infomation(coverage_summary_file):
 class TrialCoverage:  # pylint: disable=too-many-instance-attributes
     """Base class for storing and getting coverage data for a trial."""
 
-    def __init__(self, fuzzer: str, benchmark: str, trial_num: int):
+    def __init__(self, fuzzer: str, benchmark: str, trial_num: int,
+                 trial_group_num: int):
         self.fuzzer = fuzzer
         self.benchmark = benchmark
         self.trial_num = trial_num
+        self.trial_group_num = trial_group_num
         self.benchmark_fuzzer_trial_dir = exp_utils.get_trial_dir(
             fuzzer, benchmark, trial_num)
         self.work_dir = exp_utils.get_work_dir()
diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py
index 07e48dda3..05338321f 100644
--- a/experiment/measurer/measure_manager.py
+++ b/experiment/measurer/measure_manager.py
@@ -32,6 +32,7 @@
 from sqlalchemy import func
 from sqlalchemy import orm
 
+from common import target_fuzzing_utils
 from common import benchmark_config
 from common import experiment_utils
 from common import experiment_path as exp_path
@@ -51,7 +52,8 @@
 logger = logs.Logger('measurer')  # pylint: disable=invalid-name
 
 SnapshotMeasureRequest = collections.namedtuple(
-    'SnapshotMeasureRequest', ['fuzzer', 'benchmark', 'trial_id', 'cycle'])
+    'SnapshotMeasureRequest',
+    ['fuzzer', 'benchmark', 'trial_id', 'cycle', 'trial_group_num'])
 
 NUM_RETRIES = 3
 RETRY_DELAY = 3
@@ -76,7 +78,10 @@ def measure_main(experiment_config):
     max_total_time = experiment_config['max_total_time']
     measurers_cpus = experiment_config['measurers_cpus']
     runners_cpus = experiment_config['runners_cpus']
-    measure_loop(experiment, max_total_time, measurers_cpus, runners_cpus)
+    trials = experiment_config['trials']
+    target_fuzzing = experiment_config['target_fuzzing']
+    measure_loop(experiment, trials, max_total_time, measurers_cpus,
+                 runners_cpus, target_fuzzing)
 
     # Clean up resources.
     gc.collect()
@@ -95,9 +100,11 @@ def _process_init(cores_queue):
 
 
 def measure_loop(experiment: str,
+                 trials: int,
                  max_total_time: int,
                  measurers_cpus=None,
-                 runners_cpus=None):
+                 runners_cpus=None,
+                 target_fuzzing=False):
     """Continuously measure trials for |experiment|."""
     logger.info('Start measure_loop.')
 
@@ -116,7 +123,7 @@ def measure_loop(experiment: str,
 
     with multiprocessing.Pool(
             *pool_args) as pool, multiprocessing.Manager() as manager:
-        set_up_coverage_binaries(pool, experiment)
+        set_up_coverage_binaries(pool, experiment, trials)
         # Using Multiprocessing.Queue will fail with a complaint about
         # inheriting queue.
         q = manager.Queue()  # pytype: disable=attribute-error
@@ -126,7 +133,8 @@ def measure_loop(experiment: str,
                 # races.
                 all_trials_ended = scheduler.all_trials_ended(experiment)
 
-                if not measure_all_trials(experiment, max_total_time, pool, q):
+                if not measure_all_trials(experiment, max_total_time, pool, q,
+                                          target_fuzzing):
                     # We didn't measure any trials.
                     if all_trials_ended:
                         # There are no trials producing snapshots to measure.
@@ -141,7 +149,11 @@ def measure_loop(experiment: str,
     logger.info('Finished measure loop.')
 
 
-def measure_all_trials(experiment: str, max_total_time: int, pool, q) -> bool:  # pylint: disable=invalid-name
+def measure_all_trials(experiment: str,
+                       max_total_time: int,
+                       pool,
+                       q,
+                       target_fuzzing=False) -> bool:  # pylint: disable=invalid-name
     """Get coverage data (with coverage runs) for all active trials. Note that
     this should not be called unless multiprocessing.set_start_method('spawn')
     was called first. Otherwise it will use fork which breaks logging."""
@@ -158,7 +170,7 @@ def measure_all_trials(experiment: str, max_total_time: int, pool, q) -> bool:
         return False
 
     measure_trial_coverage_args = [
-        (unmeasured_snapshot, max_cycle, q)
+        (unmeasured_snapshot, max_cycle, q, target_fuzzing)
         for unmeasured_snapshot in unmeasured_snapshots
     ]
 
@@ -253,13 +265,15 @@ def _get_unmeasured_first_snapshots(
     snapshot for their trial. The trials are trials in |experiment|."""
     trials_without_snapshots = _query_unmeasured_trials(experiment)
     return [
-        SnapshotMeasureRequest(trial.fuzzer, trial.benchmark, trial.id, 1)
+        SnapshotMeasureRequest(trial.fuzzer, trial.benchmark, trial.id, 1,
+                               trial.trial_group_num)
         for trial in trials_without_snapshots
     ]
 
 
 SnapshotWithTime = collections.namedtuple(
-    'SnapshotWithTime', ['fuzzer', 'benchmark', 'trial_id', 'time'])
+    'SnapshotWithTime',
+    ['fuzzer', 'benchmark', 'trial_id', 'time', 'trial_group_num'])
 
 
 def _query_measured_latest_snapshots(experiment: str):
@@ -270,7 +284,8 @@ def _query_measured_latest_snapshots(experiment: str):
     # The order of these columns must correspond to the fields in
     # SnapshotWithTime.
     columns = (models.Trial.fuzzer, models.Trial.benchmark,
-               models.Snapshot.trial_id, latest_time_column)
+               models.Snapshot.trial_id, latest_time_column,
+               models.Trial.trial_group_num)
     experiment_filter = models.Snapshot.trial.has(experiment=experiment)
     group_by_columns = (models.Snapshot.trial_id, models.Trial.benchmark,
                         models.Trial.fuzzer)
@@ -300,7 +315,8 @@ def _get_unmeasured_next_snapshots(
         snapshot_with_cycle = SnapshotMeasureRequest(snapshot.fuzzer,
                                                      snapshot.benchmark,
                                                      snapshot.trial_id,
-                                                     next_cycle)
+                                                     next_cycle,
+                                                     snapshot.trial_group_num)
         next_snapshots.append(snapshot_with_cycle)
     return next_snapshots
 
@@ -357,8 +373,8 @@ class SnapshotMeasurer(coverage_utils.TrialCoverage):  # pylint: disable=too-man
     UNIT_BLACKLIST = collections.defaultdict(set)
 
     def __init__(self, fuzzer: str, benchmark: str, trial_num: int,
-                 trial_logger: logs.Logger):
-        super().__init__(fuzzer, benchmark, trial_num)
+                 trial_logger: logs.Logger, trial_group_num: int):
+        super().__init__(fuzzer, benchmark, trial_num, trial_group_num)
         self.logger = trial_logger
         self.corpus_dir = os.path.join(self.measurement_dir, 'corpus')
 
@@ -428,6 +444,31 @@ def generate_summary(self, cycle: int, summary_only=False):
                 self.logger.error(
                     'Coverage summary json file generation failed in the end.')
 
+    def get_current_target_coverage(self) -> int:
+        """Get the current number of target branches covered."""
+        if not os.path.exists(self.cov_summary_file):
+            self.logger.warning('No coverage summary json file found.')
+            return 0
+        try:
+            total_target_covered = 0
+            coverage_info = coverage_utils.get_coverage_infomation(
+                self.cov_summary_file)
+            covered_branches = target_fuzzing_utils.get_covered_branches_per_function(
+                coverage_info)
+            # measure target coverage
+            with db_utils.session_scope() as session:
+                # Match benchmark too: group numbers repeat across benchmarks.
+                rows = session.query(models.TargetCoverage).filter_by(
+                    benchmark=self.benchmark,
+                    trial_group_num=self.trial_group_num).all()
+                total_target_covered = sum(
+                    row.target_location in covered_branches for row in rows)
+            return total_target_covered
+        except Exception:  # pylint: disable=broad-except
+            self.logger.error(
+                'Coverage summary json file defective or missing.')
+            return 0
+
     def get_current_coverage(self) -> int:
         """Get the current number of lines covered."""
         if not os.path.exists(self.cov_summary_file):
@@ -612,8 +653,8 @@ def get_fuzzer_stats(stats_filestore_path):
 
 
 def measure_trial_coverage(  # pylint: disable=invalid-name
-        measure_req, max_cycle: int,
-        q: multiprocessing.Queue) -> models.Snapshot:
+        measure_req, max_cycle: int, q: multiprocessing.Queue,
+        target_fuzzing) -> models.Snapshot:
     """Measure the coverage obtained by |trial_num| on |benchmark| using
     |fuzzer|."""
     initialize_logs()
@@ -624,24 +665,28 @@ def measure_trial_coverage(  # pylint: disable=invalid-name
         try:
             snapshot = measure_snapshot_coverage(measure_req.fuzzer,
                                                  measure_req.benchmark,
-                                                 measure_req.trial_id, cycle)
+                                                 measure_req.trial_id, cycle,
+                                                 measure_req.trial_group_num,
+                                                 target_fuzzing)
             if not snapshot:
                 break
             q.put(snapshot)
         except Exception:  # pylint: disable=broad-except
-            logger.error('Error measuring cycle.',
-                         extras={
-                             'fuzzer': measure_req.fuzzer,
-                             'benchmark': measure_req.benchmark,
-                             'trial_id': str(measure_req.trial_id),
-                             'cycle': str(cycle),
-                         })
+            logger.error(
+                'Error measuring cycle.',
+                extras={
+                    'fuzzer': measure_req.fuzzer,
+                    'benchmark': measure_req.benchmark,
+                    'trial_id': str(measure_req.trial_id),
+                    'trial_group_num': str(measure_req.trial_group_num),
+                    'cycle': str(cycle),
+                })
     logger.debug('Done measuring trial: %d.', measure_req.trial_id)
 
 
 def measure_snapshot_coverage(  # pylint: disable=too-many-locals
-        fuzzer: str, benchmark: str, trial_num: int,
-        cycle: int) -> models.Snapshot:
+        fuzzer: str, benchmark: str, trial_num: int, cycle: int,
+        trial_group_num: int, target_fuzzing: bool) -> models.Snapshot:
     """Measure coverage of the snapshot for |cycle| for |trial_num| of |fuzzer|
     and |benchmark|."""
     snapshot_logger = logs.Logger('measurer',
@@ -650,9 +695,10 @@ def measure_snapshot_coverage(  # pylint: disable=too-many-locals
                                       'benchmark': benchmark,
                                       'trial_id': str(trial_num),
                                       'cycle': str(cycle),
+                                      'trial_group_num': str(trial_group_num)
                                   })
     snapshot_measurer = SnapshotMeasurer(fuzzer, benchmark, trial_num,
-                                         snapshot_logger)
+                                         snapshot_logger, trial_group_num)
 
     measuring_start_time = time.time()
     snapshot_logger.info('Measuring cycle: %d.', cycle)
@@ -660,9 +706,14 @@ def measure_snapshot_coverage(  # pylint: disable=too-many-locals
     if snapshot_measurer.is_cycle_unchanged(cycle):
         snapshot_logger.info('Cycle: %d is unchanged.', cycle)
         regions_covered = snapshot_measurer.get_current_coverage()
+        targets_covered = 0
+        if target_fuzzing:
+            targets_covered = snapshot_measurer.get_current_target_coverage()
         fuzzer_stats_data = snapshot_measurer.get_fuzzer_stats(cycle)
         return models.Snapshot(time=this_time,
                                trial_id=trial_num,
+                               trial_group_num=trial_group_num,
+                               targets_covered=targets_covered,
                                edges_covered=regions_covered,
                                fuzzer_stats=fuzzer_stats_data,
                                crashes=[])
@@ -698,8 +749,13 @@ def measure_snapshot_coverage(  # pylint: disable=too-many-locals
 
     # Get the coverage of the new corpus units.
     regions_covered = snapshot_measurer.get_current_coverage()
+    targets_covered = 0
+    if target_fuzzing:
+        targets_covered = snapshot_measurer.get_current_target_coverage()
     fuzzer_stats_data = snapshot_measurer.get_fuzzer_stats(cycle)
     snapshot = models.Snapshot(time=this_time,
+                               trial_group_num=trial_group_num,
+                               targets_covered=targets_covered,
                                trial_id=trial_num,
                                edges_covered=regions_covered,
                                fuzzer_stats=fuzzer_stats_data,
@@ -714,7 +770,7 @@ def measure_snapshot_coverage(  # pylint: disable=too-many-locals
     return snapshot
 
 
-def set_up_coverage_binaries(pool, experiment):
+def set_up_coverage_binaries(pool, experiment, trials):
     """Set up coverage binaries for all benchmarks in |experiment|."""
     # Use set comprehension to select distinct benchmarks.
     with db_utils.session_scope() as session:
diff --git a/experiment/resources/runner-startup-script-template.sh b/experiment/resources/runner-startup-script-template.sh
index e0ff19f2d..653d939ca 100644
--- a/experiment/resources/runner-startup-script-template.sh
+++ b/experiment/resources/runner-startup-script-template.sh
@@ -42,8 +42,10 @@ docker run \
 -e BENCHMARK={{benchmark}} \
 -e EXPERIMENT={{experiment}} \
 -e TRIAL_ID={{trial_id}} \
+-e TRIAL_GROUP_NUM={{trial_group_num}} \
 -e MAX_TOTAL_TIME={{max_total_time}} \
 -e NO_SEEDS={{no_seeds}} \
+-e TARGET_FUZZING={{target_fuzzing}} \
 -e NO_DICTIONARIES={{no_dictionaries}} \
 -e OSS_FUZZ_CORPUS={{oss_fuzz_corpus}} \
 -e CUSTOM_SEED_CORPUS_DIR={{custom_seed_corpus_dir}} \
diff --git a/experiment/run_experiment.py b/experiment/run_experiment.py
index 04d85a656..ee43871ed 100644
--- a/experiment/run_experiment.py
+++ b/experiment/run_experiment.py
@@ -261,7 +261,8 @@ def start_experiment(  # pylint: disable=too-many-arguments
         concurrent_builds=None,
         measurers_cpus=None,
         runners_cpus=None,
-        custom_seed_corpus_dir=None):
+        custom_seed_corpus_dir=None,
+        target_fuzzing=False):
     """Start a fuzzer benchmarking experiment."""
     if not allow_uncommitted_changes:
         check_no_uncommitted_changes()
@@ -295,6 +296,7 @@ def start_experiment(  # pylint: disable=too-many-arguments
     if config['custom_seed_corpus_dir']:
         validate_and_pack_custom_seed_corpus(config['custom_seed_corpus_dir'],
                                              benchmarks)
+    config['target_fuzzing'] = target_fuzzing
 
     return start_experiment_from_full_config(config)
 
@@ -611,6 +613,12 @@ def main():
                         required=False,
                         default=False,
                         action='store_true')
+    parser.add_argument('-tf',
+                        '--target-fuzzing',
+                        help='Target fuzzing mode.',
+                        required=False,
+                        default=False,
+                        action='store_true')
     parser.add_argument(
         '-o',
         '--oss-fuzz-corpus',
@@ -655,6 +663,9 @@ def main():
             parser.error('Cannot enable options "custom_seed_corpus_dir" and '
                          '"oss_fuzz_corpus" at the same time')
 
+    if args.target_fuzzing and not args.custom_seed_corpus_dir:
+        parser.error('Target fuzzing can only be used with custom seed corpus')
+
     start_experiment(args.experiment_name,
                      args.experiment_config,
                      args.benchmarks,
@@ -667,7 +678,8 @@ def main():
                      concurrent_builds=concurrent_builds,
                      measurers_cpus=measurers_cpus,
                      runners_cpus=runners_cpus,
-                     custom_seed_corpus_dir=args.custom_seed_corpus_dir)
+                     custom_seed_corpus_dir=args.custom_seed_corpus_dir,
+                     target_fuzzing=args.target_fuzzing)
     return 0
 
 
diff --git a/experiment/runner.py b/experiment/runner.py
index 6dcb2c49c..9dd45cb54 100644
--- a/experiment/runner.py
+++ b/experiment/runner.py
@@ -115,6 +115,20 @@ def get_clusterfuzz_seed_corpus_path(fuzz_target_path):
     return seed_corpus_path if os.path.exists(seed_corpus_path) else None
 
 
+def _unpack_target_fuzzing_corpus(corpus_directory):
+    # remove initial seed corpus
+    shutil.rmtree(corpus_directory)
+
+    benchmark = environment.get('BENCHMARK')
+    trial_group_num = environment.get('TRIAL_GROUP_NUM')
+    target_fuzzing_corpora_dir = experiment_utils.get_target_fuzzing_corpora_filestore_path(
+    )
+    target_fuzzing_sub_dir = 'trial-group-%s' % int(trial_group_num)
+    target_fuzzing_dir = posixpath.join(target_fuzzing_corpora_dir, benchmark,
+                                        target_fuzzing_sub_dir)
+    shutil.copytree(target_fuzzing_dir, corpus_directory)
+
+
 def _unpack_custom_seed_corpus(corpus_directory):
     "Unpack seed corpus provided by user"
     # remove initial seed corpus
@@ -200,7 +214,10 @@ def run_fuzzer(max_total_time, log_filename):
         return
 
     if environment.get('CUSTOM_SEED_CORPUS_DIR'):
-        _unpack_custom_seed_corpus(input_corpus)
+        if environment.get('TARGET_FUZZING'):
+            _unpack_target_fuzzing_corpus(input_corpus)
+        else:
+            _unpack_custom_seed_corpus(input_corpus)
     else:
         _unpack_clusterfuzz_seed_corpus(target_binary, input_corpus)
     _clean_seed_corpus(input_corpus)
diff --git a/experiment/scheduler.py b/experiment/scheduler.py
index effb9f319..a7a732d36 100644
--- a/experiment/scheduler.py
+++ b/experiment/scheduler.py
@@ -651,6 +651,7 @@ def __init__(self, trial):
         self.time_started = trial.time_started
         self.time_ended = trial.time_ended
         self.preemptible = trial.preemptible
+        self.trial_group_num = trial.trial_group_num
 
 
 def _initialize_logs(experiment):
@@ -678,7 +679,8 @@ def _start_trial(trial: TrialProxy, experiment_config: dict):
     _initialize_logs(experiment_config['experiment'])
     logger.info('Start trial %d.', trial.id)
     started = create_trial_instance(trial.fuzzer, trial.benchmark, trial.id,
-                                    experiment_config, trial.preemptible)
+                                    trial.trial_group_num, experiment_config,
+                                    trial.preemptible)
     if started:
         trial.time_started = datetime_now()
         return trial
@@ -688,6 +690,7 @@ def _start_trial(trial: TrialProxy, experiment_config: dict):
 
 def render_startup_script_template(instance_name: str, fuzzer: str,
                                    benchmark: str, trial_id: int,
+                                   trial_group_num: int,
                                    experiment_config: dict):
     """Render the startup script using the template and the parameters
     provided and return the result."""
@@ -705,6 +708,7 @@ def render_startup_script_template(instance_name: str, fuzzer: str,
         'experiment': experiment,
         'fuzzer': fuzzer,
         'trial_id': trial_id,
+        'trial_group_num': trial_group_num,
         'max_total_time': experiment_config['max_total_time'],
         'experiment_filestore': experiment_config['experiment_filestore'],
         'report_filestore': experiment_config['report_filestore'],
@@ -713,6 +717,7 @@ def render_startup_script_template(instance_name: str, fuzzer: str,
         'docker_registry': experiment_config['docker_registry'],
         'local_experiment': local_experiment,
         'no_seeds': experiment_config['no_seeds'],
+        'target_fuzzing': experiment_config['target_fuzzing'],
         'no_dictionaries': experiment_config['no_dictionaries'],
         'oss_fuzz_corpus': experiment_config['oss_fuzz_corpus'],
         'num_cpu_cores': experiment_config['runner_num_cpu_cores'],
@@ -728,13 +733,15 @@ def render_startup_script_template(instance_name: str, fuzzer: str,
 
 
 def create_trial_instance(fuzzer: str, benchmark: str, trial_id: int,
-                          experiment_config: dict, preemptible: bool) -> bool:
+                          trial_group_num: int, experiment_config: dict,
+                          preemptible: bool) -> bool:
     """Create or start a trial instance for a specific
     trial_id,fuzzer,benchmark."""
     instance_name = experiment_utils.get_trial_instance_name(
         experiment_config['experiment'], trial_id)
     startup_script = render_startup_script_template(instance_name, fuzzer,
                                                     benchmark, trial_id,
+                                                    trial_group_num,
                                                     experiment_config)
     startup_script_path = '/tmp/%s-start-docker.sh' % instance_name
     with open(startup_script_path, 'w') as file_handle:

From 14d5152487aab9f96a1e2f95d8e88f43607b0348 Mon Sep 17 00:00:00 2001
From: Jiradet Ounjai <jiradet.jd@gmail.com>
Date: Tue, 10 May 2022 20:23:27 +0700
Subject: [PATCH 2/3] add option to run random seed selection

---
 common/experiment_utils.py                    |   4 +-
 ...tils.py => random_corpus_fuzzing_utils.py} | 126 ++++++++++--------
 experiment/dispatcher.py                      |   9 +-
 experiment/measurer/measure_manager.py        |   4 +-
 .../runner-startup-script-template.sh         |   1 +
 experiment/run_experiment.py                  |  14 +-
 experiment/runner.py                          |  16 +--
 experiment/scheduler.py                       |   1 +
 8 files changed, 102 insertions(+), 73 deletions(-)
 rename common/{target_fuzzing_utils.py => random_corpus_fuzzing_utils.py} (51%)

diff --git a/common/experiment_utils.py b/common/experiment_utils.py
index ab6684e43..58a7a1c3d 100644
--- a/common/experiment_utils.py
+++ b/common/experiment_utils.py
@@ -78,10 +78,10 @@ def get_custom_seed_corpora_filestore_path():
                           'custom_seed_corpora')
 
 
-def get_target_fuzzing_corpora_filestore_path():
+def get_random_corpora_filestore_path():
     """Returns path containing seed corpora for the target fuzzing experiment."""
     return posixpath.join(get_experiment_filestore_path(),
-                          'target-fuzzing-corpora')
+                          'random_corpora')
 
 
 def get_dispatcher_instance_name(experiment: str) -> str:
diff --git a/common/target_fuzzing_utils.py b/common/random_corpus_fuzzing_utils.py
similarity index 51%
rename from common/target_fuzzing_utils.py
rename to common/random_corpus_fuzzing_utils.py
index 15d5330cb..e219997ff 100644
--- a/common/target_fuzzing_utils.py
+++ b/common/random_corpus_fuzzing_utils.py
@@ -18,8 +18,7 @@
 from experiment.build import build_utils
 from common import experiment_path as exp_path
 
-MAX_CORPUS_FILES = 5
-
+MAX_RANDOM_CORPUS_FILES = 5
 
 def get_covered_branches_per_function(coverage_info):
     function_coverage_info = coverage_info["data"][0]["functions"]
@@ -59,93 +58,108 @@ def get_covered_branches(coverage_binary, corpus_dir):
         return get_covered_branches_per_function(coverage_info)
 
 
-def main_loop(benchmarks: List[str], num_trials: int):
+def initialize_random_corpus_fuzzing(benchmarks: List[str],
+                                     num_trials: int,
+                                     target_fuzzing: bool = False):
+    """Prepare each benchmark's random corpus and save its target coverage."""
     pool_args = ()
     with multiprocessing.Pool(*pool_args) as pool:
-        target_coverage_list = pool.starmap(
-            setup_fuzzing_target,
-            [(benchmark, num_trials) for benchmark in benchmarks])
+        target_coverage_list = pool.starmap(prepare_benchmark_random_corpus, [
+            (benchmark, num_trials, target_fuzzing) for benchmark in benchmarks
+        ])
         target_coverage = list(itertools.chain(*target_coverage_list))
         logs.info('Done Preparing target fuzzing (total %d target)',
                   len(target_coverage))
         db_utils.bulk_save(target_coverage)
 
 
-def setup_fuzzing_target(benchmark: str, num_trials: int):
-    with tempfile.TemporaryDirectory() as tmp_dir:
-        coverage_binaries_dir = build_utils.get_coverage_binaries_dir()
-        archive_name = 'coverage-build-%s.tar.gz' % benchmark
-        archive_filestore_path = exp_path.filestore(coverage_binaries_dir /
-                                                    archive_name)
-        filesystem.copy(archive_filestore_path, tmp_dir)
-        archive_path = os.path.join(tmp_dir, archive_name)
-        tar = tarfile.open(archive_path, 'r:gz')
-        tar.extractall(tmp_dir)
-        os.remove(archive_path)
-        coverage_binary = os.path.join(
-            tmp_dir, benchmark_utils.get_fuzz_target(benchmark))
-        return prepare_target_fuzzing_corpus(benchmark, num_trials,
-                                             coverage_binary)
-
-
-def prepare_target_fuzzing_corpus(benchmark: str, num_trials: int,
-                                  coverage_binary: str):
+def get_coverage_binary(benchmark, tmp_dir):
+    """Extract the coverage build into tmp_dir and return the binary path."""
+    coverage_binaries_dir = build_utils.get_coverage_binaries_dir()
+    archive_name = 'coverage-build-%s.tar.gz' % benchmark
+    archive_filestore_path = exp_path.filestore(coverage_binaries_dir /
+                                                archive_name)
+    filesystem.copy(archive_filestore_path, tmp_dir)
+    archive_path = os.path.join(tmp_dir, archive_name)
+    tar = tarfile.open(archive_path, 'r:gz')
+    tar.extractall(tmp_dir)
+    os.remove(archive_path)
+    coverage_binary = os.path.join(tmp_dir,
+                                   benchmark_utils.get_fuzz_target(benchmark))
+    return coverage_binary
+
+
+def prepare_benchmark_random_corpus(benchmark: str,
+                                    num_trials: int,
+                                    target_fuzzing: bool = False):
     """Prepare corpus for target fuzzing."""
-
+    coverage_binary = None
     target_coverage = []
-
     # path used to store and feed seed corpus for benchmark runner
     # each trial group will have the same seed input(s)
-    target_fuzzing_benchmark = os.path.join(
-        experiment_utils.get_target_fuzzing_corpora_filestore_path(), benchmark)
-    filesystem.create_directory(target_fuzzing_benchmark)
+    benchmark_random_corpora = os.path.join(
+        experiment_utils.get_random_corpora_filestore_path(), benchmark)
+    filesystem.create_directory(benchmark_random_corpora)
 
-    # randomly pick from custom seed corpus
+    # get inputs from the benchmark's custom seed corpus archive
     corpus_archive_filename = os.path.join(
         experiment_utils.get_custom_seed_corpora_filestore_path(),
         f'{benchmark}.zip')
+
     with tempfile.TemporaryDirectory() as tmp_dir:
+        if target_fuzzing:
+            coverage_binary = get_coverage_binary(benchmark, tmp_dir)
+
         with zipfile.ZipFile(corpus_archive_filename) as zip_file:
             # only consider file not directory
             corpus_files = [
                 f for f in zip_file.infolist() if not f.filename.endswith('/')
             ]
             for trial_group_num in range(num_trials):
-                logs.info('Preparing target fuzzing: %s, trial_group: %d',
+                logs.info('Preparing random corpus: %s, trial_group: %d',
                           benchmark, trial_group_num)
 
                 trial_group_subdir = 'trial-group-%d' % trial_group_num
-                target_fuzzing_trial_dir = os.path.join(
-                    target_fuzzing_benchmark, trial_group_subdir)
+                custom_corpus_trial_dir = os.path.join(benchmark_random_corpora,
+                                                       trial_group_subdir)
                 src_dir = os.path.join(tmp_dir, "source")
-                dest_dir = os.path.join(tmp_dir, "dest")
                 filesystem.recreate_directory(src_dir)
-                filesystem.recreate_directory(dest_dir)
 
-                source_files = random.sample(corpus_files, MAX_CORPUS_FILES)
+                source_files = random.sample(corpus_files,
+                                             MAX_RANDOM_CORPUS_FILES)
                 for file in source_files:
                     zip_file.extract(file, src_dir)
 
-                dest_files = random.sample(corpus_files, MAX_CORPUS_FILES)
-                for file in dest_files:
-                    zip_file.extract(file, dest_dir)
-
-                src_branches = get_covered_branches(coverage_binary, src_dir)
-                dest_branches = get_covered_branches(coverage_binary, dest_dir)
-                target_branches = dest_branches - src_branches
-
-                if not target_branches:
-                    raise RuntimeError(
-                        'Unable to find target branches for %s.' % benchmark)
-
-                for branch in target_branches:
-                    target_cov = models.TargetCoverage()
-                    target_cov.trial_group_num = int(trial_group_num)
-                    target_cov.benchmark = benchmark
-                    target_cov.target_location = branch
-                    target_coverage.append(target_cov)
+                if target_fuzzing:
+                    dest_dir = os.path.join(tmp_dir, "dest")
+                    filesystem.recreate_directory(dest_dir)
+
+                    dest_files = random.sample(corpus_files,
+                                               MAX_RANDOM_CORPUS_FILES)
+                    for file in dest_files:
+                        zip_file.extract(file, dest_dir)
+                    
+                    # extract covered branches of source and destination inputs
+                    # then subtract to get targeting branches
+                    src_branches = get_covered_branches(coverage_binary,
+                                                        src_dir)
+                    dest_branches = get_covered_branches(
+                        coverage_binary, dest_dir)
+                    target_branches = dest_branches - src_branches
+
+                    if not target_branches:
+                        raise RuntimeError(
+                            'Unable to find target branches for %s.' %
+                            benchmark)
+
+                    for branch in target_branches:
+                        target_cov = models.TargetCoverage()
+                        target_cov.trial_group_num = int(trial_group_num)
+                        target_cov.benchmark = benchmark
+                        target_cov.target_location = branch
+                        target_coverage.append(target_cov)
 
                 # copy only the src directory
-                filesystem.copytree(src_dir, target_fuzzing_trial_dir)
+                filesystem.copytree(src_dir, custom_corpus_trial_dir)
 
     return target_coverage
diff --git a/experiment/dispatcher.py b/experiment/dispatcher.py
index d801d5d25..71aae29b0 100755
--- a/experiment/dispatcher.py
+++ b/experiment/dispatcher.py
@@ -24,7 +24,7 @@
 import time
 from typing import List
 
-from common import target_fuzzing_utils
+from common import random_corpus_fuzzing_utils
 from common import experiment_path as exp_path
 from common import experiment_utils
 from common import logs
@@ -161,9 +161,10 @@ def dispatcher_main():
                                      experiment.config['concurrent_builds'])
     _initialize_trials_in_db(trials)
 
-    if experiment.config['target_fuzzing']:
-        target_fuzzing_utils.main_loop(experiment.benchmarks,
-                                       experiment.num_trials)
+    if experiment.config['random_corpus'] or experiment.config['target_fuzzing']:
+        random_corpus_fuzzing_utils.initialize_random_corpus_fuzzing(
+            experiment.benchmarks, experiment.num_trials,
+            experiment.config['target_fuzzing'])
 
     create_work_subdirs(['experiment-folders', 'measurement-folders'])
 
diff --git a/experiment/measurer/measure_manager.py b/experiment/measurer/measure_manager.py
index 05338321f..5779e376e 100644
--- a/experiment/measurer/measure_manager.py
+++ b/experiment/measurer/measure_manager.py
@@ -32,7 +32,7 @@
 from sqlalchemy import func
 from sqlalchemy import orm
 
-from common import target_fuzzing_utils
+from common import random_corpus_fuzzing_utils
 from common import benchmark_config
 from common import experiment_utils
 from common import experiment_path as exp_path
@@ -453,7 +453,7 @@ def get_current_target_coverage(self) -> int:
             total_target_covered = 0
             coverage_info = coverage_utils.get_coverage_infomation(
                 self.cov_summary_file)
-            covered_branches = target_fuzzing_utils.get_covered_branches_per_function(
+            covered_branches = random_corpus_fuzzing_utils.get_covered_branches_per_function(
                 coverage_info)
             # measure target coverage
             with db_utils.session_scope() as session:
diff --git a/experiment/resources/runner-startup-script-template.sh b/experiment/resources/runner-startup-script-template.sh
index 653d939ca..aed81d188 100644
--- a/experiment/resources/runner-startup-script-template.sh
+++ b/experiment/resources/runner-startup-script-template.sh
@@ -45,6 +45,7 @@ docker run \
 -e TRIAL_GROUP_NUM={{trial_group_num}} \
 -e MAX_TOTAL_TIME={{max_total_time}} \
 -e NO_SEEDS={{no_seeds}} \
+-e RANDOM_CORPUS={{random_corpus}} \
 -e TARGET_FUZZING={{target_fuzzing}} \
 -e NO_DICTIONARIES={{no_dictionaries}} \
 -e OSS_FUZZ_CORPUS={{oss_fuzz_corpus}} \
diff --git a/experiment/run_experiment.py b/experiment/run_experiment.py
index ee43871ed..9889476ff 100644
--- a/experiment/run_experiment.py
+++ b/experiment/run_experiment.py
@@ -262,6 +262,7 @@ def start_experiment(  # pylint: disable=too-many-arguments
         measurers_cpus=None,
         runners_cpus=None,
         custom_seed_corpus_dir=None,
+        random_corpus=None,
         target_fuzzing=False):
     """Start a fuzzer benchmarking experiment."""
     if not allow_uncommitted_changes:
@@ -296,6 +297,7 @@ def start_experiment(  # pylint: disable=too-many-arguments
     if config['custom_seed_corpus_dir']:
         validate_and_pack_custom_seed_corpus(config['custom_seed_corpus_dir'],
                                              benchmarks)
+    config['random_corpus'] = random_corpus
     config['target_fuzzing'] = target_fuzzing
 
     return start_experiment_from_full_config(config)
@@ -613,6 +615,12 @@ def main():
                         required=False,
                         default=False,
                         action='store_true')
+    parser.add_argument('-rs',
+                        '--random-corpus',
+                        help='Randomly pick seed corpus.',
+                        required=False,
+                        default=False,
+                        action='store_true')
     parser.add_argument('-tf',
                         '--target-fuzzing',
                         help='Target fuzzing mode.',
@@ -664,7 +672,10 @@ def main():
                          '"oss_fuzz_corpus" at the same time')
 
     if args.target_fuzzing and not args.custom_seed_corpus_dir:
-        parser.error('Target fuzzing can only be used with custom seed corpus')
+        parser.error('Target fuzzing can only be run with custom seed corpus')
+
+    if args.random_corpus and not args.custom_seed_corpus_dir:
+        parser.error('Random corpus experiment can only be run with custom seed corpus')
 
     start_experiment(args.experiment_name,
                      args.experiment_config,
@@ -679,6 +690,7 @@ def main():
                      measurers_cpus=measurers_cpus,
                      runners_cpus=runners_cpus,
                      custom_seed_corpus_dir=args.custom_seed_corpus_dir,
+                     random_corpus=args.random_corpus,
                      target_fuzzing=args.target_fuzzing)
     return 0
 
diff --git a/experiment/runner.py b/experiment/runner.py
index 9dd45cb54..9f526e1a1 100644
--- a/experiment/runner.py
+++ b/experiment/runner.py
@@ -115,18 +115,18 @@ def get_clusterfuzz_seed_corpus_path(fuzz_target_path):
     return seed_corpus_path if os.path.exists(seed_corpus_path) else None
 
 
-def _unpack_target_fuzzing_corpus(corpus_directory):
+def _unpack_random_corpus(corpus_directory):
     # remove initial seed corpus
     shutil.rmtree(corpus_directory)
 
     benchmark = environment.get('BENCHMARK')
     trial_group_num = environment.get('TRIAL_GROUP_NUM')
-    target_fuzzing_corpora_dir = experiment_utils.get_target_fuzzing_corpora_filestore_path(
+    random_corpora_dir = experiment_utils.get_random_corpora_filestore_path(
     )
-    target_fuzzing_sub_dir = 'trial-group-%s' % int(trial_group_num)
-    target_fuzzing_dir = posixpath.join(target_fuzzing_corpora_dir, benchmark,
-                                        target_fuzzing_sub_dir)
-    shutil.copytree(target_fuzzing_dir, corpus_directory)
+    random_corpora_sub_dir = 'trial-group-%s' % int(trial_group_num)
+    random_corpus_dir = posixpath.join(random_corpora_dir, benchmark,
+                                        random_corpora_sub_dir)
+    shutil.copytree(random_corpus_dir, corpus_directory)
 
 
 def _unpack_custom_seed_corpus(corpus_directory):
@@ -214,8 +214,8 @@ def run_fuzzer(max_total_time, log_filename):
         return
 
     if environment.get('CUSTOM_SEED_CORPUS_DIR'):
-        if environment.get('TARGET_FUZZING'):
-            _unpack_target_fuzzing_corpus(input_corpus)
+        if environment.get('RANDOM_CORPUS') or environment.get('TARGET_FUZZING'):
+            _unpack_random_corpus(input_corpus)
         else:
             _unpack_custom_seed_corpus(input_corpus)
     else:
diff --git a/experiment/scheduler.py b/experiment/scheduler.py
index a7a732d36..e04e368cb 100644
--- a/experiment/scheduler.py
+++ b/experiment/scheduler.py
@@ -717,6 +717,7 @@ def render_startup_script_template(instance_name: str, fuzzer: str,
         'docker_registry': experiment_config['docker_registry'],
         'local_experiment': local_experiment,
         'no_seeds': experiment_config['no_seeds'],
+        'random_corpus': experiment_config['random_corpus'],
         'target_fuzzing': experiment_config['target_fuzzing'],
         'no_dictionaries': experiment_config['no_dictionaries'],
         'oss_fuzz_corpus': experiment_config['oss_fuzz_corpus'],

From f4e531426d516a35ec423d3286e0ac04becbf344 Mon Sep 17 00:00:00 2001
From: Jiradet Ounjai <jiradet.jd@gmail.com>
Date: Tue, 10 May 2022 20:29:14 +0700
Subject: [PATCH 3/3] format code

---
 common/experiment_utils.py            | 3 +--
 common/random_corpus_fuzzing_utils.py | 3 ++-
 experiment/run_experiment.py          | 3 ++-
 experiment/runner.py                  | 8 ++++----
 4 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/common/experiment_utils.py b/common/experiment_utils.py
index 58a7a1c3d..97adde82f 100644
--- a/common/experiment_utils.py
+++ b/common/experiment_utils.py
@@ -80,8 +80,7 @@ def get_custom_seed_corpora_filestore_path():
 
 def get_random_corpora_filestore_path():
     """Returns path containing seed corpora for the target fuzzing experiment."""
-    return posixpath.join(get_experiment_filestore_path(),
-                          'random_corpora')
+    return posixpath.join(get_experiment_filestore_path(), 'random_corpora')
 
 
 def get_dispatcher_instance_name(experiment: str) -> str:
diff --git a/common/random_corpus_fuzzing_utils.py b/common/random_corpus_fuzzing_utils.py
index e219997ff..444e0f323 100644
--- a/common/random_corpus_fuzzing_utils.py
+++ b/common/random_corpus_fuzzing_utils.py
@@ -20,6 +20,7 @@
 
 MAX_RANDOM_CORPUS_FILES = 5
 
+
 def get_covered_branches_per_function(coverage_info):
     function_coverage_info = coverage_info["data"][0]["functions"]
     covered_branches = set([])
@@ -138,7 +139,7 @@ def prepare_benchmark_random_corpus(benchmark: str,
                                                MAX_RANDOM_CORPUS_FILES)
                     for file in dest_files:
                         zip_file.extract(file, dest_dir)
-                    
+
                     # extract covered branches of source and destination inputs
                     # then subtract to get targeting branches
                     src_branches = get_covered_branches(coverage_binary,
diff --git a/experiment/run_experiment.py b/experiment/run_experiment.py
index 9889476ff..19927fe53 100644
--- a/experiment/run_experiment.py
+++ b/experiment/run_experiment.py
@@ -675,7 +675,8 @@ def main():
         parser.error('Target fuzzing can only be run with custom seed corpus')
 
     if args.random_corpus and not args.custom_seed_corpus_dir:
-        parser.error('Random corpus experiment can only be run with custom seed corpus')
+        parser.error(
+            'Random corpus option can only be run with custom seed corpus')
 
     start_experiment(args.experiment_name,
                      args.experiment_config,
diff --git a/experiment/runner.py b/experiment/runner.py
index 9f526e1a1..ba99787ea 100644
--- a/experiment/runner.py
+++ b/experiment/runner.py
@@ -121,11 +121,10 @@ def _unpack_random_corpus(corpus_directory):
 
     benchmark = environment.get('BENCHMARK')
     trial_group_num = environment.get('TRIAL_GROUP_NUM')
-    random_corpora_dir = experiment_utils.get_random_corpora_filestore_path(
-    )
+    random_corpora_dir = experiment_utils.get_random_corpora_filestore_path()
     random_corpora_sub_dir = 'trial-group-%s' % int(trial_group_num)
     random_corpus_dir = posixpath.join(random_corpora_dir, benchmark,
-                                        random_corpora_sub_dir)
+                                       random_corpora_sub_dir)
     shutil.copytree(random_corpus_dir, corpus_directory)
 
 
@@ -214,7 +213,8 @@ def run_fuzzer(max_total_time, log_filename):
         return
 
     if environment.get('CUSTOM_SEED_CORPUS_DIR'):
-        if environment.get('RANDOM_CORPUS') or environment.get('TARGET_FUZZING'):
+        if environment.get('RANDOM_CORPUS') or environment.get(
+                'TARGET_FUZZING'):
             _unpack_random_corpus(input_corpus)
         else:
             _unpack_custom_seed_corpus(input_corpus)