From 451e4212dfb233bde8a8e6e005b1c4a8d7991230 Mon Sep 17 00:00:00 2001
From: Gabe Goodhart
Date: Sat, 27 Jul 2024 09:45:17 +0100
Subject: [PATCH 1/4] data generate: fix typo in `--pipeline` docs

Signed-off-by: Mark McLoughlin
---
 src/instructlab/data/generate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/instructlab/data/generate.py b/src/instructlab/data/generate.py
index 5075c9b037..fbc01957f5 100644
--- a/src/instructlab/data/generate.py
+++ b/src/instructlab/data/generate.py
@@ -142,7 +142,7 @@
     # Hidden until instructlab-sdg releases a version with multiple pipelines
     # For now only "simple" is supported in the latest release.
     hidden=True,
-    help="Data generation pipeline to use. Available: simple, full, or a valid path to a directory of pipeline worlfow YAML files. Note that 'full' requires a larger teacher model, Mixtral-8x7b.",
+    help="Data generation pipeline to use. Available: simple, full, or a valid path to a directory of pipeline workflow YAML files. Note that 'full' requires a larger teacher model, Mixtral-8x7b.",
 )
 @click.option(
     "--enable-serving-output",

From f60105bce02c6af97355467254d8583338a0effa Mon Sep 17 00:00:00 2001
From: Gabe Goodhart
Date: Thu, 18 Jul 2024 16:12:20 -0600
Subject: [PATCH 2/4] feat: enable SDG batching, except with local llama-cpp serving backend

Relates to instructlab/sdg#135

Since instructlab-sdg-0.1.3, data generation in batches is supported and
controlled by a parameter to the `generate_data()` function. This is not
supported with llama-cpp, and so we disable it in that case.

Co-authored-by: Mark McLoughlin
Signed-off-by: Gabe Goodhart
Signed-off-by: Mark McLoughlin
---
 .spellcheck-en-custom.txt        |  1 +
 CHANGELOG.md                     |  3 +++
 src/instructlab/data/generate.py | 23 +++++++++++++++++++++++
 3 files changed, 27 insertions(+)

diff --git a/.spellcheck-en-custom.txt b/.spellcheck-en-custom.txt
index 0b4d20be67..0121a3c982 100644
--- a/.spellcheck-en-custom.txt
+++ b/.spellcheck-en-custom.txt
@@ -131,6 +131,7 @@ nb
 oneMKL
 orchestrator
 ots
+parallelized
 png
 pre
 preceeds
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7320e81814..65a47d10e5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,9 @@

 ### Features

+* `ilab data generate` now supports parallelized data generation across batches of the seed
+  data when running with the vLLM serving backend. The `--batch-size` argument can be used to
+  control this behavior.
 * `ilab model download` now supports downloading models from OCI registries. Repositories
   that are prefixed by "docker://" and specified against `--repository` are treated as OCI
   registries.
diff --git a/src/instructlab/data/generate.py b/src/instructlab/data/generate.py
index fbc01957f5..90e30b9324 100644
--- a/src/instructlab/data/generate.py
+++ b/src/instructlab/data/generate.py
@@ -144,6 +144,12 @@
     hidden=True,
     help="Data generation pipeline to use. Available: simple, full, or a valid path to a directory of pipeline workflow YAML files. Note that 'full' requires a larger teacher model, Mixtral-8x7b.",
 )
+@click.option(
+    "--batch-size",
+    type=click.IntRange(min=0),
+    default=None,
+    help="Number of elements to process in each batch through the SDG pipeline. Enabled by default for the vLLM serving backend, with a batch size of 8 chosen based on experiments to optimize for throughput. Use 0 to disable.",
+)
 @click.option(
     "--enable-serving-output",
     is_flag=True,
@@ -174,6 +180,7 @@ def generate(
     model_family,
     pipeline,
     enable_serving_output,
+    batch_size,
 ):
     """Generates synthetic data to enhance your example data"""
     # pylint: disable=import-outside-toplevel
@@ -192,12 +199,19 @@
     if ctx.obj is not None:
         prompt_file_path = ctx.obj.config.generate.prompt_file

+    # If batch size is not set explicitly, default to 8
+    # Once https://github.com/instructlab/sdg/issues/224 is resolved we can
+    # pass batch_size=None to the library instead
+    if batch_size is None:
+        batch_size = 8
+
     backend_instance = None
     if endpoint_url:
         api_base = endpoint_url
     else:
         # First Party
         from instructlab.model.backends import backends
+        from instructlab.model.backends.llama_cpp import Server as llama_cpp_server

         ctx.obj.config.serve.llama_cpp.llm_family = model_family
         backend_instance = backends.select_backend(ctx.obj.config.generate.teacher)
@@ -210,6 +224,14 @@
         except Exception as exc:
             click.secho(f"Failed to start server: {exc}", fg="red")
             raise click.exceptions.Exit(1)
+
+        # disable batching when running with the local llama.cpp server
+        if isinstance(backend_instance, llama_cpp_server):
+            if batch_size is not None:
+                logger.warning(
+                    "Disabling SDG batching - unsupported with llama.cpp serving"
+                )
+            batch_size = 0
     try:
         click.echo(
             f"Generating synthetic data using '{model}' model, taxonomy:'{taxonomy_path}' against {api_base} server"
@@ -236,6 +258,7 @@
             tls_client_key=tls_client_key,
             tls_client_passwd=tls_client_passwd,
             pipeline=pipeline,
+            batch_size=batch_size,
         )
     except GenerateException as exc:
         click.secho(

From 907930b845f18d723685e3a87954bc89a76321ca Mon Sep 17 00:00:00 2001
From: Mark McLoughlin
Date: Sat, 27 Jul 2024 10:01:50 +0100
Subject: [PATCH 3/4] data generate: expose the --pipeline option

Now that we require a new enough version of instructlab-sdg, we can
expose this.

Signed-off-by: Mark McLoughlin
---
 src/instructlab/data/generate.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/instructlab/data/generate.py b/src/instructlab/data/generate.py
index 90e30b9324..678ad11fcd 100644
--- a/src/instructlab/data/generate.py
+++ b/src/instructlab/data/generate.py
@@ -139,9 +139,6 @@
     "--pipeline",
     type=click.STRING,
     default="simple",
-    # Hidden until instructlab-sdg releases a version with multiple pipelines
-    # For now only "simple" is supported in the latest release.
-    hidden=True,
     help="Data generation pipeline to use. Available: simple, full, or a valid path to a directory of pipeline workflow YAML files. Note that 'full' requires a larger teacher model, Mixtral-8x7b.",
 )
 @click.option(
     "--enable-serving-output",

From 3c03122425c7f57832273d72d94fbed1982db38d Mon Sep 17 00:00:00 2001
From: Mark McLoughlin
Date: Sat, 27 Jul 2024 13:16:32 +0100
Subject: [PATCH 4/4] data generate: disable batching with remote llama-cpp in CI

Also add a troubleshooting note referencing #1892, which tracks a todo
item to add some way to automatically disable batching in this case.

Signed-off-by: Mark McLoughlin --- TROUBLESHOOTING.md | 7 +++++++ scripts/basic-workflow-tests.sh | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md index 44a2c38c57..b662a75f78 100644 --- a/TROUBLESHOOTING.md +++ b/TROUBLESHOOTING.md @@ -4,6 +4,13 @@ This document is for commonly found problems and their solutions when using `ila ## `ilab` troubleshooting +### `ilab data generate --endpoint-url` with llama-cpp fails with `openai.InternalServerError: Service Unavailable` + +llama-cpp does not support batching, which is enabled by default with remote +endpoints. To resolve this error, disable batching using `--batch-size=0`. + +See [this issue](https://github.com/instructlab/instructlab/issues/1892). + ### `ilab data generate` command running slow on macOS If you notice `ilab data generate` running for several hours or more on a Mac M-series, you should first check out the available memory on your system (See [Activity Monitor](https://support.apple.com/en-ie/guide/activity-monitor/welcome/mac) for more details). If there is < 8GM RAM available before serving a model, then check to see if you can free up some memory. diff --git a/scripts/basic-workflow-tests.sh b/scripts/basic-workflow-tests.sh index c8ce6e278b..81f77dce62 100755 --- a/scripts/basic-workflow-tests.sh +++ b/scripts/basic-workflow-tests.sh @@ -259,6 +259,12 @@ test_generate() { if [ "$SDG_PIPELINE" = "full" ]; then GENERATE_ARGS+=("--pipeline" "full") fi + + # Disable batching with llama-cpp. See https://github.com/instructlab/instructlab/issues/1892 + if [ "$BACKEND" = "llama-cpp" ]; then + GENERATE_ARGS+=("--batch-size" "0") + fi + ilab data generate --num-instructions ${NUM_INSTRUCTIONS} "${GENERATE_ARGS[@]}" }
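Taken together, a minimal usage sketch of how the options these patches introduce combine on the command line (the endpoint URL below is purely illustrative, and `--pipeline full` assumes the larger Mixtral-8x7b teacher model is being served):

    # vLLM serving backend: batching is enabled by default with a batch size of 8
    ilab data generate --pipeline full

    # Remote llama-cpp endpoint: batching is unsupported, so disable it explicitly
    ilab data generate --endpoint-url http://localhost:8000/v1 --batch-size 0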