From 451e4212dfb233bde8a8e6e005b1c4a8d7991230 Mon Sep 17 00:00:00 2001
From: Gabe Goodhart
Date: Sat, 27 Jul 2024 09:45:17 +0100
Subject: [PATCH 1/4] data generate: fix typo in `--pipeline` docs

Signed-off-by: Mark McLoughlin
---
 src/instructlab/data/generate.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/instructlab/data/generate.py b/src/instructlab/data/generate.py
index 5075c9b037..fbc01957f5 100644
--- a/src/instructlab/data/generate.py
+++ b/src/instructlab/data/generate.py
@@ -142,7 +142,7 @@
     # Hidden until instructlab-sdg releases a version with multiple pipelines
     # For now only "simple" is supported in the latest release.
     hidden=True,
-    help="Data generation pipeline to use. Available: simple, full, or a valid path to a directory of pipeline worlfow YAML files. Note that 'full' requires a larger teacher model, Mixtral-8x7b.",
+    help="Data generation pipeline to use. Available: simple, full, or a valid path to a directory of pipeline workflow YAML files. Note that 'full' requires a larger teacher model, Mixtral-8x7b.",
 )
 @click.option(
     "--enable-serving-output",

From f60105bce02c6af97355467254d8583338a0effa Mon Sep 17 00:00:00 2001
From: Gabe Goodhart
Date: Thu, 18 Jul 2024 16:12:20 -0600
Subject: [PATCH 2/4] feat: enable SDG batching, except with local llama-cpp serving backend

Relates to instructlab/sdg#135

Since instructlab-sdg-0.1.3, data generation in batches is supported and
controlled by a parameter to the `generate_data()` function. This is not
supported with llama-cpp, and so we disable it in that case.

Co-authored-by: Mark McLoughlin
Signed-off-by: Gabe Goodhart
Signed-off-by: Mark McLoughlin
---
 .spellcheck-en-custom.txt        |  1 +
 CHANGELOG.md                     |  3 +++
 src/instructlab/data/generate.py | 23 +++++++++++++++++++++++
 3 files changed, 27 insertions(+)

diff --git a/.spellcheck-en-custom.txt b/.spellcheck-en-custom.txt
index 0b4d20be67..0121a3c982 100644
--- a/.spellcheck-en-custom.txt
+++ b/.spellcheck-en-custom.txt
@@ -131,6 +131,7 @@ nb
 oneMKL
 orchestrator
 ots
+parallelized
 png
 pre
 preceeds
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7320e81814..65a47d10e5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -2,6 +2,9 @@

 ### Features

+* `ilab data generate` now supports parallelized data generation across batches of the seed
+  data when running with the vLLM serving backend. The `--batch-size` argument can be used to
+  control this behavior.
 * `ilab model download` now supports downloading models from OCI registries. Repositories
   that are prefixed by "docker://" and specified against `--repository` are treated as OCI
   registries.
diff --git a/src/instructlab/data/generate.py b/src/instructlab/data/generate.py
index fbc01957f5..90e30b9324 100644
--- a/src/instructlab/data/generate.py
+++ b/src/instructlab/data/generate.py
@@ -144,6 +144,12 @@
     hidden=True,
     help="Data generation pipeline to use. Available: simple, full, or a valid path to a directory of pipeline workflow YAML files. Note that 'full' requires a larger teacher model, Mixtral-8x7b.",
 )
+@click.option(
+    "--batch-size",
+    type=click.IntRange(min=0),
+    default=None,
+    help="Number of elements to process in each batch through the SDG pipeline. Enabled by default for the vLLM serving backend, with a batch size of 8 chosen based on experiments to optimize for throughput. Use 0 to disable.",
+)
 @click.option(
     "--enable-serving-output",
     is_flag=True,
@@ -174,6 +180,7 @@ def generate(
     model_family,
     pipeline,
     enable_serving_output,
+    batch_size,
 ):
     """Generates synthetic data to enhance your example data"""
     # pylint: disable=import-outside-toplevel
@@ -192,12 +199,19 @@
     if ctx.obj is not None:
         prompt_file_path = ctx.obj.config.generate.prompt_file

+    # If batch size is not set explicitly, default to 8
+    # Once https://github.com/instructlab/sdg/issues/224 is resolved we can
+    # pass batch_size=None to the library instead
+    if batch_size is None:
+        batch_size = 8
+
     backend_instance = None
     if endpoint_url:
         api_base = endpoint_url
     else:
         # First Party
         from instructlab.model.backends import backends
+        from instructlab.model.backends.llama_cpp import Server as llama_cpp_server

         ctx.obj.config.serve.llama_cpp.llm_family = model_family
         backend_instance = backends.select_backend(ctx.obj.config.generate.teacher)
@@ -210,6 +224,14 @@
         except Exception as exc:
             click.secho(f"Failed to start server: {exc}", fg="red")
             raise click.exceptions.Exit(1)
+
+        # disable batching when running with the local llama.cpp server
+        if isinstance(backend_instance, llama_cpp_server):
+            if batch_size is not None:
+                logger.warning(
+                    "Disabling SDG batching - unsupported with llama.cpp serving"
+                )
+            batch_size = 0
     try:
         click.echo(
             f"Generating synthetic data using '{model}' model, taxonomy:'{taxonomy_path}' against {api_base} server"
@@ -236,6 +258,7 @@
             tls_client_key=tls_client_key,
             tls_client_passwd=tls_client_passwd,
             pipeline=pipeline,
+            batch_size=batch_size,
         )
     except GenerateException as exc:
         click.secho(

From 907930b845f18d723685e3a87954bc89a76321ca Mon Sep 17 00:00:00 2001
From: Mark McLoughlin
Date: Sat, 27 Jul 2024 10:01:50 +0100
Subject: [PATCH 3/4] data generate: expose the --pipeline option

Now that we require a new enough version of instructlab-sdg, we can
expose this.

Signed-off-by: Mark McLoughlin
---
 src/instructlab/data/generate.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/instructlab/data/generate.py b/src/instructlab/data/generate.py
index 90e30b9324..678ad11fcd 100644
--- a/src/instructlab/data/generate.py
+++ b/src/instructlab/data/generate.py
@@ -139,9 +139,6 @@
     "--pipeline",
     type=click.STRING,
     default="simple",
-    # Hidden until instructlab-sdg releases a version with multiple pipelines
-    # For now only "simple" is supported in the latest release.
-    hidden=True,
     help="Data generation pipeline to use. Available: simple, full, or a valid path to a directory of pipeline workflow YAML files. Note that 'full' requires a larger teacher model, Mixtral-8x7b.",
 )
 @click.option(
     "--enable-serving-output",

From 3c03122425c7f57832273d72d94fbed1982db38d Mon Sep 17 00:00:00 2001
From: Mark McLoughlin
Date: Sat, 27 Jul 2024 13:16:32 +0100
Subject: [PATCH 4/4] data generate: disable batching with remote llama-cpp in CI

Also add a troubleshooting note referencing #1892, which tracks a todo
item to add some way to automatically disable batching in this case.

Signed-off-by: Mark McLoughlin --- TROUBLESHOOTING.md | 7 +++++++ scripts/basic-workflow-tests.sh | 6 ++++++ 2 files changed, 13 insertions(+) diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md index 44a2c38c57..b662a75f78 100644 --- a/TROUBLESHOOTING.md +++ b/TROUBLESHOOTING.md @@ -4,6 +4,13 @@ This document is for commonly found problems and their solutions when using `ila ## `ilab` troubleshooting +### `ilab data generate --endpoint-url` with llama-cpp fails with `openai.InternalServerError: Service Unavailable` + +llama-cpp does not support batching, which is enabled by default with remote +endpoints. To resolve this error, disable batching using `--batch-size=0`. + +See [this issue](https://github.com/instructlab/instructlab/issues/1892). + ### `ilab data generate` command running slow on macOS If you notice `ilab data generate` running for several hours or more on a Mac M-series, you should first check out the available memory on your system (See [Activity Monitor](https://support.apple.com/en-ie/guide/activity-monitor/welcome/mac) for more details). If there is < 8GM RAM available before serving a model, then check to see if you can free up some memory. diff --git a/scripts/basic-workflow-tests.sh b/scripts/basic-workflow-tests.sh index c8ce6e278b..81f77dce62 100755 --- a/scripts/basic-workflow-tests.sh +++ b/scripts/basic-workflow-tests.sh @@ -259,6 +259,12 @@ test_generate() { if [ "$SDG_PIPELINE" = "full" ]; then GENERATE_ARGS+=("--pipeline" "full") fi + + # Disable batching with llama-cpp. See https://github.com/instructlab/instructlab/issues/1892 + if [ "$BACKEND" = "llama-cpp" ]; then + GENERATE_ARGS+=("--batch-size" "0") + fi + ilab data generate --num-instructions ${NUM_INSTRUCTIONS} "${GENERATE_ARGS[@]}" }
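Taken together, a minimal usage sketch of how the options these patches introduce combine on the command line (the endpoint URL below is purely illustrative, and `--pipeline full` assumes the larger Mixtral-8x7b teacher model is being served):

    # vLLM serving backend: batching is enabled by default with a batch size of 8
    ilab data generate --pipeline full

    # Remote llama-cpp endpoint: batching is unsupported, so disable it explicitly
    ilab data generate --endpoint-url http://localhost:8000/v1 --batch-size 0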