Skip to content

Commit 2dcbec7

Browse files
authored
Merge pull request #64 from markmc/api-tweaks
Remove the unnecessary SDG class
2 parents 2a91e7c + a9d93c4 commit 2dcbec7

File tree

6 files changed: 14 additions and 46 deletions.

scripts/test_freeform_skills.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@
66
from openai import OpenAI
77

88
# First Party
9-
from src.instructlab.sdg import SDG
109
from src.instructlab.sdg.pipeline import (
1110
FULL_PIPELINES_PACKAGE,
1211
Pipeline,
@@ -60,8 +59,7 @@
6059
with resources.path(FULL_PIPELINES_PACKAGE, "freeform_skills.yaml") as yaml_path:
6160
skills_pipe = Pipeline.from_file(ctx, yaml_path)
6261

63-
sdg = SDG([skills_pipe])
64-
gen_data = sdg.generate(ds)
62+
gen_data = skills_pipe.generate(ds)
6563

6664
print(gen_data)
6765
print(gen_data[0])

scripts/test_grounded_skills.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,6 @@
66
from openai import OpenAI
77

88
# First Party
9-
from src.instructlab.sdg import SDG
109
from src.instructlab.sdg.pipeline import (
1110
FULL_PIPELINES_PACKAGE,
1211
Pipeline,
@@ -108,8 +107,7 @@
108107
with resources.path(FULL_PIPELINES_PACKAGE, "grounded_skills.yaml") as yaml_path:
109108
skills_pipe = Pipeline.from_file(ctx, yaml_path)
110109

111-
sdg = SDG([skills_pipe])
112-
gen_data = sdg.generate(ds)
110+
gen_data = skills_pipe.generate(ds)
113111

114112
print(gen_data)
115113
print(gen_data[0])

scripts/test_knowledge.py

Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,6 @@
77
from openai import OpenAI
88

99
# First Party
10-
from src.instructlab.sdg import SDG
1110
from src.instructlab.sdg.pipeline import (
1211
FULL_PIPELINES_PACKAGE,
1312
Pipeline,
@@ -47,8 +46,7 @@
4746
with resources.path(FULL_PIPELINES_PACKAGE, "knowledge.yaml") as yaml_path:
4847
knowledge_pipe = Pipeline.from_file(ctx, yaml_path)
4948

50-
sdg = SDG([knowledge_pipe])
51-
mmlubench_data = sdg.generate(ds)
49+
mmlubench_data = knowledge_pipe.generate(ds)
5250

5351
print(mmlubench_data)
5452
print(mmlubench_data[0])

src/instructlab/sdg/__init__.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,6 @@
2121
"SamplePopulatorBlock",
2222
"SelectorBlock",
2323
"SetToMajorityValueBlock",
24-
"SDG",
2524
"SIMPLE_PIPELINES_PACKAGE",
2625
"FULL_PIPELINES_PACKAGE",
2726
"generate_data",
@@ -42,7 +41,6 @@
4241
PipelineConfigParserError,
4342
PipelineContext,
4443
)
45-
from .sdg import SDG
4644
from .utilblocks import (
4745
CombineColumnsBlock,
4846
DuplicateColumnsBlock,

src/instructlab/sdg/generate_data.py

Lines changed: 11 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -30,7 +30,6 @@
3030
Pipeline,
3131
PipelineContext,
3232
)
33-
from instructlab.sdg.sdg import SDG
3433
from instructlab.sdg.utils import GenerateException, models
3534
from instructlab.sdg.utils.taxonomy import (
3635
leaf_node_to_samples,
@@ -241,9 +240,9 @@ def load_pipeline(yaml_basename):
241240
return Pipeline.from_file(ctx, os.path.join(pipeline, yaml_basename))
242241

243242
return (
244-
SDG([load_pipeline("knowledge.yaml")]),
245-
SDG([load_pipeline("freeform_skills.yaml")]),
246-
SDG([load_pipeline("grounded_skills.yaml")]),
243+
load_pipeline("knowledge.yaml"),
244+
load_pipeline("freeform_skills.yaml"),
245+
load_pipeline("grounded_skills.yaml"),
247246
)
248247

249248

@@ -362,16 +361,15 @@ def generate_data(
362361
batch_num_workers=num_cpus,
363362
)
364363

365-
sdg_knowledge, sdg_freeform_skill, sdg_grounded_skill = _sdg_init(ctx, pipeline)
364+
knowledge_pipe, freeform_skills_pipe, grounded_skills_pipe = _sdg_init(
365+
ctx, pipeline
366+
)
366367

367368
# Make sure checkpointing is disabled (we don't want this pipeline to load checkpoints from the main pipeline)
368369
mmlu_ctx = dataclasses.replace(ctx, checkpoint_dir=None)
369370
mmlu_bench_pipe = mmlubench_pipe_init(mmlu_ctx)
370371

371-
# FIXME: remove SDG https://github.com/instructlab/sdg/pull/64
372-
mixer = _mixer_init(
373-
ctx, output_dir, date_suffix, sdg_knowledge.pipelines[0].auxiliary_inst
374-
)
372+
mixer = _mixer_init(ctx, output_dir, date_suffix, knowledge_pipe.auxiliary_inst)
375373

376374
if console_output:
377375
logger.info(
@@ -388,19 +386,19 @@ def generate_data(
388386
raise GenerateException("Error: No samples found in leaf node.")
389387

390388
if samples[0].get("document"):
391-
sdg = sdg_knowledge
389+
pipe = knowledge_pipe
392390
is_knowledge = True
393391

394392
elif samples[0].get("seed_context"):
395-
sdg = sdg_grounded_skill
393+
pipe = grounded_skills_pipe
396394

397395
else:
398-
sdg = sdg_freeform_skill
396+
pipe = freeform_skills_pipe
399397

400398
logger.debug("Samples: %s", samples)
401399
ds = Dataset.from_list(samples)
402400
logger.debug("Dataset: %s", ds)
403-
new_generated_data = sdg.generate(ds)
401+
new_generated_data = pipe.generate(ds)
404402
if len(new_generated_data) == 0:
405403
raise EmptyDatasetError(
406404
"Pipeline stopped: Empty dataset after running pipe"

src/instructlab/sdg/sdg.py

Lines changed: 0 additions & 22 deletions
This file was deleted.

0 commit comments

Comments
 (0)