Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -103,8 +103,8 @@ config = ClassificationDatasetConfig(
```python
# Create LLM providers
providers = [
OpenAIProvider(model_id="gpt-4.1-mini-2025-04-14"),
AnthropicProvider(model_id="claude-3-5-haiku-latest"),
OpenAIProvider(model_id="gpt-5-mini-2025-08-07"),
AnthropicProvider(model_id="claude-haiku-4-5-20251001"),
GeminiProvider(model_id="gemini-2.0-flash")
]
```
Expand Down
4 changes: 2 additions & 2 deletions datafast/examples/classification_trail_conditions_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,8 +58,8 @@

# Set up providers
providers = [
OpenAIProvider(model_id="gpt-4.1-mini-2025-04-14"),
AnthropicProvider(model_id="claude-3-5-haiku-latest")
OpenAIProvider(model_id="gpt-5-mini-2025-08-07"),
AnthropicProvider(model_id="claude-haiku-4-5-20251001")
]

# Generate dataset
Expand Down
2 changes: 1 addition & 1 deletion datafast/examples/generic_pipeline_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def main():
model_id="gpt-5-mini-2025-08-07",
temperature=1
),
# AnthropicProvider(model_id="claude-3-5-haiku-latest"),
# AnthropicProvider(model_id="claude-haiku-4-5-20251001"),
# GeminiProvider(model_id="gemini-2.5-flash-lite", rpm_limit=15),
# OllamaProvider(model_id="gemma3:4b"),
]
Expand Down
2 changes: 1 addition & 1 deletion datafast/examples/generic_pipeline_row_model_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
question="Qu'est ce qui t'a plu?",

# System fields
model_id="gpt-4",
model_id="gpt-5-mini-2025-08-07",
language="fr"
)

Expand Down
4 changes: 2 additions & 2 deletions datafast/examples/inspect_dataset_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,9 +46,9 @@
from datafast.llms import OpenAIProvider, AnthropicProvider, GeminiProvider

providers = [
OpenAIProvider(model_id="gpt-4.1-nano"),
OpenAIProvider(model_id="gpt-5-mini-2025-08-07"),
# Uncomment to use additional providers
# AnthropicProvider(model_id="claude-3-5-haiku-latest"),
# AnthropicProvider(model_id="claude-haiku-4-5-20251001"),
# GeminiProvider(model_id="gemini-2.0-flash"),
]

Expand Down
4 changes: 2 additions & 2 deletions datafast/examples/mcq_contextual_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,9 @@ def main():
output_file="mcq_ar6_contextual_dataset.jsonl",
)

# 3. Initialize OpenAI provider with gpt-4.1-mini
# 3. Initialize OpenAI provider with gpt-5-mini-2025-08-07
providers = [
OpenAIProvider(model_id="gpt-4.1-mini"),
OpenAIProvider(model_id="gpt-5-mini-2025-08-07"),
]

# 4. Generate the dataset
Expand Down
4 changes: 2 additions & 2 deletions datafast/examples/mcq_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,8 @@ def main():

# 2. Initialize LLM providers
providers = [
OpenAIProvider(model_id="gpt-4.1-mini-2025-04-14"),
# AnthropicProvider(model_id="claude-3-5-haiku-latest"),
OpenAIProvider(model_id="gpt-5-mini-2025-08-07"),
# AnthropicProvider(model_id="claude-haiku-4-5-20251001"),
# GeminiProvider(model_id="gemini-2.0-flash"),
]

Expand Down
4 changes: 2 additions & 2 deletions datafast/examples/preference_dataset_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,10 +44,10 @@ def main():
)

# 2. Initialize LLM providers
question_gen_llm = OpenAIProvider(model_id="gpt-4.1-mini")
question_gen_llm = OpenAIProvider(model_id="gpt-5-mini-2025-08-07")
chosen_response_gen_llm = AnthropicProvider(model_id="claude-3-7-sonnet-latest")
rejected_response_gen_llm = GeminiProvider(model_id="gemini-2.0-flash")
judge_llm = OpenAIProvider(model_id="gpt-4.1")
judge_llm = OpenAIProvider(model_id="gpt-5-mini-2025-08-07")

# 3. Generate the dataset
dataset = PreferenceDataset(config)
Expand Down
2 changes: 1 addition & 1 deletion datafast/examples/quickstart_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@

providers = [
OpenAIProvider(model_id="gpt-5-nano-2025-08-07"),
# AnthropicProvider(model_id="claude-3-5-haiku-latest"),
# AnthropicProvider(model_id="claude-haiku-4-5-20251001"),
# GeminiProvider(model_id="gemini-2.0-flash"),
# OllamaProvider(model_id="gemma3:12b")
]
Expand Down
4 changes: 2 additions & 2 deletions datafast/examples/raw_text_space_engineering_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ def main():

# 2. Create LLM providers with specific models
providers = [
OpenAIProvider(model_id="gpt-4.1-mini-2025-04-14"), # You may want to use stronger models
AnthropicProvider(model_id="claude-3-5-haiku-latest"),
OpenAIProvider(model_id="gpt-5-mini-2025-08-07"), # You may want to use stronger models
AnthropicProvider(model_id="claude-haiku-4-5-20251001"),
]

# 3. Generate the dataset
Expand Down
4 changes: 2 additions & 2 deletions datafast/examples/show_dataset_examples.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@
classification_row2 = TextClassificationRow(
text="The trail is well maintained and easy to follow.",
label="positive_conditions",
model_id="claude-3-5-haiku-latest",
model_id="claude-haiku-4-5-20251001",
language="en",
)
classification_dataset.data_rows = [classification_row, classification_row2]
Expand Down Expand Up @@ -85,7 +85,7 @@
question="What was the main goal of the Mars 2020 mission?",
chosen_response="To search for signs of ancient life and collect samples.",
rejected_response="To launch a satellite.",
chosen_model_id="claude-3-5-haiku-latest",
chosen_model_id="claude-haiku-4-5-20251001",
rejected_model_id="gpt-4.1-nano",
chosen_response_score=9,
rejected_response_score=3,
Expand Down
2 changes: 1 addition & 1 deletion datafast/examples/ultrachat_materials_science.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def main():

# 2. Initialize LLM providers - using just one for simplicity
providers = [
AnthropicProvider(model_id="claude-3-5-haiku-latest"),
AnthropicProvider(model_id="claude-haiku-4-5-20251001"),
]

# 3. Get expected number of rows
Expand Down
12 changes: 6 additions & 6 deletions datafast/llms.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ def env_key_name(self) -> str:

def __init__(
self,
model_id: str = "gpt-4.1-mini-2025-04-14",
model_id: str = "gpt-5-mini-2025-08-07",
api_key: str | None = None,
temperature: float | None = None,
max_completion_tokens: int | None = None,
Expand All @@ -252,7 +252,7 @@ def __init__(
"""Initialize the OpenAI provider.

Args:
model_id: The model ID (defaults to gpt-4.1-mini-2025-04-14)
model_id: The model ID (defaults to gpt-5-mini-2025-08-07)
api_key: API key (if None, will get from environment)
temperature: The sampling temperature to be used, between 0 and 2. Higher values like 0.8 produce more random outputs, while lower values like 0.2 make outputs more focused and deterministic.
max_completion_tokens: An upper bound for the number of tokens that can be generated for a completion, including visible output tokens and reasoning tokens.
Expand Down Expand Up @@ -282,7 +282,7 @@ def env_key_name(self) -> str:

def __init__(
self,
model_id: str = "claude-3-5-haiku-latest",
model_id: str = "claude-haiku-4-5-20251001",
api_key: str | None = None,
temperature: float | None = None,
max_completion_tokens: int | None = None,
Expand All @@ -292,7 +292,7 @@ def __init__(
"""Initialize the Anthropic provider.

Args:
model_id: The model ID (defaults to claude-3-5-haiku-latest)
model_id: The model ID (defaults to claude-haiku-4-5-20251001)
api_key: API key (if None, will get from environment)
temperature: Temperature for generation (0.0 to 1.0)
max_completion_tokens: Maximum tokens to generate
Expand Down Expand Up @@ -419,7 +419,7 @@ def env_key_name(self) -> str:

def __init__(
self,
model_id: str = "openai/gpt-4.1-mini", # for default model
model_id: str = "openai/gpt-5-mini", # for default model
api_key: str | None = None,
temperature: float | None = None,
max_completion_tokens: int | None = None,
Expand All @@ -429,7 +429,7 @@ def __init__(
"""Initialize the OpenRouter provider.

Args:
model_id: The model ID (defaults to openai/gpt-4.1-mini)
model_id: The model ID (defaults to openai/gpt-5-mini)
api_key: API key (if None, will get from environment)
temperature: Temperature for generation (0.0 to 2.0; the usable range may be narrower depending on the routed model)
max_completion_tokens: Maximum tokens to generate
Expand Down
Loading
Loading