diff --git a/py/src/braintrust/framework.py b/py/src/braintrust/framework.py index 5367dd40..0e5b8884 100644 --- a/py/src/braintrust/framework.py +++ b/py/src/braintrust/framework.py @@ -863,7 +863,7 @@ async def EvalAsync( summarized and compared to this experiment. :param base_experiment_id: An optional experiment id to use as a base. If specified, the new experiment will be summarized and compared to this experiment. This takes precedence over `base_experiment_name` if specified. - :param git_metadata_settings: Optional settings for collecting git metadata. By default, will collect all git metadata fields allowed in org-level settings. + :param git_metadata_settings: Optional settings for collecting git metadata. By default, will collect git metadata fields allowed in org-level settings, excluding diff content unless the org opts in. :param repo_info: Optionally explicitly specify the git metadata for this experiment. This takes precedence over `git_metadata_settings` if specified. :param error_score_handler: Optionally supply a custom function to specifically handle score values when tasks or scoring functions have errored. :param description: An optional description for the experiment. @@ -991,7 +991,7 @@ def Eval( summarized and compared to this experiment. :param base_experiment_id: An optional experiment id to use as a base. If specified, the new experiment will be summarized and compared to this experiment. This takes precedence over `base_experiment_name` if specified. - :param git_metadata_settings: Optional settings for collecting git metadata. By default, will collect all git metadata fields allowed in org-level settings. + :param git_metadata_settings: Optional settings for collecting git metadata. By default, will collect git metadata fields allowed in org-level settings, excluding diff content unless the org opts in. :param repo_info: Optionally explicitly specify the git metadata for this experiment. This takes precedence over `git_metadata_settings` if specified. :param error_score_handler: Optionally supply a custom function to specifically handle score values when tasks or scoring functions have errored. :param description: An optional description for the experiment. diff --git a/py/src/braintrust/git_fields.py b/py/src/braintrust/git_fields.py index 264102fa..06e54e06 100644 --- a/py/src/braintrust/git_fields.py +++ b/py/src/braintrust/git_fields.py @@ -43,3 +43,22 @@ def merge(cls, s1: "GitMetadataSettings", s2: "GitMetadataSettings") -> "GitMeta if not ret.fields: ret.collect = "none" return ret + + +DEFAULT_GIT_METADATA_FIELDS = [ + "commit", + "branch", + "tag", + "dirty", + "author_name", + "author_email", + "commit_message", + "commit_time", +] + + +def default_git_metadata_settings() -> GitMetadataSettings: + return GitMetadataSettings( + collect="some", + fields=list(DEFAULT_GIT_METADATA_FIELDS), + ) diff --git a/py/src/braintrust/gitutil.py b/py/src/braintrust/gitutil.py index 416dc59b..76b66405 100644 --- a/py/src/braintrust/gitutil.py +++ b/py/src/braintrust/gitutil.py @@ -5,7 +5,7 @@ import threading from functools import lru_cache as _cache -from .git_fields import GitMetadataSettings, RepoInfo +from .git_fields import GitMetadataSettings, RepoInfo, default_git_metadata_settings # https://stackoverflow.com/questions/48399498/git-executable-not-found-in-python @@ -123,7 +123,7 @@ def truncate_to_byte_limit(input_string, byte_limit=65536): def get_repo_info(settings: GitMetadataSettings | None = None): if settings is None: - settings = GitMetadataSettings() + settings = default_git_metadata_settings() if settings.collect == "none": return None diff --git a/py/src/braintrust/logger.py b/py/src/braintrust/logger.py index 522b31e1..e7d78d38 100644 --- a/py/src/braintrust/logger.py +++ b/py/src/braintrust/logger.py @@ -60,7 +60,7 @@ PromptOptions, SpanAttributes, ) -from .git_fields import GitMetadataSettings, RepoInfo +from .git_fields import GitMetadataSettings, RepoInfo, default_git_metadata_settings from .gitutil import get_past_n_ancestors, get_repo_info from .merge_row_batch import batch_items, merge_row_batch from .object import DEFAULT_IS_LEGACY_DATASET, ensure_dataset_record @@ -1620,7 +1620,7 @@ def init( :param org_name: (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple. :param metadata: (Optional) a dictionary with additional data about the test example, model outputs, or just about anything else that's relevant, that you can use to help find and analyze examples later. For example, you could log the `prompt`, example's `id`, or anything else that would be useful to slice/dice later. The values in `metadata` can be any JSON-serializable type, but its keys must be strings. :param tags: (Optional) a list of strings to tag the experiment with. Tags can be used to filter and organize experiments. - :param git_metadata_settings: (Optional) Settings for collecting git metadata. By default, will collect all git metadata fields allowed in org-level settings. + :param git_metadata_settings: (Optional) Settings for collecting git metadata. By default, will collect git metadata fields allowed in org-level settings, excluding diff content unless the org opts in. :param set_current: If true (the default), set the global current-experiment to the newly-created one. :param open: If the experiment already exists, open it in read-only mode. Throws an error if the experiment does not already exist. :param project_id: The id of the project to create the experiment in. This takes precedence over `project` if specified. @@ -2667,7 +2667,9 @@ def _check_org_info(state, org_info, org_name): state.org_name = orgs["name"] state.api_url = os.environ.get("BRAINTRUST_API_URL", orgs["api_url"]) state.proxy_url = os.environ.get("BRAINTRUST_PROXY_URL", orgs["proxy_url"]) - state.git_metadata_settings = GitMetadataSettings(**(orgs.get("git_metadata") or {})) + state.git_metadata_settings = GitMetadataSettings( + **(orgs.get("git_metadata") or default_git_metadata_settings().as_dict()) + ) break if state.org_id is None: