-
Notifications
You must be signed in to change notification settings - Fork 675
feat(js/plugins/compat-oai): add translation adapter and add translation flow in testapps #4786
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
4f8e3e6
abdee5e
b4a7064
6689e38
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,147 @@ | ||
| /** | ||
| * Copyright 2024 The Fire Company | ||
| * Copyright 2024 Google LLC | ||
| * | ||
| * Licensed under the Apache License, Version 2.0 (the "License"); | ||
| * you may not use this file except in compliance with the License. | ||
| * You may obtain a copy of the License at | ||
| * | ||
| * http://www.apache.org/licenses/LICENSE-2.0 | ||
| * | ||
| * Unless required by applicable law or agreed to in writing, software | ||
| * distributed under the License is distributed on an "AS IS" BASIS, | ||
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
| * See the License for the specific language governing permissions and | ||
| * limitations under the License. | ||
| */ | ||
|
|
||
| import type { ModelReference } from 'genkit'; | ||
| import { modelRef, z } from 'genkit'; | ||
| import type { ModelAction, ModelInfo } from 'genkit/model'; | ||
| import { model } from 'genkit/plugin'; | ||
| import OpenAI from 'openai'; | ||
| import { | ||
| TranscriptionConfigSchema, | ||
| toSttRequest, | ||
| transcriptionToGenerateResponse, | ||
| } from '../audio.js'; | ||
| import type { PluginOptions } from '../index.js'; | ||
| import { | ||
| toTranslationRequest, | ||
| translationToGenerateResponse, | ||
| } from '../translate.js'; | ||
| import { maybeCreateRequestScopedOpenAIClient, toModelName } from '../utils.js'; | ||
|
|
||
/**
 * Capability flags used as the default `info` for Whisper model references:
 * media input is supported, output may be `text` or `json`, and multi-turn
 * conversations, system-role messages and tools are all marked unsupported.
 */
export const WHISPER_MODEL_INFO: ModelInfo = {
  supports: {
    media: true,
    output: ['text', 'json'],
    multiturn: false,
    systemRole: false,
    tools: false,
  },
};
|
|
||
| /** | ||
| * Config schema for Whisper models. Extends the transcription config with | ||
| * a `translate` flag that switches between transcription and translation APIs. | ||
| */ | ||
| export const WhisperConfigSchema = TranscriptionConfigSchema.extend({ | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Should this be a combination of TranscriptionConfigSchema AND TranslationConfigSchema? We can use discriminated unions here... (off the…
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
If you try out discriminated unions, can you please verify that it works well with the Dev UI (Model Runner -> Select whisper-1 -> Check that the Model config panel looks/works)?
If it is unusable, let us just stick to simple union + extension (translate: boolean) for now. I will revisit this to see if it can be improved later. Thanks! |
||
| /** When true, uses Translation API instead of Transcription. Default: false */ | ||
| translate: z.boolean().optional().default(false), | ||
| }); | ||
|
|
||
/**
 * Defines an OpenAI Whisper model action that performs either transcription
 * or translation, selected per request by the `translate` config flag.
 *
 * @param params.name The name of the model.
 * @param params.client The OpenAI client instance, passed to
 *   `maybeCreateRequestScopedOpenAIClient` as the default client.
 * @param params.modelRef Optional reference to the model's configuration and
 *   custom options. When present, its `name`, `info` and `configSchema` are
 *   used for the created action.
 * @param params.pluginOptions Optional plugin options. Its `name` is used to
 *   derive the model name and the action namespace (falling back to
 *   `openai`), and the options are passed to
 *   `maybeCreateRequestScopedOpenAIClient` together with each request.
 *
 * @returns the created {@link ModelAction}
 */
export function defineOpenAIWhisperModel<
  CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,
>(params: {
  name: string;
  client: OpenAI;
  modelRef?: ModelReference<CustomOptions>;
  pluginOptions?: PluginOptions;
}): ModelAction {
  const { name, client: defaultClient, pluginOptions, modelRef } = params;
  const modelName = toModelName(name, pluginOptions?.name);
  // An explicit modelRef name wins; otherwise namespace the model name with
  // the plugin name, defaulting to 'openai'.
  const actionName =
    modelRef?.name ?? `${pluginOptions?.name ?? 'openai'}/${modelName}`;

  return model(
    {
      name: actionName,
      ...modelRef?.info,
      configSchema: modelRef?.configSchema,
    },
    async (request, { abortSignal }) => {
      // `translate` only selects which endpoint is called; it is removed from
      // the config handed to the request builders below.
      const { translate, ...cleanConfig } = (request.config ?? {}) as Record<
        string,
        unknown
      >;
      const cleanRequest = { ...request, config: cleanConfig };
      const client = maybeCreateRequestScopedOpenAIClient(
        pluginOptions,
        request,
        defaultClient
      );

      // Only a strict boolean `true` switches to translation; any other value
      // (including an absent flag) falls through to transcription.
      if (translate === true) {
        const translationParams = toTranslationRequest(modelName, cleanRequest);
        const result = await client.audio.translations.create(
          translationParams,
          { signal: abortSignal }
        );
        return translationToGenerateResponse(result);
      } else {
        const sttParams = toSttRequest(modelName, cleanRequest);
        // Explicitly setting stream to false ensures we use the non-streaming overload
        const result = await client.audio.transcriptions.create(
          {
            ...sttParams,
            stream: false,
          },
          { signal: abortSignal }
        );
        return transcriptionToGenerateResponse(result);
      }
    }
  );
}
|
|
||
/**
 * Builds a model reference for an OpenAI Whisper model in the `openai`
 * namespace.
 *
 * @param params.name The name given to the model reference.
 * @param params.info Model info; {@link WHISPER_MODEL_INFO} is used when
 *   omitted.
 * @param params.configSchema Config schema; {@link WhisperConfigSchema} is
 *   used when none is supplied.
 * @param params.config Optional config forwarded unchanged to `modelRef`.
 * @returns the value returned by `modelRef` for these settings.
 */
export function openAIWhisperModelRef<
  CustomOptions extends z.ZodTypeAny = z.ZodTypeAny,
>(params: {
  name: string;
  info?: ModelInfo;
  configSchema?: CustomOptions;
  config?: any;
}) {
  const { name, info, configSchema, config } = params;

  // Only an omitted (undefined) `info` falls back to the Whisper defaults.
  const resolvedInfo = info === undefined ? WHISPER_MODEL_INFO : info;
  // Any falsy `configSchema` falls back to the shared Whisper config schema.
  const resolvedSchema = configSchema || (WhisperConfigSchema as any);

  return modelRef({
    name,
    configSchema: resolvedSchema,
    info: resolvedInfo,
    config,
    namespace: 'openai',
  });
}
|
|
||
/**
 * Whisper model references keyed by model name, each built with
 * `openAIWhisperModelRef` using its default info and config schema.
 */
export const SUPPORTED_WHISPER_MODELS = {
  'whisper-1': openAIWhisperModelRef({ name: 'whisper-1' }),
};

Uh oh!
There was an error while loading. Please reload this page.