Merged
Changes from all commits
1 change: 0 additions & 1 deletion apps/expo-example/package.json
@@ -35,7 +35,6 @@
    "react": "19.1.0",
    "react-native": "0.81.4",
    "react-native-audio-api": "^0.7.1",
-   "react-native-blob-util": "^0.24.5",
    "react-native-bottom-tabs": "^0.11.0",
    "react-native-keyboard-controller": "1.18.5",
    "react-native-reanimated": "~4.1.0",
3 changes: 1 addition & 2 deletions bun.lock

Some generated files are not rendered by default.

130 changes: 26 additions & 104 deletions packages/llama/README.md
@@ -1,126 +1,48 @@
-# @react-native-ai/llama
+# Llama Provider for Vercel AI SDK

-llama.rn provider for Vercel AI SDK - run GGUF models on-device in React Native.
+A Vercel AI SDK provider for llama.rn, enabling on-device inference with GGUF models in React Native applications.

## Installation

```bash
npm install @react-native-ai/llama llama.rn react-native-blob-util ai
```

-## Usage with AI SDK
+**Requirements:**
+- React Native >= 0.76.0
+- llama.rn >= 0.10.0-rc.0
+- Vercel AI SDK v5

-```typescript
+```ts
import { llama } from '@react-native-ai/llama'
-import { generateText, streamText } from 'ai'
+import { generateText } from 'ai'

-// Create model instance (Model ID format: "owner/repo/filename.gguf")
+// Create model (format: "owner/repo/filename.gguf")
const model = llama.languageModel(
-  'ggml-org/SmolLM3-3B-GGUF/SmolLM3-Q4_K_M.gguf',
-  {
-    n_ctx: 2048,
-    n_gpu_layers: 99,
-  }
+  'ggml-org/SmolLM3-3B-GGUF/SmolLM3-Q4_K_M.gguf'
)

-// Download from HuggingFace (with progress)
-await model.download((progress) => {
-  console.log(`Downloading: ${progress.percentage}%`)
-})
-
-// Initialize model (loads into memory)
+// Download and prepare
+await model.download()
await model.prepare()

-// Generate text (non-streaming)
+// Generate
const { text } = await generateText({
  model,
-  messages: [
-    { role: 'system', content: 'You are a helpful assistant.' },
-    { role: 'user', content: 'Write a haiku about coding.' },
-  ],
-  maxOutputTokens: 100,
-  temperature: 0.7,
-})
-
-console.log(text)
-
-// Stream text
-const result = streamText({
-  model,
-  messages: [
-    { role: 'system', content: 'You are a helpful assistant.' },
-    { role: 'user', content: 'Tell me a story.' },
-  ],
-  maxOutputTokens: 500,
-  temperature: 0.7,
-})
-
-for await (const chunk of result.textStream) {
-  process.stdout.write(chunk)
-}
-
-// Cleanup
-await model.unload()
-```
-
-## Direct Context Usage
-
-For advanced use cases, you can access the underlying `LlamaContext` directly:
-
-```typescript
-import { llama, LlamaEngine } from '@react-native-ai/llama'
-
-// List downloaded models
-const models = await LlamaEngine.getModels()
-
-// Create and prepare model
-const model = llama.languageModel(
-  'ggml-org/SmolLM3-3B-GGUF/SmolLM3-Q4_K_M.gguf'
-)
-await model.prepare()
-
-// Access underlying LlamaContext
-const context = model.getContext()
-const result = await context.completion({
-  messages: [{ role: 'user', content: 'Hello!' }],
-  n_predict: 100,
+  prompt: 'Write a haiku about coding.'
})

-// Cleanup
-await model.unload()
-
-// Remove from disk
-await model.remove()
```

-## API
+## Features

-### `llama.languageModel(modelId, options?)`
+- ✅ Text generation with GGUF models
+- ✅ Streaming
+- ✅ On-device inference (iOS & Android)
+- ✅ Model download management
+- ✅ GPU acceleration support

-Creates a language model instance.
+## Documentation

-- `modelId`: Model identifier in format `owner/repo/filename.gguf`
-- `options`:
-  - `n_ctx`: Context size (default: 2048)
-  - `n_gpu_layers`: Number of GPU layers (default: 99)
-  - `contextParams`: Additional llama.rn context parameters
+For complete installation instructions and API documentation, visit our [documentation site](https://react-native-ai.com/docs/llama).

-### `LlamaEngine`
+## License

-- `getModels()`: Get list of downloaded models
-- `isDownloaded(modelId)`: Check if a model is downloaded
-- `setStoragePath(path)`: Set custom storage directory
+MIT

-### Model Instance Methods
+---

-- `download(progressCallback?)`: Download model from HuggingFace
-- `isDownloaded()`: Check if this model is downloaded
-- `prepare()`: Initialize/load model into memory
-- `getContext()`: Get underlying LlamaContext
-- `unload()`: Release model from memory
-- `remove()`: Delete model from disk

-## Requirements
-
-- React Native >= 0.76.0
-- llama.rn >= 0.10.0-rc.0
+Made with ❤️ and [create-react-native-library](https://github.com/callstack/react-native-builder-bob)
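
The options block removed from the usage example (`n_ctx`, `n_gpu_layers`, `contextParams`) is still described in the deleted API section above. A minimal sketch of passing those options to `llama.languageModel`; the values shown are illustrative, not the documented defaults:

```ts
import { llama } from '@react-native-ai/llama'

// Option names follow the removed API docs: n_ctx defaults to 2048,
// n_gpu_layers to 99, and contextParams is forwarded to llama.rn.
const model = llama.languageModel(
  'ggml-org/SmolLM3-3B-GGUF/SmolLM3-Q4_K_M.gguf',
  {
    n_ctx: 4096, // wider context window than the 2048 default
    n_gpu_layers: 0, // CPU-only inference, e.g. for low-end devices
  }
)
```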
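The `LlamaEngine` helpers from the removed API section (`getModels`, `isDownloaded`, `setStoragePath`) compose as below; a sketch using only those documented methods, with a placeholder storage path:

```ts
import { LlamaEngine } from '@react-native-ai/llama'

async function inspectModelCache(storagePath: string) {
  // Point the engine at a custom directory before querying it. The docs
  // don't state whether this returns a promise, so awaiting is defensive.
  await LlamaEngine.setStoragePath(storagePath)

  // List everything on disk, then check one specific model ID.
  const models = await LlamaEngine.getModels()
  const cached = await LlamaEngine.isDownloaded(
    'ggml-org/SmolLM3-3B-GGUF/SmolLM3-Q4_K_M.gguf'
  )
  return { models, cached }
}
```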
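Likewise, the per-model lifecycle methods (`download`, `isDownloaded`, `prepare`, `unload`, `remove`) chain into a download/load/release flow. A sketch assuming only those documented methods plus the AI SDK's `generateText`:

```ts
import { llama } from '@react-native-ai/llama'
import { generateText } from 'ai'

async function runOnce(prompt: string): Promise<string> {
  const model = llama.languageModel(
    'ggml-org/SmolLM3-3B-GGUF/SmolLM3-Q4_K_M.gguf'
  )

  // Skip the HuggingFace download when the GGUF file is already on disk.
  if (!(await model.isDownloaded())) {
    await model.download((progress) => {
      console.log(`Downloading: ${progress.percentage}%`)
    })
  }

  await model.prepare() // load the model into memory
  try {
    const { text } = await generateText({ model, prompt })
    return text
  } finally {
    await model.unload() // release memory; model.remove() would also delete the file
  }
}
```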
3 changes: 3 additions & 0 deletions packages/llama/package.json
@@ -28,6 +28,9 @@
    "registry": "https://registry.npmjs.org/"
  },
  "author": "Szymon Rybczak <szymon.rybczak@gmail.com> (https://github.com/szymonrybczak)",
+ "contributors": [
+   "Mike Grabowski <grabbou@gmail.com> (https://github.com/grabbou)"
+ ],
  "scripts": {
    "clean": "del-cli lib",
    "typecheck": "tsc --noEmit",