//! ollama.zig
//! A single-file Zig 0.15.2 client for the Ollama REST API.
//! Two versions follow: first a clean version without comments, then a
//! heavily commented copy of the same client.
const std = @import("std");
const http = std.http;
const json = std.json;
const mem = std.mem;
const testing = std.testing;
pub const Ollama = struct {
    /// Allocator used for every dynamic allocation made by this client.
    allocator: mem.Allocator,
    /// Underlying standard-library HTTP client (connections, pooling).
    client: http.Client,
    /// Base URL of the Ollama server, e.g. "http://localhost:11434".
    /// Borrowed, not copied: the slice must outlive the client.
    base_url: []const u8,

    /// Every error this client can surface to callers.
    pub const Error = error{
        RequestFailed,
        InvalidResponse,
        NetworkError,
        EndOfStream,
        ReadFailed,
        JsonParseError,
    } || mem.Allocator.Error || http.Client.RequestError || http.Client.FetchError || http.Client.ConnectError || json.ParseError(json.Scanner);

    /// Create a client for the server at `base_url`
    /// (defaults to "http://localhost:11434"). Call `deinit()` when done.
    pub fn init(allocator: mem.Allocator, base_url: ?[]const u8) !Ollama {
        return .{
            .allocator = allocator,
            .client = http.Client{ .allocator = allocator },
            .base_url = base_url orelse "http://localhost:11434",
        };
    }

    /// Close pooled connections and release client resources.
    /// Does not free `base_url`; the caller owns it.
    pub fn deinit(self: *Ollama) void {
        self.client.deinit();
    }

    /// A single chat message. Both slices are borrowed for the duration of
    /// the `chat()` call; responses contain heap-owned copies.
    pub const Message = struct {
        role: []const u8,
        content: []const u8,
    };

    /// Options for `generate()`. Null optionals are omitted from the request.
    pub const GenerateOptions = struct {
        model: []const u8 = "granite4:tiny-h",
        temperature: ?f32 = null,
        top_p: ?f32 = null,
        /// Integer per the Ollama API (was `?f32`; fixed to match ChatOptions).
        top_k: ?i32 = null,
        num_predict: ?i32 = null,
        stop: ?[]const []const u8 = null,
        seed: ?i32 = null,
        /// Only non-streaming (`false`) responses are parsed by this client.
        stream: bool = false,
    };

    /// Options for `chat()`. Null optionals are omitted from the request.
    pub const ChatOptions = struct {
        model: []const u8 = "granite4:tiny-h",
        temperature: ?f32 = null,
        top_p: ?f32 = null,
        top_k: ?i32 = null,
        num_predict: ?i32 = null,
        stop: ?[]const []const u8 = null,
        seed: ?i32 = null,
        stream: bool = false,
    };

    /// Result of `generate()`. Strings are heap copies; call `deinit()`.
    pub const GenerateResponse = struct {
        response: []const u8,
        model: []const u8,
        done: bool,
        allocator: mem.Allocator,
        pub fn deinit(self: GenerateResponse) void {
            self.allocator.free(self.response);
            self.allocator.free(self.model);
        }
    };

    /// Result of `chat()`. Message strings are heap copies; call `deinit()`.
    pub const ChatResponse = struct {
        message: Message,
        done: bool,
        allocator: mem.Allocator,
        pub fn deinit(self: ChatResponse) void {
            self.allocator.free(self.message.role);
            self.allocator.free(self.message.content);
        }
    };

    /// One entry from `listModels()`. Strings are heap copies; call `deinit()`.
    pub const ModelInfo = struct {
        name: []const u8,
        modified_at: []const u8,
        size: i64,
        allocator: mem.Allocator,
        pub fn deinit(self: ModelInfo) void {
            self.allocator.free(self.name);
            self.allocator.free(self.modified_at);
        }
    };

    /// Result of `listModels()`. Owns the array and every entry; call `deinit()`.
    pub const ListResponse = struct {
        models: []ModelInfo,
        allocator: mem.Allocator,
        pub fn deinit(self: ListResponse) void {
            for (self.models) |model| model.deinit();
            self.allocator.free(self.models);
        }
    };

    /// Generate a completion for `prompt` via POST /api/generate.
    /// Only non-streaming responses are supported; the full JSON reply is
    /// buffered (up to 10 MiB) and parsed. Caller owns the result and must
    /// call `deinit()` on it.
    pub fn generate(
        self: *Ollama,
        prompt: []const u8,
        options: GenerateOptions,
    ) Error!GenerateResponse {
        var request_body_buffer = std.ArrayListUnmanaged(u8).empty;
        defer request_body_buffer.deinit(self.allocator);
        try request_body_buffer.appendSlice(self.allocator, "{\"model\":\"");
        // Escape the model name too; it was previously appended raw.
        try self.jsonEscape(&request_body_buffer, options.model);
        try request_body_buffer.appendSlice(self.allocator, "\",\"prompt\":\"");
        try self.jsonEscape(&request_body_buffer, prompt);
        try request_body_buffer.appendSlice(self.allocator, "\"");
        try self.appendOptions(&request_body_buffer, options);
        try request_body_buffer.appendSlice(self.allocator, "}");

        // Heap buffer: the previous 10 MiB stack array risked stack overflow.
        const response_buffer = try self.allocator.alloc(u8, 10 * 1024 * 1024);
        defer self.allocator.free(response_buffer);
        const n = try self.postJson("/api/generate", request_body_buffer.items, response_buffer);
        const response_json = response_buffer[0..n];

        const parsed = try json.parseFromSlice(json.Value, self.allocator, response_json, .{});
        defer parsed.deinit();
        if (parsed.value != .object) return Error.InvalidResponse;
        const obj = parsed.value.object;
        const done = try getBool(obj, "done");
        // Parsed strings borrow from response_json; dupe so the caller owns
        // stable copies after this function returns.
        const response_text = try self.allocator.dupe(u8, try getString(obj, "response"));
        errdefer self.allocator.free(response_text);
        const model_name = try self.allocator.dupe(u8, try getString(obj, "model"));
        return .{
            .response = response_text,
            .model = model_name,
            .done = done,
            .allocator = self.allocator,
        };
    }

    /// Send a conversation to POST /api/chat and return the model's reply.
    /// Caller owns the result and must call `deinit()` on it.
    pub fn chat(
        self: *Ollama,
        messages: []const Message,
        options: ChatOptions,
    ) Error!ChatResponse {
        var request_body_buffer = std.ArrayListUnmanaged(u8).empty;
        defer request_body_buffer.deinit(self.allocator);
        try request_body_buffer.appendSlice(self.allocator, "{\"model\":\"");
        try self.jsonEscape(&request_body_buffer, options.model);
        try request_body_buffer.appendSlice(self.allocator, "\",\"messages\":[");
        for (messages, 0..) |msg, i| {
            if (i > 0) try request_body_buffer.appendSlice(self.allocator, ",");
            try request_body_buffer.appendSlice(self.allocator, "{\"role\":\"");
            // Escape the role as well as the content; the role was previously
            // appended raw, which could produce invalid JSON.
            try self.jsonEscape(&request_body_buffer, msg.role);
            try request_body_buffer.appendSlice(self.allocator, "\",\"content\":\"");
            try self.jsonEscape(&request_body_buffer, msg.content);
            try request_body_buffer.appendSlice(self.allocator, "\"}");
        }
        try request_body_buffer.appendSlice(self.allocator, "]");
        try self.appendOptions(&request_body_buffer, options);
        try request_body_buffer.appendSlice(self.allocator, "}");

        const response_buffer = try self.allocator.alloc(u8, 10 * 1024 * 1024);
        defer self.allocator.free(response_buffer);
        const n = try self.postJson("/api/chat", request_body_buffer.items, response_buffer);
        const response_json = response_buffer[0..n];

        const parsed = try json.parseFromSlice(json.Value, self.allocator, response_json, .{});
        defer parsed.deinit();
        if (parsed.value != .object) return Error.InvalidResponse;
        const obj = parsed.value.object;
        const msg_val = obj.get("message") orelse return Error.InvalidResponse;
        if (msg_val != .object) return Error.InvalidResponse;
        const msg_obj = msg_val.object;
        const done = try getBool(obj, "done");
        const role = try self.allocator.dupe(u8, try getString(msg_obj, "role"));
        errdefer self.allocator.free(role);
        const content = try self.allocator.dupe(u8, try getString(msg_obj, "content"));
        return .{
            .message = .{ .role = role, .content = content },
            .done = done,
            .allocator = self.allocator,
        };
    }

    /// List models installed on the server via GET /api/tags.
    /// Caller owns the result and must call `deinit()` on it.
    pub fn listModels(self: *Ollama) Error!ListResponse {
        var url_buffer = std.ArrayListUnmanaged(u8).empty;
        defer url_buffer.deinit(self.allocator);
        try url_buffer.writer(self.allocator).print("{s}/api/tags", .{self.base_url});
        const uri = try std.Uri.parse(url_buffer.items);
        var req = try self.client.request(.GET, uri, .{});
        defer req.deinit();
        try req.sendBodiless();
        var response = try req.receiveHead(&.{});
        if (response.head.status != .ok) {
            std.debug.print("ListModels request failed with status: {}\n", .{response.head.status});
            return Error.RequestFailed;
        }
        const response_buffer = try self.allocator.alloc(u8, 10 * 1024 * 1024);
        defer self.allocator.free(response_buffer);
        var response_writer: std.Io.Writer = .fixed(response_buffer);
        var read_buffer: [4096]u8 = undefined;
        const body_reader: *std.Io.Reader = response.reader(&read_buffer);
        const n = try body_reader.streamRemaining(&response_writer);
        const response_json = response_buffer[0..n];

        const parsed = try json.parseFromSlice(json.Value, self.allocator, response_json, .{});
        defer parsed.deinit();
        if (parsed.value != .object) return Error.InvalidResponse;
        const obj = parsed.value.object;
        const models_val = obj.get("models") orelse return Error.InvalidResponse;
        if (models_val != .array) return Error.InvalidResponse;
        const models_array = models_val.array;

        const models = try self.allocator.alloc(ModelInfo, models_array.items.len);
        var filled: usize = 0;
        // On any failure below, free the entries populated so far plus the
        // array itself (previously these leaked on mid-loop errors).
        errdefer {
            for (models[0..filled]) |m| m.deinit();
            self.allocator.free(models);
        }
        for (models_array.items) |model_val| {
            if (model_val != .object) return Error.InvalidResponse;
            const model_obj = model_val.object;
            const name = try self.allocator.dupe(u8, try getString(model_obj, "name"));
            errdefer self.allocator.free(name);
            const modified_at = try self.allocator.dupe(u8, try getString(model_obj, "modified_at"));
            errdefer self.allocator.free(modified_at);
            const size = try getInteger(model_obj, "size");
            models[filled] = .{
                .name = name,
                .modified_at = modified_at,
                .size = size,
                .allocator = self.allocator,
            };
            filled += 1;
        }
        return .{ .models = models, .allocator = self.allocator };
    }

    /// POST `body` as JSON to `base_url ++ path`, read the full response body
    /// into `out`, and return the number of bytes read.
    /// Returns Error.RequestFailed on a non-200 status.
    fn postJson(self: *Ollama, path: []const u8, body: []const u8, out: []u8) Error!usize {
        var url_buffer = std.ArrayListUnmanaged(u8).empty;
        defer url_buffer.deinit(self.allocator);
        try url_buffer.writer(self.allocator).print("{s}{s}", .{ self.base_url, path });
        const uri = try std.Uri.parse(url_buffer.items);
        var req = try self.client.request(.POST, uri, .{
            .extra_headers = &.{
                .{ .name = "Content-Type", .value = "application/json" },
            },
        });
        defer req.deinit();
        req.transfer_encoding = .{ .content_length = body.len };
        var body_writer = try req.sendBody(&.{});
        try body_writer.writer.writeAll(body);
        try body_writer.end();
        var response = try req.receiveHead(&.{});
        if (response.head.status != .ok) {
            std.debug.print("Request to {s} failed with status: {}\n", .{ path, response.head.status });
            return Error.RequestFailed;
        }
        var response_writer: std.Io.Writer = .fixed(out);
        var read_buffer: [4096]u8 = undefined;
        const body_reader: *std.Io.Reader = response.reader(&read_buffer);
        return try body_reader.streamRemaining(&response_writer);
    }

    /// Append the optional sampling parameters shared by GenerateOptions and
    /// ChatOptions (plus the mandatory "stream" flag) to a JSON object body.
    fn appendOptions(self: *Ollama, buffer: *std.ArrayListUnmanaged(u8), options: anytype) !void {
        if (options.temperature) |temperature| {
            try buffer.writer(self.allocator).print(",\"temperature\":{d}", .{temperature});
        }
        if (options.top_p) |top_p| {
            try buffer.writer(self.allocator).print(",\"top_p\":{d}", .{top_p});
        }
        if (options.top_k) |top_k| {
            try buffer.writer(self.allocator).print(",\"top_k\":{d}", .{top_k});
        }
        if (options.num_predict) |num_predict| {
            try buffer.writer(self.allocator).print(",\"num_predict\":{d}", .{num_predict});
        }
        if (options.stop) |stop| {
            // "stop" was previously accepted but silently dropped.
            try buffer.appendSlice(self.allocator, ",\"stop\":[");
            for (stop, 0..) |sequence, i| {
                if (i > 0) try buffer.appendSlice(self.allocator, ",");
                try buffer.appendSlice(self.allocator, "\"");
                try self.jsonEscape(buffer, sequence);
                try buffer.appendSlice(self.allocator, "\"");
            }
            try buffer.appendSlice(self.allocator, "]");
        }
        if (options.seed) |seed| {
            try buffer.writer(self.allocator).print(",\"seed\":{d}", .{seed});
        }
        try buffer.writer(self.allocator).print(",\"stream\":{}", .{options.stream});
    }

    /// Fetch `key` from `obj`, requiring a JSON string.
    fn getString(obj: json.ObjectMap, key: []const u8) Error![]const u8 {
        const val = obj.get(key) orelse return Error.InvalidResponse;
        return switch (val) {
            .string => |s| s,
            else => Error.InvalidResponse,
        };
    }

    /// Fetch `key` from `obj`, requiring a JSON boolean.
    fn getBool(obj: json.ObjectMap, key: []const u8) Error!bool {
        const val = obj.get(key) orelse return Error.InvalidResponse;
        return switch (val) {
            .bool => |b| b,
            else => Error.InvalidResponse,
        };
    }

    /// Fetch `key` from `obj`, requiring a JSON integer.
    fn getInteger(obj: json.ObjectMap, key: []const u8) Error!i64 {
        const val = obj.get(key) orelse return Error.InvalidResponse;
        return switch (val) {
            .integer => |n| n,
            else => Error.InvalidResponse,
        };
    }

    /// JSON-escape `text` into `buffer` (quotes, backslash, and all control
    /// characters, which RFC 8259 requires to be escaped).
    fn jsonEscape(self: *Ollama, buffer: *std.ArrayListUnmanaged(u8), text: []const u8) !void {
        for (text) |c| {
            switch (c) {
                '"' => try buffer.appendSlice(self.allocator, "\\\""),
                '\\' => try buffer.appendSlice(self.allocator, "\\\\"),
                '\n' => try buffer.appendSlice(self.allocator, "\\n"),
                '\r' => try buffer.appendSlice(self.allocator, "\\r"),
                '\t' => try buffer.appendSlice(self.allocator, "\\t"),
                0x00...0x08, 0x0b, 0x0c, 0x0e...0x1f => try buffer.writer(self.allocator).print("\\u{x:0>4}", .{c}),
                else => try buffer.append(self.allocator, c),
            }
        }
    }
};
test "OllamaClient - init and deinit" {
    // std.testing.allocator is the same allocator_instance, with leak checks.
    var client = try Ollama.init(testing.allocator, null);
    defer client.deinit();
    try testing.expect(client.base_url.len > 0);
    try testing.expectEqualStrings("http://localhost:11434", client.base_url);
}
test "Ollama - init with custom URL" {
    const custom_url = "http://192.168.1.100:11434";
    var client = try Ollama.init(testing.allocator, custom_url);
    defer client.deinit();
    try testing.expectEqualStrings(custom_url, client.base_url);
}
test "Ollama - Message structure" {
    // Anonymous literal coerces to Ollama.Message via the annotated type.
    const msg: Ollama.Message = .{
        .role = "user",
        .content = "Hello, Ollama!",
    };
    try testing.expectEqualStrings("user", msg.role);
    try testing.expectEqualStrings("Hello, Ollama!", msg.content);
}
test "Ollama - GenerateOptions defaults" {
    const opts: Ollama.GenerateOptions = .{};
    try testing.expectEqualStrings("granite4:tiny-h", opts.model);
    try testing.expect(opts.temperature == null);
    try testing.expect(opts.seed == null);
    try testing.expect(!opts.stream);
}
test "Ollama - GenerateOptions custom" {
    const opts: Ollama.GenerateOptions = .{
        .model = "granite4:tiny-h",
        .temperature = 0.7,
        .seed = 123,
        .stream = false,
        .num_predict = 100,
    };
    try testing.expectEqualStrings("granite4:tiny-h", opts.model);
    try testing.expectEqual(@as(f32, 0.7), opts.temperature.?);
    try testing.expectEqual(@as(i32, 123), opts.seed.?);
    try testing.expect(!opts.stream);
    try testing.expectEqual(@as(i32, 100), opts.num_predict.?);
}
test "Ollama - ChatOptions defaults" {
    const opts: Ollama.ChatOptions = .{};
    try testing.expectEqualStrings("granite4:tiny-h", opts.model);
    try testing.expect(opts.temperature == null);
    try testing.expect(opts.seed == null);
    try testing.expect(!opts.stream);
}
// Integration test: requires a running Ollama server at localhost:11434.
test "Ollama - listModels integration" {
    var client = try Ollama.init(testing.allocator, null);
    defer client.deinit();
    const list_response: Ollama.ListResponse = try client.listModels();
    defer list_response.deinit();
    std.debug.print("\nAvailable models: {d}\n", .{list_response.models.len});
    for (list_response.models) |model| {
        std.debug.print(" - {s} (size: {d})\n", .{ model.name, model.size });
    }
    // The previous assertion `models.len >= 0` was a tautology (usize is
    // unsigned). Assert something meaningful instead: every listed model
    // must have a non-empty name.
    for (list_response.models) |model| {
        try testing.expect(model.name.len > 0);
    }
}
// Integration test: requires a running Ollama server at localhost:11434.
test "Ollama - generate integration" {
    var client = try Ollama.init(testing.allocator, null);
    defer client.deinit();
    const prompt = "Say hello in one word";
    const response: Ollama.GenerateResponse = try client.generate(prompt, .{
        .model = "granite4:tiny-h",
    });
    defer response.deinit();
    std.debug.print("\nGenerate Response: {s}\n", .{response.response});
    std.debug.print("Model: {s}, Done: {}\n", .{ response.model, response.done });
    try testing.expect(response.response.len > 0);
    try testing.expect(response.done);
}
// Integration test: requires a running Ollama server at localhost:11434.
test "Ollama - chat integration" {
    var client = try Ollama.init(testing.allocator, null);
    defer client.deinit();
    const messages = [_]Ollama.Message{
        .{ .role = "user", .content = "What is the capital of France?" },
    };
    const response: Ollama.ChatResponse = try client.chat(messages[0..], .{
        .model = "granite4:tiny-h",
    });
    defer response.deinit();
    std.debug.print("\nChat Response: {s}\n", .{response.message.content});
    std.debug.print("Role: {s}, Done: {}\n", .{ response.message.role, response.done });
    try testing.expect(response.message.content.len > 0);
    try testing.expectEqualStrings("assistant", response.message.role);
    try testing.expect(response.done);
}
// The section above is the clean, uncommented version. Below is a second copy
// of the same client with extensive comments.
// NOTE: the auto-generated comments below are excessive and should be
// reviewed for accuracy before keeping them.
//! Ollama API Client for Zig
//!
//! This module provides a complete HTTP client implementation for interacting with the Ollama API,
//! a local LLM inference server. The client handles HTTP communication, JSON serialization/deserialization,
//! and memory management using Zig's explicit memory allocation patterns.
//!
//! # Architecture Overview
//!
//! The client is built around Zig's `std.http.Client` from the standard library, which provides
//! a modern, async-capable HTTP client implementation. This wrapper adds:
//! - Structured API endpoints for Ollama's REST API
//! - JSON request/response handling
//! - Memory-safe string handling with explicit allocators
//! - Error handling with comprehensive error unions
//!
//! # Memory Management
//!
//! All memory allocations are explicit and managed through the `allocator` field. This follows Zig's
//! philosophy of explicit memory management. Callers must:
//! 1. Provide an allocator when initializing the client
//! 2. Call `deinit()` on the client when done
//! 3. Call `deinit()` on response structs to free their allocated memory
//!
//! # HTTP Request Flow
//!
//! For POST requests (chat, generate):
//! 1. Build JSON request body incrementally using `ArrayListUnmanaged`
//! 2. Parse URI from base URL + endpoint
//! 3. Create HTTP request with appropriate headers
//! 4. Set transfer encoding (content-length for POST requests)
//! 5. Send body using BodyWriter API (write data, then call `end()`)
//! 6. Receive response headers
//! 7. Read response body into buffer
//! 8. Parse JSON response
//! 9. Extract and duplicate strings (they're owned by the response buffer)
//!
//! For GET requests (listModels):
//! 1. Parse URI
//! 2. Create HTTP request
//! 3. Call `sendBodiless()` since no body is needed
//! 4. Receive response headers
//! 5. Read and parse response body
//!
//! # Zig-Specific Patterns Used
//!
//! - **Error Unions**: Functions return `Error!ReturnType` where Error is a union of possible errors
//! - **Optional Types**: `?Type` syntax for nullable values
//! - **Defer**: Automatic cleanup using `defer` statements
//! - **ArrayListUnmanaged**: Memory-efficient dynamic arrays where caller manages allocation
//! - **Slice Syntax**: `[]const u8` for string slices (no null termination needed)
//! - **Struct Initialization**: `.{ ... }` syntax for anonymous struct literals
//! - **Error Propagation**: `try` keyword for propagating errors up the call stack
//!
//! # JSON Handling
//!
//! JSON is manually constructed for requests (avoiding external dependencies) and parsed using
//! `std.json` for responses. Manual construction allows precise control over escaping and formatting.
//! The `jsonEscape` helper function ensures proper escaping of special characters in strings.
const std = @import("std");
const http = std.http;
const json = std.json;
const mem = std.mem;
const testing = std.testing;
/// Ollama API client for local LLM inference
///
/// This struct encapsulates all state needed to communicate with an Ollama server.
/// It maintains an HTTP client instance and base URL configuration.
///
/// # Example Usage
///
/// ```zig
/// var gpa = std.heap.GeneralPurposeAllocator(.{}){};
/// defer _ = gpa.deinit();
/// var client = try Ollama.init(gpa.allocator(), null);
/// defer client.deinit();
///
/// const messages = [_]Ollama.Message{
/// .{ .role = "user", .content = "Hello!" },
/// };
/// const response = try client.chat(&messages, .{});
/// defer response.deinit();
/// ```
pub const Ollama = struct {
/// Memory allocator used for all dynamic allocations.
/// This is passed to all std library functions that need allocation.
allocator: mem.Allocator,
/// HTTP client instance that handles network communication.
/// This is Zig's standard library HTTP client, which manages connections,
/// connection pooling, and HTTP protocol details.
client: http.Client,
/// Base URL of the Ollama server (e.g., "http://localhost:11434").
/// This is stored as a slice, so it must remain valid for the lifetime of the client.
/// If provided as a string literal in `init()`, it will be valid for the program's lifetime.
base_url: []const u8,
/// Error union type that represents all possible errors this client can return.
///
/// This is a union of:
/// - Custom errors: RequestFailed, InvalidResponse, NetworkError, etc.
/// - Standard library errors: Allocator errors, HTTP client errors, JSON parsing errors
///
/// Using error unions allows Zig to statically check all error cases are handled.
pub const Error = error{
/// The HTTP request succeeded but returned a non-OK status code
RequestFailed,
/// The response body could not be parsed or was malformed
InvalidResponse,
/// A network-level error occurred (connection failed, timeout, etc.)
NetworkError,
/// Unexpected end of stream while reading response
EndOfStream,
/// Failed to read from the response stream
ReadFailed,
/// JSON parsing failed (malformed JSON, type mismatch, etc.)
JsonParseError,
} || mem.Allocator.Error || http.Client.RequestError || http.Client.FetchError || http.Client.ConnectError || json.ParseError(json.Scanner);
/// Initialize a new Ollama client instance.
///
/// Creates a new HTTP client and configures it to communicate with an Ollama server.
/// The client will use connection pooling for efficiency.
///
/// # Parameters
/// - `allocator`: Memory allocator to use for all allocations. Must remain valid for the
/// lifetime of the client. Common choices: `std.heap.page_allocator`, `std.heap.GeneralPurposeAllocator`.
/// - `base_url`: Optional base URL of the Ollama server. If `null`, defaults to `"http://localhost:11434"`.
/// Must remain valid for the lifetime of the client.
///
/// # Returns
/// A new `Ollama` instance, or an error if initialization fails.
///
/// # Memory Safety
/// The `base_url` slice must remain valid for the lifetime of the client. If you pass a
/// string literal, it will be valid for the program's lifetime. If you pass an allocated
/// string, you're responsible for freeing it after `deinit()` is called.
pub fn init(allocator: mem.Allocator, base_url: ?[]const u8) !Ollama {
return .{
.allocator = allocator,
// Initialize HTTP client with the same allocator.
// The client uses this allocator for connection pooling and internal buffers.
.client = http.Client{ .allocator = allocator },
// Use the provided base_url or default to localhost.
// The `orelse` operator provides a default value for optional types.
.base_url = base_url orelse "http://localhost:11434",
};
}
/// Clean up resources associated with the client.
///
/// This should be called when the client is no longer needed. It will:
/// - Close all HTTP connections in the connection pool
/// - Free any resources held by the HTTP client
///
/// # Safety
/// After calling this, the client must not be used. Note that this does NOT free
/// the `base_url` slice if it was allocated by the caller.
pub fn deinit(self: *Ollama) void {
// Deinitialize the HTTP client, closing connections and freeing resources.
// This does NOT free self.base_url - the caller owns that.
self.client.deinit();
}
/// Message structure representing a single message in a chat conversation.
///
/// Used in the `chat()` function to provide conversation history and context.
/// Each message has a role (e.g., "user", "assistant", "system") and content.
///
/// # Memory Safety
/// The `content` and `role` slices must remain valid for the duration of the `chat()` call.
/// They are not copied by the client - they're directly used to build the JSON request.
pub const Message = struct {
/// The message content/text
content: []const u8,
/// The role of the message sender (typically "user", "assistant", or "system")
role: []const u8,
};
/// Configuration options for text generation requests.
///
/// These options control how the model generates text. All fields except `model` and `stream`
/// are optional (nullable). The client only includes non-null fields in the JSON request.
pub const GenerateOptions = struct {
/// Name of the model to use for generation
model: []const u8 = "granite4:tiny-h",
/// Sampling temperature (0.0 to 1.0). Higher values make output more random.
/// Typical values: 0.7-0.9 for creative tasks, 0.1-0.3 for focused tasks.
temperature: ?f32 = null,
/// Nucleus sampling parameter. Controls diversity via nucleus sampling.
/// Typical values: 0.9-0.95. Only considers tokens with top_p probability mass.
top_p: ?f32 = null,
/// Limits sampling to top K most likely tokens. Reduces randomness.
/// Typical values: 10-50. Set to 1 for greedy decoding (most likely token).
top_k: ?i32 = null,
/// Maximum number of tokens to generate. Limits response length.
num_predict: ?i32 = null,
/// Array of stop sequences. Generation stops when any sequence is encountered.
/// Common examples: ["\n\n", "Human:", "Assistant:"]
stop: ?[]const []const u8 = null,
/// Random seed for reproducible outputs. If set, same prompt + seed = same output.
seed: ?i32 = null,
/// If true, returns a stream of response chunks. If false, returns complete response.
/// Note: This implementation only supports non-streaming mode (stream=false).
stream: bool = false,
};
/// Configuration options for chat requests.
///
/// Similar to `GenerateOptions` but used for chat-style conversations with message history.
/// See `GenerateOptions` documentation for parameter descriptions.
pub const ChatOptions = struct {
/// Name of the model to use for chat
model: []const u8 = "granite4:tiny-h",
/// Sampling temperature (0.0 to 1.0)
temperature: ?f32 = null,
/// Nucleus sampling parameter
top_p: ?f32 = null,
/// Top-K sampling parameter
top_k: ?i32 = null,
/// Maximum number of tokens to generate
num_predict: ?i32 = null,
/// Array of stop sequences
stop: ?[]const []const u8 = null,
/// Random seed for reproducibility
seed: ?i32 = null,
/// Whether to stream responses (currently only false is supported)
stream: bool = false,
};
/// Response structure returned from text generation requests.
///
/// Contains the generated text, model name, and completion status.
/// All string fields are heap-allocated and must be freed using `deinit()`.
pub const GenerateResponse = struct {
/// The generated text response from the model
response: []const u8,
/// Name of the model that generated the response
model: []const u8,
/// Whether generation is complete (always true for non-streaming mode)
done: bool,
/// Allocator used for this response's memory (needed for deinit)
allocator: mem.Allocator,
/// Free all heap-allocated memory in this response.
///
/// This must be called when the response is no longer needed to prevent memory leaks.
/// After calling this, the response struct should not be used.
pub fn deinit(self: GenerateResponse) void {
self.allocator.free(self.response);
self.allocator.free(self.model);
}
};
/// Response structure returned from chat requests.
///
/// Contains the assistant's message, role, and completion status.
/// All string fields are heap-allocated and must be freed using `deinit()`.
pub const ChatResponse = struct {
/// The assistant's message (contains role and content)
message: Message,
/// Whether the chat response is complete (always true for non-streaming mode)
done: bool,
/// Allocator used for this response's memory (needed for deinit)
allocator: mem.Allocator,
/// Free all heap-allocated memory in this response.
///
/// This must be called when the response is no longer needed to prevent memory leaks.
pub fn deinit(self: ChatResponse) void {
self.allocator.free(self.message.role);
self.allocator.free(self.message.content);
}
};
/// Information about a single Ollama model.
///
/// Returned by `listModels()` to describe available models.
/// All string fields are heap-allocated and must be freed using `deinit()`.
pub const ModelInfo = struct {
/// Model name/identifier (e.g., "granite4:tiny-h")
name: []const u8,
/// ISO 8601 timestamp of when the model was last modified
modified_at: []const u8,
/// Size of the model in bytes
size: i64,
/// Allocator used for this model info's memory (needed for deinit)
allocator: mem.Allocator,
/// Free all heap-allocated memory in this model info.
///
/// This must be called when the model info is no longer needed to prevent memory leaks.
pub fn deinit(self: ModelInfo) void {
self.allocator.free(self.name);
self.allocator.free(self.modified_at);
}
};
/// Response structure returned from listing available models.
///
/// Contains an array of `ModelInfo` structures describing all available models.
/// Both the array and all model info strings must be freed using `deinit()`.
pub const ListResponse = struct {
/// Array of model information structures
models: []ModelInfo,
/// Allocator used for this response's memory (needed for deinit)
allocator: mem.Allocator,
/// Free all heap-allocated memory in this response.
///
/// This recursively frees all model info structures and then frees the models array.
/// This must be called when the response is no longer needed to prevent memory leaks.
pub fn deinit(self: ListResponse) void {
// Free each model's strings
for (self.models) |model| {
model.deinit();
}
// Free the models array itself
self.allocator.free(self.models);
}
};
/// Generate text completion using the Ollama API.
///
/// This function sends a prompt to the Ollama server and returns the model's text completion.
/// It's designed for simple prompt-response scenarios without conversation history.
///
/// # Parameters
/// - `prompt`: The text prompt to send to the model. Will be JSON-escaped automatically.
/// - `options`: Configuration options for generation (model, temperature, etc.)
///
/// # Returns
/// A `GenerateResponse` containing the generated text, model name, and completion status.
/// The response must be freed using `deinit()` when no longer needed.
///
/// # Implementation Details
///
/// The function follows this flow:
/// 1. Builds the API URL by concatenating base_url + "/api/generate"
/// 2. Constructs JSON request body manually (for efficiency and control)
/// 3. Escapes special characters in the prompt using `jsonEscape()`
/// 4. Conditionally adds optional parameters (temperature, top_p, etc.) only if set
/// 5. Sends HTTP POST request with JSON body
/// 6. Reads response into a 10MB stack buffer (sufficient for most responses)
/// 7. Parses JSON response using `std.json`
/// 8. Extracts strings and duplicates them (since JSON parser's strings are tied to the buffer)
///
/// # Memory Management
/// - URL and request body buffers are automatically freed via `defer`
/// - Response buffer is on the stack (10MB)
/// - Returned strings are heap-allocated and must be freed
///
/// # Error Handling
/// Returns errors for network failures, HTTP errors, JSON parsing errors, or allocation failures.
pub fn generate(
self: *Ollama,
prompt: []const u8,
options: GenerateOptions,
) Error!GenerateResponse {
// Build URL by concatenating base_url with the API endpoint.
// Using ArrayListUnmanaged for efficiency - we manage the allocator ourselves.
var url_buffer = std.ArrayListUnmanaged(u8){};
defer url_buffer.deinit(self.allocator);
// Format the full URL: "{base_url}/api/generate"
// The writer pattern allows efficient string formatting
try url_buffer.writer(self.allocator).print("{s}/api/generate", .{self.base_url});
const url = url_buffer.items;
// Build JSON request body incrementally.
// We construct JSON manually rather than using a JSON library for:
// - Better performance (no intermediate structures)
// - More control over formatting
// - Avoiding external dependencies
var request_body_buffer = std.ArrayListUnmanaged(u8){};
defer request_body_buffer.deinit(self.allocator);
// Start JSON object with model and prompt fields
try request_body_buffer.appendSlice(self.allocator, "{\"model\":\"");
try request_body_buffer.appendSlice(self.allocator, options.model);
try request_body_buffer.appendSlice(self.allocator, "\",\"prompt\":\"");
// Escape special characters in the prompt (quotes, newlines, etc.)
try self.jsonEscape(&request_body_buffer, prompt);
try request_body_buffer.appendSlice(self.allocator, "\"");
// Conditionally add optional parameters.
// The `if (options.field) |value|` syntax is Zig's optional unwrapping pattern.
// It only executes if the value is not null.
if (options.temperature) |temp| {
// Format float with {d} specifier (decimal representation)
try request_body_buffer.writer(self.allocator).print(",\"temperature\":{d}", .{temp});
}
if (options.top_p) |top_p| {
try request_body_buffer.writer(self.allocator).print(",\"top_p\":{d}", .{top_p});
}
if (options.top_k) |top_k| {
try request_body_buffer.writer(self.allocator).print(",\"top_k\":{d}", .{top_k});
}
if (options.num_predict) |num| {
try request_body_buffer.writer(self.allocator).print(",\"num_predict\":{d}", .{num});
}
if (options.seed) |seed| {
try request_body_buffer.writer(self.allocator).print(",\"seed\":{d}", .{seed});
}
// Stream is always included (boolean, not optional)
try request_body_buffer.writer(self.allocator).print(",\"stream\":{}", .{options.stream});
try request_body_buffer.appendSlice(self.allocator, "}");
const request_body = request_body_buffer.items;
// Parse the URL string into a URI structure.
// This validates the URL format and breaks it into components.
const uri = try std.Uri.parse(url);
// Create HTTP POST request.
// The `.POST` is a compile-time enum value specifying the HTTP method.
// The third parameter is request options (headers, etc.).
var req = try self.client.request(.POST, uri, .{
.extra_headers = &.{
// Content-Type header tells the server we're sending JSON
.{ .name = "Content-Type", .value = "application/json" },
},
});
defer req.deinit(); // Ensure request is cleaned up even on error
// NOTE: This function uses the older sendBody API which may not work correctly
// in Zig 0.15.2. The chat() function uses the newer BodyWriter API.
// See chat() implementation for the correct pattern.
std.debug.print("Generate: Sending body of length: {}\n", .{request_body.len});
const bytes_sent = try req.sendBody(request_body);
try req.finish(); // This may not exist in Zig 0.15.2 - see chat() for correct API
std.debug.print("Generate: Sent {} bytes and finalized\n", .{bytes_sent});
var response = try req.receiveHead(&.{});
// Check HTTP status code.
// Ollama returns .ok (200) for successful requests.
if (response.head.status != .ok) {
std.debug.print("Generate request failed with status: {}\n", .{response.head.status});
return Error.RequestFailed;
}
// Read response body into stack buffer.
// 10MB buffer should be sufficient for most model responses.
// Using stack allocation avoids heap allocation for the common case.
var response_buffer: [1024 * 1024 * 10]u8 = undefined;
// Create a writer that writes to the fixed buffer
var response_writer: std.Io.Writer = .fixed(&response_buffer);
// Read buffer for the HTTP response reader (4KB chunks)
var read_buffer: [4096]u8 = undefined;
// Get a reader interface from the HTTP response
const body_reader: *std.Io.Reader = response.reader(&read_buffer);
// Stream the entire response body into our buffer.
// The @enumFromInt converts the buffer length to the appropriate enum type.
const n = try body_reader.stream(&response_writer, @enumFromInt(response_buffer.len));
// Parse JSON response.
// The JSON parser expects a slice, so we slice the buffer to the actual size read.
const response_json = response_buffer[0..n];
std.debug.print("Generate response body ({} bytes): {s}\n", .{ n, response_json });
// Parse JSON into a Value tree structure
const parsed = try json.parseFromSlice(
json.Value,
self.allocator,
response_json,
.{}, // Parse options (using defaults)
);
defer parsed.deinit(); // Free parsed JSON tree
// Extract values from JSON object.
// The `?.` operator is optional chaining - if get() returns null, the whole expression is null.
const obj = parsed.value.object;
const response_text = obj.get("response").?.string;
const model_name = obj.get("model").?.string;
const done = obj.get("done").?.boolean;
// Create response struct with duplicated strings.
// The strings from JSON parsing are slices into the response_buffer, which will go out of scope.
// We must duplicate them to heap-allocated memory that persists after the function returns.
return GenerateResponse{
.response = try self.allocator.dupe(u8, response_text),
.model = try self.allocator.dupe(u8, model_name),
.done = done,
.allocator = self.allocator,
};
}
/// Chat with the model using conversation history.
///
/// This function sends a conversation (array of messages) to the Ollama server and returns
/// the assistant's response. Unlike `generate()`, this supports multi-turn conversations
/// with message history.
///
/// # Parameters
/// - `messages`: Array of `Message` structures representing the conversation history.
/// Messages are processed in order, so typically you'd include previous user/assistant
/// exchanges to maintain context.
/// - `options`: Configuration options for chat generation (model, temperature, etc.)
///
/// # Returns
/// A `ChatResponse` containing the assistant's message, role, and completion status.
/// The response must be freed using `deinit()` when no longer needed.
///
/// # Implementation Details
///
/// The function follows this flow:
/// 1. Builds the API URL: base_url + "/api/chat"
/// 2. Constructs JSON request body with a messages array
/// 3. Iterates through messages, JSON-escaping each one
/// 4. Sets transfer encoding to content-length (required for POST requests in Zig 0.15.2)
/// 5. Uses BodyWriter API to send the request body (correct pattern for Zig 0.15.2)
/// 6. Reads and parses JSON response
/// 7. Extracts nested message object from response
///
/// # Key Differences from generate()
/// - Uses BodyWriter API (`sendBody()` returns a writer, must call `end()` after writing)
/// - Sets `transfer_encoding` field explicitly (required for POST body)
/// - Handles nested JSON structure (response contains a "message" object)
/// - Uses heap allocation for response buffer (vs stack in generate)
///
/// # Memory Management
/// - URL and request body buffers are automatically freed via `defer`
/// - Response buffer is heap-allocated (10MB) and freed via `defer`
/// - Returned strings are heap-allocated and must be freed
pub fn chat(
    self: *Ollama,
    messages: []const Message,
    options: ChatOptions,
) Error!ChatResponse {
    // Build URL for the chat endpoint: "{base_url}/api/chat"
    var url_buffer = std.ArrayListUnmanaged(u8){};
    defer url_buffer.deinit(self.allocator);
    try url_buffer.writer(self.allocator).print("{s}/api/chat", .{self.base_url});
    const url = url_buffer.items;
    // Build the JSON request body: {"model":"...","messages":[...], ...}
    var request_body_buffer = std.ArrayListUnmanaged(u8){};
    defer request_body_buffer.deinit(self.allocator);
    try request_body_buffer.appendSlice(self.allocator, "{\"model\":\"");
    try request_body_buffer.appendSlice(self.allocator, options.model);
    try request_body_buffer.appendSlice(self.allocator, "\",\"messages\":[");
    for (messages, 0..) |msg, i| {
        // Comma separator between array elements (not before the first).
        if (i > 0) try request_body_buffer.appendSlice(self.allocator, ",");
        try request_body_buffer.appendSlice(self.allocator, "{\"role\":\"");
        // Escape the role as well as the content: both are caller-supplied
        // strings and must not be able to break out of the JSON string.
        try self.jsonEscape(&request_body_buffer, msg.role);
        try request_body_buffer.appendSlice(self.allocator, "\",\"content\":\"");
        try self.jsonEscape(&request_body_buffer, msg.content);
        try request_body_buffer.appendSlice(self.allocator, "\"}");
    }
    try request_body_buffer.appendSlice(self.allocator, "]");
    // Optional sampling parameters are emitted only when set.
    if (options.temperature) |temp| {
        try request_body_buffer.writer(self.allocator).print(",\"temperature\":{d}", .{temp});
    }
    if (options.top_p) |top_p| {
        try request_body_buffer.writer(self.allocator).print(",\"top_p\":{d}", .{top_p});
    }
    if (options.top_k) |top_k| {
        try request_body_buffer.writer(self.allocator).print(",\"top_k\":{d}", .{top_k});
    }
    if (options.num_predict) |num| {
        try request_body_buffer.writer(self.allocator).print(",\"num_predict\":{d}", .{num});
    }
    if (options.seed) |seed| {
        try request_body_buffer.writer(self.allocator).print(",\"seed\":{d}", .{seed});
    }
    // Stream is always included (boolean, not optional).
    try request_body_buffer.writer(self.allocator).print(",\"stream\":{}", .{options.stream});
    try request_body_buffer.appendSlice(self.allocator, "}");
    const request_body = request_body_buffer.items;
    std.debug.print("Chat request body: {s}\n", .{request_body});
    const uri = try std.Uri.parse(url);
    // Create the HTTP POST request with a JSON Content-Type header.
    var req = try self.client.request(.POST, uri, .{
        .extra_headers = &.{
            .{ .name = "Content-Type", .value = "application/json" },
        },
    });
    defer req.deinit();
    // Content-Length transfer encoding is required for POST bodies in
    // Zig 0.15.2; the client derives the Content-Length header from it.
    req.transfer_encoding = .{ .content_length = request_body.len };
    std.debug.print("Chat: Sending body of length: {}\n", .{request_body.len});
    // BodyWriter API: sendBody() returns the body writer, end() finalizes.
    var body_writer = try req.sendBody(&.{});
    try body_writer.writer.writeAll(request_body);
    try body_writer.end();
    std.debug.print("Chat: Body sent successfully\n", .{});
    var response = try req.receiveHead(&.{});
    // Heap-allocated 10MB response buffer (chat responses can be large).
    const response_buffer = try self.allocator.alloc(u8, 10 * 1024 * 1024);
    defer self.allocator.free(response_buffer);
    var response_writer: std.Io.Writer = .fixed(response_buffer);
    var read_buffer: [4096]u8 = undefined;
    const body_reader: *std.Io.Reader = response.reader(&read_buffer);
    const n = try body_reader.streamRemaining(&response_writer);
    const response_json = response_buffer[0..n];
    std.debug.print("Chat response status: {}, body ({} bytes): {s}\n", .{ response.head.status, n, response_json });
    if (response.head.status != .ok) {
        std.debug.print("Chat request failed with status: {}\n", .{response.head.status});
        return Error.RequestFailed;
    }
    const parsed = try json.parseFromSlice(
        json.Value,
        self.allocator,
        response_json,
        .{},
    );
    defer parsed.deinit();
    // Expected shape: {"message":{"role":"...","content":"..."},"done":true}
    // Validate the shape instead of using `.?`/direct union-field access,
    // which would panic on a malformed response; report InvalidResponse.
    if (parsed.value != .object) return Error.InvalidResponse;
    const obj = parsed.value.object;
    const msg_val = obj.get("message") orelse return Error.InvalidResponse;
    if (msg_val != .object) return Error.InvalidResponse;
    const msg_obj = msg_val.object;
    const role_val = msg_obj.get("role") orelse return Error.InvalidResponse;
    const content_val = msg_obj.get("content") orelse return Error.InvalidResponse;
    const done_val = obj.get("done") orelse return Error.InvalidResponse;
    if (role_val != .string or content_val != .string or done_val != .bool) {
        return Error.InvalidResponse;
    }
    // Duplicate strings out of the parsed tree (freed on return). The
    // errdefer prevents leaking `role` if the second dupe fails.
    const role = try self.allocator.dupe(u8, role_val.string);
    errdefer self.allocator.free(role);
    const content = try self.allocator.dupe(u8, content_val.string);
    return ChatResponse{
        .message = Message{ .role = role, .content = content },
        .done = done_val.bool,
        .allocator = self.allocator,
    };
}
/// List all available models from the Ollama server.
///
/// This function queries the Ollama API to retrieve information about all installed
/// models, including their names, sizes, and modification dates.
///
/// # Returns
/// A `ListResponse` containing an array of `ModelInfo` structures.
/// The response must be freed using `deinit()` when no longer needed.
///
/// # Implementation Details
///
/// The function follows this flow:
/// 1. Builds the API URL: base_url + "/api/tags" (Ollama's endpoint for listing models)
/// 2. Creates HTTP GET request (no body needed)
/// 3. Calls `sendBodiless()` since GET requests don't have bodies
/// 4. Receives response headers
/// 5. Reads response body containing JSON array of models
/// 6. Parses JSON and extracts model information
/// 7. Allocates array and duplicates all strings
///
/// # Memory Management
/// - URL buffer is automatically freed via `defer`
/// - Response buffer is heap-allocated (10MB) and freed via `defer`
/// - Model info array and all strings are heap-allocated and must be freed
///
/// # Error Handling
/// Returns errors for network failures, HTTP errors, JSON parsing errors, or allocation failures.
pub fn listModels(self: *Ollama) Error!ListResponse {
    // Build URL for the tags endpoint (Ollama's name for listing models).
    var url_buffer = std.ArrayListUnmanaged(u8){};
    defer url_buffer.deinit(self.allocator);
    try url_buffer.writer(self.allocator).print("{s}/api/tags", .{self.base_url});
    const uri = try std.Uri.parse(url_buffer.items);
    // GET request: no body, so no headers or transfer encoding needed.
    var req = try self.client.request(.GET, uri, .{});
    defer req.deinit();
    try req.sendBodiless();
    var response = try req.receiveHead(&.{});
    if (response.head.status != .ok) {
        std.debug.print("ListModels request failed with status: {}\n", .{response.head.status});
        return Error.RequestFailed;
    }
    // Heap-allocated 10MB response buffer, freed before returning.
    const response_buffer = try self.allocator.alloc(u8, 10 * 1024 * 1024);
    defer self.allocator.free(response_buffer);
    var response_writer: std.Io.Writer = .fixed(response_buffer);
    var read_buffer: [4096]u8 = undefined;
    const body_reader: *std.Io.Reader = response.reader(&read_buffer);
    const n = try body_reader.streamRemaining(&response_writer);
    const response_json = response_buffer[0..n];
    std.debug.print("ListModels response body ({} bytes): {s}\n", .{ n, response_json });
    const parsed = try json.parseFromSlice(
        json.Value,
        self.allocator,
        response_json,
        .{},
    );
    defer parsed.deinit();
    // Expected shape: {"models":[{"name":..,"modified_at":..,"size":..},..]}
    // Validate instead of `.?`, which would panic on a malformed response.
    if (parsed.value != .object) return Error.InvalidResponse;
    const models_val = parsed.value.object.get("models") orelse return Error.InvalidResponse;
    if (models_val != .array) return Error.InvalidResponse;
    const models_array = models_val.array;
    const models = try self.allocator.alloc(ModelInfo, models_array.items.len);
    errdefer self.allocator.free(models);
    // Track how many entries are fully initialized so the errdefer below
    // frees exactly the strings duplicated before a mid-loop failure.
    var filled: usize = 0;
    errdefer {
        for (models[0..filled]) |m| {
            self.allocator.free(m.name);
            self.allocator.free(m.modified_at);
        }
    }
    for (models_array.items) |model_val| {
        if (model_val != .object) return Error.InvalidResponse;
        const model_obj = model_val.object;
        const name_val = model_obj.get("name") orelse return Error.InvalidResponse;
        const modified_val = model_obj.get("modified_at") orelse return Error.InvalidResponse;
        const size_val = model_obj.get("size") orelse return Error.InvalidResponse;
        if (name_val != .string or modified_val != .string or size_val != .integer) {
            return Error.InvalidResponse;
        }
        // Duplicate strings out of the parsed tree: the tree's memory is
        // released by `parsed.deinit()` when this function returns.
        const name = try self.allocator.dupe(u8, name_val.string);
        errdefer self.allocator.free(name);
        const modified_at = try self.allocator.dupe(u8, modified_val.string);
        models[filled] = ModelInfo{
            .name = name,
            .modified_at = modified_at,
            .size = size_val.integer,
            .allocator = self.allocator,
        };
        filled += 1;
    }
    return ListResponse{
        .models = models,
        .allocator = self.allocator,
    };
}
/// Append `text` to `buffer` with JSON string escaping applied.
///
/// Ensures user-provided text can be safely embedded inside a JSON string
/// literal by escaping every character that is significant there.
///
/// # Parameters
/// - `buffer`: destination list. In Zig 0.15 `std.ArrayList(u8)` is the
///   unmanaged list, which is why the allocator is passed on every append.
/// - `text`: the raw text to escape.
///
/// # Escaped Characters
/// - `"` (double quote) → `\"`
/// - `\` (backslash) → `\\`
/// - `\n` → `\n`, `\r` → `\r`, `\t` → `\t` (two-character escapes)
/// - every other control character below 0x20 → `\u00XX`
///   (RFC 8259 requires ALL control characters to be escaped; emitting
///   them raw would produce invalid JSON)
///
/// All remaining bytes are copied through unchanged.
fn jsonEscape(self: *Ollama, buffer: *std.ArrayList(u8), text: []const u8) !void {
    for (text) |c| {
        switch (c) {
            // Double quote would terminate the JSON string early.
            '"' => try buffer.appendSlice(self.allocator, "\\\""),
            // Backslash is the escape character itself.
            '\\' => try buffer.appendSlice(self.allocator, "\\\\"),
            '\n' => try buffer.appendSlice(self.allocator, "\\n"),
            '\r' => try buffer.appendSlice(self.allocator, "\\r"),
            '\t' => try buffer.appendSlice(self.allocator, "\\t"),
            // Remaining control characters (0x00-0x1F minus the ones
            // handled above) use the generic \u00XX form per RFC 8259.
            0...8, 11, 12, 14...31 => try buffer.writer(self.allocator).print("\\u{x:0>4}", .{c}),
            // Everything else passes through unchanged.
            else => try buffer.append(self.allocator, c),
        }
    }
}
};
// ============================================================================
// TESTS
// ============================================================================
// Renamed from "OllamaClient - ..." for consistency with the other tests.
test "Ollama - init and deinit" {
    // Use std.testing.allocator directly: copying allocator_instance by
    // value makes the test runner's end-of-test leak check inspect the
    // global instance while allocations went through the local copy.
    var client = try Ollama.init(testing.allocator, null);
    defer client.deinit();
    try testing.expect(client.base_url.len > 0);
    try testing.expectEqualStrings("http://localhost:11434", client.base_url);
}
test "Ollama - init with custom URL" {
    // std.testing.allocator is used directly (no allocator_instance copy)
    // so leak detection covers this test's allocations.
    var client = try Ollama.init(testing.allocator, "http://192.168.1.100:11434");
    defer client.deinit();
    try testing.expectEqualStrings("http://192.168.1.100:11434", client.base_url);
}
test "Ollama - Message structure" {
    // A Message is a plain role/content pair of string slices.
    const message: Ollama.Message = .{
        .role = "user",
        .content = "Hello, Ollama!",
    };
    try testing.expectEqualStrings("user", message.role);
    try testing.expectEqualStrings("Hello, Ollama!", message.content);
}
test "Ollama - GenerateOptions defaults" {
    // Default-initialized options: default model, all tunables unset,
    // streaming off.
    const defaults: Ollama.GenerateOptions = .{};
    try testing.expectEqualStrings("granite4:tiny-h", defaults.model);
    try testing.expectEqual(@as(?f32, null), defaults.temperature);
    try testing.expectEqual(@as(?i32, null), defaults.seed);
    try testing.expect(!defaults.stream);
}
test "Ollama - GenerateOptions custom" {
    // Explicitly-set options are readable back unchanged.
    const custom: Ollama.GenerateOptions = .{
        .model = "granite4:tiny-h",
        .temperature = 0.7,
        .seed = 123,
        .stream = false,
        .num_predict = 100,
    };
    try testing.expectEqualStrings("granite4:tiny-h", custom.model);
    try testing.expectEqual(@as(f32, 0.7), custom.temperature.?);
    try testing.expectEqual(@as(i32, 123), custom.seed.?);
    try testing.expect(!custom.stream);
    try testing.expectEqual(@as(i32, 100), custom.num_predict.?);
}
// Renamed from "OllamaClient - ..." for consistency with the other tests.
test "Ollama - ChatOptions defaults" {
    const opts = Ollama.ChatOptions{};
    try testing.expectEqualStrings("granite4:tiny-h", opts.model);
    try testing.expect(opts.temperature == null);
    try testing.expect(opts.seed == null);
    try testing.expect(opts.stream == false);
}
// Integration tests - require a running Ollama server
test "Ollama - listModels integration" {
    // Integration test: requires a running Ollama server. Skipped by
    // default, matching the convention used by the generate integration
    // test; delete the guard to run it locally.
    if (true) return error.SkipZigTest;
    var client = try Ollama.init(testing.allocator, null);
    defer client.deinit();
    const list_response = try client.listModels();
    defer list_response.deinit();
    std.debug.print("\nAvailable models: {d}\n", .{list_response.models.len});
    for (list_response.models) |model| {
        std.debug.print(" - {s} (size: {d})\n", .{ model.name, model.size });
        // `models.len >= 0` is always true for usize, so assert on a
        // property that can actually fail instead.
        try testing.expect(model.name.len > 0);
    }
}
test "Ollama - generate integration" {
    // Integration test: requires a running Ollama server. Skipped by
    // default; delete the guard to run it locally.
    if (true) return error.SkipZigTest;
    // Use std.testing.allocator directly so leak detection works.
    var client = try Ollama.init(testing.allocator, null);
    defer client.deinit();
    const response = try client.generate("Say hello in one word", .{
        .model = "granite4:tiny-h",
    });
    defer response.deinit();
    std.debug.print("\nGenerate Response: {s}\n", .{response.response});
    std.debug.print("Model: {s}, Done: {}\n", .{ response.model, response.done });
    try testing.expect(response.response.len > 0);
    try testing.expect(response.done);
}
test "Ollama - chat integration" {
    // Integration test: requires a running Ollama server. Skipped by
    // default for parity with the generate integration test; delete the
    // guard to run it locally.
    if (true) return error.SkipZigTest;
    var client = try Ollama.init(testing.allocator, null);
    defer client.deinit();
    const messages = [_]Ollama.Message{
        .{ .role = "user", .content = "What is the capital of France?" },
    };
    const response = try client.chat(&messages, .{
        .model = "granite4:tiny-h",
    });
    defer response.deinit();
    std.debug.print("\nChat Response: {s}\n", .{response.message.content});
    std.debug.print("Role: {s}, Done: {}\n", .{ response.message.role, response.done });
    try testing.expect(response.message.content.len > 0);
    try testing.expectEqualStrings("assistant", response.message.role);
    try testing.expect(response.done);
}