feat(ci): v0.9.0 — Distribution & Expansion milestone complete

---ci--- project: ci phase: 6 milestone: v0.9 status: complete artifacts: tags: [v0.9.0] decisions: - id: D-047 decision: v0.9 theme = Distribution & Expansion rationale: npm publish + OpenAI/Anthropic backends + agent flesh + parallel execution confidence: 0.92 - id: D-049 decision: Feature milestone — patch tags v0.8.1-v0.8.6 then v0.9.0 rationale: OpenAI backend, agent flesh, npm publish all feat confidence: 0.95 - id: D-059 decision: Rename OllamaBaseBackend to LLMBaseBackend + thin OllamaBaseBackend subclass rationale: 15 of 17 methods backend-agnostic confidence: 0.92 - id: D-060 decision: OpenAI/Anthropic backends use native fetch() not SDK packages rationale: No dependency bloat; fetch native in Node 18+ confidence: 0.85 - id: D-066 decision: Concurrency limiter internal (no p-limit dependency) rationale: 15 lines; avoids dependency for trivial feature confidence: 0.90 - id: D-067 decision: Promise.allSettled for review agents at orchestrator lines 373-400 rationale: Current sequential loop replaced with parallel execution confidence: 0.88 requirements: covered: [PUBLISH-01, PUBLISH-02, PUBLISH-03, PUBLISH-04, OPENAI-01, OPENAI-02, OPENAI-03, OPENAI-04, OPENAI-05, FLESH-01, FLESH-02, FLESH-03, FLESH-04, FLESH-05, ANTHROPIC-01, ANTHROPIC-02, FLESH-06, FLESH-07, NPM-01, NPM-02, PARALLEL-01, PARALLEL-02, PARALLEL-03, INTEG-01, INTEG-02, INTEG-03, INTEG-04, INTEG-05] ---/ci--- 6 phases, 28 tasks, 4077 net lines added, 57 test suites, 527 tests, zero stub agents
2026-05-30 02:19:44 +00:00
parent 4b7d16247d
commit a8b50f5109
40 changed files with 4075 additions and 455 deletions
@@ -0,0 +1,196 @@
+import { AnthropicBackend } from "../backends/anthropic.js";
+import { ChatCompletionResponse } from "../backends/llm-base.js";
+
+describe("AnthropicBackend", () => {
+  /** Constructor config type, derived from the class so the test needs no additional import. */
+  type AnthropicTestConfig = ConstructorParameters<typeof AnthropicBackend>[0];
+
+  const originalFetch = globalThis.fetch;
+  /** Every request the mocked fetch received during the current test, in call order. */
+  let fetchCalls: Array<{ url: string; headers: Record<string, string>; body: string }>;
+
+  beforeEach(() => {
+    fetchCalls = [];
+  });
+
+  afterEach(() => {
+    // Undo the global fetch replacement and remove every env var a test may have set.
+    globalThis.fetch = originalFetch;
+    delete process.env.TEST_ANTHROPIC_KEY;
+    delete process.env.TEST_ANTHROPIC_KEY_EMPTY;
+  });
+
+  /** Replaces globalThis.fetch with a stub that records the call and answers with `response`. */
+  function mockFetch(response: Record<string, unknown>, status = 200): void {
+    globalThis.fetch = ((url: string, init: RequestInit) => {
+      fetchCalls.push({
+        url,
+        headers: (init.headers as Record<string, string>) || {},
+        body: init.body as string,
+      });
+      return Promise.resolve({
+        ok: status >= 200 && status < 300,
+        status,
+        text: () => Promise.resolve(JSON.stringify(response)),
+        json: () => Promise.resolve(response),
+      } as Response);
+    }) as typeof fetch;
+  }
+
+  /** Builds a Messages API style response body holding a single text block. */
+  function makeAnthropicResponse(text: string, usage = { input_tokens: 10, output_tokens: 20 }): Record<string, unknown> {
+    return {
+      content: [{ type: "text", text }],
+      usage,
+      model: "claude-sonnet-4-20250514",
+    };
+  }
+
+  /** Default backend config reading its key from TEST_ANTHROPIC_KEY; `overrides` replaces individual fields. */
+  function makeConfig(overrides: Partial<AnthropicTestConfig> = {}): AnthropicTestConfig {
+    return {
+      base_url: "https://api.anthropic.com",
+      api_key_env: "TEST_ANTHROPIC_KEY",
+      model: "claude-sonnet-4-20250514",
+      model_profile: "quality",
+      ...overrides,
+    };
+  }
+
+  /** Minimal executor request; only the task text varies between tests. */
+  function makeRequest(task = "test") {
+    return {
+      persona: "executor" as const,
+      workflow: "execute",
+      task,
+      context: {
+        project_path: "/tmp",
+        phase: 1,
+        stage: "execute" as const,
+        specification: "",
+        config_path: "",
+      },
+      autonomy: "full" as const,
+    };
+  }
+
+  describe("isAvailable", () => {
+    it("returns true when API key is present", async () => {
+      process.env.TEST_ANTHROPIC_KEY = "sk-ant-test-key-123";
+      const backend = new AnthropicBackend(makeConfig());
+      expect(await backend.isAvailable()).toBe(true);
+    });
+
+    it("returns false when API key is absent", async () => {
+      const backend = new AnthropicBackend(makeConfig({ api_key_env: "NONEXISTENT_ANTHROPIC_KEY_VAR_99999" }));
+      expect(await backend.isAvailable()).toBe(false);
+    });
+
+    it("returns false when API key is an empty string", async () => {
+      // afterEach already cleans this variable up; this is the case it was reserved for.
+      process.env.TEST_ANTHROPIC_KEY_EMPTY = "";
+      const backend = new AnthropicBackend(makeConfig({ api_key_env: "TEST_ANTHROPIC_KEY_EMPTY" }));
+      expect(await backend.isAvailable()).toBe(false);
+    });
+  });
+
+  describe("resolveModel", () => {
+    it("returns config.model when set", async () => {
+      process.env.TEST_ANTHROPIC_KEY = "sk-ant-test";
+      mockFetch(makeAnthropicResponse('{"success": true, "output": "done"}'));
+      const backend = new AnthropicBackend(
+        makeConfig({ model: "claude-3-haiku-20240307", model_profile: "speed" })
+      );
+      await backend.execute(makeRequest());
+      const body = JSON.parse(fetchCalls[0].body);
+      expect(body.model).toBe("claude-3-haiku-20240307");
+    });
+
+    it("defaults to claude-sonnet-4-20250514 when model not specified", async () => {
+      process.env.TEST_ANTHROPIC_KEY = "sk-ant-test";
+      mockFetch(makeAnthropicResponse('{"success": true, "output": "done"}'));
+      const backend = new AnthropicBackend(makeConfig({ model: "" }));
+      await backend.execute(makeRequest());
+      const body = JSON.parse(fetchCalls[0].body);
+      expect(body.model).toBe("claude-sonnet-4-20250514");
+    });
+  });
+
+  describe("callModel request format", () => {
+    it("sends correct URL, x-api-key header, anthropic-version header, system field, max_tokens", async () => {
+      process.env.TEST_ANTHROPIC_KEY = "sk-ant-test-key-abc";
+      mockFetch(makeAnthropicResponse('{"success": true, "output": "done"}'));
+
+      const backend = new AnthropicBackend(makeConfig());
+
+      await backend.execute(makeRequest("Do the thing"));
+
+      expect(fetchCalls.length).toBe(1);
+      expect(fetchCalls[0].url).toBe("https://api.anthropic.com/v1/messages");
+      expect(fetchCalls[0].headers["x-api-key"]).toBe("sk-ant-test-key-abc");
+      expect(fetchCalls[0].headers["anthropic-version"]).toBe("2023-06-01");
+      expect(fetchCalls[0].headers["Content-Type"]).toBe("application/json");
+      // Anthropic authenticates through x-api-key, so no bearer token must be sent.
+      expect(fetchCalls[0].headers["Authorization"]).toBeUndefined();
+
+      const body = JSON.parse(fetchCalls[0].body);
+      expect(body.model).toBe("claude-sonnet-4-20250514");
+      expect(body.max_tokens).toBe(4096);
+      expect(typeof body.system).toBe("string");
+      expect(body.system.length).toBeGreaterThan(0);
+      expect(Array.isArray(body.messages)).toBe(true);
+      expect(body.messages.length).toBeGreaterThanOrEqual(1);
+    });
+  });
+
+  describe("custom base_url override", () => {
+    it("sends request to custom base_url", async () => {
+      process.env.TEST_ANTHROPIC_KEY = "sk-ant-test";
+      mockFetch(makeAnthropicResponse('{"success": true, "output": "done"}'));
+
+      const backend = new AnthropicBackend(makeConfig({ base_url: "https://custom-proxy.example.com/api" }));
+
+      await backend.execute(makeRequest());
+      expect(fetchCalls[0].url).toBe("https://custom-proxy.example.com/api/v1/messages");
+    });
+  });
+});
@@ -0,0 +1,171 @@
+import { LLMBaseBackend, ChatMessage, ChatCompletionResponse } from "./llm-base.js";
+import { BackendType, AnthropicConfig, emptyBackendResult } from "./types.js";
+import { ToolRegistry, ToolDefinition } from "./tool-registry.js";
+
+/** Content blocks this backend places in a Messages API `messages[].content` array. */
+type AnthropicContentBlock =
+  | { type: "text"; text: string }
+  | { type: "tool_use"; id: string; name: string; input: unknown }
+  | { type: "tool_result"; tool_use_id: string; content: string };
+
+/** One conversation turn in Messages API shape. */
+interface AnthropicMessage {
+  role: "user" | "assistant";
+  content: AnthropicContentBlock[];
+}
+
+/**
+ * LLM backend that calls the Anthropic Messages API (`POST {base_url}/v1/messages`)
+ * through the global fetch().
+ *
+ * The tool loop itself lives in LLMBaseBackend; this class only translates the
+ * generic chat messages into the Messages API request shape and translates the
+ * response back into a ChatCompletionResponse.
+ */
+export class AnthropicBackend extends LLMBaseBackend {
+  readonly name = "anthropic";
+  readonly type: BackendType = "llm";
+
+  private anthropicConfig: AnthropicConfig;
+
+  /**
+   * @param config Backend configuration. An empty `base_url` falls back to
+   *   `https://api.anthropic.com`.
+   */
+  constructor(config: AnthropicConfig) {
+    super({ ...config, base_url: config.base_url || "https://api.anthropic.com" });
+    this.anthropicConfig = config;
+  }
+
+  /**
+   * Reports whether the environment variable named by `api_key_env` holds a
+   * non-empty value. No network request is made.
+   */
+  async isAvailable(): Promise<boolean> {
+    return Boolean(process.env[this.anthropicConfig.api_key_env]);
+  }
+
+  /** Returns the configured model, or `claude-sonnet-4-20250514` when none is set. */
+  protected resolveModel(): string {
+    return this.anthropicConfig.model || "claude-sonnet-4-20250514";
+  }
+
+  /** Model discovery is not implemented for this backend; always resolves to an empty list. */
+  protected async fetchAvailableModels(): Promise<string[]> {
+    return [];
+  }
+
+  /**
+   * Sends one request to the Messages API and returns the translated response.
+   *
+   * @param messages Conversation so far in the generic chat format.
+   * @param model Model identifier placed in the request body.
+   * @param toolRegistry Registry used as the fallback source of the tool schema.
+   * @returns The response converted by translateResponse().
+   * @throws Error when the API key is missing, on 401/403, on 429, and on any
+   *   other non-2xx status (the message includes the status and response text).
+   */
+  protected async callModel(
+    messages: ChatMessage[],
+    model: string,
+    toolRegistry: ToolRegistry
+  ): Promise<ChatCompletionResponse> {
+    const apiKey = process.env[this.anthropicConfig.api_key_env];
+    if (!apiKey) {
+      throw new Error(`API key not found. Set ${this.anthropicConfig.api_key_env} environment variable.`);
+    }
+
+    const apiVersion = this.anthropicConfig.api_version || "2023-06-01";
+
+    const headers: Record<string, string> = {
+      "Content-Type": "application/json",
+      "x-api-key": apiKey,
+      "anthropic-version": apiVersion,
+    };
+
+    const conversation = this.toAnthropicMessages(messages);
+
+    // The shared schema is in OpenAI function-calling shape; unwrap it into
+    // the name/description/input_schema triple the Messages API expects.
+    const toolDefinitions = this.getActiveToolSchema(toolRegistry);
+    const anthropicTools = toolDefinitions.map((tool) => {
+      const fn = (tool as Record<string, unknown>).function as Record<string, unknown>;
+      return {
+        name: fn.name,
+        description: fn.description,
+        input_schema: fn.parameters,
+      };
+    });
+
+    const body: Record<string, unknown> = {
+      model,
+      max_tokens: 4096,
+      messages: conversation.messages,
+    };
+
+    // System text travels in a top-level field, not as a message.
+    if (conversation.system) {
+      body.system = conversation.system;
+    }
+
+    if (anthropicTools.length > 0) {
+      body.tools = anthropicTools;
+    }
+
+    const timeout = this.anthropicConfig.timeout_ms || 60000;
+    // Drop trailing slashes so a base_url such as "https://host/api/" does not yield "//v1/messages".
+    const baseUrl = this.config.base_url.replace(/\/+$/, "");
+    const url = `${baseUrl}/v1/messages`;
+
+    const response = await fetch(url, {
+      method: "POST",
+      headers,
+      body: JSON.stringify(body),
+      signal: AbortSignal.timeout(timeout),
+    });
+
+    if (response.status === 401 || response.status === 403) {
+      throw new Error(`Authentication failed. Check ${this.anthropicConfig.api_key_env} environment variable.`);
+    }
+
+    if (response.status === 429) {
+      throw new Error("Rate limited by Anthropic API. Please retry after a delay.");
+    }
+
+    if (!response.ok) {
+      const errorText = await response.text().catch(() => "unknown error");
+      throw new Error(`Anthropic API error (${response.status}): ${errorText}`);
+    }
+
+    const anthropicResponse = await response.json() as Record<string, unknown>;
+    return this.translateResponse(anthropicResponse);
+  }
+
+  /**
+   * Converts the generic chat history into Messages API turns.
+   *
+   * - "system" messages are concatenated (newline separated) into `system`.
+   * - Assistant tool calls become `tool_use` blocks with `id`, `name` and `input`.
+   * - "tool" messages become `tool_result` blocks that reference the matching
+   *   `tool_use` id; results answering the same assistant turn are grouped into
+   *   one user message.
+   *
+   * ChatMessage carries no tool-call ids, so ids are synthesised from the
+   * position of the call in the history. The same history therefore always
+   * yields the same ids, which keeps `tool_use`/`tool_result` pairs consistent
+   * across rounds. Results are paired with calls in order, which is the order
+   * in which LLMBaseBackend.execute appends them.
+   */
+  private toAnthropicMessages(messages: ChatMessage[]): { system: string; messages: AnthropicMessage[] } {
+    let systemContent = "";
+    const converted: AnthropicMessage[] = [];
+    // Ids of tool_use blocks from the latest assistant turn that still await a result.
+    const pendingToolUseIds: string[] = [];
+    let toolUseCounter = 0;
+
+    for (const m of messages) {
+      if (m.role === "system") {
+        systemContent += (systemContent ? "\n" : "") + m.content;
+      } else if (m.role === "tool") {
+        const toolUseId = pendingToolUseIds.shift();
+        if (toolUseId === undefined) {
+          // No tool_use left to answer: send the text as an ordinary user turn
+          // rather than a tool_result that references a non-existent id.
+          converted.push({ role: "user", content: [{ type: "text", text: m.content }] });
+          continue;
+        }
+        const resultBlock: AnthropicContentBlock = {
+          type: "tool_result",
+          tool_use_id: toolUseId,
+          content: m.content,
+        };
+        const previous = converted[converted.length - 1];
+        const previousHoldsOnlyResults =
+          previous !== undefined &&
+          previous.role === "user" &&
+          previous.content.every((block) => block.type === "tool_result");
+        if (previous !== undefined && previousHoldsOnlyResults) {
+          previous.content.push(resultBlock);
+        } else {
+          converted.push({ role: "user", content: [resultBlock] });
+        }
+      } else if (m.role === "assistant") {
+        const contentBlocks: AnthropicContentBlock[] = [];
+        if (m.content) {
+          contentBlocks.push({ type: "text", text: m.content });
+        }
+        // Unanswered ids from an earlier turn must not be paired with later results.
+        pendingToolUseIds.length = 0;
+        for (const tc of m.tool_calls ?? []) {
+          const id = `toolu_local_${toolUseCounter++}`;
+          pendingToolUseIds.push(id);
+          contentBlocks.push({
+            type: "tool_use",
+            id,
+            name: tc.function.name,
+            input: this.parseToolInput(tc.function.arguments),
+          });
+        }
+        converted.push({
+          role: "assistant",
+          content: contentBlocks,
+        });
+      } else {
+        converted.push({
+          role: m.role,
+          content: [{ type: "text", text: m.content }],
+        });
+      }
+    }
+
+    return { system: systemContent, messages: converted };
+  }
+
+  /**
+   * Parses the JSON text of a tool call's arguments into the object used as
+   * `tool_use.input`. Unparseable or non-object text yields `{}` so that one
+   * bad entry in the history does not make every later request throw.
+   */
+  private parseToolInput(rawArguments: string): unknown {
+    try {
+      const parsed: unknown = JSON.parse(rawArguments);
+      return parsed !== null && typeof parsed === "object" ? parsed : {};
+    } catch {
+      return {};
+    }
+  }
+
+  /**
+   * Converts a Messages API response into the generic ChatCompletionResponse:
+   * text blocks are concatenated into `content`, `tool_use` blocks become
+   * `tool_calls` with JSON-encoded arguments, and `usage` is mapped to
+   * prompt/completion/total token counts (0 when the response has none).
+   */
+  private translateResponse(response: Record<string, unknown>): ChatCompletionResponse {
+    const content = (response.content as Array<Record<string, unknown>>) || [];
+    let textContent = "";
+    const toolCalls: Array<{ function: { name: string; arguments: string } }> = [];
+
+    for (const block of content) {
+      if (block.type === "text") {
+        textContent += (block.text as string) || "";
+      } else if (block.type === "tool_use") {
+        toolCalls.push({
+          function: {
+            name: (block.name as string) || "",
+            arguments: JSON.stringify(block.input || {}),
+          },
+        });
+      }
+    }
+
+    const usage = response.usage as { input_tokens: number; output_tokens: number } | undefined;
+    const inputTokens = usage?.input_tokens || 0;
+    const outputTokens = usage?.output_tokens || 0;
+
+    return {
+      choices: [
+        {
+          message: {
+            content: textContent,
+            tool_calls: toolCalls.length > 0 ? toolCalls : undefined,
+          },
+        },
+      ],
+      usage: {
+        prompt_tokens: inputTokens,
+        completion_tokens: outputTokens,
+        total_tokens: inputTokens + outputTokens,
+      },
+    };
+  }
+}
@@ -96,6 +96,7 @@ describe("Backend Availability Detection", () => {
    it("contains installation hints", () => {
      const err = new BackendUnavailableError("auto");
      expect(err.message).toContain("opencode");
+      expect(err.message).toContain("OpenAI");
      expect(err.message).toContain("Ollama");
      expect(err.message).toContain("OLLAMA_CLOUD_API_KEY");
    });
@@ -54,6 +54,7 @@ describe("DEFAULT_BACKEND_CONFIG", () => {
  });

  it("has ollama-local and ollama-cloud llm backends", () => {
+    expect(DEFAULT_BACKEND_CONFIG.llm_backends["openai"]).toBeDefined();
    expect(DEFAULT_BACKEND_CONFIG.llm_backends["ollama-local"]).toBeDefined();
    expect(DEFAULT_BACKEND_CONFIG.llm_backends["ollama-cloud"]).toBeDefined();
  });
@@ -1,12 +1,16 @@
 import { IntelligenceBackend, BackendConfigSection, BackendUnavailableError } from "./types.js";
 import { OpencodeBackend } from "./opencode.js";
+import { OpenAIBackend } from "./openai.js";
 import { OllamaLocalBackend } from "./ollama-local.js";
 import { OllamaCloudBackend } from "./ollama-cloud.js";
+import { AnthropicBackend } from "./anthropic.js";

-const AUTO_DETECT_ORDER: Array<"opencode" | "ollama-local" | "ollama-cloud"> = [
+const AUTO_DETECT_ORDER: Array<"opencode" | "openai" | "ollama-local" | "ollama-cloud" | "anthropic"> = [
  "opencode",
+  "openai",
  "ollama-local",
  "ollama-cloud",
+  "anthropic",
 ];

 export function createBackend(
@@ -16,10 +20,20 @@ export function createBackend(
  switch (name) {
    case "opencode":
      return new OpencodeBackend(config.agent_backends.opencode);
+    case "openai":
+      if (!config.llm_backends["openai"]) {
+        throw new BackendUnavailableError("openai");
+      }
+      return new OpenAIBackend(config.llm_backends["openai"]);
    case "ollama-local":
      return new OllamaLocalBackend(config.llm_backends["ollama-local"]);
    case "ollama-cloud":
      return new OllamaCloudBackend(config.llm_backends["ollama-cloud"]);
+    case "anthropic":
+      if (!config.llm_backends["anthropic"]) {
+        throw new BackendUnavailableError("anthropic");
+      }
+      return new AnthropicBackend(config.llm_backends["anthropic"]);
    default:
      throw new BackendUnavailableError(name);
  }
@@ -49,7 +63,10 @@ export async function resolveBackend(
 }

 export { IntelligenceBackend, BackendConfigSection, BackendUnavailableError } from "./types.js";
+export { LLMBaseBackend, ChatMessage, ChatCompletionResponse } from "./llm-base.js";
 export { ToolRegistry, ToolDefinition, ToolCall, ToolResult } from "./tool-registry.js";
 export { OpencodeBackend } from "./opencode.js";
+export { OpenAIBackend } from "./openai.js";
 export { OllamaLocalBackend } from "./ollama-local.js";
-export { OllamaCloudBackend } from "./ollama-cloud.js";
+export { OllamaCloudBackend } from "./ollama-cloud.js";
+export { AnthropicBackend } from "./anthropic.js";
@@ -0,0 +1,361 @@
+import * as fs from "node:fs";
+import * as path from "node:path";
+import * as os from "node:os";
+import {
+  IntelligenceBackend,
+  BackendRequest,
+  BackendResult,
+  BackendType,
+  LLMBackendConfig,
+  TokenUsage,
+  Artifact,
+  emptyTokenUsage,
+  emptyBackendResult,
+} from "./types.js";
+import { AgentName, ModelProfile } from "../types/config.js";
+import { Decision } from "../types/decisions.js";
+import { Escalation } from "../types/escalation.js";
+import { ToolRegistry, ToolCall, ToolResult, ToolDefinition } from "./tool-registry.js";
+
+const MAX_TOOL_ROUNDS = 50; // Upper bound on model/tool round-trips in LLMBaseBackend.execute; after it the run ends with a "partial progress" output.
+
+const PERSONA_TOOL_MAP: Record<string, string> = { // Persona frontmatter tool key -> tool definition name used when filtering; unknown keys pass through unchanged.
+  read: "readFile",
+  write: "writeFile",
+  edit: "editFile",
+  bash: "runBash",
+  glob: "glob",
+  grep: "grep",
+};
+
+export interface ChatMessage { // One conversation turn passed from LLMBaseBackend.execute to callModel.
+  role: "system" | "user" | "assistant" | "tool";
+  content: string;
+  name?: string; // execute() sets this to the tool name on role "tool" messages.
+  tool_calls?: Array<{ // Tool invocations returned by the model; execute() copies them onto the assistant message.
+    function: { name: string; arguments: string }; // `arguments` is JSON text; execute() runs JSON.parse on it.
+  }>;
+}
+
+export interface ChatCompletionResponse { // Return shape of the abstract callModel(), consumed by LLMBaseBackend.execute.
+  choices?: Array<{ // execute() reads only the first entry.
+    message: {
+      content: string; // Assistant text; execute() substitutes "" when it is missing.
+      tool_calls?: Array<{ // When absent or empty, execute() treats `content` as the final answer.
+        function: { name: string; arguments: string };
+      }>;
+    };
+  }>;
+  usage?: { // Token counts for this call; execute() sums prompt_tokens and completion_tokens across rounds.
+    prompt_tokens: number;
+    completion_tokens: number;
+    total_tokens: number;
+  };
+}
+
+/**
+ * Base class for chat-style LLM backends.
+ *
+ * It owns everything that does not depend on a particular HTTP API: loading
+ * the persona and workflow files, building the system prompt, running the
+ * tool-calling loop and parsing the final JSON answer. Subclasses provide
+ * isAvailable(), callModel(), resolveModel() and fetchAvailableModels().
+ */
+export abstract class LLMBaseBackend implements IntelligenceBackend {
+  abstract readonly name: string;
+  readonly type: BackendType = "llm";
+
+  protected config: LLMBackendConfig;
+  protected projectPath: string;
+  /** Tool schema (OpenAI function-calling shape) restricted to the tools the current persona may use. */
+  protected filteredToolSchema: Array<Record<string, unknown>> | null = null;
+
+  /**
+   * @param config Backend configuration; when undefined a localhost:11434
+   *   "balanced" configuration is used.
+   */
+  constructor(config: LLMBackendConfig | undefined) {
+    this.config = config || { base_url: "http://localhost:11434", model_profile: "balanced" };
+    this.projectPath = process.cwd();
+  }
+
+  abstract isAvailable(): Promise<boolean>;
+
+  /**
+   * Runs one request through the model, executing requested tools until the
+   * model answers without tool calls or MAX_TOOL_ROUNDS is reached.
+   *
+   * @param request Persona, workflow, task text and execution context.
+   * @returns The parsed final answer. Any exception raised on the way is
+   *   converted into `emptyBackendResult("Backend execution failed: ...")`,
+   *   so this method does not reject.
+   */
+  async execute(request: BackendRequest): Promise<BackendResult> {
+    try {
+      const personaContent = this.loadPersona(request.persona);
+      const workflowContent = this.loadWorkflow(request.workflow);
+      const model = this.resolveModel();
+
+      const toolRegistry = new ToolRegistry(request.context.project_path);
+      const allowedTools = this.parsePersonaTools(personaContent);
+      const filteredDefinitions = this.filterToolDefinitions(toolRegistry.getDefinitions(), allowedTools);
+      this.filteredToolSchema = this.definitionsToOpenAISchema(filteredDefinitions);
+
+      const messages: ChatMessage[] = [];
+      messages.push({
+        role: "system",
+        content: this.buildSystemPrompt(personaContent, workflowContent, request),
+      });
+      messages.push({
+        role: "user",
+        content: request.task,
+      });
+
+      let totalInputTokens = 0;
+      let totalOutputTokens = 0;
+      let round = 0;
+      const allArtifacts: Artifact[] = [];
+      const allDecisions: Decision[] = [];
+      const allEscalations: Escalation[] = [];
+
+      while (round < MAX_TOOL_ROUNDS) {
+        round++;
+        const response = await this.callModelWithTools(messages, model, filteredDefinitions);
+
+        totalInputTokens += response.usage?.prompt_tokens || 0;
+        totalOutputTokens += response.usage?.completion_tokens || 0;
+
+        const assistantContent = response.choices?.[0]?.message?.content || "";
+        const toolCalls = response.choices?.[0]?.message?.tool_calls;
+
+        messages.push({
+          role: "assistant",
+          content: assistantContent,
+          tool_calls: toolCalls,
+        });
+
+        // No tool calls means the model has produced its final answer.
+        if (!toolCalls || toolCalls.length === 0) {
+          return this.parseFinalResponse(assistantContent, allArtifacts, allDecisions, allEscalations, {
+            input_tokens: totalInputTokens,
+            output_tokens: totalOutputTokens,
+            total_tokens: totalInputTokens + totalOutputTokens,
+            estimated_cost_usd: 0,
+          });
+        }
+
+        for (const toolCall of toolCalls) {
+          const toolName = toolCall.function.name;
+
+          // The arguments are model-generated text. Malformed JSON is reported
+          // back as the tool's result so the model can correct the call,
+          // instead of aborting the whole run. One "tool" message is still
+          // emitted per call so calls and results stay paired.
+          let parsedArguments: ToolCall["arguments"];
+          try {
+            parsedArguments = JSON.parse(toolCall.function.arguments);
+          } catch (parseErr) {
+            const reason = parseErr instanceof Error ? parseErr.message : String(parseErr);
+            messages.push({
+              role: "tool",
+              name: toolName,
+              content: `Error: arguments for tool "${toolName}" are not valid JSON (${reason}). Re-issue the call with a valid JSON object.`,
+            });
+            continue;
+          }
+
+          const call: ToolCall = {
+            name: toolName,
+            arguments: parsedArguments,
+          };
+          const result = toolRegistry.execute(call);
+          messages.push({
+            role: "tool",
+            name: call.name,
+            content: result.content,
+          });
+
+          // Successful writes are recorded so they are reported even when the
+          // final answer lists no artifacts of its own.
+          if (call.name === "writeFile" && !result.isError) {
+            allArtifacts.push({
+              path: String(call.arguments.path),
+              content: String(call.arguments.content),
+              operation: "create",
+            });
+          }
+        }
+      }
+
+      // Round budget exhausted: report whatever assistant text was produced.
+      const finalContent = messages
+        .filter((m) => m.role === "assistant" && m.content)
+        .map((m) => m.content)
+        .join("\n");
+
+      return this.parseFinalResponse(
+        `Tool loop reached maximum rounds (${MAX_TOOL_ROUNDS}). Partial progress:\n${finalContent}`,
+        allArtifacts,
+        allDecisions,
+        allEscalations,
+        { input_tokens: totalInputTokens, output_tokens: totalOutputTokens, total_tokens: totalInputTokens + totalOutputTokens, estimated_cost_usd: 0 }
+      );
+    } catch (err) {
+      return emptyBackendResult(`Backend execution failed: ${err instanceof Error ? err.message : String(err)}`);
+    }
+  }
+
+  /**
+   * Extracts the tool allow-list from the persona's `---` frontmatter.
+   *
+   * Two notations are recognised: an indented `name: value` block under
+   * `tools:` and the inline form `tools: [a, b]`. Names are translated through
+   * PERSONA_TOOL_MAP; unknown names are kept as written.
+   *
+   * In the block form an entry whose value is `false` is treated as disabled.
+   * If the block enables at least one tool, the enabled tools form the
+   * allow-list. If it only disables tools, every other tool in
+   * PERSONA_TOOL_MAP stays available.
+   * NOTE(review): the only-disabled rule follows the opencode agent format as
+   * I understand it - confirm against the shipped persona files.
+   *
+   * @returns The allowed tool names, or null when the persona declares no
+   *   restriction (no frontmatter or no `tools` entry).
+   */
+  protected parsePersonaTools(personaContent: string): string[] | null {
+    // Accept CRLF files; otherwise the frontmatter is not recognised and the
+    // persona silently receives every tool.
+    const normalizedContent = personaContent.replace(/\r\n/g, "\n");
+    const frontmatterMatch = normalizedContent.match(/^---\n([\s\S]*?)\n---/);
+    if (!frontmatterMatch) return null;
+
+    const frontmatter = frontmatterMatch[1];
+    const toolsMatch = frontmatter.match(/tools:\s*\n((?:\s+\w+:.+\n?)+)/);
+    if (!toolsMatch) {
+      const inlineMatch = frontmatter.match(/tools:\s*\[([^\]]+)\]/);
+      if (inlineMatch) {
+        return inlineMatch[1]
+          .split(",")
+          .map((t) => t.trim())
+          .filter(Boolean)
+          .map((t) => PERSONA_TOOL_MAP[t] || t);
+      }
+      return null;
+    }
+
+    const toolsBlock = toolsMatch[1];
+    const enabledTools: string[] = [];
+    const disabledTools = new Set<string>();
+    // Only indented lines are tool entries; the second group is the first token of the value.
+    const lineRegex = /^[ \t]+(\w+):[ \t]*(\S*)/gm;
+    let lineMatch;
+    while ((lineMatch = lineRegex.exec(toolsBlock)) !== null) {
+      const personaToolName = lineMatch[1];
+      const toolName = PERSONA_TOOL_MAP[personaToolName] || personaToolName;
+      if ((lineMatch[2] || "").toLowerCase() === "false") {
+        disabledTools.add(toolName);
+      } else {
+        enabledTools.push(toolName);
+      }
+    }
+
+    if (enabledTools.length > 0) return enabledTools;
+    if (disabledTools.size > 0) {
+      return Object.values(PERSONA_TOOL_MAP).filter((toolName) => !disabledTools.has(toolName));
+    }
+    return null;
+  }
+
+  /** Keeps only the definitions named in `allowedTools`; null means no restriction. */
+  protected filterToolDefinitions(definitions: ToolDefinition[], allowedTools: string[] | null): ToolDefinition[] {
+    if (!allowedTools) return definitions;
+    const allowedSet = new Set(allowedTools);
+    return definitions.filter((def) => allowedSet.has(def.name));
+  }
+
+  /**
+   * Calls the model with the tool set that applies to this request.
+   *
+   * The schema is stored on the instance immediately before callModel() is
+   * invoked, so that an execute() running concurrently on the same instance
+   * cannot leave another persona's tool set in place. This relies on the
+   * subclass reading getActiveToolSchema() before its first await.
+   */
+  protected async callModelWithTools(
+    messages: ChatMessage[],
+    model: string,
+    toolDefinitions: ToolDefinition[]
+  ): Promise<ChatCompletionResponse> {
+    this.filteredToolSchema = this.definitionsToOpenAISchema(toolDefinitions);
+    return this.callModel(messages, model, new ToolRegistry(this.projectPath));
+  }
+
+  /** Wraps each definition in the `{ type: "function", function: {...} }` envelope. */
+  protected definitionsToOpenAISchema(definitions: ToolDefinition[]): Array<Record<string, unknown>> {
+    return definitions.map((def) => ({
+      type: "function",
+      function: {
+        name: def.name,
+        description: def.description,
+        parameters: def.parameters,
+      },
+    }));
+  }
+
+  /** Returns the persona-filtered schema when one is set, otherwise the registry's full schema. */
+  protected getActiveToolSchema(toolRegistry: ToolRegistry): Array<Record<string, unknown>> {
+    return this.filteredToolSchema || toolRegistry.getOpenAIToolSchema();
+  }
+
+  /** Performs one model call and returns the reply in ChatCompletionResponse shape. */
+  protected abstract callModel(
+    messages: ChatMessage[],
+    model: string,
+    toolRegistry: ToolRegistry
+  ): Promise<ChatCompletionResponse>;
+
+  /** Returns the model identifier passed to callModel(). */
+  protected abstract resolveModel(): string;
+
+  protected abstract fetchAvailableModels(): Promise<string[]>;
+
+  /**
+   * Assembles the system prompt: persona text, optional workflow section,
+   * execution context, and the JSON output format that parseFinalResponse()
+   * looks for.
+   */
+  protected buildSystemPrompt(persona: string, workflow: string, request: BackendRequest): string {
+    const parts = [persona];
+    if (workflow) {
+      parts.push("", "## Workflow Instructions", workflow);
+    }
+    parts.push(
+      "",
+      "## Execution Context",
+      `Autonomy level: ${request.autonomy}`,
+      `Project path: ${request.context.project_path}`,
+      `Phase: ${request.context.phase}`,
+      `Stage: ${request.context.stage}`,
+      "",
+      "## Output Format",
+      "When you have completed your task, output a JSON object with this structure:",
+      "```json",
+      '{',
+      '  "success": true,',
+      '  "output": "Summary of what was accomplished",',
+      '  "artifacts": [{"path": "file/path", "content": "...", "operation": "create"}],',
+      '  "decisions": [{"id": "D-NNN", "decision": "what", "rationale": "why", "confidence": 0.85, "category": "general", "alternatives_considered": [], "human_override": null, "timestamp": ""}],',
+      '  "escalations": []',
+      '}',
+      "```"
+    );
+    return parts.join("\n");
+  }
+
+  /**
+   * Reads `ci-<persona>.md` from the user's opencode config directory or,
+   * failing that, from `opencode/agents` under the current working directory.
+   * Falls back to a one-line generic instruction when neither file exists.
+   */
+  protected loadPersona(persona: AgentName): string {
+    const candidates = [
+      path.join(os.homedir(), ".config", "opencode", "agents", `ci-${persona}.md`),
+      path.join(process.cwd(), "opencode", "agents", `ci-${persona}.md`),
+    ];
+    for (const candidate of candidates) {
+      if (fs.existsSync(candidate)) {
+        return fs.readFileSync(candidate, "utf-8");
+      }
+    }
+    return `You are the CIAgent ${persona} agent. Execute the requested task thoroughly and autonomously.`;
+  }
+
+  /**
+   * Reads `<workflow>.md` from the user's opencode config directory or from
+   * `opencode/workflows` under the current working directory. Returns "" when
+   * neither file exists.
+   */
+  protected loadWorkflow(workflow: string): string {
+    const candidates = [
+      path.join(os.homedir(), ".config", "opencode", "ci", "workflows", `${workflow}.md`),
+      path.join(process.cwd(), "opencode", "workflows", `${workflow}.md`),
+    ];
+    for (const candidate of candidates) {
+      if (fs.existsSync(candidate)) {
+        return fs.readFileSync(candidate, "utf-8");
+      }
+    }
+    return "";
+  }
+
+  /**
+   * Turns the model's final text into a BackendResult.
+   *
+   * When the text contains a JSON object mentioning "success", its fields take
+   * precedence; non-empty `artifacts`, `decisions` and `escalations` arrays
+   * replace the values collected during the tool loop. Otherwise, or when the
+   * JSON does not parse, the raw text is returned as a successful result.
+   */
+  protected parseFinalResponse(
+    content: string,
+    artifacts: Artifact[],
+    decisions: Decision[],
+    escalations: Escalation[],
+    usage: TokenUsage
+  ): BackendResult {
+    // Greedy on purpose: spans from the first "{" to the last "}" in the text.
+    const jsonMatch = content.match(/\{[\s\S]*"success"[\s\S]*\}/);
+    if (jsonMatch) {
+      try {
+        const parsed = JSON.parse(jsonMatch[0]);
+        return {
+          success: parsed.success ?? true,
+          output: parsed.output || content,
+          artifacts: parsed.artifacts?.length ? this.parseArtifacts(parsed.artifacts) : artifacts,
+          decisions: parsed.decisions?.length ? this.parseDecisions(parsed.decisions) : decisions,
+          escalations: parsed.escalations?.length ? this.parseEscalations(parsed.escalations) : escalations,
+          usage,
+        };
+      } catch {
+        // Not valid JSON after all: fall through and return the raw text below.
+      }
+    }
+
+    return {
+      success: true,
+      output: content,
+      artifacts,
+      decisions,
+      escalations,
+      usage,
+    };
+  }
+
+  /** Coerces raw artifact entries, dropping falsy ones and defaulting `operation` to "create". */
+  private parseArtifacts(raw: unknown[]): Artifact[] {
+    return raw.filter((a): a is Record<string, unknown> => !!a).map((a) => ({
+      path: String(a.path || ""),
+      content: String(a.content || ""),
+      operation: (a.operation as Artifact["operation"]) || "create",
+    }));
+  }
+
+  /**
+   * Coerces raw decision entries, dropping falsy ones and filling defaults
+   * (id "D-000", confidence 0.5, category "general", current timestamp).
+   * Alternatives given as plain strings become `{ option, rejected_reason: "" }`.
+   */
+  private parseDecisions(raw: unknown[]): Decision[] {
+    return raw.filter((d): d is Record<string, unknown> => !!d).map((d) => ({
+      id: String(d.id || "D-000"),
+      decision: String(d.decision || ""),
+      rationale: String(d.rationale || ""),
+      // A numeric confidence is kept as-is so an explicit 0 is not replaced by the 0.5 default.
+      confidence:
+        typeof d.confidence === "number" && Number.isFinite(d.confidence)
+          ? d.confidence
+          : Number(d.confidence || 0.5),
+      category: (d.category as Decision["category"]) || "general",
+      alternatives_considered: Array.isArray(d.alternatives_considered)
+        ? d.alternatives_considered.map((a: unknown) =>
+            typeof a === "string"
+              ? { option: a, rejected_reason: "" }
+              : (a as { option: string; rejected_reason: string })
+          )
+        : [],
+      human_override: d.human_override ? String(d.human_override) : null,
+      timestamp: String(d.timestamp || new Date().toISOString()),
+    }));
+  }
+
+  /**
+   * Coerces raw escalation entries, dropping falsy ones and filling defaults
+   * (id "E-000", type "specification_ambiguity", resolution "pending",
+   * current timestamp).
+   */
+  private parseEscalations(raw: unknown[]): Escalation[] {
+    return raw.filter((e): e is Record<string, unknown> => !!e).map((e) => ({
+      id: String(e.id || "E-000"),
+      timestamp: String(e.timestamp || new Date().toISOString()),
+      type: (e.type as Escalation["type"]) || "specification_ambiguity",
+      phase: String(e.phase || ""),
+      description: String(e.description || ""),
+      context: String(e.context || ""),
+      options: Array.isArray(e.options) ? e.options : [],
+      default_option_id: String(e.default_option_id || ""),
+      resolution: (e.resolution as Escalation["resolution"]) || "pending",
+      commit_hash: String(e.commit_hash || ""),
+    }));
+  }
+}
@@ -1,335 +1,11 @@
-import * as fs from "node:fs";
-import * as path from "node:path";
-import * as os from "node:os";
-import {
-  IntelligenceBackend,
-  BackendRequest,
-  BackendResult,
-  BackendType,
-  LLMBackendConfig,
-  TokenUsage,
-  Artifact,
-  emptyTokenUsage,
-  emptyBackendResult,
-} from "./types.js";
-import { AgentName, ModelProfile } from "../types/config.js";
-import { Decision } from "../types/decisions.js";
-import { Escalation } from "../types/escalation.js";
-import { ToolRegistry, ToolCall, ToolResult, ToolDefinition } from "./tool-registry.js";
-
-const MAX_TOOL_ROUNDS = 50;
-
-const PERSONA_TOOL_MAP: Record<string, string> = {
-  read: "readFile",
-  write: "writeFile",
-  edit: "editFile",
-  bash: "runBash",
-  glob: "glob",
-  grep: "grep",
-};
-
-export abstract class OllamaBaseBackend implements IntelligenceBackend {
-  abstract readonly name: string;
-  readonly type: BackendType = "llm";
-
-  protected config: LLMBackendConfig;
-  protected projectPath: string;
-  protected filteredToolSchema: Array<Record<string, unknown>> | null = null;
+import { LLMBaseBackend, ChatMessage, ChatCompletionResponse } from "./llm-base.js";
+import { LLMBackendConfig } from "./types.js";
+import { ModelProfile } from "../types/config.js";
+import { ToolRegistry } from "./tool-registry.js";

+export abstract class OllamaBaseBackend extends LLMBaseBackend {
  constructor(config: LLMBackendConfig | undefined) {
-    this.config = config || { base_url: "http://localhost:11434", model_profile: "balanced" };
-    this.projectPath = process.cwd();
-  }
-
-  abstract isAvailable(): Promise<boolean>;
-
-  async execute(request: BackendRequest): Promise<BackendResult> {
-    const startTime = Date.now();
-
-    try {
-      const personaContent = this.loadPersona(request.persona);
-      const workflowContent = this.loadWorkflow(request.workflow);
-      const model = this.resolveModel();
-
-      const toolRegistry = new ToolRegistry(request.context.project_path);
-      const allowedTools = this.parsePersonaTools(personaContent);
-      const filteredDefinitions = this.filterToolDefinitions(toolRegistry.getDefinitions(), allowedTools);
-      this.filteredToolSchema = this.definitionsToOpenAISchema(filteredDefinitions);
-
-      const messages: OllamaMessage[] = [];
-      messages.push({
-        role: "system",
-        content: this.buildSystemPrompt(personaContent, workflowContent, request),
-      });
-      messages.push({
-        role: "user",
-        content: request.task,
-      });
-
-      let totalInputTokens = 0;
-      let totalOutputTokens = 0;
-      let round = 0;
-      const allArtifacts: Artifact[] = [];
-      const allDecisions: Decision[] = [];
-      const allEscalations: Escalation[] = [];
-
-      while (round < MAX_TOOL_ROUNDS) {
-        round++;
-        const response = await this.callModelWithTools(messages, model, filteredDefinitions);
-
-        totalInputTokens += response.usage?.prompt_tokens || 0;
-        totalOutputTokens += response.usage?.completion_tokens || 0;
-
-        const assistantContent = response.choices?.[0]?.message?.content || "";
-        const toolCalls = response.choices?.[0]?.message?.tool_calls;
-
-        messages.push({
-          role: "assistant",
-          content: assistantContent,
-          tool_calls: toolCalls,
-        });
-
-        if (!toolCalls || toolCalls.length === 0) {
-          return this.parseFinalResponse(assistantContent, allArtifacts, allDecisions, allEscalations, {
-            input_tokens: totalInputTokens,
-            output_tokens: totalOutputTokens,
-            total_tokens: totalInputTokens + totalOutputTokens,
-            estimated_cost_usd: 0,
-          });
-        }
-
-        for (const toolCall of toolCalls) {
-          const call: ToolCall = {
-            name: toolCall.function.name,
-            arguments: JSON.parse(toolCall.function.arguments),
-          };
-          const result = toolRegistry.execute(call);
-          messages.push({
-            role: "tool",
-            name: call.name,
-            content: result.content,
-          });
-
-          if (call.name === "writeFile" && !result.isError) {
-            allArtifacts.push({
-              path: String(call.arguments.path),
-              content: String(call.arguments.content),
-              operation: "create",
-            });
-          }
-        }
-      }
-
-      const finalContent = messages
-        .filter((m) => m.role === "assistant" && m.content)
-        .map((m) => m.content)
-        .join("\n");
-
-      return this.parseFinalResponse(
-        `Tool loop reached maximum rounds (${MAX_TOOL_ROUNDS}). Partial progress:\n${finalContent}`,
-        allArtifacts,
-        allDecisions,
-        allEscalations,
-        { input_tokens: totalInputTokens, output_tokens: totalOutputTokens, total_tokens: totalInputTokens + totalOutputTokens, estimated_cost_usd: 0 }
-      );
-    } catch (err) {
-      return emptyBackendResult(`Backend execution failed: ${err instanceof Error ? err.message : String(err)}`);
-    }
-  }
-
-  protected parsePersonaTools(personaContent: string): string[] | null {
-    const frontmatterMatch = personaContent.match(/^---\n([\s\S]*?)\n---/);
-    if (!frontmatterMatch) return null;
-
-    const frontmatter = frontmatterMatch[1];
-    const toolsMatch = frontmatter.match(/tools:\s*\n((?:\s+\w+:.+\n?)+)/);
-    if (!toolsMatch) {
-      const inlineMatch = frontmatter.match(/tools:\s*\[([^\]]+)\]/);
-      if (inlineMatch) {
-        return inlineMatch[1]
-          .split(",")
-          .map((t) => t.trim())
-          .filter(Boolean)
-          .map((t) => PERSONA_TOOL_MAP[t] || t);
-      }
-      return null;
-    }
-
-    const toolsBlock = toolsMatch[1];
-    const toolNames: string[] = [];
-    const lineRegex = /^\s+(\w+):/gm;
-    let lineMatch;
-    while ((lineMatch = lineRegex.exec(toolsBlock)) !== null) {
-      const personaToolName = lineMatch[1];
-      toolNames.push(PERSONA_TOOL_MAP[personaToolName] || personaToolName);
-    }
-
-    return toolNames.length > 0 ? toolNames : null;
-  }
-
-  protected filterToolDefinitions(definitions: ToolDefinition[], allowedTools: string[] | null): ToolDefinition[] {
-    if (!allowedTools) return definitions;
-    const allowedSet = new Set(allowedTools);
-    return definitions.filter((def) => allowedSet.has(def.name));
-  }
-
-  protected async callModelWithTools(
-    messages: OllamaMessage[],
-    model: string,
-    toolDefinitions: ToolDefinition[]
-  ): Promise<OllamaChatResponse> {
-    return this.callModel(messages, model, new ToolRegistry(this.projectPath));
-  }
-
-  protected definitionsToOpenAISchema(definitions: ToolDefinition[]): Array<Record<string, unknown>> {
-    return definitions.map((def) => ({
-      type: "function",
-      function: {
-        name: def.name,
-        description: def.description,
-        parameters: def.parameters,
-      },
-    }));
-  }
-
-  protected getActiveToolSchema(toolRegistry: ToolRegistry): Array<Record<string, unknown>> {
-    return this.filteredToolSchema || toolRegistry.getOpenAIToolSchema();
-  }
-
-  protected abstract callModel(
-    messages: OllamaMessage[],
-    model: string,
-    toolRegistry: ToolRegistry
-  ): Promise<OllamaChatResponse>;
-
-  protected abstract resolveModel(): string;
-
-  protected buildSystemPrompt(persona: string, workflow: string, request: BackendRequest): string {
-    const parts = [persona];
-    if (workflow) {
-      parts.push("", "## Workflow Instructions", workflow);
-    }
-    parts.push(
-      "",
-      "## Execution Context",
-      `Autonomy level: ${request.autonomy}`,
-      `Project path: ${request.context.project_path}`,
-      `Phase: ${request.context.phase}`,
-      `Stage: ${request.context.stage}`,
-      "",
-      "## Output Format",
-      "When you have completed your task, output a JSON object with this structure:",
-      "```json",
-      '{',
-      '  "success": true,',
-      '  "output": "Summary of what was accomplished",',
-      '  "artifacts": [{"path": "file/path", "content": "...", "operation": "create"}],',
-      '  "decisions": [{"id": "D-NNN", "decision": "what", "rationale": "why", "confidence": 0.85, "category": "general", "alternatives_considered": [], "human_override": null, "timestamp": ""}],',
-      '  "escalations": []',
-      '}',
-      "```"
-    );
-    return parts.join("\n");
-  }
-
-  protected loadPersona(persona: AgentName): string {
-    const candidates = [
-      path.join(os.homedir(), ".config", "opencode", "agents", `ci-${persona}.md`),
-      path.join(process.cwd(), "opencode", "agents", `ci-${persona}.md`),
-    ];
-    for (const candidate of candidates) {
-      if (fs.existsSync(candidate)) {
-        return fs.readFileSync(candidate, "utf-8");
-      }
-    }
-    return `You are the CIAgent ${persona} agent. Execute the requested task thoroughly and autonomously.`;
-  }
-
-  protected loadWorkflow(workflow: string): string {
-    const candidates = [
-      path.join(os.homedir(), ".config", "opencode", "ci", "workflows", `${workflow}.md`),
-      path.join(process.cwd(), "opencode", "workflows", `${workflow}.md`),
-    ];
-    for (const candidate of candidates) {
-      if (fs.existsSync(candidate)) {
-        return fs.readFileSync(candidate, "utf-8");
-      }
-    }
-    return "";
-  }
-
-  protected parseFinalResponse(
-    content: string,
-    artifacts: Artifact[],
-    decisions: Decision[],
-    escalations: Escalation[],
-    usage: TokenUsage
-  ): BackendResult {
-    const jsonMatch = content.match(/\{[\s\S]*"success"[\s\S]*\}/);
-    if (jsonMatch) {
-      try {
-        const parsed = JSON.parse(jsonMatch[0]);
-        return {
-          success: parsed.success ?? true,
-          output: parsed.output || content,
-          artifacts: parsed.artifacts?.length ? this.parseArtifacts(parsed.artifacts) : artifacts,
-          decisions: parsed.decisions?.length ? this.parseDecisions(parsed.decisions) : decisions,
-          escalations: parsed.escalations?.length ? this.parseEscalations(parsed.escalations) : escalations,
-          usage,
-        };
-      } catch {}
-    }
-
-    return {
-      success: true,
-      output: content,
-      artifacts,
-      decisions,
-      escalations,
-      usage,
-    };
-  }
-
-  private parseArtifacts(raw: unknown[]): Artifact[] {
-    return raw.filter((a): a is Record<string, unknown> => !!a).map((a) => ({
-      path: String(a.path || ""),
-      content: String(a.content || ""),
-      operation: (a.operation as Artifact["operation"]) || "create",
-    }));
-  }
-
-  private parseDecisions(raw: unknown[]): Decision[] {
-    return raw.filter((d): d is Record<string, unknown> => !!d).map((d) => ({
-      id: String(d.id || "D-000"),
-      decision: String(d.decision || ""),
-      rationale: String(d.rationale || ""),
-      confidence: Number(d.confidence || 0.5),
-      category: (d.category as Decision["category"]) || "general",
-      alternatives_considered: Array.isArray(d.alternatives_considered)
-        ? d.alternatives_considered.map((a: unknown) =>
-            typeof a === "string"
-              ? { option: a, rejected_reason: "" }
-              : (a as { option: string; rejected_reason: string })
-          )
-        : [],
-      human_override: d.human_override ? String(d.human_override) : null,
-      timestamp: String(d.timestamp || new Date().toISOString()),
-    }));
-  }
-
-  private parseEscalations(raw: unknown[]): Escalation[] {
-    return raw.filter((e): e is Record<string, unknown> => !!e).map((e) => ({
-      id: String(e.id || "E-000"),
-      timestamp: String(e.timestamp || new Date().toISOString()),
-      type: (e.type as Escalation["type"]) || "specification_ambiguity",
-      phase: String(e.phase || ""),
-      description: String(e.description || ""),
-      context: String(e.context || ""),
-      options: Array.isArray(e.options) ? e.options : [],
-      default_option_id: String(e.default_option_id || ""),
-      resolution: (e.resolution as Escalation["resolution"]) || "pending",
-      commit_hash: String(e.commit_hash || ""),
-    }));
+    super(config || { base_url: "http://localhost:11434", model_profile: "balanced" });
  }

  protected modelProfileToModel(profile: ModelProfile, availableModels: string[]): string {
@@ -359,29 +35,4 @@ export abstract class OllamaBaseBackend implements IntelligenceBackend {
  }
 }

-interface OllamaMessage {
-  role: "system" | "user" | "assistant" | "tool";
-  content: string;
-  name?: string;
-  tool_calls?: Array<{
-    function: { name: string; arguments: string };
-  }>;
-}
-
-interface OllamaChatResponse {
-  choices?: Array<{
-    message: {
-      content: string;
-      tool_calls?: Array<{
-        function: { name: string; arguments: string };
-      }>;
-    };
-  }>;
-  usage?: {
-    prompt_tokens: number;
-    completion_tokens: number;
-    total_tokens: number;
-  };
-}
-
-export { OllamaMessage, OllamaChatResponse };
+export { ChatMessage as OllamaMessage, ChatCompletionResponse as OllamaChatResponse };
@@ -0,0 +1,279 @@
+import { OpenAIBackend } from "../backends/openai.js";
+import { ChatCompletionResponse } from "../backends/llm-base.js";
+
+// Unit tests for OpenAIBackend. No network traffic: global fetch is replaced by
+// a recording stub, and API keys are supplied through throwaway env vars.
+describe("OpenAIBackend", () => {
+  const originalFetch = globalThis.fetch;
+  let fetchCalls: Array<{ url: string; headers: Record<string, string>; body: string }>;
+
+  type OpenAITestConfig = ConstructorParameters<typeof OpenAIBackend>[0];
+
+  beforeEach(() => {
+    fetchCalls = [];
+  });
+
+  afterEach(() => {
+    // Undo the fetch stub and remove the env vars individual tests set.
+    globalThis.fetch = originalFetch;
+    delete process.env.TEST_OPENAI_KEY;
+    delete process.env.TEST_OPENAI_KEY_EMPTY;
+  });
+
+  /** Replaces global fetch with a stub that records each call and answers with `response`. */
+  function mockFetch(response: ChatCompletionResponse, status = 200): void {
+    globalThis.fetch = ((url: string, init: RequestInit) => {
+      fetchCalls.push({
+        url,
+        headers: (init.headers as Record<string, string>) || {},
+        body: init.body as string,
+      });
+      return Promise.resolve({
+        ok: status >= 200 && status < 300,
+        status,
+        text: () => Promise.resolve(JSON.stringify(response)),
+        json: () => Promise.resolve(response),
+      } as Response);
+    }) as typeof fetch;
+  }
+
+  /** A completion with no tool calls, so execute() finishes after a single request. */
+  function okResponse(): ChatCompletionResponse {
+    return {
+      choices: [{ message: { content: '{"success": true, "output": "done"}' } }],
+      usage: { prompt_tokens: 10, completion_tokens: 20, total_tokens: 30 },
+    };
+  }
+
+  /** Builds a backend with the baseline test config; `overrides` replaces individual fields. */
+  function makeBackend(overrides: Partial<OpenAITestConfig> = {}): OpenAIBackend {
+    return new OpenAIBackend({
+      base_url: "https://api.openai.com/v1",
+      api_key_env: "TEST_OPENAI_KEY",
+      model: "gpt-4o",
+      model_profile: "quality",
+      ...overrides,
+    });
+  }
+
+  /** Minimal executor request; only `task` varies between tests. */
+  function makeRequest(task = "test") {
+    return {
+      persona: "executor" as const,
+      workflow: "execute",
+      task,
+      context: {
+        project_path: "/tmp",
+        phase: 1,
+        stage: "execute" as const,
+        specification: "",
+        config_path: "",
+      },
+      autonomy: "full" as const,
+    };
+  }
+
+  /** Runs the request and returns the first recorded fetch call. */
+  async function executeAndCapture(backend: OpenAIBackend, task?: string) {
+    await backend.execute(makeRequest(task));
+    // If execute() never reached fetch, fail here with a readable assertion
+    // instead of a TypeError when the caller dereferences the call record.
+    expect(fetchCalls.length).toBeGreaterThan(0);
+    return fetchCalls[0];
+  }
+
+  describe("isAvailable", () => {
+    it("returns true when API key is present", async () => {
+      process.env.TEST_OPENAI_KEY = "sk-test-key-123";
+      expect(await makeBackend().isAvailable()).toBe(true);
+    });
+
+    it("returns false when API key is absent", async () => {
+      const backend = makeBackend({ api_key_env: "NONEXISTENT_OPENAI_KEY_VAR_99999" });
+      expect(await backend.isAvailable()).toBe(false);
+    });
+
+    it("returns false when API key is empty string", async () => {
+      process.env.TEST_OPENAI_KEY_EMPTY = "";
+      const backend = makeBackend({ api_key_env: "TEST_OPENAI_KEY_EMPTY" });
+      expect(await backend.isAvailable()).toBe(false);
+    });
+  });
+
+  describe("resolveModel", () => {
+    it("returns config.model when set", async () => {
+      process.env.TEST_OPENAI_KEY = "sk-test";
+      mockFetch(okResponse());
+      const call = await executeAndCapture(
+        makeBackend({ model: "gpt-4o-mini", model_profile: "speed" })
+      );
+      expect(JSON.parse(call.body).model).toBe("gpt-4o-mini");
+    });
+
+    it("defaults to gpt-4o when model not specified", async () => {
+      process.env.TEST_OPENAI_KEY = "sk-test";
+      mockFetch(okResponse());
+      const call = await executeAndCapture(makeBackend({ model: "" }));
+      expect(JSON.parse(call.body).model).toBe("gpt-4o");
+    });
+  });
+
+  describe("callModel request format", () => {
+    it("sends correct URL, Authorization header, and body structure", async () => {
+      process.env.TEST_OPENAI_KEY = "sk-test-key-abc";
+      mockFetch(okResponse());
+
+      const call = await executeAndCapture(makeBackend(), "Do the thing");
+
+      expect(fetchCalls.length).toBe(1);
+      expect(call.url).toBe("https://api.openai.com/v1/chat/completions");
+      expect(call.headers["Authorization"]).toBe("Bearer sk-test-key-abc");
+      expect(call.headers["Content-Type"]).toBe("application/json");
+
+      const body = JSON.parse(call.body);
+      expect(body.model).toBe("gpt-4o");
+      expect(body.stream).toBe(false);
+      expect(Array.isArray(body.messages)).toBe(true);
+      expect(body.messages.length).toBeGreaterThanOrEqual(2);
+      expect(body.messages[0].role).toBe("system");
+      expect(body.messages[1].role).toBe("user");
+      expect(body.messages[1].content).toBe("Do the thing");
+      expect(Array.isArray(body.tools)).toBe(true);
+    });
+  });
+
+  describe("custom base_url override", () => {
+    it("sends request to custom base_url", async () => {
+      process.env.TEST_OPENAI_KEY = "sk-test";
+      mockFetch(okResponse());
+
+      const call = await executeAndCapture(
+        makeBackend({ base_url: "https://custom-proxy.example.com/api" })
+      );
+      expect(call.url).toBe("https://custom-proxy.example.com/api/chat/completions");
+    });
+  });
+
+  describe("organization header", () => {
+    it("sends OpenAI-Organization header when config.organization is set", async () => {
+      process.env.TEST_OPENAI_KEY = "sk-test";
+      mockFetch(okResponse());
+
+      const call = await executeAndCapture(makeBackend({ organization: "org-abc123" }));
+      expect(call.headers["OpenAI-Organization"]).toBe("org-abc123");
+    });
+
+    it("does not send OpenAI-Organization header when config.organization is not set", async () => {
+      process.env.TEST_OPENAI_KEY = "sk-test";
+      mockFetch(okResponse());
+
+      const call = await executeAndCapture(makeBackend());
+      expect(call.headers["OpenAI-Organization"]).toBeUndefined();
+    });
+  });
+});
@@ -0,0 +1,84 @@
+import { LLMBaseBackend, ChatMessage, ChatCompletionResponse } from "./llm-base.js";
+import { BackendType, OpenAIConfig, emptyBackendResult } from "./types.js";
+import { ToolRegistry, ToolDefinition } from "./tool-registry.js";
+
+/**
+ * LLM backend that sends chat requests to the OpenAI Chat Completions HTTP
+ * endpoint (or a compatible proxy set through `base_url`) using the global
+ * `fetch`, without an SDK dependency.
+ */
+export class OpenAIBackend extends LLMBaseBackend {
+  readonly name = "openai";
+  readonly type: BackendType = "llm";
+
+  private openaiConfig: OpenAIConfig;
+
+  /**
+   * @param config - OpenAI settings. An empty `base_url` falls back to
+   *   "https://api.openai.com/v1" in the config handed to the base class.
+   */
+  constructor(config: OpenAIConfig) {
+    super({ ...config, base_url: config.base_url || "https://api.openai.com/v1" });
+    this.openaiConfig = config;
+  }
+
+  /**
+   * Reports whether the configured API-key environment variable holds a
+   * non-empty value. No request is made, so the key itself is not validated.
+   */
+  async isAvailable(): Promise<boolean> {
+    return Boolean(process.env[this.openaiConfig.api_key_env]);
+  }
+
+  /** Returns the configured model, or "gpt-4o" when `model` is empty. */
+  protected resolveModel(): string {
+    return this.openaiConfig.model || "gpt-4o";
+  }
+
+  /** Always resolves to an empty list; this backend does not query the API for models. */
+  protected async fetchAvailableModels(): Promise<string[]> {
+    return [];
+  }
+
+  /**
+   * Sends one non-streaming chat-completion request.
+   *
+   * @param messages - Conversation so far, forwarded as role/content plus
+   *   optional `name` and `tool_calls`.
+   * @param model - Model identifier placed in the request body.
+   * @param toolRegistry - Passed to `getActiveToolSchema` to obtain the tool schema.
+   * @returns The parsed JSON response body.
+   * @throws Error when the API key env var is unset or empty, on 401/403
+   *   (authentication), on 429 (rate limit), or on any other non-2xx status.
+   *   A timeout surfaces as the rejection produced by `AbortSignal.timeout`.
+   */
+  protected async callModel(
+    messages: ChatMessage[],
+    model: string,
+    toolRegistry: ToolRegistry
+  ): Promise<ChatCompletionResponse> {
+    const apiKey = process.env[this.openaiConfig.api_key_env];
+    if (!apiKey) {
+      throw new Error(`API key not found. Set ${this.openaiConfig.api_key_env} environment variable.`);
+    }
+
+    const headers: Record<string, string> = {
+      "Content-Type": "application/json",
+      "Authorization": `Bearer ${apiKey}`,
+    };
+    if (this.openaiConfig.organization) {
+      headers["OpenAI-Organization"] = this.openaiConfig.organization;
+    }
+
+    const body: Record<string, unknown> = {
+      model,
+      // NOTE(review): the OpenAI API documents `tool_call_id` on role:"tool"
+      // messages and `id`/`type` on assistant tool_calls. Only `name` and
+      // `tool_calls` are forwarded here — confirm ChatMessage carries those ids
+      // before relying on multi-round tool use against the real API.
+      messages: messages.map((m) => {
+        const msg: Record<string, unknown> = { role: m.role, content: m.content };
+        if (m.name) msg.name = m.name;
+        if (m.tool_calls) msg.tool_calls = m.tool_calls;
+        return msg;
+      }),
+      stream: false,
+    };
+
+    // Leave `tools` out when there is nothing to offer: the endpoint rejects
+    // an empty array, and omitting the key is valid for every compatible server.
+    const tools = this.getActiveToolSchema(toolRegistry);
+    if (tools.length > 0) {
+      body.tools = tools;
+    }
+
+    const timeout = this.openaiConfig.timeout_ms || 60000;
+    // Tolerate a trailing slash in base_url so the path never becomes "//chat/completions".
+    const baseUrl = `${this.config.base_url}`.replace(/\/+$/, "");
+    const url = `${baseUrl}/chat/completions`;
+
+    const response = await fetch(url, {
+      method: "POST",
+      headers,
+      body: JSON.stringify(body),
+      signal: AbortSignal.timeout(timeout),
+    });
+
+    if (!response.ok) {
+      // Read the body on every error path so the underlying connection is
+      // released, even for the statuses whose message does not include it.
+      const errorText = await response.text().catch(() => "unknown error");
+
+      if (response.status === 401 || response.status === 403) {
+        throw new Error(`Authentication failed. Check ${this.openaiConfig.api_key_env} environment variable.`);
+      }
+      if (response.status === 429) {
+        throw new Error("Rate limited by OpenAI API. Please retry after a delay.");
+      }
+      throw new Error(`OpenAI API error (${response.status}): ${errorText}`);
+    }
+
+    return (await response.json()) as ChatCompletionResponse;
+  }
+}
@@ -115,20 +115,34 @@ export interface OllamaCloudConfig extends LLMBackendConfig {
  timeout_ms?: number;
 }

+/** Settings for the OpenAI chat-completions backend. */
+export interface OpenAIConfig extends LLMBackendConfig {
+  /** Name of the environment variable that holds the API key (not the key itself). */
+  api_key_env: string;
+  /** Model identifier sent with each request; the backend uses "gpt-4o" when this is empty. */
+  model: string;
+  /** When set, sent as the `OpenAI-Organization` request header. */
+  organization?: string;
+  /**
+   * Per-request timeout in milliseconds; the backend uses 60000 when unset.
+   * Declared here, as on OllamaCloudConfig, because the backend and the
+   * default config both use it. NOTE(review): redundant if LLMBackendConfig
+   * already declares the same optional field.
+   */
+  timeout_ms?: number;
+}
+
+/** Settings for the Anthropic backend. */
+export interface AnthropicConfig extends LLMBackendConfig {
+  /** Name of the environment variable that holds the API key (not the key itself). */
+  api_key_env: string;
+  /** Model identifier to request. */
+  model: string;
+  /**
+   * API version string; the default config uses "2023-06-01".
+   * NOTE(review): presumably sent as the `anthropic-version` header — confirm in the backend.
+   */
+  api_version?: string;
+  /**
+   * Per-request timeout in milliseconds; the default config sets 60000.
+   * NOTE(review): redundant if LLMBackendConfig already declares the same optional field.
+   */
+  timeout_ms?: number;
+}
+
 export interface OpencodeBackendConfig {
  enabled: boolean;
  executable?: string;
 }

+/**
+ * Backend section of the configuration: which backend to use, an optional
+ * fallback, and the per-backend settings grouped into agent and LLM backends.
+ */
 export interface BackendConfigSection {
-  provider: "auto" | "opencode" | "ollama-local" | "ollama-cloud";
-  fallback?: "opencode" | "ollama-local" | "ollama-cloud";
+  /** Selected backend name, or "auto". NOTE(review): "auto" presumably means pick an available backend — confirm in the resolver. */
+  provider: "auto" | "opencode" | "openai" | "ollama-local" | "ollama-cloud" | "anthropic";
+  /** Optional second-choice backend; unlike `provider`, "auto" is not accepted here. */
+  fallback?: "opencode" | "openai" | "ollama-local" | "ollama-cloud" | "anthropic";
  agent_backends: {
    opencode?: OpencodeBackendConfig;
  };
  llm_backends: {
+    "openai"?: OpenAIConfig;
    "ollama-local"?: OllamaLocalConfig;
    "ollama-cloud"?: OllamaCloudConfig;
+    "anthropic"?: AnthropicConfig;
  };
 }

@@ -138,6 +152,13 @@ export const DEFAULT_BACKEND_CONFIG: BackendConfigSection = {
    opencode: { enabled: true },
  },
  llm_backends: {
+    "openai": {
+      base_url: "https://api.openai.com/v1",
+      api_key_env: "OPENAI_API_KEY",
+      model: "gpt-4o",
+      model_profile: "quality",
+      timeout_ms: 60000,
+    },
    "ollama-local": {
      base_url: "http://localhost:11434",
      model_profile: "balanced",
@@ -148,6 +169,14 @@ export const DEFAULT_BACKEND_CONFIG: BackendConfigSection = {
      model_profile: "quality",
      timeout_ms: 60000,
    },
+    "anthropic": {
+      base_url: "https://api.anthropic.com",
+      api_key_env: "ANTHROPIC_API_KEY",
+      model: "claude-sonnet-4-20250514",
+      api_version: "2023-06-01",
+      model_profile: "quality",
+      timeout_ms: 60000,
+    },
  },
 };

@@ -161,8 +190,10 @@ export class BackendUnavailableError extends Error {
      `Intelligence backend "${backendName}" is not available${agentMsg}. ` +
      `Configure one of:\n` +
      `  1. Install opencode: npm i -g opencode\n` +
-      `  2. Run Ollama locally: ollama serve\n` +
-      `  3. Set OLLAMA_CLOUD_API_KEY for remote inference`
+      `  2. Set OPENAI_API_KEY for OpenAI API access\n` +
+      `  3. Set ANTHROPIC_API_KEY for Anthropic API access\n` +
+      `  4. Run Ollama locally: ollama serve\n` +
+      `  5. Set OLLAMA_CLOUD_API_KEY for remote inference`
    );
    this.name = "BackendUnavailableError";
    this.backendName = backendName;
@@ -184,4 +215,6 @@ export function emptyBackendResult(error?: string): BackendResult {
    usage: emptyTokenUsage(),
    error,
  };
-}
+}
+
+export { ChatMessage, ChatCompletionResponse } from "./llm-base.js";