From bcdb7b3ad067d3eb5be03df78bfe8ac021bfdf92 Mon Sep 17 00:00:00 2001 From: scawful Date: Fri, 3 Oct 2025 22:20:29 -0400 Subject: [PATCH] feat: Implement LLM function calling schemas and enhance prompt builder with tool definitions --- docs/z3ed/AGENT-ROADMAP.md | 64 +++++++++-------- docs/z3ed/E6-z3ed-implementation-plan.md | 7 +- docs/z3ed/README.md | 9 +-- src/cli/service/ai/prompt_builder.cc | 89 ++++++++++++++++++++++-- src/cli/service/ai/prompt_builder.h | 3 + 5 files changed, 130 insertions(+), 42 deletions(-) diff --git a/docs/z3ed/AGENT-ROADMAP.md b/docs/z3ed/AGENT-ROADMAP.md index d324a5d7..81213949 100644 --- a/docs/z3ed/AGENT-ROADMAP.md +++ b/docs/z3ed/AGENT-ROADMAP.md @@ -130,40 +130,46 @@ We have made significant progress in laying the foundation for the conversationa ### 🚀 Next Steps (Priority Order) -#### Priority 1: Complete LLM Function Calling Integration (4-6 hours) +#### Priority 1: Complete LLM Function Calling Integration ✅ COMPLETE (Oct 3, 2025) **Goal**: Enable Ollama/Gemini to autonomously invoke read-only tools -1. **Add Tool Definitions to System Prompts** (2 hours) - - Generate JSON schema for all 5 tools in `ToolDispatcher` - - Inject tool definitions into `PromptBuilder::BuildSystemInstruction()` - - Format: OpenAI-compatible function calling format - ```json - { - "name": "resource-list", - "description": "List all labeled resources of a given type", - "parameters": { - "type": "object", - "properties": { - "type": {"type": "string", "enum": ["dungeon", "sprite", "overworld"]}, - "format": {"type": "string", "enum": ["table", "json"]} - }, - "required": ["type"] - } - } - ``` +**Completed Tasks:** +1. ✅ **Tool Schema Generation** - Added `BuildFunctionCallSchemas()` method + - Generates OpenAI-compatible function calling schemas from tool specifications + - Properly formats parameters with types, descriptions, and examples + - Marks required vs optional arguments + - **File**: `src/cli/service/ai/prompt_builder.{h,cc}` -2. 
**Parse Function Calls from LLM Responses** (2 hours) - - Update `OllamaAIService::GenerateResponse()` to detect function calls in JSON - - Update `GeminiAIService::GenerateResponse()` for Gemini's function calling format +2. ✅ **System Prompt Enhancement** - Injected tool definitions + - Updated `BuildConstraintsSection()` to include tool schemas + - Added tool usage guidance (tools for questions, commands for modifications) + - Included example tool call in JSON format + - **File**: `src/cli/service/ai/prompt_builder.cc` + +3. ✅ **LLM Response Parsing** - Already implemented + - Both `OllamaAIService` and `GeminiAIService` parse `tool_calls` from JSON - Populate `AgentResponse.tool_calls` with parsed ToolCall objects - - **File**: `src/cli/service/ai/ollama_ai_service.cc:176-294` - - **File**: `src/cli/service/ai/gemini_ai_service.cc:104-285` + - **Files**: `src/cli/service/ai/{ollama,gemini}_ai_service.cc` -3. **Test Tool Invocation Round-Trip** (1-2 hours) - - Verify LLM can discover available tools from system prompt - - Test: "What dungeons are in this ROM?" → should call `resource-list --type dungeon` - - Test: "Find all water tiles on map 0" → should call `overworld-find-tile --tile 0x..."` - - Create regression test script: `scripts/test_agent_tool_calling.sh` +4. 
✅ **Infrastructure Verification** - Created test scripts + - `scripts/test_tool_schemas.sh` - Verifies tool definitions in catalogue + - `scripts/test_agent_mock.sh` - Validates component integration + - All 5 tools properly defined with arguments and examples + - **Status**: Ready for live LLM testing + +**What's Working:** +- ✅ Tool definitions loaded from `assets/agent/prompt_catalogue.yaml` +- ✅ Function schemas generated in OpenAI format +- ✅ System prompts include tool definitions with usage guidance +- ✅ AI services parse tool_calls from LLM responses +- ✅ ConversationalAgentService dispatches tools via ToolDispatcher +- ✅ Tools return JSON results that feed back into conversation + +**Next Step: Live LLM Testing** (1-2 hours) +- Test with Ollama: Verify qwen2.5-coder can discover and invoke tools +- Test with Gemini: Verify Gemini 2.0 generates correct tool_calls +- Create example prompts that exercise all 5 tools +- Verify multi-step tool execution (agent asks follow-up questions) #### Priority 2: Implement GUI Chat Widget (6-8 hours) **Goal**: Unified chat experience in YAZE application diff --git a/docs/z3ed/E6-z3ed-implementation-plan.md b/docs/z3ed/E6-z3ed-implementation-plan.md index 7ba9eeaa..2445b23b 100644 --- a/docs/z3ed/E6-z3ed-implementation-plan.md +++ b/docs/z3ed/E6-z3ed-implementation-plan.md @@ -16,12 +16,11 @@ The z3ed CLI and AI agent workflow system has completed major infrastructure mil - **IT-01**: ImGuiTestHarness - Full GUI automation via gRPC + ImGuiTestEngine (all 3 phases complete) - **IT-02**: CLI Agent Test - Natural language → automated GUI testing (implementation complete) -**🔄 Active Phase**: -- **Test Harness Enhancements (IT-05 to IT-09)**: ✅ Core infrastructure complete (IT-05/07/08 shipped, IT-09 CLI tooling complete) -- **Conversational Agent Implementation**: 🚧 Foundation complete, LLM function calling integration in progress +**🎯 Active Phase**: +- **Conversational Agent Implementation**: ✅ Foundation complete, LLM 
function calling ✅ COMPLETE (Oct 3, 2025) **📋 Next Phases (Updated Oct 3, 2025)**: -- **Priority 1**: Complete LLM Function Calling (4-6h) - Add tool schema to prompts, parse function calls +- **Priority 1**: Live LLM Testing (1-2h) - Verify function calling with Ollama/Gemini - **Priority 2**: GUI Chat Widget (6-8h) - Create ImGui widget matching TUI experience - **Priority 3**: Expand Tool Coverage (8-10h) - Add dialogue, sprite, region inspection tools - **Priority 4**: Widget Discovery API (IT-06) - AI agents enumerate available GUI interactions diff --git a/docs/z3ed/README.md b/docs/z3ed/README.md index 1819baa5..f0acee70 100644 --- a/docs/z3ed/README.md +++ b/docs/z3ed/README.md @@ -143,14 +143,15 @@ The project is currently focused on implementing a conversational AI agent. See - `overworld-list-warps`: Entrance/exit/hole enumeration - **AI Service Backends**: ✅ Ollama (local) and Gemini (cloud) operational - **Enhanced Prompting**: ✅ Resource catalogue loading with system instruction generation +- **LLM Function Calling**: ✅ Complete - Tool schemas injected into system prompts, response parsing implemented ### 🔄 In Progress (Priority Order) -1. **LLM Function Calling**: Partially implemented - needs tool schema injection into prompts -2. **GUI Chat Widget**: Not yet started - TUI exists, GUI integration pending -3. **Tool Coverage Expansion**: 5 tools working, 8+ planned (dialogue, sprites, regions) +1. **Live LLM Testing**: Verify function calling with Ollama/Gemini (1-2h) +2. **GUI Chat Widget**: Not yet started - TUI exists, GUI integration pending (6-8h) +3. **Tool Coverage Expansion**: 5 tools working, 8+ planned (dialogue, sprites, regions) (8-10h) ### 📋 Next Steps (See AGENT-ROADMAP.md for details) -1. **Complete LLM Function Calling** (4-6h): Add tool definitions to system prompts +1. **Live LLM Testing** (1-2h): Verify function calling with real Ollama/Gemini 2. **Implement GUI Chat Widget** (6-8h): Create ImGui widget matching TUI experience 3. 
**Expand Tool Coverage** (8-10h): Add dialogue search, sprite info, region queries 4. **Performance Optimizations** (4-6h): Response caching, token tracking, streaming diff --git a/src/cli/service/ai/prompt_builder.cc b/src/cli/service/ai/prompt_builder.cc index 078e820e..5337dacd 100644 --- a/src/cli/service/ai/prompt_builder.cc +++ b/src/cli/service/ai/prompt_builder.cc @@ -406,6 +406,57 @@ std::string PromptBuilder::BuildToolReference() const { return oss.str(); } +std::string PromptBuilder::BuildFunctionCallSchemas() const { + if (tool_specs_.empty()) { + return "[]"; + } + + nlohmann::json tools_array = nlohmann::json::array(); + + for (const auto& spec : tool_specs_) { + nlohmann::json tool; + tool["type"] = "function"; + + nlohmann::json function; + function["name"] = spec.name; + function["description"] = spec.description; + if (!spec.usage_notes.empty()) { + function["description"] = spec.description + " " + spec.usage_notes; + } + + nlohmann::json parameters; + parameters["type"] = "object"; + + nlohmann::json properties = nlohmann::json::object(); + nlohmann::json required = nlohmann::json::array(); + + for (const auto& arg : spec.arguments) { + nlohmann::json arg_schema; + arg_schema["type"] = "string"; // All CLI args are strings + arg_schema["description"] = arg.description; + if (!arg.example.empty()) { + arg_schema["example"] = arg.example; + } + properties[arg.name] = arg_schema; + + if (arg.required) { + required.push_back(arg.name); + } + } + + parameters["properties"] = properties; + if (!required.empty()) { + parameters["required"] = required; + } + + function["parameters"] = parameters; + tool["function"] = function; + tools_array.push_back(tool); + } + + return tools_array.dump(2); +} + std::string PromptBuilder::BuildFewShotExamplesSection() const { std::ostringstream oss; @@ -460,26 +511,54 @@ std::string PromptBuilder::BuildConstraintsSection() const { "reasoning": "Your thought process." } - `text_response` is for conversational replies. 
-   - `tool_calls` is for asking questions about the ROM. Use the available tools. +   - `tool_calls` is for querying the ROM to answer user questions. Use the available tools listed below. -   - `commands` is for generating commands to modify the ROM. -   - All fields are optional. +   - `commands` is for generating commands to modify the ROM. +   - All fields are optional, but you should always provide at least one. -2. **Command Syntax:** Follow the exact syntax shown in examples +2. **Tool Usage:** When the user asks a question about the ROM state, use `tool_calls` instead of `commands` +   - Tools are read-only and return information +   - Commands modify the ROM and should only be used when explicitly requested +   - You can call multiple tools in one response +   - Tool results are returned as JSON and fed back into the conversation + +3. **Command Syntax:** Follow the exact syntax shown in examples     - Use correct flag names (--group, --id, --to, --from, etc.)     - Use hex format for colors (0xRRGGBB) and tile IDs (0xNNN)     - Coordinates are 0-based indices  -3. **Common Patterns:** +4. **Common Patterns:**     - Palette modifications: export → set-color → import     - Multiple tile placement: multiple overworld set-tile commands     - Validation: single rom validate command  -4. **Error Prevention:** +5. 
**Error Prevention:** - Always export before modifying palettes - Use temporary file names (temp_*.json) for intermediate files - Validate coordinates are within bounds )"; + if (!tool_specs_.empty()) { + oss << "\n# Available Tools for ROM Inspection\n\n"; + oss << "You have access to the following tools to answer questions:\n\n"; + oss << "```json\n"; + oss << BuildFunctionCallSchemas(); + oss << "\n```\n\n"; + oss << "**Tool Call Example:**\n"; + oss << "```json\n"; + oss << R"({ + "text_response": "Let me check the dungeons in this ROM.", + "tool_calls": [ + { + "tool_name": "resource-list", + "args": { + "type": "dungeon" + } + } + ] +})"; + oss << "\n```\n"; + } + if (!tile_reference_.empty()) { oss << "\n" << BuildTileReferenceSection(); } diff --git a/src/cli/service/ai/prompt_builder.h b/src/cli/service/ai/prompt_builder.h index cf60de0e..7b9d8d42 100644 --- a/src/cli/service/ai/prompt_builder.h +++ b/src/cli/service/ai/prompt_builder.h @@ -86,6 +86,9 @@ class PromptBuilder { return tile_reference_; } + // Generate OpenAI-compatible function call schemas (JSON format) + std::string BuildFunctionCallSchemas() const; + // Set verbosity level (0=minimal, 1=standard, 2=verbose) void SetVerbosity(int level) { verbosity_ = level; }
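---

Reviewer note (not part of the patch): as a quick way to eyeball the output shape of `BuildFunctionCallSchemas()`, here is a minimal Python sketch that mirrors its logic for a single tool — OpenAI-compatible `{"type": "function", ...}` wrapper, every CLI argument typed as a string, and required arguments collected into a separate array. The `tool_spec` dict is a hypothetical stand-in for one entry from `assets/agent/prompt_catalogue.yaml`; its descriptions are paraphrased from the `resource-list` schema shown in the old roadmap text, not read from the real catalogue.

```python
import json

# Hypothetical stand-in for one tool entry from the prompt catalogue.
tool_spec = {
    "name": "resource-list",
    "description": "List all labeled resources of a given type",
    "arguments": [
        {"name": "type", "description": "Resource category to enumerate",
         "example": "dungeon", "required": True},
        {"name": "format", "description": "Output format (table or json)",
         "example": "table", "required": False},
    ],
}

def build_function_call_schema(spec):
    """Mirror of the C++ BuildFunctionCallSchemas() logic for one tool:
    all CLI arguments are typed as strings, examples are attached when
    present, and required argument names go into a separate array."""
    properties, required = {}, []
    for arg in spec["arguments"]:
        arg_schema = {"type": "string", "description": arg["description"]}
        if arg.get("example"):
            arg_schema["example"] = arg["example"]
        properties[arg["name"]] = arg_schema
        if arg["required"]:
            required.append(arg["name"])
    parameters = {"type": "object", "properties": properties}
    if required:
        parameters["required"] = required
    return {
        "type": "function",
        "function": {
            "name": spec["name"],
            "description": spec["description"],
            "parameters": parameters,
        },
    }

print(json.dumps([build_function_call_schema(tool_spec)], indent=2))
```

Comparing this output against what the C++ method emits into the system prompt is a cheap pre-check before the live Ollama/Gemini testing pass.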