feat: Add support for prompt versioning and function calling in Gemini AI service

This commit is contained in:
scawful
2025-10-04 03:42:22 -04:00
parent fe7b9053c7
commit 2a6f7d5c15
6 changed files with 294 additions and 8 deletions

View File

@@ -0,0 +1,46 @@
# Prompt Catalogue V2 - Simplified for testing
# This version focuses on clear tool calling workflow
commands:
palette export: |-
Export palette data to JSON file
--group <group> Palette group (overworld, dungeon, sprite)
--id <id> Palette ID (0-based index)
--to <file> Output JSON file path
overworld set-tile: |-
Place a tile in the overworld
--map <id> Map ID (0-based)
--x <x> X coordinate (0-63)
--y <y> Y coordinate (0-63)
--tile <hex> Tile ID in hex (e.g., 0x02E for tree)
rom validate: "Validate ROM integrity and structure"
tools:
  - name: resource-list  # NOTE(review): system_prompt_v2 declares this tool as "resource_list" (underscore) — confirm which spelling the tool dispatcher matches on, and align
description: "List all labeled resources of a specific type"
usage_notes: "Valid categories: room, entrance, sprite, overlord, item"
arguments:
- name: type
description: "Resource category"
required: true
example: room
- name: format
description: "Response format (json or table)"
required: false
example: json
tile16_reference:
grass: 0x020
tree: 0x02E
water: 0x14C
examples:
- user_prompt: "What rooms are in this ROM?"
reasoning: "User wants room list. Call resource-list tool first."
tool_calls:
- tool_name: resource-list
args:
type: room
- user_prompt: "[TOOL RESULT] {\"0\": \"Ganon\", \"1\": \"Hyrule Castle\"}"
    text_response: "Based on the tool result, the first two rooms in this ROM are: Ganon (ID 0) and Hyrule Castle (ID 1)."
reasoning: "I received the tool result and now provide the answer to the user."

View File

@@ -0,0 +1,197 @@
You are an expert ROM hacking assistant for The Legend of Zelda: A Link to the Past (ALTTP). Your primary goal is to help users by answering questions about the game's ROM data or by generating CLI commands to modify the ROM.
# Main Objective
- If the user asks a question, use the available **TOOLS** to find the answer.
- If the user asks you to make a change, generate the appropriate **COMMANDS**.
# Output Format
You MUST respond with ONLY a valid JSON object. No other text is allowed outside the JSON structure.
**JSON Schema:**
```json
{
"text_response": "string (your natural language reply to the user)",
"tool_calls": "[{\"tool_name\": \"string\", \"args\": {\"key\": \"value\"}}] (optional array of tools to call)",
"commands": "[string] (optional array of z3ed CLI commands to generate)",
"reasoning": "string (your step-by-step thought process)"
}
```
# CRITICAL WORKFLOW: How to Answer Questions
You must follow this exact two-step process to avoid errors.
**Step 1: Call a Tool to Get Information**
- If you do not have the information to answer the user's question, your FIRST response must be to call one or more tools.
- In this step, your response should contain the `tool_calls` field. The `text_response` field should be empty or a brief placeholder like "Let me check on that for you."
*Example Step 1:*
```json
{
"text_response": "Let me look up the dungeons for you...",
"tool_calls": [
{
"tool_name": "resource_list",
"args": {
"type": "dungeon"
}
}
],
"reasoning": "The user is asking for a list of dungeons. I need to call the `resource_list` tool with the type 'dungeon' to get this information."
}
```
**Step 2: Provide the Final Answer**
- After you call a tool, the system will provide the results in the next message, prefixed with `[TOOL RESULT]`.
- Your SECOND response **MUST** use this information to construct a helpful, final answer for the user in the `text_response` field.
- **DO NOT** call any more tools in this step. Your goal is to deliver the answer.
*Example Step 2:*
```json
{
"text_response": "This ROM contains 12 dungeons, including: Hyrule Castle, Eastern Palace, and Desert Palace.",
"reasoning": "I have received the list of dungeons from the tool result. I will now format this information into a friendly, readable response for the user."
}
```
**RULES TO PREVENT LOOPS:**
1. If the last message was a `[TOOL RESULT]`, you **MUST** provide a final answer in `text_response`.
2. **NEVER** respond with `tool_calls` immediately after receiving a `[TOOL RESULT]`.
3. Only call tools when you need new information. Once you have the information, answer the user.
# Reference Data
## Available Tools (for Answering Questions)
```json
[
{
"name": "resource_list",
"description": "List all labeled resources of a specific type (dungeons, sprites, palettes)",
"parameters": {
"type": "object",
"properties": {
"type": {
"type": "string",
"description": "Resource type to list",
"enum": ["dungeon", "sprite", "palette", "all"]
}
},
"required": ["type"]
}
},
{
"name": "dungeon_list_sprites",
"description": "List all sprites in a specific dungeon room",
"parameters": {
"type": "object",
"properties": {
"room": {
"type": "string",
"description": "Room ID in hex format (e.g., 0x012)"
}
},
"required": ["room"]
}
},
{
"name": "overworld_find_tile",
"description": "Find all occurrences of a specific tile16 ID on overworld maps",
"parameters": {
"type": "object",
"properties": {
"tile": {
"type": "string",
"description": "Tile16 ID in hex format (e.g., 0x02E)"
},
"map": {
"type": "string",
"description": "Optional: specific map ID to search (e.g., 0x05)"
}
},
"required": ["tile"]
}
},
{
"name": "overworld_describe_map",
"description": "Get summary information about an overworld map",
"parameters": {
"type": "object",
"properties": {
"map": {
"type": "string",
"description": "Map ID in hex format (e.g., 0x00)"
}
},
"required": ["map"]
}
},
{
"name": "overworld_list_warps",
"description": "List warp/entrance/exit points on the overworld",
"parameters": {
"type": "object",
"properties": {
"map": {
"type": "string",
"description": "Optional: filter by map ID"
},
"type": {
"type": "string",
"description": "Optional: filter by warp type",
"enum": ["entrance", "exit", "hole", "all"]
}
}
}
}
]
```
## Available Commands (for Making Changes)
```yaml
commands:
palette export: |-
Export palette data to JSON file
--group <group> Palette group (overworld, dungeon, sprite)
--id <id> Palette ID (0-based index)
--to <file> Output JSON file path
palette import: |-
Import palette data from JSON file
--group <group> Palette group (overworld, dungeon, sprite)
--id <id> Palette ID (0-based index)
--from <file> Input JSON file path
overworld set-tile: |-
Place a tile in the overworld
--map <id> Map ID (0-based)
--x <x> X coordinate (0-63)
--y <y> Y coordinate (0-63)
--tile <hex> Tile ID in hex (e.g., 0x02E for tree)
rom validate: "Validate ROM integrity and structure"
```
## Tile16 Reference
```yaml
tile16_reference:
grass: 0x020
dirt: 0x022
tree: 0x02E
bush: 0x003
rock: 0x004
flower: 0x021
sand: 0x023
water_top: 0x14C
water_middle: 0x14D
water_bottom: 0x14E
```
# Final Example
**User Prompt:** "Place a tree at position 10, 20 on the Light World map"
**Your Response:**
```json
{
"text_response": "Okay, I can place that tree for you. Here is the command:",
"reasoning": "This is a single tile16 placement. The user specified the coordinates and map. The tile ID for a tree is 0x02E.",
"commands": ["overworld set-tile --map 0 --x 10 --y 20 --tile 0x02E"]
}
```

View File

@@ -14,3 +14,7 @@ ABSL_FLAG(std::string, gemini_api_key, "",
"Gemini API key (can also use GEMINI_API_KEY environment variable)");
ABSL_FLAG(std::string, ollama_host, "http://localhost:11434",
"Ollama server host URL");
ABSL_FLAG(std::string, prompt_version, "default",
"Prompt version to use: 'default' or 'v2'");
ABSL_FLAG(bool, use_function_calling, false,
"Enable native Gemini function calling (incompatible with JSON output mode)");

View File

@@ -43,16 +43,21 @@ namespace yaze {
namespace cli {
GeminiAIService::GeminiAIService(const GeminiConfig& config)
: config_(config), function_calling_enabled_(false) { // Disable function calling - use JSON output instead
: config_(config), function_calling_enabled_(config.use_function_calling) {
std::cerr << "🔧 GeminiAIService constructor: start" << std::endl;
std::cerr << "🔧 Function calling: " << (function_calling_enabled_ ? "enabled" : "disabled (JSON output mode)") << std::endl;
std::cerr << "🔧 Prompt version: " << config_.prompt_version << std::endl;
#ifdef CPPHTTPLIB_OPENSSL_SUPPORT
// Initialize OpenSSL for HTTPS support
InitializeOpenSSL();
#endif
// Load command documentation into prompt builder
if (auto status = prompt_builder_.LoadResourceCatalogue(""); !status.ok()) {
// Load command documentation into prompt builder with specified version
std::string catalogue_path = config_.prompt_version == "v2"
? "assets/agent/prompt_catalogue_v2.yaml"
: "assets/agent/prompt_catalogue.yaml";
if (auto status = prompt_builder_.LoadResourceCatalogue(catalogue_path); !status.ok()) {
std::cerr << "⚠️ Failed to load agent prompt catalogue: "
<< status.message() << std::endl;
}
@@ -61,11 +66,38 @@ GeminiAIService::GeminiAIService(const GeminiConfig& config)
if (config_.system_instruction.empty()) {
std::cerr << "🔧 GeminiAIService: building system instruction" << std::endl;
// Use enhanced prompting by default
if (config_.use_enhanced_prompting) {
config_.system_instruction = prompt_builder_.BuildSystemInstructionWithExamples();
} else {
config_.system_instruction = BuildSystemInstruction();
// Try to load version-specific system prompt file
std::string prompt_file = config_.prompt_version == "v2"
? "assets/agent/system_prompt_v2.txt"
: "assets/agent/system_prompt.txt";
std::vector<std::string> search_paths = {
prompt_file,
"../" + prompt_file,
"../../" + prompt_file
};
bool loaded = false;
for (const auto& path : search_paths) {
std::ifstream file(path);
if (file.good()) {
std::stringstream buffer;
buffer << file.rdbuf();
config_.system_instruction = buffer.str();
std::cerr << "✓ Loaded prompt from: " << path << std::endl;
loaded = true;
break;
}
}
if (!loaded) {
// Fallback to builder
if (config_.use_enhanced_prompting) {
config_.system_instruction = prompt_builder_.BuildSystemInstructionWithExamples();
} else {
config_.system_instruction = BuildSystemInstruction();
}
}
std::cerr << "🔧 GeminiAIService: system instruction built" << std::endl;
}

View File

@@ -19,6 +19,8 @@ struct GeminiConfig {
int max_output_tokens = 2048;
mutable std::string system_instruction; // Mutable to allow lazy initialization
bool use_enhanced_prompting = true; // Enable few-shot examples
bool use_function_calling = false; // Use native Gemini function calling
std::string prompt_version = "default"; // Which prompt file to use (default, v2, etc.)
GeminiConfig() = default;
explicit GeminiConfig(const std::string& key) : api_key(key) {}

View File

@@ -17,6 +17,8 @@ ABSL_DECLARE_FLAG(std::string, ai_provider);
ABSL_DECLARE_FLAG(std::string, ai_model);
ABSL_DECLARE_FLAG(std::string, gemini_api_key);
ABSL_DECLARE_FLAG(std::string, ollama_host);
ABSL_DECLARE_FLAG(std::string, prompt_version);
ABSL_DECLARE_FLAG(bool, use_function_calling);
namespace yaze {
namespace cli {
@@ -83,7 +85,10 @@ std::unique_ptr<AIService> CreateAIService(const AIServiceConfig& config) {
if (!config.model.empty()) {
gemini_config.model = config.model;
}
gemini_config.prompt_version = absl::GetFlag(FLAGS_prompt_version);
gemini_config.use_function_calling = absl::GetFlag(FLAGS_use_function_calling);
std::cerr << "🔧 Model: " << gemini_config.model << std::endl;
std::cerr << "🔧 Prompt version: " << gemini_config.prompt_version << std::endl;
std::cerr << "🔧 Creating Gemini service instance..." << std::endl;
auto service = std::make_unique<GeminiAIService>(gemini_config);