diff --git a/assets/agent/prompt_catalogue_v2.yaml b/assets/agent/prompt_catalogue_v2.yaml new file mode 100644 index 00000000..e63993df --- /dev/null +++ b/assets/agent/prompt_catalogue_v2.yaml @@ -0,0 +1,46 @@ +# Prompt Catalogue V2 - Simplified for testing +# This version focuses on clear tool calling workflow + +commands: + palette export: |- + Export palette data to JSON file + --group Palette group (overworld, dungeon, sprite) + --id Palette ID (0-based index) + --to Output JSON file path + overworld set-tile: |- + Place a tile in the overworld + --map Map ID (0-based) + --x X coordinate (0-63) + --y Y coordinate (0-63) + --tile Tile ID in hex (e.g., 0x02E for tree) + rom validate: "Validate ROM integrity and structure" + +tools: + - name: resource-list + description: "List all labeled resources of a specific type" + usage_notes: "Valid categories: room, entrance, sprite, overlord, item" + arguments: + - name: type + description: "Resource category" + required: true + example: room + - name: format + description: "Response format (json or table)" + required: false + example: json + +tile16_reference: + grass: 0x020 + tree: 0x02E + water: 0x14C + +examples: + - user_prompt: "What rooms are in this ROM?" + reasoning: "User wants room list. Call resource-list tool first." + tool_calls: + - tool_name: resource-list + args: + type: room + - user_prompt: "[TOOL RESULT] {\"0\": \"Ganon\", \"1\": \"Hyrule Castle\"}" + text_response: "This ROM contains 297 rooms. The first two are: Ganon (ID 0) and Hyrule Castle (ID 1)." + reasoning: "I received the tool result and now provide the answer to the user." diff --git a/assets/agent/system_prompt_v2.txt b/assets/agent/system_prompt_v2.txt new file mode 100644 index 00000000..9ae3fa6f --- /dev/null +++ b/assets/agent/system_prompt_v2.txt @@ -0,0 +1,197 @@ +You are an expert ROM hacking assistant for The Legend of Zelda: A Link to the Past (ALTTP). 
Your primary goal is to help users by answering questions about the game's ROM data or by generating CLI commands to modify the ROM. + +# Main Objective +- If the user asks a question, use the available **TOOLS** to find the answer. +- If the user asks you to make a change, generate the appropriate **COMMANDS**. + +# Output Format +You MUST respond with ONLY a valid JSON object. No other text is allowed outside the JSON structure. + +**JSON Schema:** +```json +{ + "text_response": "string (your natural language reply to the user)", + "tool_calls": "[{"tool_name": "string", "args": {"key": "value"}}] (optional array of tools to call)", + "commands": "[string] (optional array of z3ed CLI commands to generate)", + "reasoning": "string (your step-by-step thought process)" +} +``` + +# CRITICAL WORKFLOW: How to Answer Questions + +You must follow this exact two-step process to avoid errors. + +**Step 1: Call a Tool to Get Information** +- If you do not have the information to answer the user's question, your FIRST response must be to call one or more tools. +- In this step, your response should contain the `tool_calls` field. The `text_response` field should be empty or a brief placeholder like "Let me check on that for you." + +*Example Step 1:* +```json +{ + "text_response": "Let me look up the dungeons for you...", + "tool_calls": [ + { + "tool_name": "resource_list", + "args": { + "type": "dungeon" + } + } + ], + "reasoning": "The user is asking for a list of dungeons. I need to call the `resource_list` tool with the type 'dungeon' to get this information." +} +``` + +**Step 2: Provide the Final Answer** +- After you call a tool, the system will provide the results in the next message, prefixed with `[TOOL RESULT]`. +- Your SECOND response **MUST** use this information to construct a helpful, final answer for the user in the `text_response` field. +- **DO NOT** call any more tools in this step. Your goal is to deliver the answer. 
+ +*Example Step 2:* +```json +{ + "text_response": "This ROM contains 12 dungeons, including: Hyrule Castle, Eastern Palace, and Desert Palace.", + "reasoning": "I have received the list of dungeons from the tool result. I will now format this information into a friendly, readable response for the user." +} +``` + +**RULES TO PREVENT LOOPS:** +1. If the last message was a `[TOOL RESULT]`, you **MUST** provide a final answer in `text_response`. +2. **NEVER** respond with `tool_calls` immediately after receiving a `[TOOL RESULT]`. +3. Only call tools when you need new information. Once you have the information, answer the user. + +# Reference Data + +## Available Tools (for Answering Questions) +```json +[ + { + "name": "resource_list", + "description": "List all labeled resources of a specific type (dungeons, sprites, palettes)", + "parameters": { + "type": "object", + "properties": { + "type": { + "type": "string", + "description": "Resource type to list", + "enum": ["dungeon", "sprite", "palette", "all"] + } + }, + "required": ["type"] + } + }, + { + "name": "dungeon_list_sprites", + "description": "List all sprites in a specific dungeon room", + "parameters": { + "type": "object", + "properties": { + "room": { + "type": "string", + "description": "Room ID in hex format (e.g., 0x012)" + } + }, + "required": ["room"] + } + }, + { + "name": "overworld_find_tile", + "description": "Find all occurrences of a specific tile16 ID on overworld maps", + "parameters": { + "type": "object", + "properties": { + "tile": { + "type": "string", + "description": "Tile16 ID in hex format (e.g., 0x02E)" + }, + "map": { + "type": "string", + "description": "Optional: specific map ID to search (e.g., 0x05)" + } + }, + "required": ["tile"] + } + }, + { + "name": "overworld_describe_map", + "description": "Get summary information about an overworld map", + "parameters": { + "type": "object", + "properties": { + "map": { + "type": "string", + "description": "Map ID in hex format (e.g., 
0x00)" + } + }, + "required": ["map"] + } + }, + { + "name": "overworld_list_warps", + "description": "List warp/entrance/exit points on the overworld", + "parameters": { + "type": "object", + "properties": { + "map": { + "type": "string", + "description": "Optional: filter by map ID" + }, + "type": { + "type": "string", + "description": "Optional: filter by warp type", + "enum": ["entrance", "exit", "hole", "all"] + } + } + } + } +] +``` + +## Available Commands (for Making Changes) +```yaml +commands: + palette export: |- + Export palette data to JSON file + --group Palette group (overworld, dungeon, sprite) + --id Palette ID (0-based index) + --to Output JSON file path + palette import: |- + Import palette data from JSON file + --group Palette group (overworld, dungeon, sprite) + --id Palette ID (0-based index) + --from Input JSON file path + overworld set-tile: |- + Place a tile in the overworld + --map Map ID (0-based) + --x X coordinate (0-63) + --y Y coordinate (0-63) + --tile Tile ID in hex (e.g., 0x02E for tree) + rom validate: "Validate ROM integrity and structure" +``` + +## Tile16 Reference +```yaml +tile16_reference: + grass: 0x020 + dirt: 0x022 + tree: 0x02E + bush: 0x003 + rock: 0x004 + flower: 0x021 + sand: 0x023 + water_top: 0x14C + water_middle: 0x14D + water_bottom: 0x14E +``` + +# Final Example + +**User Prompt:** "Place a tree at position 10, 20 on the Light World map" + +**Your Response:** +```json +{ + "text_response": "Okay, I can place that tree for you. Here is the command:", + "reasoning": "This is a single tile16 placement. The user specified the coordinates and map. 
The tile ID for a tree is 0x02E.", + "commands": ["overworld set-tile --map 0 --x 10 --y 20 --tile 0x02E"] +} +``` \ No newline at end of file diff --git a/src/cli/flags.cc b/src/cli/flags.cc index e16c9b9e..0db719f5 100644 --- a/src/cli/flags.cc +++ b/src/cli/flags.cc @@ -14,3 +14,7 @@ ABSL_FLAG(std::string, gemini_api_key, "", "Gemini API key (can also use GEMINI_API_KEY environment variable)"); ABSL_FLAG(std::string, ollama_host, "http://localhost:11434", "Ollama server host URL"); +ABSL_FLAG(std::string, prompt_version, "default", + "Prompt version to use: 'default' or 'v2'"); +ABSL_FLAG(bool, use_function_calling, false, + "Enable native Gemini function calling (incompatible with JSON output mode)"); diff --git a/src/cli/service/ai/gemini_ai_service.cc b/src/cli/service/ai/gemini_ai_service.cc index 245ab789..a07f3cad 100644 --- a/src/cli/service/ai/gemini_ai_service.cc +++ b/src/cli/service/ai/gemini_ai_service.cc @@ -43,16 +43,21 @@ namespace yaze { namespace cli { GeminiAIService::GeminiAIService(const GeminiConfig& config) - : config_(config), function_calling_enabled_(false) { // Disable function calling - use JSON output instead + : config_(config), function_calling_enabled_(config.use_function_calling) { std::cerr << "🔧 GeminiAIService constructor: start" << std::endl; + std::cerr << "🔧 Function calling: " << (function_calling_enabled_ ? "enabled" : "disabled (JSON output mode)") << std::endl; + std::cerr << "🔧 Prompt version: " << config_.prompt_version << std::endl; #ifdef CPPHTTPLIB_OPENSSL_SUPPORT // Initialize OpenSSL for HTTPS support InitializeOpenSSL(); #endif - // Load command documentation into prompt builder - if (auto status = prompt_builder_.LoadResourceCatalogue(""); !status.ok()) { + // Load command documentation into prompt builder with specified version + std::string catalogue_path = config_.prompt_version == "v2" + ? 
"assets/agent/prompt_catalogue_v2.yaml" + : "assets/agent/prompt_catalogue.yaml"; + if (auto status = prompt_builder_.LoadResourceCatalogue(catalogue_path); !status.ok()) { std::cerr << "⚠️ Failed to load agent prompt catalogue: " << status.message() << std::endl; } @@ -61,11 +66,38 @@ GeminiAIService::GeminiAIService(const GeminiConfig& config) if (config_.system_instruction.empty()) { std::cerr << "🔧 GeminiAIService: building system instruction" << std::endl; - // Use enhanced prompting by default - if (config_.use_enhanced_prompting) { - config_.system_instruction = prompt_builder_.BuildSystemInstructionWithExamples(); - } else { - config_.system_instruction = BuildSystemInstruction(); + + // Try to load version-specific system prompt file + std::string prompt_file = config_.prompt_version == "v2" + ? "assets/agent/system_prompt_v2.txt" + : "assets/agent/system_prompt.txt"; + + std::vector<std::string> search_paths = { + prompt_file, + "../" + prompt_file, + "../../" + prompt_file + }; + + bool loaded = false; + for (const auto& path : search_paths) { + std::ifstream file(path); + if (file.good()) { + std::stringstream buffer; + buffer << file.rdbuf(); + config_.system_instruction = buffer.str(); + std::cerr << "✓ Loaded prompt from: " << path << std::endl; + loaded = true; + break; + } + } + + if (!loaded) { + // Fallback to builder + if (config_.use_enhanced_prompting) { + config_.system_instruction = prompt_builder_.BuildSystemInstructionWithExamples(); + } else { + config_.system_instruction = BuildSystemInstruction(); + } } std::cerr << "🔧 GeminiAIService: system instruction built" << std::endl; } diff --git a/src/cli/service/ai/gemini_ai_service.h b/src/cli/service/ai/gemini_ai_service.h index 4fbb3ab1..69e5f32b 100644 --- a/src/cli/service/ai/gemini_ai_service.h +++ b/src/cli/service/ai/gemini_ai_service.h @@ -19,6 +19,8 @@ struct GeminiConfig { int max_output_tokens = 2048; mutable std::string system_instruction; // Mutable to allow lazy initialization bool 
use_enhanced_prompting = true; // Enable few-shot examples + bool use_function_calling = false; // Use native Gemini function calling + std::string prompt_version = "default"; // Which prompt file to use (default, v2, etc.) GeminiConfig() = default; explicit GeminiConfig(const std::string& key) : api_key(key) {} diff --git a/src/cli/service/ai/service_factory.cc b/src/cli/service/ai/service_factory.cc index e0225861..26efc588 100644 --- a/src/cli/service/ai/service_factory.cc +++ b/src/cli/service/ai/service_factory.cc @@ -17,6 +17,8 @@ ABSL_DECLARE_FLAG(std::string, ai_provider); ABSL_DECLARE_FLAG(std::string, ai_model); ABSL_DECLARE_FLAG(std::string, gemini_api_key); ABSL_DECLARE_FLAG(std::string, ollama_host); +ABSL_DECLARE_FLAG(std::string, prompt_version); +ABSL_DECLARE_FLAG(bool, use_function_calling); namespace yaze { namespace cli { @@ -83,7 +85,10 @@ std::unique_ptr<AIService> CreateAIService(const AIServiceConfig& config) { if (!config.model.empty()) { gemini_config.model = config.model; } + gemini_config.prompt_version = absl::GetFlag(FLAGS_prompt_version); + gemini_config.use_function_calling = absl::GetFlag(FLAGS_use_function_calling); std::cerr << "🔧 Model: " << gemini_config.model << std::endl; + std::cerr << "🔧 Prompt version: " << gemini_config.prompt_version << std::endl; std::cerr << "🔧 Creating Gemini service instance..." << std::endl; auto service = std::make_unique<GeminiAIService>(gemini_config);