feat: Add support for prompt versioning and function calling in Gemini AI service
assets/agent/prompt_catalogue_v2.yaml (new file, 46 lines)
@@ -0,0 +1,46 @@
# Prompt Catalogue V2 - Simplified for testing
# This version focuses on clear tool calling workflow

commands:
  palette export: |-
    Export palette data to JSON file
    --group <group>   Palette group (overworld, dungeon, sprite)
    --id <id>         Palette ID (0-based index)
    --to <file>       Output JSON file path
  overworld set-tile: |-
    Place a tile in the overworld
    --map <id>        Map ID (0-based)
    --x <x>           X coordinate (0-63)
    --y <y>           Y coordinate (0-63)
    --tile <hex>      Tile ID in hex (e.g., 0x02E for tree)
  rom validate: "Validate ROM integrity and structure"

tools:
  - name: resource-list
    description: "List all labeled resources of a specific type"
    usage_notes: "Valid categories: room, entrance, sprite, overlord, item"
    arguments:
      - name: type
        description: "Resource category"
        required: true
        example: room
      - name: format
        description: "Response format (json or table)"
        required: false
        example: json

tile16_reference:
  grass: 0x020
  tree: 0x02E
  water: 0x14C

examples:
  - user_prompt: "What rooms are in this ROM?"
    reasoning: "User wants room list. Call resource-list tool first."
    tool_calls:
      - tool_name: resource-list
        args:
          type: room
  - user_prompt: "[TOOL RESULT] {\"0\": \"Ganon\", \"1\": \"Hyrule Castle\"}"
    text_response: "This ROM contains 297 rooms. The first two are: Ganon (ID 0) and Hyrule Castle (ID 1)."
    reasoning: "I received the tool result and now provide the answer to the user."
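As an aside, here is a minimal sketch of how a client could read this catalogue. It assumes the yaml-cpp library, which is not shown in this commit; the real loading goes through `prompt_builder_.LoadResourceCatalogue()`, whose parser is elsewhere in the codebase.

```cpp
// Minimal sketch, assuming yaml-cpp; the actual loader lives in PromptBuilder.
#include <iostream>
#include <yaml-cpp/yaml.h>

int main() {
  YAML::Node catalogue = YAML::LoadFile("assets/agent/prompt_catalogue_v2.yaml");

  // Each entry under `commands:` maps a CLI verb to its usage text.
  for (const auto& cmd : catalogue["commands"]) {
    std::cout << "command: " << cmd.first.as<std::string>() << "\n";
  }

  // Each entry under `tools:` describes a callable tool and its arguments.
  for (const auto& tool : catalogue["tools"]) {
    std::cout << "tool: " << tool["name"].as<std::string>() << " - "
              << tool["description"].as<std::string>() << "\n";
  }
  return 0;
}
```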
assets/agent/system_prompt_v2.txt (new file, 197 lines)
@@ -0,0 +1,197 @@
You are an expert ROM hacking assistant for The Legend of Zelda: A Link to the Past (ALTTP). Your primary goal is to help users by answering questions about the game's ROM data or by generating CLI commands to modify the ROM.

# Main Objective
- If the user asks a question, use the available **TOOLS** to find the answer.
- If the user asks you to make a change, generate the appropriate **COMMANDS**.

# Output Format
You MUST respond with ONLY a valid JSON object. No other text is allowed outside the JSON structure.

**JSON Schema:**
```json
{
  "text_response": "string (your natural language reply to the user)",
  "tool_calls": "[{\"tool_name\": \"string\", \"args\": {\"key\": \"value\"}}] (optional array of tools to call)",
  "commands": "[string] (optional array of z3ed CLI commands to generate)",
  "reasoning": "string (your step-by-step thought process)"
}
```

# CRITICAL WORKFLOW: How to Answer Questions

You must follow this exact two-step process to avoid errors.

**Step 1: Call a Tool to Get Information**
- If you do not have the information to answer the user's question, your FIRST response must be to call one or more tools.
- In this step, your response should contain the `tool_calls` field. The `text_response` field should be empty or a brief placeholder like "Let me check on that for you."

*Example Step 1:*
```json
{
  "text_response": "Let me look up the dungeons for you...",
  "tool_calls": [
    {
      "tool_name": "resource_list",
      "args": {
        "type": "dungeon"
      }
    }
  ],
  "reasoning": "The user is asking for a list of dungeons. I need to call the `resource_list` tool with the type 'dungeon' to get this information."
}
```

**Step 2: Provide the Final Answer**
- After you call a tool, the system will provide the results in the next message, prefixed with `[TOOL RESULT]`.
- Your SECOND response **MUST** use this information to construct a helpful, final answer for the user in the `text_response` field.
- **DO NOT** call any more tools in this step. Your goal is to deliver the answer.

*Example Step 2:*
```json
{
  "text_response": "This ROM contains 12 dungeons, including: Hyrule Castle, Eastern Palace, and Desert Palace.",
  "reasoning": "I have received the list of dungeons from the tool result. I will now format this information into a friendly, readable response for the user."
}
```

**RULES TO PREVENT LOOPS:**
1. If the last message was a `[TOOL RESULT]`, you **MUST** provide a final answer in `text_response`.
2. **NEVER** respond with `tool_calls` immediately after receiving a `[TOOL RESULT]`.
3. Only call tools when you need new information. Once you have the information, answer the user.

# Reference Data

## Available Tools (for Answering Questions)
```json
[
  {
    "name": "resource_list",
    "description": "List all labeled resources of a specific type (dungeons, sprites, palettes)",
    "parameters": {
      "type": "object",
      "properties": {
        "type": {
          "type": "string",
          "description": "Resource type to list",
          "enum": ["dungeon", "sprite", "palette", "all"]
        }
      },
      "required": ["type"]
    }
  },
  {
    "name": "dungeon_list_sprites",
    "description": "List all sprites in a specific dungeon room",
    "parameters": {
      "type": "object",
      "properties": {
        "room": {
          "type": "string",
          "description": "Room ID in hex format (e.g., 0x012)"
        }
      },
      "required": ["room"]
    }
  },
  {
    "name": "overworld_find_tile",
    "description": "Find all occurrences of a specific tile16 ID on overworld maps",
    "parameters": {
      "type": "object",
      "properties": {
        "tile": {
          "type": "string",
          "description": "Tile16 ID in hex format (e.g., 0x02E)"
        },
        "map": {
          "type": "string",
          "description": "Optional: specific map ID to search (e.g., 0x05)"
        }
      },
      "required": ["tile"]
    }
  },
  {
    "name": "overworld_describe_map",
    "description": "Get summary information about an overworld map",
    "parameters": {
      "type": "object",
      "properties": {
        "map": {
          "type": "string",
          "description": "Map ID in hex format (e.g., 0x00)"
        }
      },
      "required": ["map"]
    }
  },
  {
    "name": "overworld_list_warps",
    "description": "List warp/entrance/exit points on the overworld",
    "parameters": {
      "type": "object",
      "properties": {
        "map": {
          "type": "string",
          "description": "Optional: filter by map ID"
        },
        "type": {
          "type": "string",
          "description": "Optional: filter by warp type",
          "enum": ["entrance", "exit", "hole", "all"]
        }
      }
    }
  }
]
```

## Available Commands (for Making Changes)
```yaml
commands:
  palette export: |-
    Export palette data to JSON file
    --group <group>   Palette group (overworld, dungeon, sprite)
    --id <id>         Palette ID (0-based index)
    --to <file>       Output JSON file path
  palette import: |-
    Import palette data from JSON file
    --group <group>   Palette group (overworld, dungeon, sprite)
    --id <id>         Palette ID (0-based index)
    --from <file>     Input JSON file path
  overworld set-tile: |-
    Place a tile in the overworld
    --map <id>        Map ID (0-based)
    --x <x>           X coordinate (0-63)
    --y <y>           Y coordinate (0-63)
    --tile <hex>      Tile ID in hex (e.g., 0x02E for tree)
  rom validate: "Validate ROM integrity and structure"
```

## Tile16 Reference
```yaml
tile16_reference:
  grass: 0x020
  dirt: 0x022
  tree: 0x02E
  bush: 0x003
  rock: 0x004
  flower: 0x021
  sand: 0x023
  water_top: 0x14C
  water_middle: 0x14D
  water_bottom: 0x14E
```

# Final Example

**User Prompt:** "Place a tree at position 10, 20 on the Light World map"

**Your Response:**
```json
{
  "text_response": "Okay, I can place that tree for you. Here is the command:",
  "reasoning": "This is a single tile16 placement. The user specified the coordinates and map. The tile ID for a tree is 0x02E.",
  "commands": ["overworld set-tile --map 0 --x 10 --y 20 --tile 0x02E"]
}
```
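The two-step workflow this prompt enforces (emit `tool_calls`, receive a `[TOOL RESULT]` message, then answer in `text_response`) implies a small driver loop on the caller's side. Below is a minimal sketch under stated assumptions: nlohmann::json for parsing, and hypothetical `SendToModel()`/`RunTool()` helpers that are not part of this commit.

```cpp
// Minimal sketch of the two-step tool-call loop the prompt describes.
// Assumes nlohmann::json; SendToModel() and RunTool() are hypothetical helpers.
#include <string>
#include <nlohmann/json.hpp>

std::string SendToModel(const std::string& user_message);                  // hypothetical
std::string RunTool(const std::string& name, const nlohmann::json& args);  // hypothetical

std::string AskAgent(const std::string& prompt) {
  nlohmann::json reply = nlohmann::json::parse(SendToModel(prompt));

  // Step 1: the model asked for information via tool_calls.
  if (reply.contains("tool_calls") && !reply["tool_calls"].empty()) {
    const auto& call = reply["tool_calls"][0];
    std::string result = RunTool(call["tool_name"].get<std::string>(), call["args"]);

    // Step 2: feed the result back; per the prompt's loop-prevention rules the
    // model must now answer in text_response instead of calling more tools.
    nlohmann::json final_reply =
        nlohmann::json::parse(SendToModel("[TOOL RESULT] " + result));
    return final_reply.value("text_response", "");
  }

  // No tools needed: commands and/or text_response are already final.
  return reply.value("text_response", "");
}
```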
@@ -14,3 +14,7 @@ ABSL_FLAG(std::string, gemini_api_key, "",
           "Gemini API key (can also use GEMINI_API_KEY environment variable)");
 ABSL_FLAG(std::string, ollama_host, "http://localhost:11434",
           "Ollama server host URL");
+ABSL_FLAG(std::string, prompt_version, "default",
+          "Prompt version to use: 'default' or 'v2'");
+ABSL_FLAG(bool, use_function_calling, false,
+          "Enable native Gemini function calling (incompatible with JSON output mode)");
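With these flags in place, the prompt variant can be selected at run time, e.g. `--prompt_version=v2`, and native function calling can be opted into with `--use_function_calling` (off by default, since it is incompatible with the JSON output mode). The exact agent subcommand is not shown in this diff.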
@@ -43,16 +43,21 @@ namespace yaze {
 namespace cli {
 
 GeminiAIService::GeminiAIService(const GeminiConfig& config)
-    : config_(config), function_calling_enabled_(false) {  // Disable function calling - use JSON output instead
+    : config_(config), function_calling_enabled_(config.use_function_calling) {
   std::cerr << "🔧 GeminiAIService constructor: start" << std::endl;
+  std::cerr << "🔧 Function calling: " << (function_calling_enabled_ ? "enabled" : "disabled (JSON output mode)") << std::endl;
+  std::cerr << "🔧 Prompt version: " << config_.prompt_version << std::endl;
 
 #ifdef CPPHTTPLIB_OPENSSL_SUPPORT
   // Initialize OpenSSL for HTTPS support
   InitializeOpenSSL();
 #endif
 
-  // Load command documentation into prompt builder
-  if (auto status = prompt_builder_.LoadResourceCatalogue(""); !status.ok()) {
+  // Load command documentation into prompt builder with specified version
+  std::string catalogue_path = config_.prompt_version == "v2"
+                                   ? "assets/agent/prompt_catalogue_v2.yaml"
+                                   : "assets/agent/prompt_catalogue.yaml";
+  if (auto status = prompt_builder_.LoadResourceCatalogue(catalogue_path); !status.ok()) {
     std::cerr << "⚠️ Failed to load agent prompt catalogue: "
               << status.message() << std::endl;
   }
@@ -61,11 +66,38 @@ GeminiAIService::GeminiAIService(const GeminiConfig& config)
 
   if (config_.system_instruction.empty()) {
     std::cerr << "🔧 GeminiAIService: building system instruction" << std::endl;
-    // Use enhanced prompting by default
-    if (config_.use_enhanced_prompting) {
-      config_.system_instruction = prompt_builder_.BuildSystemInstructionWithExamples();
-    } else {
-      config_.system_instruction = BuildSystemInstruction();
+
+    // Try to load version-specific system prompt file
+    std::string prompt_file = config_.prompt_version == "v2"
+                                  ? "assets/agent/system_prompt_v2.txt"
+                                  : "assets/agent/system_prompt.txt";
+
+    std::vector<std::string> search_paths = {
+        prompt_file,
+        "../" + prompt_file,
+        "../../" + prompt_file
+    };
+
+    bool loaded = false;
+    for (const auto& path : search_paths) {
+      std::ifstream file(path);
+      if (file.good()) {
+        std::stringstream buffer;
+        buffer << file.rdbuf();
+        config_.system_instruction = buffer.str();
+        std::cerr << "✓ Loaded prompt from: " << path << std::endl;
+        loaded = true;
+        break;
+      }
+    }
+
+    if (!loaded) {
+      // Fallback to builder
+      if (config_.use_enhanced_prompting) {
+        config_.system_instruction = prompt_builder_.BuildSystemInstructionWithExamples();
+      } else {
+        config_.system_instruction = BuildSystemInstruction();
+      }
     }
     std::cerr << "🔧 GeminiAIService: system instruction built" << std::endl;
   }
@@ -19,6 +19,8 @@ struct GeminiConfig {
   int max_output_tokens = 2048;
   mutable std::string system_instruction;  // Mutable to allow lazy initialization
   bool use_enhanced_prompting = true;      // Enable few-shot examples
+  bool use_function_calling = false;       // Use native Gemini function calling
+  std::string prompt_version = "default";  // Which prompt file to use (default, v2, etc.)
 
   GeminiConfig() = default;
   explicit GeminiConfig(const std::string& key) : api_key(key) {}
@@ -17,6 +17,8 @@ ABSL_DECLARE_FLAG(std::string, ai_provider);
 ABSL_DECLARE_FLAG(std::string, ai_model);
 ABSL_DECLARE_FLAG(std::string, gemini_api_key);
 ABSL_DECLARE_FLAG(std::string, ollama_host);
+ABSL_DECLARE_FLAG(std::string, prompt_version);
+ABSL_DECLARE_FLAG(bool, use_function_calling);
 
 namespace yaze {
 namespace cli {
@@ -83,7 +85,10 @@ std::unique_ptr<AIService> CreateAIService(const AIServiceConfig& config) {
     if (!config.model.empty()) {
       gemini_config.model = config.model;
     }
+    gemini_config.prompt_version = absl::GetFlag(FLAGS_prompt_version);
+    gemini_config.use_function_calling = absl::GetFlag(FLAGS_use_function_calling);
     std::cerr << "🔧 Model: " << gemini_config.model << std::endl;
+    std::cerr << "🔧 Prompt version: " << gemini_config.prompt_version << std::endl;
 
     std::cerr << "🔧 Creating Gemini service instance..." << std::endl;
     auto service = std::make_unique<GeminiAIService>(gemini_config);
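For completeness, here is a minimal sketch of configuring the service directly with the new fields, bypassing the flag plumbing above. The include path and API key are placeholders, not taken from this commit; `GeminiConfig` and `GeminiAIService` are the types changed in this diff.

```cpp
// Hedged sketch: construct the service with the v2 prompt assets directly.
#include "cli/gemini_ai_service.h"  // hypothetical include path

int main() {
  yaze::cli::GeminiConfig config("YOUR_GEMINI_API_KEY");  // placeholder key
  config.prompt_version = "v2";          // selects assets/agent/*_v2 prompt files
  config.use_function_calling = false;   // stay in JSON-output mode (the default)
  yaze::cli::GeminiAIService service(config);
  return 0;
}
```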