feat: Consolidate and enhance agent testing scripts, replacing manual and quickstart tests with a comprehensive test suite
This commit is contained in:
93
scripts/agent_test_suite.sh
Normal file
93
scripts/agent_test_suite.sh
Normal file
@@ -0,0 +1,93 @@
|
||||
#!/bin/bash
|
||||
|
||||
# Comprehensive test suite for the z3ed AI Agent.
|
||||
# This script consolidates multiple older test scripts into one.
|
||||
#
|
||||
# Usage: ./scripts/agent_test_suite.sh <provider>
|
||||
# provider: ollama, gemini, or mock
|
||||
|
||||
set -e # Exit immediately if a command exits with a non-zero status.
|
||||
|
||||
# --- Configuration ---
|
||||
# Resolve the repository root from this script's own location (the script
# lives in <root>/scripts/) so the suite is not tied to one developer's
# checkout path.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

# Each location may be overridden from the environment, e.g. to point at a
# different build directory: Z3ED_BIN=build/bin/z3ed ./scripts/agent_test_suite.sh mock
Z3ED_BIN="${Z3ED_BIN:-$PROJECT_ROOT/build_test/bin/z3ed}"
ROM_PATH="${ROM_PATH:-$PROJECT_ROOT/assets/zelda3.sfc}"
TEST_DIR="${TEST_DIR:-$PROJECT_ROOT/assets/agent}"

# Conversation files, relative to TEST_DIR, passed to the agent one at a time
# in this order.
TEST_FILES=(
  "context_and_followup.txt"
  "complex_command_generation.txt"
  "error_handling_and_edge_cases.txt"
)
|
||||
|
||||
# --- Helper Functions ---
|
||||
# Print a section banner: one blank line, then the title framed by two rules.
# Arguments: $1 - title text, printed verbatim.
# Outputs:   four lines on stdout.
print_header() {
  local rule="================================================="
  # printf rather than echo: echo would treat a title such as "-n" or "-e"
  # as an option and print nothing for that line.
  printf '\n%s\n%s\n%s\n' "$rule" "$1" "$rule"
}
|
||||
|
||||
# --- Pre-flight Checks ---
|
||||
print_header "Performing Pre-flight Checks"

# The provider name is the single, mandatory positional argument.
if [[ -z "$1" ]]; then
  echo "❌ Error: No AI provider specified."
  echo "Usage: $0 <ollama|gemini|mock>"
  exit 1
fi
PROVIDER=$1
echo "✅ Provider: $PROVIDER"

# The agent binary must exist as a regular file before anything is run.
if [[ ! -f "$Z3ED_BIN" ]]; then
  echo "❌ Error: z3ed binary not found at $Z3ED_BIN"
  echo "Please build the project first (e.g., in build_test)."
  exit 1
fi
echo "✅ z3ed binary found."

# Every test is run against this ROM, so it must be present as well.
if [[ ! -f "$ROM_PATH" ]]; then
  echo "❌ Error: ROM not found at $ROM_PATH"
  exit 1
fi
echo "✅ ROM file found."

# Provider-specific prerequisites; any other provider needs none.
case "$PROVIDER" in
  gemini)
    if [[ -z "$GEMINI_API_KEY" ]]; then
      echo "❌ Error: GEMINI_API_KEY environment variable is not set."
      echo "Please set it to your Gemini API key to run this test."
      exit 1
    fi
    echo "✅ GEMINI_API_KEY is set."
    ;;
  ollama)
    # Only a warning: the check looks for a process named exactly "Ollama"
    # or "ollama" and does not stop the run when none is found.
    if pgrep -x "Ollama" > /dev/null || pgrep -x "ollama" > /dev/null; then
      echo "✅ Ollama server process found."
    else
      echo "⚠️ Warning: Ollama server process not found. The script might fail if it's not running."
    fi
    ;;
esac
|
||||
|
||||
# --- Run Test Suite ---
|
||||
# Run every conversation file through the agent. Failures are counted instead
# of aborting the run, so one bad file does not hide the results of the rest,
# and the script exits non-zero if anything failed or was missing.
failed_tests=0
for test_file in "${TEST_FILES[@]}"; do
  print_header "Running Test File: $test_file (Provider: $PROVIDER)"
  FULL_TEST_PATH="$TEST_DIR/$test_file"

  if [[ ! -f "$FULL_TEST_PATH" ]]; then
    echo "❌ Error: Test file not found: $FULL_TEST_PATH"
    # A missing fixture is a failure, not a silent skip.
    failed_tests=$((failed_tests + 1))
    continue
  fi

  # Build the argument vector as an array and execute it directly. A
  # concatenated string run through eval would split paths containing spaces
  # and would let the provider argument inject arbitrary shell code.
  # --quiet keeps the test logs clean.
  COMMAND=(
    "$Z3ED_BIN" agent simple-chat
    "--file=$FULL_TEST_PATH"
    "--rom=$ROM_PATH"
    "--ai_provider=$PROVIDER"
    --quiet
  )

  echo "Executing command..."
  echo "--- Agent Output for $test_file ---"

  # Running the command as an `if` condition stops `set -e` from terminating
  # the whole suite on the first failing file.
  if ! "${COMMAND[@]}"; then
    echo "❌ Error: Agent exited with a non-zero status for $test_file"
    failed_tests=$((failed_tests + 1))
  fi

  echo "--- Test Complete ---"
  echo ""
done

if [[ "$failed_tests" -gt 0 ]]; then
  print_header "❌ $failed_tests test file(s) failed or were missing."
  exit 1
fi

print_header "✅ All tests completed successfully!"
|
||||
@@ -1,129 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Manual Gemini Integration Test
|
||||
# Usage: GEMINI_API_KEY='your-key' ./scripts/manual_gemini_test.sh
|
||||
|
||||
set -e
|
||||
|
||||
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||
PROJECT_ROOT="$SCRIPT_DIR/.."
|
||||
Z3ED_BIN="$PROJECT_ROOT/build/bin/z3ed"
|
||||
|
||||
echo "🧪 Manual Gemini Integration Test"
|
||||
echo "=================================="
|
||||
echo ""
|
||||
|
||||
# Check if API key is set
|
||||
if [ -z "$GEMINI_API_KEY" ]; then
|
||||
echo "❌ Error: GEMINI_API_KEY not set"
|
||||
echo ""
|
||||
echo "Usage:"
|
||||
echo " GEMINI_API_KEY='your-api-key-here' ./scripts/manual_gemini_test.sh"
|
||||
echo ""
|
||||
echo "Or export it first:"
|
||||
echo " export GEMINI_API_KEY='your-api-key-here'"
|
||||
echo " ./scripts/manual_gemini_test.sh"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo "✅ GEMINI_API_KEY is set (length: ${#GEMINI_API_KEY} chars)"
|
||||
echo ""
|
||||
|
||||
# Test 1: Simple palette command
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo "Test 1: Simple palette color change"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo "Prompt: 'Change palette 0 color 5 to red'"
|
||||
echo ""
|
||||
|
||||
OUTPUT=$($Z3ED_BIN agent plan --prompt "Change palette 0 color 5 to red" 2>&1)
|
||||
echo "$OUTPUT"
|
||||
echo ""
|
||||
|
||||
if echo "$OUTPUT" | grep -q "Using Gemini AI"; then
|
||||
echo "✅ Gemini service detected"
|
||||
else
|
||||
echo "❌ Expected 'Using Gemini AI' in output"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if echo "$OUTPUT" | grep -q -E "palette|color"; then
|
||||
echo "✅ Generated palette-related commands"
|
||||
else
|
||||
echo "❌ No palette commands found"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo ""
|
||||
|
||||
# Test 2: Overworld modification
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo "Test 2: Overworld tile placement"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo "Prompt: 'Place a tree at position (10, 20) on map 0'"
|
||||
echo ""
|
||||
|
||||
OUTPUT=$($Z3ED_BIN agent plan --prompt "Place a tree at position (10, 20) on map 0" 2>&1)
|
||||
echo "$OUTPUT"
|
||||
echo ""
|
||||
|
||||
if echo "$OUTPUT" | grep -q "overworld"; then
|
||||
echo "✅ Generated overworld commands"
|
||||
else
|
||||
echo "⚠️ No overworld commands (model may have interpreted differently)"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
|
||||
# Test 3: Complex multi-step task
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo "Test 3: Multi-step task"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo "Prompt: 'Export palette 0, change color 3 to blue, and import it back'"
|
||||
echo ""
|
||||
|
||||
OUTPUT=$($Z3ED_BIN agent plan --prompt "Export palette 0, change color 3 to blue, and import it back" 2>&1)
|
||||
echo "$OUTPUT"
|
||||
echo ""
|
||||
|
||||
COMMAND_COUNT=$(echo "$OUTPUT" | grep -c -E "^\s*-" || true)
|
||||
|
||||
if [ "$COMMAND_COUNT" -ge 2 ]; then
|
||||
echo "✅ Generated multiple commands ($COMMAND_COUNT commands)"
|
||||
else
|
||||
echo "⚠️ Expected multiple commands, got $COMMAND_COUNT"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
|
||||
# Test 4: Direct run command (creates proposal)
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo "Test 4: Direct run command (creates proposal)"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo "Prompt: 'Validate the ROM'"
|
||||
echo ""
|
||||
|
||||
OUTPUT=$($Z3ED_BIN agent run --prompt "Validate the ROM" 2>&1 || true)
|
||||
echo "$OUTPUT"
|
||||
echo ""
|
||||
|
||||
if echo "$OUTPUT" | grep -q "Proposal"; then
|
||||
echo "✅ Proposal created"
|
||||
else
|
||||
echo "ℹ️ No proposal created (may need ROM file)"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo "🎉 Manual Test Suite Complete!"
|
||||
echo "━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"
|
||||
echo ""
|
||||
echo "Summary:"
|
||||
echo " • Gemini API integration: ✅ Working"
|
||||
echo " • Command generation: ✅ Functional"
|
||||
echo " • Service factory: ✅ Correct provider selection"
|
||||
echo ""
|
||||
echo "Next steps:"
|
||||
echo " 1. Review generated commands for accuracy"
|
||||
echo " 2. Test with more complex prompts"
|
||||
echo " 3. Compare with Ollama output quality"
|
||||
echo " 4. Proceed to Phase 3 (Claude) or Phase 4 (Enhanced Prompting)"
|
||||
@@ -1,128 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Quick Start Script for Testing Ollama Integration with z3ed
|
||||
# Usage: ./scripts/quickstart_ollama.sh
|
||||
|
||||
set -e
|
||||
|
||||
echo "🚀 z3ed + Ollama Quick Start"
|
||||
echo "================================"
|
||||
echo ""
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# Step 1: Check if Ollama is installed
|
||||
echo "📦 Step 1: Checking Ollama installation..."
|
||||
if ! command -v ollama &> /dev/null; then
|
||||
echo -e "${RED}✗ Ollama not found${NC}"
|
||||
echo ""
|
||||
echo "Install Ollama with:"
|
||||
echo " macOS: brew install ollama"
|
||||
echo " Linux: curl -fsSL https://ollama.com/install.sh | sh"
|
||||
echo ""
|
||||
exit 1
|
||||
fi
|
||||
echo -e "${GREEN}✓ Ollama installed${NC}"
|
||||
echo ""
|
||||
|
||||
# Step 2: Check if Ollama server is running
|
||||
echo "🔌 Step 2: Checking Ollama server..."
|
||||
if ! curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
|
||||
echo -e "${YELLOW}⚠ Ollama server not running${NC}"
|
||||
echo ""
|
||||
echo "Starting Ollama server in background..."
|
||||
ollama serve > /dev/null 2>&1 &
|
||||
OLLAMA_PID=$!
|
||||
echo "Waiting for server to start..."
|
||||
sleep 3
|
||||
|
||||
if ! curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
|
||||
echo -e "${RED}✗ Failed to start Ollama server${NC}"
|
||||
exit 1
|
||||
fi
|
||||
echo -e "${GREEN}✓ Ollama server started (PID: $OLLAMA_PID)${NC}"
|
||||
else
|
||||
echo -e "${GREEN}✓ Ollama server running${NC}"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
# Step 3: Check if recommended model is available
|
||||
RECOMMENDED_MODEL="qwen2.5-coder:7b"
|
||||
echo "🤖 Step 3: Checking for model: $RECOMMENDED_MODEL..."
|
||||
if ! ollama list | grep -q "$RECOMMENDED_MODEL"; then
|
||||
echo -e "${YELLOW}⚠ Model not found${NC}"
|
||||
echo ""
|
||||
read -p "Pull $RECOMMENDED_MODEL? (~4.7GB download) [y/N]: " -n 1 -r
|
||||
echo ""
|
||||
if [[ $REPLY =~ ^[Yy]$ ]]; then
|
||||
echo "Pulling model (this may take a few minutes)..."
|
||||
ollama pull "$RECOMMENDED_MODEL"
|
||||
echo -e "${GREEN}✓ Model pulled successfully${NC}"
|
||||
else
|
||||
echo -e "${RED}✗ Model required for testing${NC}"
|
||||
exit 1
|
||||
fi
|
||||
else
|
||||
echo -e "${GREEN}✓ Model available${NC}"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
# Step 4: Check if z3ed is built
|
||||
echo "🔨 Step 4: Checking z3ed build..."
|
||||
if [ ! -f "./build/bin/z3ed" ]; then
|
||||
echo -e "${YELLOW}⚠ z3ed not found in ./build/bin/${NC}"
|
||||
echo ""
|
||||
echo "Building z3ed..."
|
||||
cmake --build build --target z3ed
|
||||
if [ ! -f "./build/bin/z3ed" ]; then
|
||||
echo -e "${RED}✗ Failed to build z3ed${NC}"
|
||||
exit 1
|
||||
fi
|
||||
fi
|
||||
echo -e "${GREEN}✓ z3ed ready${NC}"
|
||||
echo ""
|
||||
|
||||
# Step 5: Test Ollama integration
|
||||
echo "🧪 Step 5: Testing z3ed + Ollama integration..."
|
||||
export YAZE_AI_PROVIDER=ollama
|
||||
export OLLAMA_MODEL="$RECOMMENDED_MODEL"
|
||||
|
||||
echo ""
|
||||
echo "Running test command:"
|
||||
echo -e "${BLUE}z3ed agent plan --prompt \"Validate the ROM file\"${NC}"
|
||||
echo ""
|
||||
|
||||
if ./build/bin/z3ed agent plan --prompt "Validate the ROM file"; then
|
||||
echo ""
|
||||
echo -e "${GREEN}✓ Integration test passed!${NC}"
|
||||
else
|
||||
echo ""
|
||||
echo -e "${RED}✗ Integration test failed${NC}"
|
||||
echo "Check error messages above for details"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "================================"
|
||||
echo -e "${GREEN}🎉 Setup Complete!${NC}"
|
||||
echo ""
|
||||
echo "Next steps:"
|
||||
echo " 1. Try a full agent run:"
|
||||
echo " export YAZE_AI_PROVIDER=ollama"
|
||||
echo " z3ed agent run --prompt \"Export first palette\" --rom zelda3.sfc --sandbox"
|
||||
echo ""
|
||||
echo " 2. Review generated commands:"
|
||||
echo " z3ed agent list"
|
||||
echo " z3ed agent diff"
|
||||
echo ""
|
||||
echo " 3. Try different models:"
|
||||
echo " ollama pull codellama:13b"
|
||||
echo " export OLLAMA_MODEL=codellama:13b"
|
||||
echo ""
|
||||
echo " 4. Read the docs:"
|
||||
echo " docs/z3ed/LLM-INTEGRATION-PLAN.md"
|
||||
echo ""
|
||||
@@ -1,130 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Live testing script for conversational agent
|
||||
# Tests agent function calling with real Ollama/Gemini backends
|
||||
|
||||
set -e
|
||||
|
||||
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
|
||||
PROJECT_ROOT="$(cd "${SCRIPT_DIR}/.." && pwd)"
|
||||
Z3ED="${PROJECT_ROOT}/build/bin/z3ed"
|
||||
ROM_FILE="${PROJECT_ROOT}/assets/zelda3.sfc"
|
||||
|
||||
# Colors
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m'
|
||||
|
||||
echo "========================================="
|
||||
echo "Live Conversational Agent Test"
|
||||
echo "========================================="
|
||||
echo ""
|
||||
|
||||
# Prerequisites check
|
||||
if [ ! -f "$Z3ED" ]; then
|
||||
echo -e "${RED}✗ z3ed not found at $Z3ED${NC}"
|
||||
echo "Build with: cmake --build build --target z3ed"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [ ! -f "$ROM_FILE" ]; then
|
||||
echo -e "${RED}✗ ROM file not found at $ROM_FILE${NC}"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo -e "${GREEN}✓ Prerequisites met${NC}"
|
||||
echo ""
|
||||
|
||||
# Check for AI backends
|
||||
BACKEND_AVAILABLE=false
|
||||
|
||||
echo "Checking AI Backends..."
|
||||
echo "-----------------------"
|
||||
|
||||
# Check Ollama
|
||||
if command -v ollama &> /dev/null; then
|
||||
if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
|
||||
echo -e "${GREEN}✓ Ollama server running${NC}"
|
||||
if ollama list | grep -q "qwen2.5-coder"; then
|
||||
echo -e "${GREEN}✓ qwen2.5-coder model available${NC}"
|
||||
BACKEND_AVAILABLE=true
|
||||
AI_BACKEND="Ollama"
|
||||
else
|
||||
echo -e "${YELLOW}⚠ Recommended model qwen2.5-coder:7b not installed${NC}"
|
||||
echo " Install with: ollama pull qwen2.5-coder:7b"
|
||||
fi
|
||||
else
|
||||
echo -e "${YELLOW}⚠ Ollama not running${NC}"
|
||||
echo " Start with: ollama serve"
|
||||
fi
|
||||
else
|
||||
echo -e "${YELLOW}⚠ Ollama not installed${NC}"
|
||||
fi
|
||||
|
||||
# Check Gemini
|
||||
if [ -n "$GEMINI_API_KEY" ]; then
|
||||
echo -e "${GREEN}✓ Gemini API key set${NC}"
|
||||
BACKEND_AVAILABLE=true
|
||||
if [ "$AI_BACKEND" != "Ollama" ]; then
|
||||
AI_BACKEND="Gemini"
|
||||
fi
|
||||
else
|
||||
echo -e "${YELLOW}⚠ GEMINI_API_KEY not set${NC}"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
|
||||
if [ "$BACKEND_AVAILABLE" = false ]; then
|
||||
echo -e "${RED}✗ No AI backend available${NC}"
|
||||
echo ""
|
||||
echo "Please set up at least one backend:"
|
||||
echo " - Ollama: brew install ollama && ollama serve && ollama pull qwen2.5-coder:7b"
|
||||
echo " - Gemini: export GEMINI_API_KEY='your-key-here'"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
echo -e "${GREEN}✓ Using AI Backend: $AI_BACKEND${NC}"
|
||||
echo ""
|
||||
|
||||
# Run the test-conversation command with default test cases
|
||||
echo "========================================="
|
||||
echo "Running Automated Conversation Tests"
|
||||
echo "========================================="
|
||||
echo ""
|
||||
echo "This will run 5 default test cases:"
|
||||
echo " 1. Simple ROM introspection (dungeon query)"
|
||||
echo " 2. Overworld tile search"
|
||||
echo " 3. Multi-step conversation"
|
||||
echo " 4. Command generation (tile placement)"
|
||||
echo " 5. Map description"
|
||||
echo ""
|
||||
|
||||
read -p "Press Enter to start tests (or Ctrl+C to cancel)..."
|
||||
echo ""
|
||||
|
||||
# Run the tests.
# `set -e` is active in this script, so a bare failing command would terminate
# it before the status could be read. Capturing the status through `||` keeps
# the script alive so the results section below can report the failure and
# the final `exit` can propagate the code.
TEST_EXIT_CODE=0
"$Z3ED" agent test-conversation --rom "$ROM_FILE" --verbose || TEST_EXIT_CODE=$?
|
||||
|
||||
echo ""
|
||||
echo "========================================="
|
||||
echo "Test Results"
|
||||
echo "========================================="
|
||||
|
||||
if [ $TEST_EXIT_CODE -eq 0 ]; then
|
||||
echo -e "${GREEN}✅ All tests completed successfully${NC}"
|
||||
else
|
||||
echo -e "${RED}❌ Tests failed with exit code $TEST_EXIT_CODE${NC}"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "Next Steps:"
|
||||
echo " - Review the output above for any warnings"
|
||||
echo " - Check if tool calls are being invoked correctly"
|
||||
echo " - Verify JSON/table formatting is working"
|
||||
echo " - Test with custom conversation file: z3ed agent test-conversation --file my_tests.json"
|
||||
echo ""
|
||||
|
||||
exit $TEST_EXIT_CODE
|
||||
@@ -1,213 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Integration test for Gemini AI Service (Phase 2)
|
||||
|
||||
set -e # Exit on error
|
||||
|
||||
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||
PROJECT_ROOT="$SCRIPT_DIR/.."
|
||||
Z3ED_BIN="$PROJECT_ROOT/build/bin/z3ed"
|
||||
|
||||
echo "🧪 Gemini AI Integration Test Suite"
|
||||
echo "======================================"
|
||||
|
||||
# Color output helpers
|
||||
GREEN='\033[0;32m'
|
||||
RED='\033[0;31m'
|
||||
YELLOW='\033[0;33m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
# Result-reporting helpers. Colours come from the GREEN / RED / YELLOW / NC
# variables, which hold backslash escape sequences. Only those go through %b;
# the message is printed with %s so backslashes inside it (messages here often
# embed raw command output) are shown literally instead of being interpreted.

# Report a passing check.
# Arguments: $1 - message.
pass() {
  printf '%b✓%b %s\n' "$GREEN" "$NC" "$1"
}

# Report a failing check and terminate the script with status 1.
# Arguments: $1 - message.
fail() {
  printf '%b✗%b %s\n' "$RED" "$NC" "$1"
  exit 1
}

# Report a non-fatal problem; execution continues.
# Arguments: $1 - message.
warn() {
  printf '%b⚠%b %s\n' "$YELLOW" "$NC" "$1"
}
|
||||
|
||||
# Test 1: z3ed executable exists
|
||||
echo ""
|
||||
echo "Test 1: z3ed executable exists"
|
||||
if [ -f "$Z3ED_BIN" ]; then
|
||||
pass "z3ed executable found at $Z3ED_BIN"
|
||||
else
|
||||
fail "z3ed executable not found. Run: cmake --build build --target z3ed"
|
||||
fi
|
||||
|
||||
# Test 2: Check GEMINI_API_KEY environment variable
|
||||
echo ""
|
||||
echo "Test 2: Check GEMINI_API_KEY environment variable"
|
||||
if [ -z "$GEMINI_API_KEY" ]; then
|
||||
warn "GEMINI_API_KEY not set - skipping API tests"
|
||||
echo " To test Gemini integration:"
|
||||
echo " 1. Get API key at: https://makersuite.google.com/app/apikey"
|
||||
echo " 2. Run: export GEMINI_API_KEY='your-api-key'"
|
||||
echo " 3. Re-run this script"
|
||||
|
||||
# Still test that service factory handles missing key gracefully
|
||||
echo ""
|
||||
echo "Test 2a: Verify graceful fallback without API key"
|
||||
unset YAZE_AI_PROVIDER
|
||||
OUTPUT=$($Z3ED_BIN agent plan --prompt "Place a tree" 2>&1)
|
||||
|
||||
if echo "$OUTPUT" | grep -q "Using MockAIService"; then
|
||||
pass "Service factory falls back to Mock when GEMINI_API_KEY missing"
|
||||
else
|
||||
fail "Service factory should fall back to Mock without API key"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "⏭️ Skipping remaining Gemini API tests (no API key)"
|
||||
exit 0
|
||||
fi
|
||||
|
||||
pass "GEMINI_API_KEY is set"
|
||||
|
||||
# Test 3: Verify Gemini model availability
|
||||
echo ""
|
||||
echo "Test 3: Verify Gemini model availability"
|
||||
GEMINI_MODEL="${GEMINI_MODEL:-gemini-2.5-flash}"
|
||||
echo " Testing with model: $GEMINI_MODEL"
|
||||
|
||||
# Quick API check
|
||||
HTTP_CODE=$(curl -s -o /dev/null -w "%{http_code}" \
|
||||
-H "x-goog-api-key: $GEMINI_API_KEY" \
|
||||
"https://generativelanguage.googleapis.com/v1beta/models/$GEMINI_MODEL")
|
||||
|
||||
if [ "$HTTP_CODE" = "200" ]; then
|
||||
pass "Gemini API accessible, model '$GEMINI_MODEL' available"
|
||||
elif [ "$HTTP_CODE" = "401" ] || [ "$HTTP_CODE" = "403" ]; then
|
||||
fail "Invalid Gemini API key (HTTP $HTTP_CODE)"
|
||||
elif [ "$HTTP_CODE" = "404" ]; then
|
||||
fail "Model '$GEMINI_MODEL' not found (HTTP 404)"
|
||||
else
|
||||
warn "Unexpected HTTP status: $HTTP_CODE (continuing anyway)"
|
||||
fi
|
||||
|
||||
# Test 4: Generate commands with Gemini (simple prompt)
|
||||
echo ""
|
||||
echo "Test 4: Generate commands with Gemini (simple prompt)"
|
||||
unset YAZE_AI_PROVIDER # Let service factory auto-detect from GEMINI_API_KEY
|
||||
|
||||
OUTPUT=$($Z3ED_BIN agent plan --prompt "Change the color of palette 0 index 5 to red" 2>&1)
|
||||
|
||||
if echo "$OUTPUT" | grep -q "Using Gemini AI"; then
|
||||
pass "Service factory selected Gemini"
|
||||
else
|
||||
fail "Expected 'Using Gemini AI' in output, got: $OUTPUT"
|
||||
fi
|
||||
|
||||
if echo "$OUTPUT" | grep -q "palette"; then
|
||||
pass "Gemini generated palette-related commands"
|
||||
echo " Generated commands:"
|
||||
echo "$OUTPUT" | grep -E "^\s*-" | sed 's/^/ /'
|
||||
else
|
||||
fail "Expected palette commands in output, got: $OUTPUT"
|
||||
fi
|
||||
|
||||
# Test 5: Generate commands with complex prompt
|
||||
echo ""
|
||||
echo "Test 5: Generate commands with complex prompt (overworld modification)"
|
||||
OUTPUT=$($Z3ED_BIN agent plan --prompt "Place a tree at coordinates (10, 20) on overworld map 0" 2>&1)
|
||||
|
||||
if echo "$OUTPUT" | grep -q "overworld"; then
|
||||
pass "Gemini generated overworld commands"
|
||||
echo " Generated commands:"
|
||||
echo "$OUTPUT" | grep -E "^\s*-" | sed 's/^/ /'
|
||||
else
|
||||
fail "Expected overworld commands in output, got: $OUTPUT"
|
||||
fi
|
||||
|
||||
# Test 6: Test explicit provider selection
|
||||
echo ""
|
||||
echo "Test 6: Test explicit provider selection (YAZE_AI_PROVIDER=gemini)"
|
||||
# Note: Current implementation doesn't have explicit "gemini" provider value
|
||||
# It auto-detects from GEMINI_API_KEY. But we can test that Ollama doesn't override.
|
||||
unset YAZE_AI_PROVIDER
|
||||
|
||||
OUTPUT=$($Z3ED_BIN agent plan --prompt "Export palette 0" 2>&1)
|
||||
|
||||
if echo "$OUTPUT" | grep -q "Using Gemini AI"; then
|
||||
pass "Gemini selected when GEMINI_API_KEY present"
|
||||
else
|
||||
warn "Expected Gemini selection, got: $OUTPUT"
|
||||
fi
|
||||
|
||||
# Test 7: Verify JSON response parsing
|
||||
echo ""
|
||||
echo "Test 7: Verify JSON response parsing (check for command format)"
|
||||
OUTPUT=$($Z3ED_BIN agent plan --prompt "Set tile at (5,5) to 0x100" 2>&1)
|
||||
|
||||
# Commands should NOT have "z3ed" prefix (service should strip it)
|
||||
if echo "$OUTPUT" | grep -E "^\s*- z3ed"; then
|
||||
warn "Commands still contain 'z3ed' prefix (should be stripped)"
|
||||
else
|
||||
pass "Commands properly formatted without 'z3ed' prefix"
|
||||
fi
|
||||
|
||||
# Test 8: Test multiple commands in response
|
||||
echo ""
|
||||
echo "Test 8: Test multiple commands generation"
|
||||
OUTPUT=$($Z3ED_BIN agent plan --prompt "Export palette 0 to test.json, change color 5 to red, then import it back" 2>&1)
|
||||
|
||||
COMMAND_COUNT=$(echo "$OUTPUT" | grep -c -E "^\s*- " || true)
|
||||
|
||||
if [ "$COMMAND_COUNT" -ge 2 ]; then
|
||||
pass "Gemini generated multiple commands ($COMMAND_COUNT commands)"
|
||||
echo " Commands:"
|
||||
echo "$OUTPUT" | grep -E "^\s*-" | sed 's/^/ /'
|
||||
else
|
||||
warn "Expected multiple commands, got $COMMAND_COUNT"
|
||||
fi
|
||||
|
||||
# Test 9: Error handling - invalid API key
|
||||
echo ""
|
||||
echo "Test 9: Error handling with invalid API key"
|
||||
SAVED_KEY="$GEMINI_API_KEY"
|
||||
export GEMINI_API_KEY="invalid_key_12345"
|
||||
|
||||
OUTPUT=$($Z3ED_BIN agent plan --prompt "Test" 2>&1 || true)
|
||||
|
||||
if echo "$OUTPUT" | grep -q "Invalid Gemini API key\|Falling back to MockAIService"; then
|
||||
pass "Service handles invalid API key gracefully"
|
||||
else
|
||||
warn "Expected error handling message, got: $OUTPUT"
|
||||
fi
|
||||
|
||||
# Restore key
|
||||
export GEMINI_API_KEY="$SAVED_KEY"
|
||||
|
||||
# Test 10: Model override via environment
|
||||
echo ""
|
||||
echo "Test 10: Model override via GEMINI_MODEL environment variable"
|
||||
export GEMINI_MODEL="gemini-1.5-pro"
|
||||
|
||||
OUTPUT=$($Z3ED_BIN agent plan --prompt "Test" 2>&1)
|
||||
|
||||
if echo "$OUTPUT" | grep -q "gemini-1.5-pro"; then
|
||||
pass "GEMINI_MODEL environment variable respected"
|
||||
else
|
||||
warn "Expected model override, got: $OUTPUT"
|
||||
fi
|
||||
|
||||
unset GEMINI_MODEL
|
||||
|
||||
echo ""
|
||||
echo "======================================"
|
||||
echo "✅ Gemini Integration Test Suite Complete"
|
||||
echo ""
|
||||
echo "Summary:"
|
||||
echo " - Gemini API accessible"
|
||||
echo " - Command generation working"
|
||||
echo " - Error handling functional"
|
||||
echo " - JSON parsing robust"
|
||||
echo ""
|
||||
echo "Next steps:"
|
||||
echo " 1. Test with various prompt types"
|
||||
echo " 2. Measure response latency"
|
||||
echo " 3. Compare accuracy with Ollama"
|
||||
echo " 4. Consider rate limiting for production"
|
||||
@@ -1,172 +0,0 @@
|
||||
#!/bin/bash
|
||||
# Test script for Ollama AI service integration
|
||||
# This script validates Phase 1 implementation
|
||||
|
||||
set -e
|
||||
|
||||
echo "🧪 Testing Ollama AI Service Integration (Phase 1)"
|
||||
echo "=================================================="
|
||||
echo ""
|
||||
|
||||
# Colors for output
|
||||
RED='\033[0;31m'
|
||||
GREEN='\033[0;32m'
|
||||
YELLOW='\033[1;33m'
|
||||
BLUE='\033[0;34m'
|
||||
NC='\033[0m' # No Color
|
||||
|
||||
TESTS_PASSED=0
|
||||
TESTS_FAILED=0
|
||||
|
||||
# Helper functions
|
||||
# Record and print a passing test.
# Globals:   TESTS_PASSED (incremented), GREEN, NC (read)
# Arguments: $1 - description of the test.
pass_test() {
  printf '%b✓ PASS:%b %s\n' "$GREEN" "$NC" "$1"
  # Deliberately not ((TESTS_PASSED++)): that expression evaluates to the old
  # value, so it returns status 1 when the counter is 0 and `set -e` would
  # kill the script on the very first recorded result.
  TESTS_PASSED=$((TESTS_PASSED + 1))
}

# Record and print a failing test. Does not exit; the failure count is
# evaluated by the caller.
# Globals:   TESTS_FAILED (incremented), RED, NC (read)
# Arguments: $1 - description of the test.
fail_test() {
  printf '%b✗ FAIL:%b %s\n' "$RED" "$NC" "$1"
  TESTS_FAILED=$((TESTS_FAILED + 1))
}

# Print an informational note; no counters are touched.
# Globals:   BLUE, NC (read)
# Arguments: $1 - message.
info() {
  printf '%bℹ%b %s\n' "$BLUE" "$NC" "$1"
}
|
||||
|
||||
# Test 1: Check if z3ed built successfully
|
||||
echo "Test 1: z3ed executable exists"
|
||||
if [ -f "./build/bin/z3ed" ]; then
|
||||
pass_test "z3ed executable found"
|
||||
else
|
||||
fail_test "z3ed executable not found"
|
||||
exit 1
|
||||
fi
|
||||
echo ""
|
||||
|
||||
# Test 2: Test MockAIService fallback (no LLM configured)
|
||||
echo "Test 2: MockAIService fallback"
|
||||
unset YAZE_AI_PROVIDER
|
||||
unset GEMINI_API_KEY
|
||||
unset CLAUDE_API_KEY
|
||||
|
||||
OUTPUT=$(./build/bin/z3ed agent plan --prompt "Place a tree" 2>&1 || true)
|
||||
if echo "$OUTPUT" | grep -q "Using MockAIService"; then
|
||||
pass_test "MockAIService activated when no LLM configured"
|
||||
if echo "$OUTPUT" | grep -q "AI Agent Plan:"; then
|
||||
pass_test "MockAIService generated commands"
|
||||
fi
|
||||
else
|
||||
fail_test "MockAIService fallback not working"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
# Test 3: Test Ollama provider selection (without server)
|
||||
echo "Test 3: Ollama provider selection (without server running)"
|
||||
export YAZE_AI_PROVIDER=ollama
|
||||
|
||||
OUTPUT=$(./build/bin/z3ed agent plan --prompt "Validate ROM" 2>&1 || true)
|
||||
if echo "$OUTPUT" | grep -q "Ollama unavailable"; then
|
||||
pass_test "Ollama health check detected unavailable server"
|
||||
if echo "$OUTPUT" | grep -q "Falling back to MockAIService"; then
|
||||
pass_test "Graceful fallback to MockAIService"
|
||||
else
|
||||
fail_test "Did not fall back to MockAIService"
|
||||
fi
|
||||
else
|
||||
info "Note: If Ollama is running, this test will pass differently"
|
||||
fi
|
||||
echo ""
|
||||
|
||||
# Test 4: Check if Ollama is installed
|
||||
echo "Test 4: Ollama installation check"
|
||||
if command -v ollama &> /dev/null; then
|
||||
pass_test "Ollama is installed"
|
||||
|
||||
# Test 5: Check if Ollama server is running
|
||||
echo ""
|
||||
echo "Test 5: Ollama server availability"
|
||||
if curl -s http://localhost:11434/api/tags > /dev/null 2>&1; then
|
||||
pass_test "Ollama server is running"
|
||||
|
||||
# Test 6: Check for qwen2.5-coder model
|
||||
echo ""
|
||||
echo "Test 6: qwen2.5-coder:7b model availability"
|
||||
if ollama list | grep -q "qwen2.5-coder:7b"; then
|
||||
pass_test "Recommended model is available"
|
||||
|
||||
# Test 7: End-to-end test with Ollama
|
||||
echo ""
|
||||
echo "Test 7: End-to-end LLM command generation"
|
||||
export YAZE_AI_PROVIDER=ollama
|
||||
export OLLAMA_MODEL=qwen2.5-coder:7b
|
||||
|
||||
info "Testing: 'agent plan --prompt \"Validate the ROM\"'"
|
||||
OUTPUT=$(./build/bin/z3ed agent plan --prompt "Validate the ROM" 2>&1)
|
||||
|
||||
if echo "$OUTPUT" | grep -q "Using Ollama AI"; then
|
||||
pass_test "Ollama AI service activated"
|
||||
else
|
||||
fail_test "Ollama AI service not activated"
|
||||
fi
|
||||
|
||||
if echo "$OUTPUT" | grep -q "AI Agent Plan:"; then
|
||||
pass_test "Command generation completed"
|
||||
|
||||
# Check if reasonable commands were generated
|
||||
if echo "$OUTPUT" | grep -q "rom"; then
|
||||
pass_test "Generated ROM-related command"
|
||||
else
|
||||
fail_test "Generated command doesn't seem ROM-related"
|
||||
fi
|
||||
else
|
||||
fail_test "No commands generated"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "Generated output:"
|
||||
echo "---"
|
||||
echo "$OUTPUT"
|
||||
echo "---"
|
||||
|
||||
else
|
||||
fail_test "qwen2.5-coder:7b not found"
|
||||
info "Install with: ollama pull qwen2.5-coder:7b"
|
||||
fi
|
||||
else
|
||||
fail_test "Ollama server not running"
|
||||
info "Start with: ollama serve"
|
||||
fi
|
||||
else
|
||||
fail_test "Ollama not installed"
|
||||
info "Install with: brew install ollama (macOS)"
|
||||
info "Or visit: https://ollama.com/download"
|
||||
fi
|
||||
|
||||
echo ""
|
||||
echo "=================================================="
|
||||
echo "Test Summary:"
|
||||
echo -e " ${GREEN}Passed: $TESTS_PASSED${NC}"
|
||||
echo -e " ${RED}Failed: $TESTS_FAILED${NC}"
|
||||
echo ""
|
||||
|
||||
if [ $TESTS_FAILED -eq 0 ]; then
|
||||
echo -e "${GREEN}✓ All tests passed!${NC}"
|
||||
echo ""
|
||||
echo "Next steps:"
|
||||
echo " 1. If Ollama tests were skipped, install and configure:"
|
||||
echo " brew install ollama"
|
||||
echo " ollama serve &"
|
||||
echo " ollama pull qwen2.5-coder:7b"
|
||||
echo ""
|
||||
echo " 2. Try the full agent workflow:"
|
||||
echo " export YAZE_AI_PROVIDER=ollama"
|
||||
echo " ./build/bin/z3ed agent run --prompt \"Validate ROM\" --rom zelda3.sfc --sandbox"
|
||||
echo ""
|
||||
echo " 3. Check the implementation checklist:"
|
||||
echo " docs/z3ed/LLM-IMPLEMENTATION-CHECKLIST.md"
|
||||
exit 0
|
||||
else
|
||||
echo -e "${RED}✗ Some tests failed${NC}"
|
||||
echo "Review the output above for details"
|
||||
exit 1
|
||||
fi
|
||||
Reference in New Issue
Block a user