backend-infra-engineer: Release v0.3.3 snapshot

This commit is contained in:
scawful
2025-11-21 21:35:50 -05:00
parent 3d71417f62
commit 476dd1cd1c
818 changed files with 65706 additions and 35514 deletions

268
scripts/extract-symbols.sh Executable file
View File

@@ -0,0 +1,268 @@
#!/bin/bash
# Symbol Extraction Tool - Extract symbols from compiled object files
# Creates a JSON database of symbol conflicts: every symbol defined in more
# than one object file, with its defining object files and summary counts
#
# Usage: ./scripts/extract-symbols.sh [BUILD_DIR] [OUTPUT_FILE]
# BUILD_DIR: Path to CMake build directory (default: current directory)
# OUTPUT_FILE: Path to output JSON file (default: BUILD_DIR/symbol_database.json)
# Strict mode: stop on command failure, unset variables and failed pipeline stages.
set -euo pipefail

# ANSI colour escapes; they are expanded later by `echo -e`.
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # reset / no colour

# Settings taken from the positional arguments, with fallbacks.
BUILD_DIR="${1:-.}"
OUTPUT_FILE="${2:-${BUILD_DIR}/symbol_database.json}"
TEMP_SYMBOLS="${BUILD_DIR}/.temp_symbols.txt"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "${SCRIPT_DIR}")"

# Work out which platform we are on from the kernel name.
UNAME_S=$(uname -s)
IS_MACOS=false
IS_LINUX=false
IS_WINDOWS=false
if [[ "${UNAME_S}" == Darwin* ]]; then
  IS_MACOS=true
elif [[ "${UNAME_S}" == Linux* ]]; then
  IS_LINUX=true
elif [[ "${UNAME_S}" == MINGW* || "${UNAME_S}" == MSYS* || "${UNAME_S}" == CYGWIN* ]]; then
  IS_WINDOWS=true
fi

# Refuse to continue without an existing build directory.
[[ -d "${BUILD_DIR}" ]] || {
  echo -e "${RED}Error: Build directory not found: ${BUILD_DIR}${NC}"
  exit 1
}

# Banner describing this run.
echo -e "${BLUE}=== Symbol Extraction Tool ===${NC}"
echo -e "Build directory: ${BUILD_DIR}"
echo -e "Output file: ${OUTPUT_FILE}"
echo ""
#######################################
# Extract the defined symbols of one object file using nm (Linux/macOS).
# Arguments: $1 - path to the object file
# Outputs:   one "symbol|object_basename|type" line per defined symbol
# Returns:   0 on success, 1 if the nm pipeline fails
#######################################
extract_symbols_unix() {
  local obj_file="$1"
  local obj_name="${obj_file##*/}"
  local sym sym_type _rest

  # `nm -P` prints "name type [value [size]]". Letting `read` split the
  # fields avoids forking echo+awk once per symbol, and the undefined-symbol
  # check looks at the type field only instead of the whole remainder.
  if ! nm -P "${obj_file}" 2>/dev/null | while read -r sym sym_type _rest; do
    # Skip blank lines and undefined references (type U).
    if [[ -z "${sym}" || "${sym_type}" == "U" ]]; then
      continue
    fi
    printf '%s|%s|%s\n' "${sym}" "${obj_name}" "${sym_type}"
  done; then
    return 1
  fi
  return 0
}
#######################################
# Extract the defined symbols of one object file using dumpbin (Windows).
# Arguments: $1 - path to the object file
# Outputs:   one "symbol|object_basename|?" line per defined symbol
#            (the type is always "?": dumpbin has no nm-style type letter)
# Returns:   0 on success, 1 if the dumpbin pipeline fails
#######################################
extract_symbols_windows() {
  local obj_file="$1"
  local obj_name="${obj_file##*/}"
  local line section after_bar sym _rest
  # Symbol table entry: "<index> <value> <section> <type> <class> | <name>".
  # Leading whitespace is optional and capture 1 is the section column.
  local entry_re='^[[:space:]]*[0-9A-Fa-f]+[[:space:]]+[0-9A-Fa-f]+[[:space:]]+([^[:space:]]+)[[:space:]]'

  # The leading `!` makes a failed pipeline return 1 and a successful one
  # return 0, consistent with extract_symbols_unix.
  if ! dumpbin /symbols "${obj_file}" 2>/dev/null | while IFS= read -r line; do
    line="${line%$'\r'}" # tolerate CRLF line endings
    if [[ ! "${line}" =~ ${entry_re} || "${line}" != *"|"* ]]; then
      continue
    fi
    section="${BASH_REMATCH[1]}"
    # UNDEF entries are references resolved elsewhere, not definitions.
    if [[ "${section}" == "UNDEF" ]]; then
      continue
    fi
    # The symbol is the first word after the bar; anything following it
    # (e.g. the undecorated C++ signature in parentheses) is dropped.
    after_bar="${line#*|}"
    read -r sym _rest <<< "${after_bar}"
    if [[ -n "${sym}" ]]; then
      printf '%s|%s|%s\n' "${sym}" "${obj_name}" "?"
    fi
  done; then
    return 1
  fi
  return 0
}
#######################################
# Record every object file found under BUILD_DIR in a list file.
# Globals:   BUILD_DIR (read), IS_WINDOWS (read)
# Outputs:   path of the list file on stdout; the file itself holds one
#            object-file path per line
#######################################
collect_object_files() {
  local list_path="${BUILD_DIR}/.object_files.tmp"
  # Default filter: *.o only. On Windows both *.obj and *.o are matched.
  local -a name_filter=(-name "*.o")
  if ${IS_WINDOWS}; then
    name_filter=(\( -name "*.obj" -o -name "*.o" \))
  fi

  # Start from an empty list, then append whatever find reports. Errors
  # from find are deliberately ignored (best effort).
  : > "${list_path}"
  find "${BUILD_DIR}" -type f "${name_filter[@]}" 2>/dev/null >> "${list_path}" || true

  printf '%s\n' "${list_path}"
}
# Extract symbols from all object files
echo -e "${BLUE}Scanning for object files...${NC}"
OBJ_LIST=$(collect_object_files)

# Remove the scratch files on every exit path, including early exits
# triggered by `set -e`.
trap 'rm -f -- "${TEMP_SYMBOLS}" "${OBJ_LIST}"' EXIT

OBJ_COUNT=$(wc -l < "${OBJ_LIST}")
# Some wc implementations pad the count with leading blanks; arithmetic
# expansion normalises it to a bare integer for the messages below.
OBJ_COUNT=$((OBJ_COUNT))

if [[ ${OBJ_COUNT} -eq 0 ]]; then
  echo -e "${YELLOW}Warning: No object files found in ${BUILD_DIR}${NC}"
  echo "Make sure to build the project first."
  exit 1
fi

echo -e "Found ${GREEN}${OBJ_COUNT}${NC} object files"
echo ""
echo -e "${BLUE}Extracting symbols (this may take a moment)...${NC}"

# Pick the platform-specific extractor once instead of on every iteration.
if ${IS_WINDOWS}; then
  EXTRACTOR=extract_symbols_windows
else
  EXTRACTOR=extract_symbols_unix
fi

# Process object files and extract symbols
: > "${TEMP_SYMBOLS}"
PROCESSED=0
FAILED=0

while IFS= read -r obj_file; do
  if [[ -z "${obj_file}" ]]; then
    continue
  fi

  # NOTE: counters use VAR=$((VAR + 1)) rather than ((VAR++)). The latter
  # returns status 1 when the old value is 0, which under `set -e` aborts
  # the script on the very first increment.
  if [[ ! -f "${obj_file}" ]]; then
    echo -e "${YELLOW}Skipping (not found): ${obj_file}${NC}"
    FAILED=$((FAILED + 1))
    continue
  fi

  if "${EXTRACTOR}" "${obj_file}" >> "${TEMP_SYMBOLS}" 2>/dev/null; then
    PROCESSED=$((PROCESSED + 1))
  else
    FAILED=$((FAILED + 1))
  fi

  # Progress indicator, refreshed every 50 successfully processed objects
  if (( PROCESSED > 0 && PROCESSED % 50 == 0 )); then
    echo -ne "\r Processed: ${PROCESSED}/${OBJ_COUNT} objects"
  fi
done < "${OBJ_LIST}"

echo -ne "\r Processed: ${GREEN}${PROCESSED}${NC}/${OBJ_COUNT} objects (${FAILED} failed) \n"
echo ""
# Generate JSON output
echo -e "${BLUE}Generating symbol database...${NC}"

# Use Python to generate proper JSON (more portable than jq).
#
# The heredoc delimiter is quoted, so the shell expands nothing inside the
# Python source. Shell values are passed as command-line arguments instead;
# splicing them into the source would break on (or execute) paths that
# contain quotes or backslashes. No placeholder file is written beforehand,
# so a failed run cannot leave behind a database that looks conflict-free.
#
# Arguments: symbols file, output file, platform name, build dir,
#            number of object files processed.
python3 - "${TEMP_SYMBOLS}" "${OUTPUT_FILE}" "${UNAME_S}" "${BUILD_DIR}" "${PROCESSED}" << 'PYTHON_EOF'
import json
import sys
from collections import defaultdict
from datetime import datetime, timezone

symbols_path, output_path, platform_name, build_dir, processed = sys.argv[1:6]

# Read extracted symbols: one "symbol|object_file|type" record per line.
symbol_dict = defaultdict(list)
total_symbols = 0
try:
    with open(symbols_path, "r") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            parts = line.split("|")
            if len(parts) >= 2:
                symbol = parts[0]
                obj_file = parts[1]
                sym_type = parts[2] if len(parts) > 2 else "?"
                symbol_dict[symbol].append({
                    "object_file": obj_file,
                    "type": sym_type
                })
                total_symbols += 1
except Exception as e:
    print(f"Error reading symbols: {e}", file=sys.stderr)
    sys.exit(1)

# Identify conflicts (symbols defined in multiple object files)
conflicts = []
for symbol, definitions in symbol_dict.items():
    if len(definitions) > 1:
        conflicts.append({
            "symbol": symbol,
            "count": len(definitions),
            "definitions": definitions
        })

# Sort conflicts by count (most duplicated first)
conflicts.sort(key=lambda x: x["count"], reverse=True)

# Same "naive ISO-8601 + Z" timestamp format as before, produced without the
# deprecated datetime.utcnow().
timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

# Build output JSON
output = {
    "metadata": {
        "platform": platform_name,
        "build_dir": build_dir,
        "timestamp": timestamp,
        "object_files_scanned": int(processed),
        "total_symbols": total_symbols,
        "total_conflicts": len(conflicts)
    },
    "conflicts": conflicts,
    "symbols": {}
}

# Only conflicted symbols are stored under "symbols" to keep the file small.
for symbol, definitions in symbol_dict.items():
    if len(definitions) > 1:
        output["symbols"][symbol] = definitions

# Write JSON
try:
    with open(output_path, "w") as f:
        json.dump(output, f, indent=2)
except Exception as e:
    print(f"Error writing JSON: {e}", file=sys.stderr)
    sys.exit(1)

print(f"Symbol database written to: {output_path}")
print(f"Total symbols: {total_symbols}")
print(f"Conflicts found: {len(conflicts)}")
PYTHON_EOF
# Cleanup
rm -f "${TEMP_SYMBOLS}" "${OBJ_LIST}"

# Report results
# Exit status: 0 = database written and no conflicts; 1 = conflicts found,
# database missing, or conflict count unreadable.
if [[ ! -f "${OUTPUT_FILE}" ]]; then
  echo -e "${RED}Failed to generate symbol database${NC}"
  exit 1
fi

# The path is passed as an argument instead of being spliced into the Python
# source, so quotes or backslashes in it cannot break the command.
CONFLICT_COUNT=$(python3 -c '
import json, sys
with open(sys.argv[1]) as f:
    print(json.load(f)["metadata"].get("total_conflicts", 0))
' "${OUTPUT_FILE}" 2>/dev/null) || CONFLICT_COUNT="?"

# A count that could not be read must not be reported as "no conflicts":
# a non-numeric value would make the numeric comparison below fail and fall
# through to the success branch.
if [[ ! "${CONFLICT_COUNT}" =~ ^[0-9]+$ ]]; then
  echo -e "${RED}Error: could not read conflict count from ${OUTPUT_FILE}${NC}" >&2
  exit 1
fi

echo -e "${GREEN}Success!${NC}"
if [[ "${CONFLICT_COUNT}" -gt 0 ]]; then
  echo -e "${YELLOW}Found ${RED}${CONFLICT_COUNT}${YELLOW} symbol conflicts${NC}"
  exit 1 # Exit with error if conflicts found
fi

echo -e "${GREEN}No symbol conflicts detected!${NC}"
exit 0