backend-infra-engineer: Release v0.3.3 snapshot
This commit is contained in:
268
scripts/extract-symbols.sh
Executable file
268
scripts/extract-symbols.sh
Executable file
@@ -0,0 +1,268 @@
|
||||
#!/bin/bash
# Symbol Extraction Tool - Extract symbols from compiled object files
# Creates a JSON database of all symbols and their defining object files
#
# Usage: ./scripts/extract-symbols.sh [BUILD_DIR] [OUTPUT_FILE]
#   BUILD_DIR:   Path to CMake build directory (default: current directory)
#   OUTPUT_FILE: Path to output JSON file
#                (default: BUILD_DIR/symbol_database.json)

set -euo pipefail

# Colors for output (escape sequences are interpreted by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
NC='\033[0m' # No Color

# Configuration
BUILD_DIR="${1:-.}"
OUTPUT_FILE="${2:-${BUILD_DIR}/symbol_database.json}"
# Scratch file holding one "symbol|object|type" record per line.
TEMP_SYMBOLS="${BUILD_DIR}/.temp_symbols.txt"
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
PROJECT_ROOT="$(dirname "${SCRIPT_DIR}")"

# Platform detection
UNAME_S=$(uname -s)
IS_MACOS=false
IS_LINUX=false
IS_WINDOWS=false

case "${UNAME_S}" in
  Darwin*) IS_MACOS=true ;;
  Linux*) IS_LINUX=true ;;
  MINGW*|MSYS*|CYGWIN*) IS_WINDOWS=true ;;
esac

# Validation
if [[ ! -d "${BUILD_DIR}" ]]; then
  # Diagnostics belong on stderr so they are not mixed into captured output.
  echo -e "${RED}Error: Build directory not found: ${BUILD_DIR}${NC}" >&2
  exit 1
fi

echo -e "${BLUE}=== Symbol Extraction Tool ===${NC}"
echo -e "Build directory: ${BUILD_DIR}"
echo -e "Output file: ${OUTPUT_FILE}"
echo ""
|
||||
|
||||
# Extract symbols from one object file using nm (Unix/macOS).
#
# Arguments: $1 - path to the object file
# Outputs:   one "symbol|object-basename|type" line per symbol on stdout,
#            where type is the nm type letter (T=text, D=data,
#            R=read-only, etc.); undefined references (type U) are skipped
# Returns:   0 on success, 1 if nm (or the filter) fails
#
# NOTE(review): local (lowercase) and weak symbol types are emitted too, so
# they take part in duplicate detection downstream - confirm this is wanted.
extract_symbols_unix() {
  local obj_file="$1"
  local obj_name="${obj_file##*/}"
  local nm_output

  # Capture nm's output first so its exit status is checked directly; this
  # does not depend on the caller having `pipefail` set and avoids emitting
  # partial records for an unreadable object file.
  nm_output=$(nm -P "${obj_file}" 2>/dev/null) || return 1

  # `nm -P` prints "name type [value [size]]". Filter in a single awk pass
  # rather than spawning a process per symbol. The object name is passed via
  # the environment so awk does not interpret backslashes in it.
  printf '%s\n' "${nm_output}" | OBJ_NAME="${obj_name}" awk '
    NF == 0 { next }      # blank line
    $2 == "U" { next }    # undefined reference, not a definition
    { print $1 "|" ENVIRON["OBJ_NAME"] "|" $2 }
  ' || return 1

  return 0
}
|
||||
|
||||
# Extract symbols from one object file using dumpbin (Windows).
#
# Arguments: $1 - path to the object file
# Outputs:   one "symbol|object-basename|?" line per defined symbol on
#            stdout (the type is always "?": dumpbin does not report an
#            nm-style type letter)
# Returns:   0 on success (including an object with no symbols),
#            1 if dumpbin (or the filter) fails
extract_symbols_windows() {
  local obj_file="$1"
  local obj_name="${obj_file##*/}"
  local dump_output

  # Check dumpbin's own exit status; an object without matching rows must
  # not be treated as a failure.
  dump_output=$(dumpbin /symbols "${obj_file}" 2>/dev/null) || return 1

  # Symbol rows look like:
  #   008 00000000 SECT2  notype ()    External     | name (undecorated)
  # i.e. a hex index, value, section, type, storage class, "|", then the
  # symbol name, optionally followed by its undecorated form in parentheses.
  # Auxiliary rows are indented and carry no "|" separator.
  printf '%s\n' "${dump_output}" | OBJ_NAME="${obj_name}" awk '
    { sub(/\r$/, "") }    # tolerate CRLF output from native Windows tools
    $1 ~ /^[0-9A-Fa-f]+$/ && $3 != "UNDEF" {
      # UNDEF rows are references to symbols defined elsewhere.
      for (i = 1; i < NF; i++) {
        if ($i == "|") {
          # The token right after "|" is the symbol name.
          print $(i + 1) "|" ENVIRON["OBJ_NAME"] "|?"
          break
        }
      }
    }
  ' || return 1

  return 0
}
|
||||
|
||||
# Build the list of object files found under the build directory.
#
# Globals:   BUILD_DIR (read), IS_WINDOWS (read)
# Outputs:   prints the path of the list file (one object path per line)
#            on stdout
collect_object_files() {
  local list_path="${BUILD_DIR}/.object_files.tmp"
  local -a name_filter

  # Start from an empty list file.
  > "${list_path}"

  # .o files everywhere; additionally .obj files on Windows.
  if ${IS_WINDOWS}; then
    name_filter=( '(' -name "*.obj" -o -name "*.o" ')' )
  else
    name_filter=( -name "*.o" )
  fi

  # Best effort: unreadable subdirectories are ignored on purpose.
  find "${BUILD_DIR}" -type f "${name_filter[@]}" 2>/dev/null >> "${list_path}" || true

  echo "${list_path}"
}
|
||||
|
||||
# Extract symbols from all object files
echo -e "${BLUE}Scanning for object files...${NC}"
OBJ_LIST=$(collect_object_files)
# BSD/macOS `wc -l` pads its output with leading blanks; the arithmetic
# expansion normalizes it to a bare integer so the messages print cleanly.
OBJ_COUNT=$(( $(wc -l < "${OBJ_LIST}") ))

if (( OBJ_COUNT == 0 )); then
  # Nothing to analyze: report on stderr and fail.
  echo -e "${YELLOW}Warning: No object files found in ${BUILD_DIR}${NC}" >&2
  echo "Make sure to build the project first." >&2
  exit 1
fi

echo -e "Found ${GREEN}${OBJ_COUNT}${NC} object files"
echo ""
echo -e "${BLUE}Extracting symbols (this may take a moment)...${NC}"
|
||||
|
||||
# Process object files and extract symbols
: > "${TEMP_SYMBOLS}"
PROCESSED=0
FAILED=0

# Pick the platform-specific extractor once instead of on every iteration.
if ${IS_WINDOWS}; then
  EXTRACT_FN=extract_symbols_windows
else
  EXTRACT_FN=extract_symbols_unix
fi

while IFS= read -r obj_file; do
  [[ -z "${obj_file}" ]] && continue

  # Counters use VAR=$((VAR + 1)) rather than ((VAR++)): the latter returns
  # status 1 when the old value is 0, which aborts the script under `set -e`.
  if [[ ! -f "${obj_file}" ]]; then
    echo -e "${YELLOW}Skipping (not found): ${obj_file}${NC}"
    FAILED=$((FAILED + 1))
    continue
  fi

  # Extract symbols based on platform
  if "${EXTRACT_FN}" "${obj_file}" >> "${TEMP_SYMBOLS}" 2>/dev/null; then
    PROCESSED=$((PROCESSED + 1))
  else
    FAILED=$((FAILED + 1))
  fi

  # Progress indicator (rewrites the same terminal line)
  if (( PROCESSED % 50 == 0 )); then
    echo -ne "\r Processed: ${PROCESSED}/${OBJ_COUNT} objects"
  fi
done < "${OBJ_LIST}"

echo -ne "\r Processed: ${GREEN}${PROCESSED}${NC}/${OBJ_COUNT} objects (${FAILED} failed) \n"
echo ""
|
||||
|
||||
# Generate JSON output
echo -e "${BLUE}Generating symbol database...${NC}"

# Start JSON: write an empty skeleton first. It is overwritten by the Python
# step below once the real data has been assembled.
cat > "${OUTPUT_FILE}" << 'EOF'
{
  "metadata": {
    "platform": "",
    "build_dir": "",
    "timestamp": "",
    "object_files_scanned": 0,
    "total_symbols": 0
  },
  "conflicts": [],
  "symbols": {}
}
EOF

# Use Python to generate proper JSON (more portable than jq).
# The heredoc delimiter is quoted and all shell values are passed as
# arguments, so paths containing quotes or backslashes cannot corrupt (or
# inject into) the Python source.
python3 - "${TEMP_SYMBOLS}" "${OUTPUT_FILE}" "${UNAME_S}" "${BUILD_DIR}" "${PROCESSED}" << 'PYTHON_EOF'
import json
import sys
from collections import defaultdict
from datetime import datetime, timezone

symbols_path, output_path, platform_name, build_dir, processed = sys.argv[1:6]

# Read extracted symbols: one "symbol|object_file|type" record per line
symbol_dict = defaultdict(list)
total_symbols = 0

try:
    with open(symbols_path, "r") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            parts = line.split("|")
            if len(parts) >= 2:
                symbol = parts[0]
                obj_file = parts[1]
                sym_type = parts[2] if len(parts) > 2 else "?"

                symbol_dict[symbol].append({
                    "object_file": obj_file,
                    "type": sym_type
                })
                total_symbols += 1
except Exception as e:
    print(f"Error reading symbols: {e}", file=sys.stderr)
    sys.exit(1)

# Identify conflicts (symbols defined in multiple object files)
conflicts = []
for symbol, definitions in symbol_dict.items():
    if len(definitions) > 1:
        conflicts.append({
            "symbol": symbol,
            "count": len(definitions),
            "definitions": definitions
        })

# Sort conflicts by count (most duplicated first)
conflicts.sort(key=lambda x: x["count"], reverse=True)

# Build output JSON
output = {
    "metadata": {
        "platform": platform_name,
        "build_dir": build_dir,
        # Same "<naive UTC ISO time>Z" format as before, without the
        # deprecated datetime.utcnow().
        "timestamp": datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z",
        "object_files_scanned": int(processed),
        "total_symbols": total_symbols,
        "total_conflicts": len(conflicts)
    },
    "conflicts": conflicts,
    "symbols": {}
}

# Add symbols to output (only conflicted symbols, to keep the file small)
for symbol, definitions in symbol_dict.items():
    if len(definitions) > 1:
        output["symbols"][symbol] = definitions

# Write JSON
try:
    with open(output_path, "w") as f:
        json.dump(output, f, indent=2)
except Exception as e:
    print(f"Error writing JSON: {e}", file=sys.stderr)
    sys.exit(1)

print(f"Symbol database written to: {output_path}")
print(f"Total symbols: {total_symbols}")
print(f"Conflicts found: {len(conflicts)}")
PYTHON_EOF
|
||||
|
||||
# Cleanup
rm -f "${TEMP_SYMBOLS}" "${OBJ_LIST}"

# Report results
if [[ ! -f "${OUTPUT_FILE}" ]]; then
  echo -e "${RED}Failed to generate symbol database${NC}" >&2
  exit 1
fi

echo -e "${GREEN}Success!${NC}"

# Read the conflict count back from the generated database. The path is
# passed as an argument rather than spliced into the Python source.
CONFLICT_COUNT=$(python3 -c 'import json, sys
with open(sys.argv[1]) as f:
    print(json.load(f)["metadata"].get("total_conflicts", 0))' "${OUTPUT_FILE}" 2>/dev/null || echo "?")
# Native Windows Python may terminate the line with CRLF.
CONFLICT_COUNT="${CONFLICT_COUNT%$'\r'}"

# An unreadable count must not be reported as "no conflicts": a non-numeric
# value would make the numeric comparison fail and fall through to success.
if [[ ! "${CONFLICT_COUNT}" =~ ^[0-9]+$ ]]; then
  echo -e "${RED}Error: could not read conflict count from ${OUTPUT_FILE}${NC}" >&2
  exit 1
fi

if (( CONFLICT_COUNT > 0 )); then
  echo -e "${YELLOW}Found ${RED}${CONFLICT_COUNT}${YELLOW} symbol conflicts${NC}"
  exit 1 # Exit with error if conflicts found
else
  echo -e "${GREEN}No symbol conflicts detected!${NC}"
  exit 0
fi
|
||||
Reference in New Issue
Block a user