refactor: Improve message parsing and dictionary handling
- Changed the type of dictionary variable from int to int8_t for better type safety.
- Updated the handling of dictionary entries in message parsing to ensure correct formatting and prevent parsing errors with command arguments.
- Refactored message data parsing logic to use index-based loops, improving clarity and correctness in handling command arguments.
- Enhanced the documentation in message_data.h to provide a comprehensive overview of the message data system and its components.
- Added new tests to validate the correct parsing of messages with commands and arguments, ensuring robustness against previous bugs.
This commit is contained in:
@@ -95,9 +95,10 @@ std::string ParseTextDataByte(uint8_t value) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Check for dictionary.
|
// Check for dictionary.
|
||||||
int dictionary = FindDictionaryEntry(value);
|
int8_t dictionary = FindDictionaryEntry(value);
|
||||||
if (dictionary >= 0) {
|
if (dictionary >= 0) {
|
||||||
return absl::StrFormat("[%s:%02X]", DICTIONARYTOKEN, dictionary);
|
return absl::StrFormat("[%s:%02X]", DICTIONARYTOKEN,
|
||||||
|
static_cast<unsigned char>(dictionary));
|
||||||
}
|
}
|
||||||
|
|
||||||
return "";
|
return "";
|
||||||
@@ -105,7 +106,7 @@ std::string ParseTextDataByte(uint8_t value) {
|
|||||||
|
|
||||||
std::vector<uint8_t> ParseMessageToData(std::string str) {
|
std::vector<uint8_t> ParseMessageToData(std::string str) {
|
||||||
std::vector<uint8_t> bytes;
|
std::vector<uint8_t> bytes;
|
||||||
std::string temp_string = str;
|
std::string temp_string = std::move(str);
|
||||||
int pos = 0;
|
int pos = 0;
|
||||||
while (pos < temp_string.size()) {
|
while (pos < temp_string.size()) {
|
||||||
// Get next text fragment.
|
// Get next text fragment.
|
||||||
@@ -181,8 +182,8 @@ std::vector<DictionaryEntry> BuildDictionaryEntries(Rom* rom) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::string ReplaceAllDictionaryWords(std::string str,
|
std::string ReplaceAllDictionaryWords(std::string str,
|
||||||
std::vector<DictionaryEntry> dictionary) {
|
const std::vector<DictionaryEntry>& dictionary) {
|
||||||
std::string temp = str;
|
std::string temp = std::move(str);
|
||||||
for (const auto& entry : dictionary) {
|
for (const auto& entry : dictionary) {
|
||||||
if (entry.ContainedInString(temp)) {
|
if (entry.ContainedInString(temp)) {
|
||||||
temp = entry.ReplaceInstancesOfIn(temp);
|
temp = entry.ReplaceInstancesOfIn(temp);
|
||||||
@@ -192,7 +193,7 @@ std::string ReplaceAllDictionaryWords(std::string str,
|
|||||||
}
|
}
|
||||||
|
|
||||||
DictionaryEntry FindRealDictionaryEntry(
|
DictionaryEntry FindRealDictionaryEntry(
|
||||||
uint8_t value, std::vector<DictionaryEntry> dictionary) {
|
uint8_t value, const std::vector<DictionaryEntry>& dictionary) {
|
||||||
for (const auto& entry : dictionary) {
|
for (const auto& entry : dictionary) {
|
||||||
if (entry.ID + DICTOFF == value) {
|
if (entry.ID + DICTOFF == value) {
|
||||||
return entry;
|
return entry;
|
||||||
@@ -245,12 +246,12 @@ absl::StatusOr<MessageData> ParseSingleMessage(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Check for dictionary.
|
// Check for dictionary.
|
||||||
int dictionary = FindDictionaryEntry(current_byte);
|
int8_t dictionary = FindDictionaryEntry(current_byte);
|
||||||
if (dictionary >= 0) {
|
if (dictionary >= 0) {
|
||||||
current_message_raw.append("[");
|
current_message_raw.append("[");
|
||||||
current_message_raw.append(DICTIONARYTOKEN);
|
current_message_raw.append(DICTIONARYTOKEN);
|
||||||
current_message_raw.append(":");
|
current_message_raw.append(":");
|
||||||
current_message_raw.append(util::HexWord(dictionary));
|
current_message_raw.append(util::HexWord(static_cast<unsigned char>(dictionary)));
|
||||||
current_message_raw.append("]");
|
current_message_raw.append("]");
|
||||||
|
|
||||||
auto mutable_rom_data = const_cast<uint8_t*>(rom_data.data());
|
auto mutable_rom_data = const_cast<uint8_t*>(rom_data.data());
|
||||||
@@ -288,11 +289,37 @@ std::vector<std::string> ParseMessageData(
|
|||||||
|
|
||||||
for (auto& message : message_data) {
|
for (auto& message : message_data) {
|
||||||
std::string parsed_message = "";
|
std::string parsed_message = "";
|
||||||
int pos = 0;
|
// Use index-based loop to properly skip argument bytes
|
||||||
for (const uint8_t& byte : message.Data) {
|
for (size_t pos = 0; pos < message.Data.size(); ++pos) {
|
||||||
if (CharEncoder.contains(byte)) {
|
uint8_t byte = message.Data[pos];
|
||||||
parsed_message.push_back(CharEncoder.at(byte));
|
|
||||||
|
// Check for text commands first (they may have arguments to skip)
|
||||||
|
auto text_element = FindMatchingCommand(byte);
|
||||||
|
if (text_element != std::nullopt) {
|
||||||
|
// Add newline for certain commands
|
||||||
|
if (text_element->ID == kScrollVertical ||
|
||||||
|
text_element->ID == kLine2 || text_element->ID == kLine3) {
|
||||||
|
parsed_message.append("\n");
|
||||||
|
}
|
||||||
|
// If command has an argument, get it from next byte and skip it
|
||||||
|
if (text_element->HasArgument && pos + 1 < message.Data.size()) {
|
||||||
|
uint8_t arg_byte = message.Data[pos + 1];
|
||||||
|
parsed_message.append(text_element->GetParamToken(arg_byte));
|
||||||
|
pos++; // Skip the argument byte
|
||||||
} else {
|
} else {
|
||||||
|
parsed_message.append(text_element->GetParamToken());
|
||||||
|
}
|
||||||
|
continue; // Move to next byte
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for special characters
|
||||||
|
auto special_element = FindMatchingSpecial(byte);
|
||||||
|
if (special_element != std::nullopt) {
|
||||||
|
parsed_message.append(special_element->GetParamToken());
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for dictionary entries
|
||||||
if (byte >= DICTOFF && byte < (DICTOFF + 97)) {
|
if (byte >= DICTOFF && byte < (DICTOFF + 97)) {
|
||||||
DictionaryEntry dic_entry;
|
DictionaryEntry dic_entry;
|
||||||
for (const auto& entry : dictionary_entries) {
|
for (const auto& entry : dictionary_entries) {
|
||||||
@@ -302,31 +329,14 @@ std::vector<std::string> ParseMessageData(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
parsed_message.append(dic_entry.Contents);
|
parsed_message.append(dic_entry.Contents);
|
||||||
} else {
|
continue;
|
||||||
auto text_element = FindMatchingCommand(byte);
|
|
||||||
if (text_element != std::nullopt) {
|
|
||||||
if (text_element->ID == kScrollVertical ||
|
|
||||||
text_element->ID == kLine2 || text_element->ID == kLine3) {
|
|
||||||
parsed_message.append("\n");
|
|
||||||
}
|
}
|
||||||
// If there is a param, add it to the message using GetParamToken.
|
|
||||||
if (text_element->HasArgument) {
|
// Finally check for regular characters
|
||||||
// The next byte is the param.
|
if (CharEncoder.contains(byte)) {
|
||||||
parsed_message.append(
|
parsed_message.push_back(CharEncoder.at(byte));
|
||||||
text_element->GetParamToken(message.Data[pos + 1]));
|
|
||||||
pos++;
|
|
||||||
} else {
|
|
||||||
parsed_message.append(text_element->GetParamToken());
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
auto special_element = FindMatchingSpecial(byte);
|
|
||||||
if (special_element != std::nullopt) {
|
|
||||||
parsed_message.append(special_element->GetParamToken());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pos++;
|
|
||||||
}
|
|
||||||
parsed_messages.push_back(parsed_message);
|
parsed_messages.push_back(parsed_message);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -389,10 +399,10 @@ std::vector<MessageData> ReadAllTextData(uint8_t* rom, int pos) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Check for dictionary.
|
// Check for dictionary.
|
||||||
int dictionary = FindDictionaryEntry(current_byte);
|
int8_t dictionary = FindDictionaryEntry(current_byte);
|
||||||
if (dictionary >= 0) {
|
if (dictionary >= 0) {
|
||||||
current_raw_message.append(absl::StrFormat("[%s:%s]", DICTIONARYTOKEN,
|
current_raw_message.append(absl::StrFormat("[%s:%s]", DICTIONARYTOKEN,
|
||||||
util::HexByte(dictionary)));
|
util::HexByte(static_cast<unsigned char>(dictionary))));
|
||||||
|
|
||||||
uint32_t address =
|
uint32_t address =
|
||||||
Get24LocalFromPC(rom, kPointersDictionaries + (dictionary * 2));
|
Get24LocalFromPC(rom, kPointersDictionaries + (dictionary * 2));
|
||||||
|
|||||||
@@ -1,6 +1,83 @@
|
|||||||
#ifndef YAZE_APP_EDITOR_MESSAGE_MESSAGE_DATA_H
|
#ifndef YAZE_APP_EDITOR_MESSAGE_MESSAGE_DATA_H
|
||||||
#define YAZE_APP_EDITOR_MESSAGE_MESSAGE_DATA_H
|
#define YAZE_APP_EDITOR_MESSAGE_MESSAGE_DATA_H
|
||||||
|
|
||||||
|
// ===========================================================================
|
||||||
|
// Message Data System for Zelda 3 (A Link to the Past)
|
||||||
|
// ===========================================================================
|
||||||
|
//
|
||||||
|
// This system handles the parsing, editing, and serialization of in-game text
|
||||||
|
// messages from The Legend of Zelda: A Link to the Past (SNES).
|
||||||
|
//
|
||||||
|
// ## Architecture Overview
|
||||||
|
//
|
||||||
|
// The message system consists of several key components:
|
||||||
|
//
|
||||||
|
// 1. **Character Encoding** (`CharEncoder`):
|
||||||
|
// Maps byte values (0x00-0x66) to displayable characters (A-Z, a-z, 0-9,
|
||||||
|
// punctuation). This is the basic text representation in the ROM.
|
||||||
|
//
|
||||||
|
// 2. **Text Commands** (`TextCommands`):
|
||||||
|
// Special control codes (0x67-0x80) that control message display behavior:
|
||||||
|
// - Window appearance (border, position)
|
||||||
|
// - Text flow (line breaks, scrolling, delays)
|
||||||
|
// - Interactive elements (choices, player name insertion)
|
||||||
|
// - Some commands have arguments (e.g., [W:02] = window border type 2)
|
||||||
|
//
|
||||||
|
// 3. **Special Characters** (`SpecialChars`):
|
||||||
|
// Extended character set (0x43-0x5E) for game-specific symbols:
|
||||||
|
// - Directional arrows
|
||||||
|
// - Button prompts (A, B, X, Y)
|
||||||
|
// - HP indicators
|
||||||
|
// - Hieroglyphs
|
||||||
|
//
|
||||||
|
// 4. **Dictionary System** (`DictionaryEntry`):
|
||||||
|
// Compression system using byte values 0x88+ to reference common words/phrases
|
||||||
|
// stored separately in ROM. This saves space by replacing frequently-used
|
||||||
|
// text with single-byte references.
|
||||||
|
//
|
||||||
|
// 5. **Message Data** (`MessageData`):
|
||||||
|
// Represents a single in-game message with both raw binary data and parsed
|
||||||
|
// human-readable text. Each message is terminated by 0x7F in ROM.
|
||||||
|
//
|
||||||
|
// ## Data Flow
|
||||||
|
//
|
||||||
|
// ### Reading from ROM:
|
||||||
|
// ROM bytes → ReadAllTextData() → MessageData (raw) → ParseMessageData() →
|
||||||
|
// Human-readable string with [command] tokens
|
||||||
|
//
|
||||||
|
// ### Writing to ROM:
|
||||||
|
// User edits text → ParseMessageToData() → Binary bytes → ROM
|
||||||
|
//
|
||||||
|
// ### Dictionary Optimization:
|
||||||
|
// Text string → OptimizeMessageForDictionary() → Replace common phrases with
|
||||||
|
// [D:XX] tokens → Smaller binary representation
|
||||||
|
//
|
||||||
|
// ## ROM Memory Layout (SNES)
|
||||||
|
//
|
||||||
|
// - Text Data Block 1: 0xE0000 - 0xE7FFF (32KB)
|
||||||
|
// - Text Data Block 2: 0x75F40 - 0x773FF (5.3KB)
|
||||||
|
// - Dictionary Pointers: 0x74703
|
||||||
|
// - Character Widths: Table storing pixel widths for proportional font
|
||||||
|
// - Font Graphics: 0x70000+ (2bpp tile data)
|
||||||
|
//
|
||||||
|
// ## Message Format
|
||||||
|
//
|
||||||
|
// Messages are stored as byte sequences terminated by 0x7F:
|
||||||
|
// Example: [0x00, 0x01, 0x02, 0x7F] = "ABC"
|
||||||
|
// Example: [0x6A, 0x59, 0x2C, 0x61, 0x32, 0x28, 0x2B, 0x23, 0x7F]
|
||||||
|
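//            = "[L]" followed by seven character bytes (0x6A = player name command).
//            NOTE: the text "[L] saved Hyrule" is illustrative only; the bytes shown
//            contain no 0x65 (space) and are too few to spell it literally.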
|
||||||
|
//
|
||||||
|
// ## Token Syntax (Human-Readable Format)
|
||||||
|
//
|
||||||
|
// Commands: [TOKEN:HEX] or [TOKEN]
|
||||||
|
// Examples: [W:02] (window border), [K] (wait for key)
|
||||||
|
// Dictionary: [D:HEX]
|
||||||
|
// Examples: [D:00] (first dictionary entry)
|
||||||
|
// Special Chars:[TOKEN]
|
||||||
|
// Examples: [A] (A button), [UP] (up arrow)
|
||||||
|
//
|
||||||
|
// ===========================================================================
|
||||||
|
|
||||||
#include <optional>
|
#include <optional>
|
||||||
#include <regex>
|
#include <regex>
|
||||||
#include <string>
|
#include <string>
|
||||||
@@ -18,10 +95,12 @@ namespace editor {
|
|||||||
|
|
||||||
const std::string kBankToken = "BANK";
|
const std::string kBankToken = "BANK";
|
||||||
const std::string DICTIONARYTOKEN = "D";
|
const std::string DICTIONARYTOKEN = "D";
|
||||||
constexpr uint8_t kMessageTerminator = 0x7F;
|
constexpr uint8_t kMessageTerminator = 0x7F; // Marks end of message in ROM
|
||||||
constexpr uint8_t DICTOFF = 0x88;
|
constexpr uint8_t DICTOFF = 0x88; // Dictionary entries start at byte 0x88
|
||||||
constexpr uint8_t kWidthArraySize = 100;
|
constexpr uint8_t kWidthArraySize = 100;
|
||||||
|
|
||||||
|
// Character encoding table: Maps ROM byte values to displayable characters
|
||||||
|
// Used for both parsing ROM data into text and converting text back to bytes
|
||||||
static const std::unordered_map<uint8_t, wchar_t> CharEncoder = {
|
static const std::unordered_map<uint8_t, wchar_t> CharEncoder = {
|
||||||
{0x00, 'A'}, {0x01, 'B'}, {0x02, 'C'}, {0x03, 'D'}, {0x04, 'E'},
|
{0x00, 'A'}, {0x01, 'B'}, {0x02, 'C'}, {0x03, 'D'}, {0x04, 'E'},
|
||||||
{0x05, 'F'}, {0x06, 'G'}, {0x07, 'H'}, {0x08, 'I'}, {0x09, 'J'},
|
{0x05, 'F'}, {0x06, 'G'}, {0x07, 'H'}, {0x08, 'I'}, {0x09, 'J'},
|
||||||
@@ -42,16 +121,27 @@ static const std::unordered_map<uint8_t, wchar_t> CharEncoder = {
|
|||||||
{0x65, ' '}, {0x66, '_'},
|
{0x65, ' '}, {0x66, '_'},
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Finds the ROM byte value for a given character (reverse lookup in CharEncoder)
|
||||||
|
// Returns 0xFF if character is not found
|
||||||
uint8_t FindMatchingCharacter(char value);
|
uint8_t FindMatchingCharacter(char value);
|
||||||
|
|
||||||
|
// Checks if a byte value represents a dictionary entry
|
||||||
|
// Returns dictionary index (0-96) or -1 if not a dictionary entry
|
||||||
int8_t FindDictionaryEntry(uint8_t value);
|
int8_t FindDictionaryEntry(uint8_t value);
|
||||||
|
|
||||||
|
// Converts a human-readable message string (with [command] tokens) into ROM bytes
|
||||||
|
// This is the inverse operation of ParseMessageData
|
||||||
std::vector<uint8_t> ParseMessageToData(std::string str);
|
std::vector<uint8_t> ParseMessageToData(std::string str);
|
||||||
|
|
||||||
|
// Represents a single dictionary entry (common word/phrase) used for text compression
|
||||||
|
// Dictionary entries are stored separately in ROM and referenced by bytes 0x88-0xE8
|
||||||
|
// Example: Dictionary entry 0x00 might contain "the" and be referenced as [D:00]
|
||||||
struct DictionaryEntry {
|
struct DictionaryEntry {
|
||||||
uint8_t ID = 0;
|
uint8_t ID = 0; // Dictionary index (0-96)
|
||||||
std::string Contents = "";
|
std::string Contents = ""; // The actual text this entry represents
|
||||||
std::vector<uint8_t> Data;
|
std::vector<uint8_t> Data; // Binary representation of Contents
|
||||||
int Length = 0;
|
int Length = 0; // Character count
|
||||||
std::string Token = "";
|
std::string Token = ""; // Human-readable token like "[D:00]"
|
||||||
|
|
||||||
DictionaryEntry() = default;
|
DictionaryEntry() = default;
|
||||||
DictionaryEntry(uint8_t i, std::string_view s)
|
DictionaryEntry(uint8_t i, std::string_view s)
|
||||||
@@ -60,10 +150,14 @@ struct DictionaryEntry {
|
|||||||
Data = ParseMessageToData(Contents);
|
Data = ParseMessageToData(Contents);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Checks if this dictionary entry's text appears in the given string
|
||||||
bool ContainedInString(std::string_view s) const {
|
bool ContainedInString(std::string_view s) const {
|
||||||
return absl::StrContains(s, Contents);
|
// Convert to std::string to avoid Debian string_view bug with absl::StrContains
|
||||||
|
return absl::StrContains(std::string(s), Contents);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Replaces all occurrences of this dictionary entry's text with its token
|
||||||
|
// Example: "the cat" with dictionary[0]="the" becomes "[D:00] cat"
|
||||||
std::string ReplaceInstancesOfIn(std::string_view s) const {
|
std::string ReplaceInstancesOfIn(std::string_view s) const {
|
||||||
auto replaced_string = std::string(s);
|
auto replaced_string = std::string(s);
|
||||||
size_t pos = replaced_string.find(Contents);
|
size_t pos = replaced_string.find(Contents);
|
||||||
@@ -84,22 +178,33 @@ constexpr uint8_t kLine1 = 0x74;
|
|||||||
constexpr uint8_t kLine2 = 0x75;
|
constexpr uint8_t kLine2 = 0x75;
|
||||||
constexpr uint8_t kLine3 = 0x76;
|
constexpr uint8_t kLine3 = 0x76;
|
||||||
|
|
||||||
|
// Reads all dictionary entries from ROM and builds the dictionary table
|
||||||
std::vector<DictionaryEntry> BuildDictionaryEntries(Rom* rom);
|
std::vector<DictionaryEntry> BuildDictionaryEntries(Rom* rom);
|
||||||
std::string ReplaceAllDictionaryWords(std::string str,
|
|
||||||
std::vector<DictionaryEntry> dictionary);
|
|
||||||
DictionaryEntry FindRealDictionaryEntry(
|
|
||||||
uint8_t value, std::vector<DictionaryEntry> dictionary);
|
|
||||||
|
|
||||||
// Inserted into commands to protect them from dictionary replacements.
|
// Replaces all dictionary words in a string with their [D:XX] tokens
|
||||||
|
// Used for text compression when saving messages back to ROM
|
||||||
|
std::string ReplaceAllDictionaryWords(std::string str,
|
||||||
|
const std::vector<DictionaryEntry>& dictionary);
|
||||||
|
|
||||||
|
// Looks up a dictionary entry by its ROM byte value
|
||||||
|
DictionaryEntry FindRealDictionaryEntry(
|
||||||
|
uint8_t value, const std::vector<DictionaryEntry>& dictionary);
|
||||||
|
|
||||||
|
// Special marker inserted into commands to protect them from dictionary replacements
|
||||||
|
// during optimization. Removed after dictionary replacement is complete.
|
||||||
const std::string CHEESE = "\uBEBE";
|
const std::string CHEESE = "\uBEBE";
|
||||||
|
|
||||||
|
// Represents a complete in-game message with both raw and parsed representations
|
||||||
|
// Messages can exist in two forms:
|
||||||
|
// 1. Raw: Direct ROM bytes with dictionary references as [D:XX] tokens
|
||||||
|
// 2. Parsed: Fully expanded with dictionary words replaced by actual text
|
||||||
struct MessageData {
|
struct MessageData {
|
||||||
int ID = 0;
|
int ID = 0; // Message index in the ROM
|
||||||
int Address = 0;
|
int Address = 0; // ROM address where this message is stored
|
||||||
std::string RawString;
|
std::string RawString; // Human-readable with [D:XX] dictionary tokens
|
||||||
std::string ContentsParsed;
|
std::string ContentsParsed; // Fully expanded human-readable text
|
||||||
std::vector<uint8_t> Data;
|
std::vector<uint8_t> Data; // Raw ROM bytes (may contain dict references)
|
||||||
std::vector<uint8_t> DataParsed;
|
std::vector<uint8_t> DataParsed; // Expanded bytes (dict entries expanded)
|
||||||
|
|
||||||
MessageData() = default;
|
MessageData() = default;
|
||||||
MessageData(int id, int address, const std::string& rawString,
|
MessageData(int id, int address, const std::string& rawString,
|
||||||
@@ -123,11 +228,16 @@ struct MessageData {
|
|||||||
ContentsParsed = other.ContentsParsed;
|
ContentsParsed = other.ContentsParsed;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Optimizes a message by replacing common phrases with dictionary tokens
|
||||||
|
// Inserts CHEESE markers inside commands to prevent dictionary replacement
|
||||||
|
// from corrupting command syntax like [W:02]
|
||||||
|
// Example: "Link saved the day" → "[D:00] saved [D:01] day"
|
||||||
std::string OptimizeMessageForDictionary(
|
std::string OptimizeMessageForDictionary(
|
||||||
std::string_view message_string,
|
std::string_view message_string,
|
||||||
const std::vector<DictionaryEntry>& dictionary) {
|
const std::vector<DictionaryEntry>& dictionary) {
|
||||||
std::stringstream protons;
|
std::stringstream protons;
|
||||||
bool command = false;
|
bool command = false;
|
||||||
|
// Insert CHEESE markers inside commands to protect them
|
||||||
for (const auto& c : message_string) {
|
for (const auto& c : message_string) {
|
||||||
if (c == '[') {
|
if (c == '[') {
|
||||||
command = true;
|
command = true;
|
||||||
@@ -137,7 +247,7 @@ struct MessageData {
|
|||||||
|
|
||||||
protons << c;
|
protons << c;
|
||||||
if (command) {
|
if (command) {
|
||||||
protons << CHEESE;
|
protons << CHEESE; // Protect command contents from replacement
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -150,6 +260,8 @@ struct MessageData {
|
|||||||
return final_string;
|
return final_string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Updates this message with new text content
|
||||||
|
// Automatically optimizes the message using dictionary compression
|
||||||
void SetMessage(const std::string& message,
|
void SetMessage(const std::string& message,
|
||||||
const std::vector<DictionaryEntry>& dictionary) {
|
const std::vector<DictionaryEntry>& dictionary) {
|
||||||
RawString = message;
|
RawString = message;
|
||||||
@@ -157,14 +269,17 @@ struct MessageData {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Represents a text command or special character definition
|
||||||
|
// Text commands control message display (line breaks, colors, choices, etc.)
|
||||||
|
// Special characters are game-specific symbols (arrows, buttons, HP hearts)
|
||||||
struct TextElement {
|
struct TextElement {
|
||||||
uint8_t ID;
|
uint8_t ID; // ROM byte value for this element
|
||||||
std::string Token;
|
std::string Token; // Short token like "W" or "UP"
|
||||||
std::string GenericToken;
|
std::string GenericToken; // Display format like "[W:##]" or "[UP]"
|
||||||
std::string Pattern;
|
std::string Pattern; // Regex pattern for parsing
|
||||||
std::string StrictPattern;
|
std::string StrictPattern; // Strict regex pattern for exact matching
|
||||||
std::string Description;
|
std::string Description; // Human-readable description
|
||||||
bool HasArgument;
|
bool HasArgument; // True if command takes a parameter byte
|
||||||
|
|
||||||
TextElement() = default;
|
TextElement() = default;
|
||||||
TextElement(uint8_t id, const std::string& token, bool arg,
|
TextElement(uint8_t id, const std::string& token, bool arg,
|
||||||
@@ -197,7 +312,7 @@ struct TextElement {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
std::smatch MatchMe(std::string dfrag) const {
|
std::smatch MatchMe(const std::string& dfrag) const {
|
||||||
std::regex pattern(StrictPattern);
|
std::regex pattern(StrictPattern);
|
||||||
std::smatch match;
|
std::smatch match;
|
||||||
std::regex_match(dfrag, match, pattern);
|
std::regex_match(dfrag, match, pattern);
|
||||||
@@ -258,8 +373,12 @@ static const std::vector<TextElement> TextCommands = {
|
|||||||
TextElement(0x70, "NONO", false, kCrash),
|
TextElement(0x70, "NONO", false, kCrash),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Finds the TextElement definition for a command byte value
|
||||||
|
// Returns nullopt if the byte is not a recognized command
|
||||||
std::optional<TextElement> FindMatchingCommand(uint8_t b);
|
std::optional<TextElement> FindMatchingCommand(uint8_t b);
|
||||||
|
|
||||||
|
// Special characters available in Zelda 3 messages
|
||||||
|
// These are symbols and game-specific icons that appear in text
|
||||||
static const std::vector<TextElement> SpecialChars = {
|
static const std::vector<TextElement> SpecialChars = {
|
||||||
TextElement(0x43, "...", false, "Ellipsis …"),
|
TextElement(0x43, "...", false, "Ellipsis …"),
|
||||||
TextElement(0x4D, "UP", false, "Arrow ↑"),
|
TextElement(0x4D, "UP", false, "Arrow ↑"),
|
||||||
@@ -284,25 +403,39 @@ static const std::vector<TextElement> SpecialChars = {
|
|||||||
TextElement(0x4B, "LFR", false, "Link face right"),
|
TextElement(0x4B, "LFR", false, "Link face right"),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Finds the TextElement definition for a special character byte
|
||||||
|
// Returns nullopt if the byte is not a recognized special character
|
||||||
std::optional<TextElement> FindMatchingSpecial(uint8_t b);
|
std::optional<TextElement> FindMatchingSpecial(uint8_t b);
|
||||||
|
|
||||||
|
// Result of parsing a text token like "[W:02]"
|
||||||
|
// Contains both the command definition and its argument value
|
||||||
struct ParsedElement {
|
struct ParsedElement {
|
||||||
TextElement Parent;
|
TextElement Parent; // The command or special character definition
|
||||||
uint8_t Value;
|
uint8_t Value; // Argument value (if command has argument)
|
||||||
bool Active = false;
|
bool Active = false; // True if parsing was successful
|
||||||
|
|
||||||
ParsedElement() = default;
|
ParsedElement() = default;
|
||||||
ParsedElement(const TextElement& textElement, uint8_t value)
|
ParsedElement(const TextElement& textElement, uint8_t value)
|
||||||
: Parent(textElement), Value(value), Active(true) {}
|
: Parent(textElement), Value(value), Active(true) {}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Parses a token string like "[W:02]" and returns its ParsedElement
|
||||||
|
// Returns inactive ParsedElement if token is invalid
|
||||||
ParsedElement FindMatchingElement(const std::string& str);
|
ParsedElement FindMatchingElement(const std::string& str);
|
||||||
|
|
||||||
|
// Converts a single ROM byte into its human-readable text representation
|
||||||
|
// Handles characters, commands, special chars, and dictionary references
|
||||||
std::string ParseTextDataByte(uint8_t value);
|
std::string ParseTextDataByte(uint8_t value);
|
||||||
|
|
||||||
|
// Parses a single message from ROM data starting at current_pos
|
||||||
|
// Updates current_pos to point after the message terminator
|
||||||
|
// Returns error if message is malformed (e.g., missing terminator)
|
||||||
absl::StatusOr<MessageData> ParseSingleMessage(
|
absl::StatusOr<MessageData> ParseSingleMessage(
|
||||||
const std::vector<uint8_t>& rom_data, int* current_pos);
|
const std::vector<uint8_t>& rom_data, int* current_pos);
|
||||||
|
|
||||||
|
// Converts MessageData objects into human-readable strings with [command] tokens
|
||||||
|
// This is the main function for displaying messages in the editor
|
||||||
|
// Properly handles commands with arguments to avoid parsing errors
|
||||||
std::vector<std::string> ParseMessageData(
|
std::vector<std::string> ParseMessageData(
|
||||||
std::vector<MessageData>& message_data,
|
std::vector<MessageData>& message_data,
|
||||||
const std::vector<DictionaryEntry>& dictionary_entries);
|
const std::vector<DictionaryEntry>& dictionary_entries);
|
||||||
|
|||||||
@@ -208,5 +208,93 @@ TEST_F(MessageRomTest, BuildDictionaryEntries_CorrectSize) {
|
|||||||
EXPECT_FALSE(result.empty());
|
EXPECT_FALSE(result.empty());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_F(MessageRomTest, ParseMessageData_CommandWithArgument_NoExtraCharacters) {
|
||||||
|
// This test specifically checks for the bug where command arguments
|
||||||
|
// were being incorrectly parsed as characters (e.g., capital 'A' after [W])
|
||||||
|
// The bug was caused by using a range-based for loop while also tracking position
|
||||||
|
|
||||||
|
// Message: [W:01]ABC
|
||||||
|
// Bytes: 0x6B (W command), 0x01 (argument), 0x00 (A), 0x01 (B), 0x02 (C)
|
||||||
|
std::vector<uint8_t> data = {0x6B, 0x01, 0x00, 0x01, 0x02};
|
||||||
|
|
||||||
|
editor::MessageData message;
|
||||||
|
message.ID = 0;
|
||||||
|
message.Address = 0;
|
||||||
|
message.Data = data;
|
||||||
|
|
||||||
|
std::vector<editor::MessageData> message_data_vector = {message};
|
||||||
|
auto parsed = editor::ParseMessageData(message_data_vector, dictionary_);
|
||||||
|
|
||||||
|
// Should be "[W:01]ABC" NOT "[W:01]BABC" or "[W:01]AABC"
|
||||||
|
EXPECT_EQ(parsed[0], "[W:01]ABC");
|
||||||
|
|
||||||
|
// The 'B' should not appear twice or be skipped
|
||||||
|
EXPECT_EQ(parsed[0].find("BABC"), std::string::npos);
|
||||||
|
EXPECT_EQ(parsed[0].find("AABC"), std::string::npos);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(MessageRomTest, ParseMessageData_MultipleCommandsWithArguments) {
|
||||||
|
// Test multiple commands with arguments in sequence
|
||||||
|
// [W:01][C:02]AB
|
||||||
|
std::vector<uint8_t> data = {
|
||||||
|
0x6B, 0x01, // [W:01] - Window border command with arg
|
||||||
|
0x77, 0x02, // [C:02] - Color command with arg
|
||||||
|
0x00, 0x01 // AB - Regular characters
|
||||||
|
};
|
||||||
|
|
||||||
|
editor::MessageData message;
|
||||||
|
message.ID = 0;
|
||||||
|
message.Data = data;
|
||||||
|
|
||||||
|
std::vector<editor::MessageData> message_data_vector = {message};
|
||||||
|
auto parsed = editor::ParseMessageData(message_data_vector, dictionary_);
|
||||||
|
|
||||||
|
EXPECT_EQ(parsed[0], "[W:01][C:02]AB");
|
||||||
|
|
||||||
|
// Make sure argument bytes (0x01, 0x02) weren't parsed as characters
|
||||||
|
EXPECT_EQ(parsed[0].find("BAB"), std::string::npos);
|
||||||
|
EXPECT_EQ(parsed[0].find("CAB"), std::string::npos);
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(MessageRomTest, ParseMessageData_CommandWithoutArgument) {
|
||||||
|
// Test command without argument followed by text
|
||||||
|
// [K]ABC - Wait for key command (no arg) followed by ABC
|
||||||
|
std::vector<uint8_t> data = {
|
||||||
|
0x7E, // [K] - Wait for key (no argument)
|
||||||
|
0x00, 0x01, 0x02 // ABC
|
||||||
|
};
|
||||||
|
|
||||||
|
editor::MessageData message;
|
||||||
|
message.ID = 0;
|
||||||
|
message.Data = data;
|
||||||
|
|
||||||
|
std::vector<editor::MessageData> message_data_vector = {message};
|
||||||
|
auto parsed = editor::ParseMessageData(message_data_vector, dictionary_);
|
||||||
|
|
||||||
|
EXPECT_EQ(parsed[0], "[K]ABC");
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_F(MessageRomTest, ParseMessageData_MixedCommands) {
|
||||||
|
// Test mix of commands with and without arguments
|
||||||
|
// [W:01]A[K]B[C:02]C
|
||||||
|
std::vector<uint8_t> data = {
|
||||||
|
0x6B, 0x01, // [W:01] - with arg
|
||||||
|
0x00, // A
|
||||||
|
0x7E, // [K] - no arg
|
||||||
|
0x01, // B
|
||||||
|
0x77, 0x02, // [C:02] - with arg
|
||||||
|
0x02 // C
|
||||||
|
};
|
||||||
|
|
||||||
|
editor::MessageData message;
|
||||||
|
message.ID = 0;
|
||||||
|
message.Data = data;
|
||||||
|
|
||||||
|
std::vector<editor::MessageData> message_data_vector = {message};
|
||||||
|
auto parsed = editor::ParseMessageData(message_data_vector, dictionary_);
|
||||||
|
|
||||||
|
EXPECT_EQ(parsed[0], "[W:01]A[K]B[C:02]C");
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace test
|
} // namespace test
|
||||||
} // namespace yaze
|
} // namespace yaze
|
||||||
|
|||||||
Reference in New Issue
Block a user