Update AsmParser algorithm based on asar inferred labeling, WIP
This commit is contained in:
@@ -8,6 +8,8 @@
|
|||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "absl/strings/str_cat.h"
|
||||||
|
#include "absl/strings/str_split.h"
|
||||||
#include "app/emu/cpu/internal/opcodes.h"
|
#include "app/emu/cpu/internal/opcodes.h"
|
||||||
|
|
||||||
namespace yaze {
|
namespace yaze {
|
||||||
@@ -62,12 +64,156 @@ struct MnemonicModeHash {
|
|||||||
(std::hash<int>()(static_cast<int>(k.mode)) << 1);
|
(std::hash<int>()(static_cast<int>(k.mode)) << 1);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class AsmParser {
|
||||||
|
public:
|
||||||
|
std::vector<uint8_t> Parse(const std::string& instruction) {
|
||||||
|
CreateInternalOpcodeMap();
|
||||||
|
auto tokens = Tokenize(instruction);
|
||||||
|
if (tokens.size() < 1) {
|
||||||
|
throw std::runtime_error("Invalid instruction format: " + instruction);
|
||||||
|
}
|
||||||
|
|
||||||
|
size_t index = 0;
|
||||||
|
std::vector<uint8_t> bytes;
|
||||||
|
while (index < tokens.size()) {
|
||||||
|
// For each "line" worth of tokens, we need to extract the
|
||||||
|
// mnemonic, optional addressing mode qualifier, and operand.
|
||||||
|
// The operand can come in a variety of formats:
|
||||||
|
// - Immediate: #$01
|
||||||
|
// - Immediate Word: #$1234
|
||||||
|
// - Absolute: $1234
|
||||||
|
// - Absolute Long: $123456
|
||||||
|
// This parser is not exhaustive and only supports a subset of
|
||||||
|
// the possible addressing modes and operands.
|
||||||
|
const std::string& mnemonic = tokens[index];
|
||||||
|
index++;
|
||||||
|
|
||||||
|
// Check if addressing mode qualifier is present
|
||||||
|
// Either .b, .w, .l, or nothing, which could mean
|
||||||
|
// it was omitted or the operand is implied
|
||||||
|
std::string qualifier = "";
|
||||||
|
std::string potential_mode = tokens[index];
|
||||||
|
if (absl::StrContains(potential_mode, ".")) {
|
||||||
|
qualifier = potential_mode;
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Now we check for either the immediate mode
|
||||||
|
// symbol # or the address symbol $ to determine
|
||||||
|
// the next step
|
||||||
|
std::string operand = tokens[index];
|
||||||
|
if (operand == "#") {
|
||||||
|
index++;
|
||||||
|
// Check if the next token is a # character, in which case it is
|
||||||
|
// a hexadecimal value that needs to be converted to a byte
|
||||||
|
if (tokens[index] == "#") {
|
||||||
|
index++;
|
||||||
|
operand = tokens[index];
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
} else if (operand == "$") {
|
||||||
|
index++;
|
||||||
|
operand = tokens[index];
|
||||||
|
index++;
|
||||||
|
}
|
||||||
|
|
||||||
|
AddressingMode mode = DetermineMode(tokens);
|
||||||
|
|
||||||
|
MnemonicMode key{mnemonic, mode};
|
||||||
|
auto opcode_entry = mnemonic_to_opcode_.find(key);
|
||||||
|
if (opcode_entry == mnemonic_to_opcode_.end()) {
|
||||||
|
throw std::runtime_error("Opcode not found for mnemonic and mode: " +
|
||||||
|
mnemonic);
|
||||||
|
}
|
||||||
|
|
||||||
|
bytes.push_back(opcode_entry->second);
|
||||||
|
AppendOperandBytes(bytes, operand, mode);
|
||||||
|
}
|
||||||
|
|
||||||
|
return bytes;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Example: ADC.b #$01
|
||||||
|
// Returns: ["ADC", ".b", "#", "$", "01"]
|
||||||
|
std::vector<std::string> Tokenize(const std::string& instruction) {
|
||||||
|
std::vector<std::string> tokens;
|
||||||
|
std::regex tokenRegex{R"((\w+|\.\w+|[\#$]|[0-9a-fA-F]+|[a-zA-Z]+))"};
|
||||||
|
auto words_begin = std::sregex_iterator(instruction.begin(),
|
||||||
|
instruction.end(), tokenRegex);
|
||||||
|
auto words_end = std::sregex_iterator();
|
||||||
|
|
||||||
|
for (std::sregex_iterator i = words_begin; i != words_end; ++i) {
|
||||||
|
std::smatch match = *i;
|
||||||
|
tokens.push_back(match.str());
|
||||||
|
}
|
||||||
|
return tokens;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
void AppendOperandBytes(std::vector<uint8_t>& bytes,
|
||||||
|
const std::string& operand,
|
||||||
|
const AddressingMode& addressing_mode) {
|
||||||
|
// Handle different addressing modes
|
||||||
|
switch (addressing_mode) {
|
||||||
|
case AddressingMode::kImmediate: {
|
||||||
|
bytes.push_back(static_cast<uint8_t>(std::stoi(operand, nullptr, 16)));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case AddressingMode::kAbsolute: {
|
||||||
|
uint16_t word_operand =
|
||||||
|
static_cast<uint16_t>(std::stoi(operand, nullptr, 16));
|
||||||
|
bytes.push_back(static_cast<uint8_t>(word_operand & 0xFF));
|
||||||
|
bytes.push_back(static_cast<uint8_t>((word_operand >> 8) & 0xFF));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case AddressingMode::kAbsoluteLong: {
|
||||||
|
uint32_t long_operand =
|
||||||
|
static_cast<uint32_t>(std::stoul(operand, nullptr, 16));
|
||||||
|
bytes.push_back(static_cast<uint8_t>(long_operand & 0xFF));
|
||||||
|
bytes.push_back(static_cast<uint8_t>((long_operand >> 8) & 0xFF));
|
||||||
|
bytes.push_back(static_cast<uint8_t>((long_operand >> 16) & 0xFF));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case AddressingMode::kImplied: {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
// Unknown, append it anyway
|
||||||
|
bytes.push_back(static_cast<uint8_t>(std::stoi(operand, nullptr, 16)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
AddressingMode DetermineMode(const std::vector<std::string>& tokens) {
|
||||||
|
const std::string& addressingMode = tokens[1];
|
||||||
|
if (addressingMode == ".b") {
|
||||||
return AddressingMode::kImmediate;
|
return AddressingMode::kImmediate;
|
||||||
|
} else if (addressingMode == ".w") {
|
||||||
|
return AddressingMode::kAbsolute;
|
||||||
|
} else if (addressingMode == ".l") {
|
||||||
|
return AddressingMode::kAbsoluteLong;
|
||||||
} else {
|
} else {
|
||||||
return AddressingMode::kImplied;
|
return AddressingMode::kImplied;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool TryParseByte(const std::string& str, uint8_t& value) {
|
||||||
|
try {
|
||||||
|
value = std::stoi(str, nullptr, 16);
|
||||||
|
return true;
|
||||||
|
} catch (const std::invalid_argument& e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
bool TryParseHex(const std::string& str, uint32_t& value) {
|
||||||
|
try {
|
||||||
|
value = std::stoul(str, nullptr, 16);
|
||||||
|
return true;
|
||||||
|
} catch (const std::invalid_argument& e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void CreateInternalOpcodeMap() {
|
void CreateInternalOpcodeMap() {
|
||||||
mnemonic_to_opcode_[{"ADC", AddressingMode::kImmediate}] = 0x69;
|
mnemonic_to_opcode_[{"ADC", AddressingMode::kImmediate}] = 0x69;
|
||||||
|
|||||||
Reference in New Issue
Block a user