srctree

Robin Linden parent 0faebe00 1f0f1e0d
wasm: Clean up circular dependency between module- and insn parsing

This also makes sense as it's not unreasonable for the instruction bytecode parsing to live in the byte code parser.

instructions.cpp depended on byte_code_parser.h and byte_code_parser.cpp depended on instructions.h.

inlinesplit
wasm/byte_code_parser.cpp added: 274, removed: 288, total 0
@@ -1,4 +1,5 @@
// SPDX-FileCopyrightText: 2023-2024 Robin Lindén <dev@robinlinden.eu>
// SPDX-FileCopyrightText: 2024 David Zero <zero-one@zer0-one.net>
//
// SPDX-License-Identifier: BSD-2-Clause
 
@@ -14,9 +15,11 @@
#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <istream>
#include <optional>
#include <sstream>
#include <string>
#include <string_view>
#include <utility>
@@ -146,7 +149,7 @@ std::optional<Global> parse(std::istream &is) {
return std::nullopt;
}
 
auto init = instructions::parse(is);
auto init = ByteCodeParser::parse_instructions(is);
if (!init) {
return std::nullopt;
}
@@ -253,7 +256,7 @@ std::optional<CodeEntry> parse(std::istream &is) {
return std::nullopt;
}
 
auto instructions = instructions::parse(is);
auto instructions = ByteCodeParser::parse_instructions(is);
if (!instructions) {
return std::nullopt;
}
@@ -311,6 +314,45 @@ std::optional<Import> parse(std::istream &is) {
};
}
 
// Reads a one-byte block type from `is`.
//
// The tag 0x40 yields an empty block type. Any other byte is handed to the
// ValueType parser, and the block type wraps the result if that succeeds.
// Returns std::nullopt if the byte can't be read or isn't recognised.
template<>
std::optional<instructions::BlockType> parse(std::istream &is) {
    using namespace instructions;

    std::uint8_t tag{};
    if (!is.read(reinterpret_cast<char *>(&tag), sizeof(tag))) {
        return std::nullopt;
    }

    constexpr std::uint8_t kEmptyTag = 0x40;
    if (tag == kEmptyTag) {
        return BlockType{{BlockType::Empty{}}};
    }

    // The byte has already been consumed from `is`, so it's replayed through a
    // temporary single-byte stream for the ValueType parser.
    std::stringstream replay{std::string(1, static_cast<char>(tag))};
    if (auto value_type = parse<ValueType>(replay); value_type) {
        return BlockType{{*std::move(value_type)}};
    }

    std::cerr << "Unhandled BlockType\n";
    return std::nullopt;
}
 
// Decodes a memory-access immediate from `is`: two LEB128-encoded u32 values,
// the alignment first and the offset second. Returns std::nullopt as soon as
// either value fails to decode.
template<>
std::optional<instructions::MemArg> parse(std::istream &is) {
    using namespace instructions;

    auto const alignment = wasm::Leb128<std::uint32_t>::decode_from(is);
    if (!alignment) {
        return std::nullopt;
    }

    auto const offset = wasm::Leb128<std::uint32_t>::decode_from(is);
    if (!offset) {
        return std::nullopt;
    }

    MemArg arg{};
    arg.align = *alignment;
    arg.offset = *offset;
    return arg;
}
 
// https://webassembly.github.io/spec/core/binary/conventions.html#vectors
template<typename T>
std::optional<std::vector<T>> parse_vector(std::istream &is) {
@@ -573,8 +615,223 @@ tl::expected<Module, ModuleParseError> ByteCodeParser::parse_module(std::istream
return module;
}
 
std::optional<ValueType> ByteCodeParser::parse_value_type(std::istream &is) {
return parse<ValueType>(is);
// Parses instructions from `is` until an End opcode is read and returns them
// in stream order. The End opcode itself is consumed but not stored.
//
// Block and Loop bodies are parsed by a recursive call, so each nested body
// consumes its own terminating End opcode.
//
// Returns std::nullopt if the stream runs out before an End opcode, if an
// immediate operand fails to decode, or if an opcode isn't handled (in which
// case a diagnostic is also written to std::cerr).
//
// NOTE(review): recursion depth follows the nesting depth of the input and is
// not limited here.
std::optional<std::vector<instructions::Instruction>> ByteCodeParser::parse_instructions(std::istream &is) {
    using namespace instructions;
    std::vector<Instruction> instructions{};

    while (true) {
        std::uint8_t opcode{};
        if (!is.read(reinterpret_cast<char *>(&opcode), sizeof(opcode))) {
            return std::nullopt;
        }

        switch (opcode) {
            case Block::kOpcode: {
                auto type = parse<BlockType>(is);
                if (!type) {
                    return std::nullopt;
                }

                auto block_instructions = parse_instructions(is);
                if (!block_instructions) {
                    return std::nullopt;
                }

                instructions.emplace_back(Block{*std::move(type), *std::move(block_instructions)});
                break;
            }
            case Loop::kOpcode: {
                auto type = parse<BlockType>(is);
                if (!type) {
                    return std::nullopt;
                }

                auto block_instructions = parse_instructions(is);
                if (!block_instructions) {
                    return std::nullopt;
                }

                instructions.emplace_back(Loop{*std::move(type), *std::move(block_instructions)});
                break;
            }
            case Branch::kOpcode: {
                auto value = wasm::Leb128<std::uint32_t>::decode_from(is);
                if (!value) {
                    return std::nullopt;
                }
                instructions.emplace_back(Branch{*value});
                break;
            }
            case BranchIf::kOpcode: {
                auto value = wasm::Leb128<std::uint32_t>::decode_from(is);
                if (!value) {
                    return std::nullopt;
                }
                instructions.emplace_back(BranchIf{*value});
                break;
            }
            case Return::kOpcode:
                instructions.emplace_back(Return{});
                break;
            case End::kOpcode:
                // Terminates the current instruction sequence (function body, block, or loop).
                return instructions;
            case I32Const::kOpcode: {
                // The constant is decoded as a signed value, unlike the index immediates.
                auto value = wasm::Leb128<std::int32_t>::decode_from(is);
                if (!value) {
                    return std::nullopt;
                }
                instructions.emplace_back(I32Const{*value});
                break;
            }
            case I32EqualZero::kOpcode:
                instructions.emplace_back(I32EqualZero{});
                break;
            case I32Equal::kOpcode:
                instructions.emplace_back(I32Equal{});
                break;
            case I32NotEqual::kOpcode:
                instructions.emplace_back(I32NotEqual{});
                break;
            case I32LessThanSigned::kOpcode:
                instructions.emplace_back(I32LessThanSigned{});
                break;
            case I32LessThanUnsigned::kOpcode:
                instructions.emplace_back(I32LessThanUnsigned{});
                break;
            case I32GreaterThanSigned::kOpcode:
                instructions.emplace_back(I32GreaterThanSigned{});
                break;
            case I32GreaterThanUnsigned::kOpcode:
                instructions.emplace_back(I32GreaterThanUnsigned{});
                break;
            case I32LessThanEqualSigned::kOpcode:
                instructions.emplace_back(I32LessThanEqualSigned{});
                break;
            case I32LessThanEqualUnsigned::kOpcode:
                instructions.emplace_back(I32LessThanEqualUnsigned{});
                break;
            case I32GreaterThanEqualSigned::kOpcode:
                instructions.emplace_back(I32GreaterThanEqualSigned{});
                break;
            case I32GreaterThanEqualUnsigned::kOpcode:
                instructions.emplace_back(I32GreaterThanEqualUnsigned{});
                break;
            case I32CountLeadingZeros::kOpcode:
                instructions.emplace_back(I32CountLeadingZeros{});
                break;
            case I32CountTrailingZeros::kOpcode:
                instructions.emplace_back(I32CountTrailingZeros{});
                break;
            case I32PopulationCount::kOpcode:
                instructions.emplace_back(I32PopulationCount{});
                break;
            case I32Add::kOpcode:
                instructions.emplace_back(I32Add{});
                break;
            case I32Subtract::kOpcode:
                instructions.emplace_back(I32Subtract{});
                break;
            case I32Multiply::kOpcode:
                instructions.emplace_back(I32Multiply{});
                break;
            case I32DivideSigned::kOpcode:
                instructions.emplace_back(I32DivideSigned{});
                break;
            case I32DivideUnsigned::kOpcode:
                instructions.emplace_back(I32DivideUnsigned{});
                break;
            case I32RemainderSigned::kOpcode:
                instructions.emplace_back(I32RemainderSigned{});
                break;
            case I32RemainderUnsigned::kOpcode:
                instructions.emplace_back(I32RemainderUnsigned{});
                break;
            case I32And::kOpcode:
                instructions.emplace_back(I32And{});
                break;
            case I32Or::kOpcode:
                instructions.emplace_back(I32Or{});
                break;
            case I32ExclusiveOr::kOpcode:
                instructions.emplace_back(I32ExclusiveOr{});
                break;
            case I32ShiftLeft::kOpcode:
                instructions.emplace_back(I32ShiftLeft{});
                break;
            case I32ShiftRightSigned::kOpcode:
                instructions.emplace_back(I32ShiftRightSigned{});
                break;
            case I32ShiftRightUnsigned::kOpcode:
                instructions.emplace_back(I32ShiftRightUnsigned{});
                break;
            case I32RotateLeft::kOpcode:
                instructions.emplace_back(I32RotateLeft{});
                break;
            case I32RotateRight::kOpcode:
                instructions.emplace_back(I32RotateRight{});
                break;
            case I32WrapI64::kOpcode:
                instructions.emplace_back(I32WrapI64{});
                break;
            case I32TruncateF32Signed::kOpcode:
                instructions.emplace_back(I32TruncateF32Signed{});
                break;
            case I32TruncateF32Unsigned::kOpcode:
                instructions.emplace_back(I32TruncateF32Unsigned{});
                break;
            case I32TruncateF64Signed::kOpcode:
                instructions.emplace_back(I32TruncateF64Signed{});
                break;
            case I32TruncateF64Unsigned::kOpcode:
                instructions.emplace_back(I32TruncateF64Unsigned{});
                break;
            case I32ReinterpretF32::kOpcode:
                instructions.emplace_back(I32ReinterpretF32{});
                break;
            case I32Extend8Signed::kOpcode:
                instructions.emplace_back(I32Extend8Signed{});
                break;
            case I32Extend16Signed::kOpcode:
                instructions.emplace_back(I32Extend16Signed{});
                break;
            case LocalGet::kOpcode: {
                auto value = wasm::Leb128<std::uint32_t>::decode_from(is);
                if (!value) {
                    return std::nullopt;
                }
                instructions.emplace_back(LocalGet{*value});
                break;
            }
            case LocalSet::kOpcode: {
                auto value = wasm::Leb128<std::uint32_t>::decode_from(is);
                if (!value) {
                    return std::nullopt;
                }
                instructions.emplace_back(LocalSet{*value});
                break;
            }
            case LocalTee::kOpcode: {
                auto value = wasm::Leb128<std::uint32_t>::decode_from(is);
                if (!value) {
                    return std::nullopt;
                }
                instructions.emplace_back(LocalTee{*value});
                break;
            }
            case I32Load::kOpcode: {
                auto arg = parse<MemArg>(is);
                if (!arg) {
                    return std::nullopt;
                }

                instructions.emplace_back(I32Load{*std::move(arg)});
                break;
            }
            default: {
                // std::hex and std::setfill are sticky. Applying them to std::cerr
                // directly would change how every later integer is printed there,
                // so the message is formatted in a local stream instead.
                std::ostringstream message;
                message << "Unhandled opcode 0x" << std::setw(2) << std::setfill('0') << std::hex << +opcode << '\n';
                std::cerr << message.str();
                return std::nullopt;
            }
        }
    }
}
 
} // namespace wasm
 
wasm/byte_code_parser.h added: 274, removed: 288, total 0
@@ -2,7 +2,7 @@
//
// SPDX-License-Identifier: BSD-2-Clause
 
#include "wasm/types.h"
#include "wasm/instructions.h"
#include "wasm/wasm.h"
 
#include <tl/expected.hpp>
@@ -11,6 +11,7 @@
#include <iosfwd>
#include <optional>
#include <string_view>
#include <vector>
 
namespace wasm {
 
@@ -79,8 +80,8 @@ public:
static tl::expected<Module, ModuleParseError> parse_module(std::istream &);
static tl::expected<Module, ModuleParseError> parse_module(std::istream &&is) { return parse_module(is); }
 
// TODO(robinlinden): Make private once instructions are parsed eagerly.
static std::optional<ValueType> parse_value_type(std::istream &);
// TODO(robinlinden): Make private.
static std::optional<std::vector<instructions::Instruction>> parse_instructions(std::istream &);
};
 
} // namespace wasm
 
wasm/instructions.cpp added: 274, removed: 288, total 0
@@ -5,19 +5,6 @@
 
#include "wasm/instructions.h"
 
#include "wasm/byte_code_parser.h"
#include "wasm/leb128.h"
 
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <istream>
#include <optional>
#include <sstream>
#include <string>
#include <utility>
#include <vector>
 
namespace wasm::instructions {
 
// clangd (16) crashes if this is = default even though it's allowed and
@@ -32,257 +19,4 @@ bool Loop::operator==(Loop const &l) const {
return l.type == type && l.instructions == instructions;
}
 
// Reads a one-byte block type from `is`.
//
// The tag 0x40 yields an empty block type. Any other byte is passed to
// ByteCodeParser::parse_value_type, and the block type wraps the result if
// that succeeds. Returns std::nullopt if the byte can't be read or isn't
// recognised.
std::optional<BlockType> BlockType::parse(std::istream &is) {
    std::uint8_t tag{};
    if (!is.read(reinterpret_cast<char *>(&tag), sizeof(tag))) {
        return std::nullopt;
    }

    constexpr std::uint8_t kEmptyTag = 0x40;
    if (tag == kEmptyTag) {
        return BlockType{{BlockType::Empty{}}};
    }

    // The byte has already been consumed from `is`, so it's replayed through a
    // temporary single-byte stream for the value-type parser.
    std::stringstream replay{std::string(1, static_cast<char>(tag))};
    if (auto value_type = ByteCodeParser::parse_value_type(replay); value_type) {
        return BlockType{{*std::move(value_type)}};
    }

    std::cerr << "Unhandled BlockType\n";
    return std::nullopt;
}
 
std::optional<MemArg> MemArg::parse(std::istream &is) {
auto a = wasm::Leb128<std::uint32_t>::decode_from(is);
if (!a) {
return std::nullopt;
}
 
auto o = wasm::Leb128<std::uint32_t>::decode_from(is);
if (!o) {
return std::nullopt;
}
 
return MemArg{.align = *std::move(a), .offset = *std::move(o)};
}
 
std::optional<std::vector<Instruction>> parse(std::istream &is) {
std::vector<Instruction> instructions{};
 
while (true) {
std::uint8_t opcode{};
if (!is.read(reinterpret_cast<char *>(&opcode), sizeof(opcode))) {
return std::nullopt;
}
 
switch (opcode) {
case Block::kOpcode: {
auto type = BlockType::parse(is);
if (!type) {
return std::nullopt;
}
 
auto block_instructions = parse(is);
if (!block_instructions) {
return std::nullopt;
}
 
instructions.emplace_back(Block{*std::move(type), *std::move(block_instructions)});
break;
}
case Loop::kOpcode: {
auto type = BlockType::parse(is);
if (!type) {
return std::nullopt;
}
 
auto block_instructions = parse(is);
if (!block_instructions) {
return std::nullopt;
}
 
instructions.emplace_back(Loop{*std::move(type), *std::move(block_instructions)});
break;
}
case Branch::kOpcode: {
auto value = wasm::Leb128<std::uint32_t>::decode_from(is);
if (!value) {
return std::nullopt;
}
instructions.emplace_back(Branch{*value});
break;
}
case BranchIf::kOpcode: {
auto value = wasm::Leb128<std::uint32_t>::decode_from(is);
if (!value) {
return std::nullopt;
}
instructions.emplace_back(BranchIf{*value});
break;
}
case Return::kOpcode:
instructions.emplace_back(Return{});
break;
case End::kOpcode:
return instructions;
case I32Const::kOpcode: {
auto value = wasm::Leb128<std::int32_t>::decode_from(is);
if (!value) {
return std::nullopt;
}
instructions.emplace_back(I32Const{*value});
break;
}
case I32EqualZero::kOpcode:
instructions.emplace_back(I32EqualZero{});
break;
case I32Equal::kOpcode:
instructions.emplace_back(I32Equal{});
break;
case I32NotEqual::kOpcode:
instructions.emplace_back(I32NotEqual{});
break;
case I32LessThanSigned::kOpcode:
instructions.emplace_back(I32LessThanSigned{});
break;
case I32LessThanUnsigned::kOpcode:
instructions.emplace_back(I32LessThanUnsigned{});
break;
case I32GreaterThanSigned::kOpcode:
instructions.emplace_back(I32GreaterThanSigned{});
break;
case I32GreaterThanUnsigned::kOpcode:
instructions.emplace_back(I32GreaterThanUnsigned{});
break;
case I32LessThanEqualSigned::kOpcode:
instructions.emplace_back(I32LessThanEqualSigned{});
break;
case I32LessThanEqualUnsigned::kOpcode:
instructions.emplace_back(I32LessThanEqualUnsigned{});
break;
case I32GreaterThanEqualSigned::kOpcode:
instructions.emplace_back(I32GreaterThanEqualSigned{});
break;
case I32GreaterThanEqualUnsigned::kOpcode:
instructions.emplace_back(I32GreaterThanEqualUnsigned{});
break;
case I32CountLeadingZeros::kOpcode:
instructions.emplace_back(I32CountLeadingZeros{});
break;
case I32CountTrailingZeros::kOpcode:
instructions.emplace_back(I32CountTrailingZeros{});
break;
case I32PopulationCount::kOpcode:
instructions.emplace_back(I32PopulationCount{});
break;
case I32Add::kOpcode:
instructions.emplace_back(I32Add{});
break;
case I32Subtract::kOpcode:
instructions.emplace_back(I32Subtract{});
break;
case I32Multiply::kOpcode:
instructions.emplace_back(I32Multiply{});
break;
case I32DivideSigned::kOpcode:
instructions.emplace_back(I32DivideSigned{});
break;
case I32DivideUnsigned::kOpcode:
instructions.emplace_back(I32DivideUnsigned{});
break;
case I32RemainderSigned::kOpcode:
instructions.emplace_back(I32RemainderSigned{});
break;
case I32RemainderUnsigned::kOpcode:
instructions.emplace_back(I32RemainderUnsigned{});
break;
case I32And::kOpcode:
instructions.emplace_back(I32And{});
break;
case I32Or::kOpcode:
instructions.emplace_back(I32Or{});
break;
case I32ExclusiveOr::kOpcode:
instructions.emplace_back(I32ExclusiveOr{});
break;
case I32ShiftLeft::kOpcode:
instructions.emplace_back(I32ShiftLeft{});
break;
case I32ShiftRightSigned::kOpcode:
instructions.emplace_back(I32ShiftRightSigned{});
break;
case I32ShiftRightUnsigned::kOpcode:
instructions.emplace_back(I32ShiftRightUnsigned{});
break;
case I32RotateLeft::kOpcode:
instructions.emplace_back(I32RotateLeft{});
break;
case I32RotateRight::kOpcode:
instructions.emplace_back(I32RotateRight{});
break;
case I32WrapI64::kOpcode:
instructions.emplace_back(I32WrapI64{});
break;
case I32TruncateF32Signed::kOpcode:
instructions.emplace_back(I32TruncateF32Signed{});
break;
case I32TruncateF32Unsigned::kOpcode:
instructions.emplace_back(I32TruncateF32Unsigned{});
break;
case I32TruncateF64Signed::kOpcode:
instructions.emplace_back(I32TruncateF64Signed{});
break;
case I32TruncateF64Unsigned::kOpcode:
instructions.emplace_back(I32TruncateF64Unsigned{});
break;
case I32ReinterpretF32::kOpcode:
instructions.emplace_back(I32ReinterpretF32{});
break;
case I32Extend8Signed::kOpcode:
instructions.emplace_back(I32Extend8Signed{});
break;
case I32Extend16Signed::kOpcode:
instructions.emplace_back(I32Extend16Signed{});
break;
case LocalGet::kOpcode: {
auto value = wasm::Leb128<std::uint32_t>::decode_from(is);
if (!value) {
return std::nullopt;
}
instructions.emplace_back(LocalGet{*value});
break;
}
case LocalSet::kOpcode: {
auto value = wasm::Leb128<std::uint32_t>::decode_from(is);
if (!value) {
return std::nullopt;
}
instructions.emplace_back(LocalSet{*value});
break;
}
case LocalTee::kOpcode: {
auto value = wasm::Leb128<std::uint32_t>::decode_from(is);
if (!value) {
return std::nullopt;
}
instructions.emplace_back(LocalTee{*value});
break;
}
case I32Load::kOpcode: {
auto arg = MemArg::parse(is);
if (!arg) {
return std::nullopt;
}
 
instructions.emplace_back(I32Load{*std::move(arg)});
break;
}
default:
std::cerr << "Unhandled opcode 0x" << std::setw(2) << std::setfill('0') << std::hex << +opcode << '\n';
return std::nullopt;
}
}
}
 
} // namespace wasm::instructions
 
wasm/instructions.h added: 274, removed: 288, total 0
@@ -1,4 +1,4 @@
// SPDX-FileCopyrightText: 2023 Robin Lindén <dev@robinlinden.eu>
// SPDX-FileCopyrightText: 2023-2024 Robin Lindén <dev@robinlinden.eu>
// SPDX-FileCopyrightText: 2024 David Zero <zero-one@zer0-one.net>
//
// SPDX-License-Identifier: BSD-2-Clause
@@ -9,8 +9,6 @@
#include "wasm/types.h"
 
#include <cstdint>
#include <iosfwd>
#include <optional>
#include <string_view>
#include <variant>
#include <vector>
@@ -18,8 +16,6 @@
namespace wasm::instructions {
 
struct BlockType {
static std::optional<BlockType> parse(std::istream &);
 
struct Empty {
[[nodiscard]] bool operator==(Empty const &) const = default;
};
@@ -28,8 +24,6 @@ struct BlockType {
};
 
struct MemArg {
static std::optional<MemArg> parse(std::istream &);
 
std::uint32_t align{};
std::uint32_t offset{};
 
@@ -443,7 +437,6 @@ struct I32Load {
MemArg arg{};
[[nodiscard]] bool operator==(I32Load const &) const = default;
};
std::optional<std::vector<Instruction>> parse(std::istream &);
 
} // namespace wasm::instructions
 
 
wasm/instructions_test.cpp added: 274, removed: 288, total 0
@@ -1,10 +1,11 @@
// SPDX-FileCopyrightText: 2023 Robin Lindén <dev@robinlinden.eu>
// SPDX-FileCopyrightText: 2023-2024 Robin Lindén <dev@robinlinden.eu>
// SPDX-FileCopyrightText: 2024 David Zero <zero-one@zer0-one.net>
//
// SPDX-License-Identifier: BSD-2-Clause
 
#include "wasm/instructions.h"
 
#include "wasm/byte_code_parser.h"
#include "wasm/types.h"
 
#include "etest/etest2.h"
@@ -20,7 +21,7 @@ using InsnVec = std::vector<wasm::instructions::Instruction>;
namespace {
std::optional<InsnVec> parse(std::string s) {
std::stringstream ss{std::move(s)};
return wasm::instructions::parse(ss);
return wasm::ByteCodeParser::parse_instructions(ss);
}
} // namespace