srctree

David Zero parent 75f83614 0faebe00
archive: Add brotli decoding

inlinesplit
WORKSPACE added: 274, removed: 6, total 268
@@ -102,6 +102,14 @@ http_archive(
url = "https://github.com/google/boringssl/archive/c0534bb964f085e4e2f273d23d08e9585e7518aa.tar.gz",
)
 
# Fetches the brotli 1.1.0 source archive from GitHub as the external
# repository "@brotli".
http_archive(
name = "brotli", # MIT
integrity = "sha256-5yCmyilCi4A/StFlNxdx9TmPq6OX7fZ3iDehhZnqE/8=",
# After extraction, rewrite the `package(` call in brotli's BUILD.bazel so it
# also passes features=["-layering_check"].
patch_cmds = ["""sed -i'' -e 's/package(/package(features=["-layering_check"],/' BUILD.bazel"""],
strip_prefix = "brotli-1.1.0",
url = "https://github.com/google/brotli/archive/refs/tags/v1.1.0.tar.gz",
)
 
http_archive(
name = "expected", # CC0-1.0
build_file = "//third_party:expected.BUILD",
 
archive/BUILD added: 274, removed: 6, total 268
@@ -26,6 +26,20 @@ cc_library(
],
)
 
# Brotli decoding library built from brotli.cpp / brotli.h. Publicly visible,
# compiled with the shared HASTUR_COPTS flags.
cc_library(
name = "brotli",
srcs = ["brotli.cpp"],
hdrs = ["brotli.h"],
copts = HASTUR_COPTS,
visibility = ["//visibility:public"],
deps = [
# Targets from the external @brotli repository.
"@brotli//:brotli_inc",
"@brotli//:brotlicommon",
"@brotli//:brotlidec",
# Provides <tl/expected.hpp>, used for the decode result type.
"@expected",
],
)
 
# TODO(robinlinden): Separate APIs for gzip and zlib.
alias(
name = "gzip",
 
filename was Deleted added: 274, removed: 6, total 268
@@ -0,0 +1,97 @@
// SPDX-FileCopyrightText: 2024 David Zero <zero-one@zer0-one.net>
//
// SPDX-License-Identifier: BSD-2-Clause
 
#include "archive/brotli.h"
 
#include <brotli/decode.h>
#include <tl/expected.hpp>
 
#include <cstddef>
#include <cstdint>
#include <memory>
#include <span>
#include <string_view>
#include <vector>
 
namespace archive {
 
// Maps a BrotliError to a human-readable description.
//
// The returned view refers to a string literal, so it stays valid for the
// lifetime of the program. Values that don't match any enumerator yield
// "Unknown error".
std::string_view to_string(BrotliError err) {
    // Start from the fallback; a matching case below overwrites it.
    std::string_view description = "Unknown error";

    switch (err) {
        case BrotliError::DecoderState:
            description = "Failed to create brotli decoder state";
            break;
        case BrotliError::InputCorrupt:
            description = "Input is corrupt or truncated";
            break;
        case BrotliError::InputEmpty:
            description = "Input is empty";
            break;
        case BrotliError::MaximumOutputLengthExceeded:
            description = "Output buffer exceeded maximum allowed length";
            break;
        case BrotliError::BrotliInternalError:
            description = "Decode failure";
            break;
    }

    return description;
}
 
// Decompresses a complete brotli stream held in `input`.
//
// Returns the decoded bytes on success. On failure returns:
// - InputEmpty: `input` contains no bytes.
// - DecoderState: the brotli decoder instance could not be created.
// - MaximumOutputLengthExceeded: the collected output had already reached the
//   1GB cap when another chunk was about to be decoded.
// - InputCorrupt: the decoder asked for more input, or reported an error code
//   in the range -1 through -16.
// - BrotliInternalError: any other decoder error.
tl::expected<std::vector<std::byte>, BrotliError> brotli_decode(std::span<std::byte const> const input) {
    if (input.empty()) {
        return tl::unexpected{BrotliError::InputEmpty};
    }

    // The decoder is a C handle; the unique_ptr destroys it on every exit path.
    using DecoderPtr = std::unique_ptr<BrotliDecoderState, decltype(&BrotliDecoderDestroyInstance)>;
    DecoderPtr decoder{BrotliDecoderCreateInstance(nullptr, nullptr, nullptr), BrotliDecoderDestroyInstance};

    if (!decoder) {
        return tl::unexpected{BrotliError::DecoderState};
    }

    // Cap output buffer at 1GB. If we hit this, something fishy is probably
    // going on, and we should bail before we OOM.
    std::size_t constexpr kMaxOutSize = 1000000000;
    std::size_t constexpr kChunkSize = 131072; // Matches the zstd chunk size

    std::vector<std::byte> decoded;
    std::vector<std::byte> chunk(kChunkSize);

    // Cursor state that brotli advances as it consumes the input.
    auto const *src = reinterpret_cast<std::uint8_t const *>(input.data());
    std::size_t src_left = input.size();
    std::size_t total_written = 0;

    for (;;) {
        if (decoded.size() >= kMaxOutSize) {
            return tl::unexpected{BrotliError::MaximumOutputLengthExceeded};
        }

        // Every round decodes into the start of the scratch chunk.
        auto *dst = reinterpret_cast<std::uint8_t *>(chunk.data());
        std::size_t dst_left = kChunkSize;

        BrotliDecoderResult const status =
                BrotliDecoderDecompressStream(decoder.get(), &src_left, &src, &dst_left, &dst, &total_written);

        // Because we provide the whole input up-front, there's no reason we
        // would ever block on needing more input, except for corrupt data
        if (status == BROTLI_DECODER_RESULT_NEEDS_MORE_INPUT) {
            return tl::unexpected{BrotliError::InputCorrupt};
        }

        if (status == BROTLI_DECODER_RESULT_ERROR) {
            // Brotli doesn't expose this in a sane way, so we use magic
            // numbers from the headers. -1 through -16 are errors related to
            // bad input.
            auto const code = BrotliDecoderGetErrorCode(decoder.get());
            bool const bad_input = code <= -1 && code >= -16;

            return tl::unexpected{bad_input ? BrotliError::InputCorrupt : BrotliError::BrotliInternalError};
        }

        // Only the part of the chunk that brotli actually filled is kept.
        // TODO(zero-one): Replace with insert_range() when support is better
        decoded.insert(decoded.end(), chunk.begin(), chunk.end() - dst_left);

        if (status == BROTLI_DECODER_RESULT_SUCCESS) {
            return decoded;
        }
    }
}
 
} // namespace archive
 
filename was Deleted added: 274, removed: 6, total 268
@@ -0,0 +1,32 @@
// SPDX-FileCopyrightText: 2024 David Zero <zero-one@zer0-one.net>
//
// SPDX-License-Identifier: BSD-2-Clause
 
#ifndef ARCHIVE_BROTLI_H_
#define ARCHIVE_BROTLI_H_
 
#include <tl/expected.hpp>
 
#include <cstddef>
#include <cstdint>
#include <span>
#include <string_view>
#include <vector>
 
namespace archive {
 
// Failure reasons reported by brotli_decode().
enum class BrotliError : std::uint8_t {
// The brotli decoder instance could not be created.
DecoderState,
// The input is corrupt or truncated.
InputCorrupt,
// The input span contained no bytes.
InputEmpty,
// The decoded output reached the maximum allowed size.
MaximumOutputLengthExceeded,
// The decoder failed for a reason not attributed to bad input.
BrotliInternalError,
};
 
// Returns a human-readable description of the given error.
std::string_view to_string(BrotliError);
 
// Decodes a complete brotli stream. Returns the decompressed bytes, or a
// BrotliError describing why decoding failed.
tl::expected<std::vector<std::byte>, BrotliError> brotli_decode(std::span<std::byte const>);
 
} // namespace archive
 
#endif
 
filename was Deleted added: 274, removed: 6, total 268
@@ -0,0 +1,17 @@
// SPDX-FileCopyrightText: 2024 David Zero <zero-one@zer0-one.net>
//
// SPDX-License-Identifier: BSD-2-Clause
 
#include "archive/brotli.h"

#include <cstddef>
#include <span>
#include <stddef.h> // NOLINT
#include <stdint.h> // NOLINT
#include <tuple>
 
extern "C" int LLVMFuzzerTestOneInput(uint8_t const *data, size_t size); // NOLINT
 
extern "C" int LLVMFuzzerTestOneInput(uint8_t const *data, size_t size) {
std::ignore = archive::brotli_decode({reinterpret_cast<std::byte const *>(data), size});
return 0;
}
 
filename was Deleted added: 274, removed: 6, total 268
@@ -0,0 +1,100 @@
// SPDX-FileCopyrightText: 2024 David Zero <zero-one@zer0-one.net>
//
// SPDX-License-Identifier: BSD-2-Clause
 
#include "archive/brotli.h"
 
#include "etest/etest2.h"
 
#include <tl/expected.hpp>
 
#include <array>
#include <cstddef>
#include <cstdint>
#include <span>
#include <string>
 
namespace {
// Reinterprets a span of uint8_t as a span of std::byte covering the same
// memory. No data is copied.
std::span<std::byte const> as_bytes(std::span<std::uint8_t const> s) {
    auto const *first = reinterpret_cast<std::byte const *>(s.data());
    return std::span<std::byte const>{first, s.size()};
}
} // namespace
 
int main() {
etest::Suite s{"brotli"};
 
using namespace archive;
 
s.add_test("empty input",
[](etest::IActions &a) { a.expect_eq(brotli_decode({}), tl::unexpected{BrotliError::InputEmpty}); });
 
s.add_test("trivial decode", [](etest::IActions &a) {
constexpr auto kCompress = std::to_array<std::uint8_t>(
{0x1f, 0x0d, 0x00, 0xf8, 0xa5, 0x40, 0xc2, 0xaa, 0x10, 0x49, 0xea, 0x16, 0x85, 0x9c, 0x32, 0x00});
 
auto ret = brotli_decode(as_bytes(kCompress));
 
a.expect(ret.has_value());
a.expect_eq(ret->size(), 14ul);
a.expect_eq(std::string(reinterpret_cast<char const *>(ret->data()), ret->size()), "This is a test");
});
 
s.add_test("input ends at block boundary", [](etest::IActions &a) {
// python -c "print('A' * 131072, end='')" | brotli
constexpr auto kCompress = std::to_array<std::uint8_t>(
{0x5f, 0xff, 0xff, 0x81, 0x5f, 0x22, 0x28, 0x1e, 0x0b, 0x04, 0x72, 0xef, 0x03, 0x00});
 
auto ret = brotli_decode(as_bytes(kCompress));
 
a.expect(ret.has_value());
a.expect_eq(ret->size(), 131072ul);
 
for (std::byte byte : *ret) {
a.expect_eq(byte, std::byte{0x41});
}
});
 
s.add_test("input ends at block boundary * 2", [](etest::IActions &a) {
// python -c "print('A' * 262144, end='')" | brotli
constexpr auto kCompress = std::to_array<std::uint8_t>(
{0x5f, 0xff, 0xff, 0x83, 0x5f, 0x22, 0x28, 0x1e, 0x0b, 0x04, 0x72, 0xef, 0x07, 0x00});
 
auto ret = brotli_decode(as_bytes(kCompress));
 
a.expect(ret.has_value());
a.expect_eq(ret->size(), 262144ul);
 
for (std::byte byte : *ret) {
a.expect_eq(byte, std::byte{0x41});
}
});
 
s.add_test("truncated input", [](etest::IActions &a) {
constexpr auto kCompress = std::to_array<std::uint8_t>(
{0x1f, 0x0d, 0x00, 0xf8, 0xa5, 0x40, 0xc2, 0xaa, 0x10, 0x49, 0xea, 0x16, 0x85});
 
auto ret = brotli_decode(as_bytes(kCompress));
 
a.expect_eq(ret, tl::unexpected{BrotliError::InputCorrupt});
});
 
s.add_test("junk input", [](etest::IActions &a) {
constexpr auto kCompress =
std::to_array<std::uint8_t>({0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff});
 
auto ret = brotli_decode(as_bytes(kCompress));
 
a.expect_eq(ret, tl::unexpected{BrotliError::InputCorrupt});
});
 
s.add_test("zero-sized output", [](etest::IActions &a) {
constexpr auto kCompress = std::to_array<std::uint8_t>({0x3f});
 
auto ret = brotli_decode(as_bytes(kCompress));
 
a.expect(ret.has_value());
a.expect(ret->empty());
});
 
return s.run();
}