srctree

Andrew Kelley parent 54c08579 95035907 a2651cbc
Merge pull request #19388 from ziglang/cache-dedup

cache system file deduplication

inline split

lib/std/Build/Cache.zig added: 461, removed: 383, total 78

@@ -2,77 +2,6 @@

//! This is not a general-purpose cache. It is designed to be fast and simple,

//! not to withstand attacks using specially-crafted input.

/// A directory referred to both by an open handle and, optionally, by the
/// path it was opened from.
pub const Directory = struct {
    /// This field is redundant for operations that can act on the open directory handle
    /// directly, but it is needed when passing the directory to a child process.
    /// `null` means cwd.
    path: ?[]const u8,
    handle: fs.Dir,

    /// Returns a copy whose `path`, when non-null, is duplicated with `arena`.
    /// The `handle` is copied as-is; it is not reopened.
    pub fn clone(d: Directory, arena: Allocator) Allocator.Error!Directory {
        return .{
            .path = if (d.path) |p| try arena.dupe(u8, p) else null,
            .handle = d.handle,
        };
    }

    /// Returns a `Directory` for the current working directory: `path` is
    /// `null` and `handle` is `fs.cwd()`.
    pub fn cwd() Directory {
        return .{
            .path = null,
            .handle = fs.cwd(),
        };
    }

    /// Joins `paths` and, when `path` is non-null, prefixes the result with it.
    /// The returned slice is allocated with `allocator`.
    pub fn join(self: Directory, allocator: Allocator, paths: []const []const u8) ![]u8 {
        if (self.path) |p| {
            // TODO clean way to do this with only 1 allocation
            const part2 = try fs.path.join(allocator, paths);
            defer allocator.free(part2);
            return fs.path.join(allocator, &[_][]const u8{ p, part2 });
        } else {
            return fs.path.join(allocator, paths);
        }
    }

    /// Same as `join`, but the returned slice is null-terminated.
    pub fn joinZ(self: Directory, allocator: Allocator, paths: []const []const u8) ![:0]u8 {
        if (self.path) |p| {
            // TODO clean way to do this with only 1 allocation
            const part2 = try fs.path.join(allocator, paths);
            defer allocator.free(part2);
            return fs.path.joinZ(allocator, &[_][]const u8{ p, part2 });
        } else {
            return fs.path.joinZ(allocator, paths);
        }
    }

    /// Whether or not the handle should be closed, or the path should be freed
    /// is determined by usage, however this function is provided for convenience
    /// if it happens to be what the caller needs.
    pub fn closeAndFree(self: *Directory, gpa: Allocator) void {
        self.handle.close();
        if (self.path) |p| gpa.free(p);
        self.* = undefined;
    }

    /// Formatter hook. Only the empty format string is accepted. Writes
    /// `path` followed by the path separator, or nothing when `path` is `null`.
    pub fn format(
        self: Directory,
        comptime fmt_string: []const u8,
        options: fmt.FormatOptions,
        writer: anytype,
    ) !void {
        _ = options;
        if (fmt_string.len != 0) fmt.invalidFmtError(fmt_string, self);
        if (self.path) |p| {
            try writer.writeAll(p);
            try writer.writeAll(fs.path.sep_str);
        }
    }

    /// Two directories are equal when their handles share the same `fd`;
    /// `path` is not compared.
    pub fn eql(self: Directory, other: Directory) bool {
        return self.handle.fd == other.handle.fd;
    }
};

gpa: Allocator,

manifest_dir: fs.Dir,

hash: HashHelper = .{},

@@ -88,6 +17,8 @@ mutex: std.Thread.Mutex = .{},

prefixes_buffer: [4]Directory = undefined,

prefixes_len: usize = 0,

pub const Path = @import("Cache/Path.zig");

pub const Directory = @import("Cache/Directory.zig");

pub const DepTokenizer = @import("Cache/DepTokenizer.zig");

const Cache = @This();

@@ -124,7 +55,15 @@ pub fn prefixes(cache: *const Cache) []const Directory {

/// A file path expressed as a prefix number plus the path remaining
/// under that prefix.
const PrefixedPath = struct {
    /// Index into `Cache.prefixes()`.
    prefix: u8,
    sub_path: []u8,
    sub_path: []const u8,

    /// True when both the prefix and the bytes of `sub_path` match.
    fn eql(a: PrefixedPath, b: PrefixedPath) bool {
        return a.prefix == b.prefix and std.mem.eql(u8, a.sub_path, b.sub_path);
    }

    /// Hashes `sub_path` with Wyhash, using `prefix` as the seed, and
    /// truncates the result to 32 bits.
    fn hash(pp: PrefixedPath) u32 {
        return @truncate(std.hash.Wyhash.hash(pp.prefix, pp.sub_path));
    }
};

fn findPrefix(cache: *const Cache, file_path: []const u8) !PrefixedPath {

@@ -183,7 +122,7 @@ pub const HexDigest = [hex_digest_len]u8;

/// This is currently just an arbitrary non-empty string that can't match another manifest line.

const manifest_header = "0";

const manifest_file_size_max = 50 * 1024 * 1024;

const manifest_file_size_max = 100 * 1024 * 1024;

/// The type used for hashing file contents. Currently, this is SipHash128(1, 3), because it

/// provides enough collision resistance for the Manifest use cases, while being one of our

@@ -201,7 +140,7 @@ pub const hasher_init: Hasher = Hasher.init(&[_]u8{

});

pub const File = struct {

prefixed_path: ?PrefixedPath,

prefixed_path: PrefixedPath,

max_file_size: ?usize,

stat: Stat,

bin_digest: BinDigest,

@@ -214,16 +153,18 @@ pub const File = struct {

};

pub fn deinit(self: *File, gpa: Allocator) void {

if (self.prefixed_path) |pp| {

gpa.free(pp.sub_path);

self.prefixed_path = null;

}

gpa.free(self.prefixed_path.sub_path);

if (self.contents) |contents| {

gpa.free(contents);

self.contents = null;

}

self.* = undefined;

}

/// Raises `max_file_size` to `new_max_size` when the latter is larger.
/// A `null` argument leaves the file untouched; a `null` current limit is
/// replaced by the argument.
pub fn updateMaxSize(file: *File, new_max_size: ?usize) void {
    if (new_max_size) |candidate| {
        file.max_file_size = if (file.max_file_size) |current|
            @max(current, candidate)
        else
            candidate;
    }
}

};

pub const HashHelper = struct {

@@ -365,7 +306,7 @@ pub const Manifest = struct {

// order to obtain a problematic timestamp for the next call. Calls after that

// will then use the same timestamp, to avoid unnecessary filesystem writes.

want_refresh_timestamp: bool = true,

files: std.ArrayListUnmanaged(File) = .{},

files: Files = .{},

hex_digest: HexDigest,

/// Populated when hit() returns an error because of one

/// of the files listed in the manifest.

@@ -374,6 +315,34 @@ pub const Manifest = struct {

/// what time the file system thinks it is, according to its own granularity.

recent_problematic_timestamp: i128 = 0,

pub const Files = std.ArrayHashMapUnmanaged(File, void, FilesContext, false);

/// Hash-map context for `Files`: a `File` is identified solely by its
/// `prefixed_path`.
pub const FilesContext = struct {
    /// Hash of the file's `prefixed_path`.
    pub fn hash(_: FilesContext, file: File) u32 {
        return file.prefixed_path.hash();
    }

    /// Two files are the same key when their `prefixed_path`s are equal.
    /// The index of `b` in the map is not used.
    pub fn eql(_: FilesContext, a: File, b: File, _: usize) bool {
        return a.prefixed_path.eql(b.prefixed_path);
    }
};

/// Adapter that lets a bare `PrefixedPath` be used to look up a `File` key.
const FilesAdapter = struct {
    /// Compares the lookup path against the stored file's `prefixed_path`.
    /// The index of `b` in the map is not used.
    pub fn eql(_: @This(), a: PrefixedPath, b: File, _: usize) bool {
        return a.eql(b.prefixed_path);
    }

    /// Hash of the lookup path itself.
    pub fn hash(_: @This(), key: PrefixedPath) u32 {
        return key.hash();
    }
};

/// Add a file as a dependency of process being cached. When `hit` is

/// called, the file's contents will be checked to ensure that it matches

/// the contents from previous times.

@@ -386,7 +355,7 @@ pub const Manifest = struct {

/// to access the contents of the file after calling `hit()` like so:

///

/// ```

/// var file_contents = cache_hash.files.items[file_index].contents.?;

/// var file_contents = cache_hash.files.keys()[file_index].contents.?;

/// ```

pub fn addFile(self: *Manifest, file_path: []const u8, max_file_size: ?usize) !usize {

assert(self.manifest_file == null);

@@ -396,7 +365,12 @@ pub const Manifest = struct {

const prefixed_path = try self.cache.findPrefix(file_path);

errdefer gpa.free(prefixed_path.sub_path);

self.files.addOneAssumeCapacity().* = .{

const gop = self.files.getOrPutAssumeCapacityAdapted(prefixed_path, FilesAdapter{});

if (gop.found_existing) {
    // The map keeps its existing key, so the `sub_path` just allocated by
    // `findPrefix` is not stored anywhere. The `errdefer` above only runs
    // on error, so free it here to avoid leaking it on this early return.
    gpa.free(prefixed_path.sub_path);
    gop.key_ptr.updateMaxSize(max_file_size);
    return gop.index;
}

gop.key_ptr.* = .{

.prefixed_path = prefixed_path,

.contents = null,

.max_file_size = max_file_size,

@@ -407,7 +381,7 @@ pub const Manifest = struct {

self.hash.add(prefixed_path.prefix);

self.hash.addBytes(prefixed_path.sub_path);

return self.files.items.len - 1;

return gop.index;

}

pub fn addOptionalFile(self: *Manifest, optional_file_path: ?[]const u8) !void {

@@ -487,7 +461,7 @@ pub const Manifest = struct {

self.want_refresh_timestamp = true;

const input_file_count = self.files.items.len;

const input_file_count = self.files.entries.len;

while (true) : (self.unhit(bin_digest, input_file_count)) {

const file_contents = try self.manifest_file.?.reader().readAllAlloc(gpa, manifest_file_size_max);

defer gpa.free(file_contents);

@@ -499,7 +473,7 @@ pub const Manifest = struct {

if (try self.upgradeToExclusiveLock()) continue;

self.manifest_dirty = true;

while (idx < input_file_count) : (idx += 1) {

const ch_file = &self.files.items[idx];

const ch_file = &self.files.keys()[idx];

self.populateFileHash(ch_file) catch |err| {

self.failed_file_index = idx;

return err;

@@ -510,18 +484,6 @@ pub const Manifest = struct {

while (line_iter.next()) |line| {

defer idx += 1;

const cache_hash_file = if (idx < input_file_count) &self.files.items[idx] else blk: {

const new = try self.files.addOne(gpa);

new.* = .{

.prefixed_path = null,

.contents = null,

.max_file_size = null,

.stat = undefined,

.bin_digest = undefined,

};

break :blk new;

};

var iter = mem.tokenizeScalar(u8, line, ' ');

const size = iter.next() orelse return error.InvalidFormat;

const inode = iter.next() orelse return error.InvalidFormat;

@@ -530,30 +492,61 @@ pub const Manifest = struct {

const prefix_str = iter.next() orelse return error.InvalidFormat;

const file_path = iter.rest();

cache_hash_file.stat.size = fmt.parseInt(u64, size, 10) catch return error.InvalidFormat;

cache_hash_file.stat.inode = fmt.parseInt(fs.File.INode, inode, 10) catch return error.InvalidFormat;

cache_hash_file.stat.mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat;

_ = fmt.hexToBytes(&cache_hash_file.bin_digest, digest_str) catch return error.InvalidFormat;

const stat_size = fmt.parseInt(u64, size, 10) catch return error.InvalidFormat;

const stat_inode = fmt.parseInt(fs.File.INode, inode, 10) catch return error.InvalidFormat;

const stat_mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat;

const file_bin_digest = b: {

if (digest_str.len != hex_digest_len) return error.InvalidFormat;

var bd: BinDigest = undefined;

_ = fmt.hexToBytes(&bd, digest_str) catch return error.InvalidFormat;

break :b bd;

};

const prefix = fmt.parseInt(u8, prefix_str, 10) catch return error.InvalidFormat;

if (prefix >= self.cache.prefixes_len) return error.InvalidFormat;

if (file_path.len == 0) {

return error.InvalidFormat;

}

if (cache_hash_file.prefixed_path) |pp| {

if (pp.prefix != prefix or !mem.eql(u8, file_path, pp.sub_path)) {

return error.InvalidFormat;

}

if (file_path.len == 0) return error.InvalidFormat;

if (cache_hash_file.prefixed_path == null) {

cache_hash_file.prefixed_path = .{

const cache_hash_file = f: {

const prefixed_path: PrefixedPath = .{

.prefix = prefix,

.sub_path = try gpa.dupe(u8, file_path),

.sub_path = file_path, // expires with file_contents

};

}

if (idx < input_file_count) {

const file = &self.files.keys()[idx];

if (!file.prefixed_path.eql(prefixed_path))

return error.InvalidFormat;

const pp = cache_hash_file.prefixed_path.?;

file.stat = .{

.size = stat_size,

.inode = stat_inode,

.mtime = stat_mtime,

};

file.bin_digest = file_bin_digest;

break :f file;

}

const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});

errdefer assert(self.files.popOrNull() != null);

if (!gop.found_existing) {

gop.key_ptr.* = .{

.prefixed_path = .{

.prefix = prefix,

.sub_path = try gpa.dupe(u8, file_path),

.contents = null,

.max_file_size = null,

.stat = .{

.size = stat_size,

.inode = stat_inode,

.mtime = stat_mtime,

.bin_digest = file_bin_digest,

};

}

break :f gop.key_ptr;

};

const pp = cache_hash_file.prefixed_path;

const dir = self.cache.prefixes()[pp.prefix].handle;

const this_file = dir.openFile(pp.sub_path, .{ .mode = .read_only }) catch |err| switch (err) {

error.FileNotFound => {

@@ -617,7 +610,7 @@ pub const Manifest = struct {

if (try self.upgradeToExclusiveLock()) continue;

self.manifest_dirty = true;

while (idx < input_file_count) : (idx += 1) {

const ch_file = &self.files.items[idx];

const ch_file = &self.files.keys()[idx];

self.populateFileHash(ch_file) catch |err| {

self.failed_file_index = idx;

return err;

@@ -640,12 +633,12 @@ pub const Manifest = struct {

self.hash.hasher.update(&bin_digest);

// Remove files not in the initial hash.

for (self.files.items[input_file_count..]) |*file| {

for (self.files.keys()[input_file_count..]) |*file| {

file.deinit(self.cache.gpa);

}

self.files.shrinkRetainingCapacity(input_file_count);

for (self.files.items) |file| {

for (self.files.keys()) |file| {

self.hash.hasher.update(&file.bin_digest);

}

@@ -685,7 +678,7 @@ pub const Manifest = struct {

}

fn populateFileHash(self: *Manifest, ch_file: *File) !void {

const pp = ch_file.prefixed_path.?;

const pp = ch_file.prefixed_path;

const dir = self.cache.prefixes()[pp.prefix].handle;

const file = try dir.openFile(pp.sub_path, .{});

defer file.close();

@@ -751,7 +744,7 @@ pub const Manifest = struct {

.bin_digest = undefined,

.contents = null,

};

errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1);

errdefer self.files.shrinkRetainingCapacity(self.files.entries.len - 1);

try self.populateFileHash(new_ch_file);

@@ -759,9 +752,11 @@ pub const Manifest = struct {

}

/// Add a file as a dependency of process being cached, after the initial hash has been

/// calculated. This is useful for processes that don't know the all the files that

/// are depended on ahead of time. For example, a source file that can import other files

/// will need to be recompiled if the imported file is changed.

/// calculated.

///

/// This is useful for processes that don't know all the files that are

/// depended on ahead of time. For example, a source file that can import

/// other files will need to be recompiled if the imported file is changed.

pub fn addFilePost(self: *Manifest, file_path: []const u8) !void {

assert(self.manifest_file != null);

@@ -769,17 +764,26 @@ pub const Manifest = struct {

const prefixed_path = try self.cache.findPrefix(file_path);

errdefer gpa.free(prefixed_path.sub_path);

const new_ch_file = try self.files.addOne(gpa);

new_ch_file.* = .{

const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});

errdefer assert(self.files.popOrNull() != null);

if (gop.found_existing) {

gpa.free(prefixed_path.sub_path);

return;

}

gop.key_ptr.* = .{

.prefixed_path = prefixed_path,

.max_file_size = null,

.stat = undefined,

.bin_digest = undefined,

.contents = null,

};

errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1);

try self.populateFileHash(new_ch_file);

self.files.lockPointers();

defer self.files.unlockPointers();

try self.populateFileHash(gop.key_ptr);

}

/// Like `addFilePost` but when the file contents have already been loaded from disk.

@@ -793,13 +797,20 @@ pub const Manifest = struct {

assert(self.manifest_file != null);

const gpa = self.cache.gpa;

const ch_file = try self.files.addOne(gpa);

errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1);

const prefixed_path = try self.cache.findPrefixResolved(resolved_path);

errdefer gpa.free(prefixed_path.sub_path);

ch_file.* = .{

const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});

errdefer assert(self.files.popOrNull() != null);

if (gop.found_existing) {

gpa.free(prefixed_path.sub_path);

return;

}

const new_file = gop.key_ptr;

new_file.* = .{

.prefixed_path = prefixed_path,

.max_file_size = null,

.stat = stat,

@@ -807,19 +818,19 @@ pub const Manifest = struct {

.contents = null,

};

if (self.isProblematicTimestamp(ch_file.stat.mtime)) {

if (self.isProblematicTimestamp(new_file.stat.mtime)) {

// The actual file has an unreliable timestamp, force it to be hashed

ch_file.stat.mtime = 0;

ch_file.stat.inode = 0;

new_file.stat.mtime = 0;

new_file.stat.inode = 0;

}

{

var hasher = hasher_init;

hasher.update(bytes);

hasher.final(&ch_file.bin_digest);

hasher.final(&new_file.bin_digest);

}

self.hash.hasher.update(&ch_file.bin_digest);

self.hash.hasher.update(&new_file.bin_digest);

}

pub fn addDepFilePost(self: *Manifest, dir: fs.Dir, dep_file_basename: []const u8) !void {

@@ -885,14 +896,14 @@ pub const Manifest = struct {

const writer = contents.writer();

try writer.writeAll(manifest_header ++ "\n");

for (self.files.items) |file| {

for (self.files.keys()) |file| {

try writer.print("{d} {d} {d} {} {d} {s}\n", .{

file.stat.size,

file.stat.inode,

file.stat.mtime,

fmt.fmtSliceHexLower(&file.bin_digest),

file.prefixed_path.?.prefix,

file.prefixed_path.?.sub_path,

file.prefixed_path.prefix,

file.prefixed_path.sub_path,

});

}

@@ -961,7 +972,7 @@ pub const Manifest = struct {

file.close();

}

for (self.files.items) |*file| {

for (self.files.keys()) |*file| {

file.deinit(self.cache.gpa);

}

self.files.deinit(self.cache.gpa);

@@ -1130,7 +1141,7 @@ test "check that changing a file makes cache fail" {

// There should be nothing in the cache

try testing.expectEqual(false, try ch.hit());

try testing.expect(mem.eql(u8, original_temp_file_contents, ch.files.items[temp_file_idx].contents.?));

try testing.expect(mem.eql(u8, original_temp_file_contents, ch.files.keys()[temp_file_idx].contents.?));

digest1 = ch.final();

@@ -1150,7 +1161,7 @@ test "check that changing a file makes cache fail" {

try testing.expectEqual(false, try ch.hit());

// The cache system does not keep the contents of re-hashed input files.

try testing.expect(ch.files.items[temp_file_idx].contents == null);

try testing.expect(ch.files.keys()[temp_file_idx].contents == null);

digest2 = ch.final();

lib/std/Build/Cache/Directory.zig added: 461, removed: 383, total 78

@@ -0,0 +1,74 @@

const Directory = @This();

const std = @import("../../std.zig");

const fs = std.fs;

const fmt = std.fmt;

const Allocator = std.mem.Allocator;

/// This field is redundant for operations that can act on the open directory handle

/// directly, but it is needed when passing the directory to a child process.

/// `null` means cwd.

path: ?[]const u8,

handle: fs.Dir,

/// Returns a copy whose `path`, when non-null, is duplicated with `arena`.
/// The `handle` is copied as-is; it is not reopened.
pub fn clone(d: Directory, arena: Allocator) Allocator.Error!Directory {
    const path_copy: ?[]const u8 = if (d.path) |original|
        try arena.dupe(u8, original)
    else
        null;
    return .{ .path = path_copy, .handle = d.handle };
}

/// Returns a `Directory` for the current working directory: `path` is
/// `null` and `handle` is `fs.cwd()`.
pub fn cwd() Directory {
    const current: Directory = .{ .path = null, .handle = fs.cwd() };
    return current;
}

/// Joins `paths` and, when `path` is non-null, prefixes the result with it.
/// The returned slice is allocated with `allocator`.
pub fn join(self: Directory, allocator: Allocator, paths: []const []const u8) ![]u8 {
    if (self.path) |p| {
        // TODO clean way to do this with only 1 allocation
        const part2 = try fs.path.join(allocator, paths);
        defer allocator.free(part2);
        return fs.path.join(allocator, &[_][]const u8{ p, part2 });
    } else {
        return fs.path.join(allocator, paths);
    }
}

/// Joins `paths` and, when `path` is non-null, prefixes the result with it.
/// The returned slice is null-terminated and allocated with `allocator`.
pub fn joinZ(self: Directory, allocator: Allocator, paths: []const []const u8) ![:0]u8 {
    if (self.path) |p| {
        // TODO clean way to do this with only 1 allocation
        const part2 = try fs.path.join(allocator, paths);
        defer allocator.free(part2);
        return fs.path.joinZ(allocator, &[_][]const u8{ p, part2 });
    } else {
        return fs.path.joinZ(allocator, paths);
    }
}

/// Convenience teardown: closes `handle`, frees `path` with `gpa` when it
/// is non-null, then sets `self.*` to `undefined`.
///
/// Whether the handle should be closed or the path freed depends on how
/// the `Directory` is used; call this only when both are what the caller
/// needs.
pub fn closeAndFree(self: *Directory, gpa: Allocator) void {
    self.handle.close();
    if (self.path) |owned_path| {
        gpa.free(owned_path);
    }
    self.* = undefined;
}

/// Formatter hook. Only the empty format string is accepted. Writes `path`
/// followed by the path separator, or nothing when `path` is `null`.
pub fn format(
    self: Directory,
    comptime fmt_string: []const u8,
    options: fmt.FormatOptions,
    writer: anytype,
) !void {
    _ = options;
    if (fmt_string.len != 0) fmt.invalidFmtError(fmt_string, self);
    if (self.path) |p| {
        try writer.writeAll(p);
        try writer.writeAll(fs.path.sep_str);
    }
}

/// Two directories are equal when their handles share the same `fd`;
/// `path` is not compared.
pub fn eql(self: Directory, other: Directory) bool {
    const same_fd = self.handle.fd == other.handle.fd;
    return same_fd;
}

lib/std/Build/Cache/Path.zig added: 461, removed: 383, total 78

@@ -0,0 +1,154 @@

root_dir: Cache.Directory,

/// The path, relative to the root dir, that this `Path` represents.

/// Empty string means the root_dir is the path.

sub_path: []const u8 = "",

/// Returns a copy of `p` whose root directory is cloned with `arena` and
/// whose `sub_path` is duplicated with `arena`.
pub fn clone(p: Path, arena: Allocator) Allocator.Error!Path {
    const root_copy = try p.root_dir.clone(arena);
    const sub_copy = try arena.dupe(u8, p.sub_path);
    return .{ .root_dir = root_copy, .sub_path = sub_copy };
}

/// Returns a `Path` rooted at the current working directory, with the
/// default (empty) `sub_path`.
pub fn cwd() Path {
    const root = Cache.Directory.cwd();
    return .{ .root_dir = root };
}

/// Returns a `Path` with the same root whose `sub_path` is `p.sub_path`
/// joined with `sub_path`. An empty `sub_path` returns `p` unchanged
/// without allocating; otherwise the new `sub_path` is allocated with
/// `arena`.
pub fn join(p: Path, arena: Allocator, sub_path: []const u8) Allocator.Error!Path {
    if (sub_path.len == 0) return p;
    const joined = if (p.sub_path.len == 0)
        try fs.path.join(arena, &.{sub_path})
    else
        try fs.path.join(arena, &.{ p.sub_path, sub_path });
    return .{ .root_dir = p.root_dir, .sub_path = joined };
}

/// Returns a `Path` with the same root whose `sub_path` is the result of
/// `fs.path.resolvePosix` on `p.sub_path` and `sub_path`. An empty
/// `sub_path` returns `p` unchanged without allocating.
pub fn resolvePosix(p: Path, arena: Allocator, sub_path: []const u8) Allocator.Error!Path {
    if (sub_path.len == 0) return p;
    const resolved = try fs.path.resolvePosix(arena, &.{ p.sub_path, sub_path });
    return .{ .root_dir = p.root_dir, .sub_path = resolved };
}

/// Builds a path string from the root directory, `p.sub_path` (when
/// non-empty) and `sub_path`. The result is allocated with `allocator`.
pub fn joinString(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![]u8 {
    if (p.sub_path.len == 0) {
        return p.root_dir.join(allocator, &.{sub_path});
    }
    return p.root_dir.join(allocator, &.{ p.sub_path, sub_path });
}

/// Like `joinString`, but the returned slice is null-terminated.
pub fn joinStringZ(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![:0]u8 {
    if (p.sub_path.len == 0) {
        return p.root_dir.joinZ(allocator, &.{sub_path});
    }
    return p.root_dir.joinZ(allocator, &.{ p.sub_path, sub_path });
}

/// Opens `sub_path`, interpreted relative to this `Path`, through the root
/// directory handle. Returns `error.NameTooLong` when the combined path
/// does not fit in a `fs.MAX_PATH_BYTES` stack buffer.
pub fn openFile(
    p: Path,
    sub_path: []const u8,
    flags: fs.File.OpenFlags,
) !fs.File {
    var path_buf: [fs.MAX_PATH_BYTES]u8 = undefined;
    const full_sub_path: []const u8 = joined: {
        // Nothing to prepend: use the caller's path directly.
        if (p.sub_path.len == 0) break :joined sub_path;
        break :joined std.fmt.bufPrint(&path_buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{
            p.sub_path, sub_path,
        }) catch return error.NameTooLong;
    };
    return p.root_dir.handle.openFile(full_sub_path, flags);
}

/// Calls `makeOpenPath` on the root directory handle for `sub_path`,
/// interpreted relative to this `Path`. Returns `error.NameTooLong` when
/// the combined path does not fit in a `fs.MAX_PATH_BYTES` stack buffer.
pub fn makeOpenPath(p: Path, sub_path: []const u8, opts: fs.OpenDirOptions) !fs.Dir {
    var path_buf: [fs.MAX_PATH_BYTES]u8 = undefined;
    const full_sub_path: []const u8 = joined: {
        // Nothing to prepend: use the caller's path directly.
        if (p.sub_path.len == 0) break :joined sub_path;
        break :joined std.fmt.bufPrint(&path_buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{
            p.sub_path, sub_path,
        }) catch return error.NameTooLong;
    };
    return p.root_dir.handle.makeOpenPath(full_sub_path, opts);
}

/// Calls `statFile` on the root directory handle for `sub_path`,
/// interpreted relative to this `Path`. Returns `error.NameTooLong` when
/// the combined path does not fit in a `fs.MAX_PATH_BYTES` stack buffer.
pub fn statFile(p: Path, sub_path: []const u8) !fs.Dir.Stat {
    var path_buf: [fs.MAX_PATH_BYTES]u8 = undefined;
    const full_sub_path: []const u8 = joined: {
        // Nothing to prepend: use the caller's path directly.
        if (p.sub_path.len == 0) break :joined sub_path;
        break :joined std.fmt.bufPrint(&path_buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{
            p.sub_path, sub_path,
        }) catch return error.NameTooLong;
    };
    return p.root_dir.handle.statFile(full_sub_path);
}

/// Calls `atomicFile` on the root directory handle for `sub_path`,
/// interpreted relative to this `Path`. The combined path is written into
/// the caller-provided `buf`; returns `error.NameTooLong` when it does not
/// fit.
pub fn atomicFile(
    p: Path,
    sub_path: []const u8,
    options: fs.Dir.AtomicFileOptions,
    buf: *[fs.MAX_PATH_BYTES]u8,
) !fs.AtomicFile {
    const full_sub_path: []const u8 = joined: {
        // Nothing to prepend: use the caller's path directly.
        if (p.sub_path.len == 0) break :joined sub_path;
        break :joined std.fmt.bufPrint(buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{
            p.sub_path, sub_path,
        }) catch return error.NameTooLong;
    };
    return p.root_dir.handle.atomicFile(full_sub_path, options);
}

/// Calls `access` on the root directory handle for `sub_path`,
/// interpreted relative to this `Path`. Returns `error.NameTooLong` when
/// the combined path does not fit in a `fs.MAX_PATH_BYTES` stack buffer.
pub fn access(p: Path, sub_path: []const u8, flags: fs.File.OpenFlags) !void {
    var path_buf: [fs.MAX_PATH_BYTES]u8 = undefined;
    const full_sub_path: []const u8 = joined: {
        // Nothing to prepend: use the caller's path directly.
        if (p.sub_path.len == 0) break :joined sub_path;
        break :joined std.fmt.bufPrint(&path_buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{
            p.sub_path, sub_path,
        }) catch return error.NameTooLong;
    };
    return p.root_dir.handle.access(full_sub_path, flags);
}

/// Calls `makePath` on the root directory handle for `sub_path`,
/// interpreted relative to this `Path`. Returns `error.NameTooLong` when
/// the combined path does not fit in a `fs.MAX_PATH_BYTES` stack buffer.
pub fn makePath(p: Path, sub_path: []const u8) !void {
    var path_buf: [fs.MAX_PATH_BYTES]u8 = undefined;
    const full_sub_path: []const u8 = joined: {
        // Nothing to prepend: use the caller's path directly.
        if (p.sub_path.len == 0) break :joined sub_path;
        break :joined std.fmt.bufPrint(&path_buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{
            p.sub_path, sub_path,
        }) catch return error.NameTooLong;
    };
    return p.root_dir.handle.makePath(full_sub_path);
}

/// Formatter hook.
///
/// * `{}` writes the root directory path (when non-null) followed by a
///   separator, then `sub_path` (when non-empty) followed by a separator.
/// * `{q}` and `{'}` write the root path and `sub_path`, joined by a
///   separator when both are present, through `std.zig.stringEscape`.
///   Unlike `{}`, no trailing separator is written.
///
/// Any other format string is a compile error.
pub fn format(
    self: Path,
    comptime fmt_string: []const u8,
    options: std.fmt.FormatOptions,
    writer: anytype,
) !void {
    if (fmt_string.len == 1) {
        // Quote-escape the string.
        const stringEscape = std.zig.stringEscape;
        // `f` is forwarded as the format string of `stringEscape`, so every
        // arm must produce a string, not a character literal.
        const f: []const u8 = switch (fmt_string[0]) {
            'q' => "",
            '\'' => "'",
            else => @compileError("unsupported format string: " ++ fmt_string),
        };
        if (self.root_dir.path) |p| {
            try stringEscape(p, f, options, writer);
            if (self.sub_path.len > 0) try stringEscape(fs.path.sep_str, f, options, writer);
        }
        if (self.sub_path.len > 0) {
            try stringEscape(self.sub_path, f, options, writer);
        }
        return;
    }
    if (fmt_string.len > 0)
        std.fmt.invalidFmtError(fmt_string, self);
    if (self.root_dir.path) |p| {
        try writer.writeAll(p);
        try writer.writeAll(fs.path.sep_str);
    }
    if (self.sub_path.len > 0) {
        try writer.writeAll(self.sub_path);
        try writer.writeAll(fs.path.sep_str);
    }
}

const Path = @This();

const std = @import("../../std.zig");

const fs = std.fs;

const Allocator = std.mem.Allocator;

const Cache = std.Build.Cache;

lib/std/Build/Step.zig added: 461, removed: 383, total 78

@@ -544,7 +544,7 @@ pub fn cacheHit(s: *Step, man: *std.Build.Cache.Manifest) !bool {

fn failWithCacheError(s: *Step, man: *const std.Build.Cache.Manifest, err: anyerror) anyerror {

const i = man.failed_file_index orelse return err;

const pp = man.files.items[i].prefixed_path orelse return err;

const pp = man.files.keys()[i].prefixed_path;

const prefix = man.cache.prefixes()[pp.prefix].path orelse "";

return s.fail("{s}: {s}/{s}", .{ @errorName(err), prefix, pp.sub_path });

}

lib/std/array_hash_map.zig added: 461, removed: 383, total 78

@@ -9,23 +9,26 @@ const Wyhash = std.hash.Wyhash;

const Allocator = mem.Allocator;

const hash_map = @This();

/// An ArrayHashMap with default hash and equal functions.

/// See AutoContext for a description of the hash and equal implementations.

/// An `ArrayHashMap` with default hash and equal functions.

///

/// See `AutoContext` for a description of the hash and equal implementations.

pub fn AutoArrayHashMap(comptime K: type, comptime V: type) type {

return ArrayHashMap(K, V, AutoContext(K), !autoEqlIsCheap(K));

}

/// An ArrayHashMapUnmanaged with default hash and equal functions.

/// See AutoContext for a description of the hash and equal implementations.

/// An `ArrayHashMapUnmanaged` with default hash and equal functions.

///

/// See `AutoContext` for a description of the hash and equal implementations.

pub fn AutoArrayHashMapUnmanaged(comptime K: type, comptime V: type) type {

return ArrayHashMapUnmanaged(K, V, AutoContext(K), !autoEqlIsCheap(K));

}

/// Builtin hashmap for strings as keys.

/// An `ArrayHashMap` with strings as keys.

pub fn StringArrayHashMap(comptime V: type) type {

return ArrayHashMap([]const u8, V, StringContext, true);

}

/// An `ArrayHashMapUnmanaged` with strings as keys.

pub fn StringArrayHashMapUnmanaged(comptime V: type) type {

return ArrayHashMapUnmanaged([]const u8, V, StringContext, true);

}

@@ -50,29 +53,33 @@ pub fn hashString(s: []const u8) u32 {

return @as(u32, @truncate(std.hash.Wyhash.hash(0, s)));

}

/// Insertion order is preserved.

/// Deletions perform a "swap removal" on the entries list.

/// A hash table of keys and values, each stored sequentially.

///

/// Insertion order is preserved. In general, this data structure supports the same

/// operations as `std.ArrayList`.

///

/// Deletion operations:

/// * `swapRemove` - O(1)

/// * `orderedRemove` - O(N)

///

/// Modifying the hash map while iterating is allowed, however, one must understand

/// the (well defined) behavior when mixing insertions and deletions with iteration.

/// For a hash map that can be initialized directly that does not store an Allocator

/// field, see `ArrayHashMapUnmanaged`.

/// When `store_hash` is `false`, this data structure is biased towards cheap `eql`

/// functions. It does not store each item's hash in the table. Setting `store_hash`

/// to `true` incurs slightly more memory cost by storing each key's hash in the table

/// but only has to call `eql` for hash collisions.

/// If typical operations (except iteration over entries) need to be faster, prefer

/// the alternative `std.HashMap`.

/// Context must be a struct type with two member functions:

/// hash(self, K) u32

/// eql(self, K, K, usize) bool

/// Adapted variants of many functions are provided. These variants

/// take a pseudo key instead of a key. Their context must have the functions:

/// hash(self, PseudoKey) u32

/// eql(self, PseudoKey, K, usize) bool

///

/// See `ArrayHashMapUnmanaged` for a variant of this data structure that accepts an

/// `Allocator` as a parameter when needed rather than storing it.

pub fn ArrayHashMap(

comptime K: type,

comptime V: type,

/// A namespace that provides these two functions:

/// * `pub fn hash(self, K) u32`

/// * `pub fn eql(self, K, K, usize) bool`

///

comptime Context: type,

/// When `false`, this data structure is biased towards cheap `eql`

/// functions and avoids storing each key's hash in the table. Setting

/// `store_hash` to `true` incurs more memory cost but limits `eql` to

/// being called only once per insertion/deletion (provided there are no

/// hash collisions).

comptime store_hash: bool,

) type {

return struct {

@@ -472,34 +479,40 @@ pub fn ArrayHashMap(

};

}

/// General purpose hash table.

/// Insertion order is preserved.

/// Deletions perform a "swap removal" on the entries list.

/// A hash table of keys and values, each stored sequentially.

///

/// Insertion order is preserved. In general, this data structure supports the same

/// operations as `std.ArrayListUnmanaged`.

///

/// Deletion operations:

/// * `swapRemove` - O(1)

/// * `orderedRemove` - O(N)

///

/// Modifying the hash map while iterating is allowed, however, one must understand

/// the (well defined) behavior when mixing insertions and deletions with iteration.

/// This type does not store an Allocator field - the Allocator must be passed in

///

/// This type does not store an `Allocator` field - the `Allocator` must be passed in

/// with each function call that requires it. See `ArrayHashMap` for a type that stores

/// an Allocator field for convenience.

/// an `Allocator` field for convenience.

///

/// Can be initialized directly using the default field values.

///

/// This type is designed to have low overhead for small numbers of entries. When

/// `store_hash` is `false` and the number of entries in the map is less than 9,

/// the overhead cost of using `ArrayHashMapUnmanaged` rather than `std.ArrayList` is

/// only a single pointer-sized integer.

/// When `store_hash` is `false`, this data structure is biased towards cheap `eql`

/// functions. It does not store each item's hash in the table. Setting `store_hash`

/// to `true` incurs slightly more memory cost by storing each key's hash in the table

/// but guarantees only one call to `eql` per insertion/deletion.

/// Context must be a struct type with two member functions:

/// hash(self, K) u32

/// eql(self, K, K) bool

/// Adapted variants of many functions are provided. These variants

/// take a pseudo key instead of a key. Their context must have the functions:

/// hash(self, PseudoKey) u32

/// eql(self, PseudoKey, K) bool

pub fn ArrayHashMapUnmanaged(

comptime K: type,

comptime V: type,

/// A namespace that provides these two functions:

/// * `pub fn hash(self, K) u32`

/// * `pub fn eql(self, K, K, usize) bool`

comptime Context: type,

/// When `false`, this data structure is biased towards cheap `eql`

/// functions and avoids storing each key's hash in the table. Setting

/// `store_hash` to `true` incurs more memory cost but limits `eql` to

/// being called only once per insertion/deletion (provided there are no

/// hash collisions).

comptime store_hash: bool,

) type {

return struct {

@@ -516,10 +529,6 @@ pub fn ArrayHashMapUnmanaged(

/// Used to detect memory safety violations.

pointer_stability: std.debug.SafetyLock = .{},

comptime {

std.hash_map.verifyContext(Context, K, K, u32, true);

}

/// Modifying the key is allowed only if it does not change the hash.

/// Modifying the value is allowed.

/// Entry pointers become invalid whenever this ArrayHashMap is modified,

@@ -1834,27 +1843,16 @@ pub fn ArrayHashMapUnmanaged(

}

inline fn checkedHash(ctx: anytype, key: anytype) u32 {

comptime std.hash_map.verifyContext(@TypeOf(ctx), @TypeOf(key), K, u32, true);

fn checkedHash(ctx: anytype, key: anytype) u32 {

// If you get a compile error on the next line, it means that your

// generic hash function doesn't accept your key.

const hash = ctx.hash(key);

if (@TypeOf(hash) != u32) {

@compileError("Context " ++ @typeName(@TypeOf(ctx)) ++ " has a generic hash function that returns the wrong type!\n" ++

@typeName(u32) ++ " was expected, but found " ++ @typeName(@TypeOf(hash)));

}

return hash;

return ctx.hash(key);

}

inline fn checkedEql(ctx: anytype, a: anytype, b: K, b_index: usize) bool {

comptime std.hash_map.verifyContext(@TypeOf(ctx), @TypeOf(a), K, u32, true);

fn checkedEql(ctx: anytype, a: anytype, b: K, b_index: usize) bool {

// If you get a compile error on the next line, it means that your

// generic eql function doesn't accept (self, adapt key, K, index).

const eql = ctx.eql(a, b, b_index);

if (@TypeOf(eql) != bool) {

@compileError("Context " ++ @typeName(@TypeOf(ctx)) ++ " has a generic eql function that returns the wrong type!\n" ++

@typeName(bool) ++ " was expected, but found " ++ @typeName(@TypeOf(eql)));

}

return eql;

return ctx.eql(a, b, b_index);

}

fn dumpState(self: Self, comptime keyFmt: []const u8, comptime valueFmt: []const u8) void {

src/Compilation.zig added: 461, removed: 383, total 78

@@ -1999,7 +1999,7 @@ pub fn update(comp: *Compilation, main_progress_node: *std.Progress.Node) !void

const is_hit = man.hit() catch |err| {

const i = man.failed_file_index orelse return err;

const pp = man.files.items[i].prefixed_path orelse return err;

const pp = man.files.keys()[i].prefixed_path;

const prefix = man.cache.prefixes()[pp.prefix];

return comp.setMiscFailure(

.check_whole_cache,

@@ -4147,7 +4147,7 @@ pub fn cImport(comp: *Compilation, c_src: []const u8, owner_mod: *Package.Module

const prev_hash_state = man.hash.peekBin();

const actual_hit = hit: {

_ = try man.hit();

if (man.files.items.len == 0) {

if (man.files.entries.len == 0) {

man.unhit(prev_hash_state, 0);

break :hit false;

}

src/Package.zig added: 461, removed: 383, total 78

@@ -2,162 +2,3 @@ pub const Module = @import("Package/Module.zig");

pub const Fetch = @import("Package/Fetch.zig");

pub const build_zig_basename = "build.zig";

pub const Manifest = @import("Package/Manifest.zig");

/// A filesystem location expressed as a root directory plus a path relative
/// to that directory.
pub const Path = struct {
    root_dir: Cache.Directory,
    /// The path, relative to the root dir, that this `Path` represents.
    /// Empty string means the root_dir is the path.
    sub_path: []const u8 = "",

    /// Returns a copy of `p` whose `root_dir` is cloned via
    /// `Cache.Directory.clone` and whose `sub_path` is duplicated with `arena`.
    pub fn clone(p: Path, arena: Allocator) Allocator.Error!Path {
        return .{
            .root_dir = try p.root_dir.clone(arena),
            .sub_path = try arena.dupe(u8, p.sub_path),
        };
    }

    /// Returns a `Path` rooted at `Cache.Directory.cwd()` with an empty `sub_path`.
    pub fn cwd() Path {
        return .{ .root_dir = Cache.Directory.cwd() };
    }

    /// Appends `sub_path` to `p.sub_path` using `fs.path.join`, allocating the
    /// new sub path with `arena`. Returns `p` unchanged (no allocation) when
    /// `sub_path` is empty.
    pub fn join(p: Path, arena: Allocator, sub_path: []const u8) Allocator.Error!Path {
        if (sub_path.len == 0) return p;
        const parts: []const []const u8 =
            if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path };
        return .{
            .root_dir = p.root_dir,
            .sub_path = try fs.path.join(arena, parts),
        };
    }

    /// Like `join`, but combines the two components with
    /// `fs.path.resolvePosix`. Returns `p` unchanged when `sub_path` is empty.
    pub fn resolvePosix(p: Path, arena: Allocator, sub_path: []const u8) Allocator.Error!Path {
        if (sub_path.len == 0) return p;
        return .{
            .root_dir = p.root_dir,
            .sub_path = try fs.path.resolvePosix(arena, &.{ p.sub_path, sub_path }),
        };
    }

    /// Returns the string produced by `root_dir.join` for `p.sub_path`
    /// followed by `sub_path`. The result is allocated with `allocator`.
    pub fn joinString(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![]u8 {
        const parts: []const []const u8 =
            if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path };
        return p.root_dir.join(allocator, parts);
    }

    /// Same as `joinString`, but returns a null-terminated slice via
    /// `root_dir.joinZ`.
    pub fn joinStringZ(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![:0]u8 {
        const parts: []const []const u8 =
            if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path };
        return p.root_dir.joinZ(allocator, parts);
    }

    /// Produces `p.sub_path ++ sep ++ sub_path`, i.e. the path relative to
    /// `p.root_dir.handle`, written into `buf`.
    /// When `p.sub_path` is empty, `sub_path` is returned as-is and `buf` is
    /// not touched, so the result aliases either `sub_path` or `buf`.
    /// Fails with `error.NameTooLong` when the joined path does not fit in `buf`.
    fn joinInto(p: Path, buf: []u8, sub_path: []const u8) error{NameTooLong}![]const u8 {
        if (p.sub_path.len == 0) return sub_path;
        return std.fmt.bufPrint(buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{
            p.sub_path, sub_path,
        }) catch return error.NameTooLong;
    }

    /// Opens the file at `sub_path` (relative to this `Path`) through
    /// `root_dir.handle`.
    pub fn openFile(
        p: Path,
        sub_path: []const u8,
        flags: fs.File.OpenFlags,
    ) !fs.File {
        var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
        const joined_path = try p.joinInto(&buf, sub_path);
        return p.root_dir.handle.openFile(joined_path, flags);
    }

    /// Calls `makeOpenPath` on `root_dir.handle` for `sub_path` relative to
    /// this `Path`.
    pub fn makeOpenPath(p: Path, sub_path: []const u8, opts: fs.OpenDirOptions) !fs.Dir {
        var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
        const joined_path = try p.joinInto(&buf, sub_path);
        return p.root_dir.handle.makeOpenPath(joined_path, opts);
    }

    /// Calls `statFile` on `root_dir.handle` for `sub_path` relative to this
    /// `Path`.
    pub fn statFile(p: Path, sub_path: []const u8) !fs.Dir.Stat {
        var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
        const joined_path = try p.joinInto(&buf, sub_path);
        return p.root_dir.handle.statFile(joined_path);
    }

    /// Calls `atomicFile` on `root_dir.handle` for `sub_path` relative to this
    /// `Path`. Unlike the sibling helpers, the joined path is written into the
    /// caller-provided `buf`.
    /// NOTE(review): presumably so the joined path can outlive this call;
    /// confirm against `fs.Dir.atomicFile`.
    pub fn atomicFile(
        p: Path,
        sub_path: []const u8,
        options: fs.Dir.AtomicFileOptions,
        buf: *[fs.MAX_PATH_BYTES]u8,
    ) !fs.AtomicFile {
        const joined_path = try p.joinInto(buf, sub_path);
        return p.root_dir.handle.atomicFile(joined_path, options);
    }

    /// Calls `access` on `root_dir.handle` for `sub_path` relative to this
    /// `Path`.
    pub fn access(p: Path, sub_path: []const u8, flags: fs.File.OpenFlags) !void {
        var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
        const joined_path = try p.joinInto(&buf, sub_path);
        return p.root_dir.handle.access(joined_path, flags);
    }

    /// Calls `makePath` on `root_dir.handle` for `sub_path` relative to this
    /// `Path`.
    pub fn makePath(p: Path, sub_path: []const u8) !void {
        var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
        const joined_path = try p.joinInto(&buf, sub_path);
        return p.root_dir.handle.makePath(joined_path);
    }

    /// Formatter hook for `std.fmt`.
    ///
    /// * `{q}` and `{'}` write the path through `std.zig.stringEscape`
    ///   (passing `""` and `"'"` respectively). A separator is written between
    ///   the root path and `sub_path` only when `sub_path` is non-empty, and
    ///   no trailing separator is written.
    /// * `{}` writes the root path (if any) followed by a separator, then a
    ///   non-empty `sub_path` followed by a separator.
    /// * Any other specifier is a compile error.
    pub fn format(
        self: Path,
        comptime fmt_string: []const u8,
        options: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        if (fmt_string.len == 1) {
            // Quote-escape the string.
            const stringEscape = std.zig.stringEscape;
            // `stringEscape` takes its mode as a comptime string, so both arms
            // must yield a string (a character literal would not coerce).
            const f = switch (fmt_string[0]) {
                'q' => "",
                '\'' => "'",
                else => @compileError("unsupported format string: " ++ fmt_string),
            };
            if (self.root_dir.path) |p| {
                try stringEscape(p, f, options, writer);
                if (self.sub_path.len > 0) try stringEscape(fs.path.sep_str, f, options, writer);
            }
            if (self.sub_path.len > 0) {
                try stringEscape(self.sub_path, f, options, writer);
            }
            return;
        }
        if (fmt_string.len > 0)
            std.fmt.invalidFmtError(fmt_string, self);
        if (self.root_dir.path) |p| {
            try writer.writeAll(p);
            try writer.writeAll(fs.path.sep_str);
        }
        if (self.sub_path.len > 0) {
            try writer.writeAll(self.sub_path);
            try writer.writeAll(fs.path.sep_str);
        }
    }
};

const Package = @This();

const builtin = @import("builtin");

const std = @import("std");

const fs = std.fs;

const Allocator = std.mem.Allocator;

const assert = std.debug.assert;

const Cache = std.Build.Cache;

src/Package/Fetch.zig added: 461, removed: 383, total 78

@@ -33,7 +33,7 @@ location_tok: std.zig.Ast.TokenIndex,

hash_tok: std.zig.Ast.TokenIndex,

name_tok: std.zig.Ast.TokenIndex,

lazy_status: LazyStatus,

parent_package_root: Package.Path,

parent_package_root: Cache.Path,

parent_manifest_ast: ?*const std.zig.Ast,

prog_node: *std.Progress.Node,

job_queue: *JobQueue,

@@ -50,7 +50,7 @@ allow_missing_paths_field: bool,

/// This will either be relative to `global_cache`, or to the build root of

/// the root package.

package_root: Package.Path,

package_root: Cache.Path,

error_bundle: ErrorBundle.Wip,

manifest: ?Manifest,

manifest_ast: std.zig.Ast,

@@ -263,7 +263,7 @@ pub const JobQueue = struct {

pub const Location = union(enum) {

remote: Remote,

/// A directory found inside the parent package.

relative_path: Package.Path,

relative_path: Cache.Path,

/// Recursive Fetch tasks will never use this Location, but it may be

/// passed in by the CLI. Indicates the file contents here should be copied

/// into the global package cache. It may be a file relative to the cwd or

@@ -564,7 +564,7 @@ fn checkBuildFileExistence(f: *Fetch) RunError!void {

}

/// This function populates `f.manifest` or leaves it `null`.

fn loadManifest(f: *Fetch, pkg_root: Package.Path) RunError!void {

fn loadManifest(f: *Fetch, pkg_root: Cache.Path) RunError!void {

const eb = &f.error_bundle;

const arena = f.arena.allocator();

const manifest_bytes = pkg_root.root_dir.handle.readFileAllocOptions(

@@ -722,7 +722,7 @@ fn queueJobsForDeps(f: *Fetch) RunError!void {

}

pub fn relativePathDigest(

pkg_root: Package.Path,

pkg_root: Cache.Path,

cache_root: Cache.Directory,

) Manifest.MultiHashHexDigest {

var hasher = Manifest.Hash.init(.{});

@@ -1658,7 +1658,7 @@ const Filter = struct {

};

pub fn depDigest(

pkg_root: Package.Path,

pkg_root: Cache.Path,

cache_root: Cache.Directory,

dep: Manifest.Dependency,

) ?Manifest.MultiHashHexDigest {

src/Package/Module.zig added: 461, removed: 383, total 78

@@ -3,7 +3,7 @@

//! to Zcu. https://github.com/ziglang/zig/issues/14307

/// Only files inside this directory can be imported.

root: Package.Path,

root: Cache.Path,

/// Relative to `root`. May contain path separators.

root_src_path: []const u8,

/// Name used in compile errors. Looks like "root.foo.bar".

@@ -69,7 +69,7 @@ pub const CreateOptions = struct {

builtin_modules: ?*std.StringHashMapUnmanaged(*Module),

pub const Paths = struct {

root: Package.Path,

root: Cache.Path,

/// Relative to `root`. May contain path separators.

root_src_path: []const u8,

};

@@ -463,7 +463,7 @@ pub fn create(arena: Allocator, options: CreateOptions) !*Package.Module {

/// All fields correspond to `CreateOptions`.

pub const LimitedOptions = struct {

root: Package.Path,

root: Cache.Path,

root_src_path: []const u8,

fully_qualified_name: []const u8,

};

src/glibc.zig added: 461, removed: 383, total 78

@@ -713,7 +713,7 @@ pub fn buildSharedObjects(comp: *Compilation, prog_node: *std.Progress.Node) !vo

};

defer o_directory.handle.close();

const abilists_contents = man.files.items[abilists_index].contents.?;

const abilists_contents = man.files.keys()[abilists_index].contents.?;

const metadata = try loadMetaData(comp.gpa, abilists_contents);

defer metadata.destroy(comp.gpa);

src/main.zig added: 461, removed: 383, total 78

@@ -6143,7 +6143,7 @@ fn cmdAstCheck(

}

file.mod = try Package.Module.createLimited(arena, .{

.root = Package.Path.cwd(),

.root = Cache.Path.cwd(),

.root_src_path = file.sub_file_path,

.fully_qualified_name = "root",

});

@@ -6316,7 +6316,7 @@ fn cmdChangelist(

};

file.mod = try Package.Module.createLimited(arena, .{

.root = Package.Path.cwd(),

.root = Cache.Path.cwd(),

.root_src_path = file.sub_file_path,

.fully_qualified_name = "root",

});