srctree

Andrew Kelley parent 54c08579 95035907 a2651cbc
Merge pull request #19388 from ziglang/cache-dedup

cache system file deduplication

inlinesplit
lib/std/Build/Cache.zig added: 461, removed: 383, total 78
@@ -2,77 +2,6 @@
//! This is not a general-purpose cache. It is designed to be fast and simple,
//! not to withstand attacks using specially-crafted input.
 
pub const Directory = struct {
    /// Textual location of the directory; `null` stands for the current
    /// working directory. Operations that go through `handle` do not need it,
    /// but it is required when the directory is handed to a child process.
    path: ?[]const u8,
    handle: fs.Dir,

    /// Returns a copy whose `path` is duplicated into `arena`.
    /// The `handle` is copied as-is; it is not reopened.
    pub fn clone(d: Directory, arena: Allocator) Allocator.Error!Directory {
        const path_copy: ?[]const u8 = if (d.path) |p| try arena.dupe(u8, p) else null;
        return .{ .path = path_copy, .handle = d.handle };
    }

    /// The current working directory, represented with a `null` path.
    pub fn cwd() Directory {
        return .{ .path = null, .handle = fs.cwd() };
    }

    /// Joins `paths` and, when `path` is non-null, prefixes the result with it.
    /// The returned slice is allocated with `allocator`.
    pub fn join(self: Directory, allocator: Allocator, paths: []const []const u8) ![]u8 {
        const prefix = self.path orelse return fs.path.join(allocator, paths);
        // TODO clean way to do this with only 1 allocation
        const suffix = try fs.path.join(allocator, paths);
        defer allocator.free(suffix);
        return fs.path.join(allocator, &[_][]const u8{ prefix, suffix });
    }

    /// Same as `join`, but the result is null-terminated.
    pub fn joinZ(self: Directory, allocator: Allocator, paths: []const []const u8) ![:0]u8 {
        const prefix = self.path orelse return fs.path.joinZ(allocator, paths);
        // TODO clean way to do this with only 1 allocation
        const suffix = try fs.path.join(allocator, paths);
        defer allocator.free(suffix);
        return fs.path.joinZ(allocator, &[_][]const u8{ prefix, suffix });
    }

    /// Convenience teardown: closes `handle`, frees `path` with `gpa` when it
    /// is non-null, and invalidates `self`. Whether the handle and path are
    /// actually owned depends on how the value was created, so only call this
    /// when that matches the caller's usage.
    pub fn closeAndFree(self: *Directory, gpa: Allocator) void {
        defer self.* = undefined;
        self.handle.close();
        if (self.path) |owned_path| gpa.free(owned_path);
    }

    /// Writes `path` followed by a path separator; writes nothing for the
    /// `null` (cwd) path. Only the empty format specifier is accepted.
    pub fn format(
        self: Directory,
        comptime fmt_string: []const u8,
        options: fmt.FormatOptions,
        writer: anytype,
    ) !void {
        _ = options;
        if (fmt_string.len != 0) fmt.invalidFmtError(fmt_string, self);
        const dir_path = self.path orelse return;
        try writer.writeAll(dir_path);
        try writer.writeAll(fs.path.sep_str);
    }

    /// Two directories are equal when their handles share the same descriptor.
    pub fn eql(self: Directory, other: Directory) bool {
        const lhs_fd = self.handle.fd;
        const rhs_fd = other.handle.fd;
        return lhs_fd == rhs_fd;
    }
};
 
gpa: Allocator,
manifest_dir: fs.Dir,
hash: HashHelper = .{},
@@ -88,6 +17,8 @@ mutex: std.Thread.Mutex = .{},
prefixes_buffer: [4]Directory = undefined,
prefixes_len: usize = 0,
 
pub const Path = @import("Cache/Path.zig");
pub const Directory = @import("Cache/Directory.zig");
pub const DepTokenizer = @import("Cache/DepTokenizer.zig");
 
const Cache = @This();
@@ -124,7 +55,15 @@ pub fn prefixes(cache: *const Cache) []const Directory {
 
const PrefixedPath = struct {
prefix: u8,
sub_path: []u8,
sub_path: []const u8,
 
/// Two prefixed paths are equal when both the prefix index and the bytes of
/// `sub_path` match.
fn eql(a: PrefixedPath, b: PrefixedPath) bool {
    if (a.prefix != b.prefix) return false;
    return std.mem.eql(u8, a.sub_path, b.sub_path);
}
 
/// Hashes `sub_path` with Wyhash, using the prefix index as the seed, and
/// truncates the result to 32 bits.
fn hash(pp: PrefixedPath) u32 {
    const seed: u64 = pp.prefix;
    const full_hash = std.hash.Wyhash.hash(seed, pp.sub_path);
    return @truncate(full_hash);
}
};
 
fn findPrefix(cache: *const Cache, file_path: []const u8) !PrefixedPath {
@@ -183,7 +122,7 @@ pub const HexDigest = [hex_digest_len]u8;
 
/// This is currently just an arbitrary non-empty string that can't match another manifest line.
const manifest_header = "0";
const manifest_file_size_max = 50 * 1024 * 1024;
const manifest_file_size_max = 100 * 1024 * 1024;
 
/// The type used for hashing file contents. Currently, this is SipHash128(1, 3), because it
/// provides enough collision resistance for the Manifest use cases, while being one of our
@@ -201,7 +140,7 @@ pub const hasher_init: Hasher = Hasher.init(&[_]u8{
});
 
pub const File = struct {
prefixed_path: ?PrefixedPath,
prefixed_path: PrefixedPath,
max_file_size: ?usize,
stat: Stat,
bin_digest: BinDigest,
@@ -214,16 +153,18 @@ pub const File = struct {
};
 
pub fn deinit(self: *File, gpa: Allocator) void {
if (self.prefixed_path) |pp| {
gpa.free(pp.sub_path);
self.prefixed_path = null;
}
gpa.free(self.prefixed_path.sub_path);
if (self.contents) |contents| {
gpa.free(contents);
self.contents = null;
}
self.* = undefined;
}
 
/// Raises `max_file_size` to `new_max_size` when the latter is larger, or
/// sets it when no limit was stored yet. A `null` argument leaves the
/// stored limit unchanged.
pub fn updateMaxSize(file: *File, new_max_size: ?usize) void {
    if (new_max_size) |requested| {
        const current = file.max_file_size orelse {
            file.max_file_size = requested;
            return;
        };
        file.max_file_size = @max(current, requested);
    }
}
};
 
pub const HashHelper = struct {
@@ -365,7 +306,7 @@ pub const Manifest = struct {
// order to obtain a problematic timestamp for the next call. Calls after that
// will then use the same timestamp, to avoid unnecessary filesystem writes.
want_refresh_timestamp: bool = true,
files: std.ArrayListUnmanaged(File) = .{},
files: Files = .{},
hex_digest: HexDigest,
/// Populated when hit() returns an error because of one
/// of the files listed in the manifest.
@@ -374,6 +315,34 @@ pub const Manifest = struct {
/// what time the file system thinks it is, according to its own granularity.
recent_problematic_timestamp: i128 = 0,
 
pub const Files = std.ArrayHashMapUnmanaged(File, void, FilesContext, false);
 
/// Hash-map context for `Files`: a `File` is identified solely by its
/// `prefixed_path`.
pub const FilesContext = struct {
    /// Hash of the file's prefixed path.
    pub fn hash(_: FilesContext, file: File) u32 {
        return file.prefixed_path.hash();
    }

    /// Compares two files by prefixed path; the index of the stored entry is
    /// not used.
    pub fn eql(_: FilesContext, lhs: File, rhs: File, rhs_index: usize) bool {
        _ = rhs_index;
        return lhs.prefixed_path.eql(rhs.prefixed_path);
    }
};
 
/// Adapter that lets a bare `PrefixedPath` be used as the lookup key for a
/// map whose stored keys are `File` values.
const FilesAdapter = struct {
    /// Compares the lookup path against the stored file's prefixed path; the
    /// index of the stored entry is not used.
    pub fn eql(_: @This(), lookup: PrefixedPath, stored: File, stored_index: usize) bool {
        _ = stored_index;
        return lookup.eql(stored.prefixed_path);
    }

    /// Hash of the lookup path.
    pub fn hash(_: @This(), key: PrefixedPath) u32 {
        return key.hash();
    }
};
 
/// Add a file as a dependency of process being cached. When `hit` is
/// called, the file's contents will be checked to ensure that it matches
/// the contents from previous times.
@@ -386,7 +355,7 @@ pub const Manifest = struct {
/// to access the contents of the file after calling `hit()` like so:
///
/// ```
/// var file_contents = cache_hash.files.items[file_index].contents.?;
/// var file_contents = cache_hash.files.keys()[file_index].contents.?;
/// ```
pub fn addFile(self: *Manifest, file_path: []const u8, max_file_size: ?usize) !usize {
assert(self.manifest_file == null);
@@ -396,7 +365,12 @@ pub const Manifest = struct {
const prefixed_path = try self.cache.findPrefix(file_path);
errdefer gpa.free(prefixed_path.sub_path);
 
self.files.addOneAssumeCapacity().* = .{
const gop = self.files.getOrPutAssumeCapacityAdapted(prefixed_path, FilesAdapter{});
if (gop.found_existing) {
gop.key_ptr.updateMaxSize(max_file_size);
return gop.index;
}
gop.key_ptr.* = .{
.prefixed_path = prefixed_path,
.contents = null,
.max_file_size = max_file_size,
@@ -407,7 +381,7 @@ pub const Manifest = struct {
self.hash.add(prefixed_path.prefix);
self.hash.addBytes(prefixed_path.sub_path);
 
return self.files.items.len - 1;
return gop.index;
}
 
pub fn addOptionalFile(self: *Manifest, optional_file_path: ?[]const u8) !void {
@@ -487,7 +461,7 @@ pub const Manifest = struct {
 
self.want_refresh_timestamp = true;
 
const input_file_count = self.files.items.len;
const input_file_count = self.files.entries.len;
while (true) : (self.unhit(bin_digest, input_file_count)) {
const file_contents = try self.manifest_file.?.reader().readAllAlloc(gpa, manifest_file_size_max);
defer gpa.free(file_contents);
@@ -499,7 +473,7 @@ pub const Manifest = struct {
if (try self.upgradeToExclusiveLock()) continue;
self.manifest_dirty = true;
while (idx < input_file_count) : (idx += 1) {
const ch_file = &self.files.items[idx];
const ch_file = &self.files.keys()[idx];
self.populateFileHash(ch_file) catch |err| {
self.failed_file_index = idx;
return err;
@@ -510,18 +484,6 @@ pub const Manifest = struct {
while (line_iter.next()) |line| {
defer idx += 1;
 
const cache_hash_file = if (idx < input_file_count) &self.files.items[idx] else blk: {
const new = try self.files.addOne(gpa);
new.* = .{
.prefixed_path = null,
.contents = null,
.max_file_size = null,
.stat = undefined,
.bin_digest = undefined,
};
break :blk new;
};
 
var iter = mem.tokenizeScalar(u8, line, ' ');
const size = iter.next() orelse return error.InvalidFormat;
const inode = iter.next() orelse return error.InvalidFormat;
@@ -530,30 +492,61 @@ pub const Manifest = struct {
const prefix_str = iter.next() orelse return error.InvalidFormat;
const file_path = iter.rest();
 
cache_hash_file.stat.size = fmt.parseInt(u64, size, 10) catch return error.InvalidFormat;
cache_hash_file.stat.inode = fmt.parseInt(fs.File.INode, inode, 10) catch return error.InvalidFormat;
cache_hash_file.stat.mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat;
_ = fmt.hexToBytes(&cache_hash_file.bin_digest, digest_str) catch return error.InvalidFormat;
const stat_size = fmt.parseInt(u64, size, 10) catch return error.InvalidFormat;
const stat_inode = fmt.parseInt(fs.File.INode, inode, 10) catch return error.InvalidFormat;
const stat_mtime = fmt.parseInt(i64, mtime_nsec_str, 10) catch return error.InvalidFormat;
const file_bin_digest = b: {
if (digest_str.len != hex_digest_len) return error.InvalidFormat;
var bd: BinDigest = undefined;
_ = fmt.hexToBytes(&bd, digest_str) catch return error.InvalidFormat;
break :b bd;
};
 
const prefix = fmt.parseInt(u8, prefix_str, 10) catch return error.InvalidFormat;
if (prefix >= self.cache.prefixes_len) return error.InvalidFormat;
 
if (file_path.len == 0) {
return error.InvalidFormat;
}
if (cache_hash_file.prefixed_path) |pp| {
if (pp.prefix != prefix or !mem.eql(u8, file_path, pp.sub_path)) {
return error.InvalidFormat;
}
}
if (file_path.len == 0) return error.InvalidFormat;
 
if (cache_hash_file.prefixed_path == null) {
cache_hash_file.prefixed_path = .{
const cache_hash_file = f: {
const prefixed_path: PrefixedPath = .{
.prefix = prefix,
.sub_path = try gpa.dupe(u8, file_path),
.sub_path = file_path, // expires with file_contents
};
}
if (idx < input_file_count) {
const file = &self.files.keys()[idx];
if (!file.prefixed_path.eql(prefixed_path))
return error.InvalidFormat;
 
const pp = cache_hash_file.prefixed_path.?;
file.stat = .{
.size = stat_size,
.inode = stat_inode,
.mtime = stat_mtime,
};
file.bin_digest = file_bin_digest;
break :f file;
}
const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});
errdefer assert(self.files.popOrNull() != null);
if (!gop.found_existing) {
gop.key_ptr.* = .{
.prefixed_path = .{
.prefix = prefix,
.sub_path = try gpa.dupe(u8, file_path),
},
.contents = null,
.max_file_size = null,
.stat = .{
.size = stat_size,
.inode = stat_inode,
.mtime = stat_mtime,
},
.bin_digest = file_bin_digest,
};
}
break :f gop.key_ptr;
};
 
const pp = cache_hash_file.prefixed_path;
const dir = self.cache.prefixes()[pp.prefix].handle;
const this_file = dir.openFile(pp.sub_path, .{ .mode = .read_only }) catch |err| switch (err) {
error.FileNotFound => {
@@ -617,7 +610,7 @@ pub const Manifest = struct {
if (try self.upgradeToExclusiveLock()) continue;
self.manifest_dirty = true;
while (idx < input_file_count) : (idx += 1) {
const ch_file = &self.files.items[idx];
const ch_file = &self.files.keys()[idx];
self.populateFileHash(ch_file) catch |err| {
self.failed_file_index = idx;
return err;
@@ -640,12 +633,12 @@ pub const Manifest = struct {
self.hash.hasher.update(&bin_digest);
 
// Remove files not in the initial hash.
for (self.files.items[input_file_count..]) |*file| {
for (self.files.keys()[input_file_count..]) |*file| {
file.deinit(self.cache.gpa);
}
self.files.shrinkRetainingCapacity(input_file_count);
 
for (self.files.items) |file| {
for (self.files.keys()) |file| {
self.hash.hasher.update(&file.bin_digest);
}
}
@@ -685,7 +678,7 @@ pub const Manifest = struct {
}
 
fn populateFileHash(self: *Manifest, ch_file: *File) !void {
const pp = ch_file.prefixed_path.?;
const pp = ch_file.prefixed_path;
const dir = self.cache.prefixes()[pp.prefix].handle;
const file = try dir.openFile(pp.sub_path, .{});
defer file.close();
@@ -751,7 +744,7 @@ pub const Manifest = struct {
.bin_digest = undefined,
.contents = null,
};
errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1);
errdefer self.files.shrinkRetainingCapacity(self.files.entries.len - 1);
 
try self.populateFileHash(new_ch_file);
 
@@ -759,9 +752,11 @@ pub const Manifest = struct {
}
 
/// Add a file as a dependency of process being cached, after the initial hash has been
/// calculated. This is useful for processes that don't know the all the files that
/// are depended on ahead of time. For example, a source file that can import other files
/// will need to be recompiled if the imported file is changed.
/// calculated.
///
/// This is useful for processes that don't know all the files that are
/// depended on ahead of time. For example, a source file that can import
/// other files will need to be recompiled if the imported file is changed.
pub fn addFilePost(self: *Manifest, file_path: []const u8) !void {
assert(self.manifest_file != null);
 
@@ -769,17 +764,26 @@ pub const Manifest = struct {
const prefixed_path = try self.cache.findPrefix(file_path);
errdefer gpa.free(prefixed_path.sub_path);
 
const new_ch_file = try self.files.addOne(gpa);
new_ch_file.* = .{
const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});
errdefer assert(self.files.popOrNull() != null);
 
if (gop.found_existing) {
gpa.free(prefixed_path.sub_path);
return;
}
 
gop.key_ptr.* = .{
.prefixed_path = prefixed_path,
.max_file_size = null,
.stat = undefined,
.bin_digest = undefined,
.contents = null,
};
errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1);
 
try self.populateFileHash(new_ch_file);
self.files.lockPointers();
defer self.files.unlockPointers();
 
try self.populateFileHash(gop.key_ptr);
}
 
/// Like `addFilePost` but when the file contents have already been loaded from disk.
@@ -793,13 +797,20 @@ pub const Manifest = struct {
assert(self.manifest_file != null);
const gpa = self.cache.gpa;
 
const ch_file = try self.files.addOne(gpa);
errdefer self.files.shrinkRetainingCapacity(self.files.items.len - 1);
 
const prefixed_path = try self.cache.findPrefixResolved(resolved_path);
errdefer gpa.free(prefixed_path.sub_path);
 
ch_file.* = .{
const gop = try self.files.getOrPutAdapted(gpa, prefixed_path, FilesAdapter{});
errdefer assert(self.files.popOrNull() != null);
 
if (gop.found_existing) {
gpa.free(prefixed_path.sub_path);
return;
}
 
const new_file = gop.key_ptr;
 
new_file.* = .{
.prefixed_path = prefixed_path,
.max_file_size = null,
.stat = stat,
@@ -807,19 +818,19 @@ pub const Manifest = struct {
.contents = null,
};
 
if (self.isProblematicTimestamp(ch_file.stat.mtime)) {
if (self.isProblematicTimestamp(new_file.stat.mtime)) {
// The actual file has an unreliable timestamp, force it to be hashed
ch_file.stat.mtime = 0;
ch_file.stat.inode = 0;
new_file.stat.mtime = 0;
new_file.stat.inode = 0;
}
 
{
var hasher = hasher_init;
hasher.update(bytes);
hasher.final(&ch_file.bin_digest);
hasher.final(&new_file.bin_digest);
}
 
self.hash.hasher.update(&ch_file.bin_digest);
self.hash.hasher.update(&new_file.bin_digest);
}
 
pub fn addDepFilePost(self: *Manifest, dir: fs.Dir, dep_file_basename: []const u8) !void {
@@ -885,14 +896,14 @@ pub const Manifest = struct {
 
const writer = contents.writer();
try writer.writeAll(manifest_header ++ "\n");
for (self.files.items) |file| {
for (self.files.keys()) |file| {
try writer.print("{d} {d} {d} {} {d} {s}\n", .{
file.stat.size,
file.stat.inode,
file.stat.mtime,
fmt.fmtSliceHexLower(&file.bin_digest),
file.prefixed_path.?.prefix,
file.prefixed_path.?.sub_path,
file.prefixed_path.prefix,
file.prefixed_path.sub_path,
});
}
 
@@ -961,7 +972,7 @@ pub const Manifest = struct {
 
file.close();
}
for (self.files.items) |*file| {
for (self.files.keys()) |*file| {
file.deinit(self.cache.gpa);
}
self.files.deinit(self.cache.gpa);
@@ -1130,7 +1141,7 @@ test "check that changing a file makes cache fail" {
// There should be nothing in the cache
try testing.expectEqual(false, try ch.hit());
 
try testing.expect(mem.eql(u8, original_temp_file_contents, ch.files.items[temp_file_idx].contents.?));
try testing.expect(mem.eql(u8, original_temp_file_contents, ch.files.keys()[temp_file_idx].contents.?));
 
digest1 = ch.final();
 
@@ -1150,7 +1161,7 @@ test "check that changing a file makes cache fail" {
try testing.expectEqual(false, try ch.hit());
 
// The cache system does not keep the contents of re-hashed input files.
try testing.expect(ch.files.items[temp_file_idx].contents == null);
try testing.expect(ch.files.keys()[temp_file_idx].contents == null);
 
digest2 = ch.final();
 
 
filename was Deleted added: 461, removed: 383, total 78
@@ -0,0 +1,74 @@
const Directory = @This();
const std = @import("../../std.zig");
const fs = std.fs;
const fmt = std.fmt;
const Allocator = std.mem.Allocator;
 
/// This field is redundant for operations that can act on the open directory handle
/// directly, but it is needed when passing the directory to a child process.
/// `null` means cwd.
path: ?[]const u8,
handle: fs.Dir,
 
/// Returns a copy whose `path` is duplicated into `arena`.
/// The `handle` is copied as-is; it is not reopened.
pub fn clone(d: Directory, arena: Allocator) Allocator.Error!Directory {
    const path_copy: ?[]const u8 = if (d.path) |p| try arena.dupe(u8, p) else null;
    return .{ .path = path_copy, .handle = d.handle };
}
 
/// The current working directory, represented with a `null` path.
pub fn cwd() Directory {
    const cwd_handle = fs.cwd();
    return .{ .path = null, .handle = cwd_handle };
}
 
/// Joins `paths` and, when `path` is non-null, prefixes the result with it.
/// The returned slice is allocated with `allocator`.
pub fn join(self: Directory, allocator: Allocator, paths: []const []const u8) ![]u8 {
    const prefix = self.path orelse return fs.path.join(allocator, paths);
    // TODO clean way to do this with only 1 allocation
    const suffix = try fs.path.join(allocator, paths);
    defer allocator.free(suffix);
    return fs.path.join(allocator, &[_][]const u8{ prefix, suffix });
}
 
/// Same as `join`, but the result is null-terminated.
pub fn joinZ(self: Directory, allocator: Allocator, paths: []const []const u8) ![:0]u8 {
    const prefix = self.path orelse return fs.path.joinZ(allocator, paths);
    // TODO clean way to do this with only 1 allocation
    const suffix = try fs.path.join(allocator, paths);
    defer allocator.free(suffix);
    return fs.path.joinZ(allocator, &[_][]const u8{ prefix, suffix });
}
 
/// Convenience teardown: closes `handle`, frees `path` with `gpa` when it is
/// non-null, and invalidates `self`. Whether the handle and path are actually
/// owned depends on how the value was created, so only call this when that
/// matches the caller's usage.
pub fn closeAndFree(self: *Directory, gpa: Allocator) void {
    defer self.* = undefined;
    self.handle.close();
    if (self.path) |owned_path| gpa.free(owned_path);
}
 
/// Writes `path` followed by a path separator; writes nothing for the `null`
/// (cwd) path. Only the empty format specifier is accepted.
pub fn format(
    self: Directory,
    comptime fmt_string: []const u8,
    options: fmt.FormatOptions,
    writer: anytype,
) !void {
    _ = options;
    if (fmt_string.len != 0) fmt.invalidFmtError(fmt_string, self);
    const dir_path = self.path orelse return;
    try writer.writeAll(dir_path);
    try writer.writeAll(fs.path.sep_str);
}
 
/// Two directories are equal when their handles share the same descriptor.
pub fn eql(self: Directory, other: Directory) bool {
    const lhs_fd = self.handle.fd;
    const rhs_fd = other.handle.fd;
    return lhs_fd == rhs_fd;
}
 
filename was Deleted added: 461, removed: 383, total 78
@@ -0,0 +1,154 @@
root_dir: Cache.Directory,
/// The path, relative to the root dir, that this `Path` represents.
/// Empty string means the root_dir is the path.
sub_path: []const u8 = "",
 
/// Returns a copy with the root directory cloned and `sub_path` duplicated,
/// both using `arena`.
pub fn clone(p: Path, arena: Allocator) Allocator.Error!Path {
    const root_copy = try p.root_dir.clone(arena);
    const sub_copy = try arena.dupe(u8, p.sub_path);
    return .{ .root_dir = root_copy, .sub_path = sub_copy };
}
 
/// A `Path` rooted at the current working directory with an empty `sub_path`.
pub fn cwd() Path {
    const root = Cache.Directory.cwd();
    return .{ .root_dir = root };
}
 
/// Returns a `Path` with the same root whose `sub_path` is this path's
/// `sub_path` joined with the given one. An empty `sub_path` argument returns
/// `p` unchanged and allocates nothing; otherwise the new `sub_path` is
/// allocated with `arena`.
pub fn join(p: Path, arena: Allocator, sub_path: []const u8) Allocator.Error!Path {
    if (sub_path.len == 0) return p;
    const joined = if (p.sub_path.len == 0)
        try fs.path.join(arena, &.{sub_path})
    else
        try fs.path.join(arena, &.{ p.sub_path, sub_path });
    return .{ .root_dir = p.root_dir, .sub_path = joined };
}
 
/// Like `join`, but combines the two sub paths with `fs.path.resolvePosix`.
/// An empty `sub_path` argument returns `p` unchanged.
pub fn resolvePosix(p: Path, arena: Allocator, sub_path: []const u8) Allocator.Error!Path {
    if (sub_path.len == 0) return p;
    const resolved = try fs.path.resolvePosix(arena, &.{ p.sub_path, sub_path });
    return .{ .root_dir = p.root_dir, .sub_path = resolved };
}
 
/// Builds a string from the root directory, this path's `sub_path` (skipped
/// when empty) and the given `sub_path`, via `Directory.join`.
/// The result is allocated with `allocator`.
pub fn joinString(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![]u8 {
    if (p.sub_path.len == 0) return p.root_dir.join(allocator, &.{sub_path});
    return p.root_dir.join(allocator, &.{ p.sub_path, sub_path });
}
 
/// Same as `joinString`, but the result is null-terminated
/// (built with `Directory.joinZ`).
pub fn joinStringZ(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![:0]u8 {
    if (p.sub_path.len == 0) return p.root_dir.joinZ(allocator, &.{sub_path});
    return p.root_dir.joinZ(allocator, &.{ p.sub_path, sub_path });
}
 
/// Opens `sub_path` through the root directory handle. When this path's own
/// `sub_path` is non-empty, the two are first joined with the platform
/// separator in a stack buffer; `error.NameTooLong` is returned if the joined
/// path does not fit.
pub fn openFile(
    p: Path,
    sub_path: []const u8,
    flags: fs.File.OpenFlags,
) !fs.File {
    var path_buffer: [fs.MAX_PATH_BYTES]u8 = undefined;
    const full_sub_path: []const u8 = full: {
        if (p.sub_path.len == 0) break :full sub_path;
        break :full std.fmt.bufPrint(&path_buffer, "{s}" ++ fs.path.sep_str ++ "{s}", .{
            p.sub_path, sub_path,
        }) catch return error.NameTooLong;
    };
    return p.root_dir.handle.openFile(full_sub_path, flags);
}
 
/// Calls `makeOpenPath` on the root directory handle for `sub_path`. When this
/// path's own `sub_path` is non-empty, the two are first joined with the
/// platform separator in a stack buffer; `error.NameTooLong` is returned if
/// the joined path does not fit.
pub fn makeOpenPath(p: Path, sub_path: []const u8, opts: fs.OpenDirOptions) !fs.Dir {
    var path_buffer: [fs.MAX_PATH_BYTES]u8 = undefined;
    const full_sub_path: []const u8 = full: {
        if (p.sub_path.len == 0) break :full sub_path;
        break :full std.fmt.bufPrint(&path_buffer, "{s}" ++ fs.path.sep_str ++ "{s}", .{
            p.sub_path, sub_path,
        }) catch return error.NameTooLong;
    };
    return p.root_dir.handle.makeOpenPath(full_sub_path, opts);
}
 
/// Calls `statFile` on the root directory handle for `sub_path`. When this
/// path's own `sub_path` is non-empty, the two are first joined with the
/// platform separator in a stack buffer; `error.NameTooLong` is returned if
/// the joined path does not fit.
pub fn statFile(p: Path, sub_path: []const u8) !fs.Dir.Stat {
    var path_buffer: [fs.MAX_PATH_BYTES]u8 = undefined;
    const full_sub_path: []const u8 = full: {
        if (p.sub_path.len == 0) break :full sub_path;
        break :full std.fmt.bufPrint(&path_buffer, "{s}" ++ fs.path.sep_str ++ "{s}", .{
            p.sub_path, sub_path,
        }) catch return error.NameTooLong;
    };
    return p.root_dir.handle.statFile(full_sub_path);
}
 
/// Calls `atomicFile` on the root directory handle for `sub_path`. When this
/// path's own `sub_path` is non-empty, the joined path is written into the
/// caller-provided `buf`; `error.NameTooLong` is returned if it does not fit.
/// NOTE(review): `buf` is presumably caller-provided so the joined path can
/// outlive this call; confirm against `fs.Dir.atomicFile`.
pub fn atomicFile(
    p: Path,
    sub_path: []const u8,
    options: fs.Dir.AtomicFileOptions,
    buf: *[fs.MAX_PATH_BYTES]u8,
) !fs.AtomicFile {
    const full_sub_path: []const u8 = full: {
        if (p.sub_path.len == 0) break :full sub_path;
        break :full std.fmt.bufPrint(buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{
            p.sub_path, sub_path,
        }) catch return error.NameTooLong;
    };
    return p.root_dir.handle.atomicFile(full_sub_path, options);
}
 
/// Calls `access` on the root directory handle for `sub_path`. When this
/// path's own `sub_path` is non-empty, the two are first joined with the
/// platform separator in a stack buffer; `error.NameTooLong` is returned if
/// the joined path does not fit.
pub fn access(p: Path, sub_path: []const u8, flags: fs.File.OpenFlags) !void {
    var path_buffer: [fs.MAX_PATH_BYTES]u8 = undefined;
    const full_sub_path: []const u8 = full: {
        if (p.sub_path.len == 0) break :full sub_path;
        break :full std.fmt.bufPrint(&path_buffer, "{s}" ++ fs.path.sep_str ++ "{s}", .{
            p.sub_path, sub_path,
        }) catch return error.NameTooLong;
    };
    return p.root_dir.handle.access(full_sub_path, flags);
}
 
/// Calls `makePath` on the root directory handle for `sub_path`. When this
/// path's own `sub_path` is non-empty, the two are first joined with the
/// platform separator in a stack buffer; `error.NameTooLong` is returned if
/// the joined path does not fit.
pub fn makePath(p: Path, sub_path: []const u8) !void {
    var path_buffer: [fs.MAX_PATH_BYTES]u8 = undefined;
    const full_sub_path: []const u8 = full: {
        if (p.sub_path.len == 0) break :full sub_path;
        break :full std.fmt.bufPrint(&path_buffer, "{s}" ++ fs.path.sep_str ++ "{s}", .{
            p.sub_path, sub_path,
        }) catch return error.NameTooLong;
    };
    return p.root_dir.handle.makePath(full_sub_path);
}
 
/// Formatter for `Path`.
///
/// Supported format specifiers:
/// * `{}`  - writes the root directory path (when non-null) followed by a
///   separator, then `sub_path` (when non-empty) followed by a separator.
/// * `{q}` - writes the path through `std.zig.stringEscape` with the empty
///   mode string. A separator is only placed between the root path and a
///   non-empty `sub_path`; no trailing separator is written.
/// * `{'}` - same as `{q}`, but forwards `'` as the mode string.
///
/// Any other specifier is a compile error.
pub fn format(
    self: Path,
    comptime fmt_string: []const u8,
    options: std.fmt.FormatOptions,
    writer: anytype,
) !void {
    if (fmt_string.len == 1) {
        // Quote-escape the string.
        const stringEscape = std.zig.stringEscape;
        // `stringEscape` takes its mode as a comptime string, so every prong
        // must yield a string. A character literal for the `'` mode would not
        // coerce and made `{'}` unusable.
        const f: []const u8 = switch (fmt_string[0]) {
            'q' => "",
            '\'' => "'",
            else => @compileError("unsupported format string: " ++ fmt_string),
        };
        if (self.root_dir.path) |p| {
            try stringEscape(p, f, options, writer);
            if (self.sub_path.len > 0) try stringEscape(fs.path.sep_str, f, options, writer);
        }
        if (self.sub_path.len > 0) {
            try stringEscape(self.sub_path, f, options, writer);
        }
        return;
    }
    if (fmt_string.len > 0)
        std.fmt.invalidFmtError(fmt_string, self);
    if (self.root_dir.path) |p| {
        try writer.writeAll(p);
        try writer.writeAll(fs.path.sep_str);
    }
    if (self.sub_path.len > 0) {
        try writer.writeAll(self.sub_path);
        try writer.writeAll(fs.path.sep_str);
    }
}
 
const Path = @This();
const std = @import("../../std.zig");
const fs = std.fs;
const Allocator = std.mem.Allocator;
const Cache = std.Build.Cache;
 
lib/std/Build/Step.zig added: 461, removed: 383, total 78
@@ -544,7 +544,7 @@ pub fn cacheHit(s: *Step, man: *std.Build.Cache.Manifest) !bool {
 
fn failWithCacheError(s: *Step, man: *const std.Build.Cache.Manifest, err: anyerror) anyerror {
const i = man.failed_file_index orelse return err;
const pp = man.files.items[i].prefixed_path orelse return err;
const pp = man.files.keys()[i].prefixed_path;
const prefix = man.cache.prefixes()[pp.prefix].path orelse "";
return s.fail("{s}: {s}/{s}", .{ @errorName(err), prefix, pp.sub_path });
}
 
lib/std/array_hash_map.zig added: 461, removed: 383, total 78
@@ -9,23 +9,26 @@ const Wyhash = std.hash.Wyhash;
const Allocator = mem.Allocator;
const hash_map = @This();
 
/// An ArrayHashMap with default hash and equal functions.
/// See AutoContext for a description of the hash and equal implementations.
/// An `ArrayHashMap` with default hash and equal functions.
///
/// See `AutoContext` for a description of the hash and equal implementations.
pub fn AutoArrayHashMap(comptime K: type, comptime V: type) type {
return ArrayHashMap(K, V, AutoContext(K), !autoEqlIsCheap(K));
}
 
/// An ArrayHashMapUnmanaged with default hash and equal functions.
/// See AutoContext for a description of the hash and equal implementations.
/// An `ArrayHashMapUnmanaged` with default hash and equal functions.
///
/// See `AutoContext` for a description of the hash and equal implementations.
pub fn AutoArrayHashMapUnmanaged(comptime K: type, comptime V: type) type {
return ArrayHashMapUnmanaged(K, V, AutoContext(K), !autoEqlIsCheap(K));
}
 
/// Builtin hashmap for strings as keys.
/// An `ArrayHashMap` with strings as keys.
pub fn StringArrayHashMap(comptime V: type) type {
return ArrayHashMap([]const u8, V, StringContext, true);
}
 
/// An `ArrayHashMapUnmanaged` with strings as keys.
pub fn StringArrayHashMapUnmanaged(comptime V: type) type {
return ArrayHashMapUnmanaged([]const u8, V, StringContext, true);
}
@@ -50,29 +53,33 @@ pub fn hashString(s: []const u8) u32 {
return @as(u32, @truncate(std.hash.Wyhash.hash(0, s)));
}
 
/// Insertion order is preserved.
/// Deletions perform a "swap removal" on the entries list.
/// A hash table of keys and values, each stored sequentially.
///
/// Insertion order is preserved. In general, this data structure supports the same
/// operations as `std.ArrayList`.
///
/// Deletion operations:
/// * `swapRemove` - O(1)
/// * `orderedRemove` - O(N)
///
/// Modifying the hash map while iterating is allowed, however, one must understand
/// the (well defined) behavior when mixing insertions and deletions with iteration.
/// For a hash map that can be initialized directly that does not store an Allocator
/// field, see `ArrayHashMapUnmanaged`.
/// When `store_hash` is `false`, this data structure is biased towards cheap `eql`
/// functions. It does not store each item's hash in the table. Setting `store_hash`
/// to `true` incurs slightly more memory cost by storing each key's hash in the table
/// but only has to call `eql` for hash collisions.
/// If typical operations (except iteration over entries) need to be faster, prefer
/// the alternative `std.HashMap`.
/// Context must be a struct type with two member functions:
/// hash(self, K) u32
/// eql(self, K, K, usize) bool
/// Adapted variants of many functions are provided. These variants
/// take a pseudo key instead of a key. Their context must have the functions:
/// hash(self, PseudoKey) u32
/// eql(self, PseudoKey, K, usize) bool
///
/// See `ArrayHashMapUnmanaged` for a variant of this data structure that accepts an
/// `Allocator` as a parameter when needed rather than storing it.
pub fn ArrayHashMap(
comptime K: type,
comptime V: type,
/// A namespace that provides these two functions:
/// * `pub fn hash(self, K) u32`
/// * `pub fn eql(self, K, K, usize) bool`
///
comptime Context: type,
/// When `false`, this data structure is biased towards cheap `eql`
/// functions and avoids storing each key's hash in the table. Setting
/// `store_hash` to `true` incurs more memory cost but limits `eql` to
/// being called only once per insertion/deletion (provided there are no
/// hash collisions).
comptime store_hash: bool,
) type {
return struct {
@@ -472,34 +479,40 @@ pub fn ArrayHashMap(
};
}
 
/// General purpose hash table.
/// Insertion order is preserved.
/// Deletions perform a "swap removal" on the entries list.
/// A hash table of keys and values, each stored sequentially.
///
/// Insertion order is preserved. In general, this data structure supports the same
/// operations as `std.ArrayListUnmanaged`.
///
/// Deletion operations:
/// * `swapRemove` - O(1)
/// * `orderedRemove` - O(N)
///
/// Modifying the hash map while iterating is allowed, however, one must understand
/// the (well defined) behavior when mixing insertions and deletions with iteration.
/// This type does not store an Allocator field - the Allocator must be passed in
///
/// This type does not store an `Allocator` field - the `Allocator` must be passed in
/// with each function call that requires it. See `ArrayHashMap` for a type that stores
/// an Allocator field for convenience.
/// an `Allocator` field for convenience.
///
/// Can be initialized directly using the default field values.
///
/// This type is designed to have low overhead for small numbers of entries. When
/// `store_hash` is `false` and the number of entries in the map is less than 9,
/// the overhead cost of using `ArrayHashMapUnmanaged` rather than `std.ArrayList` is
/// only a single pointer-sized integer.
/// When `store_hash` is `false`, this data structure is biased towards cheap `eql`
/// functions. It does not store each item's hash in the table. Setting `store_hash`
/// to `true` incurs slightly more memory cost by storing each key's hash in the table
/// but guarantees only one call to `eql` per insertion/deletion.
/// Context must be a struct type with two member functions:
/// hash(self, K) u32
/// eql(self, K, K) bool
/// Adapted variants of many functions are provided. These variants
/// take a pseudo key instead of a key. Their context must have the functions:
/// hash(self, PseudoKey) u32
/// eql(self, PseudoKey, K) bool
pub fn ArrayHashMapUnmanaged(
comptime K: type,
comptime V: type,
/// A namespace that provides these two functions:
/// * `pub fn hash(self, K) u32`
/// * `pub fn eql(self, K, K, usize) bool`
comptime Context: type,
/// When `false`, this data structure is biased towards cheap `eql`
/// functions and avoids storing each key's hash in the table. Setting
/// `store_hash` to `true` incurs more memory cost but limits `eql` to
/// being called only once per insertion/deletion (provided there are no
/// hash collisions).
comptime store_hash: bool,
) type {
return struct {
@@ -516,10 +529,6 @@ pub fn ArrayHashMapUnmanaged(
/// Used to detect memory safety violations.
pointer_stability: std.debug.SafetyLock = .{},
 
comptime {
std.hash_map.verifyContext(Context, K, K, u32, true);
}
 
/// Modifying the key is allowed only if it does not change the hash.
/// Modifying the value is allowed.
/// Entry pointers become invalid whenever this ArrayHashMap is modified,
@@ -1834,27 +1843,16 @@ pub fn ArrayHashMapUnmanaged(
}
}
 
inline fn checkedHash(ctx: anytype, key: anytype) u32 {
comptime std.hash_map.verifyContext(@TypeOf(ctx), @TypeOf(key), K, u32, true);
fn checkedHash(ctx: anytype, key: anytype) u32 {
// If you get a compile error on the next line, it means that your
// generic hash function doesn't accept your key.
const hash = ctx.hash(key);
if (@TypeOf(hash) != u32) {
@compileError("Context " ++ @typeName(@TypeOf(ctx)) ++ " has a generic hash function that returns the wrong type!\n" ++
@typeName(u32) ++ " was expected, but found " ++ @typeName(@TypeOf(hash)));
}
return hash;
return ctx.hash(key);
}
inline fn checkedEql(ctx: anytype, a: anytype, b: K, b_index: usize) bool {
comptime std.hash_map.verifyContext(@TypeOf(ctx), @TypeOf(a), K, u32, true);
 
fn checkedEql(ctx: anytype, a: anytype, b: K, b_index: usize) bool {
// If you get a compile error on the next line, it means that your
// generic eql function doesn't accept (self, adapt key, K, index).
const eql = ctx.eql(a, b, b_index);
if (@TypeOf(eql) != bool) {
@compileError("Context " ++ @typeName(@TypeOf(ctx)) ++ " has a generic eql function that returns the wrong type!\n" ++
@typeName(bool) ++ " was expected, but found " ++ @typeName(@TypeOf(eql)));
}
return eql;
return ctx.eql(a, b, b_index);
}
 
fn dumpState(self: Self, comptime keyFmt: []const u8, comptime valueFmt: []const u8) void {
 
src/Compilation.zig added: 461, removed: 383, total 78
@@ -1999,7 +1999,7 @@ pub fn update(comp: *Compilation, main_progress_node: *std.Progress.Node) !void
 
const is_hit = man.hit() catch |err| {
const i = man.failed_file_index orelse return err;
const pp = man.files.items[i].prefixed_path orelse return err;
const pp = man.files.keys()[i].prefixed_path;
const prefix = man.cache.prefixes()[pp.prefix];
return comp.setMiscFailure(
.check_whole_cache,
@@ -4147,7 +4147,7 @@ pub fn cImport(comp: *Compilation, c_src: []const u8, owner_mod: *Package.Module
const prev_hash_state = man.hash.peekBin();
const actual_hit = hit: {
_ = try man.hit();
if (man.files.items.len == 0) {
if (man.files.entries.len == 0) {
man.unhit(prev_hash_state, 0);
break :hit false;
}
 
src/Package.zig added: 461, removed: 383, total 78
@@ -2,162 +2,3 @@ pub const Module = @import("Package/Module.zig");
pub const Fetch = @import("Package/Fetch.zig");
pub const build_zig_basename = "build.zig";
pub const Manifest = @import("Package/Manifest.zig");
 
/// A location on the file system expressed as an open root directory plus a
/// path relative to it.
pub const Path = struct {
    /// Directory that `sub_path` is interpreted relative to.
    root_dir: Cache.Directory,
    /// The path, relative to the root dir, that this `Path` represents.
    /// Empty string means the root_dir is the path.
    sub_path: []const u8 = "",

    /// Returns a copy of `p` whose `sub_path` (and the root directory's path
    /// string, if any) are duplicated into `arena`. The directory handle is
    /// shared with `p`, not reopened.
    pub fn clone(p: Path, arena: Allocator) Allocator.Error!Path {
        return .{
            .root_dir = try p.root_dir.clone(arena),
            .sub_path = try arena.dupe(u8, p.sub_path),
        };
    }

    /// Returns a `Path` rooted at the current working directory with an empty
    /// `sub_path`.
    pub fn cwd() Path {
        return .{ .root_dir = Cache.Directory.cwd() };
    }

    /// Returns a `Path` with the same root whose `sub_path` is `p.sub_path`
    /// joined with `sub_path`. An empty `sub_path` returns `p` unchanged and
    /// allocates nothing; otherwise the new `sub_path` is allocated in `arena`.
    pub fn join(p: Path, arena: Allocator, sub_path: []const u8) Allocator.Error!Path {
        if (sub_path.len == 0) return p;
        const parts: []const []const u8 =
            if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path };
        return .{
            .root_dir = p.root_dir,
            .sub_path = try fs.path.join(arena, parts),
        };
    }

    /// Like `join`, but the new `sub_path` is computed with
    /// `fs.path.resolvePosix`. An empty `sub_path` returns `p` unchanged.
    pub fn resolvePosix(p: Path, arena: Allocator, sub_path: []const u8) Allocator.Error!Path {
        if (sub_path.len == 0) return p;
        return .{
            .root_dir = p.root_dir,
            .sub_path = try fs.path.resolvePosix(arena, &.{ p.sub_path, sub_path }),
        };
    }

    /// Returns the root directory's path, `p.sub_path` and `sub_path` joined
    /// into one newly allocated string. Caller owns the returned memory.
    pub fn joinString(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![]u8 {
        const parts: []const []const u8 =
            if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path };
        return p.root_dir.join(allocator, parts);
    }

    /// Same as `joinString`, but the result is null-terminated.
    pub fn joinStringZ(p: Path, allocator: Allocator, sub_path: []const u8) Allocator.Error![:0]u8 {
        const parts: []const []const u8 =
            if (p.sub_path.len == 0) &.{sub_path} else &.{ p.sub_path, sub_path };
        return p.root_dir.joinZ(allocator, parts);
    }

    /// Produces the path of `sub_path` relative to `p.root_dir`.
    /// When `p.sub_path` is empty, `sub_path` itself is returned and `buf` is
    /// left untouched. Otherwise `p.sub_path`, the native separator and
    /// `sub_path` are written to `buf` and the written slice is returned.
    /// Fails with `error.NameTooLong` when the result does not fit in `buf`.
    fn joinBuf(
        p: Path,
        buf: *[fs.MAX_PATH_BYTES]u8,
        sub_path: []const u8,
    ) error{NameTooLong}![]const u8 {
        if (p.sub_path.len == 0) return sub_path;
        return std.fmt.bufPrint(buf, "{s}" ++ fs.path.sep_str ++ "{s}", .{
            p.sub_path, sub_path,
        }) catch return error.NameTooLong;
    }

    /// Opens the file at `sub_path` relative to this path.
    pub fn openFile(
        p: Path,
        sub_path: []const u8,
        flags: fs.File.OpenFlags,
    ) !fs.File {
        var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
        const joined_path = try p.joinBuf(&buf, sub_path);
        return p.root_dir.handle.openFile(joined_path, flags);
    }

    /// Calls `makeOpenPath` on the root directory handle for `sub_path`
    /// relative to this path.
    pub fn makeOpenPath(p: Path, sub_path: []const u8, opts: fs.OpenDirOptions) !fs.Dir {
        var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
        const joined_path = try p.joinBuf(&buf, sub_path);
        return p.root_dir.handle.makeOpenPath(joined_path, opts);
    }

    /// Stats the file at `sub_path` relative to this path.
    pub fn statFile(p: Path, sub_path: []const u8) !fs.Dir.Stat {
        var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
        const joined_path = try p.joinBuf(&buf, sub_path);
        return p.root_dir.handle.statFile(joined_path);
    }

    /// Creates an atomic file for `sub_path` relative to this path.
    /// The joined path is written into the caller-provided `buf` rather than
    /// a local buffer.
    /// NOTE(review): presumably the returned `fs.AtomicFile` keeps referring
    /// to the joined path, so `buf` should outlive it - confirm.
    pub fn atomicFile(
        p: Path,
        sub_path: []const u8,
        options: fs.Dir.AtomicFileOptions,
        buf: *[fs.MAX_PATH_BYTES]u8,
    ) !fs.AtomicFile {
        const joined_path = try p.joinBuf(buf, sub_path);
        return p.root_dir.handle.atomicFile(joined_path, options);
    }

    /// Checks accessibility of `sub_path` relative to this path.
    pub fn access(p: Path, sub_path: []const u8, flags: fs.File.OpenFlags) !void {
        var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
        const joined_path = try p.joinBuf(&buf, sub_path);
        return p.root_dir.handle.access(joined_path, flags);
    }

    /// Calls `makePath` on the root directory handle for `sub_path` relative
    /// to this path.
    pub fn makePath(p: Path, sub_path: []const u8) !void {
        var buf: [fs.MAX_PATH_BYTES]u8 = undefined;
        const joined_path = try p.joinBuf(&buf, sub_path);
        return p.root_dir.handle.makePath(joined_path);
    }

    /// Formatter hook for `std.fmt`.
    ///
    /// * `{q}` and `{'}`: writes the root directory path (if any), a separator
    ///   when `sub_path` is non-empty, and `sub_path`, each passed through
    ///   `std.zig.stringEscape`. No trailing separator is written.
    /// * `{}`: writes the root directory path (if any) followed by a
    ///   separator, then `sub_path` (if non-empty) followed by a separator.
    ///   The output therefore ends with a separator.
    /// * Any other format string is a compile error.
    pub fn format(
        self: Path,
        comptime fmt_string: []const u8,
        options: std.fmt.FormatOptions,
        writer: anytype,
    ) !void {
        if (fmt_string.len == 1) {
            // Quote-escape the string.
            const stringEscape = std.zig.stringEscape;
            // `stringEscape` takes its mode as a string, so both prongs must
            // yield `[]const u8`; a character literal here would not compile
            // once the `{'}` specifier is actually used.
            const f: []const u8 = switch (fmt_string[0]) {
                'q' => "",
                '\'' => "'",
                else => @compileError("unsupported format string: " ++ fmt_string),
            };
            if (self.root_dir.path) |p| {
                try stringEscape(p, f, options, writer);
                if (self.sub_path.len > 0) try stringEscape(fs.path.sep_str, f, options, writer);
            }
            if (self.sub_path.len > 0) {
                try stringEscape(self.sub_path, f, options, writer);
            }
            return;
        }
        if (fmt_string.len > 0)
            std.fmt.invalidFmtError(fmt_string, self);
        if (self.root_dir.path) |p| {
            try writer.writeAll(p);
            try writer.writeAll(fs.path.sep_str);
        }
        if (self.sub_path.len > 0) {
            try writer.writeAll(self.sub_path);
            try writer.writeAll(fs.path.sep_str);
        }
    }
};
 
const Package = @This();
const builtin = @import("builtin");
const std = @import("std");
const fs = std.fs;
const Allocator = std.mem.Allocator;
const assert = std.debug.assert;
const Cache = std.Build.Cache;
 
src/Package/Fetch.zig added: 461, removed: 383, total 78
@@ -33,7 +33,7 @@ location_tok: std.zig.Ast.TokenIndex,
hash_tok: std.zig.Ast.TokenIndex,
name_tok: std.zig.Ast.TokenIndex,
lazy_status: LazyStatus,
parent_package_root: Package.Path,
parent_package_root: Cache.Path,
parent_manifest_ast: ?*const std.zig.Ast,
prog_node: *std.Progress.Node,
job_queue: *JobQueue,
@@ -50,7 +50,7 @@ allow_missing_paths_field: bool,
 
/// This will either be relative to `global_cache`, or to the build root of
/// the root package.
package_root: Package.Path,
package_root: Cache.Path,
error_bundle: ErrorBundle.Wip,
manifest: ?Manifest,
manifest_ast: std.zig.Ast,
@@ -263,7 +263,7 @@ pub const JobQueue = struct {
pub const Location = union(enum) {
remote: Remote,
/// A directory found inside the parent package.
relative_path: Package.Path,
relative_path: Cache.Path,
/// Recursive Fetch tasks will never use this Location, but it may be
/// passed in by the CLI. Indicates the file contents here should be copied
/// into the global package cache. It may be a file relative to the cwd or
@@ -564,7 +564,7 @@ fn checkBuildFileExistence(f: *Fetch) RunError!void {
}
 
/// This function populates `f.manifest` or leaves it `null`.
fn loadManifest(f: *Fetch, pkg_root: Package.Path) RunError!void {
fn loadManifest(f: *Fetch, pkg_root: Cache.Path) RunError!void {
const eb = &f.error_bundle;
const arena = f.arena.allocator();
const manifest_bytes = pkg_root.root_dir.handle.readFileAllocOptions(
@@ -722,7 +722,7 @@ fn queueJobsForDeps(f: *Fetch) RunError!void {
}
 
pub fn relativePathDigest(
pkg_root: Package.Path,
pkg_root: Cache.Path,
cache_root: Cache.Directory,
) Manifest.MultiHashHexDigest {
var hasher = Manifest.Hash.init(.{});
@@ -1658,7 +1658,7 @@ const Filter = struct {
};
 
pub fn depDigest(
pkg_root: Package.Path,
pkg_root: Cache.Path,
cache_root: Cache.Directory,
dep: Manifest.Dependency,
) ?Manifest.MultiHashHexDigest {
 
src/Package/Module.zig added: 461, removed: 383, total 78
@@ -3,7 +3,7 @@
//! to Zcu. https://github.com/ziglang/zig/issues/14307
 
/// Only files inside this directory can be imported.
root: Package.Path,
root: Cache.Path,
/// Relative to `root`. May contain path separators.
root_src_path: []const u8,
/// Name used in compile errors. Looks like "root.foo.bar".
@@ -69,7 +69,7 @@ pub const CreateOptions = struct {
builtin_modules: ?*std.StringHashMapUnmanaged(*Module),
 
pub const Paths = struct {
root: Package.Path,
root: Cache.Path,
/// Relative to `root`. May contain path separators.
root_src_path: []const u8,
};
@@ -463,7 +463,7 @@ pub fn create(arena: Allocator, options: CreateOptions) !*Package.Module {
 
/// All fields correspond to `CreateOptions`.
pub const LimitedOptions = struct {
root: Package.Path,
root: Cache.Path,
root_src_path: []const u8,
fully_qualified_name: []const u8,
};
 
src/glibc.zig added: 461, removed: 383, total 78
@@ -713,7 +713,7 @@ pub fn buildSharedObjects(comp: *Compilation, prog_node: *std.Progress.Node) !vo
};
defer o_directory.handle.close();
 
const abilists_contents = man.files.items[abilists_index].contents.?;
const abilists_contents = man.files.keys()[abilists_index].contents.?;
const metadata = try loadMetaData(comp.gpa, abilists_contents);
defer metadata.destroy(comp.gpa);
 
 
src/main.zig added: 461, removed: 383, total 78
@@ -6143,7 +6143,7 @@ fn cmdAstCheck(
}
 
file.mod = try Package.Module.createLimited(arena, .{
.root = Package.Path.cwd(),
.root = Cache.Path.cwd(),
.root_src_path = file.sub_file_path,
.fully_qualified_name = "root",
});
@@ -6316,7 +6316,7 @@ fn cmdChangelist(
};
 
file.mod = try Package.Module.createLimited(arena, .{
.root = Package.Path.cwd(),
.root = Cache.Path.cwd(),
.root_src_path = file.sub_file_path,
.fully_qualified_name = "root",
});