srctree

Andrew Kelley parent 63ea3e17 9fec608b 6c2eb0f1
Merge pull request #19005 from squeek502/wtf

Fix handling of Windows (WTF-16) and WASI (UTF-8) paths, etc

inlinesplit
deps/aro/aro/Compilation.zig added: 1932, removed: 517, total 1415
@@ -69,7 +69,7 @@ pub const Environment = struct {
const val: ?[]const u8 = std.process.getEnvVarOwned(allocator, env_var_name) catch |err| switch (err) {
error.OutOfMemory => |e| return e,
error.EnvironmentVariableNotFound => null,
error.InvalidUtf8 => null,
error.InvalidWtf8 => null,
};
@field(env, field.name) = val;
}
 
deps/aro/aro/Driver.zig added: 1932, removed: 517, total 1415
@@ -523,7 +523,8 @@ pub fn errorDescription(e: anyerror) []const u8 {
error.NotDir => "is not a directory",
error.NotOpenForReading => "file is not open for reading",
error.NotOpenForWriting => "file is not open for writing",
error.InvalidUtf8 => "input is not valid UTF-8",
error.InvalidUtf8 => "path is not valid UTF-8",
error.InvalidWtf8 => "path is not valid WTF-8",
error.FileBusy => "file is busy",
error.NameTooLong => "file name is too long",
error.AccessDenied => "access denied",
 
lib/std/Build/Cache.zig added: 1932, removed: 517, total 1415
@@ -162,7 +162,7 @@ fn findPrefixResolved(cache: *const Cache, resolved_path: []u8) !PrefixedPath {
fn getPrefixSubpath(allocator: Allocator, prefix: []const u8, path: []u8) ![]u8 {
const relative = try std.fs.path.relative(allocator, prefix, path);
errdefer allocator.free(relative);
var component_iterator = std.fs.path.NativeUtf8ComponentIterator.init(relative) catch {
var component_iterator = std.fs.path.NativeComponentIterator.init(relative) catch {
return error.NotASubPath;
};
if (component_iterator.root() != null) {
 
lib/std/Thread.zig added: 1932, removed: 517, total 1415
@@ -91,7 +91,7 @@ pub fn setName(self: Thread, name: []const u8) SetNameError!void {
},
.windows => {
var buf: [max_name_len]u16 = undefined;
const len = try std.unicode.utf8ToUtf16Le(&buf, name);
const len = try std.unicode.wtf8ToWtf16Le(&buf, name);
const byte_len = math.cast(c_ushort, len * 2) orelse return error.NameTooLong;
 
// Note: NT allocates its own copy, no use-after-free here.
@@ -157,17 +157,12 @@ pub fn setName(self: Thread, name: []const u8) SetNameError!void {
}
 
pub const GetNameError = error{
// For Windows, the name is converted from UTF16 to UTF8
CodepointTooLarge,
Utf8CannotEncodeSurrogateHalf,
DanglingSurrogateHalf,
ExpectedSecondSurrogateHalf,
UnexpectedSecondSurrogateHalf,
 
Unsupported,
Unexpected,
} || os.PrctlError || os.ReadError || std.fs.File.OpenError || std.fmt.BufPrintError;
 
/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
pub fn getName(self: Thread, buffer_ptr: *[max_name_len:0]u8) GetNameError!?[]const u8 {
buffer_ptr[max_name_len] = 0;
var buffer: [:0]u8 = buffer_ptr;
@@ -213,7 +208,7 @@ pub fn getName(self: Thread, buffer_ptr: *[max_name_len:0]u8) GetNameError!?[]co
)) {
.SUCCESS => {
const string = @as(*const os.windows.UNICODE_STRING, @ptrCast(&buf));
const len = try std.unicode.utf16leToUtf8(buffer, string.Buffer[0 .. string.Length / 2]);
const len = std.unicode.wtf16LeToWtf8(buffer, string.Buffer[0 .. string.Length / 2]);
return if (len > 0) buffer[0..len] else null;
},
.NOT_IMPLEMENTED => return error.Unsupported,
 
lib/std/child_process.zig added: 1932, removed: 517, total 1415
@@ -129,10 +129,9 @@ pub const ChildProcess = struct {
/// POSIX-only. `StdIo.Ignore` was selected and opening `/dev/null` returned ENODEV.
NoDevice,
 
/// Windows-only. One of:
/// * `cwd` was provided and it could not be re-encoded into UTF16LE, or
/// * The `PATH` or `PATHEXT` environment variable contained invalid UTF-8.
InvalidUtf8,
/// Windows-only. `cwd` or `argv` was provided and it was invalid WTF-8.
/// https://simonsapin.github.io/wtf-8/
InvalidWtf8,
 
/// Windows-only. `cwd` was provided, but the path did not exist when spawning the child process.
CurrentWorkingDirectoryUnlinked,
@@ -767,7 +766,7 @@ pub const ChildProcess = struct {
};
var piProcInfo: windows.PROCESS_INFORMATION = undefined;
 
const cwd_w = if (self.cwd) |cwd| try unicode.utf8ToUtf16LeWithNull(self.allocator, cwd) else null;
const cwd_w = if (self.cwd) |cwd| try unicode.wtf8ToWtf16LeAllocZ(self.allocator, cwd) else null;
defer if (cwd_w) |cwd| self.allocator.free(cwd);
const cwd_w_ptr = if (cwd_w) |cwd| cwd.ptr else null;
 
@@ -775,8 +774,8 @@ pub const ChildProcess = struct {
defer if (maybe_envp_buf) |envp_buf| self.allocator.free(envp_buf);
const envp_ptr = if (maybe_envp_buf) |envp_buf| envp_buf.ptr else null;
 
const app_name_utf8 = self.argv[0];
const app_name_is_absolute = fs.path.isAbsolute(app_name_utf8);
const app_name_wtf8 = self.argv[0];
const app_name_is_absolute = fs.path.isAbsolute(app_name_wtf8);
 
// the cwd set in ChildProcess is in effect when choosing the executable path
// to match posix semantics
@@ -785,11 +784,11 @@ pub const ChildProcess = struct {
// If the app name is absolute, then we need to use its dirname as the cwd
if (app_name_is_absolute) {
cwd_path_w_needs_free = true;
const dir = fs.path.dirname(app_name_utf8).?;
break :x try unicode.utf8ToUtf16LeWithNull(self.allocator, dir);
const dir = fs.path.dirname(app_name_wtf8).?;
break :x try unicode.wtf8ToWtf16LeAllocZ(self.allocator, dir);
} else if (self.cwd) |cwd| {
cwd_path_w_needs_free = true;
break :x try unicode.utf8ToUtf16LeWithNull(self.allocator, cwd);
break :x try unicode.wtf8ToWtf16LeAllocZ(self.allocator, cwd);
} else {
break :x &[_:0]u16{}; // empty for cwd
}
@@ -800,19 +799,19 @@ pub const ChildProcess = struct {
// into the basename and dirname and use the dirname as an addition to the cwd
// path. This is because NtQueryDirectoryFile cannot accept FileName params with
// path separators.
const app_basename_utf8 = fs.path.basename(app_name_utf8);
const app_basename_wtf8 = fs.path.basename(app_name_wtf8);
// If the app name is absolute, then the cwd will already have the app's dirname in it,
// so only populate app_dirname if app name is a relative path with > 0 path separators.
const maybe_app_dirname_utf8 = if (!app_name_is_absolute) fs.path.dirname(app_name_utf8) else null;
const maybe_app_dirname_wtf8 = if (!app_name_is_absolute) fs.path.dirname(app_name_wtf8) else null;
const app_dirname_w: ?[:0]u16 = x: {
if (maybe_app_dirname_utf8) |app_dirname_utf8| {
break :x try unicode.utf8ToUtf16LeWithNull(self.allocator, app_dirname_utf8);
if (maybe_app_dirname_wtf8) |app_dirname_wtf8| {
break :x try unicode.wtf8ToWtf16LeAllocZ(self.allocator, app_dirname_wtf8);
}
break :x null;
};
defer if (app_dirname_w != null) self.allocator.free(app_dirname_w.?);
 
const app_name_w = try unicode.utf8ToUtf16LeWithNull(self.allocator, app_basename_utf8);
const app_name_w = try unicode.wtf8ToWtf16LeAllocZ(self.allocator, app_basename_wtf8);
defer self.allocator.free(app_name_w);
 
const cmd_line_w = argvToCommandLineWindows(self.allocator, self.argv) catch |err| switch (err) {
@@ -1173,7 +1172,7 @@ const CreateProcessSupportedExtension = enum {
exe,
};
 
/// Case-insensitive UTF-16 lookup
/// Case-insensitive WTF-16 lookup
fn windowsCreateProcessSupportsExtension(ext: []const u16) ?CreateProcessSupportedExtension {
if (ext.len != 4) return null;
const State = enum {
@@ -1237,7 +1236,7 @@ test "windowsCreateProcessSupportsExtension" {
try std.testing.expect(windowsCreateProcessSupportsExtension(&[_]u16{ '.', 'e', 'X', 'e', 'c' }) == null);
}
 
pub const ArgvToCommandLineError = error{ OutOfMemory, InvalidUtf8, InvalidArg0 };
pub const ArgvToCommandLineError = error{ OutOfMemory, InvalidWtf8, InvalidArg0 };
 
/// Serializes `argv` to a Windows command-line string suitable for passing to a child process and
/// parsing by the `CommandLineToArgvW` algorithm. The caller owns the returned slice.
@@ -1320,7 +1319,7 @@ pub fn argvToCommandLineWindows(
}
}
 
return try unicode.utf8ToUtf16LeWithNull(allocator, buf.items);
return try unicode.wtf8ToWtf16LeAllocZ(allocator, buf.items);
}
 
test "argvToCommandLineWindows" {
@@ -1386,7 +1385,7 @@ fn testArgvToCommandLineWindows(argv: []const []const u8, expected_cmd_line: []c
const cmd_line_w = try argvToCommandLineWindows(std.testing.allocator, argv);
defer std.testing.allocator.free(cmd_line_w);
 
const cmd_line = try unicode.utf16leToUtf8Alloc(std.testing.allocator, cmd_line_w);
const cmd_line = try unicode.wtf16LeToWtf8Alloc(std.testing.allocator, cmd_line_w);
defer std.testing.allocator.free(cmd_line);
 
try std.testing.expectEqualStrings(expected_cmd_line, cmd_line);
@@ -1424,7 +1423,7 @@ fn windowsMakeAsyncPipe(rd: *?windows.HANDLE, wr: *?windows.HANDLE, sattr: *cons
"\\\\.\\pipe\\zig-childprocess-{d}-{d}",
.{ windows.kernel32.GetCurrentProcessId(), pipe_name_counter.fetchAdd(1, .Monotonic) },
) catch unreachable;
const len = std.unicode.utf8ToUtf16Le(&tmp_bufw, pipe_path) catch unreachable;
const len = std.unicode.wtf8ToWtf16Le(&tmp_bufw, pipe_path) catch unreachable;
tmp_bufw[len] = 0;
break :blk tmp_bufw[0..len :0];
};
@@ -1521,10 +1520,10 @@ pub fn createWindowsEnvBlock(allocator: mem.Allocator, env_map: *const EnvMap) !
var it = env_map.iterator();
var i: usize = 0;
while (it.next()) |pair| {
i += try unicode.utf8ToUtf16Le(result[i..], pair.key_ptr.*);
i += try unicode.wtf8ToWtf16Le(result[i..], pair.key_ptr.*);
result[i] = '=';
i += 1;
i += try unicode.utf8ToUtf16Le(result[i..], pair.value_ptr.*);
i += try unicode.wtf8ToWtf16Le(result[i..], pair.value_ptr.*);
result[i] = 0;
i += 1;
}
 
lib/std/fs.zig added: 1932, removed: 517, total 1415
@@ -31,18 +31,21 @@ pub const realpathW = os.realpathW;
pub const getAppDataDir = @import("fs/get_app_data_dir.zig").getAppDataDir;
pub const GetAppDataDirError = @import("fs/get_app_data_dir.zig").GetAppDataDirError;
 
/// This represents the maximum size of a UTF-8 encoded file path that the
/// This represents the maximum size of a `[]u8` file path that the
/// operating system will accept. Paths, including those returned from file
/// system operations, may be longer than this length, but such paths cannot
/// be successfully passed back in other file system operations. However,
/// all path components returned by file system operations are assumed to
/// fit into a UTF-8 encoded array of this length.
/// fit into a `u8` array of this length.
/// The byte count includes room for a null sentinel byte.
/// On Windows, `[]u8` file paths are encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `[]u8` file paths are encoded as valid UTF-8.
/// On other platforms, `[]u8` file paths are opaque sequences of bytes with no particular encoding.
pub const MAX_PATH_BYTES = switch (builtin.os.tag) {
.linux, .macos, .ios, .freebsd, .openbsd, .netbsd, .dragonfly, .haiku, .solaris, .illumos, .plan9, .emscripten => os.PATH_MAX,
// Each UTF-16LE character may be expanded to 3 UTF-8 bytes.
// If it would require 4 UTF-8 bytes, then there would be a surrogate
// pair in the UTF-16LE, and we (over)account 3 bytes for it that way.
// Each WTF-16LE code unit may be expanded to 3 WTF-8 bytes.
// If it would require 4 WTF-8 bytes, then there would be a surrogate
// pair in the WTF-16LE, and we (over)account 3 bytes for it that way.
// +1 for the null byte at the end, which can be encoded in 1 byte.
.windows => os.windows.PATH_MAX_WIDE * 3 + 1,
// TODO work out what a reasonable value we should use here
@@ -53,18 +56,21 @@ pub const MAX_PATH_BYTES = switch (builtin.os.tag) {
@compileError("PATH_MAX not implemented for " ++ @tagName(builtin.os.tag)),
};
 
/// This represents the maximum size of a UTF-8 encoded file name component that
/// This represents the maximum size of a `[]u8` file name component that
/// the platform's common file systems support. File name components returned by file system
/// operations are likely to fit into a UTF-8 encoded array of this length, but
/// operations are likely to fit into a `u8` array of this length, but
/// (depending on the platform) this assumption may not hold for every configuration.
/// The byte count does not include a null sentinel byte.
/// On Windows, `[]u8` file name components are encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, file name components are encoded as valid UTF-8.
/// On other platforms, `[]u8` components are an opaque sequence of bytes with no particular encoding.
pub const MAX_NAME_BYTES = switch (builtin.os.tag) {
.linux, .macos, .ios, .freebsd, .openbsd, .netbsd, .dragonfly, .solaris, .illumos => os.NAME_MAX,
// Haiku's NAME_MAX includes the null terminator, so subtract one.
.haiku => os.NAME_MAX - 1,
// Each UTF-16LE character may be expanded to 3 UTF-8 bytes.
// If it would require 4 UTF-8 bytes, then there would be a surrogate
// pair in the UTF-16LE, and we (over)account 3 bytes for it that way.
// Each WTF-16LE code unit may be expanded to 3 WTF-8 bytes.
// If it would require 4 WTF-8 bytes, then there would be a surrogate
// pair in the WTF-16LE, and we (over)account 3 bytes for it that way.
.windows => os.windows.NAME_MAX * 3,
// For WASI, the MAX_NAME will depend on the host OS, so it needs to be
// as large as the largest MAX_NAME_BYTES (Windows) in order to work on any host OS.
@@ -86,6 +92,9 @@ pub const base64_decoder = base64.Base64Decoder.init(base64_alphabet, null);
 
/// TODO remove the allocator requirement from this API
/// TODO move to Dir
/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, both paths should be encoded as valid UTF-8.
/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn atomicSymLink(allocator: Allocator, existing_path: []const u8, new_path: []const u8) !void {
if (cwd().symLink(existing_path, new_path, .{})) {
return;
@@ -117,6 +126,9 @@ pub fn atomicSymLink(allocator: Allocator, existing_path: []const u8, new_path:
/// Same as `Dir.updateFile`, except asserts that both `source_path` and `dest_path`
/// are absolute. See `Dir.updateFile` for a function that operates on both
/// absolute and relative paths.
/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, both paths should be encoded as valid UTF-8.
/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn updateFileAbsolute(
source_path: []const u8,
dest_path: []const u8,
@@ -131,6 +143,9 @@ pub fn updateFileAbsolute(
/// Same as `Dir.copyFile`, except asserts that both `source_path` and `dest_path`
/// are absolute. See `Dir.copyFile` for a function that operates on both
/// absolute and relative paths.
/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, both paths should be encoded as valid UTF-8.
/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn copyFileAbsolute(
source_path: []const u8,
dest_path: []const u8,
@@ -145,24 +160,30 @@ pub fn copyFileAbsolute(
/// Create a new directory, based on an absolute path.
/// Asserts that the path is absolute. See `Dir.makeDir` for a function that operates
/// on both absolute and relative paths.
/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `absolute_path` should be encoded as valid UTF-8.
/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding.
pub fn makeDirAbsolute(absolute_path: []const u8) !void {
    // A relative path here is a caller bug, so it is asserted rather than
    // surfaced as an error value.
    const is_absolute = path.isAbsolute(absolute_path);
    assert(is_absolute);

    // Hand off to the OS layer with the default directory mode.
    const mode = Dir.default_mode;
    return os.mkdir(absolute_path, mode);
}
 
/// Same as `makeDirAbsolute` except the parameter is a null-terminated UTF-8-encoded string.
/// Same as `makeDirAbsolute` except the parameter is null-terminated.
pub fn makeDirAbsoluteZ(absolute_path_z: [*:0]const u8) !void {
    // The null-terminated path must be absolute; a relative path is a
    // programmer error and is asserted, not returned as an error.
    const is_absolute = path.isAbsoluteZ(absolute_path_z);
    assert(is_absolute);

    // Hand off to the null-terminated OS-layer variant with the default mode.
    const mode = Dir.default_mode;
    return os.mkdirZ(absolute_path_z, mode);
}
 
/// Same as `makeDirAbsolute` except the parameter is a null-terminated WTF-16-encoded string.
/// Same as `makeDirAbsolute` except the parameter is a null-terminated WTF-16 LE-encoded string.
pub fn makeDirAbsoluteW(absolute_path_w: [*:0]const u16) !void {
    // The wide, null-terminated path must be absolute by Windows rules;
    // a relative path is a programmer error and is asserted.
    const is_absolute = path.isAbsoluteWindowsW(absolute_path_w);
    assert(is_absolute);

    // Hand off to the wide-string OS-layer variant with the default mode.
    const mode = Dir.default_mode;
    return os.mkdirW(absolute_path_w, mode);
}
 
/// Same as `Dir.deleteDir` except the path is absolute.
/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `dir_path` should be encoded as valid UTF-8.
/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding.
pub fn deleteDirAbsolute(dir_path: []const u8) !void {
assert(path.isAbsolute(dir_path));
return os.rmdir(dir_path);
@@ -181,6 +202,9 @@ pub fn deleteDirAbsoluteW(dir_path: [*:0]const u16) !void {
}
 
/// Same as `Dir.rename` except the paths are absolute.
/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, both paths should be encoded as valid UTF-8.
/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn renameAbsolute(old_path: []const u8, new_path: []const u8) !void {
assert(path.isAbsolute(old_path));
assert(path.isAbsolute(new_path));
@@ -211,7 +235,7 @@ pub fn renameZ(old_dir: Dir, old_sub_path_z: [*:0]const u8, new_dir: Dir, new_su
return os.renameatZ(old_dir.fd, old_sub_path_z, new_dir.fd, new_sub_path_z);
}
 
/// Same as `rename` except the parameters are UTF16LE, NT prefixed.
/// Same as `rename` except the parameters are WTF16LE, NT prefixed.
/// This function is Windows-only.
pub fn renameW(old_dir: Dir, old_sub_path_w: []const u16, new_dir: Dir, new_sub_path_w: []const u16) !void {
return os.renameatW(old_dir.fd, old_sub_path_w, new_dir.fd, new_sub_path_w);
@@ -240,6 +264,9 @@ pub fn defaultWasiCwd() std.os.wasi.fd_t {
/// See `openDirAbsoluteZ` for a function that accepts a null-terminated path.
///
/// Asserts that the path parameter has no null bytes.
/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `absolute_path` should be encoded as valid UTF-8.
/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding.
pub fn openDirAbsolute(absolute_path: []const u8, flags: Dir.OpenDirOptions) File.OpenError!Dir {
assert(path.isAbsolute(absolute_path));
return cwd().openDir(absolute_path, flags);
@@ -262,6 +289,9 @@ pub fn openDirAbsoluteW(absolute_path_c: [*:0]const u16, flags: Dir.OpenDirOptio
/// operates on both absolute and relative paths.
/// Asserts that the path parameter has no null bytes. See `openFileAbsoluteZ` for a function
/// that accepts a null-terminated path.
/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `absolute_path` should be encoded as valid UTF-8.
/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding.
pub fn openFileAbsolute(absolute_path: []const u8, flags: File.OpenFlags) File.OpenError!File {
assert(path.isAbsolute(absolute_path));
return cwd().openFile(absolute_path, flags);
@@ -280,11 +310,13 @@ pub fn openFileAbsoluteW(absolute_path_w: []const u16, flags: File.OpenFlags) Fi
}
 
/// Test accessing `path`.
/// `path` is UTF-8-encoded.
/// Be careful of Time-Of-Check-Time-Of-Use race conditions when using this function.
/// For example, instead of testing if a file exists and then opening it, just
/// open it and handle the error for file not found.
/// See `accessAbsoluteZ` for a function that accepts a null-terminated path.
/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `absolute_path` should be encoded as valid UTF-8.
/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding.
pub fn accessAbsolute(absolute_path: []const u8, flags: File.OpenFlags) Dir.AccessError!void {
assert(path.isAbsolute(absolute_path));
try cwd().access(absolute_path, flags);
@@ -306,6 +338,9 @@ pub fn accessAbsoluteW(absolute_path: [*:0]const u16, flags: File.OpenFlags) Dir
/// operates on both absolute and relative paths.
/// Asserts that the path parameter has no null bytes. See `createFileAbsoluteZ` for a function
/// that accepts a null-terminated path.
/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `absolute_path` should be encoded as valid UTF-8.
/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding.
pub fn createFileAbsolute(absolute_path: []const u8, flags: File.CreateFlags) File.OpenError!File {
assert(path.isAbsolute(absolute_path));
return cwd().createFile(absolute_path, flags);
@@ -327,6 +362,9 @@ pub fn createFileAbsoluteW(absolute_path_w: [*:0]const u16, flags: File.CreateFl
/// Asserts that the path is absolute. See `Dir.deleteFile` for a function that
/// operates on both absolute and relative paths.
/// Asserts that the path parameter has no null bytes.
/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `absolute_path` should be encoded as valid UTF-8.
/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding.
pub fn deleteFileAbsolute(absolute_path: []const u8) Dir.DeleteFileError!void {
assert(path.isAbsolute(absolute_path));
return cwd().deleteFile(absolute_path);
@@ -349,6 +387,9 @@ pub fn deleteFileAbsoluteW(absolute_path_w: [*:0]const u16) Dir.DeleteFileError!
/// Asserts that the path is absolute. See `Dir.deleteTree` for a function that
/// operates on both absolute and relative paths.
/// Asserts that the path parameter has no null bytes.
/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `absolute_path` should be encoded as valid UTF-8.
/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding.
pub fn deleteTreeAbsolute(absolute_path: []const u8) !void {
assert(path.isAbsolute(absolute_path));
const dirname = path.dirname(absolute_path) orelse return error{
@@ -364,6 +405,9 @@ pub fn deleteTreeAbsolute(absolute_path: []const u8) !void {
}
 
/// Same as `Dir.readLink`, except it asserts the path is absolute.
/// On Windows, `pathname` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `pathname` should be encoded as valid UTF-8.
/// On other platforms, `pathname` is an opaque sequence of bytes with no particular encoding.
pub fn readLinkAbsolute(pathname: []const u8, buffer: *[MAX_PATH_BYTES]u8) ![]u8 {
assert(path.isAbsolute(pathname));
return os.readlink(pathname, buffer);
@@ -387,6 +431,9 @@ pub fn readLinkAbsoluteZ(pathname_c: [*:0]const u8, buffer: *[MAX_PATH_BYTES]u8)
/// one; the latter case is known as a dangling link.
/// If `sym_link_path` exists, it will not be overwritten.
/// See also `symLinkAbsoluteZ` and `symLinkAbsoluteW`.
/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, both paths should be encoded as valid UTF-8.
/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn symLinkAbsolute(
target_path: []const u8,
sym_link_path: []const u8,
@@ -402,7 +449,7 @@ pub fn symLinkAbsolute(
return os.symlink(target_path, sym_link_path);
}
 
/// Windows-only. Same as `symLinkAbsolute` except the parameters are null-terminated, WTF16 encoded.
/// Windows-only. Same as `symLinkAbsolute` except the parameters are null-terminated, WTF16 LE encoded.
/// Note that this function will by default try creating a symbolic link to a file. If you would
/// like to create a symbolic link to a directory, specify this with `SymLinkFlags{ .is_directory = true }`.
/// See also `symLinkAbsolute`, `symLinkAbsoluteZ`.
@@ -426,27 +473,14 @@ pub fn symLinkAbsoluteZ(
assert(path.isAbsoluteZ(target_path_c));
assert(path.isAbsoluteZ(sym_link_path_c));
if (builtin.os.tag == .windows) {
const target_path_w = try os.windows.cStrToWin32PrefixedFileW(target_path_c);
const sym_link_path_w = try os.windows.cStrToWin32PrefixedFileW(sym_link_path_c);
return os.windows.CreateSymbolicLink(sym_link_path_w.span(), target_path_w.span(), flags.is_directory);
const target_path_w = try os.windows.cStrToPrefixedFileW(null, target_path_c);
const sym_link_path_w = try os.windows.cStrToPrefixedFileW(null, sym_link_path_c);
return os.windows.CreateSymbolicLink(null, sym_link_path_w.span(), target_path_w.span(), flags.is_directory);
}
return os.symlinkZ(target_path_c, sym_link_path_c);
}
 
pub const OpenSelfExeError = error{
SharingViolation,
PathAlreadyExists,
FileNotFound,
AccessDenied,
PipeBusy,
NameTooLong,
/// On Windows, file paths must be valid Unicode.
InvalidUtf8,
/// On Windows, file paths cannot contain these characters:
/// '/', '*', '?', '"', '<', '>', '|'
BadPathName,
Unexpected,
} || os.OpenError || SelfExePathError || os.FlockError;
pub const OpenSelfExeError = os.OpenError || SelfExePathError || os.FlockError;
 
pub fn openSelfExe(flags: File.OpenFlags) OpenSelfExeError!File {
if (builtin.os.tag == .linux) {
@@ -469,7 +503,45 @@ pub fn openSelfExe(flags: File.OpenFlags) OpenSelfExeError!File {
return openFileAbsoluteZ(buf[0..self_exe_path.len :0].ptr, flags);
}
 
pub const SelfExePathError = os.ReadLinkError || os.SysCtlError || os.RealPathError;
// Error set returned by `selfExePath` and `selfExeDirPath`.
// This is os.ReadLinkError || os.RealPathError with impossible errors excluded
// (for example, the path-encoding errors `InvalidUtf8` and `InvalidWtf8` are
// handled as `unreachable` at the call sites inside `selfExePath`), merged
// with os.SysCtlError.
pub const SelfExePathError = error{
FileNotFound,
AccessDenied,
NameTooLong,
NotSupported,
NotDir,
SymLinkLoop,
InputOutput,
FileTooBig,
IsDir,
ProcessFdQuotaExceeded,
SystemFdQuotaExceeded,
NoDevice,
SystemResources,
NoSpaceLeft,
FileSystem,
BadPathName,
DeviceBusy,
SharingViolation,
PipeBusy,
NotLink,
PathAlreadyExists,
InvalidHandle,

/// On Windows, `\\server` or `\\server\share` was not found.
NetworkNotFound,

/// On Windows, antivirus software is enabled by default. It can be
/// disabled, but Windows Update sometimes ignores the user's preference
/// and re-enables it. When enabled, antivirus software on Windows
/// intercepts file system operations and makes them significantly slower
/// in addition to possibly failing with this error code.
AntivirusInterference,

/// On Windows, the volume does not contain a recognized file system. File
/// system drivers might not be loaded, or the volume may be corrupt.
UnrecognizedVolume,
} || os.SysCtlError;
 
/// `selfExePath` except allocates the result on the heap.
/// Caller owns returned memory.
@@ -491,6 +563,8 @@ pub fn selfExePathAlloc(allocator: Allocator) ![]u8 {
/// This function may return an error if the current executable
/// was deleted after spawning.
/// Returned value is a slice of out_buffer.
/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
///
/// On Linux, depends on procfs being mounted. If the currently executing binary has
/// been deleted, the file path looks something like `/a/b/c/exe (deleted)`.
@@ -505,15 +579,31 @@ pub fn selfExePath(out_buffer: []u8) SelfExePathError![]u8 {
if (rc != 0) return error.NameTooLong;
 
var real_path_buf: [MAX_PATH_BYTES]u8 = undefined;
const real_path = try std.os.realpathZ(&symlink_path_buf, &real_path_buf);
const real_path = std.os.realpathZ(&symlink_path_buf, &real_path_buf) catch |err| switch (err) {
error.InvalidWtf8 => unreachable, // Windows-only
error.NetworkNotFound => unreachable, // Windows-only
else => |e| return e,
};
if (real_path.len > out_buffer.len) return error.NameTooLong;
const result = out_buffer[0..real_path.len];
@memcpy(result, real_path);
return result;
}
switch (builtin.os.tag) {
.linux => return os.readlinkZ("/proc/self/exe", out_buffer),
.solaris, .illumos => return os.readlinkZ("/proc/self/path/a.out", out_buffer),
.linux => return os.readlinkZ("/proc/self/exe", out_buffer) catch |err| switch (err) {
error.InvalidUtf8 => unreachable, // WASI-only
error.InvalidWtf8 => unreachable, // Windows-only
error.UnsupportedReparsePointType => unreachable, // Windows-only
error.NetworkNotFound => unreachable, // Windows-only
else => |e| return e,
},
.solaris, .illumos => return os.readlinkZ("/proc/self/path/a.out", out_buffer) catch |err| switch (err) {
error.InvalidUtf8 => unreachable, // WASI-only
error.InvalidWtf8 => unreachable, // Windows-only
error.UnsupportedReparsePointType => unreachable, // Windows-only
error.NetworkNotFound => unreachable, // Windows-only
else => |e| return e,
},
.freebsd, .dragonfly => {
var mib = [4]c_int{ os.CTL.KERN, os.KERN.PROC, os.KERN.PROC_PATHNAME, -1 };
var out_len: usize = out_buffer.len;
@@ -537,7 +627,11 @@ pub fn selfExePath(out_buffer: []u8) SelfExePathError![]u8 {
if (mem.indexOf(u8, argv0, "/") != null) {
// argv[0] is a path (relative or absolute): use realpath(3) directly
var real_path_buf: [MAX_PATH_BYTES]u8 = undefined;
const real_path = try os.realpathZ(os.argv[0], &real_path_buf);
const real_path = os.realpathZ(os.argv[0], &real_path_buf) catch |err| switch (err) {
error.InvalidWtf8 => unreachable, // Windows-only
error.NetworkNotFound => unreachable, // Windows-only
else => |e| return e,
};
if (real_path.len > out_buffer.len)
return error.NameTooLong;
const result = out_buffer[0..real_path.len];
@@ -575,7 +669,10 @@ pub fn selfExePath(out_buffer: []u8) SelfExePathError![]u8 {
// symlink, not the path that the symlink points to. We want the path
// that the symlink points to, though, so we need to get the realpath.
const pathname_w = try os.windows.wToPrefixedFileW(null, image_path_name);
return std.fs.cwd().realpathW(pathname_w.span(), out_buffer);
return std.fs.cwd().realpathW(pathname_w.span(), out_buffer) catch |err| switch (err) {
error.InvalidWtf8 => unreachable,
else => |e| return e,
};
},
else => @compileError("std.fs.selfExePath not supported for this target"),
}
@@ -599,6 +696,8 @@ pub fn selfExeDirPathAlloc(allocator: Allocator) ![]u8 {
 
/// Get the directory path that contains the current executable.
/// Returned value is a slice of out_buffer.
/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
pub fn selfExeDirPath(out_buffer: []u8) SelfExePathError![]const u8 {
const self_exe_path = try selfExePath(out_buffer);
// Assume that the OS APIs return absolute paths, and therefore dirname
@@ -607,6 +706,8 @@ pub fn selfExeDirPath(out_buffer: []u8) SelfExePathError![]const u8 {
}
 
/// `realpath`, except caller must free the returned memory.
/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
/// See also `Dir.realpath`.
pub fn realpathAlloc(allocator: Allocator, pathname: []const u8) ![]u8 {
// Use of MAX_PATH_BYTES here is valid as the realpath function does not
 
lib/std/fs/Dir.zig added: 1932, removed: 517, total 1415
@@ -9,7 +9,14 @@ pub const Entry = struct {
pub const Kind = File.Kind;
};
 
const IteratorError = error{ AccessDenied, SystemResources } || posix.UnexpectedError;
// Base error set for directory iteration, merged with posix.UnexpectedError.
// NOTE(review): presumably combined into the error set of each platform's
// `Iterator.next`; that usage is not visible in this hunk — confirm.
const IteratorError = error{
AccessDenied,
SystemResources,
/// WASI-only. The path of an entry could not be encoded as valid UTF-8.
/// WASI is unable to handle paths that cannot be encoded as well-formed UTF-8.
/// https://github.com/WebAssembly/wasi-filesystem/issues/17#issuecomment-1430639353
InvalidUtf8,
} || posix.UnexpectedError;
 
pub const Iterator = switch (builtin.os.tag) {
.macos, .ios, .freebsd, .netbsd, .dragonfly, .openbsd, .solaris, .illumos => struct {
@@ -445,13 +452,12 @@ pub const Iterator = switch (builtin.os.tag) {
self.index = self.buf.len;
}
 
const name_utf16le = @as([*]u16, @ptrCast(&dir_info.FileName))[0 .. dir_info.FileNameLength / 2];
const name_wtf16le = @as([*]u16, @ptrCast(&dir_info.FileName))[0 .. dir_info.FileNameLength / 2];
 
if (mem.eql(u16, name_utf16le, &[_]u16{'.'}) or mem.eql(u16, name_utf16le, &[_]u16{ '.', '.' }))
if (mem.eql(u16, name_wtf16le, &[_]u16{'.'}) or mem.eql(u16, name_wtf16le, &[_]u16{ '.', '.' }))
continue;
// Trust that Windows gives us valid UTF-16LE
const name_utf8_len = std.unicode.utf16leToUtf8(self.name_data[0..], name_utf16le) catch unreachable;
const name_utf8 = self.name_data[0..name_utf8_len];
const name_wtf8_len = std.unicode.wtf16LeToWtf8(self.name_data[0..], name_wtf16le);
const name_wtf8 = self.name_data[0..name_wtf8_len];
const kind: Entry.Kind = blk: {
const attrs = dir_info.FileAttributes;
if (attrs & w.FILE_ATTRIBUTE_DIRECTORY != 0) break :blk .directory;
@@ -459,7 +465,7 @@ pub const Iterator = switch (builtin.os.tag) {
break :blk .file;
};
return Entry{
.name = name_utf8,
.name = name_wtf8,
.kind = kind,
};
}
@@ -516,6 +522,7 @@ pub const Iterator = switch (builtin.os.tag) {
.INVAL => unreachable,
.NOENT => return error.DirNotFound, // The directory being iterated was deleted during iteration.
.NOTCAPABLE => return error.AccessDenied,
.ILSEQ => return error.InvalidUtf8, // An entry's name cannot be encoded as UTF-8.
else => |err| return posix.unexpectedErrno(err),
}
if (bufused == 0) return null;
@@ -743,7 +750,11 @@ pub const OpenError = error{
SystemFdQuotaExceeded,
NoDevice,
SystemResources,
/// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
/// Windows-only; file paths provided by the user must be valid WTF-8.
/// https://simonsapin.github.io/wtf-8/
InvalidWtf8,
BadPathName,
DeviceBusy,
/// On Windows, `\\server` or `\\server\share` was not found.
@@ -759,6 +770,9 @@ pub fn close(self: *Dir) void {
/// To create a new file, see `createFile`.
/// Call `File.close` to release the resource.
/// Asserts that the path parameter has no null bytes.
/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `sub_path` should be encoded as valid UTF-8.
/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
pub fn openFile(self: Dir, sub_path: []const u8, flags: File.OpenFlags) File.OpenError!File {
if (builtin.os.tag == .windows) {
const path_w = try std.os.windows.sliceToPrefixedFileW(self.fd, sub_path);
@@ -911,6 +925,9 @@ pub fn openFileW(self: Dir, sub_path_w: []const u16, flags: File.OpenFlags) File
/// Creates, opens, or overwrites a file with write access.
/// Call `File.close` on the result when done.
/// Asserts that the path parameter has no null bytes.
/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `sub_path` should be encoded as valid UTF-8.
/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
pub fn createFile(self: Dir, sub_path: []const u8, flags: File.CreateFlags) File.OpenError!File {
if (builtin.os.tag == .windows) {
const path_w = try std.os.windows.sliceToPrefixedFileW(self.fd, sub_path);
@@ -1060,18 +1077,21 @@ pub fn createFileW(self: Dir, sub_path_w: []const u16, flags: File.CreateFlags)
/// Create one directory at `sub_path`, which may be a relative or an absolute path.
/// Only a single directory is created; to create every directory of an entire
/// path, see `makePath`.
/// For a variant that operates on absolute paths only, see `makeDirAbsolute`.
///
/// Encoding of `sub_path`:
/// - On Windows, it should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// - On WASI, it should be encoded as valid UTF-8.
/// - On other platforms, it is an opaque sequence of bytes with no particular encoding.
pub fn makeDir(self: Dir, sub_path: []const u8) !void {
    // The new directory is created with `default_mode`.
    return posix.mkdirat(self.fd, sub_path, default_mode);
}
 
/// Creates a single directory with a relative or absolute null-terminated UTF-8-encoded path.
/// Same as `makeDir`, but `sub_path` is null-terminated.
/// To create multiple directories to make an entire path, see `makePath`.
/// To operate on only absolute paths, see `makeDirAbsoluteZ`.
pub fn makeDirZ(self: Dir, sub_path: [*:0]const u8) !void {
    // Forward the null-terminated path unchanged; the directory is created
    // with `default_mode`.
    return posix.mkdiratZ(self.fd, sub_path, default_mode);
}
 
/// Creates a single directory with a relative or absolute null-terminated WTF-16-encoded path.
/// Creates a single directory with a relative or absolute null-terminated WTF-16 LE-encoded path.
/// To create multiple directories to make an entire path, see `makePath`.
/// To operate on only absolute paths, see `makeDirAbsoluteW`.
pub fn makeDirW(self: Dir, sub_path: [*:0]const u16) !void {
@@ -1083,6 +1103,9 @@ pub fn makeDirW(self: Dir, sub_path: [*:0]const u16) !void {
/// Returns success if the path already exists and is a directory.
/// This function is not atomic, and if it returns an error, the file system may
/// have been modified regardless.
/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `sub_path` should be encoded as valid UTF-8.
/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
///
/// Paths containing `..` components are handled differently depending on the platform:
/// - On Windows, `..` are resolved before the path is passed to NtCreateFile, meaning
@@ -1119,16 +1142,17 @@ pub fn makePath(self: Dir, sub_path: []const u8) !void {
}
}
 
/// Calls makeOpenDirAccessMaskW iteratively to make an entire path
/// Windows only. Calls makeOpenDirAccessMaskW iteratively to make an entire path
/// (i.e. creating any parent directories that do not exist).
/// Opens the dir if the path already exists and is a directory.
/// This function is not atomic, and if it returns an error, the file system may
/// have been modified regardless.
/// `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
fn makeOpenPathAccessMaskW(self: Dir, sub_path: []const u8, access_mask: u32, no_follow: bool) OpenError!Dir {
const w = std.os.windows;
var it = try fs.path.componentIterator(sub_path);
// If there are no components in the path, then create a dummy component with the full path.
var component = it.last() orelse fs.path.NativeUtf8ComponentIterator.Component{
var component = it.last() orelse fs.path.NativeComponentIterator.Component{
.name = "",
.path = sub_path,
};
@@ -1156,7 +1180,9 @@ fn makeOpenPathAccessMaskW(self: Dir, sub_path: []const u8, access_mask: u32, no
/// This function performs `makePath`, followed by `openDir`.
/// If supported by the OS, this operation is atomic. It is not atomic on
/// all operating systems.
/// On Windows, this function performs `makeOpenPathAccessMaskW`.
/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `sub_path` should be encoded as valid UTF-8.
/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
pub fn makeOpenPath(self: Dir, sub_path: []const u8, open_dir_options: OpenDirOptions) !Dir {
return switch (builtin.os.tag) {
.windows => {
@@ -1185,6 +1211,10 @@ pub const RealPathError = posix.RealPathError;
/// `pathname` relative to this `Dir`. If `pathname` is absolute, ignores this
/// `Dir` handle and returns the canonicalized absolute pathname of `pathname`
/// argument.
/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
/// This function is not universally supported by all platforms.
/// Currently supported hosts are: Linux, macOS, and Windows.
/// See also `Dir.realpathZ`, `Dir.realpathW`, and `Dir.realpathAlloc`.
@@ -1224,6 +1254,7 @@ pub fn realpathZ(self: Dir, pathname: [*:0]const u8, out_buffer: []u8) RealPathE
error.FileLocksNotSupported => return error.Unexpected,
error.FileBusy => return error.Unexpected,
error.WouldBlock => return error.Unexpected,
error.InvalidUtf8 => unreachable, // WASI-only
else => |e| return e,
};
defer posix.close(fd);
@@ -1246,7 +1277,8 @@ pub fn realpathZ(self: Dir, pathname: [*:0]const u8, out_buffer: []u8) RealPathE
return result;
}
 
/// Windows-only. Same as `Dir.realpath` except `pathname` is WTF16 encoded.
/// Windows-only. Same as `Dir.realpath` except `pathname` is WTF16 LE encoded.
/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// See also `Dir.realpath`, `realpathW`.
pub fn realpathW(self: Dir, pathname: []const u16, out_buffer: []u8) RealPathError![]u8 {
const w = std.os.windows;
@@ -1272,16 +1304,7 @@ pub fn realpathW(self: Dir, pathname: []const u16, out_buffer: []u8) RealPathErr
var wide_buf: [w.PATH_MAX_WIDE]u16 = undefined;
const wide_slice = try w.GetFinalPathNameByHandle(h_file, .{}, &wide_buf);
var big_out_buf: [fs.MAX_PATH_BYTES]u8 = undefined;
const end_index = std.unicode.utf16leToUtf8(&big_out_buf, wide_slice) catch |e| switch (e) {
// TODO: Windows file paths can be arbitrary arrays of u16 values and
// must not fail with InvalidUtf8.
error.DanglingSurrogateHalf,
error.ExpectedSecondSurrogateHalf,
error.UnexpectedSecondSurrogateHalf,
error.CodepointTooLarge,
error.Utf8CannotEncodeSurrogateHalf,
=> return error.InvalidUtf8,
};
const end_index = std.unicode.wtf16LeToWtf8(&big_out_buf, wide_slice);
if (end_index > out_buffer.len)
return error.NameTooLong;
const result = out_buffer[0..end_index];
@@ -1344,6 +1367,9 @@ pub const OpenDirOptions = struct {
/// open until `close` is called on the result.
/// The directory cannot be iterated unless the `iterate` option is set to `true`.
///
/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `sub_path` should be encoded as valid UTF-8.
/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
/// Asserts that the path parameter has no null bytes.
pub fn openDir(self: Dir, sub_path: []const u8, args: OpenDirOptions) OpenError!Dir {
switch (builtin.os.tag) {
@@ -1428,7 +1454,7 @@ pub fn openDirZ(self: Dir, sub_path_c: [*:0]const u8, args: OpenDirOptions) Open
}
}
 
/// Same as `openDir` except the path parameter is WTF-16 encoded, NT-prefixed.
/// Same as `openDir` except the path parameter is WTF-16 LE encoded, NT-prefixed.
/// This function asserts the target OS is Windows.
pub fn openDirW(self: Dir, sub_path_w: [*:0]const u16, args: OpenDirOptions) OpenError!Dir {
const w = std.os.windows;
@@ -1518,6 +1544,9 @@ fn makeOpenDirAccessMaskW(self: Dir, sub_path_w: [*:0]const u16, access_mask: u3
pub const DeleteFileError = posix.UnlinkError;
 
/// Delete a file name and possibly the file it refers to, based on an open directory handle.
/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `sub_path` should be encoded as valid UTF-8.
/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
/// Asserts that the path parameter has no null bytes.
pub fn deleteFile(self: Dir, sub_path: []const u8) DeleteFileError!void {
if (builtin.os.tag == .windows) {
@@ -1553,7 +1582,7 @@ pub fn deleteFileZ(self: Dir, sub_path_c: [*:0]const u8) DeleteFileError!void {
};
}
 
/// Same as `deleteFile` except the parameter is WTF-16 encoded.
/// Same as `deleteFile` except the parameter is WTF-16 LE encoded.
pub fn deleteFileW(self: Dir, sub_path_w: []const u16) DeleteFileError!void {
posix.unlinkatW(self.fd, sub_path_w, 0) catch |err| switch (err) {
error.DirNotEmpty => unreachable, // not passing AT.REMOVEDIR
@@ -1572,7 +1601,11 @@ pub const DeleteDirError = error{
NotDir,
SystemResources,
ReadOnlyFileSystem,
/// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
/// Windows-only; file paths provided by the user must be valid WTF-8.
/// https://simonsapin.github.io/wtf-8/
InvalidWtf8,
BadPathName,
/// On Windows, `\\server` or `\\server\share` was not found.
NetworkNotFound,
@@ -1581,6 +1614,9 @@ pub const DeleteDirError = error{
 
/// Returns `error.DirNotEmpty` if the directory is not empty.
/// To delete a directory recursively, see `deleteTree`.
/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `sub_path` should be encoded as valid UTF-8.
/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
/// Asserts that the path parameter has no null bytes.
pub fn deleteDir(self: Dir, sub_path: []const u8) DeleteDirError!void {
if (builtin.os.tag == .windows) {
@@ -1605,7 +1641,7 @@ pub fn deleteDirZ(self: Dir, sub_path_c: [*:0]const u8) DeleteDirError!void {
};
}
 
/// Same as `deleteDir` except the parameter is UTF16LE, NT prefixed.
/// Same as `deleteDir` except the parameter is WTF16LE, NT prefixed.
/// This function is Windows-only.
pub fn deleteDirW(self: Dir, sub_path_w: []const u16) DeleteDirError!void {
posix.unlinkatW(self.fd, sub_path_w, posix.AT.REMOVEDIR) catch |err| switch (err) {
@@ -1620,6 +1656,9 @@ pub const RenameError = posix.RenameError;
/// If new_sub_path already exists, it will be replaced.
/// Renaming a file over an existing directory or a directory
/// over an existing file will fail with `error.IsDir` or `error.NotDir`.
/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, both paths should be encoded as valid UTF-8.
/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn rename(self: Dir, old_sub_path: []const u8, new_sub_path: []const u8) RenameError!void {
    // The same directory handle is passed for both the old and the new path.
    const dir_fd = self.fd;
    return posix.renameat(dir_fd, old_sub_path, dir_fd, new_sub_path);
}
@@ -1629,7 +1668,7 @@ pub fn renameZ(self: Dir, old_sub_path_z: [*:0]const u8, new_sub_path_z: [*:0]co
return posix.renameatZ(self.fd, old_sub_path_z, self.fd, new_sub_path_z);
}
 
/// Same as `rename` except the parameters are UTF16LE, NT prefixed.
/// Same as `rename` except the parameters are WTF16LE, NT prefixed.
/// This function is Windows-only.
pub fn renameW(self: Dir, old_sub_path_w: []const u16, new_sub_path_w: []const u16) RenameError!void {
return posix.renameatW(self.fd, old_sub_path_w, self.fd, new_sub_path_w);
@@ -1647,6 +1686,9 @@ pub const SymLinkFlags = struct {
/// A symbolic link (also known as a soft link) may point to an existing file or to a nonexistent
/// one; the latter case is known as a dangling link.
/// If `sym_link_path` exists, it will not be overwritten.
/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, both paths should be encoded as valid UTF-8.
/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn symLink(
self: Dir,
target_path: []const u8,
@@ -1662,7 +1704,7 @@ pub fn symLink(
// when converting to an NT namespaced path. CreateSymbolicLink in
// symLinkW will handle the necessary conversion.
var target_path_w: std.os.windows.PathSpace = undefined;
target_path_w.len = try std.unicode.utf8ToUtf16Le(&target_path_w.data, target_path);
target_path_w.len = try std.unicode.wtf8ToWtf16Le(&target_path_w.data, target_path);
target_path_w.data[target_path_w.len] = 0;
const sym_link_path_w = try std.os.windows.sliceToPrefixedFileW(self.fd, sym_link_path);
return self.symLinkW(target_path_w.span(), sym_link_path_w.span(), flags);
@@ -1698,7 +1740,7 @@ pub fn symLinkZ(
}
 
/// Windows-only. Same as `symLink` except the pathname parameters
/// are null-terminated, WTF16 encoded.
/// are WTF16 LE encoded.
pub fn symLinkW(
self: Dir,
/// WTF-16, does not need to be NT-prefixed. The NT-prefixing
@@ -1716,6 +1758,9 @@ pub const ReadLinkError = posix.ReadLinkError;
/// Read value of a symbolic link.
/// The return value is a slice of `buffer`, from index `0`.
/// Asserts that the path parameter has no null bytes.
/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `sub_path` should be encoded as valid UTF-8.
/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
pub fn readLink(self: Dir, sub_path: []const u8, buffer: []u8) ReadLinkError![]u8 {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
return self.readLinkWasi(sub_path, buffer);
@@ -1733,7 +1778,7 @@ pub fn readLinkWasi(self: Dir, sub_path: []const u8, buffer: []u8) ![]u8 {
return posix.readlinkat(self.fd, sub_path, buffer);
}
 
/// Same as `readLink`, except the `pathname` parameter is null-terminated.
/// Same as `readLink`, except the `sub_path_c` parameter is null-terminated.
pub fn readLinkZ(self: Dir, sub_path_c: [*:0]const u8, buffer: []u8) ![]u8 {
if (builtin.os.tag == .windows) {
const sub_path_w = try std.os.windows.cStrToPrefixedFileW(self.fd, sub_path_c);
@@ -1743,7 +1788,7 @@ pub fn readLinkZ(self: Dir, sub_path_c: [*:0]const u8, buffer: []u8) ![]u8 {
}
 
/// Windows-only. Same as `readLink` except the pathname parameter
/// is null-terminated, WTF16 encoded.
/// is WTF16 LE encoded.
pub fn readLinkW(self: Dir, sub_path_w: []const u16, buffer: []u8) ![]u8 {
    // Thin wrapper: hand the wide path and the output buffer to the Windows helper.
    const w = std.os.windows;
    return w.ReadLink(self.fd, sub_path_w, buffer);
}
@@ -1753,6 +1798,9 @@ pub fn readLinkW(self: Dir, sub_path_w: []const u16, buffer: []u8) ![]u8 {
/// the situation is ambiguous. It could either mean that the entire file was read, and
/// it exactly fits the buffer, or it could mean the buffer was not big enough for the
/// entire file.
/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `file_path` should be encoded as valid UTF-8.
/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
pub fn readFile(self: Dir, file_path: []const u8, buffer: []u8) ![]u8 {
var file = try self.openFile(file_path, .{});
defer file.close();
@@ -1763,6 +1811,9 @@ pub fn readFile(self: Dir, file_path: []const u8, buffer: []u8) ![]u8 {
 
/// Forwards to `readFileAllocOptions`, passing `@alignOf(u8)` and `null` for the
/// remaining options.
/// The returned buffer is owned by the caller on success.
/// Returns `error.FileTooBig` if the file is larger than `max_bytes`.
///
/// Encoding of `file_path`:
/// - On Windows, it should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// - On WASI, it should be encoded as valid UTF-8.
/// - On other platforms, it is an opaque sequence of bytes with no particular encoding.
pub fn readFileAlloc(self: Dir, allocator: mem.Allocator, file_path: []const u8, max_bytes: usize) ![]u8 {
    return Dir.readFileAllocOptions(
        self,
        allocator,
        file_path,
        max_bytes,
        null,
        @alignOf(u8),
        null,
    );
}
@@ -1772,6 +1823,9 @@ pub fn readFileAlloc(self: Dir, allocator: mem.Allocator, file_path: []const u8,
/// If `size_hint` is specified the initial buffer size is calculated using
/// that value, otherwise the effective file size is used instead.
/// Allows specifying alignment and a sentinel value.
/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `file_path` should be encoded as valid UTF-8.
/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
pub fn readFileAllocOptions(
self: Dir,
allocator: mem.Allocator,
@@ -1811,9 +1865,13 @@ pub const DeleteTreeError = error{
/// This error is unreachable if `sub_path` does not contain a path separator.
NotDir,
 
/// On Windows, file paths must be valid Unicode.
/// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
 
/// Windows-only; file paths provided by the user must be valid WTF-8.
/// https://simonsapin.github.io/wtf-8/
InvalidWtf8,
 
/// On Windows, file paths cannot contain these characters:
/// '/', '*', '?', '"', '<', '>', '|'
BadPathName,
@@ -1826,6 +1884,9 @@ pub const DeleteTreeError = error{
/// removes it. If it cannot be removed because it is a non-empty directory,
/// this function recursively removes its entries and then tries again.
/// This operation is not atomic on most file systems.
/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `sub_path` should be encoded as valid UTF-8.
/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void {
var initial_iterable_dir = (try self.deleteTreeOpenInitialSubpath(sub_path, .file)) orelse return;
 
@@ -1879,6 +1940,7 @@ pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void {
error.SystemResources,
error.Unexpected,
error.InvalidUtf8,
error.InvalidWtf8,
error.BadPathName,
error.NetworkNotFound,
error.DeviceBusy,
@@ -1910,6 +1972,7 @@ pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void {
 
error.AccessDenied,
error.InvalidUtf8,
error.InvalidWtf8,
error.SymLinkLoop,
error.NameTooLong,
error.SystemResources,
@@ -1973,6 +2036,7 @@ pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void {
error.SystemResources,
error.Unexpected,
error.InvalidUtf8,
error.InvalidWtf8,
error.BadPathName,
error.NetworkNotFound,
error.DeviceBusy,
@@ -1994,6 +2058,7 @@ pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void {
 
error.AccessDenied,
error.InvalidUtf8,
error.InvalidWtf8,
error.SymLinkLoop,
error.NameTooLong,
error.SystemResources,
@@ -2022,6 +2087,9 @@ pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void {
 
/// Variant of `deleteTree` that keeps only one `Iterator` active at a time in
/// order to minimize the function's stack size.
/// The trade-off: this is slower than `deleteTree`, but uses less stack space.
///
/// Encoding of `sub_path`:
/// - On Windows, it should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// - On WASI, it should be encoded as valid UTF-8.
/// - On other platforms, it is an opaque sequence of bytes with no particular encoding.
pub fn deleteTreeMinStackSize(self: Dir, sub_path: []const u8) DeleteTreeError!void {
    // `.file` is supplied as the kind hint.
    return Dir.deleteTreeMinStackSizeWithKindHint(self, sub_path, .file);
}
@@ -2074,6 +2142,7 @@ fn deleteTreeMinStackSizeWithKindHint(self: Dir, sub_path: []const u8, kind_hint
error.SystemResources,
error.Unexpected,
error.InvalidUtf8,
error.InvalidWtf8,
error.BadPathName,
error.NetworkNotFound,
error.DeviceBusy,
@@ -2102,6 +2171,7 @@ fn deleteTreeMinStackSizeWithKindHint(self: Dir, sub_path: []const u8, kind_hint
 
error.AccessDenied,
error.InvalidUtf8,
error.InvalidWtf8,
error.SymLinkLoop,
error.NameTooLong,
error.SystemResources,
@@ -2171,6 +2241,7 @@ fn deleteTreeOpenInitialSubpath(self: Dir, sub_path: []const u8, kind_hint: File
error.SystemResources,
error.Unexpected,
error.InvalidUtf8,
error.InvalidWtf8,
error.BadPathName,
error.DeviceBusy,
error.NetworkNotFound,
@@ -2189,6 +2260,7 @@ fn deleteTreeOpenInitialSubpath(self: Dir, sub_path: []const u8, kind_hint: File
 
error.AccessDenied,
error.InvalidUtf8,
error.InvalidWtf8,
error.SymLinkLoop,
error.NameTooLong,
error.SystemResources,
@@ -2209,6 +2281,9 @@ fn deleteTreeOpenInitialSubpath(self: Dir, sub_path: []const u8, kind_hint: File
pub const WriteFileError = File.WriteError || File.OpenError;
 
/// Deprecated: use `writeFile2`.
/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `sub_path` should be encoded as valid UTF-8.
/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
pub fn writeFile(self: Dir, sub_path: []const u8, data: []const u8) WriteFileError!void {
return writeFile2(self, .{
.sub_path = sub_path,
@@ -2218,6 +2293,9 @@ pub fn writeFile(self: Dir, sub_path: []const u8, data: []const u8) WriteFileErr
}
 
pub const WriteFileOptions = struct {
/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `sub_path` should be encoded as valid UTF-8.
/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
sub_path: []const u8,
data: []const u8,
flags: File.CreateFlags = .{},
@@ -2232,8 +2310,10 @@ pub fn writeFile2(self: Dir, options: WriteFileOptions) WriteFileError!void {
 
pub const AccessError = posix.AccessError;
 
/// Test accessing `path`.
/// `path` is UTF-8-encoded.
/// Test accessing `sub_path`.
/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `sub_path` should be encoded as valid UTF-8.
/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
/// Be careful of Time-Of-Check-Time-Of-Use race conditions when using this function.
/// For example, instead of testing if a file exists and then opening it, just
/// open it and handle the error for file not found.
@@ -2268,9 +2348,9 @@ pub fn accessZ(self: Dir, sub_path: [*:0]const u8, flags: File.OpenFlags) Access
}
 
/// Same as `access` except asserts the target OS is Windows and the path parameter is
/// * WTF-16 encoded
/// * WTF-16 LE encoded
/// * null-terminated
/// * NtDll prefixed
/// * relative or has the NT namespace prefix
/// TODO currently this ignores `flags`.
pub fn accessW(self: Dir, sub_path_w: [*:0]const u16, flags: File.OpenFlags) AccessError!void {
_ = flags;
@@ -2292,6 +2372,9 @@ pub const PrevStatus = enum {
/// atime, and mode of the source file so that the next call to `updateFile` will not need a copy.
/// Returns the previous status of the file before updating.
/// If any of the directories do not exist for dest_path, they are created.
/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, both paths should be encoded as valid UTF-8.
/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn updateFile(
source_dir: Dir,
source_path: []const u8,
@@ -2343,6 +2426,9 @@ pub const CopyFileError = File.OpenError || File.StatError ||
/// On Linux, until https://patchwork.kernel.org/patch/9636735/ is merged and readily available,
/// there is a possibility of power loss or application termination leaving temporary files present
/// in the same directory as dest_path.
/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, both paths should be encoded as valid UTF-8.
/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn copyFile(
source_dir: Dir,
source_path: []const u8,
@@ -2430,6 +2516,9 @@ pub const AtomicFileOptions = struct {
/// Always call `AtomicFile.deinit` to clean up, regardless of whether
/// `AtomicFile.finish` succeeded. `dest_path` must remain valid until
/// `AtomicFile.deinit` is called.
/// On Windows, `dest_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `dest_path` should be encoded as valid UTF-8.
/// On other platforms, `dest_path` is an opaque sequence of bytes with no particular encoding.
pub fn atomicFile(self: Dir, dest_path: []const u8, options: AtomicFileOptions) !AtomicFile {
if (fs.path.dirname(dest_path)) |dirname| {
const dir = if (options.make_path)
@@ -2461,6 +2550,9 @@ pub const StatFileError = File.OpenError || File.StatError || posix.FStatAtError
/// Symlinks are followed.
///
/// `sub_path` may be absolute, in which case `self` is ignored.
/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `sub_path` should be encoded as valid UTF-8.
/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.
pub fn statFile(self: Dir, sub_path: []const u8) StatFileError!Stat {
if (builtin.os.tag == .windows) {
var file = try self.openFile(sub_path, .{});
 
lib/std/fs/File.zig added: 1932, removed: 517, total 1415
@@ -40,8 +40,11 @@ pub const OpenError = error{
AccessDenied,
PipeBusy,
NameTooLong,
/// On Windows, file paths must be valid Unicode.
/// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
/// Windows-only; file paths provided by the user must be valid WTF-8.
/// https://simonsapin.github.io/wtf-8/
InvalidWtf8,
/// On Windows, file paths cannot contain these characters:
/// '/', '*', '?', '"', '<', '>', '|'
BadPathName,
 
lib/std/fs/path.zig added: 1932, removed: 517, total 1415
@@ -1,3 +1,17 @@
//! POSIX paths are arbitrary sequences of `u8` with no particular encoding.
//!
//! Windows paths are arbitrary sequences of `u16` (WTF-16).
//! For cross-platform APIs that deal with sequences of `u8`, Windows
//! paths are encoded by Zig as [WTF-8](https://simonsapin.github.io/wtf-8/).
//! WTF-8 is a superset of UTF-8 that allows encoding surrogate codepoints,
//! which enables lossless roundtripping when converting to/from WTF-16
//! (as long as the WTF-8 encoded surrogate codepoints do not form a pair).
//!
//! WASI paths are sequences of valid Unicode scalar values,
//! which means that WASI is unable to handle paths that cannot be
//! encoded as well-formed UTF-8/UTF-16.
//! https://github.com/WebAssembly/wasi-filesystem/issues/17#issuecomment-1430639353
 
const builtin = @import("builtin");
const std = @import("../std.zig");
const debug = std.debug;
@@ -438,7 +452,7 @@ fn networkShareServersEql(ns1: []const u8, ns2: []const u8) bool {
var it1 = mem.tokenizeScalar(u8, ns1, sep1);
var it2 = mem.tokenizeScalar(u8, ns2, sep2);
 
return windows.eqlIgnoreCaseUtf8(it1.next().?, it2.next().?);
return windows.eqlIgnoreCaseWtf8(it1.next().?, it2.next().?);
}
 
fn compareDiskDesignators(kind: WindowsPath.Kind, p1: []const u8, p2: []const u8) bool {
@@ -458,7 +472,7 @@ fn compareDiskDesignators(kind: WindowsPath.Kind, p1: []const u8, p2: []const u8
var it1 = mem.tokenizeScalar(u8, p1, sep1);
var it2 = mem.tokenizeScalar(u8, p2, sep2);
 
return windows.eqlIgnoreCaseUtf8(it1.next().?, it2.next().?) and windows.eqlIgnoreCaseUtf8(it1.next().?, it2.next().?);
return windows.eqlIgnoreCaseWtf8(it1.next().?, it2.next().?) and windows.eqlIgnoreCaseWtf8(it1.next().?, it2.next().?);
},
}
}
@@ -1099,7 +1113,7 @@ pub fn relativeWindows(allocator: Allocator, from: []const u8, to: []const u8) !
const from_component = from_it.next() orelse return allocator.dupe(u8, to_it.rest());
const to_rest = to_it.rest();
if (to_it.next()) |to_component| {
if (windows.eqlIgnoreCaseUtf8(from_component, to_component))
if (windows.eqlIgnoreCaseWtf8(from_component, to_component))
continue;
}
var up_index_end = "..".len;
@@ -1564,14 +1578,14 @@ pub fn ComponentIterator(comptime path_type: PathType, comptime T: type) type {
};
}
 
pub const NativeUtf8ComponentIterator = ComponentIterator(switch (native_os) {
pub const NativeComponentIterator = ComponentIterator(switch (native_os) {
.windows => .windows,
.uefi => .uefi,
else => .posix,
}, u8);
 
pub fn componentIterator(path: []const u8) !NativeUtf8ComponentIterator {
return NativeUtf8ComponentIterator.init(path);
pub fn componentIterator(path: []const u8) !NativeComponentIterator {
return NativeComponentIterator.init(path);
}
 
test "ComponentIterator posix" {
@@ -1826,7 +1840,7 @@ test "ComponentIterator windows" {
}
}
 
test "ComponentIterator windows UTF-16" {
test "ComponentIterator windows WTF-16" {
// TODO: Fix on big endian architectures
if (builtin.cpu.arch.endian() != .little) {
return error.SkipZigTest;
@@ -1925,3 +1939,18 @@ test "ComponentIterator roots" {
try std.testing.expectEqualStrings("//a/b//", it.root().?);
}
}
 
/// Format a path encoded as bytes for display as UTF-8.
/// Returns a Formatter for the given path. The path will be converted to valid UTF-8
/// during formatting. This is a lossy conversion if the path contains any ill-formed UTF-8.
/// Ill-formed UTF-8 byte sequences are replaced by the replacement character (U+FFFD)
/// according to "U+FFFD Substitution of Maximal Subparts" from Chapter 3 of
/// the Unicode standard, and as specified by https://encoding.spec.whatwg.org/#utf-8-decoder
pub const fmtAsUtf8Lossy = std.unicode.fmtUtf8;
 
/// Format a path encoded as WTF-16 LE for display as UTF-8.
/// Returns a Formatter for a (potentially ill-formed) UTF-16 LE path.
/// The path will be converted to valid UTF-8 during formatting. This is
/// a lossy conversion if the path contains any unpaired surrogates.
/// Unpaired surrogates are replaced by the replacement character (U+FFFD).
pub const fmtWtf16LeAsUtf8Lossy = std.unicode.fmtUtf16Le;
 
lib/std/fs/test.zig added: 1932, removed: 517, total 1415
@@ -26,39 +26,39 @@ const PathType = enum {
}
 
pub const TransformError = std.os.RealPathError || error{OutOfMemory};
pub const TransformFn = fn (allocator: mem.Allocator, dir: Dir, relative_path: []const u8) TransformError![]const u8;
pub const TransformFn = fn (allocator: mem.Allocator, dir: Dir, relative_path: [:0]const u8) TransformError![:0]const u8;
 
pub fn getTransformFn(comptime path_type: PathType) TransformFn {
switch (path_type) {
.relative => return struct {
fn transform(allocator: mem.Allocator, dir: Dir, relative_path: []const u8) TransformError![]const u8 {
fn transform(allocator: mem.Allocator, dir: Dir, relative_path: [:0]const u8) TransformError![:0]const u8 {
_ = allocator;
_ = dir;
return relative_path;
}
}.transform,
.absolute => return struct {
fn transform(allocator: mem.Allocator, dir: Dir, relative_path: []const u8) TransformError![]const u8 {
fn transform(allocator: mem.Allocator, dir: Dir, relative_path: [:0]const u8) TransformError![:0]const u8 {
// The final path may not actually exist which would cause realpath to fail.
// So instead, we get the path of the dir and join it with the relative path.
var fd_path_buf: [fs.MAX_PATH_BYTES]u8 = undefined;
const dir_path = try os.getFdPath(dir.fd, &fd_path_buf);
return fs.path.join(allocator, &.{ dir_path, relative_path });
return fs.path.joinZ(allocator, &.{ dir_path, relative_path });
}
}.transform,
.unc => return struct {
fn transform(allocator: mem.Allocator, dir: Dir, relative_path: []const u8) TransformError![]const u8 {
fn transform(allocator: mem.Allocator, dir: Dir, relative_path: [:0]const u8) TransformError![:0]const u8 {
// Any drive absolute path (C:\foo) can be converted into a UNC path by
// using '127.0.0.1' as the server name and '<drive letter>$' as the share name.
var fd_path_buf: [fs.MAX_PATH_BYTES]u8 = undefined;
const dir_path = try os.getFdPath(dir.fd, &fd_path_buf);
const windows_path_type = std.os.windows.getUnprefixedPathType(u8, dir_path);
switch (windows_path_type) {
.unc_absolute => return fs.path.join(allocator, &.{ dir_path, relative_path }),
.unc_absolute => return fs.path.joinZ(allocator, &.{ dir_path, relative_path }),
.drive_absolute => {
// `C:\<...>` -> `\\127.0.0.1\C$\<...>`
const prepended = "\\\\127.0.0.1\\";
var path = try fs.path.join(allocator, &.{ prepended, dir_path, relative_path });
var path = try fs.path.joinZ(allocator, &.{ prepended, dir_path, relative_path });
path[prepended.len + 1] = '$';
return path;
},
@@ -96,7 +96,7 @@ const TestContext = struct {
/// Returns the `relative_path` transformed into the TestContext's `path_type`.
/// The result is allocated by the TestContext's arena and will be free'd during
/// `TestContext.deinit`.
pub fn transformPath(self: *TestContext, relative_path: []const u8) ![]const u8 {
pub fn transformPath(self: *TestContext, relative_path: [:0]const u8) ![:0]const u8 {
return self.transform_fn(self.arena.allocator(), self.dir, relative_path);
}
};
@@ -1001,6 +1001,16 @@ test "openSelfExe" {
self_exe_file.close();
}
 
test "selfExePath" {
    // Not run when targeting WASI.
    if (builtin.os.tag == .wasi) return error.SkipZigTest;

    // Buffer-backed variant: the result is written into caller-provided storage.
    var path_storage: [fs.MAX_PATH_BYTES]u8 = undefined;
    const from_buffer = try std.fs.selfExePath(&path_storage);

    // Allocating variant: the result is freed through the same allocator on exit.
    const from_allocator = try std.fs.selfExePathAlloc(testing.allocator);
    defer testing.allocator.free(from_allocator);

    // Both variants must report the same bytes.
    try testing.expectEqualSlices(u8, from_buffer, from_allocator);
}
 
test "deleteTree does not follow symlinks" {
var tmp = tmpDir(.{});
defer tmp.cleanup();
@@ -1907,3 +1917,111 @@ test "delete a setAsCwd directory on Windows" {
// Close the parent "tmp" so we don't leak the HANDLE.
tmp.parent_dir.close();
}
 
// Checks that path-taking filesystem APIs reject an ill-formed path with
// `error.InvalidUtf8` on WASI and `error.InvalidWtf8` on Windows.
// The test is skipped on every other OS.
test "invalid UTF-8/WTF-8 paths" {
// The expected error depends on the target OS; any other target skips the test.
const expected_err = switch (builtin.os.tag) {
.wasi => error.InvalidUtf8,
.windows => error.InvalidWtf8,
else => return error.SkipZigTest,
};

// The same set of checks is run once per supported path type via `TestContext`.
try testWithAllSupportedPathTypes(struct {
fn impl(ctx: *TestContext) !void {
// This is both invalid UTF-8 and WTF-8, since \xFF is an invalid start byte
const invalid_path = try ctx.transformPath("\xFF");

// --- `Dir` methods, called on the test directory ---

try testing.expectError(expected_err, ctx.dir.openFile(invalid_path, .{}));
try testing.expectError(expected_err, ctx.dir.openFileZ(invalid_path, .{}));

try testing.expectError(expected_err, ctx.dir.createFile(invalid_path, .{}));
try testing.expectError(expected_err, ctx.dir.createFileZ(invalid_path, .{}));

try testing.expectError(expected_err, ctx.dir.makeDir(invalid_path));
try testing.expectError(expected_err, ctx.dir.makeDirZ(invalid_path));

try testing.expectError(expected_err, ctx.dir.makePath(invalid_path));
try testing.expectError(expected_err, ctx.dir.makeOpenPath(invalid_path, .{}));

try testing.expectError(expected_err, ctx.dir.openDir(invalid_path, .{}));
try testing.expectError(expected_err, ctx.dir.openDirZ(invalid_path, .{}));

try testing.expectError(expected_err, ctx.dir.deleteFile(invalid_path));
try testing.expectError(expected_err, ctx.dir.deleteFileZ(invalid_path));

try testing.expectError(expected_err, ctx.dir.deleteDir(invalid_path));
try testing.expectError(expected_err, ctx.dir.deleteDirZ(invalid_path));

// For the two-path APIs below, the invalid path is used for both arguments.
try testing.expectError(expected_err, ctx.dir.rename(invalid_path, invalid_path));
try testing.expectError(expected_err, ctx.dir.renameZ(invalid_path, invalid_path));

try testing.expectError(expected_err, ctx.dir.symLink(invalid_path, invalid_path, .{}));
try testing.expectError(expected_err, ctx.dir.symLinkZ(invalid_path, invalid_path, .{}));
// The `*Wasi` variants are only exercised when targeting WASI.
if (builtin.os.tag == .wasi) {
try testing.expectError(expected_err, ctx.dir.symLinkWasi(invalid_path, invalid_path, .{}));
}

// A zero-length output buffer is passed; each call is expected to return the error.
try testing.expectError(expected_err, ctx.dir.readLink(invalid_path, &[_]u8{}));
try testing.expectError(expected_err, ctx.dir.readLinkZ(invalid_path, &[_]u8{}));
if (builtin.os.tag == .wasi) {
try testing.expectError(expected_err, ctx.dir.readLinkWasi(invalid_path, &[_]u8{}));
}

try testing.expectError(expected_err, ctx.dir.readFile(invalid_path, &[_]u8{}));
try testing.expectError(expected_err, ctx.dir.readFileAlloc(testing.allocator, invalid_path, 0));

try testing.expectError(expected_err, ctx.dir.deleteTree(invalid_path));
try testing.expectError(expected_err, ctx.dir.deleteTreeMinStackSize(invalid_path));

try testing.expectError(expected_err, ctx.dir.writeFile(invalid_path, ""));
try testing.expectError(expected_err, ctx.dir.writeFile2(.{
.sub_path = invalid_path,
.data = "",
}));

try testing.expectError(expected_err, ctx.dir.access(invalid_path, .{}));
try testing.expectError(expected_err, ctx.dir.accessZ(invalid_path, .{}));

try testing.expectError(expected_err, ctx.dir.updateFile(invalid_path, ctx.dir, invalid_path, .{}));
try testing.expectError(expected_err, ctx.dir.copyFile(invalid_path, ctx.dir, invalid_path, .{}));

try testing.expectError(expected_err, ctx.dir.statFile(invalid_path));

// The realpath variants are not exercised when targeting WASI.
if (builtin.os.tag != .wasi) {
try testing.expectError(expected_err, ctx.dir.realpath(invalid_path, &[_]u8{}));
try testing.expectError(expected_err, ctx.dir.realpathZ(invalid_path, &[_]u8{}));
try testing.expectError(expected_err, ctx.dir.realpathAlloc(testing.allocator, invalid_path));
}

// --- `fs` namespace functions taking explicit directory handles ---

try testing.expectError(expected_err, fs.rename(ctx.dir, invalid_path, ctx.dir, invalid_path));
try testing.expectError(expected_err, fs.renameZ(ctx.dir, invalid_path, ctx.dir, invalid_path));

// --- `fs.*Absolute` functions ---
// Only exercised when not targeting WASI and when the context's path type
// is something other than `.relative`.
if (builtin.os.tag != .wasi and ctx.path_type != .relative) {
try testing.expectError(expected_err, fs.updateFileAbsolute(invalid_path, invalid_path, .{}));
try testing.expectError(expected_err, fs.copyFileAbsolute(invalid_path, invalid_path, .{}));
try testing.expectError(expected_err, fs.makeDirAbsolute(invalid_path));
try testing.expectError(expected_err, fs.makeDirAbsoluteZ(invalid_path));
try testing.expectError(expected_err, fs.deleteDirAbsolute(invalid_path));
try testing.expectError(expected_err, fs.deleteDirAbsoluteZ(invalid_path));
try testing.expectError(expected_err, fs.renameAbsolute(invalid_path, invalid_path));
try testing.expectError(expected_err, fs.renameAbsoluteZ(invalid_path, invalid_path));
try testing.expectError(expected_err, fs.openDirAbsolute(invalid_path, .{}));
try testing.expectError(expected_err, fs.openDirAbsoluteZ(invalid_path, .{}));
try testing.expectError(expected_err, fs.openFileAbsolute(invalid_path, .{}));
try testing.expectError(expected_err, fs.openFileAbsoluteZ(invalid_path, .{}));
try testing.expectError(expected_err, fs.accessAbsolute(invalid_path, .{}));
try testing.expectError(expected_err, fs.accessAbsoluteZ(invalid_path, .{}));
try testing.expectError(expected_err, fs.createFileAbsolute(invalid_path, .{}));
try testing.expectError(expected_err, fs.createFileAbsoluteZ(invalid_path, .{}));
try testing.expectError(expected_err, fs.deleteFileAbsolute(invalid_path));
try testing.expectError(expected_err, fs.deleteFileAbsoluteZ(invalid_path));
try testing.expectError(expected_err, fs.deleteTreeAbsolute(invalid_path));
// Unlike the `Dir.readLink` checks above, these use a full-size buffer.
var readlink_buf: [fs.MAX_PATH_BYTES]u8 = undefined;
try testing.expectError(expected_err, fs.readLinkAbsolute(invalid_path, &readlink_buf));
try testing.expectError(expected_err, fs.readLinkAbsoluteZ(invalid_path, &readlink_buf));
try testing.expectError(expected_err, fs.symLinkAbsolute(invalid_path, invalid_path, .{}));
try testing.expectError(expected_err, fs.symLinkAbsoluteZ(invalid_path, invalid_path, .{}));
try testing.expectError(expected_err, fs.realpathAlloc(testing.allocator, invalid_path));
}
}
}.impl);
}
 
lib/std/os.zig added: 1932, removed: 517, total 1415
@@ -3,7 +3,7 @@
//! * Convert "errno"-style error codes into Zig errors.
//! * When null-terminated byte buffers are required, provide APIs which accept
//! slices as well as APIs which accept null-terminated byte buffers. Same goes
//! for UTF-16LE encoding.
//! for WTF-16LE encoding.
//! * Where operating systems share APIs, e.g. POSIX, these thin wrappers provide
//! cross platform abstracting.
//! * When there exists a corresponding libc function and linking libc, the libc
@@ -498,6 +498,7 @@ fn fchmodat2(dirfd: fd_t, path: []const u8, mode: mode_t, flags: u32) FChmodAtEr
const stat = fstatatZ(pathfd, "", AT.EMPTY_PATH) catch |err| switch (err) {
error.NameTooLong => unreachable,
error.FileNotFound => unreachable,
error.InvalidUtf8 => unreachable,
else => |e| return e,
};
if ((stat.mode & S.IFMT) == S.IFLNK)
@@ -1614,9 +1615,16 @@ pub const OpenError = error{
/// The underlying filesystem does not support file locks
FileLocksNotSupported,
 
/// Path contains characters that are disallowed by the underlying filesystem.
BadPathName,
 
/// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
 
/// Windows-only; file paths provided by the user must be valid WTF-8.
/// https://simonsapin.github.io/wtf-8/
InvalidWtf8,
 
/// On Windows, `\\server` or `\\server\share` was not found.
NetworkNotFound,
 
@@ -1634,6 +1642,9 @@ pub const OpenError = error{
} || UnexpectedError;
 
/// Open and possibly create a file. Keeps trying if it gets interrupted.
/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `file_path` should be encoded as valid UTF-8.
/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
/// See also `openZ`.
pub fn open(file_path: []const u8, flags: O, perm: mode_t) OpenError!fd_t {
if (builtin.os.tag == .windows) {
@@ -1646,6 +1657,9 @@ pub fn open(file_path: []const u8, flags: O, perm: mode_t) OpenError!fd_t {
}
 
/// Open and possibly create a file. Keeps trying if it gets interrupted.
/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `file_path` should be encoded as valid UTF-8.
/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
/// See also `open`.
pub fn openZ(file_path: [*:0]const u8, flags: O, perm: mode_t) OpenError!fd_t {
if (builtin.os.tag == .windows) {
@@ -1687,6 +1701,9 @@ pub fn openZ(file_path: [*:0]const u8, flags: O, perm: mode_t) OpenError!fd_t {
 
/// Open and possibly create a file. Keeps trying if it gets interrupted.
/// `file_path` is relative to the open directory handle `dir_fd`.
/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `file_path` should be encoded as valid UTF-8.
/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
/// See also `openatZ`.
pub fn openat(dir_fd: fd_t, file_path: []const u8, flags: O, mode: mode_t) OpenError!fd_t {
if (builtin.os.tag == .windows) {
@@ -1829,6 +1846,7 @@ pub fn openatWasi(
.EXIST => return error.PathAlreadyExists,
.BUSY => return error.DeviceBusy,
.NOTCAPABLE => return error.AccessDenied,
.ILSEQ => return error.InvalidUtf8,
else => |err| return unexpectedErrno(err),
}
}
@@ -1836,6 +1854,9 @@ pub fn openatWasi(
 
/// Open and possibly create a file. Keeps trying if it gets interrupted.
/// `file_path` is relative to the open directory handle `dir_fd`.
/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `file_path` should be encoded as valid UTF-8.
/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
/// See also `openat`.
pub fn openatZ(dir_fd: fd_t, file_path: [*:0]const u8, flags: O, mode: mode_t) OpenError!fd_t {
if (builtin.os.tag == .windows) {
@@ -2156,13 +2177,23 @@ pub const SymLinkError = error{
ReadOnlyFileSystem,
NotDir,
NameTooLong,
 
/// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
 
/// Windows-only; file paths provided by the user must be valid WTF-8.
/// https://simonsapin.github.io/wtf-8/
InvalidWtf8,
 
BadPathName,
} || UnexpectedError;
 
/// Creates a symbolic link named `sym_link_path` which contains the string `target_path`.
/// A symbolic link (also known as a soft link) may point to an existing file or to a nonexistent
/// one; the latter case is known as a dangling link.
/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, both paths should be encoded as valid UTF-8.
/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
/// If `sym_link_path` exists, it will not be overwritten.
/// See also `symlinkZ`.
pub fn symlink(target_path: []const u8, sym_link_path: []const u8) SymLinkError!void {
@@ -2200,6 +2231,10 @@ pub fn symlinkZ(target_path: [*:0]const u8, sym_link_path: [*:0]const u8) SymLin
.NOMEM => return error.SystemResources,
.NOSPC => return error.NoSpaceLeft,
.ROFS => return error.ReadOnlyFileSystem,
.ILSEQ => |err| if (builtin.os.tag == .wasi)
return error.InvalidUtf8
else
return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
@@ -2208,6 +2243,9 @@ pub fn symlinkZ(target_path: [*:0]const u8, sym_link_path: [*:0]const u8) SymLin
/// `target_path` **relative** to `newdirfd` directory handle.
/// A symbolic link (also known as a soft link) may point to an existing file or to a nonexistent
/// one; the latter case is known as a dangling link.
/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, both paths should be encoded as valid UTF-8.
/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
/// If `sym_link_path` exists, it will not be overwritten.
/// See also `symlinkatWasi`, `symlinkatZ` and `symlinkatW`.
pub fn symlinkat(target_path: []const u8, newdirfd: fd_t, sym_link_path: []const u8) SymLinkError!void {
@@ -2242,6 +2280,7 @@ pub fn symlinkatWasi(target_path: []const u8, newdirfd: fd_t, sym_link_path: []c
.NOSPC => return error.NoSpaceLeft,
.ROFS => return error.ReadOnlyFileSystem,
.NOTCAPABLE => return error.AccessDenied,
.ILSEQ => return error.InvalidUtf8,
else => |err| return unexpectedErrno(err),
}
}
@@ -2270,6 +2309,10 @@ pub fn symlinkatZ(target_path: [*:0]const u8, newdirfd: fd_t, sym_link_path: [*:
.NOMEM => return error.SystemResources,
.NOSPC => return error.NoSpaceLeft,
.ROFS => return error.ReadOnlyFileSystem,
.ILSEQ => |err| if (builtin.os.tag == .wasi)
return error.InvalidUtf8
else
return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
@@ -2287,8 +2330,13 @@ pub const LinkError = UnexpectedError || error{
NoSpaceLeft,
ReadOnlyFileSystem,
NotSameFileSystem,
 
/// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
};
 
/// On WASI, both paths should be encoded as valid UTF-8.
/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn linkZ(oldpath: [*:0]const u8, newpath: [*:0]const u8, flags: i32) LinkError!void {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
return link(mem.sliceTo(oldpath, 0), mem.sliceTo(newpath, 0), flags);
@@ -2310,10 +2358,16 @@ pub fn linkZ(oldpath: [*:0]const u8, newpath: [*:0]const u8, flags: i32) LinkErr
.ROFS => return error.ReadOnlyFileSystem,
.XDEV => return error.NotSameFileSystem,
.INVAL => unreachable,
.ILSEQ => |err| if (builtin.os.tag == .wasi)
return error.InvalidUtf8
else
return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
 
/// On WASI, both paths should be encoded as valid UTF-8.
/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn link(oldpath: []const u8, newpath: []const u8, flags: i32) LinkError!void {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
return linkat(wasi.AT.FDCWD, oldpath, wasi.AT.FDCWD, newpath, flags) catch |err| switch (err) {
@@ -2328,6 +2382,8 @@ pub fn link(oldpath: []const u8, newpath: []const u8, flags: i32) LinkError!void
 
pub const LinkatError = LinkError || error{NotDir};
 
/// On WASI, both paths should be encoded as valid UTF-8.
/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn linkatZ(
olddir: fd_t,
oldpath: [*:0]const u8,
@@ -2356,10 +2412,16 @@ pub fn linkatZ(
.ROFS => return error.ReadOnlyFileSystem,
.XDEV => return error.NotSameFileSystem,
.INVAL => unreachable,
.ILSEQ => |err| if (builtin.os.tag == .wasi)
return error.InvalidUtf8
else
return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
 
/// On WASI, both paths should be encoded as valid UTF-8.
/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn linkat(
olddir: fd_t,
oldpath: []const u8,
@@ -2399,6 +2461,7 @@ pub fn linkat(
.ROFS => return error.ReadOnlyFileSystem,
.XDEV => return error.NotSameFileSystem,
.INVAL => unreachable,
.ILSEQ => return error.InvalidUtf8,
else => |err| return unexpectedErrno(err),
}
}
@@ -2422,9 +2485,13 @@ pub const UnlinkError = error{
SystemResources,
ReadOnlyFileSystem,
 
/// On Windows, file paths must be valid Unicode.
/// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
 
/// Windows-only; file paths provided by the user must be valid WTF-8.
/// https://simonsapin.github.io/wtf-8/
InvalidWtf8,
 
/// On Windows, file paths cannot contain these characters:
/// '/', '*', '?', '"', '<', '>', '|'
BadPathName,
@@ -2434,6 +2501,9 @@ pub const UnlinkError = error{
} || UnexpectedError;
 
/// Delete a name and possibly the file it refers to.
/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `file_path` should be encoded as valid UTF-8.
/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
/// See also `unlinkZ`.
pub fn unlink(file_path: []const u8) UnlinkError!void {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
@@ -2450,7 +2520,7 @@ pub fn unlink(file_path: []const u8) UnlinkError!void {
}
}
 
/// Same as `unlink` except the parameter is a null terminated UTF8-encoded string.
/// Same as `unlink` except the parameter is null terminated.
pub fn unlinkZ(file_path: [*:0]const u8) UnlinkError!void {
if (builtin.os.tag == .windows) {
const file_path_w = try windows.cStrToPrefixedFileW(null, file_path);
@@ -2473,11 +2543,15 @@ pub fn unlinkZ(file_path: [*:0]const u8) UnlinkError!void {
.NOTDIR => return error.NotDir,
.NOMEM => return error.SystemResources,
.ROFS => return error.ReadOnlyFileSystem,
.ILSEQ => |err| if (builtin.os.tag == .wasi)
return error.InvalidUtf8
else
return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
 
/// Windows-only. Same as `unlink` except the parameter is null-terminated, WTF16 encoded.
/// Windows-only. Same as `unlink` except the parameter is null-terminated, WTF16 LE encoded.
pub fn unlinkW(file_path_w: []const u16) UnlinkError!void {
windows.DeleteFile(file_path_w, .{ .dir = std.fs.cwd().fd }) catch |err| switch (err) {
error.DirNotEmpty => unreachable, // we're not passing .remove_dir = true
@@ -2491,6 +2565,9 @@ pub const UnlinkatError = UnlinkError || error{
};
 
/// Delete a file name and possibly the file it refers to, based on an open directory handle.
/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `file_path` should be encoded as valid UTF-8.
/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
/// Asserts that the path parameter has no null bytes.
pub fn unlinkat(dirfd: fd_t, file_path: []const u8, flags: u32) UnlinkatError!void {
if (builtin.os.tag == .windows) {
@@ -2528,6 +2605,7 @@ pub fn unlinkatWasi(dirfd: fd_t, file_path: []const u8, flags: u32) UnlinkatErro
.ROFS => return error.ReadOnlyFileSystem,
.NOTEMPTY => return error.DirNotEmpty,
.NOTCAPABLE => return error.AccessDenied,
.ILSEQ => return error.InvalidUtf8,
 
.INVAL => unreachable, // invalid flags, or pathname has . as last component
.BADF => unreachable, // always a race condition
@@ -2560,6 +2638,10 @@ pub fn unlinkatZ(dirfd: fd_t, file_path_c: [*:0]const u8, flags: u32) UnlinkatEr
.ROFS => return error.ReadOnlyFileSystem,
.EXIST => return error.DirNotEmpty,
.NOTEMPTY => return error.DirNotEmpty,
.ILSEQ => |err| if (builtin.os.tag == .wasi)
return error.InvalidUtf8
else
return unexpectedErrno(err),
 
.INVAL => unreachable, // invalid flags, or pathname has . as last component
.BADF => unreachable, // always a race condition
@@ -2568,7 +2650,7 @@ pub fn unlinkatZ(dirfd: fd_t, file_path_c: [*:0]const u8, flags: u32) UnlinkatEr
}
}
 
/// Same as `unlinkat` but `sub_path_w` is UTF16LE, NT prefixed. Windows only.
/// Same as `unlinkat` but `sub_path_w` is WTF16LE, NT prefixed. Windows only.
pub fn unlinkatW(dirfd: fd_t, sub_path_w: []const u16, flags: u32) UnlinkatError!void {
const remove_dir = (flags & AT.REMOVEDIR) != 0;
return windows.DeleteFile(sub_path_w, .{ .dir = dirfd, .remove_dir = remove_dir });
@@ -2594,7 +2676,11 @@ pub const RenameError = error{
PathAlreadyExists,
ReadOnlyFileSystem,
RenameAcrossMountPoints,
/// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
/// Windows-only; file paths provided by the user must be valid WTF-8.
/// https://simonsapin.github.io/wtf-8/
InvalidWtf8,
BadPathName,
NoDevice,
SharingViolation,
@@ -2610,6 +2696,9 @@ pub const RenameError = error{
} || UnexpectedError;
 
/// Change the name or location of a file.
/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, both paths should be encoded as valid UTF-8.
/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn rename(old_path: []const u8, new_path: []const u8) RenameError!void {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
return renameat(wasi.AT.FDCWD, old_path, wasi.AT.FDCWD, new_path);
@@ -2624,7 +2713,7 @@ pub fn rename(old_path: []const u8, new_path: []const u8) RenameError!void {
}
}
 
/// Same as `rename` except the parameters are null-terminated byte arrays.
/// Same as `rename` except the parameters are null-terminated.
pub fn renameZ(old_path: [*:0]const u8, new_path: [*:0]const u8) RenameError!void {
if (builtin.os.tag == .windows) {
const old_path_w = try windows.cStrToPrefixedFileW(null, old_path);
@@ -2653,11 +2742,15 @@ pub fn renameZ(old_path: [*:0]const u8, new_path: [*:0]const u8) RenameError!voi
.NOTEMPTY => return error.PathAlreadyExists,
.ROFS => return error.ReadOnlyFileSystem,
.XDEV => return error.RenameAcrossMountPoints,
.ILSEQ => |err| if (builtin.os.tag == .wasi)
return error.InvalidUtf8
else
return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
 
/// Same as `rename` except the parameters are null-terminated UTF16LE encoded byte arrays.
/// Same as `rename` except the parameters are null-terminated and WTF16LE encoded.
/// Assumes target is Windows.
pub fn renameW(old_path: [*:0]const u16, new_path: [*:0]const u16) RenameError!void {
const flags = windows.MOVEFILE_REPLACE_EXISTING | windows.MOVEFILE_WRITE_THROUGH;
@@ -2665,6 +2758,9 @@ pub fn renameW(old_path: [*:0]const u16, new_path: [*:0]const u16) RenameError!v
}
 
/// Change the name or location of a file based on an open directory handle.
/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, both paths should be encoded as valid UTF-8.
/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.
pub fn renameat(
old_dir_fd: fd_t,
old_path: []const u8,
@@ -2710,11 +2806,12 @@ pub fn renameatWasi(old: RelativePathWasi, new: RelativePathWasi) RenameError!vo
.ROFS => return error.ReadOnlyFileSystem,
.XDEV => return error.RenameAcrossMountPoints,
.NOTCAPABLE => return error.AccessDenied,
.ILSEQ => return error.InvalidUtf8,
else => |err| return unexpectedErrno(err),
}
}
 
/// Same as `renameat` except the parameters are null-terminated byte arrays.
/// Same as `renameat` except the parameters are null-terminated.
pub fn renameatZ(
old_dir_fd: fd_t,
old_path: [*:0]const u8,
@@ -2749,6 +2846,10 @@ pub fn renameatZ(
.NOTEMPTY => return error.PathAlreadyExists,
.ROFS => return error.ReadOnlyFileSystem,
.XDEV => return error.RenameAcrossMountPoints,
.ILSEQ => |err| if (builtin.os.tag == .wasi)
return error.InvalidUtf8
else
return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
@@ -2860,6 +2961,9 @@ pub fn renameatW(
}
}
 
/// On Windows, `sub_dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `sub_dir_path` should be encoded as valid UTF-8.
/// On other platforms, `sub_dir_path` is an opaque sequence of bytes with no particular encoding.
pub fn mkdirat(dir_fd: fd_t, sub_dir_path: []const u8, mode: u32) MakeDirError!void {
if (builtin.os.tag == .windows) {
const sub_dir_path_w = try windows.sliceToPrefixedFileW(dir_fd, sub_dir_path);
@@ -2891,14 +2995,16 @@ pub fn mkdiratWasi(dir_fd: fd_t, sub_dir_path: []const u8, mode: u32) MakeDirErr
.NOTDIR => return error.NotDir,
.ROFS => return error.ReadOnlyFileSystem,
.NOTCAPABLE => return error.AccessDenied,
.ILSEQ => return error.InvalidUtf8,
else => |err| return unexpectedErrno(err),
}
}
 
/// Same as `mkdirat` except the parameters are null-terminated.
pub fn mkdiratZ(dir_fd: fd_t, sub_dir_path: [*:0]const u8, mode: u32) MakeDirError!void {
if (builtin.os.tag == .windows) {
const sub_dir_path_w = try windows.cStrToPrefixedFileW(dir_fd, sub_dir_path);
return mkdiratW(dir_fd, sub_dir_path_w.span().ptr, mode);
return mkdiratW(dir_fd, sub_dir_path_w.span(), mode);
} else if (builtin.os.tag == .wasi and !builtin.link_libc) {
return mkdirat(dir_fd, mem.sliceTo(sub_dir_path, 0), mode);
}
@@ -2920,10 +3026,15 @@ pub fn mkdiratZ(dir_fd: fd_t, sub_dir_path: [*:0]const u8, mode: u32) MakeDirErr
.ROFS => return error.ReadOnlyFileSystem,
// dragonfly: when dir_fd is unlinked from filesystem
.NOTCONN => return error.FileNotFound,
.ILSEQ => |err| if (builtin.os.tag == .wasi)
return error.InvalidUtf8
else
return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
 
/// Windows-only. Same as `mkdirat` except the parameter is WTF16 LE encoded.
pub fn mkdiratW(dir_fd: fd_t, sub_path_w: []const u16, mode: u32) MakeDirError!void {
_ = mode;
const sub_dir_handle = windows.OpenFile(sub_path_w, .{
@@ -2955,7 +3066,11 @@ pub const MakeDirError = error{
NoSpaceLeft,
NotDir,
ReadOnlyFileSystem,
/// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
/// Windows-only; file paths provided by the user must be valid WTF-8.
/// https://simonsapin.github.io/wtf-8/
InvalidWtf8,
BadPathName,
NoDevice,
/// On Windows, `\\server` or `\\server\share` was not found.
@@ -2964,6 +3079,9 @@ pub const MakeDirError = error{
 
/// Create a directory.
/// `mode` is ignored on Windows and WASI.
/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `dir_path` should be encoded as valid UTF-8.
/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding.
pub fn mkdir(dir_path: []const u8, mode: u32) MakeDirError!void {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
return mkdirat(wasi.AT.FDCWD, dir_path, mode);
@@ -2976,7 +3094,10 @@ pub fn mkdir(dir_path: []const u8, mode: u32) MakeDirError!void {
}
}
 
/// Same as `mkdir` but the parameter is a null-terminated UTF8-encoded string.
/// Same as `mkdir` but the parameter is null-terminated.
/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `dir_path` should be encoded as valid UTF-8.
/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding.
pub fn mkdirZ(dir_path: [*:0]const u8, mode: u32) MakeDirError!void {
if (builtin.os.tag == .windows) {
const dir_path_w = try windows.cStrToPrefixedFileW(null, dir_path);
@@ -2999,11 +3120,15 @@ pub fn mkdirZ(dir_path: [*:0]const u8, mode: u32) MakeDirError!void {
.NOSPC => return error.NoSpaceLeft,
.NOTDIR => return error.NotDir,
.ROFS => return error.ReadOnlyFileSystem,
.ILSEQ => |err| if (builtin.os.tag == .wasi)
return error.InvalidUtf8
else
return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
 
/// Windows-only. Same as `mkdir` but the parameters is WTF16 encoded.
/// Windows-only. Same as `mkdir` but the parameter is WTF16LE encoded.
pub fn mkdirW(dir_path_w: []const u16, mode: u32) MakeDirError!void {
_ = mode;
const sub_dir_handle = windows.OpenFile(dir_path_w, .{
@@ -3031,13 +3156,20 @@ pub const DeleteDirError = error{
NotDir,
DirNotEmpty,
ReadOnlyFileSystem,
/// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
/// Windows-only; file paths provided by the user must be valid WTF-8.
/// https://simonsapin.github.io/wtf-8/
InvalidWtf8,
BadPathName,
/// On Windows, `\\server` or `\\server\share` was not found.
NetworkNotFound,
} || UnexpectedError;
 
/// Deletes an empty directory.
/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `dir_path` should be encoded as valid UTF-8.
/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding.
pub fn rmdir(dir_path: []const u8) DeleteDirError!void {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
return unlinkat(wasi.AT.FDCWD, dir_path, AT.REMOVEDIR) catch |err| switch (err) {
@@ -3055,6 +3187,9 @@ pub fn rmdir(dir_path: []const u8) DeleteDirError!void {
}
 
/// Same as `rmdir` except the parameter is null-terminated.
/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `dir_path` should be encoded as valid UTF-8.
/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding.
pub fn rmdirZ(dir_path: [*:0]const u8) DeleteDirError!void {
if (builtin.os.tag == .windows) {
const dir_path_w = try windows.cStrToPrefixedFileW(null, dir_path);
@@ -3077,11 +3212,15 @@ pub fn rmdirZ(dir_path: [*:0]const u8) DeleteDirError!void {
.EXIST => return error.DirNotEmpty,
.NOTEMPTY => return error.DirNotEmpty,
.ROFS => return error.ReadOnlyFileSystem,
.ILSEQ => |err| if (builtin.os.tag == .wasi)
return error.InvalidUtf8
else
return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
 
/// Windows-only. Same as `rmdir` except the parameter is WTF16 encoded.
/// Windows-only. Same as `rmdir` except the parameter is WTF-16 LE encoded.
pub fn rmdirW(dir_path_w: []const u16) DeleteDirError!void {
return windows.DeleteFile(dir_path_w, .{ .dir = std.fs.cwd().fd, .remove_dir = true }) catch |err| switch (err) {
error.IsDir => unreachable,
@@ -3098,21 +3237,25 @@ pub const ChangeCurDirError = error{
SystemResources,
NotDir,
BadPathName,
 
/// On Windows, file paths must be valid Unicode.
/// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
/// Windows-only; file paths provided by the user must be valid WTF-8.
/// https://simonsapin.github.io/wtf-8/
InvalidWtf8,
} || UnexpectedError;
 
/// Changes the current working directory of the calling process.
/// `dir_path` is recommended to be a UTF-8 encoded string.
/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `dir_path` should be encoded as valid UTF-8.
/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding.
pub fn chdir(dir_path: []const u8) ChangeCurDirError!void {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
@compileError("WASI does not support os.chdir");
} else if (builtin.os.tag == .windows) {
var utf16_dir_path: [windows.PATH_MAX_WIDE]u16 = undefined;
const len = try std.unicode.utf8ToUtf16Le(utf16_dir_path[0..], dir_path);
if (len > utf16_dir_path.len) return error.NameTooLong;
return chdirW(utf16_dir_path[0..len]);
var wtf16_dir_path: [windows.PATH_MAX_WIDE]u16 = undefined;
const len = try std.unicode.wtf8ToWtf16Le(wtf16_dir_path[0..], dir_path);
if (len > wtf16_dir_path.len) return error.NameTooLong;
return chdirW(wtf16_dir_path[0..len]);
} else {
const dir_path_c = try toPosixPath(dir_path);
return chdirZ(&dir_path_c);
@@ -3120,12 +3263,15 @@ pub fn chdir(dir_path: []const u8) ChangeCurDirError!void {
}
 
/// Same as `chdir` except the parameter is null-terminated.
/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `dir_path` should be encoded as valid UTF-8.
/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding.
pub fn chdirZ(dir_path: [*:0]const u8) ChangeCurDirError!void {
if (builtin.os.tag == .windows) {
var utf16_dir_path: [windows.PATH_MAX_WIDE]u16 = undefined;
const len = try std.unicode.utf8ToUtf16Le(utf16_dir_path[0..], mem.span(dir_path));
if (len > utf16_dir_path.len) return error.NameTooLong;
return chdirW(utf16_dir_path[0..len]);
var wtf16_dir_path: [windows.PATH_MAX_WIDE]u16 = undefined;
const len = try std.unicode.wtf8ToWtf16Le(wtf16_dir_path[0..], mem.span(dir_path));
if (len > wtf16_dir_path.len) return error.NameTooLong;
return chdirW(wtf16_dir_path[0..len]);
} else if (builtin.os.tag == .wasi and !builtin.link_libc) {
return chdir(mem.span(dir_path));
}
@@ -3139,11 +3285,15 @@ pub fn chdirZ(dir_path: [*:0]const u8) ChangeCurDirError!void {
.NOENT => return error.FileNotFound,
.NOMEM => return error.SystemResources,
.NOTDIR => return error.NotDir,
.ILSEQ => |err| if (builtin.os.tag == .wasi)
return error.InvalidUtf8
else
return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
 
/// Windows-only. Same as `chdir` except the parameter is WTF16 encoded.
/// Windows-only. Same as `chdir` except the parameter is WTF16 LE encoded.
pub fn chdirW(dir_path: []const u16) ChangeCurDirError!void {
windows.SetCurrentDirectory(dir_path) catch |err| switch (err) {
error.NoDevice => return error.FileSystem,
@@ -3183,7 +3333,11 @@ pub const ReadLinkError = error{
SystemResources,
NotLink,
NotDir,
/// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
/// Windows-only; file paths provided by the user must be valid WTF-8.
/// https://simonsapin.github.io/wtf-8/
InvalidWtf8,
BadPathName,
/// Windows-only. This error may occur if the opened reparse point is
/// of unsupported type.
@@ -3193,7 +3347,13 @@ pub const ReadLinkError = error{
} || UnexpectedError;
 
/// Read value of a symbolic link.
/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `file_path` should be encoded as valid UTF-8.
/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
/// The return value is a slice of `out_buffer` from index 0.
/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, the result is encoded as UTF-8.
/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
pub fn readlink(file_path: []const u8, out_buffer: []u8) ReadLinkError![]u8 {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
return readlinkat(wasi.AT.FDCWD, file_path, out_buffer);
@@ -3206,7 +3366,8 @@ pub fn readlink(file_path: []const u8, out_buffer: []u8) ReadLinkError![]u8 {
}
}
 
/// Windows-only. Same as `readlink` except `file_path` is WTF16 encoded.
/// Windows-only. Same as `readlink` except `file_path` is WTF16 LE encoded.
/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// See also `readlinkZ`.
pub fn readlinkW(file_path: []const u16, out_buffer: []u8) ReadLinkError![]u8 {
return windows.ReadLink(std.fs.cwd().fd, file_path, out_buffer);
@@ -3215,7 +3376,7 @@ pub fn readlinkW(file_path: []const u16, out_buffer: []u8) ReadLinkError![]u8 {
/// Same as `readlink` except `file_path` is null-terminated.
pub fn readlinkZ(file_path: [*:0]const u8, out_buffer: []u8) ReadLinkError![]u8 {
if (builtin.os.tag == .windows) {
const file_path_w = try windows.cStrToWin32PrefixedFileW(file_path);
const file_path_w = try windows.cStrToPrefixedFileW(null, file_path);
return readlinkW(file_path_w.span(), out_buffer);
} else if (builtin.os.tag == .wasi and !builtin.link_libc) {
return readlink(mem.sliceTo(file_path, 0), out_buffer);
@@ -3232,12 +3393,22 @@ pub fn readlinkZ(file_path: [*:0]const u8, out_buffer: []u8) ReadLinkError![]u8
.NOENT => return error.FileNotFound,
.NOMEM => return error.SystemResources,
.NOTDIR => return error.NotDir,
.ILSEQ => |err| if (builtin.os.tag == .wasi)
return error.InvalidUtf8
else
return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
 
/// Similar to `readlink` except reads value of a symbolic link **relative** to `dirfd` directory handle.
/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `file_path` should be encoded as valid UTF-8.
/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.
/// The return value is a slice of `out_buffer` from index 0.
/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, the result is encoded as UTF-8.
/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
/// See also `readlinkatWasi`, `readlinkatZ` and `readlinkatW`.
pub fn readlinkat(dirfd: fd_t, file_path: []const u8, out_buffer: []u8) ReadLinkError![]u8 {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
@@ -3267,11 +3438,13 @@ pub fn readlinkatWasi(dirfd: fd_t, file_path: []const u8, out_buffer: []u8) Read
.NOMEM => return error.SystemResources,
.NOTDIR => return error.NotDir,
.NOTCAPABLE => return error.AccessDenied,
.ILSEQ => return error.InvalidUtf8,
else => |err| return unexpectedErrno(err),
}
}
 
/// Windows-only. Same as `readlinkat` except `file_path` is null-terminated, WTF16 encoded.
/// Windows-only. Same as `readlinkat` except `file_path` is null-terminated, WTF16 LE encoded.
/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// See also `readlinkat`.
pub fn readlinkatW(dirfd: fd_t, file_path: []const u16, out_buffer: []u8) ReadLinkError![]u8 {
return windows.ReadLink(dirfd, file_path, out_buffer);
@@ -3298,6 +3471,10 @@ pub fn readlinkatZ(dirfd: fd_t, file_path: [*:0]const u8, out_buffer: []u8) Read
.NOENT => return error.FileNotFound,
.NOMEM => return error.SystemResources,
.NOTDIR => return error.NotDir,
.ILSEQ => |err| if (builtin.os.tag == .wasi)
return error.InvalidUtf8
else
return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
@@ -4274,10 +4451,18 @@ pub fn fstat_wasi(fd: fd_t) FStatError!wasi.filestat_t {
}
}
 
pub const FStatAtError = FStatError || error{ NameTooLong, FileNotFound, SymLinkLoop };
pub const FStatAtError = FStatError || error{
NameTooLong,
FileNotFound,
SymLinkLoop,
/// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
};
 
/// Similar to `fstat`, but returns stat of a resource pointed to by `pathname`
/// which is relative to `dirfd` handle.
/// On WASI, `pathname` should be encoded as valid UTF-8.
/// On other platforms, `pathname` is an opaque sequence of bytes with no particular encoding.
/// See also `fstatatZ` and `fstatat_wasi`.
pub fn fstatat(dirfd: fd_t, pathname: []const u8, flags: u32) FStatAtError!Stat {
if (builtin.os.tag == .wasi and !builtin.link_libc) {
@@ -4294,6 +4479,7 @@ pub fn fstatat(dirfd: fd_t, pathname: []const u8, flags: u32) FStatAtError!Stat
}
 
/// WASI-only. Same as `fstatat` but targeting WASI.
/// `pathname` should be encoded as valid UTF-8.
/// See also `fstatat`.
pub fn fstatat_wasi(dirfd: fd_t, pathname: []const u8, flags: wasi.lookupflags_t) FStatAtError!wasi.filestat_t {
var stat: wasi.filestat_t = undefined;
@@ -4308,6 +4494,7 @@ pub fn fstatat_wasi(dirfd: fd_t, pathname: []const u8, flags: wasi.lookupflags_t
.NOENT => return error.FileNotFound,
.NOTDIR => return error.FileNotFound,
.NOTCAPABLE => return error.AccessDenied,
.ILSEQ => return error.InvalidUtf8,
else => |err| return unexpectedErrno(err),
}
}
@@ -4337,6 +4524,10 @@ pub fn fstatatZ(dirfd: fd_t, pathname: [*:0]const u8, flags: u32) FStatAtError!S
.LOOP => return error.SymLinkLoop,
.NOENT => return error.FileNotFound,
.NOTDIR => return error.FileNotFound,
.ILSEQ => |err| if (builtin.os.tag == .wasi)
return error.InvalidUtf8
else
return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
@@ -4693,12 +4884,17 @@ pub const AccessError = error{
FileBusy,
SymLinkLoop,
ReadOnlyFileSystem,
 
/// On Windows, file paths must be valid Unicode.
/// WASI-only; file paths must be valid UTF-8.
InvalidUtf8,
/// Windows-only; file paths provided by the user must be valid WTF-8.
/// https://simonsapin.github.io/wtf-8/
InvalidWtf8,
} || UnexpectedError;
 
/// Check user's permissions for a file.
/// On Windows, `path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `path` should be encoded as valid UTF-8.
/// On other platforms, `path` is an opaque sequence of bytes with no particular encoding.
/// TODO currently this assumes `mode` is `F.OK` on Windows.
pub fn access(path: []const u8, mode: u32) AccessError!void {
if (builtin.os.tag == .windows) {
@@ -4740,12 +4936,16 @@ pub fn accessZ(path: [*:0]const u8, mode: u32) AccessError!void {
.FAULT => unreachable,
.IO => return error.InputOutput,
.NOMEM => return error.SystemResources,
.ILSEQ => |err| if (builtin.os.tag == .wasi)
return error.InvalidUtf8
else
return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
 
/// Call from Windows-specific code if you already have a UTF-16LE encoded, null terminated string.
/// Otherwise use `access` or `accessC`.
/// Call from Windows-specific code if you already have a WTF-16LE encoded, null terminated string.
/// Otherwise use `access` or `accessZ`.
/// TODO currently this ignores `mode`.
pub fn accessW(path: [*:0]const u16, mode: u32) windows.GetFileAttributesError!void {
_ = mode;
@@ -4762,6 +4962,9 @@ pub fn accessW(path: [*:0]const u16, mode: u32) windows.GetFileAttributesError!v
}
 
/// Check user's permissions for a file, based on an open directory handle.
/// On Windows, `path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On WASI, `path` should be encoded as valid UTF-8.
/// On other platforms, `path` is an opaque sequence of bytes with no particular encoding.
/// TODO currently this ignores `mode` and `flags` on Windows.
pub fn faccessat(dirfd: fd_t, path: []const u8, mode: u32, flags: u32) AccessError!void {
if (builtin.os.tag == .windows) {
@@ -4832,6 +5035,10 @@ pub fn faccessatZ(dirfd: fd_t, path: [*:0]const u8, mode: u32, flags: u32) Acces
.FAULT => unreachable,
.IO => return error.InputOutput,
.NOMEM => return error.SystemResources,
.ILSEQ => |err| if (builtin.os.tag == .wasi)
return error.InvalidUtf8
else
return unexpectedErrno(err),
else => |err| return unexpectedErrno(err),
}
}
@@ -5339,8 +5546,9 @@ pub const RealPathError = error{
/// On WASI, the current CWD may not be associated with an absolute path.
InvalidHandle,
 
/// On Windows, file paths must be valid Unicode.
InvalidUtf8,
/// Windows-only; file paths provided by the user must be valid WTF-8.
/// https://simonsapin.github.io/wtf-8/
InvalidWtf8,
 
/// On Windows, `\\server` or `\\server\share` was not found.
NetworkNotFound,
@@ -5362,8 +5570,12 @@ pub const RealPathError = error{
/// Return the canonicalized absolute pathname.
/// Expands all symbolic links and resolves references to `.`, `..`, and
/// extra `/` characters in `pathname`.
/// On Windows, `pathname` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On other platforms, `pathname` is an opaque sequence of bytes with no particular encoding.
/// The return value is a slice of `out_buffer`, but not necessarily from the beginning.
/// See also `realpathZ` and `realpathW`.
/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
/// Calling this function is usually a bug.
pub fn realpath(pathname: []const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {
if (builtin.os.tag == .windows) {
@@ -5402,6 +5614,7 @@ pub fn realpathZ(pathname: [*:0]const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealP
error.WouldBlock => unreachable,
error.FileBusy => unreachable, // not asking for write permissions
error.InvalidHandle => unreachable, // WASI-only
error.InvalidUtf8 => unreachable, // WASI-only
else => |e| return e,
};
defer close(fd);
@@ -5425,7 +5638,8 @@ pub fn realpathZ(pathname: [*:0]const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealP
return mem.sliceTo(result_path, 0);
}
 
/// Same as `realpath` except `pathname` is UTF16LE-encoded.
/// Same as `realpath` except `pathname` is WTF16LE-encoded.
/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// Calling this function is usually a bug.
pub fn realpathW(pathname: []const u16, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {
const w = windows;
@@ -5475,6 +5689,8 @@ pub fn isGetFdPathSupportedOnTarget(os: std.Target.Os) bool {
/// This function is very host-specific and is not universally supported by all hosts.
/// For example, while it generally works on Linux, macOS, FreeBSD or Windows, it is
/// unsupported on WASI.
/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
/// Calling this function is usually a bug.
pub fn getFdPath(fd: fd_t, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {
if (!comptime isGetFdPathSupportedOnTarget(builtin.os)) {
@@ -5485,10 +5701,7 @@ pub fn getFdPath(fd: fd_t, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {
var wide_buf: [windows.PATH_MAX_WIDE]u16 = undefined;
const wide_slice = try windows.GetFinalPathNameByHandle(fd, .{}, wide_buf[0..]);
 
// TODO: Windows file paths can be arbitrary arrays of u16 values
// and must not fail with InvalidUtf8.
const end_index = std.unicode.utf16leToUtf8(out_buffer, wide_slice) catch
return error.InvalidUtf8;
const end_index = std.unicode.wtf16LeToWtf8(out_buffer, wide_slice);
return out_buffer[0..end_index];
},
.macos, .ios, .watchos, .tvos => {
@@ -5512,8 +5725,12 @@ pub fn getFdPath(fd: fd_t, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {
 
const target = readlinkZ(proc_path, out_buffer) catch |err| {
switch (err) {
error.UnsupportedReparsePointType => unreachable, // Windows only,
error.NotLink => unreachable,
error.BadPathName => unreachable,
error.InvalidUtf8 => unreachable, // WASI-only
error.InvalidWtf8 => unreachable, // Windows-only
error.UnsupportedReparsePointType => unreachable, // Windows-only
error.NetworkNotFound => unreachable, // Windows-only
else => |e| return e,
}
};
 
lib/std/os/windows.zig added: 1932, removed: 517, total 1415
@@ -1,8 +1,8 @@
//! This file contains thin wrappers around Windows-specific APIs, with these
//! specific goals in mind:
//! * Convert "errno"-style error codes into Zig errors.
//! * When null-terminated or UTF16LE byte buffers are required, provide APIs which accept
//! slices as well as APIs which accept null-terminated UTF16LE byte buffers.
//! * When null-terminated or WTF16LE byte buffers are required, provide APIs which accept
//! slices as well as APIs which accept null-terminated WTF16LE byte buffers.
 
const builtin = @import("builtin");
const std = @import("../std.zig");
@@ -548,7 +548,6 @@ pub fn WriteFile(
 
pub const SetCurrentDirectoryError = error{
NameTooLong,
InvalidUtf8,
FileNotFound,
NotDir,
AccessDenied,
@@ -587,24 +586,24 @@ pub const GetCurrentDirectoryError = error{
};
 
/// The result is a slice of `buffer`, indexed from 0.
/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
pub fn GetCurrentDirectory(buffer: []u8) GetCurrentDirectoryError![]u8 {
var utf16le_buf: [PATH_MAX_WIDE]u16 = undefined;
const result = kernel32.GetCurrentDirectoryW(utf16le_buf.len, &utf16le_buf);
var wtf16le_buf: [PATH_MAX_WIDE]u16 = undefined;
const result = kernel32.GetCurrentDirectoryW(wtf16le_buf.len, &wtf16le_buf);
if (result == 0) {
switch (kernel32.GetLastError()) {
else => |err| return unexpectedError(err),
}
}
assert(result <= utf16le_buf.len);
const utf16le_slice = utf16le_buf[0..result];
// Trust that Windows gives us valid UTF-16LE.
assert(result <= wtf16le_buf.len);
const wtf16le_slice = wtf16le_buf[0..result];
var end_index: usize = 0;
var it = std.unicode.Utf16LeIterator.init(utf16le_slice);
while (it.nextCodepoint() catch unreachable) |codepoint| {
var it = std.unicode.Wtf16LeIterator.init(wtf16le_slice);
while (it.nextCodepoint()) |codepoint| {
const seq_len = std.unicode.utf8CodepointSequenceLength(codepoint) catch unreachable;
if (end_index + seq_len >= buffer.len)
return error.NameTooLong;
end_index += std.unicode.utf8Encode(codepoint, buffer[end_index..]) catch unreachable;
end_index += std.unicode.wtf8Encode(codepoint, buffer[end_index..]) catch unreachable;
}
return buffer[0..end_index];
}
@@ -812,6 +811,8 @@ pub fn ReadLink(dir: ?HANDLE, sub_path_w: []const u16, out_buffer: []u8) ReadLin
}
}
 
/// Asserts that there is enough space in `out_buffer`.
/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
fn parseReadlinkPath(path: []const u16, is_relative: bool, out_buffer: []u8) []u8 {
const win32_namespace_path = path: {
if (is_relative) break :path path;
@@ -821,7 +822,7 @@ fn parseReadlinkPath(path: []const u16, is_relative: bool, out_buffer: []u8) []u
};
break :path win32_path.span();
};
const out_len = std.unicode.utf16leToUtf8(out_buffer, win32_namespace_path) catch unreachable;
const out_len = std.unicode.wtf16LeToWtf8(out_buffer, win32_namespace_path);
return out_buffer[0..out_len];
}
 
@@ -1942,13 +1943,13 @@ pub fn eqlIgnoreCaseWTF16(a: []const u16, b: []const u16) bool {
if (@inComptime() or builtin.os.tag != .windows) {
// This function compares the strings code unit by code unit (aka u16-to-u16),
// so any length difference implies inequality. In other words, there's no possible
// conversion that changes the number of UTF-16 code units needed for the uppercase/lowercase
// conversion that changes the number of WTF-16 code units needed for the uppercase/lowercase
// version in the conversion table since only codepoints <= max(u16) are eligible
// for conversion at all.
if (a.len != b.len) return false;
 
for (a, b) |a_c, b_c| {
// The slices are always UTF-16 LE, so need to convert the elements to native
// The slices are always WTF-16 LE, so need to convert the elements to native
// endianness for the uppercasing
const a_c_native = std.mem.littleToNative(u16, a_c);
const b_c_native = std.mem.littleToNative(u16, b_c);
@@ -1975,18 +1976,18 @@ pub fn eqlIgnoreCaseWTF16(a: []const u16, b: []const u16) bool {
return ntdll.RtlEqualUnicodeString(&a_string, &b_string, TRUE) == TRUE;
}
 
/// Compares two UTF-8 strings using the equivalent functionality of
/// Compares two WTF-8 strings using the equivalent functionality of
/// `RtlEqualUnicodeString` (with case insensitive comparison enabled).
/// This function can be called on any target.
/// Assumes `a` and `b` are valid UTF-8.
pub fn eqlIgnoreCaseUtf8(a: []const u8, b: []const u8) bool {
/// Assumes `a` and `b` are valid WTF-8.
pub fn eqlIgnoreCaseWtf8(a: []const u8, b: []const u8) bool {
// A length equality check is not possible here because there are
// some codepoints whose uppercase UTF-8 representation has a different length
// than that of their lowercase counterpart, e.g. U+0250 (2 bytes) <-> U+2C6F (3 bytes).
// There are 7 such codepoints in the uppercase data used by Windows.
 
var a_utf8_it = std.unicode.Utf8View.initUnchecked(a).iterator();
var b_utf8_it = std.unicode.Utf8View.initUnchecked(b).iterator();
var a_wtf8_it = std.unicode.Wtf8View.initUnchecked(a).iterator();
var b_wtf8_it = std.unicode.Wtf8View.initUnchecked(b).iterator();
 
// Use RtlUpcaseUnicodeChar on Windows when not in comptime to avoid including a
// redundant copy of the uppercase data.
@@ -1996,8 +1997,8 @@ pub fn eqlIgnoreCaseUtf8(a: []const u8, b: []const u8) bool {
};
 
while (true) {
const a_cp = a_utf8_it.nextCodepoint() orelse break;
const b_cp = b_utf8_it.nextCodepoint() orelse return false;
const a_cp = a_wtf8_it.nextCodepoint() orelse break;
const b_cp = b_wtf8_it.nextCodepoint() orelse return false;
 
if (a_cp <= std.math.maxInt(u16) and b_cp <= std.math.maxInt(u16)) {
if (a_cp != b_cp and upcaseImpl(@intCast(a_cp)) != upcaseImpl(@intCast(b_cp))) {
@@ -2008,26 +2009,26 @@ pub fn eqlIgnoreCaseUtf8(a: []const u8, b: []const u8) bool {
}
}
// Make sure there are no leftover codepoints in b
if (b_utf8_it.nextCodepoint() != null) return false;
if (b_wtf8_it.nextCodepoint() != null) return false;
 
return true;
}
 
fn testEqlIgnoreCase(comptime expect_eql: bool, comptime a: []const u8, comptime b: []const u8) !void {
try std.testing.expectEqual(expect_eql, eqlIgnoreCaseUtf8(a, b));
try std.testing.expectEqual(expect_eql, eqlIgnoreCaseWtf8(a, b));
try std.testing.expectEqual(expect_eql, eqlIgnoreCaseWTF16(
std.unicode.utf8ToUtf16LeStringLiteral(a),
std.unicode.utf8ToUtf16LeStringLiteral(b),
));
 
try comptime std.testing.expect(expect_eql == eqlIgnoreCaseUtf8(a, b));
try comptime std.testing.expect(expect_eql == eqlIgnoreCaseWtf8(a, b));
try comptime std.testing.expect(expect_eql == eqlIgnoreCaseWTF16(
std.unicode.utf8ToUtf16LeStringLiteral(a),
std.unicode.utf8ToUtf16LeStringLiteral(b),
));
}
 
test "eqlIgnoreCaseWTF16/Utf8" {
test "eqlIgnoreCaseWTF16/Wtf8" {
try testEqlIgnoreCase(true, "\x01 a B Λ ɐ", "\x01 A b λ Ɐ");
// does not do case-insensitive comparison for codepoints >= U+10000
try testEqlIgnoreCase(false, "𐓏", "𐓷");
@@ -2117,20 +2118,32 @@ pub fn normalizePath(comptime T: type, path: []T) RemoveDotDirsError!usize {
return prefix_len + try removeDotDirsSanitized(T, path[prefix_len..new_len]);
}
 
pub const Wtf8ToPrefixedFileWError = error{InvalidWtf8} || Wtf16ToPrefixedFileWError;
 
/// Same as `sliceToPrefixedFileW` but accepts a pointer
/// to a null-terminated path.
pub fn cStrToPrefixedFileW(dir: ?HANDLE, s: [*:0]const u8) !PathSpace {
/// to a null-terminated WTF-8 encoded path.
/// https://simonsapin.github.io/wtf-8/
pub fn cStrToPrefixedFileW(dir: ?HANDLE, s: [*:0]const u8) Wtf8ToPrefixedFileWError!PathSpace {
return sliceToPrefixedFileW(dir, mem.sliceTo(s, 0));
}
 
/// Same as `wToPrefixedFileW` but accepts a UTF-8 encoded path.
pub fn sliceToPrefixedFileW(dir: ?HANDLE, path: []const u8) !PathSpace {
/// Same as `wToPrefixedFileW` but accepts a WTF-8 encoded path.
/// https://simonsapin.github.io/wtf-8/
pub fn sliceToPrefixedFileW(dir: ?HANDLE, path: []const u8) Wtf8ToPrefixedFileWError!PathSpace {
var temp_path: PathSpace = undefined;
temp_path.len = try std.unicode.utf8ToUtf16Le(&temp_path.data, path);
temp_path.len = try std.unicode.wtf8ToWtf16Le(&temp_path.data, path);
temp_path.data[temp_path.len] = 0;
return wToPrefixedFileW(dir, temp_path.span());
}
 
pub const Wtf16ToPrefixedFileWError = error{
AccessDenied,
BadPathName,
FileNotFound,
NameTooLong,
Unexpected,
};
 
/// Converts the `path` to WTF16, null-terminated. If the path contains any
/// namespace prefix, or is anything but a relative path (rooted, drive relative,
/// etc) the result will have the NT-style prefix `\??\`.
@@ -2142,7 +2155,7 @@ pub fn sliceToPrefixedFileW(dir: ?HANDLE, path: []const u8) !PathSpace {
/// is non-null, or the CWD if it is null.
/// - Special case device names like COM1, NUL, etc are not handled specially (TODO)
/// - . and space are not stripped from the end of relative paths (potential TODO)
pub fn wToPrefixedFileW(dir: ?HANDLE, path: [:0]const u16) !PathSpace {
pub fn wToPrefixedFileW(dir: ?HANDLE, path: [:0]const u16) Wtf16ToPrefixedFileWError!PathSpace {
const nt_prefix = [_]u16{ '\\', '?', '?', '\\' };
switch (getNamespacePrefix(u16, path)) {
// TODO: Figure out a way to design an API that can avoid the copy for .nt,
@@ -2312,7 +2325,7 @@ pub const NamespacePrefix = enum {
nt,
};
 
/// If `T` is `u16`, then `path` should be encoded as UTF-16LE.
/// If `T` is `u16`, then `path` should be encoded as WTF-16LE.
pub fn getNamespacePrefix(comptime T: type, path: []const T) NamespacePrefix {
if (path.len < 4) return .none;
var all_backslash = switch (mem.littleToNative(T, path[0])) {
@@ -2366,7 +2379,7 @@ pub const UnprefixedPathType = enum {
 
/// Get the path type of a path that is known to not have any namespace prefixes
/// (`\\?\`, `\\.\`, `\??\`).
/// If `T` is `u16`, then `path` should be encoded as UTF-16LE.
/// If `T` is `u16`, then `path` should be encoded as WTF-16LE.
pub fn getUnprefixedPathType(comptime T: type, path: []const T) UnprefixedPathType {
if (path.len < 1) return .relative;
 
@@ -2420,7 +2433,7 @@ test getUnprefixedPathType {
/// Functionality is based on the ReactOS test cases found here:
/// https://github.com/reactos/reactos/blob/master/modules/rostests/apitests/ntdll/RtlNtPathNameToDosPathName.c
///
/// `path` should be encoded as UTF-16LE.
/// `path` should be encoded as WTF-16LE.
pub fn ntToWin32Namespace(path: []const u16) !PathSpace {
if (path.len > PATH_MAX_WIDE) return error.NameTooLong;
 
@@ -2530,7 +2543,6 @@ pub fn unexpectedError(err: Win32Error) std.os.UnexpectedError {
if (std.os.unexpected_error_tracing) {
// 614 is the length of the longest windows error description
var buf_wstr: [614]WCHAR = undefined;
var buf_utf8: [614]u8 = undefined;
const len = kernel32.FormatMessageW(
FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,
null,
@@ -2540,8 +2552,10 @@ pub fn unexpectedError(err: Win32Error) std.os.UnexpectedError {
buf_wstr.len,
null,
);
_ = std.unicode.utf16leToUtf8(&buf_utf8, buf_wstr[0..len]) catch unreachable;
std.debug.print("error.Unexpected: GetLastError({}): {s}\n", .{ @intFromEnum(err), buf_utf8[0..len] });
std.debug.print("error.Unexpected: GetLastError({}): {}\n", .{
@intFromEnum(err),
std.unicode.fmtUtf16Le(buf_wstr[0..len]),
});
std.debug.dumpCurrentStackTrace(@returnAddress());
}
return error.Unexpected;
 
lib/std/os/windows/test.zig added: 1932, removed: 517, total 1415
@@ -30,7 +30,7 @@ fn testToPrefixedFileNoOracle(comptime path: []const u8, comptime expected_path:
const expected_path_utf16 = std.unicode.utf8ToUtf16LeStringLiteral(expected_path);
const actual_path = try windows.wToPrefixedFileW(null, path_utf16);
std.testing.expectEqualSlices(u16, expected_path_utf16, actual_path.span()) catch |e| {
std.debug.print("got '{s}', expected '{s}'\n", .{ std.unicode.fmtUtf16le(actual_path.span()), std.unicode.fmtUtf16le(expected_path_utf16) });
std.debug.print("got '{s}', expected '{s}'\n", .{ std.unicode.fmtUtf16Le(actual_path.span()), std.unicode.fmtUtf16le(expected_path_utf16) });
return e;
};
}
@@ -48,7 +48,7 @@ fn testToPrefixedFileOnlyOracle(comptime path: []const u8) !void {
const zig_result = try windows.wToPrefixedFileW(null, path_utf16);
const win32_api_result = try RtlDosPathNameToNtPathName_U(path_utf16);
std.testing.expectEqualSlices(u16, win32_api_result.span(), zig_result.span()) catch |e| {
std.debug.print("got '{s}', expected '{s}'\n", .{ std.unicode.fmtUtf16le(zig_result.span()), std.unicode.fmtUtf16le(win32_api_result.span()) });
std.debug.print("got '{s}', expected '{s}'\n", .{ std.unicode.fmtUtf16Le(zig_result.span()), std.unicode.fmtUtf16le(win32_api_result.span()) });
return e;
};
}
 
lib/std/process.zig added: 1932, removed: 517, total 1415
@@ -16,11 +16,15 @@ pub const changeCurDir = os.chdir;
pub const changeCurDirC = os.chdirC;
 
/// The result is a slice of `out_buffer`, from index `0`.
/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
pub fn getCwd(out_buffer: []u8) ![]u8 {
return os.getcwd(out_buffer);
}
 
/// Caller must free the returned memory.
/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
pub fn getCwdAlloc(allocator: Allocator) ![]u8 {
// The use of MAX_PATH_BYTES here is just a heuristic: most paths will fit
// in stack_buf, avoiding an extra allocation in the common case.
@@ -76,7 +80,7 @@ pub const EnvMap = struct {
_ = self;
if (builtin.os.tag == .windows) {
var h = std.hash.Wyhash.init(0);
var it = std.unicode.Utf8View.initUnchecked(s).iterator();
var it = std.unicode.Wtf8View.initUnchecked(s).iterator();
while (it.nextCodepoint()) |cp| {
const cp_upper = upcase(cp);
h.update(&[_]u8{
@@ -93,8 +97,8 @@ pub const EnvMap = struct {
pub fn eql(self: @This(), a: []const u8, b: []const u8) bool {
_ = self;
if (builtin.os.tag == .windows) {
var it_a = std.unicode.Utf8View.initUnchecked(a).iterator();
var it_b = std.unicode.Utf8View.initUnchecked(b).iterator();
var it_a = std.unicode.Wtf8View.initUnchecked(a).iterator();
var it_b = std.unicode.Wtf8View.initUnchecked(b).iterator();
while (true) {
const c_a = it_a.nextCodepoint() orelse break;
const c_b = it_b.nextCodepoint() orelse return false;
@@ -129,8 +133,9 @@ pub const EnvMap = struct {
/// Same as `put` but the key and value become owned by the EnvMap rather
/// than being copied.
/// If `putMove` fails, the ownership of key and value does not transfer.
/// On Windows `key` must be a valid UTF-8 string.
/// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string.
pub fn putMove(self: *EnvMap, key: []u8, value: []u8) !void {
assert(std.unicode.wtf8ValidateSlice(key));
const get_or_put = try self.hash_map.getOrPut(key);
if (get_or_put.found_existing) {
self.free(get_or_put.key_ptr.*);
@@ -141,8 +146,9 @@ pub const EnvMap = struct {
}
 
/// `key` and `value` are copied into the EnvMap.
/// On Windows `key` must be a valid UTF-8 string.
/// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string.
pub fn put(self: *EnvMap, key: []const u8, value: []const u8) !void {
assert(std.unicode.wtf8ValidateSlice(key));
const value_copy = try self.copy(value);
errdefer self.free(value_copy);
const get_or_put = try self.hash_map.getOrPut(key);
@@ -159,23 +165,26 @@ pub const EnvMap = struct {
 
/// Find the address of the value associated with a key.
/// The returned pointer is invalidated if the map resizes.
/// On Windows `key` must be a valid UTF-8 string.
/// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string.
pub fn getPtr(self: EnvMap, key: []const u8) ?*[]const u8 {
assert(std.unicode.wtf8ValidateSlice(key));
return self.hash_map.getPtr(key);
}
 
/// Return the map's copy of the value associated with
/// a key. The returned string is invalidated if this
/// key is removed from the map.
/// On Windows `key` must be a valid UTF-8 string.
/// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string.
pub fn get(self: EnvMap, key: []const u8) ?[]const u8 {
assert(std.unicode.wtf8ValidateSlice(key));
return self.hash_map.get(key);
}
 
/// Removes the item from the map and frees its value.
/// This invalidates the value returned by get() for this key.
/// On Windows `key` must be a valid UTF-8 string.
/// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string.
pub fn remove(self: *EnvMap, key: []const u8) void {
assert(std.unicode.wtf8ValidateSlice(key));
const kv = self.hash_map.fetchRemove(key) orelse return;
self.free(kv.key);
self.free(kv.value);
@@ -239,18 +248,34 @@ test "EnvMap" {
 
try testing.expectEqual(@as(EnvMap.Size, 1), env.count());
 
// test Unicode case-insensitivity on Windows
if (builtin.os.tag == .windows) {
// test Unicode case-insensitivity on Windows
try env.put("КИРиллИЦА", "something else");
try testing.expectEqualStrings("something else", env.get("кириллица").?);
 
// and WTF-8 that's not valid UTF-8
const wtf8_with_surrogate_pair = try std.unicode.wtf16LeToWtf8Alloc(testing.allocator, &[_]u16{
std.mem.nativeToLittle(u16, 0xD83D), // unpaired high surrogate
});
defer testing.allocator.free(wtf8_with_surrogate_pair);
 
try env.put(wtf8_with_surrogate_pair, wtf8_with_surrogate_pair);
try testing.expectEqualSlices(u8, wtf8_with_surrogate_pair, env.get(wtf8_with_surrogate_pair).?);
}
}
 
/// Errors that `getEnvMap` can return.
pub const GetEnvMapError = error{
/// An allocation made while building the map failed.
OutOfMemory,
/// WASI-only. `environ_sizes_get` or `environ_get`
/// failed for an unexpected reason.
Unexpected,
};
 
/// Returns a snapshot of the environment variables of the current process.
/// Any modifications to the resulting EnvMap will not be reflected in the environment, and
/// likewise, any future modifications to the environment will not be reflected in the EnvMap.
/// Caller owns resulting `EnvMap` and should call its `deinit` fn when done.
pub fn getEnvMap(allocator: Allocator) !EnvMap {
pub fn getEnvMap(allocator: Allocator) GetEnvMapError!EnvMap {
var result = EnvMap.init(allocator);
errdefer result.deinit();
 
@@ -269,7 +294,7 @@ pub fn getEnvMap(allocator: Allocator) !EnvMap {
 
while (ptr[i] != 0 and ptr[i] != '=') : (i += 1) {}
const key_w = ptr[key_start..i];
const key = try std.unicode.utf16leToUtf8Alloc(allocator, key_w);
const key = try std.unicode.wtf16LeToWtf8Alloc(allocator, key_w);
errdefer allocator.free(key);
 
if (ptr[i] == '=') i += 1;
@@ -277,7 +302,7 @@ pub fn getEnvMap(allocator: Allocator) !EnvMap {
const value_start = i;
while (ptr[i] != 0) : (i += 1) {}
const value_w = ptr[value_start..i];
const value = try std.unicode.utf16leToUtf8Alloc(allocator, value_w);
const value = try std.unicode.wtf16LeToWtf8Alloc(allocator, value_w);
errdefer allocator.free(value);
 
i += 1; // skip over null byte
@@ -355,25 +380,28 @@ pub const GetEnvVarOwnedError = error{
OutOfMemory,
EnvironmentVariableNotFound,
 
/// See https://github.com/ziglang/zig/issues/1774
InvalidUtf8,
/// On Windows, environment variable keys provided by the user must be valid WTF-8.
/// https://simonsapin.github.io/wtf-8/
InvalidWtf8,
};
 
/// Caller must free returned memory.
/// On Windows, if `key` is not valid [WTF-8](https://simonsapin.github.io/wtf-8/),
/// then `error.InvalidWtf8` is returned.
/// On Windows, the value is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On other platforms, the value is an opaque sequence of bytes with no particular encoding.
pub fn getEnvVarOwned(allocator: Allocator, key: []const u8) GetEnvVarOwnedError![]u8 {
if (builtin.os.tag == .windows) {
const result_w = blk: {
const key_w = try std.unicode.utf8ToUtf16LeWithNull(allocator, key);
defer allocator.free(key_w);
var stack_alloc = std.heap.stackFallback(256 * @sizeOf(u16), allocator);
const stack_allocator = stack_alloc.get();
const key_w = try std.unicode.wtf8ToWtf16LeAllocZ(stack_allocator, key);
defer stack_allocator.free(key_w);
 
break :blk std.os.getenvW(key_w) orelse return error.EnvironmentVariableNotFound;
};
return std.unicode.utf16leToUtf8Alloc(allocator, result_w) catch |err| switch (err) {
error.DanglingSurrogateHalf => return error.InvalidUtf8,
error.ExpectedSecondSurrogateHalf => return error.InvalidUtf8,
error.UnexpectedSecondSurrogateHalf => return error.InvalidUtf8,
else => |e| return e,
};
// wtf16LeToWtf8Alloc can only fail with OutOfMemory
return std.unicode.wtf16LeToWtf8Alloc(allocator, result_w);
} else if (builtin.os.tag == .wasi and !builtin.link_libc) {
var envmap = getEnvMap(allocator) catch return error.OutOfMemory;
defer envmap.deinit();
@@ -385,6 +413,7 @@ pub fn getEnvVarOwned(allocator: Allocator, key: []const u8) GetEnvVarOwnedError
}
}
 
/// On Windows, `key` must be valid UTF-8.
pub fn hasEnvVarConstant(comptime key: []const u8) bool {
if (builtin.os.tag == .windows) {
const key_w = comptime std.unicode.utf8ToUtf16LeStringLiteral(key);
@@ -396,11 +425,22 @@ pub fn hasEnvVarConstant(comptime key: []const u8) bool {
}
}
 
pub fn hasEnvVar(allocator: Allocator, key: []const u8) error{OutOfMemory}!bool {
/// Errors that `hasEnvVar` can return.
pub const HasEnvVarError = error{
/// An allocation needed to perform the lookup failed.
OutOfMemory,

/// On Windows, environment variable keys provided by the user must be valid WTF-8.
/// https://simonsapin.github.io/wtf-8/
InvalidWtf8,
};
 
/// On Windows, if `key` is not valid [WTF-8](https://simonsapin.github.io/wtf-8/),
/// then `error.InvalidWtf8` is returned.
pub fn hasEnvVar(allocator: Allocator, key: []const u8) HasEnvVarError!bool {
if (builtin.os.tag == .windows) {
var stack_alloc = std.heap.stackFallback(256 * @sizeOf(u16), allocator);
const key_w = try std.unicode.utf8ToUtf16LeWithNull(stack_alloc.get(), key);
defer stack_alloc.allocator.free(key_w);
const stack_allocator = stack_alloc.get();
const key_w = try std.unicode.wtf8ToWtf16LeAllocZ(stack_allocator, key);
defer stack_allocator.free(key_w);
return std.os.getenvW(key_w) != null;
} else if (builtin.os.tag == .wasi and !builtin.link_libc) {
var envmap = getEnvMap(allocator) catch return error.OutOfMemory;
@@ -411,9 +451,22 @@ pub fn hasEnvVar(allocator: Allocator, key: []const u8) error{OutOfMemory}!bool
}
}
 
test "os.getEnvVarOwned" {
const ga = std.testing.allocator;
try testing.expectError(error.EnvironmentVariableNotFound, getEnvVarOwned(ga, "BADENV"));
test getEnvVarOwned {
    // `BADENV` is presumed to be unset in the test environment, so the lookup
    // is expected to report that the variable does not exist.
    const lookup = getEnvVarOwned(std.testing.allocator, "BADENV");
    try testing.expectError(error.EnvironmentVariableNotFound, lookup);
}
 
test hasEnvVarConstant {
    // Skipped on WASI when libc is not linked.
    if (builtin.os.tag == .wasi and !builtin.link_libc) {
        return error.SkipZigTest;
    }

    // `BADENV` is presumed to be unset in the test environment.
    const found = hasEnvVarConstant("BADENV");
    try testing.expect(!found);
}
 
test hasEnvVar {
    // `BADENV` is presumed to be unset in the test environment.
    try testing.expect(!(try hasEnvVar(std.testing.allocator, "BADENV")));
}
 
pub const ArgIteratorPosix = struct {
@@ -531,6 +584,7 @@ pub const ArgIteratorWasi = struct {
pub const ArgIteratorWindows = struct {
allocator: Allocator,
/// Owned by the iterator.
/// Encoded as WTF-8.
cmd_line: []const u8,
index: usize = 0,
/// Owned by the iterator. Long enough to hold the entire `cmd_line` plus a null terminator.
@@ -538,20 +592,14 @@ pub const ArgIteratorWindows = struct {
start: usize = 0,
end: usize = 0,
 
pub const InitError = error{ OutOfMemory, InvalidCmdLine };
pub const InitError = error{OutOfMemory};
 
/// `cmd_line_w` *must* be an UTF16-LE-encoded string.
/// `cmd_line_w` *must* be a WTF16-LE-encoded string.
///
/// The iterator makes a copy of `cmd_line_w` converted UTF-8 and keeps it; it does *not* take
/// The iterator makes a copy of `cmd_line_w` converted to WTF-8 and keeps it; it does *not* take
/// ownership of `cmd_line_w`.
pub fn init(allocator: Allocator, cmd_line_w: [*:0]const u16) InitError!ArgIteratorWindows {
const cmd_line = std.unicode.utf16leToUtf8Alloc(allocator, mem.sliceTo(cmd_line_w, 0)) catch |err| switch (err) {
error.DanglingSurrogateHalf,
error.ExpectedSecondSurrogateHalf,
error.UnexpectedSecondSurrogateHalf,
=> return error.InvalidCmdLine,
error.OutOfMemory => return error.OutOfMemory,
};
const cmd_line = try std.unicode.wtf16LeToWtf8Alloc(allocator, mem.sliceTo(cmd_line_w, 0));
errdefer allocator.free(cmd_line);
 
const buffer = try allocator.alloc(u8, cmd_line.len + 1);
@@ -566,6 +614,7 @@ pub const ArgIteratorWindows = struct {
 
/// Returns the next argument and advances the iterator. Returns `null` if at the end of the
/// command-line string. The iterator owns the returned slice.
/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
pub fn next(self: *ArgIteratorWindows) ?[:0]const u8 {
return self.nextWithStrategy(next_strategy);
}
@@ -777,7 +826,6 @@ pub fn ArgIteratorGeneral(comptime options: ArgIteratorGeneralOptions) type {
pub const Self = @This();
 
pub const InitError = error{OutOfMemory};
pub const InitUtf16leError = error{ OutOfMemory, InvalidCmdLine };
 
/// cmd_line_utf8 MUST remain valid and constant while using this instance
pub fn init(allocator: Allocator, cmd_line_utf8: []const u8) InitError!Self {
@@ -805,30 +853,6 @@ pub fn ArgIteratorGeneral(comptime options: ArgIteratorGeneralOptions) type {
};
}
 
/// cmd_line_utf16le MUST be encoded UTF16-LE, and is converted to UTF-8 in an internal buffer
pub fn initUtf16le(allocator: Allocator, cmd_line_utf16le: [*:0]const u16) InitUtf16leError!Self {
const utf16le_slice = mem.sliceTo(cmd_line_utf16le, 0);
const cmd_line = std.unicode.utf16leToUtf8Alloc(allocator, utf16le_slice) catch |err| switch (err) {
error.ExpectedSecondSurrogateHalf,
error.DanglingSurrogateHalf,
error.UnexpectedSecondSurrogateHalf,
=> return error.InvalidCmdLine,
 
error.OutOfMemory => return error.OutOfMemory,
};
errdefer allocator.free(cmd_line);
 
const buffer = try allocator.alloc(u8, cmd_line.len + 1);
errdefer allocator.free(buffer);
 
return Self{
.allocator = allocator,
.cmd_line = cmd_line,
.free_cmd_line_on_deinit = true,
.buffer = buffer,
};
}
 
// Skips over whitespace in the cmd_line.
// Returns false if the terminating sentinel is reached, true otherwise.
// Also skips over comments (if supported).
@@ -1021,6 +1045,8 @@ pub const ArgIterator = struct {
 
/// Get the next argument. Returns 'null' if we are at the end.
/// Returned slice is pointing to the iterator's internal buffer.
/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
pub fn next(self: *ArgIterator) ?([:0]const u8) {
return self.inner.next();
}
@@ -1057,6 +1083,8 @@ pub fn argsWithAllocator(allocator: Allocator) ArgIterator.InitError!ArgIterator
}
 
/// Caller must call argsFree on result.
/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).
/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.
pub fn argsAlloc(allocator: Allocator) ![][:0]u8 {
// TODO refactor to only make 1 allocation.
var it = try argsWithAllocator(allocator);
@@ -1201,7 +1229,7 @@ test "ArgIteratorWindows" {
}
 
fn testArgIteratorWindows(cmd_line: []const u8, expected_args: []const []const u8) !void {
const cmd_line_w = try std.unicode.utf8ToUtf16LeWithNull(testing.allocator, cmd_line);
const cmd_line_w = try std.unicode.wtf8ToWtf16LeAllocZ(testing.allocator, cmd_line);
defer testing.allocator.free(cmd_line_w);
 
// next
 
lib/std/unicode.zig added: 1932, removed: 517, total 1415
@@ -39,7 +39,16 @@ pub fn utf8ByteSequenceLength(first_byte: u8) !u3 {
/// out: the out buffer to write to. Must have a len >= utf8CodepointSequenceLength(c).
/// Errors: if c cannot be encoded in UTF-8.
/// Returns: the number of bytes written to out.
pub fn utf8Encode(c: u21, out: []u8) !u3 {
pub fn utf8Encode(c: u21, out: []u8) error{ Utf8CannotEncodeSurrogateHalf, CodepointTooLarge }!u3 {
return utf8EncodeImpl(c, out, .cannot_encode_surrogate_half);
}
 
/// Comptime switch passed to the shared `*Impl` helpers to select how
/// surrogate codepoints are treated.
const Surrogates = enum {
/// Strict mode: encoding a surrogate codepoint fails with
/// `error.Utf8CannotEncodeSurrogateHalf`.
cannot_encode_surrogate_half,
/// Permissive mode: surrogate codepoints are encoded like any other
/// 3-byte sequence instead of being rejected.
can_encode_surrogate_half,
};
 
fn utf8EncodeImpl(c: u21, out: []u8, comptime surrogates: Surrogates) !u3 {
const length = try utf8CodepointSequenceLength(c);
assert(out.len >= length);
switch (length) {
@@ -53,7 +62,9 @@ pub fn utf8Encode(c: u21, out: []u8) !u3 {
out[1] = @as(u8, @intCast(0b10000000 | (c & 0b111111)));
},
3 => {
if (0xd800 <= c and c <= 0xdfff) return error.Utf8CannotEncodeSurrogateHalf;
if (surrogates == .cannot_encode_surrogate_half and isSurrogateCodepoint(c)) {
return error.Utf8CannotEncodeSurrogateHalf;
}
out[0] = @as(u8, @intCast(0b11100000 | (c >> 12)));
out[1] = @as(u8, @intCast(0b10000000 | ((c >> 6) & 0b111111)));
out[2] = @as(u8, @intCast(0b10000000 | (c & 0b111111)));
@@ -116,12 +127,22 @@ pub fn utf8Decode2(bytes: []const u8) Utf8Decode2Error!u21 {
return value;
}
 
const Utf8Decode3Error = error{
Utf8ExpectedContinuation,
Utf8OverlongEncoding,
const Utf8Decode3Error = Utf8Decode3AllowSurrogateHalfError || error{
Utf8EncodesSurrogateHalf,
};
/// Decodes a 3-byte sequence via `utf8Decode3AllowSurrogateHalf`, then rejects
/// any result in the surrogate range (U+D800 to U+DFFF) with
/// `error.Utf8EncodesSurrogateHalf`.
pub fn utf8Decode3(bytes: []const u8) Utf8Decode3Error!u21 {
    const codepoint = try utf8Decode3AllowSurrogateHalf(bytes);

    return switch (codepoint) {
        0xd800...0xdfff => error.Utf8EncodesSurrogateHalf,
        else => codepoint,
    };
}
 
const Utf8Decode3AllowSurrogateHalfError = error{
Utf8ExpectedContinuation,
Utf8OverlongEncoding,
};
pub fn utf8Decode3AllowSurrogateHalf(bytes: []const u8) Utf8Decode3AllowSurrogateHalfError!u21 {
assert(bytes.len == 3);
assert(bytes[0] & 0b11110000 == 0b11100000);
var value: u21 = bytes[0] & 0b00001111;
@@ -135,7 +156,6 @@ pub fn utf8Decode3(bytes: []const u8) Utf8Decode3Error!u21 {
value |= bytes[2] & 0b00111111;
 
if (value < 0x800) return error.Utf8OverlongEncoding;
if (0xd800 <= value and value <= 0xdfff) return error.Utf8EncodesSurrogateHalf;
 
return value;
}
@@ -213,6 +233,10 @@ pub fn utf8CountCodepoints(s: []const u8) !usize {
 
/// Returns true if the input consists entirely of UTF-8 codepoints.
/// Validation is delegated to `utf8ValidateSliceImpl` in
/// `.cannot_encode_surrogate_half` mode.
pub fn utf8ValidateSlice(input: []const u8) bool {
    const is_valid = utf8ValidateSliceImpl(input, .cannot_encode_surrogate_half);
    return is_valid;
}
 
fn utf8ValidateSliceImpl(input: []const u8, comptime surrogates: Surrogates) bool {
var remaining = input;
 
const chunk_len = std.simd.suggestVectorLength(u8) orelse 1;
@@ -240,9 +264,15 @@ pub fn utf8ValidateSlice(input: []const u8) bool {
const xx = 0xF1; // invalid: size 1
const as = 0xF0; // ASCII: size 1
const s1 = 0x02; // accept 0, size 2
const s2 = 0x13; // accept 1, size 3
const s2 = switch (surrogates) {
.cannot_encode_surrogate_half => 0x13, // accept 1, size 3
.can_encode_surrogate_half => 0x03, // accept 0, size 3
};
const s3 = 0x03; // accept 0, size 3
const s4 = 0x23; // accept 2, size 3
const s4 = switch (surrogates) {
.cannot_encode_surrogate_half => 0x23, // accept 2, size 3
.can_encode_surrogate_half => 0x03, // accept 0, size 3
};
const s5 = 0x34; // accept 3, size 4
const s6 = 0x04; // accept 0, size 4
const s7 = 0x44; // accept 4, size 4
@@ -458,7 +488,9 @@ pub const Utf16LeIterator = struct {
};
}
 
pub fn nextCodepoint(it: *Utf16LeIterator) !?u21 {
pub const NextCodepointError = error{ DanglingSurrogateHalf, ExpectedSecondSurrogateHalf, UnexpectedSecondSurrogateHalf };
 
pub fn nextCodepoint(it: *Utf16LeIterator) NextCodepointError!?u21 {
assert(it.i <= it.bytes.len);
if (it.i == it.bytes.len) return null;
var code_units: [2]u16 = undefined;
@@ -770,94 +802,230 @@ fn testDecode(bytes: []const u8) !u21 {
return utf8Decode(bytes);
}
 
/// Print the given `utf8` string, encoded as UTF-8 bytes.
/// Ill-formed UTF-8 byte sequences are replaced by the replacement character (U+FFFD)
/// according to "U+FFFD Substitution of Maximal Subparts" from Chapter 3 of
/// the Unicode standard, and as specified by https://encoding.spec.whatwg.org/#utf-8-decoder
///
/// `fmt` and `options` are ignored. Output is staged in a fixed-size stack
/// buffer and handed to `writer.writeAll`; any error from the writer is returned.
fn formatUtf8(
    utf8: []const u8,
    comptime fmt: []const u8,
    options: std.fmt.FormatOptions,
    writer: anytype,
) !void {
    _ = fmt;
    _ = options;
    var buf: [300]u8 = undefined; // just an arbitrary size
    var u8len: usize = 0;

    // This implementation is based on this specification:
    // https://encoding.spec.whatwg.org/#utf-8-decoder
    //
    // Well-formed sequences are copied verbatim from the input, so only the
    // continuation-byte bookkeeping is tracked; the decoded codepoint value
    // itself is never needed.
    var cont_bytes_seen: u3 = 0;
    var cont_bytes_needed: u3 = 0;
    // Inclusive range the next continuation byte must fall in. Only the first
    // continuation byte after certain lead bytes uses a narrowed range.
    var lower_boundary: u8 = 0x80;
    var upper_boundary: u8 = 0xBF;

    var i: usize = 0;
    while (i < utf8.len) {
        const byte = utf8[i];
        if (cont_bytes_needed == 0) {
            // Expecting the start of a new sequence.
            switch (byte) {
                0x00...0x7F => {
                    buf[u8len] = byte;
                    u8len += 1;
                },
                0xC2...0xDF => {
                    cont_bytes_needed = 1;
                },
                0xE0...0xEF => {
                    if (byte == 0xE0) lower_boundary = 0xA0;
                    if (byte == 0xED) upper_boundary = 0x9F;
                    cont_bytes_needed = 2;
                },
                0xF0...0xF4 => {
                    if (byte == 0xF0) lower_boundary = 0x90;
                    if (byte == 0xF4) upper_boundary = 0x8F;
                    cont_bytes_needed = 3;
                },
                else => {
                    u8len += utf8Encode(replacement_character, buf[u8len..]) catch unreachable;
                },
            }
            // consume the byte
            i += 1;
        } else if (byte < lower_boundary or byte > upper_boundary) {
            // The sequence in progress is ill-formed: abandon it and emit a
            // single replacement character for the bytes seen so far.
            cont_bytes_needed = 0;
            cont_bytes_seen = 0;
            lower_boundary = 0x80;
            upper_boundary = 0xBF;
            u8len += utf8Encode(replacement_character, buf[u8len..]) catch unreachable;
            // do not consume the current byte, it should now be treated as a possible start byte
        } else {
            lower_boundary = 0x80;
            upper_boundary = 0xBF;
            cont_bytes_seen += 1;
            // consume the byte
            i += 1;

            if (cont_bytes_seen == cont_bytes_needed) {
                // The sequence is complete and well-formed: copy its bytes
                // straight from the input.
                const codepoint_len = cont_bytes_seen + 1;
                const codepoint_start_i = i - codepoint_len;
                @memcpy(buf[u8len..][0..codepoint_len], utf8[codepoint_start_i..][0..codepoint_len]);
                u8len += codepoint_len;

                cont_bytes_needed = 0;
                cont_bytes_seen = 0;
            }
        }
        // make sure there's always enough room for another maximum length UTF-8 codepoint
        if (u8len + 4 > buf.len) {
            try writer.writeAll(buf[0..u8len]);
            u8len = 0;
        }
    }
    if (cont_bytes_needed != 0) {
        // The input ended in the middle of a sequence.
        // we know there's enough room because we always flush
        // if there's less than 4 bytes remaining in the buffer.
        u8len += utf8Encode(replacement_character, buf[u8len..]) catch unreachable;
    }
    try writer.writeAll(buf[0..u8len]);
}
 
/// Wrap a (potentially ill-formed) UTF-8 string in a Formatter.
/// When formatted, ill-formed UTF-8 byte sequences are replaced by the
/// replacement character (U+FFFD) according to "U+FFFD Substitution of Maximal
/// Subparts" from Chapter 3 of the Unicode standard, and as specified by
/// https://encoding.spec.whatwg.org/#utf-8-decoder
pub fn fmtUtf8(utf8: []const u8) std.fmt.Formatter(formatUtf8) {
    const Utf8Formatter = std.fmt.Formatter(formatUtf8);
    return Utf8Formatter{ .data = utf8 };
}
 
test "fmtUtf8" {
    const Case = struct {
        expected: []const u8,
        input: []const u8,
    };
    const cases = [_]Case{
        // Well-formed input passes through unchanged.
        .{ .expected = "", .input = "" },
        .{ .expected = "foo", .input = "foo" },
        .{ .expected = "𐐷", .input = "𐐷" },
        // Table 3-8. U+FFFD for Non-Shortest Form Sequences
        .{ .expected = "��������A", .input = "\xC0\xAF\xE0\x80\xBF\xF0\x81\x82A" },
        // Table 3-9. U+FFFD for Ill-Formed Sequences for Surrogates
        .{ .expected = "��������A", .input = "\xED\xA0\x80\xED\xBF\xBF\xED\xAFA" },
        // Table 3-10. U+FFFD for Other Ill-Formed Sequences
        .{ .expected = "�����A��B", .input = "\xF4\x91\x92\x93\xFFA\x80\xBFB" },
        // Table 3-11. U+FFFD for Truncated Sequences
        .{ .expected = "����A", .input = "\xE1\x80\xE2\xF0\x91\x92\xF1\xBFA" },
    };

    for (cases) |case| {
        try testing.expectFmt(case.expected, "{}", .{fmtUtf8(case.input)});
    }
}
 
/// Shared implementation behind the ArrayList-based UTF-16LE to UTF-8 conversions.
/// Converts `utf16le` and appends the resulting bytes to `array_list`; items
/// already in the list are left in place.
///
/// With `.cannot_encode_surrogate_half`, the input is decoded with
/// `Utf16LeIterator` and its decoding errors are returned to the caller.
/// With `.can_encode_surrogate_half`, the input is decoded with
/// `Wtf16LeIterator` and encoded with `wtf8Encode`, so only allocation can fail.
///
/// If an error is returned, bytes appended before the failure remain in `array_list`.
fn utf16LeToUtf8ArrayListImpl(
    array_list: *std.ArrayList(u8),
    utf16le: []const u16,
    comptime surrogates: Surrogates,
) (switch (surrogates) {
    .cannot_encode_surrogate_half => Utf16LeToUtf8AllocError,
    .can_encode_surrogate_half => mem.Allocator.Error,
})!void {
    // optimistically guess that it will all be ascii.
    // The reservation must be relative to the current length rather than a
    // total capacity: the fast path below appends with
    // `appendSliceAssumeCapacity`, which would overrun the allocation if the
    // list already contained items.
    try array_list.ensureUnusedCapacity(utf16le.len);

    var remaining = utf16le;
    if (builtin.zig_backend != .stage2_x86_64) {
        const chunk_len = std.simd.suggestVectorLength(u16) orelse 1;
        const Chunk = @Vector(chunk_len, u16);

        // Fast path. Check for and encode ASCII characters at the start of the input.
        while (remaining.len >= chunk_len) {
            const chunk: Chunk = remaining[0..chunk_len].*;
            const mask: Chunk = @splat(std.mem.nativeToLittle(u16, 0x7F));
            if (@reduce(.Or, chunk | mask != mask)) {
                // found a non ASCII code unit
                break;
            }
            const chunk_byte_len = chunk_len * 2;
            const chunk_bytes: @Vector(chunk_byte_len, u8) = (std.mem.sliceAsBytes(remaining)[0..chunk_byte_len]).*;
            const deinterlaced_bytes = std.simd.deinterlace(2, chunk_bytes);
            const ascii_bytes: [chunk_len]u8 = deinterlaced_bytes[0];
            // We reserved enough unused space to encode every UTF-16 code unit
            // as ASCII, so if the entire string is ASCII then we are
            // guaranteed to have enough space allocated
            array_list.appendSliceAssumeCapacity(&ascii_bytes);
            remaining = remaining[chunk_len..];
        }
    }

    // Slow path: decode whatever is left one codepoint at a time, growing the
    // list by exactly the encoded length before writing into the new tail.
    var out_index: usize = array_list.items.len;
    switch (surrogates) {
        .cannot_encode_surrogate_half => {
            var it = Utf16LeIterator.init(remaining);
            while (try it.nextCodepoint()) |codepoint| {
                const utf8_len = utf8CodepointSequenceLength(codepoint) catch unreachable;
                try array_list.resize(array_list.items.len + utf8_len);
                assert((utf8Encode(codepoint, array_list.items[out_index..]) catch unreachable) == utf8_len);
                out_index += utf8_len;
            }
        },
        .can_encode_surrogate_half => {
            var it = Wtf16LeIterator.init(remaining);
            while (it.nextCodepoint()) |codepoint| {
                const utf8_len = utf8CodepointSequenceLength(codepoint) catch unreachable;
                try array_list.resize(array_list.items.len + utf8_len);
                assert((wtf8Encode(codepoint, array_list.items[out_index..]) catch unreachable) == utf8_len);
                out_index += utf8_len;
            }
        },
    }
}
 
pub const Utf16LeToUtf8AllocError = mem.Allocator.Error || Utf16LeToUtf8Error;
 
/// Converts `utf16le` to UTF-8, writing the result into `array_list`.
/// Delegates to `utf16LeToUtf8ArrayListImpl` in `.cannot_encode_surrogate_half`
/// mode, so both allocation failures and UTF-16 decoding errors are returned.
pub fn utf16LeToUtf8ArrayList(array_list: *std.ArrayList(u8), utf16le: []const u16) Utf16LeToUtf8AllocError!void {
    try utf16LeToUtf8ArrayListImpl(array_list, utf16le, .cannot_encode_surrogate_half);
}
 
/// Deprecated; renamed to utf16LeToUtf8Alloc
pub const utf16leToUtf8Alloc = utf16LeToUtf8Alloc;
 
/// Caller must free returned memory.
pub fn utf16leToUtf8Alloc(allocator: mem.Allocator, utf16le: []const u16) ![]u8 {
pub fn utf16LeToUtf8Alloc(allocator: mem.Allocator, utf16le: []const u16) Utf16LeToUtf8AllocError![]u8 {
// optimistically guess that it will all be ascii.
var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len);
errdefer result.deinit();
 
var remaining = utf16le;
if (builtin.zig_backend != .stage2_x86_64) {
const chunk_len = std.simd.suggestVectorLength(u16) orelse 1;
const Chunk = @Vector(chunk_len, u16);
 
// Fast path. Check for and encode ASCII characters at the start of the input.
while (remaining.len >= chunk_len) {
const chunk: Chunk = remaining[0..chunk_len].*;
const mask: Chunk = @splat(std.mem.nativeToLittle(u16, 0x7F));
if (@reduce(.Or, chunk | mask != mask)) {
// found a non ASCII code unit
break;
}
const chunk_byte_len = chunk_len * 2;
const chunk_bytes: @Vector(chunk_byte_len, u8) = (std.mem.sliceAsBytes(remaining)[0..chunk_byte_len]).*;
const deinterlaced_bytes = std.simd.deinterlace(2, chunk_bytes);
const ascii_bytes: [chunk_len]u8 = deinterlaced_bytes[0];
// We allocated enough space to encode every UTF-16 code unit
// as ASCII, so if the entire string is ASCII then we are
// guaranteed to have enough space allocated
result.appendSliceAssumeCapacity(&ascii_bytes);
remaining = remaining[chunk_len..];
}
}
 
var out_index: usize = result.items.len;
var it = Utf16LeIterator.init(remaining);
while (try it.nextCodepoint()) |codepoint| {
const utf8_len = utf8CodepointSequenceLength(codepoint) catch unreachable;
try result.resize(result.items.len + utf8_len);
assert((utf8Encode(codepoint, result.items[out_index..]) catch unreachable) == utf8_len);
out_index += utf8_len;
}
try utf16LeToUtf8ArrayList(&result, utf16le);
 
return result.toOwnedSlice();
}
 
/// Deprecated; renamed to utf16LeToUtf8AllocZ
pub const utf16leToUtf8AllocZ = utf16LeToUtf8AllocZ;
 
/// Caller must free returned memory.
pub fn utf16leToUtf8AllocZ(allocator: mem.Allocator, utf16le: []const u16) ![:0]u8 {
pub fn utf16LeToUtf8AllocZ(allocator: mem.Allocator, utf16le: []const u16) Utf16LeToUtf8AllocError![:0]u8 {
// optimistically guess that it will all be ascii (and allocate space for the null terminator)
var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len + 1);
errdefer result.deinit();
 
var remaining = utf16le;
if (builtin.zig_backend != .stage2_x86_64) {
const chunk_len = std.simd.suggestVectorLength(u16) orelse 1;
const Chunk = @Vector(chunk_len, u16);
try utf16LeToUtf8ArrayList(&result, utf16le);
 
// Fast path. Check for and encode ASCII characters at the start of the input.
while (remaining.len >= chunk_len) {
const chunk: Chunk = remaining[0..chunk_len].*;
const mask: Chunk = @splat(std.mem.nativeToLittle(u16, 0x7F));
if (@reduce(.Or, chunk | mask != mask)) {
// found a non ASCII code unit
break;
}
const chunk_byte_len = chunk_len * 2;
const chunk_bytes: @Vector(chunk_byte_len, u8) = (std.mem.sliceAsBytes(remaining)[0..chunk_byte_len]).*;
const deinterlaced_bytes = std.simd.deinterlace(2, chunk_bytes);
const ascii_bytes: [chunk_len]u8 = deinterlaced_bytes[0];
// We allocated enough space to encode every UTF-16 code unit
// as ASCII, so if the entire string is ASCII then we are
// guaranteed to have enough space allocated
result.appendSliceAssumeCapacity(&ascii_bytes);
remaining = remaining[chunk_len..];
}
}
 
var out_index = result.items.len;
var it = Utf16LeIterator.init(remaining);
while (try it.nextCodepoint()) |codepoint| {
const utf8_len = utf8CodepointSequenceLength(codepoint) catch unreachable;
try result.resize(result.items.len + utf8_len);
assert((utf8Encode(codepoint, result.items[out_index..]) catch unreachable) == utf8_len);
out_index += utf8_len;
}
return result.toOwnedSliceSentinel(0);
}
 
pub const Utf16LeToUtf8Error = Utf16LeIterator.NextCodepointError;
 
/// Asserts that the output buffer is big enough.
/// Returns end byte index into utf8.
pub fn utf16leToUtf8(utf8: []u8, utf16le: []const u16) !usize {
fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surrogates) (switch (surrogates) {
.cannot_encode_surrogate_half => Utf16LeToUtf8Error,
.can_encode_surrogate_half => error{},
})!usize {
var end_index: usize = 0;
 
var remaining = utf16le;
@@ -883,30 +1051,58 @@ pub fn utf16leToUtf8(utf8: []u8, utf16le: []const u16) !usize {
}
}
 
var it = Utf16LeIterator.init(remaining);
while (try it.nextCodepoint()) |codepoint| {
end_index += try utf8Encode(codepoint, utf8[end_index..]);
switch (surrogates) {
.cannot_encode_surrogate_half => {
var it = Utf16LeIterator.init(remaining);
while (try it.nextCodepoint()) |codepoint| {
end_index += utf8Encode(codepoint, utf8[end_index..]) catch |err| switch (err) {
// The maximum possible codepoint encoded by UTF-16 is U+10FFFF,
// which is within the valid codepoint range.
error.CodepointTooLarge => unreachable,
// We know the codepoint was valid in UTF-16, meaning it is not
// an unpaired surrogate codepoint.
error.Utf8CannotEncodeSurrogateHalf => unreachable,
};
}
},
.can_encode_surrogate_half => {
var it = Wtf16LeIterator.init(remaining);
while (it.nextCodepoint()) |codepoint| {
end_index += wtf8Encode(codepoint, utf8[end_index..]) catch |err| switch (err) {
// The maximum possible codepoint encoded by UTF-16 is U+10FFFF,
// which is within the valid codepoint range.
error.CodepointTooLarge => unreachable,
};
}
},
}
return end_index;
}
 
test "utf16leToUtf8" {
/// Deprecated; renamed to utf16LeToUtf8
pub const utf16leToUtf8 = utf16LeToUtf8;

/// Converts `utf16le` to UTF-8, writing into the caller-provided `utf8` buffer.
/// Delegates to `utf16LeToUtf8Impl` in `.cannot_encode_surrogate_half` mode.
/// Returns end byte index into utf8.
pub fn utf16LeToUtf8(utf8: []u8, utf16le: []const u16) Utf16LeToUtf8Error!usize {
    const end_index = try utf16LeToUtf8Impl(utf8, utf16le, .cannot_encode_surrogate_half);
    return end_index;
}
 
test utf16LeToUtf8 {
var utf16le: [2]u16 = undefined;
const utf16le_as_bytes = mem.sliceAsBytes(utf16le[0..]);
 
{
mem.writeInt(u16, utf16le_as_bytes[0..2], 'A', .little);
mem.writeInt(u16, utf16le_as_bytes[2..4], 'a', .little);
const utf8 = try utf16leToUtf8Alloc(std.testing.allocator, &utf16le);
defer std.testing.allocator.free(utf8);
const utf8 = try utf16LeToUtf8Alloc(testing.allocator, &utf16le);
defer testing.allocator.free(utf8);
try testing.expect(mem.eql(u8, utf8, "Aa"));
}
 
{
mem.writeInt(u16, utf16le_as_bytes[0..2], 0x80, .little);
mem.writeInt(u16, utf16le_as_bytes[2..4], 0xffff, .little);
const utf8 = try utf16leToUtf8Alloc(std.testing.allocator, &utf16le);
defer std.testing.allocator.free(utf8);
const utf8 = try utf16LeToUtf8Alloc(testing.allocator, &utf16le);
defer testing.allocator.free(utf8);
try testing.expect(mem.eql(u8, utf8, "\xc2\x80" ++ "\xef\xbf\xbf"));
}
 
@@ -914,8 +1110,8 @@ test "utf16leToUtf8" {
// the values just outside the surrogate half range
mem.writeInt(u16, utf16le_as_bytes[0..2], 0xd7ff, .little);
mem.writeInt(u16, utf16le_as_bytes[2..4], 0xe000, .little);
const utf8 = try utf16leToUtf8Alloc(std.testing.allocator, &utf16le);
defer std.testing.allocator.free(utf8);
const utf8 = try utf16LeToUtf8Alloc(testing.allocator, &utf16le);
defer testing.allocator.free(utf8);
try testing.expect(mem.eql(u8, utf8, "\xed\x9f\xbf" ++ "\xee\x80\x80"));
}
 
@@ -923,8 +1119,8 @@ test "utf16leToUtf8" {
// smallest surrogate pair
mem.writeInt(u16, utf16le_as_bytes[0..2], 0xd800, .little);
mem.writeInt(u16, utf16le_as_bytes[2..4], 0xdc00, .little);
const utf8 = try utf16leToUtf8Alloc(std.testing.allocator, &utf16le);
defer std.testing.allocator.free(utf8);
const utf8 = try utf16LeToUtf8Alloc(testing.allocator, &utf16le);
defer testing.allocator.free(utf8);
try testing.expect(mem.eql(u8, utf8, "\xf0\x90\x80\x80"));
}
 
@@ -932,31 +1128,30 @@ test "utf16leToUtf8" {
// largest surrogate pair
mem.writeInt(u16, utf16le_as_bytes[0..2], 0xdbff, .little);
mem.writeInt(u16, utf16le_as_bytes[2..4], 0xdfff, .little);
const utf8 = try utf16leToUtf8Alloc(std.testing.allocator, &utf16le);
defer std.testing.allocator.free(utf8);
const utf8 = try utf16LeToUtf8Alloc(testing.allocator, &utf16le);
defer testing.allocator.free(utf8);
try testing.expect(mem.eql(u8, utf8, "\xf4\x8f\xbf\xbf"));
}
 
{
mem.writeInt(u16, utf16le_as_bytes[0..2], 0xdbff, .little);
mem.writeInt(u16, utf16le_as_bytes[2..4], 0xdc00, .little);
const utf8 = try utf16leToUtf8Alloc(std.testing.allocator, &utf16le);
defer std.testing.allocator.free(utf8);
const utf8 = try utf16LeToUtf8Alloc(testing.allocator, &utf16le);
defer testing.allocator.free(utf8);
try testing.expect(mem.eql(u8, utf8, "\xf4\x8f\xb0\x80"));
}
 
{
mem.writeInt(u16, utf16le_as_bytes[0..2], 0xdcdc, .little);
mem.writeInt(u16, utf16le_as_bytes[2..4], 0xdcdc, .little);
const result = utf16leToUtf8Alloc(std.testing.allocator, &utf16le);
try std.testing.expectError(error.UnexpectedSecondSurrogateHalf, result);
const result = utf16LeToUtf8Alloc(testing.allocator, &utf16le);
try testing.expectError(error.UnexpectedSecondSurrogateHalf, result);
}
}
 
pub fn utf8ToUtf16LeWithNull(allocator: mem.Allocator, utf8: []const u8) ![:0]u16 {
fn utf8ToUtf16LeArrayListImpl(array_list: *std.ArrayList(u16), utf8: []const u8, comptime surrogates: Surrogates) !void {
// optimistically guess that it will not require surrogate pairs
var result = try std.ArrayList(u16).initCapacity(allocator, utf8.len + 1);
errdefer result.deinit();
try array_list.ensureTotalCapacityPrecise(utf8.len);
 
var remaining = utf8;
// Need support for std.simd.interlace
@@ -974,33 +1169,65 @@ pub fn utf8ToUtf16LeWithNull(allocator: mem.Allocator, utf8: []const u8) ![:0]u1
}
const zeroes: Chunk = @splat(0);
const utf16_chunk: [chunk_len * 2]u8 align(@alignOf(u16)) = std.simd.interlace(.{ chunk, zeroes });
result.appendSliceAssumeCapacity(std.mem.bytesAsSlice(u16, &utf16_chunk));
array_list.appendSliceAssumeCapacity(std.mem.bytesAsSlice(u16, &utf16_chunk));
remaining = remaining[chunk_len..];
}
}
 
const view = try Utf8View.init(remaining);
const view = switch (surrogates) {
.cannot_encode_surrogate_half => try Utf8View.init(remaining),
.can_encode_surrogate_half => try Wtf8View.init(remaining),
};
var it = view.iterator();
while (it.nextCodepoint()) |codepoint| {
if (codepoint < 0x10000) {
const short = @as(u16, @intCast(codepoint));
try result.append(mem.nativeToLittle(u16, short));
try array_list.append(mem.nativeToLittle(u16, short));
} else {
const high = @as(u16, @intCast((codepoint - 0x10000) >> 10)) + 0xD800;
const low = @as(u16, @intCast(codepoint & 0x3FF)) + 0xDC00;
var out: [2]u16 = undefined;
out[0] = mem.nativeToLittle(u16, high);
out[1] = mem.nativeToLittle(u16, low);
try result.appendSlice(out[0..]);
try array_list.appendSlice(out[0..]);
}
}
}
 
/// Converts `utf8` to UTF-16 LE and appends the resulting code units to `array_list`.
/// Returns `error.InvalidUtf8` if `utf8` is not valid UTF-8, and
/// `error.OutOfMemory` if `array_list` cannot grow.
pub fn utf8ToUtf16LeArrayList(array_list: *std.ArrayList(u16), utf8: []const u8) error{ InvalidUtf8, OutOfMemory }!void {
    // The implementation only ensures a *total* capacity of `utf8.len` before
    // appending with `appendSliceAssumeCapacity`. That is not enough when
    // `array_list` already contains items, so reserve the room relative to the
    // current length here.
    try array_list.ensureUnusedCapacity(utf8.len);
    return utf8ToUtf16LeArrayListImpl(array_list, utf8, .cannot_encode_surrogate_half);
}
 
/// Converts `utf8` to a newly allocated UTF-16 LE slice.
/// Caller must free returned memory.
/// Returns `error.InvalidUtf8` if `utf8` is not valid UTF-8.
pub fn utf8ToUtf16LeAlloc(allocator: mem.Allocator, utf8: []const u8) error{ InvalidUtf8, OutOfMemory }![]u16 {
    // Initial capacity guess: one UTF-16 code unit per UTF-8 byte.
    var code_units = try std.ArrayList(u16).initCapacity(allocator, utf8.len);
    errdefer code_units.deinit();

    try utf8ToUtf16LeArrayListImpl(&code_units, utf8, .cannot_encode_surrogate_half);
    return code_units.toOwnedSlice();
}
 
/// Deprecated; renamed to utf8ToUtf16LeAllocZ
pub const utf8ToUtf16LeWithNull = utf8ToUtf16LeAllocZ;
 
/// Converts `utf8` to a newly allocated, 0-terminated UTF-16 LE slice.
/// Caller must free returned memory.
/// Returns `error.InvalidUtf8` if `utf8` is not valid UTF-8.
pub fn utf8ToUtf16LeAllocZ(allocator: mem.Allocator, utf8: []const u8) error{ InvalidUtf8, OutOfMemory }![:0]u16 {
    // Initial capacity guess: one UTF-16 code unit per UTF-8 byte, plus one
    // extra slot for the terminating 0.
    var code_units = try std.ArrayList(u16).initCapacity(allocator, utf8.len + 1);
    errdefer code_units.deinit();

    try utf8ToUtf16LeArrayListImpl(&code_units, utf8, .cannot_encode_surrogate_half);
    return code_units.toOwnedSliceSentinel(0);
}
 
/// Returns index of next character. If exact fit, returned index equals output slice length.
/// Assumes there is enough space for the output.
pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) !usize {
pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) error{InvalidUtf8}!usize {
return utf8ToUtf16LeImpl(utf16le, utf8, .cannot_encode_surrogate_half);
}
 
pub fn utf8ToUtf16LeImpl(utf16le: []u16, utf8: []const u8, comptime surrogates: Surrogates) !usize {
var dest_i: usize = 0;
 
var remaining = utf8;
@@ -1027,9 +1254,15 @@ pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) !usize {
 
var src_i: usize = 0;
while (src_i < remaining.len) {
const n = utf8ByteSequenceLength(remaining[src_i]) catch return error.InvalidUtf8;
const n = utf8ByteSequenceLength(remaining[src_i]) catch return switch (surrogates) {
.cannot_encode_surrogate_half => error.InvalidUtf8,
.can_encode_surrogate_half => error.InvalidWtf8,
};
const next_src_i = src_i + n;
const codepoint = utf8Decode(remaining[src_i..next_src_i]) catch return error.InvalidUtf8;
const codepoint = switch (surrogates) {
.cannot_encode_surrogate_half => utf8Decode(remaining[src_i..next_src_i]) catch return error.InvalidUtf8,
.can_encode_surrogate_half => wtf8Decode(remaining[src_i..next_src_i]) catch return error.InvalidWtf8,
};
if (codepoint < 0x10000) {
const short = @as(u16, @intCast(codepoint));
utf16le[dest_i] = mem.nativeToLittle(u16, short);
@@ -1064,21 +1297,59 @@ test "utf8ToUtf16Le" {
}
}
 
test "utf8ToUtf16LeWithNull" {
test utf8ToUtf16LeArrayList {
{
const utf16 = try utf8ToUtf16LeWithNull(testing.allocator, "𐐷");
var list = std.ArrayList(u16).init(testing.allocator);
defer list.deinit();
try utf8ToUtf16LeArrayList(&list, "𐐷");
try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(list.items));
}
{
var list = std.ArrayList(u16).init(testing.allocator);
defer list.deinit();
try utf8ToUtf16LeArrayList(&list, "\u{10FFFF}");
try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(list.items));
}
{
var list = std.ArrayList(u16).init(testing.allocator);
defer list.deinit();
const result = utf8ToUtf16LeArrayList(&list, "\xf4\x90\x80\x80");
try testing.expectError(error.InvalidUtf8, result);
}
}
 
// Exercises utf8ToUtf16LeAlloc with two inputs that need a surrogate pair
// (including the largest codepoint, U+10FFFF) and one input that must be
// rejected with error.InvalidUtf8.
test utf8ToUtf16LeAlloc {
{
const utf16 = try utf8ToUtf16LeAlloc(testing.allocator, "𐐷");
defer testing.allocator.free(utf16);
// Expected bytes are the little-endian code units 0xD801 0xDC37.
try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16[0..]));
}
{
const utf16 = try utf8ToUtf16LeAlloc(testing.allocator, "\u{10FFFF}");
defer testing.allocator.free(utf16);
// Expected bytes are the little-endian code units 0xDBFF 0xDFFF.
try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16[0..]));
}
{
// No free needed: the call is expected to fail.
const result = utf8ToUtf16LeAlloc(testing.allocator, "\xf4\x90\x80\x80");
try testing.expectError(error.InvalidUtf8, result);
}
}
 
test utf8ToUtf16LeAllocZ {
{
const utf16 = try utf8ToUtf16LeAllocZ(testing.allocator, "𐐷");
defer testing.allocator.free(utf16);
try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16[0..]));
try testing.expect(utf16[2] == 0);
}
{
const utf16 = try utf8ToUtf16LeWithNull(testing.allocator, "\u{10FFFF}");
const utf16 = try utf8ToUtf16LeAllocZ(testing.allocator, "\u{10FFFF}");
defer testing.allocator.free(utf16);
try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16[0..]));
try testing.expect(utf16[2] == 0);
}
{
const result = utf8ToUtf16LeWithNull(testing.allocator, "\xf4\x90\x80\x80");
const result = utf8ToUtf16LeAllocZ(testing.allocator, "\xf4\x90\x80\x80");
try testing.expectError(error.InvalidUtf8, result);
}
}
@@ -1127,8 +1398,9 @@ test "calculate utf16 string length of given utf8 string in u16" {
try comptime testCalcUtf16LeLen();
}
 
/// Print the given `utf16le` string
fn formatUtf16le(
/// Print the given `utf16le` string, encoded as UTF-8 bytes.
/// Unpaired surrogates are replaced by the replacement character (U+FFFD).
fn formatUtf16Le(
utf16le: []const u16,
comptime fmt: []const u8,
options: std.fmt.FormatOptions,
@@ -1136,13 +1408,14 @@ fn formatUtf16le(
) !void {
_ = fmt;
_ = options;
var buf: [300]u8 = undefined; // just a random size I chose
var buf: [300]u8 = undefined; // just an arbitrary size
var it = Utf16LeIterator.init(utf16le);
var u8len: usize = 0;
while (it.nextCodepoint() catch replacement_character) |codepoint| {
u8len += utf8Encode(codepoint, buf[u8len..]) catch
utf8Encode(replacement_character, buf[u8len..]) catch unreachable;
if (u8len + 3 >= buf.len) {
// make sure there's always enough room for another maximum length UTF-8 codepoint
if (u8len + 4 > buf.len) {
try writer.writeAll(buf[0..u8len]);
u8len = 0;
}
@@ -1150,22 +1423,27 @@ fn formatUtf16le(
try writer.writeAll(buf[0..u8len]);
}
 
/// Return a Formatter for a Utf16le string
pub fn fmtUtf16le(utf16le: []const u16) std.fmt.Formatter(formatUtf16le) {
/// Deprecated; renamed to fmtUtf16Le
pub const fmtUtf16le = fmtUtf16Le;
 
/// Return a Formatter for a (potentially ill-formed) UTF-16 LE string,
/// which will be converted to UTF-8 during formatting.
/// Unpaired surrogates are replaced by the replacement character (U+FFFD).
pub fn fmtUtf16Le(utf16le: []const u16) std.fmt.Formatter(formatUtf16Le) {
return .{ .data = utf16le };
}
 
test "fmtUtf16le" {
const expectFmt = std.testing.expectFmt;
try expectFmt("", "{}", .{fmtUtf16le(utf8ToUtf16LeStringLiteral(""))});
try expectFmt("foo", "{}", .{fmtUtf16le(utf8ToUtf16LeStringLiteral("foo"))});
try expectFmt("𐐷", "{}", .{fmtUtf16le(utf8ToUtf16LeStringLiteral("𐐷"))});
try expectFmt("퟿", "{}", .{fmtUtf16le(&[_]u16{std.mem.readInt(u16, "\xff\xd7", native_endian)})});
try expectFmt("�", "{}", .{fmtUtf16le(&[_]u16{std.mem.readInt(u16, "\x00\xd8", native_endian)})});
try expectFmt("�", "{}", .{fmtUtf16le(&[_]u16{std.mem.readInt(u16, "\xff\xdb", native_endian)})});
try expectFmt("�", "{}", .{fmtUtf16le(&[_]u16{std.mem.readInt(u16, "\x00\xdc", native_endian)})});
try expectFmt("�", "{}", .{fmtUtf16le(&[_]u16{std.mem.readInt(u16, "\xff\xdf", native_endian)})});
try expectFmt("", "{}", .{fmtUtf16le(&[_]u16{std.mem.readInt(u16, "\x00\xe0", native_endian)})});
test "fmtUtf16Le" {
const expectFmt = testing.expectFmt;
try expectFmt("", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral(""))});
try expectFmt("foo", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral("foo"))});
try expectFmt("𐐷", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral("𐐷"))});
try expectFmt("퟿", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\xff\xd7", native_endian)})});
try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\x00\xd8", native_endian)})});
try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\xff\xdb", native_endian)})});
try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\x00\xdc", native_endian)})});
try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\xff\xdf", native_endian)})});
try expectFmt("", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\x00\xe0", native_endian)})});
}
 
test "utf8ToUtf16LeStringLiteral" {
@@ -1248,3 +1526,535 @@ test "utf8 valid codepoint" {
try testUtf8ValidCodepoint();
try comptime testUtf8ValidCodepoint();
}
 
/// Returns true if the codepoint is a surrogate (U+D800 to U+DFFF).
/// This covers both high surrogates (U+D800 to U+DBFF) and
/// low surrogates (U+DC00 to U+DFFF).
pub fn isSurrogateCodepoint(c: u21) bool {
    return switch (c) {
        0xD800...0xDFFF => true,
        else => false,
    };
}
 
/// Encodes the given codepoint into a WTF-8 byte sequence.
/// Delegates to `utf8EncodeImpl` with `.can_encode_surrogate_half`; the only
/// error in this function's error set is `CodepointTooLarge`.
/// c: the codepoint.
/// out: the out buffer to write to. Must have a len >= utf8CodepointSequenceLength(c).
/// Errors: if c cannot be encoded in WTF-8.
/// Returns: the number of bytes written to out.
pub fn wtf8Encode(c: u21, out: []u8) error{CodepointTooLarge}!u3 {
return utf8EncodeImpl(c, out, .can_encode_surrogate_half);
}
 
/// Union of the error sets of the per-length decoders used by `wtf8Decode`.
const Wtf8DecodeError = Utf8Decode2Error || Utf8Decode3AllowSurrogateHalfError || Utf8Decode4Error;

/// Decodes a single WTF-8 encoded codepoint from `bytes`.
/// `bytes.len` must be 1, 2, 3 or 4; any other length hits `unreachable`.
/// Three-byte sequences go through `utf8Decode3AllowSurrogateHalf`; the other
/// lengths use the same decoders as UTF-8.
pub fn wtf8Decode(bytes: []const u8) Wtf8DecodeError!u21 {
    switch (bytes.len) {
        1 => return bytes[0],
        2 => return utf8Decode2(bytes),
        3 => return utf8Decode3AllowSurrogateHalf(bytes),
        4 => return utf8Decode4(bytes),
        else => unreachable,
    }
}
 
/// Returns true if the input consists entirely of WTF-8 codepoints
/// (all the same restrictions as UTF-8, but allows surrogate codepoints
/// U+D800 to U+DFFF).
/// Does not check for well-formed WTF-8, meaning that this function
/// does not check that all surrogate halves are unpaired.
/// For example, a high surrogate immediately followed by a low surrogate
/// (each encoded as its own 3-byte sequence) is still accepted.
pub fn wtf8ValidateSlice(input: []const u8) bool {
return utf8ValidateSliceImpl(input, .can_encode_surrogate_half);
}
 
// Runs the shared WTF-8 validation checks at runtime and at comptime, then
// checks that an invalid trailing byte is detected from many start offsets.
test "validate WTF-8 slice" {
try testValidateWtf8Slice();
try comptime testValidateWtf8Slice();

// Validation skips over chunks of ASCII characters, where the chunk size
// varies (it is based on the recommended vector size). Make sure the
// chunking is correct by validating from many different start offsets;
// every suffix still ends in the invalid byte 0xC0.
const str = [_]u8{'a'} ** 550 ++ "\xc0";
for (0..str.len - 3) |i| {
try testing.expect(!wtf8ValidateSlice(str[i..]));
}
}
// Shared body for the "validate WTF-8 slice" test so that the same
// expectations can be evaluated both at runtime and at comptime.
fn testValidateWtf8Slice() !void {
// These are valid/invalid under both UTF-8 and WTF-8 rules.
try testing.expect(wtf8ValidateSlice("abc"));
try testing.expect(wtf8ValidateSlice("abc\xdf\xbf"));
try testing.expect(wtf8ValidateSlice(""));
try testing.expect(wtf8ValidateSlice("a"));
try testing.expect(wtf8ValidateSlice("abc"));
try testing.expect(wtf8ValidateSlice("Ж"));
try testing.expect(wtf8ValidateSlice("ЖЖ"));
try testing.expect(wtf8ValidateSlice("брэд-ЛГТМ"));
try testing.expect(wtf8ValidateSlice("☺☻☹"));
try testing.expect(wtf8ValidateSlice("a\u{fffdb}"));
try testing.expect(wtf8ValidateSlice("\xf4\x8f\xbf\xbf"));
try testing.expect(wtf8ValidateSlice("abc\xdf\xbf"));

// Inputs that must be rejected.
try testing.expect(!wtf8ValidateSlice("abc\xc0"));
try testing.expect(!wtf8ValidateSlice("abc\xc0abc"));
try testing.expect(!wtf8ValidateSlice("aa\xe2"));
try testing.expect(!wtf8ValidateSlice("\x42\xfa"));
try testing.expect(!wtf8ValidateSlice("\x42\xfa\x43"));
try testing.expect(!wtf8ValidateSlice("abc\xc0"));
try testing.expect(!wtf8ValidateSlice("abc\xc0abc"));
try testing.expect(!wtf8ValidateSlice("\xf4\x90\x80\x80"));
try testing.expect(!wtf8ValidateSlice("\xf7\xbf\xbf\xbf"));
try testing.expect(!wtf8ValidateSlice("\xfb\xbf\xbf\xbf\xbf"));
try testing.expect(!wtf8ValidateSlice("\xc0\x80"));

// But surrogate codepoints are only valid in WTF-8.
// "\xed\xa0\x80" encodes U+D800 and "\xed\xbf\xbf" encodes U+DFFF.
try testing.expect(wtf8ValidateSlice("\xed\xa0\x80"));
try testing.expect(wtf8ValidateSlice("\xed\xbf\xbf"));
}
 
/// Wtf8View wraps a WTF-8 encoded string and hands out iterators over its
/// code points, including surrogate halves.
///
/// ```
/// var wtf8 = (try std.unicode.Wtf8View.init("hi there")).iterator();
/// while (wtf8.nextCodepointSlice()) |codepoint| {
///     // note: codepoint could be a surrogate half which is invalid
///     // UTF-8, avoid printing or otherwise sending/emitting this directly
/// }
/// ```
pub const Wtf8View = struct {
    bytes: []const u8,

    /// Checks `s` with `wtf8ValidateSlice` and wraps it on success.
    /// Returns `error.InvalidWtf8` if the check fails.
    pub fn init(s: []const u8) error{InvalidWtf8}!Wtf8View {
        if (wtf8ValidateSlice(s)) return initUnchecked(s);
        return error.InvalidWtf8;
    }

    /// Wraps `s` without performing any validation.
    pub fn initUnchecked(s: []const u8) Wtf8View {
        return .{ .bytes = s };
    }

    /// Same as `init`, but evaluated at compile time: invalid input is
    /// reported as a compile error instead of an error value.
    pub inline fn initComptime(comptime s: []const u8) Wtf8View {
        return comptime if (init(s)) |view| view else |err| switch (err) {
            error.InvalidWtf8 => @compileError("invalid wtf8"),
        };
    }

    /// Returns an iterator positioned at the start of the wrapped bytes.
    pub fn iterator(s: Wtf8View) Wtf8Iterator {
        return .{ .bytes = s.bytes, .i = 0 };
    }
};
 
/// Iterates over the codepoints of `bytes`.
/// Asserts that `bytes` is valid WTF-8: decoding failures are treated as
/// `unreachable`.
pub const Wtf8Iterator = struct {
    bytes: []const u8,
    i: usize,

    /// Returns the bytes of the next codepoint and advances past them,
    /// or null when the end of `bytes` has been reached.
    pub fn nextCodepointSlice(it: *Wtf8Iterator) ?[]const u8 {
        if (it.i >= it.bytes.len) return null;

        const start = it.i;
        const seq_len = utf8ByteSequenceLength(it.bytes[start]) catch unreachable;
        it.i = start + seq_len;
        return it.bytes[start..it.i];
    }

    /// Returns the next codepoint (which may be a surrogate half) and
    /// advances past it, or null when the end of `bytes` has been reached.
    pub fn nextCodepoint(it: *Wtf8Iterator) ?u21 {
        if (it.nextCodepointSlice()) |seq| {
            return wtf8Decode(seq) catch unreachable;
        }
        return null;
    }

    /// Look ahead at the next n codepoints without advancing the iterator.
    /// If fewer than n codepoints are available, then return the remainder of the string.
    pub fn peek(it: *Wtf8Iterator, n: usize) []const u8 {
        const start = it.i;

        // Walk forward up to n codepoints, stopping early at end of input.
        var left = n;
        while (left > 0) : (left -= 1) {
            if (it.nextCodepointSlice() == null) break;
        }

        // Remember how far we got, then rewind so the caller's position is unchanged.
        const end = it.i;
        it.i = start;
        return it.bytes[start..end];
    }
};
 
/// Converts the (potentially ill-formed) UTF-16 LE input `utf16le` to WTF-8
/// using `array_list` as the destination.
/// The only possible error is an allocation failure; the conversion itself
/// has no failure case in this function's error set.
/// NOTE(review): presumably the output is appended to any existing contents of
/// `array_list` -- confirm against `utf16LeToUtf8ArrayListImpl`.
pub fn wtf16LeToWtf8ArrayList(array_list: *std.ArrayList(u8), utf16le: []const u16) mem.Allocator.Error!void {
return utf16LeToUtf8ArrayListImpl(array_list, utf16le, .can_encode_surrogate_half);
}
 
/// Converts `wtf16le` to a newly allocated WTF-8 slice.
/// Caller must free returned memory.
pub fn wtf16LeToWtf8Alloc(allocator: mem.Allocator, wtf16le: []const u16) mem.Allocator.Error![]u8 {
    // Initial capacity guess: one byte per code unit, i.e. all-ASCII input.
    var out = try std.ArrayList(u8).initCapacity(allocator, wtf16le.len);
    errdefer out.deinit();

    try wtf16LeToWtf8ArrayList(&out, wtf16le);
    return out.toOwnedSlice();
}
 
/// Converts `wtf16le` to a newly allocated, 0-terminated WTF-8 slice.
/// Caller must free returned memory.
pub fn wtf16LeToWtf8AllocZ(allocator: mem.Allocator, wtf16le: []const u16) mem.Allocator.Error![:0]u8 {
    // Initial capacity guess: one byte per code unit (all-ASCII input),
    // plus one extra byte for the null terminator.
    var out = try std.ArrayList(u8).initCapacity(allocator, wtf16le.len + 1);
    errdefer out.deinit();

    try wtf16LeToWtf8ArrayList(&out, wtf16le);
    return out.toOwnedSliceSentinel(0);
}
 
/// Converts the (potentially ill-formed) UTF-16 LE input `wtf16le` to WTF-8,
/// writing the result into `wtf8`.
/// Returns a length; callers in this file use it as the number of bytes
/// written (`wtf8[0..len]`).
/// This function cannot fail: the empty `switch` below only compiles because
/// the implementation has no possible errors for `.can_encode_surrogate_half`.
/// NOTE(review): presumably `wtf8` must be large enough to hold the whole
/// output -- confirm against `utf16LeToUtf8Impl`.
pub fn wtf16LeToWtf8(wtf8: []u8, wtf16le: []const u16) usize {
return utf16LeToUtf8Impl(wtf8, wtf16le, .can_encode_surrogate_half) catch |err| switch (err) {};
}
 
/// Converts `wtf8` to (potentially ill-formed) UTF-16 LE and appends the
/// resulting code units to `array_list`.
/// Returns `error.InvalidWtf8` if `wtf8` is not valid WTF-8, and
/// `error.OutOfMemory` if `array_list` cannot grow.
pub fn wtf8ToWtf16LeArrayList(array_list: *std.ArrayList(u16), wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }!void {
    // The implementation only ensures a *total* capacity of `wtf8.len` before
    // appending with `appendSliceAssumeCapacity`. That is not enough when
    // `array_list` already contains items, so reserve the room relative to the
    // current length here.
    try array_list.ensureUnusedCapacity(wtf8.len);
    return utf8ToUtf16LeArrayListImpl(array_list, wtf8, .can_encode_surrogate_half);
}
 
/// Converts `wtf8` to a newly allocated (potentially ill-formed) UTF-16 LE slice.
/// Caller must free returned memory.
/// Returns `error.InvalidWtf8` if `wtf8` is not valid WTF-8.
pub fn wtf8ToWtf16LeAlloc(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![]u16 {
    // Initial capacity guess: one UTF-16 code unit per WTF-8 byte.
    var code_units = try std.ArrayList(u16).initCapacity(allocator, wtf8.len);
    errdefer code_units.deinit();

    try utf8ToUtf16LeArrayListImpl(&code_units, wtf8, .can_encode_surrogate_half);
    return code_units.toOwnedSlice();
}
 
/// Converts `wtf8` to a newly allocated, 0-terminated (potentially ill-formed)
/// UTF-16 LE slice.
/// Caller must free returned memory.
/// Returns `error.InvalidWtf8` if `wtf8` is not valid WTF-8.
pub fn wtf8ToWtf16LeAllocZ(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![:0]u16 {
    // Initial capacity guess: one UTF-16 code unit per WTF-8 byte, plus one
    // extra slot for the terminating 0.
    var code_units = try std.ArrayList(u16).initCapacity(allocator, wtf8.len + 1);
    errdefer code_units.deinit();

    try utf8ToUtf16LeArrayListImpl(&code_units, wtf8, .can_encode_surrogate_half);
    return code_units.toOwnedSliceSentinel(0);
}
 
/// Converts `wtf8` to (potentially ill-formed) UTF-16 LE, writing the code
/// units into `wtf16le`.
/// Returns index of next character. If exact fit, returned index equals output slice length.
/// Assumes there is enough space for the output.
/// Returns `error.InvalidWtf8` if `wtf8` is not valid WTF-8.
pub fn wtf8ToWtf16Le(wtf16le: []u16, wtf8: []const u8) error{InvalidWtf8}!usize {
return utf8ToUtf16LeImpl(wtf16le, wtf8, .can_encode_surrogate_half);
}
 
/// Copies `wtf8` into `utf8`, replacing every surrogate codepoint
/// (U+D800 to U+DFFF) with the Unicode replacement character (U+FFFD).
/// Surrogate codepoints and the replacement character are all encoded as three
/// bytes, so the converted output is exactly `wtf8.len` bytes long.
/// Asserts that `utf8.len >= wtf8.len`.
/// In-place conversion is supported when `utf8` and `wtf8` refer to the same slice.
/// Note: If `wtf8` is entirely composed of well-formed UTF-8, then no conversion is necessary.
/// `utf8ValidateSlice` can be used to check if lossy conversion is worthwhile.
/// If `wtf8` is not valid WTF-8, then `error.InvalidWtf8` is returned.
pub fn wtf8ToUtf8Lossy(utf8: []u8, wtf8: []const u8) error{InvalidWtf8}!void {
    assert(utf8.len >= wtf8.len);

    // UTF-8 encoding of U+FFFD, computed once at compile time.
    const replacement_bytes = comptime blk: {
        var buf: [3]u8 = undefined;
        assert((utf8Encode(replacement_character, &buf) catch unreachable) == 3);
        break :blk buf;
    };
    // When source and destination start at the same address, untouched
    // sequences are already in the right place and need no copy.
    const same_buffer = utf8.ptr == wtf8.ptr;

    var it = (try Wtf8View.init(wtf8)).iterator();
    var out_i: usize = 0;
    while (it.nextCodepointSlice()) |seq| {
        // Only 3-byte sequences can encode a surrogate codepoint, so decoding
        // is skipped for every other length.
        const is_surrogate = seq.len == 3 and
            isSurrogateCodepoint(wtf8Decode(seq) catch unreachable);

        if (is_surrogate) {
            @memcpy(utf8[out_i..][0..replacement_bytes.len], &replacement_bytes);
        } else if (!same_buffer) {
            @memcpy(utf8[out_i..][0..seq.len], seq);
        }
        // The replacement is 3 bytes, the same as the sequence it replaces.
        out_i += seq.len;
    }
}
 
/// Allocating variant of `wtf8ToUtf8Lossy`: returns a new slice of the same
/// length as `wtf8` holding the lossily converted bytes.
/// Caller must free returned memory.
/// Returns `error.InvalidWtf8` if `wtf8` is not valid WTF-8.
pub fn wtf8ToUtf8LossyAlloc(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![]u8 {
    const out = try allocator.alloc(u8, wtf8.len);
    errdefer allocator.free(out);

    try wtf8ToUtf8Lossy(out, wtf8);
    return out;
}
 
/// Allocating, 0-terminated variant of `wtf8ToUtf8Lossy`: returns a new
/// sentinel-terminated slice of length `wtf8.len` holding the lossily
/// converted bytes.
/// Caller must free returned memory.
/// Returns `error.InvalidWtf8` if `wtf8` is not valid WTF-8.
pub fn wtf8ToUtf8LossyAllocZ(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![:0]u8 {
    const out = try allocator.allocSentinel(u8, wtf8.len, 0);
    errdefer allocator.free(out);

    try wtf8ToUtf8Lossy(out, wtf8);
    return out;
}
 
// Exercises wtf8ToUtf8Lossy with a caller-provided buffer: invalid input,
// plain ASCII, lone high/low surrogates, an encoded surrogate pair, and
// in-place conversion.
test wtf8ToUtf8Lossy {
var buf: [32]u8 = undefined;

// "\xff" must be rejected with error.InvalidWtf8.
const invalid_utf8 = "\xff";
try testing.expectError(error.InvalidWtf8, wtf8ToUtf8Lossy(&buf, invalid_utf8));

// ASCII passes through unchanged.
const ascii = "abcd";
try wtf8ToUtf8Lossy(&buf, ascii);
try testing.expectEqualStrings("abcd", buf[0..ascii.len]);

// "\xed\xa0\xbd" encodes the high surrogate U+D83D.
const high_surrogate_half = "ab\xed\xa0\xbdcd";
try wtf8ToUtf8Lossy(&buf, high_surrogate_half);
try testing.expectEqualStrings("ab\u{FFFD}cd", buf[0..high_surrogate_half.len]);

// "\xed\xb2\xa9" encodes the low surrogate U+DCA9.
const low_surrogate_half = "ab\xed\xb2\xa9cd";
try wtf8ToUtf8Lossy(&buf, low_surrogate_half);
try testing.expectEqualStrings("ab\u{FFFD}cd", buf[0..low_surrogate_half.len]);

// If the WTF-8 is not well-formed, each surrogate half is converted into a separate
// replacement character instead of being interpreted as a surrogate pair.
const encoded_surrogate_pair = "ab\xed\xa0\xbd\xed\xb2\xa9cd";
try wtf8ToUtf8Lossy(&buf, encoded_surrogate_pair);
try testing.expectEqualStrings("ab\u{FFFD}\u{FFFD}cd", buf[0..encoded_surrogate_pair.len]);

// in place: the same slice is passed as both destination and source
@memcpy(buf[0..low_surrogate_half.len], low_surrogate_half);
const slice = buf[0..low_surrogate_half.len];
try wtf8ToUtf8Lossy(slice, slice);
try testing.expectEqualStrings("ab\u{FFFD}cd", slice);
}
 
// Exercises the allocating variant: invalid input, plain ASCII, a lone
// surrogate, and an encoded surrogate pair.
test wtf8ToUtf8LossyAlloc {
// "\xff" must be rejected with error.InvalidWtf8.
const invalid_utf8 = "\xff";
try testing.expectError(error.InvalidWtf8, wtf8ToUtf8LossyAlloc(testing.allocator, invalid_utf8));

{
const ascii = "abcd";
const utf8 = try wtf8ToUtf8LossyAlloc(testing.allocator, ascii);
defer testing.allocator.free(utf8);
try testing.expectEqualStrings("abcd", utf8);
}

{
const surrogate_half = "ab\xed\xa0\xbdcd";
const utf8 = try wtf8ToUtf8LossyAlloc(testing.allocator, surrogate_half);
defer testing.allocator.free(utf8);
try testing.expectEqualStrings("ab\u{FFFD}cd", utf8);
}

{
// If the WTF-8 is not well-formed, each surrogate half is converted into a separate
// replacement character instead of being interpreted as a surrogate pair.
const encoded_surrogate_pair = "ab\xed\xa0\xbd\xed\xb2\xa9cd";
const utf8 = try wtf8ToUtf8LossyAlloc(testing.allocator, encoded_surrogate_pair);
defer testing.allocator.free(utf8);
try testing.expectEqualStrings("ab\u{FFFD}\u{FFFD}cd", utf8);
}
}
 
// Exercises the allocating, 0-terminated variant with the same cases as the
// wtf8ToUtf8LossyAlloc test.
test wtf8ToUtf8LossyAllocZ {
// "\xff" must be rejected with error.InvalidWtf8.
const invalid_utf8 = "\xff";
try testing.expectError(error.InvalidWtf8, wtf8ToUtf8LossyAllocZ(testing.allocator, invalid_utf8));

{
const ascii = "abcd";
const utf8 = try wtf8ToUtf8LossyAllocZ(testing.allocator, ascii);
defer testing.allocator.free(utf8);
try testing.expectEqualStrings("abcd", utf8);
}

{
const surrogate_half = "ab\xed\xa0\xbdcd";
const utf8 = try wtf8ToUtf8LossyAllocZ(testing.allocator, surrogate_half);
defer testing.allocator.free(utf8);
try testing.expectEqualStrings("ab\u{FFFD}cd", utf8);
}

{
// If the WTF-8 is not well-formed, each surrogate half is converted into a separate
// replacement character instead of being interpreted as a surrogate pair.
const encoded_surrogate_pair = "ab\xed\xa0\xbd\xed\xb2\xa9cd";
const utf8 = try wtf8ToUtf8LossyAllocZ(testing.allocator, encoded_surrogate_pair);
defer testing.allocator.free(utf8);
try testing.expectEqualStrings("ab\u{FFFD}\u{FFFD}cd", utf8);
}
}
 
/// Iterates the codepoints of a (potentially ill-formed) UTF-16 LE string.
/// Unpaired surrogates are yielded as their own codepoints instead of being
/// treated as errors.
pub const Wtf16LeIterator = struct {
    bytes: []const u8,
    i: usize,

    /// Creates an iterator over the code units of `s`, viewed as raw bytes.
    pub fn init(s: []const u16) Wtf16LeIterator {
        return .{ .bytes = std.mem.sliceAsBytes(s), .i = 0 };
    }

    /// If the next codepoint is encoded by a surrogate pair, returns the
    /// codepoint that the surrogate pair represents.
    /// If the next codepoint is an unpaired surrogate, returns the codepoint
    /// of the unpaired surrogate.
    /// Returns null once all code units have been consumed.
    pub fn nextCodepoint(it: *Wtf16LeIterator) ?u21 {
        assert(it.i <= it.bytes.len);
        if (it.i == it.bytes.len) return null;

        const first = std.mem.readInt(u16, it.bytes[it.i..][0..2], .little);
        it.i += 2;

        // Anything that is not a high surrogate stands on its own.
        if (!utf16IsHighSurrogate(first)) return first;
        // A high surrogate at the very end of the input has nothing to pair with.
        if (it.i >= it.bytes.len) return first;

        const second = std.mem.readInt(u16, it.bytes[it.i..][0..2], .little);
        const pair = [2]u16{ first, second };
        // If the two units do not form a valid pair, yield the high surrogate
        // alone and leave the second unit for the next call.
        const codepoint = utf16DecodeSurrogatePair(&pair) catch return first;
        it.i += 2;
        return codepoint;
    }
};
 
// Demonstrates that WTF-8 which encodes a surrogate pair as two separate
// 3-byte sequences is not preserved by a WTF-8 -> WTF-16 -> WTF-8 roundtrip.
test "non-well-formed WTF-8 does not roundtrip" {
// This encodes the surrogate pair U+D83D U+DCA9.
// The well-formed version of this would be U+1F4A9 which is \xF0\x9F\x92\xA9.
const non_well_formed_wtf8 = "\xed\xa0\xbd\xed\xb2\xa9";

var wtf16_buf: [2]u16 = undefined;
const wtf16_len = try wtf8ToWtf16Le(&wtf16_buf, non_well_formed_wtf8);
const wtf16 = wtf16_buf[0..wtf16_len];

// Each 3-byte sequence becomes one WTF-16 code unit.
try testing.expectEqualSlices(u16, &[_]u16{
mem.nativeToLittle(u16, 0xD83D), // high surrogate
mem.nativeToLittle(u16, 0xDCA9), // low surrogate
}, wtf16);

var wtf8_buf: [4]u8 = undefined;
const wtf8_len = wtf16LeToWtf8(&wtf8_buf, wtf16);
const wtf8 = wtf8_buf[0..wtf8_len];

// Converting to WTF-16 and back results in well-formed WTF-8,
// but it does not match the input WTF-8
try testing.expectEqualSlices(u8, "\xf0\x9f\x92\xa9", wtf8);
}
 
// Converts `wtf8` to WTF-16 LE and back with each API flavor (fixed buffer,
// Alloc, AllocZ) and expects the result to equal the input.
// The fixed-buffer path uses 32-element buffers, so inputs must fit in them.
fn testRoundtripWtf8(wtf8: []const u8) !void {
// Buffer
{
var wtf16_buf: [32]u16 = undefined;
const wtf16_len = try wtf8ToWtf16Le(&wtf16_buf, wtf8);
const wtf16 = wtf16_buf[0..wtf16_len];

var roundtripped_buf: [32]u8 = undefined;
const roundtripped_len = wtf16LeToWtf8(&roundtripped_buf, wtf16);
const roundtripped = roundtripped_buf[0..roundtripped_len];

try testing.expectEqualSlices(u8, wtf8, roundtripped);
}
// Alloc
{
const wtf16 = try wtf8ToWtf16LeAlloc(testing.allocator, wtf8);
defer testing.allocator.free(wtf16);

const roundtripped = try wtf16LeToWtf8Alloc(testing.allocator, wtf16);
defer testing.allocator.free(roundtripped);

try testing.expectEqualSlices(u8, wtf8, roundtripped);
}
// AllocZ
{
const wtf16 = try wtf8ToWtf16LeAllocZ(testing.allocator, wtf8);
defer testing.allocator.free(wtf16);

const roundtripped = try wtf16LeToWtf8AllocZ(testing.allocator, wtf16);
defer testing.allocator.free(roundtripped);

try testing.expectEqualSlices(u8, wtf8, roundtripped);
}
}
 
// Each input here is well-formed WTF-8 (no high surrogate is immediately
// followed by a low surrogate), so it must survive the roundtrip unchanged.
test "well-formed WTF-8 roundtrips" {
try testRoundtripWtf8("\xed\x9f\xbf"); // not a surrogate half
try testRoundtripWtf8("\xed\xa0\xbd"); // high surrogate
try testRoundtripWtf8("\xed\xb2\xa9"); // low surrogate
try testRoundtripWtf8("\xed\xa0\xbd \xed\xb2\xa9"); // <high surrogate><space><low surrogate>
try testRoundtripWtf8("\xed\xa0\x80\xed\xaf\xbf"); // <high surrogate><high surrogate>
try testRoundtripWtf8("\xed\xa0\x80\xee\x80\x80"); // <high surrogate><not surrogate>
try testRoundtripWtf8("\xed\x9f\xbf\xed\xb0\x80"); // <not surrogate><low surrogate>
try testRoundtripWtf8("a\xed\xb0\x80"); // <not surrogate><low surrogate>
try testRoundtripWtf8("\xf0\x9f\x92\xa9"); // U+1F4A9, encoded as a surrogate pair in WTF-16
}
 
// Converts `wtf16le` to WTF-8 and back with each API flavor (fixed buffer,
// Alloc, AllocZ) and expects the result to equal the input.
// The fixed-buffer path uses 32-element buffers, so inputs must fit in them.
fn testRoundtripWtf16(wtf16le: []const u16) !void {
// Buffer
{
var wtf8_buf: [32]u8 = undefined;
const wtf8_len = wtf16LeToWtf8(&wtf8_buf, wtf16le);
const wtf8 = wtf8_buf[0..wtf8_len];

var roundtripped_buf: [32]u16 = undefined;
const roundtripped_len = try wtf8ToWtf16Le(&roundtripped_buf, wtf8);
const roundtripped = roundtripped_buf[0..roundtripped_len];

try testing.expectEqualSlices(u16, wtf16le, roundtripped);
}
// Alloc
{
const wtf8 = try wtf16LeToWtf8Alloc(testing.allocator, wtf16le);
defer testing.allocator.free(wtf8);

const roundtripped = try wtf8ToWtf16LeAlloc(testing.allocator, wtf8);
defer testing.allocator.free(roundtripped);

try testing.expectEqualSlices(u16, wtf16le, roundtripped);
}
// AllocZ
{
const wtf8 = try wtf16LeToWtf8AllocZ(testing.allocator, wtf16le);
defer testing.allocator.free(wtf8);

const roundtripped = try wtf8ToWtf16LeAllocZ(testing.allocator, wtf8);
defer testing.allocator.free(roundtripped);

try testing.expectEqualSlices(u16, wtf16le, roundtripped);
}
}
 
// Roundtrips WTF-16 inputs through WTF-8, covering a valid surrogate pair and
// several arrangements of unpaired surrogates.
test "well-formed WTF-16 roundtrips" {
// A proper surrogate pair.
try testRoundtripWtf16(&[_]u16{
std.mem.nativeToLittle(u16, 0xD83D), // high surrogate
std.mem.nativeToLittle(u16, 0xDCA9), // low surrogate
});
// The same surrogates separated by a space, so neither is paired.
try testRoundtripWtf16(&[_]u16{
std.mem.nativeToLittle(u16, 0xD83D), // high surrogate
std.mem.nativeToLittle(u16, ' '), // not surrogate
std.mem.nativeToLittle(u16, 0xDCA9), // low surrogate
});
try testRoundtripWtf16(&[_]u16{
std.mem.nativeToLittle(u16, 0xD800), // high surrogate
std.mem.nativeToLittle(u16, 0xDBFF), // high surrogate
});
try testRoundtripWtf16(&[_]u16{
std.mem.nativeToLittle(u16, 0xD800), // high surrogate
std.mem.nativeToLittle(u16, 0xE000), // not surrogate
});
try testRoundtripWtf16(&[_]u16{
std.mem.nativeToLittle(u16, 0xD7FF), // not surrogate
std.mem.nativeToLittle(u16, 0xDC00), // low surrogate
});
try testRoundtripWtf16(&[_]u16{
std.mem.nativeToLittle(u16, 0x61), // not surrogate
std.mem.nativeToLittle(u16, 0xDC00), // low surrogate
});
// A lone low surrogate.
try testRoundtripWtf16(&[_]u16{
std.mem.nativeToLittle(u16, 0xDC00), // low surrogate
});
}
 
lib/std/zig/system.zig added: 1932, removed: 517, total 1415
@@ -639,7 +639,8 @@ pub fn abiAndDynamicLinkerFromFile(
var link_buf: [std.os.PATH_MAX]u8 = undefined;
const link_name = std.os.readlink(dl_path, &link_buf) catch |err| switch (err) {
error.NameTooLong => unreachable,
error.InvalidUtf8 => unreachable, // Windows only
error.InvalidUtf8 => unreachable, // WASI only
error.InvalidWtf8 => unreachable, // Windows only
error.BadPathName => unreachable, // Windows only
error.UnsupportedReparsePointType => unreachable, // Windows only
error.NetworkNotFound => unreachable, // Windows only
@@ -730,7 +731,8 @@ test glibcVerFromLinkName {
fn glibcVerFromRPath(rpath: []const u8) !std.SemanticVersion {
var dir = fs.cwd().openDir(rpath, .{}) catch |err| switch (err) {
error.NameTooLong => unreachable,
error.InvalidUtf8 => unreachable,
error.InvalidUtf8 => unreachable, // WASI only
error.InvalidWtf8 => unreachable, // Windows-only
error.BadPathName => unreachable,
error.DeviceBusy => unreachable,
error.NetworkNotFound => unreachable, // Windows-only
@@ -761,7 +763,8 @@ fn glibcVerFromRPath(rpath: []const u8) !std.SemanticVersion {
const glibc_so_basename = "libc.so.6";
var f = dir.openFile(glibc_so_basename, .{}) catch |err| switch (err) {
error.NameTooLong => unreachable,
error.InvalidUtf8 => unreachable, // Windows only
error.InvalidUtf8 => unreachable, // WASI only
error.InvalidWtf8 => unreachable, // Windows only
error.BadPathName => unreachable, // Windows only
error.PipeBusy => unreachable, // Windows-only
error.SharingViolation => unreachable, // Windows-only
@@ -998,7 +1001,8 @@ fn detectAbiAndDynamicLinker(
error.NameTooLong => unreachable,
error.PathAlreadyExists => unreachable,
error.SharingViolation => unreachable,
error.InvalidUtf8 => unreachable,
error.InvalidUtf8 => unreachable, // WASI only
error.InvalidWtf8 => unreachable, // Windows only
error.BadPathName => unreachable,
error.PipeBusy => unreachable,
error.FileLocksNotSupported => unreachable,
 
lib/std/zig/system/NativePaths.zig added: 1932, removed: 517, total 1415
@@ -41,7 +41,7 @@ pub fn detect(arena: Allocator, native_target: std.Target) !NativePaths {
}
}
} else |err| switch (err) {
error.InvalidUtf8 => {},
error.InvalidWtf8 => unreachable,
error.EnvironmentVariableNotFound => {},
error.OutOfMemory => |e| return e,
}
@@ -73,7 +73,7 @@ pub fn detect(arena: Allocator, native_target: std.Target) !NativePaths {
}
}
} else |err| switch (err) {
error.InvalidUtf8 => {},
error.InvalidWtf8 => unreachable,
error.EnvironmentVariableNotFound => {},
error.OutOfMemory => |e| return e,
}
 
lib/std/zig/system/windows.zig added: 1932, removed: 517, total 1415
@@ -160,7 +160,7 @@ fn getCpuInfoFromRegistry(core: usize, args: anytype) !void {
=> {
var buf = @field(args, field.name).value_buf;
const entry = @as(*align(1) const std.os.windows.UNICODE_STRING, @ptrCast(table[i + 1].EntryContext));
const len = try std.unicode.utf16leToUtf8(buf, entry.Buffer[0 .. entry.Length / 2]);
const len = try std.unicode.utf16LeToUtf8(buf, entry.Buffer[0 .. entry.Length / 2]);
buf[len] = 0;
},
 
 
src/Module.zig added: 1932, removed: 517, total 1415
@@ -2662,6 +2662,7 @@ pub fn astGenFile(mod: *Module, file: *File) !void {
}) catch |err| switch (err) {
error.NotDir => unreachable, // no dir components
error.InvalidUtf8 => unreachable, // it's a hex encoded name
error.InvalidWtf8 => unreachable, // it's a hex encoded name
error.BadPathName => unreachable, // it's a hex encoded name
error.NameTooLong => unreachable, // it's a fixed size name
error.PipeBusy => unreachable, // it's not a pipe
 
src/libc_installation.zig added: 1932, removed: 517, total 1415
@@ -246,7 +246,10 @@ pub const LibCInstallation = struct {
const allocator = args.allocator;
 
// Detect infinite loops.
var env_map = try std.process.getEnvMap(allocator);
var env_map = std.process.getEnvMap(allocator) catch |err| switch (err) {
error.Unexpected => unreachable, // WASI-only
else => |e| return e,
};
defer env_map.deinit();
const skip_cc_env_var = if (env_map.get(inf_loop_env_key)) |phase| blk: {
if (std.mem.eql(u8, phase, "1")) {
@@ -572,7 +575,10 @@ fn ccPrintFileName(args: CCPrintFileNameOptions) ![:0]u8 {
const allocator = args.allocator;
 
// Detect infinite loops.
var env_map = try std.process.getEnvMap(allocator);
var env_map = std.process.getEnvMap(allocator) catch |err| switch (err) {
error.Unexpected => unreachable, // WASI-only
else => |e| return e,
};
defer env_map.deinit();
const skip_cc_env_var = if (env_map.get(inf_loop_env_key)) |phase| blk: {
if (std.mem.eql(u8, phase, "1")) {
 
src/main.zig added: 1932, removed: 517, total 1415
@@ -5756,7 +5756,7 @@ fn readSourceFileToEndAlloc(
// If the file starts with a UTF-16 little endian BOM, translate it to UTF-8
if (mem.startsWith(u8, source_code, "\xff\xfe")) {
const source_code_utf16_le = mem.bytesAsSlice(u16, source_code);
const source_code_utf8 = std.unicode.utf16leToUtf8AllocZ(allocator, source_code_utf16_le) catch |err| switch (err) {
const source_code_utf8 = std.unicode.utf16LeToUtf8AllocZ(allocator, source_code_utf16_le) catch |err| switch (err) {
error.DanglingSurrogateHalf => error.UnsupportedEncoding,
error.ExpectedSecondSurrogateHalf => error.UnsupportedEncoding,
error.UnexpectedSecondSurrogateHalf => error.UnsupportedEncoding,
 
src/windows_sdk.zig added: 1932, removed: 517, total 1415
@@ -84,26 +84,26 @@ fn iterateAndFilterBySemVer(
return dirs_filtered_slice;
}
 
const RegistryUtf8 = struct {
const RegistryWtf8 = struct {
key: windows.HKEY,
 
/// Assert that `key` is valid UTF-8 string
pub fn openKey(hkey: windows.HKEY, key: []const u8) error{KeyNotFound}!RegistryUtf8 {
const key_utf16le: [:0]const u16 = key_utf16le: {
var key_utf16le_buf: [RegistryUtf16Le.key_name_max_len]u16 = undefined;
const key_utf16le_len: usize = std.unicode.utf8ToUtf16Le(key_utf16le_buf[0..], key) catch |err| switch (err) {
error.InvalidUtf8 => unreachable,
/// Assert that `key` is valid WTF-8 string
pub fn openKey(hkey: windows.HKEY, key: []const u8) error{KeyNotFound}!RegistryWtf8 {
const key_wtf16le: [:0]const u16 = key_wtf16le: {
var key_wtf16le_buf: [RegistryWtf16Le.key_name_max_len]u16 = undefined;
const key_wtf16le_len: usize = std.unicode.wtf8ToWtf16Le(key_wtf16le_buf[0..], key) catch |err| switch (err) {
error.InvalidWtf8 => unreachable,
};
key_utf16le_buf[key_utf16le_len] = 0;
break :key_utf16le key_utf16le_buf[0..key_utf16le_len :0];
key_wtf16le_buf[key_wtf16le_len] = 0;
break :key_wtf16le key_wtf16le_buf[0..key_wtf16le_len :0];
};
 
const registry_utf16le = try RegistryUtf16Le.openKey(hkey, key_utf16le);
return RegistryUtf8{ .key = registry_utf16le.key };
const registry_wtf16le = try RegistryWtf16Le.openKey(hkey, key_wtf16le);
return RegistryWtf8{ .key = registry_wtf16le.key };
}
 
/// Closes key, after that usage is invalid
pub fn closeKey(self: *const RegistryUtf8) void {
pub fn closeKey(self: *const RegistryWtf8) void {
const return_code_int: windows.HRESULT = windows.advapi32.RegCloseKey(self.key);
const return_code: windows.Win32Error = @enumFromInt(return_code_int);
switch (return_code) {
@@ -114,71 +114,68 @@ const RegistryUtf8 = struct {
 
/// Get string from registry.
/// Caller owns result.
pub fn getString(self: *const RegistryUtf8, allocator: std.mem.Allocator, subkey: []const u8, value_name: []const u8) error{ OutOfMemory, ValueNameNotFound, NotAString, StringNotFound }![]u8 {
const subkey_utf16le: [:0]const u16 = subkey_utf16le: {
var subkey_utf16le_buf: [RegistryUtf16Le.key_name_max_len]u16 = undefined;
const subkey_utf16le_len: usize = std.unicode.utf8ToUtf16Le(subkey_utf16le_buf[0..], subkey) catch unreachable;
subkey_utf16le_buf[subkey_utf16le_len] = 0;
break :subkey_utf16le subkey_utf16le_buf[0..subkey_utf16le_len :0];
pub fn getString(self: *const RegistryWtf8, allocator: std.mem.Allocator, subkey: []const u8, value_name: []const u8) error{ OutOfMemory, ValueNameNotFound, NotAString, StringNotFound }![]u8 {
const subkey_wtf16le: [:0]const u16 = subkey_wtf16le: {
var subkey_wtf16le_buf: [RegistryWtf16Le.key_name_max_len]u16 = undefined;
const subkey_wtf16le_len: usize = std.unicode.wtf8ToWtf16Le(subkey_wtf16le_buf[0..], subkey) catch unreachable;
subkey_wtf16le_buf[subkey_wtf16le_len] = 0;
break :subkey_wtf16le subkey_wtf16le_buf[0..subkey_wtf16le_len :0];
};
 
const value_name_utf16le: [:0]const u16 = value_name_utf16le: {
var value_name_utf16le_buf: [RegistryUtf16Le.value_name_max_len]u16 = undefined;
const value_name_utf16le_len: usize = std.unicode.utf8ToUtf16Le(value_name_utf16le_buf[0..], value_name) catch unreachable;
value_name_utf16le_buf[value_name_utf16le_len] = 0;
break :value_name_utf16le value_name_utf16le_buf[0..value_name_utf16le_len :0];
const value_name_wtf16le: [:0]const u16 = value_name_wtf16le: {
var value_name_wtf16le_buf: [RegistryWtf16Le.value_name_max_len]u16 = undefined;
const value_name_wtf16le_len: usize = std.unicode.wtf8ToWtf16Le(value_name_wtf16le_buf[0..], value_name) catch unreachable;
value_name_wtf16le_buf[value_name_wtf16le_len] = 0;
break :value_name_wtf16le value_name_wtf16le_buf[0..value_name_wtf16le_len :0];
};
 
const registry_utf16le = RegistryUtf16Le{ .key = self.key };
const value_utf16le = try registry_utf16le.getString(allocator, subkey_utf16le, value_name_utf16le);
defer allocator.free(value_utf16le);
const registry_wtf16le = RegistryWtf16Le{ .key = self.key };
const value_wtf16le = try registry_wtf16le.getString(allocator, subkey_wtf16le, value_name_wtf16le);
defer allocator.free(value_wtf16le);
 
const value_utf8: []u8 = std.unicode.utf16leToUtf8Alloc(allocator, value_utf16le) catch |err| switch (err) {
error.OutOfMemory => return error.OutOfMemory,
else => return error.StringNotFound,
};
errdefer allocator.free(value_utf8);
const value_wtf8: []u8 = try std.unicode.wtf16LeToWtf8Alloc(allocator, value_wtf16le);
errdefer allocator.free(value_wtf8);
 
return value_utf8;
return value_wtf8;
}
 
/// Get DWORD (u32) from registry.
pub fn getDword(self: *const RegistryUtf8, subkey: []const u8, value_name: []const u8) error{ ValueNameNotFound, NotADword, DwordTooLong, DwordNotFound }!u32 {
const subkey_utf16le: [:0]const u16 = subkey_utf16le: {
var subkey_utf16le_buf: [RegistryUtf16Le.key_name_max_len]u16 = undefined;
const subkey_utf16le_len: usize = std.unicode.utf8ToUtf16Le(subkey_utf16le_buf[0..], subkey) catch unreachable;
subkey_utf16le_buf[subkey_utf16le_len] = 0;
break :subkey_utf16le subkey_utf16le_buf[0..subkey_utf16le_len :0];
pub fn getDword(self: *const RegistryWtf8, subkey: []const u8, value_name: []const u8) error{ ValueNameNotFound, NotADword, DwordTooLong, DwordNotFound }!u32 {
const subkey_wtf16le: [:0]const u16 = subkey_wtf16le: {
var subkey_wtf16le_buf: [RegistryWtf16Le.key_name_max_len]u16 = undefined;
const subkey_wtf16le_len: usize = std.unicode.wtf8ToWtf16Le(subkey_wtf16le_buf[0..], subkey) catch unreachable;
subkey_wtf16le_buf[subkey_wtf16le_len] = 0;
break :subkey_wtf16le subkey_wtf16le_buf[0..subkey_wtf16le_len :0];
};
 
const value_name_utf16le: [:0]const u16 = value_name_utf16le: {
var value_name_utf16le_buf: [RegistryUtf16Le.value_name_max_len]u16 = undefined;
const value_name_utf16le_len: usize = std.unicode.utf8ToUtf16Le(value_name_utf16le_buf[0..], value_name) catch unreachable;
value_name_utf16le_buf[value_name_utf16le_len] = 0;
break :value_name_utf16le value_name_utf16le_buf[0..value_name_utf16le_len :0];
const value_name_wtf16le: [:0]const u16 = value_name_wtf16le: {
var value_name_wtf16le_buf: [RegistryWtf16Le.value_name_max_len]u16 = undefined;
const value_name_wtf16le_len: usize = std.unicode.wtf8ToWtf16Le(value_name_wtf16le_buf[0..], value_name) catch unreachable;
value_name_wtf16le_buf[value_name_wtf16le_len] = 0;
break :value_name_wtf16le value_name_wtf16le_buf[0..value_name_wtf16le_len :0];
};
 
const registry_utf16le = RegistryUtf16Le{ .key = self.key };
return try registry_utf16le.getDword(subkey_utf16le, value_name_utf16le);
const registry_wtf16le = RegistryWtf16Le{ .key = self.key };
return try registry_wtf16le.getDword(subkey_wtf16le, value_name_wtf16le);
}
 
/// Under private space with flags:
/// KEY_QUERY_VALUE and KEY_ENUMERATE_SUB_KEYS.
/// After finishing work, call `closeKey`.
pub fn loadFromPath(absolute_path: []const u8) error{KeyNotFound}!RegistryUtf8 {
const absolute_path_utf16le: [:0]const u16 = absolute_path_utf16le: {
var absolute_path_utf16le_buf: [RegistryUtf16Le.value_name_max_len]u16 = undefined;
const absolute_path_utf16le_len: usize = std.unicode.utf8ToUtf16Le(absolute_path_utf16le_buf[0..], absolute_path) catch unreachable;
absolute_path_utf16le_buf[absolute_path_utf16le_len] = 0;
break :absolute_path_utf16le absolute_path_utf16le_buf[0..absolute_path_utf16le_len :0];
pub fn loadFromPath(absolute_path: []const u8) error{KeyNotFound}!RegistryWtf8 {
const absolute_path_wtf16le: [:0]const u16 = absolute_path_wtf16le: {
var absolute_path_wtf16le_buf: [RegistryWtf16Le.value_name_max_len]u16 = undefined;
const absolute_path_wtf16le_len: usize = std.unicode.wtf8ToWtf16Le(absolute_path_wtf16le_buf[0..], absolute_path) catch unreachable;
absolute_path_wtf16le_buf[absolute_path_wtf16le_len] = 0;
break :absolute_path_wtf16le absolute_path_wtf16le_buf[0..absolute_path_wtf16le_len :0];
};
 
const registry_utf16le = try RegistryUtf16Le.loadFromPath(absolute_path_utf16le);
return RegistryUtf8{ .key = registry_utf16le.key };
const registry_wtf16le = try RegistryWtf16Le.loadFromPath(absolute_path_wtf16le);
return RegistryWtf8{ .key = registry_wtf16le.key };
}
};
 
const RegistryUtf16Le = struct {
const RegistryWtf16Le = struct {
key: windows.HKEY,
 
/// Includes root key (f.e. HKEY_LOCAL_MACHINE).
@@ -191,11 +188,11 @@ const RegistryUtf16Le = struct {
/// Under HKEY_LOCAL_MACHINE with flags:
/// KEY_QUERY_VALUE, KEY_WOW64_32KEY, and KEY_ENUMERATE_SUB_KEYS.
/// After finishing work, call `closeKey`.
fn openKey(hkey: windows.HKEY, key_utf16le: [:0]const u16) error{KeyNotFound}!RegistryUtf16Le {
fn openKey(hkey: windows.HKEY, key_wtf16le: [:0]const u16) error{KeyNotFound}!RegistryWtf16Le {
var key: windows.HKEY = undefined;
const return_code_int: windows.HRESULT = windows.advapi32.RegOpenKeyExW(
hkey,
key_utf16le,
key_wtf16le,
0,
windows.KEY_QUERY_VALUE | windows.KEY_WOW64_32KEY | windows.KEY_ENUMERATE_SUB_KEYS,
&key,
@@ -207,11 +204,11 @@ const RegistryUtf16Le = struct {
 
else => return error.KeyNotFound,
}
return RegistryUtf16Le{ .key = key };
return RegistryWtf16Le{ .key = key };
}
 
/// Closes key, after that usage is invalid
fn closeKey(self: *const RegistryUtf16Le) void {
fn closeKey(self: *const RegistryWtf16Le) void {
const return_code_int: windows.HRESULT = windows.advapi32.RegCloseKey(self.key);
const return_code: windows.Win32Error = @enumFromInt(return_code_int);
switch (return_code) {
@@ -221,25 +218,25 @@ const RegistryUtf16Le = struct {
}
 
/// Get string ([:0]const u16) from registry.
fn getString(self: *const RegistryUtf16Le, allocator: std.mem.Allocator, subkey_utf16le: [:0]const u16, value_name_utf16le: [:0]const u16) error{ OutOfMemory, ValueNameNotFound, NotAString, StringNotFound }![]const u16 {
fn getString(self: *const RegistryWtf16Le, allocator: std.mem.Allocator, subkey_wtf16le: [:0]const u16, value_name_wtf16le: [:0]const u16) error{ OutOfMemory, ValueNameNotFound, NotAString, StringNotFound }![]const u16 {
var actual_type: windows.ULONG = undefined;
 
// Calculating length to allocate
var value_utf16le_buf_size: u32 = 0; // in bytes, including any terminating NUL character or characters.
var value_wtf16le_buf_size: u32 = 0; // in bytes, including any terminating NUL character or characters.
var return_code_int: windows.HRESULT = windows.advapi32.RegGetValueW(
self.key,
subkey_utf16le,
value_name_utf16le,
subkey_wtf16le,
value_name_wtf16le,
RRF.RT_REG_SZ,
&actual_type,
null,
&value_utf16le_buf_size,
&value_wtf16le_buf_size,
);
 
// Check returned code and type
var return_code: windows.Win32Error = @enumFromInt(return_code_int);
switch (return_code) {
.SUCCESS => std.debug.assert(value_utf16le_buf_size != 0),
.SUCCESS => std.debug.assert(value_wtf16le_buf_size != 0),
.MORE_DATA => unreachable, // We are only reading length
.FILE_NOT_FOUND => return error.ValueNameNotFound,
.INVALID_PARAMETER => unreachable, // We didn't combine RRF.SUBKEY_WOW6464KEY and RRF.SUBKEY_WOW6432KEY
@@ -250,17 +247,17 @@ const RegistryUtf16Le = struct {
else => return error.NotAString,
}
 
const value_utf16le_buf: []u16 = try allocator.alloc(u16, std.math.divCeil(u32, value_utf16le_buf_size, 2) catch unreachable);
errdefer allocator.free(value_utf16le_buf);
const value_wtf16le_buf: []u16 = try allocator.alloc(u16, std.math.divCeil(u32, value_wtf16le_buf_size, 2) catch unreachable);
errdefer allocator.free(value_wtf16le_buf);
 
return_code_int = windows.advapi32.RegGetValueW(
self.key,
subkey_utf16le,
value_name_utf16le,
subkey_wtf16le,
value_name_wtf16le,
RRF.RT_REG_SZ,
&actual_type,
value_utf16le_buf.ptr,
&value_utf16le_buf_size,
value_wtf16le_buf.ptr,
&value_wtf16le_buf_size,
);
 
// Check returned code and (just in case) type again.
@@ -277,28 +274,28 @@ const RegistryUtf16Le = struct {
else => return error.NotAString,
}
 
const value_utf16le: []const u16 = value_utf16le: {
const value_wtf16le: []const u16 = value_wtf16le: {
// note(bratishkaerik): somehow returned value in `buf_len` is overestimated by Windows and contains extra space
// we will just search for zero termination and forget length
// Windows sure is strange
const value_utf16le_overestimated: [*:0]const u16 = @ptrCast(value_utf16le_buf.ptr);
break :value_utf16le std.mem.span(value_utf16le_overestimated);
const value_wtf16le_overestimated: [*:0]const u16 = @ptrCast(value_wtf16le_buf.ptr);
break :value_wtf16le std.mem.span(value_wtf16le_overestimated);
};
 
_ = allocator.resize(value_utf16le_buf, value_utf16le.len);
return value_utf16le;
_ = allocator.resize(value_wtf16le_buf, value_wtf16le.len);
return value_wtf16le;
}
 
/// Get DWORD (u32) from registry.
fn getDword(self: *const RegistryUtf16Le, subkey_utf16le: [:0]const u16, value_name_utf16le: [:0]const u16) error{ ValueNameNotFound, NotADword, DwordTooLong, DwordNotFound }!u32 {
fn getDword(self: *const RegistryWtf16Le, subkey_wtf16le: [:0]const u16, value_name_wtf16le: [:0]const u16) error{ ValueNameNotFound, NotADword, DwordTooLong, DwordNotFound }!u32 {
var actual_type: windows.ULONG = undefined;
var reg_size: u32 = @sizeOf(u32);
var reg_value: u32 = 0;
 
const return_code_int: windows.HRESULT = windows.advapi32.RegGetValueW(
self.key,
subkey_utf16le,
value_name_utf16le,
subkey_wtf16le,
value_name_wtf16le,
RRF.RT_REG_DWORD,
&actual_type,
&reg_value,
@@ -324,11 +321,11 @@ const RegistryUtf16Le = struct {
/// Under private space with flags:
/// KEY_QUERY_VALUE and KEY_ENUMERATE_SUB_KEYS.
/// After finishing work, call `closeKey`.
fn loadFromPath(absolute_path_as_utf16le: [:0]const u16) error{KeyNotFound}!RegistryUtf16Le {
fn loadFromPath(absolute_path_as_wtf16le: [:0]const u16) error{KeyNotFound}!RegistryWtf16Le {
var key: windows.HKEY = undefined;
 
const return_code_int: windows.HRESULT = std.os.windows.advapi32.RegLoadAppKeyW(
absolute_path_as_utf16le,
absolute_path_as_wtf16le,
&key,
windows.KEY_QUERY_VALUE | windows.KEY_ENUMERATE_SUB_KEYS,
0,
@@ -340,7 +337,7 @@ const RegistryUtf16Le = struct {
else => return error.KeyNotFound,
}
 
return RegistryUtf16Le{ .key = key };
return RegistryWtf16Le{ .key = key };
}
};
 
@@ -352,7 +349,7 @@ pub const Windows10Sdk = struct {
/// Caller owns the result's fields.
/// After finishing work, call `free(allocator)`.
fn find(allocator: std.mem.Allocator) error{ OutOfMemory, Windows10SdkNotFound, PathTooLong, VersionTooLong }!Windows10Sdk {
const v10_key = RegistryUtf8.openKey(windows.HKEY_LOCAL_MACHINE, "SOFTWARE\\Microsoft\\Microsoft SDKs\\Windows\\v10.0") catch |err| switch (err) {
const v10_key = RegistryWtf8.openKey(windows.HKEY_LOCAL_MACHINE, "SOFTWARE\\Microsoft\\Microsoft SDKs\\Windows\\v10.0") catch |err| switch (err) {
error.KeyNotFound => return error.Windows10SdkNotFound,
};
defer v10_key.closeKey();
@@ -413,11 +410,11 @@ pub const Windows10Sdk = struct {
/// Check whether this version is enumerated in registry.
fn isValidVersion(windows10sdk: *const Windows10Sdk) bool {
var buf: [std.fs.MAX_PATH_BYTES]u8 = undefined;
const reg_query_as_utf8 = std.fmt.bufPrint(buf[0..], "{s}\\{s}\\Installed Options", .{ WINDOWS_KIT_REG_KEY, windows10sdk.version }) catch |err| switch (err) {
const reg_query_as_wtf8 = std.fmt.bufPrint(buf[0..], "{s}\\{s}\\Installed Options", .{ WINDOWS_KIT_REG_KEY, windows10sdk.version }) catch |err| switch (err) {
error.NoSpaceLeft => return false,
};
 
const options_key = RegistryUtf8.openKey(windows.HKEY_LOCAL_MACHINE, reg_query_as_utf8) catch |err| switch (err) {
const options_key = RegistryWtf8.openKey(windows.HKEY_LOCAL_MACHINE, reg_query_as_wtf8) catch |err| switch (err) {
error.KeyNotFound => return false,
};
defer options_key.closeKey();
@@ -447,7 +444,7 @@ pub const Windows81Sdk = struct {
/// Find path and version of Windows 8.1 SDK.
/// Caller owns the result's fields.
/// After finishing work, call `free(allocator)`.
fn find(allocator: std.mem.Allocator, roots_key: *const RegistryUtf8) error{ OutOfMemory, Windows81SdkNotFound, PathTooLong, VersionTooLong }!Windows81Sdk {
fn find(allocator: std.mem.Allocator, roots_key: *const RegistryWtf8) error{ OutOfMemory, Windows81SdkNotFound, PathTooLong, VersionTooLong }!Windows81Sdk {
const path: []const u8 = path81: {
const path_maybe_with_trailing_slash = roots_key.getString(allocator, "", "KitsRoot81") catch |err| switch (err) {
error.NotAString => return error.Windows81SdkNotFound,
@@ -523,7 +520,7 @@ pub const ZigWindowsSDK = struct {
if (builtin.os.tag != .windows) return error.NotFound;
 
//note(dimenus): If this key doesn't exist, neither the Win 8 SDK nor the Win 10 SDK is installed
const roots_key = RegistryUtf8.openKey(windows.HKEY_LOCAL_MACHINE, WINDOWS_KIT_REG_KEY) catch |err| switch (err) {
const roots_key = RegistryWtf8.openKey(windows.HKEY_LOCAL_MACHINE, WINDOWS_KIT_REG_KEY) catch |err| switch (err) {
error.KeyNotFound => return error.NotFound,
};
defer roots_key.closeKey();
@@ -583,7 +580,7 @@ pub const ZigWindowsSDK = struct {
const MsvcLibDir = struct {
fn findInstancesDirViaCLSID(allocator: std.mem.Allocator) error{ OutOfMemory, PathNotFound }!std.fs.Dir {
const setup_configuration_clsid = "{177f0c4a-1cd3-4de7-a32c-71dbbb9fa36d}";
const setup_config_key = RegistryUtf8.openKey(windows.HKEY_CLASSES_ROOT, "CLSID\\" ++ setup_configuration_clsid) catch |err| switch (err) {
const setup_config_key = RegistryWtf8.openKey(windows.HKEY_CLASSES_ROOT, "CLSID\\" ++ setup_configuration_clsid) catch |err| switch (err) {
error.KeyNotFound => return error.PathNotFound,
};
defer setup_config_key.closeKey();
@@ -805,13 +802,13 @@ const MsvcLibDir = struct {
for (vs_versions) |vs_version| allocator.free(vs_version);
allocator.free(vs_versions);
}
var config_subkey_buf: [RegistryUtf16Le.key_name_max_len * 2]u8 = undefined;
var config_subkey_buf: [RegistryWtf16Le.key_name_max_len * 2]u8 = undefined;
const source_directories: []const u8 = source_directories: for (vs_versions) |vs_version| {
const privateregistry_absolute_path = std.fs.path.join(allocator, &.{ visualstudio_folder_path, vs_version, "privateregistry.bin" }) catch continue;
defer allocator.free(privateregistry_absolute_path);
if (!std.fs.path.isAbsolute(privateregistry_absolute_path)) continue;
 
const visualstudio_registry = RegistryUtf8.loadFromPath(privateregistry_absolute_path) catch continue;
const visualstudio_registry = RegistryWtf8.loadFromPath(privateregistry_absolute_path) catch continue;
defer visualstudio_registry.closeKey();
 
const config_subkey = std.fmt.bufPrint(config_subkey_buf[0..], "Software\\Microsoft\\VisualStudio\\{s}_Config", .{vs_version}) catch unreachable;
@@ -894,7 +891,7 @@ const MsvcLibDir = struct {
}
}
 
const vs7_key = RegistryUtf8.openKey(windows.HKEY_LOCAL_MACHINE, "SOFTWARE\\Microsoft\\VisualStudio\\SxS\\VS7") catch return error.PathNotFound;
const vs7_key = RegistryWtf8.openKey(windows.HKEY_LOCAL_MACHINE, "SOFTWARE\\Microsoft\\VisualStudio\\SxS\\VS7") catch return error.PathNotFound;
defer vs7_key.closeKey();
try_vs7_key: {
const path_maybe_with_trailing_slash = vs7_key.getString(allocator, "", "14.0") catch |err| switch (err) {
 
test/standalone/windows_spawn/main.zig added: 1932, removed: 517, total 1415
@@ -17,7 +17,7 @@ pub fn main() anyerror!void {
 
const tmp_absolute_path = try tmp.dir.realpathAlloc(allocator, ".");
defer allocator.free(tmp_absolute_path);
const tmp_absolute_path_w = try std.unicode.utf8ToUtf16LeWithNull(allocator, tmp_absolute_path);
const tmp_absolute_path_w = try std.unicode.utf8ToUtf16LeAllocZ(allocator, tmp_absolute_path);
defer allocator.free(tmp_absolute_path_w);
const cwd_absolute_path = try std.fs.cwd().realpathAlloc(allocator, ".");
defer allocator.free(cwd_absolute_path);