srctree

Andrew Kelley parent 63ea3e17 9fec608b 6c2eb0f1
Merge pull request #19005 from squeek502/wtf

Fix handling of Windows (WTF-16) and WASI (UTF-8) paths, etc

inline split

deps/aro/aro/Compilation.zig added: 1932, removed: 517, total 1415

@@ -69,7 +69,7 @@ pub const Environment = struct {

const val: ?[]const u8 = std.process.getEnvVarOwned(allocator, env_var_name) catch |err| switch (err) {

error.OutOfMemory => |e| return e,

error.EnvironmentVariableNotFound => null,

error.InvalidUtf8 => null,

error.InvalidWtf8 => null,

};

@field(env, field.name) = val;

}

deps/aro/aro/Driver.zig added: 1932, removed: 517, total 1415

@@ -523,7 +523,8 @@ pub fn errorDescription(e: anyerror) []const u8 {

error.NotDir => "is not a directory",

error.NotOpenForReading => "file is not open for reading",

error.NotOpenForWriting => "file is not open for writing",

error.InvalidUtf8 => "input is not valid UTF-8",

error.InvalidUtf8 => "path is not valid UTF-8",

error.InvalidWtf8 => "path is not valid WTF-8",

error.FileBusy => "file is busy",

error.NameTooLong => "file name is too long",

error.AccessDenied => "access denied",

lib/std/Build/Cache.zig added: 1932, removed: 517, total 1415

@@ -162,7 +162,7 @@ fn findPrefixResolved(cache: *const Cache, resolved_path: []u8) !PrefixedPath {

fn getPrefixSubpath(allocator: Allocator, prefix: []const u8, path: []u8) ![]u8 {

const relative = try std.fs.path.relative(allocator, prefix, path);

errdefer allocator.free(relative);

var component_iterator = std.fs.path.NativeUtf8ComponentIterator.init(relative) catch {

var component_iterator = std.fs.path.NativeComponentIterator.init(relative) catch {

return error.NotASubPath;

};

if (component_iterator.root() != null) {

lib/std/Thread.zig added: 1932, removed: 517, total 1415

@@ -91,7 +91,7 @@ pub fn setName(self: Thread, name: []const u8) SetNameError!void {

.windows => {

var buf: [max_name_len]u16 = undefined;

const len = try std.unicode.utf8ToUtf16Le(&buf, name);

const len = try std.unicode.wtf8ToWtf16Le(&buf, name);

const byte_len = math.cast(c_ushort, len * 2) orelse return error.NameTooLong;

// Note: NT allocates its own copy, no use-after-free here.

@@ -157,17 +157,12 @@ pub fn setName(self: Thread, name: []const u8) SetNameError!void {

}

pub const GetNameError = error{

// For Windows, the name is converted from UTF16 to UTF8

CodepointTooLarge,

Utf8CannotEncodeSurrogateHalf,

DanglingSurrogateHalf,

ExpectedSecondSurrogateHalf,

UnexpectedSecondSurrogateHalf,

Unsupported,

Unexpected,

} || os.PrctlError || os.ReadError || std.fs.File.OpenError || std.fmt.BufPrintError;

/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.

pub fn getName(self: Thread, buffer_ptr: *[max_name_len:0]u8) GetNameError!?[]const u8 {

buffer_ptr[max_name_len] = 0;

var buffer: [:0]u8 = buffer_ptr;

@@ -213,7 +208,7 @@ pub fn getName(self: Thread, buffer_ptr: *[max_name_len:0]u8) GetNameError!?[]co

)) {

.SUCCESS => {

const string = @as(*const os.windows.UNICODE_STRING, @ptrCast(&buf));

const len = try std.unicode.utf16leToUtf8(buffer, string.Buffer[0 .. string.Length / 2]);

const len = std.unicode.wtf16LeToWtf8(buffer, string.Buffer[0 .. string.Length / 2]);

return if (len > 0) buffer[0..len] else null;

.NOT_IMPLEMENTED => return error.Unsupported,

lib/std/child_process.zig added: 1932, removed: 517, total 1415

@@ -129,10 +129,9 @@ pub const ChildProcess = struct {

/// POSIX-only. `StdIo.Ignore` was selected and opening `/dev/null` returned ENODEV.

NoDevice,

/// Windows-only. One of:

/// * `cwd` was provided and it could not be re-encoded into UTF16LE, or

/// * The `PATH` or `PATHEXT` environment variable contained invalid UTF-8.

InvalidUtf8,

/// Windows-only. `cwd` or `argv` was provided and it was invalid WTF-8.

/// https://simonsapin.github.io/wtf-8/

InvalidWtf8,

/// Windows-only. `cwd` was provided, but the path did not exist when spawning the child process.

CurrentWorkingDirectoryUnlinked,

@@ -767,7 +766,7 @@ pub const ChildProcess = struct {

};

var piProcInfo: windows.PROCESS_INFORMATION = undefined;

const cwd_w = if (self.cwd) |cwd| try unicode.utf8ToUtf16LeWithNull(self.allocator, cwd) else null;

const cwd_w = if (self.cwd) |cwd| try unicode.wtf8ToWtf16LeAllocZ(self.allocator, cwd) else null;

defer if (cwd_w) |cwd| self.allocator.free(cwd);

const cwd_w_ptr = if (cwd_w) |cwd| cwd.ptr else null;

@@ -775,8 +774,8 @@ pub const ChildProcess = struct {

defer if (maybe_envp_buf) |envp_buf| self.allocator.free(envp_buf);

const envp_ptr = if (maybe_envp_buf) |envp_buf| envp_buf.ptr else null;

const app_name_utf8 = self.argv[0];

const app_name_is_absolute = fs.path.isAbsolute(app_name_utf8);

const app_name_wtf8 = self.argv[0];

const app_name_is_absolute = fs.path.isAbsolute(app_name_wtf8);

// the cwd set in ChildProcess is in effect when choosing the executable path

// to match posix semantics

@@ -785,11 +784,11 @@ pub const ChildProcess = struct {

// If the app name is absolute, then we need to use its dirname as the cwd

if (app_name_is_absolute) {

cwd_path_w_needs_free = true;

const dir = fs.path.dirname(app_name_utf8).?;

break :x try unicode.utf8ToUtf16LeWithNull(self.allocator, dir);

const dir = fs.path.dirname(app_name_wtf8).?;

break :x try unicode.wtf8ToWtf16LeAllocZ(self.allocator, dir);

} else if (self.cwd) |cwd| {

cwd_path_w_needs_free = true;

break :x try unicode.utf8ToUtf16LeWithNull(self.allocator, cwd);

break :x try unicode.wtf8ToWtf16LeAllocZ(self.allocator, cwd);

} else {

break :x &[_:0]u16{}; // empty for cwd

}

@@ -800,19 +799,19 @@ pub const ChildProcess = struct {

// into the basename and dirname and use the dirname as an addition to the cwd

// path. This is because NtQueryDirectoryFile cannot accept FileName params with

// path separators.

const app_basename_utf8 = fs.path.basename(app_name_utf8);

const app_basename_wtf8 = fs.path.basename(app_name_wtf8);

// If the app name is absolute, then the cwd will already have the app's dirname in it,

// so only populate app_dirname if app name is a relative path with > 0 path separators.

const maybe_app_dirname_utf8 = if (!app_name_is_absolute) fs.path.dirname(app_name_utf8) else null;

const maybe_app_dirname_wtf8 = if (!app_name_is_absolute) fs.path.dirname(app_name_wtf8) else null;

const app_dirname_w: ?[:0]u16 = x: {

if (maybe_app_dirname_utf8) |app_dirname_utf8| {

break :x try unicode.utf8ToUtf16LeWithNull(self.allocator, app_dirname_utf8);

if (maybe_app_dirname_wtf8) |app_dirname_wtf8| {

break :x try unicode.wtf8ToWtf16LeAllocZ(self.allocator, app_dirname_wtf8);

}

break :x null;

};

defer if (app_dirname_w != null) self.allocator.free(app_dirname_w.?);

const app_name_w = try unicode.utf8ToUtf16LeWithNull(self.allocator, app_basename_utf8);

const app_name_w = try unicode.wtf8ToWtf16LeAllocZ(self.allocator, app_basename_wtf8);

defer self.allocator.free(app_name_w);

const cmd_line_w = argvToCommandLineWindows(self.allocator, self.argv) catch |err| switch (err) {

@@ -1173,7 +1172,7 @@ const CreateProcessSupportedExtension = enum {

exe,

};

/// Case-insensitive UTF-16 lookup

/// Case-insensitive WTF-16 lookup

fn windowsCreateProcessSupportsExtension(ext: []const u16) ?CreateProcessSupportedExtension {

if (ext.len != 4) return null;

const State = enum {

@@ -1237,7 +1236,7 @@ test "windowsCreateProcessSupportsExtension" {

try std.testing.expect(windowsCreateProcessSupportsExtension(&[_]u16{ '.', 'e', 'X', 'e', 'c' }) == null);

}

pub const ArgvToCommandLineError = error{ OutOfMemory, InvalidUtf8, InvalidArg0 };

pub const ArgvToCommandLineError = error{ OutOfMemory, InvalidWtf8, InvalidArg0 };

/// Serializes `argv` to a Windows command-line string suitable for passing to a child process and

/// parsing by the `CommandLineToArgvW` algorithm. The caller owns the returned slice.

@@ -1320,7 +1319,7 @@ pub fn argvToCommandLineWindows(

}

return try unicode.utf8ToUtf16LeWithNull(allocator, buf.items);

return try unicode.wtf8ToWtf16LeAllocZ(allocator, buf.items);

}

test "argvToCommandLineWindows" {

@@ -1386,7 +1385,7 @@ fn testArgvToCommandLineWindows(argv: []const []const u8, expected_cmd_line: []c

const cmd_line_w = try argvToCommandLineWindows(std.testing.allocator, argv);

defer std.testing.allocator.free(cmd_line_w);

const cmd_line = try unicode.utf16leToUtf8Alloc(std.testing.allocator, cmd_line_w);

const cmd_line = try unicode.wtf16LeToWtf8Alloc(std.testing.allocator, cmd_line_w);

defer std.testing.allocator.free(cmd_line);

try std.testing.expectEqualStrings(expected_cmd_line, cmd_line);

@@ -1424,7 +1423,7 @@ fn windowsMakeAsyncPipe(rd: *?windows.HANDLE, wr: *?windows.HANDLE, sattr: *cons

"\\\\.\\pipe\\zig-childprocess-{d}-{d}",

.{ windows.kernel32.GetCurrentProcessId(), pipe_name_counter.fetchAdd(1, .Monotonic) },

) catch unreachable;

const len = std.unicode.utf8ToUtf16Le(&tmp_bufw, pipe_path) catch unreachable;

const len = std.unicode.wtf8ToWtf16Le(&tmp_bufw, pipe_path) catch unreachable;

tmp_bufw[len] = 0;

break :blk tmp_bufw[0..len :0];

};

@@ -1521,10 +1520,10 @@ pub fn createWindowsEnvBlock(allocator: mem.Allocator, env_map: *const EnvMap) !

var it = env_map.iterator();

var i: usize = 0;

while (it.next()) |pair| {

i += try unicode.utf8ToUtf16Le(result[i..], pair.key_ptr.*);

i += try unicode.wtf8ToWtf16Le(result[i..], pair.key_ptr.*);

result[i] = '=';

i += 1;

i += try unicode.utf8ToUtf16Le(result[i..], pair.value_ptr.*);

i += try unicode.wtf8ToWtf16Le(result[i..], pair.value_ptr.*);

result[i] = 0;

i += 1;

}

lib/std/fs.zig added: 1932, removed: 517, total 1415

@@ -31,18 +31,21 @@ pub const realpathW = os.realpathW;

pub const getAppDataDir = @import("fs/get_app_data_dir.zig").getAppDataDir;

pub const GetAppDataDirError = @import("fs/get_app_data_dir.zig").GetAppDataDirError;

/// This represents the maximum size of a UTF-8 encoded file path that the

/// This represents the maximum size of a `[]u8` file path that the

/// operating system will accept. Paths, including those returned from file

/// system operations, may be longer than this length, but such paths cannot

/// be successfully passed back in other file system operations. However,

/// all path components returned by file system operations are assumed to

/// fit into a UTF-8 encoded array of this length.

/// fit into a `u8` array of this length.

/// The byte count includes room for a null sentinel byte.

/// On Windows, `[]u8` file paths are encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `[]u8` file paths are encoded as valid UTF-8.

/// On other platforms, `[]u8` file paths are opaque sequences of bytes with no particular encoding.

pub const MAX_PATH_BYTES = switch (builtin.os.tag) {

.linux, .macos, .ios, .freebsd, .openbsd, .netbsd, .dragonfly, .haiku, .solaris, .illumos, .plan9, .emscripten => os.PATH_MAX,

// Each UTF-16LE character may be expanded to 3 UTF-8 bytes.

// If it would require 4 UTF-8 bytes, then there would be a surrogate

// pair in the UTF-16LE, and we (over)account 3 bytes for it that way.

// Each WTF-16LE code unit may be expanded to 3 WTF-8 bytes.

// If it would require 4 WTF-8 bytes, then there would be a surrogate

// pair in the WTF-16LE, and we (over)account 3 bytes for it that way.

// +1 for the null byte at the end, which can be encoded in 1 byte.

.windows => os.windows.PATH_MAX_WIDE * 3 + 1,

// TODO work out what a reasonable value we should use here

@@ -53,18 +56,21 @@ pub const MAX_PATH_BYTES = switch (builtin.os.tag) {

@compileError("PATH_MAX not implemented for " ++ @tagName(builtin.os.tag)),

};

/// This represents the maximum size of a UTF-8 encoded file name component that

/// This represents the maximum size of a `[]u8` file name component that

/// the platform's common file systems support. File name components returned by file system

/// operations are likely to fit into a UTF-8 encoded array of this length, but

/// operations are likely to fit into a `u8` array of this length, but

/// (depending on the platform) this assumption may not hold for every configuration.

/// The byte count does not include a null sentinel byte.

/// On Windows, `[]u8` file name components are encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `[]u8` file name components are encoded as valid UTF-8.

/// On other platforms, `[]u8` components are an opaque sequence of bytes with no particular encoding.

pub const MAX_NAME_BYTES = switch (builtin.os.tag) {

.linux, .macos, .ios, .freebsd, .openbsd, .netbsd, .dragonfly, .solaris, .illumos => os.NAME_MAX,

// Haiku's NAME_MAX includes the null terminator, so subtract one.

.haiku => os.NAME_MAX - 1,

// Each UTF-16LE character may be expanded to 3 UTF-8 bytes.

// If it would require 4 UTF-8 bytes, then there would be a surrogate

// pair in the UTF-16LE, and we (over)account 3 bytes for it that way.

// Each WTF-16LE code unit may be expanded to 3 WTF-8 bytes.

// If it would require 4 WTF-8 bytes, then there would be a surrogate

// pair in the WTF-16LE, and we (over)account 3 bytes for it that way.

.windows => os.windows.NAME_MAX * 3,

// For WASI, the MAX_NAME will depend on the host OS, so it needs to be

// as large as the largest MAX_NAME_BYTES (Windows) in order to work on any host OS.

@@ -86,6 +92,9 @@ pub const base64_decoder = base64.Base64Decoder.init(base64_alphabet, null);

/// TODO remove the allocator requirement from this API

/// TODO move to Dir

/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, both paths should be encoded as valid UTF-8.

/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.

pub fn atomicSymLink(allocator: Allocator, existing_path: []const u8, new_path: []const u8) !void {

if (cwd().symLink(existing_path, new_path, .{})) {

return;

@@ -117,6 +126,9 @@ pub fn atomicSymLink(allocator: Allocator, existing_path: []const u8, new_path:

/// Same as `Dir.updateFile`, except asserts that both `source_path` and `dest_path`

/// are absolute. See `Dir.updateFile` for a function that operates on both

/// absolute and relative paths.

/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, both paths should be encoded as valid UTF-8.

/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.

pub fn updateFileAbsolute(

source_path: []const u8,

dest_path: []const u8,

@@ -131,6 +143,9 @@ pub fn updateFileAbsolute(

/// Same as `Dir.copyFile`, except asserts that both `source_path` and `dest_path`

/// are absolute. See `Dir.copyFile` for a function that operates on both

/// absolute and relative paths.

/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, both paths should be encoded as valid UTF-8.

/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.

pub fn copyFileAbsolute(

source_path: []const u8,

dest_path: []const u8,

@@ -145,24 +160,30 @@ pub fn copyFileAbsolute(

/// Create a new directory, based on an absolute path.

/// Asserts that the path is absolute. See `Dir.makeDir` for a function that operates

/// on both absolute and relative paths.

/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `absolute_path` should be encoded as valid UTF-8.

/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding.

pub fn makeDirAbsolute(absolute_path: []const u8) !void {
    // The caller guarantees an absolute path; this is asserted, not reported as an error.
    assert(path.isAbsolute(absolute_path));
    // New directories are created with the default `Dir` mode.
    const mode = Dir.default_mode;
    return os.mkdir(absolute_path, mode);
}

/// Same as `makeDirAbsolute` except the parameter is a null-terminated UTF-8-encoded string.

/// Same as `makeDirAbsolute` except the parameter is null-terminated.

pub fn makeDirAbsoluteZ(absolute_path_z: [*:0]const u8) !void {
    // The caller guarantees an absolute path; this is asserted, not reported as an error.
    assert(path.isAbsoluteZ(absolute_path_z));
    // New directories are created with the default `Dir` mode.
    const mode = Dir.default_mode;
    return os.mkdirZ(absolute_path_z, mode);
}

/// Same as `makeDirAbsolute` except the parameter is a null-terminated WTF-16-encoded string.

/// Same as `makeDirAbsolute` except the parameter is a null-terminated WTF-16 LE-encoded string.

pub fn makeDirAbsoluteW(absolute_path_w: [*:0]const u16) !void {
    // The caller guarantees an absolute Windows path; this is asserted, not reported as an error.
    assert(path.isAbsoluteWindowsW(absolute_path_w));
    // New directories are created with the default `Dir` mode.
    const mode = Dir.default_mode;
    return os.mkdirW(absolute_path_w, mode);
}

/// Same as `Dir.deleteDir` except the path is absolute.

/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `dir_path` should be encoded as valid UTF-8.

/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding.

pub fn deleteDirAbsolute(dir_path: []const u8) !void {

assert(path.isAbsolute(dir_path));

return os.rmdir(dir_path);

@@ -181,6 +202,9 @@ pub fn deleteDirAbsoluteW(dir_path: [*:0]const u16) !void {

}

/// Same as `Dir.rename` except the paths are absolute.

/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, both paths should be encoded as valid UTF-8.

/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.

pub fn renameAbsolute(old_path: []const u8, new_path: []const u8) !void {

assert(path.isAbsolute(old_path));

assert(path.isAbsolute(new_path));

@@ -211,7 +235,7 @@ pub fn renameZ(old_dir: Dir, old_sub_path_z: [*:0]const u8, new_dir: Dir, new_su

return os.renameatZ(old_dir.fd, old_sub_path_z, new_dir.fd, new_sub_path_z);

}

/// Same as `rename` except the parameters are UTF16LE, NT prefixed.

/// Same as `rename` except the parameters are WTF-16LE, NT-prefixed.

/// This function is Windows-only.

pub fn renameW(old_dir: Dir, old_sub_path_w: []const u16, new_dir: Dir, new_sub_path_w: []const u16) !void {

return os.renameatW(old_dir.fd, old_sub_path_w, new_dir.fd, new_sub_path_w);

@@ -240,6 +264,9 @@ pub fn defaultWasiCwd() std.os.wasi.fd_t {

/// See `openDirAbsoluteZ` for a function that accepts a null-terminated path.

///

/// Asserts that the path parameter has no null bytes.

/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `absolute_path` should be encoded as valid UTF-8.

/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding.

pub fn openDirAbsolute(absolute_path: []const u8, flags: Dir.OpenDirOptions) File.OpenError!Dir {

assert(path.isAbsolute(absolute_path));

return cwd().openDir(absolute_path, flags);

@@ -262,6 +289,9 @@ pub fn openDirAbsoluteW(absolute_path_c: [*:0]const u16, flags: Dir.OpenDirOptio

/// operates on both absolute and relative paths.

/// Asserts that the path parameter has no null bytes. See `openFileAbsoluteZ` for a function

/// that accepts a null-terminated path.

/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `absolute_path` should be encoded as valid UTF-8.

/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding.

pub fn openFileAbsolute(absolute_path: []const u8, flags: File.OpenFlags) File.OpenError!File {

assert(path.isAbsolute(absolute_path));

return cwd().openFile(absolute_path, flags);

@@ -280,11 +310,13 @@ pub fn openFileAbsoluteW(absolute_path_w: []const u16, flags: File.OpenFlags) Fi

}

/// Test accessing `path`.

/// `path` is UTF-8-encoded.

/// Be careful of Time-Of-Check-Time-Of-Use race conditions when using this function.

/// For example, instead of testing if a file exists and then opening it, just

/// open it and handle the error for file not found.

/// See `accessAbsoluteZ` for a function that accepts a null-terminated path.

/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `absolute_path` should be encoded as valid UTF-8.

/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding.

pub fn accessAbsolute(absolute_path: []const u8, flags: File.OpenFlags) Dir.AccessError!void {

assert(path.isAbsolute(absolute_path));

try cwd().access(absolute_path, flags);

@@ -306,6 +338,9 @@ pub fn accessAbsoluteW(absolute_path: [*:0]const u16, flags: File.OpenFlags) Dir

/// operates on both absolute and relative paths.

/// Asserts that the path parameter has no null bytes. See `createFileAbsoluteZ` for a function

/// that accepts a null-terminated path.

/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `absolute_path` should be encoded as valid UTF-8.

/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding.

pub fn createFileAbsolute(absolute_path: []const u8, flags: File.CreateFlags) File.OpenError!File {

assert(path.isAbsolute(absolute_path));

return cwd().createFile(absolute_path, flags);

@@ -327,6 +362,9 @@ pub fn createFileAbsoluteW(absolute_path_w: [*:0]const u16, flags: File.CreateFl

/// Asserts that the path is absolute. See `Dir.deleteFile` for a function that

/// operates on both absolute and relative paths.

/// Asserts that the path parameter has no null bytes.

/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `absolute_path` should be encoded as valid UTF-8.

/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding.

pub fn deleteFileAbsolute(absolute_path: []const u8) Dir.DeleteFileError!void {

assert(path.isAbsolute(absolute_path));

return cwd().deleteFile(absolute_path);

@@ -349,6 +387,9 @@ pub fn deleteFileAbsoluteW(absolute_path_w: [*:0]const u16) Dir.DeleteFileError!

/// Asserts that the path is absolute. See `Dir.deleteTree` for a function that

/// operates on both absolute and relative paths.

/// Asserts that the path parameter has no null bytes.

/// On Windows, `absolute_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `absolute_path` should be encoded as valid UTF-8.

/// On other platforms, `absolute_path` is an opaque sequence of bytes with no particular encoding.

pub fn deleteTreeAbsolute(absolute_path: []const u8) !void {

assert(path.isAbsolute(absolute_path));

const dirname = path.dirname(absolute_path) orelse return error{

@@ -364,6 +405,9 @@ pub fn deleteTreeAbsolute(absolute_path: []const u8) !void {

}

/// Same as `Dir.readLink`, except it asserts the path is absolute.

/// On Windows, `pathname` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `pathname` should be encoded as valid UTF-8.

/// On other platforms, `pathname` is an opaque sequence of bytes with no particular encoding.

pub fn readLinkAbsolute(pathname: []const u8, buffer: *[MAX_PATH_BYTES]u8) ![]u8 {

assert(path.isAbsolute(pathname));

return os.readlink(pathname, buffer);

@@ -387,6 +431,9 @@ pub fn readLinkAbsoluteZ(pathname_c: [*:0]const u8, buffer: *[MAX_PATH_BYTES]u8)

/// one; the latter case is known as a dangling link.

/// If `sym_link_path` exists, it will not be overwritten.

/// See also `symLinkAbsoluteZ` and `symLinkAbsoluteW`.

/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, both paths should be encoded as valid UTF-8.

/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.

pub fn symLinkAbsolute(

target_path: []const u8,

sym_link_path: []const u8,

@@ -402,7 +449,7 @@ pub fn symLinkAbsolute(

return os.symlink(target_path, sym_link_path);

}

/// Windows-only. Same as `symLinkAbsolute` except the parameters are null-terminated, WTF16 encoded.

/// Windows-only. Same as `symLinkAbsolute` except the parameters are null-terminated, WTF-16 LE-encoded.

/// Note that this function will by default try creating a symbolic link to a file. If you would

/// like to create a symbolic link to a directory, specify this with `SymLinkFlags{ .is_directory = true }`.

/// See also `symLinkAbsolute`, `symLinkAbsoluteZ`.

@@ -426,27 +473,14 @@ pub fn symLinkAbsoluteZ(

assert(path.isAbsoluteZ(target_path_c));

assert(path.isAbsoluteZ(sym_link_path_c));

if (builtin.os.tag == .windows) {

const target_path_w = try os.windows.cStrToWin32PrefixedFileW(target_path_c);

const sym_link_path_w = try os.windows.cStrToWin32PrefixedFileW(sym_link_path_c);

return os.windows.CreateSymbolicLink(sym_link_path_w.span(), target_path_w.span(), flags.is_directory);

const target_path_w = try os.windows.cStrToPrefixedFileW(null, target_path_c);

const sym_link_path_w = try os.windows.cStrToPrefixedFileW(null, sym_link_path_c);

return os.windows.CreateSymbolicLink(null, sym_link_path_w.span(), target_path_w.span(), flags.is_directory);

}

return os.symlinkZ(target_path_c, sym_link_path_c);

}

pub const OpenSelfExeError = error{

SharingViolation,

PathAlreadyExists,

FileNotFound,

AccessDenied,

PipeBusy,

NameTooLong,

/// On Windows, file paths must be valid Unicode.

InvalidUtf8,

/// On Windows, file paths cannot contain these characters:

/// '/', '*', '?', '"', '<', '>', '|'

BadPathName,

Unexpected,

} || os.OpenError || SelfExePathError || os.FlockError;

/// Error set returned by `openSelfExe`: the union of `os.OpenError`,
/// `SelfExePathError`, and `os.FlockError`.
pub const OpenSelfExeError = os.OpenError || SelfExePathError || os.FlockError;

pub fn openSelfExe(flags: File.OpenFlags) OpenSelfExeError!File {

if (builtin.os.tag == .linux) {

@@ -469,7 +503,45 @@ pub fn openSelfExe(flags: File.OpenFlags) OpenSelfExeError!File {

return openFileAbsoluteZ(buf[0..self_exe_path.len :0].ptr, flags);

}

pub const SelfExePathError = os.ReadLinkError || os.SysCtlError || os.RealPathError;

// This is os.ReadLinkError || os.RealPathError with impossible errors excluded

/// Error set returned by `selfExePath` and `selfExeDirPath`, merged with `os.SysCtlError`.
/// `InvalidUtf8`, `InvalidWtf8`, and `UnsupportedReparsePointType` are intentionally
/// absent: the `selfExePath` branches that can receive them map them to `unreachable`.
pub const SelfExePathError = error{

FileNotFound,

AccessDenied,

NameTooLong,

NotSupported,

NotDir,

SymLinkLoop,

InputOutput,

FileTooBig,

IsDir,

ProcessFdQuotaExceeded,

SystemFdQuotaExceeded,

NoDevice,

SystemResources,

NoSpaceLeft,

FileSystem,

BadPathName,

DeviceBusy,

SharingViolation,

PipeBusy,

NotLink,

PathAlreadyExists,

InvalidHandle,

/// On Windows, `\\server` or `\\server\share` was not found.

NetworkNotFound,

/// On Windows, antivirus software is enabled by default. It can be

/// disabled, but Windows Update sometimes ignores the user's preference

/// and re-enables it. When enabled, antivirus software on Windows

/// intercepts file system operations and makes them significantly slower

/// in addition to possibly failing with this error code.

AntivirusInterference,

/// On Windows, the volume does not contain a recognized file system. File

/// system drivers might not be loaded, or the volume may be corrupt.

UnrecognizedVolume,

} || os.SysCtlError;

/// `selfExePath` except allocates the result on the heap.

/// Caller owns returned memory.

@@ -491,6 +563,8 @@ pub fn selfExePathAlloc(allocator: Allocator) ![]u8 {

/// This function may return an error if the current executable

/// was deleted after spawning.

/// Returned value is a slice of out_buffer.

/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.

///

/// On Linux, depends on procfs being mounted. If the currently executing binary has

/// been deleted, the file path looks something like `/a/b/c/exe (deleted)`.

@@ -505,15 +579,31 @@ pub fn selfExePath(out_buffer: []u8) SelfExePathError![]u8 {

if (rc != 0) return error.NameTooLong;

var real_path_buf: [MAX_PATH_BYTES]u8 = undefined;

const real_path = try std.os.realpathZ(&symlink_path_buf, &real_path_buf);

const real_path = std.os.realpathZ(&symlink_path_buf, &real_path_buf) catch |err| switch (err) {

error.InvalidWtf8 => unreachable, // Windows-only

error.NetworkNotFound => unreachable, // Windows-only

else => |e| return e,

};

if (real_path.len > out_buffer.len) return error.NameTooLong;

const result = out_buffer[0..real_path.len];

@memcpy(result, real_path);

return result;

}

switch (builtin.os.tag) {

.linux => return os.readlinkZ("/proc/self/exe", out_buffer),

.solaris, .illumos => return os.readlinkZ("/proc/self/path/a.out", out_buffer),

.linux => return os.readlinkZ("/proc/self/exe", out_buffer) catch |err| switch (err) {

error.InvalidUtf8 => unreachable, // WASI-only

error.InvalidWtf8 => unreachable, // Windows-only

error.UnsupportedReparsePointType => unreachable, // Windows-only

error.NetworkNotFound => unreachable, // Windows-only

else => |e| return e,

.solaris, .illumos => return os.readlinkZ("/proc/self/path/a.out", out_buffer) catch |err| switch (err) {

error.InvalidUtf8 => unreachable, // WASI-only

error.InvalidWtf8 => unreachable, // Windows-only

error.UnsupportedReparsePointType => unreachable, // Windows-only

error.NetworkNotFound => unreachable, // Windows-only

else => |e| return e,

.freebsd, .dragonfly => {

var mib = [4]c_int{ os.CTL.KERN, os.KERN.PROC, os.KERN.PROC_PATHNAME, -1 };

var out_len: usize = out_buffer.len;

@@ -537,7 +627,11 @@ pub fn selfExePath(out_buffer: []u8) SelfExePathError![]u8 {

if (mem.indexOf(u8, argv0, "/") != null) {

// argv[0] is a path (relative or absolute): use realpath(3) directly

var real_path_buf: [MAX_PATH_BYTES]u8 = undefined;

const real_path = try os.realpathZ(os.argv[0], &real_path_buf);

const real_path = os.realpathZ(os.argv[0], &real_path_buf) catch |err| switch (err) {

error.InvalidWtf8 => unreachable, // Windows-only

error.NetworkNotFound => unreachable, // Windows-only

else => |e| return e,

};

if (real_path.len > out_buffer.len)

return error.NameTooLong;

const result = out_buffer[0..real_path.len];

@@ -575,7 +669,10 @@ pub fn selfExePath(out_buffer: []u8) SelfExePathError![]u8 {

// symlink, not the path that the symlink points to. We want the path

// that the symlink points to, though, so we need to get the realpath.

const pathname_w = try os.windows.wToPrefixedFileW(null, image_path_name);

return std.fs.cwd().realpathW(pathname_w.span(), out_buffer);

return std.fs.cwd().realpathW(pathname_w.span(), out_buffer) catch |err| switch (err) {

error.InvalidWtf8 => unreachable,

else => |e| return e,

};

else => @compileError("std.fs.selfExePath not supported for this target"),

}

@@ -599,6 +696,8 @@ pub fn selfExeDirPathAlloc(allocator: Allocator) ![]u8 {

/// Get the directory path that contains the current executable.

/// Returned value is a slice of out_buffer.

/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.

pub fn selfExeDirPath(out_buffer: []u8) SelfExePathError![]const u8 {

const self_exe_path = try selfExePath(out_buffer);

// Assume that the OS APIs return absolute paths, and therefore dirname

@@ -607,6 +706,8 @@ pub fn selfExeDirPath(out_buffer: []u8) SelfExePathError![]const u8 {

}

/// `realpath`, except caller must free the returned memory.

/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.

/// See also `Dir.realpath`.

pub fn realpathAlloc(allocator: Allocator, pathname: []const u8) ![]u8 {

// Use of MAX_PATH_BYTES here is valid as the realpath function does not

lib/std/fs/Dir.zig added: 1932, removed: 517, total 1415

@@ -9,7 +9,14 @@ pub const Entry = struct {

pub const Kind = File.Kind;

};

const IteratorError = error{ AccessDenied, SystemResources } || posix.UnexpectedError;

const IteratorError = error{

AccessDenied,

SystemResources,

/// WASI-only. The path of an entry could not be encoded as valid UTF-8.

/// WASI is unable to handle paths that cannot be encoded as well-formed UTF-8.

/// https://github.com/WebAssembly/wasi-filesystem/issues/17#issuecomment-1430639353

InvalidUtf8,

} || posix.UnexpectedError;

pub const Iterator = switch (builtin.os.tag) {

.macos, .ios, .freebsd, .netbsd, .dragonfly, .openbsd, .solaris, .illumos => struct {

@@ -445,13 +452,12 @@ pub const Iterator = switch (builtin.os.tag) {

self.index = self.buf.len;

}

const name_utf16le = @as([*]u16, @ptrCast(&dir_info.FileName))[0 .. dir_info.FileNameLength / 2];

const name_wtf16le = @as([*]u16, @ptrCast(&dir_info.FileName))[0 .. dir_info.FileNameLength / 2];

if (mem.eql(u16, name_utf16le, &[_]u16{'.'}) or mem.eql(u16, name_utf16le, &[_]u16{ '.', '.' }))

if (mem.eql(u16, name_wtf16le, &[_]u16{'.'}) or mem.eql(u16, name_wtf16le, &[_]u16{ '.', '.' }))

continue;

// Trust that Windows gives us valid UTF-16LE

const name_utf8_len = std.unicode.utf16leToUtf8(self.name_data[0..], name_utf16le) catch unreachable;

const name_utf8 = self.name_data[0..name_utf8_len];

const name_wtf8_len = std.unicode.wtf16LeToWtf8(self.name_data[0..], name_wtf16le);

const name_wtf8 = self.name_data[0..name_wtf8_len];

const kind: Entry.Kind = blk: {

const attrs = dir_info.FileAttributes;

if (attrs & w.FILE_ATTRIBUTE_DIRECTORY != 0) break :blk .directory;

@@ -459,7 +465,7 @@ pub const Iterator = switch (builtin.os.tag) {

break :blk .file;

};

return Entry{

.name = name_utf8,

.name = name_wtf8,

.kind = kind,

};

}

@@ -516,6 +522,7 @@ pub const Iterator = switch (builtin.os.tag) {

.INVAL => unreachable,

.NOENT => return error.DirNotFound, // The directory being iterated was deleted during iteration.

.NOTCAPABLE => return error.AccessDenied,

.ILSEQ => return error.InvalidUtf8, // An entry's name cannot be encoded as UTF-8.

else => |err| return posix.unexpectedErrno(err),

}

if (bufused == 0) return null;

@@ -743,7 +750,11 @@ pub const OpenError = error{

SystemFdQuotaExceeded,

NoDevice,

SystemResources,

/// WASI-only; file paths must be valid UTF-8.

InvalidUtf8,

/// Windows-only; file paths provided by the user must be valid WTF-8.

/// https://simonsapin.github.io/wtf-8/

InvalidWtf8,

BadPathName,

DeviceBusy,

/// On Windows, `\\server` or `\\server\share` was not found.

@@ -759,6 +770,9 @@ pub fn close(self: *Dir) void {

/// To create a new file, see `createFile`.

/// Call `File.close` to release the resource.

/// Asserts that the path parameter has no null bytes.

/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `sub_path` should be encoded as valid UTF-8.

/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.

pub fn openFile(self: Dir, sub_path: []const u8, flags: File.OpenFlags) File.OpenError!File {

if (builtin.os.tag == .windows) {

const path_w = try std.os.windows.sliceToPrefixedFileW(self.fd, sub_path);

@@ -911,6 +925,9 @@ pub fn openFileW(self: Dir, sub_path_w: []const u16, flags: File.OpenFlags) File

/// Creates, opens, or overwrites a file with write access.

/// Call `File.close` on the result when done.

/// Asserts that the path parameter has no null bytes.

/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `sub_path` should be encoded as valid UTF-8.

/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.

pub fn createFile(self: Dir, sub_path: []const u8, flags: File.CreateFlags) File.OpenError!File {

if (builtin.os.tag == .windows) {

const path_w = try std.os.windows.sliceToPrefixedFileW(self.fd, sub_path);

@@ -1060,18 +1077,21 @@ pub fn createFileW(self: Dir, sub_path_w: []const u16, flags: File.CreateFlags)

/// Creates a single directory with a relative or absolute path.

/// To create multiple directories to make an entire path, see `makePath`.

/// To operate on only absolute paths, see `makeDirAbsolute`.

/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `sub_path` should be encoded as valid UTF-8.

/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.

pub fn makeDir(self: Dir, sub_path: []const u8) !void {

// Forward to `posix.mkdirat` with this directory's handle and `default_mode`;
// `try` hands any error it reports straight back to the caller.

try posix.mkdirat(self.fd, sub_path, default_mode);

}

/// Creates a single directory with a relative or absolute null-terminated UTF-8-encoded path.

/// Same as `makeDir`, but `sub_path` is null-terminated.

/// To create multiple directories to make an entire path, see `makePath`.

/// To operate on only absolute paths, see `makeDirAbsoluteZ`.

pub fn makeDirZ(self: Dir, sub_path: [*:0]const u8) !void {

// Null-terminated counterpart of `makeDir`: forwards to `posix.mkdiratZ`
// with this directory's handle and `default_mode`, propagating any error.

try posix.mkdiratZ(self.fd, sub_path, default_mode);

}

/// Creates a single directory with a relative or absolute null-terminated WTF-16-encoded path.

/// Creates a single directory with a relative or absolute null-terminated WTF-16 LE-encoded path.

/// To create multiple directories to make an entire path, see `makePath`.

/// To operate on only absolute paths, see `makeDirAbsoluteW`.

pub fn makeDirW(self: Dir, sub_path: [*:0]const u16) !void {

@@ -1083,6 +1103,9 @@ pub fn makeDirW(self: Dir, sub_path: [*:0]const u16) !void {

/// Returns success if the path already exists and is a directory.

/// This function is not atomic, and if it returns an error, the file system may

/// have been modified regardless.

/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `sub_path` should be encoded as valid UTF-8.

/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.

///

/// Paths containing `..` components are handled differently depending on the platform:

/// - On Windows, `..` are resolved before the path is passed to NtCreateFile, meaning

@@ -1119,16 +1142,17 @@ pub fn makePath(self: Dir, sub_path: []const u8) !void {

}

/// Calls makeOpenDirAccessMaskW iteratively to make an entire path

/// Windows only. Calls makeOpenDirAccessMaskW iteratively to make an entire path

/// (i.e. creating any parent directories that do not exist).

/// Opens the dir if the path already exists and is a directory.

/// This function is not atomic, and if it returns an error, the file system may

/// have been modified regardless.

/// `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

fn makeOpenPathAccessMaskW(self: Dir, sub_path: []const u8, access_mask: u32, no_follow: bool) OpenError!Dir {

const w = std.os.windows;

var it = try fs.path.componentIterator(sub_path);

// If there are no components in the path, then create a dummy component with the full path.

var component = it.last() orelse fs.path.NativeUtf8ComponentIterator.Component{

var component = it.last() orelse fs.path.NativeComponentIterator.Component{

.name = "",

.path = sub_path,

};

@@ -1156,7 +1180,9 @@ fn makeOpenPathAccessMaskW(self: Dir, sub_path: []const u8, access_mask: u32, no

/// This function performs `makePath`, followed by `openDir`.

/// If supported by the OS, this operation is atomic. It is not atomic on

/// all operating systems.

/// On Windows, this function performs `makeOpenPathAccessMaskW`.

/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `sub_path` should be encoded as valid UTF-8.

/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.

pub fn makeOpenPath(self: Dir, sub_path: []const u8, open_dir_options: OpenDirOptions) !Dir {

return switch (builtin.os.tag) {

.windows => {

@@ -1185,6 +1211,10 @@ pub const RealPathError = posix.RealPathError;

/// `pathname` relative to this `Dir`. If `pathname` is absolute, ignores this

/// `Dir` handle and returns the canonicalized absolute pathname of `pathname`

/// argument.

/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.

/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.

/// This function is not universally supported by all platforms.

/// Currently supported hosts are: Linux, macOS, and Windows.

/// See also `Dir.realpathZ`, `Dir.realpathW`, and `Dir.realpathAlloc`.

@@ -1224,6 +1254,7 @@ pub fn realpathZ(self: Dir, pathname: [*:0]const u8, out_buffer: []u8) RealPathE

error.FileLocksNotSupported => return error.Unexpected,

error.FileBusy => return error.Unexpected,

error.WouldBlock => return error.Unexpected,

error.InvalidUtf8 => unreachable, // WASI-only

else => |e| return e,

};

defer posix.close(fd);

@@ -1246,7 +1277,8 @@ pub fn realpathZ(self: Dir, pathname: [*:0]const u8, out_buffer: []u8) RealPathE

return result;

}

/// Windows-only. Same as `Dir.realpath` except `pathname` is WTF16 encoded.

/// Windows-only. Same as `Dir.realpath` except `pathname` is WTF-16 LE encoded.

/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// See also `Dir.realpath`, `realpathW`.

pub fn realpathW(self: Dir, pathname: []const u16, out_buffer: []u8) RealPathError![]u8 {

const w = std.os.windows;

@@ -1272,16 +1304,7 @@ pub fn realpathW(self: Dir, pathname: []const u16, out_buffer: []u8) RealPathErr

var wide_buf: [w.PATH_MAX_WIDE]u16 = undefined;

const wide_slice = try w.GetFinalPathNameByHandle(h_file, .{}, &wide_buf);

var big_out_buf: [fs.MAX_PATH_BYTES]u8 = undefined;

const end_index = std.unicode.utf16leToUtf8(&big_out_buf, wide_slice) catch |e| switch (e) {

// TODO: Windows file paths can be arbitrary arrays of u16 values and

// must not fail with InvalidUtf8.

error.DanglingSurrogateHalf,

error.ExpectedSecondSurrogateHalf,

error.UnexpectedSecondSurrogateHalf,

error.CodepointTooLarge,

error.Utf8CannotEncodeSurrogateHalf,

=> return error.InvalidUtf8,

};

const end_index = std.unicode.wtf16LeToWtf8(&big_out_buf, wide_slice);

if (end_index > out_buffer.len)

return error.NameTooLong;

const result = out_buffer[0..end_index];

@@ -1344,6 +1367,9 @@ pub const OpenDirOptions = struct {

/// open until `close` is called on the result.

/// The directory cannot be iterated unless the `iterate` option is set to `true`.

///

/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `sub_path` should be encoded as valid UTF-8.

/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.

/// Asserts that the path parameter has no null bytes.

pub fn openDir(self: Dir, sub_path: []const u8, args: OpenDirOptions) OpenError!Dir {

switch (builtin.os.tag) {

@@ -1428,7 +1454,7 @@ pub fn openDirZ(self: Dir, sub_path_c: [*:0]const u8, args: OpenDirOptions) Open

}

/// Same as `openDir` except the path parameter is WTF-16 encoded, NT-prefixed.

/// Same as `openDir` except the path parameter is WTF-16 LE encoded, NT-prefixed.

/// This function asserts the target OS is Windows.

pub fn openDirW(self: Dir, sub_path_w: [*:0]const u16, args: OpenDirOptions) OpenError!Dir {

const w = std.os.windows;

@@ -1518,6 +1544,9 @@ fn makeOpenDirAccessMaskW(self: Dir, sub_path_w: [*:0]const u16, access_mask: u3

pub const DeleteFileError = posix.UnlinkError;

/// Delete a file name and possibly the file it refers to, based on an open directory handle.

/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `sub_path` should be encoded as valid UTF-8.

/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.

/// Asserts that the path parameter has no null bytes.

pub fn deleteFile(self: Dir, sub_path: []const u8) DeleteFileError!void {

if (builtin.os.tag == .windows) {

@@ -1553,7 +1582,7 @@ pub fn deleteFileZ(self: Dir, sub_path_c: [*:0]const u8) DeleteFileError!void {

};

}

/// Same as `deleteFile` except the parameter is WTF-16 encoded.

/// Same as `deleteFile` except the parameter is WTF-16 LE encoded.

pub fn deleteFileW(self: Dir, sub_path_w: []const u16) DeleteFileError!void {

posix.unlinkatW(self.fd, sub_path_w, 0) catch |err| switch (err) {

error.DirNotEmpty => unreachable, // not passing AT.REMOVEDIR

@@ -1572,7 +1601,11 @@ pub const DeleteDirError = error{

NotDir,

SystemResources,

ReadOnlyFileSystem,

/// WASI-only; file paths must be valid UTF-8.

InvalidUtf8,

/// Windows-only; file paths provided by the user must be valid WTF-8.

/// https://simonsapin.github.io/wtf-8/

InvalidWtf8,

BadPathName,

/// On Windows, `\\server` or `\\server\share` was not found.

NetworkNotFound,

@@ -1581,6 +1614,9 @@ pub const DeleteDirError = error{

/// Returns `error.DirNotEmpty` if the directory is not empty.

/// To delete a directory recursively, see `deleteTree`.

/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `sub_path` should be encoded as valid UTF-8.

/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.

/// Asserts that the path parameter has no null bytes.

pub fn deleteDir(self: Dir, sub_path: []const u8) DeleteDirError!void {

if (builtin.os.tag == .windows) {

@@ -1605,7 +1641,7 @@ pub fn deleteDirZ(self: Dir, sub_path_c: [*:0]const u8) DeleteDirError!void {

};

}

/// Same as `deleteDir` except the parameter is UTF16LE, NT prefixed.

/// Same as `deleteDir` except the parameter is WTF-16 LE encoded, NT-prefixed.

/// This function is Windows-only.

pub fn deleteDirW(self: Dir, sub_path_w: []const u16) DeleteDirError!void {

posix.unlinkatW(self.fd, sub_path_w, posix.AT.REMOVEDIR) catch |err| switch (err) {

@@ -1620,6 +1656,9 @@ pub const RenameError = posix.RenameError;

/// If new_sub_path already exists, it will be replaced.

/// Renaming a file over an existing directory or a directory

/// over an existing file will fail with `error.IsDir` or `error.NotDir`.

/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, both paths should be encoded as valid UTF-8.

/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.

pub fn rename(self: Dir, old_sub_path: []const u8, new_sub_path: []const u8) RenameError!void {

// `self.fd` is passed as the directory handle for both the old and the new
// path, so the two sub-paths are handed to `posix.renameat` against the same `Dir`.

return posix.renameat(self.fd, old_sub_path, self.fd, new_sub_path);

}

@@ -1629,7 +1668,7 @@ pub fn renameZ(self: Dir, old_sub_path_z: [*:0]const u8, new_sub_path_z: [*:0]co

return posix.renameatZ(self.fd, old_sub_path_z, self.fd, new_sub_path_z);

}

/// Same as `rename` except the parameters are UTF16LE, NT prefixed.

/// Same as `rename` except the parameters are WTF-16 LE encoded, NT-prefixed.

/// This function is Windows-only.

pub fn renameW(self: Dir, old_sub_path_w: []const u16, new_sub_path_w: []const u16) RenameError!void {

return posix.renameatW(self.fd, old_sub_path_w, self.fd, new_sub_path_w);

@@ -1647,6 +1686,9 @@ pub const SymLinkFlags = struct {

/// A symbolic link (also known as a soft link) may point to an existing file or to a nonexistent

/// one; the latter case is known as a dangling link.

/// If `sym_link_path` exists, it will not be overwritten.

/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, both paths should be encoded as valid UTF-8.

/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.

pub fn symLink(

self: Dir,

target_path: []const u8,

@@ -1662,7 +1704,7 @@ pub fn symLink(

// when converting to an NT namespaced path. CreateSymbolicLink in

// symLinkW will handle the necessary conversion.

var target_path_w: std.os.windows.PathSpace = undefined;

target_path_w.len = try std.unicode.utf8ToUtf16Le(&target_path_w.data, target_path);

target_path_w.len = try std.unicode.wtf8ToWtf16Le(&target_path_w.data, target_path);

target_path_w.data[target_path_w.len] = 0;

const sym_link_path_w = try std.os.windows.sliceToPrefixedFileW(self.fd, sym_link_path);

return self.symLinkW(target_path_w.span(), sym_link_path_w.span(), flags);

@@ -1698,7 +1740,7 @@ pub fn symLinkZ(

}

/// Windows-only. Same as `symLink` except the pathname parameters

/// are null-terminated, WTF16 encoded.

/// are WTF-16 LE encoded.

pub fn symLinkW(

self: Dir,

/// WTF-16, does not need to be NT-prefixed. The NT-prefixing

@@ -1716,6 +1758,9 @@ pub const ReadLinkError = posix.ReadLinkError;

/// Read value of a symbolic link.

/// The return value is a slice of `buffer`, from index `0`.

/// Asserts that the path parameter has no null bytes.

/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `sub_path` should be encoded as valid UTF-8.

/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.

pub fn readLink(self: Dir, sub_path: []const u8, buffer: []u8) ReadLinkError![]u8 {

if (builtin.os.tag == .wasi and !builtin.link_libc) {

return self.readLinkWasi(sub_path, buffer);

@@ -1733,7 +1778,7 @@ pub fn readLinkWasi(self: Dir, sub_path: []const u8, buffer: []u8) ![]u8 {

return posix.readlinkat(self.fd, sub_path, buffer);

}

/// Same as `readLink`, except the `pathname` parameter is null-terminated.

/// Same as `readLink`, except the `sub_path_c` parameter is null-terminated.

pub fn readLinkZ(self: Dir, sub_path_c: [*:0]const u8, buffer: []u8) ![]u8 {

if (builtin.os.tag == .windows) {

const sub_path_w = try std.os.windows.cStrToPrefixedFileW(self.fd, sub_path_c);

@@ -1743,7 +1788,7 @@ pub fn readLinkZ(self: Dir, sub_path_c: [*:0]const u8, buffer: []u8) ![]u8 {

}

/// Windows-only. Same as `readLink` except the pathname parameter

/// is null-terminated, WTF16 encoded.

/// is WTF-16 LE encoded.

pub fn readLinkW(self: Dir, sub_path_w: []const u16, buffer: []u8) ![]u8 {

// Thin wrapper: forwards this directory's handle, the wide path, and the
// caller's output buffer to `std.os.windows.ReadLink` and returns its result.

return std.os.windows.ReadLink(self.fd, sub_path_w, buffer);

}

@@ -1753,6 +1798,9 @@ pub fn readLinkW(self: Dir, sub_path_w: []const u16, buffer: []u8) ![]u8 {

/// the situation is ambiguous. It could either mean that the entire file was read, and

/// it exactly fits the buffer, or it could mean the buffer was not big enough for the

/// entire file.

/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `file_path` should be encoded as valid UTF-8.

/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.

pub fn readFile(self: Dir, file_path: []const u8, buffer: []u8) ![]u8 {

var file = try self.openFile(file_path, .{});

defer file.close();

@@ -1763,6 +1811,9 @@ pub fn readFile(self: Dir, file_path: []const u8, buffer: []u8) ![]u8 {

/// On success, caller owns returned buffer.

/// If the file is larger than `max_bytes`, returns `error.FileTooBig`.

/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `file_path` should be encoded as valid UTF-8.

/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.

pub fn readFileAlloc(self: Dir, allocator: mem.Allocator, file_path: []const u8, max_bytes: usize) ![]u8 {

// Convenience form of `readFileAllocOptions` with fixed trailing arguments.
// NOTE(review): presumably these mean "no size hint", byte alignment, and
// "no sentinel" — the full parameter list of `readFileAllocOptions` is not
// visible here; confirm against its signature.

return self.readFileAllocOptions(allocator, file_path, max_bytes, null, @alignOf(u8), null);

}

@@ -1772,6 +1823,9 @@ pub fn readFileAlloc(self: Dir, allocator: mem.Allocator, file_path: []const u8,

/// If `size_hint` is specified the initial buffer size is calculated using

/// that value, otherwise the effective file size is used instead.

/// Allows specifying alignment and a sentinel value.

/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `file_path` should be encoded as valid UTF-8.

/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.

pub fn readFileAllocOptions(

self: Dir,

allocator: mem.Allocator,

@@ -1811,9 +1865,13 @@ pub const DeleteTreeError = error{

/// This error is unreachable if `sub_path` does not contain a path separator.

NotDir,

/// On Windows, file paths must be valid Unicode.

/// WASI-only; file paths must be valid UTF-8.

InvalidUtf8,

/// Windows-only; file paths provided by the user must be valid WTF-8.

/// https://simonsapin.github.io/wtf-8/

InvalidWtf8,

/// On Windows, file paths cannot contain these characters:

/// '/', '*', '?', '"', '<', '>', '|'

BadPathName,

@@ -1826,6 +1884,9 @@ pub const DeleteTreeError = error{

/// removes it. If it cannot be removed because it is a non-empty directory,

/// this function recursively removes its entries and then tries again.

/// This operation is not atomic on most file systems.

/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `sub_path` should be encoded as valid UTF-8.

/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.

pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void {

var initial_iterable_dir = (try self.deleteTreeOpenInitialSubpath(sub_path, .file)) orelse return;

@@ -1879,6 +1940,7 @@ pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void {

error.SystemResources,

error.Unexpected,

error.InvalidUtf8,

error.InvalidWtf8,

error.BadPathName,

error.NetworkNotFound,

error.DeviceBusy,

@@ -1910,6 +1972,7 @@ pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void {

error.AccessDenied,

error.InvalidUtf8,

error.InvalidWtf8,

error.SymLinkLoop,

error.NameTooLong,

error.SystemResources,

@@ -1973,6 +2036,7 @@ pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void {

error.SystemResources,

error.Unexpected,

error.InvalidUtf8,

error.InvalidWtf8,

error.BadPathName,

error.NetworkNotFound,

error.DeviceBusy,

@@ -1994,6 +2058,7 @@ pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void {

error.AccessDenied,

error.InvalidUtf8,

error.InvalidWtf8,

error.SymLinkLoop,

error.NameTooLong,

error.SystemResources,

@@ -2022,6 +2087,9 @@ pub fn deleteTree(self: Dir, sub_path: []const u8) DeleteTreeError!void {

/// Like `deleteTree`, but only keeps one `Iterator` active at a time to minimize the function's stack size.

/// This is slower than `deleteTree` but uses less stack space.

/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `sub_path` should be encoded as valid UTF-8.

/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.

pub fn deleteTreeMinStackSize(self: Dir, sub_path: []const u8) DeleteTreeError!void {

// Delegate to the kind-hinted implementation, passing `.file` as the
// initial kind hint for `sub_path`.

return self.deleteTreeMinStackSizeWithKindHint(sub_path, .file);

}

@@ -2074,6 +2142,7 @@ fn deleteTreeMinStackSizeWithKindHint(self: Dir, sub_path: []const u8, kind_hint

error.SystemResources,

error.Unexpected,

error.InvalidUtf8,

error.InvalidWtf8,

error.BadPathName,

error.NetworkNotFound,

error.DeviceBusy,

@@ -2102,6 +2171,7 @@ fn deleteTreeMinStackSizeWithKindHint(self: Dir, sub_path: []const u8, kind_hint

error.AccessDenied,

error.InvalidUtf8,

error.InvalidWtf8,

error.SymLinkLoop,

error.NameTooLong,

error.SystemResources,

@@ -2171,6 +2241,7 @@ fn deleteTreeOpenInitialSubpath(self: Dir, sub_path: []const u8, kind_hint: File

error.SystemResources,

error.Unexpected,

error.InvalidUtf8,

error.InvalidWtf8,

error.BadPathName,

error.DeviceBusy,

error.NetworkNotFound,

@@ -2189,6 +2260,7 @@ fn deleteTreeOpenInitialSubpath(self: Dir, sub_path: []const u8, kind_hint: File

error.AccessDenied,

error.InvalidUtf8,

error.InvalidWtf8,

error.SymLinkLoop,

error.NameTooLong,

error.SystemResources,

@@ -2209,6 +2281,9 @@ fn deleteTreeOpenInitialSubpath(self: Dir, sub_path: []const u8, kind_hint: File

pub const WriteFileError = File.WriteError || File.OpenError;

/// Deprecated: use `writeFile2`.

/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `sub_path` should be encoded as valid UTF-8.

/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.

pub fn writeFile(self: Dir, sub_path: []const u8, data: []const u8) WriteFileError!void {

return writeFile2(self, .{

.sub_path = sub_path,

@@ -2218,6 +2293,9 @@ pub fn writeFile(self: Dir, sub_path: []const u8, data: []const u8) WriteFileErr

}

pub const WriteFileOptions = struct {

/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `sub_path` should be encoded as valid UTF-8.

/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.

sub_path: []const u8,

data: []const u8,

flags: File.CreateFlags = .{},

@@ -2232,8 +2310,10 @@ pub fn writeFile2(self: Dir, options: WriteFileOptions) WriteFileError!void {

pub const AccessError = posix.AccessError;

/// Test accessing `path`.

/// `path` is UTF-8-encoded.

/// Test accessing `sub_path`.

/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `sub_path` should be encoded as valid UTF-8.

/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.

/// Be careful of Time-Of-Check-Time-Of-Use race conditions when using this function.

/// For example, instead of testing if a file exists and then opening it, just

/// open it and handle the error for file not found.

@@ -2268,9 +2348,9 @@ pub fn accessZ(self: Dir, sub_path: [*:0]const u8, flags: File.OpenFlags) Access

}

/// Same as `access` except asserts the target OS is Windows and the path parameter is

/// * WTF-16 encoded

/// * WTF-16 LE encoded

/// * null-terminated

/// * NtDll prefixed

/// * relative or has the NT namespace prefix

/// TODO currently this ignores `flags`.

pub fn accessW(self: Dir, sub_path_w: [*:0]const u16, flags: File.OpenFlags) AccessError!void {

_ = flags;

@@ -2292,6 +2372,9 @@ pub const PrevStatus = enum {

/// atime, and mode of the source file so that the next call to `updateFile` will not need a copy.

/// Returns the previous status of the file before updating.

/// If any of the directories do not exist for dest_path, they are created.

/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, both paths should be encoded as valid UTF-8.

/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.

pub fn updateFile(

source_dir: Dir,

source_path: []const u8,

@@ -2343,6 +2426,9 @@ pub const CopyFileError = File.OpenError || File.StatError ||

/// On Linux, until https://patchwork.kernel.org/patch/9636735/ is merged and readily available,

/// there is a possibility of power loss or application termination leaving temporary files present

/// in the same directory as dest_path.

/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, both paths should be encoded as valid UTF-8.

/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.

pub fn copyFile(

source_dir: Dir,

source_path: []const u8,

@@ -2430,6 +2516,9 @@ pub const AtomicFileOptions = struct {

/// Always call `AtomicFile.deinit` to clean up, regardless of whether

/// `AtomicFile.finish` succeeded. `dest_path` must remain valid until

/// `AtomicFile.deinit` is called.

/// On Windows, `dest_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `dest_path` should be encoded as valid UTF-8.

/// On other platforms, `dest_path` is an opaque sequence of bytes with no particular encoding.

pub fn atomicFile(self: Dir, dest_path: []const u8, options: AtomicFileOptions) !AtomicFile {

if (fs.path.dirname(dest_path)) |dirname| {

const dir = if (options.make_path)

@@ -2461,6 +2550,9 @@ pub const StatFileError = File.OpenError || File.StatError || posix.FStatAtError

/// Symlinks are followed.

///

/// `sub_path` may be absolute, in which case `self` is ignored.

/// On Windows, `sub_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `sub_path` should be encoded as valid UTF-8.

/// On other platforms, `sub_path` is an opaque sequence of bytes with no particular encoding.

pub fn statFile(self: Dir, sub_path: []const u8) StatFileError!Stat {

if (builtin.os.tag == .windows) {

var file = try self.openFile(sub_path, .{});

lib/std/fs/File.zig added: 1932, removed: 517, total 1415

@@ -40,8 +40,11 @@ pub const OpenError = error{

AccessDenied,

PipeBusy,

NameTooLong,

/// On Windows, file paths must be valid Unicode.

/// WASI-only; file paths must be valid UTF-8.

InvalidUtf8,

/// Windows-only; file paths provided by the user must be valid WTF-8.

/// https://simonsapin.github.io/wtf-8/

InvalidWtf8,

/// On Windows, file paths cannot contain these characters:

/// '/', '*', '?', '"', '<', '>', '|'

BadPathName,

lib/std/fs/path.zig added: 1932, removed: 517, total 1415

@@ -1,3 +1,17 @@

//! POSIX paths are arbitrary sequences of `u8` with no particular encoding.

//!

//! Windows paths are arbitrary sequences of `u16` (WTF-16).

//! For cross-platform APIs that deal with sequences of `u8`, Windows

//! paths are encoded by Zig as [WTF-8](https://simonsapin.github.io/wtf-8/).

//! WTF-8 is a superset of UTF-8 that allows encoding surrogate codepoints,

//! which enables lossless roundtripping when converting to/from WTF-16

//! (as long as the WTF-8 encoded surrogate codepoints do not form a pair).

//!

//! WASI paths are sequences of valid Unicode scalar values,

//! which means that WASI is unable to handle paths that cannot be

//! encoded as well-formed UTF-8/UTF-16.

//! https://github.com/WebAssembly/wasi-filesystem/issues/17#issuecomment-1430639353

const builtin = @import("builtin");

const std = @import("../std.zig");

const debug = std.debug;

@@ -438,7 +452,7 @@ fn networkShareServersEql(ns1: []const u8, ns2: []const u8) bool {

var it1 = mem.tokenizeScalar(u8, ns1, sep1);

var it2 = mem.tokenizeScalar(u8, ns2, sep2);

return windows.eqlIgnoreCaseUtf8(it1.next().?, it2.next().?);

return windows.eqlIgnoreCaseWtf8(it1.next().?, it2.next().?);

}

fn compareDiskDesignators(kind: WindowsPath.Kind, p1: []const u8, p2: []const u8) bool {

@@ -458,7 +472,7 @@ fn compareDiskDesignators(kind: WindowsPath.Kind, p1: []const u8, p2: []const u8

var it1 = mem.tokenizeScalar(u8, p1, sep1);

var it2 = mem.tokenizeScalar(u8, p2, sep2);

return windows.eqlIgnoreCaseUtf8(it1.next().?, it2.next().?) and windows.eqlIgnoreCaseUtf8(it1.next().?, it2.next().?);

return windows.eqlIgnoreCaseWtf8(it1.next().?, it2.next().?) and windows.eqlIgnoreCaseWtf8(it1.next().?, it2.next().?);

}

@@ -1099,7 +1113,7 @@ pub fn relativeWindows(allocator: Allocator, from: []const u8, to: []const u8) !

const from_component = from_it.next() orelse return allocator.dupe(u8, to_it.rest());

const to_rest = to_it.rest();

if (to_it.next()) |to_component| {

if (windows.eqlIgnoreCaseUtf8(from_component, to_component))

if (windows.eqlIgnoreCaseWtf8(from_component, to_component))

continue;

}

var up_index_end = "..".len;

@@ -1564,14 +1578,14 @@ pub fn ComponentIterator(comptime path_type: PathType, comptime T: type) type {

};

}

pub const NativeUtf8ComponentIterator = ComponentIterator(switch (native_os) {

/// `ComponentIterator` over `u8` paths, with the path flavor chosen from the
/// native OS: `.windows` on Windows, `.uefi` on UEFI, and `.posix` otherwise.
pub const NativeComponentIterator = ComponentIterator(switch (native_os) {

.windows => .windows,

.uefi => .uefi,

else => .posix,

}, u8);

pub fn componentIterator(path: []const u8) !NativeUtf8ComponentIterator {

return NativeUtf8ComponentIterator.init(path);

/// Create a `NativeComponentIterator` for `path`.
/// Any error returned by `NativeComponentIterator.init` is passed through to the caller.
pub fn componentIterator(path: []const u8) !NativeComponentIterator {

return NativeComponentIterator.init(path);

}

test "ComponentIterator posix" {

@@ -1826,7 +1840,7 @@ test "ComponentIterator windows" {

}

test "ComponentIterator windows UTF-16" {

test "ComponentIterator windows WTF-16" {

// TODO: Fix on big endian architectures

if (builtin.cpu.arch.endian() != .little) {

return error.SkipZigTest;

@@ -1925,3 +1939,18 @@ test "ComponentIterator roots" {

try std.testing.expectEqualStrings("//a/b//", it.root().?);

}

/// Format a path encoded as bytes for display as UTF-8.

/// This is an alias of `std.unicode.fmtUtf8`.

/// Returns a Formatter for the given path. The path is converted to valid UTF-8

/// during formatting. The conversion is lossy if the path contains any ill-formed UTF-8:

/// ill-formed UTF-8 byte sequences are replaced by the replacement character (U+FFFD)

/// according to "U+FFFD Substitution of Maximal Subparts" from Chapter 3 of

/// the Unicode standard, and as specified by https://encoding.spec.whatwg.org/#utf-8-decoder

pub const fmtAsUtf8Lossy = std.unicode.fmtUtf8;

/// Format a path encoded as WTF-16 LE for display as UTF-8.

/// This is an alias of `std.unicode.fmtUtf16Le`.

/// Returns a Formatter for a (potentially ill-formed) UTF-16 LE path.

/// The path is converted to valid UTF-8 during formatting. The conversion

/// is lossy if the path contains any unpaired surrogates:

/// unpaired surrogates are replaced by the replacement character (U+FFFD).

pub const fmtWtf16LeAsUtf8Lossy = std.unicode.fmtUtf16Le;

lib/std/fs/test.zig added: 1932, removed: 517, total 1415

@@ -26,39 +26,39 @@ const PathType = enum {

}

pub const TransformError = std.os.RealPathError || error{OutOfMemory};

pub const TransformFn = fn (allocator: mem.Allocator, dir: Dir, relative_path: []const u8) TransformError![]const u8;

pub const TransformFn = fn (allocator: mem.Allocator, dir: Dir, relative_path: [:0]const u8) TransformError![:0]const u8;

pub fn getTransformFn(comptime path_type: PathType) TransformFn {

switch (path_type) {

.relative => return struct {

fn transform(allocator: mem.Allocator, dir: Dir, relative_path: []const u8) TransformError![]const u8 {

fn transform(allocator: mem.Allocator, dir: Dir, relative_path: [:0]const u8) TransformError![:0]const u8 {

_ = allocator;

_ = dir;

return relative_path;

}

}.transform,

.absolute => return struct {

fn transform(allocator: mem.Allocator, dir: Dir, relative_path: []const u8) TransformError![]const u8 {

fn transform(allocator: mem.Allocator, dir: Dir, relative_path: [:0]const u8) TransformError![:0]const u8 {

// The final path may not actually exist which would cause realpath to fail.

// So instead, we get the path of the dir and join it with the relative path.

var fd_path_buf: [fs.MAX_PATH_BYTES]u8 = undefined;

const dir_path = try os.getFdPath(dir.fd, &fd_path_buf);

return fs.path.join(allocator, &.{ dir_path, relative_path });

return fs.path.joinZ(allocator, &.{ dir_path, relative_path });

}

}.transform,

.unc => return struct {

fn transform(allocator: mem.Allocator, dir: Dir, relative_path: []const u8) TransformError![]const u8 {

fn transform(allocator: mem.Allocator, dir: Dir, relative_path: [:0]const u8) TransformError![:0]const u8 {

// Any drive absolute path (C:\foo) can be converted into a UNC path by

// using '127.0.0.1' as the server name and '<drive letter>$' as the share name.

var fd_path_buf: [fs.MAX_PATH_BYTES]u8 = undefined;

const dir_path = try os.getFdPath(dir.fd, &fd_path_buf);

const windows_path_type = std.os.windows.getUnprefixedPathType(u8, dir_path);

switch (windows_path_type) {

.unc_absolute => return fs.path.join(allocator, &.{ dir_path, relative_path }),

.unc_absolute => return fs.path.joinZ(allocator, &.{ dir_path, relative_path }),

.drive_absolute => {

// `C:\<...>` -> `\\127.0.0.1\C$\<...>`

const prepended = "\\\\127.0.0.1\\";

var path = try fs.path.join(allocator, &.{ prepended, dir_path, relative_path });

var path = try fs.path.joinZ(allocator, &.{ prepended, dir_path, relative_path });

path[prepended.len + 1] = '$';

return path;

@@ -96,7 +96,7 @@ const TestContext = struct {

/// Returns the `relative_path` transformed into the TestContext's `path_type`.

/// The result is allocated by the TestContext's arena and will be free'd during

/// `TestContext.deinit`.

pub fn transformPath(self: *TestContext, relative_path: []const u8) ![]const u8 {

pub fn transformPath(self: *TestContext, relative_path: [:0]const u8) ![:0]const u8 {

return self.transform_fn(self.arena.allocator(), self.dir, relative_path);

}

};

@@ -1001,6 +1001,16 @@ test "openSelfExe" {

self_exe_file.close();

}

// Checks that the buffer-based and the allocating self-exe-path APIs agree.

test "selfExePath" {

// The test is skipped entirely on WASI.

if (builtin.os.tag == .wasi) return error.SkipZigTest;

// Variant 1: the path is written into a caller-provided buffer.

var buf: [fs.MAX_PATH_BYTES]u8 = undefined;

const buf_self_exe_path = try std.fs.selfExePath(&buf);

// Variant 2: the path is allocated; the test owns it and frees it on scope exit.

const alloc_self_exe_path = try std.fs.selfExePathAlloc(testing.allocator);

defer testing.allocator.free(alloc_self_exe_path);

// Both variants must produce the same bytes.

try testing.expectEqualSlices(u8, buf_self_exe_path, alloc_self_exe_path);

}

test "deleteTree does not follow symlinks" {

var tmp = tmpDir(.{});

defer tmp.cleanup();

@@ -1907,3 +1917,111 @@ test "delete a setAsCwd directory on Windows" {

// Close the parent "tmp" so we don't leak the HANDLE.

tmp.parent_dir.close();

}

// Checks that the path-taking filesystem APIs reject an ill-formed path with the

// platform's encoding error instead of some other failure.

test "invalid UTF-8/WTF-8 paths" {

// Only WASI (UTF-8) and Windows (WTF-8) are exercised; every other OS skips the test.

const expected_err = switch (builtin.os.tag) {

.wasi => error.InvalidUtf8,

.windows => error.InvalidWtf8,

else => return error.SkipZigTest,

};

try testWithAllSupportedPathTypes(struct {

fn impl(ctx: *TestContext) !void {

// This is both invalid UTF-8 and WTF-8, since \xFF is an invalid start byte

const invalid_path = try ctx.transformPath("\xFF");

// Operations relative to `ctx.dir`, in both their slice and `Z` forms.

try testing.expectError(expected_err, ctx.dir.openFile(invalid_path, .{}));

try testing.expectError(expected_err, ctx.dir.openFileZ(invalid_path, .{}));

try testing.expectError(expected_err, ctx.dir.createFile(invalid_path, .{}));

try testing.expectError(expected_err, ctx.dir.createFileZ(invalid_path, .{}));

try testing.expectError(expected_err, ctx.dir.makeDir(invalid_path));

try testing.expectError(expected_err, ctx.dir.makeDirZ(invalid_path));

try testing.expectError(expected_err, ctx.dir.makePath(invalid_path));

try testing.expectError(expected_err, ctx.dir.makeOpenPath(invalid_path, .{}));

try testing.expectError(expected_err, ctx.dir.openDir(invalid_path, .{}));

try testing.expectError(expected_err, ctx.dir.openDirZ(invalid_path, .{}));

try testing.expectError(expected_err, ctx.dir.deleteFile(invalid_path));

try testing.expectError(expected_err, ctx.dir.deleteFileZ(invalid_path));

try testing.expectError(expected_err, ctx.dir.deleteDir(invalid_path));

try testing.expectError(expected_err, ctx.dir.deleteDirZ(invalid_path));

try testing.expectError(expected_err, ctx.dir.rename(invalid_path, invalid_path));

try testing.expectError(expected_err, ctx.dir.renameZ(invalid_path, invalid_path));

try testing.expectError(expected_err, ctx.dir.symLink(invalid_path, invalid_path, .{}));

try testing.expectError(expected_err, ctx.dir.symLinkZ(invalid_path, invalid_path, .{}));

// The `*Wasi` entry point is only called when targeting WASI.

if (builtin.os.tag == .wasi) {

try testing.expectError(expected_err, ctx.dir.symLinkWasi(invalid_path, invalid_path, .{}));

}

// An empty output buffer is passed here; presumably sufficient because the call

// is expected to fail on the path before producing any output.

try testing.expectError(expected_err, ctx.dir.readLink(invalid_path, &[_]u8{}));

try testing.expectError(expected_err, ctx.dir.readLinkZ(invalid_path, &[_]u8{}));

if (builtin.os.tag == .wasi) {

try testing.expectError(expected_err, ctx.dir.readLinkWasi(invalid_path, &[_]u8{}));

}

try testing.expectError(expected_err, ctx.dir.readFile(invalid_path, &[_]u8{}));

try testing.expectError(expected_err, ctx.dir.readFileAlloc(testing.allocator, invalid_path, 0));

try testing.expectError(expected_err, ctx.dir.deleteTree(invalid_path));

try testing.expectError(expected_err, ctx.dir.deleteTreeMinStackSize(invalid_path));

try testing.expectError(expected_err, ctx.dir.writeFile(invalid_path, ""));

try testing.expectError(expected_err, ctx.dir.writeFile2(.{

.sub_path = invalid_path,

.data = "",

}));

try testing.expectError(expected_err, ctx.dir.access(invalid_path, .{}));

try testing.expectError(expected_err, ctx.dir.accessZ(invalid_path, .{}));

try testing.expectError(expected_err, ctx.dir.updateFile(invalid_path, ctx.dir, invalid_path, .{}));

try testing.expectError(expected_err, ctx.dir.copyFile(invalid_path, ctx.dir, invalid_path, .{}));

try testing.expectError(expected_err, ctx.dir.statFile(invalid_path));

// The realpath family is not exercised on WASI.

if (builtin.os.tag != .wasi) {

try testing.expectError(expected_err, ctx.dir.realpath(invalid_path, &[_]u8{}));

try testing.expectError(expected_err, ctx.dir.realpathZ(invalid_path, &[_]u8{}));

try testing.expectError(expected_err, ctx.dir.realpathAlloc(testing.allocator, invalid_path));

}

// Free functions that take explicit directory handles.

try testing.expectError(expected_err, fs.rename(ctx.dir, invalid_path, ctx.dir, invalid_path));

try testing.expectError(expected_err, fs.renameZ(ctx.dir, invalid_path, ctx.dir, invalid_path));

// The `*Absolute` APIs are only exercised for non-relative path types, and never on WASI.

if (builtin.os.tag != .wasi and ctx.path_type != .relative) {

try testing.expectError(expected_err, fs.updateFileAbsolute(invalid_path, invalid_path, .{}));

try testing.expectError(expected_err, fs.copyFileAbsolute(invalid_path, invalid_path, .{}));

try testing.expectError(expected_err, fs.makeDirAbsolute(invalid_path));

try testing.expectError(expected_err, fs.makeDirAbsoluteZ(invalid_path));

try testing.expectError(expected_err, fs.deleteDirAbsolute(invalid_path));

try testing.expectError(expected_err, fs.deleteDirAbsoluteZ(invalid_path));

try testing.expectError(expected_err, fs.renameAbsolute(invalid_path, invalid_path));

try testing.expectError(expected_err, fs.renameAbsoluteZ(invalid_path, invalid_path));

try testing.expectError(expected_err, fs.openDirAbsolute(invalid_path, .{}));

try testing.expectError(expected_err, fs.openDirAbsoluteZ(invalid_path, .{}));

try testing.expectError(expected_err, fs.openFileAbsolute(invalid_path, .{}));

try testing.expectError(expected_err, fs.openFileAbsoluteZ(invalid_path, .{}));

try testing.expectError(expected_err, fs.accessAbsolute(invalid_path, .{}));

try testing.expectError(expected_err, fs.accessAbsoluteZ(invalid_path, .{}));

try testing.expectError(expected_err, fs.createFileAbsolute(invalid_path, .{}));

try testing.expectError(expected_err, fs.createFileAbsoluteZ(invalid_path, .{}));

try testing.expectError(expected_err, fs.deleteFileAbsolute(invalid_path));

try testing.expectError(expected_err, fs.deleteFileAbsoluteZ(invalid_path));

try testing.expectError(expected_err, fs.deleteTreeAbsolute(invalid_path));

// Unlike the dir-relative readLink calls above, these get a full-size buffer.

var readlink_buf: [fs.MAX_PATH_BYTES]u8 = undefined;

try testing.expectError(expected_err, fs.readLinkAbsolute(invalid_path, &readlink_buf));

try testing.expectError(expected_err, fs.readLinkAbsoluteZ(invalid_path, &readlink_buf));

try testing.expectError(expected_err, fs.symLinkAbsolute(invalid_path, invalid_path, .{}));

try testing.expectError(expected_err, fs.symLinkAbsoluteZ(invalid_path, invalid_path, .{}));

try testing.expectError(expected_err, fs.realpathAlloc(testing.allocator, invalid_path));

}

}

}.impl);

}

lib/std/os.zig added: 1932, removed: 517, total 1415

@@ -3,7 +3,7 @@

//! * Convert "errno"-style error codes into Zig errors.

//! * When null-terminated byte buffers are required, provide APIs which accept

//! slices as well as APIs which accept null-terminated byte buffers. Same goes

//! for UTF-16LE encoding.

//! for WTF-16LE encoding.

//! * Where operating systems share APIs, e.g. POSIX, these thin wrappers provide

//! cross platform abstracting.

//! * When there exists a corresponding libc function and linking libc, the libc

@@ -498,6 +498,7 @@ fn fchmodat2(dirfd: fd_t, path: []const u8, mode: mode_t, flags: u32) FChmodAtEr

const stat = fstatatZ(pathfd, "", AT.EMPTY_PATH) catch |err| switch (err) {

error.NameTooLong => unreachable,

error.FileNotFound => unreachable,

error.InvalidUtf8 => unreachable,

else => |e| return e,

};

if ((stat.mode & S.IFMT) == S.IFLNK)

@@ -1614,9 +1615,16 @@ pub const OpenError = error{

/// The underlying filesystem does not support file locks

FileLocksNotSupported,

/// Path contains characters that are disallowed by the underlying filesystem.

BadPathName,

/// WASI-only; file paths must be valid UTF-8.

InvalidUtf8,

/// Windows-only; file paths provided by the user must be valid WTF-8.

/// https://simonsapin.github.io/wtf-8/

InvalidWtf8,

/// On Windows, `\\server` or `\\server\share` was not found.

NetworkNotFound,

@@ -1634,6 +1642,9 @@ pub const OpenError = error{

} || UnexpectedError;

/// Open and possibly create a file. Keeps trying if it gets interrupted.

/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `file_path` should be encoded as valid UTF-8.

/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.

/// See also `openZ`.

pub fn open(file_path: []const u8, flags: O, perm: mode_t) OpenError!fd_t {

if (builtin.os.tag == .windows) {

@@ -1646,6 +1657,9 @@ pub fn open(file_path: []const u8, flags: O, perm: mode_t) OpenError!fd_t {

}

/// Open and possibly create a file. Keeps trying if it gets interrupted.

/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `file_path` should be encoded as valid UTF-8.

/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.

/// See also `open`.

pub fn openZ(file_path: [*:0]const u8, flags: O, perm: mode_t) OpenError!fd_t {

if (builtin.os.tag == .windows) {

@@ -1687,6 +1701,9 @@ pub fn openZ(file_path: [*:0]const u8, flags: O, perm: mode_t) OpenError!fd_t {

/// Open and possibly create a file. Keeps trying if it gets interrupted.

/// `file_path` is relative to the open directory handle `dir_fd`.

/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `file_path` should be encoded as valid UTF-8.

/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.

/// See also `openatZ`.

pub fn openat(dir_fd: fd_t, file_path: []const u8, flags: O, mode: mode_t) OpenError!fd_t {

if (builtin.os.tag == .windows) {

@@ -1829,6 +1846,7 @@ pub fn openatWasi(

.EXIST => return error.PathAlreadyExists,

.BUSY => return error.DeviceBusy,

.NOTCAPABLE => return error.AccessDenied,

.ILSEQ => return error.InvalidUtf8,

else => |err| return unexpectedErrno(err),

}

@@ -1836,6 +1854,9 @@ pub fn openatWasi(

/// Open and possibly create a file. Keeps trying if it gets interrupted.

/// `file_path` is relative to the open directory handle `dir_fd`.

/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `file_path` should be encoded as valid UTF-8.

/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.

/// See also `openat`.

pub fn openatZ(dir_fd: fd_t, file_path: [*:0]const u8, flags: O, mode: mode_t) OpenError!fd_t {

if (builtin.os.tag == .windows) {

@@ -2156,13 +2177,23 @@ pub const SymLinkError = error{

ReadOnlyFileSystem,

NotDir,

NameTooLong,

/// WASI-only; file paths must be valid UTF-8.

InvalidUtf8,

/// Windows-only; file paths provided by the user must be valid WTF-8.

/// https://simonsapin.github.io/wtf-8/

InvalidWtf8,

BadPathName,

} || UnexpectedError;

/// Creates a symbolic link named `sym_link_path` which contains the string `target_path`.

/// A symbolic link (also known as a soft link) may point to an existing file or to a nonexistent

/// one; the latter case is known as a dangling link.

/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, both paths should be encoded as valid UTF-8.

/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.

/// If `sym_link_path` exists, it will not be overwritten.

/// See also `symlinkZ`.

pub fn symlink(target_path: []const u8, sym_link_path: []const u8) SymLinkError!void {

@@ -2200,6 +2231,10 @@ pub fn symlinkZ(target_path: [*:0]const u8, sym_link_path: [*:0]const u8) SymLin

.NOMEM => return error.SystemResources,

.NOSPC => return error.NoSpaceLeft,

.ROFS => return error.ReadOnlyFileSystem,

.ILSEQ => |err| if (builtin.os.tag == .wasi)

return error.InvalidUtf8

else

return unexpectedErrno(err),

else => |err| return unexpectedErrno(err),

}

@@ -2208,6 +2243,9 @@ pub fn symlinkZ(target_path: [*:0]const u8, sym_link_path: [*:0]const u8) SymLin

/// `target_path` **relative** to `newdirfd` directory handle.

/// A symbolic link (also known as a soft link) may point to an existing file or to a nonexistent

/// one; the latter case is known as a dangling link.

/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, both paths should be encoded as valid UTF-8.

/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.

/// If `sym_link_path` exists, it will not be overwritten.

/// See also `symlinkatWasi`, `symlinkatZ` and `symlinkatW`.

pub fn symlinkat(target_path: []const u8, newdirfd: fd_t, sym_link_path: []const u8) SymLinkError!void {

@@ -2242,6 +2280,7 @@ pub fn symlinkatWasi(target_path: []const u8, newdirfd: fd_t, sym_link_path: []c

.NOSPC => return error.NoSpaceLeft,

.ROFS => return error.ReadOnlyFileSystem,

.NOTCAPABLE => return error.AccessDenied,

.ILSEQ => return error.InvalidUtf8,

else => |err| return unexpectedErrno(err),

}

@@ -2270,6 +2309,10 @@ pub fn symlinkatZ(target_path: [*:0]const u8, newdirfd: fd_t, sym_link_path: [*:

.NOMEM => return error.SystemResources,

.NOSPC => return error.NoSpaceLeft,

.ROFS => return error.ReadOnlyFileSystem,

.ILSEQ => |err| if (builtin.os.tag == .wasi)

return error.InvalidUtf8

else

return unexpectedErrno(err),

else => |err| return unexpectedErrno(err),

}

@@ -2287,8 +2330,13 @@ pub const LinkError = UnexpectedError || error{

NoSpaceLeft,

ReadOnlyFileSystem,

NotSameFileSystem,

/// WASI-only; file paths must be valid UTF-8.

InvalidUtf8,

};

/// On WASI, both paths should be encoded as valid UTF-8.

/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.

pub fn linkZ(oldpath: [*:0]const u8, newpath: [*:0]const u8, flags: i32) LinkError!void {

if (builtin.os.tag == .wasi and !builtin.link_libc) {

return link(mem.sliceTo(oldpath, 0), mem.sliceTo(newpath, 0), flags);

@@ -2310,10 +2358,16 @@ pub fn linkZ(oldpath: [*:0]const u8, newpath: [*:0]const u8, flags: i32) LinkErr

.ROFS => return error.ReadOnlyFileSystem,

.XDEV => return error.NotSameFileSystem,

.INVAL => unreachable,

.ILSEQ => |err| if (builtin.os.tag == .wasi)

return error.InvalidUtf8

else

return unexpectedErrno(err),

else => |err| return unexpectedErrno(err),

}

/// On WASI, both paths should be encoded as valid UTF-8.

/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.

pub fn link(oldpath: []const u8, newpath: []const u8, flags: i32) LinkError!void {

if (builtin.os.tag == .wasi and !builtin.link_libc) {

return linkat(wasi.AT.FDCWD, oldpath, wasi.AT.FDCWD, newpath, flags) catch |err| switch (err) {

@@ -2328,6 +2382,8 @@ pub fn link(oldpath: []const u8, newpath: []const u8, flags: i32) LinkError!void

pub const LinkatError = LinkError || error{NotDir};

/// On WASI, both paths should be encoded as valid UTF-8.

/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.

pub fn linkatZ(

olddir: fd_t,

oldpath: [*:0]const u8,

@@ -2356,10 +2412,16 @@ pub fn linkatZ(

.ROFS => return error.ReadOnlyFileSystem,

.XDEV => return error.NotSameFileSystem,

.INVAL => unreachable,

.ILSEQ => |err| if (builtin.os.tag == .wasi)

return error.InvalidUtf8

else

return unexpectedErrno(err),

else => |err| return unexpectedErrno(err),

}

/// On WASI, both paths should be encoded as valid UTF-8.

/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.

pub fn linkat(

olddir: fd_t,

oldpath: []const u8,

@@ -2399,6 +2461,7 @@ pub fn linkat(

.ROFS => return error.ReadOnlyFileSystem,

.XDEV => return error.NotSameFileSystem,

.INVAL => unreachable,

.ILSEQ => return error.InvalidUtf8,

else => |err| return unexpectedErrno(err),

}

@@ -2422,9 +2485,13 @@ pub const UnlinkError = error{

SystemResources,

ReadOnlyFileSystem,

/// On Windows, file paths must be valid Unicode.

/// WASI-only; file paths must be valid UTF-8.

InvalidUtf8,

/// Windows-only; file paths provided by the user must be valid WTF-8.

/// https://simonsapin.github.io/wtf-8/

InvalidWtf8,

/// On Windows, file paths cannot contain these characters:

/// '/', '*', '?', '"', '<', '>', '|'

BadPathName,

@@ -2434,6 +2501,9 @@ pub const UnlinkError = error{

} || UnexpectedError;

/// Delete a name and possibly the file it refers to.

/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `file_path` should be encoded as valid UTF-8.

/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.

/// See also `unlinkZ`.

pub fn unlink(file_path: []const u8) UnlinkError!void {

if (builtin.os.tag == .wasi and !builtin.link_libc) {

@@ -2450,7 +2520,7 @@ pub fn unlink(file_path: []const u8) UnlinkError!void {

}

/// Same as `unlink` except the parameter is a null terminated UTF8-encoded string.

/// Same as `unlink` except the parameter is null terminated.

pub fn unlinkZ(file_path: [*:0]const u8) UnlinkError!void {

if (builtin.os.tag == .windows) {

const file_path_w = try windows.cStrToPrefixedFileW(null, file_path);

@@ -2473,11 +2543,15 @@ pub fn unlinkZ(file_path: [*:0]const u8) UnlinkError!void {

.NOTDIR => return error.NotDir,

.NOMEM => return error.SystemResources,

.ROFS => return error.ReadOnlyFileSystem,

.ILSEQ => |err| if (builtin.os.tag == .wasi)

return error.InvalidUtf8

else

return unexpectedErrno(err),

else => |err| return unexpectedErrno(err),

}

/// Windows-only. Same as `unlink` except the parameter is null-terminated, WTF16 encoded.

/// Windows-only. Same as `unlink` except the parameter is null-terminated, WTF16 LE encoded.

pub fn unlinkW(file_path_w: []const u16) UnlinkError!void {

windows.DeleteFile(file_path_w, .{ .dir = std.fs.cwd().fd }) catch |err| switch (err) {

error.DirNotEmpty => unreachable, // we're not passing .remove_dir = true

@@ -2491,6 +2565,9 @@ pub const UnlinkatError = UnlinkError || error{

};

/// Delete a file name and possibly the file it refers to, based on an open directory handle.

/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `file_path` should be encoded as valid UTF-8.

/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.

/// Asserts that the path parameter has no null bytes.

pub fn unlinkat(dirfd: fd_t, file_path: []const u8, flags: u32) UnlinkatError!void {

if (builtin.os.tag == .windows) {

@@ -2528,6 +2605,7 @@ pub fn unlinkatWasi(dirfd: fd_t, file_path: []const u8, flags: u32) UnlinkatErro

.ROFS => return error.ReadOnlyFileSystem,

.NOTEMPTY => return error.DirNotEmpty,

.NOTCAPABLE => return error.AccessDenied,

.ILSEQ => return error.InvalidUtf8,

.INVAL => unreachable, // invalid flags, or pathname has . as last component

.BADF => unreachable, // always a race condition

@@ -2560,6 +2638,10 @@ pub fn unlinkatZ(dirfd: fd_t, file_path_c: [*:0]const u8, flags: u32) UnlinkatEr

.ROFS => return error.ReadOnlyFileSystem,

.EXIST => return error.DirNotEmpty,

.NOTEMPTY => return error.DirNotEmpty,

.ILSEQ => |err| if (builtin.os.tag == .wasi)

return error.InvalidUtf8

else

return unexpectedErrno(err),

.INVAL => unreachable, // invalid flags, or pathname has . as last component

.BADF => unreachable, // always a race condition

@@ -2568,7 +2650,7 @@ pub fn unlinkatZ(dirfd: fd_t, file_path_c: [*:0]const u8, flags: u32) UnlinkatEr

}

/// Same as `unlinkat` but `sub_path_w` is UTF16LE, NT prefixed. Windows only.

/// Same as `unlinkat` but `sub_path_w` is WTF16LE, NT prefixed. Windows only.

pub fn unlinkatW(dirfd: fd_t, sub_path_w: []const u16, flags: u32) UnlinkatError!void {

const remove_dir = (flags & AT.REMOVEDIR) != 0;

return windows.DeleteFile(sub_path_w, .{ .dir = dirfd, .remove_dir = remove_dir });

@@ -2594,7 +2676,11 @@ pub const RenameError = error{

PathAlreadyExists,

ReadOnlyFileSystem,

RenameAcrossMountPoints,

/// WASI-only; file paths must be valid UTF-8.

InvalidUtf8,

/// Windows-only; file paths provided by the user must be valid WTF-8.

/// https://simonsapin.github.io/wtf-8/

InvalidWtf8,

BadPathName,

NoDevice,

SharingViolation,

@@ -2610,6 +2696,9 @@ pub const RenameError = error{

} || UnexpectedError;

/// Change the name or location of a file.

/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, both paths should be encoded as valid UTF-8.

/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.

pub fn rename(old_path: []const u8, new_path: []const u8) RenameError!void {

if (builtin.os.tag == .wasi and !builtin.link_libc) {

return renameat(wasi.AT.FDCWD, old_path, wasi.AT.FDCWD, new_path);

@@ -2624,7 +2713,7 @@ pub fn rename(old_path: []const u8, new_path: []const u8) RenameError!void {

}

/// Same as `rename` except the parameters are null-terminated byte arrays.

/// Same as `rename` except the parameters are null-terminated.

pub fn renameZ(old_path: [*:0]const u8, new_path: [*:0]const u8) RenameError!void {

if (builtin.os.tag == .windows) {

const old_path_w = try windows.cStrToPrefixedFileW(null, old_path);

@@ -2653,11 +2742,15 @@ pub fn renameZ(old_path: [*:0]const u8, new_path: [*:0]const u8) RenameError!voi

.NOTEMPTY => return error.PathAlreadyExists,

.ROFS => return error.ReadOnlyFileSystem,

.XDEV => return error.RenameAcrossMountPoints,

.ILSEQ => |err| if (builtin.os.tag == .wasi)

return error.InvalidUtf8

else

return unexpectedErrno(err),

else => |err| return unexpectedErrno(err),

}

/// Same as `rename` except the parameters are null-terminated UTF16LE encoded byte arrays.

/// Same as `rename` except the parameters are null-terminated and WTF16LE encoded.

/// Assumes target is Windows.

pub fn renameW(old_path: [*:0]const u16, new_path: [*:0]const u16) RenameError!void {

const flags = windows.MOVEFILE_REPLACE_EXISTING | windows.MOVEFILE_WRITE_THROUGH;

@@ -2665,6 +2758,9 @@ pub fn renameW(old_path: [*:0]const u16, new_path: [*:0]const u16) RenameError!v

}

/// Change the name or location of a file based on an open directory handle.

/// On Windows, both paths should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, both paths should be encoded as valid UTF-8.

/// On other platforms, both paths are an opaque sequence of bytes with no particular encoding.

pub fn renameat(

old_dir_fd: fd_t,

old_path: []const u8,

@@ -2710,11 +2806,12 @@ pub fn renameatWasi(old: RelativePathWasi, new: RelativePathWasi) RenameError!vo

.ROFS => return error.ReadOnlyFileSystem,

.XDEV => return error.RenameAcrossMountPoints,

.NOTCAPABLE => return error.AccessDenied,

.ILSEQ => return error.InvalidUtf8,

else => |err| return unexpectedErrno(err),

}

/// Same as `renameat` except the parameters are null-terminated byte arrays.

/// Same as `renameat` except the parameters are null-terminated.

pub fn renameatZ(

old_dir_fd: fd_t,

old_path: [*:0]const u8,

@@ -2749,6 +2846,10 @@ pub fn renameatZ(

.NOTEMPTY => return error.PathAlreadyExists,

.ROFS => return error.ReadOnlyFileSystem,

.XDEV => return error.RenameAcrossMountPoints,

.ILSEQ => |err| if (builtin.os.tag == .wasi)

return error.InvalidUtf8

else

return unexpectedErrno(err),

else => |err| return unexpectedErrno(err),

}

@@ -2860,6 +2961,9 @@ pub fn renameatW(

}

/// On Windows, `sub_dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `sub_dir_path` should be encoded as valid UTF-8.

/// On other platforms, `sub_dir_path` is an opaque sequence of bytes with no particular encoding.

pub fn mkdirat(dir_fd: fd_t, sub_dir_path: []const u8, mode: u32) MakeDirError!void {

if (builtin.os.tag == .windows) {

const sub_dir_path_w = try windows.sliceToPrefixedFileW(dir_fd, sub_dir_path);

@@ -2891,14 +2995,16 @@ pub fn mkdiratWasi(dir_fd: fd_t, sub_dir_path: []const u8, mode: u32) MakeDirErr

.NOTDIR => return error.NotDir,

.ROFS => return error.ReadOnlyFileSystem,

.NOTCAPABLE => return error.AccessDenied,

.ILSEQ => return error.InvalidUtf8,

else => |err| return unexpectedErrno(err),

}

/// Same as `mkdirat` except `sub_dir_path` is null-terminated.

pub fn mkdiratZ(dir_fd: fd_t, sub_dir_path: [*:0]const u8, mode: u32) MakeDirError!void {

if (builtin.os.tag == .windows) {

const sub_dir_path_w = try windows.cStrToPrefixedFileW(dir_fd, sub_dir_path);

return mkdiratW(dir_fd, sub_dir_path_w.span().ptr, mode);

return mkdiratW(dir_fd, sub_dir_path_w.span(), mode);

} else if (builtin.os.tag == .wasi and !builtin.link_libc) {

return mkdirat(dir_fd, mem.sliceTo(sub_dir_path, 0), mode);

}

@@ -2920,10 +3026,15 @@ pub fn mkdiratZ(dir_fd: fd_t, sub_dir_path: [*:0]const u8, mode: u32) MakeDirErr

.ROFS => return error.ReadOnlyFileSystem,

// dragonfly: when dir_fd is unlinked from filesystem

.NOTCONN => return error.FileNotFound,

.ILSEQ => |err| if (builtin.os.tag == .wasi)

return error.InvalidUtf8

else

return unexpectedErrno(err),

else => |err| return unexpectedErrno(err),

}

/// Windows-only. Same as `mkdirat` except the parameter is WTF-16 LE encoded.

pub fn mkdiratW(dir_fd: fd_t, sub_path_w: []const u16, mode: u32) MakeDirError!void {

_ = mode;

const sub_dir_handle = windows.OpenFile(sub_path_w, .{

@@ -2955,7 +3066,11 @@ pub const MakeDirError = error{

NoSpaceLeft,

NotDir,

ReadOnlyFileSystem,

/// WASI-only; file paths must be valid UTF-8.

InvalidUtf8,

/// Windows-only; file paths provided by the user must be valid WTF-8.

/// https://simonsapin.github.io/wtf-8/

InvalidWtf8,

BadPathName,

NoDevice,

/// On Windows, `\\server` or `\\server\share` was not found.

@@ -2964,6 +3079,9 @@ pub const MakeDirError = error{

/// Create a directory.

/// `mode` is ignored on Windows and WASI.

/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `dir_path` should be encoded as valid UTF-8.

/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding.

pub fn mkdir(dir_path: []const u8, mode: u32) MakeDirError!void {

if (builtin.os.tag == .wasi and !builtin.link_libc) {

return mkdirat(wasi.AT.FDCWD, dir_path, mode);

@@ -2976,7 +3094,10 @@ pub fn mkdir(dir_path: []const u8, mode: u32) MakeDirError!void {

}

/// Same as `mkdir` but the parameter is a null-terminated UTF8-encoded string.

/// Same as `mkdir` but the parameter is null-terminated.

/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `dir_path` should be encoded as valid UTF-8.

/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding.

pub fn mkdirZ(dir_path: [*:0]const u8, mode: u32) MakeDirError!void {

if (builtin.os.tag == .windows) {

const dir_path_w = try windows.cStrToPrefixedFileW(null, dir_path);

@@ -2999,11 +3120,15 @@ pub fn mkdirZ(dir_path: [*:0]const u8, mode: u32) MakeDirError!void {

.NOSPC => return error.NoSpaceLeft,

.NOTDIR => return error.NotDir,

.ROFS => return error.ReadOnlyFileSystem,

.ILSEQ => |err| if (builtin.os.tag == .wasi)

return error.InvalidUtf8

else

return unexpectedErrno(err),

else => |err| return unexpectedErrno(err),

}

/// Windows-only. Same as `mkdir` but the parameters is WTF16 encoded.

/// Windows-only. Same as `mkdir` but the parameter is WTF-16 LE encoded.

pub fn mkdirW(dir_path_w: []const u16, mode: u32) MakeDirError!void {

_ = mode;

const sub_dir_handle = windows.OpenFile(dir_path_w, .{

@@ -3031,13 +3156,20 @@ pub const DeleteDirError = error{

NotDir,

DirNotEmpty,

ReadOnlyFileSystem,

/// WASI-only; file paths must be valid UTF-8.

InvalidUtf8,

/// Windows-only; file paths provided by the user must be valid WTF-8.

/// https://simonsapin.github.io/wtf-8/

InvalidWtf8,

BadPathName,

/// On Windows, `\\server` or `\\server\share` was not found.

NetworkNotFound,

} || UnexpectedError;

/// Deletes an empty directory.

/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `dir_path` should be encoded as valid UTF-8.

/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding.

pub fn rmdir(dir_path: []const u8) DeleteDirError!void {

if (builtin.os.tag == .wasi and !builtin.link_libc) {

return unlinkat(wasi.AT.FDCWD, dir_path, AT.REMOVEDIR) catch |err| switch (err) {

@@ -3055,6 +3187,9 @@ pub fn rmdir(dir_path: []const u8) DeleteDirError!void {

}

/// Same as `rmdir` except the parameter is null-terminated.

/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `dir_path` should be encoded as valid UTF-8.

/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding.

pub fn rmdirZ(dir_path: [*:0]const u8) DeleteDirError!void {

if (builtin.os.tag == .windows) {

const dir_path_w = try windows.cStrToPrefixedFileW(null, dir_path);

@@ -3077,11 +3212,15 @@ pub fn rmdirZ(dir_path: [*:0]const u8) DeleteDirError!void {

.EXIST => return error.DirNotEmpty,

.NOTEMPTY => return error.DirNotEmpty,

.ROFS => return error.ReadOnlyFileSystem,

.ILSEQ => |err| if (builtin.os.tag == .wasi)

return error.InvalidUtf8

else

return unexpectedErrno(err),

else => |err| return unexpectedErrno(err),

}

/// Windows-only. Same as `rmdir` except the parameter is WTF16 encoded.

/// Windows-only. Same as `rmdir` except the parameter is WTF-16 LE encoded.

pub fn rmdirW(dir_path_w: []const u16) DeleteDirError!void {

return windows.DeleteFile(dir_path_w, .{ .dir = std.fs.cwd().fd, .remove_dir = true }) catch |err| switch (err) {

error.IsDir => unreachable,

@@ -3098,21 +3237,25 @@ pub const ChangeCurDirError = error{

SystemResources,

NotDir,

BadPathName,

/// On Windows, file paths must be valid Unicode.

/// WASI-only; file paths must be valid UTF-8.

InvalidUtf8,

/// Windows-only; file paths provided by the user must be valid WTF-8.

/// https://simonsapin.github.io/wtf-8/

InvalidWtf8,

} || UnexpectedError;

/// Changes the current working directory of the calling process.

/// `dir_path` is recommended to be a UTF-8 encoded string.

/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `dir_path` should be encoded as valid UTF-8.

/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding.

pub fn chdir(dir_path: []const u8) ChangeCurDirError!void {

if (builtin.os.tag == .wasi and !builtin.link_libc) {

@compileError("WASI does not support os.chdir");

} else if (builtin.os.tag == .windows) {

var utf16_dir_path: [windows.PATH_MAX_WIDE]u16 = undefined;

const len = try std.unicode.utf8ToUtf16Le(utf16_dir_path[0..], dir_path);

if (len > utf16_dir_path.len) return error.NameTooLong;

return chdirW(utf16_dir_path[0..len]);

var wtf16_dir_path: [windows.PATH_MAX_WIDE]u16 = undefined;

const len = try std.unicode.wtf8ToWtf16Le(wtf16_dir_path[0..], dir_path);

if (len > wtf16_dir_path.len) return error.NameTooLong;

return chdirW(wtf16_dir_path[0..len]);

} else {

const dir_path_c = try toPosixPath(dir_path);

return chdirZ(&dir_path_c);

@@ -3120,12 +3263,15 @@ pub fn chdir(dir_path: []const u8) ChangeCurDirError!void {

}

/// Same as `chdir` except the parameter is null-terminated.

/// On Windows, `dir_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `dir_path` should be encoded as valid UTF-8.

/// On other platforms, `dir_path` is an opaque sequence of bytes with no particular encoding.

pub fn chdirZ(dir_path: [*:0]const u8) ChangeCurDirError!void {

if (builtin.os.tag == .windows) {

var utf16_dir_path: [windows.PATH_MAX_WIDE]u16 = undefined;

const len = try std.unicode.utf8ToUtf16Le(utf16_dir_path[0..], mem.span(dir_path));

if (len > utf16_dir_path.len) return error.NameTooLong;

return chdirW(utf16_dir_path[0..len]);

var wtf16_dir_path: [windows.PATH_MAX_WIDE]u16 = undefined;

const len = try std.unicode.wtf8ToWtf16Le(wtf16_dir_path[0..], mem.span(dir_path));

if (len > wtf16_dir_path.len) return error.NameTooLong;

return chdirW(wtf16_dir_path[0..len]);

} else if (builtin.os.tag == .wasi and !builtin.link_libc) {

return chdir(mem.span(dir_path));

}

@@ -3139,11 +3285,15 @@ pub fn chdirZ(dir_path: [*:0]const u8) ChangeCurDirError!void {

.NOENT => return error.FileNotFound,

.NOMEM => return error.SystemResources,

.NOTDIR => return error.NotDir,

.ILSEQ => |err| if (builtin.os.tag == .wasi)

return error.InvalidUtf8

else

return unexpectedErrno(err),

else => |err| return unexpectedErrno(err),

}

/// Windows-only. Same as `chdir` except the parameter is WTF16 encoded.

/// Windows-only. Same as `chdir` except the parameter is WTF16 LE encoded.

pub fn chdirW(dir_path: []const u16) ChangeCurDirError!void {

windows.SetCurrentDirectory(dir_path) catch |err| switch (err) {

error.NoDevice => return error.FileSystem,

@@ -3183,7 +3333,11 @@ pub const ReadLinkError = error{

SystemResources,

NotLink,

NotDir,

/// WASI-only; file paths must be valid UTF-8.

InvalidUtf8,

/// Windows-only; file paths provided by the user must be valid WTF-8.

/// https://simonsapin.github.io/wtf-8/

InvalidWtf8,

BadPathName,

/// Windows-only. This error may occur if the opened reparse point is

/// of unsupported type.

@@ -3193,7 +3347,13 @@ pub const ReadLinkError = error{

} || UnexpectedError;

/// Read value of a symbolic link.

/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `file_path` should be encoded as valid UTF-8.

/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.

/// The return value is a slice of `out_buffer` from index 0.

/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, the result is encoded as UTF-8.

/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.

pub fn readlink(file_path: []const u8, out_buffer: []u8) ReadLinkError![]u8 {

if (builtin.os.tag == .wasi and !builtin.link_libc) {

return readlinkat(wasi.AT.FDCWD, file_path, out_buffer);

@@ -3206,7 +3366,8 @@ pub fn readlink(file_path: []const u8, out_buffer: []u8) ReadLinkError![]u8 {

}

/// Windows-only. Same as `readlink` except `file_path` is WTF16 encoded.

/// Windows-only. Same as `readlink` except `file_path` is WTF16 LE encoded.

/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// See also `readlinkZ`.

pub fn readlinkW(file_path: []const u16, out_buffer: []u8) ReadLinkError![]u8 {

return windows.ReadLink(std.fs.cwd().fd, file_path, out_buffer);

@@ -3215,7 +3376,7 @@ pub fn readlinkW(file_path: []const u16, out_buffer: []u8) ReadLinkError![]u8 {

/// Same as `readlink` except `file_path` is null-terminated.

pub fn readlinkZ(file_path: [*:0]const u8, out_buffer: []u8) ReadLinkError![]u8 {

if (builtin.os.tag == .windows) {

const file_path_w = try windows.cStrToWin32PrefixedFileW(file_path);

const file_path_w = try windows.cStrToPrefixedFileW(null, file_path);

return readlinkW(file_path_w.span(), out_buffer);

} else if (builtin.os.tag == .wasi and !builtin.link_libc) {

return readlink(mem.sliceTo(file_path, 0), out_buffer);

@@ -3232,12 +3393,22 @@ pub fn readlinkZ(file_path: [*:0]const u8, out_buffer: []u8) ReadLinkError![]u8

.NOENT => return error.FileNotFound,

.NOMEM => return error.SystemResources,

.NOTDIR => return error.NotDir,

.ILSEQ => |err| if (builtin.os.tag == .wasi)

return error.InvalidUtf8

else

return unexpectedErrno(err),

else => |err| return unexpectedErrno(err),

}

/// Similar to `readlink` except reads value of a symbolic link **relative** to `dirfd` directory handle.

/// On Windows, `file_path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `file_path` should be encoded as valid UTF-8.

/// On other platforms, `file_path` is an opaque sequence of bytes with no particular encoding.

/// The return value is a slice of `out_buffer` from index 0.

/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, the result is encoded as UTF-8.

/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.

/// See also `readlinkatWasi`, `readlinkatZ` and `readlinkatW`.

pub fn readlinkat(dirfd: fd_t, file_path: []const u8, out_buffer: []u8) ReadLinkError![]u8 {

if (builtin.os.tag == .wasi and !builtin.link_libc) {

@@ -3267,11 +3438,13 @@ pub fn readlinkatWasi(dirfd: fd_t, file_path: []const u8, out_buffer: []u8) Read

.NOMEM => return error.SystemResources,

.NOTDIR => return error.NotDir,

.NOTCAPABLE => return error.AccessDenied,

.ILSEQ => return error.InvalidUtf8,

else => |err| return unexpectedErrno(err),

}

/// Windows-only. Same as `readlinkat` except `file_path` is null-terminated, WTF16 encoded.

/// Windows-only. Same as `readlinkat` except `file_path` is null-terminated, WTF16 LE encoded.

/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// See also `readlinkat`.

pub fn readlinkatW(dirfd: fd_t, file_path: []const u16, out_buffer: []u8) ReadLinkError![]u8 {

return windows.ReadLink(dirfd, file_path, out_buffer);

@@ -3298,6 +3471,10 @@ pub fn readlinkatZ(dirfd: fd_t, file_path: [*:0]const u8, out_buffer: []u8) Read

.NOENT => return error.FileNotFound,

.NOMEM => return error.SystemResources,

.NOTDIR => return error.NotDir,

.ILSEQ => |err| if (builtin.os.tag == .wasi)

return error.InvalidUtf8

else

return unexpectedErrno(err),

else => |err| return unexpectedErrno(err),

}

@@ -4274,10 +4451,18 @@ pub fn fstat_wasi(fd: fd_t) FStatError!wasi.filestat_t {

}

pub const FStatAtError = FStatError || error{ NameTooLong, FileNotFound, SymLinkLoop };

/// Error set of `fstatat`, `fstatat_wasi` and `fstatatZ`: everything in
/// `FStatError` plus the path-related errors listed here.
pub const FStatAtError = FStatError || error{

NameTooLong,

FileNotFound,

SymLinkLoop,

/// WASI-only; file paths must be valid UTF-8.

InvalidUtf8,

};

/// Similar to `fstat`, but returns stat of a resource pointed to by `pathname`

/// which is relative to `dirfd` handle.

/// On WASI, `pathname` should be encoded as valid UTF-8.

/// On other platforms, `pathname` is an opaque sequence of bytes with no particular encoding.

/// See also `fstatatZ` and `fstatat_wasi`.

pub fn fstatat(dirfd: fd_t, pathname: []const u8, flags: u32) FStatAtError!Stat {

if (builtin.os.tag == .wasi and !builtin.link_libc) {

@@ -4294,6 +4479,7 @@ pub fn fstatat(dirfd: fd_t, pathname: []const u8, flags: u32) FStatAtError!Stat

}

/// WASI-only. Same as `fstatat` but targeting WASI.

/// `pathname` should be encoded as valid UTF-8.

/// See also `fstatat`.

pub fn fstatat_wasi(dirfd: fd_t, pathname: []const u8, flags: wasi.lookupflags_t) FStatAtError!wasi.filestat_t {

var stat: wasi.filestat_t = undefined;

@@ -4308,6 +4494,7 @@ pub fn fstatat_wasi(dirfd: fd_t, pathname: []const u8, flags: wasi.lookupflags_t

.NOENT => return error.FileNotFound,

.NOTDIR => return error.FileNotFound,

.NOTCAPABLE => return error.AccessDenied,

.ILSEQ => return error.InvalidUtf8,

else => |err| return unexpectedErrno(err),

}

@@ -4337,6 +4524,10 @@ pub fn fstatatZ(dirfd: fd_t, pathname: [*:0]const u8, flags: u32) FStatAtError!S

.LOOP => return error.SymLinkLoop,

.NOENT => return error.FileNotFound,

.NOTDIR => return error.FileNotFound,

.ILSEQ => |err| if (builtin.os.tag == .wasi)

return error.InvalidUtf8

else

return unexpectedErrno(err),

else => |err| return unexpectedErrno(err),

}

@@ -4693,12 +4884,17 @@ pub const AccessError = error{

FileBusy,

SymLinkLoop,

ReadOnlyFileSystem,

/// On Windows, file paths must be valid Unicode.

/// WASI-only; file paths must be valid UTF-8.

InvalidUtf8,

/// Windows-only; file paths provided by the user must be valid WTF-8.

/// https://simonsapin.github.io/wtf-8/

InvalidWtf8,

} || UnexpectedError;

/// Check user's permissions for a file.

/// On Windows, `path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `path` should be encoded as valid UTF-8.

/// On other platforms, `path` is an opaque sequence of bytes with no particular encoding.

/// TODO currently this assumes `mode` is `F.OK` on Windows.

pub fn access(path: []const u8, mode: u32) AccessError!void {

if (builtin.os.tag == .windows) {

@@ -4740,12 +4936,16 @@ pub fn accessZ(path: [*:0]const u8, mode: u32) AccessError!void {

.FAULT => unreachable,

.IO => return error.InputOutput,

.NOMEM => return error.SystemResources,

.ILSEQ => |err| if (builtin.os.tag == .wasi)

return error.InvalidUtf8

else

return unexpectedErrno(err),

else => |err| return unexpectedErrno(err),

}

/// Call from Windows-specific code if you already have a UTF-16LE encoded, null terminated string.

/// Otherwise use `access` or `accessC`.

/// Call from Windows-specific code if you already have a WTF-16LE encoded, null terminated string.

/// Otherwise use `access` or `accessZ`.

/// TODO currently this ignores `mode`.

pub fn accessW(path: [*:0]const u16, mode: u32) windows.GetFileAttributesError!void {

_ = mode;

@@ -4762,6 +4962,9 @@ pub fn accessW(path: [*:0]const u16, mode: u32) windows.GetFileAttributesError!v

}

/// Check user's permissions for a file, based on an open directory handle.

/// On Windows, `path` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On WASI, `path` should be encoded as valid UTF-8.

/// On other platforms, `path` is an opaque sequence of bytes with no particular encoding.

/// TODO currently this ignores `mode` and `flags` on Windows.

pub fn faccessat(dirfd: fd_t, path: []const u8, mode: u32, flags: u32) AccessError!void {

if (builtin.os.tag == .windows) {

@@ -4832,6 +5035,10 @@ pub fn faccessatZ(dirfd: fd_t, path: [*:0]const u8, mode: u32, flags: u32) Acces

.FAULT => unreachable,

.IO => return error.InputOutput,

.NOMEM => return error.SystemResources,

.ILSEQ => |err| if (builtin.os.tag == .wasi)

return error.InvalidUtf8

else

return unexpectedErrno(err),

else => |err| return unexpectedErrno(err),

}

@@ -5339,8 +5546,9 @@ pub const RealPathError = error{

/// On WASI, the current CWD may not be associated with an absolute path.

InvalidHandle,

/// On Windows, file paths must be valid Unicode.

InvalidUtf8,

/// Windows-only; file paths provided by the user must be valid WTF-8.

/// https://simonsapin.github.io/wtf-8/

InvalidWtf8,

/// On Windows, `\\server` or `\\server\share` was not found.

NetworkNotFound,

@@ -5362,8 +5570,12 @@ pub const RealPathError = error{

/// Return the canonicalized absolute pathname.

/// Expands all symbolic links and resolves references to `.`, `..`, and

/// extra `/` characters in `pathname`.

/// On Windows, `pathname` should be encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On other platforms, `pathname` is an opaque sequence of bytes with no particular encoding.

/// The return value is a slice of `out_buffer`, but not necessarily from the beginning.

/// See also `realpathZ` and `realpathW`.

/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.

/// Calling this function is usually a bug.

pub fn realpath(pathname: []const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {

if (builtin.os.tag == .windows) {

@@ -5402,6 +5614,7 @@ pub fn realpathZ(pathname: [*:0]const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealP

error.WouldBlock => unreachable,

error.FileBusy => unreachable, // not asking for write permissions

error.InvalidHandle => unreachable, // WASI-only

error.InvalidUtf8 => unreachable, // WASI-only

else => |e| return e,

};

defer close(fd);

@@ -5425,7 +5638,8 @@ pub fn realpathZ(pathname: [*:0]const u8, out_buffer: *[MAX_PATH_BYTES]u8) RealP

return mem.sliceTo(result_path, 0);

}

/// Same as `realpath` except `pathname` is UTF16LE-encoded.

/// Same as `realpath` except `pathname` is WTF16LE-encoded.

/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// Calling this function is usually a bug.

pub fn realpathW(pathname: []const u16, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {

const w = windows;

@@ -5475,6 +5689,8 @@ pub fn isGetFdPathSupportedOnTarget(os: std.Target.Os) bool {

/// This function is very host-specific and is not universally supported by all hosts.

/// For example, while it generally works on Linux, macOS, FreeBSD or Windows, it is

/// unsupported on WASI.

/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.

/// Calling this function is usually a bug.

pub fn getFdPath(fd: fd_t, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {

if (!comptime isGetFdPathSupportedOnTarget(builtin.os)) {

@@ -5485,10 +5701,7 @@ pub fn getFdPath(fd: fd_t, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {

var wide_buf: [windows.PATH_MAX_WIDE]u16 = undefined;

const wide_slice = try windows.GetFinalPathNameByHandle(fd, .{}, wide_buf[0..]);

// TODO: Windows file paths can be arbitrary arrays of u16 values

// and must not fail with InvalidUtf8.

const end_index = std.unicode.utf16leToUtf8(out_buffer, wide_slice) catch

return error.InvalidUtf8;

const end_index = std.unicode.wtf16LeToWtf8(out_buffer, wide_slice);

return out_buffer[0..end_index];

.macos, .ios, .watchos, .tvos => {

@@ -5512,8 +5725,12 @@ pub fn getFdPath(fd: fd_t, out_buffer: *[MAX_PATH_BYTES]u8) RealPathError![]u8 {

const target = readlinkZ(proc_path, out_buffer) catch |err| {

switch (err) {

error.UnsupportedReparsePointType => unreachable, // Windows only,

error.NotLink => unreachable,

error.BadPathName => unreachable,

error.InvalidUtf8 => unreachable, // WASI-only

error.InvalidWtf8 => unreachable, // Windows-only

error.UnsupportedReparsePointType => unreachable, // Windows-only

error.NetworkNotFound => unreachable, // Windows-only

else => |e| return e,

}

};

lib/std/os/windows.zig added: 1932, removed: 517, total 1415

@@ -1,8 +1,8 @@

//! This file contains thin wrappers around Windows-specific APIs, with these

//! specific goals in mind:

//! * Convert "errno"-style error codes into Zig errors.

//! * When null-terminated or UTF16LE byte buffers are required, provide APIs which accept

//! slices as well as APIs which accept null-terminated UTF16LE byte buffers.

//! * When null-terminated or WTF16LE byte buffers are required, provide APIs which accept

//! slices as well as APIs which accept null-terminated WTF16LE byte buffers.

const builtin = @import("builtin");

const std = @import("../std.zig");

@@ -548,7 +548,6 @@ pub fn WriteFile(

pub const SetCurrentDirectoryError = error{

NameTooLong,

InvalidUtf8,

FileNotFound,

NotDir,

AccessDenied,

@@ -587,24 +586,24 @@ pub const GetCurrentDirectoryError = error{

};

/// The result is a slice of `buffer`, indexed from 0.

/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

pub fn GetCurrentDirectory(buffer: []u8) GetCurrentDirectoryError![]u8 {

var utf16le_buf: [PATH_MAX_WIDE]u16 = undefined;

const result = kernel32.GetCurrentDirectoryW(utf16le_buf.len, &utf16le_buf);

var wtf16le_buf: [PATH_MAX_WIDE]u16 = undefined;

const result = kernel32.GetCurrentDirectoryW(wtf16le_buf.len, &wtf16le_buf);

if (result == 0) {

switch (kernel32.GetLastError()) {

else => |err| return unexpectedError(err),

}

assert(result <= utf16le_buf.len);

const utf16le_slice = utf16le_buf[0..result];

// Trust that Windows gives us valid UTF-16LE.

assert(result <= wtf16le_buf.len);

const wtf16le_slice = wtf16le_buf[0..result];

var end_index: usize = 0;

var it = std.unicode.Utf16LeIterator.init(utf16le_slice);

while (it.nextCodepoint() catch unreachable) |codepoint| {

var it = std.unicode.Wtf16LeIterator.init(wtf16le_slice);

while (it.nextCodepoint()) |codepoint| {

const seq_len = std.unicode.utf8CodepointSequenceLength(codepoint) catch unreachable;

if (end_index + seq_len >= buffer.len)

return error.NameTooLong;

end_index += std.unicode.utf8Encode(codepoint, buffer[end_index..]) catch unreachable;

end_index += std.unicode.wtf8Encode(codepoint, buffer[end_index..]) catch unreachable;

}

return buffer[0..end_index];

}

@@ -812,6 +811,8 @@ pub fn ReadLink(dir: ?HANDLE, sub_path_w: []const u16, out_buffer: []u8) ReadLin

}

/// Asserts that there is enough space in `out_buffer`.

/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

fn parseReadlinkPath(path: []const u16, is_relative: bool, out_buffer: []u8) []u8 {

const win32_namespace_path = path: {

if (is_relative) break :path path;

@@ -821,7 +822,7 @@ fn parseReadlinkPath(path: []const u16, is_relative: bool, out_buffer: []u8) []u

};

break :path win32_path.span();

};

const out_len = std.unicode.utf16leToUtf8(out_buffer, win32_namespace_path) catch unreachable;

const out_len = std.unicode.wtf16LeToWtf8(out_buffer, win32_namespace_path);

return out_buffer[0..out_len];

}

@@ -1942,13 +1943,13 @@ pub fn eqlIgnoreCaseWTF16(a: []const u16, b: []const u16) bool {

if (@inComptime() or builtin.os.tag != .windows) {

// This function compares the strings code unit by code unit (aka u16-to-u16),

// so any length difference implies inequality. In other words, there's no possible

// conversion that changes the number of UTF-16 code units needed for the uppercase/lowercase

// conversion that changes the number of WTF-16 code units needed for the uppercase/lowercase

// version in the conversion table since only codepoints <= max(u16) are eligible

// for conversion at all.

if (a.len != b.len) return false;

for (a, b) |a_c, b_c| {

// The slices are always UTF-16 LE, so need to convert the elements to native

// The slices are always WTF-16 LE, so need to convert the elements to native

// endianness for the uppercasing

const a_c_native = std.mem.littleToNative(u16, a_c);

const b_c_native = std.mem.littleToNative(u16, b_c);

@@ -1975,18 +1976,18 @@ pub fn eqlIgnoreCaseWTF16(a: []const u16, b: []const u16) bool {

return ntdll.RtlEqualUnicodeString(&a_string, &b_string, TRUE) == TRUE;

}

/// Compares two UTF-8 strings using the equivalent functionality of

/// Compares two WTF-8 strings using the equivalent functionality of

/// `RtlEqualUnicodeString` (with case insensitive comparison enabled).

/// This function can be called on any target.

/// Assumes `a` and `b` are valid UTF-8.

pub fn eqlIgnoreCaseUtf8(a: []const u8, b: []const u8) bool {

/// Assumes `a` and `b` are valid WTF-8.

pub fn eqlIgnoreCaseWtf8(a: []const u8, b: []const u8) bool {

// A length equality check is not possible here because there are

// some codepoints whose uppercase UTF-8 representation has a different length

// than their lowercase counterparts, e.g. U+0250 (2 bytes) <-> U+2C6F (3 bytes).

// There are 7 such codepoints in the uppercase data used by Windows.

var a_utf8_it = std.unicode.Utf8View.initUnchecked(a).iterator();

var b_utf8_it = std.unicode.Utf8View.initUnchecked(b).iterator();

var a_wtf8_it = std.unicode.Wtf8View.initUnchecked(a).iterator();

var b_wtf8_it = std.unicode.Wtf8View.initUnchecked(b).iterator();

// Use RtlUpcaseUnicodeChar on Windows when not in comptime to avoid including a

// redundant copy of the uppercase data.

@@ -1996,8 +1997,8 @@ pub fn eqlIgnoreCaseUtf8(a: []const u8, b: []const u8) bool {

};

while (true) {

const a_cp = a_utf8_it.nextCodepoint() orelse break;

const b_cp = b_utf8_it.nextCodepoint() orelse return false;

const a_cp = a_wtf8_it.nextCodepoint() orelse break;

const b_cp = b_wtf8_it.nextCodepoint() orelse return false;

if (a_cp <= std.math.maxInt(u16) and b_cp <= std.math.maxInt(u16)) {

if (a_cp != b_cp and upcaseImpl(@intCast(a_cp)) != upcaseImpl(@intCast(b_cp))) {

@@ -2008,26 +2009,26 @@ pub fn eqlIgnoreCaseUtf8(a: []const u8, b: []const u8) bool {

}

// Make sure there are no leftover codepoints in b

if (b_utf8_it.nextCodepoint() != null) return false;

if (b_wtf8_it.nextCodepoint() != null) return false;

return true;

}

fn testEqlIgnoreCase(comptime expect_eql: bool, comptime a: []const u8, comptime b: []const u8) !void {

try std.testing.expectEqual(expect_eql, eqlIgnoreCaseUtf8(a, b));

try std.testing.expectEqual(expect_eql, eqlIgnoreCaseWtf8(a, b));

try std.testing.expectEqual(expect_eql, eqlIgnoreCaseWTF16(

std.unicode.utf8ToUtf16LeStringLiteral(a),

std.unicode.utf8ToUtf16LeStringLiteral(b),

));

try comptime std.testing.expect(expect_eql == eqlIgnoreCaseUtf8(a, b));

try comptime std.testing.expect(expect_eql == eqlIgnoreCaseWtf8(a, b));

try comptime std.testing.expect(expect_eql == eqlIgnoreCaseWTF16(

std.unicode.utf8ToUtf16LeStringLiteral(a),

std.unicode.utf8ToUtf16LeStringLiteral(b),

));

}

test "eqlIgnoreCaseWTF16/Utf8" {

test "eqlIgnoreCaseWTF16/Wtf8" {

try testEqlIgnoreCase(true, "\x01 a B Λ ɐ", "\x01 A b λ Ɐ");

// does not do case-insensitive comparison for codepoints >= U+10000

try testEqlIgnoreCase(false, "𐓏", "𐓷");

@@ -2117,20 +2118,32 @@ pub fn normalizePath(comptime T: type, path: []T) RemoveDotDirsError!usize {

return prefix_len + try removeDotDirsSanitized(T, path[prefix_len..new_len]);

}

/// Error set of `cStrToPrefixedFileW` and `sliceToPrefixedFileW`:
/// `InvalidWtf8` in addition to everything in `Wtf16ToPrefixedFileWError`.
pub const Wtf8ToPrefixedFileWError = error{InvalidWtf8} || Wtf16ToPrefixedFileWError;

/// Same as `sliceToPrefixedFileW` but accepts a pointer

/// to a null-terminated path.

pub fn cStrToPrefixedFileW(dir: ?HANDLE, s: [*:0]const u8) !PathSpace {

/// to a null-terminated WTF-8 encoded path.

/// https://simonsapin.github.io/wtf-8/

pub fn cStrToPrefixedFileW(dir: ?HANDLE, s: [*:0]const u8) Wtf8ToPrefixedFileWError!PathSpace {

return sliceToPrefixedFileW(dir, mem.sliceTo(s, 0));

}

/// Same as `wToPrefixedFileW` but accepts a UTF-8 encoded path.

pub fn sliceToPrefixedFileW(dir: ?HANDLE, path: []const u8) !PathSpace {

/// Same as `wToPrefixedFileW` but accepts a WTF-8 encoded path.

/// https://simonsapin.github.io/wtf-8/

pub fn sliceToPrefixedFileW(dir: ?HANDLE, path: []const u8) Wtf8ToPrefixedFileWError!PathSpace {

var temp_path: PathSpace = undefined;

temp_path.len = try std.unicode.utf8ToUtf16Le(&temp_path.data, path);

temp_path.len = try std.unicode.wtf8ToWtf16Le(&temp_path.data, path);

temp_path.data[temp_path.len] = 0;

return wToPrefixedFileW(dir, temp_path.span());

}

/// Error set of `wToPrefixedFileW`.
/// `Wtf8ToPrefixedFileWError` is this set with `InvalidWtf8` added.
pub const Wtf16ToPrefixedFileWError = error{

AccessDenied,

BadPathName,

FileNotFound,

NameTooLong,

Unexpected,

};

/// Converts the `path` to WTF16, null-terminated. If the path contains any

/// namespace prefix, or is anything but a relative path (rooted, drive relative,

/// etc) the result will have the NT-style prefix `\??\`.

@@ -2142,7 +2155,7 @@ pub fn sliceToPrefixedFileW(dir: ?HANDLE, path: []const u8) !PathSpace {

/// is non-null, or the CWD if it is null.

/// - Special case device names like COM1, NUL, etc are not handled specially (TODO)

/// - . and space are not stripped from the end of relative paths (potential TODO)

pub fn wToPrefixedFileW(dir: ?HANDLE, path: [:0]const u16) !PathSpace {

pub fn wToPrefixedFileW(dir: ?HANDLE, path: [:0]const u16) Wtf16ToPrefixedFileWError!PathSpace {

const nt_prefix = [_]u16{ '\\', '?', '?', '\\' };

switch (getNamespacePrefix(u16, path)) {

// TODO: Figure out a way to design an API that can avoid the copy for .nt,

@@ -2312,7 +2325,7 @@ pub const NamespacePrefix = enum {

nt,

};

/// If `T` is `u16`, then `path` should be encoded as UTF-16LE.

/// If `T` is `u16`, then `path` should be encoded as WTF-16LE.

pub fn getNamespacePrefix(comptime T: type, path: []const T) NamespacePrefix {

if (path.len < 4) return .none;

var all_backslash = switch (mem.littleToNative(T, path[0])) {

@@ -2366,7 +2379,7 @@ pub const UnprefixedPathType = enum {

/// Get the path type of a path that is known to not have any namespace prefixes

/// (`\\?\`, `\\.\`, `\??\`).

/// If `T` is `u16`, then `path` should be encoded as UTF-16LE.

/// If `T` is `u16`, then `path` should be encoded as WTF-16LE.

pub fn getUnprefixedPathType(comptime T: type, path: []const T) UnprefixedPathType {

if (path.len < 1) return .relative;

@@ -2420,7 +2433,7 @@ test getUnprefixedPathType {

/// Functionality is based on the ReactOS test cases found here:

/// https://github.com/reactos/reactos/blob/master/modules/rostests/apitests/ntdll/RtlNtPathNameToDosPathName.c

///

/// `path` should be encoded as UTF-16LE.

/// `path` should be encoded as WTF-16LE.

pub fn ntToWin32Namespace(path: []const u16) !PathSpace {

if (path.len > PATH_MAX_WIDE) return error.NameTooLong;

@@ -2530,7 +2543,6 @@ pub fn unexpectedError(err: Win32Error) std.os.UnexpectedError {

if (std.os.unexpected_error_tracing) {

// 614 is the length of the longest windows error description

var buf_wstr: [614]WCHAR = undefined;

var buf_utf8: [614]u8 = undefined;

const len = kernel32.FormatMessageW(

FORMAT_MESSAGE_FROM_SYSTEM | FORMAT_MESSAGE_IGNORE_INSERTS,

null,

@@ -2540,8 +2552,10 @@ pub fn unexpectedError(err: Win32Error) std.os.UnexpectedError {

buf_wstr.len,

null,

);

_ = std.unicode.utf16leToUtf8(&buf_utf8, buf_wstr[0..len]) catch unreachable;

std.debug.print("error.Unexpected: GetLastError({}): {s}\n", .{ @intFromEnum(err), buf_utf8[0..len] });

std.debug.print("error.Unexpected: GetLastError({}): {}\n", .{

@intFromEnum(err),

std.unicode.fmtUtf16Le(buf_wstr[0..len]),

});

std.debug.dumpCurrentStackTrace(@returnAddress());

}

return error.Unexpected;

lib/std/os/windows/test.zig added: 1932, removed: 517, total 1415

@@ -30,7 +30,7 @@ fn testToPrefixedFileNoOracle(comptime path: []const u8, comptime expected_path:

const expected_path_utf16 = std.unicode.utf8ToUtf16LeStringLiteral(expected_path);

const actual_path = try windows.wToPrefixedFileW(null, path_utf16);

std.testing.expectEqualSlices(u16, expected_path_utf16, actual_path.span()) catch |e| {

std.debug.print("got '{s}', expected '{s}'\n", .{ std.unicode.fmtUtf16le(actual_path.span()), std.unicode.fmtUtf16le(expected_path_utf16) });

std.debug.print("got '{s}', expected '{s}'\n", .{ std.unicode.fmtUtf16Le(actual_path.span()), std.unicode.fmtUtf16le(expected_path_utf16) });

return e;

};

}

@@ -48,7 +48,7 @@ fn testToPrefixedFileOnlyOracle(comptime path: []const u8) !void {

const zig_result = try windows.wToPrefixedFileW(null, path_utf16);

const win32_api_result = try RtlDosPathNameToNtPathName_U(path_utf16);

std.testing.expectEqualSlices(u16, win32_api_result.span(), zig_result.span()) catch |e| {

std.debug.print("got '{s}', expected '{s}'\n", .{ std.unicode.fmtUtf16le(zig_result.span()), std.unicode.fmtUtf16le(win32_api_result.span()) });

std.debug.print("got '{s}', expected '{s}'\n", .{ std.unicode.fmtUtf16Le(zig_result.span()), std.unicode.fmtUtf16le(win32_api_result.span()) });

return e;

};

}

lib/std/process.zig added: 1932, removed: 517, total 1415

@@ -16,11 +16,15 @@ pub const changeCurDir = os.chdir;

pub const changeCurDirC = os.chdirC;

/// The result is a slice of `out_buffer`, from index `0`.

/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.

pub fn getCwd(out_buffer: []u8) ![]u8 {

// Forwards directly to `os.getcwd`; the error set is inferred from that call.
return os.getcwd(out_buffer);

}

/// Caller must free the returned memory.

/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.

pub fn getCwdAlloc(allocator: Allocator) ![]u8 {

// The use of MAX_PATH_BYTES here is just a heuristic: most paths will fit

// in stack_buf, avoiding an extra allocation in the common case.

@@ -76,7 +80,7 @@ pub const EnvMap = struct {

_ = self;

if (builtin.os.tag == .windows) {

var h = std.hash.Wyhash.init(0);

var it = std.unicode.Utf8View.initUnchecked(s).iterator();

var it = std.unicode.Wtf8View.initUnchecked(s).iterator();

while (it.nextCodepoint()) |cp| {

const cp_upper = upcase(cp);

h.update(&[_]u8{

@@ -93,8 +97,8 @@ pub const EnvMap = struct {

pub fn eql(self: @This(), a: []const u8, b: []const u8) bool {

_ = self;

if (builtin.os.tag == .windows) {

var it_a = std.unicode.Utf8View.initUnchecked(a).iterator();

var it_b = std.unicode.Utf8View.initUnchecked(b).iterator();

var it_a = std.unicode.Wtf8View.initUnchecked(a).iterator();

var it_b = std.unicode.Wtf8View.initUnchecked(b).iterator();

while (true) {

const c_a = it_a.nextCodepoint() orelse break;

const c_b = it_b.nextCodepoint() orelse return false;

@@ -129,8 +133,9 @@ pub const EnvMap = struct {

/// Same as `put` but the key and value become owned by the EnvMap rather

/// than being copied.

/// If `putMove` fails, the ownership of key and value does not transfer.

/// On Windows `key` must be a valid UTF-8 string.

/// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string.

pub fn putMove(self: *EnvMap, key: []u8, value: []u8) !void {

assert(std.unicode.wtf8ValidateSlice(key));

const get_or_put = try self.hash_map.getOrPut(key);

if (get_or_put.found_existing) {

self.free(get_or_put.key_ptr.*);

@@ -141,8 +146,9 @@ pub const EnvMap = struct {

}

/// `key` and `value` are copied into the EnvMap.

/// On Windows `key` must be a valid UTF-8 string.

/// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string.

pub fn put(self: *EnvMap, key: []const u8, value: []const u8) !void {

assert(std.unicode.wtf8ValidateSlice(key));

const value_copy = try self.copy(value);

errdefer self.free(value_copy);

const get_or_put = try self.hash_map.getOrPut(key);

@@ -159,23 +165,26 @@ pub const EnvMap = struct {

/// Find the address of the value associated with a key.

/// The returned pointer is invalidated if the map resizes.

/// On Windows `key` must be a valid UTF-8 string.

/// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string.

pub fn getPtr(self: EnvMap, key: []const u8) ?*[]const u8 {

assert(std.unicode.wtf8ValidateSlice(key));

return self.hash_map.getPtr(key);

}

/// Return the map's copy of the value associated with

/// a key. The returned string is invalidated if this

/// key is removed from the map.

/// On Windows `key` must be a valid UTF-8 string.

/// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string.

pub fn get(self: EnvMap, key: []const u8) ?[]const u8 {

assert(std.unicode.wtf8ValidateSlice(key));

return self.hash_map.get(key);

}

/// Removes the item from the map and frees its value.

/// This invalidates the value returned by get() for this key.

/// On Windows `key` must be a valid UTF-8 string.

/// On Windows `key` must be a valid [WTF-8](https://simonsapin.github.io/wtf-8/) string.

pub fn remove(self: *EnvMap, key: []const u8) void {

assert(std.unicode.wtf8ValidateSlice(key));

const kv = self.hash_map.fetchRemove(key) orelse return;

self.free(kv.key);

self.free(kv.value);

@@ -239,18 +248,34 @@ test "EnvMap" {

try testing.expectEqual(@as(EnvMap.Size, 1), env.count());

// test Unicode case-insensitivity on Windows

if (builtin.os.tag == .windows) {

// test Unicode case-insensitivity on Windows

try env.put("КИРиллИЦА", "something else");

try testing.expectEqualStrings("something else", env.get("кириллица").?);

// and WTF-8 that's not valid UTF-8

const wtf8_with_surrogate_pair = try std.unicode.wtf16LeToWtf8Alloc(testing.allocator, &[_]u16{

std.mem.nativeToLittle(u16, 0xD83D), // unpaired high surrogate

});

defer testing.allocator.free(wtf8_with_surrogate_pair);

try env.put(wtf8_with_surrogate_pair, wtf8_with_surrogate_pair);

try testing.expectEqualSlices(u8, wtf8_with_surrogate_pair, env.get(wtf8_with_surrogate_pair).?);

}

pub const GetEnvMapError = error{

OutOfMemory,

/// WASI-only. `environ_sizes_get` or `environ_get`

/// failed for an unexpected reason.

Unexpected,

};

/// Returns a snapshot of the environment variables of the current process.

/// Any modifications to the resulting EnvMap will not be reflected in the environment, and

/// likewise, any future modifications to the environment will not be reflected in the EnvMap.

/// Caller owns resulting `EnvMap` and should call its `deinit` fn when done.

pub fn getEnvMap(allocator: Allocator) !EnvMap {

pub fn getEnvMap(allocator: Allocator) GetEnvMapError!EnvMap {

var result = EnvMap.init(allocator);

errdefer result.deinit();

@@ -269,7 +294,7 @@ pub fn getEnvMap(allocator: Allocator) !EnvMap {

while (ptr[i] != 0 and ptr[i] != '=') : (i += 1) {}

const key_w = ptr[key_start..i];

const key = try std.unicode.utf16leToUtf8Alloc(allocator, key_w);

const key = try std.unicode.wtf16LeToWtf8Alloc(allocator, key_w);

errdefer allocator.free(key);

if (ptr[i] == '=') i += 1;

@@ -277,7 +302,7 @@ pub fn getEnvMap(allocator: Allocator) !EnvMap {

const value_start = i;

while (ptr[i] != 0) : (i += 1) {}

const value_w = ptr[value_start..i];

const value = try std.unicode.utf16leToUtf8Alloc(allocator, value_w);

const value = try std.unicode.wtf16LeToWtf8Alloc(allocator, value_w);

errdefer allocator.free(value);

i += 1; // skip over null byte

@@ -355,25 +380,28 @@ pub const GetEnvVarOwnedError = error{

OutOfMemory,

EnvironmentVariableNotFound,

/// See https://github.com/ziglang/zig/issues/1774

InvalidUtf8,

/// On Windows, environment variable keys provided by the user must be valid WTF-8.

/// https://simonsapin.github.io/wtf-8/

InvalidWtf8,

};

/// Caller must free returned memory.

/// On Windows, if `key` is not valid [WTF-8](https://simonsapin.github.io/wtf-8/),

/// then `error.InvalidWtf8` is returned.

/// On Windows, the value is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On other platforms, the value is an opaque sequence of bytes with no particular encoding.

pub fn getEnvVarOwned(allocator: Allocator, key: []const u8) GetEnvVarOwnedError![]u8 {

if (builtin.os.tag == .windows) {

const result_w = blk: {

const key_w = try std.unicode.utf8ToUtf16LeWithNull(allocator, key);

defer allocator.free(key_w);

var stack_alloc = std.heap.stackFallback(256 * @sizeOf(u16), allocator);

const stack_allocator = stack_alloc.get();

const key_w = try std.unicode.wtf8ToWtf16LeAllocZ(stack_allocator, key);

defer stack_allocator.free(key_w);

break :blk std.os.getenvW(key_w) orelse return error.EnvironmentVariableNotFound;

};

return std.unicode.utf16leToUtf8Alloc(allocator, result_w) catch |err| switch (err) {

error.DanglingSurrogateHalf => return error.InvalidUtf8,

error.ExpectedSecondSurrogateHalf => return error.InvalidUtf8,

error.UnexpectedSecondSurrogateHalf => return error.InvalidUtf8,

else => |e| return e,

};

// wtf16LeToWtf8Alloc can only fail with OutOfMemory

return std.unicode.wtf16LeToWtf8Alloc(allocator, result_w);

} else if (builtin.os.tag == .wasi and !builtin.link_libc) {

var envmap = getEnvMap(allocator) catch return error.OutOfMemory;

defer envmap.deinit();

@@ -385,6 +413,7 @@ pub fn getEnvVarOwned(allocator: Allocator, key: []const u8) GetEnvVarOwnedError

}

/// On Windows, `key` must be valid UTF-8.

pub fn hasEnvVarConstant(comptime key: []const u8) bool {

if (builtin.os.tag == .windows) {

const key_w = comptime std.unicode.utf8ToUtf16LeStringLiteral(key);

@@ -396,11 +425,22 @@ pub fn hasEnvVarConstant(comptime key: []const u8) bool {

}

pub fn hasEnvVar(allocator: Allocator, key: []const u8) error{OutOfMemory}!bool {

pub const HasEnvVarError = error{

OutOfMemory,

/// On Windows, environment variable keys provided by the user must be valid WTF-8.

/// https://simonsapin.github.io/wtf-8/

InvalidWtf8,

};

/// On Windows, if `key` is not valid [WTF-8](https://simonsapin.github.io/wtf-8/),

/// then `error.InvalidWtf8` is returned.

pub fn hasEnvVar(allocator: Allocator, key: []const u8) HasEnvVarError!bool {

if (builtin.os.tag == .windows) {

var stack_alloc = std.heap.stackFallback(256 * @sizeOf(u16), allocator);

const key_w = try std.unicode.utf8ToUtf16LeWithNull(stack_alloc.get(), key);

defer stack_alloc.allocator.free(key_w);

const stack_allocator = stack_alloc.get();

const key_w = try std.unicode.wtf8ToWtf16LeAllocZ(stack_allocator, key);

defer stack_allocator.free(key_w);

return std.os.getenvW(key_w) != null;

} else if (builtin.os.tag == .wasi and !builtin.link_libc) {

var envmap = getEnvMap(allocator) catch return error.OutOfMemory;

@@ -411,9 +451,22 @@ pub fn hasEnvVar(allocator: Allocator, key: []const u8) error{OutOfMemory}!bool

}

test "os.getEnvVarOwned" {

const ga = std.testing.allocator;

try testing.expectError(error.EnvironmentVariableNotFound, getEnvVarOwned(ga, "BADENV"));

test getEnvVarOwned {

try testing.expectError(

error.EnvironmentVariableNotFound,

getEnvVarOwned(std.testing.allocator, "BADENV"),

);

}

test hasEnvVarConstant {

if (builtin.os.tag == .wasi and !builtin.link_libc) return error.SkipZigTest;

try testing.expect(!hasEnvVarConstant("BADENV"));

}

test hasEnvVar {

const has_env = try hasEnvVar(std.testing.allocator, "BADENV");

try testing.expect(!has_env);

}

pub const ArgIteratorPosix = struct {

@@ -531,6 +584,7 @@ pub const ArgIteratorWasi = struct {

pub const ArgIteratorWindows = struct {

allocator: Allocator,

/// Owned by the iterator.

/// Encoded as WTF-8.

cmd_line: []const u8,

index: usize = 0,

/// Owned by the iterator. Long enough to hold the entire `cmd_line` plus a null terminator.

@@ -538,20 +592,14 @@ pub const ArgIteratorWindows = struct {

start: usize = 0,

end: usize = 0,

pub const InitError = error{ OutOfMemory, InvalidCmdLine };

pub const InitError = error{OutOfMemory};

/// `cmd_line_w` *must* be an UTF16-LE-encoded string.

/// `cmd_line_w` *must* be a WTF16-LE-encoded string.

///

/// The iterator makes a copy of `cmd_line_w` converted UTF-8 and keeps it; it does *not* take

/// The iterator makes a copy of `cmd_line_w` converted to WTF-8 and keeps it; it does *not* take

/// ownership of `cmd_line_w`.

pub fn init(allocator: Allocator, cmd_line_w: [*:0]const u16) InitError!ArgIteratorWindows {

const cmd_line = std.unicode.utf16leToUtf8Alloc(allocator, mem.sliceTo(cmd_line_w, 0)) catch |err| switch (err) {

error.DanglingSurrogateHalf,

error.ExpectedSecondSurrogateHalf,

error.UnexpectedSecondSurrogateHalf,

=> return error.InvalidCmdLine,

error.OutOfMemory => return error.OutOfMemory,

};

const cmd_line = try std.unicode.wtf16LeToWtf8Alloc(allocator, mem.sliceTo(cmd_line_w, 0));

errdefer allocator.free(cmd_line);

const buffer = try allocator.alloc(u8, cmd_line.len + 1);

@@ -566,6 +614,7 @@ pub const ArgIteratorWindows = struct {

/// Returns the next argument and advances the iterator. Returns `null` if at the end of the

/// command-line string. The iterator owns the returned slice.

/// The result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

pub fn next(self: *ArgIteratorWindows) ?[:0]const u8 {

return self.nextWithStrategy(next_strategy);

}

@@ -777,7 +826,6 @@ pub fn ArgIteratorGeneral(comptime options: ArgIteratorGeneralOptions) type {

pub const Self = @This();

pub const InitError = error{OutOfMemory};

pub const InitUtf16leError = error{ OutOfMemory, InvalidCmdLine };

/// cmd_line_utf8 MUST remain valid and constant while using this instance

pub fn init(allocator: Allocator, cmd_line_utf8: []const u8) InitError!Self {

@@ -805,30 +853,6 @@ pub fn ArgIteratorGeneral(comptime options: ArgIteratorGeneralOptions) type {

};

}

/// cmd_line_utf16le MUST be encoded UTF16-LE, and is converted to UTF-8 in an internal buffer

pub fn initUtf16le(allocator: Allocator, cmd_line_utf16le: [*:0]const u16) InitUtf16leError!Self {

const utf16le_slice = mem.sliceTo(cmd_line_utf16le, 0);

const cmd_line = std.unicode.utf16leToUtf8Alloc(allocator, utf16le_slice) catch |err| switch (err) {

error.ExpectedSecondSurrogateHalf,

error.DanglingSurrogateHalf,

error.UnexpectedSecondSurrogateHalf,

=> return error.InvalidCmdLine,

error.OutOfMemory => return error.OutOfMemory,

};

errdefer allocator.free(cmd_line);

const buffer = try allocator.alloc(u8, cmd_line.len + 1);

errdefer allocator.free(buffer);

return Self{

.allocator = allocator,

.cmd_line = cmd_line,

.free_cmd_line_on_deinit = true,

.buffer = buffer,

};

}

// Skips over whitespace in the cmd_line.

// Returns false if the terminating sentinel is reached, true otherwise.

// Also skips over comments (if supported).

@@ -1021,6 +1045,8 @@ pub const ArgIterator = struct {

/// Get the next argument. Returns 'null' if we are at the end.

/// Returned slice is pointing to the iterator's internal buffer.

/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.

pub fn next(self: *ArgIterator) ?([:0]const u8) {

return self.inner.next();

}

@@ -1057,6 +1083,8 @@ pub fn argsWithAllocator(allocator: Allocator) ArgIterator.InitError!ArgIterator

}

/// Caller must call argsFree on result.

/// On Windows, the result is encoded as [WTF-8](https://simonsapin.github.io/wtf-8/).

/// On other platforms, the result is an opaque sequence of bytes with no particular encoding.

pub fn argsAlloc(allocator: Allocator) ![][:0]u8 {

// TODO refactor to only make 1 allocation.

var it = try argsWithAllocator(allocator);

@@ -1201,7 +1229,7 @@ test "ArgIteratorWindows" {

}

fn testArgIteratorWindows(cmd_line: []const u8, expected_args: []const []const u8) !void {

const cmd_line_w = try std.unicode.utf8ToUtf16LeWithNull(testing.allocator, cmd_line);

const cmd_line_w = try std.unicode.wtf8ToWtf16LeAllocZ(testing.allocator, cmd_line);

defer testing.allocator.free(cmd_line_w);

// next

lib/std/unicode.zig added: 1932, removed: 517, total 1415

@@ -39,7 +39,16 @@ pub fn utf8ByteSequenceLength(first_byte: u8) !u3 {

/// out: the out buffer to write to. Must have a len >= utf8CodepointSequenceLength(c).

/// Errors: if c cannot be encoded in UTF-8.

/// Returns: the number of bytes written to out.

pub fn utf8Encode(c: u21, out: []u8) !u3 {

pub fn utf8Encode(c: u21, out: []u8) error{ Utf8CannotEncodeSurrogateHalf, CodepointTooLarge }!u3 {

return utf8EncodeImpl(c, out, .cannot_encode_surrogate_half);

}

/// Compile-time switch that tells the shared encode/validate/convert helpers
/// how to treat surrogate codepoints.
const Surrogates = enum {

/// Surrogate codepoints are rejected; `utf8EncodeImpl` returns
/// `error.Utf8CannotEncodeSurrogateHalf` for them in this mode.
cannot_encode_surrogate_half,

/// Surrogate codepoints are accepted and encoded like any other codepoint.
/// NOTE(review): presumably the mode used by the WTF-8 entry points; confirm against callers.
can_encode_surrogate_half,

};

fn utf8EncodeImpl(c: u21, out: []u8, comptime surrogates: Surrogates) !u3 {

const length = try utf8CodepointSequenceLength(c);

assert(out.len >= length);

switch (length) {

@@ -53,7 +62,9 @@ pub fn utf8Encode(c: u21, out: []u8) !u3 {

out[1] = @as(u8, @intCast(0b10000000 | (c & 0b111111)));

3 => {

if (0xd800 <= c and c <= 0xdfff) return error.Utf8CannotEncodeSurrogateHalf;

if (surrogates == .cannot_encode_surrogate_half and isSurrogateCodepoint(c)) {

return error.Utf8CannotEncodeSurrogateHalf;

}

out[0] = @as(u8, @intCast(0b11100000 | (c >> 12)));

out[1] = @as(u8, @intCast(0b10000000 | ((c >> 6) & 0b111111)));

out[2] = @as(u8, @intCast(0b10000000 | (c & 0b111111)));

@@ -116,12 +127,22 @@ pub fn utf8Decode2(bytes: []const u8) Utf8Decode2Error!u21 {

return value;

}

const Utf8Decode3Error = error{

Utf8ExpectedContinuation,

Utf8OverlongEncoding,

const Utf8Decode3Error = Utf8Decode3AllowSurrogateHalfError || error{

Utf8EncodesSurrogateHalf,

};

/// Decodes a 3-byte UTF-8 sequence into its codepoint.
/// Delegates the actual decoding to `utf8Decode3AllowSurrogateHalf` and then
/// rejects results in the surrogate range (0xD800 through 0xDFFF) with
/// `error.Utf8EncodesSurrogateHalf`.
pub fn utf8Decode3(bytes: []const u8) Utf8Decode3Error!u21 {
    const codepoint = try utf8Decode3AllowSurrogateHalf(bytes);
    return switch (codepoint) {
        0xd800...0xdfff => error.Utf8EncodesSurrogateHalf,
        else => codepoint,
    };
}

const Utf8Decode3AllowSurrogateHalfError = error{

Utf8ExpectedContinuation,

Utf8OverlongEncoding,

};

pub fn utf8Decode3AllowSurrogateHalf(bytes: []const u8) Utf8Decode3AllowSurrogateHalfError!u21 {

assert(bytes.len == 3);

assert(bytes[0] & 0b11110000 == 0b11100000);

var value: u21 = bytes[0] & 0b00001111;

@@ -135,7 +156,6 @@ pub fn utf8Decode3(bytes: []const u8) Utf8Decode3Error!u21 {

value |= bytes[2] & 0b00111111;

if (value < 0x800) return error.Utf8OverlongEncoding;

if (0xd800 <= value and value <= 0xdfff) return error.Utf8EncodesSurrogateHalf;

return value;

}

@@ -213,6 +233,10 @@ pub fn utf8CountCodepoints(s: []const u8) !usize {

/// Returns true if the input consists entirely of UTF-8 codepoints

pub fn utf8ValidateSlice(input: []const u8) bool {

return utf8ValidateSliceImpl(input, .cannot_encode_surrogate_half);

}

fn utf8ValidateSliceImpl(input: []const u8, comptime surrogates: Surrogates) bool {

var remaining = input;

const chunk_len = std.simd.suggestVectorLength(u8) orelse 1;

@@ -240,9 +264,15 @@ pub fn utf8ValidateSlice(input: []const u8) bool {

const xx = 0xF1; // invalid: size 1

const as = 0xF0; // ASCII: size 1

const s1 = 0x02; // accept 0, size 2

const s2 = 0x13; // accept 1, size 3

const s2 = switch (surrogates) {

.cannot_encode_surrogate_half => 0x13, // accept 1, size 3

.can_encode_surrogate_half => 0x03, // accept 0, size 3

};

const s3 = 0x03; // accept 0, size 3

const s4 = 0x23; // accept 2, size 3

const s4 = switch (surrogates) {

.cannot_encode_surrogate_half => 0x23, // accept 2, size 3

.can_encode_surrogate_half => 0x03, // accept 0, size 3

};

const s5 = 0x34; // accept 3, size 4

const s6 = 0x04; // accept 0, size 4

const s7 = 0x44; // accept 4, size 4

@@ -458,7 +488,9 @@ pub const Utf16LeIterator = struct {

};

}

pub fn nextCodepoint(it: *Utf16LeIterator) !?u21 {

pub const NextCodepointError = error{ DanglingSurrogateHalf, ExpectedSecondSurrogateHalf, UnexpectedSecondSurrogateHalf };

pub fn nextCodepoint(it: *Utf16LeIterator) NextCodepointError!?u21 {

assert(it.i <= it.bytes.len);

if (it.i == it.bytes.len) return null;

var code_units: [2]u16 = undefined;

@@ -770,94 +802,230 @@ fn testDecode(bytes: []const u8) !u21 {

return utf8Decode(bytes);

}

/// Print the given `utf8` string, encoded as UTF-8 bytes.
/// Ill-formed UTF-8 byte sequences are replaced by the replacement character (U+FFFD)
/// according to "U+FFFD Substitution of Maximal Subparts" from Chapter 3 of
/// the Unicode standard, and as specified by https://encoding.spec.whatwg.org/#utf-8-decoder
///
/// `fmt` and `options` are ignored. Output is staged in a fixed stack buffer
/// and flushed to `writer` in chunks; any error from `writer.writeAll` is
/// propagated to the caller.
fn formatUtf8(
    utf8: []const u8,
    comptime fmt: []const u8,
    options: std.fmt.FormatOptions,
    writer: anytype,
) !void {
    _ = fmt;
    _ = options;
    var buf: [300]u8 = undefined; // just an arbitrary size
    var u8len: usize = 0;

    // This implementation is based on this specification:
    // https://encoding.spec.whatwg.org/#utf-8-decoder
    var codepoint: u21 = 0;
    var cont_bytes_seen: u3 = 0;
    var cont_bytes_needed: u3 = 0;
    // Allowed range for the *next* continuation byte. Narrowed after certain
    // lead bytes (0xE0, 0xED, 0xF0, 0xF4) and restored to 0x80...0xBF after
    // the first continuation byte has been checked.
    var lower_boundary: u8 = 0x80;
    var upper_boundary: u8 = 0xBF;

    var i: usize = 0;
    while (i < utf8.len) {
        const byte = utf8[i];
        if (cont_bytes_needed == 0) {
            // Not inside a multi-byte sequence: `byte` must be ASCII or a lead byte.
            switch (byte) {
                0x00...0x7F => {
                    buf[u8len] = byte;
                    u8len += 1;
                },
                0xC2...0xDF => {
                    cont_bytes_needed = 1;
                    codepoint = byte & 0b00011111;
                },
                0xE0...0xEF => {
                    if (byte == 0xE0) lower_boundary = 0xA0;
                    if (byte == 0xED) upper_boundary = 0x9F;
                    cont_bytes_needed = 2;
                    codepoint = byte & 0b00001111;
                },
                0xF0...0xF4 => {
                    if (byte == 0xF0) lower_boundary = 0x90;
                    if (byte == 0xF4) upper_boundary = 0x8F;
                    cont_bytes_needed = 3;
                    codepoint = byte & 0b00000111;
                },
                else => {
                    u8len += utf8Encode(replacement_character, buf[u8len..]) catch unreachable;
                },
            }
            // consume the byte
            i += 1;
        } else if (byte < lower_boundary or byte > upper_boundary) {
            // The sequence in progress is ill-formed: reset the decoder state
            // and emit one replacement character for the truncated prefix.
            codepoint = 0;
            cont_bytes_needed = 0;
            cont_bytes_seen = 0;
            lower_boundary = 0x80;
            upper_boundary = 0xBF;
            u8len += utf8Encode(replacement_character, buf[u8len..]) catch unreachable;
            // do not consume the current byte, it should now be treated as a possible start byte
        } else {
            lower_boundary = 0x80;
            upper_boundary = 0xBF;
            codepoint <<= 6;
            codepoint |= byte & 0b00111111;
            cont_bytes_seen += 1;
            // consume the byte
            i += 1;

            if (cont_bytes_seen == cont_bytes_needed) {
                // The sequence is complete and well-formed, so its bytes are
                // copied through verbatim instead of being re-encoded.
                const codepoint_len = cont_bytes_seen + 1;
                const codepoint_start_i = i - codepoint_len;
                @memcpy(buf[u8len..][0..codepoint_len], utf8[codepoint_start_i..][0..codepoint_len]);
                u8len += codepoint_len;

                codepoint = 0;
                cont_bytes_needed = 0;
                cont_bytes_seen = 0;
            }
        }
        // make sure there's always enough room for another maximum length UTF-8 codepoint
        if (u8len + 4 > buf.len) {
            try writer.writeAll(buf[0..u8len]);
            u8len = 0;
        }
    }
    if (cont_bytes_needed != 0) {
        // The input ended in the middle of a sequence.
        // we know there's enough room because we always flush
        // if there's less than 4 bytes remaining in the buffer.
        u8len += utf8Encode(replacement_character, buf[u8len..]) catch unreachable;
    }
    try writer.writeAll(buf[0..u8len]);
}

/// Return a Formatter for a (potentially ill-formed) UTF-8 string.

/// Ill-formed UTF-8 byte sequences are replaced by the replacement character (U+FFFD)

/// according to "U+FFFD Substitution of Maximal Subparts" from Chapter 3 of

/// the Unicode standard, and as specified by https://encoding.spec.whatwg.org/#utf-8-decoder

pub fn fmtUtf8(utf8: []const u8) std.fmt.Formatter(formatUtf8) {

return .{ .data = utf8 };

}

// Checks `fmtUtf8` against well-formed input and against the ill-formed
// sequences from Tables 3-8 through 3-11 of Chapter 3 of the Unicode standard.
// The expected strings spell U+FFFD with an escape so the number of
// replacement characters is explicit and survives re-encoding of this file.
test "fmtUtf8" {
    const expectFmt = testing.expectFmt;
    // One replacement character (U+FFFD), repeated below with `**`.
    const r = "\u{FFFD}";

    try expectFmt("", "{}", .{fmtUtf8("")});
    try expectFmt("foo", "{}", .{fmtUtf8("foo")});
    try expectFmt("𐐷", "{}", .{fmtUtf8("𐐷")});

    // Table 3-8. U+FFFD for Non-Shortest Form Sequences
    // Every one of the 8 ill-formed bytes is replaced individually.
    try expectFmt((r ** 8) ++ "A", "{}", .{fmtUtf8("\xC0\xAF\xE0\x80\xBF\xF0\x81\x82A")});

    // Table 3-9. U+FFFD for Ill-Formed Sequences for Surrogates
    // 0xED followed by a byte above 0x9F is rejected byte by byte: 3 + 3 + 2.
    try expectFmt((r ** 8) ++ "A", "{}", .{fmtUtf8("\xED\xA0\x80\xED\xBF\xBF\xED\xAFA")});

    // Table 3-10. U+FFFD for Other Ill-Formed Sequences
    try expectFmt((r ** 5) ++ "A" ++ (r ** 2) ++ "B", "{}", .{fmtUtf8("\xF4\x91\x92\x93\xFFA\x80\xBFB")});

    // Table 3-11. U+FFFD for Truncated Sequences
    // Each truncated prefix (E1 80, E2, F0 91 92, F1 BF) yields one replacement.
    try expectFmt((r ** 4) ++ "A", "{}", .{fmtUtf8("\xE1\x80\xE2\xF0\x91\x92\xF1\xBFA")});
}

/// Encodes `utf16le` and appends the resulting bytes to `array_list`.
///
/// With `.cannot_encode_surrogate_half` the input is walked with
/// `Utf16LeIterator` and encoded with `utf8Encode`, so iterator errors
/// (unpaired/unexpected surrogates) are returned to the caller.
/// With `.can_encode_surrogate_half` the input is walked with
/// `Wtf16LeIterator` and encoded with `wtf8Encode`; the only possible error
/// is allocation failure.
///
/// On error, bytes appended so far remain in `array_list`.
fn utf16LeToUtf8ArrayListImpl(
    array_list: *std.ArrayList(u8),
    utf16le: []const u16,
    comptime surrogates: Surrogates,
) (switch (surrogates) {
    .cannot_encode_surrogate_half => Utf16LeToUtf8AllocError,
    .can_encode_surrogate_half => mem.Allocator.Error,
})!void {
    // Optimistically guess that it will all be ASCII.
    // The reservation must be relative to the bytes already in the list:
    // the fast path below appends with `appendSliceAssumeCapacity`, which
    // would run past the capacity of a non-empty list if only
    // `utf16le.len` bytes of *total* capacity were ensured.
    try array_list.ensureTotalCapacityPrecise(array_list.items.len + utf16le.len);

    var remaining = utf16le;
    if (builtin.zig_backend != .stage2_x86_64) {
        const chunk_len = std.simd.suggestVectorLength(u16) orelse 1;
        const Chunk = @Vector(chunk_len, u16);

        // Fast path. Check for and encode ASCII characters at the start of the input.
        while (remaining.len >= chunk_len) {
            const chunk: Chunk = remaining[0..chunk_len].*;
            const mask: Chunk = @splat(std.mem.nativeToLittle(u16, 0x7F));
            if (@reduce(.Or, chunk | mask != mask)) {
                // found a non ASCII code unit
                break;
            }
            // Keep only the low byte of every little-endian code unit.
            const chunk_byte_len = chunk_len * 2;
            const chunk_bytes: @Vector(chunk_byte_len, u8) = (std.mem.sliceAsBytes(remaining)[0..chunk_byte_len]).*;
            const deinterlaced_bytes = std.simd.deinterlace(2, chunk_bytes);
            const ascii_bytes: [chunk_len]u8 = deinterlaced_bytes[0];
            // We reserved enough space to encode every UTF-16 code unit
            // as ASCII, so if the entire string is ASCII then we are
            // guaranteed to have enough space allocated
            array_list.appendSliceAssumeCapacity(&ascii_bytes);
            remaining = remaining[chunk_len..];
        }
    }

    // Slow path: encode whatever is left one codepoint at a time, growing
    // the list as needed.
    var out_index: usize = array_list.items.len;
    switch (surrogates) {
        .cannot_encode_surrogate_half => {
            var it = Utf16LeIterator.init(remaining);
            while (try it.nextCodepoint()) |codepoint| {
                const utf8_len = utf8CodepointSequenceLength(codepoint) catch unreachable;
                try array_list.resize(array_list.items.len + utf8_len);
                assert((utf8Encode(codepoint, array_list.items[out_index..]) catch unreachable) == utf8_len);
                out_index += utf8_len;
            }
        },
        .can_encode_surrogate_half => {
            var it = Wtf16LeIterator.init(remaining);
            while (it.nextCodepoint()) |codepoint| {
                const utf8_len = utf8CodepointSequenceLength(codepoint) catch unreachable;
                try array_list.resize(array_list.items.len + utf8_len);
                assert((wtf8Encode(codepoint, array_list.items[out_index..]) catch unreachable) == utf8_len);
                out_index += utf8_len;
            }
        },
    }
}

/// Error set of the allocating UTF-16LE to UTF-8 conversions: allocation
/// failure merged with `Utf16LeToUtf8Error`.
pub const Utf16LeToUtf8AllocError = mem.Allocator.Error || Utf16LeToUtf8Error;

/// Encodes `utf16le` as UTF-8 and appends the bytes to `array_list`.
/// Forwards to `utf16LeToUtf8ArrayListImpl` in `.cannot_encode_surrogate_half`
/// mode, so errors from the UTF-16 iterator are returned to the caller
/// instead of being encoded.
pub fn utf16LeToUtf8ArrayList(array_list: *std.ArrayList(u8), utf16le: []const u16) Utf16LeToUtf8AllocError!void {

return utf16LeToUtf8ArrayListImpl(array_list, utf16le, .cannot_encode_surrogate_half);

}

/// Deprecated; renamed to utf16LeToUtf8Alloc

pub const utf16leToUtf8Alloc = utf16LeToUtf8Alloc;

/// Caller must free returned memory.

pub fn utf16leToUtf8Alloc(allocator: mem.Allocator, utf16le: []const u16) ![]u8 {

pub fn utf16LeToUtf8Alloc(allocator: mem.Allocator, utf16le: []const u16) Utf16LeToUtf8AllocError![]u8 {

// optimistically guess that it will all be ascii.

var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len);

errdefer result.deinit();

var remaining = utf16le;

if (builtin.zig_backend != .stage2_x86_64) {

const chunk_len = std.simd.suggestVectorLength(u16) orelse 1;

const Chunk = @Vector(chunk_len, u16);

// Fast path. Check for and encode ASCII characters at the start of the input.

while (remaining.len >= chunk_len) {

const chunk: Chunk = remaining[0..chunk_len].*;

const mask: Chunk = @splat(std.mem.nativeToLittle(u16, 0x7F));

if (@reduce(.Or, chunk | mask != mask)) {

// found a non ASCII code unit

break;

}

const chunk_byte_len = chunk_len * 2;

const chunk_bytes: @Vector(chunk_byte_len, u8) = (std.mem.sliceAsBytes(remaining)[0..chunk_byte_len]).*;

const deinterlaced_bytes = std.simd.deinterlace(2, chunk_bytes);

const ascii_bytes: [chunk_len]u8 = deinterlaced_bytes[0];

// We allocated enough space to encode every UTF-16 code unit

// as ASCII, so if the entire string is ASCII then we are

// guaranteed to have enough space allocated

result.appendSliceAssumeCapacity(&ascii_bytes);

remaining = remaining[chunk_len..];

}

var out_index: usize = result.items.len;

var it = Utf16LeIterator.init(remaining);

while (try it.nextCodepoint()) |codepoint| {

const utf8_len = utf8CodepointSequenceLength(codepoint) catch unreachable;

try result.resize(result.items.len + utf8_len);

assert((utf8Encode(codepoint, result.items[out_index..]) catch unreachable) == utf8_len);

out_index += utf8_len;

}

try utf16LeToUtf8ArrayList(&result, utf16le);

return result.toOwnedSlice();

}

/// Deprecated; renamed to utf16LeToUtf8AllocZ

pub const utf16leToUtf8AllocZ = utf16LeToUtf8AllocZ;

/// Caller must free returned memory.

pub fn utf16leToUtf8AllocZ(allocator: mem.Allocator, utf16le: []const u16) ![:0]u8 {

pub fn utf16LeToUtf8AllocZ(allocator: mem.Allocator, utf16le: []const u16) Utf16LeToUtf8AllocError![:0]u8 {

// optimistically guess that it will all be ascii (and allocate space for the null terminator)

var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len + 1);

errdefer result.deinit();

var remaining = utf16le;

if (builtin.zig_backend != .stage2_x86_64) {

const chunk_len = std.simd.suggestVectorLength(u16) orelse 1;

const Chunk = @Vector(chunk_len, u16);

try utf16LeToUtf8ArrayList(&result, utf16le);

// Fast path. Check for and encode ASCII characters at the start of the input.

while (remaining.len >= chunk_len) {

const chunk: Chunk = remaining[0..chunk_len].*;

const mask: Chunk = @splat(std.mem.nativeToLittle(u16, 0x7F));

if (@reduce(.Or, chunk | mask != mask)) {

// found a non ASCII code unit

break;

}

const chunk_byte_len = chunk_len * 2;

const chunk_bytes: @Vector(chunk_byte_len, u8) = (std.mem.sliceAsBytes(remaining)[0..chunk_byte_len]).*;

const deinterlaced_bytes = std.simd.deinterlace(2, chunk_bytes);

const ascii_bytes: [chunk_len]u8 = deinterlaced_bytes[0];

// We allocated enough space to encode every UTF-16 code unit

// as ASCII, so if the entire string is ASCII then we are

// guaranteed to have enough space allocated

result.appendSliceAssumeCapacity(&ascii_bytes);

remaining = remaining[chunk_len..];

}

var out_index = result.items.len;

var it = Utf16LeIterator.init(remaining);

while (try it.nextCodepoint()) |codepoint| {

const utf8_len = utf8CodepointSequenceLength(codepoint) catch unreachable;

try result.resize(result.items.len + utf8_len);

assert((utf8Encode(codepoint, result.items[out_index..]) catch unreachable) == utf8_len);

out_index += utf8_len;

}

return result.toOwnedSliceSentinel(0);

}

pub const Utf16LeToUtf8Error = Utf16LeIterator.NextCodepointError;

/// Asserts that the output buffer is big enough.

/// Returns end byte index into utf8.

pub fn utf16leToUtf8(utf8: []u8, utf16le: []const u16) !usize {

fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surrogates) (switch (surrogates) {

.cannot_encode_surrogate_half => Utf16LeToUtf8Error,

.can_encode_surrogate_half => error{},

})!usize {

var end_index: usize = 0;

var remaining = utf16le;

@@ -883,30 +1051,58 @@ pub fn utf16leToUtf8(utf8: []u8, utf16le: []const u16) !usize {

}

var it = Utf16LeIterator.init(remaining);

while (try it.nextCodepoint()) |codepoint| {

end_index += try utf8Encode(codepoint, utf8[end_index..]);

switch (surrogates) {

.cannot_encode_surrogate_half => {

var it = Utf16LeIterator.init(remaining);

while (try it.nextCodepoint()) |codepoint| {

end_index += utf8Encode(codepoint, utf8[end_index..]) catch |err| switch (err) {

// The maximum possible codepoint encoded by UTF-16 is U+10FFFF,

// which is within the valid codepoint range.

error.CodepointTooLarge => unreachable,

// We know the codepoint was valid in UTF-16, meaning it is not

// an unpaired surrogate codepoint.

error.Utf8CannotEncodeSurrogateHalf => unreachable,

};

}

.can_encode_surrogate_half => {

var it = Wtf16LeIterator.init(remaining);

while (it.nextCodepoint()) |codepoint| {

end_index += wtf8Encode(codepoint, utf8[end_index..]) catch |err| switch (err) {

// The maximum possible codepoint encoded by UTF-16 is U+10FFFF,

// which is within the valid codepoint range.

error.CodepointTooLarge => unreachable,

};

}

return end_index;

}

test "utf16leToUtf8" {

/// Deprecated; renamed to utf16LeToUtf8

pub const utf16leToUtf8 = utf16LeToUtf8;

/// Converts `utf16le` to UTF-8, writing the bytes into `utf8`, and returns
/// the end byte index into `utf8`.
/// Forwards to `utf16LeToUtf8Impl` in `.cannot_encode_surrogate_half` mode;
/// errors from the UTF-16 iterator are returned as `Utf16LeToUtf8Error`.
/// NOTE(review): the output buffer is expected to be large enough (the
/// underlying encoder asserts on its length) — confirm sizing at call sites.
pub fn utf16LeToUtf8(utf8: []u8, utf16le: []const u16) Utf16LeToUtf8Error!usize {

return utf16LeToUtf8Impl(utf8, utf16le, .cannot_encode_surrogate_half);

}

test utf16LeToUtf8 {

var utf16le: [2]u16 = undefined;

const utf16le_as_bytes = mem.sliceAsBytes(utf16le[0..]);

{

mem.writeInt(u16, utf16le_as_bytes[0..2], 'A', .little);

mem.writeInt(u16, utf16le_as_bytes[2..4], 'a', .little);

const utf8 = try utf16leToUtf8Alloc(std.testing.allocator, &utf16le);

defer std.testing.allocator.free(utf8);

const utf8 = try utf16LeToUtf8Alloc(testing.allocator, &utf16le);

defer testing.allocator.free(utf8);

try testing.expect(mem.eql(u8, utf8, "Aa"));

}

{

mem.writeInt(u16, utf16le_as_bytes[0..2], 0x80, .little);

mem.writeInt(u16, utf16le_as_bytes[2..4], 0xffff, .little);

const utf8 = try utf16leToUtf8Alloc(std.testing.allocator, &utf16le);

defer std.testing.allocator.free(utf8);

const utf8 = try utf16LeToUtf8Alloc(testing.allocator, &utf16le);

defer testing.allocator.free(utf8);

try testing.expect(mem.eql(u8, utf8, "\xc2\x80" ++ "\xef\xbf\xbf"));

}

@@ -914,8 +1110,8 @@ test "utf16leToUtf8" {

// the values just outside the surrogate half range

mem.writeInt(u16, utf16le_as_bytes[0..2], 0xd7ff, .little);

mem.writeInt(u16, utf16le_as_bytes[2..4], 0xe000, .little);

const utf8 = try utf16leToUtf8Alloc(std.testing.allocator, &utf16le);

defer std.testing.allocator.free(utf8);

const utf8 = try utf16LeToUtf8Alloc(testing.allocator, &utf16le);

defer testing.allocator.free(utf8);

try testing.expect(mem.eql(u8, utf8, "\xed\x9f\xbf" ++ "\xee\x80\x80"));

}

@@ -923,8 +1119,8 @@ test "utf16leToUtf8" {

// smallest surrogate pair

mem.writeInt(u16, utf16le_as_bytes[0..2], 0xd800, .little);

mem.writeInt(u16, utf16le_as_bytes[2..4], 0xdc00, .little);

const utf8 = try utf16leToUtf8Alloc(std.testing.allocator, &utf16le);

defer std.testing.allocator.free(utf8);

const utf8 = try utf16LeToUtf8Alloc(testing.allocator, &utf16le);

defer testing.allocator.free(utf8);

try testing.expect(mem.eql(u8, utf8, "\xf0\x90\x80\x80"));

}

@@ -932,31 +1128,30 @@ test "utf16leToUtf8" {

// largest surrogate pair

mem.writeInt(u16, utf16le_as_bytes[0..2], 0xdbff, .little);

mem.writeInt(u16, utf16le_as_bytes[2..4], 0xdfff, .little);

const utf8 = try utf16leToUtf8Alloc(std.testing.allocator, &utf16le);

defer std.testing.allocator.free(utf8);

const utf8 = try utf16LeToUtf8Alloc(testing.allocator, &utf16le);

defer testing.allocator.free(utf8);

try testing.expect(mem.eql(u8, utf8, "\xf4\x8f\xbf\xbf"));

}

{

mem.writeInt(u16, utf16le_as_bytes[0..2], 0xdbff, .little);

mem.writeInt(u16, utf16le_as_bytes[2..4], 0xdc00, .little);

const utf8 = try utf16leToUtf8Alloc(std.testing.allocator, &utf16le);

defer std.testing.allocator.free(utf8);

const utf8 = try utf16LeToUtf8Alloc(testing.allocator, &utf16le);

defer testing.allocator.free(utf8);

try testing.expect(mem.eql(u8, utf8, "\xf4\x8f\xb0\x80"));

}

{

mem.writeInt(u16, utf16le_as_bytes[0..2], 0xdcdc, .little);

mem.writeInt(u16, utf16le_as_bytes[2..4], 0xdcdc, .little);

const result = utf16leToUtf8Alloc(std.testing.allocator, &utf16le);

try std.testing.expectError(error.UnexpectedSecondSurrogateHalf, result);

const result = utf16LeToUtf8Alloc(testing.allocator, &utf16le);

try testing.expectError(error.UnexpectedSecondSurrogateHalf, result);

}

pub fn utf8ToUtf16LeWithNull(allocator: mem.Allocator, utf8: []const u8) ![:0]u16 {

fn utf8ToUtf16LeArrayListImpl(array_list: *std.ArrayList(u16), utf8: []const u8, comptime surrogates: Surrogates) !void {

// optimistically guess that it will not require surrogate pairs

var result = try std.ArrayList(u16).initCapacity(allocator, utf8.len + 1);

errdefer result.deinit();

try array_list.ensureTotalCapacityPrecise(utf8.len);

var remaining = utf8;

// Need support for std.simd.interlace

@@ -974,33 +1169,65 @@ pub fn utf8ToUtf16LeWithNull(allocator: mem.Allocator, utf8: []const u8) ![:0]u1

}

const zeroes: Chunk = @splat(0);

const utf16_chunk: [chunk_len * 2]u8 align(@alignOf(u16)) = std.simd.interlace(.{ chunk, zeroes });

result.appendSliceAssumeCapacity(std.mem.bytesAsSlice(u16, &utf16_chunk));

array_list.appendSliceAssumeCapacity(std.mem.bytesAsSlice(u16, &utf16_chunk));

remaining = remaining[chunk_len..];

}

const view = try Utf8View.init(remaining);

const view = switch (surrogates) {

.cannot_encode_surrogate_half => try Utf8View.init(remaining),

.can_encode_surrogate_half => try Wtf8View.init(remaining),

};

var it = view.iterator();

while (it.nextCodepoint()) |codepoint| {

if (codepoint < 0x10000) {

const short = @as(u16, @intCast(codepoint));

try result.append(mem.nativeToLittle(u16, short));

try array_list.append(mem.nativeToLittle(u16, short));

} else {

const high = @as(u16, @intCast((codepoint - 0x10000) >> 10)) + 0xD800;

const low = @as(u16, @intCast(codepoint & 0x3FF)) + 0xDC00;

var out: [2]u16 = undefined;

out[0] = mem.nativeToLittle(u16, high);

out[1] = mem.nativeToLittle(u16, low);

try result.appendSlice(out[0..]);

try array_list.appendSlice(out[0..]);

}

pub fn utf8ToUtf16LeArrayList(array_list: *std.ArrayList(u16), utf8: []const u8) error{ InvalidUtf8, OutOfMemory }!void {

return utf8ToUtf16LeArrayListImpl(array_list, utf8, .cannot_encode_surrogate_half);

}

pub fn utf8ToUtf16LeAlloc(allocator: mem.Allocator, utf8: []const u8) error{ InvalidUtf8, OutOfMemory }![]u16 {

// optimistically guess that it will not require surrogate pairs

var result = try std.ArrayList(u16).initCapacity(allocator, utf8.len);

errdefer result.deinit();

try utf8ToUtf16LeArrayListImpl(&result, utf8, .cannot_encode_surrogate_half);

return result.toOwnedSlice();

}

/// Deprecated; renamed to utf8ToUtf16LeAllocZ

pub const utf8ToUtf16LeWithNull = utf8ToUtf16LeAllocZ;

pub fn utf8ToUtf16LeAllocZ(allocator: mem.Allocator, utf8: []const u8) error{ InvalidUtf8, OutOfMemory }![:0]u16 {

// optimistically guess that it will not require surrogate pairs

var result = try std.ArrayList(u16).initCapacity(allocator, utf8.len + 1);

errdefer result.deinit();

try utf8ToUtf16LeArrayListImpl(&result, utf8, .cannot_encode_surrogate_half);

return result.toOwnedSliceSentinel(0);

}

/// Returns index of next character. If exact fit, returned index equals output slice length.

/// Assumes there is enough space for the output.

pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) !usize {

pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) error{InvalidUtf8}!usize {

return utf8ToUtf16LeImpl(utf16le, utf8, .cannot_encode_surrogate_half);

}

pub fn utf8ToUtf16LeImpl(utf16le: []u16, utf8: []const u8, comptime surrogates: Surrogates) !usize {

var dest_i: usize = 0;

var remaining = utf8;

@@ -1027,9 +1254,15 @@ pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) !usize {

var src_i: usize = 0;

while (src_i < remaining.len) {

const n = utf8ByteSequenceLength(remaining[src_i]) catch return error.InvalidUtf8;

const n = utf8ByteSequenceLength(remaining[src_i]) catch return switch (surrogates) {

.cannot_encode_surrogate_half => error.InvalidUtf8,

.can_encode_surrogate_half => error.InvalidWtf8,

};

const next_src_i = src_i + n;

const codepoint = utf8Decode(remaining[src_i..next_src_i]) catch return error.InvalidUtf8;

const codepoint = switch (surrogates) {

.cannot_encode_surrogate_half => utf8Decode(remaining[src_i..next_src_i]) catch return error.InvalidUtf8,

.can_encode_surrogate_half => wtf8Decode(remaining[src_i..next_src_i]) catch return error.InvalidWtf8,

};

if (codepoint < 0x10000) {

const short = @as(u16, @intCast(codepoint));

utf16le[dest_i] = mem.nativeToLittle(u16, short);

@@ -1064,21 +1297,59 @@ test "utf8ToUtf16Le" {

}

test "utf8ToUtf16LeWithNull" {

test utf8ToUtf16LeArrayList {

{

const utf16 = try utf8ToUtf16LeWithNull(testing.allocator, "𐐷");

var list = std.ArrayList(u16).init(testing.allocator);

defer list.deinit();

try utf8ToUtf16LeArrayList(&list, "𐐷");

try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(list.items));

}

{

var list = std.ArrayList(u16).init(testing.allocator);

defer list.deinit();

try utf8ToUtf16LeArrayList(&list, "\u{10FFFF}");

try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(list.items));

}

{

var list = std.ArrayList(u16).init(testing.allocator);

defer list.deinit();

const result = utf8ToUtf16LeArrayList(&list, "\xf4\x90\x80\x80");

try testing.expectError(error.InvalidUtf8, result);

}

test utf8ToUtf16LeAlloc {

{

const utf16 = try utf8ToUtf16LeAlloc(testing.allocator, "𐐷");

defer testing.allocator.free(utf16);

try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16[0..]));

}

{

const utf16 = try utf8ToUtf16LeAlloc(testing.allocator, "\u{10FFFF}");

defer testing.allocator.free(utf16);

try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16[0..]));

}

{

const result = utf8ToUtf16LeAlloc(testing.allocator, "\xf4\x90\x80\x80");

try testing.expectError(error.InvalidUtf8, result);

}

test utf8ToUtf16LeAllocZ {

{

const utf16 = try utf8ToUtf16LeAllocZ(testing.allocator, "𐐷");

defer testing.allocator.free(utf16);

try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16[0..]));

try testing.expect(utf16[2] == 0);

}

{

const utf16 = try utf8ToUtf16LeWithNull(testing.allocator, "\u{10FFFF}");

const utf16 = try utf8ToUtf16LeAllocZ(testing.allocator, "\u{10FFFF}");

defer testing.allocator.free(utf16);

try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16[0..]));

try testing.expect(utf16[2] == 0);

}

{

const result = utf8ToUtf16LeWithNull(testing.allocator, "\xf4\x90\x80\x80");

const result = utf8ToUtf16LeAllocZ(testing.allocator, "\xf4\x90\x80\x80");

try testing.expectError(error.InvalidUtf8, result);

}

@@ -1127,8 +1398,9 @@ test "calculate utf16 string length of given utf8 string in u16" {

try comptime testCalcUtf16LeLen();

}

/// Print the given `utf16le` string

fn formatUtf16le(

/// Print the given `utf16le` string, encoded as UTF-8 bytes.

/// Unpaired surrogates are replaced by the replacement character (U+FFFD).

fn formatUtf16Le(

utf16le: []const u16,

comptime fmt: []const u8,

options: std.fmt.FormatOptions,

@@ -1136,13 +1408,14 @@ fn formatUtf16le(

) !void {

_ = fmt;

_ = options;

var buf: [300]u8 = undefined; // just a random size I chose

var buf: [300]u8 = undefined; // just an arbitrary size

var it = Utf16LeIterator.init(utf16le);

var u8len: usize = 0;

while (it.nextCodepoint() catch replacement_character) |codepoint| {

u8len += utf8Encode(codepoint, buf[u8len..]) catch

utf8Encode(replacement_character, buf[u8len..]) catch unreachable;

if (u8len + 3 >= buf.len) {

// make sure there's always enough room for another maximum length UTF-8 codepoint

if (u8len + 4 > buf.len) {

try writer.writeAll(buf[0..u8len]);

u8len = 0;

}

@@ -1150,22 +1423,27 @@ fn formatUtf16le(

try writer.writeAll(buf[0..u8len]);

}

/// Return a Formatter for a Utf16le string

pub fn fmtUtf16le(utf16le: []const u16) std.fmt.Formatter(formatUtf16le) {

/// Deprecated; renamed to fmtUtf16Le

pub const fmtUtf16le = fmtUtf16Le;

/// Return a Formatter for a (potentially ill-formed) UTF-16 LE string,

/// which will be converted to UTF-8 during formatting.

/// Unpaired surrogates are replaced by the replacement character (U+FFFD).

pub fn fmtUtf16Le(utf16le: []const u16) std.fmt.Formatter(formatUtf16Le) {

return .{ .data = utf16le };

}

test "fmtUtf16le" {

const expectFmt = std.testing.expectFmt;

try expectFmt("", "{}", .{fmtUtf16le(utf8ToUtf16LeStringLiteral(""))});

try expectFmt("foo", "{}", .{fmtUtf16le(utf8ToUtf16LeStringLiteral("foo"))});

try expectFmt("𐐷", "{}", .{fmtUtf16le(utf8ToUtf16LeStringLiteral("𐐷"))});

try expectFmt("퟿", "{}", .{fmtUtf16le(&[_]u16{std.mem.readInt(u16, "\xff\xd7", native_endian)})});

try expectFmt("�", "{}", .{fmtUtf16le(&[_]u16{std.mem.readInt(u16, "\x00\xd8", native_endian)})});

try expectFmt("�", "{}", .{fmtUtf16le(&[_]u16{std.mem.readInt(u16, "\xff\xdb", native_endian)})});

try expectFmt("�", "{}", .{fmtUtf16le(&[_]u16{std.mem.readInt(u16, "\x00\xdc", native_endian)})});

try expectFmt("�", "{}", .{fmtUtf16le(&[_]u16{std.mem.readInt(u16, "\xff\xdf", native_endian)})});

try expectFmt("", "{}", .{fmtUtf16le(&[_]u16{std.mem.readInt(u16, "\x00\xe0", native_endian)})});

test "fmtUtf16Le" {

const expectFmt = testing.expectFmt;

try expectFmt("", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral(""))});

try expectFmt("foo", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral("foo"))});

try expectFmt("𐐷", "{}", .{fmtUtf16Le(utf8ToUtf16LeStringLiteral("𐐷"))});

try expectFmt("퟿", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\xff\xd7", native_endian)})});

try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\x00\xd8", native_endian)})});

try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\xff\xdb", native_endian)})});

try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\x00\xdc", native_endian)})});

try expectFmt("�", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\xff\xdf", native_endian)})});

try expectFmt("", "{}", .{fmtUtf16Le(&[_]u16{std.mem.readInt(u16, "\x00\xe0", native_endian)})});

}

test "utf8ToUtf16LeStringLiteral" {

@@ -1248,3 +1526,535 @@ test "utf8 valid codepoint" {

try testUtf8ValidCodepoint();

try comptime testUtf8ValidCodepoint();

}

/// Returns true if the codepoint is a surrogate (U+D800 to U+DFFF).
/// Both high surrogates (U+D800 to U+DBFF) and low surrogates
/// (U+DC00 to U+DFFF) are included.
pub fn isSurrogateCodepoint(c: u21) bool {
    return switch (c) {
        0xD800...0xDFFF => true,
        else => false,
    };
}

/// Encodes the given codepoint into a WTF-8 byte sequence.
/// Forwards to `utf8EncodeImpl` in `.can_encode_surrogate_half` mode.
/// c: the codepoint.
/// out: the out buffer to write to. Must have a len >= utf8CodepointSequenceLength(c).
/// Errors: `error.CodepointTooLarge` if c cannot be encoded in WTF-8.
/// Returns: the number of bytes written to out.
pub fn wtf8Encode(c: u21, out: []u8) error{CodepointTooLarge}!u3 {
    return utf8EncodeImpl(c, out, .can_encode_surrogate_half);
}

// Union of the error sets of the per-length decoders that `wtf8Decode` dispatches to.
const Wtf8DecodeError = Utf8Decode2Error || Utf8Decode3AllowSurrogateHalfError || Utf8Decode4Error;

/// Decodes a single WTF-8 encoded codepoint from `bytes`.
/// `bytes.len` selects the decoder: a 1-byte sequence is returned as-is,
/// while 2-, 3- and 4-byte sequences go through `utf8Decode2`,
/// `utf8Decode3AllowSurrogateHalf` and `utf8Decode4` respectively.
/// Asserts that `bytes.len` is between 1 and 4; any other length is `unreachable`.
pub fn wtf8Decode(bytes: []const u8) Wtf8DecodeError!u21 {
    return switch (bytes.len) {
        1 => @as(u21, bytes[0]),
        2 => utf8Decode2(bytes),
        3 => utf8Decode3AllowSurrogateHalf(bytes),
        4 => utf8Decode4(bytes),
        else => unreachable,
    };
}

/// Returns true if the input consists entirely of WTF-8 codepoints

/// (all the same restrictions as UTF-8, but allows surrogate codepoints

/// U+D800 to U+DFFF).

/// Does not check for well-formed WTF-8, meaning that this function

/// does not check that all surrogate halves are unpaired.

pub fn wtf8ValidateSlice(input: []const u8) bool {

return utf8ValidateSliceImpl(input, .can_encode_surrogate_half);

}

test "validate WTF-8 slice" {

try testValidateWtf8Slice();

try comptime testValidateWtf8Slice();

// We skip a variable (based on recommended vector size) chunks of

// ASCII characters. Let's make sure we're chunking correctly.

const str = [_]u8{'a'} ** 550 ++ "\xc0";

for (0..str.len - 3) |i| {

try testing.expect(!wtf8ValidateSlice(str[i..]));

}

/// Shared body for the "validate WTF-8 slice" test so the same assertions can
/// be evaluated both at runtime and at comptime.
fn testValidateWtf8Slice() !void {
    // These are valid under both UTF-8 and WTF-8 rules.
    try testing.expect(wtf8ValidateSlice(""));
    try testing.expect(wtf8ValidateSlice("a"));
    try testing.expect(wtf8ValidateSlice("abc"));
    try testing.expect(wtf8ValidateSlice("abc\xdf\xbf"));
    try testing.expect(wtf8ValidateSlice("Ж"));
    try testing.expect(wtf8ValidateSlice("ЖЖ"));
    try testing.expect(wtf8ValidateSlice("брэд-ЛГТМ"));
    try testing.expect(wtf8ValidateSlice("☺☻☹"));
    try testing.expect(wtf8ValidateSlice("a\u{fffdb}"));
    try testing.expect(wtf8ValidateSlice("\xf4\x8f\xbf\xbf"));

    // These are invalid under both UTF-8 and WTF-8 rules.
    try testing.expect(!wtf8ValidateSlice("abc\xc0"));
    try testing.expect(!wtf8ValidateSlice("abc\xc0abc"));
    try testing.expect(!wtf8ValidateSlice("aa\xe2"));
    try testing.expect(!wtf8ValidateSlice("\x42\xfa"));
    try testing.expect(!wtf8ValidateSlice("\x42\xfa\x43"));
    try testing.expect(!wtf8ValidateSlice("\xf4\x90\x80\x80"));
    try testing.expect(!wtf8ValidateSlice("\xf7\xbf\xbf\xbf"));
    try testing.expect(!wtf8ValidateSlice("\xfb\xbf\xbf\xbf\xbf"));
    try testing.expect(!wtf8ValidateSlice("\xc0\x80"));

    // But surrogate codepoints are only valid in WTF-8.
    try testing.expect(wtf8ValidateSlice("\xed\xa0\x80"));
    try testing.expect(wtf8ValidateSlice("\xed\xbf\xbf"));
}

/// Wtf8View iterates the code points of a WTF-8 encoded string,
/// including surrogate halves.
///
/// ```
/// var wtf8 = (try std.unicode.Wtf8View.init("hi there")).iterator();
/// while (wtf8.nextCodepointSlice()) |codepoint| {
///   // note: codepoint could be a surrogate half which is invalid
///   // UTF-8, avoid printing or otherwise sending/emitting this directly
/// }
/// ```
pub const Wtf8View = struct {
    bytes: []const u8,

    /// Checks `s` with `wtf8ValidateSlice` and wraps it in a view.
    /// Returns `error.InvalidWtf8` if the check fails.
    pub fn init(s: []const u8) error{InvalidWtf8}!Wtf8View {
        if (!wtf8ValidateSlice(s)) {
            return error.InvalidWtf8;
        }

        return initUnchecked(s);
    }

    /// Wraps `s` in a view without performing any validation.
    pub fn initUnchecked(s: []const u8) Wtf8View {
        return Wtf8View{ .bytes = s };
    }

    /// Runs `init` at compile time; invalid input becomes a compile error
    /// instead of a runtime error.
    pub inline fn initComptime(comptime s: []const u8) Wtf8View {
        return comptime if (init(s)) |r| r else |err| switch (err) {
            error.InvalidWtf8 => {
                @compileError("invalid wtf8");
            },
        };
    }

    /// Returns an iterator positioned at the start of the viewed bytes.
    pub fn iterator(s: Wtf8View) Wtf8Iterator {
        return Wtf8Iterator{
            .bytes = s.bytes,
            .i = 0,
        };
    }
};

/// Walks `bytes` one encoded codepoint at a time.
/// Asserts that `bytes` is valid WTF-8
pub const Wtf8Iterator = struct {
    bytes: []const u8,
    i: usize,

    /// Returns the encoded bytes of the next codepoint and advances past
    /// them, or null once the end of `bytes` has been reached.
    pub fn nextCodepointSlice(it: *Wtf8Iterator) ?[]const u8 {
        if (it.i >= it.bytes.len) return null;

        const start = it.i;
        const seq_len = utf8ByteSequenceLength(it.bytes[start]) catch unreachable;
        it.i = start + seq_len;
        return it.bytes[start..it.i];
    }

    /// Returns the next decoded codepoint and advances past it, or null at
    /// the end of `bytes`.
    pub fn nextCodepoint(it: *Wtf8Iterator) ?u21 {
        if (it.nextCodepointSlice()) |encoded| {
            return wtf8Decode(encoded) catch unreachable;
        }
        return null;
    }

    /// Look ahead at the next n codepoints without advancing the iterator.
    /// If fewer than n codepoints are available, then return the remainder of the string.
    pub fn peek(it: *Wtf8Iterator, n: usize) []const u8 {
        const start = it.i;
        // Whatever happens below, the iterator position is restored on exit.
        defer it.i = start;

        var remaining = n;
        while (remaining > 0) : (remaining -= 1) {
            _ = it.nextCodepointSlice() orelse return it.bytes[start..];
        }
        const end = it.i;
        return it.bytes[start..end];
    }
};

/// WTF-16 LE to WTF-8 conversion into `array_list`; forwards to
/// `utf16LeToUtf8ArrayListImpl` in `.can_encode_surrogate_half` mode.
/// The only possible error is an allocation failure.
/// NOTE(review): presumably the output is appended after any existing contents
/// of `array_list` — confirm against `utf16LeToUtf8ArrayListImpl`.
pub fn wtf16LeToWtf8ArrayList(array_list: *std.ArrayList(u8), utf16le: []const u16) mem.Allocator.Error!void {
    return utf16LeToUtf8ArrayListImpl(array_list, utf16le, .can_encode_surrogate_half);
}

/// Converts WTF-16 LE to a newly allocated WTF-8 slice.
/// Caller must free returned memory.
pub fn wtf16LeToWtf8Alloc(allocator: mem.Allocator, wtf16le: []const u16) mem.Allocator.Error![]u8 {
    // Reserve one byte per code unit up front: the optimistic all-ASCII guess.
    var wtf8_list = try std.ArrayList(u8).initCapacity(allocator, wtf16le.len);
    errdefer wtf8_list.deinit();

    try wtf16LeToWtf8ArrayList(&wtf8_list, wtf16le);
    return wtf8_list.toOwnedSlice();
}

/// Converts WTF-16 LE to a newly allocated, 0-terminated WTF-8 slice.
/// Caller must free returned memory.
pub fn wtf16LeToWtf8AllocZ(allocator: mem.Allocator, wtf16le: []const u16) mem.Allocator.Error![:0]u8 {
    // Reserve one byte per code unit (the optimistic all-ASCII guess) plus
    // one more for the null terminator.
    var wtf8_list = try std.ArrayList(u8).initCapacity(allocator, wtf16le.len + 1);
    errdefer wtf8_list.deinit();

    try wtf16LeToWtf8ArrayList(&wtf8_list, wtf16le);
    return wtf8_list.toOwnedSliceSentinel(0);
}

/// Converts WTF-16 LE into the caller-provided buffer `wtf8` by forwarding to
/// `utf16LeToUtf8Impl` in `.can_encode_surrogate_half` mode.
/// Callers in this file use the returned value as the length of the written
/// output (`wtf8[0..returned]`).
/// NOTE(review): presumably `wtf8` must be large enough for the whole output —
/// confirm against `utf16LeToUtf8Impl`.
pub fn wtf16LeToWtf8(wtf8: []u8, wtf16le: []const u16) usize {
    // A switch with no prongs is only exhaustive when the error set is empty,
    // so this enforces at compile time that no error is possible in this mode.
    return utf16LeToUtf8Impl(wtf8, wtf16le, .can_encode_surrogate_half) catch |err| switch (err) {};
}

/// WTF-8 to WTF-16 LE conversion into `array_list`; forwards to
/// `utf8ToUtf16LeArrayListImpl` in `.can_encode_surrogate_half` mode.
/// Returns `error.InvalidWtf8` if `wtf8` fails validation, or
/// `error.OutOfMemory` if `array_list` cannot grow.
pub fn wtf8ToWtf16LeArrayList(array_list: *std.ArrayList(u16), wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }!void {
    return utf8ToUtf16LeArrayListImpl(array_list, wtf8, .can_encode_surrogate_half);
}

/// Converts WTF-8 to a newly allocated WTF-16 LE slice.
/// Caller must free returned memory.
pub fn wtf8ToWtf16LeAlloc(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![]u16 {
    // Reserve one code unit per input byte up front: the optimistic guess
    // that no surrogate pairs will be needed.
    var wtf16_list = try std.ArrayList(u16).initCapacity(allocator, wtf8.len);
    errdefer wtf16_list.deinit();

    try wtf8ToWtf16LeArrayList(&wtf16_list, wtf8);
    return wtf16_list.toOwnedSlice();
}

/// Converts WTF-8 to a newly allocated, 0-terminated WTF-16 LE slice.
/// Caller must free returned memory.
pub fn wtf8ToWtf16LeAllocZ(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![:0]u16 {
    // Reserve one code unit per input byte (the optimistic guess that no
    // surrogate pairs will be needed) plus one more for the terminator.
    var wtf16_list = try std.ArrayList(u16).initCapacity(allocator, wtf8.len + 1);
    errdefer wtf16_list.deinit();

    try wtf8ToWtf16LeArrayList(&wtf16_list, wtf8);
    return wtf16_list.toOwnedSliceSentinel(0);
}

/// Converts WTF-8 to WTF-16 LE into the caller-provided buffer `wtf16le`.
/// Returns index of next character. If exact fit, returned index equals output slice length.
/// Assumes there is enough space for the output.
/// Returns `error.InvalidWtf8` if `wtf8` cannot be decoded as WTF-8.
pub fn wtf8ToWtf16Le(wtf16le: []u16, wtf8: []const u8) error{InvalidWtf8}!usize {
    return utf8ToUtf16LeImpl(wtf16le, wtf8, .can_encode_surrogate_half);
}

/// Surrogate codepoints (U+D800 to U+DFFF) are replaced by the Unicode replacement
/// character (U+FFFD).
/// All surrogate codepoints and the replacement character are encoded as three
/// bytes, meaning the input and output slices will always be the same length.
/// In-place conversion is supported when `utf8` and `wtf8` refer to the same slice.
/// Note: If `wtf8` is entirely composed of well-formed UTF-8, then no conversion is necessary.
///       `utf8ValidateSlice` can be used to check if lossy conversion is worthwhile.
/// If `wtf8` is not valid WTF-8, then `error.InvalidWtf8` is returned.
/// Asserts that `utf8.len >= wtf8.len`.
pub fn wtf8ToUtf8Lossy(utf8: []u8, wtf8: []const u8) error{InvalidWtf8}!void {
    assert(utf8.len >= wtf8.len);

    // When both slices start at the same address, unchanged codepoints are
    // already in place and only the replaced ones need to be written.
    const in_place = utf8.ptr == wtf8.ptr;
    const replacement_char_bytes = comptime blk: {
        var buf: [3]u8 = undefined;
        assert((utf8Encode(replacement_character, &buf) catch unreachable) == 3);
        break :blk buf;
    };
    var dest_i: usize = 0;
    const view = try Wtf8View.init(wtf8);
    var it = view.iterator();
    while (it.nextCodepointSlice()) |codepoint_slice| {
        // All surrogate codepoints are encoded as 3 bytes
        if (codepoint_slice.len == 3) {
            const codepoint = wtf8Decode(codepoint_slice) catch unreachable;
            if (isSurrogateCodepoint(codepoint)) {
                @memcpy(utf8[dest_i..][0..replacement_char_bytes.len], &replacement_char_bytes);
                dest_i += replacement_char_bytes.len;
                continue;
            }
        }
        if (!in_place) {
            @memcpy(utf8[dest_i..][0..codepoint_slice.len], codepoint_slice);
        }
        dest_i += codepoint_slice.len;
    }
}

/// Allocates a buffer of `wtf8.len` bytes and fills it using `wtf8ToUtf8Lossy`.
/// Caller owns the returned memory.
/// Returns `error.InvalidWtf8` if `wtf8ToUtf8Lossy` rejects the input; the
/// buffer is freed before the error is returned.
pub fn wtf8ToUtf8LossyAlloc(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![]u8 {
    const utf8 = try allocator.alloc(u8, wtf8.len);
    errdefer allocator.free(utf8);

    try wtf8ToUtf8Lossy(utf8, wtf8);

    return utf8;
}

/// Allocates a 0-terminated buffer of `wtf8.len` bytes and fills it using
/// `wtf8ToUtf8Lossy`.
/// Caller owns the returned memory.
/// Returns `error.InvalidWtf8` if `wtf8ToUtf8Lossy` rejects the input; the
/// buffer is freed before the error is returned.
pub fn wtf8ToUtf8LossyAllocZ(allocator: mem.Allocator, wtf8: []const u8) error{ InvalidWtf8, OutOfMemory }![:0]u8 {
    const utf8 = try allocator.allocSentinel(u8, wtf8.len, 0);
    errdefer allocator.free(utf8);

    try wtf8ToUtf8Lossy(utf8, wtf8);

    return utf8;
}

test wtf8ToUtf8Lossy {

var buf: [32]u8 = undefined;

const invalid_utf8 = "\xff";

try testing.expectError(error.InvalidWtf8, wtf8ToUtf8Lossy(&buf, invalid_utf8));

const ascii = "abcd";

try wtf8ToUtf8Lossy(&buf, ascii);

try testing.expectEqualStrings("abcd", buf[0..ascii.len]);

const high_surrogate_half = "ab\xed\xa0\xbdcd";

try wtf8ToUtf8Lossy(&buf, high_surrogate_half);

try testing.expectEqualStrings("ab\u{FFFD}cd", buf[0..high_surrogate_half.len]);

const low_surrogate_half = "ab\xed\xb2\xa9cd";

try wtf8ToUtf8Lossy(&buf, low_surrogate_half);

try testing.expectEqualStrings("ab\u{FFFD}cd", buf[0..low_surrogate_half.len]);

// If the WTF-8 is not well-formed, each surrogate half is converted into a separate

// replacement character instead of being interpreted as a surrogate pair.

const encoded_surrogate_pair = "ab\xed\xa0\xbd\xed\xb2\xa9cd";

try wtf8ToUtf8Lossy(&buf, encoded_surrogate_pair);

try testing.expectEqualStrings("ab\u{FFFD}\u{FFFD}cd", buf[0..encoded_surrogate_pair.len]);

// in place

@memcpy(buf[0..low_surrogate_half.len], low_surrogate_half);

const slice = buf[0..low_surrogate_half.len];

try wtf8ToUtf8Lossy(slice, slice);

try testing.expectEqualStrings("ab\u{FFFD}cd", slice);

}

test wtf8ToUtf8LossyAlloc {

const invalid_utf8 = "\xff";

try testing.expectError(error.InvalidWtf8, wtf8ToUtf8LossyAlloc(testing.allocator, invalid_utf8));

{

const ascii = "abcd";

const utf8 = try wtf8ToUtf8LossyAlloc(testing.allocator, ascii);

defer testing.allocator.free(utf8);

try testing.expectEqualStrings("abcd", utf8);

}

{

const surrogate_half = "ab\xed\xa0\xbdcd";

const utf8 = try wtf8ToUtf8LossyAlloc(testing.allocator, surrogate_half);

defer testing.allocator.free(utf8);

try testing.expectEqualStrings("ab\u{FFFD}cd", utf8);

}

{

// If the WTF-8 is not well-formed, each surrogate half is converted into a separate

// replacement character instead of being interpreted as a surrogate pair.

const encoded_surrogate_pair = "ab\xed\xa0\xbd\xed\xb2\xa9cd";

const utf8 = try wtf8ToUtf8LossyAlloc(testing.allocator, encoded_surrogate_pair);

defer testing.allocator.free(utf8);

try testing.expectEqualStrings("ab\u{FFFD}\u{FFFD}cd", utf8);

}

test wtf8ToUtf8LossyAllocZ {

const invalid_utf8 = "\xff";

try testing.expectError(error.InvalidWtf8, wtf8ToUtf8LossyAllocZ(testing.allocator, invalid_utf8));

{

const ascii = "abcd";

const utf8 = try wtf8ToUtf8LossyAllocZ(testing.allocator, ascii);

defer testing.allocator.free(utf8);

try testing.expectEqualStrings("abcd", utf8);

}

{

const surrogate_half = "ab\xed\xa0\xbdcd";

const utf8 = try wtf8ToUtf8LossyAllocZ(testing.allocator, surrogate_half);

defer testing.allocator.free(utf8);

try testing.expectEqualStrings("ab\u{FFFD}cd", utf8);

}

{

// If the WTF-8 is not well-formed, each surrogate half is converted into a separate

// replacement character instead of being interpreted as a surrogate pair.

const encoded_surrogate_pair = "ab\xed\xa0\xbd\xed\xb2\xa9cd";

const utf8 = try wtf8ToUtf8LossyAllocZ(testing.allocator, encoded_surrogate_pair);

defer testing.allocator.free(utf8);

try testing.expectEqualStrings("ab\u{FFFD}\u{FFFD}cd", utf8);

}

/// Iterates the codepoints of a WTF-16 LE string. Iteration cannot fail:
/// unpaired surrogates are yielded as their own codepoints.
pub const Wtf16LeIterator = struct {
    /// The code units viewed as raw bytes; `i` is a byte offset into this.
    bytes: []const u8,
    i: usize,

    /// Creates an iterator positioned at the first code unit of `s`.
    pub fn init(s: []const u16) Wtf16LeIterator {
        return Wtf16LeIterator{
            .bytes = std.mem.sliceAsBytes(s),
            .i = 0,
        };
    }

    /// If the next codepoint is encoded by a surrogate pair, returns the
    /// codepoint that the surrogate pair represents.
    /// If the next codepoint is an unpaired surrogate, returns the codepoint
    /// of the unpaired surrogate.
    /// Returns null once all code units have been consumed.
    pub fn nextCodepoint(it: *Wtf16LeIterator) ?u21 {
        assert(it.i <= it.bytes.len);
        if (it.i == it.bytes.len) return null;
        var code_units: [2]u16 = undefined;
        code_units[0] = std.mem.readInt(u16, it.bytes[it.i..][0..2], .little);
        it.i += 2;

        // Try to combine a high surrogate with the following code unit.
        // Breaking out of this block means no pair could be formed, and the
        // first code unit is returned on its own below.
        surrogate_pair: {
            if (utf16IsHighSurrogate(code_units[0])) {
                if (it.i >= it.bytes.len) break :surrogate_pair;
                code_units[1] = std.mem.readInt(u16, it.bytes[it.i..][0..2], .little);
                const codepoint = utf16DecodeSurrogatePair(&code_units) catch break :surrogate_pair;
                // Only consume the second code unit once the pair is known to be valid.
                it.i += 2;
                return codepoint;
            }
        }

        return code_units[0];
    }
};

test "non-well-formed WTF-8 does not roundtrip" {

// This encodes the surrogate pair U+D83D U+DCA9.

// The well-formed version of this would be U+1F4A9 which is \xF0\x9F\x92\xA9.

const non_well_formed_wtf8 = "\xed\xa0\xbd\xed\xb2\xa9";

var wtf16_buf: [2]u16 = undefined;

const wtf16_len = try wtf8ToWtf16Le(&wtf16_buf, non_well_formed_wtf8);

const wtf16 = wtf16_buf[0..wtf16_len];

try testing.expectEqualSlices(u16, &[_]u16{

mem.nativeToLittle(u16, 0xD83D), // high surrogate

mem.nativeToLittle(u16, 0xDCA9), // low surrogate

}, wtf16);

var wtf8_buf: [4]u8 = undefined;

const wtf8_len = wtf16LeToWtf8(&wtf8_buf, wtf16);

const wtf8 = wtf8_buf[0..wtf8_len];

// Converting to WTF-16 and back results in well-formed WTF-8,

// but it does not match the input WTF-8

try testing.expectEqualSlices(u8, "\xf0\x9f\x92\xa9", wtf8);

}

fn testRoundtripWtf8(wtf8: []const u8) !void {

// Buffer

{

var wtf16_buf: [32]u16 = undefined;

const wtf16_len = try wtf8ToWtf16Le(&wtf16_buf, wtf8);

const wtf16 = wtf16_buf[0..wtf16_len];

var roundtripped_buf: [32]u8 = undefined;

const roundtripped_len = wtf16LeToWtf8(&roundtripped_buf, wtf16);

const roundtripped = roundtripped_buf[0..roundtripped_len];

try testing.expectEqualSlices(u8, wtf8, roundtripped);

}

// Alloc

{

const wtf16 = try wtf8ToWtf16LeAlloc(testing.allocator, wtf8);

defer testing.allocator.free(wtf16);

const roundtripped = try wtf16LeToWtf8Alloc(testing.allocator, wtf16);

defer testing.allocator.free(roundtripped);

try testing.expectEqualSlices(u8, wtf8, roundtripped);

}

// AllocZ

{

const wtf16 = try wtf8ToWtf16LeAllocZ(testing.allocator, wtf8);

defer testing.allocator.free(wtf16);

const roundtripped = try wtf16LeToWtf8AllocZ(testing.allocator, wtf16);

defer testing.allocator.free(roundtripped);

try testing.expectEqualSlices(u8, wtf8, roundtripped);

}

test "well-formed WTF-8 roundtrips" {

try testRoundtripWtf8("\xed\x9f\xbf"); // not a surrogate half

try testRoundtripWtf8("\xed\xa0\xbd"); // high surrogate

try testRoundtripWtf8("\xed\xb2\xa9"); // low surrogate

try testRoundtripWtf8("\xed\xa0\xbd \xed\xb2\xa9"); // <high surrogate><space><low surrogate>

try testRoundtripWtf8("\xed\xa0\x80\xed\xaf\xbf"); // <high surrogate><high surrogate>

try testRoundtripWtf8("\xed\xa0\x80\xee\x80\x80"); // <high surrogate><not surrogate>

try testRoundtripWtf8("\xed\x9f\xbf\xed\xb0\x80"); // <not surrogate><low surrogate>

try testRoundtripWtf8("a\xed\xb0\x80"); // <not surrogate><low surrogate>

try testRoundtripWtf8("\xf0\x9f\x92\xa9"); // U+1F4A9, encoded as a surrogate pair in WTF-16

}

fn testRoundtripWtf16(wtf16le: []const u16) !void {

// Buffer

{

var wtf8_buf: [32]u8 = undefined;

const wtf8_len = wtf16LeToWtf8(&wtf8_buf, wtf16le);

const wtf8 = wtf8_buf[0..wtf8_len];

var roundtripped_buf: [32]u16 = undefined;

const roundtripped_len = try wtf8ToWtf16Le(&roundtripped_buf, wtf8);

const roundtripped = roundtripped_buf[0..roundtripped_len];

try testing.expectEqualSlices(u16, wtf16le, roundtripped);

}

// Alloc

{

const wtf8 = try wtf16LeToWtf8Alloc(testing.allocator, wtf16le);

defer testing.allocator.free(wtf8);

const roundtripped = try wtf8ToWtf16LeAlloc(testing.allocator, wtf8);

defer testing.allocator.free(roundtripped);

try testing.expectEqualSlices(u16, wtf16le, roundtripped);

}

// AllocZ

{

const wtf8 = try wtf16LeToWtf8AllocZ(testing.allocator, wtf16le);

defer testing.allocator.free(wtf8);

const roundtripped = try wtf8ToWtf16LeAllocZ(testing.allocator, wtf8);

defer testing.allocator.free(roundtripped);

try testing.expectEqualSlices(u16, wtf16le, roundtripped);

}

test "well-formed WTF-16 roundtrips" {

try testRoundtripWtf16(&[_]u16{

std.mem.nativeToLittle(u16, 0xD83D), // high surrogate

std.mem.nativeToLittle(u16, 0xDCA9), // low surrogate

});

try testRoundtripWtf16(&[_]u16{

std.mem.nativeToLittle(u16, 0xD83D), // high surrogate

std.mem.nativeToLittle(u16, ' '), // not surrogate

std.mem.nativeToLittle(u16, 0xDCA9), // low surrogate

});

try testRoundtripWtf16(&[_]u16{

std.mem.nativeToLittle(u16, 0xD800), // high surrogate

std.mem.nativeToLittle(u16, 0xDBFF), // high surrogate

});

try testRoundtripWtf16(&[_]u16{

std.mem.nativeToLittle(u16, 0xD800), // high surrogate

std.mem.nativeToLittle(u16, 0xE000), // not surrogate

});

try testRoundtripWtf16(&[_]u16{

std.mem.nativeToLittle(u16, 0xD7FF), // not surrogate

std.mem.nativeToLittle(u16, 0xDC00), // low surrogate

});

try testRoundtripWtf16(&[_]u16{

std.mem.nativeToLittle(u16, 0x61), // not surrogate

std.mem.nativeToLittle(u16, 0xDC00), // low surrogate

});

try testRoundtripWtf16(&[_]u16{

std.mem.nativeToLittle(u16, 0xDC00), // low surrogate

});

}

lib/std/zig/system.zig added: 1932, removed: 517, total 1415

@@ -639,7 +639,8 @@ pub fn abiAndDynamicLinkerFromFile(

var link_buf: [std.os.PATH_MAX]u8 = undefined;

const link_name = std.os.readlink(dl_path, &link_buf) catch |err| switch (err) {

error.NameTooLong => unreachable,

error.InvalidUtf8 => unreachable, // Windows only

error.InvalidUtf8 => unreachable, // WASI only

error.InvalidWtf8 => unreachable, // Windows only

error.BadPathName => unreachable, // Windows only

error.UnsupportedReparsePointType => unreachable, // Windows only

error.NetworkNotFound => unreachable, // Windows only

@@ -730,7 +731,8 @@ test glibcVerFromLinkName {

fn glibcVerFromRPath(rpath: []const u8) !std.SemanticVersion {

var dir = fs.cwd().openDir(rpath, .{}) catch |err| switch (err) {

error.NameTooLong => unreachable,

error.InvalidUtf8 => unreachable,

error.InvalidUtf8 => unreachable, // WASI only

error.InvalidWtf8 => unreachable, // Windows-only

error.BadPathName => unreachable,

error.DeviceBusy => unreachable,

error.NetworkNotFound => unreachable, // Windows-only

@@ -761,7 +763,8 @@ fn glibcVerFromRPath(rpath: []const u8) !std.SemanticVersion {

const glibc_so_basename = "libc.so.6";

var f = dir.openFile(glibc_so_basename, .{}) catch |err| switch (err) {

error.NameTooLong => unreachable,

error.InvalidUtf8 => unreachable, // Windows only

error.InvalidUtf8 => unreachable, // WASI only

error.InvalidWtf8 => unreachable, // Windows only

error.BadPathName => unreachable, // Windows only

error.PipeBusy => unreachable, // Windows-only

error.SharingViolation => unreachable, // Windows-only

@@ -998,7 +1001,8 @@ fn detectAbiAndDynamicLinker(

error.NameTooLong => unreachable,

error.PathAlreadyExists => unreachable,

error.SharingViolation => unreachable,

error.InvalidUtf8 => unreachable,

error.InvalidUtf8 => unreachable, // WASI only

error.InvalidWtf8 => unreachable, // Windows only

error.BadPathName => unreachable,

error.PipeBusy => unreachable,

error.FileLocksNotSupported => unreachable,

lib/std/zig/system/NativePaths.zig added: 1932, removed: 517, total 1415

@@ -41,7 +41,7 @@ pub fn detect(arena: Allocator, native_target: std.Target) !NativePaths {

}

} else |err| switch (err) {

error.InvalidUtf8 => {},

error.InvalidWtf8 => unreachable,

error.EnvironmentVariableNotFound => {},

error.OutOfMemory => |e| return e,

}

@@ -73,7 +73,7 @@ pub fn detect(arena: Allocator, native_target: std.Target) !NativePaths {

}

} else |err| switch (err) {

error.InvalidUtf8 => {},

error.InvalidWtf8 => unreachable,

error.EnvironmentVariableNotFound => {},

error.OutOfMemory => |e| return e,

}

lib/std/zig/system/windows.zig added: 1932, removed: 517, total 1415

@@ -160,7 +160,7 @@ fn getCpuInfoFromRegistry(core: usize, args: anytype) !void {

=> {

var buf = @field(args, field.name).value_buf;

const entry = @as(*align(1) const std.os.windows.UNICODE_STRING, @ptrCast(table[i + 1].EntryContext));

const len = try std.unicode.utf16leToUtf8(buf, entry.Buffer[0 .. entry.Length / 2]);

const len = try std.unicode.utf16LeToUtf8(buf, entry.Buffer[0 .. entry.Length / 2]);

buf[len] = 0;

src/Module.zig added: 1932, removed: 517, total 1415

@@ -2662,6 +2662,7 @@ pub fn astGenFile(mod: *Module, file: *File) !void {

}) catch |err| switch (err) {

error.NotDir => unreachable, // no dir components

error.InvalidUtf8 => unreachable, // it's a hex encoded name

error.InvalidWtf8 => unreachable, // it's a hex encoded name

error.BadPathName => unreachable, // it's a hex encoded name

error.NameTooLong => unreachable, // it's a fixed size name

error.PipeBusy => unreachable, // it's not a pipe

src/libc_installation.zig added: 1932, removed: 517, total 1415

@@ -246,7 +246,10 @@ pub const LibCInstallation = struct {

const allocator = args.allocator;

// Detect infinite loops.

var env_map = try std.process.getEnvMap(allocator);

var env_map = std.process.getEnvMap(allocator) catch |err| switch (err) {

error.Unexpected => unreachable, // WASI-only

else => |e| return e,

};

defer env_map.deinit();

const skip_cc_env_var = if (env_map.get(inf_loop_env_key)) |phase| blk: {

if (std.mem.eql(u8, phase, "1")) {

@@ -572,7 +575,10 @@ fn ccPrintFileName(args: CCPrintFileNameOptions) ![:0]u8 {

const allocator = args.allocator;

// Detect infinite loops.

var env_map = try std.process.getEnvMap(allocator);

var env_map = std.process.getEnvMap(allocator) catch |err| switch (err) {

error.Unexpected => unreachable, // WASI-only

else => |e| return e,

};

defer env_map.deinit();

const skip_cc_env_var = if (env_map.get(inf_loop_env_key)) |phase| blk: {

if (std.mem.eql(u8, phase, "1")) {

src/main.zig added: 1932, removed: 517, total 1415

@@ -5756,7 +5756,7 @@ fn readSourceFileToEndAlloc(

// If the file starts with a UTF-16 little endian BOM, translate it to UTF-8

if (mem.startsWith(u8, source_code, "\xff\xfe")) {

const source_code_utf16_le = mem.bytesAsSlice(u16, source_code);

const source_code_utf8 = std.unicode.utf16leToUtf8AllocZ(allocator, source_code_utf16_le) catch |err| switch (err) {

const source_code_utf8 = std.unicode.utf16LeToUtf8AllocZ(allocator, source_code_utf16_le) catch |err| switch (err) {

error.DanglingSurrogateHalf => error.UnsupportedEncoding,

error.ExpectedSecondSurrogateHalf => error.UnsupportedEncoding,

error.UnexpectedSecondSurrogateHalf => error.UnsupportedEncoding,

src/windows_sdk.zig added: 1932, removed: 517, total 1415

@@ -84,26 +84,26 @@ fn iterateAndFilterBySemVer(

return dirs_filtered_slice;

}

const RegistryUtf8 = struct {

const RegistryWtf8 = struct {

key: windows.HKEY,

// Review note: this span is rendered diff text. The replaced UTF-8 lines of
// `openKey` and the WTF-8 lines that replace them appear back to back without
// +/- markers; unprefixed closers such as `};` appear to be shared context.
/// Assert that `key` is valid UTF-8 string

pub fn openKey(hkey: windows.HKEY, key: []const u8) error{KeyNotFound}!RegistryUtf8 {

const key_utf16le: [:0]const u16 = key_utf16le: {

var key_utf16le_buf: [RegistryUtf16Le.key_name_max_len]u16 = undefined;

const key_utf16le_len: usize = std.unicode.utf8ToUtf16Le(key_utf16le_buf[0..], key) catch |err| switch (err) {

error.InvalidUtf8 => unreachable,

/// Assert that `key` is valid WTF-8 string
///
/// Converts `key` with `std.unicode.wtf8ToWtf16Le` into a stack buffer of
/// `RegistryWtf16Le.key_name_max_len` 16-bit units, NUL-terminates it, and
/// forwards it together with `hkey` to `RegistryWtf16Le.openKey`. The returned
/// wrapper carries the `key` handle produced by that call.
/// `error.KeyNotFound` is propagated from `RegistryWtf16Le.openKey`.

pub fn openKey(hkey: windows.HKEY, key: []const u8) error{KeyNotFound}!RegistryWtf8 {

const key_wtf16le: [:0]const u16 = key_wtf16le: {

var key_wtf16le_buf: [RegistryWtf16Le.key_name_max_len]u16 = undefined;

const key_wtf16le_len: usize = std.unicode.wtf8ToWtf16Le(key_wtf16le_buf[0..], key) catch |err| switch (err) {

// Ill-formed input is a caller bug (see the doc comment), not a runtime error.
error.InvalidWtf8 => unreachable,

};

key_utf16le_buf[key_utf16le_len] = 0;

break :key_utf16le key_utf16le_buf[0..key_utf16le_len :0];

// Write the terminator so the slice below can carry a 0 sentinel.
// NOTE(review): if the conversion ever fills the whole buffer, this index is
// one past the end — confirm keys stay shorter than `key_name_max_len`.
key_wtf16le_buf[key_wtf16le_len] = 0;

break :key_wtf16le key_wtf16le_buf[0..key_wtf16le_len :0];

};

const registry_utf16le = try RegistryUtf16Le.openKey(hkey, key_utf16le);

return RegistryUtf8{ .key = registry_utf16le.key };

const registry_wtf16le = try RegistryWtf16Le.openKey(hkey, key_wtf16le);

return RegistryWtf8{ .key = registry_wtf16le.key };

}

/// Closes key, after that usage is invalid

pub fn closeKey(self: *const RegistryUtf8) void {

pub fn closeKey(self: *const RegistryWtf8) void {

const return_code_int: windows.HRESULT = windows.advapi32.RegCloseKey(self.key);

const return_code: windows.Win32Error = @enumFromInt(return_code_int);

switch (return_code) {

@@ -114,71 +114,68 @@ const RegistryUtf8 = struct {

// Review note: this span is rendered diff text. The replaced UTF-8 lines of
// `getString` and the WTF-8 lines that replace them are interleaved without
// +/- markers.
/// Get string from registry.

/// Caller owns result.
///
/// Both versions convert `subkey` and `value_name` into NUL-terminated 16-bit
/// strings held in stack buffers (`key_name_max_len` and `value_name_max_len`
/// units respectively), read the value through the 16-bit wrapper's
/// `getString`, free that intermediate value, and return a copy converted back
/// to 8-bit text that was allocated with `allocator`.

pub fn getString(self: *const RegistryUtf8, allocator: std.mem.Allocator, subkey: []const u8, value_name: []const u8) error{ OutOfMemory, ValueNameNotFound, NotAString, StringNotFound }![]u8 {

const subkey_utf16le: [:0]const u16 = subkey_utf16le: {

var subkey_utf16le_buf: [RegistryUtf16Le.key_name_max_len]u16 = undefined;

const subkey_utf16le_len: usize = std.unicode.utf8ToUtf16Le(subkey_utf16le_buf[0..], subkey) catch unreachable;

subkey_utf16le_buf[subkey_utf16le_len] = 0;

break :subkey_utf16le subkey_utf16le_buf[0..subkey_utf16le_len :0];

pub fn getString(self: *const RegistryWtf8, allocator: std.mem.Allocator, subkey: []const u8, value_name: []const u8) error{ OutOfMemory, ValueNameNotFound, NotAString, StringNotFound }![]u8 {

const subkey_wtf16le: [:0]const u16 = subkey_wtf16le: {

var subkey_wtf16le_buf: [RegistryWtf16Le.key_name_max_len]u16 = undefined;

// Any conversion failure is treated as unreachable, i.e. `subkey` is assumed
// to be well-formed and to fit the buffer.
const subkey_wtf16le_len: usize = std.unicode.wtf8ToWtf16Le(subkey_wtf16le_buf[0..], subkey) catch unreachable;

subkey_wtf16le_buf[subkey_wtf16le_len] = 0;

break :subkey_wtf16le subkey_wtf16le_buf[0..subkey_wtf16le_len :0];

};

const value_name_utf16le: [:0]const u16 = value_name_utf16le: {

var value_name_utf16le_buf: [RegistryUtf16Le.value_name_max_len]u16 = undefined;

const value_name_utf16le_len: usize = std.unicode.utf8ToUtf16Le(value_name_utf16le_buf[0..], value_name) catch unreachable;

value_name_utf16le_buf[value_name_utf16le_len] = 0;

break :value_name_utf16le value_name_utf16le_buf[0..value_name_utf16le_len :0];

const value_name_wtf16le: [:0]const u16 = value_name_wtf16le: {

var value_name_wtf16le_buf: [RegistryWtf16Le.value_name_max_len]u16 = undefined;

// Same assumption as for `subkey`: failure here is considered impossible.
const value_name_wtf16le_len: usize = std.unicode.wtf8ToWtf16Le(value_name_wtf16le_buf[0..], value_name) catch unreachable;

value_name_wtf16le_buf[value_name_wtf16le_len] = 0;

break :value_name_wtf16le value_name_wtf16le_buf[0..value_name_wtf16le_len :0];

};

const registry_utf16le = RegistryUtf16Le{ .key = self.key };

const value_utf16le = try registry_utf16le.getString(allocator, subkey_utf16le, value_name_utf16le);

defer allocator.free(value_utf16le);

// Reuse this wrapper's handle; no new key is opened here.
const registry_wtf16le = RegistryWtf16Le{ .key = self.key };

const value_wtf16le = try registry_wtf16le.getString(allocator, subkey_wtf16le, value_name_wtf16le);

// The 16-bit value is only an intermediate; it is released on every exit path.
defer allocator.free(value_wtf16le);

// Replaced version: any conversion failure other than OutOfMemory is reported
// as `error.StringNotFound`.
const value_utf8: []u8 = std.unicode.utf16leToUtf8Alloc(allocator, value_utf16le) catch |err| switch (err) {

error.OutOfMemory => return error.OutOfMemory,

else => return error.StringNotFound,

};

errdefer allocator.free(value_utf8);

// Replacing version: the conversion error is propagated directly with `try`.
// Presumably it can only be OutOfMemory, since the declared error set of this
// function is unchanged — confirm against `wtf16LeToWtf8Alloc`.
const value_wtf8: []u8 = try std.unicode.wtf16LeToWtf8Alloc(allocator, value_wtf16le);

// NOTE(review): nothing fallible follows, so this errdefer appears never to fire.
errdefer allocator.free(value_wtf8);

return value_utf8;

return value_wtf8;

}

// Review note: this span is rendered diff text. The replaced UTF-8 lines of
// `getDword` and the WTF-8 lines that replace them are interleaved without
// +/- markers.
/// Get DWORD (u32) from registry.
///
/// Both versions convert `subkey` and `value_name` into NUL-terminated 16-bit
/// strings held in stack buffers, then return whatever the 16-bit wrapper's
/// `getDword` returns for this wrapper's `key` handle. No allocation is made
/// in this function itself.

pub fn getDword(self: *const RegistryUtf8, subkey: []const u8, value_name: []const u8) error{ ValueNameNotFound, NotADword, DwordTooLong, DwordNotFound }!u32 {

const subkey_utf16le: [:0]const u16 = subkey_utf16le: {

var subkey_utf16le_buf: [RegistryUtf16Le.key_name_max_len]u16 = undefined;

const subkey_utf16le_len: usize = std.unicode.utf8ToUtf16Le(subkey_utf16le_buf[0..], subkey) catch unreachable;

subkey_utf16le_buf[subkey_utf16le_len] = 0;

break :subkey_utf16le subkey_utf16le_buf[0..subkey_utf16le_len :0];

pub fn getDword(self: *const RegistryWtf8, subkey: []const u8, value_name: []const u8) error{ ValueNameNotFound, NotADword, DwordTooLong, DwordNotFound }!u32 {

const subkey_wtf16le: [:0]const u16 = subkey_wtf16le: {

var subkey_wtf16le_buf: [RegistryWtf16Le.key_name_max_len]u16 = undefined;

// Conversion failure is treated as unreachable: `subkey` is assumed to be
// well-formed and to fit the buffer.
const subkey_wtf16le_len: usize = std.unicode.wtf8ToWtf16Le(subkey_wtf16le_buf[0..], subkey) catch unreachable;

// Terminate so the slice below can carry a 0 sentinel.
subkey_wtf16le_buf[subkey_wtf16le_len] = 0;

break :subkey_wtf16le subkey_wtf16le_buf[0..subkey_wtf16le_len :0];

};

const value_name_utf16le: [:0]const u16 = value_name_utf16le: {

var value_name_utf16le_buf: [RegistryUtf16Le.value_name_max_len]u16 = undefined;

const value_name_utf16le_len: usize = std.unicode.utf8ToUtf16Le(value_name_utf16le_buf[0..], value_name) catch unreachable;

value_name_utf16le_buf[value_name_utf16le_len] = 0;

break :value_name_utf16le value_name_utf16le_buf[0..value_name_utf16le_len :0];

const value_name_wtf16le: [:0]const u16 = value_name_wtf16le: {

var value_name_wtf16le_buf: [RegistryWtf16Le.value_name_max_len]u16 = undefined;

// Same assumption as for `subkey`.
const value_name_wtf16le_len: usize = std.unicode.wtf8ToWtf16Le(value_name_wtf16le_buf[0..], value_name) catch unreachable;

value_name_wtf16le_buf[value_name_wtf16le_len] = 0;

break :value_name_wtf16le value_name_wtf16le_buf[0..value_name_wtf16le_len :0];

};

const registry_utf16le = RegistryUtf16Le{ .key = self.key };

return try registry_utf16le.getDword(subkey_utf16le, value_name_utf16le);

// Reuse this wrapper's handle; the 16-bit wrapper performs the actual query.
const registry_wtf16le = RegistryWtf16Le{ .key = self.key };

return try registry_wtf16le.getDword(subkey_wtf16le, value_name_wtf16le);

}

// Review note: this span is rendered diff text. The replaced UTF-8 lines of
// `loadFromPath` and the WTF-8 lines that replace them are interleaved without
// +/- markers.
/// Under private space with flags:

/// KEY_QUERY_VALUE and KEY_ENUMERATE_SUB_KEYS.

/// After finishing work, call `closeKey`.
///
/// Both versions convert `absolute_path` into a NUL-terminated 16-bit string in
/// a stack buffer, pass it to the 16-bit wrapper's `loadFromPath`, and return a
/// wrapper around the `key` handle that call produced. `error.KeyNotFound` is
/// propagated from that call.

pub fn loadFromPath(absolute_path: []const u8) error{KeyNotFound}!RegistryUtf8 {

const absolute_path_utf16le: [:0]const u16 = absolute_path_utf16le: {

var absolute_path_utf16le_buf: [RegistryUtf16Le.value_name_max_len]u16 = undefined;

const absolute_path_utf16le_len: usize = std.unicode.utf8ToUtf16Le(absolute_path_utf16le_buf[0..], absolute_path) catch unreachable;

absolute_path_utf16le_buf[absolute_path_utf16le_len] = 0;

break :absolute_path_utf16le absolute_path_utf16le_buf[0..absolute_path_utf16le_len :0];

pub fn loadFromPath(absolute_path: []const u8) error{KeyNotFound}!RegistryWtf8 {

const absolute_path_wtf16le: [:0]const u16 = absolute_path_wtf16le: {

// NOTE(review): the path buffer is sized with `value_name_max_len`, not a
// path-length constant — confirm this bound is intended for file paths.
var absolute_path_wtf16le_buf: [RegistryWtf16Le.value_name_max_len]u16 = undefined;

// Conversion failure is treated as unreachable: the path is assumed to be
// well-formed and to fit the buffer.
const absolute_path_wtf16le_len: usize = std.unicode.wtf8ToWtf16Le(absolute_path_wtf16le_buf[0..], absolute_path) catch unreachable;

// Terminate so the slice below can carry a 0 sentinel.
absolute_path_wtf16le_buf[absolute_path_wtf16le_len] = 0;

break :absolute_path_wtf16le absolute_path_wtf16le_buf[0..absolute_path_wtf16le_len :0];

};

const registry_utf16le = try RegistryUtf16Le.loadFromPath(absolute_path_utf16le);

return RegistryUtf8{ .key = registry_utf16le.key };

const registry_wtf16le = try RegistryWtf16Le.loadFromPath(absolute_path_wtf16le);

return RegistryWtf8{ .key = registry_wtf16le.key };

}

};

const RegistryUtf16Le = struct {

const RegistryWtf16Le = struct {

key: windows.HKEY,

/// Includes root key (e.g. HKEY_LOCAL_MACHINE).

@@ -191,11 +188,11 @@ const RegistryUtf16Le = struct {

/// Under HKEY_LOCAL_MACHINE with flags:

/// KEY_QUERY_VALUE, KEY_WOW64_32KEY, and KEY_ENUMERATE_SUB_KEYS.

/// After finishing work, call `closeKey`.

fn openKey(hkey: windows.HKEY, key_utf16le: [:0]const u16) error{KeyNotFound}!RegistryUtf16Le {

fn openKey(hkey: windows.HKEY, key_wtf16le: [:0]const u16) error{KeyNotFound}!RegistryWtf16Le {

var key: windows.HKEY = undefined;

const return_code_int: windows.HRESULT = windows.advapi32.RegOpenKeyExW(

hkey,

key_utf16le,

key_wtf16le,

windows.KEY_QUERY_VALUE | windows.KEY_WOW64_32KEY | windows.KEY_ENUMERATE_SUB_KEYS,

&key,

@@ -207,11 +204,11 @@ const RegistryUtf16Le = struct {

else => return error.KeyNotFound,

}

return RegistryUtf16Le{ .key = key };

return RegistryWtf16Le{ .key = key };

}

/// Closes key, after that usage is invalid

fn closeKey(self: *const RegistryUtf16Le) void {

fn closeKey(self: *const RegistryWtf16Le) void {

const return_code_int: windows.HRESULT = windows.advapi32.RegCloseKey(self.key);

const return_code: windows.Win32Error = @enumFromInt(return_code_int);

switch (return_code) {

@@ -221,25 +218,25 @@ const RegistryUtf16Le = struct {

}

/// Get string ([:0]const u16) from registry.

fn getString(self: *const RegistryUtf16Le, allocator: std.mem.Allocator, subkey_utf16le: [:0]const u16, value_name_utf16le: [:0]const u16) error{ OutOfMemory, ValueNameNotFound, NotAString, StringNotFound }![]const u16 {

fn getString(self: *const RegistryWtf16Le, allocator: std.mem.Allocator, subkey_wtf16le: [:0]const u16, value_name_wtf16le: [:0]const u16) error{ OutOfMemory, ValueNameNotFound, NotAString, StringNotFound }![]const u16 {

var actual_type: windows.ULONG = undefined;

// Calculating length to allocate

var value_utf16le_buf_size: u32 = 0; // in bytes, including any terminating NUL character or characters.

var value_wtf16le_buf_size: u32 = 0; // in bytes, including any terminating NUL character or characters.

var return_code_int: windows.HRESULT = windows.advapi32.RegGetValueW(

self.key,

subkey_utf16le,

value_name_utf16le,

subkey_wtf16le,

value_name_wtf16le,

RRF.RT_REG_SZ,

&actual_type,

null,

&value_utf16le_buf_size,

&value_wtf16le_buf_size,

);

// Check returned code and type

var return_code: windows.Win32Error = @enumFromInt(return_code_int);

switch (return_code) {

.SUCCESS => std.debug.assert(value_utf16le_buf_size != 0),

.SUCCESS => std.debug.assert(value_wtf16le_buf_size != 0),

.MORE_DATA => unreachable, // We are only reading length

.FILE_NOT_FOUND => return error.ValueNameNotFound,

.INVALID_PARAMETER => unreachable, // We didn't combine RRF.SUBKEY_WOW6464KEY and RRF.SUBKEY_WOW6432KEY

@@ -250,17 +247,17 @@ const RegistryUtf16Le = struct {

else => return error.NotAString,

}

const value_utf16le_buf: []u16 = try allocator.alloc(u16, std.math.divCeil(u32, value_utf16le_buf_size, 2) catch unreachable);

errdefer allocator.free(value_utf16le_buf);

const value_wtf16le_buf: []u16 = try allocator.alloc(u16, std.math.divCeil(u32, value_wtf16le_buf_size, 2) catch unreachable);

errdefer allocator.free(value_wtf16le_buf);

return_code_int = windows.advapi32.RegGetValueW(

self.key,

subkey_utf16le,

value_name_utf16le,

subkey_wtf16le,

value_name_wtf16le,

RRF.RT_REG_SZ,

&actual_type,

value_utf16le_buf.ptr,

&value_utf16le_buf_size,

value_wtf16le_buf.ptr,

&value_wtf16le_buf_size,

);

// Check returned code and (just in case) type again.

@@ -277,28 +274,28 @@ const RegistryUtf16Le = struct {

else => return error.NotAString,

}

const value_utf16le: []const u16 = value_utf16le: {

const value_wtf16le: []const u16 = value_wtf16le: {

// note(bratishkaerik): somehow returned value in `buf_len` is overestimated by Windows and contains extra space

// we will just search for zero termination and forget length

// Windows sure is strange

const value_utf16le_overestimated: [*:0]const u16 = @ptrCast(value_utf16le_buf.ptr);

break :value_utf16le std.mem.span(value_utf16le_overestimated);

const value_wtf16le_overestimated: [*:0]const u16 = @ptrCast(value_wtf16le_buf.ptr);

break :value_wtf16le std.mem.span(value_wtf16le_overestimated);

};

_ = allocator.resize(value_utf16le_buf, value_utf16le.len);

return value_utf16le;

_ = allocator.resize(value_wtf16le_buf, value_wtf16le.len);

return value_wtf16le;

}

/// Get DWORD (u32) from registry.

fn getDword(self: *const RegistryUtf16Le, subkey_utf16le: [:0]const u16, value_name_utf16le: [:0]const u16) error{ ValueNameNotFound, NotADword, DwordTooLong, DwordNotFound }!u32 {

fn getDword(self: *const RegistryWtf16Le, subkey_wtf16le: [:0]const u16, value_name_wtf16le: [:0]const u16) error{ ValueNameNotFound, NotADword, DwordTooLong, DwordNotFound }!u32 {

var actual_type: windows.ULONG = undefined;

var reg_size: u32 = @sizeOf(u32);

var reg_value: u32 = 0;

const return_code_int: windows.HRESULT = windows.advapi32.RegGetValueW(

self.key,

subkey_utf16le,

value_name_utf16le,

subkey_wtf16le,

value_name_wtf16le,

RRF.RT_REG_DWORD,

&actual_type,

&reg_value,

@@ -324,11 +321,11 @@ const RegistryUtf16Le = struct {

/// Under private space with flags:

/// KEY_QUERY_VALUE and KEY_ENUMERATE_SUB_KEYS.

/// After finishing work, call `closeKey`.

fn loadFromPath(absolute_path_as_utf16le: [:0]const u16) error{KeyNotFound}!RegistryUtf16Le {

fn loadFromPath(absolute_path_as_wtf16le: [:0]const u16) error{KeyNotFound}!RegistryWtf16Le {

var key: windows.HKEY = undefined;

const return_code_int: windows.HRESULT = std.os.windows.advapi32.RegLoadAppKeyW(

absolute_path_as_utf16le,

absolute_path_as_wtf16le,

&key,

windows.KEY_QUERY_VALUE | windows.KEY_ENUMERATE_SUB_KEYS,

@@ -340,7 +337,7 @@ const RegistryUtf16Le = struct {

else => return error.KeyNotFound,

}

return RegistryUtf16Le{ .key = key };

return RegistryWtf16Le{ .key = key };

}

};

@@ -352,7 +349,7 @@ pub const Windows10Sdk = struct {

/// Caller owns the result's fields.

/// After finishing work, call `free(allocator)`.

fn find(allocator: std.mem.Allocator) error{ OutOfMemory, Windows10SdkNotFound, PathTooLong, VersionTooLong }!Windows10Sdk {

const v10_key = RegistryUtf8.openKey(windows.HKEY_LOCAL_MACHINE, "SOFTWARE\\Microsoft\\Microsoft SDKs\\Windows\\v10.0") catch |err| switch (err) {

const v10_key = RegistryWtf8.openKey(windows.HKEY_LOCAL_MACHINE, "SOFTWARE\\Microsoft\\Microsoft SDKs\\Windows\\v10.0") catch |err| switch (err) {

error.KeyNotFound => return error.Windows10SdkNotFound,

};

defer v10_key.closeKey();

@@ -413,11 +410,11 @@ pub const Windows10Sdk = struct {

/// Check whether this version is enumerated in registry.

fn isValidVersion(windows10sdk: *const Windows10Sdk) bool {

var buf: [std.fs.MAX_PATH_BYTES]u8 = undefined;

const reg_query_as_utf8 = std.fmt.bufPrint(buf[0..], "{s}\\{s}\\Installed Options", .{ WINDOWS_KIT_REG_KEY, windows10sdk.version }) catch |err| switch (err) {

const reg_query_as_wtf8 = std.fmt.bufPrint(buf[0..], "{s}\\{s}\\Installed Options", .{ WINDOWS_KIT_REG_KEY, windows10sdk.version }) catch |err| switch (err) {

error.NoSpaceLeft => return false,

};

const options_key = RegistryUtf8.openKey(windows.HKEY_LOCAL_MACHINE, reg_query_as_utf8) catch |err| switch (err) {

const options_key = RegistryWtf8.openKey(windows.HKEY_LOCAL_MACHINE, reg_query_as_wtf8) catch |err| switch (err) {

error.KeyNotFound => return false,

};

defer options_key.closeKey();

@@ -447,7 +444,7 @@ pub const Windows81Sdk = struct {

/// Find path and version of Windows 8.1 SDK.

/// Caller owns the result's fields.

/// After finishing work, call `free(allocator)`.

fn find(allocator: std.mem.Allocator, roots_key: *const RegistryUtf8) error{ OutOfMemory, Windows81SdkNotFound, PathTooLong, VersionTooLong }!Windows81Sdk {

fn find(allocator: std.mem.Allocator, roots_key: *const RegistryWtf8) error{ OutOfMemory, Windows81SdkNotFound, PathTooLong, VersionTooLong }!Windows81Sdk {

const path: []const u8 = path81: {

const path_maybe_with_trailing_slash = roots_key.getString(allocator, "", "KitsRoot81") catch |err| switch (err) {

error.NotAString => return error.Windows81SdkNotFound,

@@ -523,7 +520,7 @@ pub const ZigWindowsSDK = struct {

if (builtin.os.tag != .windows) return error.NotFound;

//note(dimenus): If this key doesn't exist, neither the Win 8 SDK nor the Win 10 SDK is installed

const roots_key = RegistryUtf8.openKey(windows.HKEY_LOCAL_MACHINE, WINDOWS_KIT_REG_KEY) catch |err| switch (err) {

const roots_key = RegistryWtf8.openKey(windows.HKEY_LOCAL_MACHINE, WINDOWS_KIT_REG_KEY) catch |err| switch (err) {

error.KeyNotFound => return error.NotFound,

};

defer roots_key.closeKey();

@@ -583,7 +580,7 @@ pub const ZigWindowsSDK = struct {

const MsvcLibDir = struct {

fn findInstancesDirViaCLSID(allocator: std.mem.Allocator) error{ OutOfMemory, PathNotFound }!std.fs.Dir {

const setup_configuration_clsid = "{177f0c4a-1cd3-4de7-a32c-71dbbb9fa36d}";

const setup_config_key = RegistryUtf8.openKey(windows.HKEY_CLASSES_ROOT, "CLSID\\" ++ setup_configuration_clsid) catch |err| switch (err) {

const setup_config_key = RegistryWtf8.openKey(windows.HKEY_CLASSES_ROOT, "CLSID\\" ++ setup_configuration_clsid) catch |err| switch (err) {

error.KeyNotFound => return error.PathNotFound,

};

defer setup_config_key.closeKey();

@@ -805,13 +802,13 @@ const MsvcLibDir = struct {

for (vs_versions) |vs_version| allocator.free(vs_version);

allocator.free(vs_versions);

}

var config_subkey_buf: [RegistryUtf16Le.key_name_max_len * 2]u8 = undefined;

var config_subkey_buf: [RegistryWtf16Le.key_name_max_len * 2]u8 = undefined;

const source_directories: []const u8 = source_directories: for (vs_versions) |vs_version| {

const privateregistry_absolute_path = std.fs.path.join(allocator, &.{ visualstudio_folder_path, vs_version, "privateregistry.bin" }) catch continue;

defer allocator.free(privateregistry_absolute_path);

if (!std.fs.path.isAbsolute(privateregistry_absolute_path)) continue;

const visualstudio_registry = RegistryUtf8.loadFromPath(privateregistry_absolute_path) catch continue;

const visualstudio_registry = RegistryWtf8.loadFromPath(privateregistry_absolute_path) catch continue;

defer visualstudio_registry.closeKey();

const config_subkey = std.fmt.bufPrint(config_subkey_buf[0..], "Software\\Microsoft\\VisualStudio\\{s}_Config", .{vs_version}) catch unreachable;

@@ -894,7 +891,7 @@ const MsvcLibDir = struct {

}

const vs7_key = RegistryUtf8.openKey(windows.HKEY_LOCAL_MACHINE, "SOFTWARE\\Microsoft\\VisualStudio\\SxS\\VS7") catch return error.PathNotFound;

const vs7_key = RegistryWtf8.openKey(windows.HKEY_LOCAL_MACHINE, "SOFTWARE\\Microsoft\\VisualStudio\\SxS\\VS7") catch return error.PathNotFound;

defer vs7_key.closeKey();

try_vs7_key: {

const path_maybe_with_trailing_slash = vs7_key.getString(allocator, "", "14.0") catch |err| switch (err) {

test/standalone/windows_spawn/main.zig added: 1932, removed: 517, total 1415

@@ -17,7 +17,7 @@ pub fn main() anyerror!void {

const tmp_absolute_path = try tmp.dir.realpathAlloc(allocator, ".");

defer allocator.free(tmp_absolute_path);

const tmp_absolute_path_w = try std.unicode.utf8ToUtf16LeWithNull(allocator, tmp_absolute_path);

const tmp_absolute_path_w = try std.unicode.utf8ToUtf16LeAllocZ(allocator, tmp_absolute_path);

defer allocator.free(tmp_absolute_path_w);

const cwd_absolute_path = try std.fs.cwd().realpathAlloc(allocator, ".");

defer allocator.free(cwd_absolute_path);