srctree

Andrew Kelley parent ff18103e cffe1999 b78b2689
Merge pull request #19655 from squeek502/windows-argv-post-2008

ArgIteratorWindows: Match post-2008 C runtime rather than CommandLineToArgvW

inlinesplit
lib/std/process.zig added: 510, removed: 92, total 418
@@ -625,11 +625,22 @@ pub const ArgIteratorWasi = struct {
};
 
/// Iterator that implements the Windows command-line parsing algorithm.
/// The implementation is intended to be compatible with the post-2008 C runtime,
/// but is *not* intended to be compatible with `CommandLineToArgvW` since
/// `CommandLineToArgvW` uses the pre-2008 parsing rules.
///
/// This iterator faithfully implements the parsing behavior observed in `CommandLineToArgvW` with
/// This iterator faithfully implements the parsing behavior observed from the C runtime with
/// one exception: if the command-line string is empty, the iterator will immediately complete
/// without returning any arguments (whereas `CommandLineArgvW` will return a single argument
/// without returning any arguments (whereas the C runtime will return a single argument
/// representing the name of the current executable).
///
/// The essential parts of the algorithm are described in Microsoft's documentation:
///
/// - https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-170#parsing-c-command-line-arguments
///
/// David Deley explains some additional undocumented quirks in great detail:
///
/// - https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES
pub const ArgIteratorWindows = struct {
allocator: Allocator,
/// Owned by the iterator.
@@ -686,6 +697,51 @@ pub const ArgIteratorWindows = struct {
fn emitCharacter(self: *ArgIteratorWindows, char: u8) void {
    // Store the byte at the current end of the argument and advance.
    const write_index = self.end;
    self.buffer[write_index] = char;
    self.end = write_index + 1;

    // WTF-8 is emitted one byte at a time, so two surrogate codepoints
    // that together form a valid surrogate pair can end up adjacent in
    // the output even though they were not adjacent in the input. To keep
    // the output well-formed WTF-8
    // (https://simonsapin.github.io/wtf-8/#concatenating), such a pair
    // must be replaced by the UTF-8 encoding of the codepoint it encodes.
    //
    // Example: the WTF-16 command line
    //   "<0xD801>"<0xDC37>
    // is converted to WTF-8 in `cmd_line` as
    //   "<0xED><0xA0><0x81>"<0xED><0xB0><0xB7>
    // Once the quotes drop out during parsing, a naive emit would produce
    //   <0xED><0xA0><0x81><0xED><0xB0><0xB7>
    // whereas the correct output is U+10437 (𐐷) encoded in UTF-8:
    //   <0xF0><0x90><0x90><0xB7>
    self.concatSurrogatePair();
}
 
fn concatSurrogatePair(self: *ArgIteratorWindows) void {
    // A surrogate codepoint always takes 3 bytes in WTF-8, so the current
    // argument must hold at least 6 bytes before a pair can exist.
    const pair_len = 6;
    if (self.end - self.start < pair_len) return;

    const tail = self.buffer[self.end - pair_len .. self.end];
    const view = std.unicode.Wtf8View.init(tail) catch return;
    var codepoints = view.iterator();

    // Bail out unless the tail decodes to <high surrogate><low surrogate>.
    const high = std.math.cast(u16, codepoints.nextCodepoint().?) orelse return;
    if (!std.unicode.utf16IsHighSurrogate(high)) return;
    const low = std.math.cast(u16, codepoints.nextCodepoint().?) orelse return;
    if (!std.unicode.utf16IsLowSurrogate(low)) return;

    // The pair is valid: re-encode it as UTF-8 in place over the bytes of
    // the two surrogates, then drop the bytes that are no longer needed.
    const units = [2]u16{
        std.mem.nativeToLittle(u16, high),
        std.mem.nativeToLittle(u16, low),
    };
    const written = std.unicode.utf16LeToUtf8(tail, &units) catch unreachable;
    self.end -= pair_len - written;
}
 
fn yieldArg(self: *ArgIteratorWindows) [:0]const u8 {
@@ -711,69 +767,37 @@ pub const ArgIteratorWindows = struct {
}
};
 
// The essential parts of the algorithm are described in Microsoft's documentation:
//
// - <https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args?view=msvc-170#parsing-c-command-line-arguments>
// - <https://learn.microsoft.com/en-us/windows/win32/api/shellapi/nf-shellapi-commandlinetoargvw>
//
// David Deley explains some additional undocumented quirks in great detail:
//
// - <https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULES>
//
// Code points <= U+0020 terminating an unquoted first argument was discovered independently by
// testing and observing the behavior of 'CommandLineToArgvW' on Windows 10.
 
fn nextWithStrategy(self: *ArgIteratorWindows, comptime strategy: type) strategy.T {
// The first argument (the executable name) uses different parsing rules.
if (self.index == 0) {
var char = if (self.cmd_line.len != 0) self.cmd_line[0] else 0;
switch (char) {
0 => {
// Immediately complete the iterator.
// 'CommandLineToArgvW' would return the name of the current executable here.
return strategy.eof;
},
'"' => {
// If the first character is a quote, read everything until the next quote (then
// skip that quote), or until the end of the string.
self.index += 1;
while (true) : (self.index += 1) {
char = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
switch (char) {
0 => {
return strategy.yieldArg(self);
},
'"' => {
self.index += 1;
return strategy.yieldArg(self);
},
else => {
strategy.emitCharacter(self, char);
},
if (self.cmd_line.len == 0 or self.cmd_line[0] == 0) {
// Immediately complete the iterator.
// The C runtime would return the name of the current executable here.
return strategy.eof;
}
 
var inside_quotes = false;
while (true) : (self.index += 1) {
const char = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
switch (char) {
0 => {
return strategy.yieldArg(self);
},
'"' => {
inside_quotes = !inside_quotes;
},
' ', '\t' => {
if (inside_quotes)
strategy.emitCharacter(self, char)
else {
self.index += 1;
return strategy.yieldArg(self);
}
}
},
else => {
// Otherwise, read everything until the next space or ASCII control character
// (not including DEL) (then skip that character), or until the end of the
// string. This means that if the command-line string starts with one of these
// characters, the first returned argument will be the empty string.
while (true) : (self.index += 1) {
char = if (self.index != self.cmd_line.len) self.cmd_line[self.index] else 0;
switch (char) {
0 => {
return strategy.yieldArg(self);
},
'\x01'...' ' => {
self.index += 1;
return strategy.yieldArg(self);
},
else => {
strategy.emitCharacter(self, char);
},
}
}
},
},
else => {
strategy.emitCharacter(self, char);
},
}
}
}
 
@@ -791,9 +815,10 @@ pub const ArgIteratorWindows = struct {
//
// - The end of the string always terminates the current argument.
// - When not in 'inside_quotes' mode, a space or tab terminates the current argument.
// - 2n backslashes followed by a quote emit n backslashes. If in 'inside_quotes' and the
// quote is immediately followed by a second quote, one quote is emitted and the other is
// skipped, otherwise, the quote is skipped. Finally, 'inside_quotes' is toggled.
// - 2n backslashes followed by a quote emit n backslashes (note: n can be zero).
// If in 'inside_quotes' and the quote is immediately followed by a second quote,
// one quote is emitted and the other is skipped, otherwise, the quote is skipped
// and 'inside_quotes' is toggled.
// - 2n + 1 backslashes followed by a quote emit n backslashes followed by a quote.
// - n backslashes not followed by a quote emit n backslashes.
var backslash_count: usize = 0;
@@ -826,8 +851,9 @@ pub const ArgIteratorWindows = struct {
{
strategy.emitCharacter(self, '"');
self.index += 1;
} else {
inside_quotes = !inside_quotes;
}
inside_quotes = !inside_quotes;
}
},
'\\' => {
@@ -1215,10 +1241,10 @@ test ArgIteratorWindows {
// Separators
try t("aa bb cc", &.{ "aa", "bb", "cc" });
try t("aa\tbb\tcc", &.{ "aa", "bb", "cc" });
try t("aa\nbb\ncc", &.{ "aa", "bb\ncc" });
try t("aa\r\nbb\r\ncc", &.{ "aa", "\nbb\r\ncc" });
try t("aa\rbb\rcc", &.{ "aa", "bb\rcc" });
try t("aa\x07bb\x07cc", &.{ "aa", "bb\x07cc" });
try t("aa\nbb\ncc", &.{"aa\nbb\ncc"});
try t("aa\r\nbb\r\ncc", &.{"aa\r\nbb\r\ncc"});
try t("aa\rbb\rcc", &.{"aa\rbb\rcc"});
try t("aa\x07bb\x07cc", &.{"aa\x07bb\x07cc"});
try t("aa\x7Fbb\x7Fcc", &.{"aa\x7Fbb\x7Fcc"});
try t("aa🦎bb🦎cc", &.{"aa🦎bb🦎cc"});
 
@@ -1227,22 +1253,22 @@ test ArgIteratorWindows {
try t(" aa bb ", &.{ "", "aa", "bb" });
try t("\t\t", &.{""});
try t("\t\taa\t\tbb\t\t", &.{ "", "aa", "bb" });
try t("\n\n", &.{ "", "\n" });
try t("\n\naa\n\nbb\n\n", &.{ "", "\naa\n\nbb\n\n" });
try t("\n\n", &.{"\n\n"});
try t("\n\naa\n\nbb\n\n", &.{"\n\naa\n\nbb\n\n"});
 
// Executable name with quotes/backslashes
try t("\"aa bb\tcc\ndd\"", &.{"aa bb\tcc\ndd"});
try t("\"", &.{""});
try t("\"\"", &.{""});
try t("\"\"\"", &.{ "", "" });
try t("\"\"\"\"", &.{ "", "" });
try t("\"\"\"\"\"", &.{ "", "\"" });
try t("aa\"bb\"cc\"dd", &.{"aa\"bb\"cc\"dd"});
try t("aa\"bb cc\"dd", &.{ "aa\"bb", "ccdd" });
try t("\"aa\\\"bb\"", &.{ "aa\\", "bb" });
try t("\"\"\"", &.{""});
try t("\"\"\"\"", &.{""});
try t("\"\"\"\"\"", &.{""});
try t("aa\"bb\"cc\"dd", &.{"aabbccdd"});
try t("aa\"bb cc\"dd", &.{"aabb ccdd"});
try t("\"aa\\\"bb\"", &.{"aa\\bb"});
try t("\"aa\\\\\"", &.{"aa\\\\"});
try t("aa\\\"bb", &.{"aa\\\"bb"});
try t("aa\\\\\"bb", &.{"aa\\\\\"bb"});
try t("aa\\\"bb", &.{"aa\\bb"});
try t("aa\\\\\"bb", &.{"aa\\\\bb"});
 
// Arguments with quotes/backslashes
try t(". \"aa bb\tcc\ndd\"", &.{ ".", "aa bb\tcc\ndd" });
@@ -1252,29 +1278,66 @@ test ArgIteratorWindows {
try t(". \"\"", &.{ ".", "" });
try t(". \"\"\"", &.{ ".", "\"" });
try t(". \"\"\"\"", &.{ ".", "\"" });
try t(". \"\"\"\"\"", &.{ ".", "\"" });
try t(". \"\"\"\"\"", &.{ ".", "\"\"" });
try t(". \"\"\"\"\"\"", &.{ ".", "\"\"" });
try t(". \" \"", &.{ ".", " " });
try t(". \" \"\"", &.{ ".", " \"" });
try t(". \" \"\"\"", &.{ ".", " \"" });
try t(". \" \"\"\"\"", &.{ ".", " \"" });
try t(". \" \"\"\"\"", &.{ ".", " \"\"" });
try t(". \" \"\"\"\"\"", &.{ ".", " \"\"" });
try t(". \" \"\"\"\"\"\"", &.{ ".", " \"\"" });
try t(". \" \"\"\"\"\"\"", &.{ ".", " \"\"\"" });
try t(". \\\"", &.{ ".", "\"" });
try t(". \\\"\"", &.{ ".", "\"" });
try t(". \\\"\"\"", &.{ ".", "\"" });
try t(". \\\"\"\"\"", &.{ ".", "\"\"" });
try t(". \\\"\"\"\"\"", &.{ ".", "\"\"" });
try t(". \\\"\"\"\"\"\"", &.{ ".", "\"\"" });
try t(". \\\"\"\"\"\"\"", &.{ ".", "\"\"\"" });
try t(". \" \\\"", &.{ ".", " \"" });
try t(". \" \\\"\"", &.{ ".", " \"" });
try t(". \" \\\"\"\"", &.{ ".", " \"\"" });
try t(". \" \\\"\"\"\"", &.{ ".", " \"\"" });
try t(". \" \\\"\"\"\"\"", &.{ ".", " \"\"" });
try t(". \" \\\"\"\"\"\"", &.{ ".", " \"\"\"" });
try t(". \" \\\"\"\"\"\"\"", &.{ ".", " \"\"\"" });
try t(". aa\\bb\\\\cc\\\\\\dd", &.{ ".", "aa\\bb\\\\cc\\\\\\dd" });
try t(". \\\\\\\"aa bb\"", &.{ ".", "\\\"aa", "bb" });
try t(". \\\\\\\\\"aa bb\"", &.{ ".", "\\\\aa bb" });
 
// From https://learn.microsoft.com/en-us/cpp/cpp/main-function-command-line-args#results-of-parsing-command-lines
try t(
\\foo.exe "abc" d e
, &.{ "foo.exe", "abc", "d", "e" });
try t(
\\foo.exe a\\b d"e f"g h
, &.{ "foo.exe", "a\\\\b", "de fg", "h" });
try t(
\\foo.exe a\\\"b c d
, &.{ "foo.exe", "a\\\"b", "c", "d" });
try t(
\\foo.exe a\\\\"b c" d e
, &.{ "foo.exe", "a\\\\b c", "d", "e" });
try t(
\\foo.exe a"b"" c d
, &.{ "foo.exe", "ab\" c d" });
 
// From https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULESEX
try t("foo.exe CallMeIshmael", &.{ "foo.exe", "CallMeIshmael" });
try t("foo.exe \"Call Me Ishmael\"", &.{ "foo.exe", "Call Me Ishmael" });
try t("foo.exe Cal\"l Me I\"shmael", &.{ "foo.exe", "Call Me Ishmael" });
try t("foo.exe CallMe\\\"Ishmael", &.{ "foo.exe", "CallMe\"Ishmael" });
try t("foo.exe \"CallMe\\\"Ishmael\"", &.{ "foo.exe", "CallMe\"Ishmael" });
try t("foo.exe \"Call Me Ishmael\\\\\"", &.{ "foo.exe", "Call Me Ishmael\\" });
try t("foo.exe \"CallMe\\\\\\\"Ishmael\"", &.{ "foo.exe", "CallMe\\\"Ishmael" });
try t("foo.exe a\\\\\\b", &.{ "foo.exe", "a\\\\\\b" });
try t("foo.exe \"a\\\\\\b\"", &.{ "foo.exe", "a\\\\\\b" });
 
// Surrogate pair encoding of 𐐷 separated by quotes.
// Encoded as WTF-16:
// "<0xD801>"<0xDC37>
// Encoded as WTF-8:
// "<0xED><0xA0><0x81>"<0xED><0xB0><0xB7>
// During parsing, the quotes drop out and the surrogate pair
// should end up encoded as its normal UTF-8 representation.
try t("foo.exe \"\xed\xa0\x81\"\xed\xb0\xb7", &.{ "foo.exe", "𐐷" });
}
 
fn testArgIteratorWindows(cmd_line: []const u8, expected_args: []const []const u8) !void {
 
test/standalone/build.zig.zon added: 510, removed: 92, total 418
@@ -104,6 +104,9 @@
.windows_spawn = .{
.path = "windows_spawn",
},
.windows_argv = .{
.path = "windows_argv",
},
.self_exe_symlink = .{
.path = "self_exe_symlink",
},
 
filename was Deleted added: 510, removed: 92, total 418
@@ -0,0 +1,19 @@
Tests that Zig's `std.process.ArgIteratorWindows` is compatible with both the MSVC and MinGW C runtimes' argv splitting algorithms.
 
The method of testing is:
- Compile a C file with `wmain` as its entry point
- The C `wmain` calls a Zig-implemented `verify` function that takes the `argv` from `wmain` and compares it to the argv gotten from `std.process.argsAlloc` (which takes `kernel32.GetCommandLineW()` and splits it)
- The compiled C program is spawned continuously as a child process by the implementation in `fuzz.zig` with randomly generated command lines
+ On Windows, the 'application name' and the 'command line' are disjoint concepts. That is, you can spawn `foo.exe` but set the command line to `bar.exe`, and `CreateProcessW` will spawn `foo.exe` but `argv[0]` will be `bar.exe`. This quirk allows us to test arbitrary `argv[0]` values as well which otherwise wouldn't be possible.
 
Note: This is intentionally testing against the C runtime argv splitting and *not* [`CommandLineToArgvW`](https://learn.microsoft.com/en-us/windows/win32/api/shellapi/nf-shellapi-commandlinetoargvw), since the C runtime argv splitting was updated in 2008 but `CommandLineToArgvW` still uses the pre-2008 algorithm (which differs in both `argv[0]` rules and `""`; see [here](https://daviddeley.com/autohotkey/parameters/parameters.htm#WINCRULESDOC) for details).
 
---
 
In addition to being run during `zig build test-standalone`, this test can be run on its own via `zig build test` from within this directory.
 
When run on its own:
- `-Diterations=<num>` can be used to set the max fuzzing iterations, and `-Diterations=0` can be used to fuzz indefinitely
- `-Dseed=<num>` can be used to set the PRNG seed for fuzz testing. If not provided, then the seed is chosen at random when `build.zig` is run.
 
On failure, the number of iterations and the seed can be seen in the failing command, e.g. in `path\to\fuzz.exe path\to\verify-msvc.exe 100 2780392459403250529`, the iterations is `100` and the seed is `2780392459403250529`.
 
filename was Deleted added: 510, removed: 92, total 418
@@ -0,0 +1,100 @@
const std = @import("std");
const builtin = @import("builtin");
 
/// Registers the `test` step and makes it the default step.
///
/// On a Windows host the step runs the `fuzz` executable against a `verify`
/// executable built for the GNU ABI and, when an MSVC/Windows SDK
/// installation can be found, against one built for the MSVC ABI as well.
/// On any other host the step is registered but has no dependencies.
///
/// Build options:
/// - `-Diterations=<num>`: max fuzz iterations (default: 100)
/// - `-Dseed=<num>`: PRNG seed (default: drawn at random while this runs)
pub fn build(b: *std.Build) !void {
    const test_step = b.step("test", "Test it");
    b.default_step = test_step;

    if (builtin.os.tag != .windows) return;

    const optimize: std.builtin.OptimizeMode = .Debug;

    const verify_gnu = addVerifyExe(b, .gnu, optimize);

    const fuzz = b.addExecutable(.{
        .name = "fuzz",
        .root_source_file = .{ .path = "fuzz.zig" },
        .target = b.host,
        .optimize = optimize,
    });

    const fuzz_max_iterations = b.option(u64, "iterations", "The max fuzz iterations (default: 100)") orelse 100;
    const fuzz_iterations_arg = std.fmt.allocPrint(b.allocator, "{}", .{fuzz_max_iterations}) catch @panic("oom");

    const fuzz_seed = b.option(u64, "seed", "Seed to use for the PRNG (default: random)") orelse seed: {
        var buf: [8]u8 = undefined;
        try std.posix.getrandom(&buf);
        break :seed std.mem.readInt(u64, &buf, builtin.cpu.arch.endian());
    };
    const fuzz_seed_arg = std.fmt.allocPrint(b.allocator, "{}", .{fuzz_seed}) catch @panic("oom");

    // Both runs receive the same iteration count and seed.
    const fuzz_args = [_][]const u8{ fuzz_iterations_arg, fuzz_seed_arg };

    const run_gnu = addFuzzRun(b, fuzz, verify_gnu, "fuzz-gnu", &fuzz_args);
    test_step.dependOn(&run_gnu.step);

    // Only target the MSVC ABI if MSVC/Windows SDK is available
    const has_msvc = has_msvc: {
        const sdk = std.zig.WindowsSdk.find(b.allocator) catch |err| switch (err) {
            error.OutOfMemory => @panic("oom"),
            else => break :has_msvc false,
        };
        defer sdk.free(b.allocator);
        break :has_msvc true;
    };
    if (has_msvc) {
        const verify_msvc = addVerifyExe(b, .msvc, optimize);
        const run_msvc = addFuzzRun(b, fuzz, verify_msvc, "fuzz-msvc", &fuzz_args);
        test_step.dependOn(&run_msvc.step);
    }
}

/// Builds `lib.zig` as a static library and `verify.c` as an executable for
/// the given ABI, links the library and libc into the executable, and
/// returns the executable's compile step.
fn addVerifyExe(
    b: *std.Build,
    abi: std.Target.Abi,
    optimize: std.builtin.OptimizeMode,
) *std.Build.Step.Compile {
    const abi_name = @tagName(abi);
    const target = b.resolveTargetQuery(.{ .abi = abi });

    const lib = b.addStaticLibrary(.{
        .name = b.fmt("toargv-{s}", .{abi_name}),
        .root_source_file = .{ .path = "lib.zig" },
        .target = target,
        .optimize = optimize,
    });
    const verify = b.addExecutable(.{
        .name = b.fmt("verify-{s}", .{abi_name}),
        .target = target,
        .optimize = optimize,
    });
    verify.addCSourceFile(.{
        .file = .{ .path = "verify.c" },
        .flags = &.{ "-DUNICODE", "-D_UNICODE" },
    });
    // NOTE(review): only the GNU build sets this flag; presumably MinGW needs
    // it to use the `wmain` entry point defined in verify.c - confirm.
    if (abi == .gnu) verify.mingw_unicode_entry_point = true;
    verify.linkLibrary(lib);
    verify.linkLibC();
    return verify;
}

/// Creates a run step named `name` that invokes `fuzz` with the path of the
/// `verify` artifact followed by `args`, and expects exit code 0.
fn addFuzzRun(
    b: *std.Build,
    fuzz: *std.Build.Step.Compile,
    verify: *std.Build.Step.Compile,
    name: []const u8,
    args: []const []const u8,
) *std.Build.Step.Run {
    const run = b.addRunArtifact(fuzz);
    run.setName(name);
    run.addArtifactArg(verify);
    run.addArgs(args);
    run.expectExitCode(0);
    return run;
}
 
filename was Deleted added: 510, removed: 92, total 418
@@ -0,0 +1,159 @@
const std = @import("std");
const builtin = @import("builtin");
const windows = std.os.windows;
const Allocator = std.mem.Allocator;
 
/// Fuzz driver: repeatedly spawns the verify executable with randomly
/// generated command lines and reports every command line for which the
/// child process exits with a non-zero code.
///
/// Command-line arguments:
/// - `args[1]`: path to the verify executable (required)
/// - `args[2]`: max iterations; `0` or omitted means fuzz indefinitely
/// - `args[3]`: PRNG seed; if omitted, one is drawn at random and printed
///
/// Returns `error.MissingArgs` when the verify path is absent and
/// `error.FoundDiscrepancies` when at least one child reported a mismatch.
pub fn main() !void {
    var gpa = std.heap.GeneralPurposeAllocator(.{}){};
    defer std.debug.assert(gpa.deinit() == .ok);
    const allocator = gpa.allocator();

    const args = try std.process.argsAlloc(allocator);
    defer std.process.argsFree(allocator, args);

    if (args.len < 2) return error.MissingArgs;

    const verify_path_wtf8 = args[1];
    const verify_path_w = try std.unicode.wtf8ToWtf16LeAllocZ(allocator, verify_path_wtf8);
    defer allocator.free(verify_path_w);

    const iterations: u64 = iterations: {
        if (args.len < 3) break :iterations 0;
        break :iterations try std.fmt.parseUnsigned(u64, args[2], 10);
    };

    var rand_seed = false;
    const seed: u64 = seed: {
        if (args.len < 4) {
            rand_seed = true;
            var buf: [8]u8 = undefined;
            try std.posix.getrandom(&buf);
            break :seed std.mem.readInt(u64, &buf, builtin.cpu.arch.endian());
        }
        break :seed try std.fmt.parseUnsigned(u64, args[3], 10);
    };
    var random = std.rand.DefaultPrng.init(seed);
    const rand = random.random();

    // If the seed was not given via the CLI, then output the
    // randomly chosen seed so that this run can be reproduced
    if (rand_seed) {
        std.debug.print("rand seed: {}\n", .{seed});
    }

    var i: u64 = 0;
    var errors: u64 = 0;
    while (iterations == 0 or i < iterations) {
        const cmd_line_w = try randomCommandLineW(allocator, rand);
        defer allocator.free(cmd_line_w);

        // avoid known difference for 0-length command lines
        // (skipped command lines do not count as an iteration)
        if (cmd_line_w.len == 0 or cmd_line_w[0] == '\x00') continue;

        const exit_code = try spawnVerify(verify_path_w, cmd_line_w);
        if (exit_code != 0) {
            std.debug.print(">>> found discrepancy <<<\n", .{});
            const cmd_line_wtf8 = try std.unicode.wtf16LeToWtf8Alloc(allocator, cmd_line_w);
            defer allocator.free(cmd_line_wtf8);
            std.debug.print("\"{}\"\n\n", .{std.zig.fmtEscapes(cmd_line_wtf8)});

            errors += 1;
        }

        i += 1;
    }
    if (errors > 0) {
        // we never get here if iterations is 0 so we don't have to worry about that case
        std.debug.print("found {} discrepancies in {} iterations\n", .{ errors, iterations });
        return error.FoundDiscrepancies;
    }
}
 
/// Generates a random, 0-terminated, little-endian UTF-16 command line of
/// at most 256 code units. Each code unit is drawn from one of the
/// categories in `Choice`. The caller owns the returned slice.
fn randomCommandLineW(allocator: Allocator, rand: std.rand.Random) ![:0]const u16 {
    const Choice = enum {
        backslash,
        quote,
        space,
        tab,
        control,
        printable,
        non_ascii,
    };

    const num_code_units = rand.uintAtMostBiased(u16, 256);
    var code_units = try std.ArrayList(u16).initCapacity(allocator, num_code_units);
    errdefer code_units.deinit();

    var remaining = num_code_units;
    while (remaining > 0) : (remaining -= 1) {
        const code_unit: u16 = switch (rand.enumValue(Choice)) {
            .backslash => '\\',
            .quote => '"',
            .space => ' ',
            .tab => '\t',
            // 0x00 through 0x20, with the extra value 0x21 mapped to DEL.
            .control => control: {
                const byte = rand.uintAtMostBiased(u8, 0x21);
                break :control if (byte == 0x21) '\x7F' else byte;
            },
            // '!' through '~'.
            .printable => '!' + rand.uintAtMostBiased(u8, '~' - '!'),
            .non_ascii => rand.intRangeAtMostBiased(u16, 0x80, 0xFFFF),
        };
        try code_units.append(std.mem.nativeToLittle(u16, code_unit));
    }

    return code_units.toOwnedSliceSentinel(0);
}
 
/// Spawns `verify_path` with `cmd_line` as its command line, waits for the
/// child to finish, and returns the child's exit code.
fn spawnVerify(verify_path: [:0]const u16, cmd_line: [:0]const u16) !windows.DWORD {
    // Only stderr is handed to the child; stdin and stdout are left null.
    var startup_info: windows.STARTUPINFOW = .{
        .cb = @sizeOf(windows.STARTUPINFOW),
        .lpReserved = null,
        .lpDesktop = null,
        .lpTitle = null,
        .dwX = 0,
        .dwY = 0,
        .dwXSize = 0,
        .dwYSize = 0,
        .dwXCountChars = 0,
        .dwYCountChars = 0,
        .dwFillAttribute = 0,
        .dwFlags = windows.STARTF_USESTDHANDLES,
        .wShowWindow = 0,
        .cbReserved2 = 0,
        .lpReserved2 = null,
        .hStdInput = null,
        .hStdOutput = null,
        .hStdError = windows.GetStdHandle(windows.STD_ERROR_HANDLE) catch null,
    };
    var proc_info: windows.PROCESS_INFORMATION = undefined;

    // The application name and the command line are passed separately, so
    // the command line can be an arbitrary string.
    try windows.CreateProcessW(
        @constCast(verify_path.ptr),
        @constCast(cmd_line.ptr),
        null,
        null,
        windows.TRUE,
        0,
        null,
        null,
        &startup_info,
        &proc_info,
    );
    // The thread handle is not needed; only the process handle is kept.
    windows.CloseHandle(proc_info.hThread);

    const process_handle = proc_info.hProcess;
    defer windows.CloseHandle(process_handle);
    try windows.WaitForSingleObjectEx(process_handle, windows.INFINITE, false);

    var code: windows.DWORD = undefined;
    const ok = windows.kernel32.GetExitCodeProcess(process_handle, &code) != 0;
    if (!ok) return error.UnableToGetExitCode;
    return code;
}
 
filename was Deleted added: 510, removed: 92, total 418
@@ -0,0 +1,8 @@
#ifndef _LIB_H_
#define _LIB_H_

#include <windows.h>

/* Implemented in Zig (lib.zig). Takes the argc/argv received by `wmain`.
 * Returns 1 on success, 0 on failure. */
int verify(int argc, wchar_t *argv[]);

#endif
No newline at end of file
 
filename was Deleted added: 510, removed: 92, total 418
@@ -0,0 +1,59 @@
const std = @import("std");
 
/// C-callable entry point that checks the C runtime's `argv` against the
/// arguments produced by `std.process.argsAlloc`.
/// Returns 1 on success, 0 on failure
export fn verify(argc: c_int, argv: [*]const [*:0]const u16) c_int {
    const arg_count: usize = @intCast(argc);
    testArgv(argv[0..arg_count]) catch |err| switch (err) {
        // A mismatch is the only failure reported to the caller; anything
        // else aborts the process.
        error.ArgvMismatch => return 0,
        error.OutOfMemory => @panic("oom"),
        error.Overflow => @panic("bytes needed to contain args would overflow usize"),
    };
    return 1;
}
 
/// Compares `expected_args` (WTF-16, as received from the C runtime) with
/// the arguments returned by `std.process.argsAlloc`. On any difference in
/// count or content, prints a report to stderr and returns
/// `error.ArgvMismatch`.
fn testArgv(expected_args: []const [*:0]const u16) !void {
    var arena_state = std.heap.ArenaAllocator.init(std.heap.page_allocator);
    defer arena_state.deinit();
    const arena = arena_state.allocator();

    const actual_args = try std.process.argsAlloc(arena);
    // Scratch buffer reused for every WTF-16 -> WTF-8 conversion below.
    var scratch = std.ArrayList(u8).init(arena);

    var matches = actual_args.len == expected_args.len;

    // Compare the arguments that both lists have in common.
    const common_len = @min(expected_args.len, actual_args.len);
    var i: usize = 0;
    while (i < common_len) : (i += 1) {
        const actual = actual_args[i];
        scratch.clearRetainingCapacity();
        try std.unicode.wtf16LeToWtf8ArrayList(&scratch, std.mem.span(expected_args[i]));
        if (std.mem.eql(u8, scratch.items, actual)) continue;

        std.debug.print("{}: expected: \"{}\"\n", .{ i, std.zig.fmtEscapes(scratch.items) });
        std.debug.print("{}: actual: \"{}\"\n", .{ i, std.zig.fmtEscapes(actual) });
        matches = false;
    }
    if (matches) return;

    // Report whichever list has surplus arguments.
    for (expected_args[common_len..], common_len..) |extra, idx| {
        scratch.clearRetainingCapacity();
        try std.unicode.wtf16LeToWtf8ArrayList(&scratch, std.mem.span(extra));
        std.debug.print("{}: expected: \"{}\"\n", .{ idx, std.zig.fmtEscapes(scratch.items) });
    }
    for (actual_args[common_len..], common_len..) |extra, idx| {
        std.debug.print("{}: actual: \"{}\"\n", .{ idx, std.zig.fmtEscapes(extra) });
    }

    // Print the raw command line as stored in the PEB.
    const peb = std.os.windows.peb();
    const lpCmdLine: [*:0]u16 = @ptrCast(peb.ProcessParameters.CommandLine.Buffer);
    scratch.clearRetainingCapacity();
    try std.unicode.wtf16LeToWtf8ArrayList(&scratch, std.mem.span(lpCmdLine));
    std.debug.print("command line: \"{}\"\n", .{std.zig.fmtEscapes(scratch.items)});

    // Print the expected argv in the form of a Zig tuple literal.
    std.debug.print("expected argv:\n", .{});
    std.debug.print("&.{{\n", .{});
    for (expected_args) |expected| {
        scratch.clearRetainingCapacity();
        try std.unicode.wtf16LeToWtf8ArrayList(&scratch, std.mem.span(expected));
        std.debug.print(" \"{}\",\n", .{std.zig.fmtEscapes(scratch.items)});
    }
    std.debug.print("}}\n", .{});
    return error.ArgvMismatch;
}
 
filename was Deleted added: 510, removed: 92, total 418
@@ -0,0 +1,7 @@
#include <windows.h>
#include "lib.h"
 
/* Wide-character entry point: forwards argc/argv to verify() and maps its
 * result to a process exit code (0 when verify() returns non-zero, 1
 * otherwise). */
int wmain(int argc, wchar_t *argv[]) {
    return verify(argc, argv) ? 0 : 1;
}
No newline at end of file