srctree

Jacob Young parent 2fdc9e6a 2fcb2f59
Sema: implement vector coercions

These used to be lowered elementwise in air, and now are a single air instruction that can be lowered elementwise in the backend if necessary.

inline split

lib/std/unicode.zig added: 563, removed: 265, total 298

@@ -602,9 +602,9 @@ fn testUtf8IteratorOnAscii() !void {

const s = Utf8View.initComptime("abc");

var it1 = s.iterator();

try testing.expect(std.mem.eql(u8, "a", it1.nextCodepointSlice().?));

try testing.expect(std.mem.eql(u8, "b", it1.nextCodepointSlice().?));

try testing.expect(std.mem.eql(u8, "c", it1.nextCodepointSlice().?));

try testing.expect(mem.eql(u8, "a", it1.nextCodepointSlice().?));

try testing.expect(mem.eql(u8, "b", it1.nextCodepointSlice().?));

try testing.expect(mem.eql(u8, "c", it1.nextCodepointSlice().?));

try testing.expect(it1.nextCodepointSlice() == null);

var it2 = s.iterator();

@@ -632,9 +632,9 @@ fn testUtf8ViewOk() !void {

const s = Utf8View.initComptime("東京市");

var it1 = s.iterator();

try testing.expect(std.mem.eql(u8, "東", it1.nextCodepointSlice().?));

try testing.expect(std.mem.eql(u8, "京", it1.nextCodepointSlice().?));

try testing.expect(std.mem.eql(u8, "市", it1.nextCodepointSlice().?));

try testing.expect(mem.eql(u8, "東", it1.nextCodepointSlice().?));

try testing.expect(mem.eql(u8, "京", it1.nextCodepointSlice().?));

try testing.expect(mem.eql(u8, "市", it1.nextCodepointSlice().?));

try testing.expect(it1.nextCodepointSlice() == null);

var it2 = s.iterator();

@@ -772,20 +772,20 @@ fn testUtf8Peeking() !void {

const s = Utf8View.initComptime("noël");

var it = s.iterator();

try testing.expect(std.mem.eql(u8, "n", it.nextCodepointSlice().?));

try testing.expect(mem.eql(u8, "n", it.nextCodepointSlice().?));

try testing.expect(std.mem.eql(u8, "o", it.peek(1)));

try testing.expect(std.mem.eql(u8, "oë", it.peek(2)));

try testing.expect(std.mem.eql(u8, "oël", it.peek(3)));

try testing.expect(std.mem.eql(u8, "oël", it.peek(4)));

try testing.expect(std.mem.eql(u8, "oël", it.peek(10)));

try testing.expect(mem.eql(u8, "o", it.peek(1)));

try testing.expect(mem.eql(u8, "oë", it.peek(2)));

try testing.expect(mem.eql(u8, "oël", it.peek(3)));

try testing.expect(mem.eql(u8, "oël", it.peek(4)));

try testing.expect(mem.eql(u8, "oël", it.peek(10)));

try testing.expect(std.mem.eql(u8, "o", it.nextCodepointSlice().?));

try testing.expect(std.mem.eql(u8, "ë", it.nextCodepointSlice().?));

try testing.expect(std.mem.eql(u8, "l", it.nextCodepointSlice().?));

try testing.expect(mem.eql(u8, "o", it.nextCodepointSlice().?));

try testing.expect(mem.eql(u8, "ë", it.nextCodepointSlice().?));

try testing.expect(mem.eql(u8, "l", it.nextCodepointSlice().?));

try testing.expect(it.nextCodepointSlice() == null);

try testing.expect(std.mem.eql(u8, &[_]u8{}, it.peek(1)));

try testing.expect(mem.eql(u8, &[_]u8{}, it.peek(1)));

}

fn testError(bytes: []const u8, expected_err: anyerror) !void {

@@ -927,20 +927,16 @@ test "fmtUtf8" {

}

fn utf16LeToUtf8ArrayListImpl(

array_list: *std.ArrayList(u8),

result: *std.ArrayList(u8),

utf16le: []const u16,

comptime surrogates: Surrogates,

) (switch (surrogates) {

.cannot_encode_surrogate_half => Utf16LeToUtf8AllocError,

.can_encode_surrogate_half => mem.Allocator.Error,

})!void {

// optimistically guess that it will all be ascii.

try array_list.ensureTotalCapacityPrecise(utf16le.len);

assert(result.capacity >= utf16le.len);

var remaining = utf16le;

if (builtin.zig_backend != .stage2_x86_64 or

comptime (std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3) and

!std.Target.x86.featureSetHasAny(builtin.cpu.features, .{ .prefer_256_bit, .avx })))

vectorized: {

const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized;

const Chunk = @Vector(chunk_len, u16);

@@ -948,41 +944,33 @@ fn utf16LeToUtf8ArrayListImpl(

// Fast path. Check for and encode ASCII characters at the start of the input.

while (remaining.len >= chunk_len) {

const chunk: Chunk = remaining[0..chunk_len].*;

const mask: Chunk = @splat(std.mem.nativeToLittle(u16, 0x7F));

const mask: Chunk = @splat(mem.nativeToLittle(u16, 0x7F));

if (@reduce(.Or, chunk | mask != mask)) {

// found a non ASCII code unit

break;

}

const chunk_byte_len = chunk_len * 2;

const chunk_bytes: @Vector(chunk_byte_len, u8) = (std.mem.sliceAsBytes(remaining)[0..chunk_byte_len]).*;

const deinterlaced_bytes = std.simd.deinterlace(2, chunk_bytes);

const ascii_bytes: [chunk_len]u8 = deinterlaced_bytes[0];

const ascii_chunk: @Vector(chunk_len, u8) = @truncate(mem.nativeToLittle(Chunk, chunk));

// We allocated enough space to encode every UTF-16 code unit

// as ASCII, so if the entire string is ASCII then we are

// guaranteed to have enough space allocated

array_list.appendSliceAssumeCapacity(&ascii_bytes);

result.addManyAsArrayAssumeCapacity(chunk_len).* = ascii_chunk;

remaining = remaining[chunk_len..];

}

var out_index: usize = array_list.items.len;

switch (surrogates) {

.cannot_encode_surrogate_half => {

var it = Utf16LeIterator.init(remaining);

while (try it.nextCodepoint()) |codepoint| {

const utf8_len = utf8CodepointSequenceLength(codepoint) catch unreachable;

try array_list.resize(array_list.items.len + utf8_len);

assert((utf8Encode(codepoint, array_list.items[out_index..]) catch unreachable) == utf8_len);

out_index += utf8_len;

assert((utf8Encode(codepoint, try result.addManyAsSlice(utf8_len)) catch unreachable) == utf8_len);

}

.can_encode_surrogate_half => {

var it = Wtf16LeIterator.init(remaining);

while (it.nextCodepoint()) |codepoint| {

const utf8_len = utf8CodepointSequenceLength(codepoint) catch unreachable;

try array_list.resize(array_list.items.len + utf8_len);

assert((wtf8Encode(codepoint, array_list.items[out_index..]) catch unreachable) == utf8_len);

out_index += utf8_len;

assert((wtf8Encode(codepoint, try result.addManyAsSlice(utf8_len)) catch unreachable) == utf8_len);

}

@@ -990,8 +978,9 @@ fn utf16LeToUtf8ArrayListImpl(

pub const Utf16LeToUtf8AllocError = mem.Allocator.Error || Utf16LeToUtf8Error;

pub fn utf16LeToUtf8ArrayList(array_list: *std.ArrayList(u8), utf16le: []const u16) Utf16LeToUtf8AllocError!void {

return utf16LeToUtf8ArrayListImpl(array_list, utf16le, .cannot_encode_surrogate_half);

pub fn utf16LeToUtf8ArrayList(result: *std.ArrayList(u8), utf16le: []const u16) Utf16LeToUtf8AllocError!void {

try result.ensureTotalCapacityPrecise(utf16le.len);

return utf16LeToUtf8ArrayListImpl(result, utf16le, .cannot_encode_surrogate_half);

}

/// Deprecated; renamed to utf16LeToUtf8Alloc

@@ -1003,8 +992,7 @@ pub fn utf16LeToUtf8Alloc(allocator: mem.Allocator, utf16le: []const u16) Utf16L

var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len);

errdefer result.deinit();

try utf16LeToUtf8ArrayList(&result, utf16le);

try utf16LeToUtf8ArrayListImpl(&result, utf16le, .cannot_encode_surrogate_half);

return result.toOwnedSlice();

}

@@ -1017,8 +1005,7 @@ pub fn utf16LeToUtf8AllocZ(allocator: mem.Allocator, utf16le: []const u16) Utf16

var result = try std.ArrayList(u8).initCapacity(allocator, utf16le.len + 1);

errdefer result.deinit();

try utf16LeToUtf8ArrayList(&result, utf16le);

try utf16LeToUtf8ArrayListImpl(&result, utf16le, .cannot_encode_surrogate_half);

return result.toOwnedSliceSentinel(0);

}

@@ -1030,12 +1017,9 @@ fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surr

.cannot_encode_surrogate_half => Utf16LeToUtf8Error,

.can_encode_surrogate_half => error{},

})!usize {

var end_index: usize = 0;

var dest_index: usize = 0;

var remaining = utf16le;

if (builtin.zig_backend != .stage2_x86_64 or

comptime (std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3) and

!std.Target.x86.featureSetHasAny(builtin.cpu.features, .{ .prefer_256_bit, .avx })))

vectorized: {

const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized;

const Chunk = @Vector(chunk_len, u16);

@@ -1043,17 +1027,14 @@ fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surr

// Fast path. Check for and encode ASCII characters at the start of the input.

while (remaining.len >= chunk_len) {

const chunk: Chunk = remaining[0..chunk_len].*;

const mask: Chunk = @splat(std.mem.nativeToLittle(u16, 0x7F));

const mask: Chunk = @splat(mem.nativeToLittle(u16, 0x7F));

if (@reduce(.Or, chunk | mask != mask)) {

// found a non ASCII code unit

break;

}

const chunk_byte_len = chunk_len * 2;

const chunk_bytes: @Vector(chunk_byte_len, u8) = (std.mem.sliceAsBytes(remaining)[0..chunk_byte_len]).*;

const deinterlaced_bytes = std.simd.deinterlace(2, chunk_bytes);

const ascii_bytes: [chunk_len]u8 = deinterlaced_bytes[0];

@memcpy(utf8[end_index .. end_index + chunk_len], &ascii_bytes);

end_index += chunk_len;

const ascii_chunk: @Vector(chunk_len, u8) = @truncate(mem.nativeToLittle(Chunk, chunk));

utf8[dest_index..][0..chunk_len].* = ascii_chunk;

dest_index += chunk_len;

remaining = remaining[chunk_len..];

}

@@ -1062,7 +1043,7 @@ fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surr

.cannot_encode_surrogate_half => {

var it = Utf16LeIterator.init(remaining);

while (try it.nextCodepoint()) |codepoint| {

end_index += utf8Encode(codepoint, utf8[end_index..]) catch |err| switch (err) {

dest_index += utf8Encode(codepoint, utf8[dest_index..]) catch |err| switch (err) {

// The maximum possible codepoint encoded by UTF-16 is U+10FFFF,

// which is within the valid codepoint range.

error.CodepointTooLarge => unreachable,

@@ -1075,7 +1056,7 @@ fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surr

.can_encode_surrogate_half => {

var it = Wtf16LeIterator.init(remaining);

while (it.nextCodepoint()) |codepoint| {

end_index += wtf8Encode(codepoint, utf8[end_index..]) catch |err| switch (err) {

dest_index += wtf8Encode(codepoint, utf8[dest_index..]) catch |err| switch (err) {

// The maximum possible codepoint encoded by UTF-16 is U+10FFFF,

// which is within the valid codepoint range.

error.CodepointTooLarge => unreachable,

@@ -1083,7 +1064,7 @@ fn utf16LeToUtf8Impl(utf8: []u8, utf16le: []const u16, comptime surrogates: Surr

}

return end_index;

return dest_index;

}

/// Deprecated; renamed to utf16LeToUtf8

@@ -1156,18 +1137,12 @@ test utf16LeToUtf8 {

}

fn utf8ToUtf16LeArrayListImpl(array_list: *std.ArrayList(u16), utf8: []const u8, comptime surrogates: Surrogates) !void {

// optimistically guess that it will not require surrogate pairs

try array_list.ensureTotalCapacityPrecise(utf8.len);

fn utf8ToUtf16LeArrayListImpl(result: *std.ArrayList(u16), utf8: []const u8, comptime surrogates: Surrogates) !void {

assert(result.capacity >= utf8.len);

var remaining = utf8;

// Need support for std.simd.interlace

if ((builtin.zig_backend != .stage2_x86_64 or

comptime (std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3) and

!std.Target.x86.featureSetHasAny(builtin.cpu.features, .{ .prefer_256_bit, .avx }))) and

comptime !builtin.cpu.arch.isMIPS())

vectorized: {

const chunk_len = @divExact(std.simd.suggestVectorLength(u8) orelse break :vectorized, 2);

const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized;

const Chunk = @Vector(chunk_len, u8);

// Fast path. Check for and encode ASCII characters at the start of the input.

@@ -1178,9 +1153,8 @@ fn utf8ToUtf16LeArrayListImpl(array_list: *std.ArrayList(u16), utf8: []const u8,

// found a non ASCII code unit

break;

}

const zeroes: Chunk = @splat(0);

const utf16_chunk: [chunk_len * 2]u8 align(@alignOf(u16)) = std.simd.interlace(.{ chunk, zeroes });

array_list.appendSliceAssumeCapacity(std.mem.bytesAsSlice(u16, &utf16_chunk));

const utf16_chunk = mem.nativeToLittle(@Vector(chunk_len, u16), chunk);

result.addManyAsArrayAssumeCapacity(chunk_len).* = utf16_chunk;

remaining = remaining[chunk_len..];

}

@@ -1192,21 +1166,18 @@ fn utf8ToUtf16LeArrayListImpl(array_list: *std.ArrayList(u16), utf8: []const u8,

var it = view.iterator();

while (it.nextCodepoint()) |codepoint| {

if (codepoint < 0x10000) {

const short = @as(u16, @intCast(codepoint));

try array_list.append(mem.nativeToLittle(u16, short));

try result.append(mem.nativeToLittle(u16, @intCast(codepoint)));

} else {

const high = @as(u16, @intCast((codepoint - 0x10000) >> 10)) + 0xD800;

const low = @as(u16, @intCast(codepoint & 0x3FF)) + 0xDC00;

var out: [2]u16 = undefined;

out[0] = mem.nativeToLittle(u16, high);

out[1] = mem.nativeToLittle(u16, low);

try array_list.appendSlice(out[0..]);

try result.appendSlice(&.{ mem.nativeToLittle(u16, high), mem.nativeToLittle(u16, low) });

}

pub fn utf8ToUtf16LeArrayList(array_list: *std.ArrayList(u16), utf8: []const u8) error{ InvalidUtf8, OutOfMemory }!void {

return utf8ToUtf16LeArrayListImpl(array_list, utf8, .cannot_encode_surrogate_half);

pub fn utf8ToUtf16LeArrayList(result: *std.ArrayList(u16), utf8: []const u8) error{ InvalidUtf8, OutOfMemory }!void {

try result.ensureTotalCapacityPrecise(utf8.len);

return utf8ToUtf16LeArrayListImpl(result, utf8, .cannot_encode_surrogate_half);

}

pub fn utf8ToUtf16LeAlloc(allocator: mem.Allocator, utf8: []const u8) error{ InvalidUtf8, OutOfMemory }![]u16 {

@@ -1215,7 +1186,6 @@ pub fn utf8ToUtf16LeAlloc(allocator: mem.Allocator, utf8: []const u8) error{ Inv

errdefer result.deinit();

try utf8ToUtf16LeArrayListImpl(&result, utf8, .cannot_encode_surrogate_half);

return result.toOwnedSlice();

}

@@ -1228,7 +1198,6 @@ pub fn utf8ToUtf16LeAllocZ(allocator: mem.Allocator, utf8: []const u8) error{ In

errdefer result.deinit();

try utf8ToUtf16LeArrayListImpl(&result, utf8, .cannot_encode_surrogate_half);

return result.toOwnedSliceSentinel(0);

}

@@ -1239,16 +1208,11 @@ pub fn utf8ToUtf16Le(utf16le: []u16, utf8: []const u8) error{InvalidUtf8}!usize

}

pub fn utf8ToUtf16LeImpl(utf16le: []u16, utf8: []const u8, comptime surrogates: Surrogates) !usize {

var dest_i: usize = 0;

var dest_index: usize = 0;

var remaining = utf8;

// Need support for std.simd.interlace

if ((builtin.zig_backend != .stage2_x86_64 or

comptime (std.Target.x86.featureSetHas(builtin.cpu.features, .ssse3) and

!std.Target.x86.featureSetHasAny(builtin.cpu.features, .{ .prefer_256_bit, .avx }))) and

comptime !builtin.cpu.arch.isMIPS())

vectorized: {

const chunk_len = @divExact(std.simd.suggestVectorLength(u8) orelse break :vectorized, 2);

const chunk_len = std.simd.suggestVectorLength(u16) orelse break :vectorized;

const Chunk = @Vector(chunk_len, u8);

// Fast path. Check for and encode ASCII characters at the start of the input.

@@ -1259,57 +1223,60 @@ pub fn utf8ToUtf16LeImpl(utf16le: []u16, utf8: []const u8, comptime surrogates:

// found a non ASCII code unit

break;

}

const zeroes: Chunk = @splat(0);

const utf16_bytes: [chunk_len * 2]u8 align(@alignOf(u16)) = std.simd.interlace(.{ chunk, zeroes });

@memcpy(utf16le[dest_i..][0..chunk_len], std.mem.bytesAsSlice(u16, &utf16_bytes));

dest_i += chunk_len;

const utf16_chunk = mem.nativeToLittle(@Vector(chunk_len, u16), chunk);

utf16le[dest_index..][0..chunk_len].* = utf16_chunk;

dest_index += chunk_len;

remaining = remaining[chunk_len..];

}

var src_i: usize = 0;

while (src_i < remaining.len) {

const n = utf8ByteSequenceLength(remaining[src_i]) catch return switch (surrogates) {

.cannot_encode_surrogate_half => error.InvalidUtf8,

.can_encode_surrogate_half => error.InvalidWtf8,

};

const next_src_i = src_i + n;

const codepoint = switch (surrogates) {

.cannot_encode_surrogate_half => utf8Decode(remaining[src_i..next_src_i]) catch return error.InvalidUtf8,

.can_encode_surrogate_half => wtf8Decode(remaining[src_i..next_src_i]) catch return error.InvalidWtf8,

};

const view = switch (surrogates) {

.cannot_encode_surrogate_half => try Utf8View.init(remaining),

.can_encode_surrogate_half => try Wtf8View.init(remaining),

};

var it = view.iterator();

while (it.nextCodepoint()) |codepoint| {

if (codepoint < 0x10000) {

const short = @as(u16, @intCast(codepoint));

utf16le[dest_i] = mem.nativeToLittle(u16, short);

dest_i += 1;

utf16le[dest_index] = mem.nativeToLittle(u16, @intCast(codepoint));

dest_index += 1;

} else {

const high = @as(u16, @intCast((codepoint - 0x10000) >> 10)) + 0xD800;

const low = @as(u16, @intCast(codepoint & 0x3FF)) + 0xDC00;

utf16le[dest_i] = mem.nativeToLittle(u16, high);

utf16le[dest_i + 1] = mem.nativeToLittle(u16, low);

dest_i += 2;

utf16le[dest_index..][0..2].* = .{ mem.nativeToLittle(u16, high), mem.nativeToLittle(u16, low) };

dest_index += 2;

}

src_i = next_src_i;

}

return dest_i;

return dest_index;

}

test "utf8ToUtf16Le" {

var utf16le: [2]u16 = [_]u16{0} ** 2;

var utf16le: [128]u16 = undefined;

{

const length = try utf8ToUtf16Le(utf16le[0..], "𐐷");

try testing.expectEqual(@as(usize, 2), length);

try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16le[0..]));

try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16le[0..length]));

}

{

const length = try utf8ToUtf16Le(utf16le[0..], "\u{10FFFF}");

try testing.expectEqual(@as(usize, 2), length);

try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16le[0..]));

try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16le[0..length]));

}

{

const result = utf8ToUtf16Le(utf16le[0..], "\xf4\x90\x80\x80");

try testing.expectError(error.InvalidUtf8, result);

}

{

const length = try utf8ToUtf16Le(utf16le[0..], "This string has been designed to test the vectorized implementat" ++

"ion by beginning with one hundred twenty-seven ASCII characters¡");

try testing.expectEqualSlices(u8, &.{

'T', 0, 'h', 0, 'i', 0, 's', 0, ' ', 0, 's', 0, 't', 0, 'r', 0, 'i', 0, 'n', 0, 'g', 0, ' ', 0, 'h', 0, 'a', 0, 's', 0, ' ', 0,

'b', 0, 'e', 0, 'e', 0, 'n', 0, ' ', 0, 'd', 0, 'e', 0, 's', 0, 'i', 0, 'g', 0, 'n', 0, 'e', 0, 'd', 0, ' ', 0, 't', 0, 'o', 0,

' ', 0, 't', 0, 'e', 0, 's', 0, 't', 0, ' ', 0, 't', 0, 'h', 0, 'e', 0, ' ', 0, 'v', 0, 'e', 0, 'c', 0, 't', 0, 'o', 0, 'r', 0,

'i', 0, 'z', 0, 'e', 0, 'd', 0, ' ', 0, 'i', 0, 'm', 0, 'p', 0, 'l', 0, 'e', 0, 'm', 0, 'e', 0, 'n', 0, 't', 0, 'a', 0, 't', 0,

'i', 0, 'o', 0, 'n', 0, ' ', 0, 'b', 0, 'y', 0, ' ', 0, 'b', 0, 'e', 0, 'g', 0, 'i', 0, 'n', 0, 'n', 0, 'i', 0, 'n', 0, 'g', 0,

' ', 0, 'w', 0, 'i', 0, 't', 0, 'h', 0, ' ', 0, 'o', 0, 'n', 0, 'e', 0, ' ', 0, 'h', 0, 'u', 0, 'n', 0, 'd', 0, 'r', 0, 'e', 0,

'd', 0, ' ', 0, 't', 0, 'w', 0, 'e', 0, 'n', 0, 't', 0, 'y', 0, '-', 0, 's', 0, 'e', 0, 'v', 0, 'e', 0, 'n', 0, ' ', 0, 'A', 0,

'S', 0, 'C', 0, 'I', 0, 'I', 0, ' ', 0, 'c', 0, 'h', 0, 'a', 0, 'r', 0, 'a', 0, 'c', 0, 't', 0, 'e', 0, 'r', 0, 's', 0, '¡', 0,

}, mem.sliceAsBytes(utf16le[0..length]));

}

test utf8ToUtf16LeArrayList {

@@ -1354,25 +1321,40 @@ test utf8ToUtf16LeAllocZ {

{

const utf16 = try utf8ToUtf16LeAllocZ(testing.allocator, "𐐷");

defer testing.allocator.free(utf16);

try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16[0..]));

try testing.expectEqualSlices(u8, "\x01\xd8\x37\xdc", mem.sliceAsBytes(utf16));

try testing.expect(utf16[2] == 0);

}

{

const utf16 = try utf8ToUtf16LeAllocZ(testing.allocator, "\u{10FFFF}");

defer testing.allocator.free(utf16);

try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16[0..]));

try testing.expectEqualSlices(u8, "\xff\xdb\xff\xdf", mem.sliceAsBytes(utf16));

try testing.expect(utf16[2] == 0);

}

{

const result = utf8ToUtf16LeAllocZ(testing.allocator, "\xf4\x90\x80\x80");

try testing.expectError(error.InvalidUtf8, result);

}

{

const utf16 = try utf8ToUtf16LeWithNull(testing.allocator, "This string has been designed to test the vectorized implementat" ++