srctree

Phil Schumann parent 66b2477a b109186d
std/zig/parse_string_literal.zig: add hex+unicode escapes (#4678)

inlinesplit
lib/std/zig/parse_string_literal.zig added: 57, removed: 8, total 49
@@ -19,17 +19,19 @@ pub fn parseStringLiteral(
bytes: []const u8,
bad_index: *usize, // populated if error.InvalidCharacter is returned
) ParseStringLiteralError![]u8 {
const first_index = if (bytes[0] == 'c') @as(usize, 2) else @as(usize, 1);
assert(bytes[bytes.len - 1] == '"');
assert(bytes.len >= 2 and bytes[0] == '"' and bytes[bytes.len - 1] == '"');
 
var list = std.ArrayList(u8).init(allocator);
errdefer list.deinit();
 
const slice = bytes[first_index..];
const slice = bytes[1..];
try list.ensureCapacity(slice.len - 1);
 
var state = State.Start;
for (slice) |b, index| {
var index: usize = 0;
while (index < slice.len) : (index += 1) {
const b = slice[index];
 
switch (state) {
State.Start => switch (b) {
'\\' => state = State.Backslash,
@@ -41,9 +43,6 @@ pub fn parseStringLiteral(
else => try list.append(b),
},
State.Backslash => switch (b) {
'x' => @panic("TODO"),
'u' => @panic("TODO"),
'U' => @panic("TODO"),
'n' => {
try list.append('\n');
state = State.Start;
@@ -60,10 +59,46 @@ pub fn parseStringLiteral(
try list.append('\t');
state = State.Start;
},
'\'' => {
try list.append('\'');
state = State.Start;
},
'"' => {
try list.append('"');
state = State.Start;
},
'x' => {
// TODO: add more/better/broader tests for this.
const index_continue = index + 3;
if (slice.len >= index_continue)
if (std.fmt.parseUnsigned(u8, slice[index + 1 .. index_continue], 16)) |char| {
try list.append(char);
state = State.Start;
index = index_continue - 1; // loop-header increments again
continue;
} else |_| {};
 
bad_index.* = index;
return error.InvalidCharacter;
},
'u' => {
// TODO: add more/better/broader tests for this.
if (slice.len > index + 2 and slice[index + 1] == '{')
if (std.mem.indexOfScalarPos(u8, slice[0..std.math.min(index + 9, slice.len)], index + 3, '}')) |index_end| {
const hex_str = slice[index + 2 .. index_end];
if (std.fmt.parseUnsigned(u32, hex_str, 16)) |uint| {
if (uint <= 0x10ffff) {
try list.appendSlice(std.mem.toBytes(uint)[0..]);
state = State.Start;
index = index_end; // loop-header increments
continue;
}
} else |_| {}
};
 
bad_index.* = index;
return error.InvalidCharacter;
},
else => {
bad_index.* = index;
return error.InvalidCharacter;
@@ -74,3 +109,17 @@ pub fn parseStringLiteral(
}
unreachable;
}
 
test "parseStringLiteral" {
    const expect = std.testing.expect;
    const expectError = std.testing.expectError;
    const eql = std.mem.eql;

    // The parsed results are never freed, so the fixed buffer has to be big
    // enough to hold every allocation made by the cases below.
    var fixed_buf_mem: [256]u8 = undefined;
    var fixed_buf_alloc = std.heap.FixedBufferAllocator.init(fixed_buf_mem[0..]);
    const alloc = &fixed_buf_alloc.allocator;
    var bad_index: usize = undefined;

    // Plain literal without any escape sequence.
    expect(eql(u8, "foo", try parseStringLiteral(alloc, "\"foo\"", &bad_index)));

    // Hex escapes. The backslashes are doubled so that the parser really
    // receives the text `"f\x6f\x6f"`; with a single backslash the Zig
    // compiler would decode the escapes itself and the parser would only
    // ever see `"foo"`.
    expect(eql(u8, "foo", try parseStringLiteral(alloc, "\"f\\x6f\\x6f\"", &bad_index)));

    // Malformed hex escape: `zz` is not a hex byte. `bad_index` is reported
    // relative to the text after the opening quote, and points at the `x`.
    expectError(error.InvalidCharacter, parseStringLiteral(alloc, "\"\\xzz\"", &bad_index));
    expect(bad_index == 1);

    // Multi-byte UTF-8 passes through unchanged. The `\u{1f4af}` in the
    // input below is decoded by the Zig compiler, so the parser sees the
    // encoded bytes of the code point, not a `\u{...}` escape.
    // NOTE(review): the parser's own `\u{...}` branch is therefore not
    // covered by this test. As written it appends `std.mem.toBytes(uint)`
    // (the raw bytes of the u32, not a UTF-8 encoding), so an input of
    // "\"f\\u{1f4af}\"" would presumably not compare equal to "f💯" --
    // confirm and fix that branch before adding the escaped case.
    expect(eql(u8, "f💯", try parseStringLiteral(alloc, "\"f\u{1f4af}\"", &bad_index)));
}