srctree

Veikka Tuominen parent 1e67f502 90ab8ea9
Sync Aro sources (#19199)

ref: 02353ad9f17f659e173f68975a442fcec3dd2c94
.gitattributes added: 876, removed: 300, total 576
@@ -12,4 +12,4 @@ lib/libcxx/** linguist-vendored
lib/libcxxabi/** linguist-vendored
lib/libunwind/** linguist-vendored
lib/tsan/** linguist-vendored
deps/** linguist-vendored
lib/compiler/aro/** linguist-vendored
 
lib/compiler/aro/README.md added: 876, removed: 300, total 576
@@ -20,8 +20,7 @@ int main(void) {
printf("Hello, world!\n");
return 0;
}
$ zig build run -- hello.c -o hello
$ zig build && ./zig-out/bin/arocc hello.c -o hello
$ ./hello
Hello, world!
$
```
 
lib/compiler/aro/aro/Attribute/names.zig added: 876, removed: 300, total 576
@@ -1,4 +1,4 @@
//! Autogenerated by GenerateDef from deps/aro/aro/Attribute/names.def, do not edit
//! Autogenerated by GenerateDef from src/aro/Attribute/names.def, do not edit
// zig fmt: off
 
const std = @import("std");
@@ -142,15 +142,7 @@ pub fn nameFromUniqueIndex(index: u16, buf: []u8) []u8 {
return fbs.getWritten();
}
 
/// We're 1 bit shy of being able to fit this in a u32:
/// - char only contains 0-9, a-z, A-Z, and _, so it could use a enum(u6) with a way to convert <-> u8
/// (note: this would have a performance cost that may make the u32 not worth it)
/// - number has a max value of > 2047 and < 4095 (the first _ node has the largest number),
/// so it could fit into a u12
/// - child_index currently has a max of > 4095 and < 8191, so it could fit into a u13
///
/// with the end_of_word/end_of_list 2 bools, that makes 33 bits total
const Node = packed struct(u64) {
const Node = packed struct(u32) {
char: u8,
/// Nodes are numbered with "an integer which gives the number of words that
/// would be accepted by the automaton starting from that state." This numbering
@@ -158,18 +150,16 @@ const Node = packed struct(u64) {
/// (L is the number of words accepted by the automaton) and the words themselves."
///
/// Essentially, this allows us to have a minimal perfect hashing scheme such that
/// it's possible to store & lookup the properties of each builtin using a separate array.
number: u16,
/// If true, this node is the end of a valid builtin.
/// it's possible to store & lookup the properties of each name using a separate array.
number: u8,
/// If true, this node is the end of a valid name.
/// Note: This does not necessarily mean that this node does not have child nodes.
end_of_word: bool,
/// If true, this node is the end of a sibling list.
/// If false, then (index + 1) will contain the next sibling.
end_of_list: bool,
/// Padding bits to get to u64, unsure if there's some way to use these to improve something.
_extra: u22 = 0,
/// Index of the first child of this node.
child_index: u16,
child_index: u14,
};
 
const dafsa = [_]Node{
 
lib/compiler/aro/aro/Builtins.zig added: 876, removed: 300, total 576
@@ -99,10 +99,7 @@ fn createType(desc: TypeDescription, it: *TypeDescription.TypeIterator, comp: *c
}
},
.h => builder.combine(undefined, .fp16, 0) catch unreachable,
.x => {
// Todo: _Float16
return .{ .specifier = .invalid };
},
.x => builder.combine(undefined, .float16, 0) catch unreachable,
.y => {
// Todo: __bf16
return .{ .specifier = .invalid };
 
lib/compiler/aro/aro/Builtins/Builtin.zig added: 876, removed: 300, total 576
@@ -1,4 +1,4 @@
//! Autogenerated by GenerateDef from deps/aro/aro/Builtins/Builtin.def, do not edit
//! Autogenerated by GenerateDef from src/aro/Builtins/Builtin.def, do not edit
// zig fmt: off
 
const std = @import("std");
 
lib/compiler/aro/aro/Compilation.zig added: 876, removed: 300, total 576
@@ -241,6 +241,12 @@ pub const SystemDefinesMode = enum {
fn generateSystemDefines(comp: *Compilation, w: anytype) !void {
const ptr_width = comp.target.ptrBitWidth();
 
if (comp.langopts.gnuc_version > 0) {
try w.print("#define __GNUC__ {d}\n", .{comp.langopts.gnuc_version / 10_000});
try w.print("#define __GNUC_MINOR__ {d}\n", .{comp.langopts.gnuc_version / 100 % 100});
try w.print("#define __GNUC_PATCHLEVEL__ {d}\n", .{comp.langopts.gnuc_version % 100});
}
 
// os macros
switch (comp.target.os.tag) {
.linux => try w.writeAll(
@@ -419,6 +425,25 @@ fn generateSystemDefines(comp: *Compilation, w: anytype) !void {
\\
);
 
// TODO: Set these to target-specific constants depending on backend capabilities
// For now they are just set to the "may be lock-free" value
try w.writeAll(
\\#define __ATOMIC_BOOL_LOCK_FREE 1
\\#define __ATOMIC_CHAR_LOCK_FREE 1
\\#define __ATOMIC_CHAR16_T_LOCK_FREE 1
\\#define __ATOMIC_CHAR32_T_LOCK_FREE 1
\\#define __ATOMIC_WCHAR_T_LOCK_FREE 1
\\#define __ATOMIC_SHORT_LOCK_FREE 1
\\#define __ATOMIC_INT_LOCK_FREE 1
\\#define __ATOMIC_LONG_LOCK_FREE 1
\\#define __ATOMIC_LLONG_LOCK_FREE 1
\\#define __ATOMIC_POINTER_LOCK_FREE 1
\\
);
if (comp.langopts.hasChar8_T()) {
try w.writeAll("#define __ATOMIC_CHAR8_T_LOCK_FREE 1\n");
}
 
// types
if (comp.getCharSignedness() == .unsigned) try w.writeAll("#define __CHAR_UNSIGNED__ 1\n");
try w.writeAll("#define __CHAR_BIT__ 8\n");
@@ -438,6 +463,7 @@ fn generateSystemDefines(comp: *Compilation, w: anytype) !void {
try comp.generateIntMaxAndWidth(w, "PTRDIFF", comp.types.ptrdiff);
try comp.generateIntMaxAndWidth(w, "INTPTR", comp.types.intptr);
try comp.generateIntMaxAndWidth(w, "UINTPTR", comp.types.intptr.makeIntegerUnsigned());
try comp.generateIntMaxAndWidth(w, "SIG_ATOMIC", target_util.sigAtomicType(comp.target));
 
// int widths
try w.print("#define __BITINT_MAXWIDTH__ {d}\n", .{bit_int_max_bits});
@@ -474,6 +500,8 @@ fn generateSystemDefines(comp: *Compilation, w: anytype) !void {
try generateTypeMacro(w, mapper, "__PTRDIFF_TYPE__", comp.types.ptrdiff, comp.langopts);
try generateTypeMacro(w, mapper, "__SIZE_TYPE__", comp.types.size, comp.langopts);
try generateTypeMacro(w, mapper, "__WCHAR_TYPE__", comp.types.wchar, comp.langopts);
try generateTypeMacro(w, mapper, "__CHAR16_TYPE__", comp.types.uint_least16_t, comp.langopts);
try generateTypeMacro(w, mapper, "__CHAR32_TYPE__", comp.types.uint_least32_t, comp.langopts);
 
try comp.generateExactWidthTypes(w, mapper);
try comp.generateFastAndLeastWidthTypes(w, mapper);
@@ -518,7 +546,6 @@ pub fn generateBuiltinMacros(comp: *Compilation, system_defines_mode: SystemDefi
 
// standard macros
try buf.appendSlice(
\\#define __STDC_NO_ATOMICS__ 1
\\#define __STDC_NO_COMPLEX__ 1
\\#define __STDC_NO_THREADS__ 1
\\#define __STDC_NO_VLA__ 1
@@ -1030,9 +1057,8 @@ pub fn getCharSignedness(comp: *const Compilation) std.builtin.Signedness {
return comp.langopts.char_signedness_override orelse comp.target.charSignedness();
}
 
pub fn defineSystemIncludes(comp: *Compilation, aro_dir: []const u8) !void {
var stack_fallback = std.heap.stackFallback(path_buf_stack_limit, comp.gpa);
const allocator = stack_fallback.get();
/// Add built-in aro headers directory to system include paths
pub fn addBuiltinIncludeDir(comp: *Compilation, aro_dir: []const u8) !void {
var search_path = aro_dir;
while (std.fs.path.dirname(search_path)) |dirname| : (search_path = dirname) {
var base_dir = std.fs.cwd().openDir(dirname, .{}) catch continue;
@@ -1044,23 +1070,12 @@ pub fn defineSystemIncludes(comp: *Compilation, aro_dir: []const u8) !void {
try comp.system_include_dirs.append(comp.gpa, path);
break;
} else return error.AroIncludeNotFound;
}
 
if (comp.target.os.tag == .linux) {
const triple_str = try comp.target.linuxTriple(allocator);
defer allocator.free(triple_str);
 
const multiarch_path = try std.fs.path.join(allocator, &.{ "/usr/include", triple_str });
defer allocator.free(multiarch_path);
 
if (!std.meta.isError(std.fs.accessAbsolute(multiarch_path, .{}))) {
const duped = try comp.gpa.dupe(u8, multiarch_path);
errdefer comp.gpa.free(duped);
try comp.system_include_dirs.append(comp.gpa, duped);
}
}
const usr_include = try comp.gpa.dupe(u8, "/usr/include");
errdefer comp.gpa.free(usr_include);
try comp.system_include_dirs.append(comp.gpa, usr_include);
/// Append a copy of `path` to the compilation's system include directories.
/// The copy is allocated with `comp.gpa` and released again if the append fails.
pub fn addSystemIncludeDir(comp: *Compilation, path: []const u8) !void {
    const gpa = comp.gpa;
    const owned_path = try gpa.dupe(u8, path);
    errdefer gpa.free(owned_path);
    try comp.system_include_dirs.append(gpa, owned_path);
}
 
pub fn getSource(comp: *const Compilation, id: Source.Id) Source {
@@ -1331,6 +1346,10 @@ pub fn hasInclude(
/// __has_include vs __has_include_next
which: WhichInclude,
) !bool {
if (mem.indexOfScalar(u8, filename, 0) != null) {
return false;
}
 
const cwd = std.fs.cwd();
if (std.fs.path.isAbsolute(filename)) {
if (which == .next) return false;
 
lib/compiler/aro/aro/Diagnostics.zig added: 876, removed: 300, total 576
@@ -208,6 +208,8 @@ pub const Options = struct {
@"unsupported-embed-param": Kind = .default,
@"unused-result": Kind = .default,
normalized: Kind = .default,
@"shift-count-negative": Kind = .default,
@"shift-count-overflow": Kind = .default,
};
 
const Diagnostics = @This();
@@ -291,7 +293,7 @@ pub fn addExtra(
.kind = .note,
.extra = .{ .unsigned = expansion_locs.len - d.macro_backtrace_limit },
});
i = half - 1;
i = half -| 1;
while (i > 0) {
i -= 1;
d.list.appendAssumeCapacity(.{
 
lib/compiler/aro/aro/Diagnostics/messages.zig added: 876, removed: 300, total 576
@@ -1,4 +1,4 @@
//! Autogenerated by GenerateDef from deps/aro/aro/Diagnostics/messages.def, do not edit
//! Autogenerated by GenerateDef from src/aro/Diagnostics/messages.def, do not edit
// zig fmt: off
 
const std = @import("std");
@@ -504,6 +504,11 @@ pub const Tag = enum {
c23_auto_single_declarator,
c32_auto_requires_initializer,
c23_auto_scalar_init,
negative_shift_count,
too_big_shift_count,
complex_conj,
overflow_builtin_requires_int,
overflow_result_requires_ptr,
 
pub fn property(tag: Tag) Properties {
return named_data[@intFromEnum(tag)];
@@ -1005,6 +1010,11 @@ pub const Tag = enum {
.{ .msg = "'auto' can only be used with a single declarator", .kind = .@"error" },
.{ .msg = "'auto' requires an initializer", .kind = .@"error" },
.{ .msg = "'auto' requires a scalar initializer", .kind = .@"error" },
.{ .msg = "shift count is negative", .opt = W("shift-count-negative"), .kind = .warning, .all = true },
.{ .msg = "shift count >= width of type", .opt = W("shift-count-overflow"), .kind = .warning, .all = true },
.{ .msg = "ISO C does not support '~' for complex conjugation of '{s}'", .opt = W("pedantic"), .extra = .str, .kind = .off },
.{ .msg = "operand argument to overflow builtin must be an integer ('{s}' invalid)", .extra = .str, .kind = .@"error" },
.{ .msg = "result argument to overflow builtin must be a pointer to a non-const integer ('{s}' invalid)", .extra = .str, .kind = .@"error" },
};
};
};
 
lib/compiler/aro/aro/Driver.zig added: 876, removed: 300, total 576
@@ -12,6 +12,7 @@ const Preprocessor = @import("Preprocessor.zig");
const Source = @import("Source.zig");
const Toolchain = @import("Toolchain.zig");
const target_util = @import("target.zig");
const GCCVersion = @import("Driver/GCCVersion.zig");
 
pub const Linker = enum {
ld,
@@ -43,6 +44,9 @@ verbose_pp: bool = false,
verbose_ir: bool = false,
verbose_linker_args: bool = false,
color: ?bool = null,
nobuiltininc: bool = false,
nostdinc: bool = false,
nostdlibinc: bool = false,
 
/// Full path to the aro executable
aro_name: []const u8 = "",
@@ -95,6 +99,7 @@ pub const usage =
\\ -fcolor-diagnostics Enable colors in diagnostics
\\ -fno-color-diagnostics Disable colors in diagnostics
\\ -fdeclspec Enable support for __declspec attributes
\\ -fgnuc-version=<value> Controls value of __GNUC__ and related macros. Set to 0 or empty to disable them.
\\ -fno-declspec Disable support for __declspec attributes
\\ -ffp-eval-method=[source|double|extended]
\\ Evaluation method to use for floating-point arithmetic
@@ -127,6 +132,10 @@ pub const usage =
\\ -isystem Add directory to SYSTEM include search path
\\ --emulate=[clang|gcc|msvc]
\\ Select which C compiler to emulate (default clang)
\\ -nobuiltininc Do not search the compiler's builtin directory for include files
\\ -nostdinc, --no-standard-includes
\\ Do not search the standard system directories or compiler builtin directories for include files.
\\ -nostdlibinc Do not search the standard system directories for include files, but do search compiler builtin include directories
\\ -o <file> Write output to <file>
\\ -P, --no-line-commands Disable linemarker output in -E mode
\\ -pedantic Warn on language extensions
@@ -180,6 +189,7 @@ pub fn parseArgs(
var i: usize = 1;
var comment_arg: []const u8 = "";
var hosted: ?bool = null;
var gnuc_version: []const u8 = "4.2.1"; // default value set by clang
while (i < args.len) : (i += 1) {
const arg = args[i];
if (mem.startsWith(u8, arg, "-") and arg.len > 1) {
@@ -303,6 +313,10 @@ pub fn parseArgs(
d.only_syntax = true;
} else if (mem.startsWith(u8, arg, "-fno-syntax-only")) {
d.only_syntax = false;
} else if (mem.eql(u8, arg, "-fgnuc-version=")) {
gnuc_version = "0";
} else if (option(arg, "-fgnuc-version=")) |version| {
gnuc_version = version;
} else if (mem.startsWith(u8, arg, "-isystem")) {
var path = arg["-isystem".len..];
if (path.len == 0) {
@@ -421,6 +435,12 @@ pub fn parseArgs(
d.nodefaultlibs = true;
} else if (mem.eql(u8, arg, "-nolibc")) {
d.nolibc = true;
} else if (mem.eql(u8, arg, "-nobuiltininc")) {
d.nobuiltininc = true;
} else if (mem.eql(u8, arg, "-nostdinc") or mem.eql(u8, arg, "--no-standard-includes")) {
d.nostdinc = true;
} else if (mem.eql(u8, arg, "-nostdlibinc")) {
d.nostdlibinc = true;
} else if (mem.eql(u8, arg, "-nostdlib")) {
d.nostdlib = true;
} else if (mem.eql(u8, arg, "-nostartfiles")) {
@@ -459,6 +479,11 @@ pub fn parseArgs(
d.comp.target.os.tag = .freestanding;
}
}
const version = GCCVersion.parse(gnuc_version);
if (version.major == -1) {
return d.fatal("invalid value '{0s}' in '-fgnuc-version={0s}'", .{gnuc_version});
}
d.comp.langopts.gnuc_version = version.toUnsigned();
return false;
}
 
@@ -558,7 +583,8 @@ pub fn main(d: *Driver, tc: *Toolchain, args: []const []const u8, comptime fast_
try d.comp.addDiagnostic(.{ .tag = .cli_unused_link_object, .extra = .{ .str = obj } }, &.{});
};
 
d.comp.defineSystemIncludes(d.aro_name) catch |er| switch (er) {
try tc.discover();
tc.defineSystemIncludes() catch |er| switch (er) {
error.OutOfMemory => return error.OutOfMemory,
error.AroIncludeNotFound => return d.fatal("unable to find Aro builtin headers", .{}),
};
@@ -763,8 +789,6 @@ fn dumpLinkerArgs(items: []const []const u8) !void {
/// The entry point of the Aro compiler.
/// **MAY call `exit` if `fast_exit` is set.**
pub fn invokeLinker(d: *Driver, tc: *Toolchain, comptime fast_exit: bool) !void {
try tc.discover();
 
var argv = std.ArrayList([]const u8).init(d.comp.gpa);
defer argv.deinit();
 
 
lib/compiler/aro/aro/Driver/GCCVersion.zig added: 876, removed: 300, total 576
@@ -98,6 +98,16 @@ pub fn order(a: GCCVersion, b: GCCVersion) Order {
return .eq;
}
 
/// Used for determining __GNUC__ macro values.
/// Encodes the version as `major * 10_000 + minor * 100 + patch`; components that are
/// not positive contribute nothing. All arithmetic wraps on overflow, which
/// matches clang's logic for overflowing values.
pub fn toUnsigned(self: GCCVersion) u32 {
    const major_part: u32 = if (self.major > 0) @as(u32, @intCast(self.major)) *% 10_000 else 0;
    const minor_part: u32 = if (self.minor > 0) @as(u32, @intCast(self.minor)) *% 100 else 0;
    const patch_part: u32 = if (self.patch > 0) @as(u32, @intCast(self.patch)) else 0;
    return major_part +% minor_part +% patch_part;
}
 
test parse {
const versions = [10]GCCVersion{
parse("5"),
 
lib/compiler/aro/aro/Hideset.zig added: 876, removed: 300, total 576
@@ -0,0 +1,191 @@
//! A hideset is a linked list (implemented as an array so that elements are identified by 4-byte indices)
//! of the set of identifiers from which a token was expanded.
//! During macro expansion, if a token would otherwise be expanded, but its hideset contains
//! the token itself, then it is not expanded
//! Most tokens have an empty hideset, and the hideset is not needed once expansion is complete,
//! so we use a hash map to store them instead of directly storing them with the token.
//! The C standard underspecifies the algorithm for updating a token's hideset;
//! we use the one here: https://www.spinellis.gr/blog/20060626/cpp.algo.pdf
 
const std = @import("std");
const mem = std.mem;
const Allocator = mem.Allocator;
const Source = @import("Source.zig");
const Compilation = @import("Compilation.zig");
const Tokenizer = @import("Tokenizer.zig");
 
pub const Hideset = @This();
 
/// Identifies a token by the source it lives in and the byte offset where it starts.
const Identifier = struct {
    id: Source.Id = .unused,
    byte_offset: u32 = 0,

    /// Returns the text of the token that starts at `byte_offset`, found by
    /// tokenizing the source buffer from that offset.
    fn slice(self: Identifier, comp: *const Compilation) []const u8 {
        var tokenizer: Tokenizer = .{
            .buf = comp.getSource(self.id).buf,
            .langopts = comp.langopts,
            .index = self.byte_offset,
            .source = .generated,
        };
        const tok = tokenizer.next();
        return tokenizer.buf[tok.start..tok.end];
    }

    /// Builds an identifier from the `id` and `byte_offset` of `loc`.
    fn fromLocation(loc: Source.Location) Identifier {
        return .{ .id = loc.id, .byte_offset = loc.byte_offset };
    }
};
 
/// One node of a hideset linked list, stored in `linked_list`.
const Item = struct {
/// Identifier held by this node.
identifier: Identifier = .{},
/// Index of the following node; `.none` marks the end of the list.
next: Index = .none,

const List = std.MultiArrayList(Item);
};
 
/// Position of an `Item` inside `linked_list`; also serves as the handle for the list starting there.
const Index = enum(u32) {
/// Sentinel meaning "no item": an empty list, or the end of a list.
none = std.math.maxInt(u32),
_,
};
 
/// List head recorded for each identifier location; read by `get`, written by `put`.
map: std.AutoHashMapUnmanaged(Identifier, Index) = .{},
/// Used for computing intersection of two lists; stored here so that allocations can be retained
/// until hideset is deinit'ed
intersection_map: std.AutoHashMapUnmanaged(Identifier, void) = .{},
/// Backing storage for every list node; an `Index` is a position in this array.
linked_list: Item.List = .{},
/// Supplies the allocator (`comp.gpa`) and the source buffers read by `Identifier.slice`.
comp: *const Compilation,
 
/// Walks a list by following `next` links.
/// Invalidated if the underlying MultiArrayList slice is reallocated due to resize
const Iterator = struct {
    slice: Item.List.Slice,
    i: Index,

    /// Returns the identifier at the current position and advances to the next node,
    /// or returns null once the end of the list has been reached.
    fn next(self: *Iterator) ?Identifier {
        if (self.i == .none) return null;
        const pos = @intFromEnum(self.i);
        const identifier = self.slice.items(.identifier)[pos];
        self.i = self.slice.items(.next)[pos];
        return identifier;
    }
};
 
/// Releases all memory owned by the hideset using `comp.gpa`.
pub fn deinit(self: *Hideset) void {
    const gpa = self.comp.gpa;
    self.linked_list.deinit(gpa);
    self.intersection_map.deinit(gpa);
    self.map.deinit(gpa);
}
 
/// Empties `map` and `linked_list` while keeping their allocations for reuse.
pub fn clearRetainingCapacity(self: *Hideset) void {
    self.map.clearRetainingCapacity();
    self.linked_list.shrinkRetainingCapacity(0);
}
 
/// Empties the hideset and returns the memory of all three containers to `comp.gpa`.
pub fn clearAndFree(self: *Hideset) void {
    const gpa = self.comp.gpa;
    self.linked_list.shrinkAndFree(gpa, 0);
    self.intersection_map.clearAndFree(gpa);
    self.map.clearAndFree(gpa);
}
 
/// Returns an iterator over the list that starts at `idx`.
/// Iterator is invalidated if the underlying MultiArrayList slice is reallocated due to resize
fn iterator(self: *const Hideset, idx: Index) Iterator {
    const nodes = self.linked_list.slice();
    return .{ .slice = nodes, .i = idx };
}
 
/// Looks up the list recorded for the identifier at `loc`; returns `.none` if there is no entry.
pub fn get(self: *const Hideset, loc: Source.Location) Index {
    const key = Identifier.fromLocation(loc);
    if (self.map.get(key)) |head| return head;
    return .none;
}
 
/// Records `value` as the list for the identifier at `loc`, replacing any previous entry.
pub fn put(self: *Hideset, loc: Source.Location, value: Index) !void {
    const key = Identifier.fromLocation(loc);
    try self.map.put(self.comp.gpa, key, value);
}
 
/// Makes sure `linked_list` can take `new_size` more nodes without allocating again.
fn ensureUnusedCapacity(self: *Hideset, new_size: usize) !void {
    const gpa = self.comp.gpa;
    try self.linked_list.ensureUnusedCapacity(gpa, new_size);
}
 
/// Appends a node holding `identifier` with no successor and returns its index.
/// The caller must have reserved capacity beforehand.
fn createNodeAssumeCapacity(self: *Hideset, identifier: Identifier) Index {
    const position = self.linked_list.len;
    const node: Item = .{ .identifier = identifier };
    self.linked_list.appendAssumeCapacity(node);
    return @enumFromInt(position);
}
 
/// Create a new list whose first node is the identifier at `loc` and whose remainder is `tail`.
/// `tail` is shared with the new list, not copied.
pub fn prepend(self: *Hideset, loc: Source.Location, tail: Index) !Index {
    const position = self.linked_list.len;
    const node: Item = .{
        .identifier = Identifier.fromLocation(loc),
        .next = tail,
    };
    try self.linked_list.append(self.comp.gpa, node);
    return @enumFromInt(position);
}
 
/// Returns a list made of a fresh copy of `a` followed by `b` itself (`b` is shared, not copied).
/// When `a` is empty the result is simply `b`.
pub fn @"union"(self: *Hideset, a: Index, b: Index) !Index {
    // Reserve up front so the iterator's slice stays valid while nodes are appended.
    try self.ensureUnusedCapacity(self.len(a));

    var head: Index = b;
    var tail: Index = .none;
    var it = self.iterator(a);
    while (it.next()) |identifier| {
        const copy = self.createNodeAssumeCapacity(identifier);
        if (tail == .none) {
            // First copied node becomes the head of the result.
            head = copy;
        } else {
            self.linked_list.items(.next)[@intFromEnum(tail)] = copy;
        }
        tail = copy;
    }
    // Link the last copied node to `b`.
    if (tail != .none) self.linked_list.items(.next)[@intFromEnum(tail)] = b;
    return head;
}
 
/// Reports whether any identifier in `list` has source text equal to `str`.
pub fn contains(self: *const Hideset, list: Index, str: []const u8) bool {
    var it = self.iterator(list);
    return while (it.next()) |identifier| {
        const text = identifier.slice(self.comp);
        if (mem.eql(u8, str, text)) break true;
    } else false;
}
 
/// Counts the nodes of `list` by following `next` links until `.none`.
fn len(self: *const Hideset, list: Index) usize {
    const nexts = self.linked_list.items(.next);
    var count: usize = 0;
    var node = list;
    while (node != .none) {
        node = nexts[@intFromEnum(node)];
        count += 1;
    }
    return count;
}
 
/// Builds a new list holding the identifiers of `b` that also occur in `a`, in `b`'s order.
/// Returns `.none` when either input is empty or when the lists share nothing.
/// `a` and `b` are left untouched; the result consists of newly appended nodes.
pub fn intersection(self: *Hideset, a: Index, b: Index) !Index {
    if (a == .none or b == .none) return .none;
    self.intersection_map.clearRetainingCapacity();

    // Collect the members of `a` for membership tests.
    var it = self.iterator(a);
    while (it.next()) |identifier| {
        try self.intersection_map.put(self.comp.gpa, identifier, {});
    }

    // Reserve one slot per node of `b`, since every one of them can yield an output node.
    // The tighter `@min(len(a), len(b))` is not an upper bound: lists may repeat an
    // identifier (`union` copies `a` in front of `b` without de-duplicating), so `b` can
    // contain more matching nodes than `a` has nodes, which would overrun the capacity
    // assumed by `createNodeAssumeCapacity`.
    // Reserving before the iterator over `b` is created also keeps its slice valid.
    try self.ensureUnusedCapacity(self.len(b));

    var head: Index = .none;
    var cur: Index = .none;
    it = self.iterator(b);
    while (it.next()) |identifier| {
        if (!self.intersection_map.contains(identifier)) continue;

        const new_idx = self.createNodeAssumeCapacity(identifier);
        if (head == .none) head = new_idx;
        if (cur != .none) {
            self.linked_list.items(.next)[@intFromEnum(cur)] = new_idx;
        }
        cur = new_idx;
    }
    return head;
}
 
lib/compiler/aro/aro/LangOpts.zig added: 876, removed: 300, total 576
@@ -135,6 +135,11 @@ preserve_comments: bool = false,
/// Preserve comments in macros when preprocessing
preserve_comments_in_macros: bool = false,
 
/// Used ONLY for generating __GNUC__ and related macros. Does not control the presence/absence of any features
/// Encoded as major * 10,000 + minor * 100 + patch
/// e.g. 4.2.1 == 40201
gnuc_version: u32 = 0,
 
pub fn setStandard(self: *LangOpts, name: []const u8) error{InvalidStandard}!void {
self.standard = Standard.NameMap.get(name) orelse return error.InvalidStandard;
}
 
lib/compiler/aro/aro/Parser.zig added: 876, removed: 300, total 576
@@ -403,7 +403,7 @@ pub fn errExtra(p: *Parser, tag: Diagnostics.Tag, tok_i: TokenIndex, extra: Diag
.tag = tag,
.loc = loc,
.extra = extra,
}, tok.expansionSlice());
}, p.pp.expansionSlice(tok_i));
}
 
pub fn errTok(p: *Parser, tag: Diagnostics.Tag, tok_i: TokenIndex) Compilation.Error!void {
@@ -432,6 +432,11 @@ pub fn removeNull(p: *Parser, str: Value) !Value {
}
 
pub fn typeStr(p: *Parser, ty: Type) ![]const u8 {
if (@import("builtin").mode != .Debug) {
if (ty.is(.invalid)) {
return "Tried to render invalid type - this is an aro bug.";
}
}
if (Type.Builder.fromType(ty).str(p.comp.langopts)) |str| return str;
const strings_top = p.strings.items.len;
defer p.strings.items.len = strings_top;
@@ -446,6 +451,11 @@ pub fn typePairStr(p: *Parser, a: Type, b: Type) ![]const u8 {
}
 
pub fn typePairStrExtra(p: *Parser, a: Type, msg: []const u8, b: Type) ![]const u8 {
if (@import("builtin").mode != .Debug) {
if (a.is(.invalid) or b.is(.invalid)) {
return "Tried to render invalid type - this is an aro bug.";
}
}
const strings_top = p.strings.items.len;
defer p.strings.items.len = strings_top;
 
@@ -635,7 +645,6 @@ fn diagnoseIncompleteDefinitions(p: *Parser) !void {
const tys = node_slices.items(.ty);
const data = node_slices.items(.data);
 
const err_start = p.comp.diagnostics.list.items.len;
for (p.decl_buf.items) |decl_node| {
const idx = @intFromEnum(decl_node);
switch (tags[idx]) {
@@ -656,8 +665,6 @@ fn diagnoseIncompleteDefinitions(p: *Parser) !void {
try p.errStr(.tentative_definition_incomplete, tentative_def_tok, type_str);
try p.errStr(.forward_declaration_here, data[idx].decl_ref, type_str);
}
const errors_added = p.comp.diagnostics.list.items.len - err_start;
assert(errors_added == 2 * p.tentative_defs.count()); // Each tentative def should add an error + note
}
 
/// root : (decl | assembly ';' | staticAssert)*
@@ -2201,7 +2208,15 @@ fn recordSpec(p: *Parser) Error!Type {
} else {
record_ty.fields = try p.arena.dupe(Type.Record.Field, p.record_buf.items[record_buf_top..]);
}
if (old_field_attr_start < p.field_attr_buf.items.len) {
const attr_count = p.field_attr_buf.items.len - old_field_attr_start;
const record_decls = p.decl_buf.items[decl_buf_top..];
if (attr_count > 0) {
if (attr_count != record_decls.len) {
// A mismatch here means that non-field decls were parsed. This can happen if there were
// parse errors during attribute parsing. Bail here because if there are any field attributes,
// there must be exactly one per field.
return error.ParsingFailed;
}
const field_attr_slice = p.field_attr_buf.items[old_field_attr_start..];
const duped = try p.arena.dupe([]const Attribute, field_attr_slice);
record_ty.field_attributes = duped.ptr;
@@ -2242,7 +2257,6 @@ fn recordSpec(p: *Parser) Error!Type {
.ty = ty,
.data = .{ .bin = .{ .lhs = .none, .rhs = .none } },
};
const record_decls = p.decl_buf.items[decl_buf_top..];
switch (record_decls.len) {
0 => {},
1 => node.data = .{ .bin = .{ .lhs = record_decls[0], .rhs = .none } },
@@ -2560,6 +2574,7 @@ fn enumSpec(p: *Parser) Error!Type {
if (field.ty.eql(Type.int, p.comp, false)) continue;
 
const sym = p.syms.get(field.name, .vars) orelse continue;
if (sym.kind != .enumeration) continue; // already an error
 
var res = Result{ .node = field.node, .ty = field.ty, .val = sym.val };
const dest_ty = if (p.comp.fixedEnumTagSpecifier()) |some|
@@ -4603,24 +4618,31 @@ fn nodeIsNoreturn(p: *Parser, node: NodeIndex) NoreturnKind {
},
.compound_stmt_two => {
const data = p.nodes.items(.data)[@intFromEnum(node)];
if (data.bin.rhs != .none) return p.nodeIsNoreturn(data.bin.rhs);
if (data.bin.lhs != .none) return p.nodeIsNoreturn(data.bin.lhs);
const lhs_type = if (data.bin.lhs != .none) p.nodeIsNoreturn(data.bin.lhs) else .no;
const rhs_type = if (data.bin.rhs != .none) p.nodeIsNoreturn(data.bin.rhs) else .no;
if (lhs_type == .complex or rhs_type == .complex) return .complex;
if (lhs_type == .yes or rhs_type == .yes) return .yes;
return .no;
},
.compound_stmt => {
const data = p.nodes.items(.data)[@intFromEnum(node)];
return p.nodeIsNoreturn(p.data.items[data.range.end - 1]);
var it = data.range.start;
while (it != data.range.end) : (it += 1) {
const kind = p.nodeIsNoreturn(p.data.items[it]);
if (kind != .no) return kind;
}
return .no;
},
.labeled_stmt => {
const data = p.nodes.items(.data)[@intFromEnum(node)];
return p.nodeIsNoreturn(data.decl.node);
},
.switch_stmt => {
.default_stmt => {
const data = p.nodes.items(.data)[@intFromEnum(node)];
if (data.bin.rhs == .none) return .complex;
if (p.nodeIsNoreturn(data.bin.rhs) == .yes) return .yes;
return .complex;
if (data.un == .none) return .no;
return p.nodeIsNoreturn(data.un);
},
.while_stmt, .do_while_stmt, .for_decl_stmt, .forever_stmt, .for_stmt, .switch_stmt => return .complex,
else => return .no,
}
}
@@ -4787,7 +4809,11 @@ const CallExpr = union(enum) {
Builtin.tagFromName("__va_start").?,
Builtin.tagFromName("va_start").?,
=> arg_idx != 1,
Builtin.tagFromName("__builtin_complex").? => false,
Builtin.tagFromName("__builtin_complex").?,
Builtin.tagFromName("__builtin_add_overflow").?,
Builtin.tagFromName("__builtin_sub_overflow").?,
Builtin.tagFromName("__builtin_mul_overflow").?,
=> false,
else => true,
},
};
@@ -4800,6 +4826,7 @@ const CallExpr = union(enum) {
}
 
fn checkVarArg(self: CallExpr, p: *Parser, first_after: TokenIndex, param_tok: TokenIndex, arg: *Result, arg_idx: u32) !void {
@setEvalBranchQuota(10_000);
if (self == .standard) return;
 
const builtin_tok = p.nodes.items(.data)[@intFromEnum(self.builtin.node)].decl.name;
@@ -4809,6 +4836,11 @@ const CallExpr = union(enum) {
Builtin.tagFromName("va_start").?,
=> return p.checkVaStartArg(builtin_tok, first_after, param_tok, arg, arg_idx),
Builtin.tagFromName("__builtin_complex").? => return p.checkComplexArg(builtin_tok, first_after, param_tok, arg, arg_idx),
Builtin.tagFromName("__builtin_add_overflow").?,
Builtin.tagFromName("__builtin_sub_overflow").?,
Builtin.tagFromName("__builtin_mul_overflow").?,
=> return p.checkArithOverflowArg(builtin_tok, first_after, param_tok, arg, arg_idx),
 
else => {},
}
}
@@ -4823,16 +4855,44 @@ const CallExpr = union(enum) {
return switch (self) {
.standard => null,
.builtin => |builtin| switch (builtin.tag) {
Builtin.tagFromName("__builtin_complex").? => 2,
Builtin.tagFromName("__c11_atomic_thread_fence").?,
Builtin.tagFromName("__c11_atomic_signal_fence").?,
Builtin.tagFromName("__c11_atomic_is_lock_free").?,
=> 1,
 
Builtin.tagFromName("__builtin_complex").?,
Builtin.tagFromName("__c11_atomic_load").?,
Builtin.tagFromName("__c11_atomic_init").?,
=> 2,
 
Builtin.tagFromName("__c11_atomic_store").?,
Builtin.tagFromName("__c11_atomic_exchange").?,
Builtin.tagFromName("__c11_atomic_fetch_add").?,
Builtin.tagFromName("__c11_atomic_fetch_sub").?,
Builtin.tagFromName("__c11_atomic_fetch_or").?,
Builtin.tagFromName("__c11_atomic_fetch_xor").?,
Builtin.tagFromName("__c11_atomic_fetch_and").?,
Builtin.tagFromName("__atomic_fetch_add").?,
Builtin.tagFromName("__atomic_fetch_sub").?,
Builtin.tagFromName("__atomic_fetch_and").?,
Builtin.tagFromName("__atomic_fetch_xor").?,
Builtin.tagFromName("__atomic_fetch_or").?,
Builtin.tagFromName("__atomic_fetch_nand").?,
Builtin.tagFromName("__atomic_add_fetch").?,
Builtin.tagFromName("__atomic_sub_fetch").?,
Builtin.tagFromName("__atomic_and_fetch").?,
Builtin.tagFromName("__atomic_xor_fetch").?,
Builtin.tagFromName("__atomic_or_fetch").?,
Builtin.tagFromName("__atomic_nand_fetch").?,
Builtin.tagFromName("__builtin_add_overflow").?,
Builtin.tagFromName("__builtin_sub_overflow").?,
Builtin.tagFromName("__builtin_mul_overflow").?,
=> 3,
 
Builtin.tagFromName("__c11_atomic_compare_exchange_strong").?,
Builtin.tagFromName("__c11_atomic_compare_exchange_weak").?,
=> 5,
 
Builtin.tagFromName("__atomic_compare_exchange").?,
Builtin.tagFromName("__atomic_compare_exchange_n").?,
=> 6,
@@ -4845,15 +4905,45 @@ const CallExpr = union(enum) {
return switch (self) {
.standard => callable_ty.returnType(),
.builtin => |builtin| switch (builtin.tag) {
Builtin.tagFromName("__c11_atomic_exchange").? => {
if (p.list_buf.items.len != 4) return Type.invalid; // wrong number of arguments; already an error
const second_param = p.list_buf.items[2];
return p.nodes.items(.ty)[@intFromEnum(second_param)];
},
Builtin.tagFromName("__c11_atomic_load").? => {
if (p.list_buf.items.len != 3) return Type.invalid; // wrong number of arguments; already an error
const first_param = p.list_buf.items[1];
const ty = p.nodes.items(.ty)[@intFromEnum(first_param)];
if (!ty.isPtr()) return Type.invalid;
return ty.elemType();
},
 
Builtin.tagFromName("__atomic_fetch_add").?,
Builtin.tagFromName("__atomic_add_fetch").?,
Builtin.tagFromName("__c11_atomic_fetch_add").?,
 
Builtin.tagFromName("__atomic_fetch_sub").?,
Builtin.tagFromName("__atomic_sub_fetch").?,
Builtin.tagFromName("__c11_atomic_fetch_sub").?,
 
Builtin.tagFromName("__atomic_fetch_and").?,
Builtin.tagFromName("__atomic_and_fetch").?,
Builtin.tagFromName("__c11_atomic_fetch_and").?,
 
Builtin.tagFromName("__atomic_fetch_xor").?,
Builtin.tagFromName("__atomic_xor_fetch").?,
Builtin.tagFromName("__c11_atomic_fetch_xor").?,
 
Builtin.tagFromName("__atomic_fetch_or").?,
Builtin.tagFromName("__atomic_or_fetch").?,
Builtin.tagFromName("__c11_atomic_fetch_or").?,
 
Builtin.tagFromName("__atomic_fetch_nand").?,
Builtin.tagFromName("__atomic_nand_fetch").?,
Builtin.tagFromName("__c11_atomic_fetch_nand").?,
=> {
if (p.list_buf.items.len < 2) return Type.invalid; // not enough arguments; already an error
const second_param = p.list_buf.items[p.list_buf.items.len - 2];
if (p.list_buf.items.len != 3) return Type.invalid; // wrong number of arguments; already an error
const second_param = p.list_buf.items[2];
return p.nodes.items(.ty)[@intFromEnum(second_param)];
},
Builtin.tagFromName("__builtin_complex").? => {
@@ -4863,8 +4953,17 @@ const CallExpr = union(enum) {
},
Builtin.tagFromName("__atomic_compare_exchange").?,
Builtin.tagFromName("__atomic_compare_exchange_n").?,
Builtin.tagFromName("__c11_atomic_is_lock_free").?,
=> .{ .specifier = .bool },
else => callable_ty.returnType(),
 
Builtin.tagFromName("__c11_atomic_compare_exchange_strong").?,
Builtin.tagFromName("__c11_atomic_compare_exchange_weak").?,
=> {
if (p.list_buf.items.len != 6) return Type.invalid; // wrong number of arguments
const third_param = p.list_buf.items[3];
return p.nodes.items(.ty)[@intFromEnum(third_param)];
},
},
};
}
@@ -4975,15 +5074,19 @@ pub const Result = struct {
.call_expr_one => {
const fn_ptr = p.nodes.items(.data)[@intFromEnum(cur_node)].bin.lhs;
const fn_ty = p.nodes.items(.ty)[@intFromEnum(fn_ptr)].elemType();
if (fn_ty.hasAttribute(.nodiscard)) try p.errStr(.nodiscard_unused, expr_start, "TODO get name");
if (fn_ty.hasAttribute(.warn_unused_result)) try p.errStr(.warn_unused_result, expr_start, "TODO get name");
const cast_info = p.nodes.items(.data)[@intFromEnum(fn_ptr)].cast.operand;
const decl_ref = p.nodes.items(.data)[@intFromEnum(cast_info)].decl_ref;
if (fn_ty.hasAttribute(.nodiscard)) try p.errStr(.nodiscard_unused, expr_start, p.tokSlice(decl_ref));
if (fn_ty.hasAttribute(.warn_unused_result)) try p.errStr(.warn_unused_result, expr_start, p.tokSlice(decl_ref));
return;
},
.call_expr => {
const fn_ptr = p.data.items[p.nodes.items(.data)[@intFromEnum(cur_node)].range.start];
const fn_ty = p.nodes.items(.ty)[@intFromEnum(fn_ptr)].elemType();
if (fn_ty.hasAttribute(.nodiscard)) try p.errStr(.nodiscard_unused, expr_start, "TODO get name");
if (fn_ty.hasAttribute(.warn_unused_result)) try p.errStr(.warn_unused_result, expr_start, "TODO get name");
const cast_info = p.nodes.items(.data)[@intFromEnum(fn_ptr)].cast.operand;
const decl_ref = p.nodes.items(.data)[@intFromEnum(cast_info)].decl_ref;
if (fn_ty.hasAttribute(.nodiscard)) try p.errStr(.nodiscard_unused, expr_start, p.tokSlice(decl_ref));
if (fn_ty.hasAttribute(.warn_unused_result)) try p.errStr(.warn_unused_result, expr_start, p.tokSlice(decl_ref));
return;
},
.stmt_expr => {
@@ -6356,8 +6459,15 @@ fn shiftExpr(p: *Parser) Error!Result {
try rhs.expect(p);
 
if (try lhs.adjustTypes(shr.?, &rhs, p, .integer)) {
if (rhs.val.compare(.lt, Value.zero, p.comp)) {
try p.errStr(.negative_shift_count, shl orelse shr.?, try rhs.str(p));
}
if (rhs.val.compare(.gte, try Value.int(lhs.ty.bitSizeof(p.comp).?, p.comp), p.comp)) {
try p.errStr(.too_big_shift_count, shl orelse shr.?, try rhs.str(p));
}
if (shl != null) {
if (try lhs.val.shl(lhs.val, rhs.val, lhs.ty, p.comp)) try p.errOverflow(shl.?, lhs);
if (try lhs.val.shl(lhs.val, rhs.val, lhs.ty, p.comp) and
lhs.ty.signedness(p.comp) != .unsigned) try p.errOverflow(shl.?, lhs);
} else {
lhs.val = try lhs.val.shr(rhs.val, lhs.ty, p.comp);
}
@@ -6381,9 +6491,11 @@ fn addExpr(p: *Parser) Error!Result {
const lhs_ty = lhs.ty;
if (try lhs.adjustTypes(minus.?, &rhs, p, if (plus != null) .add else .sub)) {
if (plus != null) {
if (try lhs.val.add(lhs.val, rhs.val, lhs.ty, p.comp)) try p.errOverflow(plus.?, lhs);
if (try lhs.val.add(lhs.val, rhs.val, lhs.ty, p.comp) and
lhs.ty.signedness(p.comp) != .unsigned) try p.errOverflow(plus.?, lhs);
} else {
if (try lhs.val.sub(lhs.val, rhs.val, lhs.ty, p.comp)) try p.errOverflow(minus.?, lhs);
if (try lhs.val.sub(lhs.val, rhs.val, lhs.ty, p.comp) and
lhs.ty.signedness(p.comp) != .unsigned) try p.errOverflow(minus.?, lhs);
}
}
if (lhs.ty.specifier != .invalid and lhs_ty.isPtr() and !lhs_ty.isVoidStar() and lhs_ty.elemType().hasIncompleteSize()) {
@@ -6420,9 +6532,11 @@ fn mulExpr(p: *Parser) Error!Result {
 
if (try lhs.adjustTypes(percent.?, &rhs, p, if (tag == .mod_expr) .integer else .arithmetic)) {
if (mul != null) {
if (try lhs.val.mul(lhs.val, rhs.val, lhs.ty, p.comp)) try p.errOverflow(mul.?, lhs);
if (try lhs.val.mul(lhs.val, rhs.val, lhs.ty, p.comp) and
lhs.ty.signedness(p.comp) != .unsigned) try p.errOverflow(mul.?, lhs);
} else if (div != null) {
if (try lhs.val.div(lhs.val, rhs.val, lhs.ty, p.comp)) try p.errOverflow(mul.?, lhs);
if (try lhs.val.div(lhs.val, rhs.val, lhs.ty, p.comp) and
lhs.ty.signedness(p.comp) != .unsigned) try p.errOverflow(mul.?, lhs);
} else {
var res = try Value.rem(lhs.val, rhs.val, lhs.ty, p.comp);
if (res.opt_ref == .none) {
@@ -6827,7 +6941,7 @@ fn unExpr(p: *Parser) Error!Result {
try p.errStr(.invalid_argument_un, tok, try p.typeStr(operand.ty));
 
try operand.usualUnaryConversion(p, tok);
if (operand.val.is(.int, p.comp)) {
if (operand.val.is(.int, p.comp) or operand.val.is(.float, p.comp)) {
_ = try operand.val.sub(Value.zero, operand.val, operand.ty, p.comp);
} else {
operand.val = .{};
@@ -6898,6 +7012,8 @@ fn unExpr(p: *Parser) Error!Result {
if (operand.val.is(.int, p.comp)) {
operand.val = try operand.val.bitNot(operand.ty, p.comp);
}
} else if (operand.ty.isComplex()) {
try p.errStr(.complex_conj, tok, try p.typeStr(operand.ty));
} else {
try p.errStr(.invalid_argument_un, tok, try p.typeStr(operand.ty));
operand.val = .{};
@@ -7334,6 +7450,20 @@ fn checkVaStartArg(p: *Parser, builtin_tok: TokenIndex, first_after: TokenIndex,
}
}
 
/// Checks the type of a single argument, selected by `idx`, and reports a diagnostic at
/// `param_tok` when it is unsuitable:
/// - `idx` 0 and 1 must have an integer type (`overflow_builtin_requires_int`);
/// - `idx` 2 must be a pointer whose element type is a non-const integer that is neither
///   `bool` nor an enum (`overflow_result_requires_ptr`);
/// - any other `idx` is accepted without checks.
/// `builtin_tok` and `first_after` are unused.
fn checkArithOverflowArg(p: *Parser, builtin_tok: TokenIndex, first_after: TokenIndex, param_tok: TokenIndex, arg: *Result, idx: u32) !void {
    _ = builtin_tok;
    _ = first_after;
    switch (idx) {
        0, 1 => {
            if (arg.ty.isInt()) return;
            return p.errStr(.overflow_builtin_requires_int, param_tok, try p.typeStr(arg.ty));
        },
        2 => {
            const acceptable_result = check: {
                if (!arg.ty.isPtr()) break :check false;
                const pointee = arg.ty.elemType();
                if (!pointee.isInt()) break :check false;
                if (pointee.is(.bool)) break :check false;
                if (pointee.is(.@"enum")) break :check false;
                // The result is written through this pointer, so a const pointee is rejected.
                break :check !pointee.qual.@"const";
            };
            if (acceptable_result) return;
            return p.errStr(.overflow_result_requires_ptr, param_tok, try p.typeStr(arg.ty));
        },
        else => {},
    }
}
 
fn checkComplexArg(p: *Parser, builtin_tok: TokenIndex, first_after: TokenIndex, param_tok: TokenIndex, arg: *Result, idx: u32) !void {
_ = builtin_tok;
_ = first_after;
@@ -7880,6 +8010,7 @@ fn charLiteral(p: *Parser) Error!Result {
 
const slice = char_kind.contentSlice(p.tokSlice(p.tok_i));
 
var is_multichar = false;
if (slice.len == 1 and std.ascii.isASCII(slice[0])) {
// fast path: single unescaped ASCII char
val = slice[0];
@@ -7913,7 +8044,7 @@ fn charLiteral(p: *Parser) Error!Result {
},
};
 
const is_multichar = chars.items.len > 1;
is_multichar = chars.items.len > 1;
if (is_multichar) {
if (char_kind == .char and chars.items.len == 4) {
char_literal_parser.warn(.four_char_char_literal, .{ .none = {} });
@@ -7956,9 +8087,19 @@ fn charLiteral(p: *Parser) Error!Result {
else
p.comp.types.intmax;
 
var value = try Value.int(val, p.comp);
// C99 6.4.4.4.10
// > If an integer character constant contains a single character or escape sequence,
// > its value is the one that results when an object with type char whose value is
// > that of the single character or escape sequence is converted to type int.
// This conversion only matters if `char` is signed and has a high-order bit of `1`
if (char_kind == .char and !is_multichar and val > 0x7F and p.comp.getCharSignedness() == .signed) {
try value.intCast(.{ .specifier = .char }, p.comp);
}
 
const res = Result{
.ty = if (p.in_macro) macro_ty else ty,
.val = try Value.int(val, p.comp),
.val = value,
.node = try p.addNode(.{ .tag = .char_literal, .ty = ty, .data = undefined }),
};
if (!p.in_macro) try p.value_map.put(res.node, res.val);
 
lib/compiler/aro/aro/Preprocessor.zig added: 876, removed: 300, total 576
@@ -9,9 +9,12 @@ const Tokenizer = @import("Tokenizer.zig");
const RawToken = Tokenizer.Token;
const Parser = @import("Parser.zig");
const Diagnostics = @import("Diagnostics.zig");
const Token = @import("Tree.zig").Token;
const Tree = @import("Tree.zig");
const Token = Tree.Token;
const TokenWithExpansionLocs = Tree.TokenWithExpansionLocs;
const Attribute = @import("Attribute.zig");
const features = @import("features.zig");
const Hideset = @import("Hideset.zig");
 
const DefineMap = std.StringHashMapUnmanaged(Macro);
const RawTokenList = std.ArrayList(RawToken);
@@ -40,8 +43,6 @@ const Macro = struct {
 
/// Location of macro in the source
loc: Source.Location,
start: u32,
end: u32,
 
fn eql(a: Macro, b: Macro, pp: *Preprocessor) bool {
if (a.tokens.len != b.tokens.len) return false;
@@ -64,11 +65,24 @@ const Macro = struct {
 
const Preprocessor = @This();
 
/// Associates a token index with the expansion locations recorded for that token.
const ExpansionEntry = struct {
/// Index of the token this entry belongs to; `expansionSlice` binary-searches the
/// entries by this field.
idx: Tree.TokenIndex,
/// Many-item pointer to the token's expansion locations; `expansionSlice` scans it
/// up to (not including) the first element whose `id` is `.unused`.
locs: [*]Source.Location,
};
 
/// Snapshot of the lengths of `tokens` and `expansion_entries`, produced by
/// `getTokenState` and consumed by `restoreTokenState`.
const TokenState = struct {
/// Value of `tokens.len` when the snapshot was taken.
tokens_len: usize,
/// Value of `expansion_entries.len` when the snapshot was taken.
expansion_entries_len: usize,
};
 
comp: *Compilation,
gpa: mem.Allocator,
arena: std.heap.ArenaAllocator,
defines: DefineMap = .{},
/// Do not directly mutate this; use addToken / addTokenAssumeCapacity / ensureTotalTokenCapacity / ensureUnusedTokenCapacity
tokens: Token.List = .{},
/// Do not directly mutate this; must be kept in sync with `tokens`
expansion_entries: std.MultiArrayList(ExpansionEntry) = .{},
token_buf: RawTokenList,
char_buf: std.ArrayList(u8),
/// Counter that is incremented each time preprocess() is called
@@ -93,6 +107,8 @@ preserve_whitespace: bool = false,
/// linemarker tokens. Must be .none unless in -E mode (parser does not handle linemarkers)
linemarkers: Linemarkers = .none,
 
hideset: Hideset,
 
pub const parse = Parser.parse;
 
pub const Linemarkers = enum {
@@ -113,6 +129,7 @@ pub fn init(comp: *Compilation) Preprocessor {
.char_buf = std.ArrayList(u8).init(comp.gpa),
.poisoned_identifiers = std.StringHashMap(void).init(comp.gpa),
.top_expansion_buf = ExpandBuf.init(comp.gpa),
.hideset = .{ .comp = comp },
};
comp.pragmaEvent(.before_preprocess);
return pp;
@@ -201,8 +218,6 @@ fn addBuiltinMacro(pp: *Preprocessor, name: []const u8, is_func: bool, tokens: [
.var_args = false,
.is_func = is_func,
.loc = .{ .id = .generated },
.start = 0,
.end = 0,
.is_builtin = true,
});
}
@@ -228,7 +243,6 @@ pub fn addBuiltinMacros(pp: *Preprocessor) !void {
 
pub fn deinit(pp: *Preprocessor) void {
pp.defines.deinit(pp.gpa);
for (pp.tokens.items(.expansion_locs)) |loc| Token.free(loc, pp.gpa);
pp.tokens.deinit(pp.gpa);
pp.arena.deinit();
pp.token_buf.deinit();
@@ -236,6 +250,33 @@ pub fn deinit(pp: *Preprocessor) void {
pp.poisoned_identifiers.deinit();
pp.include_guards.deinit(pp.gpa);
pp.top_expansion_buf.deinit();
pp.hideset.deinit();
for (pp.expansion_entries.items(.locs)) |locs| TokenWithExpansionLocs.free(locs, pp.gpa);
pp.expansion_entries.deinit(pp.gpa);
}
 
/// Free buffers that are not needed after preprocessing.
/// Calls `clearAndFree` on `token_buf`, `char_buf`, `top_expansion_buf` and `hideset`;
/// `tokens` and `expansion_entries` are not touched here.
fn clearBuffers(pp: *Preprocessor) void {
pp.token_buf.clearAndFree();
pp.char_buf.clearAndFree();
pp.top_expansion_buf.clearAndFree();
pp.hideset.clearAndFree();
}
 
/// Returns the expansion locations recorded for token `tok`, or an empty slice when
/// `expansion_entries` holds no entry for it.
/// The entry is located by binary search over the `idx` column, so that column must be
/// in ascending order. The returned slice ends just before the first location whose
/// `id` is `.unused`.
pub fn expansionSlice(pp: *Preprocessor, tok: Tree.TokenIndex) []Source.Location {
    const Cmp = struct {
        fn byTokenIndex(_: void, key: Tree.TokenIndex, candidate: Tree.TokenIndex) std.math.Order {
            return std.math.order(key, candidate);
        }
    };

    const token_indices = pp.expansion_entries.items(.idx);
    const found = std.sort.binarySearch(Tree.TokenIndex, tok, token_indices, {}, Cmp.byTokenIndex);
    const entry_index = found orelse return &.{};

    const locs = pp.expansion_entries.items(.locs)[entry_index];
    // `locs` is a many-item pointer with no stored length: count up to the `.unused` sentinel.
    var len: usize = 0;
    while (true) : (len += 1) {
        if (locs[len].id == .unused) break;
    }
    return locs[0..len];
}
 
/// Preprocess a compilation unit of sources into a parsable list of tokens.
@@ -247,13 +288,14 @@ pub fn preprocessSources(pp: *Preprocessor, sources: []const Source) Error!void
try pp.addIncludeStart(header);
_ = try pp.preprocess(header);
}
try pp.addIncludeResume(first.id, 0, 0);
try pp.addIncludeResume(first.id, 0, 1);
const eof = try pp.preprocess(first);
try pp.tokens.append(pp.comp.gpa, eof);
try pp.addToken(eof);
pp.clearBuffers();
}
 
/// Preprocess a source file, returns eof token.
pub fn preprocess(pp: *Preprocessor, source: Source) Error!Token {
pub fn preprocess(pp: *Preprocessor, source: Source) Error!TokenWithExpansionLocs {
const eof = pp.preprocessExtra(source) catch |er| switch (er) {
// This cannot occur in the main file and is handled in `include`.
error.StopPreprocessing => unreachable,
@@ -275,27 +317,27 @@ pub fn tokenize(pp: *Preprocessor, source: Source) Error!Token {
 
// Estimate how many new tokens this source will contain.
const estimated_token_count = source.buf.len / 8;
try pp.tokens.ensureTotalCapacity(pp.gpa, pp.tokens.len + estimated_token_count);
try pp.ensureTotalTokenCapacity(pp.tokens.len + estimated_token_count);
 
while (true) {
const tok = tokenizer.next();
if (tok.id == .eof) return tokFromRaw(tok);
try pp.tokens.append(pp.gpa, tokFromRaw(tok));
try pp.addToken(tokFromRaw(tok));
}
}
 
pub fn addIncludeStart(pp: *Preprocessor, source: Source) !void {
if (pp.linemarkers == .none) return;
try pp.tokens.append(pp.gpa, .{ .id = .include_start, .loc = .{
try pp.addToken(.{ .id = .include_start, .loc = .{
.id = source.id,
.byte_offset = std.math.maxInt(u32),
.line = 0,
.line = 1,
} });
}
 
pub fn addIncludeResume(pp: *Preprocessor, source: Source.Id, offset: u32, line: u32) !void {
if (pp.linemarkers == .none) return;
try pp.tokens.append(pp.gpa, .{ .id = .include_resume, .loc = .{
try pp.addToken(.{ .id = .include_resume, .loc = .{
.id = source,
.byte_offset = offset,
.line = line,
@@ -328,7 +370,7 @@ fn findIncludeGuard(pp: *Preprocessor, source: Source) ?[]const u8 {
return pp.tokSlice(guard);
}
 
fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token {
fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!TokenWithExpansionLocs {
var guard_name = pp.findIncludeGuard(source);
 
pp.preprocess_count += 1;
@@ -340,7 +382,7 @@ fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token {
 
// Estimate how many new tokens this source will contain.
const estimated_token_count = source.buf.len / 8;
try pp.tokens.ensureTotalCapacity(pp.gpa, pp.tokens.len + estimated_token_count);
try pp.ensureTotalTokenCapacity(pp.tokens.len + estimated_token_count);
 
var if_level: u8 = 0;
var if_kind = std.PackedIntArray(u2, 256).init([1]u2{0} ** 256);
@@ -352,7 +394,7 @@ fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token {
while (true) {
var tok = tokenizer.next();
switch (tok.id) {
.hash => if (!start_of_line) try pp.tokens.append(pp.gpa, tokFromRaw(tok)) else {
.hash => if (!start_of_line) try pp.addToken(tokFromRaw(tok)) else {
const directive = tokenizer.nextNoWS();
switch (directive.id) {
.keyword_error, .keyword_warning => {
@@ -654,13 +696,13 @@ fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token {
}
if (pp.preserve_whitespace) {
tok.id = .nl;
try pp.tokens.append(pp.gpa, tokFromRaw(tok));
try pp.addToken(tokFromRaw(tok));
}
},
.whitespace => if (pp.preserve_whitespace) try pp.tokens.append(pp.gpa, tokFromRaw(tok)),
.whitespace => if (pp.preserve_whitespace) try pp.addToken(tokFromRaw(tok)),
.nl => {
start_of_line = true;
if (pp.preserve_whitespace) try pp.tokens.append(pp.gpa, tokFromRaw(tok));
if (pp.preserve_whitespace) try pp.addToken(tokFromRaw(tok));
},
.eof => {
if (if_level != 0) try pp.err(tok, .unterminated_conditional_directive);
@@ -696,14 +738,14 @@ fn preprocessExtra(pp: *Preprocessor, source: Source) MacroError!Token {
 
/// Get raw token source string.
/// Returned slice is invalidated when comp.generated_buf is updated.
pub fn tokSlice(pp: *Preprocessor, token: RawToken) []const u8 {
pub fn tokSlice(pp: *Preprocessor, token: anytype) []const u8 {
if (token.id.lexeme()) |some| return some;
const source = pp.comp.getSource(token.source);
return source.buf[token.start..token.end];
}
 
/// Convert a token from the Tokenizer into a token used by the parser.
fn tokFromRaw(raw: RawToken) Token {
fn tokFromRaw(raw: RawToken) TokenWithExpansionLocs {
return .{
.id = raw.id,
.loc = .{
@@ -725,7 +767,7 @@ fn err(pp: *Preprocessor, raw: RawToken, tag: Diagnostics.Tag) !void {
}, &.{});
}
 
fn errStr(pp: *Preprocessor, tok: Token, tag: Diagnostics.Tag, str: []const u8) !void {
fn errStr(pp: *Preprocessor, tok: TokenWithExpansionLocs, tag: Diagnostics.Tag, str: []const u8) !void {
try pp.comp.addDiagnostic(.{
.tag = tag,
.loc = tok.loc,
@@ -747,7 +789,7 @@ fn fatal(pp: *Preprocessor, raw: RawToken, comptime fmt: []const u8, args: anyty
return error.FatalError;
}
 
fn fatalNotFound(pp: *Preprocessor, tok: Token, filename: []const u8) Compilation.Error {
fn fatalNotFound(pp: *Preprocessor, tok: TokenWithExpansionLocs, filename: []const u8) Compilation.Error {
const old = pp.comp.diagnostics.fatal_errors;
pp.comp.diagnostics.fatal_errors = true;
defer pp.comp.diagnostics.fatal_errors = old;
@@ -790,7 +832,7 @@ fn expectNl(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
while (true) {
const tok = tokenizer.next();
if (tok.id == .nl or tok.id == .eof) return;
if (tok.id == .whitespace) continue;
if (tok.id == .whitespace or tok.id == .comment) continue;
if (!sent_err) {
sent_err = true;
try pp.err(tok, .extra_tokens_directive_end);
@@ -798,12 +840,24 @@ fn expectNl(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
}
}
 
/// Captures the current lengths of `tokens` and `expansion_entries` so that both can be
/// rolled back together with `restoreTokenState`.
fn getTokenState(pp: *const Preprocessor) TokenState {
    const tokens_len = pp.tokens.len;
    const expansion_entries_len = pp.expansion_entries.len;
    return TokenState{
        .tokens_len = tokens_len,
        .expansion_entries_len = expansion_entries_len,
    };
}
 
/// Resets `tokens` and `expansion_entries` to the lengths recorded in `state`.
/// Only the two length fields are written; nothing is freed here.
fn restoreTokenState(pp: *Preprocessor, state: TokenState) void {
    const saved_entries_len = state.expansion_entries_len;
    const saved_tokens_len = state.tokens_len;
    pp.expansion_entries.len = saved_entries_len;
    pp.tokens.len = saved_tokens_len;
}
 
/// Consume all tokens until a newline and parse the result into a boolean.
fn expr(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!bool {
const start = pp.tokens.len;
const token_state = pp.getTokenState();
defer {
for (pp.top_expansion_buf.items) |tok| Token.free(tok.expansion_locs, pp.gpa);
pp.tokens.len = start;
for (pp.top_expansion_buf.items) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
pp.restoreTokenState(token_state);
}
 
pp.top_expansion_buf.items.len = 0;
@@ -818,6 +872,7 @@ fn expr(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!bool {
} else unreachable;
if (pp.top_expansion_buf.items.len != 0) {
pp.expansion_source_loc = pp.top_expansion_buf.items[0].loc;
pp.hideset.clearRetainingCapacity();
try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, pp.top_expansion_buf.items.len, false, .expr);
}
for (pp.top_expansion_buf.items) |tok| {
@@ -836,7 +891,7 @@ fn expr(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!bool {
}
 
// validate the tokens in the expression
try pp.tokens.ensureUnusedCapacity(pp.gpa, pp.top_expansion_buf.items.len);
try pp.ensureUnusedTokenCapacity(pp.top_expansion_buf.items.len);
var i: usize = 0;
const items = pp.top_expansion_buf.items;
while (i < items.len) : (i += 1) {
@@ -905,9 +960,9 @@ fn expr(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!bool {
}
},
}
pp.tokens.appendAssumeCapacity(tok);
pp.addTokenAssumeCapacity(tok);
}
try pp.tokens.append(pp.gpa, .{
try pp.addToken(.{
.id = .eof,
.loc = tokFromRaw(eof).loc,
});
@@ -918,7 +973,7 @@ fn expr(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!bool {
.comp = pp.comp,
.gpa = pp.gpa,
.tok_ids = pp.tokens.items(.id),
.tok_i = @intCast(start),
.tok_i = @intCast(token_state.tokens_len),
.arena = pp.arena.allocator(),
.in_macro = true,
.strings = std.ArrayList(u8).init(pp.comp.gpa),
@@ -941,7 +996,7 @@ fn expr(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!bool {
 
/// Turns macro_tok from .keyword_defined into .zero or .one depending on whether the argument is defined
/// Returns the number of tokens consumed
fn handleKeywordDefined(pp: *Preprocessor, macro_tok: *Token, tokens: []const Token, eof: RawToken) !usize {
fn handleKeywordDefined(pp: *Preprocessor, macro_tok: *TokenWithExpansionLocs, tokens: []const TokenWithExpansionLocs, eof: RawToken) !usize {
std.debug.assert(macro_tok.id == .keyword_defined);
var it = TokenIterator.init(tokens);
const first = it.nextNoWS() orelse {
@@ -1056,7 +1111,7 @@ fn skip(
tokenizer.index += 1;
tokenizer.line += 1;
if (pp.preserve_whitespace) {
try pp.tokens.append(pp.gpa, .{ .id = .nl, .loc = .{
try pp.addToken(.{ .id = .nl, .loc = .{
.id = tokenizer.source,
.line = tokenizer.line,
} });
@@ -1079,21 +1134,21 @@ fn skipToNl(tokenizer: *Tokenizer) void {
}
}
 
const ExpandBuf = std.ArrayList(Token);
const ExpandBuf = std.ArrayList(TokenWithExpansionLocs);
fn removePlacemarkers(buf: *ExpandBuf) void {
var i: usize = buf.items.len -% 1;
while (i < buf.items.len) : (i -%= 1) {
if (buf.items[i].id == .placemarker) {
const placemarker = buf.orderedRemove(i);
Token.free(placemarker.expansion_locs, buf.allocator);
TokenWithExpansionLocs.free(placemarker.expansion_locs, buf.allocator);
}
}
}
 
const MacroArguments = std.ArrayList([]const Token);
const MacroArguments = std.ArrayList([]const TokenWithExpansionLocs);
fn deinitMacroArguments(allocator: Allocator, args: *const MacroArguments) void {
for (args.items) |item| {
for (item) |tok| Token.free(tok.expansion_locs, allocator);
for (item) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, allocator);
allocator.free(item);
}
args.deinit();
@@ -1102,6 +1157,10 @@ fn deinitMacroArguments(allocator: Allocator, args: *const MacroArguments) void
fn expandObjMacro(pp: *Preprocessor, simple_macro: *const Macro) Error!ExpandBuf {
var buf = ExpandBuf.init(pp.gpa);
errdefer buf.deinit();
if (simple_macro.tokens.len == 0) {
try buf.append(.{ .id = .placemarker, .loc = .{ .id = .generated } });
return buf;
}
try buf.ensureTotalCapacity(simple_macro.tokens.len);
 
// Add all of the simple_macros tokens to the new buffer handling any concats.
@@ -1161,7 +1220,7 @@ fn expandObjMacro(pp: *Preprocessor, simple_macro: *const Macro) Error!ExpandBuf
/// Returns error.ExpectedStringLiteral if parentheses are not balanced, a non-string-literal
/// is encountered, or if no string literals are encountered
/// TODO: destringize (replace all '\\' with a single `\` and all '\"' with a '"')
fn pasteStringsUnsafe(pp: *Preprocessor, toks: []const Token) ![]const u8 {
fn pasteStringsUnsafe(pp: *Preprocessor, toks: []const TokenWithExpansionLocs) ![]const u8 {
const char_top = pp.char_buf.items.len;
defer pp.char_buf.items.len = char_top;
var unwrapped = toks;
@@ -1180,7 +1239,7 @@ fn pasteStringsUnsafe(pp: *Preprocessor, toks: []const Token) ![]const u8 {
}
 
/// Handle the _Pragma operator (implemented as a builtin macro)
fn pragmaOperator(pp: *Preprocessor, arg_tok: Token, operator_loc: Source.Location) !void {
fn pragmaOperator(pp: *Preprocessor, arg_tok: TokenWithExpansionLocs, operator_loc: Source.Location) !void {
const arg_slice = pp.expandedSlice(arg_tok);
const content = arg_slice[1 .. arg_slice.len - 1];
const directive = "#pragma ";
@@ -1234,7 +1293,7 @@ fn destringify(pp: *Preprocessor, str: []const u8) void {
 
/// Stringify `tokens` into pp.char_buf.
/// See https://gcc.gnu.org/onlinedocs/gcc-11.2.0/cpp/Stringizing.html#Stringizing
fn stringify(pp: *Preprocessor, tokens: []const Token) !void {
fn stringify(pp: *Preprocessor, tokens: []const TokenWithExpansionLocs) !void {
try pp.char_buf.append('"');
var ws_state: enum { start, need, not_needed } = .start;
for (tokens) |tok| {
@@ -1281,7 +1340,8 @@ fn stringify(pp: *Preprocessor, tokens: []const Token) !void {
try pp.char_buf.appendSlice("\"\n");
}
 
fn reconstructIncludeString(pp: *Preprocessor, param_toks: []const Token, embed_args: ?*[]const Token) !?[]const u8 {
fn reconstructIncludeString(pp: *Preprocessor, param_toks: []const TokenWithExpansionLocs, embed_args: ?*[]const TokenWithExpansionLocs, first: TokenWithExpansionLocs) !?[]const u8 {
assert(param_toks.len != 0);
const char_top = pp.char_buf.items.len;
defer pp.char_buf.items.len = char_top;
 
@@ -1295,8 +1355,8 @@ fn reconstructIncludeString(pp: *Preprocessor, param_toks: []const Token, embed_
if (params.len == 0) {
try pp.comp.addDiagnostic(.{
.tag = .expected_filename,
.loc = param_toks[0].loc,
}, param_toks[0].expansionSlice());
.loc = first.loc,
}, first.expansionSlice());
return null;
}
// no string pasting
@@ -1321,6 +1381,13 @@ fn reconstructIncludeString(pp: *Preprocessor, param_toks: []const Token, embed_
 
const include_str = pp.char_buf.items[char_top..];
if (include_str.len < 3) {
if (include_str.len == 0) {
try pp.comp.addDiagnostic(.{
.tag = .expected_filename,
.loc = first.loc,
}, first.expansionSlice());
return null;
}
try pp.comp.addDiagnostic(.{
.tag = .empty_filename,
.loc = params[0].loc,
@@ -1356,7 +1423,7 @@ fn reconstructIncludeString(pp: *Preprocessor, param_toks: []const Token, embed_
}
}
 
fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []const Token, src_loc: Source.Location) Error!bool {
fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []const TokenWithExpansionLocs, src_loc: Source.Location) Error!bool {
switch (builtin) {
.macro_param_has_attribute,
.macro_param_has_declspec_attribute,
@@ -1364,8 +1431,8 @@ fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []con
.macro_param_has_extension,
.macro_param_has_builtin,
=> {
var invalid: ?Token = null;
var identifier: ?Token = null;
var invalid: ?TokenWithExpansionLocs = null;
var identifier: ?TokenWithExpansionLocs = null;
for (param_toks) |tok| {
if (tok.id == .macro_ws) continue;
if (tok.id == .comment) continue;
@@ -1415,8 +1482,8 @@ fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []con
return Diagnostics.warningExists(warning_name);
},
.macro_param_is_identifier => {
var invalid: ?Token = null;
var identifier: ?Token = null;
var invalid: ?TokenWithExpansionLocs = null;
var identifier: ?TokenWithExpansionLocs = null;
for (param_toks) |tok| switch (tok.id) {
.macro_ws => continue,
.comment => continue,
@@ -1438,7 +1505,7 @@ fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []con
return id == .identifier or id == .extended_identifier;
},
.macro_param_has_include, .macro_param_has_include_next => {
const include_str = (try pp.reconstructIncludeString(param_toks, null)) orelse return false;
const include_str = (try pp.reconstructIncludeString(param_toks, null, param_toks[0])) orelse return false;
const include_type: Compilation.IncludeType = switch (include_str[0]) {
'"' => .quotes,
'<' => .angle_brackets,
@@ -1460,6 +1527,17 @@ fn handleBuiltinMacro(pp: *Preprocessor, builtin: RawToken.Id, param_toks: []con
}
}
 
/// Returns `args` unchanged as soon as it contains any token other than `.macro_ws`.
/// An argument list that is empty or consists solely of `.macro_ws` tokens is replaced
/// by a one-element slice holding a generated `.placemarker` token.
fn getPasteArgs(args: []const TokenWithExpansionLocs) []const TokenWithExpansionLocs {
    var i: usize = 0;
    while (i < args.len) : (i += 1) {
        if (args[i].id != .macro_ws) return args;
    }
    return &[1]TokenWithExpansionLocs{.{
        .id = .placemarker,
        .loc = .{ .id = .generated, .byte_offset = 0, .line = 0 },
    }};
}
 
fn expandFuncMacro(
pp: *Preprocessor,
loc: Source.Location,
@@ -1482,7 +1560,7 @@ fn expandFuncMacro(
try variable_arguments.appendSlice(args.items[i]);
try expanded_variable_arguments.appendSlice(expanded_args.items[i]);
if (i != expanded_args.items.len - 1) {
const comma = Token{ .id = .comma, .loc = .{ .id = .generated } };
const comma = TokenWithExpansionLocs{ .id = .comma, .loc = .{ .id = .generated } };
try variable_arguments.append(comma);
try expanded_variable_arguments.append(comma);
}
@@ -1507,28 +1585,22 @@ fn expandFuncMacro(
.comment => if (!pp.comp.langopts.preserve_comments_in_macros)
continue
else
&[1]Token{tokFromRaw(raw_next)},
.macro_param, .macro_param_no_expand => if (args.items[raw_next.end].len > 0)
args.items[raw_next.end]
else
&[1]Token{tokFromRaw(.{ .id = .placemarker, .source = .generated })},
&[1]TokenWithExpansionLocs{tokFromRaw(raw_next)},
.macro_param, .macro_param_no_expand => getPasteArgs(args.items[raw_next.end]),
.keyword_va_args => variable_arguments.items,
.keyword_va_opt => blk: {
try pp.expandVaOpt(&va_opt_buf, raw_next, variable_arguments.items.len != 0);
if (va_opt_buf.items.len == 0) break;
break :blk va_opt_buf.items;
},
else => &[1]Token{tokFromRaw(raw_next)},
else => &[1]TokenWithExpansionLocs{tokFromRaw(raw_next)},
};
 
try pp.pasteTokens(&buf, next);
if (next.len != 0) break;
},
.macro_param_no_expand => {
const slice = if (args.items[raw.end].len > 0)
args.items[raw.end]
else
&[1]Token{tokFromRaw(.{ .id = .placemarker, .source = .generated })};
const slice = getPasteArgs(args.items[raw.end]);
const raw_loc = Source.Location{ .id = raw.source, .byte_offset = raw.start, .line = raw.line };
try bufCopyTokens(&buf, slice, &.{raw_loc});
},
@@ -1587,10 +1659,10 @@ fn expandFuncMacro(
try pp.comp.addDiagnostic(.{ .tag = .expected_arguments, .loc = loc, .extra = extra }, &.{});
break :blk not_found;
} else res: {
var invalid: ?Token = null;
var vendor_ident: ?Token = null;
var colon_colon: ?Token = null;
var attr_ident: ?Token = null;
var invalid: ?TokenWithExpansionLocs = null;
var vendor_ident: ?TokenWithExpansionLocs = null;
var colon_colon: ?TokenWithExpansionLocs = null;
var attr_ident: ?TokenWithExpansionLocs = null;
for (arg) |tok| {
if (tok.id == .macro_ws) continue;
if (tok.id == .comment) continue;
@@ -1663,17 +1735,17 @@ fn expandFuncMacro(
try pp.comp.addDiagnostic(.{ .tag = .expected_arguments, .loc = loc, .extra = extra }, &.{});
break :blk not_found;
} else res: {
var embed_args: []const Token = &.{};
const include_str = (try pp.reconstructIncludeString(arg, &embed_args)) orelse
var embed_args: []const TokenWithExpansionLocs = &.{};
const include_str = (try pp.reconstructIncludeString(arg, &embed_args, arg[0])) orelse
break :res not_found;
 
var prev = tokFromRaw(raw);
prev.id = .eof;
var it: struct {
i: u32 = 0,
slice: []const Token,
prev: Token,
fn next(it: *@This()) Token {
slice: []const TokenWithExpansionLocs,
prev: TokenWithExpansionLocs,
fn next(it: *@This()) TokenWithExpansionLocs {
while (it.i < it.slice.len) switch (it.slice[it.i].id) {
.macro_ws, .whitespace => it.i += 1,
else => break,
@@ -1732,7 +1804,7 @@ fn expandFuncMacro(
};
 
var arg_count: u32 = 0;
var first_arg: Token = undefined;
var first_arg: TokenWithExpansionLocs = undefined;
while (true) {
const next = it.next();
if (next.id == .eof) {
@@ -1793,8 +1865,8 @@ fn expandFuncMacro(
// Clang and GCC require exactly one token (so, no parentheses or string pasting)
// even though their error messages indicate otherwise. Ours is slightly more
// descriptive.
var invalid: ?Token = null;
var string: ?Token = null;
var invalid: ?TokenWithExpansionLocs = null;
var string: ?TokenWithExpansionLocs = null;
for (param_toks) |tok| switch (tok.id) {
.string_literal => {
if (string) |_| invalid = tok else string = tok;
@@ -1884,27 +1956,11 @@ fn expandVaOpt(
}
}
 
/// Returns whether `tok` may be expanded as `macro`.
/// Returns false when the token's own location, or any location in its expansion slice,
/// has the same source id as `macro.loc` and a byte offset within
/// `macro.start` through `macro.end` (both inclusive), or when
/// `tok.flags.expansion_disabled` is set. Returns true otherwise.
fn shouldExpand(tok: Token, macro: *Macro) bool {
// The token's own location falls inside the macro's byte range.
if (tok.loc.id == macro.loc.id and
tok.loc.byte_offset >= macro.start and
tok.loc.byte_offset <= macro.end)
return false;
// One of the locations the token was expanded from falls inside the macro's byte range.
for (tok.expansionSlice()) |loc| {
if (loc.id == macro.loc.id and
loc.byte_offset >= macro.start and
loc.byte_offset <= macro.end)
return false;
}
if (tok.flags.expansion_disabled) return false;
 
return true;
}
 
fn bufCopyTokens(buf: *ExpandBuf, tokens: []const Token, src: []const Source.Location) !void {
fn bufCopyTokens(buf: *ExpandBuf, tokens: []const TokenWithExpansionLocs, src: []const Source.Location) !void {
try buf.ensureUnusedCapacity(tokens.len);
for (tokens) |tok| {
var copy = try tok.dupe(buf.allocator);
errdefer Token.free(copy.expansion_locs, buf.allocator);
errdefer TokenWithExpansionLocs.free(copy.expansion_locs, buf.allocator);
try copy.addExpansionLocation(buf.allocator, src);
buf.appendAssumeCapacity(copy);
}
@@ -1917,7 +1973,7 @@ fn nextBufToken(
start_idx: *usize,
end_idx: *usize,
extend_buf: bool,
) Error!Token {
) Error!TokenWithExpansionLocs {
start_idx.* += 1;
if (start_idx.* == buf.items.len and start_idx.* >= end_idx.*) {
if (extend_buf) {
@@ -1933,7 +1989,7 @@ fn nextBufToken(
try buf.append(new_tok);
return new_tok;
} else {
return Token{ .id = .eof, .loc = .{ .id = .generated } };
return TokenWithExpansionLocs{ .id = .eof, .loc = .{ .id = .generated } };
}
} else {
return buf.items[start_idx.*];
@@ -1948,6 +2004,7 @@ fn collectMacroFuncArguments(
end_idx: *usize,
extend_buf: bool,
is_builtin: bool,
r_paren: *TokenWithExpansionLocs,
) !MacroArguments {
const name_tok = buf.items[start_idx.*];
const saved_tokenizer = tokenizer.*;
@@ -1974,7 +2031,7 @@ fn collectMacroFuncArguments(
var parens: u32 = 0;
var args = MacroArguments.init(pp.gpa);
errdefer deinitMacroArguments(pp.gpa, &args);
var curArgument = std.ArrayList(Token).init(pp.gpa);
var curArgument = std.ArrayList(TokenWithExpansionLocs).init(pp.gpa);
defer curArgument.deinit();
while (true) {
var tok = try nextBufToken(pp, tokenizer, buf, start_idx, end_idx, extend_buf);
@@ -1987,13 +2044,13 @@ fn collectMacroFuncArguments(
try args.append(owned);
} else {
const duped = try tok.dupe(pp.gpa);
errdefer Token.free(duped.expansion_locs, pp.gpa);
errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa);
try curArgument.append(duped);
}
},
.l_paren => {
const duped = try tok.dupe(pp.gpa);
errdefer Token.free(duped.expansion_locs, pp.gpa);
errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa);
try curArgument.append(duped);
parens += 1;
},
@@ -2002,10 +2059,11 @@ fn collectMacroFuncArguments(
const owned = try curArgument.toOwnedSlice();
errdefer pp.gpa.free(owned);
try args.append(owned);
r_paren.* = tok;
break;
} else {
const duped = try tok.dupe(pp.gpa);
errdefer Token.free(duped.expansion_locs, pp.gpa);
errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa);
try curArgument.append(duped);
parens -= 1;
}
@@ -2028,7 +2086,7 @@ fn collectMacroFuncArguments(
},
else => {
const duped = try tok.dupe(pp.gpa);
errdefer Token.free(duped.expansion_locs, pp.gpa);
errdefer TokenWithExpansionLocs.free(duped.expansion_locs, pp.gpa);
try curArgument.append(duped);
},
}
@@ -2038,7 +2096,7 @@ fn collectMacroFuncArguments(
}
 
fn removeExpandedTokens(pp: *Preprocessor, buf: *ExpandBuf, start: usize, len: usize, moving_end_idx: *usize) !void {
for (buf.items[start .. start + len]) |tok| Token.free(tok.expansion_locs, pp.gpa);
for (buf.items[start .. start + len]) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
try buf.replaceRange(start, len, &.{});
moving_end_idx.* -|= len;
}
@@ -2054,14 +2112,14 @@ const EvalContext = enum {
 
/// Helper for safely iterating over a slice of tokens while skipping whitespace
const TokenIterator = struct {
toks: []const Token,
toks: []const TokenWithExpansionLocs,
i: usize,
 
fn init(toks: []const Token) TokenIterator {
fn init(toks: []const TokenWithExpansionLocs) TokenIterator {
return .{ .toks = toks, .i = 0 };
}
 
fn nextNoWS(self: *TokenIterator) ?Token {
fn nextNoWS(self: *TokenIterator) ?TokenWithExpansionLocs {
while (self.i < self.toks.len) : (self.i += 1) {
const tok = self.toks[self.i];
if (tok.id == .whitespace or tok.id == .macro_ws) continue;
@@ -2108,13 +2166,24 @@ fn expandMacroExhaustive(
idx += it.i;
continue;
}
const macro_entry = pp.defines.getPtr(pp.expandedSlice(macro_tok));
if (macro_entry == null or !shouldExpand(buf.items[idx], macro_entry.?)) {
if (!macro_tok.id.isMacroIdentifier() or macro_tok.flags.expansion_disabled) {
idx += 1;
continue;
}
if (macro_entry) |macro| macro_handler: {
const expanded = pp.expandedSlice(macro_tok);
const macro = pp.defines.getPtr(expanded) orelse {
idx += 1;
continue;
};
const macro_hidelist = pp.hideset.get(macro_tok.loc);
if (pp.hideset.contains(macro_hidelist, expanded)) {
idx += 1;
continue;
}
 
macro_handler: {
if (macro.is_func) {
var r_paren: TokenWithExpansionLocs = undefined;
var macro_scan_idx = idx;
// to be saved in case this doesn't turn out to be a call
const args = pp.collectMacroFuncArguments(
@@ -2124,6 +2193,7 @@ fn expandMacroExhaustive(
&moving_end_idx,
extend_buf,
macro.is_builtin,
&r_paren,
) catch |er| switch (er) {
error.MissingLParen => {
if (!buf.items[idx].flags.is_macro_arg) buf.items[idx].flags.expansion_disabled = true;
@@ -2137,12 +2207,16 @@ fn expandMacroExhaustive(
},
else => |e| return e,
};
assert(r_paren.id == .r_paren);
defer {
for (args.items) |item| {
pp.gpa.free(item);
}
args.deinit();
}
const r_paren_hidelist = pp.hideset.get(r_paren.loc);
var hs = try pp.hideset.intersection(macro_hidelist, r_paren_hidelist);
hs = try pp.hideset.prepend(macro_tok.loc, hs);
 
var args_count: u32 = @intCast(args.items.len);
// if the macro has zero arguments g() args_count is still 1
@@ -2199,10 +2273,13 @@ fn expandMacroExhaustive(
for (res.items) |*tok| {
try tok.addExpansionLocation(pp.gpa, &.{macro_tok.loc});
try tok.addExpansionLocation(pp.gpa, macro_expansion_locs);
const tok_hidelist = pp.hideset.get(tok.loc);
const new_hidelist = try pp.hideset.@"union"(tok_hidelist, hs);
try pp.hideset.put(tok.loc, new_hidelist);
}
 
const tokens_removed = macro_scan_idx - idx + 1;
for (buf.items[idx .. idx + tokens_removed]) |tok| Token.free(tok.expansion_locs, pp.gpa);
for (buf.items[idx .. idx + tokens_removed]) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
try buf.replaceRange(idx, tokens_removed, res.items);
 
moving_end_idx += tokens_added;
@@ -2215,12 +2292,19 @@ fn expandMacroExhaustive(
const res = try pp.expandObjMacro(macro);
defer res.deinit();
 
const hs = try pp.hideset.prepend(macro_tok.loc, macro_hidelist);
 
const macro_expansion_locs = macro_tok.expansionSlice();
var increment_idx_by = res.items.len;
for (res.items, 0..) |*tok, i| {
tok.flags.is_macro_arg = macro_tok.flags.is_macro_arg;
try tok.addExpansionLocation(pp.gpa, &.{macro_tok.loc});
try tok.addExpansionLocation(pp.gpa, macro_expansion_locs);
 
const tok_hidelist = pp.hideset.get(tok.loc);
const new_hidelist = try pp.hideset.@"union"(tok_hidelist, hs);
try pp.hideset.put(tok.loc, new_hidelist);
 
if (tok.id == .keyword_defined and eval_ctx == .expr) {
try pp.comp.addDiagnostic(.{
.tag = .expansion_to_defined,
@@ -2233,7 +2317,7 @@ fn expandMacroExhaustive(
}
}
 
Token.free(buf.items[idx].expansion_locs, pp.gpa);
TokenWithExpansionLocs.free(buf.items[idx].expansion_locs, pp.gpa);
try buf.replaceRange(idx, 1, res.items);
idx += increment_idx_by;
moving_end_idx = moving_end_idx + res.items.len - 1;
@@ -2249,7 +2333,7 @@ fn expandMacroExhaustive(
 
// trim excess buffer
for (buf.items[moving_end_idx..]) |item| {
Token.free(item.expansion_locs, pp.gpa);
TokenWithExpansionLocs.free(item.expansion_locs, pp.gpa);
}
buf.items.len = moving_end_idx;
}
@@ -2260,30 +2344,35 @@ fn expandMacro(pp: *Preprocessor, tokenizer: *Tokenizer, raw: RawToken) MacroErr
var source_tok = tokFromRaw(raw);
if (!raw.id.isMacroIdentifier()) {
source_tok.id.simplifyMacroKeyword();
return pp.tokens.append(pp.gpa, source_tok);
return pp.addToken(source_tok);
}
pp.top_expansion_buf.items.len = 0;
try pp.top_expansion_buf.append(source_tok);
pp.expansion_source_loc = source_tok.loc;
 
pp.hideset.clearRetainingCapacity();
try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, 1, true, .non_expr);
try pp.tokens.ensureUnusedCapacity(pp.gpa, pp.top_expansion_buf.items.len);
try pp.ensureUnusedTokenCapacity(pp.top_expansion_buf.items.len);
for (pp.top_expansion_buf.items) |*tok| {
if (tok.id == .macro_ws and !pp.preserve_whitespace) {
Token.free(tok.expansion_locs, pp.gpa);
TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
continue;
}
if (tok.id == .comment and !pp.comp.langopts.preserve_comments_in_macros) {
Token.free(tok.expansion_locs, pp.gpa);
TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
continue;
}
if (tok.id == .placemarker) {
TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
continue;
}
tok.id.simplifyMacroKeywordExtra(true);
pp.tokens.appendAssumeCapacity(tok.*);
pp.addTokenAssumeCapacity(tok.*);
}
if (pp.preserve_whitespace) {
try pp.tokens.ensureUnusedCapacity(pp.gpa, pp.add_expansion_nl);
try pp.ensureUnusedTokenCapacity(pp.add_expansion_nl);
while (pp.add_expansion_nl > 0) : (pp.add_expansion_nl -= 1) {
pp.tokens.appendAssumeCapacity(.{ .id = .nl, .loc = .{
pp.addTokenAssumeCapacity(.{ .id = .nl, .loc = .{
.id = tokenizer.source,
.line = tokenizer.line,
} });
@@ -2291,7 +2380,7 @@ fn expandMacro(pp: *Preprocessor, tokenizer: *Tokenizer, raw: RawToken) MacroErr
}
}
 
fn expandedSliceExtra(pp: *const Preprocessor, tok: Token, macro_ws_handling: enum { single_macro_ws, preserve_macro_ws }) []const u8 {
fn expandedSliceExtra(pp: *const Preprocessor, tok: anytype, macro_ws_handling: enum { single_macro_ws, preserve_macro_ws }) []const u8 {
if (tok.id.lexeme()) |some| {
if (!tok.id.allowsDigraphs(pp.comp.langopts) and !(tok.id == .macro_ws and macro_ws_handling == .preserve_macro_ws)) return some;
}
@@ -2312,18 +2401,18 @@ fn expandedSliceExtra(pp: *const Preprocessor, tok: Token, macro_ws_handling: en
}
 
/// Get expanded token source string.
pub fn expandedSlice(pp: *Preprocessor, tok: Token) []const u8 {
pub fn expandedSlice(pp: *const Preprocessor, tok: anytype) []const u8 {
return pp.expandedSliceExtra(tok, .single_macro_ws);
}
 
/// Concat two tokens and add the result to pp.generated
fn pasteTokens(pp: *Preprocessor, lhs_toks: *ExpandBuf, rhs_toks: []const Token) Error!void {
fn pasteTokens(pp: *Preprocessor, lhs_toks: *ExpandBuf, rhs_toks: []const TokenWithExpansionLocs) Error!void {
const lhs = while (lhs_toks.popOrNull()) |lhs| {
if ((pp.comp.langopts.preserve_comments_in_macros and lhs.id == .comment) or
(lhs.id != .macro_ws and lhs.id != .comment))
break lhs;
 
Token.free(lhs.expansion_locs, pp.gpa);
TokenWithExpansionLocs.free(lhs.expansion_locs, pp.gpa);
} else {
return bufCopyTokens(lhs_toks, rhs_toks, &.{});
};
@@ -2338,7 +2427,7 @@ fn pasteTokens(pp: *Preprocessor, lhs_toks: *ExpandBuf, rhs_toks: []const Token)
} else {
return lhs_toks.appendAssumeCapacity(lhs);
};
defer Token.free(lhs.expansion_locs, pp.gpa);
defer TokenWithExpansionLocs.free(lhs.expansion_locs, pp.gpa);
 
const start = pp.comp.generated_buf.items.len;
const end = start + pp.expandedSlice(lhs).len + pp.expandedSlice(rhs).len;
@@ -2375,8 +2464,8 @@ fn pasteTokens(pp: *Preprocessor, lhs_toks: *ExpandBuf, rhs_toks: []const Token)
try bufCopyTokens(lhs_toks, rhs_toks[rhs_rest..], &.{});
}
 
fn makeGeneratedToken(pp: *Preprocessor, start: usize, id: Token.Id, source: Token) !Token {
var pasted_token = Token{ .id = id, .loc = .{
fn makeGeneratedToken(pp: *Preprocessor, start: usize, id: Token.Id, source: TokenWithExpansionLocs) !TokenWithExpansionLocs {
var pasted_token = TokenWithExpansionLocs{ .id = id, .loc = .{
.id = .generated,
.byte_offset = @intCast(start),
.line = pp.generated_line,
@@ -2441,8 +2530,6 @@ fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
.tokens = &.{},
.var_args = false,
.loc = tokFromRaw(macro_name).loc,
.start = 0,
.end = 0,
.is_func = false,
}),
.whitespace => first = tokenizer.next(),
@@ -2460,7 +2547,7 @@ fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
var need_ws = false;
// Collect the token body and validate any ## found.
var tok = first;
const end_index = while (true) {
while (true) {
tok.id.simplifyMacroKeyword();
switch (tok.id) {
.hash_hash => {
@@ -2479,7 +2566,7 @@ fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
try pp.token_buf.append(tok);
try pp.token_buf.append(next);
},
.nl, .eof => break tok.start,
.nl, .eof => break,
.comment => if (pp.comp.langopts.preserve_comments_in_macros) {
if (need_ws) {
need_ws = false;
@@ -2502,13 +2589,11 @@ fn define(pp: *Preprocessor, tokenizer: *Tokenizer) Error!void {
},
}
tok = tokenizer.next();
} else unreachable;
}
 
const list = try pp.arena.allocator().dupe(RawToken, pp.token_buf.items);
try pp.defineMacro(macro_name, .{
.loc = tokFromRaw(macro_name).loc,
.start = first.start,
.end = end_index,
.tokens = list,
.params = undefined,
.is_func = false,
@@ -2525,9 +2610,9 @@ fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_pa
// Parse the parameter list.
var gnu_var_args: []const u8 = "";
var var_args = false;
const start_index = while (true) {
while (true) {
var tok = tokenizer.nextNoWS();
if (tok.id == .r_paren) break tok.end;
if (tok.id == .r_paren) break;
if (tok.id == .eof) return pp.err(tok, .unterminated_macro_param_list);
if (tok.id == .ellipsis) {
var_args = true;
@@ -2537,7 +2622,7 @@ fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_pa
try pp.err(l_paren, .to_match_paren);
return skipToNl(tokenizer);
}
break r_paren.end;
break;
}
if (!tok.id.isMacroIdentifier()) {
try pp.err(tok, .invalid_token_param_list);
@@ -2556,22 +2641,22 @@ fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_pa
try pp.err(l_paren, .to_match_paren);
return skipToNl(tokenizer);
}
break r_paren.end;
break;
} else if (tok.id == .r_paren) {
break tok.end;
break;
} else if (tok.id != .comma) {
try pp.err(tok, .expected_comma_param_list);
return skipToNl(tokenizer);
}
} else unreachable;
}
 
var need_ws = false;
// Collect the body tokens and validate # and ##'s found.
pp.token_buf.items.len = 0; // Safe to use since we can only be in one directive at a time.
const end_index = tok_loop: while (true) {
tok_loop: while (true) {
var tok = tokenizer.next();
switch (tok.id) {
.nl, .eof => break tok.start,
.nl, .eof => break,
.whitespace => need_ws = pp.token_buf.items.len != 0,
.comment => if (!pp.comp.langopts.preserve_comments_in_macros) continue else {
if (need_ws) {
@@ -2690,7 +2775,7 @@ fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_pa
try pp.token_buf.append(tok);
},
}
} else unreachable;
}
 
const param_list = try pp.arena.allocator().dupe([]const u8, params.items);
const token_list = try pp.arena.allocator().dupe(RawToken, pp.token_buf.items);
@@ -2700,8 +2785,6 @@ fn defineFn(pp: *Preprocessor, tokenizer: *Tokenizer, macro_name: RawToken, l_pa
.var_args = var_args or gnu_var_args.len != 0,
.tokens = token_list,
.loc = tokFromRaw(macro_name).loc,
.start = start_index,
.end = end_index,
});
}
 
@@ -2714,7 +2797,7 @@ fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void {
error.InvalidInclude => return,
else => |e| return e,
};
defer Token.free(filename_tok.expansion_locs, pp.gpa);
defer TokenWithExpansionLocs.free(filename_tok.expansion_locs, pp.gpa);
 
// Check for empty filename.
const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws);
@@ -2859,7 +2942,7 @@ fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void {
return;
}
 
try pp.tokens.ensureUnusedCapacity(pp.comp.gpa, 2 * embed_bytes.len - 1); // N bytes and N-1 commas
try pp.ensureUnusedTokenCapacity(2 * embed_bytes.len - 1); // N bytes and N-1 commas
 
// TODO: We currently only support systems with CHAR_BIT == 8
// If the target's CHAR_BIT is not 8, we need to write out correctly-sized embed_bytes
@@ -2870,14 +2953,14 @@ fn embed(pp: *Preprocessor, tokenizer: *Tokenizer) MacroError!void {
const byte = embed_bytes[0];
const start = pp.comp.generated_buf.items.len;
try writer.print("{d}", .{byte});
pp.tokens.appendAssumeCapacity(try pp.makeGeneratedToken(start, .embed_byte, filename_tok));
pp.addTokenAssumeCapacity(try pp.makeGeneratedToken(start, .embed_byte, filename_tok));
}
 
for (embed_bytes[1..]) |byte| {
const start = pp.comp.generated_buf.items.len;
try writer.print(",{d}", .{byte});
pp.tokens.appendAssumeCapacity(.{ .id = .comma, .loc = .{ .id = .generated, .byte_offset = @intCast(start) } });
pp.tokens.appendAssumeCapacity(try pp.makeGeneratedToken(start + 1, .embed_byte, filename_tok));
pp.addTokenAssumeCapacity(.{ .id = .comma, .loc = .{ .id = .generated, .byte_offset = @intCast(start) } });
pp.addTokenAssumeCapacity(try pp.makeGeneratedToken(start + 1, .embed_byte, filename_tok));
}
try pp.comp.generated_buf.append(pp.gpa, '\n');
 
@@ -2911,19 +2994,19 @@ fn include(pp: *Preprocessor, tokenizer: *Tokenizer, which: Compilation.WhichInc
pp.verboseLog(first, "include file {s}", .{new_source.path});
}
 
const tokens_start = pp.tokens.len;
const token_state = pp.getTokenState();
try pp.addIncludeStart(new_source);
const eof = pp.preprocessExtra(new_source) catch |er| switch (er) {
error.StopPreprocessing => {
for (pp.tokens.items(.expansion_locs)[tokens_start..]) |loc| Token.free(loc, pp.gpa);
pp.tokens.len = tokens_start;
for (pp.expansion_entries.items(.locs)[token_state.expansion_entries_len..]) |loc| TokenWithExpansionLocs.free(loc, pp.gpa);
pp.restoreTokenState(token_state);
return;
},
else => |e| return e,
};
try eof.checkMsEof(new_source, pp.comp);
if (pp.preserve_whitespace and pp.tokens.items(.id)[pp.tokens.len - 1] != .nl) {
try pp.tokens.append(pp.gpa, .{ .id = .nl, .loc = .{
try pp.addToken(.{ .id = .nl, .loc = .{
.id = tokenizer.source,
.line = tokenizer.line,
} });
@@ -2945,7 +3028,7 @@ fn include(pp: *Preprocessor, tokenizer: *Tokenizer, which: Compilation.WhichInc
/// 3. Via a stringified macro argument which is used as an argument to `_Pragma`
/// operator_loc: Location of `_Pragma`; null if this is from #pragma
/// arg_locs: expansion locations of the argument to _Pragma. empty if #pragma or a raw string literal was used
fn makePragmaToken(pp: *Preprocessor, raw: RawToken, operator_loc: ?Source.Location, arg_locs: []const Source.Location) !Token {
fn makePragmaToken(pp: *Preprocessor, raw: RawToken, operator_loc: ?Source.Location, arg_locs: []const Source.Location) !TokenWithExpansionLocs {
var tok = tokFromRaw(raw);
if (operator_loc) |loc| {
try tok.addExpansionLocation(pp.gpa, &.{loc});
@@ -2954,28 +3037,52 @@ fn makePragmaToken(pp: *Preprocessor, raw: RawToken, operator_loc: ?Source.Locat
return tok;
}
 
/// Appends `tok` to the preprocessor's token list.
///
/// `pp.tokens` only stores the token's `id` and `loc`. When `tok` carries
/// expansion locations they are recorded in `pp.expansion_entries`, keyed by
/// the index the token is about to occupy.
///
/// Space in `pp.tokens` is reserved before anything is recorded, so when this
/// returns an error nothing has been appended to either list.
pub fn addToken(pp: *Preprocessor, tok: TokenWithExpansionLocs) !void {
    // Reserve the token slot up front: once the side-table entry has been
    // appended, the token append below must not be able to fail, otherwise
    // the entry would refer to an index that holds no token.
    try pp.tokens.ensureUnusedCapacity(pp.gpa, 1);
    if (tok.expansion_locs) |expansion_locs| {
        try pp.expansion_entries.append(pp.gpa, .{ .idx = @intCast(pp.tokens.len), .locs = expansion_locs });
    }
    pp.tokens.appendAssumeCapacity(.{ .id = tok.id, .loc = tok.loc });
}
 
/// Non-allocating counterpart of `addToken`.
///
/// The caller must already have reserved room in both `pp.tokens` and
/// `pp.expansion_entries`, e.g. with `ensureUnusedTokenCapacity`.
pub fn addTokenAssumeCapacity(pp: *Preprocessor, tok: TokenWithExpansionLocs) void {
    // Index the token will occupy once it has been appended below.
    const tok_index = pp.tokens.len;
    if (tok.expansion_locs) |locs| {
        pp.expansion_entries.appendAssumeCapacity(.{ .idx = @intCast(tok_index), .locs = locs });
    }
    pp.tokens.appendAssumeCapacity(.{ .id = tok.id, .loc = tok.loc });
}
 
/// Grows the token list and the expansion-location side table so that each
/// can hold at least `capacity` entries in total.
pub fn ensureTotalTokenCapacity(pp: *Preprocessor, capacity: usize) !void {
    const gpa = pp.gpa;
    try pp.tokens.ensureTotalCapacity(gpa, capacity);
    try pp.expansion_entries.ensureTotalCapacity(gpa, capacity);
}
 
/// Reserves room for `capacity` additional entries in both the token list and
/// the expansion-location side table, so that the same number of
/// `addTokenAssumeCapacity` calls can follow without allocating.
pub fn ensureUnusedTokenCapacity(pp: *Preprocessor, capacity: usize) !void {
    const gpa = pp.gpa;
    try pp.tokens.ensureUnusedCapacity(gpa, capacity);
    try pp.expansion_entries.ensureUnusedCapacity(gpa, capacity);
}
 
/// Handle a pragma directive
fn pragma(pp: *Preprocessor, tokenizer: *Tokenizer, pragma_tok: RawToken, operator_loc: ?Source.Location, arg_locs: []const Source.Location) !void {
const name_tok = tokenizer.nextNoWS();
if (name_tok.id == .nl or name_tok.id == .eof) return;
 
const name = pp.tokSlice(name_tok);
try pp.tokens.append(pp.gpa, try pp.makePragmaToken(pragma_tok, operator_loc, arg_locs));
try pp.addToken(try pp.makePragmaToken(pragma_tok, operator_loc, arg_locs));
const pragma_start: u32 = @intCast(pp.tokens.len);
 
const pragma_name_tok = try pp.makePragmaToken(name_tok, operator_loc, arg_locs);
try pp.tokens.append(pp.gpa, pragma_name_tok);
try pp.addToken(pragma_name_tok);
while (true) {
const next_tok = tokenizer.next();
if (next_tok.id == .whitespace) continue;
if (next_tok.id == .eof) {
try pp.tokens.append(pp.gpa, .{
try pp.addToken(.{
.id = .nl,
.loc = .{ .id = .generated },
});
break;
}
try pp.tokens.append(pp.gpa, try pp.makePragmaToken(next_tok, operator_loc, arg_locs));
try pp.addToken(try pp.makePragmaToken(next_tok, operator_loc, arg_locs));
if (next_tok.id == .nl) break;
}
if (pp.comp.getPragma(name)) |prag| unknown: {
@@ -2995,7 +3102,7 @@ fn findIncludeFilenameToken(
first_token: RawToken,
tokenizer: *Tokenizer,
trailing_token_behavior: enum { ignore_trailing_tokens, expect_nl_eof },
) !Token {
) !TokenWithExpansionLocs {
var first = first_token;
 
if (first.id == .angle_bracket_left) to_end: {
@@ -3025,14 +3132,13 @@ fn findIncludeFilenameToken(
else => expanded: {
// Try to expand if the argument is a macro.
pp.top_expansion_buf.items.len = 0;
defer for (pp.top_expansion_buf.items) |tok| Token.free(tok.expansion_locs, pp.gpa);
defer for (pp.top_expansion_buf.items) |tok| TokenWithExpansionLocs.free(tok.expansion_locs, pp.gpa);
try pp.top_expansion_buf.append(source_tok);
pp.expansion_source_loc = source_tok.loc;
 
try pp.expandMacroExhaustive(tokenizer, &pp.top_expansion_buf, 0, 1, true, .non_expr);
var trailing_toks: []const Token = &.{};
const include_str = (try pp.reconstructIncludeString(pp.top_expansion_buf.items, &trailing_toks)) orelse {
try pp.err(first, .expected_filename);
var trailing_toks: []const TokenWithExpansionLocs = &.{};
const include_str = (try pp.reconstructIncludeString(pp.top_expansion_buf.items, &trailing_toks, tokFromRaw(first))) orelse {
try pp.expectNl(tokenizer);
return error.InvalidInclude;
};
@@ -3071,7 +3177,7 @@ fn findIncludeFilenameToken(
 
fn findIncludeSource(pp: *Preprocessor, tokenizer: *Tokenizer, first: RawToken, which: Compilation.WhichInclude) !Source {
const filename_tok = try pp.findIncludeFilenameToken(first, tokenizer, .expect_nl_eof);
defer Token.free(filename_tok.expansion_locs, pp.gpa);
defer TokenWithExpansionLocs.free(filename_tok.expansion_locs, pp.gpa);
 
// Check for empty filename.
const tok_slice = pp.expandedSliceExtra(filename_tok, .single_macro_ws);
@@ -3101,8 +3207,7 @@ fn printLinemarker(
) !void {
try w.writeByte('#');
if (pp.linemarkers == .line_directives) try w.writeAll("line");
// line_no is 0 indexed
try w.print(" {d} \"", .{line_no + 1});
try w.print(" {d} \"", .{line_no});
for (source.path) |byte| switch (byte) {
'\n' => try w.writeAll("\\n"),
'\r' => try w.writeAll("\\r"),
@@ -3219,7 +3324,7 @@ pub fn prettyPrintTokens(pp: *Preprocessor, w: anytype) !void {
.include_start => {
const source = pp.comp.getSource(cur.loc.id);
 
try pp.printLinemarker(w, 0, source, .start);
try pp.printLinemarker(w, 1, source, .start);
last_nl = true;
},
.include_resume => {
@@ -3259,7 +3364,7 @@ test "Preserve pragma tokens sometimes" {
 
const test_runner_macros = try comp.addSourceFromBuffer("<test_runner>", source_text);
const eof = try pp.preprocess(test_runner_macros);
try pp.tokens.append(pp.gpa, eof);
try pp.addToken(eof);
try pp.prettyPrintTokens(buf.writer());
return allocator.dupe(u8, buf.items);
}
 
lib/compiler/aro/aro/Toolchain.zig added: 876, removed: 300, total 576
@@ -487,3 +487,22 @@ pub fn addRuntimeLibs(tc: *const Toolchain, argv: *std.ArrayList([]const u8)) !v
try argv.append("-ldl");
}
}
 
/// Registers the include search directories for the active toolchain.
///
/// A Linux toolchain supplies its own directory list. For an unknown toolchain
/// a generic fallback is used: the builtin include directory followed by
/// `/usr/include`, each subject to the driver's `nobuiltininc` / `nostdlibinc`
/// flags, and nothing at all when `nostdinc` is set.
pub fn defineSystemIncludes(tc: *Toolchain) !void {
    // Toolchain-specific handling first; only `.unknown` falls through.
    switch (tc.inner) {
        .uninitialized => unreachable,
        .linux => |*linux| return linux.defineSystemIncludes(tc),
        .unknown => {},
    }

    if (tc.driver.nostdinc) return;

    const comp = tc.driver.comp;
    if (!tc.driver.nobuiltininc) try comp.addBuiltinIncludeDir(tc.driver.aro_name);
    if (!tc.driver.nostdlibinc) try comp.addSystemIncludeDir("/usr/include");
}
 
lib/compiler/aro/aro/Tree.zig added: 876, removed: 300, total 576
@@ -12,6 +12,16 @@ const StringInterner = @import("StringInterner.zig");
 
/// A token reduced to its kind and its source location.
pub const Token = struct {
/// The kind of token.
id: Id,
/// Where the token is located in the source.
loc: Source.Location,

/// Struct-of-arrays list of tokens.
pub const List = std.MultiArrayList(Token);
/// Token kinds are the tokenizer's token kinds.
pub const Id = Tokenizer.Token.Id;
/// Re-exported from `number_affixes`.
pub const NumberPrefix = number_affixes.Prefix;
/// Re-exported from `number_affixes`.
pub const NumberSuffix = number_affixes.Suffix;
};
 
pub const TokenWithExpansionLocs = struct {
id: Token.Id,
flags: packed struct {
expansion_disabled: bool = false,
is_macro_arg: bool = false,
@@ -22,15 +32,15 @@ pub const Token = struct {
loc: Source.Location,
expansion_locs: ?[*]Source.Location = null,
 
pub fn expansionSlice(tok: Token) []const Source.Location {
pub fn expansionSlice(tok: TokenWithExpansionLocs) []const Source.Location {
const locs = tok.expansion_locs orelse return &[0]Source.Location{};
var i: usize = 0;
while (locs[i].id != .unused) : (i += 1) {}
return locs[0..i];
}
 
pub fn addExpansionLocation(tok: *Token, gpa: std.mem.Allocator, new: []const Source.Location) !void {
if (new.len == 0 or tok.id == .whitespace) return;
pub fn addExpansionLocation(tok: *TokenWithExpansionLocs, gpa: std.mem.Allocator, new: []const Source.Location) !void {
if (new.len == 0 or tok.id == .whitespace or tok.id == .macro_ws or tok.id == .placemarker) return;
var list = std.ArrayList(Source.Location).init(gpa);
defer {
@memset(list.items.ptr[list.items.len..list.capacity], .{});
@@ -70,14 +80,14 @@ pub const Token = struct {
gpa.free(locs[0 .. i + 1]);
}
 
pub fn dupe(tok: Token, gpa: std.mem.Allocator) !Token {
pub fn dupe(tok: TokenWithExpansionLocs, gpa: std.mem.Allocator) !TokenWithExpansionLocs {
var copy = tok;
copy.expansion_locs = null;
try copy.addExpansionLocation(gpa, tok.expansionSlice());
return copy;
}
 
pub fn checkMsEof(tok: Token, source: Source, comp: *Compilation) !void {
pub fn checkMsEof(tok: TokenWithExpansionLocs, source: Source, comp: *Compilation) !void {
std.debug.assert(tok.id == .eof);
if (source.buf.len > tok.loc.byte_offset and source.buf[tok.loc.byte_offset] == 0x1A) {
try comp.addDiagnostic(.{
@@ -90,11 +100,6 @@ pub const Token = struct {
}, &.{});
}
}
 
pub const List = std.MultiArrayList(Token);
pub const Id = Tokenizer.Token.Id;
pub const NumberPrefix = number_affixes.Prefix;
pub const NumberSuffix = number_affixes.Suffix;
};
 
pub const TokenIndex = u32;
 
lib/compiler/aro/aro/Type.zig added: 876, removed: 300, total 576
@@ -105,6 +105,7 @@ pub const Func = struct {
fn eql(a: *const Func, b: *const Func, a_spec: Specifier, b_spec: Specifier, comp: *const Compilation) bool {
// return type cannot have qualifiers
if (!a.return_type.eql(b.return_type, comp, false)) return false;
if (a.params.len == 0 and b.params.len == 0) return true;
 
if (a.params.len != b.params.len) {
if (a_spec == .old_style_func or b_spec == .old_style_func) {
@@ -114,6 +115,7 @@ pub const Func = struct {
}
return true;
}
return false;
}
if ((a_spec == .func) != (b_spec == .func)) return false;
// TODO validate this
@@ -887,7 +889,8 @@ pub fn hasIncompleteSize(ty: Type) bool {
.@"struct", .@"union" => ty.data.record.isIncomplete(),
.array, .static_array => ty.data.array.elem.hasIncompleteSize(),
.typeof_type => ty.data.sub_type.hasIncompleteSize(),
.typeof_expr => ty.data.expr.ty.hasIncompleteSize(),
.typeof_expr, .variable_len_array => ty.data.expr.ty.hasIncompleteSize(),
.unspecified_variable_len_array => ty.data.sub_type.hasIncompleteSize(),
.attributed => ty.data.attributed.base.hasIncompleteSize(),
else => false,
};
@@ -1053,7 +1056,7 @@ pub fn bitSizeof(ty: Type, comp: *const Compilation) ?u64 {
}
 
pub fn alignable(ty: Type) bool {
return ty.isArray() or !ty.hasIncompleteSize() or ty.is(.void);
return (ty.isArray() or !ty.hasIncompleteSize() or ty.is(.void)) and !ty.is(.invalid);
}
 
/// Get the alignment of a type
 
lib/compiler/aro/aro/Value.zig added: 876, removed: 300, total 576
@@ -60,7 +60,8 @@ test "minUnsignedBits" {
 
var comp = Compilation.init(std.testing.allocator);
defer comp.deinit();
comp.target = (try std.zig.CrossTarget.parse(.{ .arch_os_abi = "x86_64-linux-gnu" })).toTarget();
const target_query = try std.Target.Query.parse(.{ .arch_os_abi = "x86_64-linux-gnu" });
comp.target = try std.zig.system.resolveTargetQuery(target_query);
 
try Test.checkIntBits(&comp, 0, 0);
try Test.checkIntBits(&comp, 1, 1);
@@ -94,7 +95,8 @@ test "minSignedBits" {
 
var comp = Compilation.init(std.testing.allocator);
defer comp.deinit();
comp.target = (try std.zig.CrossTarget.parse(.{ .arch_os_abi = "x86_64-linux-gnu" })).toTarget();
const target_query = try std.Target.Query.parse(.{ .arch_os_abi = "x86_64-linux-gnu" });
comp.target = try std.zig.system.resolveTargetQuery(target_query);
 
try Test.checkIntBits(&comp, -1, 1);
try Test.checkIntBits(&comp, -2, 2);
@@ -224,7 +226,7 @@ pub fn intCast(v: *Value, dest_ty: Type, comp: *Compilation) !void {
v.* = try intern(comp, .{ .int = .{ .big_int = result_bigint.toConst() } });
}
 
/// Converts the stored value from an integer to a float.
/// Converts the stored value to a float of the specified type
/// `.none` value remains unchanged.
pub fn floatCast(v: *Value, dest_ty: Type, comp: *Compilation) !void {
if (v.opt_ref == .none) return;
 
lib/compiler/aro/aro/pragmas/gcc.zig added: 876, removed: 300, total 576
@@ -80,7 +80,7 @@ fn diagnosticHandler(self: *GCC, pp: *Preprocessor, start_idx: TokenIndex) Pragm
.tag = .pragma_requires_string_literal,
.loc = diagnostic_tok.loc,
.extra = .{ .str = "GCC diagnostic" },
}, diagnostic_tok.expansionSlice());
}, pp.expansionSlice(start_idx));
},
else => |e| return e,
};
@@ -90,7 +90,7 @@ fn diagnosticHandler(self: *GCC, pp: *Preprocessor, start_idx: TokenIndex) Pragm
.tag = .malformed_warning_check,
.loc = next.loc,
.extra = .{ .str = "GCC diagnostic" },
}, next.expansionSlice());
}, pp.expansionSlice(start_idx + 1));
}
const new_kind: Diagnostics.Kind = switch (diagnostic) {
.ignored => .off,
@@ -116,7 +116,7 @@ fn preprocessorHandler(pragma: *Pragma, pp: *Preprocessor, start_idx: TokenIndex
return pp.comp.addDiagnostic(.{
.tag = .unknown_gcc_pragma,
.loc = directive_tok.loc,
}, directive_tok.expansionSlice());
}, pp.expansionSlice(start_idx + 1));
 
switch (gcc_pragma) {
.warning, .@"error" => {
@@ -126,7 +126,7 @@ fn preprocessorHandler(pragma: *Pragma, pp: *Preprocessor, start_idx: TokenIndex
.tag = .pragma_requires_string_literal,
.loc = directive_tok.loc,
.extra = .{ .str = @tagName(gcc_pragma) },
}, directive_tok.expansionSlice());
}, pp.expansionSlice(start_idx + 1));
},
else => |e| return e,
};
@@ -134,7 +134,7 @@ fn preprocessorHandler(pragma: *Pragma, pp: *Preprocessor, start_idx: TokenIndex
const diagnostic_tag: Diagnostics.Tag = if (gcc_pragma == .warning) .pragma_warning_message else .pragma_error_message;
return pp.comp.addDiagnostic(
.{ .tag = diagnostic_tag, .loc = directive_tok.loc, .extra = extra },
directive_tok.expansionSlice(),
pp.expansionSlice(start_idx + 1),
);
},
.diagnostic => return self.diagnosticHandler(pp, start_idx + 2) catch |err| switch (err) {
@@ -143,12 +143,12 @@ fn preprocessorHandler(pragma: *Pragma, pp: *Preprocessor, start_idx: TokenIndex
return pp.comp.addDiagnostic(.{
.tag = .unknown_gcc_pragma_directive,
.loc = tok.loc,
}, tok.expansionSlice());
}, pp.expansionSlice(start_idx + 2));
},
else => |e| return e,
},
.poison => {
var i: usize = 2;
var i: u32 = 2;
while (true) : (i += 1) {
const tok = pp.tokens.get(start_idx + i);
if (tok.id == .nl) break;
@@ -157,14 +157,14 @@ fn preprocessorHandler(pragma: *Pragma, pp: *Preprocessor, start_idx: TokenIndex
return pp.comp.addDiagnostic(.{
.tag = .pragma_poison_identifier,
.loc = tok.loc,
}, tok.expansionSlice());
}, pp.expansionSlice(start_idx + i));
}
const str = pp.expandedSlice(tok);
if (pp.defines.get(str) != null) {
try pp.comp.addDiagnostic(.{
.tag = .pragma_poison_macro,
.loc = tok.loc,
}, tok.expansionSlice());
}, pp.expansionSlice(start_idx + i));
}
try pp.poisoned_identifiers.put(str, {});
}
 
lib/compiler/aro/aro/pragmas/message.zig added: 876, removed: 300, total 576
@@ -28,7 +28,7 @@ fn deinit(pragma: *Pragma, comp: *Compilation) void {
 
fn preprocessorHandler(_: *Pragma, pp: *Preprocessor, start_idx: TokenIndex) Pragma.Error!void {
const message_tok = pp.tokens.get(start_idx);
const message_expansion_locs = message_tok.expansionSlice();
const message_expansion_locs = pp.expansionSlice(start_idx);
 
const str = Pragma.pasteTokens(pp, start_idx + 1) catch |err| switch (err) {
error.ExpectedStringLiteral => {
 
lib/compiler/aro/aro/pragmas/once.zig added: 876, removed: 300, total 576
@@ -45,7 +45,7 @@ fn preprocessorHandler(pragma: *Pragma, pp: *Preprocessor, start_idx: TokenIndex
try pp.comp.addDiagnostic(.{
.tag = .extra_tokens_directive_end,
.loc = name_tok.loc,
}, next.expansionSlice());
}, pp.expansionSlice(start_idx + 1));
}
const seen = self.preprocess_count == pp.preprocess_count;
const prev = try self.pragma_once.fetchPut(name_tok.loc.id, {});
 
lib/compiler/aro/aro/pragmas/pack.zig added: 876, removed: 300, total 576
@@ -37,7 +37,7 @@ fn parserHandler(pragma: *Pragma, p: *Parser, start_idx: TokenIndex) Compilation
return p.comp.addDiagnostic(.{
.tag = .pragma_pack_lparen,
.loc = l_paren.loc,
}, l_paren.expansionSlice());
}, p.pp.expansionSlice(idx));
}
idx += 1;
 
 
lib/compiler/aro/aro/target.zig added: 876, removed: 300, total 576
@@ -102,6 +102,16 @@ pub fn int16Type(target: std.Target) Type {
};
}
 
/// Returns the type used for `sig_atomic_t` on `target`.
/// WebAssembly and msp430 use `long`, avr uses `signed char`, everything else `int`.
pub fn sigAtomicType(target: std.Target) Type {
    const arch = target.cpu.arch;
    if (arch.isWasm()) return .{ .specifier = .long };
    switch (arch) {
        .avr => return .{ .specifier = .schar },
        .msp430 => return .{ .specifier = .long },
        else => return .{ .specifier = .int },
    }
}
 
/// int64_t for this target
pub fn int64Type(target: std.Target) Type {
switch (target.cpu.arch) {
 
lib/compiler/aro/aro/toolchains/Linux.zig added: 876, removed: 300, total 576
@@ -373,6 +373,50 @@ fn getOSLibDir(target: std.Target) []const u8 {
return "lib64";
}
 
/// Registers the header search directories for a Linux toolchain on the
/// driver's compilation.
///
/// Directories are added in this order:
///   1. the builtin include dir (non-musl targets, or whenever `nostdlibinc` is set),
///   2. `<sysroot>/usr/local/include`,
///   3. the detected GCC installation's `<parent_lib_path>/../<gcc_triple>/include`,
///      when GCC detection succeeded,
///   4. `<sysroot>/usr/include/<multiarch triple>`, when that directory exists,
///   5. `<sysroot>/include` and `<sysroot>/usr/include` (skipped for RTEMS),
///   6. the builtin include dir (musl targets).
///
/// Driver flags: `nostdinc` disables everything, `nobuiltininc` disables only the
/// builtin dir, and `nostdlibinc` disables everything except the builtin dir.
///
/// Returns any error produced while allocating a path or registering a directory.
pub fn defineSystemIncludes(self: *const Linux, tc: *const Toolchain) !void {
    if (tc.driver.nostdinc) return;

    const comp = tc.driver.comp;
    const target = tc.getTarget();

    // musl prefers /usr/include before builtin includes, so musl targets will add builtins
    // at the end of this function (unless disabled with nostdlibinc)
    if (!tc.driver.nobuiltininc and (!target.isMusl() or tc.driver.nostdlibinc)) {
        try comp.addBuiltinIncludeDir(tc.driver.aro_name);
    }

    if (tc.driver.nostdlibinc) return;

    const sysroot = tc.getSysroot();

    // Temporary path strings are freed right after registration; this relies on
    // `addSystemIncludeDir` not retaining the slice it is given (presumably it
    // copies the path -- confirm against its definition).
    const local_include = try std.fmt.allocPrint(comp.gpa, "{s}{s}", .{ sysroot, "/usr/local/include" });
    defer comp.gpa.free(local_include);
    try comp.addSystemIncludeDir(local_include);

    if (self.gcc_detector.is_valid) {
        const gcc_include_path = try std.fs.path.join(comp.gpa, &.{ self.gcc_detector.parent_lib_path, "..", self.gcc_detector.gcc_triple, "include" });
        defer comp.gpa.free(gcc_include_path);
        try comp.addSystemIncludeDir(gcc_include_path);
    }

    if (getMultiarchTriple(target)) |triple| {
        // NOTE(review): `std.fs.path.join` skips empty components, so an empty
        // sysroot would produce a relative path here -- confirm what
        // `getSysroot` returns by default.
        const joined = try std.fs.path.join(comp.gpa, &.{ sysroot, "usr", "include", triple });
        defer comp.gpa.free(joined);
        if (tc.filesystem.exists(joined)) {
            try comp.addSystemIncludeDir(joined);
        }
    }

    if (target.os.tag == .rtems) return;

    // The base include directories must be resolved against the sysroot too,
    // the same way `/usr/local/include` is above; otherwise a cross-compilation
    // sysroot would be bypassed in favor of the host's headers. With an empty
    // sysroot the resulting paths are exactly "/include" and "/usr/include".
    for ([_][]const u8{ "/include", "/usr/include" }) |suffix| {
        const dir = try std.fmt.allocPrint(comp.gpa, "{s}{s}", .{ sysroot, suffix });
        defer comp.gpa.free(dir);
        try comp.addSystemIncludeDir(dir);
    }

    // `nostdlibinc` already caused an early return above.
    std.debug.assert(!tc.driver.nostdlibinc);
    if (!tc.driver.nobuiltininc and target.isMusl()) {
        try comp.addBuiltinIncludeDir(tc.driver.aro_name);
    }
}
 
test Linux {
if (@import("builtin").os.tag == .windows) return error.SkipZigTest;
 
@@ -388,8 +432,8 @@ test Linux {
defer comp.environment = .{};
 
const raw_triple = "x86_64-linux-gnu";
const cross = std.zig.CrossTarget.parse(.{ .arch_os_abi = raw_triple }) catch unreachable;
comp.target = cross.toTarget(); // TODO deprecated
const target_query = try std.Target.Query.parse(.{ .arch_os_abi = raw_triple });
comp.target = try std.zig.system.resolveTargetQuery(target_query);
comp.langopts.setEmulatedCompiler(.gcc);
 
var driver: Driver = .{ .comp = &comp };
 
lib/compiler/aro/backend/Interner.zig added: 876, removed: 300, total 576
@@ -485,11 +485,11 @@ pub fn put(i: *Interner, gpa: Allocator, key: Key) !Ref {
.data = try i.addExtra(gpa, Tag.F64.pack(data)),
}),
.f80 => |data| i.items.appendAssumeCapacity(.{
.tag = .f64,
.tag = .f80,
.data = try i.addExtra(gpa, Tag.F80.pack(data)),
}),
.f128 => |data| i.items.appendAssumeCapacity(.{
.tag = .f64,
.tag = .f128,
.data = try i.addExtra(gpa, Tag.F128.pack(data)),
}),
},
 
lib/compiler/aro/backend/Ir.zig added: 876, removed: 300, total 576
@@ -649,7 +649,7 @@ fn writeValue(ir: Ir, val: Interner.Ref, config: std.io.tty.Config, w: anytype)
.float => |repr| switch (repr) {
inline else => |x| return w.print("{d}", .{@as(f64, @floatCast(x))}),
},
.bytes => |b| return std.zig.fmt.stringEscape(b, "", .{}, w),
.bytes => |b| return std.zig.stringEscape(b, "", .{}, w),
else => unreachable, // not a value
}
}