srctree

Andrew Kelley parent 32b4d856 ad34ed5a abadad46
Merge pull request #19402 from ianprime0509/markdown-autolinks

Autodoc: hyperlink URLs in text

inline split

lib/docs/wasm/markdown.zig added: 225, removed: 5, total 220

@@ -75,6 +75,17 @@

//! content. `target` may contain `\`-escaped characters and balanced

//! parentheses.

//!

//! - **Autolink** - an abbreviated link, of the format `<target>`, where

//! `target` serves as both the link target and text. `target` may not

//! contain spaces or `<`, and any `\` in it are interpreted literally (not as

//! escapes). `target` is expected to be an absolute URI: an autolink will not

//! be recognized unless `target` starts with a URI scheme followed by a `:`.

//!

//! For convenience, autolinks may also be recognized in plain text without

//! any `<>` delimiters. Such autolinks are restricted to start with `http://`

//! or `https://` followed by at least one other character, not including any

//! trailing punctuation after the link.

//!

//! - **Image** - a link directly preceded by a `!`. The link text is

//! interpreted as the alt text of the image.

//!

@@ -710,6 +721,50 @@ test "links" {

);

}

// Exercises `<...>`-delimited autolinks through `testRender`: plain URLs, an
// autolink nested inside `**` emphasis, a target containing `?`, `#` and
// parentheses, a scheme with an empty remainder (`<data:>`), and several
// inputs that must not become links (`<placeholder>`, bare `<` / `>`, and an
// unclosed `<`).
//
// NOTE(review): as captured here the Markdown input and the expected HTML run
// together in one literal with no `,` between them, and the expected lines
// show raw `<` / `>` even though the renderer writes text through `fmtHtml`.
// This looks like damage from how the diff was rendered — confirm against the
// real file before relying on the expected output shown below.
test "autolinks" {

try testRender(

\\<https://example.com>

\\**This is important: <https://example.com/strong>**

\\<https://example.com?query=abc.123#page(parens)>

\\<placeholder>

\\<data:>

\\1 < 2

\\4 > 3

\\Unclosed: <

\\<a href="https://example.com">https://example.com</a>

\\This is important: <a href="https://example.com/strong">https://example.com/strong</a>

\\<a href="https://example.com?query=abc.123#page(parens)">https://example.com?query=abc.123#page(parens)</a>

\\<placeholder>

\\<a href="data:">data:</a>

\\1 < 2

\\4 > 3

\\Unclosed: <

);

}

// Exercises autolinks recognized in plain text (no `<>` delimiters) through
// `testRender`: only `http://` and `https://` prefixes qualify, a trailing
// `.` stays outside the link, links may sit inside `**` emphasis or
// parentheses (with balanced parentheses kept in the target), and a `\`
// before the `:` prevents recognition.
//
// NOTE(review): as captured here the Markdown input and the expected HTML run
// together in one literal with no `,` between them; this looks like damage
// from how the diff was rendered — confirm against the real file.
test "text autolinks" {

try testRender(

\\Text autolinks must start with http:// or https://.

\\This doesn't count: ftp://example.com.

\\Example: https://ziglang.org.

\\Here is an important link: **http://example.com**

\\(Links may be in parentheses: https://example.com/?q=(parens))

\\Escaping a link so it's plain text: https\://example.com

\\Text autolinks must start with http:// or https://.

\\This doesn't count: ftp://example.com.

\\Example: <a href="https://ziglang.org">https://ziglang.org</a>.

\\Here is an important link: <a href="http://example.com">http://example.com</a>

\\(Links may be in parentheses: <a href="https://example.com/?q=(parens)">https://example.com/?q=(parens)</a>)

\\Escaping a link so it's plain text: https://example.com

);

}

test "images" {

try testRender(

\\![Alt text](https://example.com/image.png)

lib/docs/wasm/markdown/Document.zig added: 225, removed: 5, total 220

@@ -51,6 +51,8 @@ pub const Node = struct {

// Inlines

/// Data is `link`.

link,

/// Data is `text`.

autolink,

/// Data is `link`.

image,

/// Data is `container`.

lib/docs/wasm/markdown/Parser.zig added: 225, removed: 5, total 220

@@ -985,8 +985,12 @@ const InlineParser = struct {

ip.pos += 1;

']' => try ip.parseLink(),

'<' => try ip.parseAutolink(),

'*', '_' => try ip.parseEmphasis(),

'`' => try ip.parseCodeSpan(),

'h' => if (ip.pos == 0 or isPreTextAutolink(ip.content[ip.pos - 1])) {

try ip.parseTextAutolink();

else => {},

}

@@ -1076,6 +1080,161 @@ const InlineParser = struct {

return @enumFromInt(string_top);

}

/// Parses an autolink of the form `<scheme:target>`, starting at the opening
/// `<`.
///
/// The scheme must begin with an ASCII letter and may continue with letters,
/// digits, `+`, `.` or `-` up to the `:`. The remainder may not contain `<`,
/// a space, a tab or a newline, and may be empty.
///
/// On success an `.autolink` node holding the text between the delimiters is
/// added to `ip.parent`, a matching entry covering both delimiters is appended
/// to `ip.completed_inlines`, and `ip.pos` is left at the closing `>`. If no
/// autolink is recognized, `ip.pos` is restored to the opening `<` and no node
/// is created.
///
/// Errors from `addString`, `addNode` and `append` are propagated.
fn parseAutolink(ip: *InlineParser) !void {
    const start = ip.pos;
    ip.pos += 1; // Step past the opening `<`.
    var state: enum {
        /// Expecting the first letter of the scheme.
        start,
        /// Inside the scheme, before the `:`.
        scheme,
        /// After the `:`, scanning for the closing `>`.
        target,
    } = .start;
    while (ip.pos < ip.content.len) : (ip.pos += 1) {
        switch (state) {
            .start => switch (ip.content[ip.pos]) {
                'A'...'Z', 'a'...'z' => state = .scheme,
                else => break,
            },
            .scheme => switch (ip.content[ip.pos]) {
                'A'...'Z', 'a'...'z', '0'...'9', '+', '.', '-' => {},
                ':' => state = .target,
                else => break,
            },
            .target => switch (ip.content[ip.pos]) {
                '<', ' ', '\t', '\n' => break, // Not allowed in autolinks
                '>' => {
                    // Backslash escapes are not recognized in autolink targets.
                    const target = try ip.parent.addString(ip.content[start + 1 .. ip.pos]);
                    const node = try ip.parent.addNode(.{
                        .tag = .autolink,
                        .data = .{ .text = .{
                            .content = target,
                        } },
                    });
                    try ip.completed_inlines.append(ip.parent.allocator, .{
                        .node = node,
                        .start = start,
                        // `+ 1` so the span includes the closing `>`.
                        .len = ip.pos - start + 1,
                    });
                    return;
                },
                else => {},
            },
        }
    }
    // Either the input ended or an invalid character was seen: not an
    // autolink, so rewind to the opening `<`.
    ip.pos = start;
}

/// Parses a plain text autolink (not delimited by `<>`), starting at the
/// first character in the link (an `h`).
///
/// The link must begin with `http://` or `https://`. Its content runs until
/// a space, tab, newline, end of input, or a `)` that has no matching `(`
/// inside the link. Trailing characters accepted by `isPostTextAutolink` are
/// then trimmed off; if nothing remains after the `://`, no link is
/// recognized.
///
/// On success an `.autolink` node holding the link text is added to
/// `ip.parent`, a matching entry is appended to `ip.completed_inlines`, and
/// `ip.pos` is left at the last character of the link. Otherwise `ip.pos`
/// remains unchanged.
///
/// Errors from `addString`, `addNode` and `append` are propagated.
fn parseTextAutolink(ip: *InlineParser) !void {
    const start = ip.pos;
    var state: union(enum) {
        /// Inside `http`. Contains the rest of the text to be matched.
        http: []const u8,
        after_http,
        after_https,
        /// Inside `://`. Contains the rest of the text to be matched.
        authority: []const u8,
        /// Inside link content.
        content: struct {
            /// Index of the first character after `://`.
            start: usize,
            /// Number of `(` seen in the content not yet closed by a `)`.
            paren_nesting: usize,
        },
    } = .{ .http = "http" };

    while (ip.pos < ip.content.len) : (ip.pos += 1) {
        switch (state) {
            .http => |rest| {
                if (ip.content[ip.pos] != rest[0]) break;
                if (rest.len > 1) {
                    state = .{ .http = rest[1..] };
                } else {
                    state = .after_http;
                }
            },
            .after_http => switch (ip.content[ip.pos]) {
                's' => state = .after_https,
                ':' => state = .{ .authority = "//" },
                else => break,
            },
            .after_https => switch (ip.content[ip.pos]) {
                ':' => state = .{ .authority = "//" },
                else => break,
            },
            .authority => |rest| {
                if (ip.content[ip.pos] != rest[0]) break;
                if (rest.len > 1) {
                    state = .{ .authority = rest[1..] };
                } else {
                    state = .{ .content = .{
                        .start = ip.pos + 1,
                        .paren_nesting = 0,
                    } };
                }
            },
            .content => |*content| switch (ip.content[ip.pos]) {
                ' ', '\t', '\n' => break,
                '(' => content.paren_nesting += 1,
                // An unmatched `)` belongs to the surrounding text, not the link.
                ')' => if (content.paren_nesting == 0) {
                    break;
                } else {
                    content.paren_nesting -= 1;
                },
                else => {},
            },
        }
    }

    switch (state) {
        // The `http://` / `https://` prefix was never completed.
        .http, .after_http, .after_https, .authority => {
            ip.pos = start;
        },
        .content => |content| {
            // Here `ip.pos` is one past the last scanned character; back up
            // over trailing punctuation that should not be part of the link.
            while (ip.pos > content.start and isPostTextAutolink(ip.content[ip.pos - 1])) {
                ip.pos -= 1;
            }
            if (ip.pos == content.start) {
                // Nothing but the prefix is left: not a link.
                ip.pos = start;
                return;
            }

            const target = try ip.parent.addString(ip.content[start..ip.pos]);
            const node = try ip.parent.addNode(.{
                .tag = .autolink,
                .data = .{ .text = .{
                    .content = target,
                } },
            });
            try ip.completed_inlines.append(ip.parent.allocator, .{
                .node = node,
                .start = start,
                .len = ip.pos - start,
            });
            // Leave `ip.pos` on the last character of the link.
            ip.pos -= 1;
        },
    }
}

/// Reports whether a text autolink may be recognized directly after the byte
/// `c`. Only a space, tab, newline, `*`, `_` or `(` qualifies.
fn isPreTextAutolink(c: u8) bool {
    const allowed_before = " \t\n*_(";
    for (allowed_before) |candidate| {
        if (candidate == c) return true;
    }
    return false;
}

/// Reports whether `c` is trailing punctuation that is excluded from the end
/// of a text autolink: one of `?`, `!`, `.`, `,`, `:`, `*` or `_`.
fn isPostTextAutolink(c: u8) bool {
    const trailing_punctuation = "?!.,:*_";
    for (trailing_punctuation) |candidate| {
        if (candidate == c) return true;
    }
    return false;
}

/// Parses emphasis, starting at the beginning of a run of `*` or `_`

/// characters. `ip.pos` is left at the last character in the run after

/// parsing.

lib/docs/wasm/markdown/renderer.zig added: 225, removed: 5, total 220

@@ -140,6 +140,10 @@ pub fn Renderer(comptime Writer: type, comptime Context: type) type {

}

try writer.writeAll("</a>");

.autolink => {

const target = doc.string(data.text.content);

try writer.print("<a href=\"{0}\">{0}</a>", .{fmtHtml(target)});

.image => {

const target = doc.string(data.link.target);

try writer.print("<img src=\"{}\" alt=\"", .{fmtHtml(target)});

@@ -215,7 +219,7 @@ pub fn renderInlineNodeText(

try renderInlineNodeText(doc, child, writer);

}

.code_span, .text => {

.autolink, .code_span, .text => {

const content = doc.string(data.text.content);

try writer.print("{}", .{fmtHtml(content)});