Skip to content

Commit

Permalink
Initial support for GitHub extended autolink syntax.
Browse files Browse the repository at this point in the history
  • Loading branch information
mkende committed Mar 23, 2024
1 parent 902a81c commit 5fe8761
Show file tree
Hide file tree
Showing 4 changed files with 109 additions and 16 deletions.
4 changes: 3 additions & 1 deletion .aspelldict
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
personal_ws-1.1 en 164
personal_ws-1.1 en 166
CDATA
CommonMark
CounterClockwiseContourIntegral
Expand Down Expand Up @@ -160,6 +160,8 @@ utf
wantarray
wget
whitespace
www
wwww
xfffd
xmp
zA
54 changes: 54 additions & 0 deletions lib/Markdown/Perl/Inlines.pm
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,10 @@ sub render {

process_styles($that, $tree);

if ($that->get_use_extended_autolinks) {
$tree->map(sub { create_autolinks($that, $_) });
}

# At this point we have added the emphasis, strong emphasis, etc. in the tree.

$tree->apply(
Expand Down Expand Up @@ -713,4 +717,54 @@ sub delim_characters {
return join('', uniq @c);
}

# Splits a text node into a list of text and link nodes, turning anything that
# looks like a GitHub "extended autolink" (a URL starting with http://,
# https://, or www.) into an autolink node.
#
# Arguments:
#   $that: object providing get_default_extended_autolinks_scheme, used to
#          build the target of links written without a scheme.
#   $n:    a node, read as a hash ref with a 'type' key and, for text nodes, a
#          'content' key.
#
# Returns $n itself when it is not a text node. Otherwise returns the list of
# nodes that replaces it (empty when the remaining content is empty).
sub create_autolinks {
  my ($that, $n) = @_;
  if ($n->{type} ne 'text') {
    return $n;
  }

  my @nodes;

  # TODO: technically we should forbid the presence of _ in the last two parts
  # of the domain, according to the gfm spec.
  ## no critic (ProhibitComplexRegexes)
  while (
    $n->{content} =~ m/
      (?<prefix> ^ | [ \t\n*_~\(] )              # The link must start after a whitespace or some specific delimiters.
      (?<url>
        (?: (?<scheme>https?:\/\/) | www\. )     # It must start with a scheme or the string www.
        [-_a-zA-Z0-9]+ (?: \. [-_a-zA-Z0-9]+ )*  # Then there must be something that looks like a domain
        (?: \/ [^ \t\n<]*? )?                    # Some characters are forbidden in the link.
      )
      [?!.,:*_~]* (?: [ \t\n<] | $)              # We remove some punctuation from the end of the link.
    /x
    ## use critic
  ) {
    # Everything derived from the match variables is saved now, because the
    # regex on the URL below overwrites them.
    my $url = $+{url};
    my $match_start = $LAST_MATCH_START[0] + length($LAST_PAREN_MATCH{prefix});
    my $match_end = $match_start + length($url);
    my $has_scheme = exists $LAST_PAREN_MATCH{scheme};
    if ($url =~ m/\)+$/) {
      # Length of the run of closing parentheses at the very end of the URL.
      my $nb_final_closing_parens = $LAST_MATCH_END[0] - $LAST_MATCH_START[0];
      # tr with an empty replacement list only counts the characters, it does
      # not modify the string. A positive value means that the URL has more
      # closing than opening parentheses.
      my $unbalanced_closing = ($url =~ tr/)//) - ($url =~ tr/(//);
      # Only trailing parentheses that are unmatched are excluded from the
      # link, they are given back to the surrounding text.
      my $remove = min($nb_final_closing_parens, $unbalanced_closing);
      if ($remove > 0) {
        $match_end -= $remove;
        substr $url, -$remove, $remove, '';
      }
    }
    # TODO: handle an HTML entity at the end of the link.
    if ($match_start > 0) {
      push @nodes, new_text(substr $n->{content}, 0, $match_start);
    }
    my $scheme = $has_scheme ? '' : $that->get_default_extended_autolinks_scheme.'://';
    push @nodes,
        new_link($url, type => 'autolink', target => $scheme.$url, debug => 'extended autolink');
    # Continue the search in whatever follows the link.
    $n = new_text(substr $n->{content}, $match_end);
  }
  push @nodes, $n if length($n->{content}) > 0;
  return @nodes;
}

1;
50 changes: 39 additions & 11 deletions lib/Markdown/Perl/Options.pm
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,14 @@ sub _regex {
};
}

# Builds the validator for options holding a list of words. The returned
# closure accepts either an array reference, which is shallow-copied, or a
# string, which is split on commas. It always returns a new array reference.
sub _word_list {
  return sub {
    my ($value) = @_;
    my @words;
    if (ref($value) eq 'ARRAY') {
      @words = @{$value};
    } else {
      @words = split(/,/, $value);
    }
    # TODO: validate the values of the list.
    return \@words;
  };
}

=pod
=head2 B<use_fenced_code_blocks> I<(boolean, default: true)>
Expand Down Expand Up @@ -477,19 +485,39 @@ they will be deactivated in the output.
=cut

# TODO: this is just a "word list" for now, see if this can be shared with other
# options.
# Builds the validator for an option holding a list of tags. The returned
# closure copies its argument when it is an array reference and otherwise
# splits it on commas; in both cases a new array reference is returned.
sub _tag_list {
  return sub {
    my $arg = $_[0];
    # TODO: validate the values of the list.
    return [@{$arg}] if ref($arg) eq 'ARRAY';
    return [split(/,/, $arg)];
  };
}

# Registers the disallowed_html_tags option: an empty list is given next to the
# option name, and an explicit list of tags is given under the github key.
# NOTE(review): this hunk lists both _tag_list and _word_list as the validator;
# presumably only one of them belongs in the final file — confirm.
_make_option(
disallowed_html_tags => [],
_tag_list,
_word_list,
github => [qw(title textarea style xmp iframe noembed noframes script plaintext)]);

=pod
=head2 B<use_extended_autolinks> I<(boolean, default: true)>
Allow some links to be recognised when they appear in plain text. These links
must start with C<http://>, C<https://>, or C<www.>.
=cut

# Registers the use_extended_autolinks boolean option, with a value of 1.
# NOTE(review): the trailing pairs look like per-mode overrides that turn the
# option off for markdown and cmark — confirm against _make_option.
_make_option(
  use_extended_autolinks => 1,
  _boolean(),
  markdown => 0,
  cmark => 0);

=pod
=head2 B<default_extended_autolinks_scheme> I<(enum, default: https)>
Specify which scheme is added to the beginning of extended autolinks when none
was present initially.
=cut

# Registers the default_extended_autolinks_scheme option, with a value of
# 'https' and restricted to the two listed schemes.
# NOTE(review): the trailing pair looks like an override to 'http' for the
# github mode — confirm against _make_option.
_make_option(
  default_extended_autolinks_scheme => 'https',
  _enum('http', 'https'),
  github => 'http');

1;
17 changes: 13 additions & 4 deletions t/901-github-test-suite.t
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,19 @@ use Test2::V0;

# TODO: remove these todos.
my %opt = (todo => [198 .. 202, 204, 205, 279, 280, 398, 426, 434 .. 436,
473 .. 475, 477, 621 .. 631, 652],
# The spec says that some HTML tags are forbidden in the output, but
# they still have examples with these tags.
bugs => [140 .. 142, 145, 147],
473 .. 475, 477, 626, 628 .. 631, 652],
# These are bugs in the GitHub spec, not in our implementation. All
# of these have been tested to be buggy in the real cmark-gfm
# implementation.
bugs => [
# The spec says that some HTML tags are forbidden in the output, but
# they still have examples with these tags.
140 .. 142, 145, 147,
# Some things that are not cmark autolinks are matched by the
# extended autolinks syntax (but the cmark part of the spec is not
# updated).
616, 619,
],
json_file => "${FindBin::Bin}/data/github.tests.json",
test_url => 'https://github.github.com/gfm/#example-%d',
spec_tool => "${FindBin::Bin}/../third_party/commonmark-spec/test/spec_tests.py",
Expand Down

0 comments on commit 5fe8761

Please sign in to comment.