Skip to content

Commit

Permalink
Merge pull request #753 from MihaZupan/perf-nov23-3
Browse files Browse the repository at this point in the history
A few more perf improvements
  • Loading branch information
xoofx authored Nov 29, 2023
2 parents 40fb2b8 + f3aa7e7 commit feeb186
Show file tree
Hide file tree
Showing 7 changed files with 184 additions and 140 deletions.
30 changes: 11 additions & 19 deletions src/Markdig/Helpers/EntityHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

using System.Text;

namespace Markdig.Helpers;

/// <summary>
Expand All @@ -57,41 +59,31 @@ public static class EntityHelper
/// <returns>The unicode character set or <c>null</c> if the entity was not recognized.</returns>
public static string DecodeEntity(int utf32)
{
if (!CharHelper.IsInInclusiveRange(utf32, 1, 1114111) || CharHelper.IsInInclusiveRange(utf32, 55296, 57343))
if (utf32 == 0 || !UnicodeUtility.IsValidUnicodeScalar((uint)utf32))
return CharHelper.ReplacementCharString;

if (utf32 < 65536)
if (UnicodeUtility.IsBmpCodePoint((uint)utf32))
return char.ToString((char)utf32);

utf32 -= 65536;
return new string(
#if NETSTANDARD2_1_OR_GREATER || NETCOREAPP3_1_OR_GREATER
stackalloc
#else
new
#endif
char[]
{
(char)((uint)utf32 / 1024 + 55296),
(char)((uint)utf32 % 1024 + 56320)
});
UnicodeUtility.GetUtf16SurrogatesFromSupplementaryPlaneScalar((uint)utf32, out char high, out char low);
return new string([high, low]);
}

internal static void DecodeEntity(int utf32, ref ValueStringBuilder sb)
{
if (!CharHelper.IsInInclusiveRange(utf32, 1, 1114111) || CharHelper.IsInInclusiveRange(utf32, 55296, 57343))
if (utf32 == 0 || !UnicodeUtility.IsValidUnicodeScalar((uint)utf32))
{
sb.Append(CharHelper.ReplacementChar);
}
else if (utf32 < 65536)
else if (UnicodeUtility.IsBmpCodePoint((uint)utf32))
{
sb.Append((char)utf32);
}
else
{
utf32 -= 65536;
sb.Append((char)((uint)utf32 / 1024 + 55296));
sb.Append((char)((uint)utf32 % 1024 + 56320));
UnicodeUtility.GetUtf16SurrogatesFromSupplementaryPlaneScalar((uint)utf32, out char high, out char low);
sb.Append(high);
sb.Append(low);
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/Markdig/Helpers/ThrowHelper.cs
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@ public static void CheckDepthLimit(int depth, bool useLargeLimit = false)
if (depth > limit)
DepthLimitExceeded();

[MethodImpl(MethodImplOptions.NoInlining)]
[DoesNotReturn]
static void DepthLimitExceeded() => throw new ArgumentException("Markdown elements in the input are too deeply nested - depth limit exceeded. Input is most likely not sensible or is a very large table.");
}

Expand Down
30 changes: 30 additions & 0 deletions src/Markdig/Helpers/UnicodeUtility.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
// Copyright (c) Alexandre Mutel. All rights reserved.
// This file is licensed under the BSD-Clause 2 license.
// See the license.txt file in the project root for more information.

using System.Diagnostics;
using System.Runtime.CompilerServices;

namespace System.Text;

// Based on https://github.com/dotnet/runtime/blob/main/src/libraries/System.Private.CoreLib/src/System/Text/UnicodeUtility.cs
/// <summary>
/// Small, allocation-free helpers for classifying Unicode code points and
/// converting supplementary-plane scalars to UTF-16 surrogate pairs.
/// </summary>
internal static class UnicodeUtility
{
    /// <summary>
    /// Returns <c>true</c> if <paramref name="value"/> fits in a single UTF-16 code unit,
    /// i.e. lies in the range [U+0000..U+FFFF]. Surrogate code points are inside that
    /// range and are therefore reported as BMP code points as well.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static bool IsBmpCodePoint(uint value) => value <= 0xFFFFu;

    /// <summary>
    /// Returns <c>true</c> if <paramref name="value"/> is a Unicode scalar value, i.e. lies in
    /// [U+0000..U+D7FF] or [U+E000..U+10FFFF]. Note that U+0000 is accepted by this check.
    /// </summary>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static bool IsValidUnicodeScalar(uint value)
    {
        // Single-comparison range check:
        //  - the subtraction maps [ 000000..10FFFF ] to [ FFEF0000..FFFFFFFF ]
        //    (anything above 10FFFF wraps around to a small number);
        //  - the xor moves the surrogate range [ FFEFD800..FFEFDFFF ] down to
        //    [ FFEF0000..FFEF07FF ];
        //  - so every valid scalar, and nothing else, ends up in [ FFEF0800..FFFFFFFF ].
        return ((value - 0x110000u) ^ 0xD800u) >= 0xFFEF0800u;
    }

    /// <summary>
    /// Splits a supplementary-plane scalar value ([U+10000..U+10FFFF]) into its UTF-16
    /// surrogate pair.
    /// </summary>
    /// <param name="value">The scalar value to encode. It must not be a BMP code point.</param>
    /// <param name="highSurrogateCodePoint">Receives the leading surrogate ([U+D800..U+DBFF]).</param>
    /// <param name="lowSurrogateCodePoint">Receives the trailing surrogate ([U+DC00..U+DFFF]).</param>
    [MethodImpl(MethodImplOptions.AggressiveInlining)]
    public static void GetUtf16SurrogatesFromSupplementaryPlaneScalar(uint value, out char highSurrogateCodePoint, out char lowSurrogateCodePoint)
    {
        // The input has to be a valid scalar that is *outside* the BMP; BMP values
        // are encoded as a single char and must never reach this method.
        Debug.Assert(IsValidUnicodeScalar(value) && !IsBmpCodePoint(value));

        // Equivalent to ((value - 0x10000) >> 10) + 0xD800, with the subtraction folded
        // into the added constant (0x40 << 10 == 0x10000).
        highSurrogateCodePoint = (char)((value + ((0xD800u - 0x40u) << 10)) >> 10);

        // The low 10 bits select the trailing surrogate.
        lowSurrogateCodePoint = (char)((value & 0x3FFu) + 0xDC00u);
    }
}
87 changes: 32 additions & 55 deletions src/Markdig/Parsers/FencedBlockParserBase.cs
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ public abstract class FencedBlockParserBase : BlockParser, IAttributesParseable
/// <seealso cref="BlockParser" />
public abstract class FencedBlockParserBase<T> : FencedBlockParserBase where T : Block, IFencedBlock
{
private static readonly TransformedStringCache _infoStringCache = new(static infoString => HtmlHelper.Unescape(infoString));
private static readonly TransformedStringCache s_infoStringCache = new(static infoString => HtmlHelper.Unescape(infoString));
private static readonly TransformedStringCache s_argumentsStringCache = new(static argumentsString => HtmlHelper.Unescape(argumentsString));
private TransformedStringCache? _infoPrefixCache;

/// <summary>
Expand Down Expand Up @@ -176,7 +177,7 @@ public static bool RoundtripInfoParser(BlockProcessor blockProcessor, ref String

end:
fenced.TriviaAfterFencedChar = afterFence;
fenced.Info = _infoStringCache.Get(info.AsSpan());
fenced.Info = s_infoStringCache.Get(info.AsSpan());
fenced.UnescapedInfo = info;
fenced.TriviaAfterInfo = afterInfo;
fenced.Arguments = HtmlHelper.Unescape(arg.ToString());
Expand All @@ -197,71 +198,47 @@ public static bool RoundtripInfoParser(BlockProcessor blockProcessor, ref String
/// <returns><c>true</c> if parsing of the line is successful; <c>false</c> otherwise</returns>
public static bool DefaultInfoParser(BlockProcessor state, ref StringSlice line, IFencedBlock fenced, char openingCharacter)
{
// An info string cannot contain any backticks (unless it is a tilde block)
int firstSpace = -1;
if (openingCharacter == '`')
ReadOnlySpan<char> lineSpan = line.AsSpan();

if (!lineSpan.IsEmpty)
{
for (int i = line.Start; i <= line.End; i++)
if (openingCharacter == '`')
{
char c = line.Text[i];
if (c == '`')
{
return false;
}
firstSpace = lineSpan.IndexOfAny(' ', '\t', '`');

if (firstSpace < 0 && c.IsSpaceOrTab())
// An info string cannot contain any backticks (unless it is a tilde block)
if (firstSpace >= 0 && lineSpan.Slice(firstSpace).Contains('`'))
{
firstSpace = i;
return false;
}
}
}
else
{
for (int i = line.Start; i <= line.End; i++)
else
{
if (line.Text[i].IsSpaceOrTab())
{
firstSpace = i;
break;
}
firstSpace = lineSpan.IndexOfAny(' ', '\t');
}
}

StringSlice infoStringSlice;
string? argString = null;

if (firstSpace > 0)
if (firstSpace >= 0)
{
firstSpace += line.Start;
infoStringSlice = new StringSlice(line.Text, line.Start, firstSpace - 1);

// Skip any spaces after info string
firstSpace++;
while (firstSpace <= line.End)
{
char c = line[firstSpace];
if (c.IsSpaceOrTab())
{
firstSpace++;
}
else
{
break;
}
}

var argStringSlice = new StringSlice(line.Text, firstSpace, line.End);
argStringSlice.Trim();
argString = argStringSlice.ToString();
fenced.Arguments = s_argumentsStringCache.Get(argStringSlice.AsSpan());
}
else
{
infoStringSlice = line;
fenced.Arguments = string.Empty;
}

infoStringSlice.Trim();

fenced.Info = _infoStringCache.Get(infoStringSlice.AsSpan());
fenced.Arguments = HtmlHelper.Unescape(argString);
fenced.Info = s_infoStringCache.Get(infoStringSlice.AsSpan());

return true;
}
Expand Down Expand Up @@ -303,17 +280,19 @@ public override BlockState TryOpen(BlockProcessor processor)
// Try to parse any attached attributes
TryParseAttributes?.Invoke(processor, ref line, fenced);

// If the info parser was not successfull, early exit
// If the info parser was not successful, early exit
if (InfoParser != null && !InfoParser(processor, ref line, fenced, matchChar))
{
return BlockState.None;
}

// Add the language as an attribute by default
if (!string.IsNullOrEmpty(fenced.Info))
string? info = fenced.Info;

if (!string.IsNullOrEmpty(info))
{
Debug.Assert(_infoPrefixCache is not null || InfoPrefix is null);
string infoWithPrefix = _infoPrefixCache?.Get(fenced.Info!) ?? fenced.Info!;
string infoWithPrefix = _infoPrefixCache?.Get(info!) ?? info!;
fenced.GetAttributes().AddClass(infoWithPrefix);
}

Expand All @@ -329,34 +308,32 @@ public override BlockState TryOpen(BlockProcessor processor)
public override BlockState TryContinue(BlockProcessor processor, Block block)
{
var fence = (IFencedBlock)block;
var openingCount = fence.OpeningFencedCharCount;

// Match if we have a closing fence
var line = processor.Line;
var sourcePosition = processor.Start;
var closingCount = line.CountAndSkipChar(fence.FencedChar);
var diff = openingCount - closingCount;

char c = line.CurrentChar;
var lastFenceCharPosition = processor.Start + closingCount;

// If we have a closing fence, close it and discard the current line
// The line must contain only fence opening character followed only by whitespaces.
var startBeforeTrim = line.Start;
var endBeforeTrim = line.End;
var trimmed = line.TrimEnd();
if (diff <= 0 && !processor.IsCodeIndent && (c == '\0' || c.IsWhitespace()) && trimmed)

if (fence.OpeningFencedCharCount <= closingCount &&
!processor.IsCodeIndent &&
(c == '\0' || c.IsWhitespace()) &&
line.TrimEnd())
{
block.UpdateSpanEnd(startBeforeTrim - 1);

var fencedBlock = (IFencedBlock)block;
fencedBlock.ClosingFencedCharCount = closingCount;
fence.ClosingFencedCharCount = closingCount;

if (processor.TrackTrivia)
{
fencedBlock.NewLine = processor.Line.NewLine;
fencedBlock.TriviaBeforeClosingFence = processor.UseTrivia(sourcePosition - 1);
fencedBlock.TriviaAfter = new StringSlice(processor.Line.Text, lastFenceCharPosition, endBeforeTrim);
fence.NewLine = line.NewLine;
fence.TriviaBeforeClosingFence = processor.UseTrivia(sourcePosition - 1);
fence.TriviaAfter = new StringSlice(line.Text, processor.Start + closingCount, processor.Line.End);
}

// Don't keep the last line
Expand Down
66 changes: 38 additions & 28 deletions src/Markdig/Parsers/MarkdownParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -53,34 +53,11 @@ public static MarkdownDocument Parse(string text, MarkdownPipeline? pipeline = n
{
blockProcessor.Open(document);

ProcessBlocks(blockProcessor, new LineReader(text));
ProcessBlocks(blockProcessor, text);

if (pipeline.TrackTrivia)
{
Block? lastBlock = blockProcessor.LastBlock;
if (lastBlock is null && document.Count == 0)
{
// this means we have unassigned characters
var noBlocksFoundBlock = new EmptyBlock(null);
List<StringSlice> linesBefore = blockProcessor.UseLinesBefore();
noBlocksFoundBlock.LinesAfter = new List<StringSlice>();
if (linesBefore != null)
{
noBlocksFoundBlock.LinesAfter.AddRange(linesBefore);
}

document.Add(noBlocksFoundBlock);
}
else if (lastBlock != null && blockProcessor.LinesBefore != null)
{
// this means we're out of lines, but still have unassigned empty lines.
// thus, we'll assign the empty unsassigned lines to the last block
// of the document.
var rootMostContainerBlock = Block.FindRootMostContainerParent(lastBlock);
rootMostContainerBlock.LinesAfter ??= new List<StringSlice>();
var linesBefore = blockProcessor.UseLinesBefore();
rootMostContainerBlock.LinesAfter.AddRange(linesBefore);
}
ProcessBlocksTrivia(blockProcessor, document);
}

// At this point the LineIndex is the same as the number of lines in the document
Expand Down Expand Up @@ -117,12 +94,15 @@ private static string FixupZero(string text)
return text.Replace('\0', CharHelper.ReplacementChar);
}

private static void ProcessBlocks(BlockProcessor blockProcessor, LineReader lineReader)
[MethodImpl(MethodImplOptions.NoInlining)]
private static void ProcessBlocks(BlockProcessor blockProcessor, string text)
{
var lineReader = new LineReader(text);

while (true)
{
// Get the precise position of the begining of the line
var lineText = lineReader.ReadLine();
// Get the precise position of the beginning of the line
StringSlice lineText = lineReader.ReadLine();

// If this is the end of file and the last line is empty
if (lineText.Text is null)
Expand All @@ -132,9 +112,39 @@ private static void ProcessBlocks(BlockProcessor blockProcessor, LineReader line

blockProcessor.ProcessLine(lineText);
}

blockProcessor.CloseAll(true);
}

/// <summary>
/// Moves the lines still held in the block processor's "lines before" buffer onto the
/// document: either onto a freshly added <see cref="EmptyBlock"/> when the document has
/// no blocks at all, or onto the root-most container of the last block otherwise.
/// </summary>
private static void ProcessBlocksTrivia(BlockProcessor blockProcessor, MarkdownDocument document)
{
    Block? trailingBlock = blockProcessor.LastBlock;

    if (trailingBlock is null)
    {
        if (document.Count == 0)
        {
            // Nothing was parsed into a block: park the pending lines on a placeholder block.
            var placeholder = new EmptyBlock(null);
            List<StringSlice> pendingLines = blockProcessor.UseLinesBefore();
            placeholder.LinesAfter = [];
            if (pendingLines != null)
            {
                placeholder.LinesAfter.AddRange(pendingLines);
            }

            document.Add(placeholder);
        }

        return;
    }

    if (blockProcessor.LinesBefore is null)
    {
        return;
    }

    // Input is exhausted but some empty lines are still unassigned:
    // hand them to the outermost container of the last block.
    var outermost = Block.FindRootMostContainerParent(trailingBlock);
    outermost.LinesAfter ??= [];
    var remainingLines = blockProcessor.UseLinesBefore();
    outermost.LinesAfter.AddRange(remainingLines);
}

[MethodImpl(MethodImplOptions.NoInlining)]
private static void ProcessInlines(InlineProcessor inlineProcessor, MarkdownDocument document)
{
// "stackless" processor
Expand Down
7 changes: 7 additions & 0 deletions src/Markdig/Polyfills/IndexOfHelpers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,13 @@ public static int IndexOfAny(this ReadOnlySpan<char> span, string values)
return -1;
}
#endif

#if !NET6_0_OR_GREATER
/// <summary>
/// Polyfill: reports whether <paramref name="value"/> occurs anywhere in <paramref name="span"/>.
/// </summary>
public static bool Contains<T>(this ReadOnlySpan<T> span, T value) where T : IEquatable<T>
    => span.IndexOf(value) >= 0;
#endif
}

#endif
Loading

0 comments on commit feeb186

Please sign in to comment.