Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Typedbuffer and low level parsers #45

Open
wants to merge 23 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 21 commits
Commits
Show all changes
23 commits
Select commit Hold shift + click to select a range
f63fa58
added scoping mechanism to parsers
npenin Mar 16, 2021
4f76cea
code formatting
npenin Mar 18, 2021
ec9d71d
implemented strongly typed parsecontext
npenin Mar 20, 2021
795ed1e
Merge branch 'main' of https://github.com/sebastienros/parlot into sc…
Apr 30, 2021
0aaa2a6
Merge main branch
Apr 30, 2021
c691276
Merge branch 'main' of https://github.com/sebastienros/parlot into sc…
Apr 30, 2021
8cdf179
fixed compilation
npenin Apr 30, 2021
358143d
Fixing benchmarks compilation
sebastienros May 1, 2021
f18fd9a
removed interface to restore performances
npenin May 2, 2021
1d8c924
improved scoping usage
May 6, 2021
0eb62bd
Merge branch 'main' into scope2
sebastienros May 7, 2021
3d70885
typed scanner and cursors (to allow for byte parsing)
npenin May 8, 2021
42dafd4
fixed compilation after merge from main
npenin May 8, 2021
a2d11e2
Merge branch 'scope2' of https://github.com/npenin/parlot into typedb…
npenin May 9, 2021
00ceccb
fixed benchmark compilation
npenin May 9, 2021
be8c387
normalized tchar generics
npenin May 9, 2021
a0dafb3
simplified parsecontextwithscanner
npenin May 9, 2021
4787c8b
renamed TextSpan to bufferspan
npenin May 10, 2021
0ec9a68
renamed ParseContext<> to ScopeParseContext
npenin May 10, 2021
b253671
Merge branch 'scope2' of https://github.com/npenin/parlot into typedb…
npenin May 10, 2021
0e3d03f
merge from scope branch
npenin May 10, 2021
ceac2e6
improved compilation
npenin May 15, 2021
63fa49d
remove code duplication and improve number parsing
npenin Aug 13, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
138 changes: 138 additions & 0 deletions src/Parlot/BufferSpan.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
using System;

namespace Parlot
{
/// <summary>
/// Describes a contiguous segment of a <typeparamref name="T"/> array by its backing array,
/// a start offset into that array, and an element count.
/// </summary>
/// <remarks>
/// No constructor validates <c>offset</c> or <c>count</c> against the backing array; an invalid
/// combination only surfaces when the elements are accessed.
/// Span-based members are compiled only when <c>SUPPORTS_READONLYSPAN</c> is defined, so that every
/// member relying on <c>Span</c> is enabled or disabled together.
/// </remarks>
/// <typeparam name="T">The element type of the buffer.</typeparam>
public readonly struct BufferSpan<T> : IEquatable<T[]>, IEquatable<BufferSpan<T>>
where T : IEquatable<T>
{
    /// <summary>
    /// Creates a segment over <paramref name="buffer"/> starting at <paramref name="offset"/>
    /// and spanning <paramref name="count"/> elements.
    /// </summary>
    /// <param name="buffer">The backing array. It is referenced, not copied.</param>
    /// <param name="offset">Index in <paramref name="buffer"/> of the first element of the segment.</param>
    /// <param name="count">Number of elements in the segment.</param>
    public BufferSpan(T[] buffer, int offset, int count)
    {
        Buffer = buffer;
        Offset = offset;
        Length = count;
    }

#if SUPPORTS_READONLYSPAN
    /// <summary>
    /// Creates a segment from a span. The span content is copied into a new array, and
    /// <paramref name="offset"/> / <paramref name="count"/> are then relative to that copy.
    /// </summary>
    /// <param name="buffer">The span whose content is copied.</param>
    /// <param name="offset">Index in the copy of the first element of the segment.</param>
    /// <param name="count">Number of elements in the segment.</param>
    public BufferSpan(Span<T> buffer, int offset, int count)
    {
        Buffer = buffer.ToArray();
        Offset = offset;
        Length = count;
    }
#endif

    /// <summary>
    /// Creates a segment covering the whole of <paramref name="buffer"/>.
    /// A <c>null</c> array yields a segment with a <c>null</c> buffer and a length of 0.
    /// </summary>
    /// <param name="buffer">The backing array, or <c>null</c>.</param>
    public BufferSpan(T[] buffer)
    : this(buffer, 0, buffer?.Length ?? 0)
    {
    }

    /// <summary>
    /// Gets the element at position <paramref name="i"/> relative to the start of the segment.
    /// The index is not checked against <see cref="Length"/>.
    /// </summary>
    /// <param name="i">Zero-based index relative to <see cref="Offset"/>.</param>
    public T this[int i]
    {
        get { return Buffer[Offset + i]; }
    }

    /// <summary>
    /// Creates a segment over the same backing array, starting <paramref name="start"/> elements
    /// after the start of this segment.
    /// </summary>
    /// <param name="start">Start of the new segment, relative to this segment.</param>
    /// <param name="length">Number of elements in the new segment.</param>
    /// <returns>The new segment; the backing array is shared, not copied.</returns>
    public BufferSpan<T> SubBuffer(int start, int length)
    {
        return new(Buffer, start + Offset, length);
    }

    /// <summary>Number of elements in the segment.</summary>
    public readonly int Length;

    /// <summary>Index in <see cref="Buffer"/> of the first element of the segment.</summary>
    public readonly int Offset;

    /// <summary>The backing array; may be <c>null</c>.</summary>
    public readonly T[] Buffer;

#if SUPPORTS_READONLYSPAN
    /// <summary>
    /// Gets the segment as a <see cref="ReadOnlySpan{T}"/>, or an empty span when <see cref="Buffer"/> is <c>null</c>.
    /// </summary>
    public ReadOnlySpan<T> Span => Buffer == null ? ReadOnlySpan<T>.Empty : Buffer.AsSpan(Offset, Length);
#endif

    /// <summary>
    /// For <c>BufferSpan&lt;char&gt;</c>, returns the segment as a string, or <c>null</c> when
    /// <see cref="Buffer"/> is <c>null</c>. For any other element type, returns the default
    /// <see cref="ValueType.ToString"/> result.
    /// </summary>
    public override string ToString()
    {
        if (typeof(T) == typeof(char))
        {
            if (Buffer == null)
            {
                return null;
            }

            // T is known to be char here; the cast through object is needed to satisfy the compiler.
            return new string((char[])(object)Buffer, Offset, Length);
        }

        return base.ToString();
    }

    /// <summary>
    /// Compares the elements of the segment with the elements of <paramref name="other"/>.
    /// </summary>
    /// <param name="other">The array to compare with. <c>null</c> only equals a segment whose buffer is <c>null</c>.</param>
    /// <returns><c>true</c> when both have the same length and equal elements in the same order.</returns>
    public bool Equals(T[] other)
    {
        if (other == null)
        {
            return Buffer == null;
        }

#if SUPPORTS_READONLYSPAN
        return Span.SequenceEqual(other);
#else
        if (Length != other.Length)
        {
            return false;
        }

        for (var i = 0; i < Length; i++)
        {
            if (!Buffer[Offset + i].Equals(other[i]))
            {
                return false;
            }
        }

        return true;
#endif
    }

    /// <summary>
    /// Compares the elements of the segment with the elements of <paramref name="other"/>.
    /// The backing arrays and offsets may differ; only the content is compared.
    /// </summary>
    /// <param name="other">The segment to compare with.</param>
    /// <returns><c>true</c> when both have the same length and equal elements in the same order.</returns>
    public bool Equals(BufferSpan<T> other)
    {
#if SUPPORTS_READONLYSPAN
        return Span.SequenceEqual(other.Span);
#else
        if (Length != other.Length)
        {
            return false;
        }

        for (var i = 0; i < Length; i++)
        {
            if (!Buffer[Offset + i].Equals(other.Buffer[other.Offset + i]))
            {
                return false;
            }
        }

        return true;
#endif
    }

#if SUPPORTS_READONLYSPAN
    /// <summary>Converts a span to a segment over a copy of the span content.</summary>
    public static implicit operator BufferSpan<T>(Span<T> s)
    {
        return new BufferSpan<T>(s, 0, s.Length);
    }

    /// <summary>Converts a segment to its <see cref="Span"/>.</summary>
    public static implicit operator ReadOnlySpan<T>(BufferSpan<T> s)
    {
        return s.Span;
    }
#endif

    /// <summary>
    /// Converts an array to a segment covering the whole array.
    /// A <c>null</c> array converts to an empty segment instead of throwing.
    /// </summary>
    public static implicit operator BufferSpan<T>(T[] s)
    {
        // Delegate to the constructor, which already tolerates a null array.
        return new BufferSpan<T>(s);
    }

    /// <summary>
    /// Searches the segment for <paramref name="startChar"/>.
    /// </summary>
    /// <param name="startChar">The element to look for.</param>
    /// <param name="startOffset">Position, relative to the segment, at which the search starts.</param>
    /// <param name="end">
    /// Exclusive end position, relative to the segment. <c>-1</c>, or any value greater than
    /// <see cref="Length"/>, means the end of the segment.
    /// </param>
    /// <returns>The position of the first match relative to the segment, or <c>-1</c> when there is none.</returns>
    public int IndexOf(T startChar, int startOffset = 0, int end = -1)
    {
        if (end == -1 || end > Length)
        {
            end = Length;
        }

        for (var i = startOffset; i < end; i++)
        {
            if (Buffer[Offset + i].Equals(startChar))
            {
                return i;
            }
        }

        return -1;
    }
}
}
88 changes: 41 additions & 47 deletions src/Parlot/Character.cs
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ public static bool IsWhiteSpaceOrNewLine(char ch)
public static bool IsNewLine(char ch)
=> (ch == '\n') || (ch == '\r') || (ch == '\v');

public static char ScanHexEscape(string text, int index, out int length)
public static char ScanHexEscape(char[] text, int index, out int length)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why not ScanHexEscape(BufferSpan<char> text, int index, out int length) so it doesn't have to allocate a char[]

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

it will have to allocate a char[] as soon as you will have an escape

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

but here it has to allocate two of them, the argument, and the returned value.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

not really: ScanHexEscape just returns a char, so no allocation is done. And from the 2 places it is being called from, that's where the unescaping happens

{
var lastIndex = Math.Min(4 + index, text.Length - 1);
var code = 0;
Expand All @@ -68,75 +68,69 @@ public static char ScanHexEscape(string text, int index, out int length)
return (char)code;
}

public static TextSpan DecodeString(string s) => DecodeString(new TextSpan(s));
public static BufferSpan<char> DecodeString(string s) => DecodeString(s.ToCharArray());
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
public static BufferSpan<char> DecodeString(string s) => DecodeString(s.ToCharArray());
public static BufferSpan<char> DecodeString(string s) => DecodeString(new BufferSpan<char>(s));

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I thought so initially, but eventually, you need to build a char[] as you might be removing some characters if there are any escape to happen

Copy link
Author

@npenin npenin May 10, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

we may improve it in the case of Span support to avoid this ToCharArray call though


public static TextSpan DecodeString(TextSpan span)
public static BufferSpan<char> DecodeString(BufferSpan<char> span)
{
// Nothing to do if the string doesn't have any escape char
if (span.Buffer.IndexOf('\\', span.Offset, span.Length) == -1)
if (span.IndexOf('\\') == -1)
{
return span;
}

#if NETSTANDARD2_0
var result = CreateString(span.Length, span, static (chars, source) =>
#else
var result = String.Create(span.Length, span, static (chars, source) =>
#endif
{
// The assumption is that the new string will be shorter since escape results are smaller than their source
var result = new char[span.Length];
// The assumption is that the new string will be shorter since escape results are smaller than their source

var dataIndex = 0;
var buffer = span.Buffer;
var start = span.Offset;
var end = span.Offset + span.Length;

var dataIndex = 0;
var buffer = source.Buffer;
var start = source.Offset;
var end = source.Offset + source.Length;
for (var i = start; i < end; i++)
{
var c = buffer[i];

for (var i = start; i < end; i++)
if (c == '\\')
{
var c = buffer[i];
i++;
c = buffer[i];

if (c == '\\')
switch (c)
{
i++;
c = buffer[i];

switch (c)
{
case '\'': c = '\''; break;
case '"': c = '\"'; break;
case '\\': c = '\\'; break;
case 'b': c = '\b'; break;
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
case 'v': c = '\v'; break;
case 'u':
c = Character.ScanHexEscape(buffer, i, out var length);
i += length;
break;
case 'x':
c = Character.ScanHexEscape(buffer, i, out length);
i += length;
break;
}
case '\'': c = '\''; break;
case '"': c = '\"'; break;
case '\\': c = '\\'; break;
case 'b': c = '\b'; break;
case 'f': c = '\f'; break;
case 'n': c = '\n'; break;
case 'r': c = '\r'; break;
case 't': c = '\t'; break;
case 'v': c = '\v'; break;
case 'u':
c = Character.ScanHexEscape(buffer, i, out var length);
i += length;
break;
case 'x':
c = Character.ScanHexEscape(buffer, i, out length);
i += length;
break;
}

chars[dataIndex++] = c;
}

chars[dataIndex++] = '\0';
});
result[dataIndex++] = c;
}

result[dataIndex++] = '\0';

for (var i = result.Length - 1; i >= 0; i--)
{
if (result[i] != '\0')
{
return new TextSpan(result, 0, i + 1);
return new BufferSpan<char>(result, 0, i + 1);
}
}

return new TextSpan(result);
return result;
}

private static int HexValue(char ch)
Expand Down
24 changes: 21 additions & 3 deletions src/Parlot/Compilation/CompilationContext.cs
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
using Parlot.Fluent;
using System;
using System.Collections.Generic;
using System.Linq.Expressions;

namespace Parlot.Compilation
{

/// <summary>
/// Represents the context of a compilation phase, coordinating all the parsers involved.
/// </summary>
public class CompilationContext
public class CompilationContext<TParseContext>
where TParseContext : ParseContext
{
private int _number = 0;

Expand All @@ -18,7 +21,7 @@ public CompilationContext()
/// <summary>
/// Gets the expression containing the <see cref="ParseContext"/> instance for the parser.
/// </summary>
public ParameterExpression ParseContext { get; } = Expression.Parameter(typeof(ParseContext));
public ParameterExpression ParseContext { get; } = Expression.Parameter(typeof(TParseContext));

/// <summary>
/// Gets or sets a counter used to generate unique variable names.
Expand All @@ -34,7 +37,7 @@ public CompilationContext()
/// Gets the list of global expressions to add to the final list of statements.
/// </summary>
public List<Expression> GlobalExpressions { get; } = new();

/// <summary>
/// Gets the list of shared lambda expressions representing intermediate parsers.
/// </summary>
Expand All @@ -51,5 +54,20 @@ public CompilationContext()
/// This is done to optimize compiled parser that are usually used for pattern matching only.
/// </remarks>
public bool DiscardResult { get; set; } = false;


/// <summary>
/// Convenience overload that forwards to <c>DeclareValueVariable&lt;T, TParseContext&gt;</c>,
/// supplying this context's <typeparamref name="TParseContext"/> as the second type argument.
/// </summary>
/// <remarks>
/// NOTE(review): the two-type-argument overload is not declared in this class; presumably it is
/// an extension method defined elsewhere — confirm.
/// </remarks>
/// <typeparam name="T">The first type argument passed through to the forwarded call.</typeparam>
/// <param name="result">The compilation result passed through to the forwarded call.</param>
/// <returns>The <see cref="ParameterExpression"/> returned by the forwarded call.</returns>
public ParameterExpression DeclareValueVariable<T>(CompilationResult result)
    => this.DeclareValueVariable<T, TParseContext>(result);
}

/// <summary>
/// Represents the context of a compilation phase, coordinating all the parsers involved,
/// for parse contexts derived from <see cref="ParseContextWithScanner{TChar}"/>.
/// </summary>
/// <remarks>
/// This class declares no members of its own. It only narrows <typeparamref name="TParseContext"/>
/// to <see cref="ParseContextWithScanner{TChar}"/> and introduces <typeparamref name="TChar"/>.
/// </remarks>
/// <typeparam name="TParseContext">The parse context type the compiled parsers operate on.</typeparam>
/// <typeparam name="TChar">The type argument of <see cref="ParseContextWithScanner{TChar}"/>; must be equatable and convertible.</typeparam>
public class CompilationContext<TParseContext, TChar> : CompilationContext<TParseContext>
where TParseContext : ParseContextWithScanner<TChar>
where TChar : IEquatable<TChar>, IConvertible
{
}
}
12 changes: 7 additions & 5 deletions src/Parlot/Compilation/CompiledParser.cs
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,20 @@ public interface ICompiledParser
/// in order to expose it as a standard parser contract.
/// </summary>
/// <remarks>
/// This class is used in <see cref="Parser{T}.Compile"/>.
/// This class is used in <see cref="Parsers.Compile{T, TParseContext,TChar}"/>.
/// </remarks>
public class CompiledParser<T> : Parser<T>, ICompiledParser
public class CompiledParser<T, TParseContext, TChar> : Parser<T, TParseContext, TChar>, ICompiledParser
where TParseContext : ParseContextWithScanner<TChar>
where TChar : IEquatable<TChar>, IConvertible
{
private readonly Func<ParseContext, ValueTuple<bool, T>> _parse;
private readonly Func<TParseContext, ValueTuple<bool, T>> _parse;

public CompiledParser(Func<ParseContext, ValueTuple<bool, T>> parse)
public CompiledParser(Func<TParseContext, ValueTuple<bool, T>> parse)
{
_parse = parse ?? throw new ArgumentNullException(nameof(parse));
}

public override bool Parse(ParseContext context, ref ParseResult<T> result)
public override bool Parse(TParseContext context, ref ParseResult<T> result)
{
var start = context.Scanner.Cursor.Offset;
var parsed = _parse(context);
Expand Down
Loading