-
-
Notifications
You must be signed in to change notification settings - Fork 45
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Typedbuffer and low level parsers #45
base: main
Are you sure you want to change the base?
Changes from 21 commits
f63fa58
4f76cea
ec9d71d
795ed1e
0aaa2a6
c691276
8cdf179
358143d
f18fd9a
1d8c924
0eb62bd
3d70885
42dafd4
a2d11e2
00ceccb
be8c387
a0dafb3
4787c8b
0ec9a68
b253671
0e3d03f
ceac2e6
63fa49d
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
using System; | ||
|
||
namespace Parlot | ||
{ | ||
/// <summary>
/// A read-only window of <see cref="Length"/> elements over <see cref="Buffer"/>,
/// starting at index <see cref="Offset"/> of that array.
/// </summary>
/// <remarks>
/// Equality (<see cref="Equals(BufferSpan{T})"/>, <see cref="Equals(T[])"/>,
/// <see cref="Equals(object)"/> and <see cref="GetHashCode"/>) is based on the elements
/// inside the window, not on the identity of the backing array.
/// All span-dependent members are gated on the single SUPPORTS_READONLYSPAN symbol so that
/// the members that declare the span API and the members that consume it can never
/// disagree about its availability.
/// NOTE(review): assumes the build defines SUPPORTS_READONLYSPAN on every target where
/// Span&lt;T&gt; is available - confirm against the project file.
/// </remarks>
/// <typeparam name="T">Element type; compared through <see cref="IEquatable{T}"/>.</typeparam>
public readonly struct BufferSpan<T> : IEquatable<T[]>, IEquatable<BufferSpan<T>>
    where T : IEquatable<T>
{
    /// <summary>Number of elements in the window.</summary>
    public readonly int Length;

    /// <summary>Index in <see cref="Buffer"/> of the first element of the window.</summary>
    public readonly int Offset;

    /// <summary>Backing array; may be <c>null</c> for an empty span.</summary>
    public readonly T[] Buffer;

    /// <summary>Creates a window of <paramref name="count"/> elements starting at <paramref name="offset"/>.</summary>
    /// <param name="buffer">Backing array (stored by reference, not copied).</param>
    /// <param name="offset">Index of the first element in <paramref name="buffer"/>.</param>
    /// <param name="count">Number of elements in the window.</param>
    public BufferSpan(T[] buffer, int offset, int count)
    {
        Buffer = buffer;
        Offset = offset;
        Length = count;
    }

#if SUPPORTS_READONLYSPAN
    /// <summary>
    /// Creates a window over a copy of <paramref name="buffer"/>; the whole span is copied
    /// to a new array and <paramref name="offset"/>/<paramref name="count"/> select the window in it.
    /// </summary>
    public BufferSpan(Span<T> buffer, int offset, int count)
    {
        Buffer = buffer.ToArray();
        Offset = offset;
        Length = count;
    }
#endif

    /// <summary>Creates a window covering the whole array; a <c>null</c> array yields an empty span.</summary>
    public BufferSpan(T[] buffer)
        : this(buffer, 0, buffer?.Length ?? 0)
    {
    }

    /// <summary>Gets the element at position <paramref name="i"/> relative to the start of the window.</summary>
    public T this[int i] => Buffer[Offset + i];

    /// <summary>Returns a window over the same array, <paramref name="start"/> being relative to this window.</summary>
    /// <param name="start">Start position relative to the beginning of this window.</param>
    /// <param name="length">Number of elements in the new window.</param>
    public BufferSpan<T> SubBuffer(int start, int length)
    {
        return new(Buffer, start + Offset, length);
    }

#if SUPPORTS_READONLYSPAN
    /// <summary>The window as a <see cref="ReadOnlySpan{T}"/>; empty when there is no backing array.</summary>
    public ReadOnlySpan<T> Span => Buffer == null ? ReadOnlySpan<T>.Empty : Buffer.AsSpan(Offset, Length);
#endif

    /// <summary>
    /// For <c>char</c> spans, returns the window as a string (or <c>null</c> when there is no
    /// backing array); for any other element type, returns the default representation.
    /// </summary>
    public override string ToString()
    {
        if (typeof(T) != typeof(char))
        {
            return base.ToString();
        }

        if (Buffer == null)
        {
            return null;
        }

        // T is char here, so the round trip through object is a plain reference cast.
        return new string((char[])(object)Buffer, Offset, Length);
    }

    /// <summary>
    /// Compares the window with a whole array, element by element.
    /// A <c>null</c> array is only equal to a span without a backing array.
    /// </summary>
    public bool Equals(T[] other)
    {
        if (other == null)
        {
            return Buffer == null;
        }

#if SUPPORTS_READONLYSPAN
        return Span.SequenceEqual(other);
#else
        return SequenceEqualCore(other, 0, other.Length);
#endif
    }

    /// <summary>Compares two windows element by element; the backing arrays may differ.</summary>
    public bool Equals(BufferSpan<T> other)
    {
#if SUPPORTS_READONLYSPAN
        return Span.SequenceEqual(other.Span);
#else
        return SequenceEqualCore(other.Buffer, other.Offset, other.Length);
#endif
    }

    /// <summary>Content-based equality, consistent with <see cref="Equals(BufferSpan{T})"/>.</summary>
    public override bool Equals(object obj)
    {
        return obj is BufferSpan<T> other && Equals(other);
    }

    /// <summary>
    /// Hash of the elements in the window, so that spans that are equal by content
    /// also hash identically (required for use as dictionary or set keys).
    /// </summary>
    public override int GetHashCode()
    {
        unchecked
        {
            var hash = 17;

            for (var i = 0; i < Length; i++)
            {
                var item = Buffer[Offset + i];
                hash = (hash * 31) + (item == null ? 0 : item.GetHashCode());
            }

            return hash;
        }
    }

#if SUPPORTS_READONLYSPAN
    /// <summary>Copies the span into a new array and wraps it entirely.</summary>
    public static implicit operator BufferSpan<T>(Span<T> s)
    {
        return new BufferSpan<T>(s, 0, s.Length);
    }

    /// <summary>Exposes the window as a <see cref="ReadOnlySpan{T}"/>.</summary>
    public static implicit operator ReadOnlySpan<T>(BufferSpan<T> s)
    {
        return s.Span;
    }
#endif

    /// <summary>Wraps the whole array; a <c>null</c> array converts to an empty span instead of throwing.</summary>
    public static implicit operator BufferSpan<T>(T[] s)
    {
        // Delegate to the null-tolerant constructor rather than dereferencing s.Length.
        return new BufferSpan<T>(s);
    }

    /// <summary>
    /// Searches the window for <paramref name="startChar"/>.
    /// </summary>
    /// <param name="startChar">Element to look for.</param>
    /// <param name="startOffset">First position to examine, relative to the start of the window.</param>
    /// <param name="end">
    /// Exclusive end position, relative to the start of the window; <c>-1</c> or any value
    /// larger than <see cref="Length"/> means "up to the end of the window".
    /// </param>
    /// <returns>The position relative to the start of the window, or <c>-1</c> when not found.</returns>
    public int IndexOf(T startChar, int startOffset = 0, int end = -1)
    {
        if (end == -1 || end > Length)
        {
            end = Length;
        }

        for (var i = startOffset; i < end; i++)
        {
            if (Buffer[Offset + i].Equals(startChar))
            {
                return i;
            }
        }

        return -1;
    }

#if !SUPPORTS_READONLYSPAN
    // Element-wise comparison of this window against another array window (fallback without Span<T>).
    private bool SequenceEqualCore(T[] otherBuffer, int otherOffset, int otherLength)
    {
        if (Length != otherLength)
        {
            return false;
        }

        for (var i = 0; i < Length; i++)
        {
            var left = Buffer[Offset + i];
            var right = otherBuffer[otherOffset + i];

            // Null-safe so reference-type elements behave like the Span-based path.
            var same = left == null ? right == null : left.Equals(right);
            if (!same)
            {
                return false;
            }
        }

        return true;
    }
#endif
}
} |
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -45,7 +45,7 @@ public static bool IsWhiteSpaceOrNewLine(char ch) | |||||
public static bool IsNewLine(char ch) | ||||||
=> (ch == '\n') || (ch == '\r') || (ch == '\v'); | ||||||
|
||||||
public static char ScanHexEscape(string text, int index, out int length) | ||||||
public static char ScanHexEscape(char[] text, int index, out int length) | ||||||
{ | ||||||
var lastIndex = Math.Min(4 + index, text.Length - 1); | ||||||
var code = 0; | ||||||
|
@@ -68,75 +68,69 @@ public static char ScanHexEscape(string text, int index, out int length) | |||||
return (char)code; | ||||||
} | ||||||
|
||||||
public static TextSpan DecodeString(string s) => DecodeString(new TextSpan(s)); | ||||||
public static BufferSpan<char> DecodeString(string s) => DecodeString(s.ToCharArray()); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Suggested change
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I thought so initially, but eventually you need to build a char[], as you might be removing some characters if there are any escapes to process. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We may improve it in the case of Span support to avoid this ToCharArray call, though. |
||||||
|
||||||
public static TextSpan DecodeString(TextSpan span) | ||||||
public static BufferSpan<char> DecodeString(BufferSpan<char> span) | ||||||
{ | ||||||
// Nothing to do if the string doesn't have any escape char | ||||||
if (span.Buffer.IndexOf('\\', span.Offset, span.Length) == -1) | ||||||
if (span.IndexOf('\\') == -1) | ||||||
{ | ||||||
return span; | ||||||
} | ||||||
|
||||||
#if NETSTANDARD2_0 | ||||||
var result = CreateString(span.Length, span, static (chars, source) => | ||||||
#else | ||||||
var result = String.Create(span.Length, span, static (chars, source) => | ||||||
#endif | ||||||
{ | ||||||
// The assumption is that the new string will be shorter since escape results are smaller than their source | ||||||
var result = new char[span.Length]; | ||||||
// The assumption is that the new string will be shorter since escape results are smaller than their source | ||||||
|
||||||
var dataIndex = 0; | ||||||
var buffer = span.Buffer; | ||||||
var start = span.Offset; | ||||||
var end = span.Offset + span.Length; | ||||||
|
||||||
var dataIndex = 0; | ||||||
var buffer = source.Buffer; | ||||||
var start = source.Offset; | ||||||
var end = source.Offset + source.Length; | ||||||
for (var i = start; i < end; i++) | ||||||
{ | ||||||
var c = buffer[i]; | ||||||
|
||||||
for (var i = start; i < end; i++) | ||||||
if (c == '\\') | ||||||
{ | ||||||
var c = buffer[i]; | ||||||
i++; | ||||||
c = buffer[i]; | ||||||
|
||||||
if (c == '\\') | ||||||
switch (c) | ||||||
{ | ||||||
i++; | ||||||
c = buffer[i]; | ||||||
|
||||||
switch (c) | ||||||
{ | ||||||
case '\'': c = '\''; break; | ||||||
case '"': c = '\"'; break; | ||||||
case '\\': c = '\\'; break; | ||||||
case 'b': c = '\b'; break; | ||||||
case 'f': c = '\f'; break; | ||||||
case 'n': c = '\n'; break; | ||||||
case 'r': c = '\r'; break; | ||||||
case 't': c = '\t'; break; | ||||||
case 'v': c = '\v'; break; | ||||||
case 'u': | ||||||
c = Character.ScanHexEscape(buffer, i, out var length); | ||||||
i += length; | ||||||
break; | ||||||
case 'x': | ||||||
c = Character.ScanHexEscape(buffer, i, out length); | ||||||
i += length; | ||||||
break; | ||||||
} | ||||||
case '\'': c = '\''; break; | ||||||
case '"': c = '\"'; break; | ||||||
case '\\': c = '\\'; break; | ||||||
case 'b': c = '\b'; break; | ||||||
case 'f': c = '\f'; break; | ||||||
case 'n': c = '\n'; break; | ||||||
case 'r': c = '\r'; break; | ||||||
case 't': c = '\t'; break; | ||||||
case 'v': c = '\v'; break; | ||||||
case 'u': | ||||||
c = Character.ScanHexEscape(buffer, i, out var length); | ||||||
i += length; | ||||||
break; | ||||||
case 'x': | ||||||
c = Character.ScanHexEscape(buffer, i, out length); | ||||||
i += length; | ||||||
break; | ||||||
} | ||||||
|
||||||
chars[dataIndex++] = c; | ||||||
} | ||||||
|
||||||
chars[dataIndex++] = '\0'; | ||||||
}); | ||||||
result[dataIndex++] = c; | ||||||
} | ||||||
|
||||||
result[dataIndex++] = '\0'; | ||||||
|
||||||
for (var i = result.Length - 1; i >= 0; i--) | ||||||
{ | ||||||
if (result[i] != '\0') | ||||||
{ | ||||||
return new TextSpan(result, 0, i + 1); | ||||||
return new BufferSpan<char>(result, 0, i + 1); | ||||||
} | ||||||
} | ||||||
|
||||||
return new TextSpan(result); | ||||||
return result; | ||||||
} | ||||||
|
||||||
private static int HexValue(char ch) | ||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why not
ScanHexEscape(BufferSpan<char> text, int index, out int length)
so it doesn't have to allocate a char[]?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It will have to allocate a char[] as soon as there is an escape.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
But here it has to allocate two of them: the argument and the returned value.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Not really: ScanHexEscape just returns a char, so no allocation is done. And the two places it is called from are where the unescaping happens.