-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathparse.cs
92 lines (91 loc) · 3.98 KB
/
parse.cs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
using System.Collections.Generic;
using System.IO;
using System.Threading.Tasks;
namespace System.Text {
    /// <summary>
    /// Splits plain text, or the contents of text files, into word tokens and hands the
    /// resulting word lists to caller-supplied callbacks.
    /// </summary>
    public static class Document {
        /// <summary>
        /// Scans the given files/directories using <paramref name="search"/> as the file pattern
        /// and <see cref="Environment.ProcessorCount"/> as the degree of parallelism.
        /// </summary>
        /// <param name="search">Search pattern applied when a path is a directory.</param>
        /// <param name="paths">File and/or directory paths to scan.</param>
        /// <param name="read">Optional per-token callback; see the tokenizer overload for its contract.</param>
        /// <param name="doc">Receives each file path together with the words extracted from it.</param>
        public static void Scan(string search, string[] paths, Action<string, Action<string>> read, Action<string, IList<string>> doc) {
            Scan(Environment.ProcessorCount, paths, search, read, doc);
        }

        /// <summary>
        /// Scans the given files/directories using the pattern <c>*.*</c>
        /// and <see cref="Environment.ProcessorCount"/> as the degree of parallelism.
        /// </summary>
        /// <param name="paths">File and/or directory paths to scan.</param>
        /// <param name="read">Optional per-token callback; see the tokenizer overload for its contract.</param>
        /// <param name="doc">Receives each file path together with the words extracted from it.</param>
        public static void Scan(string[] paths, Action<string, Action<string>> read, Action<string, IList<string>> doc) {
            Scan(Environment.ProcessorCount, paths, "*.*", read, doc);
        }

        /// <summary>
        /// Collects the distinct files named by <paramref name="paths"/> (directories are expanded
        /// recursively with <paramref name="search"/>), tokenizes each file and passes the result
        /// to <paramref name="process"/>.
        /// </summary>
        /// <param name="MaxDegreeOfParallelism">Forwarded to <see cref="ParallelOptions.MaxDegreeOfParallelism"/>.</param>
        /// <param name="paths">File and/or directory paths to scan.</param>
        /// <param name="search">Search pattern applied when a path is a directory.</param>
        /// <param name="read">Optional per-token callback; see the tokenizer overload for its contract.</param>
        /// <param name="process">
        /// Receives each file path together with the words extracted from it. It is invoked from
        /// <see cref="Parallel.ForEach{TSource}(IEnumerable{TSource}, ParallelOptions, Action{TSource})"/>,
        /// so it (and <paramref name="read"/>) may run on several threads at once.
        /// When null, the files are collected but nothing is read.
        /// </param>
        /// <exception cref="ArgumentNullException"><paramref name="paths"/> is null.</exception>
        public static void Scan(int MaxDegreeOfParallelism, string[] paths, string search, Action<string, Action<string>> read, Action<string, IList<string>> process) {
            if (paths == null) {
                throw new ArgumentNullException(nameof(paths));
            }

            // Case-insensitive set: the same file reached through several paths is processed once.
            ISet<string> files = new HashSet<string>(StringComparer.OrdinalIgnoreCase);
            foreach (var path in paths) {
                if (IsDirectory(path)) {
                    files.UnionWith(Directory.EnumerateFiles(path, search, SearchOption.AllDirectories));
                } else {
                    // Plain files and paths that do not exist both land here; a missing path
                    // fails later, when the file is actually read inside the parallel loop.
                    files.Add(path);
                }
            }

            if (process == null) {
                return;
            }

            var options = new ParallelOptions() { MaxDegreeOfParallelism = MaxDegreeOfParallelism };
            Parallel.ForEach(files, options, (file) => {
                process(file, Scan(file, read));
            });
        }

        /// <summary>
        /// Tokenizes an in-memory string and passes the words to <paramref name="doc"/>.
        /// </summary>
        /// <param name="plain">Text to tokenize.</param>
        /// <param name="read">Optional per-token callback; see the tokenizer overload for its contract.</param>
        /// <param name="doc">Invoked once with <c>null</c> as the name and the extracted words; ignored when null.</param>
        public static void Scan(string plain, Action<string, Action<string>> read, Action<string, IList<string>> doc) {
            if (doc != null) {
                doc(null, Scan(read, plain));
            }
        }

        /// <summary>
        /// Reads the whole file with <see cref="File.ReadAllText(string)"/> and tokenizes its content.
        /// </summary>
        /// <param name="file">Path of the file to read.</param>
        /// <param name="read">Optional per-token callback; see the tokenizer overload for its contract.</param>
        /// <returns>The words extracted from the file.</returns>
        public static IList<string> Scan(string file, Action<string, Action<string>> read) {
            return Scan(read, File.ReadAllText(file));
        }

        /// <summary>
        /// Splits <paramref name="plain"/> into whitespace-delimited tokens and trims every
        /// leading and trailing character that is punctuation or not a letter. Characters
        /// inside a token are kept as they are; tokens that end up empty are dropped.
        /// </summary>
        /// <param name="read">
        /// When not null, called for every token with the token and an <c>emit</c> delegate;
        /// each non-blank string passed to <c>emit</c> is appended to the result. When null,
        /// the tokens themselves are appended.
        /// </param>
        /// <param name="plain">Text to tokenize. Null or empty text yields an empty list.</param>
        /// <returns>The collected words, in order of appearance.</returns>
        public static IList<string> Scan(Action<string, Action<string>> read, string plain) {
            IList<string> doc = new List<string>();
            if (string.IsNullOrEmpty(plain)) {
                return doc;
            }

            // 'ʻ' is reported as a letter by char.IsLetter, so it is special-cased here
            // to be treated like punctuation.
            bool IsPunctuation(char c) => c == 'ʻ' || char.IsPunctuation(c);

            // True for characters allowed to start or end a word.
            bool IsWordEdge(char c) => !IsPunctuation(c) && char.IsLetter(c);

            // Created once instead of once per token; it only captures the result list.
            Action<string> emit = (w) => {
                if (!string.IsNullOrWhiteSpace(w)) {
                    doc.Add(w);
                }
            };

            int i = 0;
            while (i < plain.Length) {
                // A token never starts on whitespace or punctuation.
                if (char.IsWhiteSpace(plain[i]) || IsPunctuation(plain[i])) {
                    i++;
                    continue;
                }

                // Take everything up to the next whitespace ...
                int start = i;
                while (i < plain.Length && !char.IsWhiteSpace(plain[i])) {
                    i++;
                }
                int end = i;

                // ... then shrink both ends until they sit on a letter.
                while (start < end && !IsWordEdge(plain[start])) {
                    start++;
                }
                while (end > start && !IsWordEdge(plain[end - 1])) {
                    end--;
                }
                if (end == start) {
                    continue;
                }

                string word = plain.Substring(start, end - start);
                if (read != null) {
                    read(word, emit);
                } else {
                    doc.Add(word);
                }
            }
            return doc;
        }

        /// <summary>
        /// Tells whether <paramref name="path"/> is an existing directory. A path that cannot be
        /// found (missing file or missing parent directory) is reported as "not a directory".
        /// </summary>
        private static bool IsDirectory(string path) {
            try {
                return (File.GetAttributes(path) & FileAttributes.Directory) == FileAttributes.Directory;
            } catch (FileNotFoundException) {
                return false;
            } catch (DirectoryNotFoundException) {
                return false;
            }
        }
    }
}