From 4b420251f6d06ed22d120a623d7c279683becb89 Mon Sep 17 00:00:00 2001 From: 7702244 <56172899+7702244@users.noreply.github.com> Date: Wed, 26 Jun 2024 01:45:22 +0300 Subject: [PATCH] Update TextHelpers.cs --- src/MAOToolkit/Utilities/TextHelpers.cs | 143 +++++++++++++++--------- 1 file changed, 92 insertions(+), 51 deletions(-) diff --git a/src/MAOToolkit/Utilities/TextHelpers.cs b/src/MAOToolkit/Utilities/TextHelpers.cs index 8b3fb5b..83268f2 100644 --- a/src/MAOToolkit/Utilities/TextHelpers.cs +++ b/src/MAOToolkit/Utilities/TextHelpers.cs @@ -58,14 +58,14 @@ public static string SubstringBetween(string? str, string start, string end) throw new ArgumentException("String can't be null or empty.", nameof(end)); } - int startIndex = str.IndexOf(start); + int startIndex = str.IndexOf(start, StringComparison.OrdinalIgnoreCase); // Check if the start string was found. - if (startIndex > -1) + if (startIndex >= 0) { startIndex += start.Length; - int endIndex = str.IndexOf(end, startIndex); + int endIndex = str.IndexOf(end, startIndex, StringComparison.OrdinalIgnoreCase); if (endIndex > startIndex) { return str.Substring(startIndex, endIndex - startIndex); @@ -112,7 +112,7 @@ public static string GetDeclension(int number, string nominativ, string genetiv, /// public static string BytesToString(long byteCount) { - string[] suffix = { "B", "KB", "MB", "GB", "TB", "PB", "EB" }; //Longs run out around EB + string[] suffix = ["B", "KB", "MB", "GB", "TB", "PB", "EB"]; //Longs run out around EB if (byteCount == 0) return "0 " + suffix[0]; @@ -120,7 +120,7 @@ public static string BytesToString(long byteCount) long bytes = Math.Abs(byteCount); int place = Convert.ToInt32(Math.Floor(Math.Log(bytes, 1024))); double num = Math.Round(bytes / Math.Pow(1024, place), 1); - return (Math.Sign(byteCount) * num).ToString() + " " + suffix[place]; + return (Math.Sign(byteCount) * num) + " " + suffix[place]; } /// @@ -151,15 +151,12 @@ public static string CropWholeWords(string? str, int length, HashSet? nonW throw new ArgumentException("Negative values not allowed.", nameof(length)); } - if (nonWordCharacters is null) - { - nonWordCharacters = new HashSet { ',', '.', ':', ';' }; - } - if (length >= str.Length) { return str; } + + nonWordCharacters ??= [',', '.', ':', ';']; int end = length; @@ -190,39 +187,87 @@ public static string CropWholeWords(string? str, int length, HashSet? nonW end = length; } - return String.Concat(str.AsSpan(0, end), "..."); + return String.Concat(str.AsSpan(0, end), "…"); } - + /// - /// Get the first several words from the summary and truncates long words. + /// Gets the maximum number of first words from the text, truncates long words and adds "…" to the end if there was a truncation. /// /// Source string. - /// Maximum number of words in result string. 0 - without limit. + /// Maximum number of words in result string. 0 - without limit. /// Maximum length of each word in result string. 0 - without limit. - public static string TruncateText(string? str, int numberWords, int maxWordLength = 0) + /// The string that will be added when text or a word is truncated. Defaults: "…". + /// Additional word delimiters. + /// In the output line: line translations and all additional word delimiters remain as in the original text. + public static string TruncateText( + string? str, + int maxWords = 0, + int maxWordLength = 0, + string truncateWith = "…", + string additionalDelimiters = ",.;:!?()[]{}") { if (String.IsNullOrEmpty(str)) { return String.Empty; } + + return new string(IterateChars().ToArray()); - // You cannot do Trim for words, because this will clean up the translations of the lines. - string[] words = str.Split(' ', StringSplitOptions.RemoveEmptyEntries); + bool IsSeparator(char c) => Char.IsSeparator(c) || additionalDelimiters.Contains(c); - for (int i = 0; i < words.Length; i++) + IEnumerable IterateChars() { - if (numberWords > 0 && i == numberWords) - { - break; - } + yield return str[0]; + + int words = 1; + int wordLength = 0; - if (maxWordLength > 0 && words[i].Length > maxWordLength) + for (int i = 1; i < str.Length; i++) { - words[i] = String.Concat(words[i].AsSpan(0, maxWordLength), "..."); + if (IsSeparator(str[i]) && !IsSeparator(str[i - 1])) + { + if (words == maxWords) + { + // Avoid three dots when abbreviating words and shortening the line. + if (maxWordLength > 0 && wordLength >= maxWordLength) + { + break; + } + + if (!String.IsNullOrEmpty(truncateWith)) + { + foreach (char c in truncateWith) + yield return c; + } + + break; + } + + words++; + wordLength = 0; + } + else + { + wordLength++; + } + + if (maxWordLength > 0 && wordLength >= maxWordLength) + { + if (wordLength == maxWordLength) + { + if (!String.IsNullOrEmpty(truncateWith)) + { + foreach (char c in truncateWith) + yield return c; + } + } + } + else + { + yield return str[i]; + } } } - - return String.Join(' ', words); } /// @@ -234,28 +279,27 @@ public static string TruncateText(string? str, int numberWords, int maxWordLengt /// The maximum number of occurrences URL links. 0 - do not check. public static bool IsSpam(string? str, IEnumerable spamWords, int spamWordsScore = 3, int urlWordsScore = 1) { - if (!String.IsNullOrWhiteSpace(str)) + if (String.IsNullOrWhiteSpace(str)) { - // If the links are more than urlWordsScore, it is spam. - if (urlWordsScore > 0 && RegularExpressions.URL.Matches(str).Count >= urlWordsScore) - { - return true; - } - - // If spamWords is greater than spamWordsScore, it is spam. - if (spamWordsScore > 0 && spamWords.Any()) - { - int score = Words.Matches(str) - .Count(match => spamWords.Any(word => match.Value.Contains(word, StringComparison.OrdinalIgnoreCase))); - - if (score >= spamWordsScore) - { - return true; - } - } + return false; + } + + // If the links are more than urlWordsScore, it is spam. + if (urlWordsScore > 0 && RegularExpressions.URL.Matches(str).Count >= urlWordsScore) + { + return true; + } + + if (spamWordsScore <= 0 || !spamWords.Any()) + { + return false; } + + // If spamWords is greater than spamWordsScore, it is spam. + int score = Words.Matches(str) + .Count(match => spamWords.Any(word => match.Value.Contains(word, StringComparison.OrdinalIgnoreCase))); - return false; + return score >= spamWordsScore; } public static bool IsValidEmail(string email) @@ -280,11 +324,11 @@ public static string CanonizeEmailString(string? str, bool removeInvalid = false try { - var emails = new HashSet(str.Split(new char[] { ';', ',', '/', '\\', '|', '&' }, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)); + var emails = new HashSet(str.Split(new[] { ';', ',', '/', '\\', '|', '&' }, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)); if (removeInvalid) { - emails.RemoveWhere(x => IsValidEmail(x)); + emails.RemoveWhere(x => !IsValidEmail(x)); } return String.Join(',', emails); @@ -298,10 +342,7 @@ public static string CanonizeEmailString(string? str, bool removeInvalid = false /// /// Returns a copy of string converted to HTML markup. /// - public static string ToHtml(string? s) - { - return ToHtml(s, false); - } + public static string ToHtml(string? s) => ToHtml(s, false); /// /// Returns a copy of string converted to HTML markup.