Fix usage of codepoints above U+100000

Should now properly work with 6-digit codepoints. Addresses #6
ThioJoe · Sep 26, 2024 · 9ccdd0c · 9ccdd0c
1 parent 27f0388
commit 9ccdd0c
Show file tree

Hide file tree

Showing 2 changed files with 67 additions and 11 deletions.
diff --git a/MainForm.cs b/MainForm.cs
@@ -16,7 +16,7 @@ namespace F_Key_Sender
 {
     public partial class MainForm : Form
     {
-        const string VERSION = "1.1.1";
+        const string VERSION = "1.1.2";
 
         // Dictionary to store virtual key codes and scan codes. Will want to use wscan codes for SendInput
         private static readonly Dictionary<string, (ushort vk, ushort scan)> keyCodes = new Dictionary<string, (ushort, ushort)>
@@ -836,10 +836,23 @@ private string PrepareUnicodeString(string rawInput)
             // Or if it's 6 characters and starts with a zero, remove the zero and return
             else if (inputNoSpaces.Length == 6 && inputNoSpaces.StartsWith("0"))
             {
-                return inputNoSpaces.Substring(1);
+                if (int.TryParse(inputNoSpaces.Substring(1), System.Globalization.NumberStyles.HexNumber, null, out int testCodePoint) && testCodePoint >= 0x00000 && testCodePoint <= 0xFFFFF)
+                {
+                    return inputNoSpaces.Substring(1);
+                }
+
+            }
+            // If it's 6 characters and doesn't start with a zero, check if it's a valid codepoint
+            else if (inputNoSpaces.Length == 6)
+            {
+                // Check if it's a valid codepoint and return if so.
+                if (int.TryParse(inputNoSpaces, System.Globalization.NumberStyles.HexNumber, null, out int testCodePoint) && testCodePoint >= 0x00000 && testCodePoint <= 0x10FFFF)
+                {
+                    return inputNoSpaces;
+                }
             }
 
-            // If input is longer than 5 characters, we can assume it's a zero-width joiner, so we must find a way to split each glyph and convert each codepoint to 5 characters
+            // If input is longer than 6 characters, we can assume it's a multi-codepoint sequence (e.g. glyphs joined by zero-width joiners), so we must split it into its individual codepoints and convert each one to 5 characters
             // Check if "U+" is present, because the beginning would have already been stripped, so if it's still present we can use it to split the string
             if (input.Contains("U+"))
             {
@@ -930,7 +943,26 @@ private ushort[] UnicodeToUShortArray(string input)
         {
             List<ushort> result = new List<ushort>();
 
-            int chunkSize = input.Length > 4 ? 5 : 4;
+            int chunkSize;
+
+            // We will default to 4 characters, but if it's 5 or 6, we will adjust the chunk size to allow for single larger code points
+            // More than 6 though will require splitting to surrogate pairs or ZWJ codepoints
+
+            // If length is exactly 6, assume top end code point and set chunk size to 6
+            if (input.Length == 6)
+            {
+                chunkSize = 6;
+            }
+            // If it's 5, or longer than 6 (exactly 6 was already handled above), set chunk size to 5 so longer inputs are read as consecutive 5-character code points
+            else if (input.Length >= 5)
+            {
+                chunkSize = 5;
+            }
+            // Otherwise the input is 4 characters or fewer, so set chunk size to 4 (a single code point of at most 4 hex digits)
+            else
+            {
+                chunkSize = 4;
+            }
 
             // Process the input in chunks of 4 or 5 characters (4 for BMP, 5 for higher planes)
             for (int i = 0; i < input.Length; i += chunkSize)
@@ -944,9 +976,9 @@ private ushort[] UnicodeToUShortArray(string input)
                 }
 
                 // Convert the code point to UTF-16
-                string utf16String = char.ConvertFromUtf32(codePoint);
+                string utf16String = ConvertUTF32toUTF16(codePoint);
 
-                // Add each UTF-16 character to the result
+                // Add each UTF-16 code unit to the result. A surrogate pair contributes two entries (high surrogate, then low surrogate)
                 result.AddRange(utf16String.Select(c => (ushort)c));
             }
 
@@ -955,6 +987,28 @@ private ushort[] UnicodeToUShortArray(string input)
             return finalArray;
         }
 
+        private static string ConvertUTF32toUTF16(int codePoint) // Encodes one code point (0..0x10FFFF) as a string of one or two UTF-16 chars.
+        {
+            if (codePoint < 0 || codePoint > 0x10FFFF) // Reject values outside the 0..0x10FFFF range handled below
+            {
+                throw new ArgumentOutOfRangeException(nameof(codePoint), "Invalid Unicode code point");
+            }
+
+            if (codePoint <= 0xFFFF)
+            {
+                // Fits in a single 16-bit char, so return it as-is. Values in 0xD800-0xDFFF are not filtered out here and come back as a lone char.
+                return ((char)codePoint).ToString();
+            }
+            else
+            {
+                // Above 0xFFFF: encode as a surrogate pair
+                int adjusted = codePoint - 0x10000; // Offset past 0xFFFF (at most 20 bits, since codePoint <= 0x10FFFF)
+                char highSurrogate = (char)((adjusted >> 10) + 0xD800); // Upper 10 bits of the offset, added to 0xD800
+                char lowSurrogate = (char)((adjusted & 0x3FF) + 0xDC00); // Lower 10 bits of the offset, added to 0xDC00
+                return new string(new[] { highSurrogate, lowSurrogate }); // High surrogate first, then low
+            }
+        }
+
         // Check if there are any duplicate Unicode code points in the array that are not zero-width joiners
         // Since the the keydown events are sent together, if any are sent twice, it will not print it twice
         private bool CheckDuplicateUnicodeCodepoints(ushort[] inputArray)
@@ -987,9 +1041,11 @@ private void buttonCustomInfo_Click(object sender, EventArgs e)
                 "    two bytes, starting with E0.\n\n" +
 
                 "For Unicode:\n" +
-                "    This should be a 4 or 5 character codepoint. If sending\n" +
-                "     a glyph that uses a zero-width joiner like some emojis,\n" +
-                "     all codepoints must be 5 characters or split by spaces or U+.\n\n" + 
+                "    This should be a 4, 5, or 6 character codepoint. If sending\n" +
+                "    a glyph that uses a zero-width joiner like some emojis,\n" +
+                "    all codepoints must be 5 characters or split by spaces or U+.\n\n" +
+                "    Note: If sending multiple characters above U+100000, you must split" +
+                "    each character into its surrogate pairs first.\n\n" + 
 
                 "-------------------- Examples --------------------\n\n" +
 

diff --git a/Properties/AssemblyInfo.cs b/Properties/AssemblyInfo.cs
@@ -32,5 +32,5 @@
 // You can specify all the values or you can default the Build and Revision Numbers
 // by using the '*' as shown below:
 // [assembly: AssemblyVersion("1.0.*")]
-[assembly: AssemblyVersion("1.1.1.0")]
-[assembly: AssemblyFileVersion("1.1.1.0")]
+[assembly: AssemblyVersion("1.1.2.0")]
+[assembly: AssemblyFileVersion("1.1.2.0")]