From 4ab7e453e26afc6e9f4938c65f89463fbba9e267 Mon Sep 17 00:00:00 2001
From: Albert Cervin <albert@acervin.com>
Date: Fri, 23 Aug 2024 17:07:27 +0200
Subject: Overhaul unicode parsing

It now instead iterates over the actual Unicode code points. This is
better than what it was previously doing, but it is still not entirely
correct w.r.t. Unicode sequences.

This handling of unicode code points does however make it slightly
easier to handle UTF-16 if needed in the future.

This also adds some long-needed tests for buffer methods.
---
 src/dged/keyboard.c | 32 ++++++++++++++++++--------------
 1 file changed, 18 insertions(+), 14 deletions(-)

(limited to 'src/dged/keyboard.c')

diff --git a/src/dged/keyboard.c b/src/dged/keyboard.c
index 26eb308..04565e0 100644
--- a/src/dged/keyboard.c
+++ b/src/dged/keyboard.c
@@ -78,20 +78,24 @@ void parse_keys(uint8_t *bytes, uint32_t nbytes, struct key *out_keys,
       } else if (utf8_byte_is_unicode_continuation(b)) {
         // do nothing for these
       } else { // ascii char or unicode start byte (self-inserting)
-        uint32_t nb = utf8_byte_is_unicode_start(b)
-                          ? utf8_nbytes(bytes + bytei, nbytes - bytei, 1)
-                          : 1;
-
-        // "compress" number of keys if previous key was also a
-        // "simple" key
-        if (prev_kp != NULL && prev_kp->mod == None) {
-          prev_kp->end += nb;
-        } else {
-          kp->mod = None;
-          kp->key = b;
-          kp->start = bytei;
-          kp->end = bytei + nb;
-          ++nkps;
+        // TODO: do this better
+        struct utf8_codepoint_iterator iter =
+            create_utf8_codepoint_iterator(bytes + bytei, nbytes - bytei, 0);
+        struct codepoint *codepoint = utf8_next_codepoint(&iter);
+        if (codepoint != NULL) {
+          uint32_t nb = codepoint->nbytes;
+
+          // "compress" number of keys if previous key was also a
+          // "simple" key
+          if (prev_kp != NULL && prev_kp->mod == None) {
+            prev_kp->end += nb;
+          } else {
+            kp->mod = None;
+            kp->key = b;
+            kp->start = bytei;
+            kp->end = bytei + nb;
+            ++nkps;
+          }
         }
       }
     }
-- 
cgit v1.2.3