Handle Unicode characters when unescaping JSON

The Python LSP used some non-breaking space (nbsp) Unicode characters when sending back documentation.
author: Albert Cervin <albert@acervin.com> 2025-11-26 23:34:08 +0100
committer: Albert Cervin <albert@acervin.com> 2025-11-26 23:34:08 +0100
commit: f06d8923e86a2af70f9c97f8484dc9e645dcefdb (patch)
tree: a2fce0ab790dda09bfb33544622f65d7953fc898 /src/dged/utf8.c
parent: b8f2c54675cfd4c89c13941503fb23eda0ad082d (diff)
download: dged-f06d8923e86a2af70f9c97f8484dc9e645dcefdb.tar.gz
dged-f06d8923e86a2af70f9c97f8484dc9e645dcefdb.tar.xz
dged-f06d8923e86a2af70f9c97f8484dc9e645dcefdb.zip
1 files changed, 25 insertions, 1 deletions
diff --git a/src/dged/utf8.c b/src/dged/utf8.c
index b47f5fc..cc5a66e 100644
--- a/src/dged/utf8.c
+++ b/src/dged/utf8.c
@@ -36,7 +36,7 @@ static const uint8_t utf8d[] = {
 // clang-format on
 
 /*
- * emoji decoding algorithm from
+ * unicode decoding algorithm from
  * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/
  */
 static enum utf8_state decode(enum utf8_state *state, uint32_t *codep,
@@ -141,3 +141,27 @@ uint32_t unicode_visual_char_width(const struct codepoint *codepoint) {
     return 0;
   }
 }
+
+size_t utf8_encode(uint32_t codepoint, uint8_t buf[4]) { // Encodes `codepoint` as UTF-8 into `buf`; returns the number of bytes written (1-4), or 0 if codepoint > 0x10FFFF (buf is then left unwritten).
+  if (codepoint <= 0x7F) { // 1-byte form: 0xxxxxxx
+    buf[0] = (uint8_t)codepoint & 0xff;
+    return 1;
+  } else if (codepoint <= 0x7FF) { // 2-byte form: 110xxxxx 10xxxxxx
+    buf[0] = 0xC0 | (codepoint >> 6); // lead byte: marker 110 + bits 10..6
+    buf[1] = 0x80 | (codepoint & 0x3F); // continuation byte: marker 10 + bits 5..0
+    return 2;
+  } else if (codepoint <= 0xFFFF) { // 3-byte form: 1110xxxx 10xxxxxx 10xxxxxx. NOTE(review): this range includes 0xD800-0xDFFF (UTF-16 surrogates), which are encoded like any other value here; confirm callers combine surrogate pairs before calling.
+    buf[0] = 0xE0 | (codepoint >> 12); // lead byte: marker 1110 + bits 15..12
+    buf[1] = 0x80 | ((codepoint >> 6) & 0x3F); // continuation: bits 11..6
+    buf[2] = 0x80 | (codepoint & 0x3F); // continuation: bits 5..0
+    return 3;
+  } else if (codepoint <= 0x10FFFF) { // 4-byte form: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
+    buf[0] = 0xF0 | (codepoint >> 18); // lead byte: marker 11110 + bits 20..18
+    buf[1] = 0x80 | ((codepoint >> 12) & 0x3F); // continuation: bits 17..12
+    buf[2] = 0x80 | ((codepoint >> 6) & 0x3F); // continuation: bits 11..6
+    buf[3] = 0x80 | (codepoint & 0x3F); // continuation: bits 5..0
+    return 4;
+  }
+
+  return 0; // codepoint > 0x10FFFF: no branch matched, nothing was written to buf
+}
author	Albert Cervin <albert@acervin.com>	2025-11-26 23:34:08 +0100
committer	Albert Cervin <albert@acervin.com>	2025-11-26 23:34:08 +0100
commit	f06d8923e86a2af70f9c97f8484dc9e645dcefdb (patch)
tree	a2fce0ab790dda09bfb33544622f65d7953fc898 /src/dged/utf8.c
parent	b8f2c54675cfd4c89c13941503fb23eda0ad082d (diff)
download	dged-f06d8923e86a2af70f9c97f8484dc9e645dcefdb.tar.gz dged-f06d8923e86a2af70f9c97f8484dc9e645dcefdb.tar.xz dged-f06d8923e86a2af70f9c97f8484dc9e645dcefdb.zip