diff options
| author | Albert Cervin <albert@acervin.com> | 2025-11-26 23:34:08 +0100 |
|---|---|---|
| committer | Albert Cervin <albert@acervin.com> | 2025-11-26 23:34:08 +0100 |
| commit | f06d8923e86a2af70f9c97f8484dc9e645dcefdb (patch) | |
| tree | a2fce0ab790dda09bfb33544622f65d7953fc898 /src/dged/utf8.c | |
| parent | b8f2c54675cfd4c89c13941503fb23eda0ad082d (diff) | |
| download | dged-f06d8923e86a2af70f9c97f8484dc9e645dcefdb.tar.gz dged-f06d8923e86a2af70f9c97f8484dc9e645dcefdb.tar.xz dged-f06d8923e86a2af70f9c97f8484dc9e645dcefdb.zip | |
Handle unicode chars when unescaping JSON
The Python LSP server sent back documentation containing non-breaking space
(NBSP) Unicode characters.
Diffstat (limited to 'src/dged/utf8.c')
| -rw-r--r-- | src/dged/utf8.c | 26 |
1 files changed, 25 insertions, 1 deletions
/**
 * Encode a single Unicode code point as UTF-8.
 *
 * @param codepoint The code point to encode.
 * @param buf Output buffer with room for at least 4 bytes. Only the first
 *            N bytes are written, where N is the return value. No NUL
 *            terminator is appended.
 * @returns The number of bytes written (1-4), or 0 if @p codepoint cannot be
 *          represented in UTF-8: a UTF-16 surrogate (0xD800-0xDFFF) or a
 *          value above 0x10FFFF. In that case @p buf is left untouched.
 */
size_t utf8_encode(uint32_t codepoint, uint8_t buf[4]) {
  /* Surrogates are reserved for UTF-16; encoding them yields ill-formed
   * UTF-8 (RFC 3629, section 3), so reject them like out-of-range values. */
  if (codepoint >= 0xD800 && codepoint <= 0xDFFF) {
    return 0;
  }

  if (codepoint <= 0x7F) {
    /* 0xxxxxxx */
    buf[0] = (uint8_t)codepoint;
    return 1;
  }

  if (codepoint <= 0x7FF) {
    /* 110xxxxx 10xxxxxx */
    buf[0] = (uint8_t)(0xC0 | (codepoint >> 6));
    buf[1] = (uint8_t)(0x80 | (codepoint & 0x3F));
    return 2;
  }

  if (codepoint <= 0xFFFF) {
    /* 1110xxxx 10xxxxxx 10xxxxxx */
    buf[0] = (uint8_t)(0xE0 | (codepoint >> 12));
    buf[1] = (uint8_t)(0x80 | ((codepoint >> 6) & 0x3F));
    buf[2] = (uint8_t)(0x80 | (codepoint & 0x3F));
    return 3;
  }

  if (codepoint <= 0x10FFFF) {
    /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
    buf[0] = (uint8_t)(0xF0 | (codepoint >> 18));
    buf[1] = (uint8_t)(0x80 | ((codepoint >> 12) & 0x3F));
    buf[2] = (uint8_t)(0x80 | ((codepoint >> 6) & 0x3F));
    buf[3] = (uint8_t)(0x80 | (codepoint & 0x3F));
    return 4;
  }

  /* Beyond the last Unicode code point. */
  return 0;
}
