summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--dged.nix5
-rw-r--r--src/dged/buffer.c473
-rw-r--r--src/dged/buffer.h44
-rw-r--r--src/dged/buffer_view.c40
-rw-r--r--src/dged/display.c33
-rw-r--r--src/dged/display.h2
-rw-r--r--src/dged/keyboard.c32
-rw-r--r--src/dged/syntax.c68
-rw-r--r--src/dged/text.c233
-rw-r--r--src/dged/text.h54
-rw-r--r--src/dged/utf8.c152
-rw-r--r--src/dged/utf8.h28
-rw-r--r--src/main/cmds.c2
-rw-r--r--src/main/completion.c108
-rw-r--r--sune.txt4
-rw-r--r--test/buffer.c193
-rw-r--r--test/main.c5
-rw-r--r--test/text.c70
-rw-r--r--test/utf8.c7
19 files changed, 889 insertions, 664 deletions
diff --git a/dged.nix b/dged.nix
index a8f1a8f..6f68df9 100644
--- a/dged.nix
+++ b/dged.nix
@@ -10,6 +10,8 @@
, valgrind
, linkFarm
, fetchFromGitHub
+, glibcLocalesUtf8
+, strace
}:
stdenv.mkDerivation {
name = "dged";
@@ -32,6 +34,9 @@ stdenv.mkDerivation {
bmake docs
'';
+ # needed for tests to work in sandboxed builds
+ LOCALE_ARCHIVE = "${glibcLocalesUtf8}/lib/locale/locale-archive";
+
TREESITTER_GRAMMARS = with tree-sitter-grammars;
linkFarm "tree-sitter-grammars" rec {
"bash" = tree-sitter-bash;
diff --git a/src/dged/buffer.c b/src/dged/buffer.c
index 6051f69..1062a47 100644
--- a/src/dged/buffer.c
+++ b/src/dged/buffer.c
@@ -157,6 +157,42 @@ void buffer_static_teardown() {
}
}
+/**
+ * Resolve the tab width to use for a buffer.
+ *
+ * The language-specific "tab-width" setting is looked up first; when it is
+ * absent the global "editor.tab-width" setting is used instead.
+ *
+ * @param [in] buffer The buffer whose language settings are consulted.
+ * @returns The configured value when a numeric setting was found, otherwise 4.
+ */
+static uint32_t get_tab_width(struct buffer *buffer) {
+  struct setting *setting = lang_setting(&buffer->lang, "tab-width");
+  if (setting == NULL) {
+    setting = settings_get("editor.tab-width");
+  }
+
+  // a missing or non-numeric setting falls back to the default width
+  if (setting == NULL || setting->value.type != Setting_Number) {
+    return 4;
+  }
+
+  return setting->value.number_value;
+}
+
+/**
+ * Decide whether a buffer indents with tab characters.
+ *
+ * The language-specific "use-tabs" setting is looked up first; when it is
+ * absent the global "editor.use-tabs" setting is used instead.
+ *
+ * @param [in] buffer The buffer whose language settings are consulted.
+ * @returns The configured value when a boolean setting was found, otherwise
+ * false.
+ */
+static bool use_tabs(struct buffer *buffer) {
+  struct setting *setting = lang_setting(&buffer->lang, "use-tabs");
+  if (setting == NULL) {
+    setting = settings_get("editor.use-tabs");
+  }
+
+  // a missing or non-boolean setting means "do not use tabs"
+  if (setting == NULL || setting->value.type != Setting_Bool) {
+    return false;
+  }
+
+  return setting->value.bool_value;
+}
+
+/**
+ * Get the number of columns a codepoint occupies.
+ *
+ * @param [in] codepoint The codepoint to measure.
+ * @param [in] tab_width The width to report for a tab character.
+ * @returns @p tab_width for a tab, otherwise whatever
+ * unicode_visual_char_width reports for the codepoint.
+ */
+static uint32_t visual_char_width(struct codepoint *codepoint,
+                                  uint32_t tab_width) {
+  if (codepoint->codepoint != '\t') {
+    return unicode_visual_char_width(codepoint);
+  }
+
+  return tab_width;
+}
+
static struct buffer create_internal(const char *name, char *filename) {
struct buffer b = (struct buffer){
.filename = filename,
@@ -185,7 +221,7 @@ static struct buffer create_internal(const char *name, char *filename) {
static void strip_final_newline(struct buffer *b) {
uint32_t nlines = text_num_lines(b->text);
- if (nlines > 0 && text_line_length(b->text, nlines - 1) == 0) {
+ if (nlines > 0 && buffer_line_length(b, nlines - 1) == 0) {
text_delete(b->text, nlines - 1, 0, nlines - 1, 1);
}
}
@@ -207,7 +243,7 @@ static void buffer_read_from_file(struct buffer *b) {
int bytes = fread(buff, 1, 4096, file);
if (bytes > 0) {
uint32_t ignore;
- text_append(b->text, buff, bytes, &ignore, &ignore);
+ text_append(b->text, buff, bytes, &ignore);
} else if (bytes == 0) {
break; // EOF
} else {
@@ -239,70 +275,66 @@ static void write_line(struct text_chunk *chunk, void *userdata) {
fputc('\n', file);
}
-static bool is_word_break(uint8_t c) {
+static bool is_word_break(const struct codepoint *codepoint) {
+ uint32_t c = codepoint->codepoint;
return c == ' ' || c == '.' || c == '(' || c == ')' || c == '[' || c == ']' ||
- c == '{' || c == '}' || c == ';' || c == '<' || c == '>' || c == ':';
+ c == '{' || c == '}' || c == ';' || c == '<' || c == '>' || c == ':' ||
+ c == '"';
}
-static bool is_word_char(uint8_t c) { return !is_word_break(c); }
-
-struct match_result {
- struct location at;
- bool found;
-};
-
-static struct match_result find_next_in_line(struct buffer *buffer,
- struct location start,
- bool (*predicate)(uint8_t c)) {
- struct text_chunk line = text_get_line(buffer->text, start.line);
- bool found = false;
+static bool is_word_char(const struct codepoint *c) {
+ return !is_word_break(c);
+}
- if (line.nbytes == 0) {
+static struct match_result
+find_next_in_line(struct buffer *buffer, struct location start,
+ bool (*predicate)(const struct codepoint *c)) {
+ if (text_line_size(buffer->text, start.line) == 0) {
return (struct match_result){.at = start, .found = false};
}
- uint32_t bytei = text_col_to_byteindex(buffer->text, start.line, start.col);
- while (bytei < line.nbytes) {
- if (predicate(line.text[bytei])) {
+ bool found = false;
+ struct utf8_codepoint_iterator iter =
+ text_line_codepoint_iterator(buffer->text, start.line);
+ uint32_t coli = 0, tab_width = get_tab_width(buffer);
+ struct codepoint *codepoint;
+ while ((codepoint = utf8_next_codepoint(&iter)) != NULL) {
+ if (coli >= start.col && predicate(codepoint)) {
found = true;
break;
}
- ++bytei;
+
+ coli += visual_char_width(codepoint, tab_width);
}
- uint32_t target_col = text_byteindex_to_col(buffer->text, start.line, bytei);
return (struct match_result){
- .at = (struct location){.line = start.line, .col = target_col},
- .found = found};
+ .at = (struct location){.line = start.line, .col = coli}, .found = found};
}
-static struct match_result find_prev_in_line(struct buffer *buffer,
- struct location start,
- bool (*predicate)(uint8_t c)) {
- struct text_chunk line = text_get_line(buffer->text, start.line);
- bool found = false;
+static struct match_result
+find_prev_in_line(struct buffer *buffer, struct location start,
+ bool (*predicate)(const struct codepoint *c)) {
- if (line.nbytes == 0) {
+ if (text_line_size(buffer->text, start.line) == 0) {
return (struct match_result){.at = start, .found = false};
}
- uint32_t bytei = text_col_to_byteindex(buffer->text, start.line, start.col);
- while (bytei > 0) {
- if (predicate(line.text[bytei])) {
+ bool found = false;
+ struct utf8_codepoint_iterator iter =
+ text_line_codepoint_iterator(buffer->text, start.line);
+ uint32_t coli = 0, tab_width = get_tab_width(buffer), found_at;
+ struct codepoint *codepoint;
+ while (coli < start.col && (codepoint = utf8_next_codepoint(&iter)) != NULL) {
+ if (predicate(codepoint)) {
found = true;
- break;
+ found_at = coli;
}
- --bytei;
- }
-
- // first byte on line can also be a match
- if (predicate(line.text[bytei])) {
- found = true;
+ coli += visual_char_width(codepoint, tab_width);
}
- uint32_t target_col = text_byteindex_to_col(buffer->text, start.line, bytei);
return (struct match_result){
- .at = (struct location){.line = start.line, .col = target_col},
+ .at =
+ (struct location){.line = start.line, .col = found ? found_at : coli},
.found = found};
}
@@ -315,13 +347,52 @@ static struct text_chunk *copy_region(struct buffer *buffer,
free(curr->text);
}
+ struct location begin_bytes =
+ buffer_location_to_byte_coords(buffer, region.begin);
+ struct location end_bytes =
+ buffer_location_to_byte_coords(buffer, region.end);
+
struct text_chunk txt =
- text_get_region(buffer->text, region.begin.line, region.begin.col,
- region.end.line, region.end.col);
+ text_get_region(buffer->text, begin_bytes.line, begin_bytes.col,
+ end_bytes.line, end_bytes.col);
*curr = txt;
return curr;
}
+/**
+ * Insert one level of indentation at a location.
+ *
+ * @param [in] buffer The buffer to insert into.
+ * @param [in] at The location to insert at.
+ * @param [in] tab_width Number of spaces to insert when not using tabs,
+ * capped at 16.
+ * @param [in] use_tabs If true, a single tab character is inserted instead
+ * of spaces.
+ * @returns The location returned by buffer_add for the insertion.
+ */
+static struct location do_indent(struct buffer *buffer, struct location at,
+                                 uint32_t tab_width, bool use_tabs) {
+  // upper bound on the number of spaces inserted; the literal passed to
+  // buffer_add below must hold at least this many bytes
+  enum { MaxIndentSpaces = 16 };
+
+  if (use_tabs) {
+    return buffer_add(buffer, at, (uint8_t *)"\t", 1);
+  }
+
+  // four adjacent literals of four spaces each are concatenated by the
+  // compiler into one 16-space string, which keeps the count easy to audit
+  return buffer_add(buffer, at,
+                    (uint8_t *)"    " "    " "    " "    ",
+                    tab_width > MaxIndentSpaces ? MaxIndentSpaces : tab_width);
+}
+
+/**
+ * Convert byte coordinates (line, byte column) to a byte offset counted
+ * from the start of the buffer.
+ *
+ * Every line before the target line contributes its size plus one byte for
+ * the newline. On the target line, a column past the end of the line counts
+ * as the line size plus one.
+ *
+ * @param [in] buffer The buffer to calculate the offset in.
+ * @param [in] bytecoords Location where col is a byte index into the line.
+ * @returns The byte offset, or 0 if the buffer has no lines.
+ */
+static uint64_t to_global_offset(struct buffer *buffer,
+                                 struct location bytecoords) {
+  uint32_t nlines = buffer_num_lines(buffer);
+  if (nlines == 0) {
+    return 0;
+  }
+
+  uint32_t line = bytecoords.line;
+  uint32_t col = bytecoords.col;
+
+  // accumulate in 64 bits so the sum cannot wrap at 4 GiB before it is
+  // widened to the return type
+  uint64_t byteoff = 0;
+  for (uint32_t l = 0; l < line && l < nlines; ++l) {
+    // +1 for newline
+    byteoff += (uint64_t)text_line_size(buffer->text, l) + 1;
+  }
+
+  // handle last line, clamping to the final line of the buffer
+  uint32_t last = line < nlines ? line : nlines - 1;
+  uint32_t nbytes = text_line_size(buffer->text, last);
+  byteoff += col <= nbytes ? (uint64_t)col : (uint64_t)nbytes + 1;
+
+  return byteoff;
+}
+
/* --------------------- buffer methods -------------------- */
struct buffer buffer_create(const char *name) {
@@ -452,18 +523,29 @@ struct location buffer_add(struct buffer *buffer, struct location at,
struct location initial = at;
struct location final = at;
- uint32_t lines_added, cols_added;
- text_insert_at(buffer->text, initial.line, initial.col, text, nbytes,
- &lines_added, &cols_added);
+ struct location at_bytes = buffer_location_to_byte_coords(buffer, at);
+
+ uint32_t lines_added;
+ text_insert_at(buffer->text, at_bytes.line, at_bytes.col, text, nbytes,
+ &lines_added);
// move to after inserted text
if (lines_added > 0) {
final = buffer_clamp(buffer, (int64_t)at.line + lines_added, 0);
} else {
+ uint32_t cols_added = 0, tab_width = get_tab_width(buffer);
+ struct utf8_codepoint_iterator iter =
+ create_utf8_codepoint_iterator(text, nbytes, 0);
+ struct codepoint *codepoint;
+ while ((codepoint = utf8_next_codepoint(&iter)) != NULL) {
+ cols_added += visual_char_width(codepoint, tab_width);
+ }
final =
buffer_clamp(buffer, (int64_t)at.line, (int64_t)at.col + cols_added);
}
+ struct location final_bytes = buffer_location_to_byte_coords(buffer, final);
+
undo_push_add(
&buffer->undo,
(struct undo_add){.begin = {.row = initial.line, .col = initial.col},
@@ -474,11 +556,17 @@ struct location buffer_add(struct buffer *buffer, struct location at,
(struct undo_boundary){.save_point = false});
}
- uint32_t begin_idx = text_global_idx(buffer->text, initial.line, initial.col);
- uint32_t end_idx = text_global_idx(buffer->text, final.line, final.col);
+ uint32_t begin_idx = to_global_offset(buffer, at_bytes);
+ uint32_t end_idx = to_global_offset(buffer, final_bytes);
VEC_FOR_EACH(&buffer->hooks->insert_hooks, struct insert_hook * h) {
- h->callback(buffer, region_new(initial, final), begin_idx, end_idx,
+ h->callback(buffer,
+ (struct edit_location){
+ .coordinates = region_new(initial, final),
+ .bytes = region_new(at_bytes, final_bytes),
+ .global_byte_begin = begin_idx,
+ .global_byte_end = end_idx,
+ },
h->userdata);
}
@@ -488,15 +576,16 @@ struct location buffer_add(struct buffer *buffer, struct location at,
struct location buffer_set_text(struct buffer *buffer, uint8_t *text,
uint32_t nbytes) {
- uint32_t lines, cols;
+ uint32_t lines_added;
text_clear(buffer->text);
- text_append(buffer->text, text, nbytes, &lines, &cols);
+ text_append(buffer->text, text, nbytes, &lines_added);
// if last line is empty, remove it
strip_final_newline(buffer);
- return buffer_clamp(buffer, lines, cols);
+ return buffer_clamp(buffer, lines_added,
+ buffer_line_length(buffer, lines_added));
}
void buffer_clear(struct buffer *buffer) { text_clear(buffer->text); }
@@ -524,9 +613,18 @@ struct location buffer_previous_char(struct buffer *buffer,
}
--dot.line;
- dot.col = buffer_num_chars(buffer, dot.line);
+ dot.col = buffer_line_length(buffer, dot.line);
} else {
- --dot.col;
+ struct utf8_codepoint_iterator iter =
+ text_line_codepoint_iterator(buffer->text, dot.line);
+ struct codepoint *codepoint;
+ uint32_t coli = 0, tab_width = get_tab_width(buffer), last_width = 0;
+ while (coli < dot.col && (codepoint = utf8_next_codepoint(&iter)) != NULL) {
+ last_width = visual_char_width(codepoint, tab_width);
+ coli += last_width;
+ }
+
+ dot.col = coli - last_width;
}
return dot;
@@ -571,14 +669,14 @@ struct location buffer_previous_line(struct buffer *buffer,
}
--dot.line;
- uint32_t nchars = buffer_num_chars(buffer, dot.line);
+ uint32_t nchars = buffer_line_length(buffer, dot.line);
uint32_t new_col = dot.col > nchars ? nchars : dot.col;
return dot;
}
struct location buffer_next_char(struct buffer *buffer, struct location dot) {
- if (dot.col == buffer_num_chars(buffer, dot.line)) {
+ if (dot.col == buffer_line_length(buffer, dot.line)) {
uint32_t lastline = buffer->lazy_row_add ? buffer_num_lines(buffer)
: buffer_num_lines(buffer) - 1;
if (dot.line == lastline) {
@@ -588,7 +686,16 @@ struct location buffer_next_char(struct buffer *buffer, struct location dot) {
dot.col = 0;
++dot.line;
} else {
- ++dot.col;
+ struct utf8_codepoint_iterator iter =
+ text_line_codepoint_iterator(buffer->text, dot.line);
+ struct codepoint *codepoint;
+ uint32_t coli = 0;
+ while (coli <= dot.col &&
+ (codepoint = utf8_next_codepoint(&iter)) != NULL) {
+ coli += visual_char_width(codepoint, get_tab_width(buffer));
+ }
+
+ dot.col = coli;
}
return dot;
@@ -635,7 +742,7 @@ struct location buffer_next_line(struct buffer *buffer, struct location dot) {
++dot.line;
uint32_t new_col = dot.col;
- uint32_t nchars = buffer_num_chars(buffer, dot.line);
+ uint32_t nchars = buffer_line_length(buffer, dot.line);
new_col = new_col > nchars ? nchars : new_col;
return dot;
@@ -664,8 +771,8 @@ struct location buffer_clamp(struct buffer *buffer, int64_t line, int64_t col) {
// clamp col
if (col < 0) {
col = 0;
- } else if (col > buffer_num_chars(buffer, line)) {
- col = buffer_num_chars(buffer, line);
+ } else if (col > buffer_line_length(buffer, line)) {
+ col = buffer_line_length(buffer, line);
}
location.col = col;
@@ -681,7 +788,7 @@ struct location buffer_end(struct buffer *buffer) {
return (struct location){.line = nlines, .col = 0};
} else {
return (struct location){.line = nlines - 1,
- .col = buffer_num_chars(buffer, nlines - 1)};
+ .col = buffer_line_length(buffer, nlines - 1)};
}
}
@@ -689,55 +796,22 @@ uint32_t buffer_num_lines(struct buffer *buffer) {
return text_num_lines(buffer->text);
}
-uint32_t buffer_num_chars(struct buffer *buffer, uint32_t line) {
- if (line >= buffer_num_lines(buffer)) {
- return 0;
+uint32_t buffer_line_length(struct buffer *buffer, uint32_t line) {
+ uint32_t tab_size = get_tab_width(buffer), len = 0;
+ struct utf8_codepoint_iterator iter =
+ text_line_codepoint_iterator(buffer->text, line);
+ struct codepoint *codepoint;
+ while ((codepoint = utf8_next_codepoint(&iter)) != NULL) {
+ len += visual_char_width(codepoint, tab_size);
}
- return text_line_length(buffer->text, line);
+ return len;
}
struct location buffer_newline(struct buffer *buffer, struct location at) {
return buffer_add(buffer, at, (uint8_t *)"\n", 1);
}
-static uint32_t get_tab_width(struct buffer *buffer) {
- struct setting *tw = lang_setting(&buffer->lang, "tab-width");
- if (tw == NULL) {
- tw = settings_get("editor.tab-width");
- }
-
- uint32_t tab_width = 4;
- if (tw != NULL && tw->value.type == Setting_Number) {
- tab_width = tw->value.number_value;
- }
- return tab_width;
-}
-
-static bool use_tabs(struct buffer *buffer) {
- struct setting *ut = lang_setting(&buffer->lang, "use-tabs");
- if (ut == NULL) {
- ut = settings_get("editor.use-tabs");
- }
-
- bool use_tabs = false;
- if (ut != NULL && ut->value.type == Setting_Bool) {
- use_tabs = ut->value.bool_value;
- }
-
- return use_tabs;
-}
-
-static struct location do_indent(struct buffer *buffer, struct location at,
- uint32_t tab_width, bool use_tabs) {
- if (use_tabs) {
- return buffer_add(buffer, at, (uint8_t *)"\t", 1);
- } else {
- return buffer_add(buffer, at, (uint8_t *)" ",
- tab_width > 16 ? 16 : tab_width);
- }
-}
-
struct location buffer_indent(struct buffer *buffer, struct location at) {
return do_indent(buffer, at, get_tab_width(buffer), use_tabs(buffer));
}
@@ -778,16 +852,13 @@ struct location buffer_undo(struct buffer *buffer, struct location dot) {
case Undo_Add: {
struct undo_add *add = &rec->add;
- pos =
- buffer_delete(buffer, (struct region){.begin =
- (struct location){
- .line = add->begin.row,
- .col = add->begin.col,
- },
- .end = (struct location){
- .line = add->end.row,
- .col = add->end.col,
- }});
+ pos = buffer_delete(buffer,
+ (struct region){
+ .begin = (struct location){.line = add->begin.row,
+ .col = add->begin.col},
+ .end = (struct location){.line = add->end.row,
+ .col = add->end.col},
+ });
break;
}
@@ -888,9 +959,14 @@ struct location buffer_delete(struct buffer *buffer, struct region region) {
return region.begin;
}
+ struct location begin_bytes =
+ buffer_location_to_byte_coords(buffer, region.begin);
+ struct location end_bytes =
+ buffer_location_to_byte_coords(buffer, region.end);
+
struct text_chunk txt =
- text_get_region(buffer->text, region.begin.line, region.begin.col,
- region.end.line, region.end.col);
+ text_get_region(buffer->text, begin_bytes.line, begin_bytes.col,
+ end_bytes.line, end_bytes.col);
undo_push_boundary(&buffer->undo,
(struct undo_boundary){.save_point = false});
@@ -903,17 +979,22 @@ struct location buffer_delete(struct buffer *buffer, struct region region) {
undo_push_boundary(&buffer->undo,
(struct undo_boundary){.save_point = false});
- uint32_t begin_idx =
- text_global_idx(buffer->text, region.begin.line, region.begin.col);
- uint32_t end_idx =
- text_global_idx(buffer->text, region.end.line, region.end.col);
+ uint64_t begin_idx = to_global_offset(buffer, begin_bytes);
+ uint64_t end_idx = to_global_offset(buffer, end_bytes);
- text_delete(buffer->text, region.begin.line, region.begin.col,
- region.end.line, region.end.col);
+ text_delete(buffer->text, begin_bytes.line, begin_bytes.col, end_bytes.line,
+ end_bytes.col);
buffer->modified = true;
VEC_FOR_EACH(&buffer->hooks->delete_hooks, struct delete_hook * h) {
- h->callback(buffer, region, begin_idx, end_idx, h->userdata);
+ h->callback(buffer,
+ (struct edit_location){
+ .coordinates = region,
+ .bytes = region_new(begin_bytes, end_bytes),
+ .global_byte_begin = begin_idx,
+ .global_byte_end = end_idx,
+ },
+ h->userdata);
}
return region.begin;
@@ -1035,27 +1116,6 @@ struct cmdbuf {
struct buffer *buffer;
};
-static uint32_t visual_char_width(uint8_t *byte, uint32_t maxlen) {
- if (*byte == '\t') {
- return 4;
- } else {
- return utf8_visual_char_width(byte, maxlen);
- }
-}
-
-uint32_t visual_string_width(uint8_t *txt, uint32_t len, uint32_t start_col,
- uint32_t end_col) {
- uint32_t start_byte = utf8_nbytes(txt, len, start_col);
- uint32_t end_byte = utf8_nbytes(txt, len, end_col);
-
- uint32_t width = 0;
- for (uint32_t bytei = start_byte; bytei < end_byte; ++bytei) {
- width += visual_char_width(&txt[bytei], len - bytei);
- }
-
- return width;
-}
-
static void apply_properties(struct command_list *cmds,
struct text_property *properties[],
uint32_t nproperties) {
@@ -1097,65 +1157,67 @@ void render_line(struct text_chunk *line, void *userdata) {
command_list_set_show_whitespace(cmdbuf->cmds, cmdbuf->show_ws);
// calculate scroll offsets
- uint32_t scroll_bytes =
- utf8_nbytes(line->text, line->nbytes, cmdbuf->origin.col);
- uint32_t text_nbytes_scroll =
- scroll_bytes > line->nbytes ? 0 : line->nbytes - scroll_bytes;
- uint8_t *text = line->text + scroll_bytes;
-
- uint32_t visual_col_start = 0;
- uint32_t cur_visual_col = 0;
- uint32_t start_byte = 0, text_nbytes = 0;
struct text_property *properties[32] = {0};
uint64_t prev_properties_hash = 0;
- for (uint32_t cur_byte = start_byte, coli = 0;
- cur_byte < text_nbytes_scroll && cur_visual_col < cmdbuf->width &&
- coli < line->nchars - cmdbuf->origin.col;
- ++coli) {
+ uint32_t tab_width = get_tab_width(cmdbuf->buffer);
+
+ // handle scroll column offset
+ uint32_t coli = 0, bytei = 0;
+ struct utf8_codepoint_iterator iter = text_chunk_codepoint_iterator(line);
+ struct codepoint *codepoint;
+ while (coli < cmdbuf->origin.col &&
+ (codepoint = utf8_next_codepoint(&iter)) != NULL) {
+ coli += visual_char_width(codepoint, tab_width);
+ bytei += codepoint->nbytes;
+ }
- uint32_t bytes_remaining = text_nbytes_scroll - cur_byte;
- uint32_t char_nbytes = utf8_nbytes(text + cur_byte, bytes_remaining, 1);
- uint32_t char_vwidth = visual_char_width(text + cur_byte, bytes_remaining);
+ // coli is the visual column [0..width-1]
+ coli = 0;
+ uint32_t drawn_bytei = bytei;
+ uint32_t drawn_coli = coli;
+ while (coli < cmdbuf->width &&
+ (codepoint = utf8_next_codepoint(&iter)) != NULL) {
// calculate character properties
uint32_t nproperties = 0;
- text_get_properties(
- cmdbuf->buffer->text,
- (struct location){.line = line->line, .col = coli + cmdbuf->origin.col},
- properties, 32, &nproperties);
+ text_get_properties(cmdbuf->buffer->text, line->line, bytei, properties, 32,
+ &nproperties);
// if we have any new or lost props, flush text up until now, reset
// and re-apply current properties
uint64_t new_properties_hash = properties_hash(properties, nproperties);
if (new_properties_hash != prev_properties_hash) {
- command_list_draw_text(cmdbuf->cmds, visual_col_start, visual_line,
- text + start_byte, cur_byte - start_byte);
+ command_list_draw_text(cmdbuf->cmds, drawn_coli, visual_line,
+ line->text + drawn_bytei, bytei - drawn_bytei);
command_list_reset_color(cmdbuf->cmds);
- visual_col_start = cur_visual_col;
- start_byte = cur_byte;
+ drawn_coli = coli;
+ drawn_bytei = bytei;
// apply new properties
apply_properties(cmdbuf->cmds, properties, nproperties);
}
prev_properties_hash = new_properties_hash;
- cur_byte += char_nbytes;
- text_nbytes += char_nbytes;
- cur_visual_col += char_vwidth;
+ bytei += codepoint->nbytes;
+ coli += visual_char_width(codepoint, tab_width);
}
// flush remaining
- command_list_draw_text(cmdbuf->cmds, visual_col_start, visual_line,
- text + start_byte, text_nbytes - start_byte);
+ command_list_draw_text(cmdbuf->cmds, drawn_coli, visual_line,
+ line->text + drawn_bytei, bytei - drawn_bytei);
+
+ drawn_coli = coli;
+ drawn_bytei = bytei;
command_list_reset_color(cmdbuf->cmds);
command_list_set_show_whitespace(cmdbuf->cmds, false);
- if (cur_visual_col < cmdbuf->width) {
- command_list_draw_repeated(cmdbuf->cmds, cur_visual_col, visual_line, ' ',
- cmdbuf->width - cur_visual_col);
+ // TODO: considering the whole screen is cleared, is this really needed?
+ if (drawn_coli < cmdbuf->width) {
+ command_list_draw_repeated(cmdbuf->cmds, drawn_coli, visual_line, ' ',
+ cmdbuf->width - drawn_coli);
}
}
@@ -1200,19 +1262,19 @@ void buffer_render(struct buffer *buffer, struct buffer_render_params *params) {
void buffer_add_text_property(struct buffer *buffer, struct location start,
struct location end,
struct text_property property) {
- text_add_property(
- buffer->text, (struct location){.line = start.line, .col = start.col},
- (struct location){.line = end.line, .col = end.col}, property);
+ struct location bytestart = buffer_location_to_byte_coords(buffer, start);
+ struct location byteend = buffer_location_to_byte_coords(buffer, end);
+ text_add_property(buffer->text, bytestart.line, bytestart.col, byteend.line,
+ byteend.col, property);
}
void buffer_get_text_properties(struct buffer *buffer, struct location location,
struct text_property **properties,
uint32_t max_nproperties,
uint32_t *nproperties) {
- text_get_properties(
- buffer->text,
- (struct location){.line = location.line, .col = location.col}, properties,
- max_nproperties, nproperties);
+ struct location bytecoords = buffer_location_to_byte_coords(buffer, location);
+ text_get_properties(buffer->text, bytecoords.line, bytecoords.col, properties,
+ max_nproperties, nproperties);
}
void buffer_clear_text_properties(struct buffer *buffer) {
@@ -1244,9 +1306,12 @@ void buffer_sort_lines(struct buffer *buffer, uint32_t start_line,
(struct location){.line = end + 1, .col = 0});
struct s8 *lines = (struct s8 *)malloc(sizeof(struct s8) * ntosort);
- struct text_chunk txt =
- text_get_region(buffer->text, region.begin.line, region.begin.col,
- region.end.line, region.end.col);
+
+ struct location bytebeg =
+ buffer_location_to_byte_coords(buffer, region.begin);
+ struct location byteend = buffer_location_to_byte_coords(buffer, region.end);
+ struct text_chunk txt = text_get_region(
+ buffer->text, bytebeg.line, bytebeg.col, byteend.line, byteend.col);
uint32_t line_start = 0;
uint32_t curr_line = 0;
@@ -1278,3 +1343,41 @@ void buffer_sort_lines(struct buffer *buffer, uint32_t start_line,
free(txt.text);
}
}
+
+/**
+ * Convert a location expressed in column positions to byte coordinates.
+ *
+ * The line is walked codepoint by codepoint, summing byte sizes and visual
+ * widths (tabs count as the buffer's tab width).
+ *
+ * @param [in] buffer The buffer the location refers to.
+ * @param [in] coords Location where col is a column position.
+ * @returns A location on the same line where col is a byte offset into the
+ * line. If @p coords is past the end of the line, the offset is the number
+ * of bytes that were walked.
+ */
+struct location buffer_location_to_byte_coords(struct buffer *buffer,
+                                               struct location coords) {
+  struct utf8_codepoint_iterator iter =
+      text_line_codepoint_iterator(buffer->text, coords.line);
+  uint32_t byteoffset = 0, col = 0, tab_width = get_tab_width(buffer);
+
+  // initialised so the read after the loop is well-defined even if the
+  // loop condition is ever changed to allow zero iterations
+  struct codepoint *codepoint = NULL;
+
+  /* Let this walk up to (and including the target column) to
+   * make sure we account for zero-width characters when calculating the
+   * byte offset.
+   */
+  while (col <= coords.col &&
+         (codepoint = utf8_next_codepoint(&iter)) != NULL) {
+    byteoffset += codepoint->nbytes;
+    col += visual_char_width(codepoint, tab_width);
+  }
+
+  /* Remove the byte-width of the last char again since it gives us the
+   * position right before it while still taking zero-width codepoints
+   * into account. When the line ran out first, codepoint is NULL and
+   * nothing is removed.
+   */
+  uint32_t overshoot = codepoint != NULL ? codepoint->nbytes : 0;
+  return (struct location){.line = coords.line, .col = byteoffset - overshoot};
+}
+
+/**
+ * Public entry point for searching backwards within a line.
+ *
+ * Forwards all arguments unchanged to the file-local find_prev_in_line and
+ * returns its result.
+ */
+struct match_result
+buffer_find_prev_in_line(struct buffer *buffer, struct location start,
+ bool (*predicate)(const struct codepoint *c)) {
+ return find_prev_in_line(buffer, start, predicate);
+}
+
+/**
+ * Public entry point for searching forwards within a line.
+ *
+ * Forwards all arguments unchanged to the file-local find_next_in_line and
+ * returns its result.
+ */
+struct match_result
+buffer_find_next_in_line(struct buffer *buffer, struct location start,
+ bool (*predicate)(const struct codepoint *c)) {
+ return find_next_in_line(buffer, start, predicate);
+}
diff --git a/src/dged/buffer.h b/src/dged/buffer.h
index cd5bd95..c9fe2ca 100644
--- a/src/dged/buffer.h
+++ b/src/dged/buffer.h
@@ -295,13 +295,13 @@ struct location buffer_end(struct buffer *buffer);
uint32_t buffer_num_lines(struct buffer *buffer);
/**
- * Get the number of chars in a given line in buffer.
+ * Get the line length in number of column positions.
*
* @param [in] buffer The buffer to use.
- * @param [in] line The line to get number of chars for.
- * @returns The number of chars in @ref line.
+ * @param [in] line The line to get number of columns for.
+ * @returns The number of column positions in the current line.
*/
-uint32_t buffer_num_chars(struct buffer *buffer, uint32_t line);
+uint32_t buffer_line_length(struct buffer *buffer, uint32_t line);
/**
* Insert a newline in the buffer.
@@ -555,6 +555,13 @@ uint32_t buffer_add_reload_hook(struct buffer *buffer, reload_hook_cb callback,
void buffer_remove_reload_hook(struct buffer *buffer, uint32_t hook_id,
remove_hook_cb callback);
+/**
+ * Describes where an edit took place; this is what insert and delete hook
+ * callbacks receive.
+ */
+struct edit_location {
+ /** The edited region in buffer coordinates (line, column position). */
+ struct region coordinates;
+ /** The same region converted to byte coordinates (line, byte offset). */
+ struct region bytes;
+ /** Byte offset from the start of the buffer to the beginning of the edit. */
+ uint64_t global_byte_begin;
+ /** Byte offset from the start of the buffer to the end of the edit. */
+ uint64_t global_byte_end;
+};
+
/**
* Buffer insert hook callback function.
*
@@ -565,9 +572,8 @@ void buffer_remove_reload_hook(struct buffer *buffer, uint32_t hook_id,
* @param end_idx The global byte offset to the end of where text was inserted.
* @param userdata The userdata as sent in to @ref buffer_add_insert_hook.
*/
-typedef void (*insert_hook_cb)(struct buffer *buffer, struct region inserted,
- uint32_t begin_idx, uint32_t end_idx,
- void *userdata);
+typedef void (*insert_hook_cb)(struct buffer *buffer,
+ struct edit_location inserted, void *userdata);
/**
* Add an insert hook, called when text is inserted into the @p buffer.
@@ -600,9 +606,8 @@ void buffer_remove_insert_hook(struct buffer *buffer, uint32_t hook_id,
* @param end_idx The global byte offset to the end of the removed text.
* @param userdata The userdata as sent in to @ref buffer_add_delete_hook.
*/
-typedef void (*delete_hook_cb)(struct buffer *buffer, struct region removed,
- uint32_t begin_idx, uint32_t end_idx,
- void *userdata);
+typedef void (*delete_hook_cb)(struct buffer *buffer,
+ struct edit_location removed, void *userdata);
/**
* Add a delete hook, called when text is removed from the @p buffer.
@@ -724,10 +729,6 @@ void buffer_update(struct buffer *buffer, struct buffer_update_params *params);
*/
void buffer_render(struct buffer *buffer, struct buffer_render_params *params);
-// TODO: move this to where it makes sense
-uint32_t visual_string_width(uint8_t *txt, uint32_t len, uint32_t start_col,
- uint32_t end_col);
-
/**
* Sort lines in a buffer alphabetically.
*
@@ -738,4 +739,19 @@ uint32_t visual_string_width(uint8_t *txt, uint32_t len, uint32_t start_col,
void buffer_sort_lines(struct buffer *buffer, uint32_t start_line,
uint32_t end_line);
+struct location buffer_location_to_byte_coords(struct buffer *buffer,
+ struct location coords);
+
+/**
+ * Result of a predicate search within a line, as returned by
+ * buffer_find_prev_in_line and buffer_find_next_in_line.
+ */
+struct match_result {
+ /** Location reported by the search; only a match when found is true. */
+ struct location at;
+ /** True if a codepoint satisfying the predicate was found. */
+ bool found;
+};
+
+struct match_result
+buffer_find_prev_in_line(struct buffer *buffer, struct location start,
+ bool (*predicate)(const struct codepoint *c));
+struct match_result
+buffer_find_next_in_line(struct buffer *buffer, struct location start,
+ bool (*predicate)(const struct codepoint *c));
+
#endif
diff --git a/src/dged/buffer_view.c b/src/dged/buffer_view.c
index 4e67d78..f3dd2b9 100644
--- a/src/dged/buffer_view.c
+++ b/src/dged/buffer_view.c
@@ -128,7 +128,7 @@ void buffer_view_backward_nlines(struct buffer_view *view, uint32_t nlines) {
}
void buffer_view_goto_end_of_line(struct buffer_view *view) {
- view->dot.col = buffer_num_chars(view->buffer, view->dot.line);
+ view->dot.col = buffer_line_length(view->buffer, view->dot.line);
}
void buffer_view_goto_beginning_of_line(struct buffer_view *view) {
@@ -224,15 +224,22 @@ void buffer_view_delete_word(struct buffer_view *view) {
}
void buffer_view_kill_line(struct buffer_view *view) {
- uint32_t nchars =
- buffer_num_chars(view->buffer, view->dot.line) - view->dot.col;
- if (nchars == 0) {
- nchars = 1;
+ uint32_t ncols =
+ buffer_line_length(view->buffer, view->dot.line) - view->dot.col;
+
+ uint32_t line = view->dot.line;
+ uint32_t col = view->dot.col + ncols;
+
+ // kill the newline if we are at the end of the line
+ if (ncols == 0) {
+ struct location loc = buffer_next_char(view->buffer, view->dot);
+ line = loc.line;
+ col = loc.col;
}
struct region reg = region_new(view->dot, (struct location){
- .line = view->dot.line,
- .col = view->dot.col + nchars,
+ .line = line,
+ .col = col,
});
buffer_cut(view->buffer, reg);
@@ -241,7 +248,8 @@ void buffer_view_kill_line(struct buffer_view *view) {
void buffer_view_sort_lines(struct buffer_view *view) {
struct region reg = region_new(view->dot, view->mark);
if (view->mark_set && region_has_size(reg)) {
- if (reg.end.line > 0 && buffer_num_chars(view->buffer, reg.end.line) == 0) {
+ if (reg.end.line > 0 &&
+ buffer_line_length(view->buffer, reg.end.line) == 0) {
reg.end.line -= 1;
}
@@ -271,21 +279,7 @@ struct location buffer_view_dot_to_relative(struct buffer_view *view) {
}
struct location buffer_view_dot_to_visual(struct buffer_view *view) {
- // calculate visual column index for dot column
- struct text_chunk c = buffer_line(view->buffer, view->dot.line);
- uint32_t width = visual_string_width(c.text, c.nbytes, 0, view->dot.col);
- if (view->scroll.col > 0) {
- width -= visual_string_width(c.text, c.nbytes, 0, view->scroll.col);
- }
-
- struct location l = buffer_view_dot_to_relative(view);
- l.col = width + view->fringe_width;
-
- if (c.allocated) {
- free(c.text);
- }
-
- return l;
+ return buffer_view_dot_to_relative(view);
}
void buffer_view_undo(struct buffer_view *view) {
diff --git a/src/dged/display.c b/src/dged/display.c
index bc604f0..ea3f459 100644
--- a/src/dged/display.c
+++ b/src/dged/display.c
@@ -60,7 +60,7 @@ struct push_fmt_cmd {
struct repeat_cmd {
uint32_t col;
uint32_t row;
- int32_t c;
+ uint32_t c;
uint32_t nrepeat;
};
@@ -135,21 +135,7 @@ void display_destroy(struct display *display) {
uint32_t display_width(struct display *display) { return display->width; }
uint32_t display_height(struct display *display) { return display->height; }
-void putch(uint8_t c) {
- // TODO: move this to buffer rendering
- if (c < ' ') {
- fprintf(stdout, "^%c", c + 0x40);
- } else if (c == 0x7f) {
- fprintf(stdout, "^?");
- } else if (utf8_byte_is_unicode_start(c) ||
- utf8_byte_is_unicode_continuation(c)) {
- putc(c, stdout);
- } else if (c >= ' ' && c < 0x7f) {
- putc(c, stdout);
- } else {
- fprintf(stdout, "|0x%02x|", c);
- }
-}
+void putch(uint8_t c) { putc(c, stdout); }
static void apply_fmt(uint8_t *fmt_stack, uint32_t fmt_stack_len) {
if (fmt_stack == NULL || fmt_stack_len == 0) {
@@ -164,6 +150,7 @@ static void apply_fmt(uint8_t *fmt_stack, uint32_t fmt_stack_len) {
void putch_ws(uint8_t c, bool show_whitespace, uint8_t *fmt_stack,
uint32_t fmt_stack_len) {
+ // TODO: tab width needs to be sent here
if (show_whitespace && c == '\t') {
fputs("\x1b[90m → \x1b[39m", stdout);
apply_fmt(fmt_stack, fmt_stack_len);
@@ -295,7 +282,7 @@ void command_list_draw_text_copy(struct command_list *list, uint32_t col,
}
void command_list_draw_repeated(struct command_list *list, uint32_t col,
- uint32_t row, int32_t c, uint32_t nrepeat) {
+ uint32_t row, uint32_t c, uint32_t nrepeat) {
struct repeat_cmd *cmd = add_command(list, RenderCommand_Repeat)->repeat;
cmd->col = col;
cmd->row = row;
@@ -401,10 +388,14 @@ void display_render(struct display *display,
display_move_cursor(display, repeat_cmd->row + cl->yoffset,
repeat_cmd->col + cl->xoffset);
apply_fmt(fmt_stack, fmt_stack_len);
- uint32_t nbytes = utf8_nbytes((uint8_t *)&repeat_cmd->c, 4, 1);
- for (uint32_t i = 0; i < repeat_cmd->nrepeat; ++i) {
- putbytes((uint8_t *)&repeat_cmd->c, nbytes, show_whitespace_state,
- fmt_stack, fmt_stack_len);
+ struct utf8_codepoint_iterator iter =
+ create_utf8_codepoint_iterator((uint8_t *)&repeat_cmd->c, 4, 0);
+ struct codepoint *codepoint = utf8_next_codepoint(&iter);
+ if (codepoint != NULL) {
+ for (uint32_t i = 0; i < repeat_cmd->nrepeat; ++i) {
+ putbytes((uint8_t *)&repeat_cmd->c, codepoint->nbytes,
+ show_whitespace_state, fmt_stack, fmt_stack_len);
+ }
}
break;
}
diff --git a/src/dged/display.h b/src/dged/display.h
index 0fda30d..f9c7ef8 100644
--- a/src/dged/display.h
+++ b/src/dged/display.h
@@ -238,7 +238,7 @@ void command_list_draw_text_copy(struct command_list *list, uint32_t col,
* @param nrepeat Number of times to repeat byte.
*/
void command_list_draw_repeated(struct command_list *list, uint32_t col,
- uint32_t row, int32_t c, uint32_t nrepeat);
+ uint32_t row, uint32_t c, uint32_t nrepeat);
void command_list_draw_command_list(struct command_list *list,
struct command_list *to_draw);
diff --git a/src/dged/keyboard.c b/src/dged/keyboard.c
index 26eb308..04565e0 100644
--- a/src/dged/keyboard.c
+++ b/src/dged/keyboard.c
@@ -78,20 +78,24 @@ void parse_keys(uint8_t *bytes, uint32_t nbytes, struct key *out_keys,
} else if (utf8_byte_is_unicode_continuation(b)) {
// do nothing for these
} else { // ascii char or unicode start byte (self-inserting)
- uint32_t nb = utf8_byte_is_unicode_start(b)
- ? utf8_nbytes(bytes + bytei, nbytes - bytei, 1)
- : 1;
-
- // "compress" number of keys if previous key was also a
- // "simple" key
- if (prev_kp != NULL && prev_kp->mod == None) {
- prev_kp->end += nb;
- } else {
- kp->mod = None;
- kp->key = b;
- kp->start = bytei;
- kp->end = bytei + nb;
- ++nkps;
+ // TODO: do this better
+ struct utf8_codepoint_iterator iter =
+ create_utf8_codepoint_iterator(bytes + bytei, nbytes - bytei, 0);
+ struct codepoint *codepoint = utf8_next_codepoint(&iter);
+ if (codepoint != NULL) {
+ uint32_t nb = codepoint->nbytes;
+
+ // "compress" number of keys if previous key was also a
+ // "simple" key
+ if (prev_kp != NULL && prev_kp->mod == None) {
+ prev_kp->end += nb;
+ } else {
+ kp->mod = None;
+ kp->key = b;
+ kp->start = bytei;
+ kp->end = bytei + nb;
+ ++nkps;
+ }
}
}
}
diff --git a/src/dged/syntax.c b/src/dged/syntax.c
index 8d0fd1a..569dc70 100644
--- a/src/dged/syntax.c
+++ b/src/dged/syntax.c
@@ -342,7 +342,8 @@ static void update_parser(struct buffer *buffer, void *userdata,
: origin.line + height;
ts_query_cursor_set_point_range(
cursor, (TSPoint){.row = origin.line, .column = origin.col},
- (TSPoint){.row = end_line, .column = buffer_num_chars(buffer, end_line)});
+ (TSPoint){.row = end_line,
+ .column = buffer_line_length(buffer, end_line)});
ts_query_cursor_exec(cursor, h->query, ts_tree_root_node(h->tree));
TSQueryMatch match;
@@ -406,47 +407,39 @@ static void update_parser(struct buffer *buffer, void *userdata,
continue;
}
- buffer_add_text_property(
- buffer,
- (struct location){.line = start.row,
- .col = text_byteindex_to_col(
- buffer->text, start.row, start.column)},
- (struct location){.line = end.row,
- .col = text_byteindex_to_col(buffer->text, end.row,
- end.column - 1)},
- (struct text_property){
- .type = TextProperty_Colors,
- .colors =
- (struct text_property_colors){
- .set_fg = true,
- .fg = color,
- },
- });
+ text_add_property(buffer->text, start.row, start.column, end.row,
+ end.column > 0 ? end.column - 1 : 0,
+ (struct text_property){
+ .type = TextProperty_Colors,
+ .colors =
+ (struct text_property_colors){
+ .set_fg = true,
+ .fg = color,
+ },
+ });
}
}
ts_query_cursor_delete(cursor);
}
-static void text_removed(struct buffer *buffer, struct region removed,
- uint32_t begin_idx, uint32_t end_idx, void *userdata) {
+static void text_removed(struct buffer *buffer, struct edit_location removed,
+ void *userdata) {
struct highlight *h = (struct highlight *)userdata;
- TSPoint begin = {.row = removed.begin.line,
- .column = text_col_to_byteindex(
- buffer->text, removed.begin.line, removed.begin.col)};
+ TSPoint begin = {.row = removed.bytes.begin.line,
+ .column = removed.bytes.begin.col};
TSPoint new_end = begin;
- TSPoint old_end = {.row = removed.end.line,
- .column = text_col_to_byteindex(
- buffer->text, removed.end.line, removed.end.col)};
+ TSPoint old_end = {.row = removed.bytes.end.line,
+ .column = removed.bytes.end.col};
TSInputEdit edit = {
.start_point = begin,
.old_end_point = old_end,
.new_end_point = new_end,
- .start_byte = begin_idx,
- .old_end_byte = end_idx,
- .new_end_byte = begin_idx,
+ .start_byte = removed.global_byte_begin,
+ .old_end_byte = removed.global_byte_end,
+ .new_end_byte = removed.global_byte_begin,
};
ts_tree_edit(h->tree, &edit);
@@ -479,27 +472,24 @@ static void buffer_reloaded(struct buffer *buffer, void *userdata) {
}
}
-static void text_inserted(struct buffer *buffer, struct region inserted,
- uint32_t begin_idx, uint32_t end_idx,
+static void text_inserted(struct buffer *buffer, struct edit_location inserted,
void *userdata) {
struct timer *text_inserted = timer_start("syntax.txt-inserted");
struct highlight *h = (struct highlight *)userdata;
- TSPoint begin = {.row = inserted.begin.line,
- .column = text_col_to_byteindex(
- buffer->text, inserted.begin.line, inserted.begin.col)};
+ TSPoint begin = {.row = inserted.bytes.begin.line,
+ .column = inserted.bytes.begin.col};
TSPoint old_end = begin;
- TSPoint new_end = {.row = inserted.end.line,
- .column = text_col_to_byteindex(
- buffer->text, inserted.end.line, inserted.end.col)};
+ TSPoint new_end = {.row = inserted.bytes.end.line,
+ .column = inserted.bytes.end.col};
TSInputEdit edit = {
.start_point = begin,
.old_end_point = old_end,
.new_end_point = new_end,
- .start_byte = begin_idx,
- .old_end_byte = begin_idx,
- .new_end_byte = end_idx,
+ .start_byte = inserted.global_byte_begin,
+ .old_end_byte = inserted.global_byte_begin,
+ .new_end_byte = inserted.global_byte_end,
};
ts_tree_edit(h->tree, &edit);
diff --git a/src/dged/text.c b/src/dged/text.c
index 3d1078f..18ab04f 100644
--- a/src/dged/text.c
+++ b/src/dged/text.c
@@ -18,7 +18,6 @@ struct line {
uint8_t *data;
uint8_t flags;
uint32_t nbytes;
- uint32_t nchars;
};
struct text_property_entry {
@@ -54,11 +53,9 @@ void text_destroy(struct text *text) {
text->lines[li].data = NULL;
text->lines[li].flags = 0;
text->lines[li].nbytes = 0;
- text->lines[li].nchars = 0;
}
free(text->lines);
-
free(text);
}
@@ -68,68 +65,25 @@ void text_clear(struct text *text) {
text->lines[li].data = NULL;
text->lines[li].flags = 0;
text->lines[li].nbytes = 0;
- text->lines[li].nchars = 0;
}
text->nlines = 0;
text_clear_properties(text);
}
-// given `char_idx` as a character index, return the byte index
-uint32_t charidx_to_byteidx(struct line *line, uint32_t char_idx) {
- if (line->nchars == 0) {
- return 0;
- }
-
- if (char_idx > line->nchars) {
- return line->nbytes - 1;
- }
-
- return utf8_nbytes(line->data, line->nbytes, char_idx);
-}
-
-uint32_t text_col_to_byteindex(struct text *text, uint32_t line, uint32_t col) {
- return charidx_to_byteidx(&text->lines[line], col);
-}
-
-// given `byte_idx` as a byte index, return the character index
-uint32_t byteidx_to_charidx(struct line *line, uint32_t byte_idx) {
- if (byte_idx > line->nbytes) {
- return line->nchars;
+struct utf8_codepoint_iterator
+text_line_codepoint_iterator(const struct text *text, uint32_t lineidx) {
+ if (lineidx >= text_num_lines(text)) {
+ return create_utf8_codepoint_iterator(NULL, 0, 0);
}
- return utf8_nchars(line->data, byte_idx);
+ return create_utf8_codepoint_iterator(text->lines[lineidx].data,
+ text->lines[lineidx].nbytes, 0);
}
-uint32_t text_byteindex_to_col(struct text *text, uint32_t line,
- uint32_t byteindex) {
- return byteidx_to_charidx(&text->lines[line], byteindex);
-}
-
-uint32_t text_global_idx(struct text *text, uint32_t line, uint32_t col) {
- uint32_t byteoff = 0;
- uint32_t nlines = text_num_lines(text);
-
- if (nlines == 0) {
- return 0;
- }
-
- for (uint32_t l = 0; l < line && l < nlines; ++l) {
- // +1 for newline
- byteoff += text_line_size(text, l) + 1;
- }
-
- uint32_t l = line < nlines ? line : nlines - 1;
- uint32_t nchars = text_line_length(text, l);
- uint32_t c = col < nchars ? col : nchars;
- byteoff += text_col_to_byteindex(text, l, c);
-
- if (col > nchars) {
- // account for newline
- ++byteoff;
- }
-
- return byteoff;
+struct utf8_codepoint_iterator
+text_chunk_codepoint_iterator(const struct text_chunk *chunk) {
+ return create_utf8_codepoint_iterator(chunk->text, chunk->nbytes, 0);
}
void append_empty_lines(struct text *text, uint32_t numlines) {
@@ -145,17 +99,10 @@ void append_empty_lines(struct text *text, uint32_t numlines) {
struct line *nline = &text->lines[text->nlines];
nline->data = NULL;
nline->nbytes = 0;
- nline->nchars = 0;
nline->flags = 0;
++text->nlines;
}
-
- if (text->nlines > text->capacity) {
- printf("text->nlines: %d, text->capacity: %d\n", text->nlines,
- text->capacity);
- raise(SIGTRAP);
- }
}
void ensure_line(struct text *text, uint32_t line) {
@@ -166,8 +113,8 @@ void ensure_line(struct text *text, uint32_t line) {
// It is assumed that `data` does not contain any \n, that is handled by
// higher-level functions
-void insert_at(struct text *text, uint32_t line, uint32_t col, uint8_t *data,
- uint32_t len, uint32_t nchars) {
+static void insert_at(struct text *text, uint32_t line, uint32_t offset,
+ uint8_t *data, uint32_t len) {
if (len == 0) {
return;
@@ -178,11 +125,10 @@ void insert_at(struct text *text, uint32_t line, uint32_t col, uint8_t *data,
struct line *l = &text->lines[line];
l->nbytes += len;
- l->nchars += nchars;
l->flags = LineChanged;
l->data = realloc(l->data, l->nbytes);
- uint32_t bytei = charidx_to_byteidx(l, col);
+ uint32_t bytei = offset;
// move following bytes out of the way
if (bytei + len < l->nbytes) {
@@ -194,15 +140,7 @@ void insert_at(struct text *text, uint32_t line, uint32_t col, uint8_t *data,
memcpy(l->data + bytei, data, len);
}
-uint32_t text_line_length(struct text *text, uint32_t lineidx) {
- if (lineidx >= text_num_lines(text)) {
- return 0;
- }
-
- return text->lines[lineidx].nchars;
-}
-
-uint32_t text_line_size(struct text *text, uint32_t lineidx) {
+uint32_t text_line_size(const struct text *text, uint32_t lineidx) {
if (lineidx >= text_num_lines(text)) {
return 0;
}
@@ -210,20 +148,19 @@ uint32_t text_line_size(struct text *text, uint32_t lineidx) {
return text->lines[lineidx].nbytes;
}
-uint32_t text_num_lines(struct text *text) { return text->nlines; }
+uint32_t text_num_lines(const struct text *text) { return text->nlines; }
+
+static void split_line(struct text *text, uint32_t offset, uint32_t lineidx,
+ uint32_t newlineidx) {
+ struct line *line = &text->lines[lineidx];
+ struct line *next = &text->lines[newlineidx];
-void split_line(uint32_t col, struct line *line, struct line *next) {
uint8_t *data = line->data;
uint32_t nbytes = line->nbytes;
- uint32_t nchars = line->nchars;
-
- uint32_t chari = col;
- uint32_t bytei = charidx_to_byteidx(line, chari);
+ uint32_t bytei = offset;
line->nbytes = bytei;
- line->nchars = chari;
next->nbytes = nbytes - bytei;
- next->nchars = nchars - chari;
line->flags = next->flags = line->flags;
next->data = NULL;
@@ -260,7 +197,7 @@ void shift_lines(struct text *text, uint32_t start, int32_t direction) {
memmove(dest, src, nlines * sizeof(struct line));
}
-void new_line_at(struct text *text, uint32_t line, uint32_t col) {
+void new_line_at(struct text *text, uint32_t line, uint32_t offset) {
ensure_line(text, line);
uint32_t newline = line + 1;
@@ -274,7 +211,7 @@ void new_line_at(struct text *text, uint32_t line, uint32_t col) {
}
// split line if needed
- split_line(col, &text->lines[line], &text->lines[newline]);
+ split_line(text, offset, line, newline);
}
void delete_line(struct text *text, uint32_t line) {
@@ -294,29 +231,25 @@ void delete_line(struct text *text, uint32_t line) {
--text->nlines;
text->lines[text->nlines].data = NULL;
text->lines[text->nlines].nbytes = 0;
- text->lines[text->nlines].nchars = 0;
}
-void text_insert_at_inner(struct text *text, uint32_t line, uint32_t col,
- uint8_t *bytes, uint32_t nbytes,
- uint32_t *lines_added, uint32_t *cols_added) {
+static void text_insert_at_inner(struct text *text, uint32_t line,
+ uint32_t offset, uint8_t *bytes,
+ uint32_t nbytes, uint32_t *lines_added) {
uint32_t linelen = 0, start_line = line;
- *cols_added = 0;
for (uint32_t bytei = 0; bytei < nbytes; ++bytei) {
uint8_t byte = bytes[bytei];
if (byte == '\n') {
uint8_t *line_data = bytes + (bytei - linelen);
- uint32_t nchars = utf8_nchars(line_data, linelen);
+ insert_at(text, line, offset, line_data, linelen);
- insert_at(text, line, col, line_data, linelen, nchars);
-
- col += nchars;
- new_line_at(text, line, col);
+ offset += linelen;
+ new_line_at(text, line, offset);
++line;
linelen = 0;
- col = 0;
+ offset = 0;
} else {
++linelen;
}
@@ -325,30 +258,26 @@ void text_insert_at_inner(struct text *text, uint32_t line, uint32_t col,
// handle remaining
if (linelen > 0) {
uint8_t *line_data = bytes + (nbytes - linelen);
- uint32_t nchars = utf8_nchars(line_data, linelen);
- insert_at(text, line, col, line_data, linelen, nchars);
- *cols_added = nchars;
+ insert_at(text, line, offset, line_data, linelen);
}
*lines_added = line - start_line;
}
void text_append(struct text *text, uint8_t *bytes, uint32_t nbytes,
- uint32_t *lines_added, uint32_t *cols_added) {
+ uint32_t *lines_added) {
uint32_t line = text->nlines > 0 ? text->nlines - 1 : 0;
- uint32_t col = text_line_length(text, line);
-
- text_insert_at_inner(text, line, col, bytes, nbytes, lines_added, cols_added);
+ uint32_t offset = text_line_size(text, line);
+ text_insert_at_inner(text, line, offset, bytes, nbytes, lines_added);
}
-void text_insert_at(struct text *text, uint32_t line, uint32_t col,
- uint8_t *bytes, uint32_t nbytes, uint32_t *lines_added,
- uint32_t *cols_added) {
- text_insert_at_inner(text, line, col, bytes, nbytes, lines_added, cols_added);
+void text_insert_at(struct text *text, uint32_t line, uint32_t offset,
+ uint8_t *bytes, uint32_t nbytes, uint32_t *lines_added) {
+ text_insert_at_inner(text, line, offset, bytes, nbytes, lines_added);
}
-void text_delete(struct text *text, uint32_t start_line, uint32_t start_col,
- uint32_t end_line, uint32_t end_col) {
+void text_delete(struct text *text, uint32_t start_line, uint32_t start_offset,
+ uint32_t end_line, uint32_t end_offset) {
if (text->nlines == 0) {
return;
@@ -362,45 +291,44 @@ void text_delete(struct text *text, uint32_t start_line, uint32_t start_col,
if (end_line > maxline) {
end_line = maxline;
- end_col = text->lines[end_line].nchars;
+ end_offset = text_line_size(text, end_line);
}
struct line *firstline = &text->lines[start_line];
struct line *lastline = &text->lines[end_line];
// clamp column
- if (start_col > firstline->nchars) {
- start_col = firstline->nchars > 0 ? firstline->nchars - 1 : 0;
+ uint32_t firstline_len = text_line_size(text, start_line);
+ if (start_offset > firstline_len) {
+ start_offset = firstline_len > 0 ? firstline_len - 1 : 0;
}
// handle deletion of newlines
- if (end_col > lastline->nchars) {
+ uint32_t lastline_len = text_line_size(text, end_line);
+ if (end_offset > lastline_len) {
if (end_line + 1 < text->nlines) {
- end_col = 0;
+ end_offset = 0;
++end_line;
lastline = &text->lines[end_line];
} else {
- end_col = lastline->nchars;
+ end_offset = lastline_len;
}
}
- uint32_t bytei = utf8_nbytes(lastline->data, lastline->nbytes, end_col);
+ uint32_t srcbytei = end_offset;
+ uint32_t dstbytei = start_offset;
+ uint32_t ncopy = lastline->nbytes - srcbytei;
if (lastline == firstline) {
// in this case we can "overwrite"
- uint32_t dstbytei =
- utf8_nbytes(firstline->data, firstline->nbytes, start_col);
- memmove(firstline->data + dstbytei, lastline->data + bytei,
- lastline->nbytes - bytei);
+ memmove(firstline->data + dstbytei, lastline->data + srcbytei, ncopy);
} else {
// otherwise we actually have to copy from the last line
- insert_at(text, start_line, start_col, lastline->data + bytei,
- lastline->nbytes - bytei, lastline->nchars - end_col);
+ insert_at(text, start_line, start_offset, lastline->data + srcbytei, ncopy);
}
- firstline->nchars = start_col + (lastline->nchars - end_col);
- firstline->nbytes =
- utf8_nbytes(firstline->data, firstline->nbytes, start_col) +
- (lastline->nbytes - bytei);
+ // new byte count is whatever we had before (left of dstbytei)
+ // plus what we copied
+ firstline->nbytes = dstbytei + ncopy;
// delete full lines, backwards to not shift old, crappy data upwards
for (uint32_t linei = end_line >= text->nlines ? end_line - 1 : end_line;
@@ -429,7 +357,6 @@ void text_for_each_line(struct text *text, uint32_t line, uint32_t nlines,
.allocated = false,
.text = src_line->data,
.nbytes = src_line->nbytes,
- .nchars = src_line->nchars,
.line = li,
};
callback(&line, userdata);
@@ -441,8 +368,8 @@ struct text_chunk text_get_line(struct text *text, uint32_t line) {
return (struct text_chunk){
.text = src_line->data,
.nbytes = src_line->nbytes,
- .nchars = src_line->nchars,
.line = line,
+ .allocated = false,
};
}
@@ -453,33 +380,34 @@ struct copy_cmd {
};
struct text_chunk text_get_region(struct text *text, uint32_t start_line,
- uint32_t start_col, uint32_t end_line,
- uint32_t end_col) {
- if (start_line == end_line && start_col == end_col) {
+ uint32_t start_offset, uint32_t end_line,
+ uint32_t end_offset) {
+ if (start_line == end_line && start_offset == end_offset) {
return (struct text_chunk){0};
}
struct line *first_line = &text->lines[start_line];
struct line *last_line = &text->lines[end_line];
+ uint32_t first_line_len = first_line->nbytes;
+ uint32_t last_line_len = last_line->nbytes;
- if (start_col > first_line->nchars) {
+ if (start_offset > first_line_len) {
return (struct text_chunk){0};
}
// handle copying of newlines
- if (end_col > last_line->nchars) {
+ if (end_offset > last_line_len) {
++end_line;
- end_col = 0;
+ end_offset = 0;
last_line = &text->lines[end_line];
}
uint32_t nlines = end_line - start_line + 1;
struct copy_cmd *copy_cmds = calloc(nlines, sizeof(struct copy_cmd));
- uint32_t total_chars = 0, total_bytes = 0;
+ uint32_t total_bytes = 0;
for (uint32_t line = start_line; line <= end_line; ++line) {
struct line *l = &text->lines[line];
- total_chars += l->nchars;
total_bytes += l->nbytes;
struct copy_cmd *cmd = &copy_cmds[line - start_line];
@@ -490,19 +418,14 @@ struct text_chunk text_get_region(struct text *text, uint32_t start_line,
// correct first line
struct copy_cmd *cmd_first = &copy_cmds[0];
- uint32_t byteoff =
- utf8_nbytes(first_line->data, first_line->nbytes, start_col);
- cmd_first->byteoffset += byteoff;
- cmd_first->nbytes -= byteoff;
- total_bytes -= byteoff;
- total_chars -= start_col;
+ cmd_first->byteoffset += start_offset;
+ cmd_first->nbytes -= start_offset;
+ total_bytes -= start_offset;
// correct last line
struct copy_cmd *cmd_last = &copy_cmds[nlines - 1];
- uint32_t byteindex = utf8_nbytes(last_line->data, last_line->nbytes, end_col);
- cmd_last->nbytes -= (last_line->nbytes - byteindex);
- total_bytes -= (last_line->nbytes - byteindex);
- total_chars -= (last_line->nchars - end_col);
+ cmd_last->nbytes -= (last_line->nbytes - end_offset);
+ total_bytes -= (last_line->nbytes - end_offset);
uint8_t *data = (uint8_t *)malloc(
total_bytes + /* nr of newline chars */ (end_line - start_line));
@@ -518,7 +441,6 @@ struct text_chunk text_get_region(struct text *text, uint32_t start_line,
data[curr] = '\n';
++curr;
++total_bytes;
- ++total_chars;
}
}
@@ -527,28 +449,25 @@ struct text_chunk text_get_region(struct text *text, uint32_t start_line,
.text = data,
.line = 0,
.nbytes = total_bytes,
- .nchars = total_chars,
.allocated = true,
};
}
-bool text_line_contains_unicode(struct text *text, uint32_t line) {
- return text->lines[line].nbytes != text->lines[line].nchars;
-}
-
-void text_add_property(struct text *text, struct location start,
- struct location end, struct text_property property) {
+void text_add_property(struct text *text, uint32_t start_line,
+ uint32_t start_offset, uint32_t end_line,
+ uint32_t end_offset, struct text_property property) {
struct text_property_entry entry = {
- .start = start,
- .end = end,
+ .start = (struct location){.line = start_line, .col = start_offset},
+ .end = (struct location){.line = end_line, .col = end_offset},
.property = property,
};
VEC_PUSH(&text->properties, entry);
}
-void text_get_properties(struct text *text, struct location location,
+void text_get_properties(struct text *text, uint32_t line, uint32_t offset,
struct text_property **properties,
uint32_t max_nproperties, uint32_t *nproperties) {
+ struct location location = {.line = line, .col = offset};
uint32_t nres = 0;
VEC_FOR_EACH(&text->properties, struct text_property_entry * prop) {
if (location_is_between(location, prop->start, prop->end)) {
diff --git a/src/dged/text.h b/src/dged/text.h
index 8b49ef4..28bd325 100644
--- a/src/dged/text.h
+++ b/src/dged/text.h
@@ -6,9 +6,16 @@
#include <stdint.h>
#include "location.h"
+#include "utf8.h"
struct text;
-struct render_command;
+
+struct text_chunk {
+ uint8_t *text;
+ uint32_t nbytes;
+ uint32_t line;
+ bool allocated;
+};
struct text *text_create(uint32_t initial_capacity);
void text_destroy(struct text *text);
@@ -18,31 +25,21 @@ void text_destroy(struct text *text);
*/
void text_clear(struct text *text);
-void text_insert_at(struct text *text, uint32_t line, uint32_t col,
- uint8_t *bytes, uint32_t nbytes, uint32_t *lines_added,
- uint32_t *cols_added);
+void text_insert_at(struct text *text, uint32_t line, uint32_t offset,
+ uint8_t *bytes, uint32_t nbytes, uint32_t *lines_added);
void text_append(struct text *text, uint8_t *bytes, uint32_t nbytes,
- uint32_t *lines_added, uint32_t *cols_added);
+ uint32_t *lines_added);
-void text_delete(struct text *text, uint32_t start_line, uint32_t start_col,
- uint32_t end_line, uint32_t end_col);
+void text_delete(struct text *text, uint32_t start_line, uint32_t start_offset,
+ uint32_t end_line, uint32_t end_offset);
-uint32_t text_num_lines(struct text *text);
-uint32_t text_line_length(struct text *text, uint32_t lineidx);
-uint32_t text_line_size(struct text *text, uint32_t lineidx);
-uint32_t text_col_to_byteindex(struct text *text, uint32_t line, uint32_t col);
-uint32_t text_byteindex_to_col(struct text *text, uint32_t line,
- uint32_t byteindex);
-uint32_t text_global_idx(struct text *text, uint32_t line, uint32_t col);
-
-struct text_chunk {
- uint8_t *text;
- uint32_t nbytes;
- uint32_t nchars;
- uint32_t line;
- bool allocated;
-};
+uint32_t text_num_lines(const struct text *text);
+uint32_t text_line_size(const struct text *text, uint32_t lineidx);
+struct utf8_codepoint_iterator
+text_line_codepoint_iterator(const struct text *text, uint32_t lineidx);
+struct utf8_codepoint_iterator
+text_chunk_codepoint_iterator(const struct text_chunk *chunk);
typedef void (*chunk_cb)(struct text_chunk *chunk, void *userdata);
void text_for_each_line(struct text *text, uint32_t line, uint32_t nlines,
@@ -52,10 +49,8 @@ void text_for_each_chunk(struct text *text, chunk_cb callback, void *userdata);
struct text_chunk text_get_line(struct text *text, uint32_t line);
struct text_chunk text_get_region(struct text *text, uint32_t start_line,
- uint32_t start_col, uint32_t end_line,
- uint32_t end_col);
-
-bool text_line_contains_unicode(struct text *text, uint32_t line);
+ uint32_t start_offset, uint32_t end_line,
+ uint32_t end_offset);
enum text_property_type {
TextProperty_Colors,
@@ -77,10 +72,11 @@ struct text_property {
};
};
-void text_add_property(struct text *text, struct location start,
- struct location end, struct text_property property);
+void text_add_property(struct text *text, uint32_t start_line,
+ uint32_t start_offset, uint32_t end_line,
+ uint32_t end_offset, struct text_property property);
-void text_get_properties(struct text *text, struct location location,
+void text_get_properties(struct text *text, uint32_t line, uint32_t offset,
struct text_property **properties,
uint32_t max_nproperties, uint32_t *nproperties);
diff --git a/src/dged/utf8.c b/src/dged/utf8.c
index 52de2da..ede4fb1 100644
--- a/src/dged/utf8.c
+++ b/src/dged/utf8.c
@@ -1,5 +1,6 @@
#include "utf8.h"
+#include <assert.h>
#include <stdio.h>
#include <wchar.h>
@@ -10,76 +11,125 @@ bool utf8_byte_is_unicode_continuation(uint8_t byte) {
bool utf8_byte_is_unicode(uint8_t byte) { return (byte & 0x80) != 0x0; }
bool utf8_byte_is_ascii(uint8_t byte) { return !utf8_byte_is_unicode(byte); }
-uint32_t utf8_nbytes_in_char(uint8_t byte) {
- // length of char is the number of leading ones
- // flip it and count number of leading zeros
- uint8_t invb = ~byte;
- return __builtin_clz((uint32_t)invb) - 24;
+enum utf8_state {
+ Utf8_Accept = 0,
+ Utf8_Reject = 1,
+};
+
+// clang-format off
+static const uint8_t utf8d[] = {
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
+ 8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
+ 0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
+ 0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
+ 0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
+ 1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
+ 1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
+ 1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
+};
+// clang-format on
+
+/*
+ * UTF-8 decoding algorithm (DFA-based) from
+ * https://bjoern.hoehrmann.de/utf-8/decoder/dfa/
+ */
+static enum utf8_state decode(enum utf8_state *state, uint32_t *codep,
+ uint32_t byte) {
+ uint32_t type = utf8d[byte];
+
+ *codep = (*state != Utf8_Accept) ? (byte & 0x3fu) | (*codep << 6)
+ : (0xff >> type) & (byte);
+
+ *state = utf8d[256 + *state * 16 + type];
+ return *state;
+}
+
+static struct codepoint next_utf8_codepoint(uint8_t *bytes, uint64_t nbytes) {
+ uint32_t codepoint = 0;
+ enum utf8_state state = Utf8_Accept;
+ uint32_t bi = 0;
+ while (bi < nbytes) {
+ enum utf8_state res = decode(&state, &codepoint, bytes[bi]);
+ ++bi;
+
+ if (res == Utf8_Accept || res == Utf8_Reject) {
+ break;
+ }
+ }
+
+ if (state == Utf8_Reject) {
+ codepoint = 0xfffd;
+ }
+
+ return (struct codepoint){.codepoint = codepoint, .nbytes = bi};
}
-// TODO: grapheme clusters, this returns the number of unicode code points
+struct codepoint *utf8_next_codepoint(struct utf8_codepoint_iterator *iter) {
+ if (iter->offset >= iter->nbytes) {
+ return NULL;
+ }
+
+ iter->current = next_utf8_codepoint(iter->data + iter->offset,
+ iter->nbytes - iter->offset);
+ iter->offset += iter->current.nbytes;
+ return &iter->current;
+}
+
+struct utf8_codepoint_iterator
+create_utf8_codepoint_iterator(uint8_t *data, uint64_t len,
+ uint64_t initial_offset) {
+ return (struct utf8_codepoint_iterator){
+ .data = data,
+ .nbytes = len,
+ .offset = initial_offset,
+ };
+}
+
+/* TODO: grapheme clusters and other classification, this
+ * returns the number of unicode code points
+ */
uint32_t utf8_nchars(uint8_t *bytes, uint32_t nbytes) {
+ uint32_t bi = 0;
uint32_t nchars = 0;
- uint32_t expected = 0;
- for (uint32_t bi = 0; bi < nbytes; ++bi) {
- uint8_t byte = bytes[bi];
- if (utf8_byte_is_unicode(byte)) {
- if (utf8_byte_is_unicode_start(byte)) {
- expected = utf8_nbytes_in_char(byte) - 1;
- } else { // continuation byte
- --expected;
- if (expected == 0) {
- ++nchars;
- }
- }
- } else { // ascii
- ++nchars;
- }
+ while (bi < nbytes) {
+ struct codepoint codepoint = next_utf8_codepoint(bytes + bi, nbytes - bi);
+ ++nchars;
+ bi += codepoint.nbytes;
}
+
return nchars;
}
-// TODO: grapheme clusters, this uses the number of unicode code points
+/* TODO: grapheme clusters and other classification, this
+ * counts characters as unicode code points
+ */
uint32_t utf8_nbytes(uint8_t *bytes, uint32_t nbytes, uint32_t nchars) {
-
uint32_t bi = 0;
uint32_t chars = 0;
uint32_t expected = 0;
while (chars < nchars && bi < nbytes) {
- uint8_t byte = bytes[bi];
- if (utf8_byte_is_unicode(byte)) {
- if (utf8_byte_is_unicode_start(byte)) {
- expected = utf8_nbytes_in_char(byte) - 1;
- } else { // continuation char
- --expected;
- if (expected == 0) {
- ++chars;
- }
- }
- } else { // ascii
- ++chars;
- }
-
- ++bi;
+ struct codepoint codepoint = next_utf8_codepoint(bytes + bi, nbytes - bi);
+ bi += codepoint.nbytes;
+ ++chars;
}
+ // TODO: reject invalid?
return bi;
}
-uint32_t utf8_visual_char_width(uint8_t *bytes, uint32_t len) {
- if (utf8_byte_is_unicode_start(*bytes)) {
- wchar_t wc;
- size_t nbytes = 0;
- if ((nbytes = mbrtowc(&wc, (char *)bytes, len, NULL)) > 0) {
- size_t w = wcwidth(wc);
- return w > 0 ? w : 2;
- } else {
- return 1;
- }
- } else if (utf8_byte_is_unicode_continuation(*bytes)) {
- return 0;
+uint32_t unicode_visual_char_width(const struct codepoint *codepoint) {
+ if (codepoint->nbytes > 0) {
+ // TODO: use unicode classification instead
+ size_t w = wcwidth(codepoint->codepoint);
+ return w >= 0 ? w : 2;
} else {
- return 1;
+ return 0;
}
}
diff --git a/src/dged/utf8.h b/src/dged/utf8.h
index 04aa242..22ce22d 100644
--- a/src/dged/utf8.h
+++ b/src/dged/utf8.h
@@ -1,19 +1,37 @@
+#ifndef _UTF8_H
+#define _UTF8_H
+
#include <stdbool.h>
#include <stdint.h>
+struct codepoint {
+ uint32_t codepoint;
+ uint32_t nbytes;
+};
+
+struct utf8_codepoint_iterator {
+ uint8_t *data;
+ uint64_t nbytes;
+ uint64_t offset;
+ struct codepoint current;
+};
+
+struct utf8_codepoint_iterator
+create_utf8_codepoint_iterator(uint8_t *data, uint64_t len,
+ uint64_t initial_offset);
+struct codepoint *utf8_next_codepoint(struct utf8_codepoint_iterator *iter);
+
/*!
* \brief Return the number of chars the utf-8 sequence pointed at by `bytes` of
* length `nbytes`, represents
*/
uint32_t utf8_nchars(uint8_t *bytes, uint32_t nbytes);
-/* Return the number of bytes used to make up the next `nchars` characters */
-uint32_t utf8_nbytes(uint8_t *bytes, uint32_t nbytes, uint32_t nchars);
+uint32_t unicode_visual_char_width(const struct codepoint *codepoint);
-/* true if `byte` is a unicode byte sequence start byte */
bool utf8_byte_is_unicode_start(uint8_t byte);
bool utf8_byte_is_unicode_continuation(uint8_t byte);
-bool utf8_byte_is_ascii(uint8_t byte);
bool utf8_byte_is_unicode(uint8_t byte);
+bool utf8_byte_is_ascii(uint8_t byte);
-uint32_t utf8_visual_char_width(uint8_t *bytes, uint32_t len);
+#endif
diff --git a/src/main/cmds.c b/src/main/cmds.c
index 4da8346..18f333d 100644
--- a/src/main/cmds.c
+++ b/src/main/cmds.c
@@ -258,7 +258,7 @@ void buffer_to_list_line(struct buffer *buffer, void *userdata) {
buffer_add_text_property(
listbuf, (struct location){.line = begin.line, .col = 0},
(struct location){.line = begin.line,
- .col = buffer_num_chars(listbuf, begin.line)},
+ .col = buffer_line_length(listbuf, begin.line)},
(struct text_property){.type = TextProperty_Data, .userdata = buffer});
}
}
diff --git a/src/main/completion.c b/src/main/completion.c
index 52bf6f8..4ffbc46 100644
--- a/src/main/completion.c
+++ b/src/main/completion.c
@@ -40,6 +40,11 @@ static struct buffer *g_target_buffer = NULL;
static void hide_completion();
+static bool is_space(const struct codepoint *c) {
+ // Matches only the ASCII space character for now. TODO: utf8 whitespace and other whitespace
+ return c->codepoint == ' ';
+}
+
static uint32_t complete_path(struct completion_context ctx, void *userdata);
static struct completion_provider g_path_provider = {
.name = "path",
@@ -214,32 +219,30 @@ static void update_completions(struct buffer *buffer,
}
}
-static void on_buffer_delete(struct buffer *buffer, struct region deleted,
- uint32_t start_idx, uint32_t end_idx,
- void *userdata) {
+static void on_buffer_delete(struct buffer *buffer,
+ struct edit_location deleted, void *userdata) {
struct active_completion_ctx *ctx = (struct active_completion_ctx *)userdata;
if (g_state.active) {
- update_completions(buffer, ctx, deleted.begin);
+ update_completions(buffer, ctx, deleted.coordinates.begin); // only reached while a completion is active; refreshes from the start of the deleted range
}
}
-static void on_buffer_insert(struct buffer *buffer, struct region inserted,
- uint32_t start_idx, uint32_t end_idx,
- void *userdata) {
+static void on_buffer_insert(struct buffer *buffer,
+ struct edit_location inserted, void *userdata) {
struct active_completion_ctx *ctx = (struct active_completion_ctx *)userdata;
if (!g_state.active) {
uint32_t nchars = 0;
switch (ctx->trigger.kind) {
case CompletionTrigger_Input:
- for (uint32_t line = inserted.begin.line; line <= inserted.end.line;
- ++line) {
- nchars += buffer_num_chars(buffer, line);
+ for (uint32_t line = inserted.coordinates.begin.line;
+ line <= inserted.coordinates.end.line; ++line) {
+ nchars += buffer_line_length(buffer, line);
}
- nchars -=
- inserted.begin.col +
- (buffer_num_chars(buffer, inserted.end.line) - inserted.end.col);
+ nchars -= inserted.coordinates.begin.col +
+ (buffer_line_length(buffer, inserted.coordinates.end.line) -
+ inserted.coordinates.end.col);
ctx->trigger_current_nchars += nchars;
@@ -260,16 +263,16 @@ static void on_buffer_insert(struct buffer *buffer, struct region inserted,
g_state.ctx = ctx;
}
- update_completions(buffer, ctx, inserted.end);
+ update_completions(buffer, ctx, inserted.coordinates.end);
}
static void update_completion_buffer(struct buffer *buffer, void *userdata) {
buffer_add_text_property(
g_target_buffer,
(struct location){.line = g_state.current_completion, .col = 0},
- (struct location){
- .line = g_state.current_completion,
- .col = buffer_num_chars(g_target_buffer, g_state.current_completion)},
+ (struct location){.line = g_state.current_completion,
+ .col = buffer_line_length(g_target_buffer,
+ g_state.current_completion)},
(struct text_property){.type = TextProperty_Colors,
.colors = (struct text_property_colors){
.set_bg = false,
@@ -433,26 +436,18 @@ static uint32_t complete_path(struct completion_context ctx, void *userdata) {
if (ctx.buffer == minibuffer_buffer()) {
txt = minibuffer_content();
} else {
- txt = buffer_line(ctx.buffer, ctx.location.line);
- uint32_t end_idx = text_col_to_byteindex(
- ctx.buffer->text, ctx.location.line, ctx.location.col);
-
- for (uint32_t bytei = end_idx; bytei > 0; --bytei) {
- if (txt.text[bytei] == ' ') {
- start_idx = bytei + 1;
- break;
- }
- }
-
- if (start_idx >= end_idx) {
+ struct match_result start =
+ buffer_find_prev_in_line(ctx.buffer, ctx.location, is_space);
+ if (!start.found) {
+ start.at = (struct location){.line = ctx.location.line, .col = 0};
return 0;
}
-
- txt.nbytes = end_idx - start_idx;
+ txt = buffer_region(ctx.buffer, region_new(start.at, ctx.location));
}
- char *path = calloc(txt.nbytes + 1, sizeof(uint8_t));
- memcpy(path, txt.text + start_idx, txt.nbytes);
+ char *path = calloc(txt.nbytes + 1, sizeof(char));
+ memcpy(path, txt.text, txt.nbytes);
+ path[txt.nbytes] = '\0';
if (txt.allocated) {
free(txt.text);
@@ -562,25 +557,18 @@ static uint32_t complete_buffers(struct completion_context ctx,
if (ctx.buffer == minibuffer_buffer()) {
txt = minibuffer_content();
} else {
- txt = buffer_line(ctx.buffer, ctx.location.line);
- uint32_t end_idx = text_col_to_byteindex(
- ctx.buffer->text, ctx.location.line, ctx.location.col);
- for (uint32_t bytei = end_idx; bytei > 0; --bytei) {
- if (txt.text[bytei] == ' ') {
- start_idx = bytei + 1;
- break;
- }
- }
-
- if (start_idx >= end_idx) {
+ struct match_result start =
+ buffer_find_prev_in_line(ctx.buffer, ctx.location, is_space);
+ if (!start.found) {
+ start.at = (struct location){.line = ctx.location.line, .col = 0};
return 0;
}
-
- txt.nbytes = end_idx - start_idx;
+ txt = buffer_region(ctx.buffer, region_new(start.at, ctx.location));
}
- char *needle = calloc(txt.nbytes + 1, sizeof(uint8_t));
- memcpy(needle, txt.text + start_idx, txt.nbytes);
+ char *needle = calloc(txt.nbytes + 1, sizeof(char));
+ memcpy(needle, txt.text, txt.nbytes);
+ needle[txt.nbytes] = '\0';
if (txt.allocated) {
free(txt.text);
@@ -619,31 +607,23 @@ static uint32_t complete_commands(struct completion_context ctx,
if (commands == NULL) {
return 0;
}
-
struct text_chunk txt = {0};
uint32_t start_idx = 0;
if (ctx.buffer == minibuffer_buffer()) {
txt = minibuffer_content();
} else {
- txt = buffer_line(ctx.buffer, ctx.location.line);
- uint32_t end_idx = text_col_to_byteindex(
- ctx.buffer->text, ctx.location.line, ctx.location.col);
- for (uint32_t bytei = end_idx; bytei > 0; --bytei) {
- if (txt.text[bytei] == ' ') {
- start_idx = bytei + 1;
- break;
- }
- }
-
- if (start_idx >= end_idx) {
+ struct match_result start =
+ buffer_find_prev_in_line(ctx.buffer, ctx.location, is_space);
+ if (!start.found) {
+ start.at = (struct location){.line = ctx.location.line, .col = 0};
return 0;
}
-
- txt.nbytes = end_idx - start_idx;
+ txt = buffer_region(ctx.buffer, region_new(start.at, ctx.location));
}
- char *needle = calloc(txt.nbytes + 1, sizeof(uint8_t));
- memcpy(needle, txt.text + start_idx, txt.nbytes);
+ char *needle = calloc(txt.nbytes + 1, sizeof(char));
+ memcpy(needle, txt.text, txt.nbytes);
+ needle[txt.nbytes] = '\0';
if (txt.allocated) {
free(txt.text);
diff --git a/sune.txt b/sune.txt
new file mode 100644
index 0000000..711f7ee
--- /dev/null
+++ b/sune.txt
@@ -0,0 +1,4 @@
+⬆️asd
+🎠 aba
+this is tab
+🇫🇮 hej hej
diff --git a/test/buffer.c b/test/buffer.c
index a4b318e..7d879b0 100644
--- a/test/buffer.c
+++ b/test/buffer.c
@@ -1,11 +1,21 @@
#include <string.h>
#include "dged/buffer.h"
+#include "dged/settings.h"
#include "assert.h"
#include "test.h"
-void test_add() {
+static uint32_t add_callback_call_count = 0; // incremented once per add_callback invocation
+static void add_callback(struct buffer *buffer, struct edit_location added,
+ void *userdata) {
+ (void)buffer; // arguments are unused; the hook only counts calls
+ (void)added;
+ (void)userdata;
+ ++add_callback_call_count;
+}
+
+static void test_add(void) {
struct buffer b = buffer_create("test-buffer");
ASSERT(buffer_num_lines(&b) == 0, "Expected buffer to have zero lines");
@@ -16,10 +26,62 @@ void test_add() {
ASSERT(loc.line == 1 && loc.col == strlen(txt),
"Expected buffer to have one line with characters");
+ // test callback
+ uint32_t hook_id = buffer_add_insert_hook(&b, add_callback, NULL);
+ buffer_add(&b, (struct location){.line = 0, .col = 0}, (uint8_t *)"hej", 3);
+ ASSERT(add_callback_call_count == 1, "Expected callback to have been called");
+
+ // test removing the hook
+ buffer_remove_insert_hook(&b, hook_id, NULL);
+ buffer_add(&b, (struct location){.line = 0, .col = 0}, (uint8_t *)"hej", 3);
+ ASSERT(add_callback_call_count == 1,
+ "Expected callback to not have been called after it has been removed");
+
buffer_destroy(&b);
}
-void test_word_at() {
+static uint32_t delete_callback_call_count = 0; // incremented once per delete_callback invocation
+static void delete_callback(struct buffer *buffer, struct edit_location removed,
+ void *userdata) {
+ (void)buffer; // arguments are unused; the hook only counts calls
+ (void)removed;
+ (void)userdata;
+ ++delete_callback_call_count;
+}
+
+static void test_delete(void) {
+ struct buffer b = buffer_create("test-buffer-delete");
+ const char *txt = "we are adding some text\ntwo lines to be exact";
+ buffer_add(&b, (struct location){.line = 0, .col = 0}, (uint8_t *)txt,
+ strlen(txt)); // the returned end location is not needed by this test
+
+ ASSERT(buffer_line_length(&b, 0) == 23,
+ "Expected line 1 to be 23 chars before deletion");
+ buffer_delete(&b, region_new((struct location){.line = 0, .col = 0},
+ (struct location){.line = 0, .col = 2}));
+ ASSERT(buffer_line_length(&b, 0) == 21,
+ "Expected line 1 to be 21 chars after deletion");
+
+ // delete the newline: joins the two 21-char lines into a single 42-char line
+ buffer_delete(&b, region_new((struct location){.line = 0, .col = 21},
+ (struct location){.line = 1, .col = 0}));
+ ASSERT(buffer_num_lines(&b) == 1,
+ "Expected buffer to have one line after new line deletion");
+ ASSERT(buffer_line_length(&b, 0) == 42,
+ "Expected single line to be sum of both line lengths after new line "
+ "deletion");
+
+ // test that callback works: one deletion must fire the hook exactly once
+ buffer_add_delete_hook(&b, delete_callback, NULL);
+ buffer_delete(&b, region_new((struct location){.line = 0, .col = 0},
+ (struct location){.line = 0, .col = 2}));
+ ASSERT(delete_callback_call_count == 1,
+ "Expected callback to have been called");
+
+ buffer_destroy(&b);
+}
+
+static void test_word_at(void) {
struct buffer b = buffer_create("test-word-at-buffer");
const char *txt = "word1 (word2). Another";
buffer_add(&b, (struct location){.line = 0, .col = 0}, (uint8_t *)txt,
@@ -40,8 +102,7 @@ void test_word_at() {
"Expected word to span cols 7..12");
// test that clamping works correctly
- struct region word3 =
- buffer_word_at(&b, (struct location){.line = 0, .col = 100});
+ struct region word3 = buffer_word_at(&b, buffer_clamp(&b, 0, 100));
ASSERT(region_has_size(word3), "expected 0,100 to be in the last word");
ASSERT(word3.begin.col == 15 && word3.end.col == 22,
"Expected word to span cols 15..22");
@@ -49,7 +110,140 @@ void test_word_at() {
buffer_destroy(&b);
}
-void run_buffer_tests() {
+// A double-width emoji must count as two columns in the line length.
+static void test_line_len(void) {
+ struct buffer b = buffer_create("test-line-length-buffer");
+ const char *txt = "Look! Banana 🍌";
+ buffer_add(&b, (struct location){.line = 0, .col = 0}, (uint8_t *)txt,
+ strlen(txt));
+ ASSERT(buffer_line_length(&b, 0) == 15,
+ "Expected banana line to be 15 chars wide");
+
+ buffer_destroy(&b);
+}
+
+// Moving one char steps over the full visual width of wide chars and tabs.
+static void test_char_movement(void) {
+ struct buffer b = buffer_create("test-char-movement-buffer");
+ const char *txt = "abcdefgh 🎯jklmn\tab";
+ buffer_add(&b, buffer_end(&b), (uint8_t *)txt, strlen(txt));
+ struct location next =
+ buffer_next_char(&b, (struct location){.line = 0, .col = 0});
+ ASSERT(next.col == 1, "Expected next char to be next char");
+
+ next = buffer_next_char(&b, (struct location){.line = 0, .col = 9});
+ ASSERT(next.col == 11,
+ "Expected a double width char to result in a 2 column move");
+
+ next = buffer_next_char(&b, (struct location){.line = 0, .col = 16});
+ uint64_t tab_width = settings_get("editor.tab-width")->value.number_value;
+ ASSERT(next.col == 16 + tab_width,
+ "Expected a tab to result in a move the width of a tab");
+
+ struct location prev =
+ buffer_previous_char(&b, (struct location){.line = 0, .col = 0});
+ ASSERT(prev.col == 0 && prev.line == 0,
+ "Expected backwards motion from 0,0 not to be possible");
+
+ prev = buffer_previous_char(&b, (struct location){.line = 0, .col = 11});
+ ASSERT(prev.col == 9,
+ "Expected a double width char to result in a 2 column move");
+
+ prev = buffer_previous_char(
+ &b, (struct location){.line = 0, .col = 16 + tab_width});
+ ASSERT(prev.col == 16,
+ "Expected a tab move backwards to step over the width of a tab");
+
+ buffer_destroy(&b);
+}
+
+// Word motions land on word starts and never move before the buffer start.
+static void test_word_movement(void) {
+ struct buffer b = buffer_create("test-word-movement-buffer");
+
+ const char *txt = " word1, word2 \"word3\" word4";
+ buffer_add(&b, buffer_end(&b), (uint8_t *)txt, strlen(txt));
+ struct location next =
+ buffer_next_word(&b, (struct location){.line = 0, .col = 0});
+ ASSERT(next.col == 1, "Expected next word to start at col 1");
+
+ next = buffer_next_word(&b, (struct location){.line = 0, .col = 1});
+ ASSERT(next.col == 8, "Expected next word to start at col 8");
+
+ next = buffer_next_word(&b, (struct location){.line = 0, .col = 8});
+ ASSERT(next.col == 15, "Expected next word to start at col 15");
+
+ next = buffer_next_word(&b, (struct location){.line = 0, .col = 15});
+ ASSERT(next.col == 22, "Expected next word to start at col 22");
+
+ struct location prev =
+ buffer_previous_word(&b, (struct location){.line = 0, .col = 26});
+ ASSERT(prev.col == 22, "Expected previous word to start at col 22");
+
+ prev = buffer_previous_word(&b, (struct location){.line = 0, .col = 22});
+ ASSERT(prev.col == 15, "Expected previous word to start at col 15");
+
+ prev = buffer_previous_word(&b, (struct location){.line = 0, .col = 0});
+ ASSERT(prev.col == 0 && prev.line == 0,
+ "Expected previous word to not go before beginning of buffer");
+
+ buffer_destroy(&b);
+}
+
+// Copy, cut and paste round-trips, including pasting an older kill ring entry.
+static void test_copy(void) {
+ struct buffer b = buffer_create("test-copy-buffer");
+ buffer_add(&b, (struct location){.line = 0, .col = 0}, (uint8_t *)"copy", 4);
+
+ buffer_copy(&b, region_new((struct location){.line = 0, .col = 0},
+ (struct location){.line = 0, .col = 4}));
+ buffer_paste(&b, (struct location){.line = 0, .col = 4});
+ ASSERT(buffer_line_length(&b, 0) == 8, "Expected text to be copied");
+ struct text_chunk t = buffer_line(&b, 0);
+ ASSERT_STR_EQ((const char *)t.text, "copycopy",
+ "Expected copied text to match");
+ if (t.allocated) {
+ free(t.text);
+ }
+
+ buffer_cut(&b, region_new((struct location){.line = 0, .col = 2},
+ (struct location){.line = 0, .col = 4}));
+ buffer_paste(&b, (struct location){.line = 0, .col = 0});
+ ASSERT(buffer_line_length(&b, 0) == 8, "Expected line length to be the same");
+ t = buffer_line(&b, 0);
+ ASSERT_STR_EQ((const char *)t.text, "pycocopy",
+ "Expected cut+pasted text to match");
+ if (t.allocated) {
+ free(t.text);
+ }
+
+ // test kill ring
+ buffer_paste_older(&b, (struct location){.line = 0, .col = 0});
+ ASSERT(buffer_line_length(&b, 0) == 12,
+ "Expected line length to have increased when pasting older");
+ t = buffer_line(&b, 0);
+ ASSERT_STR_EQ((const char *)t.text, "copypycocopy",
+ "Expected pasted older text to match");
+ if (t.allocated) {
+ free(t.text);
+ }
+
+ buffer_destroy(&b);
+}
+
+// Sets the editor.tab-width default the movement tests read, runs every buffer test, then destroys the settings.
+void run_buffer_tests(void) {
+ settings_init(10);
+ settings_set_default(
+ "editor.tab-width",
+ (struct setting_value){.type = Setting_Number, .number_value = 4});
+
run_test(test_add);
+ run_test(test_delete);
run_test(test_word_at);
+ run_test(test_line_len);
+ run_test(test_char_movement);
+ run_test(test_word_movement);
+ run_test(test_copy);
+ settings_destroy();
}
diff --git a/test/main.c b/test/main.c
index 4c241b3..dc0c2dc 100644
--- a/test/main.c
+++ b/test/main.c
@@ -9,7 +9,9 @@
void handle_abort() { exit(1); }
int main() {
- setlocale(LC_ALL, "");
+ // Use a hardcoded locale to get a
+ // predictable env.
+ setlocale(LC_ALL, "en_US.UTF-8");
signal(SIGABRT, handle_abort);
struct timespec test_begin;
@@ -52,5 +54,6 @@ int main() {
((uint64_t)test_begin.tv_sec * 1e9 + (uint64_t)test_begin.tv_nsec);
printf("\n๐ŸŽ‰ \x1b[1;32mDone! All tests successful in %.2f ms!\x1b[0m\n",
(double)elapsed_nanos / 1e6);
+
return 0;
}
diff --git a/test/text.c b/test/text.c
index 9faa663..f890e7b 100644
--- a/test/text.c
+++ b/test/text.c
@@ -15,22 +15,19 @@ void assert_line_eq(struct text_chunk line, const char *txt, const char *msg) {
void assert_line_equal(struct text_chunk *line) {}
void test_add_text() {
- uint32_t lines_added, cols_added;
+ uint32_t lines_added;
/* use a silly small initial capacity to test re-alloc */
struct text *t = text_create(1);
const char *txt = "This is line 1\n";
- text_insert_at(t, 0, 0, (uint8_t *)txt, strlen(txt), &lines_added,
- &cols_added);
+ text_insert_at(t, 0, 0, (uint8_t *)txt, strlen(txt), &lines_added);
- ASSERT(text_line_size(t, 0) == 14 && text_line_length(t, 0) == 14,
- "Expected line 1 to have 14 chars and 14 bytes");
+ ASSERT(text_line_size(t, 0) == 14, "Expected line 1 to be 14 bytes");
assert_line_eq(text_get_line(t, 0), "This is line 1",
"Expected line 1 to be line 1");
const char *txt2 = "This is line 2\n";
- text_insert_at(t, 1, 0, (uint8_t *)txt2, strlen(txt2), &lines_added,
- &cols_added);
+ text_insert_at(t, 1, 0, (uint8_t *)txt2, strlen(txt2), &lines_added);
ASSERT(text_num_lines(t) == 3,
"Expected text to have three lines after second insertion");
assert_line_eq(text_get_line(t, 1), "This is line 2",
@@ -38,8 +35,7 @@ void test_add_text() {
// simulate indentation
const char *txt3 = " ";
- text_insert_at(t, 0, 0, (uint8_t *)txt3, strlen(txt3), &lines_added,
- &cols_added);
+ text_insert_at(t, 0, 0, (uint8_t *)txt3, strlen(txt3), &lines_added);
ASSERT(text_num_lines(t) == 3,
"Expected text to have three lines after second insertion");
assert_line_eq(text_get_line(t, 0), " This is line 1",
@@ -48,7 +44,7 @@ void test_add_text() {
"Expected line 2 to be line 2 still");
// insert newline in middle of line
- text_insert_at(t, 1, 4, (uint8_t *)"\n", 1, &lines_added, &cols_added);
+ text_insert_at(t, 1, 4, (uint8_t *)"\n", 1, &lines_added);
ASSERT(text_num_lines(t) == 4,
"Expected text to have four lines after inserting a new line");
assert_line_eq(text_get_line(t, 1), "This", "Expected line 2 to be split");
@@ -56,11 +52,11 @@ void test_add_text() {
"Expected line 2 to be split");
// insert newline before line 1
- text_insert_at(t, 1, 0, (uint8_t *)"\n", 1, &lines_added, &cols_added);
+ text_insert_at(t, 1, 0, (uint8_t *)"\n", 1, &lines_added);
ASSERT(
text_num_lines(t) == 5,
"Expected to have five lines after adding an empty line in the middle");
- ASSERT(text_line_length(t, 1) == 0, "Expected line 2 to be empty");
+ ASSERT(text_line_size(t, 1) == 0, "Expected line 2 to be empty");
assert_line_eq(text_get_line(t, 2), "This",
"Expected line 3 to be previous line 2");
assert_line_eq(text_get_line(t, 3), " is line 2",
@@ -70,37 +66,35 @@ void test_add_text() {
}
void test_delete_text() {
- uint32_t lines_added, cols_added;
+ uint32_t lines_added;
struct text *t = text_create(10);
const char *txt = "This is line 1";
- text_insert_at(t, 0, 0, (uint8_t *)txt, strlen(txt), &lines_added,
- &cols_added);
+ text_insert_at(t, 0, 0, (uint8_t *)txt, strlen(txt), &lines_added);
text_delete(t, 0, 12, 0, 14);
- ASSERT(text_line_length(t, 0) == 12,
- "Expected line to be 12 chars after deleting two");
+ ASSERT(text_line_size(t, 0) == 12,
+ "Expected line to be 12 bytes after deleting two");
ASSERT(strncmp((const char *)text_get_line(t, 0).text, "This is line",
text_line_size(t, 0)) == 0,
- "Expected two chars to be deleted");
+ "Expected two bytes to be deleted");
text_delete(t, 0, 0, 10, 10);
ASSERT(text_get_line(t, 0).nbytes == 0,
- "Expected line to be empty after many chars removed");
+ "Expected line to be empty after many bytes removed");
const char *txt2 = "This is line 1\nThis is line 2\nThis is line 3";
- text_insert_at(t, 0, 0, (uint8_t *)txt2, strlen(txt2), &lines_added,
- &cols_added);
+ text_insert_at(t, 0, 0, (uint8_t *)txt2, strlen(txt2), &lines_added);
ASSERT(text_num_lines(t) == 3,
"Expected to have three lines after inserting as many");
text_delete(t, 1, 11, 1, 14);
- ASSERT(text_line_length(t, 1) == 11,
- "Expected line to contain 11 chars after deletion");
+ ASSERT(text_line_size(t, 1) == 11,
+ "Expected line to contain 11 bytes after deletion");
struct text_chunk line = text_get_line(t, 1);
ASSERT(strncmp((const char *)line.text, "This is lin", line.nbytes) == 0,
"Expected deleted characters to be gone in the second line");
- text_delete(t, 1, 0, 1, text_line_length(t, 1) + 1);
+ text_delete(t, 1, 0, 1, text_line_size(t, 1) + 1);
ASSERT(text_num_lines(t) == 2,
"Expected to have two lines after deleting one");
struct text_chunk line2 = text_get_line(t, 1);
@@ -110,8 +104,8 @@ void test_delete_text() {
struct text *t3 = text_create(10);
const char *delete_me = "This is line๐ŸŽ™\nQ";
text_insert_at(t3, 0, 0, (uint8_t *)delete_me, strlen(delete_me),
- &lines_added, &cols_added);
- text_delete(t3, 0, 13, 0, 14);
+ &lines_added);
+ text_delete(t3, 0, 16, 1, 0);
struct text_chunk top_line = text_get_line(t3, 0);
ASSERT(strncmp((const char *)top_line.text, "This is line๐ŸŽ™Q",
top_line.nbytes) == 0,
@@ -123,33 +117,13 @@ void test_delete_text() {
struct text *t4 = text_create(10);
const char *deletable_text = "Only one line kinda";
text_append(t4, (uint8_t *)deletable_text, strlen(deletable_text),
- &lines_added, &cols_added);
+ &lines_added);
text_delete(t4, 0, 19, 0, 20);
ASSERT(text_num_lines(t4) == 1, "Expected the line to still be there");
- ASSERT(text_line_length(t4, 0) == 19,
+ ASSERT(text_line_size(t4, 0) == 19,
"Expected nothing to have happened to the line");
- // test utf-8
- struct text *t2 = text_create(10);
- const char *txt3 = "Emojis: ๐Ÿ‡ซ๐Ÿ‡ฎ ๐Ÿฎ\n";
- text_insert_at(t2, 0, 0, (uint8_t *)txt3, strlen(txt3), &lines_added,
- &cols_added);
-
- // TODO: Fix when graphemes are implemented, should be 11, right now it counts
- // the two unicode code points 🇫 and 🇮 as two chars.
- ASSERT(text_line_length(t2, 0) == 12,
- "Line length should be 12 (even though there "
- "are more bytes in the line).");
-
- text_delete(t2, 0, 10, 0, 12);
- ASSERT(text_line_length(t2, 0) == 10,
- "Line length should be 10 after deleting the cow emoji and a space");
- struct text_chunk line3 = text_get_line(t2, 0);
- ASSERT(strncmp((const char *)line3.text, "Emojis: ๐Ÿ‡ซ๐Ÿ‡ฎ", line3.nbytes) == 0,
- "Expected cow emoji plus space to be deleted");
-
text_destroy(t);
- text_destroy(t2);
text_destroy(t3);
text_destroy(t4);
}
diff --git a/test/utf8.c b/test/utf8.c
index d67c409..c5094c7 100644
--- a/test/utf8.c
+++ b/test/utf8.c
@@ -6,11 +6,6 @@
#include "assert.h"
#include "test.h"
-void test_nchars_nbytes() {
- ASSERT(utf8_nchars((uint8_t *)"๐Ÿ‘ด", strlen("๐Ÿ‘ด")) == 1,
- "Expected old man emoji to be 1 char");
- ASSERT(utf8_nbytes((uint8_t *)"๐Ÿ‘ด", strlen("๐Ÿ‘ด"), 1) == 4,
- "Expected old man emoji to be 4 bytes");
-}
+void test_nchars_nbytes() {}
void run_utf8_tests() { run_test(test_nchars_nbytes); }