summary | refs | log | tree | commit | diff
path: root/src/dged/text.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/dged/text.c')
-rw-r--r-- src/dged/text.c 233
1 files changed, 76 insertions, 157 deletions
diff --git a/src/dged/text.c b/src/dged/text.c
index 3d1078f..18ab04f 100644
--- a/src/dged/text.c
+++ b/src/dged/text.c
@@ -18,7 +18,6 @@ struct line {
uint8_t *data;
uint8_t flags;
uint32_t nbytes;
- uint32_t nchars;
};
struct text_property_entry {
@@ -54,11 +53,9 @@ void text_destroy(struct text *text) {
text->lines[li].data = NULL;
text->lines[li].flags = 0;
text->lines[li].nbytes = 0;
- text->lines[li].nchars = 0;
}
free(text->lines);
-
free(text);
}
@@ -68,68 +65,25 @@ void text_clear(struct text *text) {
text->lines[li].data = NULL;
text->lines[li].flags = 0;
text->lines[li].nbytes = 0;
- text->lines[li].nchars = 0;
}
text->nlines = 0;
text_clear_properties(text);
}
-// given `char_idx` as a character index, return the byte index
-uint32_t charidx_to_byteidx(struct line *line, uint32_t char_idx) {
- if (line->nchars == 0) {
- return 0;
- }
-
- if (char_idx > line->nchars) {
- return line->nbytes - 1;
- }
-
- return utf8_nbytes(line->data, line->nbytes, char_idx);
-}
-
-uint32_t text_col_to_byteindex(struct text *text, uint32_t line, uint32_t col) {
- return charidx_to_byteidx(&text->lines[line], col);
-}
-
-// given `byte_idx` as a byte index, return the character index
-uint32_t byteidx_to_charidx(struct line *line, uint32_t byte_idx) {
- if (byte_idx > line->nbytes) {
- return line->nchars;
+struct utf8_codepoint_iterator
+text_line_codepoint_iterator(const struct text *text, uint32_t lineidx) {
+ if (lineidx >= text_num_lines(text)) {
+ return create_utf8_codepoint_iterator(NULL, 0, 0);
}
- return utf8_nchars(line->data, byte_idx);
+ return create_utf8_codepoint_iterator(text->lines[lineidx].data,
+ text->lines[lineidx].nbytes, 0);
}
-uint32_t text_byteindex_to_col(struct text *text, uint32_t line,
- uint32_t byteindex) {
- return byteidx_to_charidx(&text->lines[line], byteindex);
-}
-
-uint32_t text_global_idx(struct text *text, uint32_t line, uint32_t col) {
- uint32_t byteoff = 0;
- uint32_t nlines = text_num_lines(text);
-
- if (nlines == 0) {
- return 0;
- }
-
- for (uint32_t l = 0; l < line && l < nlines; ++l) {
- // +1 for newline
- byteoff += text_line_size(text, l) + 1;
- }
-
- uint32_t l = line < nlines ? line : nlines - 1;
- uint32_t nchars = text_line_length(text, l);
- uint32_t c = col < nchars ? col : nchars;
- byteoff += text_col_to_byteindex(text, l, c);
-
- if (col > nchars) {
- // account for newline
- ++byteoff;
- }
-
- return byteoff;
+struct utf8_codepoint_iterator
+text_chunk_codepoint_iterator(const struct text_chunk *chunk) {
+ return create_utf8_codepoint_iterator(chunk->text, chunk->nbytes, 0);
}
void append_empty_lines(struct text *text, uint32_t numlines) {
@@ -145,17 +99,10 @@ void append_empty_lines(struct text *text, uint32_t numlines) {
struct line *nline = &text->lines[text->nlines];
nline->data = NULL;
nline->nbytes = 0;
- nline->nchars = 0;
nline->flags = 0;
++text->nlines;
}
-
- if (text->nlines > text->capacity) {
- printf("text->nlines: %d, text->capacity: %d\n", text->nlines,
- text->capacity);
- raise(SIGTRAP);
- }
}
void ensure_line(struct text *text, uint32_t line) {
@@ -166,8 +113,8 @@ void ensure_line(struct text *text, uint32_t line) {
// It is assumed that `data` does not contain any \n, that is handled by
// higher-level functions
-void insert_at(struct text *text, uint32_t line, uint32_t col, uint8_t *data,
- uint32_t len, uint32_t nchars) {
+static void insert_at(struct text *text, uint32_t line, uint32_t offset,
+ uint8_t *data, uint32_t len) {
if (len == 0) {
return;
@@ -178,11 +125,10 @@ void insert_at(struct text *text, uint32_t line, uint32_t col, uint8_t *data,
struct line *l = &text->lines[line];
l->nbytes += len;
- l->nchars += nchars;
l->flags = LineChanged;
l->data = realloc(l->data, l->nbytes);
- uint32_t bytei = charidx_to_byteidx(l, col);
+ uint32_t bytei = offset;
// move following bytes out of the way
if (bytei + len < l->nbytes) {
@@ -194,15 +140,7 @@ void insert_at(struct text *text, uint32_t line, uint32_t col, uint8_t *data,
memcpy(l->data + bytei, data, len);
}
-uint32_t text_line_length(struct text *text, uint32_t lineidx) {
- if (lineidx >= text_num_lines(text)) {
- return 0;
- }
-
- return text->lines[lineidx].nchars;
-}
-
-uint32_t text_line_size(struct text *text, uint32_t lineidx) {
+uint32_t text_line_size(const struct text *text, uint32_t lineidx) {
if (lineidx >= text_num_lines(text)) {
return 0;
}
@@ -210,20 +148,19 @@ uint32_t text_line_size(struct text *text, uint32_t lineidx) {
return text->lines[lineidx].nbytes;
}
-uint32_t text_num_lines(struct text *text) { return text->nlines; }
+uint32_t text_num_lines(const struct text *text) { return text->nlines; }
+
+static void split_line(struct text *text, uint32_t offset, uint32_t lineidx,
+ uint32_t newlineidx) {
+ struct line *line = &text->lines[lineidx];
+ struct line *next = &text->lines[newlineidx];
-void split_line(uint32_t col, struct line *line, struct line *next) {
uint8_t *data = line->data;
uint32_t nbytes = line->nbytes;
- uint32_t nchars = line->nchars;
-
- uint32_t chari = col;
- uint32_t bytei = charidx_to_byteidx(line, chari);
+ uint32_t bytei = offset;
line->nbytes = bytei;
- line->nchars = chari;
next->nbytes = nbytes - bytei;
- next->nchars = nchars - chari;
line->flags = next->flags = line->flags;
next->data = NULL;
@@ -260,7 +197,7 @@ void shift_lines(struct text *text, uint32_t start, int32_t direction) {
memmove(dest, src, nlines * sizeof(struct line));
}
-void new_line_at(struct text *text, uint32_t line, uint32_t col) {
+void new_line_at(struct text *text, uint32_t line, uint32_t offset) {
ensure_line(text, line);
uint32_t newline = line + 1;
@@ -274,7 +211,7 @@ void new_line_at(struct text *text, uint32_t line, uint32_t col) {
}
// split line if needed
- split_line(col, &text->lines[line], &text->lines[newline]);
+ split_line(text, offset, line, newline);
}
void delete_line(struct text *text, uint32_t line) {
@@ -294,29 +231,25 @@ void delete_line(struct text *text, uint32_t line) {
--text->nlines;
text->lines[text->nlines].data = NULL;
text->lines[text->nlines].nbytes = 0;
- text->lines[text->nlines].nchars = 0;
}
-void text_insert_at_inner(struct text *text, uint32_t line, uint32_t col,
- uint8_t *bytes, uint32_t nbytes,
- uint32_t *lines_added, uint32_t *cols_added) {
+static void text_insert_at_inner(struct text *text, uint32_t line,
+ uint32_t offset, uint8_t *bytes,
+ uint32_t nbytes, uint32_t *lines_added) {
uint32_t linelen = 0, start_line = line;
- *cols_added = 0;
for (uint32_t bytei = 0; bytei < nbytes; ++bytei) {
uint8_t byte = bytes[bytei];
if (byte == '\n') {
uint8_t *line_data = bytes + (bytei - linelen);
- uint32_t nchars = utf8_nchars(line_data, linelen);
+ insert_at(text, line, offset, line_data, linelen);
- insert_at(text, line, col, line_data, linelen, nchars);
-
- col += nchars;
- new_line_at(text, line, col);
+ offset += linelen;
+ new_line_at(text, line, offset);
++line;
linelen = 0;
- col = 0;
+ offset = 0;
} else {
++linelen;
}
@@ -325,30 +258,26 @@ void text_insert_at_inner(struct text *text, uint32_t line, uint32_t col,
// handle remaining
if (linelen > 0) {
uint8_t *line_data = bytes + (nbytes - linelen);
- uint32_t nchars = utf8_nchars(line_data, linelen);
- insert_at(text, line, col, line_data, linelen, nchars);
- *cols_added = nchars;
+ insert_at(text, line, offset, line_data, linelen);
}
*lines_added = line - start_line;
}
void text_append(struct text *text, uint8_t *bytes, uint32_t nbytes,
- uint32_t *lines_added, uint32_t *cols_added) {
+ uint32_t *lines_added) {
uint32_t line = text->nlines > 0 ? text->nlines - 1 : 0;
- uint32_t col = text_line_length(text, line);
-
- text_insert_at_inner(text, line, col, bytes, nbytes, lines_added, cols_added);
+ uint32_t offset = text_line_size(text, line);
+ text_insert_at_inner(text, line, offset, bytes, nbytes, lines_added);
}
-void text_insert_at(struct text *text, uint32_t line, uint32_t col,
- uint8_t *bytes, uint32_t nbytes, uint32_t *lines_added,
- uint32_t *cols_added) {
- text_insert_at_inner(text, line, col, bytes, nbytes, lines_added, cols_added);
+void text_insert_at(struct text *text, uint32_t line, uint32_t offset,
+ uint8_t *bytes, uint32_t nbytes, uint32_t *lines_added) {
+ text_insert_at_inner(text, line, offset, bytes, nbytes, lines_added);
}
-void text_delete(struct text *text, uint32_t start_line, uint32_t start_col,
- uint32_t end_line, uint32_t end_col) {
+void text_delete(struct text *text, uint32_t start_line, uint32_t start_offset,
+ uint32_t end_line, uint32_t end_offset) {
if (text->nlines == 0) {
return;
@@ -362,45 +291,44 @@ void text_delete(struct text *text, uint32_t start_line, uint32_t start_col,
if (end_line > maxline) {
end_line = maxline;
- end_col = text->lines[end_line].nchars;
+ end_offset = text_line_size(text, end_line);
}
struct line *firstline = &text->lines[start_line];
struct line *lastline = &text->lines[end_line];
// clamp column
- if (start_col > firstline->nchars) {
- start_col = firstline->nchars > 0 ? firstline->nchars - 1 : 0;
+ uint32_t firstline_len = text_line_size(text, start_line);
+ if (start_offset > firstline_len) {
+ start_offset = firstline_len > 0 ? firstline_len - 1 : 0;
}
// handle deletion of newlines
- if (end_col > lastline->nchars) {
+ uint32_t lastline_len = text_line_size(text, end_line);
+ if (end_offset > lastline_len) {
if (end_line + 1 < text->nlines) {
- end_col = 0;
+ end_offset = 0;
++end_line;
lastline = &text->lines[end_line];
} else {
- end_col = lastline->nchars;
+ end_offset = lastline_len;
}
}
- uint32_t bytei = utf8_nbytes(lastline->data, lastline->nbytes, end_col);
+ uint32_t srcbytei = end_offset;
+ uint32_t dstbytei = start_offset;
+ uint32_t ncopy = lastline->nbytes - srcbytei;
if (lastline == firstline) {
// in this case we can "overwrite"
- uint32_t dstbytei =
- utf8_nbytes(firstline->data, firstline->nbytes, start_col);
- memmove(firstline->data + dstbytei, lastline->data + bytei,
- lastline->nbytes - bytei);
+ memmove(firstline->data + dstbytei, lastline->data + srcbytei, ncopy);
} else {
// otherwise we actually have to copy from the last line
- insert_at(text, start_line, start_col, lastline->data + bytei,
- lastline->nbytes - bytei, lastline->nchars - end_col);
+ insert_at(text, start_line, start_offset, lastline->data + srcbytei, ncopy);
}
- firstline->nchars = start_col + (lastline->nchars - end_col);
- firstline->nbytes =
- utf8_nbytes(firstline->data, firstline->nbytes, start_col) +
- (lastline->nbytes - bytei);
+ // new byte count is whatever we had before (left of dstbytei)
+ // plus what we copied
+ firstline->nbytes = dstbytei + ncopy;
// delete full lines, backwards to not shift old, crappy data upwards
for (uint32_t linei = end_line >= text->nlines ? end_line - 1 : end_line;
@@ -429,7 +357,6 @@ void text_for_each_line(struct text *text, uint32_t line, uint32_t nlines,
.allocated = false,
.text = src_line->data,
.nbytes = src_line->nbytes,
- .nchars = src_line->nchars,
.line = li,
};
callback(&line, userdata);
@@ -441,8 +368,8 @@ struct text_chunk text_get_line(struct text *text, uint32_t line) {
return (struct text_chunk){
.text = src_line->data,
.nbytes = src_line->nbytes,
- .nchars = src_line->nchars,
.line = line,
+ .allocated = false,
};
}
@@ -453,33 +380,34 @@ struct copy_cmd {
};
struct text_chunk text_get_region(struct text *text, uint32_t start_line,
- uint32_t start_col, uint32_t end_line,
- uint32_t end_col) {
- if (start_line == end_line && start_col == end_col) {
+ uint32_t start_offset, uint32_t end_line,
+ uint32_t end_offset) {
+ if (start_line == end_line && start_offset == end_offset) {
return (struct text_chunk){0};
}
struct line *first_line = &text->lines[start_line];
struct line *last_line = &text->lines[end_line];
+ uint32_t first_line_len = first_line->nbytes;
+ uint32_t last_line_len = last_line->nbytes;
- if (start_col > first_line->nchars) {
+ if (start_offset > first_line_len) {
return (struct text_chunk){0};
}
// handle copying of newlines
- if (end_col > last_line->nchars) {
+ if (end_offset > last_line_len) {
++end_line;
- end_col = 0;
+ end_offset = 0;
last_line = &text->lines[end_line];
}
uint32_t nlines = end_line - start_line + 1;
struct copy_cmd *copy_cmds = calloc(nlines, sizeof(struct copy_cmd));
- uint32_t total_chars = 0, total_bytes = 0;
+ uint32_t total_bytes = 0;
for (uint32_t line = start_line; line <= end_line; ++line) {
struct line *l = &text->lines[line];
- total_chars += l->nchars;
total_bytes += l->nbytes;
struct copy_cmd *cmd = &copy_cmds[line - start_line];
@@ -490,19 +418,14 @@ struct text_chunk text_get_region(struct text *text, uint32_t start_line,
// correct first line
struct copy_cmd *cmd_first = &copy_cmds[0];
- uint32_t byteoff =
- utf8_nbytes(first_line->data, first_line->nbytes, start_col);
- cmd_first->byteoffset += byteoff;
- cmd_first->nbytes -= byteoff;
- total_bytes -= byteoff;
- total_chars -= start_col;
+ cmd_first->byteoffset += start_offset;
+ cmd_first->nbytes -= start_offset;
+ total_bytes -= start_offset;
// correct last line
struct copy_cmd *cmd_last = &copy_cmds[nlines - 1];
- uint32_t byteindex = utf8_nbytes(last_line->data, last_line->nbytes, end_col);
- cmd_last->nbytes -= (last_line->nbytes - byteindex);
- total_bytes -= (last_line->nbytes - byteindex);
- total_chars -= (last_line->nchars - end_col);
+ cmd_last->nbytes -= (last_line->nbytes - end_offset);
+ total_bytes -= (last_line->nbytes - end_offset);
uint8_t *data = (uint8_t *)malloc(
total_bytes + /* nr of newline chars */ (end_line - start_line));
@@ -518,7 +441,6 @@ struct text_chunk text_get_region(struct text *text, uint32_t start_line,
data[curr] = '\n';
++curr;
++total_bytes;
- ++total_chars;
}
}
@@ -527,28 +449,25 @@ struct text_chunk text_get_region(struct text *text, uint32_t start_line,
.text = data,
.line = 0,
.nbytes = total_bytes,
- .nchars = total_chars,
.allocated = true,
};
}
-bool text_line_contains_unicode(struct text *text, uint32_t line) {
- return text->lines[line].nbytes != text->lines[line].nchars;
-}
-
-void text_add_property(struct text *text, struct location start,
- struct location end, struct text_property property) {
+void text_add_property(struct text *text, uint32_t start_line,
+ uint32_t start_offset, uint32_t end_line,
+ uint32_t end_offset, struct text_property property) {
struct text_property_entry entry = {
- .start = start,
- .end = end,
+ .start = (struct location){.line = start_line, .col = start_offset},
+ .end = (struct location){.line = end_line, .col = end_offset},
.property = property,
};
VEC_PUSH(&text->properties, entry);
}
-void text_get_properties(struct text *text, struct location location,
+void text_get_properties(struct text *text, uint32_t line, uint32_t offset,
struct text_property **properties,
uint32_t max_nproperties, uint32_t *nproperties) {
+ struct location location = {.line = line, .col = offset};
uint32_t nres = 0;
VEC_FOR_EACH(&text->properties, struct text_property_entry * prop) {
if (location_is_between(location, prop->start, prop->end)) {