diff options
Diffstat (limited to 'src/dged/text.c')
| -rw-r--r-- | src/dged/text.c | 496 |
1 files changed, 496 insertions, 0 deletions
diff --git a/src/dged/text.c b/src/dged/text.c new file mode 100644 index 0000000..f8ba72d --- /dev/null +++ b/src/dged/text.c @@ -0,0 +1,496 @@ +#include "text.h" + +#include <assert.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "display.h" +#include "signal.h" +#include "utf8.h" + +enum flags { + LineChanged = 1 << 0, +}; + +struct line { + uint8_t *data; + uint8_t flags; + uint32_t nbytes; + uint32_t nchars; +}; + +struct text { + // raw bytes without any null terminators + struct line *lines; + uint32_t nlines; + uint32_t capacity; +}; + +struct text *text_create(uint32_t initial_capacity) { + struct text *txt = calloc(1, sizeof(struct text)); + txt->lines = calloc(initial_capacity, sizeof(struct line)); + txt->capacity = initial_capacity; + txt->nlines = 0; + + return txt; +} + +void text_destroy(struct text *text) { + for (uint32_t li = 0; li < text->nlines; ++li) { + free(text->lines[li].data); + text->lines[li].data = NULL; + text->lines[li].flags = 0; + text->lines[li].nbytes = 0; + text->lines[li].nchars = 0; + } + + free(text->lines); + + free(text); +} + +void text_clear(struct text *text) { + for (uint32_t li = 0; li < text->nlines; ++li) { + free(text->lines[li].data); + text->lines[li].data = NULL; + text->lines[li].flags = 0; + text->lines[li].nbytes = 0; + text->lines[li].nchars = 0; + } + + text->nlines = 0; +} + +// given `char_idx` as a character index, return the byte index +uint32_t charidx_to_byteidx(struct line *line, uint32_t char_idx) { + if (char_idx > line->nchars) { + return line->nbytes; + } + return utf8_nbytes(line->data, line->nbytes, char_idx); +} + +uint32_t text_col_to_byteindex(struct text *text, uint32_t line, uint32_t col) { + return charidx_to_byteidx(&text->lines[line], col); +} + +// given `byte_idx` as a byte index, return the character index +uint32_t byteidx_to_charidx(struct line *line, uint32_t byte_idx) { + if (byte_idx > line->nbytes) { + return line->nchars; + } + + return utf8_nchars(line->data, byte_idx); +} + +uint32_t text_byteindex_to_col(struct text *text, uint32_t line, + uint32_t byteindex) { + return byteidx_to_charidx(&text->lines[line], byteindex); +} + +void append_empty_lines(struct text *text, uint32_t numlines) { + + if (text->nlines + numlines >= text->capacity) { + text->capacity += text->capacity + numlines > text->capacity * 2 + ? numlines + 1 + : text->capacity; + text->lines = realloc(text->lines, sizeof(struct line) * text->capacity); + } + + for (uint32_t i = 0; i < numlines; ++i) { + struct line *nline = &text->lines[text->nlines]; + nline->data = NULL; + nline->nbytes = 0; + nline->nchars = 0; + nline->flags = 0; + + ++text->nlines; + } + + if (text->nlines > text->capacity) { + printf("text->nlines: %d, text->capacity: %d\n", text->nlines, + text->capacity); + raise(SIGTRAP); + } +} + +void ensure_line(struct text *text, uint32_t line) { + if (line >= text->nlines) { + append_empty_lines(text, line - text->nlines + 1); + } +} + +// It is assumed that `data` does not contain any \n, that is handled by +// higher-level functions +void insert_at(struct text *text, uint32_t line, uint32_t col, uint8_t *data, + uint32_t len, uint32_t nchars) { + + if (len == 0) { + return; + } + + ensure_line(text, line); + + struct line *l = &text->lines[line]; + + l->nbytes += len; + l->nchars += nchars; + l->flags = LineChanged; + l->data = realloc(l->data, l->nbytes); + + uint32_t bytei = charidx_to_byteidx(l, col); + + // move following bytes out of the way + if (bytei + len < l->nbytes) { + uint32_t start = bytei + len; + memmove(l->data + start, l->data + bytei, l->nbytes - start); + } + + // insert new chars + memcpy(l->data + bytei, data, len); +} + +uint32_t text_line_length(struct text *text, uint32_t lineidx) { + return text->lines[lineidx].nchars; +} + +uint32_t text_line_size(struct text *text, uint32_t lineidx) { + return text->lines[lineidx].nbytes; +} + +uint32_t text_num_lines(struct text *text) { return text->nlines; } + +void split_line(uint32_t col, struct line *line, struct line *next) { + uint8_t *data = line->data; + uint32_t nbytes = line->nbytes; + uint32_t nchars = line->nchars; + + uint32_t chari = col; + uint32_t bytei = charidx_to_byteidx(line, chari); + + line->nbytes = bytei; + line->nchars = chari; + next->nbytes = nbytes - bytei; + next->nchars = nchars - chari; + line->flags = next->flags = line->flags; + + next->data = NULL; + line->data = NULL; + + // first, handle some cases where the new line or the pre-existing one is + // empty + if (next->nbytes == 0) { + line->data = data; + } else if (line->nbytes == 0) { + next->data = data; + } else { + // actually split the line + next->data = (uint8_t *)realloc(next->data, next->nbytes); + memcpy(next->data, data + bytei, next->nbytes); + + line->data = (uint8_t *)realloc(line->data, line->nbytes); + memcpy(line->data, data, line->nbytes); + + free(data); + } +} + +void mark_lines_changed(struct text *text, uint32_t line, uint32_t nlines) { + for (uint32_t linei = line; linei < (line + nlines); ++linei) { + text->lines[linei].flags |= LineChanged; + } +} + +void shift_lines(struct text *text, uint32_t start, int32_t direction) { + struct line *dest = &text->lines[((int64_t)start + direction)]; + struct line *src = &text->lines[start]; + uint32_t nlines = text->nlines - (dest > src ? (start + direction) : start); + memmove(dest, src, nlines * sizeof(struct line)); +} + +void new_line_at(struct text *text, uint32_t line, uint32_t col) { + ensure_line(text, line); + + uint32_t newline = line + 1; + append_empty_lines(text, 1); + + mark_lines_changed(text, line, text->nlines - line); + + // move following lines out of the way, if there are any + if (newline + 1 < text->nlines) { + shift_lines(text, newline, 1); + } + + // split line if needed + split_line(col, &text->lines[line], &text->lines[newline]); +} + +void delete_line(struct text *text, uint32_t line) { + if (text->nlines == 0) { + return; + } + + mark_lines_changed(text, line, text->nlines - line); + + free(text->lines[line].data); + text->lines[line].data = NULL; + + if (line + 1 < text->nlines) { + shift_lines(text, line + 1, -1); + } + + --text->nlines; + text->lines[text->nlines].data = NULL; + text->lines[text->nlines].nbytes = 0; + text->lines[text->nlines].nchars = 0; +} + +void text_insert_at_inner(struct text *text, uint32_t line, uint32_t col, + uint8_t *bytes, uint32_t nbytes, + uint32_t *lines_added, uint32_t *cols_added, + bool force_newline) { + uint32_t linelen = 0, start_line = line; + + *cols_added = 0; + for (uint32_t bytei = 0; bytei < nbytes; ++bytei) { + uint8_t byte = bytes[bytei]; + if (byte == '\n') { + uint8_t *line_data = bytes + (bytei - linelen); + uint32_t nchars = utf8_nchars(line_data, linelen); + + insert_at(text, line, col, line_data, linelen, nchars); + + col += nchars; + + // only insert a newline if we have to + if (force_newline || linelen == 0 || col < text_line_length(text, line) || + line + 1 < text->nlines) { + new_line_at(text, line, col); + } + + ++line; + linelen = 0; + col = 0; + } else { + ++linelen; + } + } + + // handle remaining + if (linelen > 0) { + uint8_t *line_data = bytes + (nbytes - linelen); + uint32_t nchars = utf8_nchars(line_data, linelen); + insert_at(text, line, col, line_data, linelen, nchars); + *cols_added = nchars; + } + + *lines_added = line - start_line; +} + +void text_append(struct text *text, uint8_t *bytes, uint32_t nbytes, + uint32_t *lines_added, uint32_t *cols_added) { + uint32_t line = text->nlines > 0 ? text->nlines - 1 : 0; + uint32_t col = text_line_length(text, line); + + text_insert_at_inner(text, line, col, bytes, nbytes, lines_added, cols_added, + true); +} + +void text_insert_at(struct text *text, uint32_t line, uint32_t col, + uint8_t *bytes, uint32_t nbytes, uint32_t *lines_added, + uint32_t *cols_added) { + text_insert_at_inner(text, line, col, bytes, nbytes, lines_added, cols_added, + false); +} + +void text_delete(struct text *text, uint32_t start_line, uint32_t start_col, + uint32_t end_line, uint32_t end_col) { + + if (text->nlines == 0) { + return; + } + + uint32_t maxline = text->nlines > 0 ? text->nlines - 1 : 0; + + // make sure we stay inside + if (start_line > maxline) { + start_line = maxline; + start_col = text->lines[start_line].nchars > 0 + ? text->lines[start_line].nchars - 1 + : 0; + } + + if (end_line > maxline) { + end_line = maxline; + end_col = text->lines[end_line].nchars; + } + + struct line *firstline = &text->lines[start_line]; + struct line *lastline = &text->lines[end_line]; + + // clamp column + if (start_col > firstline->nchars) { + start_col = firstline->nchars > 0 ? firstline->nchars - 1 : 0; + } + + // handle deletion of newlines + if (end_col > lastline->nchars) { + if (end_line + 1 < text->nlines) { + end_col = 0; + ++end_line; + lastline = &text->lines[end_line]; + } else { + end_col = lastline->nchars; + } + } + + uint32_t bytei = utf8_nbytes(lastline->data, lastline->nbytes, end_col); + if (lastline == firstline) { + // in this case we can "overwrite" + uint32_t dstbytei = + utf8_nbytes(firstline->data, firstline->nbytes, start_col); + memcpy(firstline->data + dstbytei, lastline->data + bytei, + lastline->nbytes - bytei); + } else { + // otherwise we actually have to copy from the last line + insert_at(text, start_line, start_col, lastline->data + bytei, + lastline->nbytes - bytei, lastline->nchars - end_col); + } + + firstline->nchars = start_col + (lastline->nchars - end_col); + firstline->nbytes = + utf8_nbytes(firstline->data, firstline->nbytes, start_col) + + (lastline->nbytes - bytei); + + // delete full lines, backwards to not shift old, crappy data upwards + for (uint32_t linei = end_line >= text->nlines ? end_line - 1 : end_line; + linei > start_line; --linei) { + delete_line(text, linei); + } + + // if this is the last line in the buffer, and it turns out empty, remove it + if (firstline->nbytes == 0 && start_line == text->nlines - 1) { + delete_line(text, start_line); + } +} + +void text_for_each_chunk(struct text *text, chunk_cb callback, void *userdata) { + // if representation of text is changed, this can be changed as well + text_for_each_line(text, 0, text->nlines, callback, userdata); +} + +void text_for_each_line(struct text *text, uint32_t line, uint32_t nlines, + chunk_cb callback, void *userdata) { + uint32_t nlines_max = + (line + nlines) > text->nlines ? text->nlines : (line + nlines); + for (uint32_t li = line; li < nlines_max; ++li) { + struct line *src_line = &text->lines[li]; + struct text_chunk line = (struct text_chunk){ + .text = src_line->data, + .nbytes = src_line->nbytes, + .nchars = src_line->nchars, + .line = li, + }; + callback(&line, userdata); + } +} + +struct text_chunk text_get_line(struct text *text, uint32_t line) { + struct line *src_line = &text->lines[line]; + return (struct text_chunk){ + .text = src_line->data, + .nbytes = src_line->nbytes, + .nchars = src_line->nchars, + .line = line, + }; +} + +struct copy_cmd { + uint32_t line; + uint32_t byteoffset; + uint32_t nbytes; +}; + +struct text_chunk text_get_region(struct text *text, uint32_t start_line, + uint32_t start_col, uint32_t end_line, + uint32_t end_col) { + if (start_line == end_line && start_col == end_col) { + return (struct text_chunk){0}; + } + + struct line *first_line = &text->lines[start_line]; + struct line *last_line = &text->lines[end_line]; + + if (start_col > first_line->nchars) { + return (struct text_chunk){0}; + } + + // handle copying of newlines + if (end_col > last_line->nchars) { + ++end_line; + end_col = 0; + last_line = &text->lines[end_line]; + } + + uint32_t nlines = end_line - start_line + 1; + struct copy_cmd *copy_cmds = calloc(nlines, sizeof(struct copy_cmd)); + + uint32_t total_chars = 0, total_bytes = 0; + for (uint32_t line = start_line; line <= end_line; ++line) { + struct line *l = &text->lines[line]; + total_chars += l->nchars; + total_bytes += l->nbytes; + + struct copy_cmd *cmd = ©_cmds[line - start_line]; + cmd->line = line; + cmd->byteoffset = 0; + cmd->nbytes = l->nbytes; + } + + // correct first line + struct copy_cmd *cmd_first = ©_cmds[0]; + uint32_t byteoff = + utf8_nbytes(first_line->data, first_line->nbytes, start_col); + cmd_first->byteoffset += byteoff; + cmd_first->nbytes -= byteoff; + total_bytes -= byteoff; + total_chars -= start_col; + + // correct last line + struct copy_cmd *cmd_last = ©_cmds[nlines - 1]; + uint32_t byteindex = utf8_nbytes(last_line->data, last_line->nbytes, end_col); + cmd_last->nbytes -= (last_line->nbytes - byteindex); + total_bytes -= (last_line->nbytes - byteindex); + total_chars -= (last_line->nchars - end_col); + + uint8_t *data = (uint8_t *)malloc( + total_bytes + /* nr of newline chars */ (end_line - start_line)); + + // copy data + for (uint32_t cmdi = 0, curr = 0; cmdi < nlines; ++cmdi) { + struct copy_cmd *c = ©_cmds[cmdi]; + struct line *l = &text->lines[c->line]; + memcpy(data + curr, l->data + c->byteoffset, c->nbytes); + curr += c->nbytes; + + if (cmdi != (nlines - 1)) { + data[curr] = '\n'; + ++curr; + ++total_bytes; + ++total_chars; + } + } + + free(copy_cmds); + return (struct text_chunk){ + .text = data, + .line = 0, + .nbytes = total_bytes, + .nchars = total_chars, + .allocated = true, + }; +} + +bool text_line_contains_unicode(struct text *text, uint32_t line) { + return text->lines[line].nbytes != text->lines[line].nchars; +} |
