diff options
| author | Albert Cervin <albert@acervin.com> | 2023-05-18 23:51:51 +0200 |
|---|---|---|
| committer | Albert Cervin <albert@acervin.com> | 2023-05-24 22:18:12 +0200 |
| commit | 4f3b576db6b01c8c88076985478e2a7fa37be340 (patch) | |
| tree | 9723ed39a19872fd52f2867613e78e02de3cf79b | |
| parent | a4d17ddb8e7d23ccca13132f4d88cfc5f5730b76 (diff) | |
| download | dged-4f3b576db6b01c8c88076985478e2a7fa37be340.tar.gz dged-4f3b576db6b01c8c88076985478e2a7fa37be340.tar.xz dged-4f3b576db6b01c8c88076985478e2a7fa37be340.zip | |
TOML settings parsing
Currently a very simplistic parser that does not support all TOML
datatypes. Supported are:
- Tables
- Strings (including multiline)
- Integers
- Inline Tables
- Booleans
- Comments
| -rw-r--r-- | Makefile | 2 | ||||
| -rw-r--r-- | src/dged/buffer.c | 3 | ||||
| -rw-r--r-- | src/dged/settings-parse.c | 277 | ||||
| -rw-r--r-- | src/dged/settings-parse.h | 44 | ||||
| -rw-r--r-- | src/dged/settings.c | 196 | ||||
| -rw-r--r-- | src/dged/settings.h | 22 | ||||
| -rw-r--r-- | src/dged/vec.h | 1 | ||||
| -rw-r--r-- | src/main/main.c | 31 | ||||
| -rw-r--r-- | test/settings.c | 74 |
9 files changed, 648 insertions, 2 deletions
@@ -10,7 +10,7 @@ build: SOURCES = src/dged/binding.c src/dged/buffer.c src/dged/command.c src/dged/display.c \ src/dged/keyboard.c src/dged/minibuffer.c src/dged/text.c \ src/dged/utf8.c src/dged/buffers.c src/dged/window.c src/dged/allocator.c src/dged/undo.c \ - src/dged/settings.c src/dged/lang.c + src/dged/settings.c src/dged/lang.c src/dged/settings-parse.c MAIN_SOURCES = src/main/main.c src/main/cmds.c src/main/bindings.c diff --git a/src/dged/buffer.c b/src/dged/buffer.c index 7e9789a..b3b3412 100644 --- a/src/dged/buffer.c +++ b/src/dged/buffer.c @@ -307,7 +307,7 @@ bool buffer_region_has_size(struct buffer_view *view) { struct text_chunk *copy_region(struct buffer *buffer, struct region region) { struct text_chunk *curr = &g_kill_ring.buffer[g_kill_ring.curr_idx]; - g_kill_ring.curr_idx = g_kill_ring.curr_idx + 1 % KILL_RING_SZ; + g_kill_ring.curr_idx = (g_kill_ring.curr_idx + 1) % KILL_RING_SZ; if (curr->allocated) { free(curr->text); @@ -405,6 +405,7 @@ void buffer_kill_line(struct buffer_view *view) { .col = view->dot.col + nchars, }, }; + copy_region(view->buffer, reg); delete_with_undo(view->buffer, view->dot, (struct buffer_location){ diff --git a/src/dged/settings-parse.c b/src/dged/settings-parse.c new file mode 100644 index 0000000..6c1c482 --- /dev/null +++ b/src/dged/settings-parse.c @@ -0,0 +1,277 @@ +#include "settings-parse.h" + +#include <errno.h> +#include <stdarg.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "utf8.h" + +enum byte_class { + Byte_Alphanumeric, + Byte_Symbol, +}; + +struct parser parser_create(struct reader reader) { + struct parser state = { + .row = 0, + .col = 0, + .reader = reader, + }; + + VEC_INIT(&state.buffer, 32); + + return state; +} + +void parser_destroy(struct parser *parser) { VEC_DESTROY(&parser->buffer); } + +static enum byte_class classify(uint8_t byte) { + if ((byte >= 'a' && byte <= 'z') || (byte >= 'A' && byte <= 'Z') || + (byte >= '0' && byte <= '9') || byte == '_' || byte 
== '-' || + utf8_byte_is_unicode(byte)) { + return Byte_Alphanumeric; + } + + return Byte_Symbol; +} + +static void trim_parse_buffer_whitespace(void **data, uint32_t *len) { + uint8_t *d = (uint8_t *)*data; + uint32_t new_len = *len; + + // beginning + while (d[0] == ' ' || d[0] == '\t') { + --new_len; + ++d; + } + + // end + while (d[new_len - 1] == ' ' || d[new_len - 1] == '\t') { + --new_len; + } + + *data = d; + *len = new_len; +} + +static bool read_data_with_initial(struct parser *state, uint8_t *initial_byte, + uint8_t end, void **data_out, + uint32_t *len_out) { + uint8_t byte; + VEC_CLEAR(&state->buffer); + if (initial_byte != NULL) { + VEC_PUSH(&state->buffer, *initial_byte); + } + + while (state->reader.getbytes(1, &byte, state->reader.userdata) > 0 && + byte != end) { + ++state->col; + VEC_PUSH(&state->buffer, byte); + } + + *data_out = VEC_FRONT(&state->buffer); + *len_out = VEC_SIZE(&state->buffer); + + trim_parse_buffer_whitespace(data_out, len_out); + + return byte == end; +} + +static bool read_data(struct parser *state, uint8_t end, void **data_out, + uint32_t *len_out) { + return read_data_with_initial(state, NULL, end, data_out, len_out); +} + +static bool discard(struct parser *state, uint8_t end) { + uint8_t byte; + while (state->reader.getbytes(1, &byte, state->reader.userdata) > 0 && + byte != end) { + ++state->col; + } + + return byte == end; +} + +static void errtoken(struct token *token_out, const char *fmt, ...) 
{ + static char errmsgbuf[256] = {0}; + va_list args; + va_start(args, fmt); + size_t written = vsnprintf(errmsgbuf, 256, fmt, args); + va_end(args); + + token_out->type = Token_Error; + token_out->data = errmsgbuf; + token_out->len = written; +} + +bool parser_next_token(struct parser *state, struct token *token_out) { + uint8_t byte; + static bool parse_value = false; + static int64_t int_value = 0; + static bool bool_value = false; + + memset(token_out, 0, sizeof(struct token)); + + while (state->reader.getbytes(1, &byte, state->reader.userdata) > 0) { + switch (classify(byte)) { + case Byte_Alphanumeric: // unquoted key / value + if (!parse_value) { + token_out->type = Token_Key; + token_out->row = state->row; + token_out->col = state->col; + + if (!read_data_with_initial(state, &byte, '=', &token_out->data, + &token_out->len)) { + errtoken(token_out, "Unexpected EOF while looking for end of key"); + return true; + } + + parse_value = true; + } else { + parse_value = false; + token_out->row = state->row; + token_out->col = state->col; + + if (byte >= '0' && byte <= '9') { + token_out->type = Token_IntValue; + void *data; + uint32_t len; + read_data_with_initial(state, &byte, '\n', &data, &len); + + char *s = calloc(len + 1, 1); + strncpy(s, (char *)data, len); + + errno = 0; + int_value = strtol(s, NULL, 0); + free(s); + if (errno != 0) { + errtoken(token_out, "Invalid integer value %.*s: %s", len, + (char *)data, strerror(errno)); + return true; + } + + token_out->data = &int_value; + token_out->len = 0; + } else if (byte == 't' || byte == 'f') { + token_out->type = Token_BoolValue; + void *data = NULL; + uint32_t len = 0; + read_data_with_initial(state, &byte, '\n', &data, &len); + + if (strncmp((char *)data, "true", len) == 0) { + bool_value = true; + token_out->data = &bool_value; + token_out->len = 0; + } else if (strncmp((char *)data, "false", len) == 0) { + bool_value = false; + token_out->data = &bool_value; + token_out->len = 0; + } else { + 
errtoken(token_out, "Invalid bool value: %.*s", len, (char *)data); + } + } + } + + return true; + + case Byte_Symbol: + switch (byte) { + case '#': // comment + token_out->type = Token_Comment; + token_out->row = state->row; + token_out->col = state->col; + if (!read_data(state, '\n', &token_out->data, &token_out->len)) { + errtoken(token_out, + "Unexpected EOF while looking for end of comment line"); + return true; + } + + uint8_t *data = (uint8_t *)token_out->data; + if (data[token_out->len - 1] == '\r') { + --token_out->len; + } + + state->col = 0; + ++state->row; + + return true; + + case '{': // inline table + parse_value = false; + token_out->type = Token_InlineTable; + token_out->row = state->row; + token_out->col = state->col; + return true; + break; + + case '}': // end inline table + parse_value = false; + break; + + case '[': // table open + token_out->type = Token_Table; + token_out->row = state->row; + token_out->col = state->col; + if (!read_data(state, ']', &token_out->data, &token_out->len)) { + errtoken(token_out, "Unexpected EOF while looking for matching ']'"); + return true; + } + + ++state->col; + return true; + + case '"': // quoted key or string value + bool multiline = false; + if (parse_value) { + token_out->type = Token_StringValue; + } else { + token_out->type = Token_Key; + } + token_out->row = state->row; + token_out->col = state->col; + + // check for multiline + uint32_t numquotes = 1; + while (state->reader.getbytes(1, &byte, state->reader.userdata) > 0 && + byte == '"') { + ++numquotes; + } + + if (numquotes == 3) { + multiline = true; + } + + if (!read_data_with_initial(state, &byte, '"', &token_out->data, + &token_out->len)) { + errtoken(token_out, "Unexpected EOF while looking for matching '\"'"); + parse_value = false; + return true; + } + + if (!parse_value) { + discard(state, '='); + } + + if (multiline) { + discard(state, '"'); + discard(state, '"'); + } + + ++state->col; + parse_value = false; + return true; + + case '\n': 
+ case '\r': + state->col = 0; + ++state->row; + break; + } + break; + } + } + + return false; +} diff --git a/src/dged/settings-parse.h b/src/dged/settings-parse.h new file mode 100644 index 0000000..63a0f1e --- /dev/null +++ b/src/dged/settings-parse.h @@ -0,0 +1,44 @@ +#include <stdbool.h> +#include <stdint.h> +#include <stddef.h> + +#include "vec.h" + +enum token_type { + Token_Comment, + Token_Key, + Token_StringValue, + Token_BoolValue, + Token_IntValue, + Token_Table, + Token_InlineTable, + + Token_Error, +}; + +struct token { + enum token_type type; + void *data; + uint32_t len; + uint32_t row; + uint32_t col; +}; + +typedef size_t (*getbytes)(size_t nbytes, uint8_t *buf, void *userdata); +struct reader { + getbytes getbytes; + void *userdata; +}; + +struct parser { + uint32_t row; + uint32_t col; + + struct reader reader; + VEC(uint8_t) buffer; +}; + +struct parser parser_create(struct reader reader); +void parser_destroy(struct parser *parser); + +bool parser_next_token(struct parser *state, struct token *token_out); diff --git a/src/dged/settings.c b/src/dged/settings.c index 524aa9b..4370aa9 100644 --- a/src/dged/settings.c +++ b/src/dged/settings.c @@ -3,11 +3,15 @@ #include "hash.h" #include "hashmap.h" #include "minibuffer.h" +#include "settings-parse.h" +#include "utf8.h" #include "vec.h" +#include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <unistd.h> static struct settings g_settings = {0}; @@ -80,6 +84,15 @@ void settings_set(const char *path, struct setting_value value) { } } +static void settings_upsert(const char *path, struct setting_value value) { + struct setting *setting = settings_get(path); + if (setting != NULL) { + setting_set_value(setting, value); + } else { + settings_register_setting(path, value); + } +} + void setting_to_string(struct setting *setting, char *buf, size_t n) { switch (setting->value.type) { case Setting_Bool: @@ -93,3 +106,186 @@ void setting_to_string(struct setting *setting, char 
*buf, size_t n) { break; } } + +static int32_t parse_toml(struct parser *state, char **errmsgs[]) { + char *curtbl = NULL; + char *curkey = NULL; + uint32_t errcnt = 0; + + VEC(char *) errs; + VEC_INIT(&errs, 16); + + struct token t = {0}; + while (parser_next_token(state, &t)) { + switch (t.type) { + case Token_Table: + if (curtbl != NULL) { + free(curtbl); + } + curtbl = calloc(t.len + 1, 1); + strncpy(curtbl, (char *)t.data, t.len); + break; + + case Token_InlineTable: + if (curkey != NULL) { + free(curtbl); + curtbl = strdup(curkey); + } + break; + + case Token_Key: + if (curkey != NULL) { + free(curkey); + } + uint32_t len = t.len + 1; + if (curtbl != NULL) { + len += strlen(curtbl) /* space for the . */ + 1; + } + + curkey = calloc(len, 1); + if (curtbl != NULL) { + strcpy(curkey, curtbl); + strncat(curkey, ".", 1); + } + + strncat(curkey, (char *)t.data, t.len); + break; + + case Token_IntValue: + int64_t i = *((int64_t *)t.data); + settings_upsert(curkey, (struct setting_value){.type = Setting_Number, + .number_value = i}); + break; + + case Token_BoolValue: + bool b = *((bool *)t.data); + settings_upsert(curkey, (struct setting_value){.type = Setting_Bool, + .bool_value = b}); + break; + + case Token_StringValue: + char *v = calloc(t.len + 1, 1); + strncpy(v, (char *)t.data, t.len); + settings_upsert(curkey, (struct setting_value){.type = Setting_String, + .string_value = v}); + free(v); + break; + + case Token_Error: + char *err = malloc(t.len + 128); + snprintf(err, t.len + 128, "error (%d:%d): %.*s\n", t.row, t.col, t.len, + (char *)t.data); + VEC_PUSH(&errs, err); + break; + } + } + + if (curtbl != NULL) { + free(curtbl); + } + + if (curkey != NULL) { + free(curkey); + } + + if (!VEC_EMPTY(&errs)) { + *errmsgs = VEC_ENTRIES(&errs); + } else { + *errmsgs = NULL; + VEC_DESTROY(&errs); + } + return VEC_SIZE(&errs); +} + +struct str_cursor { + const char *data; + uint32_t pos; + uint32_t size; +}; + +size_t get_bytes_from_str(size_t nbytes, uint8_t *buf, 
void *userdata) { + struct str_cursor *c = (struct str_cursor *)userdata; + size_t left = c->size - c->pos; + size_t to_copy = nbytes > left ? left : nbytes; + if (to_copy > 0) { + memcpy(buf, c->data + c->pos, to_copy); + } + + c->pos += to_copy; + + return to_copy; +} + +int32_t settings_from_string(const char *toml, char **errmsgs[]) { + struct str_cursor cursor = { + .data = toml, + .pos = 0, + .size = strlen(toml), + }; + + struct reader reader = { + .getbytes = get_bytes_from_str, + .userdata = (void *)&cursor, + }; + + struct parser parser = parser_create(reader); + int32_t ret = parse_toml(&parser, errmsgs); + + parser_destroy(&parser); + return ret; +} + +#define FILE_READER_BUFSZ 1024 +struct file_reader { + int fd; + uint8_t buffer[FILE_READER_BUFSZ]; + uint32_t buflen; +}; + +static struct file_reader file_reader_create(int fd) { + return (struct file_reader){ + .fd = fd, + .buffer = {0}, + .buflen = 0, + }; +} + +static size_t get_bytes_from_file(size_t nbytes, uint8_t *buf, void *userdata) { + struct file_reader *r = (struct file_reader *)userdata; + if (nbytes > FILE_READER_BUFSZ) { + return read(r->fd, buf, nbytes); + } + + if (nbytes > r->buflen) { + // fill buffer + r->buflen += + read(r->fd, r->buffer + r->buflen, FILE_READER_BUFSZ - r->buflen); + } + + size_t to_read = nbytes > r->buflen ? 
r->buflen : nbytes; + memcpy(buf, r->buffer, to_read); + + r->buflen -= to_read; + memcpy(r->buffer, r->buffer + to_read, r->buflen); + return to_read; +} + +int32_t settings_from_file(const char *path, char **errmsgs[]) { + int fd = open(path, O_RDONLY); + if (fd < 0) { + return fd; + } + + struct file_reader file_reader = file_reader_create(fd); + + struct reader reader = { + .getbytes = get_bytes_from_file, + .userdata = (void *)&file_reader, + }; + + struct parser parser = parser_create(reader); + int32_t ret = parse_toml(&parser, errmsgs); + + parser_destroy(&parser); + return ret; +} diff --git a/src/dged/settings.h b/src/dged/settings.h index 5d245d9..80f6b0f 100644 --- a/src/dged/settings.h +++ b/src/dged/settings.h @@ -135,3 +135,25 @@ void setting_set_value(struct setting *setting, struct setting_value val); * @param n Size in bytes of @ref buf. */ void setting_to_string(struct setting *setting, char *buf, size_t n); + +/** + * Parse settings from a string in TOML format. + * + * @param toml Pointer to a NULL-terminated string containing TOML settings. + * @param errmsgs Pointer to a string array where error messages will be placed. These + * messages must be freed after use. + * @returns 0 on success, n > 0 where n denotes the number of error messages in + * @ref errmsgs + */ +int32_t settings_from_string(const char *toml, char **errmsgs[]); + +/** + * Parse settings from a file in TOML format. + * + * @param toml Pointer to a NULL-terminated string describing a path to a TOML file with settings. + * @param errmsgs Pointer to a string array where error messages will be placed. These + * messages must be freed after use. 
+ * @returns 0 on success, n > 0 where n denotes the number of error messages in + * @ref errmsgs + */ +int32_t settings_from_file(const char *path, char **errmsgs[]); diff --git a/src/dged/vec.h b/src/dged/vec.h index 073f978..df5cd0e 100644 --- a/src/dged/vec.h +++ b/src/dged/vec.h @@ -57,6 +57,7 @@ #define VEC_EMPTY(vec) ((vec)->nentries == 0) #define VEC_CLEAR(vec) (vec)->nentries = 0 +#define VEC_FRONT(vec) ((vec)->nentries > 0 ? &((vec)->entries[0]) : NULL) #define VEC_BACK(vec) \ ((vec)->nentries > 0 ? &((vec)->entries[(vec)->nentries - 1]) : NULL) diff --git a/src/main/main.c b/src/main/main.c index b14c6c0..b6ee3c0 100644 --- a/src/main/main.c +++ b/src/main/main.c @@ -6,6 +6,7 @@ #include <stdlib.h> #include <string.h> #include <time.h> +#include <unistd.h> #include "dged/allocator.h" #include "dged/binding.h" @@ -14,6 +15,7 @@ #include "dged/display.h" #include "dged/lang.h" #include "dged/minibuffer.h" +#include "dged/path.h" #include "dged/reactor.h" #include "dged/settings.h" @@ -163,6 +165,35 @@ int main(int argc, char *argv[]) { struct commands commands = command_registry_create(32); settings_init(64); + const char *config_path = getenv("XDG_CONFIG_HOME"); + if (config_path == NULL) { + config_path = "~/.config"; + } + char settings_file[1024]; + snprintf(settings_file, 1024, "%s/dged/dged.toml", config_path); + char *settings_file_abs = expanduser(settings_file); + char **errmsgs = NULL; + if (access(settings_file_abs, F_OK) == 0) { + int32_t ret = settings_from_file(settings_file_abs, &errmsgs); + if (ret > 0) { + fprintf(stderr, "Error reading settings from %s:\n", settings_file_abs); + for (uint32_t erri = 0; erri < ret; ++erri) { + fprintf(stderr, " - %s", errmsgs[erri]); + free(errmsgs[erri]); + } + free(errmsgs); + free(settings_file_abs); + return 3; + } else if (ret < 0) { + fprintf(stderr, "Error occured reading settings from %s:\n", + settings_file_abs); + free(settings_file_abs); + return 2; + } + } + + free(settings_file_abs); + 
languages_init(true); buffer_static_init(); diff --git a/test/settings.c b/test/settings.c index b1fdc9a..f56825f 100644 --- a/test/settings.c +++ b/test/settings.c @@ -68,7 +68,81 @@ void test_set() { settings_destroy(); } +void test_from_toml_string() { + char *content = "[ languages.c]\n" + "name = \"C\""; + + settings_init(10); + char **errmsgs = NULL; + int32_t res = settings_from_string(content, &errmsgs); + ASSERT(res == 0, "Expected valid TOML to parse successfully"); + + struct setting *setting = settings_get("languages.c.name"); + ASSERT(setting != NULL, + "Expected to be able to retrieve setting after parsed from string"); + ASSERT(setting->value.type == Setting_String, "Expected a string setting"); + ASSERT_STR_EQ(setting->value.string_value, "C", + "Expected setting value to be \"C\""); + + content = "sune = \"wrong"; + res = settings_from_string(content, &errmsgs); + ASSERT(res >= 1, "Expected (at least) one error from invalid toml"); + for (uint32_t i = 0; i < res; ++i) { + free(errmsgs[i]); + } + free(errmsgs); + + content = "boll = truj"; + res = settings_from_string(content, &errmsgs); + ASSERT(res >= 1, "Expected (at least) one error from an invalid bool"); + for (uint32_t i = 0; i < res; ++i) { + free(errmsgs[i]); + } + free(errmsgs); + + content = "[editor]\n" + "show-whitespace = true\n" + "tab-width = 3\n"; + res = settings_from_string(content, &errmsgs); + ASSERT(res == 0, "Expected valid TOML to parse successfully"); + + setting = settings_get("editor.show-whitespace"); + ASSERT(setting != NULL, + "Expected editor.show-whitespace to be set from TOML"); + ASSERT(setting->value.bool_value, + "Expected editor.show-whitespace to be set to true from TOML"); + + setting = settings_get("editor.tab-width"); + ASSERT(setting != NULL, "Expected editor.tab-width to be set from TOML"); + ASSERT(setting->value.number_value == 3, + "Expected editor.tab-width to be set to 3 from TOML"); + + content = "[languages]\n" + "pang = { name = \"Bom\", \n" + 
"description = \"Tjoff\" }\n"; + res = settings_from_string(content, &errmsgs); + ASSERT(res == 0, "Expected valid TOML to parse successfully"); + + setting = settings_get("languages.pang.name"); + ASSERT(setting != NULL, + "Expected languages.pang.name to be set through inline table"); + ASSERT_STR_EQ(setting->value.string_value, "Bom", + "Expected languages.pang.name to be \"Bom\""); + + content = "multi = \"\"\"This is\n" + "a multiline string\"\"\"\n"; + res = settings_from_string(content, &errmsgs); + ASSERT(res == 0, "Expected valid TOML to parse successfully"); + setting = settings_get("multi"); + ASSERT(setting != NULL, "Expected multi to be set"); + ASSERT_STR_EQ(setting->value.string_value, "This is\na multiline string", + "Expected newline to have been preserved in multiline string"); + + settings_destroy(); +} + void run_settings_tests() { run_test(test_get); run_test(test_set); + run_test(test_from_toml_string); } |
