// sbed: streaming binary encoder/decoder
// git clone http://git.nthia.dev/sbed
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <varint.h>
#include <ctype.h>
/* Kinds of expression, stored in Expr.code. */
enum {
  EXPR_CONST = 0, /* data points at a 64-bit integer constant */
  EXPR_VAR = 1,   /* data points at a 64-bit variable index */
  EXPR_OP = 2,    /* reserved; expr_eval yields 0 for it */
};
/*
 * A tagged expression node.
 * `code` is one of the EXPR_* constants and decides how `data` is read.
 */
struct Expr {
  int code;
  void *data;
};

/* Reset `e` to the given kind with no payload attached. */
void expr_init(struct Expr *e, int code) {
  *e = (struct Expr){ .code = code, .data = NULL };
}
/* Runtime type tags recorded per variable in Parser.types. */
enum {
  TYPE_UINT64 = 0,  /* value slot points at a single u_int64_t */
  TYPE_U8ARRAY = 1, /* value slot points at raw bytes; Parser.sizes holds the count */
};
/* Rule opcodes, stored in Rule.code and dispatched on by rule_write. */
enum {
  RULE_VARINT = 0,     /* decode varint(s) from the input into a variable */
  RULE_U8ARRAY = 1,    /* bind a run of input bytes to a variable */
  RULE_LOOP_BEGIN = 2, /* enter a nested level of rules */
  RULE_LOOP_END = 3,   /* rewind the current level to its first rule */
  RULE_PRINTLN = 4,    /* print a list of variables followed by a newline */
};
/*
 * One step of the decoding program.
 *   code - RULE_* opcode
 *   n    - count/length expression (for println: the number of variables)
 *   src  - for loops, the parser stores the loop body's rule array here
 *   dst  - destination variable (for println: the list of variable indexes)
 */
struct Rule {
  int code;
  struct Expr *n;
  struct Expr *src;
  struct Expr *dst;
};

/* Reset `r` to the given opcode with all operands cleared. */
void rule_init(struct Rule *r, int code) {
  *r = (struct Rule){ .code = code, .n = NULL, .src = NULL, .dst = NULL };
}
/*
 * One nesting level of rules.
 *   rules       - array of rule pointers
 *   rules_index - index of the next rule to execute
 *   rules_len   - number of rules stored
 *   rules_cap   - number of slots allocated in `rules`
 */
struct Level {
  struct Rule **rules;
  size_t rules_index, rules_len, rules_cap;
};

/*
 * Initialise `l` as an empty level with room for 16 rules.
 * The rule array is zero-filled. Terminates the process if the
 * allocation fails, since every later access dereferences `rules`.
 */
void level_init(struct Level *l) {
  l->rules_index = 0;
  l->rules_len = 0;
  l->rules_cap = 16;
  l->rules = calloc(l->rules_cap, sizeof *l->rules);
  if (l->rules == NULL) {
    fprintf(stderr, "out of memory\n");
    exit(1);
  }
}
/*
 * Parser/interpreter state.
 *   levels, level, levels_cap - stack of rule levels and the index of the current one
 *   next_var_i                - set to 0 by parser_init; not otherwise used in this file
 *   vars, vars_len, vars_cap  - variable names
 *   values, types, sizes      - per-variable value pointer, TYPE_* tag and byte size,
 *                               indexed like `vars`
 *   print_buffer(_size)       - scratch buffer used when formatting println output
 */
struct Parser {
  struct Level **levels;
  size_t level, levels_cap;
  size_t next_var_i;
  char **vars;
  size_t vars_len, vars_cap;
  void **values;
  int *types;
  int *sizes;
  size_t print_buffer_size;
  char *print_buffer;
};

/*
 * Initialise `p` with one empty top-level Level, no variables and a
 * 1024-byte print buffer. Terminates the process if any allocation fails;
 * the variable tables are allocated lazily elsewhere.
 */
void parser_init(struct Parser *p) {
  p->levels_cap = 16;
  p->levels = calloc(p->levels_cap, sizeof *p->levels);
  struct Level *root = malloc(sizeof *root);
  p->print_buffer_size = 1024;
  p->print_buffer = malloc(p->print_buffer_size);
  if (p->levels == NULL || root == NULL || p->print_buffer == NULL) {
    fprintf(stderr, "out of memory\n");
    exit(1);
  }
  level_init(root);
  p->levels[0] = root;
  p->level = 0;
  p->next_var_i = 0;
  p->vars = NULL;
  p->vars_len = 0;
  p->vars_cap = 0;
  p->values = NULL;
  p->types = NULL;
  p->sizes = NULL;
}
/*
 * Evaluate an expression to a 64-bit integer.
 *   EXPR_CONST - the constant stored in e->data
 *   EXPR_VAR   - the current value of the variable whose index is in e->data
 *   EXPR_OP    - not implemented; evaluates to 0
 * Returns 0 for a NULL expression, an expression without payload, or a
 * variable that has not been assigned a value yet (its slot is still NULL),
 * instead of dereferencing a NULL pointer.
 */
int64_t expr_eval(struct Parser *p, struct Expr *e) {
  if (e == NULL || e->data == NULL) return 0;
  switch (e->code) {
  case EXPR_CONST:
    return *(int64_t *) e->data;
  case EXPR_VAR: {
    size_t var_i = *(u_int64_t *) e->data;
    if (p->values == NULL || var_i >= p->vars_len) return 0;
    if (p->values[var_i] == NULL) return 0; /* declared but never decoded */
    return *(int64_t *) p->values[var_i];
  }
  case EXPR_OP:
  default:
    return 0;
  }
}
void put_value(struct Parser *p, struct Expr *e, int type, ssize_t size, void *value) {
if (e == NULL) return;
if (e->code != EXPR_VAR) return;
size_t i = *(u_int64_t*) e->data;
p->types[i] = type;
p->sizes[i] = size;
p->values[i] = value;
}
/*
 * Look up a variable by name.
 * Returns its index, or -1 when no variable of that name exists (including
 * when no variable has been declared at all).
 *
 * The scan is bounded by vars_len rather than by a NULL sentinel: the slots
 * past vars_len are not guaranteed to be NULL once the table has been grown.
 */
ssize_t parser_get_var(struct Parser *p, char *var) {
  if (p->vars == NULL || var == NULL) return -1;
  for (size_t i = 0; i < p->vars_len; i++) {
    const char *name = p->vars[i];
    if (name != NULL && strcmp(name, var) == 0) return (ssize_t) i;
  }
  return -1;
}
ssize_t rule_write(struct Parser *p, struct Level *l, struct Rule *r, char *src, size_t src_len) {
size_t offset = 0, var_i = 0;
u_int64_t *x = NULL;
u_int64_t n = 0;
int64_t rn = 0;
switch (r->code) {
case RULE_LOOP_BEGIN:
rn = expr_eval(p, r->n);
l->rules_index = 0;
l = malloc(sizeof(struct Level));
level_init(l);
l->rules = (struct Rule**) r->src;
l->rules_index = 0;
l->rules_len = rn;
l->rules_cap = rn;
p->levels[++p->level] = l;
break;
case RULE_LOOP_END:
l->rules_index = 0;
break;
case RULE_VARINT:
rn = expr_eval(p, r->n);
for (int i = 0; i < rn; i++) {
x = malloc(sizeof(u_int64_t));
n = varint_decode(src+offset, src_len-offset, x);
put_value(p, r->dst, TYPE_UINT64, sizeof(u_int64_t), x);
if (n == 0) return -1;
offset += n;
}
l->rules_index++;
break;
case RULE_U8ARRAY:
rn = expr_eval(p, r->n);
put_value(p, r->dst, TYPE_U8ARRAY, rn, src);
offset += rn;
l->rules_index++;
break;
case RULE_PRINTLN:
n = *(u_int64_t*) r->n->data;
size_t poffset = 0, sn = 0;
u_int64_t *indexes = (u_int64_t*) r->dst->data;
for (size_t i = 0; i < n; i++) {
var_i = indexes[i];
int type = p->types[var_i];
int size = p->sizes[var_i];
switch (type) {
case TYPE_UINT64:
sn = snprintf(
p->print_buffer+poffset,
p->print_buffer_size-poffset,
var_i == 0 ? "%lu" : ",%lu",
*(u_int64_t*) p->values[var_i]
);
if (sn < 0) {
dprintf(1, "snprintf failed\n");
exit(1);
}
poffset += sn;
break;
case TYPE_U8ARRAY:
memcpy(p->print_buffer+poffset, p->values[var_i], size);
poffset += size;
break;
}
}
if (poffset >= p->print_buffer_size) {
dprintf(1, "insufficient print_buffer\n");
exit(1);
}
p->print_buffer[poffset++] = 0;
printf("%s\n", p->print_buffer);
l->rules_index++;
break;
}
return offset;
}
/*
 * Feed src[0..src_len) through the rules of the current level.
 *
 * Returns the number of bytes consumed (never more than src_len). A rule
 * that cannot run yet stops processing, and the bytes consumed before it are
 * returned so the caller can retry with more data. Returns -1 when the
 * current level has no rule left to execute.
 */
ssize_t parser_write(struct Parser *p, char *src, size_t src_len) {
  size_t offset = 0;
  while (offset < src_len) {
    struct Level *l = p->levels[p->level];
    /* Bound by rules_len: slots past it are not guaranteed to be NULL. */
    if (l == NULL || l->rules == NULL || l->rules_index >= l->rules_len) return -1;
    struct Rule *r = l->rules[l->rules_index];
    if (r == NULL) return -1;
    size_t remaining = src_len - offset;
    ssize_t rlen = rule_write(p, l, r, src+offset, remaining);
    if (rlen < 0) return offset;
    /* Clamp so a misbehaving rule cannot push the result past src_len. */
    offset += (size_t) rlen > remaining ? remaining : (size_t) rlen;
  }
  return offset;
}
/*
 * Append rule `r` to the current level, creating the level stack, the level
 * or its rule array on demand and growing the array in steps of 16.
 *
 * One slot past the last rule is always kept NULL, and newly grown slots
 * are zeroed, so code that scans for a NULL terminator stays valid.
 * Terminates the process on allocation failure.
 */
void parser_push_rule(struct Parser *p, struct Rule *r) {
  if (p->levels == NULL) {
    p->levels_cap += 16;
    p->levels = calloc(p->levels_cap, sizeof *p->levels);
    if (p->levels == NULL) goto oom;
    p->level = 0;
  }
  struct Level *l = p->levels[p->level];
  if (l == NULL) {
    l = malloc(sizeof *l);
    if (l == NULL) goto oom;
    level_init(l);
    p->levels[p->level] = l; /* keep the new level reachable */
  }
  if (l->rules == NULL) {
    l->rules_cap += 16;
    l->rules = calloc(l->rules_cap, sizeof *l->rules);
    if (l->rules == NULL) goto oom;
  }
  if (l->rules_len+1 >= l->rules_cap) {
    size_t new_cap = l->rules_cap + 16;
    struct Rule **grown = realloc(l->rules, new_cap * sizeof *grown);
    if (grown == NULL) goto oom;
    /* realloc leaves the new tail uninitialised */
    memset(grown + l->rules_cap, 0, (new_cap - l->rules_cap) * sizeof *grown);
    l->rules = grown;
    l->rules_cap = new_cap;
  }
  l->rules[l->rules_len++] = r;
  return;
oom:
  fprintf(stderr, "out of memory\n");
  exit(1);
}
/*
 * Grow a table from old_count to new_count elements of elem_size bytes and
 * zero the new tail. Terminates the process on allocation failure.
 */
static void *parser_grow_table(void *old, size_t old_count, size_t new_count, size_t elem_size) {
  char *grown = realloc(old, new_count * elem_size);
  if (grown == NULL) {
    fprintf(stderr, "out of memory\n");
    exit(1);
  }
  memset(grown + old_count * elem_size, 0, (new_count - old_count) * elem_size);
  return grown;
}

/*
 * Return the index of variable `var`, declaring it if it does not exist yet.
 *
 * A newly declared variable stores the `var` pointer itself (no copy is
 * made), has no value, and has type and size -1. The four per-variable
 * tables (vars, values, types, sizes) are created on first use and grown
 * together in steps of 16; each table is reallocated from its own pointer.
 */
size_t parser_get_or_add_var(struct Parser *p, char *var) {
  if (p->vars == NULL) {
    p->vars_cap += 16;
    p->vars = calloc(p->vars_cap, sizeof *p->vars);
    p->values = calloc(p->vars_cap, sizeof *p->values);
    p->types = calloc(p->vars_cap, sizeof *p->types);
    p->sizes = calloc(p->vars_cap, sizeof *p->sizes);
    if (p->vars == NULL || p->values == NULL || p->types == NULL || p->sizes == NULL) {
      fprintf(stderr, "out of memory\n");
      exit(1);
    }
  }
  for (size_t i = 0; i < p->vars_len; i++) {
    if (p->vars[i] != NULL && strcmp(p->vars[i], var) == 0) return i;
  }
  if (p->vars_len+1 >= p->vars_cap) {
    size_t old_cap = p->vars_cap;
    size_t new_cap = old_cap + 16;
    p->vars = parser_grow_table(p->vars, old_cap, new_cap, sizeof *p->vars);
    p->values = parser_grow_table(p->values, old_cap, new_cap, sizeof *p->values);
    p->types = parser_grow_table(p->types, old_cap, new_cap, sizeof *p->types);
    p->sizes = parser_grow_table(p->sizes, old_cap, new_cap, sizeof *p->sizes);
    p->vars_cap = new_cap;
  }
  size_t slot = p->vars_len++;
  p->values[slot] = NULL;
  p->vars[slot] = var;
  p->types[slot] = -1;
  p->sizes[slot] = -1;
  return slot;
}
void parser_push_src(struct Parser *p, char *src) {
char *vsrc, *vdst, *type;
int64_t n;
int len;
size_t offset = 0, src_len = strlen(src);
while (offset < src_len) {
if (isspace(*(src+offset))) {
offset++;
} else if (src[offset] == ']') {
struct Rule *r = malloc(sizeof(struct Rule));
rule_init(r, RULE_LOOP_END);
parser_push_rule(p, r);
struct Level *l = p->levels[p->level];
struct Level *ul = p->levels[p->level-1];
struct Rule *ulr = ul->rules[ul->rules_len-1];
switch(ulr->code) {
case RULE_LOOP_BEGIN:
// free(p->levels[--p->level]); // ???
ulr->n = malloc(sizeof(struct Expr));
expr_init(ulr->n, EXPR_CONST);
ulr->n->data = malloc(sizeof(int64_t));
*(int64_t*)(ulr->n->data) = l->rules_len;
ulr->src = (void*) l->rules;
break;
default:
printf("lol idk\n");
break;
}
p->level--;
offset++;
} else if (src_len-offset >= 5 && strncmp(src+offset, "loop[", 5) == 0) {
struct Rule *r = malloc(sizeof(struct Rule));
rule_init(r, RULE_LOOP_BEGIN);
parser_push_rule(p, r);
struct Level *l = malloc(sizeof(struct Level));
level_init(l);
p->levels[++p->level] = l;
offset += 5;
} else if (sscanf(src+offset, "loop%m[ ][%n", &vdst, &len) == 1) {
struct Rule *r = malloc(sizeof(struct Rule));
rule_init(r, RULE_LOOP_BEGIN);
parser_push_rule(p, r);
struct Level *l = malloc(sizeof(struct Level));
level_init(l);
p->levels[++p->level] = l;
offset += len;
} else if (sscanf(src+offset, "v:%m[^: ]%n", &vdst, &len) == 1) {
struct Rule *r = malloc(sizeof(struct Rule));
rule_init(r, RULE_VARINT);
r->n = malloc(sizeof(struct Expr));
expr_init(r->n, EXPR_CONST);
r->n->data = malloc(sizeof(int64_t));
*(int64_t*)(r->n->data) = 1;
r->dst = malloc(sizeof(struct Expr));
expr_init(r->dst, EXPR_VAR);
r->dst->data = malloc(sizeof(int64_t));
*(u_int64_t*)(r->dst->data) = parser_get_or_add_var(p, vdst);
parser_push_rule(p, r);
offset += len;
} else if (sscanf(src+offset, "%m[a-z0-9][%lu]:%m[^: ]%n", &type, &n, &vdst, &len) == 3) {
struct Rule *r = malloc(sizeof(struct Rule));
if (strcmp(type,"u8") == 0) {
rule_init(r, RULE_U8ARRAY);
} else {
dprintf(1, "unexpected array type: %s\n", type);
exit(1);
}
r->n = malloc(sizeof(struct Expr));
expr_init(r->n, EXPR_CONST);
r->n->data = malloc(sizeof(u_int64_t));
*(u_int64_t*)(r->n->data) = n;
r->dst = malloc(sizeof(struct Expr));
expr_init(r->dst, EXPR_VAR);
r->n->data = malloc(sizeof(u_int64_t));
*(u_int64_t*)(r->dst->data) = parser_get_or_add_var(p, vdst);
parser_push_rule(p, r);
offset += len;
} else if (sscanf(src+offset, "%m[a-z0-9][%m[^]]]:%m[^] ]%n", &type, &vsrc, &vdst, &len) == 3) {
struct Rule *r = malloc(sizeof(struct Rule));
if (strcmp(type,"u8") == 0) {
rule_init(r, RULE_U8ARRAY);
} else {
dprintf(1, "unexpected array type: %s\n", type);
exit(1);
}
r->n = malloc(sizeof(struct Expr));
expr_init(r->n, EXPR_VAR);
r->n->data = malloc(sizeof(u_int64_t));
*(u_int64_t*)(r->n->data) = parser_get_or_add_var(p, vsrc);
r->dst = malloc(sizeof(struct Expr));
expr_init(r->dst, EXPR_VAR);
r->dst->data = malloc(sizeof(u_int64_t));
*(u_int64_t*)(r->dst->data) = parser_get_or_add_var(p, vdst);
parser_push_rule(p, r);
offset += len;
} else if (sscanf(src+offset, "println:[%m[^]]]%n", &vsrc, &len) == 1) {
size_t nvars = 0, vsrclen = len-10;
int is_first = 1;
for (size_t i = 0; vsrc[i]; i++) {
if (isspace(vsrc[i])) continue;
if (is_first) {
is_first = 0;
nvars++;
}
if (vsrc[i] != ',') continue;
nvars++;
vsrc[i] = 0x00;
// trim leading and trailing whitespace:
for (size_t j = i-1; j >= 0; j--) {
if (isspace(vsrc[j])) vsrc[j] = 0x00;
else break;
}
for (size_t j = i+1; j < vsrclen; j++) {
if (isspace(vsrc[j])) vsrc[j] = 0x00;
else break;
}
}
struct Rule *r = malloc(sizeof(struct Rule));
rule_init(r, RULE_PRINTLN);
r->n = malloc(sizeof(struct Expr));
expr_init(r->n, EXPR_CONST);
r->n->data = malloc(sizeof(u_int64_t));
*(u_int64_t*)(r->n->data) = nvars;
r->dst = malloc(sizeof(struct Expr));
expr_init(r->dst, EXPR_VAR);
r->dst->data = calloc(nvars, sizeof(u_int64_t));
size_t vstart = 0, dst_i = 0;
ssize_t var_i = 0;
for (size_t i = 0; i < vsrclen; i++) {
if (vsrc[i] != 0x00 && i != vsrclen-1) continue;
if (i-vstart > 1) {
var_i = parser_get_var(p, vsrc+vstart);
if (var_i < 0) {
dprintf(1, "undefined variable in printf: %s\n", vsrc+vstart);
exit(1);
}
((u_int64_t*) r->dst->data)[dst_i++] = var_i;
}
vstart = i+1;
}
parser_push_rule(p, r);
offset += len;
} else {
dprintf(1, "unexpected token at offset=%lu\n", offset);
exit(1);
}
}
}
/*
 * Entry point: the command-line arguments, joined with single spaces, form
 * the rule source; the binary data to decode is read from standard input.
 *
 * Input is read into a 4096-byte buffer. After each read the rules run over
 * everything buffered so far, and bytes they did not consume are moved to
 * the front of the buffer and retried with the next read. Returns 0 at end
 * of input or when the rules are exhausted, 1 on a read error.
 */
int main(int argc, char **argv) {
  enum { BUF_SIZE = 4096 };
  struct Parser p;
  parser_init(&p);

  size_t src_len = 0;
  for (int i = 1; i < argc; i++) {
    src_len += strlen(argv[i]) + (i > 1 ? 1 : 0);
  }
  /* +1 for the terminator: parser_push_src measures the text with strlen */
  char *src = malloc(src_len + 1);
  char *buf = malloc(BUF_SIZE);
  if (src == NULL || buf == NULL) {
    fprintf(stderr, "out of memory\n");
    return 1;
  }
  size_t offset = 0;
  for (int i = 1; i < argc; i++) {
    size_t n = strlen(argv[i]);
    if (i > 1) src[offset++] = ' ';
    memcpy(src+offset, argv[i], n);
    offset += n;
  }
  src[offset] = '\0';
  parser_push_src(&p, src);

  size_t pending = 0; /* unconsumed bytes held at the start of buf */
  for (;;) {
    if (pending == BUF_SIZE) {
      /* the rules need more contiguous data than the buffer can hold */
      dprintf(1, "insufficient parse buffer for the given data\n");
      exit(1);
    }
    ssize_t n = read(0, buf+pending, BUF_SIZE-pending);
    if (n < 0) {
      perror("read");
      return 1;
    }
    if (n == 0) break;
    size_t avail = pending + (size_t) n;
    ssize_t len = parser_write(&p, buf, avail);
    if (len < 0) return 0;
    size_t used = (size_t) len > avail ? avail : (size_t) len;
    pending = avail - used;
    memmove(buf, buf+used, pending);
  }
  return 0;
}