// sbed / main.c
// streaming binary encoder/decoder
// git clone http://git.nthia.dev/sbed

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <varint.h>
#include <ctype.h>

// Kinds of expression stored in struct Expr.code.
enum {
  EXPR_CONST = 0, // data points at a literal 64-bit value
  EXPR_VAR   = 1, // data points at a 64-bit variable index
  EXPR_OP    = 2, // not evaluated by expr_eval (always yields 0)
};

// A tagged expression node: `code` is one of the EXPR_* constants and
// selects how the heap payload behind `data` is interpreted.
struct Expr {
  int code;   // EXPR_* discriminator
  void *data; // payload; NULL until the creator fills it in
};

void expr_init(struct Expr *e, int code) {
  e->code = code;
  e->data = NULL;
}

// Runtime type tags recorded per variable in Parser.types.
enum {
  TYPE_UINT64  = 0, // value points at a single u_int64_t
  TYPE_U8ARRAY = 1, // value points at raw bytes; length is kept in Parser.sizes
};

// Rule opcodes stored in struct Rule.code and dispatched by rule_write.
enum {
  RULE_VARINT     = 0, // decode varint(s) into a variable
  RULE_U8ARRAY    = 1, // bind a run of bytes to a variable
  RULE_LOOP_BEGIN = 2, // enter a nested level of rules
  RULE_LOOP_END   = 3, // rewind the current level to its first rule
  RULE_PRINTLN    = 4, // print a list of variables followed by a newline
};

// One step of the decoding program.
struct Rule {
  int code;         // RULE_* opcode
  struct Expr *n;   // count operand: repeat count, byte length, number of
                    // printed variables, or number of rules in a loop body
  struct Expr *src; // for RULE_LOOP_BEGIN this slot carries the loop body's
                    // rule array (stored through a cast); unused otherwise
  struct Expr *dst; // destination variable; for RULE_PRINTLN its data is an
                    // array of variable indexes
};

void rule_init(struct Rule *r, int code) {
  r->code = code;
  r->n = NULL;
  r->src = NULL;
  r->dst = NULL;
}

// A sequence of rules plus the cursor used while executing them.
struct Level {
  struct Rule **rules; // array of rule pointers
  size_t rules_index;  // next rule to execute
  size_t rules_len;    // number of rules stored
  size_t rules_cap;    // number of slots allocated in `rules`
};

void level_init(struct Level *l) {
  l->rules_index = 0;
  l->rules_len = 0;
  l->rules_cap = 16;
  l->rules = calloc(l->rules_cap, sizeof(struct Rule*));
}

// Whole-program state: the stack of rule levels, the variable table and a
// scratch buffer used when printing.
struct Parser {
  struct Level **levels;    // level stack; levels[level] is the active one
  size_t level;             // index of the active level
  size_t levels_cap;        // number of slots allocated in `levels`
  size_t next_var_i;        // set to 0 by parser_init; not read elsewhere in this file
  char **vars;              // variable names; the array index is the variable id
  size_t vars_len;          // number of variables defined
  size_t vars_cap;          // slots allocated in vars/values/types/sizes
  void **values;            // per-variable pointer to the current value
  int *types;               // per-variable TYPE_* tag, -1 until a value is stored
  int *sizes;               // per-variable size in bytes, -1 until a value is stored
  size_t print_buffer_size; // capacity of print_buffer
  char *print_buffer;       // scratch space for RULE_PRINTLN output
};

void parser_init(struct Parser *p) {
  p->levels_cap = 16;
  p->levels = calloc(p->levels_cap, sizeof(struct Level *));
  p->levels[0] = malloc(sizeof(struct Level));
  level_init(p->levels[0]);
  p->level = 0;
  p->next_var_i = 0;
  p->vars = NULL;
  p->vars_len = 0;
  p->vars_cap = 0;
  p->values = NULL;
  p->types = NULL;
  p->sizes = NULL;
  p->print_buffer_size = 1024;
  p->print_buffer = malloc(p->print_buffer_size);
}

// Evaluate an expression to a signed 64-bit integer.
//   EXPR_CONST: the literal stored behind e->data.
//   EXPR_VAR:   the current value of the variable whose index is stored
//               behind e->data.
//   EXPR_OP:    not implemented; evaluates to 0.
// A missing expression, a missing payload, or a variable that has not been
// given a value yet evaluates to 0 instead of dereferencing NULL.
int64_t expr_eval(struct Parser *p, struct Expr *e) {
  if (e == NULL || e->data == NULL) return 0;
  switch (e->code) {
    case EXPR_CONST:
      return *(int64_t *) e->data;
    case EXPR_VAR: {
      // values[i] stays NULL from variable creation until put_value runs.
      void *value = p->values[*(u_int64_t *) e->data];
      return value == NULL ? 0 : *(int64_t *) value;
    }
    case EXPR_OP:
    default:
      return 0;
  }
}

// Record `value` (with its TYPE_* tag and byte size) as the current value of
// the variable named by `e`. Does nothing when `e` is NULL or is not an
// EXPR_VAR expression. The pointer is stored as-is; no copy is made.
void put_value(struct Parser *p, struct Expr *e, int type, ssize_t size, void *value) {
  if (e == NULL || e->code != EXPR_VAR) return;

  size_t slot = *(u_int64_t*) e->data;
  p->values[slot] = value;
  p->sizes[slot] = size;
  p->types[slot] = type;
}

// Look up a variable by name.
// Returns its index in p->vars, or -1 when no variable of that name exists
// (including when no variable has been defined at all).
ssize_t parser_get_var(struct Parser *p, char *var) {
  if (p->vars == NULL) return -1;
  // Bound the scan by vars_len: slots past it are not guaranteed to hold a
  // NULL terminator once the table has been grown.
  for (size_t i = 0; i < p->vars_len && p->vars[i] != NULL; i++) {
    if (strcmp(p->vars[i], var) == 0) return i;
  }
  return -1;
}

// Execute rule `r` of level `l` against the src_len bytes starting at src.
//
// Returns the number of input bytes consumed (0 for rules that consume no
// input), or -1 when the available bytes are not enough to complete the rule;
// in that case the level's cursor is left on the same rule.
// Unrecoverable conditions (allocation failure, print buffer too small,
// malformed loop, loops nested past levels_cap) print a message and exit(1).
ssize_t rule_write(struct Parser *p, struct Level *l, struct Rule *r, char *src, size_t src_len) {
  size_t offset = 0;
  int64_t rn = 0;
  switch (r->code) {
    case RULE_LOOP_BEGIN: {
      // n and src are only filled in when the schema parser sees the closing
      // ']' of the loop.
      if (r->n == NULL || r->src == NULL) {
        dprintf(1, "loop is missing its closing ']'\n");
        exit(1);
      }
      if (p->level + 1 >= p->levels_cap) {
        dprintf(1, "loops nested too deeply\n");
        exit(1);
      }
      rn = expr_eval(p, r->n);
      l->rules_index = 0;
      // The new level borrows the rule array recorded on the loop rule, so it
      // is filled in by hand rather than via level_init (which would allocate
      // an array that is immediately discarded).
      struct Level *body = malloc(sizeof *body);
      if (body == NULL) {
        dprintf(1, "out of memory\n");
        exit(1);
      }
      body->rules = (struct Rule**) r->src;
      body->rules_index = 0;
      body->rules_len = rn;
      body->rules_cap = rn;
      p->levels[++p->level] = body;
      break;
    }
    case RULE_LOOP_END:
      // Rewind so the loop body runs again from its first rule.
      l->rules_index = 0;
      break;
    case RULE_VARINT:
      rn = expr_eval(p, r->n);
      for (int64_t i = 0; i < rn; i++) {
        u_int64_t *x = malloc(sizeof *x);
        if (x == NULL) {
          dprintf(1, "out of memory\n");
          exit(1);
        }
        u_int64_t n = varint_decode(src+offset, src_len-offset, x);
        if (n == 0) {
          // Nothing was decoded: do not publish the scratch value.
          free(x);
          return -1;
        }
        put_value(p, r->dst, TYPE_UINT64, sizeof *x, x);
        offset += n;
      }
      l->rules_index++;
      break;
    case RULE_U8ARRAY:
      rn = expr_eval(p, r->n);
      // The variable will point straight into src, so the whole array must
      // already be available.
      if (rn < 0 || (u_int64_t) rn > src_len) return -1;
      put_value(p, r->dst, TYPE_U8ARRAY, rn, src);
      offset += rn;
      l->rules_index++;
      break;
    case RULE_PRINTLN: {
      u_int64_t count = *(u_int64_t*) r->n->data;
      u_int64_t *indexes = (u_int64_t*) r->dst->data;
      // Invariant: poffset < print_buffer_size, leaving room for the final NUL.
      size_t poffset = 0;
      for (size_t i = 0; i < count; i++) {
        size_t var_i = indexes[i];
        size_t room = p->print_buffer_size - poffset;
        switch (p->types[var_i]) {
          case TYPE_UINT64: {
            // The separator depends on the position in the list, not on
            // which variable is being printed.
            int sn = snprintf(
              p->print_buffer+poffset,
              room,
              i == 0 ? "%llu" : ",%llu",
              (unsigned long long) *(u_int64_t*) p->values[var_i]
            );
            if (sn < 0) {
              dprintf(1, "snprintf failed\n");
              exit(1);
            }
            if ((size_t) sn >= room) {
              dprintf(1, "insufficient print_buffer\n");
              exit(1);
            }
            poffset += sn;
            break;
          }
          case TYPE_U8ARRAY: {
            int size = p->sizes[var_i];
            if (size < 0 || (size_t) size >= room) {
              dprintf(1, "insufficient print_buffer\n");
              exit(1);
            }
            memcpy(p->print_buffer+poffset, p->values[var_i], size);
            poffset += size;
            break;
          }
          default:
            // Variables that have no value yet (type -1) print nothing.
            break;
        }
      }
      p->print_buffer[poffset] = 0;
      printf("%s\n", p->print_buffer);
      l->rules_index++;
      break;
    }
    default:
      break;
  }
  return offset;
}

// Feed src_len bytes at src through the rules of the active level.
//
// Returns the number of bytes consumed. When a rule cannot complete with the
// bytes that remain, the count consumed before that rule is returned so the
// caller can retry with more data. Returns -1 when the active level has no
// rule left to run.
ssize_t parser_write(struct Parser *p, char *src, size_t src_len) {
  size_t offset = 0;
  while (offset < src_len) {
    struct Level *l = p->levels[p->level];
    // Slots at or past rules_len are not guaranteed to be NULL, so check the
    // cursor before reading the slot.
    if (l->rules_index >= l->rules_len) return -1;
    struct Rule *r = l->rules[l->rules_index];
    if (r == NULL) return -1;
    ssize_t rlen = rule_write(p, l, r, src+offset, src_len-offset);
    if (rlen < 0) return offset;
    offset += rlen;
  }
  return offset;
}

// Append rule `r` to the active level, growing the level's rule array in
// steps of 16 slots. Unused slots are always zeroed so the array stays
// NULL-terminated. Exits with a message if memory cannot be obtained.
void parser_push_rule(struct Parser *p, struct Rule *r) {
  struct Level *l = NULL;
  if (p->levels == NULL) {
    // No level stack yet: create one and install a fresh root level in it.
    p->levels_cap += 16;
    p->levels = calloc(p->levels_cap, sizeof *p->levels);
    l = malloc(sizeof *l);
    if (p->levels == NULL || l == NULL) {
      dprintf(1, "out of memory\n");
      exit(1);
    }
    level_init(l);
    p->level = 0;
    p->levels[p->level] = l;
  } else {
    l = p->levels[p->level];
  }
  if (l->rules == NULL) {
    l->rules_cap += 16;
    l->rules = calloc(l->rules_cap, sizeof *l->rules);
    if (l->rules == NULL) {
      dprintf(1, "out of memory\n");
      exit(1);
    }
  }
  if (l->rules_len+1 >= l->rules_cap) {
    size_t new_cap = l->rules_cap + 16;
    struct Rule **grown = reallocarray(l->rules, new_cap, sizeof *grown);
    if (grown == NULL) {
      dprintf(1, "out of memory\n");
      exit(1);
    }
    // reallocarray leaves the added tail uninitialized.
    memset(grown + l->rules_cap, 0, (new_cap - l->rules_cap) * sizeof *grown);
    l->rules = grown;
    l->rules_cap = new_cap;
  }
  l->rules[l->rules_len++] = r;
}

// Grow `array` from old_count to new_count elements of elem_size bytes,
// zero-filling the added tail. Exits with a message on allocation failure.
static void *vars_grow(void *array, size_t old_count, size_t new_count, size_t elem_size) {
  char *grown = reallocarray(array, new_count, elem_size);
  if (grown == NULL) {
    dprintf(1, "out of memory\n");
    exit(1);
  }
  memset(grown + old_count * elem_size, 0, (new_count - old_count) * elem_size);
  return grown;
}

// Return the index of the variable called `var`, defining it first if it does
// not exist yet.
//
// A newly defined variable starts with a NULL value and type/size -1. The
// table keeps the `var` pointer itself (no copy) when the variable is new;
// when the name already exists `var` is not retained.
size_t parser_get_or_add_var(struct Parser *p, char *var) {
  if (p->vars == NULL) {
    p->vars_cap += 16;
    p->vars = calloc(p->vars_cap, sizeof *p->vars);
    p->values = calloc(p->vars_cap, sizeof *p->values);
    p->types = calloc(p->vars_cap, sizeof *p->types);
    p->sizes = calloc(p->vars_cap, sizeof *p->sizes);
    if (p->vars == NULL || p->values == NULL || p->types == NULL || p->sizes == NULL) {
      dprintf(1, "out of memory\n");
      exit(1);
    }
  }
  for (size_t i = 0; i < p->vars_len; i++) {
    if (strcmp(p->vars[i], var) == 0) return i;
  }
  if (p->vars_len+1 >= p->vars_cap) {
    size_t old_cap = p->vars_cap;
    size_t new_cap = old_cap + 16;
    // Each of the four parallel arrays is grown from its own pointer.
    p->vars = vars_grow(p->vars, old_cap, new_cap, sizeof *p->vars);
    p->values = vars_grow(p->values, old_cap, new_cap, sizeof *p->values);
    p->types = vars_grow(p->types, old_cap, new_cap, sizeof *p->types);
    p->sizes = vars_grow(p->sizes, old_cap, new_cap, sizeof *p->sizes);
    p->vars_cap = new_cap;
  }
  size_t slot = p->vars_len++;
  p->values[slot] = NULL;
  p->vars[slot] = var;
  p->types[slot] = -1;
  p->sizes[slot] = -1;
  return slot;
}

void parser_push_src(struct Parser *p, char *src) {
  char *vsrc, *vdst, *type;
  int64_t n;
  int len;
  size_t offset = 0, src_len = strlen(src);
  while (offset < src_len) {
    if (isspace(*(src+offset))) {
      offset++;
    } else if (src[offset] == ']') {
      struct Rule *r = malloc(sizeof(struct Rule));
      rule_init(r, RULE_LOOP_END);
      parser_push_rule(p, r);

      struct Level *l = p->levels[p->level];
      struct Level *ul = p->levels[p->level-1];
      struct Rule *ulr = ul->rules[ul->rules_len-1];
      switch(ulr->code) {
        case RULE_LOOP_BEGIN:
          // free(p->levels[--p->level]); // ???
          ulr->n = malloc(sizeof(struct Expr));
          expr_init(ulr->n, EXPR_CONST);
          ulr->n->data = malloc(sizeof(int64_t));
          *(int64_t*)(ulr->n->data) = l->rules_len;

          ulr->src = (void*) l->rules;
          break;
        default:
          printf("lol idk\n");
          break;
      }
      p->level--;
      offset++;
    } else if (src_len-offset >= 5 && strncmp(src+offset, "loop[", 5) == 0) {
      struct Rule *r = malloc(sizeof(struct Rule));
      rule_init(r, RULE_LOOP_BEGIN);
      parser_push_rule(p, r);

      struct Level *l = malloc(sizeof(struct Level));
      level_init(l);
      p->levels[++p->level] = l;
      offset += 5;
    } else if (sscanf(src+offset, "loop%m[ ][%n", &vdst, &len) == 1) {
      struct Rule *r = malloc(sizeof(struct Rule));
      rule_init(r, RULE_LOOP_BEGIN);
      parser_push_rule(p, r);

      struct Level *l = malloc(sizeof(struct Level));
      level_init(l);
      p->levels[++p->level] = l;
      offset += len;
    } else if (sscanf(src+offset, "v:%m[^: ]%n", &vdst, &len) == 1) {
      struct Rule *r = malloc(sizeof(struct Rule));
      rule_init(r, RULE_VARINT);

      r->n = malloc(sizeof(struct Expr));
      expr_init(r->n, EXPR_CONST);
      r->n->data = malloc(sizeof(int64_t));
      *(int64_t*)(r->n->data) = 1;

      r->dst = malloc(sizeof(struct Expr));
      expr_init(r->dst, EXPR_VAR);
      r->dst->data = malloc(sizeof(int64_t));
      *(u_int64_t*)(r->dst->data) = parser_get_or_add_var(p, vdst);

      parser_push_rule(p, r);
      offset += len;
    } else if (sscanf(src+offset, "%m[a-z0-9][%lu]:%m[^: ]%n", &type, &n, &vdst, &len) == 3) {
      struct Rule *r = malloc(sizeof(struct Rule));
      if (strcmp(type,"u8") == 0) {
        rule_init(r, RULE_U8ARRAY);
      } else {
        dprintf(1, "unexpected array type: %s\n", type);
        exit(1);
      }

      r->n = malloc(sizeof(struct Expr));
      expr_init(r->n, EXPR_CONST);
      r->n->data = malloc(sizeof(u_int64_t));
      *(u_int64_t*)(r->n->data) = n;

      r->dst = malloc(sizeof(struct Expr));
      expr_init(r->dst, EXPR_VAR);
      r->n->data = malloc(sizeof(u_int64_t));
      *(u_int64_t*)(r->dst->data) = parser_get_or_add_var(p, vdst);

      parser_push_rule(p, r);
      offset += len;
    } else if (sscanf(src+offset, "%m[a-z0-9][%m[^]]]:%m[^] ]%n", &type, &vsrc, &vdst, &len) == 3) {
      struct Rule *r = malloc(sizeof(struct Rule));
      if (strcmp(type,"u8") == 0) {
        rule_init(r, RULE_U8ARRAY);
      } else {
        dprintf(1, "unexpected array type: %s\n", type);
        exit(1);
      }

      r->n = malloc(sizeof(struct Expr));
      expr_init(r->n, EXPR_VAR);
      r->n->data = malloc(sizeof(u_int64_t));
      *(u_int64_t*)(r->n->data) = parser_get_or_add_var(p, vsrc);

      r->dst = malloc(sizeof(struct Expr));
      expr_init(r->dst, EXPR_VAR);
      r->dst->data = malloc(sizeof(u_int64_t));
      *(u_int64_t*)(r->dst->data) = parser_get_or_add_var(p, vdst);

      parser_push_rule(p, r);
      offset += len;
    } else if (sscanf(src+offset, "println:[%m[^]]]%n", &vsrc, &len) == 1) {
      size_t nvars = 0, vsrclen = len-10;
      int is_first = 1;
      for (size_t i = 0; vsrc[i]; i++) {
        if (isspace(vsrc[i])) continue;
        if (is_first) {
          is_first = 0;
          nvars++;
        }
        if (vsrc[i] != ',') continue;
        nvars++;
        vsrc[i] = 0x00;
        // trim leading and trailing whitespace:
        for (size_t j = i-1; j >= 0; j--) {
          if (isspace(vsrc[j])) vsrc[j] = 0x00;
          else break;
        }
        for (size_t j = i+1; j < vsrclen; j++) {
          if (isspace(vsrc[j])) vsrc[j] = 0x00;
          else break;
        }
      }

      struct Rule *r = malloc(sizeof(struct Rule));
      rule_init(r, RULE_PRINTLN);
      r->n = malloc(sizeof(struct Expr));
      expr_init(r->n, EXPR_CONST);
      r->n->data = malloc(sizeof(u_int64_t));
      *(u_int64_t*)(r->n->data) = nvars;

      r->dst = malloc(sizeof(struct Expr));
      expr_init(r->dst, EXPR_VAR);
      r->dst->data = calloc(nvars, sizeof(u_int64_t));
      size_t vstart = 0, dst_i = 0;
      ssize_t var_i = 0;
      for (size_t i = 0; i < vsrclen; i++) {
        if (vsrc[i] != 0x00 && i != vsrclen-1) continue;
        if (i-vstart > 1) {
          var_i = parser_get_var(p, vsrc+vstart);
          if (var_i < 0) {
            dprintf(1, "undefined variable in printf: %s\n", vsrc+vstart);
            exit(1);
          }
          ((u_int64_t*) r->dst->data)[dst_i++] = var_i;
        }
        vstart = i+1;
      }

      parser_push_rule(p, r);
      offset += len;
    } else {
      dprintf(1, "unexpected token at offset=%lu\n", offset);
      exit(1);
    }
  }
}

// Entry point: the command-line arguments, joined with single spaces, form
// the schema; the binary data to decode is read from standard input.
// Exits with status 1 if a single item does not fit in the parse buffer.
int main(int argc, char **argv) {
  enum { PARSE_BUF_SIZE = 4096 };

  struct Parser p;
  parser_init(&p);

  size_t src_len = 0;
  for (int i = 1; i < argc; i++) {
    src_len += strlen(argv[i]) + (i > 1 ? 1 : 0);
  }
  // One extra byte: parser_push_src measures the schema with strlen.
  char *src = malloc(src_len + 1);
  if (src == NULL) {
    dprintf(1, "out of memory\n");
    exit(1);
  }
  size_t offset = 0;
  for (int i = 1; i < argc; i++) {
    size_t n = strlen(argv[i]);
    if (i > 1) src[offset++] = ' ';
    memcpy(src+offset, argv[i], n);
    offset += n;
  }
  src[offset] = '\0';
  parser_push_src(&p, src);

  char *buf = malloc(PARSE_BUF_SIZE);
  if (buf == NULL) {
    dprintf(1, "out of memory\n");
    exit(1);
  }
  // Bytes at the front of buf that were read but not yet consumed.
  size_t pending = 0;
  for (;;) {
    if (pending == PARSE_BUF_SIZE) {
      // The buffer is full and the parser still could not make progress.
      dprintf(1, "insufficient parse buffer for the given data\n");
      exit(1);
    }
    ssize_t n = read(0, buf+pending, PARSE_BUF_SIZE-pending);
    if (n <= 0) break;

    size_t avail = pending + (size_t) n;
    ssize_t len = parser_write(&p, buf, avail);
    if (len < 0) return 0;
    size_t used = (size_t) len > avail ? avail : (size_t) len;
    // Keep the unconsumed tail at the start of the buffer for the next round.
    pending = avail - used;
    memmove(buf, buf+used, pending);
  }
  return 0;
}