#include #include #include #include #include #include "token.h" #include "symbol.h" #include "emalloc.h" static FILE *fp; static int linenumber, last_was_newline; static int c; static struct token grabstring(), grabalpha(), grabinteger(); static void consume() { c = getc(fp); if (last_was_newline) linenumber++; last_was_newline = (c == '\n'); } void token_init(FILE *fparg) { fp = fparg; linenumber = 1; last_was_newline = 0; consume(); /* prime the character buffer */ } struct token get_next_token() { struct token t; while (1) { if (c == EOF || strchr("{}().,;:+-*/%~&|=!<>", c)) { t.type = c; consume(); return t; } else if (isspace(c)) { consume(); } else if (c == '"') { return grabstring(); } else if (isalpha(c)) { return grabalpha(); } else if (isdigit(c)) { return grabinteger(); } else { fprintf(stderr, "completely unexpected character on line %d\n", linenumber); consume(); } } } struct token grabalpha() { char buf[100]; int i; struct token t; for (i = 0; (isalnum(c) || c == '_') && i < sizeof buf; i++) { buf[i] = c; consume(); } if (i == sizeof buf) { fprintf(stderr, "identifier exceeds 99 characters in length on line %d\n", linenumber); exit(1); } buf[i] = '\0'; if ((t.symvalue = symbol_lookup(buf)) == NULL) { fprintf(stderr, "out of memory on line %d\n", linenumber); exit(1); } t.type = TOKEN_IDENT; return t; } struct token grabstring() { char buf[300]; int i; struct token t; consume(); /* consume the opening double-quote */ for (i = 0; c != '"' && c != '\n' && i < sizeof buf; i++) { buf[i] = c; consume(); } if (i == sizeof buf) { fprintf(stderr, "string exceeds 299 characters in length on line %d\n", linenumber); exit(1); } else if (c < ' ') { fprintf(stderr, "end-of-line reached in the middle of a string on line %d\n", linenumber); /* might as well return the string anyway */ } buf[i] = '\0'; consume(); /* consume the terminating character (closing double-quote) */ t.strvalue = emalloc(strlen(buf) + 1); t.type = TOKEN_STRCONST; strcpy(t.strvalue, buf); return t; } struct token grabinteger() { struct token t; t.type = TOKEN_ICONST; for (t.ivalue = 0; c != EOF && isdigit(c); consume()) { if (t.ivalue > (INT_MAX - (c - '0')) / 10) { t.ivalue = 0; /* decrease quantity of further errors around loop */ /* yes this is hacky, but it's a simple lexer */ fprintf(stderr, "integer overflow on line %d\n", linenumber); } t.ivalue = t.ivalue * 10 + (c - '0'); } return t; }