How can I add float parsing capability to a C lexical analyzer? -
I am trying to add floating-point functionality to a simple lexical analyzer I've written in C, for C (among other things). I have some ideas on how to do this, but they are incomplete solutions: they involve adding an if statement to the branch that parses integer literals, but that would still stop at the period and count it as a period token because of the while statement. I thought of adding an "or" to the while statement, but I'm not entirely sure how to specify a period only. Here is the code:
/* front.c - a lexical analyzer for simple arithmetic expressions.
 *
 * Reads characters from the file "front.in", groups them into lexemes and
 * prints one line per token giving its numeric code and its text.
 */
#include <stdio.h>
#include <ctype.h>
#include <string.h>
#ifdef _WIN32
#include <conio.h> /* non-standard header; not available on other toolchains */
#endif

/* global declarations */
/* variables */
int charclass;    /* class of nextchar: letter, digit, unknown or EOF */
char lexeme[100]; /* text of the lexeme being built, kept NUL-terminated */
int nextchar;     /* last value returned by getc(); int so that EOF stays
                     distinct from every real character */
int lexlen;       /* number of characters currently stored in lexeme */
int token;
int nexttoken;    /* code of the token most recently recognised */
FILE *in_fp;      /* input stream, opened by main() */

/* function declarations */
void addchar(void);
void get_char(void);
void getnonblank(void);
int lookup(char ch);
int lex(void);

/* character classes */
#define letter 0
#define digit 1
#define unknown 99

/* token codes */
#define int_lit 10
#define ident 11
#define float_lit 12 /* reserved for floating-point literals; lex() does not
                        produce it yet */
#define assign_op 20
#define add_op 21
#define sub_op 22
#define mult_op 23
#define div_op 24
#define left_paren 25
#define right_paren 26
#define mod_op 27
#define semicol 28
#define comma 29
#define exclamation_mark 30
#define at_sign 31
#define pound_sign 32
#define dollar_sign 33
#define carat_sign 34
#define ampersand 35
#define period_mark 36
#define lessthan_sign 37
#define greaterthan_sign 38
#define question_mark 39
#define left_squarebracket 40
#define right_squarebracket 41
#define left_curlybracket 42
#define right_curlybracket 43
#define backslash 44
#define verticalbar 45
#define single_quote 46
#define double_quote 47
#define colon 48
#define underscore 49
#define tilde 50
#define grave_accent 51

/*********************/
/* main driver */
int main(void)
{
    /* open the input data file and process its contents */
    in_fp = fopen("front.in", "r");
    if (in_fp == NULL) {
        printf("error - cannot open front.in \n");
        return 1;
    }

    get_char(); /* prime nextchar/charclass before the first lex() call */
    do {
        lex();
    } while (nexttoken != EOF);

    fclose(in_fp);
    return 0;
}

/***************************/
/* lookup - map an operator or punctuation character to its token code.
 *
 * ch selects the token; the character appended to lexeme is nextchar (lex()
 * always passes nextchar as ch). Stores the code in nexttoken and returns it.
 * A character with no entry yields EOF, which makes main() stop scanning.
 */
int lookup(char ch)
{
    int code;

    switch (ch) {
    case '=':  code = assign_op;          break;
    case '(':  code = left_paren;         break;
    case ')':  code = right_paren;        break;
    case '+':  code = add_op;             break;
    case '-':  code = sub_op;             break;
    case '*':  code = mult_op;            break;
    case '/':  code = div_op;             break;
    case '%':  code = mod_op;             break;
    case ';':  code = semicol;            break;
    case ':':  code = colon;              break;
    case '"':  code = double_quote;       break;
    case ',':  code = comma;              break;
    case '.':  code = period_mark;        break;
    case '!':  code = exclamation_mark;   break;
    case '@':  code = at_sign;            break;
    case '#':  code = pound_sign;         break;
    case '$':  code = dollar_sign;        break;
    case '^':  code = carat_sign;         break;
    case '&':  code = ampersand;          break;
    case '<':  code = lessthan_sign;      break;
    case '>':  code = greaterthan_sign;   break;
    case '?':  code = question_mark;      break;
    case '[':  code = left_squarebracket; break;
    case ']':  code = right_squarebracket; break;
    case '{':  code = left_curlybracket;  break;
    case '}':  code = right_curlybracket; break;
    case '\'': code = single_quote;       break;
    case '|':  code = verticalbar;        break;
    case '_':  code = underscore;         break;
    case '~':  code = tilde;              break;
    case '`':  code = grave_accent;       break;
    case '\\': code = backslash;          break;
    default:   code = EOF;                break;
    }

    addchar();
    nexttoken = code;
    return nexttoken;
}

/*****************************/
/* addchar - append nextchar to lexeme.
 *
 * One byte is always kept free for the terminating NUL; when the buffer is
 * full the character is dropped and an error message is printed.
 */
void addchar(void)
{
    if (lexlen + 1 < (int)sizeof lexeme) {
        lexeme[lexlen++] = (char)nextchar;
        lexeme[lexlen] = 0;
    } else {
        printf("error - lexeme long \n");
    }
}

/**********************************/
/* get_char - read the next character of input into nextchar and store its
 * character class in charclass.
 *
 * Not named getchar because <stdio.h> already declares int getchar(void).
 */
void get_char(void)
{
    nextchar = getc(in_fp);

    if (nextchar == EOF) {
        charclass = EOF;
    } else if (isalpha(nextchar)) {
        charclass = letter;
    } else if (isdigit(nextchar)) {
        charclass = digit;
    } else {
        charclass = unknown;
    }
}

/********************************************/
/* getnonblank - call get_char until nextchar is not a whitespace character */
void getnonblank(void)
{
    while (isspace(nextchar)) {
        get_char();
    }
}

/*******************************/
/* lex - recognise the next lexeme of the input.
 *
 * Leaves the lexeme text in lexeme and its code in nexttoken, prints both,
 * and returns the code. On return nextchar holds the first character after
 * the lexeme.
 */
int lex(void)
{
    lexlen = 0;
    getnonblank();

    switch (charclass) {
    /* parse identifiers */
    case letter:
        addchar();
        get_char();
        while (charclass == letter || charclass == digit) {
            addchar();
            get_char();
        }
        nexttoken = ident;
        break;

    /* parse integer literals (floating-point literals are not recognised) */
    case digit:
        addchar();
        get_char();
        while (charclass == digit) {
            addchar();
            get_char();
        }
        nexttoken = int_lit;
        break;

    /* parentheses and operators */
    case unknown:
        lookup((char)nextchar);
        get_char();
        break;

    /* end of file */
    case EOF:
        nexttoken = EOF;
        lexeme[0] = 'e';
        lexeme[1] = 'o';
        lexeme[2] = 'f';
        lexeme[3] = 0;
        break;

    default:
        /* get_char() only ever stores the four classes handled above */
        break;
    } /* end of switch */

    printf("next token is: %d, next lexeme %s\n", nexttoken, lexeme);
    return nexttoken;
} /* end of function lex */
I was thinking that maybe I could use "charclass.ch == '.'" in the while statement as an extension of "charclass == digit" via || (or), but I think I might be getting mixed up with another language or doing it wrong. I might not be, but it's kind of hard to test the program at the moment.
Here is the specific part I think I need to change for floats:
/*parse integer literals , ?floats?*/ case digit: addchar(); getchar(); while (charclass == digit) { addchar(); getchar(); } nexttoken = int_lit; break;
/*parse integer literals , ?floats?*/ case digit: addchar(); getchar(); while (charclass == digit) { addchar(); getchar(); }
At this point you know what nextchar is. If it's a dot, write more code to consume it and the following digits, and set nexttoken to float_lit.
Otherwise, fall through to this:
nexttoken = int_lit; break;
Comments
Post a Comment