/* PURPOSE:
 *    SCANNER.C: Functions that implement a Lexical Scanner.
 *    scanner_init() must be called before using the scanner().
 *    When done, call scanner_term() to tidy up.
 * HISTORY:
 *    Ian D. Allen   idallen@freenet.carleton.ca
 */

//FIXME-- add system includes here
//FIXME-- add your local includes here

/* Initial character memory size and increment used to collect tokens.
 */
#define TOKEN_INIT 10
#define TOKEN_INCR 10

/* State names for the DFA.
 *
 * States less than ST__ACCEPTING_BEGIN are "going" states; not accepting.
 * States greater than ST__ACCEPTING_BEGIN cause the DFA to stop.
 * Dummy state ST__ACCEPTING_BEGIN is only used by the ACCEPTING() macro;
 * this dummy state divides the states into "not accepting" and "accepting".
 *
 * Accepting states are numbered above 128 so that the accepting state
 * numbers look quite different from the other states.  The other states
 * (non-accepting) must be consecutively numbered from zero so that
 * they may be used directly as indexes into a 2-D DFA Transition Table.
 *
 * The ST__END state isn't used; it's just there as a place holder
 * for silly compilers that don't want trailing commas inside enums.
 */
typedef enum {
	/* States < ST__ACCEPTING_BEGIN keep the DFA going.
	 * These states must match EXACTLY with the corresponding
	 * slices of the 2-D DFA Transition Table used by next_state().
	 */
	ST_START=0,	/* start */
	ST_INID=1,	/* inside identifier */
//FIXME-- carefully add your other non-accepting DFA states here

	/* States > ST__ACCEPTING_BEGIN cause the DFA to accept and stop.
	 * These states don't have to be in any order; but, each
	 * accepting state needs to be handled by the scanner() function.
	 */
#define ACCEPTING(s) ((s) > ST__ACCEPTING_BEGIN)
	ST__ACCEPTING_BEGIN=128,

	ST_ERROR,	/* a DFA error (unknown transition) */
	ST_GOTEOF,	/* got EOF */
	ST_GOTID,	/* got identifier */
//FIXME-- add your new accepting DFA states here, in any order

	ST__END		/* for compiler syntax: dummy entry without comma */
} State;


/* Here is the static information private to the scanner.
 * All of this data is initialized by scanner_init() so that
 * it can be used by scanner() without having any arguments passed.
 */
//FIXME-- put the rest of the static data initialized by scanner_init here
static long linecounter = -999;		/* initialize counter with garbage */

/* The English names of the TokenType enum, and the function
 * that turns the enum into the English name.  This list must match
 * exactly the TokenType enum in scanner.h.
 * The validity check in the function helps detect cases where we add
 * to the TokenType and forget to update the tokenName list here as well.
 */
static char *tokenName[] = {
	"Unknown",	/* each of these must match, in order, a TokenType */
	"Error",
	"EOF",
	"Junk",
	"Identifier",
	"Unsigned Integer",
//FIXME-- add the strings that match your new Token types here
};
	char *
token_name(		/* RET: English description of token */
	TokenType token	/* IN: token number from Scanner */
){
	/* run-time validity check on names vs. TokenType enum */
	if( NEL(tokenName) != TT_DIMENSION )
		eprintf("Number of lines in tokenName %d; expecting %d",
			NEL(tokenName), TT_DIMENSION);
	if( token < 0 || token >= NEL(tokenName) ){
		eprintf("Unknown token name for token %d; max is %d",
			token, NEL(tokenName));
		token = 0;	/* a safe index number */
	}
	return tokenName[token];
}

/* Put the character back in the input stream using stdio's ungetc().
 * If it was a newline, decrement the line count, because we
 * will count the newline again next time we read it.
 */
	static void
scanner_ungetc(
	int ch,		/* IN: character to put back */
	FILE *fd	/* IN: stream to receive ch */
){
//FIXME-- your code for this function goes here
}

/* Initialize the scanner:
 * Do some checking to prevent duplicate or incorrect calls to this function.
 * Remember the input stream fd and input stream file name.
 * Allocate a lexeme buffer for the scanner.
 * Start counting lines at line one.
 * Do a dummy call of token_name() to make sure it works.
 */
	void
scanner_init(
	FILE *fd,	/* IN: open file descriptor to read */
	char *fname	/* IN: name of open file */
){
//FIXME-- your code for this function goes here
	linecounter = 1;

	printf( "scanner_init: The Scanner defines %d token types\n",
		TT_DIMENSION );
	printf( "scanner_init: Reading from '%s'.\n", scanfnam);
	(void) token_name(0);	/* test validity of token names */
}

/* Terminate the scanner: Undo scanner_init()
 * Do some checking to prevent duplicate or incorrect calls to this function.
 * NULL out the file fd and file name.
 * Free the scanner lexeme buffer (the one allocated by scanner_init).
 * Check to see if the input stream has been read completely (is at EOF);
 * print a warning if not true.
 * Put garbage in the line counter.
 */
	void
scanner_term(void){
	printf( "scanner_term: The Scanner terminated in '%s' at line %ld\n",
		scanfnam, linecounter );

//FIXME-- your code for this function goes here

	linecounter = -999;	/* set counter back to garbage */
}

/* next_state: Given the current state and an input symbol
 * (which will be a character or EOF) return the next state.
 * The Transition Table of the DFA gives the design of this function.
 * We program the table as a two-dimensional array 'table[][]' here.
 *
 * Since we have more character classes than we have states, the
 * table looks best on paper if rows are classes and columns are states.
 *
 * NOTE: The table is indexed using a character class (row) and
 * a current state (column).  The first part of the next_state function
 * turns a character into a row index (character class) for the table.
 *
 * NOTE: The order of the columns (current state) must exactly match the
 * order of non-accepting states in the State enum, since those state
 * values are used to index the columns of this array directly.
 *
 * Make sure that EOF always causes a transition to an accepting
 * state, or the scanner may loop reading the same EOF over and over.
 */
	static State
table[13][4] = {
//FIXME-- your transition table goes here
};
	static State
next_state(		/* RET: the next DFA state */
	State current,	/* IN: the current DFA state */
	int ch		/* IN: the character just read, or EOF */
){
	unsigned int row = 999;	/* char class row index into above table */

//FIXME-- the code to turn the incoming character into a row index goes here

	/* Make sure the indicies into the table are valid, to
	 * catch programming errors.
	 */
	if( row >= NEL(table) || current < 0 || current >= NEL(table[0]) ){
		eprintf("Error: Invalid table index: row %d current %d\n",
			row, current);
		abort();
		exit( EXIT_FAILURE );
	}
	return table[row][current];
}

/* The Scanner:
 * Implement a Deterministic Finite Automaton.
 * Move from one state to another depending on the current state
 * and the input symbol (a character read from the stream, or EOF).
 *
 * The stream descriptor and file name from which we are reading
 * must be set by scanner_init() before we are called.  A lexeme
 * buffer must also be initialized there.
 *
 * Note that since you can't push back EOF, any reading of EOF
 * will set the feof() flag on the input fd.  If EOF ends a token,
 * the stream EOF flag will be set but the TT_EOF token itself won't be
 * returned until the *next* call to the scanner.  We check to see
 * if the stream is already at EOF and don't call getc() again.
 */
	Token
scanner(		/* RET: the next Token from the input stream */
	void
){
	State currentstate;	/* the Current State */
	State nextstate;	/* the Next State */
	Token token;		/* token to be built and returned */
	int ch;			/* character read from stream, or EOF */

	/* Check to make sure scanner_init() was called.
	 * Initialize the state of the DFA.
	 * Initialize our return token to garbage; it will be updated.
	 * Clear the scanner lexeme buffer that will hold the lexeme.
	 */
//FIXME-- your code for the above goes here

	/* Loop reading characters until we end up in an accepting state.
	 * Since some tokens may end at EOF, the scanner may be called
	 * when the input is techically in an EOF condition.  Since we
	 * are not allowed to read a stream after EOF, we simply set ch
	 * to be EOF instead of incorrectly calling getc() after EOF.
	 */
	while( ! ACCEPTING(currentstate) ){
		if( feof(scanfd) )
			ch = EOF;	/* can't use getc() after EOF */
		else {
			if( (ch = getc(scanfd)) == '\n' )
				++linecounter;
		}
		nextstate = next_state(currentstate,ch);

		/* Save characters into the lexeme buffer.
		 */
//FIXME-- your code to save characters in the lexeme buffer goes here

		currentstate = nextstate;
	}

	/* NUL terminate the lexeme in the lexeme buffer.
	 * Set the token structure to point to the lexeme in the buffer.
	 */
//FIXME-- your code for the above goes here

	/* Find out in which state we accepted.
	 * If that state requires a push-back, do the push-back by
	 * calling the utility function scanner_ungetc().
	 * Set the token type according to the lexeme we recognized.
	 */
//FIXME-- your code for the above goes here

	/* Make sure we record the line counter *after* doing any
	 * ungetc() push-back that might adjust the counter.
	 */
	token.lineno = linecounter;
#ifdef SCANNER_DEBUG
	printf("<%s>", token_name(token.type)); /* print the token types */
#endif /*SCANNER_DEBUG*/
	return token;		/* returns entire structure */
}