/* PURPOSE:
 *    SCANNER.C: Functions that implement a Lexical Scanner.
 *    scanner_init() must be called before using the scanner().
 *    When done, call scanner_term() to tidy up.
 * HISTORY:
 *    Ian D. Allen   idallen@freenet.carleton.ca
 */

//FIXME-- add system includes here
//FIXME-- add your local includes here

/* Initial memory size and increment for buffer used to collect lexemes.
 */
#define TOKEN_INIT 10
#define TOKEN_INCR 10

/* TYPEDEF: State
 * PURPOSE: State names for the DFA.  Private to the Scanner.
 *
 * States less than ST__ACCEPTING_BEGIN are "going" states; not accepting.
 * States greater than ST__ACCEPTING_BEGIN cause the DFA to stop.
 * Dummy state ST__ACCEPTING_BEGIN is only used by the ACCEPTING() macro;
 * this dummy state divides the states into "not accepting" and "accepting".
 *
 * Accepting states are numbered above 128 so that the accepting state
 * numbers look quite different from the other states.  The other states
 * (non-accepting) must be consecutively numbered from zero so that
 * they may be used directly as indexes into a 2-D DFA Transition Table.
 *
 * The ST__END state isn't used; it's just there as a place holder
 * for silly compilers that don't want trailing commas inside enums.
 */
typedef enum {
	/* States < ST__ACCEPTING_BEGIN keep the DFA going.
	 * These states must match EXACTLY with the corresponding
	 * slices of the 2-D DFA Transition Table used by next_state().
	 */
	ST_START=0,	/* start */
	ST_INID=1,	/* inside identifier */
//FIXME-- carefully add your other non-accepting DFA states here

	/* States > ST__ACCEPTING_BEGIN cause the DFA to accept and stop.
	 * These states don't have to be in any order; but, each
	 * accepting state needs to be handled by the scanner() function.
	 */
#define ACCEPTING(s) ((s) > ST__ACCEPTING_BEGIN)
	ST__ACCEPTING_BEGIN=128,

	ST_ERROR,	/* a DFA error (unknown transition) */
	ST_GOTEOF,	/* got EOF */
	ST_GOTID,	/* got identifier */
//FIXME-- add your new accepting DFA states here, in any order

	ST__END		/* for compiler syntax: dummy entry without comma */
} State;


/* DEFINITIONS: static data, global to this scanner source file
 * PURPOSE:
 *    Here is the static information private to the scanner.
 *    All of this data is initialized by scanner_init() so that
 *    it can be used by scanner() without having any arguments passed.
 */
static FILE *scanfd = NULL;	/* the open input stream */
static char *scanfnam = NULL;	/* name of open input stream */
static Buffer *scanbuf = NULL;	/* lexeme buffer */
static long linecounter = -999;	/* initialize line counter with garbage */

/* FUNCTION: token_name
 * PURPOSE:
 *    Here are the English names of the TokenType enum, and the function
 *    that turns the enum into the English name.  This list must match
 *    exactly the TokenType enum in scanner.h.
 *
 *    The validity check in the function helps detect cases where we add
 *    to the TokenType and forget to update the tokenName list here as well.
 */
static char *tokenName[] = {
	"Unknown",	/* each of these must match, in order, a TokenType */
	"Error",
	"EOF",
	"Junk",
	"Identifier",
	"Unsigned Integer",
//FIXME-- add the strings that match your new Token types here
};
	char *
token_name(		/* RET: English description of token */
	TokenType token	/* IN: token number from Scanner */
){
	/* run-time validity check on names vs. TokenType enum */
	if( NEL(tokenName) != TT_DIMENSION )
		eprintf("Number of lines in tokenName %d; expecting %d",
			NEL(tokenName), TT_DIMENSION);
	if( token < 0 || token >= NEL(tokenName) ){
		eprintf("Unknown token name for token %d; max is %d",
			token, NEL(tokenName));
		token = 0;	/* a safe index number */
	}
	return tokenName[token];
}

/* FUNCTION: scanner_ungetc
 * PURPOSE:
 *    Put the character back in the input stream using stdio's ungetc().
 *    If it was a newline, decrement the line count, because we
 *    will count the newline again next time we read it.
 *    Ignore the character if it is the EOF constant, since
 *    you can't push back an EOF.
 */
	static void
scanner_ungetc(
	int ch,		/* IN: character to put back */
	FILE *fd	/* IN: stream to receive ch */
){
//FIXME-- your code for this function goes here
}

/* FUNCTION: scanner_init
 * PURPOSE:
 *    Initialize the scanner:
 *    Do checking to prevent duplicate or incorrect calls to this function.
 *    Record a copy of the input stream file descriptor and the input stream
 *    file name pointer in the static storage in this file.
 *    Allocate a lexeme buffer for the scanner; the pointer to the
 *    buffer is also recorded in the static storage in this file.
 *    Start counting lines at line one.
 *    Do a dummy call of token_name() to make sure it works.
 */
	void
scanner_init(
	FILE *fd,	/* IN: open file descriptor to read */
	char *fname	/* IN: name of open file */
){
//FIXME-- your code for this function goes here
	linecounter = 1;

	printf( "scanner_init: The Scanner defines %d token types\n",
		TT_DIMENSION );
	printf( "scanner_init: Reading from '%s'.\n", scanfnam);
	(void) token_name(0);	/* test validity of token names */
}

/* FUNCTION: scanner_term
 * PURPOSE:
 *    Terminate the scanner: Undo scanner_init().
 *    Do checking to prevent duplicate or incorrect calls to this function.
 *    Put NULL into the the static copies of the file descriptor and file name.
 *    Free the scanner lexeme buffer (the one allocated by scanner_init)
 *    and set its static pointer to NULL.
 *    Check to see if the input stream has been read completely (is at EOF);
 *    print a warning if not true.
 *    Put garbage in the line counter.
 */
	void
scanner_term(void){
	printf( "scanner_term: The Scanner terminated in '%s' at line %ld\n",
		scanfnam, linecounter );

//FIXME-- your code for this function goes here

	linecounter = -999;	/* set counter back to garbage */
}

/* FUNCTION: next_state
 * PURPOSE:
 *    Given the current state and an input symbol
 *    (which will be a character or EOF) return the next state.
 *
 *    The Transition Table of the DFA gives the design of this function.
 *    We program the table as a two-dimensional array 'table[][]' here.
 *   
 *    Since we have more character classes than we have states, the
 *    table looks best on paper if rows are classes and columns are states.
 *   
 *    The table is indexed using a character class (row) and
 *    a current state (column).  The first part of the next_state function
 *    turns a character into a row index (character class) for the table.
 *   
 *    The order of the columns (current state) must exactly match the
 *    order of non-accepting states in the State enum, since those state
 *    values are used to index the columns of this array directly.
 *   
 *    Make sure that EOF always causes a transition to an accepting
 *    state, or the scanner may loop reading the same EOF over and over.
 */
	static State
table[?][?] = {
//FIXME-- Your transition table goes here.
//FIXME-- Label the rows and columns with comments, so that you
//FIXME-- can read the table.
};
	static State
next_state(		/* RET: the next DFA state */
	State current,	/* IN: the current DFA state */
	int ch		/* IN: the character just read, or EOF */
){
	unsigned int row = 999;	/* char class row index into above table */

//FIXME-- the code to turn the incoming character into a row index goes here

	/* Make sure the indicies into the table are valid, to
	 * catch programming errors that index outside the table.
	 */
	if( row >= NEL(table) || current < 0 || current >= NEL(table[0]) ){
		eprintf("Error: Invalid table index: row %d current %d\n",
			row, current);
		abort();
		exit( EXIT_FAILURE );
		/*NOTREACHED*/
	}
	return table[row][current];   /* chars are rows; states are columns */
}

/* FUNCTION: scanner
 * PURPOSE:
 *    The Scanner: Implement a Deterministic Finite Automaton.
 *
 *    Move from one state to another depending on the current state
 *    and the input symbol (a character read from the stream, or EOF).
 *    Save some of the characters read in a lexeme buffer.
 *    Return the token found.
 *   
 *    The stream descriptor and file name from which we are reading
 *    must be set by scanner_init() before scanner() is called.
 *    A buffer for the lexeme must also be initialized there.
 *   
 *    Note that since you can't push back EOF, any reading of EOF
 *    will set the feof() flag on the input fd.  If EOF ends a token,
 *    the stream EOF flag will be set but the TT_EOF token itself won't be
 *    returned until the *next* call to the scanner.  We check to see
 *    if the stream is already at EOF and don't call getc() again.
 */
	Token
scanner(		/* RET: the next Token from the input stream */
	void
){
	State currentstate;	/* the Current State */
	State nextstate;	/* the Next State */
	Token token;		/* token to be built and returned */
	int ch;			/* character read from stream, or EOF */

	/* Check to make sure scanner_init() was called.
	 * Initialize the state of the DFA.
	 * Initialize our return token to garbage; it will be updated.
	 * Clear the scanner lexeme buffer that will hold the lexeme.
	 */
//FIXME-- your code for the above goes here

	/* Loop reading characters until we end up in an accepting state.
	 * Since some tokens may end at EOF, the scanner may be called
	 * when the input is technically in an EOF condition.  Since we
	 * are not allowed to read a stream after EOF, we simply set ch
	 * to be EOF instead of incorrectly calling getc() after EOF.
	 */
	while( ! ACCEPTING(currentstate) ){
		if( feof(scanfd) )
			ch = EOF;	/* can't use getc() after EOF */
		else {
			if( (ch = getc(scanfd)) == '\n' )
				++linecounter;
		}
		nextstate = next_state(currentstate,ch);

		/* Save some of the characters into the lexeme buffer.
		 * The characters saved are selected based on the DFA.
		 * Looking at the DFA, we note which state transitions
		 * require us to save characters, and code the simplest
		 * logic that saves chars only for those state transitions.
		 */
//FIXME-- Your code to save some characters in the lexeme buffer goes here.
//FIXME-- Save only the characters that label desired DFA state transitions.

		currentstate = nextstate;
	}

	/* ASCII NUL terminate the lexeme in the lexeme buffer.
	 * Set the token structure to point to the lexeme in the buffer.
	 */
//FIXME-- your code for the above goes here

	/* Find out in which state (currentstate) we accepted.
	 * If that state requires a push-back, do the push-back by
	 * calling the utility function scanner_ungetc().
	 * For each accepting state, set the returned token type
	 * according to the lexeme we just recognized.
	 * Print an error message if we end up in an accepting state
	 * that isn't handled here -- this helps catch programming errors.
	 */
	switch( currentstate ){
//FIXME-- Your code for the above goes here.
//FIXME-- You will have one case for every accepting state, plus a default.
	default:
		/* Will never see this unless DFA is programmed incorrectly */
		eprintf("Scanner: file '%s' line %ld: Unexpected state %d",
			scanfnam, linecounter, currentstate);
		abort();
		/*NOTREACHED*/
	}

	/* Make sure we record the line counter *after* doing any
	 * ungetc() push-back that might adjust the counter.
	 */
	token.lineno = linecounter;

#ifdef SCANNER_DEBUG
	/* DEBUG: print the token type, lineno, and the lexeme */
	eprintf("<%s,%ld,'%s'>",
		token_name(token.type), token.lineno, token.lexeme);
#endif /*SCANNER_DEBUG*/

	return token;		/* returns entire structure */
}

