/* PURPOSE:
 *    SCANNER.C: Functions that implement a Lexical Scanner.
 *    scanner_init() must be called before using the scanner().
 *    When done, call scanner_term() to tidy up.
 * HISTORY:
 *    Ian D. Allen   idallen@freenet.carleton.ca
 */

//FIXME-- add system includes here
//FIXME-- add your local includes here

/* Initial character memory size and increment used to collect tokens.
 */
#define TOKEN_INIT 80
#define TOKEN_INCR 20

/* State names for the DFA.
 *
 * States less than ST__ACCEPTING_BEGIN are "going" states; not accepting.
 * States greater than ST__ACCEPTING_BEGIN cause the DFA to stop.
 * Dummy state ST__ACCEPTING_BEGIN is only used by the ACCEPTING() macro;
 * this dummy state divides the states into "not accepting" and "accepting".
 *
 * Accepting states are numbered above 128 so that the accepting state
 * numbers look quite different from the other states.  The other states
 * (non-accepting) must be consecutively numbered from zero so that
 * they may be used as row indexes into a DFA Transition Table.
 *
 * The ST__END state isn't used; it's just there as a place holder
 * for silly compilers that don't want trailing commas inside enums.
 */
typedef enum {
	/* States < ST__ACCEPTING_BEGIN keep the DFA going */

	ST_START=0,	/* start */
	ST_INID,	/* inside identifier */
//FIXME-- add your other non-accepting DFA states here

	/* States > ST__ACCEPTING_BEGIN cause the DFA to accept and stop */
#define ACCEPTING(s) ((s) > ST__ACCEPTING_BEGIN)
	ST__ACCEPTING_BEGIN=128,

	ST_ERROR,	/* a DFA error (unknown transition) */
	ST_GOTEOF,	/* got EOF */
	ST_GOTID,	/* got identifier */
//FIXME-- add your other accepting DFA states here

	ST__END		/* compiler syntax: dummy entry without comma */
} State;


/* Here is the static information private to the scanner.
 * All of this data is initialized by scanner_init() so that
 * it can be used by scanner() without having any arguments passed.
 */
//FIXME-- put the rest of the static data initialized by scanner_init here
static long linecounter = -999;		/* initialize counter with garbage */

/* The English names of the TokenType enum, and the function
 * that turns the enum into the English name.  The validity check
 * helps detect cases where we add to the TokenType and forget to
 * update the tokenName list here as well.
 */
static char *tokenName[] = {
	"Unknown",	/* each of these must match, in order, a TokenType */
	"Error",
	"EOF",
	"Junk",
	"Identifier",
	"UnsignedInt"
};
	char *
token_name(		/* RET: English description of token */
	TokenType token	/* IN: token number from Scanner */
){
	/* validity check on names vs. TokenType enum */
	if( NEL(tokenName) != TT_DIMENSION )
		eprintf("Number of lines in tokenName %d; expecting %d",
			NEL(tokenName), TT_DIMENSION);
	if( token < 0 || token >= NEL(tokenName) )
		eprintf("Unknown token name for token %d; max is %d",
			token, NEL(tokenName));
	return tokenName[token];
}

/* Put the character back in the input stream.
 * If it was a newline, decrement the line count, because we
 * will count the newline again next time we read it.
 */
	static void
scanner_ungetc(
	int ch,		/* IN: character to put back */
	FILE *fd	/* IN: stream to receive ch */
){
//FIXME-- your code for this function goes here
}

/* Initialize the scanner:
 * Do some checking to prevent duplicate or incorrect calls to this function.
 * Remember the input stream fd and input stream file name.
 * Allocate a lexeme buffer for the scanner.
 * Start counting lines at line one.
 * Do a dummy call of token_name() to make sure it works.
 */
	void
scanner_init(
	FILE *fd,	/* IN: open file descriptor to read */
	char *fname	/* IN: name of open file */
){
//FIXME-- your code for this function goes here
	linecounter = 1;

	printf( "INIT: The Scanner defines %d token types\n", TT_DIMENSION );
	(void) token_name(0);	/* test validity of token names */
}

/* Terminate the scanner: Undo scanner_init()
 * Do some checking to prevent duplicate or incorrect calls to this function.
 * NULL out the file fd and file name.
 * Free the scanner lexeme buffer (the one allocated by scanner_init).
 * Check to see if the input stream has been read completely (is at EOF);
 * print a warning if not true.
 * Put garbage in the line counter.
 */
	void
scanner_term(void){
//FIXME-- your code for this function goes here

	printf( "TERM: The Scanner terminated at line %ld\n", linecounter );
	linecounter = -999;	/* set counter back to garbage */
}

/* next_state: Given the current state and an input symbol
 * (which will be a character or EOF) return the next state.
 * The Transition Table of the DFA gives the design of this function.
 *
 * Make sure that EOF always causes a transition to an accepting
 * state, or the scanner will loop reading the same EOF over and over.
 */
	static State
next_state(		/* RET: the next DFA state */
	State current,	/* IN: the current DFA state */
	int ch		/* IN: the character just read, or EOF */
){
//FIXME-- your code for this function goes here
//FIXME-- typical implementations use a two-dimensional array of State's
}

/* The Scanner:
 * Implement a Deterministic Finite Automaton.
 * Move from one state to another depending on the current state
 * and the input symbol (a character read from the stream, or EOF).
 *
 * The stream descriptor and file name from which we are reading
 * must be set by scanner_init() before we are called.  A lexeme
 * buffer must also be initialized there.
 *
 * Note that since you can't push back EOF, any reading of EOF
 * will set the feof() flag on the input fd.  If EOF ends a token,
 * the stream EOF flag will be set but the TT_EOF token itself won't be
 * returned until the *next* call to the scanner.  We check to see
 * if the stream is already at EOF and don't call getc() again.
 */
	Token
scanner(		/* RET: the next Token from the input stream */
	void
){
	State currentstate;	/* the Current State */
	State nextstate;	/* the Next State */
	Token token;		/* token to be built and returned */
	int ch;			/* character read from stream, or EOF */

	/* Check to make sure scanner_init() was called.
	 * Initialize the state of the DFA.
	 * Initialize our return token to garbage; it will be updated.
	 * Clear the scanner lexeme buffer that will hold the lexeme.
	 */
//FIXME-- your code for the above goes here

	/* Loop reading characters until we end up in an accepting state.
	 * Since some tokens may end at EOF, the scanner may be called
	 * when the input is techically in an EOF condition.  Since we
	 * are not allowed to read a stream after EOF, we simply set ch
	 * to be EOF instead of calling getc().
	 */
	while( ! ACCEPTING(currentstate) ){
		if( feof(scanfd) )
			ch = EOF;	/* can't use getc() after EOF */
		else {
			if( (ch = getc(scanfd)) == '\n' )
				++linecounter;
		}
		nextstate = next_state(currentstate,ch);

		/* Save characters into the lexeme buffer.
		 */
//FIXME-- your code to save characters in the lexeme buffer goes here

		currentstate = nextstate;
	}

	/* NUL terminate the lexeme in the lexeme buffer.
	 * Set the token structure to point to the lexeme in the buffer.
	 */
//FIXME-- your code for the above goes here

	/* Find out in which state we accepted.
	 * If that state requires a push-back, do the push-back.
	 * Set the token type according to the lexeme we recognized.
	 */
//FIXME-- your code for the above goes here

	/* Make sure we record the line counter *after* doing any
	 * ungetc() push-back that might adjust the counter.
	 */
	token.lineno = linecounter;
	return token;		/* returns entire structure */
}