/*****************************************************
 * PROJECT:       BBB interpreter
 * ORGANIZATION:  Microfluffy Corp.
 * LANGUAGE:      any ANSI C++ compliant
 * FILE:          scan.cpp
 * DESCRIPTION:   This file implements the scanner
 * VERSION:       1.0
 *****************************************************/

#include <ctype.h>
#include <limits.h>
#include <string.h>
#include "bbb.h"


/* Character classes.  Each entry of the character table holds one of
   these values, telling the scanner what kind of character it has in
   hand. */
typedef enum {
  C_LETTER,      /* 'A'..'Z' and 'a'..'z' */
  C_DIGIT,       /* '0'..'9' */
  C_QUOTE,       /* the double quote that delimits string literals */
  C_SPECIAL,     /* anything not covered by another class */
  C_WHITESPACE,  /* space and tab */
  C_EOL          /* line feed, carriage return and NUL */
} CharCode;

/* Scanner state shared by the routines in this file. */

/* look-ahead character: the one the scanner is currently examining */
char  ch;
/* private copy of the line being scanned (NUL-terminated) */
char  buffer[256];
/* number of characters stored in buffer */
int   buflen;
/* index in buffer of the character most recently loaded into ch */
int   bufofs;

/* The character table assigns a class (letter, digit, quote, ...) to
   every character so the scanner can classify its input with a single
   lookup.  It has one slot for each value an unsigned char can hold;
   a smaller table would leave some input bytes (DEL, and every byte
   with the high bit set) without a slot.
   NOTE(review): a lookup needs a non-negative index.  Where plain char
   is signed, a byte above 0x7F is negative and has to be converted to
   unsigned char before it is used as an index. */
CharCode chartable[UCHAR_MAX + 1];

/* Initialize the scanner: build the character table and reset the
   line state to an empty line.  Call this before scanning anything. */
void InitScanner(void)
{
  /* int rather than char: the index has to be able to reach the table
     size, which a char cannot represent */
  int c;

  /* everything is "special" unless one of the rules below says otherwise */
  for (c = 0; c < (int)(sizeof chartable / sizeof chartable[0]); c++)
    chartable[c] = C_SPECIAL;

  for (c = '0'; c <= '9'; c++)
    chartable[c] = C_DIGIT;
  for (c = 'A'; c <= 'Z'; c++)
    chartable[c] = C_LETTER;
  for (c = 'a'; c <= 'z'; c++)
    chartable[c] = C_LETTER;

  chartable['"']   = C_QUOTE;

  /* LF, CR and the terminating NUL all end the line */
  chartable['\n']  = C_EOL;
  chartable['\r']  = C_EOL;
  chartable['\0']  = C_EOL;

  /* blanks and tabs are whitespace */
  chartable[' ']   = C_WHITESPACE;
  chartable['\t']  = C_WHITESPACE;

  /* start out with an empty line and nothing scanned */
  buffer[0] = '\0';
  buflen = 0;
  bufofs = 0;
  ch = '\0';
}

/* Advances the scanner by one character.
   bufofs is incremented and ch is loaded from buffer at the new
   offset.  Once the offset has reached buflen there is nothing left
   to read, and ch is set to '\n' (an end-of-line character) instead.
   bufofs is incremented on every call, including calls made after
   the end of the line has been reached. */
void GetChar(void)
{
  ++bufofs;
  ch = (bufofs < buflen) ? buffer[bufofs] : '\n';
}

/* Moves past whitespace until ch holds something else.
   An apostrophe starts a comment that runs to the end of the line, so
   it is handled by jumping straight to the end of the line: ch
   becomes '\n' and bufofs is moved to buflen. */
void SkipWhiteSpace(void)
{
  for (;;) {
    if (ch == '\'') {
      /* comment: discard the rest of the line */
      ch = '\n';
      bufofs = buflen;
    }
    else if (chartable[ch] == C_WHITESPACE) {
      GetChar();		/* plain whitespace: step over it */
    }
    else {
      return;			/* ch is neither whitespace nor a comment */
    }
  }
}

/* Converts a string to all lowercase, in place.
   Word must point to a writable NUL-terminated string.  Characters
   that have no lowercase form are left unchanged. */
void ToLower(char* Word)
{
  char *p;

  /* walk to the terminator instead of calling strlen on every pass */
  for (p = Word; *p != '\0'; p++) {
    /* tolower is only defined for values representable as unsigned
       char (or EOF), so a possibly negative char is converted first */
    *p = (char)tolower((unsigned char)*p);
  }
}

/* Checks Word against the list of reserved words.
   On a match, Token is set to the token for that word and true is
   returned.  Otherwise false is returned and Token is left alone.
   The comparison is case-sensitive against lowercase spellings, so
   Word has to be converted to lowercase before the call. */
boolean IsReservedWord(char *Word)
{
  /* the first letter narrows the candidates; strcmp decides */
  switch (Word[0]) {
  case 'a':
    if (strcmp(Word, "abs") == 0)    { Token = T_ABS;    return true; }
    if (strcmp(Word, "and") == 0)    { Token = T_AND;    return true; }
    break;
  case 'e':
    if (strcmp(Word, "end") == 0)    { Token = T_END;    return true; }
    break;
  case 'f':
    if (strcmp(Word, "for") == 0)    { Token = T_FOR;    return true; }
    break;
  case 'g':
    if (strcmp(Word, "gosub") == 0)  { Token = T_GOSUB;  return true; }
    if (strcmp(Word, "goto") == 0)   { Token = T_GOTO;   return true; }
    break;
  case 'i':
    if (strcmp(Word, "if") == 0)     { Token = T_IF;     return true; }
    if (strcmp(Word, "input") == 0)  { Token = T_INPUT;  return true; }
    break;
  case 'l':
    if (strcmp(Word, "len") == 0)    { Token = T_LEN;    return true; }
    if (strcmp(Word, "let") == 0)    { Token = T_LET;    return true; }
    break;
  case 'm':
    if (strcmp(Word, "mod") == 0)    { Token = T_MOD;    return true; }
    break;
  case 'n':
    if (strcmp(Word, "next") == 0)   { Token = T_NEXT;   return true; }
    if (strcmp(Word, "not") == 0)    { Token = T_NOT;    return true; }
    break;
  case 'o':
    if (strcmp(Word, "or") == 0)     { Token = T_OR;     return true; }
    break;
  case 'p':
    if (strcmp(Word, "print") == 0)  { Token = T_PRINT;  return true; }
    break;
  case 'r':
    if (strcmp(Word, "return") == 0) { Token = T_RETURN; return true; }
    break;
  case 's':
    if (strcmp(Word, "step") == 0)   { Token = T_STEP;   return true; }
    if (strcmp(Word, "str") == 0)    { Token = T_STR;    return true; }
    break;
  case 't':
    if (strcmp(Word, "to") == 0)     { Token = T_TO;     return true; }
    break;
  case 'v':
    if (strcmp(Word, "val") == 0)    { Token = T_VAL;    return true; }
    break;
  default:
    break;
  }
  return false;
}

/* gets a reserved word or a variable name from the string.
    words must start with a letter, but may include letters and
    digits.  If the word is a variable name, it may end with a
    '$' character.

    if it is a variable name and there are no errors, Token is set
    to T_VAR, and StrValue is set to the name of the variable.
    if it is a reserved word, Token is set to the appropriate value. */
void GetWord(void)
{
  int i;

  i = 0;
  /*words may be letters or digits*/
  while ((chartable[ch] == C_LETTER) || (chartable[ch] == C_DIGIT)) {
    StrValue[i] = ch;
    i++;
    GetChar();
  }

  /*words may end with '$'*/
  if (ch == '$') {
    StrValue[i] = ch;
    i++;
    GetChar();
  }

  StrValue[i] = '\0';

  ToLower(StrValue);
  if (!IsReservedWord(StrValue))
    Token = T_VAR;
}

/* Scans an unsigned decimal number.
   Digits are consumed for as long as ch holds one, and their value is
   accumulated in IntValue (0 if ch is not a digit on entry).  Token
   is set to T_NUM.
   NOTE(review): there is no overflow check; how many digits fit
   depends on the type of IntValue, which is declared elsewhere. */
void GetNumber(void)
{
  for (IntValue = 0; chartable[ch] == C_DIGIT; GetChar())
    IntValue = 10 * IntValue + (ch - '0');

  Token = T_NUM;
}

/* reads in a string from the input and places it in StrValue.
    note: the quote characters are not included in StrValue.
	  you cannot have a '"' character inside the string.    */
void GetString(void)
{
  int i;

  i = 0;
  GetChar();			/*step past open quote*/
  while ((ch != '"') && (chartable[ch] != C_EOL)) {
    StrValue[i] = ch;
    i++;
    GetChar();
  }
  if (ch != '"') {		/*unterminated string*/
    Token = T_ERROR;
    return;
  }
  StrValue[i] = '\0';		/* mark the end of the string */

  Token = T_STRING;		/*set the token value*/
  GetChar();			/*step past close quote*/
} 

/* gets a special character or special sequence of characters
    (such as '<>').  sets Token appropriately.                  */
void GetSpecial(void)
{
  switch(ch) {
  case ';': Token = T_SEMI;   GetChar(); break;
  case ',': Token = T_COMMA;  GetChar(); break;
  case ':': Token = T_COLON;  GetChar(); break;
  case '(': Token = T_LPAREN; GetChar(); break;
  case ')': Token = T_RPAREN; GetChar(); break;
  case '+': Token = T_PLUS;   GetChar(); break;
  case '-': Token = T_MINUS;  GetChar(); break;
  case '*': Token = T_STAR;   GetChar(); break;
  case '/': Token = T_SLASH;  GetChar(); break;
  case '=': Token = T_EQ;     GetChar(); break;
  case '>': /* > >= */
    GetChar();
    if (ch == '=') {
      Token = T_GE;
      GetChar();
    }
    else Token = T_GT;
    break;
  case '<' : /* < <= <> */
    GetChar();
    if (ch == '=') {
      Token = T_LE;
      GetChar();
    }
    else if (ch == '>') {
      Token = T_NEQ;
      GetChar();
    }
    else Token = T_LS;
    break;
  default:
    break;
  }
} 

/* Get the next token on the current line */
void NextToken()
{
  SkipWhiteSpace();
  switch(chartable[ch]) {
  case C_LETTER : GetWord(); break;       /*words must start with a letter*/
  case C_DIGIT  : GetNumber(); break;
  case C_QUOTE  : GetString(); break;
  case C_EOL    : Token = T_EOL; break;
  default:
    GetSpecial();
    break;
  }
} 

/* Starts scanning a new line.
   Line is copied into the scanner's own buffer, the position is reset
   to the first character, and the first token is scanned, so Token is
   valid on return.

   Line: NUL-terminated text of the line.  A line that does not fit in
   the buffer is truncated to what fits rather than written past the
   end of it.  A NULL pointer is treated as an empty line. */
void StartLine (char* Line)
{
  size_t len;

  len = (Line != NULL) ? strlen(Line) : 0;
  if (len > sizeof buffer - 1)
    len = sizeof buffer - 1;	/* leave room for the terminator */

  if (len > 0)
    memcpy(buffer, Line, len);
  buffer[len] = '\0';

  buflen = (int)len;
  bufofs = 0;
  ch = buffer[0];		/* '\0' for an empty line, which counts as end of line */
  NextToken();
}


