(*****************************************************}
{ * PROJECT:       BBB interpreter}
{ * ORGANIZATION:  Microfluffy Corp.}
{ * LANGUAGE:      Turbo Pascal}
{ * FILE:          scan.pas}
{ * DESCRIPTION:   This file implements the scanner}
{ * VERSION:       1.0}
{ *****************************************************)

unit Scan;

interface

{ some global constants }

	const

		{ Size limits of the interpreter.  Within this unit MAX_LINE, MAX_VAR }
		{ and MAX_STRVARLEN size the string types declared below; the other   }
		{ limits are not referenced here, but being declared in the interface }
		{ section they are visible to every unit that uses Scan.              }

		MAX_PROG = 200;       { maximum number of lines in the program }

		MAX_LINE = 80;       { maximum number of characters per line  }

		MAX_VAR = 15;       { maximum length of a variable name      }

		MAX_STRVARLEN = 127;       { maximum length of a string variable    }

		MAX_STRVAR = 50;       { maximum number of string variables     }

		MAX_INTVAR = 200;       { maximum number of integer variables    }

		MAX_STACK = 200;       { maximum number of subroutine calls     }



		NEXT_LINE = 0;         { code returned by the parser to indicate that }
		                       { execution should go to the next line         }



		{ error message texts; none of them is used inside this unit }

		e_syntax = 'Syntax error';

		e_badline = 'Undefined line number';

		e_divzero = 'Division by zero';

		e_type = 'Type mismatch';

		e_nomem = 'Out of memory';

		e_varlen = 'Variable too long';



{ scanner and parser data types }

	type

		LineStr = string[MAX_LINE];         { one line of program text }

		VarStr = string[MAX_VAR];           { a variable name }

		StrVarStr = string[MAX_STRVARLEN];  { the contents of a string variable }



		{ An enumeration that represents all the different symbols that can }
		{ be in a BBB program.                                              }

		TokenType = (

			{ operands: variable name, numeric literal, string literal }
			T_VAR, T_NUM, T_STRING, 



			{ relational operators:  =  <>  >  <  >=  <= }
			T_EQ, T_NEQ, T_GT, T_LS, T_GE, T_LE, 

			{ arithmetic operators:  +  -  *  / }
			T_PLUS, T_MINUS, T_STAR, T_SLASH, 

			{ parentheses }
			T_LPAREN, T_RPAREN, 

			{ punctuation:  ;  ,  : }
			T_SEMI, T_COMMA, T_COLON, 



			{ reserved words, in alphabetical order }
			T_ABS, T_AND, T_ELSE, T_END, T_FOR, T_GOSUB, T_GOTO, T_IF, 

			T_INPUT, T_LEN, T_LET, T_MOD, T_NEXT, T_NOT, T_OR, T_PRINT, 

			T_RETURN, T_STEP, T_STR, T_THEN, T_TO, T_VAL, T_WEND, T_WHILE, 



			{ end of the line, and a token that could not be scanned }
			{ (for example an unterminated string literal)           }
			T_EOL, T_ERROR

			);





	var

		{ The scanner's results.  They are overwritten as tokens are scanned, }
		{ so a caller must copy any value it needs before asking for the next }
		{ token.                                                              }

		Token: TokenType;  { current token }

		IntValue: integer; { if current token is T_NUM, this holds its value }

		StrValue: string;  { if current token is T_STRING, this holds the text    }
		                   { between the quotes; if it is T_VAR, this holds the   }
		                   { variable name, converted to lowercase and including  }
		                   { a trailing '$' when one was present                  }



{ Call this procedure at the beginning of the program to initialize the }
{ data structures that the scanner will use (the character table and    }
{ the line buffer state).                                               }

	procedure InitScanner;



{ Call this procedure at the beginning of each line to be scanned to     }
{ reinitialize the scanner for that line.  Pass it a string containing   }
{ the line.  It calls NextToken itself, so the first token of the line   }
{ is already available in Token when it returns.                         }

	procedure StartLine (Line: LineStr);



{ This procedure gets the next token in the current line.  It places the }
{ kind of the token in the Token global variable, and also sets the      }
{ IntValue and StrValue variables as appropriate.                        }

	procedure NextToken;









implementation



	type

		{ Character classes stored in the character table.  NextToken picks }
		{ the scanning routine from the class of the current character:     }
		{   C_LETTER      'A'..'Z' and 'a'..'z'                             }
		{   C_DIGIT       '0'..'9'                                          }
		{   C_QUOTE       the double quote that delimits string literals    }
		{   C_SPECIAL     any character not given another class             }
		{   C_WHITESPACE  space and tab                                     }
		{   C_EOL         carriage return and line feed                     }

		CharCode = (C_LETTER, C_DIGIT, C_QUOTE, C_SPECIAL, C_WHITESPACE, C_EOL);



{globals that the scanner uses}

	var

		ch: char;             {the character currently being examined; it has}
		                      { not yet been consumed by a token             }

		buffer: LineStr;          {the line buffer}

		buflen: integer;          {length of the current line}

		bufofs: integer;          {index in buffer of the character held in ch}



		{ The character table contains information used by the scanner to   }
		{ help it do its job.  It assigns a code to each character marking   }
		{ what kind of a character it is (ie, letter, digit, punctuation,    }
		{ etc.).  It is filled in by InitScanner.                            }

		chartable: array[char] of CharCode;



{ Initialize the scanner: build the character classification table and  }
{ reset the line buffer state.  Must be called once before the first    }
{ call to StartLine or NextToken.                                       }

	procedure InitScanner;

		var
			c: char;

	begin {InitScanner}

		{ Everything starts out as C_SPECIAL.  The table is indexed by the  }
		{ whole char type, so all 256 entries are written here; stopping at }
		{ Chr(127) would leave the upper half of the table holding          }
		{ indeterminate class codes.                                        }
		for c := Chr(0) to Chr(255) do
			chartable[c] := C_SPECIAL;

		{ then the classes that have a scanning routine of their own }
		for c := '0' to '9' do
			chartable[c] := C_DIGIT;

		for c := 'A' to 'Z' do
			chartable[c] := C_LETTER;

		for c := 'a' to 'z' do
			chartable[c] := C_LETTER;

		chartable['"'] := C_QUOTE;

		chartable[Chr(13)] := C_EOL;         {CR and LF are end of line}
		chartable[Chr(10)] := C_EOL;

		chartable[' '] := C_WHITESPACE;
		chartable[Chr(9)] := C_WHITESPACE;   {tabs are whitespace}

		{ start with an empty line; StartLine loads the real one }
		buffer := '';
		buflen := 0;
		bufofs := 0;
		ch := Chr(0);

	end;  {InitScanner}



{ Get the next token on the current line }

	procedure NextToken;



  { Advances to the next character of the line buffer and places it in }
  { ch.  Once the buffer is exhausted, ch receives a carriage return,   }
  { which the character table classifies as end of line.                }

		procedure GetChar;

		begin {GetChar}

			Inc(bufofs);

			if bufofs <= buflen then
				ch := buffer[bufofs]      {still inside the line}
			else
				ch := Chr(13);            {ran off the end: report end of line}

		end;  {GetChar}



  { Skips the run of whitespace starting at the current character.  An }
  { apostrophe found after that run begins a comment, which hides the   }
  { rest of the line: the scanner is moved to the end of the buffer and }
  { ch is turned into an end-of-line character.                         }

		procedure SkipWhiteSpace;

		begin {SkipWhiteSpace}

			while chartable[ch] = C_WHITESPACE do
				GetChar;

			if ch = '''' then
				begin
					bufofs := buflen;     {nothing after the comment mark is scanned}
					ch := Chr(13);
				end;

		end;  {SkipWhiteSpace}



  { Converts the letters 'A'..'Z' of Word to lowercase, in place.  All }
  { other characters are left as they are.                              }

		procedure ToLower (var Word: string);

			const
				CASE_OFFSET = Ord('a') - Ord('A');   {distance from a capital to its small letter}

			var
				k: integer;

		begin {ToLower}

			for k := Length(Word) downto 1 do
				if (Word[k] >= 'A') and (Word[k] <= 'Z') then
					Word[k] := Chr(Ord(Word[k]) + CASE_OFFSET);

		end;  {ToLower}



  { Returns true if Word is a reserved word, false otherwise.  When it }
  { is one, Token is set to the matching token; otherwise Token is left }
  { unchanged.                                                           }
  { Note: Word must be all lowercase before calling this function.      }

		function IsReservedWord (var Word: string): boolean;

			const
				{ Spelling of every reserved word, indexed by its token.  This   }
				{ relies on the reserved-word tokens forming the contiguous run  }
				{ T_ABS..T_WHILE of TokenType, in exactly this order; a token    }
				{ added to or removed from that run makes this table fail to     }
				{ compile, but a mere reordering must be mirrored here by hand.  }
				RESERVED: array[T_ABS..T_WHILE] of string[6] = (
					'abs', 'and', 'else', 'end', 'for', 'gosub', 'goto', 'if',
					'input', 'len', 'let', 'mod', 'next', 'not', 'or', 'print',
					'return', 'step', 'str', 'then', 'to', 'val', 'wend', 'while');

			var
				candidate: TokenType;

		begin {IsReservedWord}

			IsReservedWord := false;

			for candidate := T_ABS to T_WHILE do
				if Word = RESERVED[candidate] then
					begin
						Token := candidate;
						IsReservedWord := true;
						Exit;               {a word matches at most one entry}
					end;

		end;  {IsReservedWord}



  { Scans a reserved word or a variable name, starting at the current   }
  { character.  A word is a run of letters and digits, optionally        }
  { followed by a single '$'.  (NextToken only calls this routine when   }
  { the current character is a letter, so a word starts with a letter.)  }
  {                                                                      }
  { The word is stored in StrValue in lowercase.  If it is a reserved    }
  { word, Token is set to that word's token; otherwise Token is set to   }
  { T_VAR and StrValue is the name of the variable.  The length of the   }
  { name is not checked against MAX_VAR here.                            }

		procedure GetWord;

			var
				count: integer;   {number of characters stored so far}

		begin {GetWord}

			count := 0;

			{collect the letters and digits}
			while (chartable[ch] = C_DIGIT) or (chartable[ch] = C_LETTER) do
				begin
					count := count + 1;
					StrValue[count] := ch;
					GetChar;
				end; {while}

			{a word may end with '$'}
			if ch = '$' then
				begin
					count := count + 1;
					StrValue[count] := ch;
					GetChar;
				end;

			StrValue[0] := Chr(count);    {set the length of the word}

			ToLower(StrValue);

			{IsReservedWord sets Token itself when it finds a match}
			if not IsReservedWord(StrValue) then
				Token := T_VAR;

		end;  {GetWord}



  { Scans an unsigned decimal number, starting at the current character. }
  { On success Token is set to T_NUM and IntValue contains the value of   }
  { the number.  If the number does not fit in an integer (it is larger   }
  { than MaxInt), Token is set to T_ERROR and IntValue is not meaningful. }
  { In both cases every digit of the number is consumed.                  }

		procedure GetNumber;

			var
				digit: integer;       {value of the digit being added}
				overflow: boolean;    {true once the number has exceeded MaxInt}

		begin {GetNumber}

			IntValue := 0;
			overflow := false;

			while chartable[ch] = C_DIGIT do
				begin
					digit := Ord(ch) - Ord('0');

					if not overflow then
						begin
							{ 10 * IntValue + digit would exceed MaxInt exactly when }
							{ IntValue > (MaxInt - digit) div 10; testing it in this }
							{ form keeps the test itself from overflowing.           }
							if IntValue > (MaxInt - digit) div 10 then
								overflow := true
							else
								IntValue := 10 * IntValue + digit;
						end;

					GetChar;      {keep consuming digits even after an overflow}
				end;

			if overflow then
				Token := T_ERROR
			else
				Token := T_NUM;

		end;  {GetNumber}



  { Scans a string literal, starting at its opening quote.  On success  }
  { Token is set to T_STRING and StrValue holds the characters between   }
  { the quotes (the quotes themselves are not included).  If the line    }
  { ends before a closing quote is found, Token is set to T_ERROR and    }
  { the length of StrValue is not updated.                               }
  { Note: a string cannot contain a '"' character.                       }

		procedure GetString;

			var
				count: integer;   {number of characters stored so far}

		begin {GetString}

			count := 0;

			GetChar;              {step past open quote}

			while not ((ch = '"') or (chartable[ch] = C_EOL)) do
				begin
					count := count + 1;
					StrValue[count] := ch;
					GetChar;
				end; {while}

			if ch = '"' then
				begin
					StrValue[0] := Chr(count);  {set length of the string}
					Token := T_STRING;          {set the token value}
					GetChar;                    {step past close quote}
				end
			else
				Token := T_ERROR;               {unterminated string}

		end; {GetString}



  { Scans a special character or a special sequence of characters (such }
  { as '<>') and sets Token accordingly.  A character that does not      }
  { begin any known symbol produces T_ERROR.  The characters that make   }
  { up the token are always consumed, so the scanner makes progress even }
  { on input it does not recognise.                                      }

		procedure GetSpecial;

			var
				first: char;      {the character that selects the token}

		begin {GetSpecial}

			first := ch;
			GetChar;              {every token below uses up its first character}

			case first of

				';': Token := T_SEMI;
				',': Token := T_COMMA;
				':': Token := T_COLON;
				'(': Token := T_LPAREN;
				')': Token := T_RPAREN;
				'+': Token := T_PLUS;
				'-': Token := T_MINUS;
				'*': Token := T_STAR;
				'/': Token := T_SLASH;
				'=': Token := T_EQ;

				'>': { > >= }
					begin
						if ch = '=' then
							begin
								Token := T_GE;
								GetChar;
							end
						else
							Token := T_GT;
					end;

				'<': { < <= <> }
					begin
						if ch = '=' then
							begin
								Token := T_LE;
								GetChar;
							end
						else if ch = '>' then
							begin
								Token := T_NEQ;
								GetChar;
							end
						else
							Token := T_LS;
					end;

			else
				{ not part of the language: without this branch Token would }
				{ keep its previous value and the character would never be  }
				{ consumed                                                  }
				Token := T_ERROR;

			end; {case}

		end;  {GetSpecial}



	begin {NextToken}

		SkipWhiteSpace;

		{ The class of the first significant character decides which routine }
		{ scans the token.  A class that is not listed here selects nothing.  }
		if chartable[ch] = C_LETTER then
			GetWord                   {words must start with a letter}
		else if chartable[ch] = C_DIGIT then
			GetNumber
		else if chartable[ch] = C_QUOTE then
			GetString
		else if chartable[ch] = C_EOL then
			Token := T_EOL
		else if chartable[ch] = C_SPECIAL then
			GetSpecial;

	end;  {NextToken}



{ Start scanning a new line: load Line into the line buffer, position }
{ the scanner on its first character and scan the first token, which   }
{ is available in Token on return.  An empty line yields T_EOL.        }

	procedure StartLine (Line: LineStr);

	begin {StartLine}

		buffer := Line;
		buflen := Length(buffer);
		bufofs := 1;

		{ An empty line has no first character: buffer[1] would be whatever }
		{ byte lies beyond the string's length.  Present the same           }
		{ end-of-line character that GetChar yields past the buffer end.    }
		if buflen > 0 then
			ch := buffer[1]
		else
			ch := Chr(13);

		NextToken;

	end;  {StartLine}



end.
