//Tetris for the CASM Processor
//Written by: Russ Christensen <rchriste@cs.utah.edu>

#include "../../tools/cr16lib.h"

//Return from a routine that was entered with FUNCTION_CALL: issues jCond with
//condition code UC on the address held in ret_addr, then flushes the pipeline.
//The body is wrapped in do { } while (0) so the macro expands to exactly one
//statement and cannot be split apart by an unbraced if/else.
#define FUNCTION_RETURN      \
	do {                     \
		jCond(UC, ret_addr); \
		flushPipeline();     \
	} while (0)

//Calls the routine at `label`: loads the label address into r13 (which is
//clobbered), issues jal with ret_addr as its first operand, then flushes the
//pipeline.
//When speed tuning you will not want to use this macro, because the flush puts
//nops after the jump that could have been filled with useful instructions.
//The body is wrapped in do { } while (0) so the macro expands to exactly one
//statement and stays intact under an unbraced if/else.
#define FUNCTION_CALL(label)                                          \
	do {                                                              \
		loadLabelAddress(label, r13); /*r13 is a scratch register*/   \
		jal(ret_addr, r13);                                           \
		flushPipeline();                                              \
	} while (0)

//SAVE_VARIABLE and RESTORE_VARIABLE make saving registers to memory and
//loading them back easier.  Please remember that these registers are being
//saved on the stack.  Which is a stack!!!  Meaning you can only remove items
//in the reverse order that you placed them on.
//SAVE_VARIABLE stores var at the address held in SP and then increments SP.
//The body is wrapped in do { } while (0) so the macro expands to exactly one
//statement; without it the ++SP would escape an unbraced if/else.
#define SAVE_VARIABLE(var)  \
	do {                    \
		store(var, SP);     \
		++SP;               \
	} while (0)

//Counterpart of SAVE_VARIABLE: decrements SP and then loads var from the
//address held in SP.  Restores must be issued in the reverse order of the
//matching saves.
//The body is wrapped in do { } while (0) so the macro expands to exactly one
//statement; without it the load would escape an unbraced if/else.
#define RESTORE_VARIABLE(var)  \
	do {                       \
		--SP;                  \
		load(var, SP);         \
	} while (0)

//var = var * 10, built from additions only; tmp is overwritten and is left
//holding 2 * (the original var).
//The body is wrapped in do { } while (0) so the four steps stay together under
//an unbraced if/else, and the arguments are parenthesised.
#define MULT_10(var,tmp)                                  \
	do {                                                  \
		(tmp) = (var) + (var);  /* tmp = 2*n  */          \
		(var) = (tmp) + (tmp);  /* var = 4*n  */          \
		(var) += (var);         /* var = 8*n  */          \
		(var) = (var) + (tmp);  /* var = 10*n */          \
	} while (0)

//var = var * 8, done by doubling var three times.
//The body is wrapped in do { } while (0) so the three steps stay together
//under an unbraced if/else, and the argument is parenthesised.
#define MULT_8(var)                               \
	do {                                          \
		(var) += (var); /* var = 2*var */         \
		(var) += (var); /* var = 4*var */         \
		(var) += (var); /* var = 8*var */         \
	} while (0)

//var = var * 2, done by adding var to itself.
//The argument and the whole expansion are parenthesised so that an expression
//argument (for example a conditional) is doubled as a whole.
#define MULT_2(var)                             \
	((var) += (var)) /* var = 2*var */

//returns var = var / 2
//Expands to a single lshi call with a count of -1.  Presumably a negative
//count means a logical shift to the right, in which case only non-negative
//values are halved correctly -- TODO confirm against the cr16lib documentation.
#define DIV_2(var)                                               \
	lshi(-1, var) /*var = var / 2 */

//A for-loop built on cr16_while/cr16_endwhile.  Expands to:
//    first;  cr16_while(second);  { body; third; }  cr16_endwhile()
//so `first` runs once, `second` is the loop condition, and `third` runs after
//`body` on every pass.
//Because `body` is a single macro argument, callers in this file pass it as a
//parenthesised comma expression: '(' in place of '{' and ',' in place of ';'.
//NOTE: the expansion is several statements, so do not use this macro as the
//unbraced body of a host-level if/else.
#define CR16_FOR(first,second,third,body)        \
	    first;                                   \
		cr16_while(second);                      \
		{                                        \
            body;                                \
			third;                               \
		} cr16_endwhile()

//var = var*amount
//The Console Revolution 16 does not support multiplication in hardware,
//so here is a software algorithm to get it done: var is added into tmp once
//per pass of a cr16_while loop that counts amount down until the loop
//condition `amount` no longer holds.
//Side effects: tmp is overwritten and amount is decremented by the loop, so
//pass a register you can afford to lose as amount.
//If you know the amount at compile time then use one of the other
//macros that is specialized (MULT_2, MULT_8, MULT_10).
//This is a simple algorithm; something much fancier could be looked up in a
//book and used instead if heavy optimizing is ever needed.
//The body is wrapped in do { } while (0) so the macro expands to exactly one
//statement and stays intact under an unbraced if/else.
#define MULT(var, tmp, amount)                              \
	do {                                                    \
		(tmp) = 0;                                          \
		cr16_while(amount);                                 \
		{                                                   \
			(tmp) = (tmp) + (var);                          \
			--(amount);                                     \
		} cr16_endwhile();                                  \
		(var) = (tmp);                                      \
	} while (0)

namespace {
	//Build-time switch: when true, the extra code guarded by if(debug) is
	//compiled into the generated program.
	bool debug(false);
}

/************************
 How things are set up with the current color palette
	const int BLACK = 0;
	const int RED = 1;
	const int BLUE = 2;
	const int GREEN = 3;
*************************/

namespace tetris_constants {
	//Controller button masks, one bit per button
	const int A_BUTTON      = 1 << 7;
	const int B_BUTTON      = 1 << 6;
	const int SELECT_BUTTON = 1 << 5;
	const int START_BUTTON  = 1 << 4;
	const int LEFT_BUTTON   = 1 << 3;
	const int RIGHT_BUTTON  = 1 << 2;
	const int UP_BUTTON     = 1 << 1;
	const int DOWN_BUTTON   = 1 << 0;
	const int SLIDE_MASK = 3;

	//Boolean values as stored in registers
	const int FALSE = 0;
	const int TRUE  = 1;

	//Screen size and playfield geometry
	const int WIDTH = 64;
	const int HEIGHT = 30;
	const int TETRIS_WIDTH = 10;
	const int TETRIS_HEIGHT = 20;
	const int LEFT = 20; //x of the left border line
	const int RIGHT = LEFT + 2*TETRIS_WIDTH + 1; //x of the right border line
	const int STARTING_COUNT_MAX = 20;
	const int MIN_COUNT_MAX = 5; //count_max is never meant to go below this
	const int TOP = 5;
	const int BOTTOM = TOP + TETRIS_HEIGHT; //y of the bottom line

	//Colors.  It is absolutely necessary for the correctness of this program
	//that BACKGROUND_COLOR is 0 and FALLEN_PIECE_COLOR is 3: bitwise
	//operations used as a speed optimization depend on those two values.
	//PIECE_COLOR and BOARDER_COLOR may be exchanged with each other.
	const int PIECE_COLOR = 1;
	const int BOARDER_COLOR = 2;
	const int BACKGROUND_COLOR = 0;
	const int FALLEN_PIECE_COLOR = 3;

	const int NES_ADDR = 0xE000; //memory mapped address of the NES controller

	//Piece table layout
	const int NUMBER_OF_TETRIS_PIECES = 7;
	const int NUMBER_OF_ROTATIONS = 4;
	const int BLOCKS_PER_PIECE = 4;
	//each rotation stores 2 words (x position and y position) for each of the
	//4 blocks that make up a tetris piece
	const int SPACE_FOR_EACH_ROTATION = 2 * BLOCKS_PER_PIECE;
	const int PIECES_LIST_LENGTH = 35;

	//Bits of the word written to the sound address
	const int SOUND_ENABLE = 1 << 3;
	const int SOUND_BIT2   = 1 << 2;
	const int SOUND_BIT1   = 1 << 1;
	const int SOUND_BIT0   = 1 << 0;

	//Memory mapped device addresses
	const int ADDR_VGA_PIXELS	=	0xE800;
	const int ADDR_VGA_VSYNC	=	0xF800;
	const int ADDR_VGA_FLIP		=	0xF000;
	const int ADDR_NES			=	0xE000;
	const int ADDR_SOUND		=	0xD800;

	//Registers with a fixed role
	cr16_int SP(r15); //stack pointer
	cr16_int ret_addr(r14); //return address for function calls
}

using namespace tetris_constants;

namespace memory_map {
	/***addresses of the global variables kept in data memory***/

	//game_board records where fallen tetris pieces lie, one word per cell.
	//One bit per cell would be possible, but memory is not the limitation,
	//speed is, and a word per cell gives [slightly] better performance.
	const int game_board_start = 0;
	//end marker of the board; note it is TETRIS_WIDTH*TETRIS_HEIGHT+1 words
	//after the start
	const int game_board_end = game_board_start + TETRIS_WIDTH * TETRIS_HEIGHT + 1;

	//Table describing what each piece looks like.  The main function of the
	//game initializes this memory with constant values.
	const int game_pieces_start = game_board_end;
	const int game_pieces_end =
		game_pieces_start
		+ (NUMBER_OF_TETRIS_PIECES * NUMBER_OF_ROTATIONS * SPACE_FOR_EACH_ROTATION)
		+ 1; //end marker of the piece table

	//Single-word variables, laid out back to back after the piece table
	const int current_piece = game_pieces_end;
	const int static_y = current_piece + 1;
	const int current_piece_list_position = current_piece + 2;
	const int lines_distroyed = current_piece + 3; //number of lines the player has destroyed in the game
	const int count_max = current_piece + 4;
	const int lines_were_distroyed = current_piece + 5;

	//The list of upcoming pieces, followed by the start of the stack
	const int pieces_list = current_piece + 6;
	const int BEGIN_STACK = pieces_list + PIECES_LIST_LENGTH + 1;
}

namespace game_piece {
	//Start address of each tetris piece inside the table that spans
	//memory_map::game_pieces_start to memory_map::game_pieces_end.  Every piece
	//takes NUMBER_OF_ROTATIONS * SPACE_FOR_EACH_ROTATION words, so piece number k
	//starts k such strides after the table start.
	const int square    = memory_map::game_pieces_start + 0 * NUMBER_OF_ROTATIONS * SPACE_FOR_EACH_ROTATION;
	const int line      = memory_map::game_pieces_start + 1 * NUMBER_OF_ROTATIONS * SPACE_FOR_EACH_ROTATION;
	const int left_L    = memory_map::game_pieces_start + 2 * NUMBER_OF_ROTATIONS * SPACE_FOR_EACH_ROTATION;
	const int right_L   = memory_map::game_pieces_start + 3 * NUMBER_OF_ROTATIONS * SPACE_FOR_EACH_ROTATION;
	//no established name for the next two shapes, so they are called foo
	const int left_foo  = memory_map::game_pieces_start + 4 * NUMBER_OF_ROTATIONS * SPACE_FOR_EACH_ROTATION;
	const int right_foo = memory_map::game_pieces_start + 5 * NUMBER_OF_ROTATIONS * SPACE_FOR_EACH_ROTATION;
	const int mountain  = memory_map::game_pieces_start + 6 * NUMBER_OF_ROTATIONS * SPACE_FOR_EACH_ROTATION;
}

namespace inline_function {
	//Forward declaration: the sound routines below call flipPage() before its
	//definition appears further down in this namespace.
	void flipPage();
}

namespace sound_inline_function {
	void inline sound_effect0() {
		cr16_int addr(r9);
		cr16_int value(r8);
		SAVE_VARIABLE(r8);
		SAVE_VARIABLE(r9);

		addr = tetris_constants::ADDR_SOUND;
		value = 10;
		store(value,addr);
		
		RESTORE_VARIABLE(r9);
		RESTORE_VARIABLE(r8);
	}

	void inline sound_effect1() {
		cr16_int addr(r6);
		cr16_int value(r7);
		SAVE_VARIABLE(r6);
		SAVE_VARIABLE(r7);

		addr = tetris_constants::ADDR_SOUND;
		value = SOUND_BIT0 | SOUND_BIT1 | SOUND_BIT2 | SOUND_ENABLE;
		store(value,addr);

		CR16_FOR(value = 0, value < 10, ++value,
		(
			inline_function::flipPage()
		));

		inline_function::flipPage();

		RESTORE_VARIABLE(r7);
		RESTORE_VARIABLE(r6);
	}

	void inline sound_off() {
		cr16_int addr(r9);
		cr16_int value(r8);
		SAVE_VARIABLE(r8);
		SAVE_VARIABLE(r9);

		addr = ADDR_SOUND;
		load(value,addr);
		value = 0;
		store(value,addr);

		RESTORE_VARIABLE(r9);
		RESTORE_VARIABLE(r8);
	}
}

//Put all the inline functions in a namespace to make the code more readable
//to those who don't know CASM inside out.
namespace inline_function {
	//The following functions are inline functions that are automatically
	//expanded by CASM at compile time.  Most of tetris is written as inline
	//functions because I get better performance that way: I don't have to pay
	//the [small, but present] speed price of a function call.  And because
	//our Console Revolution 16 processor has no cache, the only
	//drawback to making everything inline is code size.  So long as the
	//program fits inside 64k words, code size is not an issue either.
//----------------------------------------------------------------------
//An inline function that changes the graphics buffer
void flipPage()
{
	cr16_int addr(r8), data(r9);
	
	// wait until Vsync starts
	data = 0;
	addr = 0xf800;
	cr16_while( data==0 );
	{		
		load( data, addr );
		flushPipeline();
	} cr16_endwhile();
	
	// Flip page
	addr = 0xf000;
	data = 0;
	store( data, addr );
	flushPipeline();
	flushPipeline();
	flushPipeline();
	flushPipeline();
	data = 1;
	store( data, addr );
	flushPipeline();	
}

//----------------------------------------------------------------------
//Inline function that plots one point: stores color at
//ADDR_VGA_PIXELS + 64*y + x.  Uses r8 and r9 without saving them.
void drawPixel( cr16_int x, cr16_int y, cr16_int color )
{
	cr16_int row_offset(r8);	
	cr16_int column_base(r9);
	column_base = ADDR_VGA_PIXELS;
	row_offset = y+y;						// 2*y
	for( int doubling=0; doubling<2; ++doubling )
		row_offset += row_offset;			// 4*y, 8*y
	column_base += x;						// kept between the doublings, as in the original schedule
	for( int doubling=0; doubling<3; ++doubling )
		row_offset += row_offset;			// 16*y, 32*y, 64*y
	row_offset += column_base;	
	store( color, row_offset );
}
//----------------------------------------------------------------------
// Print the content of r9 (between 0..999) as three decimal digits.
//
// At build time the image file "numbers.pix" is read (30 rows of 64 values);
// every value below 0.5 becomes a pixel of the digit whose 6-pixel-wide column
// band it falls in.  For each digit a straight-line drawing routine is
// generated (label "drawDigitLabel" + int2bin(digit,16)) that stores the value
// 1 to every pixel of that digit, three pixels at a time through r0, r1, r2.
//
// Generated-code register use:
//   r9  - input; it is reduced while the digits are extracted and is NOT
//         restored, so it holds the ones digit afterwards
//   r0  - digit to draw when jumping to "DrawDigit"
//   r4  - address offset added to every pixel address of the digit
//   r6  - link register for the jump into a drawDigitLabel routine
//   r14 - link register for the jump to "DrawDigit" (its old value is lost)
//   r0..r8 are saved on the stack on entry and restored on exit.
//
// The build now stops with an assertion if numbers.pix cannot be opened or if
// a digit has fewer than the three pixels the generator indexes
// unconditionally; previously both cases read uninitialised or out-of-range
// data.
void printReg9()
{
	SAVE_VARIABLE(r0);
	SAVE_VARIABLE(r1);
	SAVE_VARIABLE(r2);
	SAVE_VARIABLE(r3);
	SAVE_VARIABLE(r4);
	SAVE_VARIABLE(r5);
	SAVE_VARIABLE(r6);
	SAVE_VARIABLE(r7);
	SAVE_VARIABLE(r8);

	// jump over the drawing routines that are emitted next
	loadLabelAddress( "skipDrawCode", r1 );
	jCond( UC, r1 );
	flushPipeline();

	// Code that draw the digit 0,1,2...9
	ifstream f("numbers.pix");							// read image data of all digits
	assert( f.is_open() && "numbers.pix could not be opened" );
	vector<int> x[10], y[10];
	float z = 1.0f;										// a failed read must not count as a pixel
	for( int j=29; j>=0; j-- )
		for( int i=0; i<64; i++ )		
		{
			f >> z;
			if( z<0.5 )
			{
				int which = i/6;				
				assert( which>=0 && which<10 );			// separate them into 10 sets (one for each digit)
				x[which].push_back(i-which*6);
				y[which].push_back(j);				
			}
		}
	
	for( int which=0; which<10; which++ )
	{
		// the three address registers below are seeded from pixels 0, 1 and 2
		assert( x[which].size() >= 3 && "numbers.pix must give every digit at least three pixels" );

		addLabel( "drawDigitLabel" + int2bin(which,16) );
		r0 = ( ADDR_VGA_PIXELS + y[which][0]*64 + x[which][0]) + r4;
		r1 = ( ADDR_VGA_PIXELS + y[which][1]*64 + x[which][1]) + r4;
		r2 = ( ADDR_VGA_PIXELS + y[which][2]*64 + x[which][2]) + r4;		
		r3 = 1;
		unsigned int index = 3, size = x[which].size();
		
		// each register walks every third pixel; the increment is the address
		// difference between pixel `index` and pixel `index-3`
		while( index<size )
		{
			store( r3, r0 );
			store( r3, r1 );
			store( r3, r2 );						
			r0 += (y[which][index]*64+x[which][index] - y[which][index-3]*64-x[which][index-3]);  index ++;  if(index>=size) break;			
			r1 += (y[which][index]*64+x[which][index] - y[which][index-3]*64-x[which][index-3]);  index ++;  if(index>=size) break;			
			r2 += (y[which][index]*64+x[which][index] - y[which][index-3]*64-x[which][index-3]);  index ++;  if(index>=size) break;
		}
		store( r3, r0 );
		store( r3, r1 );
		store( r3, r2 );		
		jCond(UC,r6);		// return
		flushPipeline();
	}

	// r0 = digit to draw, r4=address offset
	addLabel("DrawDigit");
	cr16_if( r0==0 );	loadLabelAddress( "drawDigitLabel" + int2bin(0,16), r1 );   cr16_endif();
	cr16_if( r0==1 );	loadLabelAddress( "drawDigitLabel" + int2bin(1,16), r1 );   cr16_endif();
	cr16_if( r0==2 );	loadLabelAddress( "drawDigitLabel" + int2bin(2,16), r1 );   cr16_endif();
	cr16_if( r0==3 );	loadLabelAddress( "drawDigitLabel" + int2bin(3,16), r1 );   cr16_endif();
	cr16_if( r0==4 );	loadLabelAddress( "drawDigitLabel" + int2bin(4,16), r1 );   cr16_endif();
	cr16_if( r0==5 );	loadLabelAddress( "drawDigitLabel" + int2bin(5,16), r1 );   cr16_endif();
	cr16_if( r0==6 );	loadLabelAddress( "drawDigitLabel" + int2bin(6,16), r1 );   cr16_endif();
	cr16_if( r0==7 );	loadLabelAddress( "drawDigitLabel" + int2bin(7,16), r1 );   cr16_endif();
	cr16_if( r0==8 );	loadLabelAddress( "drawDigitLabel" + int2bin(8,16), r1 );   cr16_endif();
	cr16_if( r0==9 );	loadLabelAddress( "drawDigitLabel" + int2bin(9,16), r1 );   cr16_endif();	
	jal( r6, r1 );		// call draw
	flushPipeline();
	jCond(UC,r14);		// return
	flushPipeline();
	

	// determines what three digits to draw
	addLabel("skipDrawCode");

	r0 = 0;							// the 'hundred' digit
	cr16_while(r9 >= 100 );
		r9 -= 100;
		++r0;
		flushPipeline();
	cr16_endwhile();
	r4 = 57 + (64*0);
	loadLabelAddress( "DrawDigit", r1 );
	jal(r14,r1);
	flushPipeline();

	r0 = 0;							// the 'ten' digit
	cr16_while(r9 >= 10 );
		r9 -= 10;
		++r0;
		flushPipeline();
	cr16_endwhile();	
	r4 = 57 + (64*9);
	loadLabelAddress( "DrawDigit", r1 );
	jal(r14,r1);
	flushPipeline();

	r0 = r9;									// the 'one' digit	
	r4 = 57 + (64*18);
	loadLabelAddress( "DrawDigit", r1 );
	jal(r14,r1);
	flushPipeline();		

	RESTORE_VARIABLE(r8);
	RESTORE_VARIABLE(r7);
	RESTORE_VARIABLE(r6);
	RESTORE_VARIABLE(r5);
	RESTORE_VARIABLE(r4);
	RESTORE_VARIABLE(r3);
	RESTORE_VARIABLE(r2);
	RESTORE_VARIABLE(r1);
	RESTORE_VARIABLE(r0);
}
//----------------------------------------------------------------------
//Inline debug helper that shows the 16 bits of `value` as a row of pixels on
//the left hand side of the screen, lowest bit first, framed by a color-3 pixel
//at x=1 and another one after the last bit.  The row used is kept in
//memory_map::static_y: it is advanced by one on every call and set back to 1
//once it exceeds 30.  The readout is drawn twice with a flipPage() after each
//pass.  value is unchanged on return (it is saved and restored around each
//pass); r4..r9 are saved and restored.
//(The original note says value is assumed to be in register r0.)
void debug_printValue(cr16_int value)
{
	cr16_int col(r7);
	cr16_int row(r6);
	cr16_int shade(r5);
	cr16_int slot(r4);
	SAVE_VARIABLE(r4);
	SAVE_VARIABLE(r5);
	SAVE_VARIABLE(r6);
	SAVE_VARIABLE(r7);
	SAVE_VARIABLE(r8);
	SAVE_VARIABLE(r9);

	//fetch the row, advance it with wrap-around, and write it back
	slot = memory_map::static_y;
	load(row,slot);
	++row;
	cr16_if(row > 30);
	{
		row = 1;
	} cr16_endif();
	store(row,slot);
	
	const int marker = 3; //color of the two framing pixels
	for(int pass=0; pass<2; ++pass) {
		SAVE_VARIABLE(value);
		//a different color on both sides of the readout makes it easier to read
		col=1;
		shade = marker;
		drawPixel(col,row,shade);
		//one pixel per bit; value is shifted right as the bits are consumed
		CR16_FOR(col=2, col < 18, ++col,
		(
			shade = value & 1,
			lshi(-1, value),
			drawPixel(col,row,shade)
		));
		shade = marker;
		drawPixel(col,row,shade);
		RESTORE_VARIABLE(value);
		flipPage();
	}

	RESTORE_VARIABLE(r9);
	RESTORE_VARIABLE(r8);
	RESTORE_VARIABLE(r7);
	RESTORE_VARIABLE(r6);
	RESTORE_VARIABLE(r5);
	RESTORE_VARIABLE(r4);
}
//----------------------------------------------------------------------
//sets background to BACKGROUND_COLOR
void setBackground() 
{
	cr16_int x(r7);
	cr16_int y(r6);
	cr16_int color(r5);
	SAVE_VARIABLE(r7);
	SAVE_VARIABLE(r6);
	SAVE_VARIABLE(r5);

	x = 0;
	color = BACKGROUND_COLOR;
	cr16_while(x < WIDTH);
	{
		y=0;
		cr16_while(y < HEIGHT);
		{
			drawPixel(x, y, color);
			++y;
		} cr16_endwhile();
		++x;
	} cr16_endwhile();

	RESTORE_VARIABLE(r5);
	RESTORE_VARIABLE(r6);
	RESTORE_VARIABLE(r7);
}
//----------------------------------------------------------------------
//sets background to BACKGROUND_COLOR
void clearScore() 
{
	cr16_int x(r7);
	cr16_int y(r6);
	cr16_int color(r5);
	SAVE_VARIABLE(r7);
	SAVE_VARIABLE(r6);
	SAVE_VARIABLE(r5);

	x = 56;
	color = BACKGROUND_COLOR;
	cr16_while(x < WIDTH);
	{
		y=0;
		cr16_while(y < HEIGHT);
		{
			drawPixel(x, y, color);
			++y;
		} cr16_endwhile();
		++x;
	} cr16_endwhile();

	RESTORE_VARIABLE(r5);
	RESTORE_VARIABLE(r6);
	RESTORE_VARIABLE(r7);
}

//----------------------------------------------------------------------
//inline function that draws the tetris boarder
void inline drawBorder() 
{
	cr16_int y(r7);
	cr16_int x(r6);
	cr16_int color(r5);
	SAVE_VARIABLE(r7);
	SAVE_VARIABLE(r6);
	SAVE_VARIABLE(r5);

	//draw side lines
	color = BOARDER_COLOR;
	y = TOP;
	cr16_while(y < BOTTOM);
	{
		x = LEFT;
		drawPixel(x,y,color);
		--x;
		drawPixel(x,y,color);
		x = RIGHT;
		drawPixel(x,y,color);
		++x;
		drawPixel(x,y,color);
		++y;
	} cr16_endwhile();
	//draw bottom line
	x = LEFT-1;
	y = BOTTOM;
	cr16_while( x <= RIGHT + 1);
	{
		drawPixel(x,y,color);
		++y;
		drawPixel(x,y,color);
		--y;
		++x;
	} cr16_endwhile();

	RESTORE_VARIABLE(r5);
	RESTORE_VARIABLE(r6);
	RESTORE_VARIABLE(r7);
}
//----------------------------------------------------------------------
//inline function that returns true if the position given is valid
//
//  result   - out: set to TRUE, then to FALSE if any of the piece's blocks
//             lies outside the playfield or on a fallen piece
//  x, y     - screen position to test (restored before returning)
//  rotation - only the lower two bits are used (restored before returning)
//
//The piece tested is the one whose table address is stored at
//memory_map::current_piece.  Each rotation occupies SPACE_FOR_EACH_ROTATION
//words: an (x, y) pair per block; the stored x is doubled before use because
//a block is two pixels wide on screen.
//r4..r8 are saved and restored.  NOTE(review): result, x, y and rotation
//presumably must not live in r4..r8, since those registers are overwritten
//here -- confirm against the callers.
void checkCurrentPiece(cr16_int result, cr16_int x, cr16_int y, cr16_int rotation)
{
	cr16_int tmp(r8);
	cr16_int addr(r7);
	cr16_int x_value(r6);
	cr16_int y_value(r5);
	cr16_int current_piece_ptr(r4);
	SAVE_VARIABLE(r8);
	SAVE_VARIABLE(r7);
	SAVE_VARIABLE(r6);
	SAVE_VARIABLE(r5);
	SAVE_VARIABLE(r4);
	SAVE_VARIABLE(rotation);
	SAVE_VARIABLE(x);
	SAVE_VARIABLE(y);

	//checkSquare(result,x,y,rotation);

	result = TRUE;
	//current_piece_ptr = table address of the current piece + 8 * rotation
	addr = memory_map::current_piece;	
	load(current_piece_ptr, addr);
	rotation = rotation & 3; //just get the lower 2 bits of the rotation
	assert(SPACE_FOR_EACH_ROTATION == 8 && "Must modify line below so the mult is correct");
	MULT_8(rotation);
	current_piece_ptr += rotation;
	//First check to see if the piece will remain in the tetris box
	//(the pointer is saved so the second pass can start from the same block)
	SAVE_VARIABLE(current_piece_ptr);
	//host-level loop: the body is emitted once per block
	for(int i=0; i < BLOCKS_PER_PIECE; ++i)
	{
		load(x_value, current_piece_ptr);
		++current_piece_ptr;
		load(y_value, current_piece_ptr);
		++current_piece_ptr;
		MULT_2(x_value);
		x_value += x;
		y_value += y;

		//@todo This is weird behavior: it is not understood why the limits
		//have to be RIGHT - 2 and BOTTOM - 1
		cr16_if((x_value < LEFT) | (x_value > RIGHT - 2) | (y_value > BOTTOM - 1));
		{
			result = FALSE;
		} cr16_endif();
	}
	//Now check to see if the space is filled on the game board
	RESTORE_VARIABLE(current_piece_ptr);
	for(int i=0; i < BLOCKS_PER_PIECE; ++i)
	{
		load(x_value, current_piece_ptr);
		++current_piece_ptr;
		MULT_2(x_value);
		load(y_value, current_piece_ptr);
		++current_piece_ptr;
		x_value += x;
		y_value += y;
		//board index = (screen x - LEFT) / 2 + 10 * (screen y - TOP)
		x_value = x_value-LEFT;
		DIV_2(x_value);
	    y_value = y_value-TOP;
		MULT_10(y_value,tmp);
		addr = x_value + y_value;

		////it is possible to get invalid input so check for that
		//if (debug) {
		//	SAVE_VARIABLE(r0);
		//	r0 = addr;
		//	debug_printValue(r0);
		//	RESTORE_VARIABLE(r0);
		//}
		//only look at the board when the index is inside it
		cr16_if((addr >= 0) & (addr < memory_map::game_board_end));
		{
			load(tmp,addr);
			cr16_if(tmp == FALLEN_PIECE_COLOR);
			{
				result = FALSE;
			} cr16_endif();
		} cr16_endif();
	}

	//restore in the reverse order of the saves above
	RESTORE_VARIABLE(y);
	RESTORE_VARIABLE(x);
	RESTORE_VARIABLE(rotation);
	RESTORE_VARIABLE(r4);
	RESTORE_VARIABLE(r5);
	RESTORE_VARIABLE(r6);
	RESTORE_VARIABLE(r7);
	RESTORE_VARIABLE(r8);
}
//----------------------------------------------------------------------
//Inline function that records the current piece on the game board: for each
//of its blocks the board cell at (screen x - LEFT) / 2 + 10 * (screen y - TOP)
//is set to FALLEN_PIECE_COLOR.  Only the lower two bits of rotation are used.
//A cell index outside the board is skipped; in a debug build the generated
//code jumps to the label "waitForStart" instead.
//x, y, rotation and r4..r8 are saved and restored.
void placeCurrentPiece(cr16_int x, cr16_int y, cr16_int rotation)
{
	cr16_int scratch(r8);
	cr16_int cell(r7);
	cr16_int col(r6);
	cr16_int row(r5);
	cr16_int shape(r4);
	SAVE_VARIABLE(r8);
	SAVE_VARIABLE(r7);
	SAVE_VARIABLE(r6);
	SAVE_VARIABLE(r5);
	SAVE_VARIABLE(r4);
	SAVE_VARIABLE(rotation);
	SAVE_VARIABLE(x);
	SAVE_VARIABLE(y);

	//shape = table address of the current piece + 8 * rotation
	cell = memory_map::current_piece;
	load(shape,cell);
	rotation = rotation & 3; //only the lower 2 bits select the rotation
	assert(SPACE_FOR_EACH_ROTATION == 8 && "Must modify line below so the mult is correct");
	MULT_8(rotation);
	shape += rotation;
	//host-level loop: the body is emitted once per block of the piece
	for(int blocks_left = BLOCKS_PER_PIECE; blocks_left > 0; --blocks_left)
	{
		load(col, shape);
		++shape;
		MULT_2(col);
		load(row, shape);
		++shape;
		col += x;
		row += y;
		//convert the screen position into a board index
		col = col - LEFT;
		DIV_2(col);
		row = row - TOP;
		MULT_10(row,scratch);
		cell = col + row;
		scratch = FALLEN_PIECE_COLOR;
		cr16_if((cell >= 0) & (cell < memory_map::game_board_end));
		{
			store(scratch,cell);
		} cr16_else();
		{
			//signal an error by halting the program
			if(debug)
			{
				loadLabelAddress("waitForStart", r0);
				jCond(UC, r0);
				flushPipeline();
			}
		} cr16_endif();
	}

	//restore in the reverse order of the saves above
	RESTORE_VARIABLE(y);
	RESTORE_VARIABLE(x);
	RESTORE_VARIABLE(rotation);
	RESTORE_VARIABLE(r4);
	RESTORE_VARIABLE(r5);
	RESTORE_VARIABLE(r6);
	RESTORE_VARIABLE(r7);
	RESTORE_VARIABLE(r8);
}
//----------------------------------------------------------------------
//Inline function that draws the current piece in PIECE_COLOR at screen
//position (x, y).  Only the lower two bits of rotation are used.  Every block
//is drawn as two horizontally adjacent pixels.
//rotation and r4..r7 are saved and restored; drawPixel additionally uses r8
//and r9.
void drawCurrentPiece(cr16_int x, cr16_int y, cr16_int rotation)
{
	cr16_int lookup(r7); 	/* NOTE!!! shares r7 with paint */
	cr16_int paint(r7);		/* NOTE!!! shares r7 with lookup */
	cr16_int col(r6);
	cr16_int row(r5);
	cr16_int shape(r4);
	SAVE_VARIABLE(r7);
	SAVE_VARIABLE(r6);
	SAVE_VARIABLE(r5);
	SAVE_VARIABLE(r4);
	SAVE_VARIABLE(rotation);

	//shape = table address of the current piece + 8 * rotation
	lookup = memory_map::current_piece;
	load(shape, lookup);
	rotation = rotation & 3; //only the lower 2 bits select the rotation
	assert(SPACE_FOR_EACH_ROTATION == 8 && "Must modify line below so the mult is correct");
	MULT_8(rotation);
	shape += rotation;
	//host-level loop: the body is emitted once per block of the piece
	for(int blocks_left = BLOCKS_PER_PIECE; blocks_left > 0; --blocks_left)
	{
		load(col, shape);
		++shape;
		load(row, shape);
		MULT_2(col);
		col += x;
		row += y;
		++shape;
		paint = PIECE_COLOR;
		//left and right pixel of the block
		inline_function::drawPixel(col,row,paint);
		++col;
		inline_function::drawPixel(col,row,paint);
	}

	//restore in the reverse order of the saves above
	RESTORE_VARIABLE(rotation);
	RESTORE_VARIABLE(r4);
	RESTORE_VARIABLE(r5);
	RESTORE_VARIABLE(r6);
	RESTORE_VARIABLE(r7);
}
//----------------------------------------------------------------------
//Inline function that redraws the whole game board: every board cell is read
//from memory and drawn as two horizontally adjacent pixels at
//(LEFT+1+2*x, TOP+y), using the stored cell value itself as the color.
//The previous pixel_color register (r5) was assigned but never read -- the
//color always comes from `data` -- so it and its save/restore were removed.
//r2, r3, r4, r6 and r7 are saved and restored; drawPixel additionally uses r8
//and r9.
void drawFallenPieces()
{
	//because this function doesn't take any arguments it is okay to
	//use the lower registers, even though that is against my convention
	cr16_int pixel_x(r7);
	cr16_int pixel_y(r6);
	cr16_int addr(r4);
	cr16_int data(r3);
	cr16_int x(r2);
	SAVE_VARIABLE(r7);
	SAVE_VARIABLE(r6);
	SAVE_VARIABLE(r4);
	SAVE_VARIABLE(r3);
	SAVE_VARIABLE(r2);
	
	//the for loop is preprocessor loop unrolling: one copy of the inner
	//cr16_while loop is emitted per board row
	for(int y=0; y < TETRIS_HEIGHT; ++y)
	{
		x = 0;
		cr16_while(x < TETRIS_WIDTH);
		{
			//I can get away with a multiplication 
			//because it is collapsed to a constant at compile time
			addr = (y*TETRIS_WIDTH)+x;
			load(data,addr);
			pixel_x = LEFT+1+x+x;
			pixel_y = TOP+y;
			drawPixel(pixel_x,pixel_y,data);
			++pixel_x;
			drawPixel(pixel_x,pixel_y,data);
			++x;
		} cr16_endwhile();
	}

	//restore in the reverse order of the saves above
	RESTORE_VARIABLE(r2);
	RESTORE_VARIABLE(r3);
	RESTORE_VARIABLE(r4);
	RESTORE_VARIABLE(r6);
	RESTORE_VARIABLE(r7);
}
//----------------------------------------------------------------------
//Get a new game piece
void changeCurrentPiece()
{
	cr16_int piece_list_index(r7);
	cr16_int piece(r6);
	cr16_int addr(r5);
	cr16_int tmp(r4);
	SAVE_VARIABLE(r7);
	SAVE_VARIABLE(r6);
	SAVE_VARIABLE(r5);
	SAVE_VARIABLE(r4);

	//This code reads the piece out of a list
	addr = memory_map::current_piece_list_position;
	load(piece_list_index, addr);
	++piece_list_index;
	cr16_if(piece_list_index >= PIECES_LIST_LENGTH);
	{
		piece_list_index = 0;
	} cr16_endif();
	store(piece_list_index,addr);

	tmp = memory_map::pieces_list;
	addr = piece_list_index + tmp;
	load(piece,addr);
	addr = memory_map::current_piece;
	store(piece,addr);

	////This code cycles thorugh the pieces
	//addr = memory_map::current_piece;
	//load(piece, addr);
	//piece += tetris_globals::SPACE_FOR_EACH_ROTATION * tetris_globals::NUMBER_OF_ROTATIONS;
	//cr16_if(piece > game_piece::right_foo);
	//{
	//	piece = game_piece::square;
	//} cr16_endif();
	//store(piece, addr);

	RESTORE_VARIABLE(r4);
	RESTORE_VARIABLE(r5);
	RESTORE_VARIABLE(r6);
	RESTORE_VARIABLE(r7);
}
//----------------------------------------------------------------------
void initialize_global_data()
{
	cr16_int addr(r0);
	cr16_int part0_x(r1);
	cr16_int part0_y(r2);
	cr16_int part1_x(r3);
	cr16_int part1_y(r4);
	cr16_int part2_x(r5);
	cr16_int part2_y(r6);
	cr16_int part3_x(r7);
	cr16_int part3_y(r8);
	cr16_int tmp(r9);
	SAVE_VARIABLE(r9);
	SAVE_VARIABLE(r8);
	SAVE_VARIABLE(r7);
	SAVE_VARIABLE(r6);
	SAVE_VARIABLE(r5);
	SAVE_VARIABLE(r4);
	SAVE_VARIABLE(r3);
	SAVE_VARIABLE(r2);
	SAVE_VARIABLE(r1);
	SAVE_VARIABLE(r0);

	{ //initalize memory_map
		cr16_int i(r8); //i is being treated as a pointer below
		cr16_int zero(r9);

		zero = 0;
		CR16_FOR(i = memory_map::game_board_start, i < memory_map::game_board_end, ++i, 
		( //becuase of how this is implemented must use '(' in place of '{' and ',' in place of ';' in the for loop
			store(zero,i)   
		)); //end CR16_FOR
	}

	///@todo also make this function do the inializing of the game_data memory area

	//load the memory address of the square peice
	addr = game_piece::square;
	//set the x and y value of all four tetris peice parts in all rotations
	for(int i=0; i < NUMBER_OF_ROTATIONS; ++i)
	{
		part0_x = 0;
		part0_y = 0;
		part1_x = 1;
		part1_y = 0;
		part2_x = 0;
		part2_y = 1;
		part3_x = 1;
		part3_y = 1;
		store(part0_x, addr);
		++addr;
		store(part0_y, addr);
		++addr;
		store(part1_x, addr);
		++addr;
		store(part1_y, addr);
		++addr;
		store(part2_x, addr);
		++addr;
		store(part2_y, addr);
		++addr;
		store(part3_x, addr);
		++addr;
		store(part3_y, addr);
		++addr;
	}

	addr = game_piece::line;
	//rotations 0 & 2 and 1 & 3 are the same
	for(int i=0; i < NUMBER_OF_ROTATIONS; i += 2)
	{
		//rotations 0 & 2
		part0_x = 1;
		part0_y = 0;
		part1_x = 1;
		part1_y = 1;
		part2_x = 1;
		part2_y = 2;
		part3_x = 1;
		part3_y = 3;
		store(part0_x, addr);
		++addr;
		store(part0_y, addr);
		++addr;
		store(part1_x, addr);
		++addr;
		store(part1_y, addr);
		++addr;
		store(part2_x, addr);
		++addr;
		store(part2_y, addr);
		++addr;
		store(part3_x, addr);
		++addr;
		store(part3_y, addr);
		++addr;

		//rotations 1 & 3
		part0_x = 0;
		part0_y = 0;
		part1_x = 1;
		part1_y = 0;
		part2_x = 2;
		part2_y = 0;
		part3_x = 3;
		part3_y = 0;
		store(part0_x, addr);
		++addr;
		store(part0_y, addr);
		++addr;
		store(part1_x, addr);
		++addr;
		store(part1_y, addr);
		++addr;
		store(part2_x, addr);
		++addr;
		store(part2_y, addr);
		++addr;
		store(part3_x, addr);
		++addr;
		store(part3_y, addr);
		++addr;
	}

	addr = game_piece::left_foo;
	//rotations 0 & 2 and 1 & 3 are the same
	for(int i=0; i < NUMBER_OF_ROTATIONS; i += 2)
	{
		//rotations 0 & 2
		part0_x = 0;
		part0_y = 0;
		part1_x = 1;
		part1_y = 0;
		part2_x = 1;
		part2_y = 1;
		part3_x = 2;
		part3_y = 1;
		store(part0_x, addr);
		++addr;
		store(part0_y, addr);
		++addr;
		store(part1_x, addr);
		++addr;
		store(part1_y, addr);
		++addr;
		store(part2_x, addr);
		++addr;
		store(part2_y, addr);
		++addr;
		store(part3_x, addr);
		++addr;
		store(part3_y, addr);
		++addr;

		//rotations 1 & 3
		part0_x = 1;
		part0_y = 0;
		part1_x = 0;
		part1_y = 1;
		part2_x = 1;
		part2_y = 1;
		part3_x = 0;
		part3_y = 2;
		store(part0_x, addr);
		++addr;
		store(part0_y, addr);
		++addr;
		store(part1_x, addr);
		++addr;
		store(part1_y, addr);
		++addr;
		store(part2_x, addr);
		++addr;
		store(part2_y, addr);
		++addr;
		store(part3_x, addr);
		++addr;
		store(part3_y, addr);
		++addr;
	}

	addr = game_piece::right_foo;
	//rotations 0 & 2 and 1 & 3 are the same
	for(int i=0; i < NUMBER_OF_ROTATIONS; i += 2)
	{
		//rotations 0 & 2
		part0_x = 1;
		part0_y = 0;
		part1_x = 2;
		part1_y = 0;
		part2_x = 0;
		part2_y = 1;
		part3_x = 1;
		part3_y = 1;
		store(part0_x, addr);
		++addr;
		store(part0_y, addr);
		++addr;
		store(part1_x, addr);
		++addr;
		store(part1_y, addr);
		++addr;
		store(part2_x, addr);
		++addr;
		store(part2_y, addr);
		++addr;
		store(part3_x, addr);
		++addr;
		store(part3_y, addr);
		++addr;

		//rotations 1 & 3
		part0_x = 0;
		part0_y = 0;
		part1_x = 0;
		part1_y = 1;
		part2_x = 1;
		part2_y = 1;
		part3_x = 1;
		part3_y = 2;
		store(part0_x, addr);
		++addr;
		store(part0_y, addr);
		++addr;
		store(part1_x, addr);
		++addr;
		store(part1_y, addr);
		++addr;
		store(part2_x, addr);
		++addr;
		store(part2_y, addr);
		++addr;
		store(part3_x, addr);
		++addr;
		store(part3_y, addr);
		++addr;
	}

	addr = game_piece::left_L;
	//All rotations are different
	{
		//rotation 0
		part0_x = 0;
		part0_y = 0;
		part1_x = 1;
		part1_y = 0;
		part2_x = 1;
		part2_y = 1;
		part3_x = 1;
		part3_y = 2;
		store(part0_x, addr);
		++addr;
		store(part0_y, addr);
		++addr;
		store(part1_x, addr);
		++addr;
		store(part1_y, addr);
		++addr;
		store(part2_x, addr);
		++addr;
		store(part2_y, addr);
		++addr;
		store(part3_x, addr);
		++addr;
		store(part3_y, addr);
		++addr;

		//rotation 1
		part0_x = 0;
		part0_y = 0;
		part1_x = 1;
		part1_y = 0;
		part2_x = 2;
		part2_y = 0;
		part3_x = 0;
		part3_y = 1;
		store(part0_x, addr);
		++addr;
		store(part0_y, addr);
		++addr;
		store(part1_x, addr);
		++addr;
		store(part1_y, addr);
		++addr;
		store(part2_x, addr);
		++addr;
		store(part2_y, addr);
		++addr;
		store(part3_x, addr);
		++addr;
		store(part3_y, addr);
		++addr;

		//rotation 2
		part0_x = 0;
		part0_y = 0;
		part1_x = 0;
		part1_y = 1;
		part2_x = 0;
		part2_y = 2;
		part3_x = 1;
		part3_y = 2;
		store(part0_x, addr);
		++addr;
		store(part0_y, addr);
		++addr;
		store(part1_x, addr);
		++addr;
		store(part1_y, addr);
		++addr;
		store(part2_x, addr);
		++addr;
		store(part2_y, addr);
		++addr;
		store(part3_x, addr);
		++addr;
		store(part3_y, addr);
		++addr;

		//rotation 3
		part0_x = 0;
		part0_y = 1;
		part1_x = 1;
		part1_y = 1;
		part2_x = 2;
		part2_y = 1;
		part3_x = 2;
		part3_y = 0;
		store(part0_x, addr);
		++addr;
		store(part0_y, addr);
		++addr;
		store(part1_x, addr);
		++addr;
		store(part1_y, addr);
		++addr;
		store(part2_x, addr);
		++addr;
		store(part2_y, addr);
		++addr;
		store(part3_x, addr);
		++addr;
		store(part3_y, addr);
		++addr;

	}

	addr = game_piece::right_L;
	//All rotations are different
	{
		//rotation 0
		part0_x = 0;
		part0_y = 0;
		part1_x = 1;
		part1_y = 0;
		part2_x = 0;
		part2_y = 1;
		part3_x = 0;
		part3_y = 2;
		store(part0_x, addr);
		++addr;
		store(part0_y, addr);
		++addr;
		store(part1_x, addr);
		++addr;
		store(part1_y, addr);
		++addr;
		store(part2_x, addr);
		++addr;
		store(part2_y, addr);
		++addr;
		store(part3_x, addr);
		++addr;
		store(part3_y, addr);
		++addr;

		//rotation 1
		part0_x = 0;
		part0_y = 0;
		part1_x = 0;
		part1_y = 1;
		part2_x = 1;
		part2_y = 1;
		part3_x = 2;
		part3_y = 1;
		store(part0_x, addr);
		++addr;
		store(part0_y, addr);
		++addr;
		store(part1_x, addr);
		++addr;
		store(part1_y, addr);
		++addr;
		store(part2_x, addr);
		++addr;
		store(part2_y, addr);
		++addr;
		store(part3_x, addr);
		++addr;
		store(part3_y, addr);
		++addr;

		//rotation 2
		part0_x = 1;
		part0_y = 0;
		part1_x = 1;
		part1_y = 1;
		part2_x = 0;
		part2_y = 2;
		part3_x = 1;
		part3_y = 2;
		store(part0_x, addr);
		++addr;
		store(part0_y, addr);
		++addr;
		store(part1_x, addr);
		++addr;
		store(part1_y, addr);
		++addr;
		store(part2_x, addr);
		++addr;
		store(part2_y, addr);
		++addr;
		store(part3_x, addr);
		++addr;
		store(part3_y, addr);
		++addr;

		//rotation 3
		part0_x = 0;
		part0_y = 0;
		part1_x = 1;
		part1_y = 0;
		part2_x = 2;
		part2_y = 0;
		part3_x = 2;
		part3_y = 1;
		store(part0_x, addr);
		++addr;
		store(part0_y, addr);
		++addr;
		store(part1_x, addr);
		++addr;
		store(part1_y, addr);
		++addr;
		store(part2_x, addr);
		++addr;
		store(part2_y, addr);
		++addr;
		store(part3_x, addr);
		++addr;
		store(part3_y, addr);
		++addr;
	}

	addr = game_piece::mountain;
	//All rotations are different
	{
		//rotation 0
		part0_x = 1;
		part0_y = 0;
		part1_x = 0;
		part1_y = 1;
		part2_x = 1;
		part2_y = 1;
		part3_x = 2;
		part3_y = 1;
		store(part0_x, addr);
		++addr;
		store(part0_y, addr);
		++addr;
		store(part1_x, addr);
		++addr;
		store(part1_y, addr);
		++addr;
		store(part2_x, addr);
		++addr;
		store(part2_y, addr);
		++addr;
		store(part3_x, addr);
		++addr;
		store(part3_y, addr);
		++addr;

		//rotation 1
		part0_x = 2;
		part0_y = 0;
		part1_x = 1;
		part1_y = 1;
		part2_x = 2;
		part2_y = 1;
		part3_x = 2;
		part3_y = 2;
		store(part0_x, addr);
		++addr;
		store(part0_y, addr);
		++addr;
		store(part1_x, addr);
		++addr;
		store(part1_y, addr);
		++addr;
		store(part2_x, addr);
		++addr;
		store(part2_y, addr);
		++addr;
		store(part3_x, addr);
		++addr;
		store(part3_y, addr);
		++addr;

		//rotation 2
		part0_x = 0;
		part0_y = 0;
		part1_x = 1;
		part1_y = 0;
		part2_x = 2;
		part2_y = 0;
		part3_x = 1;
		part3_y = 1;
		store(part0_x, addr);
		++addr;
		store(part0_y, addr);
		++addr;
		store(part1_x, addr);
		++addr;
		store(part1_y, addr);
		++addr;
		store(part2_x, addr);
		++addr;
		store(part2_y, addr);
		++addr;
		store(part3_x, addr);
		++addr;
		store(part3_y, addr);
		++addr;

		//rotation 3
		part0_x = 1;
		part0_y = 0;
		part1_x = 1;
		part1_y = 1;
		part2_x = 2;
		part2_y = 1;
		part3_x = 1;
		part3_y = 2;
		store(part0_x, addr);
		++addr;
		store(part0_y, addr);
		++addr;
		store(part1_x, addr);
		++addr;
		store(part1_y, addr);
		++addr;
		store(part2_x, addr);
		++addr;
		store(part2_y, addr);
		++addr;
		store(part3_x, addr);
		++addr;
		store(part3_y, addr);
		++addr;
	}


	//initalize the list of peices, rather than random numbers this is the list that we draw from
	{
		cr16_int value(r1);
		SAVE_VARIABLE(r1);

		assert(PIECES_LIST_LENGTH == 35 && "Inialize the values for a list size of 30");
		addr = memory_map::pieces_list;
		value = game_piece::square;
		store(value, addr);
		++addr;
		value = game_piece::left_L;
		store(value, addr);
		++addr;
		value = game_piece::mountain;
		store(value, addr);
		++addr;
		value = game_piece::line;
		store(value, addr);
		++addr;
		value = game_piece::left_L;
		store(value, addr);
		++addr;
		value = game_piece::right_foo;
		store(value, addr);
		++addr;
		value = game_piece::mountain;
		store(value, addr);
		++addr;
		value = game_piece::mountain;
		store(value, addr);
		++addr;
		value = game_piece::square;
		store(value, addr);
		++addr;
		value = game_piece::left_L;
		store(value, addr);
		++addr;
		value = game_piece::right_L;
		store(value, addr);
		++addr;
		value = game_piece::left_foo;
		store(value, addr);
		++addr;
		value = game_piece::left_L;
		store(value, addr);
		++addr;
		value = game_piece::square;
		store(value, addr);
		++addr;
		value = game_piece::line;
		store(value, addr);
		++addr;
		value = game_piece::line;
		store(value, addr);
		++addr;
		value = game_piece::mountain;
		store(value, addr);
		++addr;
		value = game_piece::left_foo;
		store(value, addr);
		++addr;
		value = game_piece::right_foo;
		store(value, addr);
		++addr;
		value = game_piece::right_L;
		store(value, addr);
		++addr;
		value = game_piece::square;
		store(value, addr);
		++addr;
		value = game_piece::square;
		store(value, addr);
		++addr;
		value = game_piece::left_foo;
		store(value, addr);
		++addr;
		value = game_piece::right_L;
		store(value, addr);
		++addr;
		value = game_piece::right_foo;
		store(value, addr);
		++addr;
		value = game_piece::mountain;
		store(value, addr);
		++addr;
		value = game_piece::left_L;
		store(value, addr);
		++addr;
		value = game_piece::right_foo;
		store(value, addr);
		++addr;
		value = game_piece::line;
		store(value, addr);
		++addr;
		value = game_piece::right_L;
		store(value, addr);
		++addr;
		value = game_piece::right_foo;
		store(value, addr);
		++addr;
		value = game_piece::left_foo;
		store(value, addr);
		++addr;
		value = game_piece::right_L;
		store(value, addr);
		++addr;
		value = game_piece::line;
		store(value, addr);
		++addr;
		value = game_piece::left_foo;
		store(value, addr);
		++addr;
		value = game_piece::mountain;
		store(value, addr);
		++addr;

		RESTORE_VARIABLE(r1);
	}

	inline_function::changeCurrentPiece();
	tmp = 0;
	addr = memory_map::static_y;
	store(tmp,addr);
	tmp = 0;
	addr = memory_map::lines_distroyed;
	store(tmp,addr);
	tmp = tetris_constants::STARTING_COUNT_MAX;
	addr = memory_map::count_max;
	store(tmp,addr);

	RESTORE_VARIABLE(r0);
	RESTORE_VARIABLE(r1);
	RESTORE_VARIABLE(r2);
	RESTORE_VARIABLE(r3);
	RESTORE_VARIABLE(r4);
	RESTORE_VARIABLE(r5);
	RESTORE_VARIABLE(r6);
	RESTORE_VARIABLE(r7);
	RESTORE_VARIABLE(r8);
	RESTORE_VARIABLE(r9);
}
//----------------------------------------------------------------------

//inline function that prints what buttons are being pressed down in the 
//upper left hand corner of the screen
// uses R4-R9
void debug_print_controller()
{
	//because this function takes no arguments it is okay to use lower
	//registers even though that is against programming convention
	cr16_int tmp(r7);
	cr16_int bit(r6);
	cr16_int x(r5);
	cr16_int y(r4);
	cr16_int nes_buttons(r3);
	SAVE_VARIABLE(r7);
	SAVE_VARIABLE(r6);
	SAVE_VARIABLE(r5);
	SAVE_VARIABLE(r4);
	SAVE_VARIABLE(r3);

	tmp = 0xE000;
	load(nes_buttons,tmp);
	x = 1;
	y = 0;
	bit = nes_buttons;
	andi(1, bit);
	drawPixel(x,y, bit);
	lshi(-1,nes_buttons);
	++x;
	bit = nes_buttons;
	andi(1, bit);
	drawPixel(x,y, bit);
	lshi(-1,nes_buttons);
	++x;
	bit = nes_buttons;
	andi(1, bit);
	drawPixel(x,y, bit);
	lshi(-1,nes_buttons);
	++x;
	bit = nes_buttons;
	andi(1, bit);
	drawPixel(x,y, bit);
	lshi(-1,nes_buttons);
	++x;
	bit = nes_buttons;
	andi(1, bit);
	drawPixel(x,y, bit);
	lshi(-1,nes_buttons);
	++x;
	bit = nes_buttons;
	andi(1, bit);
	drawPixel(x,y, bit);
	lshi(-1,nes_buttons);
	++x;
	bit = nes_buttons;
	andi(1, bit);
	drawPixel(x,y, bit);
	lshi(-1,nes_buttons);
	++x;
	bit = nes_buttons;
	andi(1, bit);
	drawPixel(x,y, bit);
	lshi(-1,nes_buttons);
	++x;

	RESTORE_VARIABLE(r3);
	RESTORE_VARIABLE(r4);
	RESTORE_VARIABLE(r5);
	RESTORE_VARIABLE(r6);
	RESTORE_VARIABLE(r7);
}
//----------------------------------------------------------------------
//writes a checkered pattern to the screen
void debug_setCheckeredBackground()
{
	cr16_int x(r7);
	cr16_int y(r6);
	cr16_int color(r5);
	SAVE_VARIABLE(r7);
	SAVE_VARIABLE(r6);
	SAVE_VARIABLE(r5);

	x = 0;
	cr16_while(x < WIDTH);
	{
		y=0;
		cr16_while(y < HEIGHT);
		{
			//don't need to modulus color because the hardware 
			//only looks at the bottom two bits
			color = x+y;
			drawPixel(x, y, color);
			++y;
		} cr16_endwhile();
		++x;
	} cr16_endwhile();

	RESTORE_VARIABLE(r5);
	RESTORE_VARIABLE(r6);
	RESTORE_VARIABLE(r7);
}
//----------------------------------------------------------------------
//----------------------------------------------------------------------
//----------------------------------------------------------------------
//----------------------------------------------------------------------
//----------------------------------------------------------------------
//----------------------------------------------------------------------
//----------------------------------------------------------------------
//----------------------------------------------------------------------
//----------------------------------------------------------------------
//----------------------------------------------------------------------
//----------------------------------------------------------------------
//----------------------------------------------------------------------

} //.end namespace inline_function

void cr16main()
{
//if (debug)	cerr << "Stack Begins at " << BEGIN_STACK << endl;
activateHazardDetectionUnit();

//main function
addLabel("main");
{
	//Register convention used throughout this program:
	//R0-R3 will NEVER be used by any inline function that takes
	//arguments, because these registers might be passed in as data.
	//Real functions, and inline functions that take no arguments,
	//may use these registers only so long as the called function
	//saves and restores them.

	//This convention is necessary because, while CASM supports
	//inline functions, it does not support automatic variable to
	//register mapping; as a result you must be careful whenever
	//you use inline functions.
	cr16_int x(r0);
	cr16_int y(r1);
	cr16_int rotation(r2);
	cr16_int result(r3);

	//These registers are local variables that will never be passed
	//into inline functions.  Called functions must save these
	//registers away and restore them, so I can assume that they
	//always hold the correct value.
	//Note: rot_down is only declared here; it is not given an initial
	//value anywhere in this start-up section.
	cr16_int nes_buttons(r4);
	cr16_int rot_down(r5); //@todo could use the high bits of rotation
	cr16_int count(r6);
	cr16_int color(r7);

	//registers R8-R9 are assumed to be destroyed when a function call is made
	//registers R10-R13 are reserved for the CASM compiler
	//register R14 stores the return address for a function call;
	//             because I have so much data memory (64k words) I almost
	//             always inline function calls so I don't use this much
	//register R15 is the stack pointer

	//the stack pointer must be set before the first SAVE_VARIABLE/RESTORE_VARIABLE
	SP = memory_map::BEGIN_STACK;
	//Most of the time I just do functions inline, but this is an example of
	//how an actual function call would be made.
	FUNCTION_CALL("initVGA");
	//This is an inline function: at compile time it is expanded to be the full
	//function
	inline_function::flipPage();

	//initialize game data
	rotation = 0;
	count = 0;
	color = BACKGROUND_COLOR;
	//r1 (which is also the register behind y) walks the game_data array, which
	//begins at memory address 0.  CR16_FOR expands to a cr16_while loop, so this
	//is a loop on the target rather than compile-time unrolling.
	r1 = 0; //beginning memory address of game_data array
	CR16_FOR(r1=0, r1 < TETRIS_WIDTH * TETRIS_HEIGHT, ++r1,
	(
		store(color, r1)
	));
	//Alternative kept for reference (compile-time unrolled version of the loop above):
	//for(int i=0; i < TETRIS_WIDTH * TETRIS_HEIGHT; ++i) {
	//	//remember that this for loop happens at compile time, it can be thought of
	//	//as a preprocessor directive so what I am doing here
	//	//is loop unrolling at compile time.
	//	store(color, r1);
	//	++r1;
	//}
	//draw the background and border, then flip the page; done twice, with r1
	//again serving as the loop counter
	CR16_FOR(r1=0, r1 < 2, ++r1,
	(
		inline_function::setBackground(),
		inline_function::drawBorder(),
		inline_function::flipPage()
	));
	//Idle loop: spins with the sound off until START is pressed, then jumps
	//to "startGame".  The game also jumps back here when it is stopped or lost.
	addLabel("waitForStart");
	cr16_while(1);
	{
		cr16_int scratch(r8);

		sound_inline_function::sound_off();
		//change the current piece on every pass so we get a random starting
		//piece: which one is current depends on when START is pressed
		inline_function::changeCurrentPiece();

		//poll the controller
		scratch = NES_ADDR;
		load(nes_buttons,scratch);
		cr16_if(nes_buttons & START_BUTTON);
		{
			//scratch is reused to hold the jump target
			loadLabelAddress("startGame",scratch);
			sound_inline_function::sound_off();
			jCond(UC, scratch);
			flushPipeline();
		} cr16_endif();
	} cr16_endwhile();

	//Entry point for a new game (reached from the waitForStart loop).
	//Redraws the playfield, reloads the global tables and puts the piece
	//state back to its starting values.
	addLabel("startGame");
	{
		//this for loop happens at compile time (loop unrolling):
		//once for each memory buffer
		for(int i=0; i < 2; ++i)
		{
			inline_function::setBackground();
			inline_function::drawBorder();
			inline_function::flipPage();
		}

		//initialize data that I am treating as global constants
		inline_function::initialize_global_data();

		//starting position of the first piece
		y = TOP;
		x = LEFT+9;
		//Every later piece is spawned with rotation 0; do the same for the
		//first piece so a game restarted with SELECT does not inherit the
		//rotation of the piece that was in play.
		rotation = 0;
		//rot_down is tested at the top of the rotate logic in the main game
		//loop, so give it a defined "no rotate button held" value first.
		rot_down = 0;
		count = 0;
	}
	
////////////////////////////////////////////////////////////////////////////
//                     Main Game Loop                                     //
////////////////////////////////////////////////////////////////////////////
	//One pass through this block handles one round of input, gravity, sound
	//and drawing, then jumps back to the label to start the next pass.
	addLabel("mainGameLoop");
	{
		cr16_int mem_ptr(r8);
		cr16_int scratch(r9);

		//debug is evaluated while assembling, so think of this as a
		//preprocessor conditional
		if (debug) {
			inline_function::debug_print_controller();
		}

		//read the controller
		scratch = NES_ADDR;
		load(nes_buttons,scratch);

		//---- sideways movement: try the move, undo it when the new position
		//---- is rejected or when count has any SLIDE_MASK bit set
		cr16_if(nes_buttons & RIGHT_BUTTON);
		{
			x = x+2;
			inline_function::checkCurrentPiece(result,x,y,rotation);
			cr16_if((result == FALSE) | (count & SLIDE_MASK));
			{
				x = x-2;
			} cr16_endif();
		} cr16_endif();
		cr16_if(nes_buttons & LEFT_BUTTON);
		{
			x = x-2;
			inline_function::checkCurrentPiece(result,x,y,rotation);
			cr16_if((result == FALSE) | (count & SLIDE_MASK));
			{
				x = x+2;
			} cr16_endif();
		} cr16_endif();
		cr16_if(nes_buttons & UP_BUTTON);
		{
			//@todo Right now UP_BUTTON does nothing.  I think later on up should
			//drop the piece
		} cr16_endif();

		//---- soft drop: a successful step down also restarts the fall counter
		cr16_if(nes_buttons & DOWN_BUTTON);
		{
			++y;
			inline_function::checkCurrentPiece(result,x,y,rotation);
			cr16_if(result == FALSE);
			{
				--y;
			} cr16_else();
			{
				count = 0;
			} cr16_endif();
		} cr16_endif();

		//---- rotation: rot_down remembers the buttons seen at the last
		//---- rotation; while it is non-zero no new rotation is attempted
		cr16_if(rot_down);
		{
			//@todo can probably take these two statements out of the if block
			//@todo the next statement is probably not needed
			rot_down = rot_down & (A_BUTTON | B_BUTTON);
			rot_down = rot_down & (nes_buttons & (A_BUTTON | B_BUTTON));
		} cr16_else();
		{
			cr16_if(nes_buttons & B_BUTTON);
			{
				++rotation;
				rot_down = nes_buttons;
				inline_function::checkCurrentPiece(result,x,y,rotation);
				cr16_if(result == FALSE);
				{
					//rotation rejected: put it back and forget the press
					--rotation;
					rot_down = 0;
				} cr16_endif();
			} cr16_endif();
			cr16_if(nes_buttons & A_BUTTON);
			{
				--rotation;
				rot_down = nes_buttons;
				inline_function::checkCurrentPiece(result,x,y,rotation);
				cr16_if(result == FALSE);
				{
					//rotation rejected: put it back and forget the press
					++rotation;
					rot_down = 0;
				} cr16_endif();
			} cr16_endif();
		} cr16_endif();

		cr16_if(nes_buttons & SELECT_BUTTON);
		{
			//stop the game (r0 is overwritten with the jump target)
			loadLabelAddress("waitForStart",r0);
			jCond(UC, r0);
			flushPipeline();
		} cr16_endif();

		//---- gravity: once count passes count_max + MIN_COUNT_MAX the piece
		//---- falls one row
		mem_ptr = memory_map::count_max;
		load(scratch,mem_ptr);
		cr16_if(count > scratch+MIN_COUNT_MAX);
		{
			++y;
			inline_function::checkCurrentPiece(result,x,y,rotation);
			cr16_if(result == FALSE);
			{
				//the piece cannot fall any further: lock it in where it was
				--y;
				inline_function::placeCurrentPiece(x,y,rotation);
				//distroyLines is a real function call rather than an inline one
				//(the original note says: because it might call itself)
				FUNCTION_CALL("distroyLines");
				//spawn position for the next piece
				y = TOP;
				x = LEFT+9;
				rotation = 0;
				inline_function::checkCurrentPiece(result,x,y,rotation);
				cr16_if(result == FALSE);
				{
					//no room at the spawn position: go back to waiting for START
					SAVE_VARIABLE(r0);
					loadLabelAddress("waitForStart", r0);
					jCond(UC, r0);
					RESTORE_VARIABLE(r0); //this will still happen because we have 4 instructions
					//after a jump before the jump takes effect
					flushPipeline();
				} cr16_endif();
				inline_function::changeCurrentPiece();
			} cr16_endif();
			count = 0;
		} cr16_endif();

		//---- sound: effect 0 plays while count is below 3, otherwise silence
		cr16_if(count < 3);
		{
			sound_inline_function::sound_effect0();
		} cr16_else();
		{
			sound_inline_function::sound_off();
		} cr16_endif();

		//---- draw, show the page, and go around again
		++count;
		inline_function::drawFallenPieces();
		inline_function::drawCurrentPiece(x,y,rotation);
		inline_function::flipPage();
		loadLabelAddress("mainGameLoop",scratch);
		jCond(UC, scratch);
		flushPipeline();
	} //.end main game loop

	//end the program: jump to the "theEnd" label at the bottom of cr16main.
	//NOTE(review): the main game loop above finishes with an unconditional
	//jump back to "mainGameLoop", so these instructions do not appear to be
	//reachable by fall-through -- confirm before relying on them.
	loadLabelAddress("theEnd",r0);
	jCond(UC, r0);
	flushPipeline();
}


//----------------------------------------------------------------------
//Destroys every complete line on the board,
//like a good tetris game should :)
//This function takes no arguments and is entered with FUNCTION_CALL.
//
//Rows are examined from TETRIS_HEIGHT-1 down to 0.  The same row is
//examined again for as long as lookAndDistroySingleLine reports TRUE, since
//the rows above it have just been moved down into it.  For every destroyed
//line:
//  - memory_map::lines_were_distroyed is set to TRUE
//  - memory_map::lines_distroyed is incremented
//  - memory_map::count_max is decremented, but never below 0
//If at least one line was destroyed the score is redrawn (twice, with a
//page flip after each redraw) and sound effect 1 is started.
//
//ret_addr and r0-r7 are pushed on entry and popped before returning.
//ret_addr has to be saved because this routine makes FUNCTION_CALLs of its
//own; r2-r4 are saved because lookAndDistroySingleLine writes r2-r5
//without saving them.
addLabel("distroyLines");
{
	cr16_int i(r7);
	cr16_int addr(r6);
	cr16_int tmp(r5);
	cr16_int function_call_ColumnToLookAt(r1); //argument register of the callee
	cr16_int result(r0);                       //return register of the callee
	SAVE_VARIABLE(ret_addr);
	SAVE_VARIABLE(r7);
	SAVE_VARIABLE(r6);
	SAVE_VARIABLE(r5);
	SAVE_VARIABLE(r4);
	SAVE_VARIABLE(r3);
	SAVE_VARIABLE(r2);
	SAVE_VARIABLE(r1);
	SAVE_VARIABLE(r0);

	//nothing has been destroyed yet
	tmp = FALSE;
	addr = memory_map::lines_were_distroyed;
	store(tmp,addr);

	i = TETRIS_HEIGHT-1;
	cr16_while(i >= 0);
	{
		function_call_ColumnToLookAt = i;
		FUNCTION_CALL("lookAndDistroySingleLine");
		cr16_while(result);
		{
			//remember that something was destroyed; the flag is simply
			//overwritten, so there is no need to load it first
			addr = memory_map::lines_were_distroyed;
			tmp = TRUE;
			store(tmp,addr);

			//one more line on the score
			addr = memory_map::lines_distroyed;
			load(tmp,addr);
			++tmp;
			store(tmp,addr);

			//lower count_max by one, clamped at zero
			addr = memory_map::count_max;
			load(tmp,addr);
			--tmp;
			cr16_if(tmp < 0);
			{
				tmp = 0;
			} cr16_endif();
			store(tmp,addr);

			//the argument register is set again before every call
			function_call_ColumnToLookAt = i;
			FUNCTION_CALL("lookAndDistroySingleLine");
		} cr16_endwhile();
		--i;
	} cr16_endwhile();

	addr = memory_map::lines_were_distroyed;
	load(tmp,addr);
	cr16_if(tmp);
	{
		//redraw the score; printReg9 is handed the line count in r9
		CR16_FOR(i=0, i < 2, ++i,
		(
			inline_function::clearScore(),
			addr = memory_map::lines_distroyed,
			load(r9,addr),
			inline_function::printReg9(),
			inline_function::flipPage()
		));
		sound_inline_function::sound_effect1();
	} cr16_endif();

	//pop in the reverse order of the pushes above
	RESTORE_VARIABLE(r0);
	RESTORE_VARIABLE(r1);
	RESTORE_VARIABLE(r2);
	RESTORE_VARIABLE(r3);
	RESTORE_VARIABLE(r4);
	RESTORE_VARIABLE(r5);
	RESTORE_VARIABLE(r6);
	RESTORE_VARIABLE(r7);
	RESTORE_VARIABLE(ret_addr);
	FUNCTION_RETURN;
}

//----------------------------------------------------------------------
//THIS FUNCTION TAKES PARAMETERS AND HAS A RETURN VALUE!!!!!!
//Examines one row of the board and, if it is complete, removes it by moving
//every row above it down one row and blanking the top row.
//  in : r1 = index of the row to examine (the variable is called
//            ColumnToLookAt, but it is multiplied by TETRIS_WIDTH to get the
//            address of the first cell of the row)
//  out: r0 = TRUE if a row was destroyed, FALSE otherwise
//This function writes r2-r5 without saving them, so the caller has to
//preserve those registers itself.
//NOTE(review): whether the MULT macro leaves r1 intact is not visible here;
//callers should set r1 again before every call.
addLabel("lookAndDistroySingleLine");
{
	//Arguments passed in:
	cr16_int ColumnToLookAt(r1);

	//Return Value
	cr16_int result(r0);

	//Scratch registers
	cr16_int base_addr(r2);
	cr16_int addr(r3);
	cr16_int data(r4);
	cr16_int tmp(r5);

	result = FALSE;
	base_addr = TETRIS_WIDTH;
	MULT(base_addr, tmp, ColumnToLookAt); //base_addr = TETRIS_WIDTH * ColumnToLookAt

	//AND every cell of the row into data, starting from FALLEN_PIECE_COLOR;
	//data is still non-zero afterwards only if no cell cleared all of its bits
	data = FALLEN_PIECE_COLOR;
	addr = base_addr;
	//this for loop is CASM compile time loop unrolling
	for(int i=0; i < TETRIS_WIDTH; ++i)
	{
		load(tmp, addr);
		++addr;
		data = data & tmp;
	}
	cr16_if(data);
	{
		//redefine variable names for more readable code; because these registers are
		//also defined above this could best be thought of as a C union
		cr16_int source_addr(r3);
		cr16_int dest_addr(r4);
		cr16_int data(r5);

		result = TRUE; //TRUE we did destroy a line

		//We have a complete row that needs to be destroyed.  Copy all the rows
		//above it down a row, walking backwards from the last cell of the row
		//just above (base_addr - 1) to the very first cell of the board
		//(address 0).  Going backwards means no cell is overwritten before it
		//has been copied.
		source_addr = base_addr - 1;
		dest_addr = source_addr + TETRIS_WIDTH;
		cr16_while(source_addr >= 0);
		{
			load (data, source_addr);
			store(data, dest_addr);
			--source_addr;
			--dest_addr;
		} cr16_endwhile();

		//The copy loop stops with source_addr == -1, which leaves dest_addr at
		//TETRIS_WIDTH - 1, the last cell of the top row.  Blank exactly that
		//row.  When the complete row is the top row itself the copy loop
		//does nothing and this loop alone removes it.
		data = BACKGROUND_COLOR;
		cr16_while(dest_addr >= 0);
		{
			store(data, dest_addr);
			--dest_addr;
		} cr16_endwhile();

		//Checking whether the row that just moved down is complete as well is
		//left to the caller: distroyLines calls this function again for the
		//same row for as long as TRUE is returned.
	} cr16_endif();
	FUNCTION_RETURN;
}

//----------------------------------------------------------------------
//You must call this function before using VGA.
//Takes no arguments and returns nothing; it writes the value 1 to address
//0xf000 (sets up the first page) using r8 and r9 as scratch registers.
addLabel("initVGA");
{
	cr16_int vga_addr(r8);
	cr16_int page(r9);
	//No other function is called from here, so ret_addr can stay in its
	//register.  If a call were ever added, ret_addr would have to be pushed
	//onto the stack first and restored right before FUNCTION_RETURN.

	// setup first page
	vga_addr = 0xf000;
	page = 1;
	store( page, vga_addr );
	FUNCTION_RETURN;
}

//Label marking the end of the program.  Nothing needs to follow it because
//CASM automatically puts an infinite loop at the bottom of the program, so
//a jump to "theEnd" simply parks the processor there.
addLabel("theEnd");
}
