/* Version of RasterOp for bitmaps organized in row-order (e.g. Sun-2 FB).
 * Very MC68xxx-dependent in details, though the logic should be portable.
 * Per Bothner, Oct 1984.
 * If UNARY is defined, generates UnaryOpS(out, function) instead of
 * RasterOpS(out, in, function).
 */
/*#include <bitmaps.h>*/
#include <rasterops.h>
#include <framebuf.h>
#include <m68000.h>
#include "Vfont.h"
#define compileWI(x) {*code++ = (x);}
#define compileLI(x) { i = (x); asm(" movl d6,a2@+"); } 
#define loadW(x) {asm(" .word 0x3C3C"); asm(x);} /* i = x, short */
#define loadL(x) {asm(" .word 0x2C3C"); asm(x);} /* i = x, long */
#define compileW(x) {asm(" .word 0x34FC"); asm(x);} /* *code++ = asm(x) */
#define compileL(x) {loadL(x); asm(" movl d6,a2@+");} /* *code++ = x */
typedef int (*Func)();

VRaster *
#ifndef UNARY
RasterOpS(out, in, function)
  register VRaster *out, *in;		/* a5, a4 */
#else UNARY
UnaryOpS(out, function)
  register VRaster *out;		/* a5 */
#endif UNARY
  int function;	/* a subset of the GXfunction codes */
  {
#ifdef UNARY
    register *dummy;			/* a4 */
#endif
    register u_short *outp;		/* a3 */
    register u_short *code;		/* a2 */
    register unsigned mask;		/* d7 */
#define compileSetMaskW(mask) {compileWI(0x3E3C); compileWI(mask);}
#define compileSetMaskL(mask) {compileWI(0x2E3C); compileLI(mask);}
    register i;				/* d6 used by loadW etc */
    register width;			/* d5 */
    register rows;			/* d4 */
    register j;				/* d3 */
    short dstStride;	/* bytes to increment a3 at new row */
#ifndef UNARY
    short srcStride;	/* bytes to increment a1 at new row */
    u_short rotRight[4], rotLen;
    register shift;			/* d2 */
#endif UNARY
    u_short *outerTop, *innerTop;
    u_short CodeTable[60];	/* "compile" into this buffer */

/* Register usage (at "run"-time):
 * a1 - points to current source word.
 * a3 - points to current desctination word.
 * d0 - working storage, used by RasterCompile.
 * d1 - working storage, for current destination word.
 * d2 - save right-most 'shift' bits of source here for later.
 * d3 - working storage to help with masking and shifting.
 * d4 - number of rows remaining.
 * d5 - number of columns remaining
 * d7 - mask - 0 bits where dest should be unchanged.
 */

    width=out[-1].bBox.h;
    rows = out[-1].bBox.v;
    dstStride = out[-1].stride;
#ifndef UNARY
    if (width > (i = in[-1].bBox.h)) width = i;
    if (rows > (i = in[-1].bBox.v)) rows = i;
    shift = BitOffset(out) - BitOffset(in);
    srcStride = in[-1].stride;
#endif
    if (width <= 0 || rows <= 0) return;
 /* some of the code (primarily for the leftmost column)
  * assumes that shifts are -15..15, though for a true 32-bit machine it
  * should handle shifts of -31..31 (for alignment reasons)
  */

    code = CodeTable;
    outerTop = code;
    if ((i = BitOffset(out)) > 0) /* bitOffsets assumed 0..15 */
      { /* compile left column - currently does it as 16 bits only */
	mask = 0xFFFF >> i;
	if ((width += BitOffset(out) - 16) <= 0)
	  { /* only one destination column */
	    mask &= (-1) << (- width);
#ifndef UNARY
	    /* test if one one source column */
	    if (width + shift <= 0) shift &= 15; /* make positive */
#endif UNARY
	    width = 0;
	  }
#ifndef UNARY
	if (shift < 0)
          {
	    compileW(" movl a1@+,d1"); /* load src into d1 */
	    srcStride -= 4; /* since we load a whole long-word at once */
	    compileW(" movl d1,d2");
	    shift += 16; /* fix shift from -15..-1 to 1..15 */
	    if (shift <= 8)
	      {loadW(" rorl #8,d1"); i |= (shift&7) << 9; compileWI(i);}
	    else
	      { loadW(" moveq #0,d0"); compileWI(i + shift);
	        compileW(" rorl d0,d1");
	      }
	   }
	else
          {
	    compileW(" movw a1@+,d1");	/* load src into d1 */
	    srcStride -= 2;
	    if (shift > 0)
	      {
		compileW(" movw d1,d2");	/* save, for later columns */
		if ((shift&0xF) > 8)		/* rolw #16-shift,d1 */
		 { loadW(" rolw #8,d1"); compileWI(i | (((-shift)&0x7)<<9));}
		else		/* rorw #shift,d1 */
		  { loadW(" rorw #8,d1"); compileWI(i | ((shift&7)<<9));}
	      }
          }
#endif UNARY
	compileSetMaskW(mask);
	i = function; RasterCompileW();
	dstStride -= 2;
      }
#ifndef UNARY
    else if (shift != 0)
      {
	compileW(" movw a1@+,d2");	/* load src into d2 */
	srcStride -= 2;
	shift &= 15;
      }
    rotLen = 0;
    if (shift != 0)
      {
	if (shift > 8) { loadW(" swap d1"); rotRight[rotLen++] = i; }
	if ((shift&0xF) > 8)
	  { loadW(" roll #8,d1"); rotRight[rotLen++] = i|(((-shift)&0x7)<<9);}
	else
	  { loadW(" rorl #8,d1"); rotRight[rotLen++] = i | ((shift&7)<<9);}
      }
#endif UNARY
    if (width >= 32)
      {
	/* compile inner loop: scan along one row */
	mask = 0xFFFFFFFF; compileW(" moveq #-1,d7");
	i = width>>5; /* # of columns */
	*code++ = 0x3A3C; *code++ = i-1; /* movw #column-1,d5 */
	innerTop = code;
	i <<= 2; /* # of bytes */
	dstStride -= i;
#ifndef UNARY
	srcStride -= i;
	compileW(" movl a1@+,d1");	/* load src into d1 */
	if (shift != 0)
	  {
	    /* The idea is that the residue from the previous source fetch is
	     * kept in the right 'shift' bits of d2, and used the next time.
	     */
	    /* mask in left-over bits from previous word */
	    compileW(" eorl d1,d2");	/* d2 ^= d1 */
	    compileW(" andl d3,d2");	/* d2 &= j; */
	    compileW(" eorl d1,d2");	/* d2 ^= d1 */
	    /* move result to d1; save d1 in d2 */
	    compileW(" exg d1,d2");
	    /* next rotate 'd1' right by 'shift' */
	    for (j = rotLen, outp = rotRight; --j >= 0; ) *code++ = *outp++;
	  }
#endif UNARY
	i = function; RasterCompileL();
	/* Compile bottom of inner loop: dbf d5,innerTop */
	*code++ = 0x51CD; i = (char*)innerTop - (char*)code; *code++ = i;
	i = 4*(width>>5);
      }
    if (i = width & 31)
      {
	if (i <= 16)
	  {
	    compileSetMaskW(mask = (-1) << (16 - i));
#ifndef UNARY
	    if (i > shift)
	      {
		compileW(" movw a1@+,d1");	/* load src into d1 */
		srcStride -= 2;
		if (shift != 0)
		  {
		    /* mask in left-over bits from previous word */
		    compileW(" eorw d1,d2");	/* d2 ^= d1 */
		    compileW(" andw d3,d2");	/* d2 &= j; */
		    compileW(" eorw d1,d2");	/* d2 ^= d1 */
		    /* move result to d1 */
		    compileW(" movw d2,d1");
		  }
	      }
	    else
		compileW(" movw d2,d1");
	    if (shift >= 8)		/* rolw #16-shift,d1 */
 	     { loadW(" rolw #8,d1"); compileWI(i | (((-shift)&0x7)<<9));}
	    else if (shift > 0)		/* rorw #shift,d1 */
	      { loadW(" rorw #8,d1"); compileWI(i | (shift<<9));}
#endif UNARY
	    i = function; RasterCompileW(); dstStride -= 2;
	  }
	else
	  {
	    compileSetMaskL(mask = (-1) << (32-i));
#ifndef UNARY
	    if (i - shift > 16)
	      {
		compileW(" movl a1@+,d1");	/* load src into d1 */
	        srcStride -= 4;
	      }
	    else	    
	      {
		compileW(" movw a1@+,d1");	/* load src into d1 */
		compileW(" swap d1");
		srcStride -= 2;	    
	      }
	    if (shift != 0)
	      {
		/* mask in left-over bits from previous word */
		compileW(" eorl d1,d2");	/* d2 ^= d1 */
		compileW(" andl d3,d2");	/* d2 &= j; */
		compileW(" eorl d1,d2");	/* d2 ^= d1 */
		/* move result to d1 */
		compileW(" movl d2,d1");
	      }
	    for (j = rotLen, outp = rotRight; --j >= 0;) *code++ = *outp++;
#endif UNARY
	    i = function; RasterCompileL(); dstStride -= 4;
	  }
      }
#ifndef UNARY
    if (srcStride > 0)
      { *code++ = 0xD2FC; *code++ = srcStride; } /* addw #srcStride,a1 */
#endif UNARY
    if (dstStride > 0)
      { *code++ = 0xD6FC; *code++ = dstStride; } /* addw #dstStride,a3 */

    /* Compile bottom of outer loop: dbf d4,outerTop */
    *code++ = 0x51CC; i = (char*)outerTop - (char*)code; *code++ = i;
    compileW(" rts");
    rows--; /* since loop counts down to 0 */
    outp = (u_short*)out[-1].start;
#ifndef UNARY
    a1 = (long*)in[-1].start;
    j = ~((-1) << shift);	 /* setup d3 */
#else UNARY
    asm(" clrl d1");
#endif UNARY
    (*(Func)CodeTable)();
}
