/*
 * swab.c
 *
 * much faster than, functionally equivalent to
 * 	swab.c	4.1 (Berkeley) 12/21/80
 *
 *	Swap bytes in 16-bit [half-]words
 *	for going between the 11 and the interdata
 *
 * IMPORTANT: This routine allows pf == pt, although not general
 *		overlap.
 *
 * Author:
 *	Jeffrey Mogul @ Stanford	21 April 1983
 *
 * Method:
 *	(1) Use a better copy loop.  The non-overhead code takes
 *	3 Vax instructions, instead of 5 for old version.
 *
 *	(2) Unroll the loop.  If the loop is unrolled x times,
 *	swabbing n words takes about 3.25*n instructions instead of
 *	about 7*n instructions.  I picked x == 8, mostly by guess.
 *	x == 16 doesn't seem to change the timings at all, and x == 4
 *	seems to hurt performance slightly on long buffers.
 *
 *	(3) Change divides to shifts.  This makes a real difference for
 *	the Sun, since it lacks a long-word divide instruction.
 *
 * Results:
 *   Vax:
 *	[ratios are about the same on 11/780 and 11/750]
 *	For short buffers (32 bytes), this routine is about 1.7 times
 *	faster.  For long buffers (512 bytes and up), this routine
 *	is about 2.5 times faster.
 *   Sun:
 *	Short buffers: 2.8 times faster; long buffers: 3.1 times faster.
 *
 */

/*
 * Unrolling configuration.  UR_BITS is the base-2 logarithm of the
 * unroll factor, so that the divide and modulo by the unroll factor
 * can be done with a shift and a mask.  Supported values are 0..4.
 */
#define	UR_BITS	3		/* lg(# of times loop unrolled) */
#define	UR_TIMES (1<<UR_BITS)	/* # of times loop unrolled */
#define	UR_MASK	(UR_TIMES - 1)	/* mask yielding (count mod UR_TIMES) */

/*
 * Swap one 16-bit word and advance both pointers.  Expects `pf', `pt'
 * and `tmp' to be in scope.  The first source byte is saved before
 * anything is stored, which is what makes pf == pt safe.
 */
#define	SWAB_STEP	do { tmp = *pf++; *pt++ = *pf++; *pt++ = tmp; } while (0)

/* Unrolled groups of 2, 4, 8 and 16 words, built by doubling. */
#define	SWAB_STEP2	do { SWAB_STEP; SWAB_STEP; } while (0)
#define	SWAB_STEP4	do { SWAB_STEP2; SWAB_STEP2; } while (0)
#define	SWAB_STEP8	do { SWAB_STEP4; SWAB_STEP4; } while (0)
#define	SWAB_STEP16	do { SWAB_STEP8; SWAB_STEP8; } while (0)

/* SWAB_BLOCK swaps exactly UR_TIMES words. */
#if	UR_BITS == 0
#define	SWAB_BLOCK	SWAB_STEP
#elif	UR_BITS == 1
#define	SWAB_BLOCK	SWAB_STEP2
#elif	UR_BITS == 2
#define	SWAB_BLOCK	SWAB_STEP4
#elif	UR_BITS == 3
#define	SWAB_BLOCK	SWAB_STEP8
#elif	UR_BITS == 4
#define	SWAB_BLOCK	SWAB_STEP16
#else
#error	"UR_BITS must be in the range 0..4"
#endif

/*
 * swab -- copy n bytes from pf to pt, exchanging the two bytes of
 * every 16-bit word.
 *
 *	pf	source buffer
 *	pt	destination buffer; may be identical to pf (in-place
 *		swap), but must not otherwise overlap it
 *	n	number of bytes; if odd, the last byte is neither read
 *		nor written.  A count below 2 (including any negative
 *		count) copies nothing.
 *
 * Always returns 0.  The return type is kept as int so that callers
 * relying on the historical implicit-int declaration still link.
 */
int
swab(char *pf, char *pt, int n)
{
	register char tmp;		/* byte held by SWAB_STEP */
	register int words;		/* # of 16-bit words to swap */
	register int left;		/* loop countdown */

	/*
	 * Reject empty and negative counts up front.  Shifting and
	 * masking a negative count would otherwise yield a non-zero
	 * remainder and swap words the caller never asked for.
	 */
	if (n < 2)
		return 0;

	words = n >> 1;			/* n / 2 */

	/* Leftover words that do not fill a whole unrolled block. */
	for (left = words & UR_MASK; left > 0; left--)
		SWAB_STEP;

	/* Whole blocks of UR_TIMES words each. */
	for (left = words >> UR_BITS; left > 0; left--)
		SWAB_BLOCK;

	return 0;
}

