
/* Copyright 1982 by David Zittin, Biosciences Data Centre. */
#include <stdio.h>
#include <rank.h>
#include "usr.h"			/* usr switches */

/* One element of a group's singly linked list of observations. */
struct datanode {
	float data;		/* the datum as read; changerank() later
				   overwrites it with the datum's rank */
	struct datanode *next;	/* following element; list walks stop at a
				   null pointer */
};
typedef struct datanode DNODE;

/* One slot of the group hash table: a group name plus that group's data. */
struct hashtable {
	char *hkey;			/* group name; a null pointer marks an
					   unused slot */
	int cnt;			/* number of data filed under this group */
	double ssranks;			/* sum sqranks - T, filled in by
					   changerank() */
	struct datanode *startlist;	/* first element of the data list */
	struct datanode *endlist;	/* last element; new data is appended
					   here */
};
typedef struct hashtable TABLE;

		/* a silly number which makes the hash table work better:
		   hashdata() multiplies its per-character weight by MAGIC
		   after each character of the group name */
#define MAGIC 5


/* TABSIZ is the size of the hash table which keeps track of the group
	strings and a ptr to the linked list of each group's data. TABSIZ
	should always be a prime number */
#define TABSIZ 53
TABLE table[TABSIZ];


/*
 * srcc: driver for the analysis.  Orders the whole group table with
 * qsort() using cmp() on the group names so that output is alphabetical,
 * then hands over to pair_test() to process every pair of groups.
 */
srcc()
{
	int cmp();		/* comparison routine passed to qsort */

	qsort(table, TABSIZ, sizeof table[0], cmp);

	pair_test();
}

/*
 * pair_test: print the report header, convert every group's data to ranks
 * (echoing them when usr.echo is set), then call significant() with the
 * rs() coefficient for every pair of groups in the table.
 *
 * Expects the unused table slots to sit at the front of the table; only
 * the leading run of slots without a data list is skipped.
 */
pair_test()
{
	register int k;
	char *name_a, *name_b;
	int first, a, n_a, n_b;
	double calc_ties(), rs();

	/* step over the unused slots at the front of the table */
	first = 0;
	while (first < TABSIZ && !table[first].startlist)
		first++;

	printf("SPEARMAN RANK CORRELATION COEF:%s\n", usr.title);

	if (usr.echo)
		printf("\n DATA ECHO:\n");

	/* replace the data of each group by ranks */
	k = first;
	while (k < TABSIZ) {
		if (usr.echo)
			printf("  GROUP ==> %s <==\n\t\trank  obsvd.\n",
				table[k].hkey);
		changerank(&table[k]);
		k++;
	}

	/* every unordered pair (a, k) with a < k */
	for (a = first; a < TABSIZ; a++) {
		name_a = table[a].hkey;
		n_a = table[a].cnt;

		for (k = a + 1; k < TABSIZ; k++) {
			name_b = table[k].hkey;
			n_b = table[k].cnt;

			significant(name_a, name_b, n_a,
				rs(&table[a], &table[k]));

			/* rs() pairs data by position, so it only covers
			   the shorter of two unequal lists */
			if (n_a != n_b)
				printf("\n\tWARNING:COUNTS UNEQUAL:%s=%d %s=%d\n\t\texpect flakey stats\n", name_a, n_a, name_b, n_b);
		}
	}
}

/*
 * rs: calc the spearman rank coeff for two groups.
 *
 * t1, t2  table entries whose data lists already hold ranks and whose
 *         ssranks fields have been set (see changerank()).
 *
 * The lists are walked in step and the squared rank differences summed;
 * the walk ends as soon as either list runs out.  Returns
 *     (ssranks1 + ssranks2 - sum d^2) / (2 * sqrt(ssranks1 * ssranks2)).
 * NOTE(review): nothing here guards against ssranks1 * ssranks2 being 0.
 */
double rs(t1, t2)
TABLE *t1, *t2;
{
	register DNODE *a, *b;
	double delta, sumsq = 0.0;
	double sqrt();

	a = t1->startlist;
	b = t2->startlist;
	while (a && b) {
		delta = a->data - b->data;
		sumsq += delta * delta;
		a = a->next;
		b = b->next;
	}

	return((t1->ssranks + t2->ssranks - sumsq) / (2.0 * sqrt(t1->ssranks * t2->ssranks)) );
}

		/* change each datum of one group to its rank, and store
			(n^3 - n)/12 minus the tie term in the group's
			ssranks field, where n is the group's count */
changerank(tbl)
TABLE *tbl;
{
	register DNODE *cur;
	RNODE *tree = NULL;
	double observed, tiesum, n;
	double get_rank(), calc_ties();

	/* feed every datum to mkranks() to build the rank tree */
	cur = tbl->startlist;
	while (cur) {
		mkranks(&tree, cur->data);
		cur = cur->next;
	}

	/* sigma T: calc_ties() result scaled by 1/12; taken before rank()
	   is run on the tree, in the same order as always */
	tiesum = calc_ties(tree) / 12.0;

	rank(tree);

	/* swap each datum for its rank, echoing "rank  observed" on request */
	for (cur = tbl->startlist; cur != NULL; cur = cur->next) {
		observed = cur->data;
		cur->data = get_rank(tree, cur->data);
		if (usr.echo)
			printf("\t%12.1f  %g\n", cur->data, observed);
	}

	n = tbl->cnt;
	tbl->ssranks = ((n * n * n - n) / 12.0) - tiesum;

	myfree();	/* free the rank tree */
}



/*
 * filltable: read data items from fp with getdata() until it reports EOF
 * and file each one in the global group table via hashdata().
 *
 * How an item is interpreted depends on usr.label:
 *   ON    the item is a group label and the next item is its number;
 *   OFF   the item is the number, the group is usr.group;
 *   AUTO  the item is the number, the group is usr.fname.
 *
 * Problems are reported through err().
 * NOTE(review): err() is presumably fatal -- confirm; the code after the
 * err() calls only matters if it returns.
 */
filltable(fp)
register FILE *fp;
{
	char *alpha, *number;	/* ptrs to alpha & num strs in input */
	char *badeod = "unexpected end of data\n";
	double x;			/* usr datum after sscanf conv */
	int hashdata();

	while (getdata(&alpha,fp) != EOF){
		switch(usr.label){
			case ON:	if(getdata(&number,fp) == EOF)
						err(badeod);
					break;

			case OFF:	number = alpha;
					alpha = usr.group;
					break;

			case AUTO:	number = alpha;
					alpha = usr.fname;
					break;

			default:	/* unknown mode: number was never
					   set, so don't go on to parse it */
					err("bad label switch\n");
					continue;
		} /* switch */

		/* x is a double, so the conversion must be %lf; in standard
		   C a plain %F stores a float and would corrupt x */
		if (sscanf(number, "%lf", &x) != 1)
			err("bad data \"%s %s\"\n",alpha,number);

		hashdata(alpha, x, table, TABSIZ);
	} /* while reading data */
}


#define MAXALLOWED 4 / 5
int hashdata(str, x, table, tabsiz)
char *str;
double x;
TABLE *table;
int tabsiz;
{
	register int hval=0;
	register int displace=1;
	register TABLE *tbl;
	TABLE *endtable=table+tabsiz;
	int strcmp();
	char *c=str;
	char *strsave(), *malloc();
	static char *nospace="hash:nospace";
	static int ntable=0;
	int i=13;

	while(*c) {
		hval += *c++ * i;
		i *= MAGIC;
	}

	if (hval < 0)
		hval = -hval;

	tbl = table + (hval % tabsiz);

	if (tbl < table || tbl >= endtable)
		err("htab botch");


	for(;;){
		if ( !(tbl->hkey) ) {			/* at empty hole */
			if ( ++ntable > tabsiz * MAXALLOWED)
				err("htab full");
			tbl->cnt = 1;
			if (!(tbl->hkey = strsave (str)))
				err(nospace);
			if(!(tbl->startlist=tbl->endlist=(DNODE *)malloc(sizeof(DNODE))))
				err(nospace);
			tbl->startlist->data = x; 
			break;
		}

		if (!(strcmp(tbl->hkey,str))) {		/* found */
			tbl->cnt++;	
			if(!(tbl->endlist->next=(DNODE *)malloc(sizeof(DNODE))))
				err(nospace);
			tbl->endlist = tbl->endlist->next;
			tbl->endlist->data = x;
			return;
		}

		else {				/* collision, quad srch */
			tbl += displace;
			displace += 2;
			if (tbl >= endtable) 
				tbl -= tabsiz;
		}
	}
}

/*
 * cmp: qsort comparison routine for the group table.
 *
 * arg1, arg2  the two table slots to compare.
 *
 * Returns -1, 0 or 1.  Slots are ordered by group name with strcmp().
 * The whole table is sorted, including slots that were never used and so
 * have a null hkey; those must not be handed to strcmp().  They compare
 * equal to each other and lower than any named slot, which puts them at
 * the front of the sorted table where pair_test() skips them.
 */
int cmp(arg1, arg2)
TABLE *arg1, *arg2;
{
	register int cmpval;
	int strcmp();

	if (!arg1->hkey || !arg2->hkey) {
		if (arg1->hkey)
			return(1);	/* only arg2 is empty */
		if (arg2->hkey)
			return(-1);	/* only arg1 is empty */
		return(0);		/* both empty */
	}

	if (!(cmpval = strcmp(arg1->hkey, arg2->hkey) ))
		return(0);	/* equality */

	return( (cmpval > 0) ? (1) : (-1) );
}
