/* bootstrap.c * written and copyright by Garf/ff123 * * peforms bootstrap analysis * *********************************************************************/ #include #include #include #include #include #define MAXTREAT 8 #define MAXSAMPLE 50 #define MAXCHAR 255 #define TRUE 1 #define FALSE 0 #define DELTA 1E-8 #define RESAMPLES 25000 #define SIMULS 25000 float or_data[MAXTREAT][MAXSAMPLE]; float rk_data[MAXTREAT][MAXSAMPLE]; float rs_data[MAXTREAT][MAXSAMPLE]; float mu_data[MAXTREAT][MAXSAMPLE]; char labels[MAXTREAT][MAXCHAR]; float or_ranksums[MAXTREAT]; float rs_ranksums[MAXTREAT]; float mu_ranksums[MAXTREAT]; #define COMPAR ((MAXTREAT*(MAXTREAT-1))/2) float or_rks_diff[COMPAR]; float rs_rks_diff[COMPAR]; float mu_rks_diff[COMPAR]; float rs_hits[COMPAR]; float mu_hits[COMPAR]; int alpha_hits[COMPAR]; float alphas[COMPAR]; int numtreat; int numsample; char *infile; typedef struct { float data[MAXTREAT]; /* data to be sorted */ int num[MAXTREAT]; /* numbering of data */ } samples_t; /* Random number generator stuff */ #define N (624) #define M (397) #define K (0x9908B0DFU) #define hiBit(u) ((u) & 0x80000000U) #define loBit(u) ((u) & 0x00000001U) #define loBits(u) ((u) & 0x7FFFFFFFU) #define mixBits(u, v) (hiBit(u)|loBits(v)) static unsigned long state[N+1]; static unsigned long *next; int left = -1; /* Mersenne Twister */ void seedMT(unsigned long seed) { register unsigned long x = (seed | 1U) & 0xFFFFFFFFU, *s = state; register int j; for(left=0, *s++=x, j=N; --j; *s++ = (x*=69069U) & 0xFFFFFFFFU); } unsigned long reloadMT(void) { register unsigned long *p0=state, *p2=state+2, *pM=state+M, s0, s1; register int j; if(left < -1) seedMT(4357U); left=N-1, next=state+1; for(s0=state[0], s1=state[1], j=N-M+1; --j; s0=s1, s1=*p2++) *p0++ = *pM++ ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U); for(pM=state, j=M; --j; s0=s1, s1=*p2++) *p0++ = *pM++ ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U); s1=state[0], *p0 = *pM ^ (mixBits(s0, s1) >> 1) ^ (loBit(s1) ? K : 0U); s1 ^= (s1 >> 11); s1 ^= (s1 << 7) & 0x9D2C5680U; s1 ^= (s1 << 15) & 0xEFC60000U; return(s1 ^ (s1 >> 18)); } unsigned long randomMT(void) { unsigned long y; if(--left < 0) return(reloadMT()); y = *next++; y ^= (y >> 11); y ^= (y << 7) & 0x9D2C5680U; y ^= (y << 15) & 0xEFC60000U; return(y ^ (y >> 18)); } /**********************************************************************/ /* Take data from rawdata and returns the ranking data. Also returns * t, the array of tied groups. */ int rank_data(float rawdata[MAXTREAT][MAXSAMPLE], float rankdata[MAXTREAT][MAXSAMPLE]) { int sample; int i; /* index */ int j; /* index */ int k; /* index */ float sum; /* sum of rankvalues */ struct { int position; float rawvalue; float rankvalue; int t; } sorted[MAXTREAT], temp; for (sample = 0; sample < numsample; sample++) { /* initialize */ for (i = 0; i < numtreat; i++) { sorted[i].position = i; sorted[i].rawvalue = rawdata[i][sample]; sorted[i].rankvalue = 0; sorted[i].t = 1; } /* first sort from min to max */ for (i = 0; i < numtreat; i++) { for (j = i+1; j < numtreat; j++) { if (sorted[j].rawvalue < sorted[i].rawvalue - DELTA) { temp.position = sorted[i].position; temp.rawvalue = sorted[i].rawvalue; sorted[i].position = sorted[j].position; sorted[i].rawvalue = sorted[j].rawvalue; sorted[j].position = temp.position; sorted[j].rawvalue = temp.rawvalue; } } } /* now assign rankvalues */ for (i = 0; i < numtreat; i++) sorted[i].rankvalue = i + 1; i = 0; while (i < numtreat - 1) { sum = sorted[i].rankvalue; j = i + 1; while (fabs(sorted[j].rawvalue - sorted[i].rawvalue) < DELTA) { sum += sorted[j].rankvalue; sorted[i].t++; sorted[j].t--; j++; } for (k = i; k < j; k++) sorted[k].rankvalue = sum / sorted[i].t; i = j; } /* unsort data and pack into rankdata[] */ for (i = 0; i < numtreat; i++) { rankdata[sorted[i].position][sample] = sorted[i].rankvalue; } } return; } void rank_sums(float data[MAXTREAT][MAXSAMPLE], float ranksums[MAXTREAT]) { int i, j; float rs; for (i = 0; i < numtreat; i++) { rs = 0.0f; for (j = 0; j < numsample; j++) { rs += data[i][j]; } // We use means on raw data for now // ranksums[i] = rs; ranksums[i] = rs/numsample; } } /*********************************************************************/ /* sorts data in an array from max to min */ void sort(samples_t *sortedp, int n) { int i; /* index */ int j; /* index */ struct { float data; /* data to be sorted */ int num; /* numbering of data */ } temp; for (i=0; idata[j] > sortedp->data[i] + DELTA) { temp.data = sortedp->data[i]; temp.num = sortedp->num[i]; sortedp->data[i] = sortedp->data[j]; sortedp->num[i] = sortedp->num[j]; sortedp->data[j] = temp.data; sortedp->num[j] = temp.num; } } } } /**********************************************************************/ /* construct ranksum matrix */ void construct_rs_matrix(float ranksums[MAXTREAT], float rks_diff[COMPAR], int output) { samples_t sorted; /* data to be sorted */ samples_t *sortedp; /* pointer to sorted data */ int i; /* index */ int j; /* index */ char buf[MAXCHAR]; /* string buffer */ char buf2[MAXCHAR]; /* string buffer */ float lsdmatrix[MAXTREAT][MAXTREAT]; int is_better; /* flag */ int comp; for (i = 0; i < numtreat-1; i++) for (j = i + 1; j < numtreat; j++) lsdmatrix[i][j] = ranksums[i] - ranksums[j]; if (output) { printf("\n"); printf("%-9s", ""); for (i = 1; i < numtreat; i++) { sprintf(buf, labels[i]); buf[8] = '\0'; printf("%-9s", buf); } printf("\n"); } comp = 0; for (i = 0; i < numtreat-1; i++) { if (output) { sprintf(buf, labels[i]); buf[8] = '\0'; printf("%-9s", buf); } for (j = 1; j < numtreat; j++) { if (j <= i) { if (output) printf("%-9s", ""); } else { rks_diff[comp] = lsdmatrix[i][j]; comp++; if (output) printf("%6.2f ", lsdmatrix[i][j]); } } if (output) printf("\n"); } if (output) { /* now print out all scores */ printf("\n"); for (i = 0; i < numtreat; i++) { sprintf(buf, labels[i]); buf[8] = '\0'; printf("%-9s", buf); } printf("\n"); for (i = 0; i < numtreat; i++) printf("%6.2f ", ranksums[i]); printf("\n\n"); } } void print_rank_sums(float rks[MAXTREAT]) { int i; /* now print out all scores */ printf("\n"); for (i = 0; i < numtreat; i++) { printf("%-9s", labels[i]); } printf("\n"); for (i = 0; i < numtreat; i++) printf("%6.2f ", rks[i]); printf("\n\n"); } /**********************************************************************/ /* Reads the input file. Format is as described at the top of this * file. Lines starting with percent (%) or which are blank are * skipped over. Tokens are separated by commas, spaces, tabs, line * returns, and newlines. */ void read_input(void) { char line[MAXCHAR]; /* string buffer */ char *p; /* generic pointer */ char buf[MAXCHAR]; /* string buffer */ int labels_parsed = FALSE; /* flag for labels parsed */ int count; /* count of data */ int linenum = 0; /* line number */ int i; /* index */ FILE *ifp_s; ifp_s = fopen(infile, "r"); if (ifp_s == NULL) { fprintf(stderr, "Can't open input file %s\n", infile); exit(1); } else { printf("Input file : %s\n", infile); } numtreat = 0; numsample = 0; while (fgets(line, MAXCHAR, ifp_s)) { p = strtok(line, ", \t\r\n"); linenum++; /* ignore comments and blank lines */ if (p) { strncpy(buf, p, 1); buf[1] = '\0'; if (buf[0] == '%') continue; } else continue; if (labels_parsed == FALSE) { strcpy(labels[0], p); numtreat++; while (p = strtok(NULL, ", \t\r\n")) { strcpy(labels[numtreat], p); numtreat++; } if (numtreat > MAXSAMPLE) { fprintf(stderr, "Max treatements allowed: %d\n", MAXTREAT); exit(1); } labels_parsed = TRUE; continue; } /* parse data */ count = 0; strcpy(buf, p); or_data[count][numsample] = atof(buf); count++; while (p = strtok(NULL, ", \t\r\n")) { strcpy(buf, p); or_data[count][numsample] = atof(buf); count++; } numsample++; } printf("Read %d treatments, %d samples\n", numtreat, numsample); return; } void resample_data(float ordata[MAXTREAT][MAXSAMPLE], float rsdata[MAXTREAT][MAXSAMPLE]) { int i, j, k, l; int picksample; int picktreat; int picked[MAXTREAT]; int picks[MAXTREAT]; for (i = 0; i < numsample; i++) { /*picksample = (int)((numsample)*(randomMT()/(RAND_MAX+1.0)));*/ picksample = i; memset(picked, 0, sizeof(picked)); for (j = 0; j < numtreat; j++) { if (j == 0) { picktreat = (int)((numtreat)*(randomMT()/(ULONG_MAX+1.0))); } else { /* gather picks */ for (k = 0, l = 0; k < numtreat; k++) { if (!picked[k]) { picks[l] = k; l++; } } if (l == 1) { picktreat = picks[0]; } else { picktreat = picks[(int)((l)*(randomMT()/(ULONG_MAX+1.0)))]; } } picked[picktreat] = 1; rsdata[j][i] = ordata[picktreat][picksample]; } } } void analyze_diff(float or_diff[COMPAR], float rs_diff[COMPAR], float rs_hits[COMPAR]) { int i; for (i = 0; i < ((numtreat*(numtreat-1))/2); i++) { if (or_diff[i] > 0) { if (rs_diff[i] >= or_diff[i]) rs_hits[i]++; } else { if (rs_diff[i] <= or_diff[i]) rs_hits[i]++; } } }; int main(int argc, char *argv[]) { int rsc, rsc2; int comp, i, j, k; float sig, nsig; int ah; float minsig; infile = argv[1]; seedMT(4321); memset(rs_hits, 0, sizeof(rs_hits)); read_input(); // rank_data(or_data, rk_data); rank_sums(or_data, or_ranksums); construct_rs_matrix(or_ranksums, or_rks_diff, TRUE); printf("Resampling"); rsc = 0; for (rsc = 0; rsc < RESAMPLES; rsc++) { if ((rsc % 1000) == 0) { putchar('.'); fflush(stdout); }; resample_data(or_data, rs_data); //rank_data(rs_data, rk_data); rank_sums(rs_data, rs_ranksums); construct_rs_matrix(rs_ranksums, rs_rks_diff, FALSE); analyze_diff(or_rks_diff, rs_rks_diff, rs_hits); } printf("\n\n"); comp = 0; for (i = 0; i < numtreat-1; i++) { for (j = i + 1; j < numtreat; j++) { sig = (float)(rs_hits[comp]) / (float)(RESAMPLES); alphas[comp] = sig; comp++; if (sig < 0.05f+DELTA) { if (or_ranksums[i] > or_ranksums[j]) { printf("%s is better than %s (%2.5f)\n", labels[i], labels[j], sig); } else { printf("%s is worse than %s (%2.5f)\n", labels[i], labels[j], sig); } } } } printf("\n"); printf("Simultaneous corrections"); rsc = 0; memset(alpha_hits, 0, sizeof(alpha_hits)); //alpha_hits = 0; for (rsc = 0; rsc < SIMULS; rsc++) { putchar('+'); memset(mu_hits, 0, sizeof(mu_hits)); resample_data(or_data, rs_data); // rank_data(rs_data, rk_data); rank_sums(rs_data, rs_ranksums); construct_rs_matrix(rs_ranksums, rs_rks_diff, FALSE); rsc2 = 0; for (rsc2 = 0; rsc2 < RESAMPLES; rsc2++) { if ((rsc2 % 1000) == 0) { putchar('.'); fflush(stdout); }; resample_data(rs_data, mu_data); //rank_data(mu_data, rk_data); rank_sums(mu_data, mu_ranksums); construct_rs_matrix(mu_ranksums, mu_rks_diff, FALSE); analyze_diff(rs_rks_diff, mu_rks_diff, mu_hits); } comp = 0; ah = 0; minsig = 5000.0f; for (i = 0; i < numtreat-1; i++) { for (j = i + 1; j < numtreat; j++) { sig = (float)(mu_hits[comp]) / (float)(RESAMPLES); if (sig < minsig) { minsig = sig; } comp++; } } for (k = 0; k < (numtreat*(numtreat-1))/2; k++) { if (minsig < alphas[k]) { alpha_hits[k]++; } } } printf("\n"); comp = 0; for (i = 0; i < numtreat-1; i++) { for (j = i + 1; j < numtreat; j++) { sig = (float)(rs_hits[comp])/(float)(RESAMPLES); nsig = (float)(alpha_hits[comp])/(float)(SIMULS); comp++; if (sig < 0.05f) { if (or_ranksums[i] > or_ranksums[j]) { printf("%s is better than %s (%2.5f vs %2.5f)\n", labels[i], labels[j], sig, nsig); } else { printf("%s is worse than %s (%2.5f vs %2.5f)\n", labels[i], labels[j], sig, nsig); } } } } return 0; }