TangoCDE.index<-function(case,cont,s,Nrep){ # # ----------------------------------------------------------------- # An example of execution # # Nrep<-999 # s<- (3:15) # case <-scan("d:/demo/LKcase.geo",list(x=0,y=0)) # cont <-scan("d:/demo/LKcont.geo",list(x=0,y=0)) # out<-TangoCDE.index(case, cont, s, Nrep) # # --------------------------------------------------------------------- # ARGUEMENTS # case$x : a vector of x-coordinates of case # case$y : a vector of y-coordinates of case # cont$x : a vector of x-coordinates of control # cont$y : a vector of y-coordinates of control # s : a vector of values of cluster size theta for the analysis # Nrep : The number of Monte Carlo replications, e.g., 999, 9999 # # VALUES # Pmin : test statistic # p.value : Monte Calro simulated (adjusted) p-value # theta.min : value of theta that attains the minimum Pmin #----------------------------------------------------------------------- nloop<-Nrep svec<-s ns<-length(svec) par(mfrow=c(1,1)) start<-date() nr1<-length(case$x) nr2<-length(cont$x) nr<- nr1+nr2 q1<- c(rep(1,nr1), rep(0,nr2)) qdat<-q1 x<- c(case$x, cont$x) y<- c(case$y, cont$y) ncas<-nr1 dc<-matrix(0,nr,nr) frq<-matrix(0,nloop,nr) pval<-matrix(0,nloop,ns) pxx<-rep(0,nloop) pcc<-pxx ptan<- rep(0, ns) jjj<-rep(0, ns) for (i in 1:nr){ for (j in i:nr){ dij <- sqrt( (x[i]-x[j])**2 + (y[i]-y[j])**2 ) dc[i,j]<-dij dc[j,i]<-dij } } # # Calculation of (1) null distribution of test statistic Pmin # for (ijk in 1:nloop) { qq<-rep(0,nr) ind<-sample(nr, ncas) for (i in 1:ncas){ qq[ind[i]]<-1 } frq[ijk,]<-qq } ori<- qdat # #symbols(geo$x,geo$y,circles=p,inch=0.50) # Figure 1 #text(geo$x,geo$y,regid,adj= -0.5, col=8) # onev<-rep(1,nr) XN0<- ncas XN<- nr p1=XN0*(XN0-1.0)/XN/(XN-1.0) p2=p1*(XN0-2.0)/(XN-2.0) p3=p2*(XN0-3.0)/(XN-3.0) p4=p3*(XN0-4.0)/(XN-4.0) p5=p4*(XN0-5.0)/(XN-5.0) for (k in 1:ns){ rad<- s[k] cbb<- -4 * (dc/rad)^2 ac<-ifelse( cbb <= -4, 0, exp(cbb) ) for (i in 1:nr){ ac[i,i]<-0 } am <- ac %*% onev am2<- am %*% t(am) am3<- ac %*% ac g1<- sum( ac^2 ) g2<- sum( ac^3 ) g3<- sum( ac^2 %*% am ) g4<- sum( ac %*% am^2 ) g5<- sum( ac * am2 ) g6<- sum( ac * am3 ) g7<- sum( am^2 %*% t(am) ) g8<- sum( ac %*% am ) s0<- sum( ac ) s1<- g1*2.0 s2<- 4.0*(g8-g1) s3<- s0^2 -s1 -s2 w1<- g2*4.0 w2<- 24.0*(g3-g2) + 8.0*g6 w3<- 6.0*g1*s0 + 52.0*g2 - 96.0*g3 + 8.0*g4 +24.0*g5 - 24.0*g6 w4<- 12.0*( -s0*g1 -4.0*g2 +10.0*g3 - g4 - 4.0*g5 + 2.0*g6 + g7) w5<- s0^3 - w1-w2-w3-w4 et<- s0 * p1 var<-s1*p1 + s2*p2 + s3*p3 - et*et # h6<- w1*(p1-p5) + w2*(p2-p5) + w3*(p3-p5) + w4*(p4-p5) + s0^3 * p5- 3*et*var -et^3 h6<- p1*w1+p2*w2+p3*w3+p4*w4+p5*w5 - 3*et*var -et^3 skew<- h6/ (var)^1.5 df<- 8.0/skew/skew gt0<- qdat%*%ac%*%qdat stat0<-(gt0-et)/sqrt(var) aprox0<-df+sqrt(2*df)*stat0 ptan[k]<- 1-pchisq(aprox0, df) # for (ijk in 1:nloop){ q<-frq[ijk,] gt<- q%*%ac%*%q stat<-(gt-et)/sqrt(var) aprox<-df+sqrt(2*df)*stat pval[ijk,k]<- 1-pchisq(aprox, df) } } for (ijk in 1:nloop){ pcc[ijk]<-min( pval[ijk,] ) } pxx<- sort( pcc ) # Figure 3 p22<-min( ptan ) at<-c(pxx,p22) pres<-length(at[at<=p22])/(length(pxx)+1) for (i in 1:ns){ if ( abs(p22 - ptan[i] )< 0.00001 ){pind<-i} } #pind<-jjj[ptan==min(ptan)] plot(svec,ptan,pch=1,type="b",xlab="cluster size ( theta values ) ", ylab=" Unadjusted p-values") abline(v=svec[pind], col=4) text((max(svec)+2*min(svec))/3,(3*max(ptan)+min(ptan))/4, "Adjusted p-value = ",col=4, cex=1.4) text((1.5*max(svec)+min(svec))/2.5,(3*max(ptan)+min(ptan))/4, pres,col=4, cex=1.4) list(Pmin=ptan, p.value=pres, theta.min=svec[pind]) }