CI.func <- function(freq, K, alpha, if_n, if_p){
#------------------------------------------------------------------------------------
# Examples of execution
#
# ldata <- c(14, 2, 1, 0,
#             0, 0, 0, 0,
#             0, 0, 2, 0,
#             0, 0, 2, 0)
# out <- CI.func(ldata, 3, 0.05, 1, 0.1)
#
#> out
#[[1]]
#[[1]]$Score.lower.CI
#           [,1]
#[1,] -0.1412687
#
#[[1]]$Score.upper.CI
#          [,1]
#[1,] 0.1814759
#
#[[1]]$Wald.null.lower.CI
#            [,1]
#[1,] -0.05624786
#
#[[1]]$Wald.null.upper.CI
#          [,1]
#[1,] 0.1197399
#
#[[1]]$Wald.alternative.lower.CI
#          [,1]
#[1,] -0.055194
#
#[[1]]$Wald.alternative.upper.CI
#          [,1]
#[1,] 0.1186861
#
# (R version 2.13.1)
#
# ldata <- c(9, 1, 0,
#            3, 0, 0,
#            0, 1, 1)
# out2 <- CI.func(ldata, 2, 0.05, 1, 0)
#
# The results for out2 are omitted here.
#------------------------------------------------------------------------------------
# PURPOSE
# This function calculates the score-based confidence interval and the two
# Wald-type confidence intervals (under the null and under the alternative
# hypothesis) for the difference in correlated proportions based on the
# results from multiple raters.
#
# ARGUMENTS
# freq  : a vector of the elements of the (K+1) x (K+1) contingency table
# K     : the number of raters
# alpha : the significance level of the two-sided confidence interval
# if_n  : a value that adjusts the initial value for the calculation of the
#         score-based confidence limit in the negative region
#         (We recommend a value of 1. If the calculation does not converge,
#         increase the value little by little.)
# if_p  : a value that adjusts the initial value for the calculation of the
#         score-based confidence limit in the positive region
#         (We recommend a value of 0. If the calculation does not converge,
#         increase the value little by little.)
#
# OUTPUT
# Score.lower.CI            : the score-based 1-alpha lower confidence limit
# Score.upper.CI            : the score-based 1-alpha upper confidence limit
# Wald.null.lower.CI        : the Wald-type 1-alpha lower confidence limit
#                             (under the null hypothesis)
# Wald.null.upper.CI        : the Wald-type 1-alpha upper confidence limit
#                             (under the null hypothesis)
# Wald.alternative.lower.CI : the Wald-type 1-alpha lower confidence limit
#                             (under the alternative hypothesis)
# Wald.alternative.upper.CI : the Wald-type 1-alpha upper confidence limit
#                             (under the alternative hypothesis)
#
# NOTE
# This function supports two or three raters (K = 2 or 3). A generalized
# version for an arbitrary number of raters will be made available in the
# near future.
#
# Acknowledgement
# The authors thank Dr. Jerry de Groot for his helpful comments on the R code.
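#------------------------------------------------------------------------------------
# Layout of freq (editorial illustration): the counts are read row by row from
# the (K+1) x (K+1) table of (new rating, standard rating) pairs, with the
# categories ordered K, K-1, ..., 0, following the vectors `new` and `std`
# used below. For the K = 3 example above, the following line (not executed
# here) rebuilds the table, so that, e.g., ldata[2] is the number of subjects
# rated 3 on the new condition and 2 on the standard condition:
#
#   tab <- matrix(ldata, nrow = 4, byrow = TRUE,
#                 dimnames = list(new = 3:0, std = 3:0))
#------------------------------------------------------------------------------------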
#------------------------------------------------------------------------------------

#### K=2
CI.func2 <- function(freq, K, alpha){

  #### Making a dataset
  n   <- sum(freq)
  new <- c(2, 2, 2, 1, 1, 1, 0, 0, 0)
  std <- c(2, 1, 0, 2, 1, 0, 2, 1, 0)
  dif <- new-std
  nx2 <- t(freq)%*%ifelse(dif==K, 1, 0)
  ny2 <- t(freq)%*%ifelse(dif==(-K), 1, 0)
  nx1 <- t(freq)%*%ifelse(dif==(K-1), 1, 0)
  ny1 <- t(freq)%*%ifelse(dif==(-(K-1)), 1, 0)
  diag <- n-nx2-ny2-nx1-ny1   # number of concordant observations

  #### Initial values
  ipy2 <- (nx2+ny2)/2/n+1e-12
  ipx1 <- (nx1+ny1)/2/n+1e-12
  ipy1 <- (nx1+ny1)/2/n+1e-12

  #### The Wald 100(1-alpha)% confidence interval under the alternative hypothesis
  d <- ((nx2+nx1/2)-(ny2+ny1/2))/n
  s <- ((nx2+nx1/4)+(ny2+ny1/4))/(n**2)-((nx2/n+nx1/n/2-ny2/n-ny1/n/2)**2)/n
  l_wald <- d-qnorm(1-alpha/2)*sqrt(s)
  u_wald <- d+qnorm(1-alpha/2)*sqrt(s)

  #### The Wald 100(1-alpha)% confidence interval under the null hypothesis
  ss <- ((nx2+nx1/4)+(ny2+ny1/4))/(n**2)
  l_schou <- d-qnorm(1-alpha/2)*sqrt(ss)
  u_schou <- d+qnorm(1-alpha/2)*sqrt(ss)

  #### Estimating the MLEs for the calculation of the confidence interval
  #### (stat1 is used for negative delta, stat2 for non-negative delta; they
  ####  differ only in the initial value passed to constrOptim)
  stat1 <- function(delta){
    psolv <- NULL
    fr <- function(x) {
      #### The log-likelihood function
      py2 <- x[1]
      px1 <- x[2]
      py1 <- x[3]
      nx2*log(py2+0.5*py1-0.5*px1+delta)+ny2*log(py2)+nx1*log(px1)+ny1*log(py1)+(n-nx2-ny2-nx1-ny1)*log(1-2*py2-0.5*px1-1.5*py1-delta)
    }
    grr <- function(x) {
      #### The gradient of the log-likelihood function
      py2 <- x[1]
      px1 <- x[2]
      py1 <- x[3]
      c((nx2/(py2+0.5*py1-0.5*px1+delta)+ny2/py2-2*(n-nx2-ny2-nx1-ny1)/(1-2*py2-0.5*px1-1.5*py1-delta)),
        ((-1*nx2)/2/(py2+0.5*py1-0.5*px1+delta)+nx1/px1-(n-nx2-ny2-nx1-ny1)/2/(1-2*py2-0.5*px1-1.5*py1-delta)),
        (nx2/2/(py2+0.5*py1-0.5*px1+delta)+ny1/py1-3*(n-nx2-ny2-nx1-ny1)/2/(1-2*py2-0.5*px1-1.5*py1-delta)))
    }
    #### constrOptim call
    solv <- constrOptim(c(ipy2-if_n*delta, ipx1, ipy1), fr, grr,
                        ui=rbind(c(1, 0, 0),         # py2 >= 0
                                 c(0, 1, 0),         # px1 >= 0
                                 c(0, 0, 1),         # py1 >= 0
                                 c(1, -0.5, 0.5),    # keeps the first log argument positive
                                 c(-2, -0.5, -1.5)), # keeps the last log argument positive
                        ci=c(0, 0, 0, -delta, -1+delta),
                        control=list(fnscale=-1), method='BFGS')
    psolv <- solv[[1]]
    py2 <- psolv[1]
    px1 <- psolv[2]
    py1 <- psolv[3]
    #### Normal deviate based on the difference of the two response rates
    #### difference
    d <- ((nx2+nx1/2)-(ny2+ny1/2))/n
    #### variance
    v <- (8*py2-px1+3*py1+4*delta-4*delta**2)/4/n
    #### z-score (the value returned by this function)
    tsz <- (d-delta)/sqrt(v)
  }

  stat2 <- function(delta){
    psolv <- NULL
    fr <- function(x) {
      #### The log-likelihood function
      py2 <- x[1]
      px1 <- x[2]
      py1 <- x[3]
      nx2*log(py2+0.5*py1-0.5*px1+delta)+ny2*log(py2)+nx1*log(px1)+ny1*log(py1)+(n-nx2-ny2-nx1-ny1)*log(1-2*py2-0.5*px1-1.5*py1-delta)
    }
    grr <- function(x) {
      #### The gradient of the log-likelihood function
      py2 <- x[1]
      px1 <- x[2]
      py1 <- x[3]
      c((nx2/(py2+0.5*py1-0.5*px1+delta)+ny2/py2-2*(n-nx2-ny2-nx1-ny1)/(1-2*py2-0.5*px1-1.5*py1-delta)),
        ((-1*nx2)/2/(py2+0.5*py1-0.5*px1+delta)+nx1/px1-(n-nx2-ny2-nx1-ny1)/2/(1-2*py2-0.5*px1-1.5*py1-delta)),
        (nx2/2/(py2+0.5*py1-0.5*px1+delta)+ny1/py1-3*(n-nx2-ny2-nx1-ny1)/2/(1-2*py2-0.5*px1-1.5*py1-delta)))
    }
    #### constrOptim call
    solv <- constrOptim(c(ipy2+if_p*delta, ipx1, ipy1), fr, grr,
                        ui=rbind(c(1, 0, 0),         # py2 >= 0
                                 c(0, 1, 0),         # px1 >= 0
                                 c(0, 0, 1),         # py1 >= 0
                                 c(1, -0.5, 0.5),    # keeps the first log argument positive
                                 c(-2, -0.5, -1.5)), # keeps the last log argument positive
                        ci=c(0, 0, 0, -delta, -1+delta),
                        control=list(fnscale=-1), method='BFGS')
    psolv <- solv[[1]]
    py2 <- psolv[1]
    px1 <- psolv[2]
    py1 <- psolv[3]
    #### Normal deviate based on the difference of the two response rates
    #### difference
    d <- ((nx2+nx1/2)-(ny2+ny1/2))/n
    #### variance
    v <- (8*py2-px1+3*py1+4*delta-4*delta**2)/4/n
    #### z-score (the value returned by this function)
    tsz <- (d-delta)/sqrt(v)
  }

  #### Estimating the score-based lower confidence limit by the secant method
  # Output matrix (diagnostic trace of the iterations; not returned)
  lsecant <- matrix(0, 50, 7)
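  # (Editorial sketch) The two loops that follow find the score-based limits by
  # solving z(delta) = 0 with the secant method, where z(delta) is the score
  # statistic returned by stat1/stat2 minus (lower limit) or plus (upper limit)
  # the quantile qnorm(1-alpha/2). The update applied at each step, restated
  # here from the code below, is
  #
  #   lrambda3 <- lrambda2 - (lrambda2 - lrambda1)*z2/(z2 - z1)
  #
  # with the Wald limit and 0.95 times the Wald limit as the two starting points.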
  #### Setup of the initial values for the secant method
  lrambda1 <- l_wald
  lrambda2 <- l_wald*0.95
  lrambda3 <- lrambda2   # guards the rare case in which the loop below is skipped
  j <- 0
  if (lrambda1 >= 0){
    z1 <- stat2(lrambda1)-qnorm(1-alpha/2)
  } else {
    z1 <- stat1(lrambda1)-qnorm(1-alpha/2)
  }
  if (lrambda2 >= 0){
    z2 <- stat2(lrambda2)-qnorm(1-alpha/2)
  } else {
    z2 <- stat1(lrambda2)-qnorm(1-alpha/2)
  }
  while (abs(z2) > 1e-8){
    j <- j+1
    lrambda3 <- lrambda2-(lrambda2-lrambda1)*z2/(z2-z1)
    if (lrambda3 >= 0){
      z3 <- stat2(lrambda3)-qnorm(1-alpha/2)
    } else {
      z3 <- stat1(lrambda3)-qnorm(1-alpha/2)
    }
    lsecant[j, 1] <- j
    lsecant[j, 2] <- lrambda1
    lsecant[j, 3] <- lrambda2
    lsecant[j, 4] <- lrambda3
    lsecant[j, 5] <- z1
    lsecant[j, 6] <- z2
    lsecant[j, 7] <- z3
    lrambda1 <- lrambda2
    lrambda2 <- lrambda3
    z1 <- z2
    z2 <- z3
  }
  lsecant[j, 1] <- j
  lsecant[j, 2] <- lrambda1
  lsecant[j, 3] <- lrambda2
  lsecant[j, 4] <- lrambda3
  lsecant[j, 5] <- z1
  lsecant[j, 6] <- z2
  lsecant[j, 7] <- z3

  #### Estimating the score-based upper confidence limit by the secant method
  #### Output matrix (diagnostic trace of the iterations; not returned)
  usecant <- matrix(0, 50, 7)

  #### Setup of the initial values for the secant method
  urambda1 <- u_wald
  urambda2 <- u_wald*0.95
  urambda3 <- urambda2   # guards the rare case in which the loop below is skipped
  j <- 0
  if (urambda1 >= 0){
    z1 <- stat2(urambda1)+qnorm(1-alpha/2)
  } else {
    z1 <- stat1(urambda1)+qnorm(1-alpha/2)
  }
  if (urambda2 >= 0){
    z2 <- stat2(urambda2)+qnorm(1-alpha/2)
  } else {
    z2 <- stat1(urambda2)+qnorm(1-alpha/2)
  }
  while (abs(z2) > 1e-8){
    j <- j+1
    urambda3 <- urambda2-(urambda2-urambda1)*z2/(z2-z1)
    if (urambda3 >= 0){
      z3 <- stat2(urambda3)+qnorm(1-alpha/2)
    } else {
      z3 <- stat1(urambda3)+qnorm(1-alpha/2)
    }
    usecant[j, 1] <- j
    usecant[j, 2] <- urambda1
    usecant[j, 3] <- urambda2
    usecant[j, 4] <- urambda3
    usecant[j, 5] <- z1
    usecant[j, 6] <- z2
    usecant[j, 7] <- z3
    urambda1 <- urambda2
    urambda2 <- urambda3
    z1 <- z2
    z2 <- z3
  }
  usecant[j, 1] <- j
  usecant[j, 2] <- urambda1
  usecant[j, 3] <- urambda2
  usecant[j, 4] <- urambda3
  usecant[j, 5] <- z1
  usecant[j, 6] <- z2
  usecant[j, 7] <- z3

  list(Score.lower.CI=lrambda3,
       Score.upper.CI=urambda3,
       Wald.null.lower.CI=l_schou,
       Wald.null.upper.CI=u_schou,
       Wald.alternative.lower.CI=l_wald,
       Wald.alternative.upper.CI=u_wald)
}

#### K=3
CI.func3 <- function(freq, K, alpha){

  #### Making a dataset
  n   <- sum(freq)
  new <- c(3, 3, 3, 3, 2, 2, 2, 2, 1, 1, 1, 1, 0, 0, 0, 0)
  std <- c(3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0, 3, 2, 1, 0)
  dif <- new-std
  nx3 <- t(freq)%*%ifelse(dif==K, 1, 0)
  ny3 <- t(freq)%*%ifelse(dif==(-K), 1, 0)
  nx2 <- t(freq)%*%ifelse(dif==(K-1), 1, 0)
  ny2 <- t(freq)%*%ifelse(dif==(-(K-1)), 1, 0)
  nx1 <- t(freq)%*%ifelse(dif==(K-2), 1, 0)
  ny1 <- t(freq)%*%ifelse(dif==(-(K-2)), 1, 0)
  diag <- n-nx3-ny3-nx2-ny2-nx1-ny1   # number of concordant observations

  #### Initial values
  ipy3 <- (nx3+ny3)/2/n+1e-12
  ipx2 <- (nx2+ny2)/2/n+1e-12
  ipy2 <- (nx2+ny2)/2/n+1e-12
  ipx1 <- (nx1+ny1)/2/n+1e-12
  ipy1 <- (nx1+ny1)/2/n+1e-12

  #### The Wald 100(1-alpha)% confidence interval under the alternative hypothesis
  d <- ((nx3+nx2*2/3+nx1/3)-(ny3+ny2*2/3+ny1/3))/n
  s <- ((nx3+nx2*4/9+nx1/9)+(ny3+ny2*4/9+ny1/9))/(n**2)-((nx3/n+nx2*2/3/n+nx1/3/n-ny3/n-ny2*2/3/n-ny1/3/n)**2)/n
  l_wald <- d-qnorm(1-alpha/2)*sqrt(s)
  u_wald <- d+qnorm(1-alpha/2)*sqrt(s)

  #### The Wald 100(1-alpha)% confidence interval under the null hypothesis
  ss <- ((nx3+nx2*4/9+nx1/9)+(ny3+ny2*4/9+ny1/9))/(n**2)
  l_schou <- d-qnorm(1-alpha/2)*sqrt(ss)
  u_schou <- d+qnorm(1-alpha/2)*sqrt(ss)

  #### Estimating the MLEs for the calculation of the confidence interval
  #### (stat1 is used for negative delta, stat2 for non-negative delta; they
  ####  differ only in the initial value passed to constrOptim)
  stat1 <- function(delta){
    psolv <- NULL
    fr <- function(x) {
      #### The log-likelihood function
      py3 <- x[1]
      px2 <- x[2]
      py2 <- x[3]
      px1 <- x[4]
      py1 <- x[5]
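      #### (Editorial note) Under the constraint
      #### delta = (px3+2/3*px2+1/3*px1)-(py3+2/3*py2+1/3*py1), the parameter
      #### px3 is eliminated as px3 = py3+2/3*py2+1/3*py1-2/3*px2-1/3*px1+delta,
      #### which is the argument of the first log term below.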
      nx3*log(py3+2/3*py2+1/3*py1-2/3*px2-1/3*px1+delta)+ny3*log(py3)+nx2*log(px2)+ny2*log(py2)+nx1*log(px1)+ny1*log(py1)+(n-nx3-ny3-nx2-ny2-nx1-ny1)*log(1-2*py3-5/3*py2-4/3*py1-1/3*px2-2/3*px1-delta)
    }
    grr <- function(x) {
      #### The gradient of the log-likelihood function
      py3 <- x[1]
      px2 <- x[2]
      py2 <- x[3]
      px1 <- x[4]
      py1 <- x[5]
      c((nx3/(py3+2/3*py2+1/3*py1-2/3*px2-1/3*px1+delta)+ny3/py3-2*(n-nx3-ny3-nx2-ny2-nx1-ny1)/(1-2*py3-5/3*py2-4/3*py1-1/3*px2-2/3*px1-delta)),            # gradient of py3
        (-2*nx3/(3*(py3+2/3*py2+1/3*py1-2/3*px2-1/3*px1+delta))+nx2/px2-(n-nx3-ny3-nx2-ny2-nx1-ny1)/(3*(1-2*py3-5/3*py2-4/3*py1-1/3*px2-2/3*px1-delta))),  # gradient of px2
        (2*nx3/(3*(py3+2/3*py2+1/3*py1-2/3*px2-1/3*px1+delta))+ny2/py2-5*(n-nx3-ny3-nx2-ny2-nx1-ny1)/(3*(1-2*py3-5/3*py2-4/3*py1-1/3*px2-2/3*px1-delta))), # gradient of py2
        (-nx3/(3*(py3+2/3*py2+1/3*py1-2/3*px2-1/3*px1+delta))+nx1/px1-2*(n-nx3-ny3-nx2-ny2-nx1-ny1)/(3*(1-2*py3-5/3*py2-4/3*py1-1/3*px2-2/3*px1-delta))),  # gradient of px1
        (nx3/(3*(py3+2/3*py2+1/3*py1-2/3*px2-1/3*px1+delta))+ny1/py1-4*(n-nx3-ny3-nx2-ny2-nx1-ny1)/(3*(1-2*py3-5/3*py2-4/3*py1-1/3*px2-2/3*px1-delta))))   # gradient of py1
    }
    #### constrOptim call
    solv <- constrOptim(c(ipy3-if_n*delta, ipx2, ipy2, ipx1, ipy1), fr, grr,
                        ui=rbind(c(1, 0, 0, 0, 0),              # py3 >= 0
                                 c(0, 1, 0, 0, 0),              # px2 >= 0
                                 c(0, 0, 1, 0, 0),              # py2 >= 0
                                 c(0, 0, 0, 1, 0),              # px1 >= 0
                                 c(0, 0, 0, 0, 1),              # py1 >= 0
                                 c(1, -2/3, 2/3, -1/3, 1/3),    # keeps the first log argument positive
                                 c(-2, -1/3, -5/3, -2/3, -4/3)),# keeps the last log argument positive
                        ci=c(0, 0, 0, 0, 0, -delta, -1+delta),
                        control=list(fnscale=-1, reltol=1e-12), method='BFGS')
    psolv <- solv[[1]]
    py3 <- psolv[1]
    px2 <- psolv[2]
    py2 <- psolv[3]
    px1 <- psolv[4]
    py1 <- psolv[5]
    #### Normal deviate based on the difference of the two response rates
    #### difference
    d <- ((nx3+2/3*nx2+1/3*nx1)-(ny3+2/3*ny2+1/3*ny1))/n
    #### variance
    v <- (18*py3+10*py2+4*py1-2*px1-2*px2+9*delta-9*delta**2)/(9*n)
    #### z-score (the value returned by this function)
    tsz <- (d-delta)/sqrt(v)
  }

  stat2 <- function(delta){
    psolv <- NULL
    fr <- function(x) {
      #### The log-likelihood function
      py3 <- x[1]
      px2 <- x[2]
      py2 <- x[3]
      px1 <- x[4]
      py1 <- x[5]
      nx3*log(py3+2/3*py2+1/3*py1-2/3*px2-1/3*px1+delta)+ny3*log(py3)+nx2*log(px2)+ny2*log(py2)+nx1*log(px1)+ny1*log(py1)+(n-nx3-ny3-nx2-ny2-nx1-ny1)*log(1-2*py3-5/3*py2-4/3*py1-1/3*px2-2/3*px1-delta)
    }
    grr <- function(x) {
      #### The gradient of the log-likelihood function
      py3 <- x[1]
      px2 <- x[2]
      py2 <- x[3]
      px1 <- x[4]
      py1 <- x[5]
      c((nx3/(py3+2/3*py2+1/3*py1-2/3*px2-1/3*px1+delta)+ny3/py3-2*(n-nx3-ny3-nx2-ny2-nx1-ny1)/(1-2*py3-5/3*py2-4/3*py1-1/3*px2-2/3*px1-delta)),            # gradient of py3
        (-2*nx3/(3*(py3+2/3*py2+1/3*py1-2/3*px2-1/3*px1+delta))+nx2/px2-(n-nx3-ny3-nx2-ny2-nx1-ny1)/(3*(1-2*py3-5/3*py2-4/3*py1-1/3*px2-2/3*px1-delta))),  # gradient of px2
        (2*nx3/(3*(py3+2/3*py2+1/3*py1-2/3*px2-1/3*px1+delta))+ny2/py2-5*(n-nx3-ny3-nx2-ny2-nx1-ny1)/(3*(1-2*py3-5/3*py2-4/3*py1-1/3*px2-2/3*px1-delta))), # gradient of py2
        (-nx3/(3*(py3+2/3*py2+1/3*py1-2/3*px2-1/3*px1+delta))+nx1/px1-2*(n-nx3-ny3-nx2-ny2-nx1-ny1)/(3*(1-2*py3-5/3*py2-4/3*py1-1/3*px2-2/3*px1-delta))),  # gradient of px1
        (nx3/(3*(py3+2/3*py2+1/3*py1-2/3*px2-1/3*px1+delta))+ny1/py1-4*(n-nx3-ny3-nx2-ny2-nx1-ny1)/(3*(1-2*py3-5/3*py2-4/3*py1-1/3*px2-2/3*px1-delta))))   # gradient of py1
    }
    #### constrOptim call
    solv <- constrOptim(c(ipy3+if_p*delta, ipx2, ipy2, ipx1, ipy1), fr, grr,
                        ui=rbind(c(1, 0, 0, 0, 0),              # py3 >= 0
                                 c(0, 1, 0, 0, 0),              # px2 >= 0
                                 c(0, 0, 1, 0, 0),              # py2 >= 0
                                 c(0, 0, 0, 1, 0),              # px1 >= 0
                                 c(0, 0, 0, 0, 1),              # py1 >= 0
                                 c(1, -2/3, 2/3, -1/3, 1/3),    # keeps the first log argument positive
                                 c(-2, -1/3, -5/3, -2/3, -4/3)),# keeps the last log argument positive
                        ci=c(0, 0, 0, 0, 0, -delta, -1+delta),
                        control=list(fnscale=-1, reltol=1e-12), method='BFGS')
    psolv <- solv[[1]]
    py3 <- psolv[1]
    px2 <- psolv[2]
    py2 <- psolv[3]
    px1 <- psolv[4]
    py1 <- psolv[5]
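    #### (Editorial note) The variance v below is evaluated at the constrained
    #### maximum likelihood estimates just computed, so tsz is the score-type
    #### z-statistic for testing H0: difference = delta, in line with the
    #### score-based interval described in the PURPOSE section.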
    #### Normal deviate based on the difference of the two response rates
    #### difference
    d <- ((nx3+2/3*nx2+1/3*nx1)-(ny3+2/3*ny2+1/3*ny1))/n
    #### variance
    v <- (18*py3+10*py2+4*py1-2*px1-2*px2+9*delta-9*delta**2)/(9*n)
    #### z-score (the value returned by this function)
    tsz <- (d-delta)/sqrt(v)
  }

  #### Estimating the score-based lower confidence limit by the secant method
  # Output matrix (diagnostic trace of the iterations; not returned)
  lsecant <- matrix(0, 50, 7)

  #### Setup of the initial values for the secant method
  lrambda1 <- l_wald
  lrambda2 <- l_wald*0.95
  lrambda3 <- lrambda2   # guards the rare case in which the loop below is skipped
  j <- 0
  if (lrambda1 >= 0){
    z1 <- stat2(lrambda1)-qnorm(1-alpha/2)
  } else {
    z1 <- stat1(lrambda1)-qnorm(1-alpha/2)
  }
  if (lrambda2 >= 0){
    z2 <- stat2(lrambda2)-qnorm(1-alpha/2)
  } else {
    z2 <- stat1(lrambda2)-qnorm(1-alpha/2)
  }
  while (abs(z2) > 1e-8){
    j <- j+1
    lrambda3 <- lrambda2-(lrambda2-lrambda1)*z2/(z2-z1)
    if (lrambda3 >= 0){
      z3 <- stat2(lrambda3)-qnorm(1-alpha/2)
    } else {
      z3 <- stat1(lrambda3)-qnorm(1-alpha/2)
    }
    lsecant[j, 1] <- j
    lsecant[j, 2] <- lrambda1
    lsecant[j, 3] <- lrambda2
    lsecant[j, 4] <- lrambda3
    lsecant[j, 5] <- z1
    lsecant[j, 6] <- z2
    lsecant[j, 7] <- z3
    lrambda1 <- lrambda2
    lrambda2 <- lrambda3
    z1 <- z2
    z2 <- z3
  }
  lsecant[j, 1] <- j
  lsecant[j, 2] <- lrambda1
  lsecant[j, 3] <- lrambda2
  lsecant[j, 4] <- lrambda3
  lsecant[j, 5] <- z1
  lsecant[j, 6] <- z2
  lsecant[j, 7] <- z3

  #### Estimating the score-based upper confidence limit by the secant method
  #### Output matrix (diagnostic trace of the iterations; not returned)
  usecant <- matrix(0, 50, 7)

  #### Setup of the initial values for the secant method
  urambda1 <- u_wald
  urambda2 <- u_wald*0.95
  urambda3 <- urambda2   # guards the rare case in which the loop below is skipped
  j <- 0
  if (urambda1 >= 0){
    z1 <- stat2(urambda1)+qnorm(1-alpha/2)
  } else {
    z1 <- stat1(urambda1)+qnorm(1-alpha/2)
  }
  if (urambda2 >= 0){
    z2 <- stat2(urambda2)+qnorm(1-alpha/2)
  } else {
    z2 <- stat1(urambda2)+qnorm(1-alpha/2)
  }
  while (abs(z2) > 1e-8){
    j <- j+1
    urambda3 <- urambda2-(urambda2-urambda1)*z2/(z2-z1)
    if (urambda3 >= 0){
      z3 <- stat2(urambda3)+qnorm(1-alpha/2)
    } else {
      z3 <- stat1(urambda3)+qnorm(1-alpha/2)
    }
    usecant[j, 1] <- j
    usecant[j, 2] <- urambda1
    usecant[j, 3] <- urambda2
    usecant[j, 4] <- urambda3
    usecant[j, 5] <- z1
    usecant[j, 6] <- z2
    usecant[j, 7] <- z3
    urambda1 <- urambda2
    urambda2 <- urambda3
    z1 <- z2
    z2 <- z3
  }
  usecant[j, 1] <- j
  usecant[j, 2] <- urambda1
  usecant[j, 3] <- urambda2
  usecant[j, 4] <- urambda3
  usecant[j, 5] <- z1
  usecant[j, 6] <- z2
  usecant[j, 7] <- z3

  list(Score.lower.CI=lrambda3,
       Score.upper.CI=urambda3,
       Wald.null.lower.CI=l_schou,
       Wald.null.upper.CI=u_schou,
       Wald.alternative.lower.CI=l_wald,
       Wald.alternative.upper.CI=u_wald)
}

#### Selection of the function according to the number of raters
if (K == 2){
  res <- CI.func2(freq, K, alpha)
} else if (K == 3){
  res <- CI.func3(freq, K, alpha)
} else {
  stop("K must be 2 or 3 (see NOTE above)")
}
list(res)
}
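
#------------------------------------------------------------------------------------
# Usage sketch (editorial addition; uncomment to run). This repeats the K = 2
# example from the header and flattens the six limits, which are returned as
# 1 x 1 matrices, into a named vector for display:
#
# ldata <- c(9, 1, 0,
#            3, 0, 0,
#            0, 1, 1)
# out2 <- CI.func(ldata, 2, 0.05, 1, 0)
# print(unlist(out2[[1]]))
#------------------------------------------------------------------------------------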