STAT 526 TERM 2 1998-99 TUES FEB 9, 1999 CV-GCV-PLUG-IN-TRUTH in LOCPOLY - LONG VERSION # Create 100 data sets set.seed(1) nsim <- 100 n <- length(x) Y <- matrix(.3*rnorm(n*nsim),n,nsim) + truem ## USE PLUG-IN Yhat.dpill <- matrix(0,n,nsim) hopt.dpill <- rep(0,nsim) for(isim in 1:nsim){ hopt.dpill[isim] <- dpill(x,Y[,isim]) Yhat.dpill[,isim] <- locpoly(x, Y[,isim], range.x=c(x1,x2), gridsize=n,bandwidth = h)$y } ## USE CV/GCV ## CALCULATE THE CV AND GCV FUNCTIONS FOR EACH h in a grid: hvec Yhat.CV <- matrix(0,n,nsim) Yhat <- matrix(0,n,nsim) hvec <- seq(.01,.06,.001) nh <- length(hvec) CV <- matrix(0,nsim,nh) GCV <- matrix(0,nsim,nh) TRUE.SSE <- matrix(0,nsim,nh) diag.hatmat <- matrix(0,n,nh) for(ih in 1:nh){ print(ih) junk <- hatmatrix.locpoly(x,hvec[ih]) diag.hatmat <- diag(junk[[1]]) trace.hatmat <- junk[[2]] for(isim in 1:nsim){ Yhat[,isim] <- locpoly(x, Y[,isim], range.x=c(x1,x2), gridsize=n,bandwidth = hvec[ih])$y CV[isim,ih] <- mean( ( (Yhat[,isim]-Y[,isim])/(1-diag.hatmat))^2) GCV[isim,ih] <- sum( (Yhat[,isim]-Y[,isim])^2)/(n-trace.hatmat)^2 TRUE.SSE[isim,ih]<- mean( (Yhat[,isim]-truem)^2 ) } } par(mfrow=c(2,2)) matplot(hvec,t(CV),type="l") title("CV for Locpoly Data") matplot(hvec,t(GCV),type="l") title("GCV for Locpoly Data") matplot(hvec,t(TRUE.SSE),type="l") title("TRUE SSE for Locpoly Data") ### FIND CV, GCV, and TRUE.SSE h's index <- apply(CV,1,order) h.index <- index[1,] hopt.CV <- hvec[h.index] index <- apply(GCV,1,order) h.index <- index[1,] hopt.GCV <- hvec[h.index] index <- apply(TRUE.SSE,1,order) h.index <- index[1,] hopt.SSE<- hvec[h.index] ########################## SUMMARIES: COMPARE THE METHODS: par(mfrow=c(2,2)) hist(hopt.dpill) hist(hopt.CV) hist(hopt.GCV) hist(hopt.SSE) hopt.matrix <- cbind(hopt.dpill,hopt.CV,hopt.GCV,hopt.SSE) h.diff <- hopt.matrix-hopt.SSE apply(h.diff,2,mean) hopt.dpill hopt.CV hopt.GCV hopt.SSE 0.001743698 -0.00112 -0.00259 0 sqrt(apply(h.diff,2,var)) hopt.dpill hopt.CV hopt.GCV hopt.SSE 0.006001538 0.01079495 0.01019615 0 apply(h.diff,2,median) hopt.dpill hopt.CV hopt.GCV hopt.SSE 0.001686056 -0.001 -0.002 0 pairs(hopt.matrix) cor(hopt.matrix) hopt.dpill hopt.CV hopt.GCV hopt.SSE hopt.dpill 1.0000001 0.3820252 0.4941065 -0.2030985 hopt.CV 0.3820252 1.0000001 0.8816916 -0.1584396 hopt.GCV 0.4941065 0.8816916 1.0000000 -0.1674796 hopt.SSE -0.2030985 -0.1584396 -0.1674796 1.0000000 ############################ LOOK AT HOW WELL THINGS FIT: SSE Yhat.CV <- matrix(0,n,nsim) Yhat.GCV <- matrix(0,n,nsim) Yhat.SSE <- matrix(0,n,nsim) for(isim in 1:nsim){ Yhat.CV[,isim] <- locpoly(x, Y[,isim], range.x=c(x1,x2), gridsize=n,bandwidth = hopt.CV[isim])$y Yhat.GCV[,isim] <- locpoly(x, Y[,isim], range.x=c(x1,x2), gridsize=n,bandwidth = hopt.GCV[isim])$y Yhat.SSE[,isim] <- locpoly(x, Y[,isim], range.x=c(x1,x2), gridsize=n,bandwidth = hopt.SSE[isim])$y } SSE.dpill <- apply( (Yhat.dpill -truem)^2,2,mean) SSE.CV <- apply( (Yhat.CV-truem)^2,2,mean) SSE.GCV <- apply( (Yhat.GCV-truem)^2,2,mean) SSE.SSE <- apply( (Yhat.SSE-truem)^2,2,mean) hist(SSE.dpill) hist(SSE.CV) hist(SSE.GCV) hist(SSE.SSE) SSE.matrix <- cbind(SSE.dpill,SSE.CV,SSE.GCV,SSE.SSE) pairs(SSE.matrix) ########### MY HATMATRIX.LOCPOLY FUNCTION ############# hatmatrix.locpoly <-0 function(x, h){ # input: x the vector of independent variable values # h the bandwidth to be used # output: hatmat in [[1]] the hatmatrix using locpoly ### COMPARE CV/GCV/PLUG-IN Plug-in calculated via dpill # trace of hatmatrix in [[2]] # n <- length(x) hatmat <- matrix(0, n, n) x1 <- min(x) x2 <- max(x) for(i in 1:n) { y <- rep(0, n) y[i] <- 1 # The following line can be changed to have a function that produces # the hat matrix for other smoothing methods hatmat[, i] <- locpoly(x, y, range.x=c(x1,x2), gridsize=n,bandwidth = h)$y } tr <- sum(diag(hatmat)) return(hatmat, tr) }