Files
@ 2aece7dd2719
Branch filter:
Location: DA/raaql-paper-experiments/survey.R
2aece7dd2719
4.0 KiB
text/S-plus
import
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 | library(survey)
options(na.action="na.pass")
# from survey package, R/surveyrep.R
# slightly patched to use numeric NAs in matrices and avoid over-decorating result
svymean <- function(x,design, na.rm=FALSE, rho=NULL,
return.replicates=FALSE,deff=FALSE,...)
{
# if (!exists(".Generic",inherits=FALSE))
# .Deprecated("svymean")
if (!inherits(design,"svyrep.design")) stop("design is not a replicate survey design")
if (inherits(x,"formula")){
## do the right thing with factors
mf<-model.frame(x,design$variables,na.action=na.pass)
xx<-lapply(attr(terms(x),"variables")[-1],
function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
cols<-sapply(xx,NCOL)
x<-matrix(data=as.numeric(NA), nrow=NROW(xx[[1]]),ncol=sum(cols))
scols<-c(0,cumsum(cols))
for(i in 1:length(xx)){
x[,scols[i]+1:cols[i]]<-xx[[i]]
}
colnames(x)<-do.call("c",lapply(xx,colnames))
} else {
if(typeof(x) %in% c("expression","symbol"))
x<-eval(x, design$variables)
else {
if(is.data.frame(x) && any(sapply(x,is.factor))){
xx<-lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi})
cols<-sapply(xx,NCOL)
scols<-c(0,cumsum(cols))
cn<-character(sum(cols))
for(i in 1:length(xx))
cn[scols[i]+1:cols[i]]<-paste(names(x)[i],levels(x[[i]]),sep="")
x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
for(i in 1:length(xx)){
x[,scols[i]+1:cols[i]]<-xx[[i]]
}
colnames(x)<-cn
}
}
}
x<-as.matrix(x)
if (na.rm){
nas<-rowSums(is.na(x))
design<-design[nas==0,]
x<-x[nas==0,,drop=FALSE]
}
wts<-design$repweights
scale<-design$scale
rscales<-design$rscales
if (!is.null(rho)) .NotYetUsed("rho")
if (!design$combined.weights)
pw<-design$pweights
else
pw<-1
rval<-colSums(design$pweights*x)/sum(design$pweights)
if (getOption("survey.drop.replicates") && !is.null(design$selfrep) && all(design$selfrep)){
v<-matrix(0,length(rval),length(rval))
repmeans<-NULL
} else {
if (inherits(wts, "repweights_compressed")){
repmeans<-matrix(ncol=NCOL(x), nrow=ncol(wts$weights))
for(i in 1:ncol(wts$weights)){
wi<-wts$weights[wts$index,i]
repmeans[i,]<-t(colSums(wi*x*pw)/sum(pw*wi))
}
} else {
repmeans<-matrix(data=as.numeric(NA), ncol=NCOL(x), nrow=ncol(wts))
for(i in 1:ncol(wts)){
repmeans[i,]<-t(colSums(wts[,i]*x*pw)/sum(pw*wts[,i]))
}
}
repmeans<-drop(repmeans)
v <- svrVar(repmeans, scale, rscales,mse=design$mse, coef=rval)
}
# this is easy to fix, but for now...
return(v)
attr(rval,"var") <-v
attr(rval, "statistic")<-"mean"
if (return.replicates){
attr(repmeans,"scale")<-design$scale
attr(repmeans,"rscales")<-design$rscales
attr(repmeans,"mse")<-design$mse
rval<-list(mean=rval, replicates=repmeans)
}
if (is.character(deff) || deff){
nobs<-length(design$pweights)
npop<-sum(design$pweights)
vsrs<-unclass(svyvar(x,design,na.rm=na.rm, return.replicates=FALSE,estimate.only=TRUE))/length(design$pweights)
if (deff!="replace")
vsrs<-vsrs*(npop-nobs)/npop
attr(rval,"deff") <- v/vsrs
}
class(rval)<-"svrepstat"
rval
}
source("harness.R")
for (s in c("alabama", "california", "acs3yr" )) {
svydata <- readRDS(paste0(s,".rds"))
names(svydata) <- tolower(names(svydata))
print("loaded")
svydsgn <- svrepdesign(
weight = ~pwgtp ,
repweights = 'pwgtp[1-9]' ,
scale = 4 / 80 ,
rscales = rep( 1 , 80 ) ,
mse = TRUE ,
data = svydata)
print("initialized")
for (r in 1:5) {
timing <- system.time({
agep <- svymean(~agep, svydsgn, se=TRUE)
relp <- svymean(~relp, svydsgn, se=TRUE)
print(agep)
print(relp)
})[[3]]
log.result("survey", sys, conf, s, r, timing)
clearResultRecycler()
}
}
|