|
new file 100644
|
|
|
library(survey)
|
|
|
options(na.action="na.pass")
|
|
|
|
|
|
# from survey package, R/surveyrep.R
|
|
|
# slightly patched to use numeric NAs in matrices and avoid over-decorating result
|
|
|
svymean <- function(x,design, na.rm=FALSE, rho=NULL,
|
|
|
return.replicates=FALSE,deff=FALSE,...)
|
|
|
{
|
|
|
# if (!exists(".Generic",inherits=FALSE))
|
|
|
# .Deprecated("svymean")
|
|
|
if (!inherits(design,"svyrep.design")) stop("design is not a replicate survey design")
|
|
|
|
|
|
if (inherits(x,"formula")){
|
|
|
## do the right thing with factors
|
|
|
mf<-model.frame(x,design$variables,na.action=na.pass)
|
|
|
xx<-lapply(attr(terms(x),"variables")[-1],
|
|
|
function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
|
|
|
cols<-sapply(xx,NCOL)
|
|
|
x<-matrix(data=as.numeric(NA), nrow=NROW(xx[[1]]),ncol=sum(cols))
|
|
|
scols<-c(0,cumsum(cols))
|
|
|
for(i in 1:length(xx)){
|
|
|
x[,scols[i]+1:cols[i]]<-xx[[i]]
|
|
|
}
|
|
|
colnames(x)<-do.call("c",lapply(xx,colnames))
|
|
|
} else {
|
|
|
if(typeof(x) %in% c("expression","symbol"))
|
|
|
x<-eval(x, design$variables)
|
|
|
else {
|
|
|
if(is.data.frame(x) && any(sapply(x,is.factor))){
|
|
|
xx<-lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi})
|
|
|
cols<-sapply(xx,NCOL)
|
|
|
scols<-c(0,cumsum(cols))
|
|
|
cn<-character(sum(cols))
|
|
|
for(i in 1:length(xx))
|
|
|
cn[scols[i]+1:cols[i]]<-paste(names(x)[i],levels(x[[i]]),sep="")
|
|
|
x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
|
|
|
for(i in 1:length(xx)){
|
|
|
x[,scols[i]+1:cols[i]]<-xx[[i]]
|
|
|
}
|
|
|
colnames(x)<-cn
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
|
|
|
x<-as.matrix(x)
|
|
|
|
|
|
if (na.rm){
|
|
|
nas<-rowSums(is.na(x))
|
|
|
design<-design[nas==0,]
|
|
|
x<-x[nas==0,,drop=FALSE]
|
|
|
}
|
|
|
|
|
|
wts<-design$repweights
|
|
|
scale<-design$scale
|
|
|
rscales<-design$rscales
|
|
|
if (!is.null(rho)) .NotYetUsed("rho")
|
|
|
|
|
|
if (!design$combined.weights)
|
|
|
pw<-design$pweights
|
|
|
else
|
|
|
pw<-1
|
|
|
|
|
|
rval<-colSums(design$pweights*x)/sum(design$pweights)
|
|
|
|
|
|
if (getOption("survey.drop.replicates") && !is.null(design$selfrep) && all(design$selfrep)){
|
|
|
v<-matrix(0,length(rval),length(rval))
|
|
|
repmeans<-NULL
|
|
|
} else {
|
|
|
if (inherits(wts, "repweights_compressed")){
|
|
|
repmeans<-matrix(ncol=NCOL(x), nrow=ncol(wts$weights))
|
|
|
for(i in 1:ncol(wts$weights)){
|
|
|
wi<-wts$weights[wts$index,i]
|
|
|
repmeans[i,]<-t(colSums(wi*x*pw)/sum(pw*wi))
|
|
|
}
|
|
|
} else {
|
|
|
repmeans<-matrix(data=as.numeric(NA), ncol=NCOL(x), nrow=ncol(wts))
|
|
|
for(i in 1:ncol(wts)){
|
|
|
repmeans[i,]<-t(colSums(wts[,i]*x*pw)/sum(pw*wts[,i]))
|
|
|
}
|
|
|
}
|
|
|
repmeans<-drop(repmeans)
|
|
|
v <- svrVar(repmeans, scale, rscales,mse=design$mse, coef=rval)
|
|
|
}
|
|
|
# this is easy to fix, but for now...
|
|
|
return(v)
|
|
|
attr(rval,"var") <-v
|
|
|
attr(rval, "statistic")<-"mean"
|
|
|
if (return.replicates){
|
|
|
attr(repmeans,"scale")<-design$scale
|
|
|
attr(repmeans,"rscales")<-design$rscales
|
|
|
attr(repmeans,"mse")<-design$mse
|
|
|
rval<-list(mean=rval, replicates=repmeans)
|
|
|
}
|
|
|
if (is.character(deff) || deff){
|
|
|
nobs<-length(design$pweights)
|
|
|
npop<-sum(design$pweights)
|
|
|
vsrs<-unclass(svyvar(x,design,na.rm=na.rm, return.replicates=FALSE,estimate.only=TRUE))/length(design$pweights)
|
|
|
if (deff!="replace")
|
|
|
vsrs<-vsrs*(npop-nobs)/npop
|
|
|
attr(rval,"deff") <- v/vsrs
|
|
|
}
|
|
|
class(rval)<-"svrepstat"
|
|
|
rval
|
|
|
}
|
|
|
|
|
|
source("harness.R")
|
|
|
for (s in c("alabama", "california", "acs3yr" )) {
|
|
|
svydata <- readRDS(paste0(s,".rds"))
|
|
|
names(svydata) <- tolower(names(svydata))
|
|
|
print("loaded")
|
|
|
svydsgn <- svrepdesign(
|
|
|
weight = ~pwgtp ,
|
|
|
repweights = 'pwgtp[1-9]' ,
|
|
|
scale = 4 / 80 ,
|
|
|
rscales = rep( 1 , 80 ) ,
|
|
|
mse = TRUE ,
|
|
|
data = svydata)
|
|
|
print("initialized")
|
|
|
for (r in 1:5) {
|
|
|
timing <- system.time({
|
|
|
agep <- svymean(~agep, svydsgn, se=TRUE)
|
|
|
relp <- svymean(~relp, svydsgn, se=TRUE)
|
|
|
print(agep)
|
|
|
print(relp)
|
|
|
})[[3]]
|
|
|
|
|
|
log.result("survey", sys, conf, s, r, timing)
|
|
|
clearResultRecycler()
|
|
|
}
|
|
|
}
|
|
|
|