Files @ 5cabbae4fdd5
Branch filter:

Location: DA/raaql-paper-experiments/survey.R

Hannes Muehleisen
submitted
library(survey)
options(na.action="na.pass")

# from survey package, R/surveyrep.R
# slightly patched to use numeric NAs in matrices and avoid over-decorating result
svymean <- function(x,design, na.rm=FALSE, rho=NULL,
                                           return.replicates=FALSE,deff=FALSE,...)
{
  # if (!exists(".Generic",inherits=FALSE))
  #   .Deprecated("svymean")
  if (!inherits(design,"svyrep.design")) stop("design is not a replicate survey design")
  
  if (inherits(x,"formula")){
    ## do the right thing with factors
    mf<-model.frame(x,design$variables,na.action=na.pass)
    xx<-lapply(attr(terms(x),"variables")[-1],
               function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
    cols<-sapply(xx,NCOL)
    x<-matrix(data=as.numeric(NA), nrow=NROW(xx[[1]]),ncol=sum(cols))
    scols<-c(0,cumsum(cols))
    for(i in 1:length(xx)){
      x[,scols[i]+1:cols[i]]<-xx[[i]]
    }
    colnames(x)<-do.call("c",lapply(xx,colnames))
  } else {
      if(typeof(x) %in% c("expression","symbol"))
          x<-eval(x, design$variables)
      else {
          if(is.data.frame(x) && any(sapply(x,is.factor))){
              xx<-lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi})
              cols<-sapply(xx,NCOL)
              scols<-c(0,cumsum(cols))
              cn<-character(sum(cols))
              for(i in 1:length(xx))
                  cn[scols[i]+1:cols[i]]<-paste(names(x)[i],levels(x[[i]]),sep="")
              x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
              for(i in 1:length(xx)){
                  x[,scols[i]+1:cols[i]]<-xx[[i]]
              }
              colnames(x)<-cn
          }
      }
  } 
  
  x<-as.matrix(x)
  
  if (na.rm){
    nas<-rowSums(is.na(x))
    design<-design[nas==0,]
    x<-x[nas==0,,drop=FALSE]
  }
  
  wts<-design$repweights
  scale<-design$scale
  rscales<-design$rscales
  if (!is.null(rho)) .NotYetUsed("rho")
  
  if (!design$combined.weights)
    pw<-design$pweights
  else
    pw<-1
  
  rval<-colSums(design$pweights*x)/sum(design$pweights)

  if (getOption("survey.drop.replicates") && !is.null(design$selfrep) && all(design$selfrep)){
    v<-matrix(0,length(rval),length(rval))
    repmeans<-NULL
  } else {
  if (inherits(wts, "repweights_compressed")){
    repmeans<-matrix(ncol=NCOL(x), nrow=ncol(wts$weights))
    for(i in 1:ncol(wts$weights)){
      wi<-wts$weights[wts$index,i]
      repmeans[i,]<-t(colSums(wi*x*pw)/sum(pw*wi))
    }
  } else {
    repmeans<-matrix(data=as.numeric(NA), ncol=NCOL(x), nrow=ncol(wts))
    for(i in 1:ncol(wts)){
      repmeans[i,]<-t(colSums(wts[,i]*x*pw)/sum(pw*wts[,i]))
    }
  }
  repmeans<-drop(repmeans)
  v <- svrVar(repmeans, scale, rscales,mse=design$mse, coef=rval)
}
# this is easy to fix, but for now...
return(v)
  attr(rval,"var") <-v
  attr(rval, "statistic")<-"mean"
  if (return.replicates){
    attr(repmeans,"scale")<-design$scale
    attr(repmeans,"rscales")<-design$rscales
    attr(repmeans,"mse")<-design$mse
    rval<-list(mean=rval, replicates=repmeans)
  }
  if (is.character(deff) || deff){
      nobs<-length(design$pweights)
      npop<-sum(design$pweights)
      vsrs<-unclass(svyvar(x,design,na.rm=na.rm, return.replicates=FALSE,estimate.only=TRUE))/length(design$pweights)
      if (deff!="replace")
        vsrs<-vsrs*(npop-nobs)/npop
      attr(rval,"deff") <- v/vsrs
  }
  class(rval)<-"svrepstat"
  rval
}

source("harness.R")
for (s in c("alabama", "california", "acs3yr" )) { 
  svydata <- readRDS(paste0(s,".rds"))
  names(svydata) <- tolower(names(svydata))
  print("loaded")
  svydsgn <- svrepdesign(
      weight = ~pwgtp ,
      repweights = 'pwgtp[1-9]' ,
      scale = 4 / 80 ,
      rscales = rep( 1 , 80 ) ,
      mse = TRUE ,
      data = svydata) 
  print("initialized")
  for (r in 1:5) {
    timing <- system.time({
      agep <- svymean(~agep, svydsgn, se=TRUE)
      relp <- svymean(~adjinc, svydsgn, se=TRUE)
      print(agep)
      print(relp)
    })[[3]]

    log.result("survey", sys, conf, s, r, timing)
    clearResultRecycler()
  }
}