Files @ 32990bba4df7
Branch filter:

Location: DA/raaql-paper-experiments/survey.R - annotation

Hannes Muehleisen
mc2
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
32990bba4df7
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
2aece7dd2719
library(survey)
options(na.action="na.pass")

# from survey package, R/surveyrep.R
# slightly patched to use numeric NAs in matrices and avoid over-decorating result
svymean <- function(x,design, na.rm=FALSE, rho=NULL,
                                           return.replicates=FALSE,deff=FALSE,...)
{
  # if (!exists(".Generic",inherits=FALSE))
  #   .Deprecated("svymean")
  if (!inherits(design,"svyrep.design")) stop("design is not a replicate survey design")
  
  if (inherits(x,"formula")){
    ## do the right thing with factors
    mf<-model.frame(x,design$variables,na.action=na.pass)
    xx<-lapply(attr(terms(x),"variables")[-1],
               function(tt) model.matrix(eval(bquote(~0+.(tt))),mf))
    cols<-sapply(xx,NCOL)
    x<-matrix(data=as.numeric(NA), nrow=NROW(xx[[1]]),ncol=sum(cols))
    scols<-c(0,cumsum(cols))
    for(i in 1:length(xx)){
      x[,scols[i]+1:cols[i]]<-xx[[i]]
    }
    colnames(x)<-do.call("c",lapply(xx,colnames))
  } else {
      if(typeof(x) %in% c("expression","symbol"))
          x<-eval(x, design$variables)
      else {
          if(is.data.frame(x) && any(sapply(x,is.factor))){
              xx<-lapply(x, function(xi) {if (is.factor(xi)) 0+(outer(xi,levels(xi),"==")) else xi})
              cols<-sapply(xx,NCOL)
              scols<-c(0,cumsum(cols))
              cn<-character(sum(cols))
              for(i in 1:length(xx))
                  cn[scols[i]+1:cols[i]]<-paste(names(x)[i],levels(x[[i]]),sep="")
              x<-matrix(nrow=NROW(xx[[1]]),ncol=sum(cols))
              for(i in 1:length(xx)){
                  x[,scols[i]+1:cols[i]]<-xx[[i]]
              }
              colnames(x)<-cn
          }
      }
  } 
  
  x<-as.matrix(x)
  
  if (na.rm){
    nas<-rowSums(is.na(x))
    design<-design[nas==0,]
    x<-x[nas==0,,drop=FALSE]
  }
  
  wts<-design$repweights
  scale<-design$scale
  rscales<-design$rscales
  if (!is.null(rho)) .NotYetUsed("rho")
  
  if (!design$combined.weights)
    pw<-design$pweights
  else
    pw<-1
  
  rval<-colSums(design$pweights*x)/sum(design$pweights)

  if (getOption("survey.drop.replicates") && !is.null(design$selfrep) && all(design$selfrep)){
    v<-matrix(0,length(rval),length(rval))
    repmeans<-NULL
  } else {
  if (inherits(wts, "repweights_compressed")){
    repmeans<-matrix(ncol=NCOL(x), nrow=ncol(wts$weights))
    for(i in 1:ncol(wts$weights)){
      wi<-wts$weights[wts$index,i]
      repmeans[i,]<-t(colSums(wi*x*pw)/sum(pw*wi))
    }
  } else {
    repmeans<-matrix(data=as.numeric(NA), ncol=NCOL(x), nrow=ncol(wts))
    for(i in 1:ncol(wts)){
      repmeans[i,]<-t(colSums(wts[,i]*x*pw)/sum(pw*wts[,i]))
    }
  }
  repmeans<-drop(repmeans)
  v <- svrVar(repmeans, scale, rscales,mse=design$mse, coef=rval)
}
# this is easy to fix, but for now...
return(v)
  attr(rval,"var") <-v
  attr(rval, "statistic")<-"mean"
  if (return.replicates){
    attr(repmeans,"scale")<-design$scale
    attr(repmeans,"rscales")<-design$rscales
    attr(repmeans,"mse")<-design$mse
    rval<-list(mean=rval, replicates=repmeans)
  }
  if (is.character(deff) || deff){
      nobs<-length(design$pweights)
      npop<-sum(design$pweights)
      vsrs<-unclass(svyvar(x,design,na.rm=na.rm, return.replicates=FALSE,estimate.only=TRUE))/length(design$pweights)
      if (deff!="replace")
        vsrs<-vsrs*(npop-nobs)/npop
      attr(rval,"deff") <- v/vsrs
  }
  class(rval)<-"svrepstat"
  rval
}

source("harness.R")
for (s in c("alabama", "california", "acs3yr" )) { 
  svydata <- readRDS(paste0(s,".rds"))
  names(svydata) <- tolower(names(svydata))
  print("loaded")
  svydsgn <- svrepdesign(
      weight = ~pwgtp ,
      repweights = 'pwgtp[1-9]' ,
      scale = 4 / 80 ,
      rscales = rep( 1 , 80 ) ,
      mse = TRUE ,
      data = svydata) 
  print("initialized")
  for (r in 1:5) {
    timing <- system.time({
      agep <- svymean(~agep, svydsgn, se=TRUE)
      relp <- svymean(~adjinc, svydsgn, se=TRUE)
      print(agep)
      print(relp)
    })[[3]]

    log.result("survey", sys, conf, s, r, timing)
    clearResultRecycler()
  }
}