# Untitled

# Load the Diagnoza data sets (households, persons) and their dictionaries
# straight from the GitHub repository into the current environment.
diagnoza_urls <- c(
  "https://github.com/pbiecek/Diagnoza/raw/master/data/gospodarstwa.rda",
  "https://github.com/pbiecek/Diagnoza/raw/master/data/osoby.rda",
  "https://github.com/pbiecek/Diagnoza/raw/master/data/gospodarstwaDict.rda",
  "https://github.com/pbiecek/Diagnoza/raw/master/data/osobyDict.rda"
)
for (rda_url in diagnoza_urls) {
  load(url(rda_url))
}

library("weights")
library("Hmisc")


# Confidence interval of the satisfying household net income in 2003:
# keep the 2003 household weight and the income item (bl6), and drop every
# row in which either of the two is missing.
tmp <- na.omit(gospodarstwa[, c("waga_gd_2003", "bl6")])

# Two-sided t-based confidence interval for a weighted mean.
#
# The point estimate is weighted.mean(x, weights); the standard error is
# sqrt(v / n), where v is Hmisc::wtd.var() computed with weights normalised
# to sum to the number of observations (normwt = TRUE) and n is the number
# of complete (x, weight) pairs. The interval uses a Student t quantile with
# n - 1 degrees of freedom.
#
# Arguments:
#   x          numeric vector (a matrix is flattened) of observations
#   weights    numeric vector of weights, same length as x
#   conf.level confidence level, a single number strictly between 0 and 1
#
# Returns a numeric vector of length 2: c(lower, upper).
# Stops with an error if Hmisc is not installed, if the lengths differ, if
# conf.level is invalid, or if fewer than two complete observations remain.
weighted.ttest.ci <- function(x, weights, conf.level = 0.95) {
  # require() only returns FALSE when the package is missing; fail loudly.
  if (!requireNamespace("Hmisc", quietly = TRUE)) {
    stop("Package 'Hmisc' is required for weighted.ttest.ci()", call. = FALSE)
  }

  x <- as.numeric(x)
  weights <- as.numeric(weights)
  stopifnot(
    length(x) == length(weights),
    is.numeric(conf.level),
    length(conf.level) == 1L,
    conf.level > 0,
    conf.level < 1
  )

  # Drop incomplete pairs up front so that n, the mean and the variance are
  # all computed from exactly the same observations.
  complete <- !is.na(x) & !is.na(weights)
  x <- x[complete]
  weights <- weights[complete]

  nx <- length(x)
  if (nx < 2L) {
    stop("At least two complete observations are required", call. = FALSE)
  }

  mx <- weighted.mean(x, weights)
  vx <- Hmisc::wtd.var(x, weights, normwt = TRUE)
  std_err <- sqrt(vx / nx)

  # mean +/- t quantile * standard error
  half_width <- qt(1 - (1 - conf.level) / 2, df = nx - 1) * std_err
  mx + c(-half_width, half_width)
}

# Extract the income values and their weights as plain numeric vectors,
# selecting the columns by name instead of by position (and without the
# detour through a transposed one-row matrix).
x <- as.numeric(tmp[["bl6"]])
weights <- as.numeric(tmp[["waga_gd_2003"]])

# Weighted histogram of the income item, truncated to 0-10000 for readability.
wtd.hist(x = x, weight = weights, breaks = 100, xlim = c(0, 10000))

# Weighted 95% confidence interval for the mean.
answer <- weighted.ttest.ci(x, weights)
# The 95% confidence interval is approximately (3387, 3544)

#----------------------------------------------------------------------------------------------

# Hypothesis: households that have a car have a higher average net income.
# NOTE(review): ff9_12a == 1 is treated as "has a car" and ff9_12a == 2 as
# "has no car" -- confirm the coding against gospodarstwaDict.
tmp <- gospodarstwa[, c("fl2", "ff9_12a", "waga_gd_2011")]
tmp1 <- subset(tmp, ff9_12a == 1)
tmp2 <- subset(tmp, ff9_12a == 2)

# One-sided weighted t-test, H1: mean(tmp1$fl2) > mean(tmp2$fl2).
# wtd.t.test() only understands "two.tailed", "greater" and "less"; any other
# value (such as "more") silently falls back to the two-tailed p-value.
# samedata = FALSE because the two groups are independent samples of
# different sizes.
wtd.t.test(
  x = tmp1$fl2, y = tmp2$fl2,
  weight = tmp1$waga_gd_2011, weighty = tmp2$waga_gd_2011,
  samedata = FALSE, alternative = "greater"
)
# The p-value is basically 0, i.e. below alpha = 0.05, so we reject H0:
# car owners have a higher average net income than non-owners.
wtd.hist(x = tmp1$fl2, weight = tmp1$waga_gd_2011, breaks = 100, xlim = c(0, 10000))
wtd.hist(x = tmp2$fl2, weight = tmp2$waga_gd_2011, breaks = 100, xlim = c(0, 6000))
# Comparing the histograms, the car owners' histogram is shifted to the
# right, with a bigger mean (3927 > 1980).


#----------------------------------------------------------------------------------------------

# Hypothesis: phone owners are happier than non-owners.
# NOTE(review): ec25 == 4 is treated as "no phone" and every other answer as
# "has a phone"; the direction of the ep3 scale is not visible here --
# confirm both against osobyDict.

tmp <- osoby[, c("ep3", "ec25", "waga_2009_osoby")]
tmp <- na.omit(tmp)
tmp1 <- subset(tmp, ec25 != 4)
tmp2 <- subset(tmp, ec25 == 4)

# One-sided weighted t-test, H1: mean(tmp1$ep3) > mean(tmp2$ep3).
# - weighty must use the existing column waga_2009_osoby; a misspelled name
#   evaluates to NULL and leaves the second group unweighted.
# - wtd.t.test() only understands "two.tailed", "greater" and "less".
# - samedata = FALSE because the two groups are independent samples.
wtd.t.test(
  x = tmp1$ep3, y = tmp2$ep3,
  weight = tmp1$waga_2009_osoby, weighty = tmp2$waga_2009_osoby,
  samedata = FALSE, alternative = "greater"
)
wtd.hist(x = tmp1$ep3, weight = tmp1$waga_2009_osoby, breaks = 6, xlim = c(0, 7))
wtd.hist(x = tmp2$ep3, weight = tmp2$waga_2009_osoby, breaks = 6, xlim = c(0, 7))

# Weighted group means (Hmisc::wtd.mean takes `weights`, spelled out in full).
wtd.mean(x = tmp1$ep3, weights = tmp1$waga_2009_osoby)

wtd.mean(x = tmp2$ep3, weights = tmp2$waga_2009_osoby)
# Previously recorded conclusion: the p-value is basically 0, so it is below
# alpha = 0.05 and we reject H0; phone owners are less happy than phone-free
# people.
# NOTE(review): that result was obtained with the second group unweighted and
# a two-tailed test; re-run and confirm the direction using the weighted
# means above and the coding of ep3.