## ----echo=FALSE,message=FALSE-------------------------------------------------
knitr::opts_chunk$set(comment=NA,
               fig.align="center",
               results="markup")

## ----echo=FALSE---------------------------------------------------------------
set.seed(20)
vote.probs <- c(.361,.29,.23,1-.361-.29-.23)
decis.probs <- c(vote.probs*.651,1-.651)
all.probs <- c(.9*decis.probs,.1*rep(1/3,3))
voteint <- sample(c(1:4,9,97,98,99),
                  prob=all.probs,
                  replace=TRUE,
                  size=200)                
library(memisc)

## -----------------------------------------------------------------------------
voteint

## -----------------------------------------------------------------------------
# This is to be run *after* memisc has been loaded.
labels(voteint) <- c(Conservative       =  1,
                     Labour             =  2,
                     "Liberal Democrat" =  3, # We have whitespace in the label, 
                     "Other Party"      =  4, # so we need quotation marks
                     "Will not vote"    =  9,
                     "Don't know"       = 97,
                     "Answer refused"   = 98,
                     "Not applicable"   = 99)

## -----------------------------------------------------------------------------
class(voteint)
str(voteint)
voteint

## -----------------------------------------------------------------------------
labels(voteint)

## -----------------------------------------------------------------------------
voteint <- relabel(voteint,
                   "Conservative"     = "Cons",
                   "Labour"           = "Lab",
                   "Liberal Democrat" = "LibDem",
                   "Other Party"      = "Other",
                   "Will not vote"    = "NoVote",
                   "Don't know"       = "DK",
                   "Answer refused"   = "Refused",
                   "Not applicable"   = "N.a.")

## -----------------------------------------------------------------------------
labels(voteint)
voteint
str(voteint)

## -----------------------------------------------------------------------------
missing.values(voteint) <- c(97,98,99)

## -----------------------------------------------------------------------------
voteint

## -----------------------------------------------------------------------------
missing.values(voteint) <- missing.values(voteint) + 9

## -----------------------------------------------------------------------------
missing.values(voteint)

## -----------------------------------------------------------------------------
as.numeric(voteint)[1:30]
as.factor(voteint)[1:30]

## -----------------------------------------------------------------------------
missing.values(voteint) <- NULL
missing.values(voteint)
as.numeric(voteint)[1:30]

## -----------------------------------------------------------------------------
valid.values(voteint) <- 1:4
valid.values(voteint)
missing.values(voteint)

## -----------------------------------------------------------------------------
valid.range(voteint) <- c(1,9)
missing.values(voteint)

## -----------------------------------------------------------------------------
description(voteint) <- "Vote intention"
description(voteint)

## -----------------------------------------------------------------------------
wording(voteint) <- "Which party are you going to vote for in the general election next Tuesday?"
wording(voteint)
annotation(voteint)
annotation(voteint)["wording"]

## -----------------------------------------------------------------------------
codebook(voteint)

## -----------------------------------------------------------------------------
voteint1 <- voteint
voteint1[sample(length(voteint),size=20)] <- c(rep(5,13),rep(7,7))

## -----------------------------------------------------------------------------
codebook(voteint1)

## -----------------------------------------------------------------------------
wild.codes(voteint1)

## -----------------------------------------------------------------------------
voteint2 <- voteint
labels(voteint2) <- NULL # This deletes all value labels
codebook(voteint2)

## -----------------------------------------------------------------------------
measurement(voteint2) <- "interval"
codebook(voteint2)

## ----results='asis'-----------------------------------------------------------
show_html(codebook(voteint))

## -----------------------------------------------------------------------------
Data <- data.set(
          vote = sample(c(1,2,3,4,8,9,97,99),
                        size=300,replace=TRUE),
          region = sample(c(rep(1,3),rep(2,2),3,99),
                          size=300,replace=TRUE),
          income = round(exp(rnorm(300,sd=.7))*2000)
          )

## -----------------------------------------------------------------------------
Data

## -----------------------------------------------------------------------------
options(show.max.obs=5)
Data
# Back to the default
options(show.max.obs=25)

## ----eval=FALSE---------------------------------------------------------------
#  print(Data)

## -----------------------------------------------------------------------------
Data <- within(Data,{
  description(vote) <- "Vote intention"
  description(region) <- "Region of residence"
  description(income) <- "Household income"
  wording(vote) <- "If a general election would take place next Tuesday,
                    the candidate of which party would you vote for?"
  wording(income) <- "All things taken into account, how much do all
                    household members earn in sum?"
  foreach(x=c(vote,region),{
    measurement(x) <- "nominal"
    })
  measurement(income) <- "ratio"
  labels(vote) <- c(
                    Conservatives         =  1,
                    Labour                =  2,
                    "Liberal Democrats"   =  3,
                    "Other"               =  4,
                    "Don't know"          =  8,
                    "Answer refused"      =  9,
                    "Not applicable"      = 97,
                    "Not asked in survey" = 99)
  labels(region) <- c(
                    England               =  1,
                    Scotland              =  2,
                    Wales                 =  3,
                    "Not applicable"      = 97,
                    "Not asked in survey" = 99)
  foreach(x=c(vote,region,income),{
    annotation(x)["Remark"] <- "This is not a real survey item, of course ..."
    })
  missing.values(vote) <- c(8,9,97,99)
  missing.values(region) <- c(97,99)

  # These to variables do not appear in the
  # the resulting data set, since they have the wrong length.
  junk1 <- 1:5
  junk2 <- matrix(5,4,4)
  
})

## -----------------------------------------------------------------------------
Data

## -----------------------------------------------------------------------------
EnglandData <- subset(Data,region == "England")
EnglandData

## -----------------------------------------------------------------------------
codebook(Data)

## ----results='asis'-----------------------------------------------------------
show_html(codebook(Data))

## -----------------------------------------------------------------------------
DataFr <- as.data.frame(Data)
## Looking a the data frame structure
str(DataFr)
## Looking at the first 25 observations
DataFr[1:25,]

## -----------------------------------------------------------------------------
xtabs(~vote+region,data=DataFr)

## -----------------------------------------------------------------------------
xtabs(~vote+region,data=Data)

## -----------------------------------------------------------------------------
xtabs(~vote+region,data=within(Data, 
                               vote <- include.missings(vote)))

## ----results='asis'-----------------------------------------------------------
show_html(codebook(DataFr))

## -----------------------------------------------------------------------------
load(system.file("gles/gles2013work.RData",package="memisc"))

## ----results='asis'-----------------------------------------------------------
with(gles2013work,
     show_html(codebook(bula)))

## -----------------------------------------------------------------------------
gles2013work <- within(gles2013work,
                       east.west <- recode(bula,
                                          East = 1 <- c(3,4,8,13,14,16),
                                          West = 2 <- c(1,2,5:7,9:12,15)
                                          ))

## -----------------------------------------------------------------------------
xtabs(~bula+east.west,data=gles2013work)

## -----------------------------------------------------------------------------
x <- 1:10
xc <- cases(x <= 3, 
            x > 3 & x <= 7, 
            x > 7)
data.frame(x,xc)

## -----------------------------------------------------------------------------
xn <- cases(1 <- x <= 3, 
            2 <- x > 3 & x <= 7, 
            3 <- x > 7)
data.frame(x,xn)

## -----------------------------------------------------------------------------
gles2013work <- within(gles2013work,{

  candidate.vote <- cases(
              wave == 1 & intent.turnout == 6 -> postal.vote.candidate,
              wave == 1 & intent.turnout %in% 4:5 -> 900,
              wave == 1 & intent.turnout %in% 1:3 -> voteint.candidate,
              wave == 2 & turnout == 1 -> vote.candidate,
              wave == 2 & turnout == 2 -> 900
            )

  list.vote <- cases(
              wave == 1 & intent.turnout == 6 -> postal.vote.list,
              wave == 1 & intent.turnout %in% 4:5 -> 900,
              wave == 1 & intent.turnout %in% 1:3 -> voteint.list,
              wave == 2 & turnout ==1 -> vote.list,
              wave == 2 & turnout ==2 -> 900
            )
})

## -----------------------------------------------------------------------------
gles2013work <- within(gles2013work,{
  candidate.vote <- recode(as.item(candidate.vote),
                      "CDU/CSU"   =  1 <- 1,
                      "SPD"       =  2 <- 4,
                      "FDP"       =  3 <- 5,
                      "Grüne"     =  4 <- 6,
                      "Linke"     =  5 <- 7,
                      "NPD"       =  6 <- 206,
                      "Piraten"   =  7 <- 215,
                      "AfD"       =  8 <- 322,
                      "Other"     = 10 <- 801,
                      "No Vote"   = 90 <- 900,
                      "WN"        = 98 <- -98,
                      "KA"        = 99 <- -99
                  )
  list.vote <- recode(as.item(list.vote),
                      "CDU/CSU"   =  1 <- 1,
                      "SPD"       =  2 <- 4,
                      "FDP"       =  3 <- 5,
                      "Grüne"     =  4 <- 6,
                      "Linke"     =  5 <- 7,
                      "NPD"       =  6 <- 206,
                      "Piraten"   =  7 <- 215,
                      "AfD"       =  8 <- 322,
                      "Other"     = 10 <- 801,
                      "No Vote"   = 90 <- 900,
                      "WN"        = 98 <- -98,
                      "KA"        = 99 <- -99
                  )
  
   missing.values(candidate.vote) <- 98:99
   missing.values(list.vote) <- 98:99
   measurement(candidate.vote) <- "nominal"
   measurement(list.vote) <- "nominal"
})

## ----width=120----------------------------------------------------------------
xtabs(~list.vote+east.west,data=gles2013work)
xtabs(~list.vote+candidate.vote,data=gles2013work)