---
title: "Creating The Example Data Set"
author: "Frederik Ziebell"
output:
  rmarkdown::html_vignette:
    df_print: paged
link-citations: TRUE
vignette: >
  %\VignetteIndexEntry{Creating The Example Data Set}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

Here we quickly show how the data objects of the example data set can be reproduced.

# Load Packages
```{r eval=FALSE}
library("recount3")
library("DESeq2")
library("dplyr")
library("stringr")
library("SummarizedExperiment")
library("tidyr")
library("tibble")
library("magrittr")
```

# Download GSE89888 via recount3 and format data
```{r eval=FALSE}

# download data recount3
proj_info <- available_projects() %>%
  filter(project == "SRP093386")
se <- create_rse(proj_info)

count_mat <- compute_read_counts(se)
rownames(count_mat) <- str_replace(rownames(count_mat), "\\.[:number:]+$", "")

meta <- colData(se) %>%
  as_tibble() %>%
  separate(sra.sample_title, into = c("cell_line", "treatment", "mutation", "replicate"), sep = "-") %>%
  select(cell_line, treatment, mutation, replicate)

colnames(count_mat) <- paste0(meta$treatment, "_", meta$mutation, "_", meta$replicate)

# subset to cell line T47D
count_mat <- count_mat[, meta$cell_line == "T47D"]
meta <- meta[meta$cell_line == "T47D", c("treatment", "mutation", "replicate")]

T47D <- make_dds(count_mat, meta, ah_record = "AH89426")
T47D <- T47D[rowSums(assay(T47D))>0,]

# round some numeric data to reduce the size of the data object
rowData(T47D)$gc_content <- round(rowData(T47D)$gc_content,1)
```

# Differential testing
```{r eval=FALSE}
dds <- T47D
dds <- filter_genes(dds, min_count = 5, min_rep = 4)
dds$mutation <- as.factor(dds$mutation)
dds$treatment <- as.factor(dds$treatment)
design(dds) <- ~ mutation + treatment

# to not run DESeq2 in the main vignette,
# wo pre-compute the dispersion plot and diff testing results
dds <- DESeq(dds, parallel=T)

png(filename="disp_ests.png", width=7, height=5, units="in", res=200)
plotDispEsts(dds)
dev.off()

T47D_diff_testing <- lfcShrink(dds, coef = "mutation_WT_vs_D538G", lfcThreshold = log2(1.5), type = "normal", parallel = TRUE)
T47D_diff_testing$stat <- NULL
T47D_diff_testing$lfcSE <- NULL
T47D_diff_testing$pvalue <- NULL
```