Read large text files by splitting them into parts and reading each part with
data.table::fread.
# Example: reading a large CSV by splitting it into parts with {bigreadr}.
# remotes::install_github("privefl/bigreadr")
library(bigreadr)

# Create a temporary CSV of ~141 MB (just as an example):
# iris replicated 10,000 times row-wise and 4 times column-wise.
csv <- fwrite2(iris[rep(seq_len(nrow(iris)), 1e4), rep(1:5, 4)], tempfile())
format(file.size(csv), big.mark = ",")

## Splitting lines (1) ----
# Read all data (by parts) -> using `fread` directly would be faster here.
nlines(csv)  ## 1.5M lines -> split every 500,000
big_iris1 <- big_fread1(csv, every_nlines = 5e5)

# Read and subset (by parts): `.transform` is applied to each chunk,
# so only the "setosa" rows are kept in memory.
big_iris1_setosa <- big_fread1(csv, every_nlines = 5e5,
                               .transform = function(df) {
  dplyr::filter(df, Species == "setosa")
})

## Splitting columns (2) ----
# Read all data by column blocks instead of row blocks.
big_iris2 <- big_fread2(csv, nb_parts = 3)

# Read and subset (by parts): precompute the row mask from the Species
# column (column 5), then filter each column block with it.
species_setosa <- (fread2(csv, select = 5)[[1]] == "setosa")
big_iris2_setosa <- big_fread2(csv, nb_parts = 3,
                               .transform = function(df) {
  dplyr::filter(df, species_setosa)
})

## Verification ----
# All three strategies must agree with a plain in-memory filter.
identical(big_iris1_setosa, dplyr::filter(big_iris1, Species == "setosa"))
identical(big_iris2, big_iris1)
identical(big_iris2_setosa, big_iris1_setosa)
Please send me your use cases!

You can also read multiple files at once using bigreadr::fread2().