Erste Schritte mit `vroom`
Nach readr kommt vroom. Inzwischen liegt vroom in der Version 1.2.0 vor, und daher habe ich mir ein paar Stunden Zeit genommen, um ein paar erste Experimente damit zu machen.
Erste Schritte mit vroom
# Load the vroom package.
library(vroom)
# Source URL of tips.csv.
# NOTE(review): this is a shortened link; consider pinning the resolved target URL.
url <- "https://goo.gl/whKjnl"
# Locale with German ("de") date names, decimal comma and "." as grouping mark.
mylocale <- locale("de", decimal_mark = ",", grouping_mark = ".")
# Explicit column types, to be passed as `col_types` where needed.
# Each collector is bound to its column by name rather than by position, so
# the specification stays correct even if the column order in the file changes.
mycols <- cols(
  total_bill = col_number(),
  tip = col_number(),
  sex = col_factor(),
  smoker = col_factor(),
  day = col_factor(),
  time = col_factor(),
  size = col_integer()
)
# Read the file with vroom: column types are guessed, the locale is mylocale.
tips.vroom <- vroom(file = url, locale = mylocale)
## Rows: 244 Columns: 7
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (4): sex, smoker, day, time
## dbl (3): total_bill, tip, size
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(tips.vroom)
## # A tibble: 6 × 7
## total_bill tip sex smoker day time size
## <dbl> <dbl> <chr> <chr> <chr> <chr> <dbl>
## 1 17.0 1.01 Female No Sun Dinner 2
## 2 10.3 1.66 Male No Sun Dinner 3
## 3 21.0 3.5 Male No Sun Dinner 3
## 4 23.7 3.31 Male No Sun Dinner 2
## 5 24.6 3.61 Female No Sun Dinner 4
## 6 25.3 4.71 Male No Sun Dinner 4
str(tips.vroom)
## spec_tbl_df [244 × 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ total_bill: num [1:244] 17 10.3 21 23.7 24.6 ...
## $ tip : num [1:244] 1.01 1.66 3.5 3.31 3.61 4.71 2 3.12 1.96 3.23 ...
## $ sex : chr [1:244] "Female" "Male" "Male" "Male" ...
## $ smoker : chr [1:244] "No" "No" "No" "No" ...
## $ day : chr [1:244] "Sun" "Sun" "Sun" "Sun" ...
## $ time : chr [1:244] "Dinner" "Dinner" "Dinner" "Dinner" ...
## $ size : num [1:244] 2 3 3 2 4 4 2 4 2 2 ...
## - attr(*, "spec")=
## .. cols(
## .. total_bill = col_double(),
## .. tip = col_double(),
## .. sex = col_character(),
## .. smoker = col_character(),
## .. day = col_character(),
## .. time = col_character(),
## .. size = col_double(),
## .. .delim = ";"
## .. )
## - attr(*, "problems")=<externalptr>
object.size(tips.vroom)
## 20632 bytes
# Read the file with vroom again, this time with the column types taken from
# mycols; the locale is still mylocale.
tips.vroom2 <- vroom(
  file = url,
  col_types = mycols,
  locale = mylocale
)
head(tips.vroom2)
## # A tibble: 6 × 7
## total_bill tip sex smoker day time size
## <dbl> <dbl> <fct> <fct> <fct> <fct> <int>
## 1 17.0 1.01 Female No Sun Dinner 2
## 2 10.3 1.66 Male No Sun Dinner 3
## 3 21.0 3.5 Male No Sun Dinner 3
## 4 23.7 3.31 Male No Sun Dinner 2
## 5 24.6 3.61 Female No Sun Dinner 4
## 6 25.3 4.71 Male No Sun Dinner 4
str(tips.vroom2)
## spec_tbl_df [244 × 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ total_bill: num [1:244] 17 10.3 21 23.7 24.6 ...
## $ tip : num [1:244] 1.01 1.66 3.5 3.31 3.61 4.71 2 3.12 1.96 3.23 ...
## $ sex : Factor w/ 2 levels "Female","Male": 1 2 2 2 1 2 2 2 2 2 ...
## $ smoker : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ day : Factor w/ 4 levels "Sun","Sat","Thur",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ time : Factor w/ 2 levels "Dinner","Lunch": 1 1 1 1 1 1 1 1 1 1 ...
## $ size : int [1:244] 2 3 3 2 4 4 2 4 2 2 ...
## - attr(*, "spec")=
## .. cols(
## .. total_bill = col_number(),
## .. tip = col_number(),
## .. sex = col_factor(levels = NULL, ordered = FALSE, include_na = FALSE),
## .. smoker = col_factor(levels = NULL, ordered = FALSE, include_na = FALSE),
## .. day = col_factor(levels = NULL, ordered = FALSE, include_na = FALSE),
## .. time = col_factor(levels = NULL, ordered = FALSE, include_na = FALSE),
## .. size = col_integer(),
## .. .delim = ";"
## .. )
## - attr(*, "problems")=<externalptr>
object.size(tips.vroom2)
## 19416 bytes
# Readr
library(readr)
## Registered S3 methods overwritten by 'readr':
## method from
## as.data.frame.spec_tbl_df vroom
## as_tibble.spec_tbl_df vroom
## format.col_spec vroom
## print.col_spec vroom
## print.collector vroom
## print.date_names vroom
## print.locale vroom
## str.col_spec vroom
##
## Attache Paket: 'readr'
## Die folgenden Objekte sind maskiert von 'package:vroom':
##
## as.col_spec, col_character, col_date, col_datetime, col_double,
## col_factor, col_guess, col_integer, col_logical, col_number,
## col_skip, col_time, cols, cols_condense, cols_only, date_names,
## date_names_lang, date_names_langs, default_locale, fwf_cols,
## fwf_empty, fwf_positions, fwf_widths, locale, output_column,
## problems, spec
# Read the same file with readr's read_csv2(), without specifying column types.
tips.readr <- readr::read_csv2(file = url)
## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
## Rows: 244 Columns: 7
## ── Column specification ──────────────────────────────────────────────────────────────────────────────────────────────────
## Delimiter: ";"
## chr (4): sex, smoker, day, time
## dbl (3): total_bill, tip, size
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(tips.readr)
## # A tibble: 6 × 7
## total_bill tip sex smoker day time size
## <dbl> <dbl> <chr> <chr> <chr> <chr> <dbl>
## 1 17.0 1.01 Female No Sun Dinner 2
## 2 10.3 1.66 Male No Sun Dinner 3
## 3 21.0 3.5 Male No Sun Dinner 3
## 4 23.7 3.31 Male No Sun Dinner 2
## 5 24.6 3.61 Female No Sun Dinner 4
## 6 25.3 4.71 Male No Sun Dinner 4
str(tips.readr)
## spec_tbl_df [244 × 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ total_bill: num [1:244] 17 10.3 21 23.7 24.6 ...
## $ tip : num [1:244] 1.01 1.66 3.5 3.31 3.61 4.71 2 3.12 1.96 3.23 ...
## $ sex : chr [1:244] "Female" "Male" "Male" "Male" ...
## $ smoker : chr [1:244] "No" "No" "No" "No" ...
## $ day : chr [1:244] "Sun" "Sun" "Sun" "Sun" ...
## $ time : chr [1:244] "Dinner" "Dinner" "Dinner" "Dinner" ...
## $ size : num [1:244] 2 3 3 2 4 4 2 4 2 2 ...
## - attr(*, "spec")=
## .. cols(
## .. total_bill = col_double(),
## .. tip = col_double(),
## .. sex = col_character(),
## .. smoker = col_character(),
## .. day = col_character(),
## .. time = col_character(),
## .. size = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
object.size(tips.readr)
## 20632 bytes
# Read with readr again, this time with explicit column types.
# The types are given as a named cols() specification, so each collector is
# bound to its column by name instead of relying on the column order.
tips.readr2 <- readr::read_csv2(
  url,
  col_types = cols(
    total_bill = col_double(),
    tip = col_double(),
    sex = col_factor(),
    smoker = col_factor(),
    day = col_factor(),
    time = col_factor(),
    size = col_integer()
  )
)
## ℹ Using "','" as decimal and "'.'" as grouping mark. Use `read_delim()` for more control.
head(tips.readr2)
## # A tibble: 6 × 7
## total_bill tip sex smoker day time size
## <dbl> <dbl> <fct> <fct> <fct> <fct> <int>
## 1 17.0 1.01 Female No Sun Dinner 2
## 2 10.3 1.66 Male No Sun Dinner 3
## 3 21.0 3.5 Male No Sun Dinner 3
## 4 23.7 3.31 Male No Sun Dinner 2
## 5 24.6 3.61 Female No Sun Dinner 4
## 6 25.3 4.71 Male No Sun Dinner 4
str(tips.readr2)
## spec_tbl_df [244 × 7] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ total_bill: num [1:244] 17 10.3 21 23.7 24.6 ...
## $ tip : num [1:244] 1.01 1.66 3.5 3.31 3.61 4.71 2 3.12 1.96 3.23 ...
## $ sex : Factor w/ 2 levels "Female","Male": 1 2 2 2 1 2 2 2 2 2 ...
## $ smoker : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
## $ day : Factor w/ 4 levels "Sun","Sat","Thur",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ time : Factor w/ 2 levels "Dinner","Lunch": 1 1 1 1 1 1 1 1 1 1 ...
## $ size : int [1:244] 2 3 3 2 4 4 2 4 2 2 ...
## - attr(*, "spec")=
## .. cols(
## .. total_bill = col_double(),
## .. tip = col_double(),
## .. sex = col_factor(levels = NULL, ordered = FALSE, include_na = FALSE),
## .. smoker = col_factor(levels = NULL, ordered = FALSE, include_na = FALSE),
## .. day = col_factor(levels = NULL, ordered = FALSE, include_na = FALSE),
## .. time = col_factor(levels = NULL, ordered = FALSE, include_na = FALSE),
## .. size = col_integer()
## .. )
## - attr(*, "problems")=<externalptr>
object.size(tips.readr2)
## 19416 bytes
# Base R only: download the file first, then parse it with read.csv2().
# The download reuses `url` (defined near the top of the script) instead of
# repeating the literal link, so the source location is kept in one place.
download.file(url, destfile = "tips.csv")
tips.csv2 <- read.csv2("tips.csv")
head(tips.csv2)
## total_bill tip sex smoker day time size
## 1 16.99 1.01 Female No Sun Dinner 2
## 2 10.34 1.66 Male No Sun Dinner 3
## 3 21.01 3.50 Male No Sun Dinner 3
## 4 23.68 3.31 Male No Sun Dinner 2
## 5 24.59 3.61 Female No Sun Dinner 4
## 6 25.29 4.71 Male No Sun Dinner 4
str(tips.csv2)
## 'data.frame': 244 obs. of 7 variables:
## $ total_bill: num 17 10.3 21 23.7 24.6 ...
## $ tip : num 1.01 1.66 3.5 3.31 3.61 4.71 2 3.12 1.96 3.23 ...
## $ sex : chr "Female" "Male" "Male" "Male" ...
## $ smoker : chr "No" "No" "No" "No" ...
## $ day : chr "Sun" "Sun" "Sun" "Sun" ...
## $ time : chr "Dinner" "Dinner" "Dinner" "Dinner" ...
## $ size : int 2 3 3 2 4 4 2 4 2 2 ...
object.size(tips.csv2)
## 14720 bytes