summary

John Mount

2018-01-20

replyr_summary example.

`replyr_summary()` works on various data sources (local data.frames as well as remote database tables), counts `NA` values, and returns its results as a `data.frame` rather than as printed text.


# Two-row example frame: one character column and one numeric column,
# each holding a single missing value.
d <- data.frame(
  x = c(NA, "b"),
  y = c(1, NA),
  stringsAsFactors = FALSE
)

# Base R summary of the local frame, shown for comparison.
summary(d)
 #        x                   y    
 #   Length:2           Min.   :1  
 #   Class :character   1st Qu.:1  
 #   Mode  :character   Median :1  
 #                      Mean   :1  
 #                      3rd Qu.:1  
 #                      Max.   :1  
 #                      NA's   :1

# Summarize the same local data.frame with replyr. The printed result has one
# row per column of d and reports the missing-value count in the nna column.
replyr::replyr_summary(d)
 #    column index     class nrows nna nunique min max mean sd lexmin lexmax
 #  1      x     1 character     2   1      NA  NA  NA   NA NA      b      b
 #  2      y     2   numeric     2   1      NA   1   1    1 NA   <NA>   <NA>

# Open a throwaway in-memory SQLite source and copy the example frame into it.
my_db <- dplyr::src_sqlite(path = ":memory:", create = TRUE)
dbData <- dplyr::copy_to(dest = my_db, df = d)

# Base summary() on the remote table describes only the handle object
# (its src and ops components), not the data it refers to.
summary(dbData)
 #      Length Class          Mode
 #  src 2      src_dbi        list
 #  ops 2      op_base_remote list

# replyr_summary() on the remote table prints the same per-column table
# as it did for the local data.frame.
replyr::replyr_summary(dbData)
 #  Warning: Missing values are always removed in SQL.
 #  Use `SUM(x, na.rm = TRUE)` to silence this warning
 #    column index     class nrows nna nunique min max mean sd lexmin lexmax
 #  1      x     1 character     2   1      NA  NA  NA   NA NA      b      b
 #  2      y     2   numeric     2   1      NA   1   1    1 NA   <NA>   <NA>

# dplyr::glimpse() works more like str() or head(): it lists each column with
# its type and leading values instead of computing summary statistics.
# Note that the observation count prints as NA for this database-backed table.
dplyr::glimpse(dbData)
 #  Observations: NA
 #  Variables: 2
 #  $ x <chr> NA, "b"
 #  $ y <dbl> 1, NA
# Clean up: remove only the objects this example created. The previous
# rm(list = ls()) would also have deleted every unrelated object in the
# session of anyone running this example interactively.
rm(list = c("d", "my_db", "dbData"))
# Request a garbage collection now that the references are dropped; the
# memory figures printed below are machine-specific.
gc()
 #            used (Mb) gc trigger (Mb) max used (Mb)
 #  Ncells 1131383 60.5    1770749 94.6  1770749 94.6
 #  Vcells 1787742 13.7    4201901 32.1  3505916 26.8