configr is an integrated parser package that can process json, ini, yaml and toml format files. This vignette walks you through the basics of using configr, which extends the existing parsers in R.

Built-in example configuration files

The built-in example files in json, ini, yaml and toml format can be used by following the instructions below.

Check the configuration file type

is.json.file, is.ini.file, is.yaml.file and is.toml.file can be used to check the configuration file type. Each returns TRUE if the input file matches the required type. get.config.type uses the above functions to get the file type name: json, ini, yaml, toml, or FALSE.

Get the configuration section names

The section names of a configuration file can be obtained using eval.config.sections. This function was inspired by the sections feature of the Python package ConfigParser.

Read the configuration file

read.config reads a configuration file into R as a list object. Parameters can be passed on to the corresponding inner read function (fromJSON/read.ini/yaml.load_file/parseToml).

eval.config returns a value or a list object, with the file path, config group and file type attached as attributes.

eval.config.merge merges multiple sections (equivalent to config in the eval.config function) and reduces the number of layers in the configuration.

fetch.config can parse configuration files from the internet and from local paths, merge them, and return a single list.

links <- c("https://raw.githubusercontent.com/JhuangLab/BioInstaller/master/inst/extdata/config/db/db_annovar.toml", 
           "https://raw.githubusercontent.com/JhuangLab/BioInstaller/master/inst/extdata/config/db/db_main.toml", 
           system.file('extdata', 'config.toml', package = "configr"))
x <- fetch.config(links)
x[c(1:5, length(x))]
#> $db_annovar_1000g
#> $db_annovar_1000g$buildver_available
#> $db_annovar_1000g$buildver_available$`1000g`
#> [1] "hg18"
#> 
#> $db_annovar_1000g$buildver_available$`1000g2010`
#> [1] "hg18"
#> 
#> $db_annovar_1000g$buildver_available$`1000g2012apr`
#> [1] "hg19" "hg18"
#> 
#> $db_annovar_1000g$buildver_available$`1000g2012jul`
#> [1] "hg18"
#> 
#> $db_annovar_1000g$buildver_available$`1000g2014oct`
#> [1] "hg38" "hg19" "hg18"
#> 
#> $db_annovar_1000g$buildver_available$`1000g2015aug`
#> [1] "hg38" "hg19"
#> 
#> $db_annovar_1000g$buildver_available$other
#> [1] "hg19"
#> 
#> 
#> $db_annovar_1000g$description
#> [1] "alternative allele frequency data in 1000 Genomes Project"
#> 
#> $db_annovar_1000g$source_url
#> [1] "http://www.openbioinformatics.org/annovar/download/{{buildver}}_{{version}}.zip"
#> 
#> $db_annovar_1000g$version_available
#>  [1] "1000g2015aug" "1000g2014oct" "1000g2014sep" "1000g2014aug"
#>  [5] "1000g2012apr" "1000g2012feb" "1000g2011may" "1000g2010nov"
#>  [9] "1000g2012apr" "1000g2010jul" "1000g2010"    "1000g"       
#> 
#> $db_annovar_1000g$version_newest
#> [1] "1000g2015aug"
#> 
#> 
#> $db_annovar_1000g_sqlite
#> $db_annovar_1000g_sqlite$buildver_available
#> [1] "hg19"
#> 
#> $db_annovar_1000g_sqlite$install
#> [1] "#R#for(i in c('all', 'afr', 'eas', 'eur', 'sas', 'amr')) {\\n  x <- set.1000g.db(sprintf('{{version}}_%s', i), '{{buildver}}', \\\"sql\\\");\\n  params <- list(sql.file = x, dbname = str_replace(x, '.sql$', ''));\\n  do.call(sql2sqlite, params)\\n}\\n#R#"
#> 
#> $db_annovar_1000g_sqlite$source_url
#> [1] "http://bioinfo.rjh.com.cn/download/annovarR/humandb/{{buildver}}_{{version}}.tar.gz"
#> 
#> $db_annovar_1000g_sqlite$version_available
#> [1] "1000g2015aug"
#> 
#> $db_annovar_1000g_sqlite$version_newest
#> [1] "1000g2015aug"
#> 
#> 
#> $db_annovar_abraom
#> $db_annovar_abraom$buildver_available
#> [1] "hg19" "hg38"
#> 
#> $db_annovar_abraom$description
#> [1] "abraom: 2.3 million [Brazilian genomic variants](https://www.ncbi.nlm.nih.gov/pubmed/28332257)"
#> 
#> $db_annovar_abraom$source_url
#> [1] "http://www.openbioinformatics.org/annovar/download/{{buildver}}_{{version}}.txt.gz"
#> 
#> $db_annovar_abraom$version_available
#> [1] "abraom"
#> 
#> 
#> $db_annovar_avsift
#> $db_annovar_avsift$buildver_available
#> [1] "hg19" "hg18"
#> 
#> $db_annovar_avsift$decompress
#> [1] TRUE TRUE
#> 
#> $db_annovar_avsift$description
#> [1] "whole-exome SIFT scores for non-synonymous variants (obselete and should not be uesd any more)"
#> 
#> $db_annovar_avsift$source_url
#> [1] "http://www.openbioinformatics.org/annovar/download/{{buildver}}_{{version}}.txt.gz"    
#> [2] "http://www.openbioinformatics.org/annovar/download/{{buildver}}_{{version}}.txt.idx.gz"
#> 
#> $db_annovar_avsift$version_available
#> [1] "avsift"
#> 
#> $db_annovar_avsift$version_newest
#> [1] "avsift"
#> 
#> 
#> $db_annovar_avsnp
#> $db_annovar_avsnp$buildver_available
#> $db_annovar_avsnp$buildver_available$avsnp138
#> [1] "hg19"
#> 
#> $db_annovar_avsnp$buildver_available$avsnp142
#> [1] "hg38" "hg19"
#> 
#> $db_annovar_avsnp$buildver_available$avsnp144
#> [1] "hg38" "hg19"
#> 
#> $db_annovar_avsnp$buildver_available$avsnp147
#> [1] "hg38" "hg19"
#> 
#> $db_annovar_avsnp$buildver_available$avsnp150
#> [1] "hg38" "hg19"
#> 
#> 
#> $db_annovar_avsnp$decompress
#> [1] TRUE TRUE
#> 
#> $db_annovar_avsnp$description
#> $db_annovar_avsnp$description$avsnp138
#> [1] "dbSNP138 with allelic splitting and left-normalization"
#> 
#> $db_annovar_avsnp$description$avsnp142
#> [1] "dbSNP142 with allelic splitting and left-normalization"
#> 
#> $db_annovar_avsnp$description$avsnp144
#> [1] "dbSNP144 with allelic splitting and left-normalization (http://annovar.openbioinformatics.org/en/latest/articles/dbSNP/#additional-discussions)"
#> 
#> $db_annovar_avsnp$description$avsnp147
#> [1] "dbSNP147 with allelic splitting and left-normalization"
#> 
#> 
#> $db_annovar_avsnp$source_url
#> [1] "http://www.openbioinformatics.org/annovar/download/{{buildver}}_{{version}}.txt.gz"    
#> [2] "http://www.openbioinformatics.org/annovar/download/{{buildver}}_{{version}}.txt.idx.gz"
#> 
#> $db_annovar_avsnp$version_available
#> [1] "avsnp150" "avsnp147" "avsnp144" "avsnp142" "avsnp138"
#> 
#> $db_annovar_avsnp$version_newest
#> [1] "avsnp150"
#> 
#> 
#> $title
#> [1] "TOML Example"

Converting and writing configuration files

convert.config reads a configuration file and writes it out as a configuration file of the specified file type (json, ini, yaml). write.config is similar to convert.config, but takes a list object rather than a file as input.

configr-specific extra parsing

configr provides several useful extra parse functions; you can use parse.extra to apply them to any list object. read.config, eval.config and eval.config.merge can also use parse.extra directly by passing the parse.extra parameters to them.

Note: glue.parse uses the glue function of the glue package, e.g. glue('{1:5}'), and the result is then processed by unname(unlist(x)). The !!glue flag can be changed by setting glue.flag. Note that only items containing the glue.flag string are parsed, and the positions of the following items shift if the glue result has multiple values. e.g. ['{a}', '!!glue {1:5}', '{{a}}'] will be parsed to ['{a}', '1', '2', '3', '4', '5', '{{a}}']

other.config <- system.file('extdata', 'config.other.yaml', package='configr')

read.config(file = other.config)
#> $key
#> $key$test_parse
#> [1] 123
#> 
#> $key$test_parse2
#> [1] 234
#> 
#> $key$yes_flag
#> [1] "yes"
#> 
#> $key$no_flag
#> [1] "no"
#> 
#> 
#> $`samtools@1.3.1`
#> $`samtools@1.3.1`$source_dir
#> [1] "/tmp"

config.1 <- read.config(file = config.json)
config.1$default
#> $debug
#> [1] "{{debug}} {{debug2}}"
read.config(file = config.json, extra.list = list(debug = "self", debug2 = "self2"))$default
#> $debug
#> [1] "self self2"

sections <- c('default', 'other_config_parse')
config.1[sections]
#> $default
#> $default$debug
#> [1] "{{debug}} {{debug2}}"
#> 
#> 
#> $other_config_parse
#> $other_config_parse$raw
#> [1] "{{key:yes_flag}} {{key:no_flag}}"
#> 
#> $other_config_parse$parsed
#> [1] "yes no"
read.config(file = config.json, extra.list = list(debug = "self", debug2 = "self2"), 
  other.config = other.config)[sections]
#> $default
#> $default$debug
#> [1] "self self2"
#> 
#> 
#> $other_config_parse
#> $other_config_parse$raw
#> [1] "yes no"
#> 
#> $other_config_parse$parsed
#> [1] "yes no"

sections <- c('default', 'other_config_parse', 'rcmd_parse')
# The read.config() and parse.extra() calls below return the same value
config.1[sections]
#> $default
#> $default$debug
#> [1] "{{debug}} {{debug2}}"
#> 
#> 
#> $other_config_parse
#> $other_config_parse$raw
#> [1] "{{key:yes_flag}} {{key:no_flag}}"
#> 
#> $other_config_parse$parsed
#> [1] "yes no"
#> 
#> 
#> $rcmd_parse
#> $rcmd_parse$raw
#> [1] "@>@ Sys.Date() @<@"
read.config(file = config.json, extra.list = list(debug = "self", debug2 = "self2"), 
  other.config = other.config, rcmd.parse = T)[sections]
#> $default
#> $default$debug
#> [1] "self self2"
#> 
#> 
#> $other_config_parse
#> $other_config_parse$raw
#> [1] "yes no"
#> 
#> $other_config_parse$parsed
#> [1] "yes no"
#> 
#> 
#> $rcmd_parse
#> $rcmd_parse$raw
#> [1] "2018-06-22"
parse.extra(config.1, extra.list = list(debug = "self", debug2 = "self2"), 
  other.config = other.config, rcmd.parse = T)[sections]
#> $default
#> $default$debug
#> [1] "self self2"
#> 
#> 
#> $other_config_parse
#> $other_config_parse$raw
#> [1] "yes no"
#> 
#> $other_config_parse$parsed
#> [1] "yes no"
#> 
#> 
#> $rcmd_parse
#> $rcmd_parse$raw
#> [1] "2018-06-22"


sections <- c('default', 'other_config_parse', 'rcmd_parse', 'mulitple_parse')
config.1[sections]
#> $default
#> $default$debug
#> [1] "{{debug}} {{debug2}}"
#> 
#> 
#> $other_config_parse
#> $other_config_parse$raw
#> [1] "{{key:yes_flag}} {{key:no_flag}}"
#> 
#> $other_config_parse$parsed
#> [1] "yes no"
#> 
#> 
#> $rcmd_parse
#> $rcmd_parse$raw
#> [1] "@>@ Sys.Date() @<@"
#> 
#> 
#> $mulitple_parse
#> $mulitple_parse$raw
#> [1] "@>@str_replace('config','g$','gr')@<@, #>#echo configr#<#, {{key:yes_flag}}, {{yes}}, @>@str_replace('configr','r','')@<@, #># echo config#<#, {{key:no_flag}}, {{no}}"
#> 
#> $mulitple_parse$parsed
#> [1] "configr, configr, yes, 1, config, config, no, 0"
parse.extra(config.1, extra.list = list(debug = "self", debug2 = "self2", yes = "1", no = "0"), 
  other.config = other.config, rcmd.parse = T, bash.parse = T)[sections]
#> $default
#> $default$debug
#> [1] "self self2"
#> 
#> 
#> $other_config_parse
#> $other_config_parse$raw
#> [1] "yes no"
#> 
#> $other_config_parse$parsed
#> [1] "yes no"
#> 
#> 
#> $rcmd_parse
#> $rcmd_parse$raw
#> [1] "2018-06-22"
#> 
#> 
#> $mulitple_parse
#> $mulitple_parse$raw
#> [1] "configr, configr, yes, 1, config, config, no, 0"
#> 
#> $mulitple_parse$parsed
#> [1] "configr, configr, yes, 1, config, config, no, 0"

# glue parse
raw <- c("a", "!!glue{1:5}", "c")
list.raw <- list(glue = raw, nochange = 1:10)
list.raw
#> $glue
#> [1] "a"           "!!glue{1:5}" "c"          
#> 
#> $nochange
#>  [1]  1  2  3  4  5  6  7  8  9 10
expect.parsed.1 <- c("a", "1", "2", "3", "4", "5", "c")
expect.parsed.2 <- list(glue = expect.parsed.1, nochange = 1:10)
parse.extra(list.raw, glue.parse = TRUE, glue.flag = "!!glue")
#> $glue
#> [1] "a" "1" "2" "3" "4" "5" "c"
#> 
#> $nochange
#>  [1]  1  2  3  4  5  6  7  8  9 10

External URLs about configuration formats and other resources

configr provides the function config.help to access external resources about various configuration formats and other related materials.

Other usage

config.section.del can be used to delete a section of a config; it simply does config$section <- NULL.

str2config can be used to parse a string object into a configuration list.

Session info

Here is the output of sessionInfo() on the system on which this document was compiled:

#> R version 3.5.0 (2018-04-23)
#> Platform: x86_64-apple-darwin15.6.0 (64-bit)
#> Running under: macOS High Sierra 10.13.5
#> 
#> Matrix products: default
#> BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] C/UTF-8/C/C/C/C
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices utils     datasets  methods   base     
#> 
#> other attached packages:
#> [1] configr_0.3.3
#> 
#> loaded via a namespace (and not attached):
#>  [1] Rcpp_0.12.17    digest_0.6.15   rprojroot_1.3-2 jsonlite_1.5   
#>  [5] backports_1.1.2 magrittr_1.5    evaluate_0.10.1 stringi_1.2.2  
#>  [9] ini_0.3.1       rmarkdown_1.10  tools_3.5.0     stringr_1.3.1  
#> [13] glue_1.2.0      prettydoc_0.2.1 yaml_2.1.19     RcppTOML_0.1.3 
#> [17] compiler_3.5.0  htmltools_0.3.6 knitr_1.20