Introduction to annovarR

Jianfeng Li

2018-01-08

Introduction

Interpretation of genetic variation data is a crucial step in understanding the relationship between gene sequence changes and biological function. Several annotation tools, such as ANNOVAR, VEP, and vcfanno, have been developed. These tools make the annotation of genetic variation data more convenient and faster than before. However, because different annotation tools have their own usage conventions and design architectures, it is difficult for bioinformatics beginners to use them. In addition, many existing database resources and annotation scripts have not been well integrated and shared.

It is therefore worthwhile to develop an integrated annotation system that integrates not only the different annotation tools but also the relevant database resources. Here, we present an integrated annotation R package, ‘annovarR’, for this purpose. It provides a series of R functions to integrate external annotation tools and annotation databases.

Installation

To install annovarR, you first need to install the R interpreter (Linux, macOS and Windows are supported). This package has been uploaded to The Comprehensive R Archive Network (CRAN, https://cran.r-project.org). You can install the annovarR package easily with the following commands:

# Enable both CRAN (ind 1) and Bioconductor (ind 2) as package repositories
setRepositories(ind = 1:2)
install.packages("annovarR")

If you want to use the latest development version, use the install_github function from the devtools package.

# Install the cutting-edge development version ("develop" branch) from GitHub.
# Uncomment the next line first if devtools is not installed yet:
# install.packages("devtools")
devtools::install_github("JhuangLab/annovarR", ref = "develop")

Lastly, annovarR can also be installed using the source code archive (R CMD INSTALL). In this situation, you need to manually handle dependencies on many packages.

Tips: When the RMySQL or RSQLite package cannot be installed directly by R, conda is an alternative: conda install -c r r-rmysql r-rsqlite. Otherwise, you need root permissions to install the corresponding system dependencies.

Usage

Download database resource

annovarR uses the function download.database to download the annotation databases.

# List the names of all databases that download.database() supports
download.database(show.all.names = TRUE)
#>  [1] "db_annovar_1000g"                 "db_annovar_1000g_sqlite"         
#>  [3] "db_annovar_avsift"                "db_annovar_avsnp"                
#>  [5] "db_annovar_avsnp_sqlite"          "db_annovar_brvar"                
#>  [7] "db_annovar_cadd"                  "db_annovar_cadd_sqlite"          
#>  [9] "db_annovar_cg"                    "db_annovar_civic_gene_summaries" 
#> [11] "db_annovar_clinvar"               "db_annovar_clinvar_sqlite"       
#> [13] "db_annovar_cosmic"                "db_annovar_cosmic_sqlite"        
#> [15] "db_annovar_cscd"                  "db_annovar_darned_sqlite"        
#> [17] "db_annovar_dbnsfp"                "db_annovar_dbnsfp_sqlite"        
#> [19] "db_annovar_dbscsnv"               "db_annovar_dbscsnv_sqlite"       
#> [21] "db_annovar_dhs_gene_connectivity" "db_annovar_eigen"                
#> [23] "db_annovar_eigen_sqlite"          "db_annovar_ensgene"              
#> [25] "db_annovar_epi_genes"             "db_annovar_esp6500siv2"          
#> [27] "db_annovar_exac03"                "db_annovar_exac03_sqlite"        
#> [29] "db_annovar_fathmm"                "db_annovar_gdi_score"            
#> [31] "db_annovar_gerp"                  "db_annovar_gme"                  
#> [33] "db_annovar_gme_sqlite"            "db_annovar_gnomad"               
#> [35] "db_annovar_gnomad_sqlite"         "db_annovar_gtex_eqtl_egenes"     
#> [37] "db_annovar_gtex_eqtl_pairs"       "db_annovar_gwava"                
#> [39] "db_annovar_gwava_sqlite"          "db_annovar_hgnc"                 
#> [41] "db_annovar_hrcr1"                 "db_annovar_hrcr1_sqlite"         
#> [43] "db_annovar_icgc21"                "db_annovar_icgc_sqlite"          
#> [45] "db_annovar_intervar"              "db_annovar_intervar_sqlite"      
#> [47] "db_annovar_kaviar"                "db_annovar_knowngene"            
#> [49] "db_annovar_ljb26_all"             "db_annovar_lncediting_sqlite"    
#> [51] "db_annovar_loftool_scores"        "db_annovar_mcap"                 
#> [53] "db_annovar_mcap_sqlite"           "db_annovar_mitimpact"            
#> [55] "db_annovar_nci60"                 "db_annovar_nci60_sqlite"         
#> [57] "db_annovar_normal_pool"           "db_annovar_omim_genemap2"        
#> [59] "db_annovar_popfreq"               "db_annovar_popfreq_sqlite"       
#> [61] "db_annovar_radar_sqlite"          "db_annovar_rddpred_sqlite"       
#> [63] "db_annovar_rediportal_sqlite"     "db_annovar_refgene"              
#> [65] "db_annovar_regsnpintron"          "db_annovar_revel"                
#> [67] "db_annovar_revel_sqlite"          "db_annovar_rvis_esv_score"       
#> [69] "db_annovar_seeqtl"                "db_annovar_snp"                  
#> [71] "db_annovar_tall_somatic_genes"    "db_annovar_tmcsnpdb"             
#> [73] "db_annovar_varcards"              "db_annovar_varcards_sqlite"      
#> [75] "db_ucsc_cytoband"                 "db_ucsc_dnase_clustered"         
#> [77] "db_ucsc_ensgene"                  "db_ucsc_knowngene"               
#> [79] "db_ucsc_refgene"                  "db_ucsc_tfbs_clustered"

# Show all supported versions of a database (e.g. db_annovar_avsnp).
# The argument is spelled out in full (`show.all.versions`) rather than
# relying on R's partial argument matching of `show.all.version`.
download.database(download.name = "db_annovar_avsnp", show.all.versions = TRUE)
#> [1] "avsnp150" "avsnp147" "avsnp144" "avsnp142" "avsnp138"

# Show the supported buildver values (genome builds) of a database.
# NOTE: the captured output below lists the builds of every avsnp version,
# not only of the requested version "avsnp147".
download.database(download.name = "db_annovar_avsnp", version = "avsnp147", show.all.buildvers = TRUE)
#> $avsnp138
#> [1] "hg19"
#> 
#> $avsnp142
#> [1] "hg38" "hg19"
#> 
#> $avsnp144
#> [1] "hg38" "hg19"
#> 
#> $avsnp147
#> [1] "hg38" "hg19"
#> 
#> $avsnp150
#> [1] "hg38" "hg19"

# To reduce the download time, use the local demo configuration file
# shipped with annovarR to download a small demo database.
demo.cfg <- system.file("extdata", "demo/demo.cfg", package = "annovarR")
# `TRUE` instead of `T`: `T` is an ordinary variable that can be reassigned.
download.database("download_demo", show.all.versions = TRUE, download.cfg = demo.cfg)
#> [1] "demo"
# Download version "demo" (GRCh37) into a "databases" folder under tempdir()
download.database("download_demo", "demo", buildver = "GRCh37",
  database.dir = sprintf("%s/databases/", tempdir()), download.cfg = demo.cfg)
#> [1] TRUE

# To download other resources provided by BioInstaller,
# use the function `install.bioinfo`; here it only lists the available names
install.bioinfo(show.all.names = TRUE)
#>   [1] "abyss"                            "arnapipe"                        
#>   [3] "asap"                             "backspin"                        
#>   [5] "bamtools"                         "bamutil"                         
#>   [7] "bcftools"                         "bearscc"                         
#>   [9] "bedtools"                         "bowtie"                          
#>  [11] "bowtie2"                          "breakdancer"                     
#>  [13] "brie"                             "bwa"                             
#>  [15] "cnvkit"                           "cnvnator"                        
#>  [17] "dart"                             "delly"                           
#>  [19] "fastp"                            "fastq_tools"                     
#>  [21] "fastx_toolkit"                    "freebayes"                       
#>  [23] "fsclvm"                           "github_demo"                     
#>  [25] "hisat2"                           "htseq"                           
#>  [27] "igraph"                           "isop"                            
#>  [29] "jvarkit"                          "libgtextutils"                   
#>  [31] "lofreq"                           "macs"                            
#>  [33] "mdseq"                            "mimosca"                         
#>  [35] "multiqc"                          "oases"                           
#>  [37] "oncotator"                        "outrigger"                       
#>  [39] "picard"                           "pindel"                          
#>  [41] "pxz"                              "raceid"                          
#>  [43] "rca"                              "rum"                             
#>  [45] "samtools_old"                     "sclvm"                           
#>  [47] "scnorm"                           "seqtk"                           
#>  [49] "seurat"                           "singlesplice"                    
#>  [51] "sleuth"                           "somaticsniper"                   
#>  [53] "sparsehash"                       "speedseq"                        
#>  [55] "star"                             "strawberry"                      
#>  [57] "tmap"                             "tophat2"                         
#>  [59] "tracer"                           "trimgalore"                      
#>  [61] "trinityrnaseq"                    "varscan2"                        
#>  [63] "vcflib"                           "vcftools"                        
#>  [65] "vep"                              "zifa"                            
#>  [67] "annovar"                          "armadillo"                       
#>  [69] "bcl2fastq"                        "blast"                           
#>  [71] "blat"                             "bzip2"                           
#>  [73] "cesa"                             "cnvnator_samtools"               
#>  [75] "curl"                             "demo_2"                          
#>  [77] "edena"                            "ensemble_grch37_reffa"           
#>  [79] "ensemble_grch38_reffa"            "fastqc"                          
#>  [81] "fatotwobit"                       "fusioncatcher"                   
#>  [83] "fusioncatcher_reffa"              "gatk"                            
#>  [85] "gatk_bundle"                      "gmap"                            
#>  [87] "gridss"                           "hisat2_reffa"                    
#>  [89] "htslib"                           "imagej"                          
#>  [91] "interproscan"                     "liftover"                        
#>  [93] "lzo"                              "lzop"                            
#>  [95] "mapsplice2"                       "miniconda2"                      
#>  [97] "miniconda3"                       "mutect"                          
#>  [99] "ngs_qc_toolkit"                   "novoalign"                       
#> [101] "pcre"                             "pigz"                            
#> [103] "prinseq"                          "r"                               
#> [105] "reditools"                        "root"                            
#> [107] "samstat"                          "samtools"                        
#> [109] "snpeff"                           "solexaqa"                        
#> [111] "sqlite"                           "sratools"                        
#> [113] "srnanalyzer"                      "ssaha2"                          
#> [115] "strelka"                          "subread"                         
#> [117] "svtoolkit"                        "tvc"                             
#> [119] "ucsc_reffa"                       "ucsc_utils"                      
#> [121] "vcfanno"                          "velvet"                          
#> [123] "xz"                               "zlib"                            
#> [125] "db_atcircdb"                      "db_biosystems"                   
#> [127] "db_cancer_hotspot"                "db_cgi"                          
#> [129] "db_circbase"                      "db_circnet"                      
#> [131] "db_circrnadb"                     "db_civic"                        
#> [133] "db_cscd"                          "db_denovo_db"                    
#> [135] "db_dgidb"                         "db_differentialnet"              
#> [137] "db_diseaseenhancer"               "db_disgenet"                     
#> [139] "db_docm"                          "db_drugbank"                     
#> [141] "db_ecodrug"                       "db_eggnog"                       
#> [143] "db_exorbase"                      "db_expression_atlas"             
#> [145] "db_exsnp"                         "db_fantom_cage_peaks"            
#> [147] "db_fantom_co_expression_clusters" "db_fantom_enhancers"             
#> [149] "db_fantom_motifs"                 "db_fantom_ontology"              
#> [151] "db_fantom_tss_classifier"         "db_funcoup"                      
#> [153] "db_gtex"                          "db_hgnc"                         
#> [155] "db_hpo"                           "db_inbiomap"                     
#> [157] "db_interpro"                      "db_intogen"                      
#> [159] "db_lncediting"                    "db_medreaders"                   
#> [161] "db_mndr"                          "db_msdd"                         
#> [163] "db_omim_open"                     "db_omim_private"                 
#> [165] "db_oncotator"                     "db_pancanqtl"                    
#> [167] "db_proteinatlas"                  "db_rbp_var"                      
#> [169] "db_rddpred"                       "db_remap"                        
#> [171] "db_remap2"                        "db_rsnp3"                        
#> [173] "db_rvarbase"                      "db_seecancer"                    
#> [175] "db_seeqtl"                        "db_snipa3"                       
#> [177] "db_srnanalyzer"                   "db_superdrug2"                   
#> [179] "db_tumorfusions"                  "db_varcards"                     
#> [181] "db_annovar_1000g"                 "db_annovar_1000g_sqlite"         
#> [183] "db_annovar_avsift"                "db_annovar_avsnp"                
#> [185] "db_annovar_avsnp_sqlite"          "db_annovar_brvar"                
#> [187] "db_annovar_cadd"                  "db_annovar_cadd_sqlite"          
#> [189] "db_annovar_cg"                    "db_annovar_civic_gene_summaries" 
#> [191] "db_annovar_clinvar"               "db_annovar_clinvar_sqlite"       
#> [193] "db_annovar_cosmic"                "db_annovar_cosmic_sqlite"        
#> [195] "db_annovar_cscd"                  "db_annovar_darned_sqlite"        
#> [197] "db_annovar_dbnsfp"                "db_annovar_dbnsfp_sqlite"        
#> [199] "db_annovar_dbscsnv"               "db_annovar_dbscsnv_sqlite"       
#> [201] "db_annovar_dhs_gene_connectivity" "db_annovar_eigen"                
#> [203] "db_annovar_eigen_sqlite"          "db_annovar_ensgene"              
#> [205] "db_annovar_epi_genes"             "db_annovar_esp6500siv2"          
#> [207] "db_annovar_exac03"                "db_annovar_exac03_sqlite"        
#> [209] "db_annovar_fathmm"                "db_annovar_gdi_score"            
#> [211] "db_annovar_gerp"                  "db_annovar_gme"                  
#> [213] "db_annovar_gme_sqlite"            "db_annovar_gnomad"               
#> [215] "db_annovar_gnomad_sqlite"         "db_annovar_gtex_eqtl_egenes"     
#> [217] "db_annovar_gtex_eqtl_pairs"       "db_annovar_gwava"                
#> [219] "db_annovar_gwava_sqlite"          "db_annovar_hgnc"                 
#> [221] "db_annovar_hrcr1"                 "db_annovar_hrcr1_sqlite"         
#> [223] "db_annovar_icgc21"                "db_annovar_icgc_sqlite"          
#> [225] "db_annovar_intervar"              "db_annovar_intervar_sqlite"      
#> [227] "db_annovar_kaviar"                "db_annovar_knowngene"            
#> [229] "db_annovar_ljb26_all"             "db_annovar_lncediting_sqlite"    
#> [231] "db_annovar_loftool_scores"        "db_annovar_mcap"                 
#> [233] "db_annovar_mcap_sqlite"           "db_annovar_mitimpact"            
#> [235] "db_annovar_nci60"                 "db_annovar_nci60_sqlite"         
#> [237] "db_annovar_normal_pool"           "db_annovar_omim_genemap2"        
#> [239] "db_annovar_popfreq"               "db_annovar_popfreq_sqlite"       
#> [241] "db_annovar_radar_sqlite"          "db_annovar_rddpred_sqlite"       
#> [243] "db_annovar_rediportal_sqlite"     "db_annovar_refgene"              
#> [245] "db_annovar_regsnpintron"          "db_annovar_revel"                
#> [247] "db_annovar_revel_sqlite"          "db_annovar_rvis_esv_score"       
#> [249] "db_annovar_seeqtl"                "db_annovar_snp"                  
#> [251] "db_annovar_tall_somatic_genes"    "db_annovar_tmcsnpdb"             
#> [253] "db_annovar_varcards"              "db_annovar_varcards_sqlite"      
#> [255] "db_ucsc_cytoband"                 "db_ucsc_dnase_clustered"         
#> [257] "db_ucsc_ensgene"                  "db_ucsc_knowngene"               
#> [259] "db_ucsc_refgene"                  "db_ucsc_tfbs_clustered"          
#> [261] "db_blast_env_nr"                  "db_blast_est_human"              
#> [263] "db_blast_est_mouse"               "db_blast_est_others"             
#> [265] "db_blast_gss"                     "db_blast_htgs"                   
#> [267] "db_blast_human_genomic"           "db_blast_landmark"               
#> [269] "db_blast_mouse_genomic"           "db_blast_nr"                     
#> [271] "db_blast_nt"                      "db_blast_other_genomic"          
#> [273] "db_blast_pataa"                   "db_blast_patnt"                  
#> [275] "db_blast_pdbaa"                   "db_blast_pdbnt"                  
#> [277] "db_blast_ref_prok_rep_genomes"    "db_blast_ref_viroids_rep_genomes"
#> [279] "db_blast_ref_viruses_rep_genomes" "db_blast_refseq_genomic"         
#> [281] "db_blast_refseq_protein"          "db_blast_refseq_rna"             
#> [283] "db_blast_refseqgene"              "db_blast_sts"                    
#> [285] "db_blast_swissprot"               "db_blast_taxdb"                  
#> [287] "db_blast_tsa_nr"                  "db_blast_tsa_nt"                 
#> [289] "db_blast_vector"

Annotation


# Get all supported anno.name values in annovarR
get.annotation.names()
#>   [1] "1000g2015aug_all"                                    
#>   [2] "1000g2015aug_afr"                                    
#>   [3] "1000g2015aug_amr"                                    
#>   [4] "1000g2015aug_eas"                                    
#>   [5] "1000g2015aug_eur"                                    
#>   [6] "1000g2015aug_sas"                                    
#>   [7] "perl_annovar_refGene"                                
#>   [8] "perl_annovar_ensGene"                                
#>   [9] "perl_annovar_knownGene"                              
#>  [10] "perl_annovar_cytoBand"                               
#>  [11] "perl_annovar_genomicSuperDups"                       
#>  [12] "perl_annovar_clinvar_20170905"                       
#>  [13] "perl_annovar_esp6500siv2_all"                        
#>  [14] "perl_annovar_1000g2015aug_all"                       
#>  [15] "perl_annovar_1000g2015aug_afr"                       
#>  [16] "perl_annovar_1000g2015aug_eas"                       
#>  [17] "perl_annovar1000g2015aug_eur"                        
#>  [18] "perl_annovar_avsnp142"                               
#>  [19] "perl_annovar_avsnp144"                               
#>  [20] "perl_annovar_avsnp147"                               
#>  [21] "perl_annovar_avsnp150"                               
#>  [22] "perl_annovar_dbnsfp33a"                              
#>  [23] "perl_annovar_cosmic70"                               
#>  [24] "perl_annovar_cosmic81"                               
#>  [25] "perl_annovar_cosmic82"                               
#>  [26] "perl_annovar_eigen"                                  
#>  [27] "perl_annovar_gwava"                                  
#>  [28] "perl_annovar_cadd13"                                 
#>  [29] "perl_annovar_cadd13gt10"                             
#>  [30] "perl_annovar_cadd13gt20"                             
#>  [31] "perl_annovar_regsnpintron"                           
#>  [32] "perl_annovar_mitimpact24"                            
#>  [33] "perl_annovar_popfreq_all_20150413"                   
#>  [34] "perl_annovar_popfreq_max_20150413"                   
#>  [35] "perl_annovar_icgc21"                                 
#>  [36] "perl_annovar_nci60"                                  
#>  [37] "perl_annovar_gme"                                    
#>  [38] "perl_annovar_hrcr1"                                  
#>  [39] "perl_annovar_kaviar_20150923"                        
#>  [40] "perl_annovar_gnomad_genome"                          
#>  [41] "perl_annovar_gnomad_exome"                           
#>  [42] "perl_annovar_exac03nonpsych"                         
#>  [43] "perl_annovar_exac03nontcga"                          
#>  [44] "perl_annovar_exac03"                                 
#>  [45] "perl_annovar_cg69"                                   
#>  [46] "perl_annovar_cg46"                                   
#>  [47] "perl_annovar_intervar_20170202"                      
#>  [48] "perl_annovar_dbscsnv11"                              
#>  [49] "perl_annovar_merge"                                  
#>  [50] "avsnp138"                                            
#>  [51] "avsnp142"                                            
#>  [52] "avsnp144"                                            
#>  [53] "avsnp147"                                            
#>  [54] "avsnp150"                                            
#>  [55] "bioc_org_hs_eg"                                      
#>  [56] "bioc_gene2"                                          
#>  [57] "bioc_gene2alias"                                     
#>  [58] "bioc_gene2ensembl_gene_id"                           
#>  [59] "bioc_gene2entrez_gene_id"                            
#>  [60] "bioc_gene2gene_full_name"                            
#>  [61] "brvar_v1_core"                                       
#>  [62] "brvar_v1_extra"                                      
#>  [63] "cadd"                                                
#>  [64] "caddgt10"                                            
#>  [65] "caddgt20"                                            
#>  [66] "cadd13"                                              
#>  [67] "cadd13gt10"                                          
#>  [68] "cadd13gt20"                                          
#>  [69] "nightly_civic_gene_summaries"                        
#>  [70] "clinvar_20170130"                                    
#>  [71] "cosmic70"                                            
#>  [72] "cosmic81"                                            
#>  [73] "cosmic82"                                            
#>  [74] "cscd_cancer_circrna"                                 
#>  [75] "cscd_common_circrna"                                 
#>  [76] "cscd_normal_circrna"                                 
#>  [77] "darned"                                              
#>  [78] "dbnsfp30a"                                           
#>  [79] "dbnsfp31a_interpro"                                  
#>  [80] "dbnsfp33a"                                           
#>  [81] "dbscsnv11"                                           
#>  [82] "dhs_gene_connectivity_2012"                          
#>  [83] "eigen"                                               
#>  [84] "epi_genes_v1"                                        
#>  [85] "exac03"                                              
#>  [86] "exac03nontcga"                                       
#>  [87] "exac03nonpsych"                                      
#>  [88] "gdi_score_full_10282015"                             
#>  [89] "gme"                                                 
#>  [90] "gnomad_exome"                                        
#>  [91] "gnomad_genome"                                       
#>  [92] "gtex_adipose_subcutaneous.v7.egenes"                 
#>  [93] "gtex_adipose_visceral_omentum.v7.egenes"             
#>  [94] "gtex_adrenal_gland.v7.egenes"                        
#>  [95] "gtex_artery_aorta.v7.egenes"                         
#>  [96] "gtex_artery_coronary.v7.egenes"                      
#>  [97] "gtex_artery_tibial.v7.egenes"                        
#>  [98] "gtex_brain_amygdala.v7.egenes"                       
#>  [99] "gtex_brain_anterior_cingulate_cortex_ba24.v7.egenes" 
#> [100] "gtex_brain_caudate_basal_ganglia.v7.egenes"          
#> [101] "gtex_brain_cerebellar_hemisphere.v7.egenes"          
#> [102] "gtex_brain_cerebellum.v7.egenes"                     
#> [103] "gtex_brain_cortex.v7.egenes"                         
#> [104] "gtex_brain_frontal_cortex_ba9.v7.egenes"             
#> [105] "gtex_brain_hippocampus.v7.egenes"                    
#> [106] "gtex_brain_hypothalamus.v7.egenes"                   
#> [107] "gtex_brain_nucleus_accumbens_basal_ganglia.v7.egenes"
#> [108] "gtex_brain_putamen_basal_ganglia.v7.egenes"          
#> [109] "gtex_brain_spinal_cord_cervical_c_1.v7.egenes"       
#> [110] "gtex_brain_substantia_nigra.v7.egenes"               
#> [111] "gtex_breast_mammary_tissue.v7.egenes"                
#> [112] "gtex_cells_ebv_transformed_lymphocytes.v7.egenes"    
#> [113] "gtex_cells_transformed_fibroblasts.v7.egenes"        
#> [114] "gtex_colon_sigmoid.v7.egenes"                        
#> [115] "gtex_colon_transverse.v7.egenes"                     
#> [116] "gtex_esophagus_gastroesophageal_junction.v7.egenes"  
#> [117] "gtex_esophagus_mucosa.v7.egenes"                     
#> [118] "gtex_esophagus_muscularis.v7.egenes"                 
#> [119] "gtex_heart_atrial_appendage.v7.egenes"               
#> [120] "gtex_heart_left_ventricle.v7.egenes"                 
#> [121] "gtex_liver.v7.egenes"                                
#> [122] "gtex_lung.v7.egenes"                                 
#> [123] "gtex_minor_salivary_gland.v7.egenes"                 
#> [124] "gtex_muscle_skeletal.v7.egenes"                      
#> [125] "gtex_nerve_tibial.v7.egenes"                         
#> [126] "gtex_ovary.v7.egenes"                                
#> [127] "gtex_pancreas.v7.egenes"                             
#> [128] "gtex_pituitary.v7.egenes"                            
#> [129] "gtex_prostate.v7.egenes"                             
#> [130] "gtex_skin_not_sun_exposed_suprapubic.v7.egenes"      
#> [131] "gtex_skin_sun_exposed_lower_leg.v7.egenes"           
#> [132] "gtex_small_intestine_terminal_ileum.v7.egenes"       
#> [133] "gtex_spleen.v7.egenes"                               
#> [134] "gtex_stomach.v7.egenes"                              
#> [135] "gtex_testis.v7.egenes"                               
#> [136] "gtex_thyroid.v7.egenes"                              
#> [137] "gtex_uterus.v7.egenes"                               
#> [138] "gtex_vagina.v7.egenes"                               
#> [139] "gtex_whole_blood.v7.egenes"                          
#> [140] "gwava"                                               
#> [141] "hgnc_coding_gene2alias"                              
#> [142] "hgnc_coding_pre2gene"                                
#> [143] "hrcr1"                                               
#> [144] "icgc21"                                              
#> [145] "intervar_20170202"                                   
#> [146] "kaviar_20150923"                                     
#> [147] "lncediting"                                          
#> [148] "loftool_scores"                                      
#> [149] "mcap"                                                
#> [150] "nci60"                                               
#> [151] "2016sih_wes_ball"                                    
#> [152] "2016sih_wes_tall"                                    
#> [153] "2016sih_wes_nkt"                                     
#> [154] "2016sih_wgs_nkt"                                     
#> [155] "2016sih_wgs_dlbcl"                                   
#> [156] "omim_genemap2_ensembl_gene_id2phenotype"             
#> [157] "omim_genemap2_entrez_gene_id2phenotype"              
#> [158] "omim_genemap2_symbol2phenotype"                      
#> [159] "popfreq_max_20150413"                                
#> [160] "popfreq_all_20150413"                                
#> [161] "radar2"                                              
#> [162] "rddpred_mes"                                         
#> [163] "REDIportal"                                          
#> [164] "ucsc_refgene"                                        
#> [165] "ens_refgene"                                         
#> [166] "revel"                                               
#> [167] "rs2pos138"                                           
#> [168] "rs2pos142"                                           
#> [169] "rs2pos144"                                           
#> [170] "rs2pos147"                                           
#> [171] "rs2pos150"                                           
#> [172] "rvis_exac_4kw"                                       
#> [173] "seeqtl_qvalue_hapmap3_cis"                           
#> [174] "seeqtl_qvalue_hapmap3_trans"                         
#> [175] "seeqtl_qvalue_myers_cis"                             
#> [176] "seeqtl_qvalue_myers_trans"                           
#> [177] "tall_somatic_genes_20171206"                         
#> [178] "tmcsnpdb"                                            
#> [179] "varcards"                                            
#> [180] "vcfanno_demo"                                        
#> [181] "vep_all"

# Look up the download.name that belongs to an annotation name; pass it to
# download.database() to fetch the corresponding database.
download.name <- get.download.name("avsnp147")

# Path of the database configuration file bundled with annovarR
database.cfg <- system.file("extdata", "config/databases.toml", package = "annovarR")

# Input columns required by an anno.name
get.annotation.needcols("avsnp147")
#> [1] "chr"   "start" "end"   "ref"   "alt"

# Build one SQLite database in tempdir() for each demo text table
for (tbl in c("hg19_ALL.sites.2015_08", "hg19_avsnp147")) {
  # Demo text table bundled under extdata/demo in the annovarR package
  txt.src <- system.file("extdata", paste0("demo/", tbl, ".txt"), package = "annovarR")
  # Copy the text table into tempdir(), which the later examples pass as
  # database.dir
  file.copy(txt.src, file.path(tempdir(), paste0(tbl, ".txt")))
  # The SQLite file and the table inside it are both named after the text table
  sqlite.build(txt.src, sqlite.connect.params = list(
    dbname = file.path(tempdir(), paste0(tbl, ".sqlite")),
    table.name = tbl
  ))
}

# Annotate 1000 Genomes Project frequencies with the predefined
# "1000g2015aug_all" rule, reading the plain-text database (db.type = "txt")
database.dir <- tempdir()
chr <- c("chr1", "chr2", "chr1")
start <- c("10177", "10177", "10020")
end <- start  # single-base positions: end equals start
ref <- c("-", "A", "A")
alt <- c("C", "AC", "-")
dat <- data.table(chr, start, end, ref, alt)
x <- annotation(dat, anno.name = "1000g2015aug_all", database.dir = database.dir,
  db.type = "txt")
x
#>    1000g2015aug_all
#> 1:         0.425319
#> 2:               NA
#> 3:               NA
# Repeat the same query, this time with db.type = "sqlite"
x <- annotation(dat = dat, anno.name = "1000g2015aug_all", database.dir = database.dir, db.type = "sqlite")
x
#>    1000g2015aug_all
#> 1:         0.425319
#> 2:               NA
#> 3:               NA

# Annotate with the full-match function: by default chr and start are used to
# select data, and chr, start, end, ref and alt are used to match it.
# See `?annotation.cols.match` for more detail.
chr <- c("chr1", "chr2", "chr1")
start <- rep("10020", 3)
end <- start
ref <- rep("A", 3)
alt <- rep("-", 3)
dat <- data.table(chr, start, end, ref, alt)
x <- annotation.cols.match(
  dat, "avsnp147",
  database.dir = database.dir,
  return.col.names = "avSNP147",
  db.type = "sqlite"
)
x
#>       avSNP147
#> 1: rs775809821
#> 2:          NA
#> 3: rs775809821

# Region match mode: query positions against the demo BED file
bed.file <- system.file("extdata", "demo/example.bed", package = "annovarR")
chr <- c("chr10", "chr1")
start <- c("100188904", "100185955")
end <- start
dat <- data.table(chr, start, end)

# format.cols.plus.chr adds "chr" to the chr column
# if the input chr column does not contain the string 'chr';
# format.db.region.tb processes the region-matched data.
# (The call below is disabled in this vignette.)
#x <- annotation.region.match(dat = dat, database.dir = tempdir(), dbname.fixed = bed.file, 
#  table.name.fixed = "bed", db.type = "txt", format.dat.fun = "format.cols.plus.chr", 
#  format.db.tb.fun = "format.db.region.tb")
#x

# Convert SNP rs numbers to genomic locations (each id is queried three times)
snp.id <- c("rs775809821", "rs768019142")
x <- annotation(
  dat = data.table(rs = rep(snp.id, 3)),
  anno.name = "rs2pos147",
  database.dir = database.dir,
  buildver = "hg19",
  db.type = "txt",
  verbose = FALSE
)

# Annotate avinput-format R data using ANNOVAR.
# With debug = TRUE the command is not run.
# Assignments use `<-` (not `=`) to stay consistent with the rest of the file.
chr <- "chr1"
start <- "123"
end <- "123"
ref <- "A"
alt <- "C"
dat <- data.table(chr, start, end, ref, alt)
x <- annotation(dat, "perl_annovar_refGene", annovar.dir = "/opt/bin/annovar",
  database.dir = "{{annovar.dir}}/humandb", debug = TRUE)
#> /usr/bin/perl /u4/jhuanglabbin/annovar/table_annovar.pl /tmp/RtmpsBkFv9/file3af747e0194f {annovar.dir}/humandb -buildver hg19  -remove -protocol refGene -operation g -nastring NA

# Annotate a VCF file using ANNOVAR.
# With debug = TRUE the command is not run.
x <- annotation(
  anno.name = "perl_annovar_ensGene",
  input.file = "/tmp/test.vcf",
  annovar.dir = "/opt/bin/annovar/",
  database.dir = "{{annovar.dir}}/humandb",
  out = tempfile(),
  vcfinput = TRUE,
  debug = TRUE
)
#> /usr/bin/perl /u4/jhuanglabbin/annovar/table_annovar.pl /tmp/test.vcf {annovar.dir}/humandb -buildver hg19 -out /tmp/RtmpsBkFv9/file3af732639362 -remove -protocol ensGene -operation g -nastring NA  -vcfinput

# Annotate a VCF file using VEP (debug = TRUE)
vep(debug = TRUE)
#> vep --cache_version 91 --assembly GRCh37 --dir /home/ljf/.vep --output_file variant_effect_output.txt --cache --offline --everything
#> [1] "vep --cache_version 91 --assembly GRCh37 --dir /home/ljf/.vep --output_file variant_effect_output.txt --cache --offline --everything "
# Same VEP annotation, driven through annotation() with the "vep_all" rule
x <- annotation(
  anno.name = "vep_all",
  input.file = "/tmp/test.vcf",
  out = tempfile(),
  debug = TRUE
)
#> vep --cache_version 91 --assembly hg19 --dir /home/ljf/.vep --output_file /tmp/RtmpsBkFv9/file3af72133bad9 --input_file /tmp/test.vcf --cache --offline --everything

# Annotate a VCF file using vcfanno (debug = TRUE)
vcfanno(debug = TRUE)
#> vcfanno_linux64 -p 2 /tmp/RtmpgbgH1T/Rinst3a9d730a2e43/annovarR/extdata/demo/vcfanno_demo/conf.toml input.vcf > output.vcf
#> [1] "vcfanno_linux64 -p 2 /tmp/RtmpgbgH1T/Rinst3a9d730a2e43/annovarR/extdata/demo/vcfanno_demo/conf.toml input.vcf > output.vcf"
# Run the "vcfanno_demo" rule on the demo query VCF bundled with annovarR
x <- annotation(
  anno.name = "vcfanno_demo",
  input.file = system.file("extdata", "demo/vcfanno_demo/query.vcf.gz", package = "annovarR"),
  out = "test.vcf",
  vcfanno = "/path/vcfanno",
  debug = TRUE
)
#> vcfanno_linux64 -base-path /tmp/RtmpgbgH1T/Rinst3a9d730a2e43/annovarR/extdata/demo/vcfanno_demo/ -lua /tmp/RtmpgbgH1T/Rinst3a9d730a2e43/annovarR/extdata/demo/vcfanno_demo/custom.lua -p 2 /tmp/RtmpgbgH1T/Rinst3a9d730a2e43/annovarR/extdata/demo/vcfanno_demo/conf.toml /tmp/RtmpgbgH1T/Rinst3a9d730a2e43/annovarR/extdata/demo/vcfanno_demo/query.vcf.gz > test.vcf


# Annotate gene symbols with their aliases using the "bioc_gene2alias" rule
# (Bioconductor org.Hs.eg.db)
gene <- c("TP53", "NSD2")
annotation(dat = gene, anno.name = "bioc_gene2alias")
#> 'select()' returned 1:many mapping between keys and columns
#>     SYMBOL  ALIAS
#>  1:   TP53   BCC7
#>  2:   TP53   LFS1
#>  3:   TP53    P53
#>  4:   TP53  TRP53
#>  5:   TP53   TP53
#>  6:   NSD2  KMT3F
#>  7:   NSD2  KMT3G
#>  8:   NSD2  MMSET
#>  9:   NSD2 REIIBP
#> 10:   NSD2   TRX5
#> 11:   NSD2    WHS
#> 12:   NSD2  WHSC1
#> 13:   NSD2   NSD2