Introduction to ngstk

Jianfeng Li

2018-01-04

Introduction

ngstk is an R package that facilitates the analysis of NGS data, including visualization, conversion of data formats for web-service input, and other purposes.

You can learn some usage of ngstk through this tutorial.

Usage

Data format conversion

We defined a rule to facilitate data format conversion. It is good practice to save the meta information of all output columns. Moreover, the input data should establish a connection with the output columns according to the requirements.

The following example configuration file can be used to convert the output of iseq, a pipeline for analyzing genetic variants from NGS data, into input for the web service ProteinPaint, a tool for visualizing mutation data.

Title = "Proteinpaint configuration file"

# muts2pp is a function that can convert mutation data to Proteinpaint input format
[muts2pp.meta.defined_cols]
colnames = ["gene", "refseq", "chromosome", "start", "aachange", "class", "disease", "sample"]
handler_lib = "default_handlers"
mhandler_lib = "default_mhandlers"
[muts2pp.meta.defined_cols.description]
gene = "Gene symbol, e.g TP53, PTEN"
refseq = "Transcript of refSeq or Ensemble, e.g NM_000546, ENST00000635293"
chromosome = "Chromosome, e.g. chr1, chr2"
start = "Chromosome start location of a mutation site, e.g. 153249385"
aachange = "Amino acid level change of gene mutation, e.g. p.R347C, p.L615delinsDL"
class = "Mutation type, e.g. nonsense, proteinIns, proteinDel, frameshift"
disease = "Disease name or sample group name, e.g. B-ALL, T-ALL, G1, G2"
sample = "Sample name"

[muts2pp.format.iseq.gene]
alias = ["gene", "symbol"]

[muts2pp.format.iseq.refseq]
alias = ["refseq", "transcription.id"]

[muts2pp.format.iseq.chromosome]
alias = ["chromosome"]

[muts2pp.format.iseq.start]
alias = ["start"]

[muts2pp.format.iseq.aachange]
alias = ["aachange", "amino.acid.change"]
extract_pattern = "p[.]+.*$"

[muts2pp.format.iseq.class]
alias = ["mutation_type", "mutation.type"]
raw = ["nonframeshift ins", "nonframeshift del",
       "frameshift ins", "frameshift del", "stoploss", "nonsense",
             "splice"]
new = ["proteinIns", "proteinDel", "frameshift", "frameshift",
       "nonsense", "nonsense", "splice"]
na_replace = "splice"

muts2pp, muts2mutation_mapper, muts2oncoprinter and fusions2pp are the example functions based on the established rules.

demo_file <- system.file("extdata", "demo/proteinpaint/muts2pp_iseq.txt", package = "ngstk")
input_data <- read.table(demo_file, sep = "\t", header = TRUE, stringsAsFactors = FALSE)
disease <- "T-ALL"
input_data <- data.frame(input_data, disease)
input_data$disease <- as.character(input_data$disease)

# Convert mutations data to proteinpaint input
result <- muts2pp(input_data, input_type = "iseq")
head(result)
#>    gene       refseq chromosome     start aachange      class disease
#> 1  IDH1 NM_001282386       chr2 209113113  p.R132S   missense   T-ALL
#> 2 PTPRC    NM_080921       chr1 198711362  p.A694T   missense   T-ALL
#> 3 PTPRC    NM_080921       chr1 198685843 p.I281fs frameshift   T-ALL
#> 4  ASPM NM_001206846       chr1 197093449 p.L1061F   missense   T-ALL
#> 5  ASPM    NM_018136       chr1 197070987 p.Q2465R   missense   T-ALL
#> 6  FAT1    NM_005245       chr4 187630354  p.L210F   missense   T-ALL
#>   sample
#> 1     A1
#> 2     A2
#> 3     A3
#> 4     A4
#> 5     A5
#> 6     A6
# Convert mutations data to cbioportal input
result <- muts2mutation_mapper(input_data, input_type = "iseq")
#> Warning in handler(handler_data, config_input, defined_cols, input_data, :
#> Validation_Status were not exists or not be recognize correctly in input
#> data!
#> Warning in handler(handler_data, config_input, defined_cols, input_data, :
#> Mutation_Status were not exists or not be recognize correctly in input
#> data!
#> Warning in handler(handler_data, config_input, defined_cols, input_data, :
#> Center were not exists or not be recognize correctly in input data!
head(result)
#>   Hugo_Symbol Sample_ID Protein_Change     Mutation_Type Chromosome
#> 1        IDH1        A1          R132S Missense_Mutation       chr2
#> 2       PTPRC        A2          A694T Missense_Mutation       chr1
#> 3       PTPRC        A3         I281fs   Frame_Shift_Ins       chr1
#> 4        ASPM        A4         L1061F Missense_Mutation       chr1
#> 5        ASPM        A5         Q2465R Missense_Mutation       chr1
#> 6        FAT1        A6          L210F Missense_Mutation       chr4
#>   Start_Position End_Position Reference_Allele Variant_Allele
#> 1      209113113    209113113                G              T
#> 2      198711362    198711362                G              A
#> 3      198685843    198685843                -              T
#> 4      197093449    197093449                G              A
#> 5      197070987    197070987                T              C
#> 6      187630354    187630354                G              A
#>   Validation_Status Mutation_Status Center
#> 1                NA              NA     NA
#> 2                NA              NA     NA
#> 3                NA              NA     NA
#> 4                NA              NA     NA
#> 5                NA              NA     NA
#> 6                NA              NA     NA
result <- muts2oncoprinter(input_data, input_type = "iseq")
head(result)
#>   Sample  Gene Alteration     Type
#> 1     A1  IDH1      R132S MISSENSE
#> 2     A2 PTPRC      A694T MISSENSE
#> 3     A3 PTPRC     I281fs    TRUNC
#> 4     A4  ASPM     L1061F MISSENSE
#> 5     A5  ASPM     Q2465R MISSENSE
#> 6     A6  FAT1      L210F MISSENSE

demo_file <- system.file('extdata', 'demo/proteinpaint/fusions2pp_fusioncatcher.txt', package = 'ngstk')
input_data <- read.table(demo_file, sep = '\t', header = TRUE, stringsAsFactors = FALSE)
disease <- 'B-ALL'
sampletype <- 'diagnose'
input_data <- data.frame(input_data, disease, sampletype)
input_data$disease <- as.character(input_data$disease)
# Convert fusions data to proteinpaint input
result <- fusions2pp(input_data, input_type = 'fusioncatcher')
head(result)
#>   disease sampletype gene_a        refseq_a chr_a position_a gene_b
#> 1   B-ALL   diagnose   TCF3 ENSG00000071564 chr19    1619110   PBX1
#> 2   B-ALL   diagnose   TCF3 ENSG00000071564 chr19    1619110   PBX1
#> 3   B-ALL   diagnose   TCF3 ENSG00000071564 chr19    1619110   PBX1
#> 4   B-ALL   diagnose   TCF3 ENSG00000071564 chr19    1619110   PBX1
#> 5   B-ALL   diagnose   TCF3 ENSG00000071564 chr19    1619110   PBX1
#> 6   B-ALL   diagnose   GYPE ENSG00000197465  chr4  144801564   GYPA
#>          refseq_b chr_b position_b strand_a strand_b patient
#> 1 ENSG00000185630  chr1  164761731        -        +      A1
#> 2 ENSG00000185630  chr1  164761731        -        +      A2
#> 3 ENSG00000185630  chr1  164761731        -        +      A3
#> 4 ENSG00000185630  chr1  164704095        -        +      A4
#> 5 ENSG00000185630  chr1  164704095        -        +      A5
#> 6 ENSG00000170180  chr4  145040934        -        -      A6

merge_table_files is another utility function, used to merge multiple table files.

a <- data.frame(col1=1:6, col2=2:7)
b <- data.frame(col1=6:11, col2=1:6)
file_a <- paste0(tempfile(), '_abcd')
file_b <- paste0(tempfile(), '_abcd')
write.table(a, file_a, sep = '\t', row.names = FALSE)
write.table(b, file_b, sep = '\t', row.names = FALSE)
input_files <- c(file_a, file_b)
x1 <- merge_table_files(input_files = input_files)
head(x1)
#>                                 filename col1 col2
#> 1 /tmp/RtmpUrg5Db/file18ba97fafd4cf_abcd    1    2
#> 2 /tmp/RtmpUrg5Db/file18ba97fafd4cf_abcd    2    3
#> 3 /tmp/RtmpUrg5Db/file18ba97fafd4cf_abcd    3    4
#> 4 /tmp/RtmpUrg5Db/file18ba97fafd4cf_abcd    4    5
#> 5 /tmp/RtmpUrg5Db/file18ba97fafd4cf_abcd    5    6
#> 6 /tmp/RtmpUrg5Db/file18ba97fafd4cf_abcd    6    7
x2 <- merge_table_files(files_dir = tempdir(), pattern = '.*_abcd$')
head(x2)
#>                                filename col1 col2
#> 1 /tmp/RtmpUrg5Db/file18ba912b1fa1_abcd    6    1
#> 2 /tmp/RtmpUrg5Db/file18ba912b1fa1_abcd    7    2
#> 3 /tmp/RtmpUrg5Db/file18ba912b1fa1_abcd    8    3
#> 4 /tmp/RtmpUrg5Db/file18ba912b1fa1_abcd    9    4
#> 5 /tmp/RtmpUrg5Db/file18ba912b1fa1_abcd   10    5
#> 6 /tmp/RtmpUrg5Db/file18ba912b1fa1_abcd   11    6
outfn = tempfile()
x3 <- merge_table_files(files_dir = tempdir(), pattern = ".*_abcd$", outfn = outfn)
head(read.table(outfn, sep = "\t", header = TRUE))
#>                                filename col1 col2
#> 1 /tmp/RtmpUrg5Db/file18ba912b1fa1_abcd    6    1
#> 2 /tmp/RtmpUrg5Db/file18ba912b1fa1_abcd    7    2
#> 3 /tmp/RtmpUrg5Db/file18ba912b1fa1_abcd    8    3
#> 4 /tmp/RtmpUrg5Db/file18ba912b1fa1_abcd    9    4
#> 5 /tmp/RtmpUrg5Db/file18ba912b1fa1_abcd   10    5
#> 6 /tmp/RtmpUrg5Db/file18ba912b1fa1_abcd   11    6

Data filtration

Data filtration or subsetting is an important step for cleaning data or running a specific analysis. A series of data filtration functions will be established and stabilized so that they can be reused in the future.

demo_file <- system.file("extdata", "demo/proteinpaint/fusions2pp_fusioncatcher.txt", package = "ngstk")
input_data <- read.table(demo_file, sep = "\t", header = TRUE, stringsAsFactors = FALSE)
# Get data subset according the defined rule
mhandler_extra_params = list(gene_5 = 1, gene_3 = 2, any_gene = "TCF3", fusions_any_match_flag = TRUE)
result_1 <- fusions_filter(input_data, mhandler_extra_params = mhandler_extra_params)
head(result_1)
#>   gene5 gene3             fusion_type
#> 1  TCF3  PBX1                in-frame
#> 2  TCF3  PBX1                in-frame
#> 3  TCF3  PBX1                in-frame
#> 4  TCF3  PBX1 CDS(truncated)/intronic
#> 5  TCF3  PBX1 CDS(truncated)/intronic

mhandler_extra_params = list(gene_3 = 2, right_gene = "GYPA", fusions_right_match_flag = TRUE)
result_2 <- fusions_filter(input_data, mhandler_extra_params = mhandler_extra_params)
head(result_2)
#>   gene5 gene3 fusion_type
#> 6  GYPE  GYPA    in-frame
#> 7  GYPE  GYPA    in-frame

mhandler_extra_params = list(gene_5 = 1, left_gene = "GYPA", fusions_left_match_flag = TRUE)
result_3 <- fusions_filter(input_data, mhandler_extra_params = mhandler_extra_params)
head(result_3)
#> [1] gene5       gene3       fusion_type
#> <0 rows> (or 0-length row.names)

mhandler_extra_params = list(gene_5 = 1, gene_3 = 2, left_gene = "GYPE", right_gene = "GYPA", fusions_full_match_flag = TRUE)
result_4 <- fusions_filter(input_data, mhandler_extra_params = mhandler_extra_params)
head(result_4)
#>   gene5 gene3 fusion_type
#> 6  GYPE  GYPA    in-frame
#> 7  GYPE  GYPA    in-frame

mhandler_extra_params = list(gene_5 = 1, gene_3 = 2, left_gene = "GYPE", right_gene = "GYPA", fusions_anyfull_match_flag = TRUE)
result_5 <- fusions_filter(input_data, mhandler_extra_params = mhandler_extra_params)
head(result_5)
#>   gene5 gene3 fusion_type
#> 6  GYPE  GYPA    in-frame
#> 7  GYPE  GYPA    in-frame

Split data

Splitting data is an optional step if you want to process the data stream in parallel. ngstk provides split_row_data and split_col_data to split data.frame and data.table objects.

x1 <- data.frame(col1 = 1:39, col2 = 1:39)
x1
#>    col1 col2
#> 1     1    1
#> 2     2    2
#> 3     3    3
#> 4     4    4
#> 5     5    5
#> 6     6    6
#> 7     7    7
#> 8     8    8
#> 9     9    9
#> 10   10   10
#> 11   11   11
#> 12   12   12
#> 13   13   13
#> 14   14   14
#> 15   15   15
#> 16   16   16
#> 17   17   17
#> 18   18   18
#> 19   19   19
#> 20   20   20
#> 21   21   21
#> 22   22   22
#> 23   23   23
#> 24   24   24
#> 25   25   25
#> 26   26   26
#> 27   27   27
#> 28   28   28
#> 29   29   29
#> 30   30   30
#> 31   31   31
#> 32   32   32
#> 33   33   33
#> 34   34   34
#> 35   35   35
#> 36   36   36
#> 37   37   37
#> 38   38   38
#> 39   39   39
x <- split_row_data(x1, sections = 2)
x
#> [[1]]
#>    col1 col2
#> 1     1    1
#> 2     2    2
#> 3     3    3
#> 4     4    4
#> 5     5    5
#> 6     6    6
#> 7     7    7
#> 8     8    8
#> 9     9    9
#> 10   10   10
#> 11   11   11
#> 12   12   12
#> 13   13   13
#> 14   14   14
#> 15   15   15
#> 16   16   16
#> 17   17   17
#> 18   18   18
#> 19   19   19
#> 
#> [[2]]
#>    col1 col2
#> 20   20   20
#> 21   21   21
#> 22   22   22
#> 23   23   23
#> 24   24   24
#> 25   25   25
#> 26   26   26
#> 27   27   27
#> 28   28   28
#> 29   29   29
#> 30   30   30
#> 31   31   31
#> 32   32   32
#> 33   33   33
#> 34   34   34
#> 35   35   35
#> 36   36   36
#> 37   37   37
#> 38   38   38
#> 39   39   39
x <- split_row_data(x1, sections = 3)
x
#> [[1]]
#>    col1 col2
#> 1     1    1
#> 2     2    2
#> 3     3    3
#> 4     4    4
#> 5     5    5
#> 6     6    6
#> 7     7    7
#> 8     8    8
#> 9     9    9
#> 10   10   10
#> 11   11   11
#> 12   12   12
#> 13   13   13
#> 
#> [[2]]
#>    col1 col2
#> 14   14   14
#> 15   15   15
#> 16   16   16
#> 17   17   17
#> 18   18   18
#> 19   19   19
#> 20   20   20
#> 21   21   21
#> 22   22   22
#> 23   23   23
#> 24   24   24
#> 25   25   25
#> 26   26   26
#> 
#> [[3]]
#>    col1 col2
#> 27   27   27
#> 28   28   28
#> 29   29   29
#> 30   30   30
#> 31   31   31
#> 32   32   32
#> 33   33   33
#> 34   34   34
#> 35   35   35
#> 36   36   36
#> 37   37   37
#> 38   38   38
#> 39   39   39
x1 <- data.frame(col1 = 1:10, col2 = 11:20)
x1.t <- t(x1)
x <- split_col_data(x1.t, sections = 3)
x
#> [[1]]
#>      [,1] [,2] [,3]
#> col1    1    2    3
#> col2   11   12   13
#> 
#> [[2]]
#>      [,1] [,2] [,3]
#> col1    4    5    6
#> col2   14   15   16
#> 
#> [[3]]
#>      [,1] [,2] [,3] [,4]
#> col1    7    8    9   10
#> col2   17   18   19   20
# split file
dat <- data.frame(col1 = 1:10000)
outfn <- tempfile()
write.table(dat, outfn, sep = "\t")
split_row_file(outfn)
#> $`1`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_1"
#> 
#> $`2`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_2"
#> 
#> $`3`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_3"
#> 
#> $`4`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_4"
#> 
#> $`5`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_5"
#> 
#> $`6`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_6"
#> 
#> $`7`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_7"
#> 
#> $`8`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_8"
#> 
#> $`9`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_9"
#> 
#> $`10`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_10"
#> 
#> $`11`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_11"
#> 
#> $`12`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_12"
#> 
#> $`13`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_13"
#> 
#> $`14`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_14"
#> 
#> $`15`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_15"
#> 
#> $`16`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_16"
#> 
#> $`17`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_17"
#> 
#> $`18`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_18"
#> 
#> $`19`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_19"
#> 
#> $`20`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_20"
#> 
#> $`21`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_21"
#> 
#> $`22`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_22"
#> 
#> $`23`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_23"
#> 
#> $`24`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_24"
#> 
#> $`25`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_25"
#> 
#> $`26`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_26"
#> 
#> $`27`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_27"
#> 
#> $`28`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_28"
#> 
#> $`29`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_29"
#> 
#> $`30`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_30"
#> 
#> $`31`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_31"
#> 
#> $`32`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_32"
#> 
#> $`33`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_33"
#> 
#> $`34`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_34"
#> 
#> $`35`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_35"
#> 
#> $`36`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_36"
#> 
#> $`37`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_37"
#> 
#> $`38`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_38"
#> 
#> $`39`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_39"
#> 
#> $`40`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_40"
#> 
#> $`41`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_41"
#> 
#> $`42`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_42"
#> 
#> $`43`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_43"
#> 
#> $`44`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_44"
#> 
#> $`45`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_45"
#> 
#> $`46`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_46"
#> 
#> $`47`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_47"
#> 
#> $`48`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_48"
#> 
#> $`49`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_49"
#> 
#> $`50`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_50"
#> 
#> $`51`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_51"
#> 
#> $`52`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_52"
#> 
#> $`53`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_53"
#> 
#> $`54`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_54"
#> 
#> $`55`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_55"
#> 
#> $`56`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_56"
#> 
#> $`57`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_57"
#> 
#> $`58`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_58"
#> 
#> $`59`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_59"
#> 
#> $`60`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_60"
#> 
#> $`61`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_61"
#> 
#> $`62`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_62"
#> 
#> $`63`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_63"
#> 
#> $`64`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_64"
#> 
#> $`65`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_65"
#> 
#> $`66`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_66"
#> 
#> $`67`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_67"
#> 
#> $`68`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_68"
#> 
#> $`69`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_69"
#> 
#> $`70`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_70"
#> 
#> $`71`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_71"
#> 
#> $`72`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_72"
#> 
#> $`73`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_73"
#> 
#> $`74`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_74"
#> 
#> $`75`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_75"
#> 
#> $`76`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_76"
#> 
#> $`77`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_77"
#> 
#> $`78`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_78"
#> 
#> $`79`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_79"
#> 
#> $`80`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_80"
#> 
#> $`81`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_81"
#> 
#> $`82`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_82"
#> 
#> $`83`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_83"
#> 
#> $`84`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_84"
#> 
#> $`85`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_85"
#> 
#> $`86`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_86"
#> 
#> $`87`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_87"
#> 
#> $`88`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_88"
#> 
#> $`89`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_89"
#> 
#> $`90`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_90"
#> 
#> $`91`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_91"
#> 
#> $`92`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_92"
#> 
#> $`93`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_93"
#> 
#> $`94`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_94"
#> 
#> $`95`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_95"
#> 
#> $`96`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_96"
#> 
#> $`97`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_97"
#> 
#> $`98`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_98"
#> 
#> $`99`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_99"
#> 
#> $`100`
#> [1] "/tmp/RtmpUrg5Db/file18ba964c4e8a6_split_100"

Filename Process

files_dir <- system.file('extdata', 'demo/format', package = 'ngstk')
pattern <- '*.txt'
list.files(files_dir, pattern)
#> [1] "-cancer-circrna.txt"         "common-circrna.txt"         
#> [3] "hg38_cancer_circrna_mre.txt" "hg38-cancer-circrna.txt"    
#> [5] "hg38-common-circrna.txt"
x <- format_filenames(files_dir = files_dir, pattern = pattern, profix = 'hg38_')
x
#> [1] "/tmp/Rtmpd5dOjG/Rbuild18b98691c488a/ngstk/vignettes/hg38_cancer_circrna.txt"    
#> [2] "/tmp/Rtmpd5dOjG/Rbuild18b98691c488a/ngstk/vignettes/hg38_common_circrna.txt"    
#> [3] "/tmp/Rtmpd5dOjG/Rbuild18b98691c488a/ngstk/vignettes/hg38_cancer_circrna_mre.txt"
#> [4] "/tmp/Rtmpd5dOjG/Rbuild18b98691c488a/ngstk/vignettes/hg38_cancer_circrna.txt"    
#> [5] "/tmp/Rtmpd5dOjG/Rbuild18b98691c488a/ngstk/vignettes/hg38_common_circrna.txt"

Colors

set_colors('default')
#>  [1] "#0073c3" "#efc000" "#696969" "#ce534c" "#7ba6db" "#035892" "#052135"
#>  [8] "#666633" "#660000" "#990000"
set_colors('proteinpaint_mutations')
#>  [1] "#3987cc" "#ff7f0e" "#db3d3d" "#6633ff" "#bbbbbb" "#9467bd" "#998199"
#>  [8] "#8c564b" "#819981" "#5781ff"
set_colors('proteinpaint_chromHMM_state')
#>  [1] "#c0222c" "#f12424" "#ff00c7" "#d192fb" "#f9982f" "#fcc88e" "#fbf876"
#>  [8] "#a6d67b" "#1fb855" "#007d37" "#00a99e" "#11aaec" "#186db9" "#3800f8"
#> [15] "#961a8b" "#47005f"