functiondepends

Lifecycle: maturing Travis build status AppVeyor build status codecov

The goal of functiondepends is to allow for tidy exploration of unstructured codebase without evaluation of code.

Installation

One can install functiondepends from GitHub with:

# install.packages("devtools")
devtools::install_github("WelcomeToMyVirtualHome/functiondepends")
library(functiondepends)

Examples

# Create environment for loaded functions 
envir <- new.env()
# Search recursively current directory
functions <- find_functions(".", envir = envir, recursive = TRUE)
functions
#>   Level1              Level2          Function
#> 1      R find-dependencies.R find_dependencies
#> 2      R    find-functions.R       is_function
#> 3      R    find-functions.R get_function_name
#> 4      R    find-functions.R         is_assign
#> 5      R    find-functions.R    find_functions

Search for dependencies of function find_functions within parsed functions:

dependency <- find_dependencies("find_functions", envir = envir, in_envir = TRUE)
dependency
#> # A tibble: 2 x 5
#>   Source            SourceRep SourceNamespace Target         TargetInDegree
#>   <chr>                 <int> <chr>           <chr>                   <int>
#> 1 get_function_name         1 user-defined    find_functions              2
#> 2 is_function               1 user-defined    find_functions              2

Note that SourceNamespace column has value user-defined as the functions are searched within source of the package.

Search for all dependencies of find_functions function:

library(ggplot2)
library(dplyr)

dependency <- find_dependencies("find_functions", envir = envir, in_envir = FALSE)
dependency %>% 
  slice_max(SourceRep, n = 10) %>% 
  mutate(Source = reorder(Source, SourceRep)) %>% 
  ggplot(aes(x = Source, y = SourceRep, fill = SourceNamespace)) +
  geom_col() +
  coord_flip() +
  labs(caption = "Top 10 most repeated calls in 'find_functions'.")

Note that name df is often used to store object of type data.frame. df is also a name of F distribution density function from stats package. If you suspect that given function ought not to use a specific package, see the source code of function to check the context. To do so, one can execute find_dependencies function with add_info argument set to TRUE.

library(tidyr)

dependency <- find_dependencies("find_functions", envir = envir, in_envir = FALSE, add_info = TRUE)
dependency %>% 
  filter(SourceNamespace == "stats") %>% 
  select(Source, SourcePosition, SourceContext) %>% 
  unnest(c(SourcePosition, SourceContext)) 
#> # A tibble: 5 x 3
#>   Source SourcePosition SourceContext                                           
#>   <chr>           <dbl> <chr>                                                   
#> 1 df                  6 "    df <- purrr::map_dfr(sourceFiles, ~{"              
#> 2 df                 15 "    source_name <- basename(df$Path)"                  
#> 3 df                 17 "    df <- df %>% dplyr::mutate(Path = stringr::str_rem~
#> 4 df                 18 "    paths <- stringr::str_split(df$Path, \"/|\\\\\\\\\~
#> 5 df                 20 "    tidyr::separate(df, \"Path\", into = paste0(\"Leve~

One can see that indeed df is not a call to function stats::df.

dependency <- find_dependencies(unique(functions$Function), envir = envir, in_envir = FALSE)
dependency %>% 
  distinct(Target, TargetInDegree) %>%
  mutate(Target = reorder(Target, TargetInDegree)) %>%
  ggplot(aes(x = Target, y = TargetInDegree)) +
  geom_col() +
  coord_flip() + 
  labs(caption = "Functions with most function calls.")

dependency <- find_dependencies(unique(functions$Function), envir = envir, in_envir = FALSE)
dependency %>% 
  group_by(SourceNamespace) %>% 
  tally(name = "Count") %>% 
  slice_max(Count, n = 10) %>% 
  mutate(SourceNamespace = reorder(SourceNamespace, Count)) %>% 
  ggplot(aes(x = SourceNamespace, y = Count)) +
  geom_col() +
  coord_flip() +
  labs(caption = "Top 10 used namespaces.")

See which user-defined functions depend most on other user-defined functions within searched codebase.

dependency <- find_dependencies(unique(functions$Function), envir = envir, in_envir = TRUE)
dependency %>% 
  distinct(Target, TargetInDegree) %>% 
  arrange(-TargetInDegree)
#> # A tibble: 5 x 2
#>   Target            TargetInDegree
#>   <chr>                      <dbl>
#> 1 find_functions                 2
#> 2 is_function                    1
#> 3 find_dependencies              0
#> 4 get_function_name              0
#> 5 is_assign                      0
library(igraph)

edges <- dependency %>% 
  select(Source, Target) %>% 
  filter(!is.na(.))

vertices <- unique(c(dependency$Source, dependency$Target))
vertices <- vertices[!is.na(vertices)]

g <- graph_from_data_frame(d = edges, vertices = vertices)
deg <- degree(g, mode = "in")
V(g)$size <- deg * 10 + 5
V(g)$label.cex <- (degree(g, mode = "in", normalized = TRUE) + 1)

plot(
  g,
  vertex.color = "grey",
  edge.color = "grey",
  edge.arrow.size = .4,
  main = "Functions dependency graph"
)

dependency <- find_dependencies(unique(functions$Function), envir = envir, in_envir = FALSE)
edges <- dependency %>% 
  select(Source, Target) %>% 
  filter(!is.na(.))
vertices <- unique(c(edges$Source, edges$Target))

g <- graph_from_data_frame(edges)
deg <- degree(g, mode = "in")
V(g)$size <- deg
V(g)$label.cex <- (degree(g, mode = "in", normalized = TRUE) + 1) / 1.8

plot(
  g,
  vertex.color = "grey",
  edge.color = "grey",
  edge.arrow.size = .4,
  main = "Full functions dependency graph"
)