suppressPackageStartupMessages({
library(DT)
library(BiocPkgTools)
library(dplyr)
library(tidyr)
library(igraph)
library(ggplot2)
library(ggrepel)
})
Get list of packages
## 'getOption("repos")' replaces Bioconductor standard repositories, see
## '?repositories' for details
##
## replacement repositories:
## CRAN: https://cloud.r-project.org
Get and summarize download stats
## Fetch the raw download statistics: Bioconductor software packages (`ds`)
## and Anaconda (`ads`).
ds <- BiocPkgTools::biocDownloadStats(pkgType = "software")
## adding rname 'http://bioconductor.org/packages/stats/bioc/bioc_pkg_stats.tab'
ads <- BiocPkgTools::anacondaDownloadStats()

## Collapse each table to one row per package holding the summed 2022
## download count.
ds <- ds %>%
  dplyr::filter(Year == 2022) %>%
  dplyr::group_by(Package) %>%
  dplyr::summarize(NbDownloadsBioc = sum(Nb_of_downloads))

ads <- ads %>%
  dplyr::filter(Year == 2022) %>%
  dplyr::group_by(Package) %>%
  dplyr::summarize(NbDownloadsConda = sum(Nb_of_downloads))
Find dependencies among packages
## Table of package dependencies, restricted to the "Depends" and "Imports"
## relations and to the Bioconductor software and CRAN repositories.
## It is the input for the dependency graph built from `depdf`.
depdf <- BiocPkgTools::buildPkgDependencyDataFrame(
  dependencies = c("Depends", "Imports"),
  repo = c("BioCsoft", "CRAN")
)
## 'getOption("repos")' replaces Bioconductor standard repositories, see
## '?repositories' for details
##
## replacement repositories:
## CRAN: https://cloud.r-project.org
## Dependency graph built from the dependency table
g <- BiocPkgTools::buildPkgDependencyIgraph(depdf)

## Every vertex name in the graph, except the empty string
pkgs <- setdiff(names(igraph::V(g)), "")

## Square 0/1 matrix over all packages.
## depmat[a, b] == 1 means: package `a` (row) depends on package `b` (column).
depmat <- matrix(
  0,
  nrow = length(pkgs),
  ncol = length(pkgs),
  dimnames = list(pkgs, pkgs)
)

## Fill each row with everything reachable from that package along outgoing
## edges. The package itself is part of its own subcomponent, so the diagonal
## ends up as 1.
for (pkg in pkgs) {
  reachable <- names(igraph::subcomponent(g, pkg, mode = "out"))
  depmat[pkg, setdiff(reachable, "")] <- 1
}

## Keep only Bioconductor packages that are present in the graph.
## NOTE(review): `bpi` is not defined in the visible part of this file; it is
## presumably the Bioconductor package table (e.g. from
## BiocPkgTools::biocPkgList()) -- confirm where it is created.
bps <- intersect(bpi$Package, pkgs)
depmat_bioc <- depmat[bps, bps]

## Column sums: how many packages depend - directly or indirectly - on a
## given package (the package itself is still included in this count)
cs <- colSums(depmat_bioc)

## Number of direct reverse dependencies among Bioconductor packages.
## Each order-1 "in" neighbourhood is restricted to `bps` and counted; the
## `- 1` takes the focal package itself out of the count.
drevdeps <- vapply(
  igraph::ego(g, nodes = igraph::V(g), order = 1, mode = "in"),
  function(nbhd) length(intersect(names(nbhd), bps)),
  0
) - 1
names(drevdeps) <- names(igraph::V(g))
drevdeps <- drevdeps[bps]
Make summary table
## Assemble the per-package summary table: selected metadata columns, 2022
## download counts (Bioconductor, Anaconda, and their sum) and dependency
## counts.
## NOTE(review): `bpi` is not defined in the visible part of this file; it is
## presumably the Bioconductor package table (e.g. from
## BiocPkgTools::biocPkgList()) -- confirm where it is created.
bpisub <- bpi %>%
  dplyr::select(Package, hasNEWS, VignetteBuilder, BugReports,
                PackageStatus, Maintainer, dependencyCount) %>%
  dplyr::left_join(ds, by = "Package") %>%
  dplyr::left_join(ads, by = "Package") %>%
  ## A package without a row in a stats table comes out of the join as NA.
  ## Count it as 0 downloads for BOTH sources: NA * 0 is NA in R, so a single
  ## missing count would otherwise turn every sum produced by the matrix
  ## products below into NA.
  dplyr::mutate(
    NbDownloadsBioc = tidyr::replace_na(NbDownloadsBioc, 0),
    NbDownloadsConda = tidyr::replace_na(NbDownloadsConda, 0),
    NbDownloadsBiocConda = NbDownloadsBioc + NbDownloadsConda
  )

## Remove the package itself from the downstream dependencies.
## Packages that are not in `cs` / `drevdeps` (i.e. not in `bps`) get NA here.
bpisub$NbDependentPkgs <- cs[bpisub$Package] - 1
bpisub$NbDirDependentPkgs <- drevdeps[bpisub$Package]

## Get download counts for downstream dependencies.
## With the diagonal zeroed, (row vector of downloads) %*% depmat_bioc gives,
## for each column package, the summed downloads of the packages that depend
## on it, excluding the package's own downloads.
diag(depmat_bioc) <- 0
row_idx <- match(rownames(depmat_bioc), bpisub$Package)
NbDownloadsBiocDepPkgs <-
  (rbind(bpisub$NbDownloadsBioc[row_idx]) %*% depmat_bioc)[1, ]
NbDownloadsCondaDepPkgs <-
  (rbind(bpisub$NbDownloadsConda[row_idx]) %*% depmat_bioc)[1, ]
bpisub$NbDownloadsBiocDepPkgs <-
  NbDownloadsBiocDepPkgs[bpisub$Package]
bpisub$NbDownloadsCondaDepPkgs <-
  NbDownloadsCondaDepPkgs[bpisub$Package]
bpisub <- bpisub %>%
  dplyr::mutate(NbDownloadsBiocCondaDepPkgs =
                  NbDownloadsBiocDepPkgs +
                  NbDownloadsCondaDepPkgs)
bpisub <- as.data.frame(bpisub)

## Interactive table, most-downloaded packages first
DT::datatable(bpisub %>% dplyr::arrange(desc(NbDownloadsBiocConda)),
              extensions = "FixedColumns",
              options = list(scrollX = TRUE,
                             fixedColumns = list(leftColumns = 2),
                             pageLength = 25))
Plots
## Bioconductor vs. Anaconda download counts, one point per package.
## Labels are drawn only for packages that are high on one axis and low on
## the other (thresholds below).
ggplot(bpisub, aes(x = NbDownloadsBioc, y = NbDownloadsConda)) +
  geom_point(size = 3, alpha = 0.5) +
  geom_text_repel(
    data = dplyr::filter(
      bpisub,
      (NbDownloadsBioc > 750000 & NbDownloadsConda < 50000) |
        (NbDownloadsBioc < 500000 & NbDownloadsConda > 100000)
    ),
    mapping = aes(label = Package),
    size = 4,
    max.overlaps = Inf,
    min.segment.length = 0.1
  ) +
  theme_bw()
## A package's own downloads (Bioconductor + Anaconda) against the combined
## downloads of the packages that depend on it.
## Labels are drawn for packages with many own downloads, or with few own
## downloads but heavily downloaded dependents (thresholds below).
ggplot(
  bpisub,
  aes(x = NbDownloadsBiocConda, y = NbDownloadsBiocCondaDepPkgs)
) +
  geom_point(size = 3, alpha = 0.5) +
  geom_text_repel(
    data = dplyr::filter(
      bpisub,
      (NbDownloadsBiocConda > 750000) |
        (NbDownloadsBiocConda < 500000 &
           NbDownloadsBiocCondaDepPkgs > 5000000) |
        (NbDownloadsBiocConda < 250000 &
           NbDownloadsBiocCondaDepPkgs > 3000000)
    ),
    mapping = aes(label = Package),
    size = 4,
    max.overlaps = Inf,
    min.segment.length = 0.1
  ) +
  theme_bw()
## Warning: Removed 25 rows containing missing values (`geom_point()`).
## Number of direct reverse dependencies against the number of all (direct or
## indirect) dependent packages.
## Labels are drawn for packages with very many direct dependents, or with
## few direct but many total dependents (thresholds below).
ggplot(bpisub, aes(x = NbDirDependentPkgs, y = NbDependentPkgs)) +
  geom_point(size = 3, alpha = 0.5) +
  geom_text_repel(
    data = dplyr::filter(
      bpisub,
      (NbDirDependentPkgs > 400) |
        (NbDirDependentPkgs < 100 & NbDependentPkgs > 500)
    ),
    mapping = aes(label = Package),
    size = 4,
    max.overlaps = Inf,
    min.segment.length = 0.1
  ) +
  theme_bw()
## Warning: Removed 25 rows containing missing values (`geom_point()`).
Session info
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-apple-darwin17.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggrepel_0.9.2 ggplot2_3.4.0 igraph_1.3.5
## [4] tidyr_1.2.1 dplyr_1.0.10 BiocPkgTools_1.16.0
## [7] htmlwidgets_1.5.4 DT_0.26
##
## loaded via a namespace (and not attached):
## [1] Biobase_2.58.0 httr_1.4.4 sass_0.4.3
## [4] bit64_4.0.5 jsonlite_1.8.3 bslib_0.4.1
## [7] assertthat_0.2.1 highr_0.9 BiocManager_1.30.19
## [10] stats4_4.2.2 BiocFileCache_2.6.0 RBGL_1.74.0
## [13] blob_1.2.3 yaml_2.3.6 pillar_1.8.1
## [16] RSQLite_2.2.18 glue_1.6.2 RUnit_0.4.32
## [19] digest_0.6.30 rvest_1.0.3 colorspace_2.0-3
## [22] htmltools_0.5.3 XML_3.99-0.12 pkgconfig_2.0.3
## [25] httpcode_0.3.0 purrr_0.3.5 scales_1.2.1
## [28] whisker_0.4 tzdb_0.3.0 tibble_3.1.8
## [31] biocViews_1.66.0 farver_2.1.1 generics_0.1.3
## [34] ellipsis_0.3.2 cachem_1.0.6 withr_2.5.0
## [37] BiocGenerics_0.44.0 cli_3.4.1 magrittr_2.0.3
## [40] memoise_2.0.1 evaluate_0.18 fansi_1.0.3
## [43] xml2_1.3.3 graph_1.76.0 tools_4.2.2
## [46] gh_1.3.1 hms_1.1.2 lifecycle_1.0.3
## [49] stringr_1.4.1 munsell_0.5.0 compiler_4.2.2
## [52] jquerylib_0.1.4 rlang_1.0.6 grid_4.2.2
## [55] RCurl_1.98-1.9 rappdirs_0.3.3 crosstalk_1.2.0
## [58] labeling_0.4.2 bitops_1.0-7 rmarkdown_2.18
## [61] rorcid_0.7.0 gtable_0.3.1 DBI_1.1.3
## [64] curl_4.3.3 fauxpas_0.5.0 R6_2.5.1
## [67] knitr_1.41 fastmap_1.1.0 bit_4.0.5
## [70] utf8_1.2.2 filelock_1.0.2 readr_2.1.3
## [73] stringi_1.7.8 crul_1.3 Rcpp_1.0.9
## [76] vctrs_0.5.1 dbplyr_2.2.1 tidyselect_1.2.0
## [79] xfun_0.35