suppressPackageStartupMessages({
library(DT)
library(BiocPkgTools)
library(dplyr)
library(tidyr)
library(igraph)
library(ggplot2)
library(ggrepel)
library(plotly)
})
Get list of packages
## Package listing for the Bioconductor "devel" version
bpi <- BiocPkgTools::biocPkgList(version = "devel")
## 'getOption("repos")' replaces Bioconductor standard repositories, see
## 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
## CRAN: https://cran.rstudio.com
Get and summarize download stats
## Download statistics for Bioconductor software packages
ds <- BiocPkgTools::biocDownloadStats(pkgType = "software")
## 'getOption("repos")' replaces Bioconductor standard repositories, see
## 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
## CRAN: https://cran.rstudio.com
## adding rname 'https://bioconductor.org/packages/stats/bioc/bioc_pkg_stats.tab'
## Conda download statistics
ads <- BiocPkgTools::anacondaDownloadStats()
## Collapse each table to one row per package holding the summed number of
## downloads over all 2024 records
ds <- ds |>
  dplyr::filter(Year == 2024) |>
  dplyr::group_by(Package) |>
  dplyr::summarise(
    NbDownloadsBioc = sum(Nb_of_downloads),
    .groups = "drop"
  )
ads <- ads |>
  dplyr::filter(Year == 2024) |>
  dplyr::group_by(Package) |>
  dplyr::summarise(
    NbDownloadsConda = sum(Nb_of_downloads),
    .groups = "drop"
  )
Find dependencies among packages
## Dependency table restricted to the "Depends" and "Imports" relationships,
## built from the "BioCsoft" and "CRAN" repositories
depdf <- BiocPkgTools::buildPkgDependencyDataFrame(
  dependencies = c("Depends", "Imports"),
  repo = c("BioCsoft", "CRAN")
)
## 'getOption("repos")' replaces Bioconductor standard repositories, see
## 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
## CRAN: https://cran.rstudio.com
## Convert the table to a graph; edges go from a package to its upstream
## dependencies/imports
g <- BiocPkgTools::buildPkgDependencyIgraph(depdf)
## Every vertex name except the empty string. This covers Bioc packages as well
## as CRAN packages (if the latter are dependencies for Bioc packages).
pkgs <- setdiff(names(igraph::V(g)), "")
length(pkgs)
## [1] 23275
## Bioc packages = packages in the Bioc listing that also occur in the graph
bps <- intersect(bpi$Package, pkgs)
length(bps)
## [1] 2279
## Dependency matrix among Bioc packages, initialized to all zeros.
## The package in a row depends on/imports the packages in the columns that
## are set to 1.
depmat <- matrix(
  0,
  nrow = length(bps),
  ncol = length(bps),
  dimnames = list(bps, bps)
)
## Find the dependencies of each package (direct and indirect): everything
## reachable from the package by following outgoing edges
for (pkg in bps) {
  reachable <- names(igraph::subcomponent(g, pkg, mode = "out"))
  ## Only Bioc packages have a column; other reachable vertices are ignored
  depmat[pkg, bps %in% reachable] <- 1
}
## Subset dependency matrix to Bioc packages
depmat_bioc <- depmat[bps, bps]
## Column sums = how many packages depend, directly or indirectly, on a given
## package. Subtract 1 to not include the package itself.
cs <- colSums(depmat_bioc) - 1
## Number of direct reverse dependencies among Bioc packages.
## For each Bioc package N, take its order-1 "in" neighborhood: all nodes with
## an edge directly to N (i.e., depending directly on N). The query is limited
## to the Bioc packages because only their counts are kept; asking for every
## vertex of the graph would compute many neighborhoods that are discarded.
drevdeps <- igraph::ego(g, nodes = bps, order = 1, mode = "in")
## Only count Bioc packages among the direct dependents
drevdeps <- lapply(drevdeps, function(w) intersect(names(w), bps))
## Get number of packages depending directly on each package (subtract 1 to not
## count the package itself)
drevdeps <- lengths(drevdeps) - 1
## Results are returned in the order of the queried nodes
names(drevdeps) <- bps
Make summary table
## One row per package in the Bioc listing, with the download totals from both
## sources attached. Packages without a match in a download table get 0.
bpisub <- bpi |>
  dplyr::select(Package, Maintainer, dependencyCount) |>
  dplyr::left_join(ds, by = "Package") |>
  dplyr::left_join(ads, by = "Package") |>
  dplyr::mutate(
    NbDownloadsConda = tidyr::replace_na(NbDownloadsConda, 0),
    NbDownloadsBioc = tidyr::replace_na(NbDownloadsBioc, 0),
    NbDownloadsBiocConda = NbDownloadsBioc + NbDownloadsConda
  )
## Add number of downstream dependencies (direct or direct + indirect).
## Packages missing from the lookup vectors get NA.
bpisub$NbDependentPkgs <- cs[bpisub$Package]
bpisub$NbDirDependentPkgs <- drevdeps[bpisub$Package]
## Get download counts for downstream dependencies.
## Zero the diagonal so that a package's own downloads are not included.
diag(depmat_bioc) <- 0
## Row of the summary table corresponding to each row of the matrix
pkg_rows <- match(rownames(depmat_bioc), bpisub$Package)
## (downloads as a row vector) %*% matrix: for each column package, the summed
## downloads of all row packages that depend on it
NbDownloadsBiocDepPkgs <- drop(
  bpisub$NbDownloadsBioc[pkg_rows] %*% depmat_bioc
)
NbDownloadsCondaDepPkgs <- drop(
  bpisub$NbDownloadsConda[pkg_rows] %*% depmat_bioc
)
bpisub$NbDownloadsBiocDepPkgs <- NbDownloadsBiocDepPkgs[bpisub$Package]
bpisub$NbDownloadsCondaDepPkgs <- NbDownloadsCondaDepPkgs[bpisub$Package]
## Combined total over both sources, then convert to a plain data.frame
bpisub <- as.data.frame(
  dplyr::mutate(
    bpisub,
    NbDownloadsBiocCondaDepPkgs =
      NbDownloadsBiocDepPkgs + NbDownloadsCondaDepPkgs
  )
)
## Interactive table, sorted by decreasing combined (Bioc + conda) downloads.
## Horizontal scrolling is enabled and the FixedColumns extension is used with
## leftColumns = 2.
bpisub |>
  dplyr::arrange(dplyr::desc(NbDownloadsBiocConda)) |>
  DT::datatable(
    extensions = "FixedColumns",
    options = list(
      scrollX = TRUE,
      fixedColumns = list(leftColumns = 2),
      pageLength = 25
    )
  )
Plots
## Bioc downloads vs. conda downloads, one point per package. Text labels are
## drawn only for packages with many Bioc but few conda downloads, or the
## other way around (thresholds below).
ggplot2::ggplot(
  bpisub,
  ggplot2::aes(x = NbDownloadsBioc, y = NbDownloadsConda)
) +
  ggplot2::geom_point(size = 3, alpha = 0.5) +
  ggrepel::geom_text_repel(
    mapping = ggplot2::aes(label = Package),
    data = dplyr::filter(
      bpisub,
      (NbDownloadsBioc > 750000 & NbDownloadsConda < 50000) |
        (NbDownloadsBioc < 500000 & NbDownloadsConda > 100000)
    ),
    size = 4,
    max.overlaps = Inf,
    min.segment.length = 0.1
  ) +
  ggplot2::labs(
    x = "Number of Bioc downloads for indicated package",
    y = "Number of conda downloads for indicated package"
  ) +
  ggplot2::theme_bw()

## ... interactive version of the same scatter plot; the package name is
## mapped to the `label` aesthetic
(
  ggplot2::ggplot(
    bpisub,
    ggplot2::aes(
      x = NbDownloadsBioc,
      y = NbDownloadsConda,
      label = Package
    )
  ) +
    ggplot2::geom_point(size = 3, alpha = 0.5) +
    ggplot2::labs(
      x = "Number of Bioc downloads for indicated package",
      y = "Number of conda downloads for indicated package"
    ) +
    ggplot2::theme_bw()
) |>
  plotly::ggplotly()
## Combined (Bioc + conda) downloads of a package vs. the combined downloads
## of the packages that depend on it. The dotted grey line marks y = x. Labels
## are drawn only for packages passing one of the thresholds below.
ggplot2::ggplot(
  bpisub,
  ggplot2::aes(x = NbDownloadsBiocConda, y = NbDownloadsBiocCondaDepPkgs)
) +
  ggplot2::geom_abline(
    slope = 1,
    intercept = 0,
    color = "grey",
    linetype = "dotted"
  ) +
  ggplot2::geom_point(size = 3, alpha = 0.5) +
  ggrepel::geom_text_repel(
    mapping = ggplot2::aes(label = Package),
    data = dplyr::filter(
      bpisub,
      (NbDownloadsBiocConda > 750000) |
        (NbDownloadsBiocConda < 500000 &
          NbDownloadsBiocCondaDepPkgs > 5000000) |
        (NbDownloadsBiocConda < 250000 &
          NbDownloadsBiocCondaDepPkgs > 3000000)
    ),
    size = 4,
    max.overlaps = Inf,
    min.segment.length = 0.1
  ) +
  ggplot2::labs(
    x = "Number of downloads from Bioc + conda for indicated package",
    y = "Number of downloads from Bioc + conda\nfor downstream dependencies of indicated package"
  ) +
  ggplot2::theme_bw()
## Warning: Removed 27 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Number of direct dependents vs. number of direct + indirect dependents for
## each package. The dotted grey line marks y = x. Labels are drawn only for
## packages passing one of the thresholds below.
ggplot2::ggplot(
  bpisub,
  ggplot2::aes(x = NbDirDependentPkgs, y = NbDependentPkgs)
) +
  ggplot2::geom_abline(
    slope = 1,
    intercept = 0,
    color = "grey",
    linetype = "dotted"
  ) +
  ggplot2::geom_point(size = 3, alpha = 0.5) +
  ggrepel::geom_text_repel(
    mapping = ggplot2::aes(label = Package),
    data = dplyr::filter(
      bpisub,
      (NbDirDependentPkgs > 120) |
        (NbDirDependentPkgs < 120 &
          NbDependentPkgs > 300)
    ),
    size = 4,
    max.overlaps = Inf,
    min.segment.length = 0.1
  ) +
  ggplot2::labs(
    x = "Number of packages directly importing indicated package",
    y = "Number of packages directly or indirectly\nimporting indicated package"
  ) +
  ggplot2::theme_bw()
## Warning: Removed 27 rows containing missing values or values outside the scale range
## (`geom_point()`).

Session info
## R version 4.5.0 (2025-04-11)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sonoma 14.7.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.1
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## time zone: UTC
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] plotly_4.10.4 ggrepel_0.9.6 ggplot2_3.5.2
## [4] igraph_2.1.4 tidyr_1.3.1 dplyr_1.1.4
## [7] BiocPkgTools_1.26.2 htmlwidgets_1.6.4 DT_0.33
##
## loaded via a namespace (and not attached):
## [1] gtable_0.3.6 xfun_0.52 bslib_0.9.0
## [4] gh_1.5.0 Biobase_2.68.0 tzdb_0.5.0
## [7] crosstalk_1.2.1 vctrs_0.6.5 tools_4.5.0
## [10] bitops_1.0-9 generics_0.1.4 stats4_4.5.0
## [13] curl_6.2.3 RUnit_0.4.33 tibble_3.2.1
## [16] RSQLite_2.4.0 blob_1.2.4 pkgconfig_2.0.3
## [19] data.table_1.17.4 RColorBrewer_1.1-3 dbplyr_2.5.0
## [22] graph_1.86.0 lifecycle_1.0.4 farver_2.1.2
## [25] stringr_1.5.1 compiler_4.5.0 biocViews_1.76.0
## [28] htmltools_0.5.8.1 sass_0.4.10 lazyeval_0.2.2
## [31] RCurl_1.98-1.17 yaml_2.3.10 pillar_1.10.2
## [34] jquerylib_0.1.4 whisker_0.4.1 cachem_1.1.0
## [37] rvest_1.0.4 tidyselect_1.2.1 digest_0.6.37
## [40] stringi_1.8.7 purrr_1.0.4 labeling_0.4.3
## [43] grid_4.5.0 fastmap_1.2.0 cli_3.6.5
## [46] magrittr_2.0.3 RBGL_1.84.0 XML_3.99-0.18
## [49] crul_1.5.0 withr_3.0.2 readr_2.1.5
## [52] scales_1.4.0 filelock_1.0.3 bit64_4.6.0-1
## [55] lubridate_1.9.4 timechange_0.3.0 rmarkdown_2.29
## [58] httr_1.4.7 bit_4.6.0 hms_1.1.3
## [61] memoise_2.0.1 evaluate_1.0.3 knitr_1.50
## [64] viridisLite_0.4.2 BiocFileCache_2.16.0 rlang_1.1.6
## [67] Rcpp_1.0.14 glue_1.8.0 DBI_1.2.3
## [70] httpcode_0.3.0 BiocManager_1.30.25 xml2_1.3.8
## [73] fauxpas_0.5.2 BiocGenerics_0.54.0 rorcid_0.7.0
## [76] jsonlite_2.0.0 R6_2.6.1