suppressPackageStartupMessages({
    library(DT)
    library(BiocPkgTools)
    library(dplyr)
    library(tidyr)
    library(igraph)
    library(ggplot2)
    library(ggrepel)
    library(plotly)
})

Get list of packages

## Package listing for the Bioconductor "devel" version
bpi <- BiocPkgTools::biocPkgList(version = "devel")
## 'getOption("repos")' replaces Bioconductor standard repositories, see
## 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
##     CRAN: https://cran.rstudio.com

Get and summarize download stats

## Year for which downloads are totalled (single place to change it)
stats_year <- 2024

## Total number of downloads per package for one year.
## ... stats: download table with columns Year, Package and Nb_of_downloads
## ... year: value compared against the Year column
## ... count_col: name of the output column that holds the per-package sum
## Returns a table with one row per package (columns Package and <count_col>).
summarize_yearly_downloads <- function(stats, year, count_col) {
    stats |>
        ## !! forces the function argument, so a column that happened to be
        ## called 'year' could never shadow it
        dplyr::filter(Year == !!year) |>
        dplyr::group_by(Package) |>
        dplyr::summarize("{count_col}" := sum(Nb_of_downloads))
}

ds <- biocDownloadStats(pkgType = "software")
## 'getOption("repos")' replaces Bioconductor standard repositories, see
## 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
##     CRAN: https://cran.rstudio.com
## adding rname 'https://bioconductor.org/packages/stats/bioc/bioc_pkg_stats.tab'
ads <- anacondaDownloadStats()

## Total downloads in the selected year (Bioconductor and conda)
ds <- summarize_yearly_downloads(ds, stats_year, "NbDownloadsBioc")
ads <- summarize_yearly_downloads(ads, stats_year, "NbDownloadsConda")

Find dependencies among packages

## Table of Depends/Imports relations for the "BioCsoft" and "CRAN" repositories
## NOTE(review): bpi is requested with version = "devel" while no version is
## given here - confirm that both refer to the same Bioconductor version.
depdf <- BiocPkgTools::buildPkgDependencyDataFrame(
    dependencies = c("Depends", "Imports"),
    repo = c("BioCsoft", "CRAN")
)
## 'getOption("repos")' replaces Bioconductor standard repositories, see
## 'help("repositories", package = "BiocManager")' for details.
## Replacement repositories:
##     CRAN: https://cran.rstudio.com
## Convert the dependency table to a graph
## ... edges go from package to upstream dependencies/imports
g <- depdf |>
    BiocPkgTools::buildPkgDependencyIgraph()

## Names of all vertices in the graph, without the empty name (covers Bioc
## packages as well as CRAN packages that Bioc packages depend on)
pkgs <- setdiff(igraph::V(g)$name, "")
length(pkgs)
## [1] 23275
## Bioc packages that are present in the graph
bps <- intersect(bpi$Package, pkgs)
length(bps)
## [1] 2279
## Dependency matrix among Bioc packages (0/1 entries)
## Package in row depends on/imports packages in columns, directly or
## indirectly; every package is also flagged for itself
depmat <- t(vapply(bps, function(pkg) {
    ## Find dependencies for each package (direct and indirect): everything
    ## reachable from pkg along outgoing edges
    upstream <- names(igraph::subcomponent(g, pkg, mode = "out"))
    as.numeric(bps %in% upstream)
}, numeric(length(bps))))
## vapply() returns one package per column, hence the transpose above;
## label both dimensions with the package names
dimnames(depmat) <- list(bps, bps)

## Subset dependency matrix to Bioc packages
depmat_bioc <- depmat[bps, bps]

## Column sums ("how many packages depend - directly or indirectly -
## on a given package"). Subtract 1 to not include the package itself.
cs <- colSums(depmat_bioc) - 1

## Number of direct reverse dependencies
## ... for each Bioc package N, find all other nodes with an edge directly
##     to N (i.e., depending directly on N). Only the Bioc packages are
##     queried, since the counts for all other vertices were discarded anyway.
drevdeps <- igraph::ego(g, order = 1, nodes = bps, mode = "in")
## ... keep only the Bioc packages among the direct reverse dependencies
drevdeps <- lapply(drevdeps, function(w) intersect(names(w), bps))
## Get number of packages depending directly on each package (subtract 1 to not
## count the package itself)
drevdeps <- lengths(drevdeps) - 1
## ... ego() returns its results in the order of 'nodes'
names(drevdeps) <- bps

Make summary table

## Package metadata joined with the download totals; packages without a
## download record get 0 downloads
bpisub <- bpi |>
    dplyr::select(Package, Maintainer, dependencyCount) |>
    dplyr::left_join(ds, by = "Package") |>
    dplyr::left_join(ads, by = "Package") |>
    dplyr::mutate(
        NbDownloadsConda = tidyr::replace_na(NbDownloadsConda, 0),
        NbDownloadsBioc = tidyr::replace_na(NbDownloadsBioc, 0),
        NbDownloadsBiocConda = NbDownloadsBioc + NbDownloadsConda
    )

## Add number of downstream dependencies (direct + indirect, and direct only)
## ... packages without an entry in cs/drevdeps get NA
bpisub$NbDependentPkgs <- cs[bpisub$Package]
bpisub$NbDirDependentPkgs <- drevdeps[bpisub$Package]

## Get download counts for downstream dependencies
## ... clear the diagonal so that a package's own downloads are not included
diag(depmat_bioc) <- 0
## ... row of the summary table that corresponds to each matrix row
row_idx <- match(rownames(depmat_bioc), bpisub$Package)
## ... for every column package, add up 'counts' over all row packages that
##     depend on it; returns a vector named by the column packages
sum_over_dependents <- function(counts) {
    (rbind(counts[row_idx]) %*% depmat_bioc)[1, ]
}
NbDownloadsBiocDepPkgs <- sum_over_dependents(bpisub$NbDownloadsBioc)
NbDownloadsCondaDepPkgs <- sum_over_dependents(bpisub$NbDownloadsConda)

bpisub$NbDownloadsBiocDepPkgs <- NbDownloadsBiocDepPkgs[bpisub$Package]
bpisub$NbDownloadsCondaDepPkgs <- NbDownloadsCondaDepPkgs[bpisub$Package]

## Combined (Bioc + conda) downloads of the downstream dependencies, then
## convert to a plain data.frame
bpisub <- bpisub |>
    dplyr::mutate(
        NbDownloadsBiocCondaDepPkgs =
            NbDownloadsBiocDepPkgs + NbDownloadsCondaDepPkgs
    ) |>
    as.data.frame()

## Interactive table sorted by decreasing Bioc + conda downloads, with
## horizontal scrolling and the 2 leftmost columns fixed
bpisub |>
    dplyr::arrange(desc(NbDownloadsBiocConda)) |>
    DT::datatable(
        extensions = "FixedColumns",
        options = list(
            scrollX = TRUE,
            fixedColumns = list(leftColumns = 2),
            pageLength = 25
        )
    )

Plots

## Bioc vs conda downloads per package
ggplot(bpisub, aes(x = NbDownloadsBioc, y = NbDownloadsConda)) +
    theme_bw() +
    labs(x = "Number of Bioc downloads for indicated package",
         y = "Number of conda downloads for indicated package") +
    geom_point(size = 3, alpha = 0.5) +
    ## Label only packages with many Bioc but few conda downloads, or the
    ## other way round; the function is applied to the plot data
    geom_text_repel(
        data = \(d) dplyr::filter(
            d,
            (NbDownloadsBioc > 750000 & NbDownloadsConda < 50000) |
                (NbDownloadsBioc < 500000 & NbDownloadsConda > 100000)
        ),
        aes(label = Package),
        size = 4,
        max.overlaps = Inf,
        min.segment.length = 0.1
    )

## ... interactive version
## (Package is mapped to 'label' - presumably so that the package name shows
##  up in the plotly hover text; confirm)
(
    ggplot(bpisub, aes(x = NbDownloadsBioc, y = NbDownloadsConda,
                       label = Package)) +
        theme_bw() +
        labs(x = "Number of Bioc downloads for indicated package",
             y = "Number of conda downloads for indicated package") +
        geom_point(size = 3, alpha = 0.5)
) |>
    ggplotly()
## Downloads of a package vs summed downloads of the packages depending on it
ggplot(bpisub, aes(x = NbDownloadsBiocConda,
                   y = NbDownloadsBiocCondaDepPkgs)) +
    theme_bw() +
    labs(x = "Number of downloads from Bioc + conda for indicated package",
         y = "Number of downloads from Bioc + conda\nfor downstream dependencies of indicated package") +
    ## Dotted identity line as a visual reference
    geom_abline(slope = 1, intercept = 0, color = "grey", linetype = "dotted") +
    geom_point(size = 3, alpha = 0.5) +
    ## Label heavily downloaded packages and packages whose dependents are
    ## heavily downloaded; the function is applied to the plot data
    geom_text_repel(
        data = \(d) dplyr::filter(
            d,
            (NbDownloadsBiocConda > 750000) |
                (NbDownloadsBiocConda < 500000 &
                     NbDownloadsBiocCondaDepPkgs > 5000000) |
                (NbDownloadsBiocConda < 250000 &
                     NbDownloadsBiocCondaDepPkgs > 3000000)
        ),
        aes(label = Package),
        size = 4,
        max.overlaps = Inf,
        min.segment.length = 0.1
    )
## Warning: Removed 27 rows containing missing values or values outside the scale range
## (`geom_point()`).

## Number of direct vs direct + indirect dependents per package
ggplot(bpisub, aes(x = NbDirDependentPkgs, y = NbDependentPkgs)) +
    theme_bw() +
    labs(x = "Number of packages directly importing indicated package",
         y = "Number of packages directly or indirectly\nimporting indicated package") +
    ## Dotted identity line as a visual reference
    geom_abline(slope = 1, intercept = 0, color = "grey", linetype = "dotted") +
    geom_point(size = 3, alpha = 0.5) +
    ## Label packages with many direct dependents, or with few direct but many
    ## total dependents; the function is applied to the plot data
    geom_text_repel(
        data = \(d) dplyr::filter(
            d,
            (NbDirDependentPkgs > 120) |
                (NbDirDependentPkgs < 120 &
                     NbDependentPkgs > 300)
        ),
        aes(label = Package),
        size = 4,
        max.overlaps = Inf,
        min.segment.length = 0.1
    )
## Warning: Removed 27 rows containing missing values or values outside the scale range
## (`geom_point()`).

Session info

## Record R version, platform and package versions used for this report
utils::sessionInfo()
## R version 4.5.0 (2025-04-11)
## Platform: aarch64-apple-darwin20
## Running under: macOS Sonoma 14.7.6
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: UTC
## tzcode source: internal
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
## [1] plotly_4.10.4       ggrepel_0.9.6       ggplot2_3.5.2      
## [4] igraph_2.1.4        tidyr_1.3.1         dplyr_1.1.4        
## [7] BiocPkgTools_1.26.2 htmlwidgets_1.6.4   DT_0.33            
## 
## loaded via a namespace (and not attached):
##  [1] gtable_0.3.6         xfun_0.52            bslib_0.9.0         
##  [4] gh_1.5.0             Biobase_2.68.0       tzdb_0.5.0          
##  [7] crosstalk_1.2.1      vctrs_0.6.5          tools_4.5.0         
## [10] bitops_1.0-9         generics_0.1.4       stats4_4.5.0        
## [13] curl_6.2.3           RUnit_0.4.33         tibble_3.2.1        
## [16] RSQLite_2.4.0        blob_1.2.4           pkgconfig_2.0.3     
## [19] data.table_1.17.4    RColorBrewer_1.1-3   dbplyr_2.5.0        
## [22] graph_1.86.0         lifecycle_1.0.4      farver_2.1.2        
## [25] stringr_1.5.1        compiler_4.5.0       biocViews_1.76.0    
## [28] htmltools_0.5.8.1    sass_0.4.10          lazyeval_0.2.2      
## [31] RCurl_1.98-1.17      yaml_2.3.10          pillar_1.10.2       
## [34] jquerylib_0.1.4      whisker_0.4.1        cachem_1.1.0        
## [37] rvest_1.0.4          tidyselect_1.2.1     digest_0.6.37       
## [40] stringi_1.8.7        purrr_1.0.4          labeling_0.4.3      
## [43] grid_4.5.0           fastmap_1.2.0        cli_3.6.5           
## [46] magrittr_2.0.3       RBGL_1.84.0          XML_3.99-0.18       
## [49] crul_1.5.0           withr_3.0.2          readr_2.1.5         
## [52] scales_1.4.0         filelock_1.0.3       bit64_4.6.0-1       
## [55] lubridate_1.9.4      timechange_0.3.0     rmarkdown_2.29      
## [58] httr_1.4.7           bit_4.6.0            hms_1.1.3           
## [61] memoise_2.0.1        evaluate_1.0.3       knitr_1.50          
## [64] viridisLite_0.4.2    BiocFileCache_2.16.0 rlang_1.1.6         
## [67] Rcpp_1.0.14          glue_1.8.0           DBI_1.2.3           
## [70] httpcode_0.3.0       BiocManager_1.30.25  xml2_1.3.8          
## [73] fauxpas_0.5.2        BiocGenerics_0.54.0  rorcid_0.7.0        
## [76] jsonlite_2.0.0       R6_2.6.1