Calculate scaled coverages and JCC scores — calculateJCCScores • jcc

This function calculates the scaled predicted junction coverage for each junction, and the JCC score for each gene.

calculateJCCScores(junctionCovs, geneQuants, mmFracThreshold = 0.25)

Arguments

junctionCovs: A data.frame with observed and predicted junction counts
geneQuants: A data.frame with gene-level abundances.
mmFracThreshold: Scalar value in [0, 1] indicating the maximal fraction of multimapping reads a junction can have and still contribute to the score.

Value

A list with two elements:

junctionCovs:: A data.frame with predicted and observed junction coverages.
geneScores:: A data.frame with gene scores.

References

Soneson C, Love MI, Patro R, Hussain S, Malhotra D, Robinson MD: A junction coverage compatibility score to quantify the reliability of transcript abundance estimates and annotation catalogs. bioRxiv doi:10.1101/378539 (2018)

Author

Charlotte Soneson

Examples

if (FALSE) { # \dontrun{
gtf <- system.file("extdata/Homo_sapiens.GRCh38.90.chr22.gtf.gz",
                   package = "jcc")
bam <- system.file("extdata/reads.chr22.bam", package = "jcc")
biasMod <- fitAlpineBiasModel(gtf = gtf, bam = bam,
                              organism = "Homo_sapiens",
                              genome = Hsapiens, genomeVersion = "GRCh38",
                              version = 90, minLength = 230,
                              maxLength = 7000, minCount = 10,
                              maxCount = 10000, subsample = TRUE,
                              nbrSubsample = 30, seed = 1, minSize = NULL,
                              maxSize = 220, verbose = TRUE)
tx2gene <- readRDS(system.file("extdata/tx2gene.sub.rds", package = "jcc"))
predCovProfiles <- predictTxCoverage(biasModel = biasMod$biasModel,
                                     exonsByTx = biasMod$exonsByTx,
                                     bam = bam, tx2gene = tx2gene,
                                     genome = Hsapiens,
                                     genes = c("ENSG00000070371",
                                               "ENSG00000093010"),
                                     nCores = 1, verbose = TRUE)
txQuants <- readRDS(system.file("extdata/quant.sub.rds", package = "jcc"))
txsc <- scaleTxCoverages(txCoverageProfiles = predCovProfiles,
                         txQuants = txQuants, tx2gene = tx2gene,
                         strandSpecific = TRUE, methodName = "Salmon",
                         verbose = TRUE)
jcov <- read.delim(system.file("extdata/sub.SJ.out.tab", package = "jcc"),
                   header = FALSE, as.is = TRUE) %>%
 setNames(c("seqnames", "start", "end", "strand", "motif", "annot",
           "uniqreads", "mmreads", "maxoverhang")) %>%
 dplyr::mutate(strand = replace(strand, strand == 1, "+")) %>%
 dplyr::mutate(strand = replace(strand, strand == 2, "-")) %>%
 dplyr::select(seqnames, start, end, strand, uniqreads, mmreads) %>%
 dplyr::mutate(seqnames = as.character(seqnames))
combCov <- combineCoverages(junctionCounts = jcov,
                            junctionPredCovs = txsc$junctionPredCovs,
                            txQuants = txsc$txQuants)
jcc <- calculateJCCScores(junctionCovs = combCov$junctionCovs,
                          geneQuants = combCov$geneQuants)
} # }