This function combines predicted junction coverages (from scaleTxCoverages) and observed junction coverages (obtained from aligning the reads to the genome and counting the number of reads spanning each junction). It also summarized transcript abundances on the gene level and adds information about the number/fraction of uniquely mapping reads and multimapping reads spanning each junction.

combineCoverages(junctionCounts, junctionPredCovs, txQuants)

Arguments

junctionCounts

A data.frame with observed junction coverages. Should have columns seqnames, start, end, strand, uniqreads and mmreads.

junctionPredCovs

A data.frame with predicted junction coverages.

txQuants

A data.frame with estimated transcript abundances.

Value

A list with three elements:

junctionCovs:

A GRanges object with predicted and observed junction coverages.

txQuants:

A tibble with estimated transcript abundances.

geneQuants:

A tibble with summarized gene abundances.

References

Soneson C, Love MI, Patro R, Hussain S, Malhotra D, Robinson MD: A junction coverage compatibility score to quantify the reliability of transcript abundance estimates and annotation catalogs. bioRxiv doi:10.1101/378539 (2018)

Author

Charlotte Soneson

Examples

if (FALSE) { # \dontrun{
gtf <- system.file("extdata/Homo_sapiens.GRCh38.90.chr22.gtf.gz",
                   package = "jcc")
bam <- system.file("extdata/reads.chr22.bam", package = "jcc")
biasMod <- fitAlpineBiasModel(gtf = gtf, bam = bam,
                              organism = "Homo_sapiens",
                              genome = Hsapiens, genomeVersion = "GRCh38",
                              version = 90, minLength = 230,
                              maxLength = 7000, minCount = 10,
                              maxCount = 10000, subsample = TRUE,
                              nbrSubsample = 30, seed = 1, minSize = NULL,
                              maxSize = 220, verbose = TRUE)
tx2gene <- readRDS(system.file("extdata/tx2gene.sub.rds", package = "jcc"))
predCovProfiles <- predictTxCoverage(biasModel = biasMod$biasModel,
                                     exonsByTx = biasMod$exonsByTx,
                                     bam = bam, tx2gene = tx2gene,
                                     genome = Hsapiens,
                                     genes = c("ENSG00000070371",
                                               "ENSG00000093010"),
                                     nCores = 1, verbose = TRUE)
txQuants <- readRDS(system.file("extdata/quant.sub.rds", package = "jcc"))
txsc <- scaleTxCoverages(txCoverageProfiles = predCovProfiles,
                         txQuants = txQuants, tx2gene = tx2gene,
                         strandSpecific = TRUE, methodName = "Salmon",
                         verbose = TRUE)
jcov <- read.delim(system.file("extdata/sub.SJ.out.tab", package = "jcc"),
                   header = FALSE, as.is = TRUE) %>%
 setNames(c("seqnames", "start", "end", "strand", "motif", "annot",
           "uniqreads", "mmreads", "maxoverhang")) %>%
 dplyr::mutate(strand = replace(strand, strand == 1, "+")) %>%
 dplyr::mutate(strand = replace(strand, strand == 2, "-")) %>%
 dplyr::select(seqnames, start, end, strand, uniqreads, mmreads) %>%
 dplyr::mutate(seqnames = as.character(seqnames))
combCov <- combineCoverages(junctionCounts = jcov,
                            junctionPredCovs = txsc$junctionPredCovs,
                            txQuants = txsc$txQuants)
} # }