## The following variables were specified as input arguments when calling the rendering function.
## They will be used in the workflow below.


experimentInfo <-
list()
species <-
"human"
mqFile <-
"/Users/charlottesoneson/Documents/Rpackages/einprot/einprot_examples/ionstar_mq/MAXQuant_IonStar2018_PXD003881/proteinGroups.txt"
mqParameterFile <-
"/Users/charlottesoneson/Documents/Rpackages/einprot/einprot_examples/ionstar_mq/MAXQuant_IonStar2018_PXD003881/mqpar.xml"
idCol <-
function(df) {
        einprot::getFirstId(df, colName = "Majority.protein.IDs",
                            separator = ";")
    }
labelCol <-
function(df) {
        einprot::getNthId(
            df = data.frame(
                x = einprot::getFirstId(df, colName = "Majority.protein.IDs", 
                                        separator = ";")), 
            colName = "x", separator = "\\|", N = 3)
    }
geneIdCol <-
function(df) {
        einprot::getNthId(
            df = data.frame(
                x = einprot::getFirstId(df, colName = "Majority.protein.IDs", 
                                        separator = ";")), 
            colName = "x", separator = "\\|", N = 3)
    }
proteinIdCol <-
function(df) {
        einprot::getNthId(
            df = data.frame(
                x = einprot::getFirstId(df, colName = "Majority.protein.IDs", 
                                        separator = ";")), 
            colName = "x", separator = "\\|", N = 2)
    }
stringIdCol <-
NULL
reportTitle <-
"IonStar Human/E.coli spike-in"
reportAuthor <-
"MaxQuant/LFQ analysis"
iColPattern <-
"^LFQ.intensity."
sampleAnnot <-
structure(list(sample = c("B03_02_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B", 
"B03_03_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B", 
"B03_04_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B", 
"B03_05_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B", 
"B03_06_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B", 
"B03_07_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B", 
"B03_08_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B", 
"B03_09_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B", 
"B03_10_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B", 
"B03_11_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B", 
"B03_12_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B", 
"B03_13_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B", 
"B03_14_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B", 
"B03_15_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B", 
"B03_16_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B", 
"B03_17_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B", 
"B03_18_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B", 
"B03_19_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B", 
"B03_20_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B", 
"B03_21_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
), group = c("b", "c", "d", "e", "e", "d", "c", "b", "a", "a", 
"b", "c", "d", "e", "e", "d", "c", "b", "a", "a")), row.names = c(NA, 
-20L), class = c("tbl_df", "tbl", "data.frame"))
includeOnlySamples <-
""
excludeSamples <-
""
minScore <-
10
minPeptides <-
2
imputeMethod <-
"MinProb"
assaysForExport <-
c("iBAQ", "Top3")
mergeGroups <-
list()
comparisons <-
list(b_vs_a_explfc0.58 = c("a", "b"), c_vs_a_explfc1 = c("a", 
"c"), e_vs_d_explfc0.26 = c("d", "e"))
ctrlGroup <-
""
allPairwiseComparisons <-
TRUE
singleFit <-
FALSE
subtractBaseline <-
FALSE
baselineGroup <-
""
normMethod <-
"none"
spikeFeatures <-
NULL
stattest <-
"limma"
minNbrValidValues <-
2
minlFC <-
0
samSignificance <-
TRUE
nperm <-
250
volcanoAdjPvalThr <-
0.050000000000000003
volcanoLog2FCThr <-
0
volcanoMaxFeatures <-
25
volcanoLabelSign <-
"both"
volcanoS0 <-
0.10000000000000001
volcanoFeaturesToLabel <-
""
addInteractiveVolcanos <-
TRUE
interactiveDisplayColumns <-
c(Label = "einprotLabel", adjP = "adj.P.Val", logFC = "logFC"
)
interactiveGroupColumn <-
NULL
complexFDRThr <-
0.10000000000000001
maxNbrComplexesToPlot <-
Inf
seed <-
42
includeFeatureCollections <-
NULL
minSizeToKeepSet <-
2
customComplexes <-
list(ECOLI_GENES = c("REM_ECOLI", "PPK1_ECOLI", "ARAE_ECOLI", 
"HYPE_ECOLI", "NFI_ECOLI", "PAAK_ECOLI", "6PGD_ECOLI", "SODM_ECOLI", 
"AAT_ECOLI", "RPOD_ECOLI", "ASPG2_ECOLI", "CAPP_ECOLI", "THRC_ECOLI", 
"MANA_ECOLI", "SYW_ECOLI", "SYI_ECOLI", "SYA_ECOLI", "SYM_ECOLI", 
"SYGA_ECOLI", "SYGB_ECOLI", "ASNA_ECOLI", "RS6_ECOLI", "RS7_ECOLI", 
"RL15_ECOLI", "ARAF_ECOLI", "RBSB_ECOLI", "TOLC_ECOLI", "OMPF_ECOLI", 
"LAMB_ECOLI", "MIOC_ECOLI", "MALM_ECOLI", "GUAA_ECOLI", "GSHB_ECOLI", 
"TYRB_ECOLI", "SYE_ECOLI", "AMPN_ECOLI", "NLPA_ECOLI", "KDSB_ECOLI", 
"RBSD_ECOLI", "RBSA_ECOLI", "PYRC_ECOLI", "FUMC_ECOLI", "PNP_ECOLI", 
"PDXB_ECOLI", "BTUB_ECOLI", "DLD_ECOLI", "BTUE_ECOLI", "GSHR_ECOLI", 
"ODP2_ECOLI", "DUT_ECOLI", "HIS8_ECOLI", "HISX_ECOLI", "RSMA_ECOLI", 
"OMPC_ECOLI", "PFKB_ECOLI", "PROA_ECOLI", "RF2_ECOLI", "SDHB_ECOLI", 
"USHA_ECOLI", "SYV_ECOLI", "SYFB_ECOLI", "AROB_ECOLI", "TYPH_ECOLI", 
"TDH_ECOLI", "PUR5_ECOLI", "IDH_ECOLI", "SYFA_ECOLI", "USG_ECOLI", 
"DACC_ECOLI", "DNAJ_ECOLI", "PT1_ECOLI", "T1MK_ECOLI", "MASY_ECOLI", 
"PURK_ECOLI", "EX3_ECOLI", "HEMX_ECOLI", "GALE_ECOLI", "OMPT_ECOLI", 
"GRPE_ECOLI", "PFLB_ECOLI", "GLPQ_ECOLI", "MTLD_ECOLI", "ARGT_ECOLI", 
"GLTD_ECOLI", "ACKA_ECOLI", "ACP_ECOLI", "ISCS_ECOLI", "AROK_ECOLI", 
"AROL_ECOLI", "ATPE_ECOLI", "GLPK_ECOLI", "CH60_ECOLI", "CH10_ECOLI", 
"CLPP_ECOLI", "CLPX_ECOLI", "HSLU_ECOLI", "KCY_ECOLI", "COAA_ECOLI", 
"DADA_ECOLI", "DEF_ECOLI", "DEOB_ECOLI", "DEOC_ECOLI", "DAPA_ECOLI", 
"NANA_ECOLI", "EFG_ECOLI", "EFP_ECOLI", "EFTS_ECOLI", "ENGB_ECOLI", 
"ENO_ECOLI", "FABA_ECOLI", "FABZ_ECOLI", "FABH_ECOLI", "FIS_ECOLI", 
"G6PI_ECOLI", "GAL1_ECOLI", "GREA_ECOLI", "GSH1_ECOLI", "HFQ_ECOLI", 
"IHFA_ECOLI", "IHFB_ECOLI", "HSLO_ECOLI", "DNAK_ECOLI", "HSCA_ECOLI", 
"HTPG_ECOLI", "IF2_ECOLI", "IF3_ECOLI", "KDSA_ECOLI", "KPRS_ECOLI", 
"NTPPB_ECOLI", "MGSA_ECOLI", "MINE_ECOLI", "MSRB_ECOLI", "NANE_ECOLI", 
"NDK_ECOLI", "NUSB_ECOLI", "PDXJ_ECOLI", "PFKA_ECOLI", "PGK_ECOLI", 
"IPYR_ECOLI", "LEXA_ECOLI", "PEPE_ECOLI", "PURA_ECOLI", "PUR7_ECOLI", 
"PYRD_ECOLI", "PYRG_ECOLI", "PYRH_ECOLI", "RBFA_ECOLI", "RECA_ECOLI", 
"RL10_ECOLI", "RL11_ECOLI", "RL7_ECOLI", "RL19_ECOLI", "RL1_ECOLI", 
"RL20_ECOLI", "RL28_ECOLI", "RL29_ECOLI", "RL31_ECOLI", "RL32_ECOLI", 
"RL33_ECOLI", "RL34_ECOLI", "RL9_ECOLI", "RS10_ECOLI", "RS11_ECOLI", 
"RS12_ECOLI", "RS13_ECOLI", "RS16_ECOLI", "RS18_ECOLI", "RS19_ECOLI", 
"RS20_ECOLI", "RS2_ECOLI", "RS3_ECOLI", "RS4_ECOLI", "RS5_ECOLI", 
"RS8_ECOLI", "RS9_ECOLI", "RPIA_ECOLI", "RPOA_ECOLI", "RPOZ_ECOLI", 
"RRF_ECOLI", "METK_ECOLI", "GLYA_ECOLI", "SUCC_ECOLI", "TIG_ECOLI", 
"TNAA_ECOLI", "TOLB_ECOLI", "TPIS_ECOLI", "TPX_ECOLI", "TALA_ECOLI", 
"TALB_ECOLI", "TRPA_ECOLI", "TRPB_ECOLI", "UBIE_ECOLI", "YBAB_ECOLI", 
"NRDR_ECOLI", "YAII_ECOLI", "YMDB_ECOLI", "YCFP_ECOLI", "YAJQ_ECOLI", 
"UPP_ECOLI", "UXAC_ECOLI", "NQOR_ECOLI", "EX7S_ECOLI", "TRMB_ECOLI", 
"RHLB_ECOLI", "PSD_ECOLI", "YAEP_ECOLI", "SYS_ECOLI", "SYN_ECOLI", 
"SYT_ECOLI", "YEEX_ECOLI", "SYK1_ECOLI", "SYK2_ECOLI", "FETP_ECOLI", 
"RRAA_ECOLI", "PRMA_ECOLI", "RPOC_ECOLI", "RPOB_ECOLI", "NANR_ECOLI", 
"SLYA_ECOLI", "YFBU_ECOLI", "YCEI_ECOLI", "BAMC_ECOLI", "SLYB_ECOLI", 
"MIPA_ECOLI", "OMPA_ECOLI", "PAL_ECOLI", "OMPW_ECOLI", "OMPX_ECOLI", 
"TSX_ECOLI", "BAME_ECOLI", "BAMA_ECOLI", "FABB_ECOLI", "ALKH_ECOLI", 
"CSPE_ECOLI", "DPO3B_ECOLI", "ALF1_ECOLI", "GLPC_ECOLI", "FTNA_ECOLI", 
"FTSZ_ECOLI", "FUR_ECOLI", "G3P1_ECOLI", "GLPA_ECOLI", "GALM_ECOLI", 
"GLN1B_ECOLI", "GSTA_ECOLI", "DAPD_ECOLI", "ACEA_ECOLI", "MUG_ECOLI", 
"RBSK_ECOLI", "PHOL_ECOLI", "SLYD_ECOLI", "FKBB_ECOLI", "PPIC_ECOLI", 
"LON_ECOLI", "HPRT_ECOLI", "DLDH_ECOLI", "TRXB_ECOLI", "ARCA_ECOLI", 
"ACCD_ECOLI", "ADHE_ECOLI", "DHAS_ECOLI", "FER_ECOLI", "FUCO_ECOLI", 
"SERA_ECOLI", "TAS_ECOLI", "LPTB_ECOLI", "ETTA_ECOLI", "MREB_ECOLI", 
"CSPC_ECOLI", "GLNB_ECOLI", "PTHP_ECOLI", "RL13_ECOLI", "OMPR_ECOLI", 
"THIO_ECOLI", "YEDF_ECOLI", "RSUA_ECOLI", "YEAY_ECOLI", "GALF_ECOLI", 
"USPD_ECOLI", "USPE_ECOLI", "ARTP_ECOLI", "FTSH_ECOLI", "FABF_ECOLI", 
"FABD_ECOLI", "YAIA_ECOLI", "IOJAP_ECOLI", "YBEL_ECOLI", "YBGS_ECOLI", 
"YBIS_ECOLI", "YCCJ_ECOLI", "LPOB_ECOLI", "YCHN_ECOLI", "YCIN_ECOLI", 
"ALF_ECOLI", "KBL_ECOLI", "PUR8_ECOLI", "AROG_ECOLI", "ATPF_ECOLI", 
"ATPD_ECOLI", "ATPG_ECOLI", "ATPA_ECOLI", "ATPB_ECOLI", "HFLC_ECOLI", 
"HFLK_ECOLI", "BFR_ECOLI", "ACCA_ECOLI", "BCCP_ECOLI", "BOLA_ECOLI", 
"CDD_ECOLI", "CISY_ECOLI", "CYOA_ECOLI", "CYDA_ECOLI", "CYDB_ECOLI", 
"CYSK_ECOLI", "DEOD_ECOLI", "DYR_ECOLI", "DKSA_ECOLI", "DPS_ECOLI", 
"MENB_ECOLI", "ELBB_ECOLI", "KDSC_ECOLI", "SURA_ECOLI", "BAMD_ECOLI", 
"FUMA_ECOLI", "ASPA_ECOLI", "SDHA_ECOLI", "FRDB_ECOLI", "G6PD_ECOLI", 
"GLRX2_ECOLI", "GLRX3_ECOLI", "GLRX4_ECOLI", "GLO2_ECOLI", "SSPA_ECOLI", 
"HEMY_ECOLI", "ERPA_ECOLI", "ISCU_ECOLI", "MBHL_ECOLI", "MBHM_ECOLI", 
"HINT_ECOLI", "DBHA_ECOLI", "DBHB_ECOLI", "HNS_ECOLI", "STPA_ECOLI", 
"ASNC_ECOLI", "LRP_ECOLI", "CRP_ECOLI", "OXYR_ECOLI", "MPRA_ECOLI", 
"YJDC_ECOLI", "LAPA_ECOLI", "YDCH_ECOLI", "YDHR_ECOLI", "YDJA_ECOLI", 
"YEAG_ECOLI", "YEJL_ECOLI", "YFCZ_ECOLI", "YFIA_ECOLI", "IVY_ECOLI", 
"KPYK1_ECOLI", "NLPD_ECOLI", "YAJG_ECOLI", "OSMB_ECOLI", "OSME_ECOLI", 
"ECNB_ECOLI", "LPTE_ECOLI", "KBP_ECOLI", "YGFZ_ECOLI", "SUHB_ECOLI", 
"IMDH_ECOLI", "YIAF_ECOLI", "YIFE_ECOLI", "YIFL_ECOLI", "YIGB_ECOLI", 
"YIHD_ECOLI", "YGAM_ECOLI", "ZAPA_ECOLI", "YGGE_ECOLI", "YGIM_ECOLI", 
"YGIW_ECOLI", "MLAC_ECOLI", "YHCB_ECOLI", "YHHA_ECOLI", "PPID_ECOLI", 
"RL14_ECOLI", "RL16_ECOLI", "RL23_ECOLI", "RS15_ECOLI", "YAJC_ECOLI", 
"ACRA_ECOLI", "AHPC_ECOLI", "APHA_ECOLI", "BCP_ECOLI", "CHAB_ECOLI", 
"CPXP_ECOLI", "CPXR_ECOLI", "DACA_ECOLI", "USPA_ECOLI", "DCRB_ECOLI", 
"DGAL_ECOLI", "DSBA_ECOLI", "ELAB_ECOLI", "FABG_ECOLI", "FABI_ECOLI", 
"FLIY_ECOLI", "FUCM_ECOLI", "GALU_ECOLI", "GLCG_ECOLI", "GLNH_ECOLI", 
"GYRB_ECOLI", "HDEA_ECOLI", "HDEB_ECOLI", "HDHA_ECOLI", "HISJ_ECOLI", 
"SKP_ECOLI", "MALE_ECOLI", "MIND_ECOLI", "MLTD_ECOLI", "MOAB_ECOLI", 
"MOG_ECOLI", "APBC_ECOLI", "MTNN_ECOLI", "NAGA_ECOLI", "NAGD_ECOLI", 
"NARL_ECOLI", "ZAPB_ECOLI", "YJBR_ECOLI", "YJEI_ECOLI", "RRAB_ECOLI", 
"RIDA_ECOLI", "TABA_ECOLI", "NUOA_ECOLI", "NUOB_ECOLI", "NUOE_ECOLI", 
"NUOI_ECOLI", "NUSA_ECOLI", "NUSG_ECOLI", "ODO1_ECOLI", "ODO2_ECOLI", 
"ODP1_ECOLI", "OSMY_ECOLI", "PMBA_ECOLI", "POTD_ECOLI", "PPIA_ECOLI", 
"PROX_ECOLI", "PSIF_ECOLI", "PSPA_ECOLI", "PSPB_ECOLI", "GCH1L_ECOLI", 
"YCIO_ECOLI", "RISA_ECOLI", "RMF_ECOLI", "ROF_ECOLI", "SEQA_ECOLI", 
"SSPB_ECOLI", "RPE_ECOLI", "PUR1_ECOLI", "PURE_ECOLI", "YIBN_ECOLI", 
"RHO_ECOLI", "RL17_ECOLI", "RL21_ECOLI", "RL30_ECOLI", "RL6_ECOLI", 
"RS14_ECOLI", "RS17_ECOLI", "RS1_ECOLI", "SUBI_ECOLI", "UGPB_ECOLI", 
"PSTS_ECOLI", "YGHA_ECOLI", "SECB_ECOLI", "SECG_ECOLI", "SODC_ECOLI", 
"SODF_ECOLI", "SRP54_ECOLI", "SSB_ECOLI", "CHRR_ECOLI", "SUCD_ECOLI", 
"TESB_ECOLI", "TLDD_ECOLI", "YFIF_ECOLI", "SYY_ECOLI", "TDCF_ECOLI", 
"RL18_ECOLI", "PPNP_ECOLI", "IBPA_ECOLI", "IBPB_ECOLI", "OSMC_ECOLI", 
"MSCS_ECOLI", "DEGP_ECOLI", "GATY_ECOLI", "EFTU2_ECOLI", "RCSB_ECOLI", 
"RLPA_ECOLI", "FTSY_ECOLI", "FADL_ECOLI", "SECA_ECOLI", "NARH_ECOLI", 
"DAMX_ECOLI", "SYR_ECOLI", "MOEA_ECOLI", "UDP_ECOLI", "FDHE_ECOLI", 
"KATG_ECOLI", "GLPB_ECOLI", "GLPD_ECOLI", "RPOS_ECOLI", "TREA_ECOLI", 
"PROV_ECOLI", "AMPP_ECOLI", "PEPD_ECOLI", "PUR9_ECOLI", "PUR2_ECOLI", 
"SELD_ECOLI", "SYP_ECOLI", "CYSP_ECOLI", "NFSA_ECOLI", "GLMS_ECOLI", 
"CYSI_ECOLI", "CYSH_ECOLI", "NADE_ECOLI", "AGP_ECOLI", "FIC_ECOLI", 
"FADA_ECOLI", "PEPQ_ECOLI", "SPEA_ECOLI", "CATE_ECOLI", "GLTP_ECOLI", 
"YCIF_ECOLI", "YCIE_ECOLI", "YCAC_ECOLI", "RNE_ECOLI", "KPYK2_ECOLI", 
"SYC_ECOLI", "SYD_ECOLI", "ASNB_ECOLI", "CYSQ_ECOLI", "GABT_ECOLI", 
"PCKA_ECOLI", "ADD_ECOLI", "RIHC_ECOLI", "SERC_ECOLI", "ECOT_ECOLI", 
"PHOP_ECOLI", "YICC_ECOLI", "OPPA_ECOLI", "DPPA_ECOLI", "PSPE_ECOLI", 
"PPIB_ECOLI", "GSA_ECOLI", "MAK_ECOLI", "ACCC_ECOLI", "FOLD_ECOLI", 
"UXUA_ECOLI", "FRMA_ECOLI", "ACNA_ECOLI", "MNME_ECOLI", "GABD_ECOLI", 
"ALDA_ECOLI", "MSYB_ECOLI", "LOIP_ECOLI", "PDXI_ECOLI", "AMY2_ECOLI", 
"MAO1_ECOLI", "ACUI_ECOLI", "GCST_ECOLI", "AHR_ECOLI", "OPDA_ECOLI", 
"TKT1_ECOLI", "STHA_ECOLI", "RODZ_ECOLI", "ACSA_ECOLI", "DCD_ECOLI", 
"QOR1_ECOLI", "METQ_ECOLI", "TREC_ECOLI", "ALR2_ECOLI", "YCEH_ECOLI", 
"PEPT_ECOLI", "NADC_ECOLI", "YBIB_ECOLI", "ARTI_ECOLI", "ARTJ_ECOLI", 
"YAFC_ECOLI", "PANB_ECOLI", "YEDD_ECOLI", "GLMM_ECOLI", "YDEI_ECOLI", 
"THTM_ECOLI", "LPTD_ECOLI", "HCHA_ECOLI", "PANC_ECOLI", "OTSA_ECOLI", 
"NUOF_ECOLI", "YIIM_ECOLI", "FDOG_ECOLI", "SBMC_ECOLI", "PSUT_ECOLI", 
"OPGG_ECOLI", "GCSP_ECOLI", "YEBF_ECOLI", "LLDD_ECOLI", "YEHZ_ECOLI", 
"BGLX_ECOLI", "YOHF_ECOLI", "TKT2_ECOLI", "NUOCD_ECOLI", "NUOG_ECOLI", 
"YDCF_ECOLI", "AHPF_ECOLI", "CUEO_ECOLI", "CBPA_ECOLI", "ZAPD_ECOLI", 
"ACNB_ECOLI", "PGM_ECOLI", "PEPB_ECOLI", "SLP_ECOLI", "MASZ_ECOLI", 
"XYLF_ECOLI", "UCPA_ECOLI", "PMRD_ECOLI", "YHII_ECOLI", "MDTE_ECOLI", 
"YHJD_ECOLI", "KDGK_ECOLI", "YHJJ_ECOLI", "GHRB_ECOLI", "ALDB_ECOLI", 
"ADH2_ECOLI", "GPMI_ECOLI", "RMLA1_ECOLI", "RMLC_ECOLI", "RMLB1_ECOLI", 
"KDUD_ECOLI", "GLTI_ECOLI", "USPF_ECOLI", "CYSJ_ECOLI", "NFSB_ECOLI", 
"YGGL_ECOLI", "DEGQ_ECOLI", "UXUB_ECOLI", "USPG_ECOLI", "AG43_ECOLI", 
"YTFJ_ECOLI", "PRMB_ECOLI", "ALSB_ECOLI", "RSGA_ECOLI", "YTFQ_ECOLI", 
"YJGR_ECOLI", "IADA_ECOLI", "RSMC_ECOLI", "YDFG_ECOLI", "PDXK_ECOLI", 
"RIHA_ECOLI", "PAT_ECOLI", "YQJG_ECOLI", "OBG_ECOLI", "LPOA_ECOLI", 
"YHBO_ECOLI", "FKBA_ECOLI", "PROQ_ECOLI", "TSAC_ECOLI", "NUDE_ECOLI", 
"CPOB_ECOLI", "YBHC_ECOLI", "6PGL_ECOLI", "DLHH_ECOLI", "RSMH_ECOLI", 
"RL2_ECOLI", "RL3_ECOLI", "KGUA_ECOLI", "RL24_ECOLI", "SPEB_ECOLI", 
"HIS6_ECOLI", "LIPA_ECOLI", "RL4_ECOLI", "HIS1_ECOLI", "SYH_ECOLI", 
"RL22_ECOLI", "LOLA_ECOLI", "LOLB_ECOLI", "CAN_ECOLI", "RISB_ECOLI", 
"MDH_ECOLI", "FLAV_ECOLI", "RL5_ECOLI", "GPMA_ECOLI", "YAEH_ECOLI", 
"NFUA_ECOLI", "GMHA_ECOLI", "GADC_ECOLI", "CLPB_ECOLI", "MLAF_ECOLI", 
"YDCL_ECOLI", "RCNB_ECOLI", "YQJD_ECOLI", "YRAP_ECOLI", "MLAD_ECOLI", 
"YGDI_ECOLI", "BEPA_ECOLI", "PLPHP_ECOLI", "YFCE_ECOLI", "HLDD_ECOLI", 
"GRCA_ECOLI", "SRA_ECOLI", "YJBJ_ECOLI", "RS21_ECOLI", "AMPA_ECOLI", 
"RL25_ECOLI", "DHSC_ECOLI", "IF1_ECOLI", "RCSF_ECOLI", "TATA_ECOLI", 
"KAD_ECOLI", "APT_ECOLI", "YTFE_ECOLI", "LPP_ECOLI", "PTGA_ECOLI", 
"PTNAB_ECOLI", "PTND_ECOLI", "PTFAH_ECOLI", "PTKA_ECOLI", "PTSN_ECOLI", 
"DCEB_ECOLI", "CSRA_ECOLI", "RIR2_ECOLI", "YBFF_ECOLI", "YBJP_ECOLI", 
"LTAE_ECOLI", "GLO22_ECOLI", "YCCU_ECOLI", "GHRA_ECOLI", "LOLD_ECOLI", 
"DHAL_ECOLI", "DHAK_ECOLI", "YDCS_ECOLI", "CURA_ECOLI", "YNCE_ECOLI", 
"LSRF_ECOLI", "TAM_ECOLI", "YNFD_ECOLI", "YDGH_ECOLI", "YDHF_ECOLI", 
"YNHG_ECOLI", "YEAO_ECOLI", "DMLA_ECOLI", "DCYD_ECOLI", "MPGP_ECOLI", 
"MTFA_ECOLI", "WZZB_ECOLI", "YEGP_ECOLI", "YFGM_ECOLI", "ZIPA_ECOLI", 
"GCS2_ECOLI", "HXPB_ECOLI", "YDEN_ECOLI", "HYFG_ECOLI", "CNOX_ECOLI", 
"GLSA1_ECOLI", "SUFC_ECOLI", "YPHE_ECOLI", "ASTC_ECOLI", "ABDH_ECOLI", 
"GNSB_ECOLI", "YBAY_ECOLI", "YAJO_ECOLI", "KT3K_ECOLI", "BAMB_ECOLI", 
"YFCH_ECOLI", "YDGA_ECOLI", "YIBT_ECOLI", "YGHU_ECOLI", "GPR_ECOLI", 
"YQHD_ECOLI", "DKGA_ECOLI", "KDUI_ECOLI"))
complexSpecies <-
"all"
complexDbPath <-
NULL
stringVersion <-
"11.5"
stringDir <-
NULL
linkTableColumns <-
"[^se]\\.logFC$"

This report describes a reproducible end-to-end analysis of a proteomics dataset quantified with MaxQuant (Cox and Mann 2008). Most of the code is hidden by default, but can be displayed by clicking on the Code buttons (or by selecting Code -> Show All Code in the top right corner of the report). Navigation between the different sections can be done via the table of contents in the left sidebar. In the first part of the report, the quantified data is read into R and passed through a range of processing and quality control steps. These are followed by statistical analysis to find and visualize differentially abundant features. A summary table provides direct links to external resources, and additional global overviews of the data are provided via principal component analysis and summary heatmaps.

## Get species info and define STRINGdb object
speciesInfo <- getSpeciesInfo(species)
if (is.null(stringDir)) stringDir <- ""
if (is.null(stringIdCol)) {
    ## If no STRING IDs are extracted, don't do STRING analysis
    string_db <- NULL
} else {
    string_db <- tryCatch({
        tmp <- STRINGdb$new(version = stringVersion, species = speciesInfo$taxId, 
                            score_threshold = 400, input_directory = stringDir)
        if (!exists("tmp")) {
            warning("The STRINGdb object can not be created. ", 
                    "No STRING analysis will be performed.", 
                    call. = FALSE)
            tmp <- NULL
        } else {
            print(tmp)
        }
        tmp
    }, error = function(e) {
        warning("The STRINGdb object can not be created. ", 
                "No STRING analysis will be performed.", 
                call. = FALSE)
        NULL
    })
}

## If needed and not provided, define path to complex DB 
## (will be added to summary table below)
if ("complexes" %in% includeFeatureCollections && is.null(complexDbPath)) {
    complexDbPath <- system.file(EINPROT_COMPLEXES_FILE,
                                 package = "einprot")
}

## Get conversion tables for PomBase and WormBase IDs
pbconv <- readRDS(system.file(EINPROT_POMBASE_CONVTABLE,
                              package = "einprot"))
wbconv <- readRDS(system.file(EINPROT_WORMBASE_CONVTABLE,
                              package = "einprot"))

Experiment details

makeTableFromList(c(experimentInfo, 
                    list(
                        "Species" = speciesInfo$species,
                        "Species (common)" = speciesInfo$speciesCommon,
                        "Taxonomic ID" = speciesInfo$taxId
                    )))

Species	Homo sapiens
Species (common)	human
Taxonomic ID	9606

MaxQuant analysis summary

mq <- readMaxQuantXML(mqParameterFile)
mq <- c(list("MaxQuant file" = mqFile), mq)
makeTableFromList(mq)

MaxQuant file	/Users/charlottesoneson/Documents/Rpackages/einprot/einprot_examples/ionstar_mq/MAXQuant_IonStar2018_PXD003881/proteinGroups.txt
MaxQuant version	1.6.10.43
Parameter file	/Users/charlottesoneson/Documents/Rpackages/einprot/einprot_examples/ionstar_mq/MAXQuant_IonStar2018_PXD003881/mqpar.xml
Search engine	Andromeda
Raw file location	D:/processing_backup/LFQ_Benchmark_DATA/IONStarBenchmarkData_PXD003881/RAW_DATA/
Raw files	B03_02_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B.raw, B03_03_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B.raw, B03_04_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B.raw, B03_05_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B.raw, B03_06_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B.raw, B03_07_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B.raw, B03_08_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B.raw, B03_09_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B.raw, B03_10_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B.raw, B03_11_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B.raw, B03_12_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B.raw, B03_13_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B.raw, B03_14_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B.raw, B03_15_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B.raw, B03_16_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B.raw, B03_17_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B.raw, B03_18_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B.raw, B03_19_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B.raw, B03_20_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B.raw, B03_21_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B.raw
Sample names	B03_02_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B, B03_03_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B, B03_04_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B, B03_05_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B, B03_06_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B, B03_07_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B, B03_08_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B, B03_09_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B, B03_10_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B, B03_11_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B, B03_12_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B, B03_13_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B, B03_14_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B, B03_15_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B, B03_16_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B, B03_17_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B, B03_18_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B, B03_19_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B, B03_20_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B, B03_21_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B
Databases	D:/processing_backup/LFQ_Benchmark_DATA/IONStarBenchmarkData_PXD003881/RAW_DATA/uniprot-proteome_UP000000625+reviewed_yes.fasta, >([^/s]), >(.), D:/processing_backup/LFQ_Benchmark_DATA/IONStarBenchmarkData_PXD003881/RAW_DATA/uniprot-proteome_UP000005640+reviewed_yes.fasta, >([^/s]), >(.)
Contaminants	*/MaxQuant_1.6.10.43/MaxQuant/bin/conf/contaminants.fasta
Quantification mode	1
Quantification settings (LFQ)	LFQ min. ratio count: 2, fastLFQ: True, match-between runs (MBR): True, Intensity based absolute quantification (iBAQ): False, Top3 quantification (top3): False
Min. razor peptides	1
Requantify	False
Enzymes	Trypsin/P
Variable modifications	Oxidation (M), Acetyl (Protein N-term)
Fixed modifications	Carbamidomethyl (C)
Max peptide mass	4600
Min peptide length	7

Settings

settingsList <- list(
    "Column pattern" = iColPattern,
    "Include only samples (if applicable)" = paste(includeOnlySamples, 
                                                   collapse = ", "),
    "Exclude samples" = paste(excludeSamples, collapse = ", "),
    "Min. number of peptides" = minPeptides,
    "Min. protein score" = minScore,
    "Imputation method" = imputeMethod,
    "Assays(s) to use for exported values" = paste(assaysForExport, collapse = ", "), 
    "Min. nbr valid values" = minNbrValidValues,
    "Model fit" = ifelse(singleFit, "Single (one model fit for all samples)", 
                         "Separate model fit for each comparison"),
    "Groups to merge" = paste(unlist(
        lapply(names(mergeGroups), 
               function(nm) paste0(nm, ":", paste(mergeGroups[[nm]], collapse = ",")))),
        collapse = "; "),
    "Comparisons" = paste(unlist(lapply(comparisons, 
                                        function(x) paste(x, collapse = " vs "))),
                          collapse = "; "),
    "Control group" = ctrlGroup,
    "Do all pairwise comparisons" = allPairwiseComparisons,
    "Subtract baseline" = subtractBaseline,
    "Baseline group" = baselineGroup,
    "Normalization method" = normMethod,
    "Spike features" = paste(spikeFeatures, collapse = ","),
    "Statistical test" = stattest,
    "Minimal fold change (limma/treat)" = minlFC,
    "Adjusted p-value threshold for volcano plots" = volcanoAdjPvalThr,
    "Log2 FC threshold for volcano plots" = volcanoLog2FCThr,
    "Max nbr features to indicate in volcano plots" = volcanoMaxFeatures,
    "Sign of features to indicate in volcano plots" = volcanoLabelSign,
    "Use SAM statistic for significance" = samSignificance,
    "s0" = volcanoS0,
    "Features to always label in volcano plots" = paste(volcanoFeaturesToLabel,
                                                        collapse = ", "),
    "Feature collections" = paste(includeFeatureCollections, collapse = "; "),
    "Min size to keep feature set" = minSizeToKeepSet,
    "Complexes file" = gsub(".+\\/(.+.rds)", "\\1", complexDbPath),
    "Complexes from species" = complexSpecies,
    "Custom complexes" = paste(names(customComplexes), collapse = ";"),
    "FDR Threshold for complexes" = complexFDRThr,
    "Max nbr complexes to plot" = maxNbrComplexesToPlot,
    "Number of permutations" = nperm,
    "Random seed" = seed,
    "Columns to add in link table" = paste(linkTableColumns, collapse = ";"),
    "Interactive display columns" = paste(interactiveDisplayColumns, collapse = ";"),
    "Interactive group column" = interactiveGroupColumn
)

if (stattest == "ttest") {
    settingsList <- settingsList[!(names(settingsList) %in% 
                                       c("Minimal fold change (limma/treat)",
                                         "Log2 FC threshold for volcano plots"))]
}
if (stattest %in% c("limma", "proDA")) {
    settingsList <- settingsList[!(names(settingsList) %in% 
                                       c("s0", "Number of permutations", 
                                         "Use SAM statistic for significance"))]
}

makeTableFromList(settingsList)

Column pattern	^LFQ.intensity.
Include only samples (if applicable)
Exclude samples
Min. number of peptides	2
Min. protein score	10
Imputation method	MinProb
Assays(s) to use for exported values	iBAQ, Top3
Min. nbr valid values	2
Model fit	Separate model fit for each comparison
Groups to merge
Comparisons	a vs b; a vs c; d vs e
Control group
Do all pairwise comparisons	TRUE
Subtract baseline	FALSE
Baseline group
Normalization method	none
Spike features
Statistical test	limma
Minimal fold change (limma/treat)	0
Adjusted p-value threshold for volcano plots	0.05
Log2 FC threshold for volcano plots	0
Max nbr features to indicate in volcano plots	25
Sign of features to indicate in volcano plots	both
Features to always label in volcano plots
Feature collections
Min size to keep feature set	2
Complexes from species	all
Custom complexes	ECOLI_GENES
FDR Threshold for complexes	0.1
Max nbr complexes to plot	Inf
Random seed	42
Columns to add in link table	[^se].logFC$
Interactive display columns	einprotLabel;adj.P.Val;logFC

Read MaxQuant output

The input to this workflow is a proteinGroups.txt file from MaxQuant (see path in the table above). We read the MaxQuant intensities into R and store them in a SingleCellExperiment object. This object will later be expanded with additional data, such as transformed and imputed abundances.

At this point, the SingleCellExperiment object holds assays with the different types of intensities and annotations from the MaxQuant file.

## Read MaxQuant output
tmp <- importExperiment(inFile = mqFile, iColPattern = iColPattern,
                        includeOnlySamples = includeOnlySamples,
                        excludeSamples = excludeSamples)
sce <- tmp$sce
aName <- tmp$aName

sce

## class: SingleCellExperiment 
## dim: 4957 20 
## metadata(1): colList
## assays(7): LFQ.intensity Intensity ... Peptides Identification.type
## rownames(4957): 1 2 ... 4956 4957
## rowData names(52): Protein.IDs Majority.protein.IDs ...
##   Oxidation.M.site.IDs Oxidation.M.site.positions
## colnames(20):
##   B03_02_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B
##   B03_03_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B ...
##   B03_20_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B
##   B03_21_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B
## colData names(0):
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):

Click to see which columns from the input file were used to form each of the assays in the SingleCellExperiment object.

S4Vectors::metadata(sce)$colList

## $LFQ.intensity
##  [1] "LFQ.intensity.B03_02_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [2] "LFQ.intensity.B03_03_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [3] "LFQ.intensity.B03_04_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [4] "LFQ.intensity.B03_05_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [5] "LFQ.intensity.B03_06_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [6] "LFQ.intensity.B03_07_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [7] "LFQ.intensity.B03_08_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [8] "LFQ.intensity.B03_09_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [9] "LFQ.intensity.B03_10_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [10] "LFQ.intensity.B03_11_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [11] "LFQ.intensity.B03_12_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [12] "LFQ.intensity.B03_13_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [13] "LFQ.intensity.B03_14_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [14] "LFQ.intensity.B03_15_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [15] "LFQ.intensity.B03_16_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [16] "LFQ.intensity.B03_17_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [17] "LFQ.intensity.B03_18_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [18] "LFQ.intensity.B03_19_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [19] "LFQ.intensity.B03_20_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [20] "LFQ.intensity.B03_21_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## 
## $Intensity
##  [1] "Intensity.B03_02_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [2] "Intensity.B03_03_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [3] "Intensity.B03_04_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [4] "Intensity.B03_05_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [5] "Intensity.B03_06_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [6] "Intensity.B03_07_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [7] "Intensity.B03_08_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [8] "Intensity.B03_09_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [9] "Intensity.B03_10_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [10] "Intensity.B03_11_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [11] "Intensity.B03_12_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [12] "Intensity.B03_13_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [13] "Intensity.B03_14_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [14] "Intensity.B03_15_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [15] "Intensity.B03_16_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [16] "Intensity.B03_17_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [17] "Intensity.B03_18_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [18] "Intensity.B03_19_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [19] "Intensity.B03_20_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [20] "Intensity.B03_21_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## 
## $Sequence.coverage
##  [1] "Sequence.coverage.B03_02_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B...."
##  [2] "Sequence.coverage.B03_03_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B...."
##  [3] "Sequence.coverage.B03_04_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B...."
##  [4] "Sequence.coverage.B03_05_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B...."
##  [5] "Sequence.coverage.B03_06_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B...."
##  [6] "Sequence.coverage.B03_07_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B...."
##  [7] "Sequence.coverage.B03_08_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B...."
##  [8] "Sequence.coverage.B03_09_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B...."
##  [9] "Sequence.coverage.B03_10_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B...."
## [10] "Sequence.coverage.B03_11_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B...."
## [11] "Sequence.coverage.B03_12_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B...."
## [12] "Sequence.coverage.B03_13_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B...."
## [13] "Sequence.coverage.B03_14_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B...."
## [14] "Sequence.coverage.B03_15_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B...."
## [15] "Sequence.coverage.B03_16_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B...."
## [16] "Sequence.coverage.B03_17_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B...."
## [17] "Sequence.coverage.B03_18_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B...."
## [18] "Sequence.coverage.B03_19_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B...."
## [19] "Sequence.coverage.B03_20_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B...."
## [20] "Sequence.coverage.B03_21_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B...."
## 
## $Unique.peptides
##  [1] "Unique.peptides.B03_02_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [2] "Unique.peptides.B03_03_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [3] "Unique.peptides.B03_04_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [4] "Unique.peptides.B03_05_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [5] "Unique.peptides.B03_06_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [6] "Unique.peptides.B03_07_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [7] "Unique.peptides.B03_08_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [8] "Unique.peptides.B03_09_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [9] "Unique.peptides.B03_10_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [10] "Unique.peptides.B03_11_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [11] "Unique.peptides.B03_12_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [12] "Unique.peptides.B03_13_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [13] "Unique.peptides.B03_14_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [14] "Unique.peptides.B03_15_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [15] "Unique.peptides.B03_16_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [16] "Unique.peptides.B03_17_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [17] "Unique.peptides.B03_18_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [18] "Unique.peptides.B03_19_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [19] "Unique.peptides.B03_20_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [20] "Unique.peptides.B03_21_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## 
## $Razor.unique.peptides
##  [1] "Razor...unique.peptides.B03_02_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [2] "Razor...unique.peptides.B03_03_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [3] "Razor...unique.peptides.B03_04_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [4] "Razor...unique.peptides.B03_05_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [5] "Razor...unique.peptides.B03_06_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [6] "Razor...unique.peptides.B03_07_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [7] "Razor...unique.peptides.B03_08_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [8] "Razor...unique.peptides.B03_09_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [9] "Razor...unique.peptides.B03_10_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [10] "Razor...unique.peptides.B03_11_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [11] "Razor...unique.peptides.B03_12_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [12] "Razor...unique.peptides.B03_13_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [13] "Razor...unique.peptides.B03_14_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [14] "Razor...unique.peptides.B03_15_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [15] "Razor...unique.peptides.B03_16_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [16] "Razor...unique.peptides.B03_17_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [17] "Razor...unique.peptides.B03_18_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [18] "Razor...unique.peptides.B03_19_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [19] "Razor...unique.peptides.B03_20_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [20] "Razor...unique.peptides.B03_21_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## 
## $Peptides
##  [1] "Peptides.B03_02_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [2] "Peptides.B03_03_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [3] "Peptides.B03_04_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [4] "Peptides.B03_05_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [5] "Peptides.B03_06_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [6] "Peptides.B03_07_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [7] "Peptides.B03_08_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [8] "Peptides.B03_09_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [9] "Peptides.B03_10_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [10] "Peptides.B03_11_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [11] "Peptides.B03_12_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [12] "Peptides.B03_13_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [13] "Peptides.B03_14_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [14] "Peptides.B03_15_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [15] "Peptides.B03_16_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [16] "Peptides.B03_17_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [17] "Peptides.B03_18_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [18] "Peptides.B03_19_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [19] "Peptides.B03_20_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [20] "Peptides.B03_21_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## 
## $Identification.type
##  [1] "Identification.type.B03_02_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [2] "Identification.type.B03_03_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [3] "Identification.type.B03_04_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [4] "Identification.type.B03_05_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [5] "Identification.type.B03_06_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [6] "Identification.type.B03_07_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [7] "Identification.type.B03_08_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [8] "Identification.type.B03_09_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
##  [9] "Identification.type.B03_10_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [10] "Identification.type.B03_11_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [11] "Identification.type.B03_12_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [12] "Identification.type.B03_13_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [13] "Identification.type.B03_14_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [14] "Identification.type.B03_15_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [15] "Identification.type.B03_16_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [16] "Identification.type.B03_17_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [17] "Identification.type.B03_18_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [18] "Identification.type.B03_19_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [19] "Identification.type.B03_20_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"
## [20] "Identification.type.B03_21_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B"

Add sample annotations

Next, we compile the sample annotations. The sample IDs have been extracted from the column names in the MaxQuant file, by removing the provided iColPattern from the main intensity columns. The group column will be used to define groups for the statistical testing later. If a batch column is present, that will also be accounted for in the limma model. See the Comparisons and design section below for more details about the fitted model(s). Please check that the table below correspond to your expectations.

sce <- addSampleAnnots(sce, sampleAnnot = sampleAnnot)

DT::datatable(as.data.frame(colData(sce)),
              options = list(scrollX = TRUE, pageLength = 20))

Overview of the workflow

We already now define the names of the assays we will be generating and using later in the workflow.

aNames <- defineAssayNames(aName = aName, normMethod = normMethod, 
                           doBatchCorr = "batch" %in% colnames(colData(sce)))
makeTableFromList(aNames)

assayInput	LFQ.intensity
assayLog2WithNA	log2_LFQ.intensity_withNA
assayImputIndic	imputed_LFQ.intensity
assayLog2NormWithNA	log2_LFQ.intensity_withNA
assayImputed	log2_LFQ.intensity
assayBatchCorr	log2_LFQ.intensity

The figure below provides a high-level overview of the workflow, and where the assays defined above will be generated. Note that depending on the settings specified by the user, not all steps may be performed (this is typically indicated by multiple assay names in the table above being equal).

knitr::include_graphics(system.file("extdata", "einprot_workflow.png", 
                                    package = "einprot"))

Overall distribution of protein intensities

The box plot below displays the distribution of raw protein intensities in each sample, on a log scale (excluding any missing values), as well as the values in the input assay defined above.

if ("Intensity" %in% assayNames(sce)) {
    print(makeIntensityBoxplots(sce = sce, assayName = "Intensity", 
                                doLog = TRUE, ylab = "Intensity"))
}

makeIntensityBoxplots(sce = sce, assayName = aNames$assayInput, doLog = TRUE, 
                      ylab = aNames$assayInput)

Filter out contaminants, reverse hits, and proteins with low confidence

Next, we filter out any features classified by MaxQuant as potential contaminants or reverse (decoy) hits, and features identified only by site (which removes proteins that are only identified by peptides carrying one or more modified amino acids). In addition, we may remove protein identifications based on score and number of peptides (i.e. to exclude one-hit wonders). The excluded features, together with the available annotations, are written to a text file. The UpSet plot below illustrates the overlaps between the sets of features filtered out based on the different criteria (vertical bars).

nbrFeaturesBefore <- nrow(sce)
sce <- filterMaxQuant(sce = sce, minScore = minScore, minPeptides = minPeptides, 
                      plotUpset = TRUE, 
                      exclFile = sub("\\.Rmd$", paste0("_filtered_out_features.txt"), knitr::current_input(dir = TRUE)))

nbrFeaturesAfter <- nrow(sce)
if (nbrFeaturesAfter == 0) {
    stop("No features left after filtering!")
}

This filtering removed 2019 features. The new sce object has 2938 features.

Modify feature names

The feature ID used when reading the data above are numeric indices. We replace these IDs with more interpretable ones, defined by the idCol argument. We also add columns holding gene IDs (if applicable), protein IDs, IDs for matching to STRING (if applicable) and IDs for labelling points in plots.

sce <- fixFeatureIds(
    sce, 
    colDefs = list(einprotId = idCol, einprotLabel = labelCol,
                   einprotGene = geneIdCol, einprotProtein = proteinIdCol,
                   IDsForSTRING = stringIdCol)
)
if (any(duplicated(rowData(sce)[["einprotId"]]))) {
    stop("The 'einprotId' column cannot have duplicated entries.")
}
rownames(sce) <- rowData(sce)[["einprotId"]]

Click to see examples of the defined feature identifiers

##                einprotId einprotGene einprotProtein einprotLabel IDsForSTRING
## 1   sp|A0AVT1|UBA6_HUMAN  UBA6_HUMAN         A0AVT1   UBA6_HUMAN         <NA>
## 2  sp|A0FGR8|ESYT2_HUMAN ESYT2_HUMAN         A0FGR8  ESYT2_HUMAN         <NA>
## 3  sp|A0MZ66|SHOT1_HUMAN SHOT1_HUMAN         A0MZ66  SHOT1_HUMAN         <NA>
## 4  sp|A1L0T0|ILVBL_HUMAN ILVBL_HUMAN         A1L0T0  ILVBL_HUMAN         <NA>
## 5  sp|A3KN83|SBNO1_HUMAN SBNO1_HUMAN         A3KN83  SBNO1_HUMAN         <NA>
## 6                    ...         ...            ...          ...          ...
## 7   sp|Q9Y6M7|S4A7_HUMAN  S4A7_HUMAN         Q9Y6M7   S4A7_HUMAN         <NA>
## 8  sp|Q9Y6M9|NDUB9_HUMAN NDUB9_HUMAN         Q9Y6M9  NDUB9_HUMAN         <NA>
## 9   sp|Q9Y6N5|SQOR_HUMAN  SQOR_HUMAN         Q9Y6N5   SQOR_HUMAN         <NA>
## 10 sp|Q9Y6X9|MORC2_HUMAN MORC2_HUMAN         Q9Y6X9  MORC2_HUMAN         <NA>
## 11 sp|Q9Y6Y8|S23IP_HUMAN S23IP_HUMAN         Q9Y6Y8  S23IP_HUMAN         <NA>

Prepare feature collections for later testing

In addition to testing individual proteins for differential abundance between groups, we can also test collections of proteins. Here we define the collections that will be used.

if (is.null(geneIdCol)) {
    ## If no gene IDs are extracted, don't compare to feature collections
    featureCollections <- list()
} else {
    (featureCollections <- prepareFeatureCollections(
        sce = sce, idCol = "einprotGene", 
        includeFeatureCollections = includeFeatureCollections,
        complexDbPath = complexDbPath, speciesInfo = speciesInfo,
        complexSpecies = complexSpecies, customComplexes = customComplexes,
        minSizeToKeep = minSizeToKeepSet))
}

## $complexes
## CharacterList of length 1
## [["ECOLI_GENES"]] sp|P00350|6PGD_ECOLI ... sp|Q46938|KDUI_ECOLI

## Remove collections without any sets
featureCollections <- featureCollections[lapply(featureCollections, length) > 0]

Apply a log2 transformation

Before the downstream analysis, we log2-transform the measured intensities. We also add an additional assay to keep track of the position of the missing values (which will be imputed later).

assay(sce, aNames$assayLog2WithNA) <- log2(assay(sce, aNames$assayInput))

## Add assay indicating missing values, which will be imputed
assay(sce, aNames$assayImputIndic) <- !is.finite(assay(sce, aNames$assayLog2WithNA))
sce

## class: SingleCellExperiment 
## dim: 2938 20 
## metadata(1): colList
## assays(9): LFQ.intensity Intensity ... log2_LFQ.intensity_withNA
##   imputed_LFQ.intensity
## rownames(2938): sp|A0AVT1|UBA6_HUMAN sp|A0FGR8|ESYT2_HUMAN ...
##   sp|Q9Y6X9|MORC2_HUMAN sp|Q9Y6Y8|S23IP_HUMAN
## rowData names(57): Protein.IDs Majority.protein.IDs ... einprotProtein
##   IDsForSTRING
## colnames(20):
##   B03_02_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B
##   B03_03_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B ...
##   B03_20_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B
##   B03_21_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B
## colData names(2): sample group
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):

Visualize missing value patterns

The plot below shows the fraction of the total set of features that are detected (with a non-missing value) in each of the samples.

## Replace zeros/-Inf values by explicit NA values in the assays
assay(sce, aNames$assayInput)[assay(sce, aNames$assayInput) == 0] <- NA
assay(sce, aNames$assayLog2WithNA)[!is.finite(assay(sce, aNames$assayLog2WithNA))] <- NA

## Count number of NA values and add to SCE
colData(sce)$nNA <- colSums(is.na(assay(sce, aNames$assayInput)))
colData(sce)$pNA <- 100 * sce$nNA/nrow(sce)
rowData(sce)$nNA <- rowSums(is.na(assay(sce, aNames$assayInput)))
rowData(sce)$pNA <- 100 * rowData(sce)$nNA/ncol(sce)

plotFractionDetectedPerSample(dfNA = as.data.frame(colData(sce)[, c("sample", "nNA", "pNA")]))

We also plot the number of features that are detected in a given number of samples.

plotDetectedInSamples(dfNA = as.data.frame(rowData(sce)[, c("nNA", "pNA")]))

Finally we show the full missing value structure in the data matrix. The white values correspond to missing values, and the grey ones to observed intensities.

plotMissingValuesHeatmap(sce, assayMissing = aNames$assayImputIndic)

The log2 intensities are not normalized further across samples since ‘normMethod’ is set to ‘none’.

Imputation

Next, we apply the MinProb method to perform imputation of the log2-transformed data, and plot the distribution of imputed and non-imputed values in each sample.

set.seed(seed)
sce <- doImputation(sce, method = imputeMethod, 
                    assayName = aNames$assayLog2NormWithNA,
                    imputedAssayName = aNames$assayImputed)

## [1] 0.1300254

plotImputationDistribution(sce, assayToPlot = aNames$assayImputed, 
                           assayImputation = aNames$assayImputIndic,
                           xlab = aNames$assayImputed)

Overall distribution of log2 protein intensities

Next we consider the overall distribution of log2-intensities among the samples (after imputation).

makeIntensityBoxplots(sce = sce, assayName = aNames$assayImputed,
                      doLog = FALSE, 
                      ylab = aNames$assayImputed)

if ("batch" %in% colnames(colData(sce))) {
    if (subtractBaseline) {
        assay(sce, aNames$assayBatchCorr) <- 
            getMatSubtractedBaseline(sce, assayName = aNames$assayImputed, 
                                     baselineGroup = baselineGroup, 
                                     sceFull = sce)
    } else {
        assay(sce, aNames$assayBatchCorr) <- 
            removeBatchEffect(assay(sce, aNames$assayImputed), 
                              batch = sce$batch, 
                              design = model.matrix(~ sce$group))
    }
}

Statistical testing

For each feature, we then compare the (possibly imputed) log2 intensities between groups. For this, we use the limma R/Bioconductor package (Ritchie et al. 2015; Phipson et al. 2016). For more information about the df.prior, representing the amount of extra information that is borrowed from the full set of features in order to improve the inference for each feature, see section 13.2 in the limma user guide. In addition to the feature-wise tests, we apply the camera method (Wu and Smyth 2012) to test for significance of each included feature collection. These tests are based on the t-statistics returned from limma.

Comparisons and design

## Set the assay to use for tests later
if (stattest == "proDA") {
    assayForTests <- aNames$assayLog2NormWithNA
} else {
    assayForTests <- aNames$assayImputed
}

The log2_LFQ.intensity assay will be used for the tests. The following pairwise comparisons will be performed (in each case, the first listed group will be the ‘baseline’ group):

if (subtractBaseline) {
    discardGroup <- baselineGroup
} else {
    discardGroup <- NULL
}
if (stattest == "none") {
    comparisonList <- list(comparisons = list(),
                           groupComposition = list())
} else {
    comparisonList <- makeListOfComparisons(
        allGroups = unique(sce$group), comparisons = comparisons, 
        mergeGroups = mergeGroups, 
        allPairwiseComparisons = allPairwiseComparisons,
        ctrlGroup = ctrlGroup, discardGroup = discardGroup)
}

Click to expand list of comparisons

comparisonList$comparisons

## $b_vs_a_explfc0.58
## [1] "a" "b"
## 
## $c_vs_a_explfc1
## [1] "a" "c"
## 
## $e_vs_d_explfc0.26
## [1] "d" "e"

Click to expand list of group compositions

comparisonList$groupComposition

## $b
## [1] "b"
## 
## $c
## [1] "c"
## 
## $d
## [1] "d"
## 
## $e
## [1] "e"
## 
## $a
## [1] "a"

if (any(assaysForExport %in% assayNames(sce))) {
    assaysForExport <- intersect(assaysForExport, assayNames(sce))
} else {
    assaysForExport <- aNames$assayInput
}

testres <- runTest(sce = sce, comparisons = comparisonList$comparisons,
                   groupComposition = comparisonList$groupComposition, 
                   testType = stattest, 
                   assayForTests = assayForTests,
                   assayImputation = aNames$assayImputIndic, 
                   minNbrValidValues = minNbrValidValues,
                   minlFC = minlFC, featureCollections = featureCollections, 
                   complexFDRThr = complexFDRThr,
                   volcanoAdjPvalThr = volcanoAdjPvalThr, 
                   volcanoLog2FCThr = volcanoLog2FCThr,
                   baseFileName = sub("\\.Rmd$", "", knitr::current_input()),
                   seed = seed, samSignificance = samSignificance, 
                   nperm = nperm, volcanoS0 = volcanoS0, 
                   aName = assaysForExport, addAbundanceValues = TRUE, 
                   singleFit = singleFit,
                   subtractBaseline = subtractBaseline, 
                   baselineGroup = baselineGroup, 
                   extraColumns = union(interactiveDisplayColumns,
                                        interactiveGroupColumn))

for (cmp in names(comparisonList$comparisons)) {
    ## Add WormBase/PomBase IDs if applicable
    if (speciesInfo$speciesCommon == "roundworm") {
        testres$tests[[cmp]]$WormBaseID <- 
            vapply(testres$tests[[cmp]][["einprotProtein"]],
                   function(mpds) {
                       wbids <- unlist(lapply(strsplit(mpds, ";")[[1]], function(mpd) {
                           wbconv$WormBaseID[wbconv$UniProtKB.ID == mpd |
                                                 wbconv$UniProtID == mpd]
                       }))
                       if (length(wbids[!is.na(wbids)]) != 0 && 
                           length(setdiff(wbids[!is.na(wbids)], "")) != 0) {
                           wbids <- setdiff(wbids, "")
                           wbids <- wbids[!is.na(wbids)]
                           paste(wbids, collapse = ";")
                       } else {
                           ""
                       }
                   }, "NA")
    } else if (speciesInfo$speciesCommon == "fission yeast") {
        testres$tests[[cmp]]$PomBaseID <- 
            vapply(testres$tests[[cmp]][["einprotProtein"]], 
                   function(mpds) {
                       pbids <- unlist(lapply(strsplit(mpds, ";")[[1]], function(mpd) {
                           pbconv$PomBaseID[pbconv$UniProtID == mpd]
                       }))
                       if (length(pbids[!is.na(pbids)]) != 0 && 
                           length(setdiff(pbids[!is.na(pbids)], "")) != 0) {
                           pbids <- setdiff(pbids, "")
                           pbids <- pbids[!is.na(pbids)]
                           paste(pbids, collapse = ";")
                       } else {
                           ""
                       }
                   }, "NA")
    }
}

The plots below illustrate the experimental design used for the linear model(s) and contrasts by limma. The plot to the right shows the number of samples for each combination of factor levels across the predictors, and is useful for detecting imbalances between group sizes for different conditions. The plot to the left summarizes the expected response value for each combination of predictor levels, expressed in terms of the linear model coefficients. For more details on how to interpret the plots, we refer to Soneson et al. (2020) or Law et al. (2020). Clicking on the arrow below the plots will reveal the design matrix used by limma, as well as the contrasts that were fit for each comparison.

if ("design" %in% names(testres$design)) {
    cat("\n### Overall design \n")
    vd <- VisualizeDesign(
        testres$design$sampleData, designFormula = NULL, 
        designMatrix = testres$design$design)
    print(cowplot::plot_grid(
        plotlist = c(vd$plotlist, vd$cooccurrenceplots), nrow = 1)
    )
    cat("\n\n")
    cat("<details>\n<summary><b>\nClick to display design matrix and contrast(s)\n</b></summary>\n")
    cat("\n````\n")
    print(testres$design$design)
    cat("\n````\n")
    cat("\n````\n")
    cat("Contrast(s): \n")
    print(testres$design$contrasts)
    cat("\n````\n")
    cat("\n````\n")
    cat("Sample weights: \n")
    print(testres$design$sampleWeights)
    cat("\n````\n")
    cat("\n</details>\n\n")

} else {
    for (nm in names(testres$design)) {
        cat("\n###", nm, "\n")
        vd <- VisualizeDesign(
            testres$design[[nm]]$sampleData, designFormula = NULL, 
            designMatrix = testres$design[[nm]]$design)
        print(cowplot::plot_grid(
            plotlist = c(vd$plotlist, vd$cooccurrenceplots), nrow = 1)
        )
        cat("\n\n")
        cat("<details>\n<summary><b>\nClick to display design matrix and contrast(s)\n</b></summary>\n")
        cat("\n````\n")
        print(testres$design[[nm]]$design)
        cat("\n````\n")
        cat("\n````\n")
        cat("Contrast: \n")
        print(testres$design[[nm]]$contrast)
        cat("\n````\n")
        cat("\n````\n")
        cat("Sample weights: \n")
        print(testres$design[[nm]]$sampleWeights)
        cat("\n````\n")
        cat("\n</details>\n\n")
    }
}

b_vs_a_explfc0.58

Click to display design matrix and contrast(s)

                                                                 (Intercept)
B03_02_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
B03_09_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
B03_10_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
B03_11_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
B03_12_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
B03_19_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
B03_20_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
B03_21_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
                                                                 fcb
B03_02_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   1
B03_09_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   1
B03_10_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   0
B03_11_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   0
B03_12_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   1
B03_19_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   1
B03_20_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   0
B03_21_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   0
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$fc
[1] "contr.treatment"

Contrast: 
[1] 0 1

Sample weights: 
NULL

c_vs_a_explfc1

Click to display design matrix and contrast(s)

                                                                 (Intercept)
B03_03_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
B03_08_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
B03_10_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
B03_11_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
B03_13_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
B03_18_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
B03_20_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
B03_21_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
                                                                 fcc
B03_03_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   1
B03_08_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   1
B03_10_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   0
B03_11_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   0
B03_13_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   1
B03_18_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   1
B03_20_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   0
B03_21_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   0
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$fc
[1] "contr.treatment"

Contrast: 
[1] 0 1

Sample weights: 
NULL

e_vs_d_explfc0.26

Click to display design matrix and contrast(s)

                                                                 (Intercept)
B03_04_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
B03_05_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
B03_06_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
B03_07_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
B03_14_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
B03_15_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
B03_16_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
B03_17_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B           1
                                                                 fce
B03_04_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   0
B03_05_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   1
B03_06_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   1
B03_07_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   0
B03_14_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   0
B03_15_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   1
B03_16_150304_human_ecoli_E_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   1
B03_17_150304_human_ecoli_D_3ul_3um_column_95_HCD_OT_2hrs_30B_9B   0
attr(,"assign")
[1] 0 1
attr(,"contrasts")
attr(,"contrasts")$fc
[1] "contr.treatment"

Contrast: 
[1] 0 1

Sample weights: 
NULL

SA plots

We first show a diagnostic plot for each comparison. These plots display the square root of the residual standard deviation (y-axis) versus the mean abundance (across all the groups used to perform the model fit, x-axis). The curve indicated in the plots show the mean-variance trend inferred by limma.

for (i in seq.int(ceiling(length(testres$tests) / 3))) {
    tmplist <- testres$tests[(seq_along(testres$tests) - 1) %/% 3 == (i - 1)]
    print(makeSAPlot(tmplist))
}

Volcano plots

Below we display a volcano plot for each comparison. These plots are also saved to pdf files. In each plot, a subset of (up to 25) significant hits are indicated by name (selected as the ones with the largest Manhattan distance to the origin). These proteins are also used to generate STRING networks (Szklarczyk et al. 2021) (separately for the up- and downregulated ones), which are included in the pdf file. Any features explicitly requested (see the table above) are also labeled in the volcano plots. In addition to these pdf files, if “complexes” is specified to be included in the feature collections (and tested for significance using camera), we also generate a multi-page pdf file showing the position of the proteins of each significantly differentially abundant complex in the volcano plot, as well as bar plots of the proteins’ abundance values in the compared samples. This pdf file is only generated if there is at least one significant complex (with adjusted p-value below the specified complexFDRThr=0.1).

interactiveVolcanos <- htmltools::tagList()
for (nm in names(testres$tests)) {
    plots <- plotVolcano(
        sce = sce, res = testres$tests[[nm]], testType = stattest, 
        xv = NULL, yv = NULL, xvma = NULL, volcind = NULL, 
        plotnote = testres$plotnotes[[nm]], 
        plottitle = testres$plottitles[[nm]], 
        plotsubtitle = testres$plotsubtitles[[nm]],
        volcanoFeaturesToLabel = volcanoFeaturesToLabel, 
        volcanoMaxFeatures = volcanoMaxFeatures,
        volcanoLabelSign = volcanoLabelSign,
        baseFileName = paste0(sub("\\.Rmd$", "", knitr::current_input()),
                              "_volcano_", nm), 
        comparisonString = nm, 
        groupComposition = comparisonList$groupComposition[comparisonList$comparisons[[nm]]],
        stringDb = string_db,
        featureCollections = testres$featureCollections, 
        complexFDRThr = complexFDRThr,
        maxNbrComplexesToPlot = maxNbrComplexesToPlot,
        curveparam = testres$curveparams[[nm]],
        abundanceColPat = assaysForExport,
        xlab = "log2(fold change)", 
        ylab = "-log10(p-value)",
        xlabma = "Average abundance",
        labelOnlySignificant = TRUE,
        interactiveDisplayColumns = interactiveDisplayColumns, 
        interactiveGroupColumn = interactiveGroupColumn,
        maxTextWidthBarplot = 5.1)
    if (!is.null(plots$ggma) && !is.null(plots$ggwf) && !is.null(plots$ggbar)) {
        cat("\n\n### ", nm, " \n\n\n")
        print(plots$gg)
        print(plots$ggma)
        for (ggb in plots$ggbar) print(ggb)
        print(plots$ggwf)
        cat("\n\n")
    } else if (!is.null(plots$ggma) && !is.null(plots$ggwf)) {
        cat("\n\n### ", nm, " \n\n\n")
        print(plots$gg)
        print(plots$ggma)
        print(plots$ggwf)
        cat("\n\n")
    } else if (!is.null(plots$ggma) && !is.null(plots$ggbar)) {
        cat("\n\n### ", nm, " \n\n\n")
        print(plots$gg)
        print(plots$ggma)
        for (ggb in plots$ggbar) print(ggb)
        cat("\n\n")
    } else if (!is.null(plots$ggbar) && !is.null(plots$ggwf)) {
        cat("\n\n### ", nm, " \n\n\n")
        print(plots$gg)
        for (ggb in plots$ggbar) print(ggb)
        print(plots$ggwf)
        cat("\n\n")
    } else if (!is.null(plots$ggma)) {
        cat("\n\n### ", nm, " \n\n\n")
        print(plots$gg)
        print(plots$ggma)
        cat("\n\n")
    } else if (!is.null(plots$ggbar)) {
        cat("\n\n### ", nm, " \n\n\n")
        print(plots$gg)
        for (ggb in plots$ggbar) print(ggb)
        cat("\n\n")
    } else if (!is.null(plots$ggwf)) {
        cat("\n\n### ", nm, " \n\n\n")
        print(plots$gg)
        print(plots$ggwf)
        cat("\n\n")
    } else {
        cat("\n\n### ", nm, " \n\n\n")
        print(plots$gg)
        cat("\n\n")
    }
    interactiveVolcanos[[nm]] <- plots$ggint
}

b_vs_a_explfc0.58

c_vs_a_explfc1

e_vs_d_explfc0.26

Interactive volcano plots

Result export

For each comparison, we also save a text file with the “significant” features (defined as those colored in the volcano plots above). The features are ordered by the logFC value. In addition, we save a file with abundance values for all features that are “significant” in at least one comparison, across all the samples used in at least one comparison.

abundanceExport <- makeAbundanceExport(testresList = testres$tests, 
                                       abundancePrefix = assaysForExport)
write.table(abundanceExport, file = sub("\\.Rmd$", "_abundance_values_significant.txt",
                                   knitr::current_input()),
            row.names = FALSE, col.names = TRUE, quote = FALSE, sep = "\t")

## Merge results from all tests and add to rowData(sce)
tests <- testres$tests
for (nm in names(tests)) {
    idx <- which(colnames(tests[[nm]]) != "pid")
    colnames(tests[[nm]])[idx] <- paste0(nm, ".", colnames(tests[[nm]])[idx])
}
all_tests <- as.data.frame(Reduce(function(...) dplyr::full_join(..., by = "pid"),
                                  tests), optional = TRUE)
rownames(all_tests) <- all_tests$pid
all_tests$pid <- NULL

stopifnot(rownames(sce) == rownames(all_tests))
rowData(sce) <- cbind(rowData(sce), all_tests)

Overlap among sets of significant features

The UpSet plot below shows the overlap among the “significant” proteins (defined as the ones that are colored in the volcano plots above, based on the user specifications) from the different comparisons. Note that if there are many comparisons, not all combinations may be displayed in the plot (only the 50 combinations with the largest number of proteins are shown, for interpretability reasons). Moreover, the UpSet plot will only be shown if at least two comparisons have been made, and there are at least two comparisons where any proteins were deemed significant.

tmpsign <- all_tests %>% dplyr::select(contains("showInVolcano")) + 0
tmpsign[is.na(tmpsign)] <- 0
## Only features that are significant in at least one comparison
tmpsign <- tmpsign[rowSums(tmpsign) > 0, , drop = FALSE]
colnames(tmpsign) <- gsub("\\.showInVolcano$", "", colnames(tmpsign))
if (length(tests) > 1 && sum(colSums(tmpsign) > 0) > 1) {
    ComplexUpset::upset(tmpsign, intersect = colnames(tmpsign), 
                        sort_intersections_by = "cardinality")
}

Most significant feature sets

Finally, we display the top significant feature sets in each of the tested collections, for each comparison (if applicable). We recommend that the (adjusted) p-values for the feature sets is interpreted with caution, especially in situations where the protein abundances are measured on an isoform level and the feature sets are defined on the protein level, since there will be many (sometimes strongly correlated) features corresponding to a single gene or protein annotated to a feature set.

for (nm in names(testres$topsets)) {
    if (length(testres$topsets[[nm]]) > 0) {
        plts <- lapply(names(testres$topsets[[nm]]), function(snm) {
            df <- testres$topsets[[nm]][[snm]]
            if (nrow(df) > maxNbrComplexesToPlot) {
                df <- df[seq_len(maxNbrComplexesToPlot), ]
            }
            if (nrow(df) > 0) {
                ggplot(df %>% dplyr::mutate(set = factor(.data$set, levels = rev(.data$set))), 
                       aes(x = .data$set, y = -log10(.data[[paste0(nm, "_FDR")]]), 
                           fill = .data[[paste0(nm, "_Direction")]])) + 
                    geom_bar(stat = "identity") + 
                    coord_flip() + theme_bw() + 
                    labs(x = "", title = snm) + 
                    scale_fill_manual(values = c(Up = scales::muted("blue"), 
                                                 Down = scales::muted("red")), 
                                      name = "Direction") + 
                    theme(axis.text = element_text(size = 6))
            } else {
                NULL
            }
        })
        if (sum(sapply(plts, is.null)) < length(plts)) {
            cat("\n\n### ", nm, " \n\n\n")
            print(cowplot::plot_grid(plotlist = plts, ncol = 1, align = "v"))
            cat("\n\n") 
        }
    }
}

b_vs_a_explfc0.58

c_vs_a_explfc1

e_vs_d_explfc0.26

Table with direct database links to sequences, functional information and predicted structures

The table below provides autogenerated links to the UniProt and AlphaFold pages (as well as selected organism-specific databases) for the majority protein IDs corresponding to each feature in the data set. UniProt is a resource of protein sequence and functional information hosted by EMBL-EBI, PIR and SIB. The AlphaFold Protein Structure Database, developed by DeepMind and EMBL-EBI, provides open access to protein structure predictions for the human proteome and other key proteins of interest. Note that (depending on the species) many proteins are not yet covered in AlphaFold (in this case, the link below will lead to a non-existent page), and that numeric values are rounded to four significant digits to increase readability.

linkTable <- makeDbLinkTable(
    df = as.data.frame(rowData(sce)) %>%
        rownames_to_column("pid") %>%
        dplyr::select("pid", "einprotProtein", 
                      matches(setdiff(c("^einprotLabel$", linkTableColumns), ""), perl = TRUE)), 
    idCol = "einprotProtein", 
    speciesCommon = speciesInfo$speciesCommon, 
    addSpeciesSpecificColumns = TRUE, 
    convTablePomBase = pbconv, 
    convTableWormBase = wbconv,
    removeSuffix = TRUE, signifDigits = 4
) %>%
    dplyr::select(-.data$einprotProtein)
DT::datatable(
    as.data.frame(linkTable), escape = FALSE,
    filter = list(position = "top", clear = FALSE),
    extensions = "Buttons",
    options = list(scrollX = TRUE, pageLength = 20,
                   search = list(regex = FALSE, caseInsensitive = TRUE),
                   dom = "Bfrltip", buttons =
                       list(list(extend = "csv",
                                 filename = paste0(sub("\\.Rmd$", "", 
                                                       knitr::current_input()), "_linktable")),
                            list(extend = "excel", title = "",
                                 filename = paste0(sub("\\.Rmd$", "", 
                                                       knitr::current_input()), "_linktable"))))
)

Assemble SingleCellExperiment object

We assemble all the information calculated above in a SingleCellExperiment object, which can later be used e.g. for exploration with iSEE (Rue-Albrecht et al. 2018).

sce <- prepareFinalSCE(
    sce = sce, baseFileName = sub("\\.Rmd$", "", knitr::current_input()),
    featureCollections = testres$featureCollections, expType = "MaxQuant")

## Add experiment metadata
S4Vectors::metadata(sce) <- c(
    S4Vectors::metadata(sce), 
    list(
        mqFile = mqFile,
        aNames = aNames, 
        aName = aNames$assayInput,
        iColPattern = iColPattern,
        imputeMethod = imputeMethod,
        ctrlGroup = ctrlGroup,
        allPairwiseComparisons = allPairwiseComparisons,
        normMethod = normMethod,
        stattest = stattest,
        minlFC = minlFC,
        analysisDate = as.character(Sys.Date()),
        rmdFile = knitr::current_input(dir = TRUE),
        testres = testres,
        comparisonList = comparisonList
    )
)

sce

## class: SingleCellExperiment 
## dim: 2938 20 
## metadata(16): colList iSEE ... testres comparisonList
## assays(10): LFQ.intensity Intensity ... imputed_LFQ.intensity
##   log2_LFQ.intensity
## rownames(2938): sp|A0AVT1|UBA6_HUMAN sp|A0FGR8|ESYT2_HUMAN ...
##   sp|Q9Y6X9|MORC2_HUMAN sp|Q9Y6Y8|S23IP_HUMAN
## rowData names(152): Protein.IDs Majority.protein.IDs ...
##   e_vs_d_explfc0.26.log2_LFQ.intensity.e.sd
##   e_vs_d_explfc0.26.IDsForSTRING
## colnames(20):
##   B03_02_150304_human_ecoli_B_3ul_3um_column_95_HCD_OT_2hrs_30B_9B
##   B03_03_150304_human_ecoli_C_3ul_3um_column_95_HCD_OT_2hrs_30B_9B ...
##   B03_20_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B
##   B03_21_150304_human_ecoli_A_3ul_3um_column_95_HCD_OT_2hrs_30B_9B
## colData names(4): sample group nNA pNA
## reducedDimNames(0):
## mainExpName: NULL
## altExpNames(0):

Run PCA

pcafeatures <- which(rowSums(!assay(sce, aNames$assayImputIndic)) >= minNbrValidValues)

We run a principal component analysis to obtain a reduced dimensionality representation of the data, in order to visualize the samples in two dimensions. The PCA is based on features with at least 2 non-imputed values (2881 of the 2938 features). The figure below shows the sample representation in the first two principal components, the fraction of variance explained by each of the principal components, and the features with the highest positive and negative loadings in the displayed components.

interactivePCAs <- htmltools::tagList()
for (a in unique(c(aNames$assayImputed, aNames$assayBatchCorr))) {
    pcares <- doPCA(sce = sce, assayName = a, ncomponents = 10, ntop = Inf, 
                    plotpairs = list(c(1, 2)), maxTextWidthBarplot = 1.9, 
                    subset_row = pcafeatures)
    sce <- pcares$sce
    interactivePCAs[[a]] <- ggplotly(pcares$plotcoord[[1]], 
                                     width = 750, height = 500)
    for (plc in pcares$plotcombined) {
        print(plc)
    }
}

Heatmap with hierarchical clustering

For another birds-eye view of the data, we represent it using a heatmap of the (imputed and normalized) log intensities, and cluster the samples and proteins using hierarchical clustering. In the first heatmap below, the values represent the normalized log intensities directly. In the second heatmap, the values for each protein have been centered to mean 0. The latter is also exported to a pdf file with row labels for further exploration.

ht <- makeAbundanceHeatmap(sce, assayToPlot = aNames$assayImputed, doCenter = FALSE, 
                           settings = "report")
draw(ht, merge_legends = TRUE)

## Centered
ht <- makeAbundanceHeatmap(sce, assayToPlot = aNames$assayImputed, doCenter = TRUE,
                           settings = "report")
draw(ht, merge_legends = TRUE)

## Save to pdf (show row names, but no row dendrogram, order samples by group)
pdf(sub("\\.Rmd$", "_heatmap_centered.pdf", knitr::current_input(dir = TRUE)),
    width = ncol(sce)/3.3 + 3.375, height = nrow(sce)/6.33 + 4)
ht <- makeAbundanceHeatmap(sce, assayToPlot = aNames$assayImputed, doCenter = TRUE,
                           settings = "export")

## The automatically generated colors map from the minus and plus 99^th of
## the absolute values in the matrix. There are outliers in the matrix
## whose patterns might be hidden by this color mapping. You can manually
## set the color to `col` argument.
## 
## Use `suppressMessages()` to turn off this message.

draw(ht, merge_legends = TRUE, heatmap_legend_side = "bottom", 
     annotation_legend_side = "bottom")
dev.off()

Correlation plot

The plot below shows the pairwise correlation between all pairs of samples, based on the log2_LFQ.intensity assay.

plotassay <- assay(sce, aNames$assayImputed)
ggplot(data = as.data.frame(cor(plotassay)) %>%
           rownames_to_column("sample1") %>% 
           tidyr::pivot_longer(names_to = "sample2", values_to = "correlation",
                               -"sample1"), 
       aes(x = .data$sample1, y = .data$sample2, fill = .data$correlation)) +
    geom_tile(color = "grey95", linewidth = 0.1) +
    scale_fill_gradient(low = "grey95", high = "darkblue") +
    theme(axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)) + 
    labs(x = "", y = "") + 
    scale_x_discrete(expand = c(0, 0)) + 
    scale_y_discrete(expand = c(0, 0))

Save SingleCellExperiment object

The SingleCellExperiment object created above is saved in the following location:

sceFile <- sub("\\.Rmd$", paste0("_sce.rds"), knitr::current_input(dir = TRUE))
saveRDS(sce, file = sceFile)
sceFile

## [1] "/Users/charlottesoneson/Documents/Rpackages/einprot/einprot_examples/ionstar_mq/ionstar_mq_einprot_0.7.4/ionstar_mq_einprot_0.7.4_sce.rds"

In addition, all feature information (the rowData of the SingleCellExperiment) is written to a text file:

textFile <- sub("\\.Rmd$", paste0("_feature_info.txt"), 
                knitr::current_input(dir = TRUE))
write.table(as.data.frame(rowData(sce)) %>% 
                rownames_to_column("FeatureID"), 
            file = textFile, row.names = FALSE, col.names = TRUE,
            quote = FALSE, sep = "\t")
textFile

## [1] "/Users/charlottesoneson/Documents/Rpackages/einprot/einprot_examples/ionstar_mq/ionstar_mq_einprot_0.7.4/ionstar_mq_einprot_0.7.4_feature_info.txt"

Explore the data interactively

For interactive exploration of your results, we generate a script to launch an adapted iSEE interface. The script can be sourced from an R console:

source('/Users/charlottesoneson/Documents/Rpackages/einprot/einprot_examples/ionstar_mq/ionstar_mq_einprot_0.7.4/ionstar_mq_einprot_0.7.4_iSEE.R')

That will open up an iSEE session where you can interactively explore your data.

Session info

This report was compiled with the following package versions:

Click to expand

## R version 4.3.0 (2023-04-21)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.4
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRblas.0.dylib 
## LAPACK: /Library/Frameworks/R.framework/Versions/4.3-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## time zone: Europe/Zurich
## tzcode source: internal
## 
## attached base packages:
## [1] grid      stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] einprot_0.7.4               ComplexHeatmap_2.16.0      
##  [3] plotly_4.10.2               ggalt_0.4.0                
##  [5] BiocSingular_1.16.0         scater_1.28.0              
##  [7] scuttle_1.10.1              SingleCellExperiment_1.22.0
##  [9] tibble_3.2.1                ggplot2_3.4.2              
## [11] ComplexUpset_1.3.3          dplyr_1.1.2                
## [13] htmltools_0.5.5             cowplot_1.1.1              
## [15] ExploreModelMatrix_1.12.0   limma_3.56.2               
## [17] DT_0.28                     SummarizedExperiment_1.30.2
## [19] Biobase_2.60.0              GenomicRanges_1.52.0       
## [21] GenomeInfoDb_1.36.0         IRanges_2.34.0             
## [23] S4Vectors_0.38.1            BiocGenerics_0.46.0        
## [25] MatrixGenerics_1.12.0       matrixStats_1.0.0          
## [27] STRINGdb_2.12.0            
## 
## loaded via a namespace (and not attached):
##   [1] ProtGenerics_1.32.0         bitops_1.0-7               
##   [3] webshot_0.5.5               httr_1.4.6                 
##   [5] ash_1.0-15                  RColorBrewer_1.1-3         
##   [7] doParallel_1.0.17           tools_4.3.0                
##   [9] utf8_1.2.3                  R6_2.5.1                   
##  [11] lazyeval_0.2.2              mgcv_1.8-42                
##  [13] GetoptLong_1.0.5            withr_2.5.0                
##  [15] iSEEhex_1.2.0               GGally_2.1.2               
##  [17] gridExtra_2.3               cli_3.6.1                  
##  [19] Cairo_1.6-0                 shinyjs_2.1.0              
##  [21] sandwich_3.0-2              labeling_0.4.2             
##  [23] sass_0.4.6                  robustbase_0.99-0          
##  [25] mvtnorm_1.2-2               readr_2.1.4                
##  [27] genefilter_1.82.1           systemfonts_1.0.4          
##  [29] svglite_2.1.1               stringdist_0.9.10          
##  [31] rrcov_1.7-4                 plotrix_3.8-2              
##  [33] maps_3.4.1                  rstudioapi_0.14            
##  [35] impute_1.74.1               RSQLite_2.3.1              
##  [37] generics_0.1.3              shape_1.4.6                
##  [39] crosstalk_1.2.0             gtools_3.9.4               
##  [41] Matrix_1.5-4.1              ggbeeswarm_0.7.2           
##  [43] fansi_1.0.4                 imputeLCMD_2.1             
##  [45] lifecycle_1.0.3             yaml_2.3.7                 
##  [47] iSEEu_1.12.0                gplots_3.1.3               
##  [49] blob_1.2.4                  promises_1.2.0.1           
##  [51] crayon_1.5.2                shinydashboard_0.7.2       
##  [53] miniUI_0.1.1.1              lattice_0.21-8             
##  [55] msigdbr_7.5.1               beachmat_2.16.0            
##  [57] annotate_1.78.0             KEGGREST_1.40.0            
##  [59] magick_2.7.4                pillar_1.9.0               
##  [61] knitr_1.43.1                rjson_0.2.21               
##  [63] codetools_0.2-19            glue_1.6.2                 
##  [65] ggiraph_0.8.7               pcaMethods_1.92.0          
##  [67] data.table_1.14.8           MultiAssayExperiment_1.26.0
##  [69] vctrs_0.6.3                 png_0.1-8                  
##  [71] gtable_0.3.3                gsubfn_0.7                 
##  [73] cachem_1.0.8                xfun_0.39                  
##  [75] S4Arrays_1.0.4              mime_0.12                  
##  [77] pcaPP_2.0-3                 survival_3.5-5             
##  [79] rrcovNA_0.5-0               iterators_1.0.14           
##  [81] gmm_1.8                     iSEE_2.12.0                
##  [83] ellipsis_0.3.2              nlme_3.1-162               
##  [85] bit64_4.0.5                 bslib_0.5.0                
##  [87] tmvtnorm_1.5                irlba_2.3.5.1              
##  [89] vipor_0.4.5                 KernSmooth_2.23-21         
##  [91] colorspace_2.1-0            DBI_1.1.3                  
##  [93] proDA_1.14.0                tidyselect_1.2.0           
##  [95] bit_4.0.5                   compiler_4.3.0             
##  [97] extrafontdb_1.0             chron_2.3-61               
##  [99] rvest_1.0.3                 BiocNeighbors_1.18.0       
## [101] xml2_1.3.4                  DelayedArray_0.26.3        
## [103] colourpicker_1.2.0          scales_1.2.1               
## [105] caTools_1.18.2              DEoptimR_1.0-14            
## [107] proj4_1.0-12                hexbin_1.28.3              
## [109] stringr_1.5.0               digest_0.6.31              
## [111] rmarkdown_2.23              XVector_0.40.0             
## [113] pkgconfig_2.0.3             extrafont_0.19             
## [115] sparseMatrixStats_1.12.0    highr_0.10                 
## [117] fastmap_1.1.1               rlang_1.1.1                
## [119] GlobalOptions_0.1.2         htmlwidgets_1.6.2          
## [121] shiny_1.7.4                 DelayedMatrixStats_1.22.0  
## [123] farver_2.1.1                jquerylib_0.1.4            
## [125] zoo_1.8-12                  jsonlite_1.8.7             
## [127] mclust_6.0.0                BiocParallel_1.34.2        
## [129] RCurl_1.98-1.12             magrittr_2.0.3             
## [131] kableExtra_1.3.4            GenomeInfoDbData_1.2.10    
## [133] patchwork_1.1.2             munsell_0.5.0              
## [135] Rcpp_1.0.10                 babelgene_22.9             
## [137] viridis_0.6.3               proto_1.0.0                
## [139] MsCoreUtils_1.13.1          sqldf_0.4-11               
## [141] stringi_1.7.12              rintrojs_0.3.2             
## [143] zlibbioc_1.46.0             MASS_7.3-60                
## [145] plyr_1.8.8                  ggseqlogo_0.1              
## [147] parallel_4.3.0              ggrepel_0.9.3              
## [149] forcats_1.0.0               Biostrings_2.68.1          
## [151] splines_4.3.0               hash_2.2.6.2               
## [153] hms_1.1.3                   circlize_0.4.15            
## [155] igraph_1.5.0                uuid_1.1-0                 
## [157] QFeatures_1.10.0            ScaledMatrix_1.8.1         
## [159] XML_3.99-0.14               evaluate_0.21              
## [161] tzdb_0.4.0                  foreach_1.5.2              
## [163] httpuv_1.6.11               Rttf2pt1_1.3.12            
## [165] tidyr_1.3.0                 purrr_1.0.1                
## [167] reshape_0.8.9               clue_0.3-64                
## [169] norm_1.0-11.1               rsvd_1.0.5                 
## [171] xtable_1.8-4                AnnotationFilter_1.24.0    
## [173] later_1.3.1                 viridisLite_0.4.2          
## [175] memoise_2.0.1               beeswarm_0.4.0             
## [177] AnnotationDbi_1.62.1        writexl_1.4.2              
## [179] cluster_2.1.4               shinyWidgets_0.7.6         
## [181] shinyAce_0.4.2

References

Cox, J, and M Mann. 2008. “MaxQuant Enables High Peptide Identification Rates, Individualized p.p.b.-Range Mass Accuracies and Proteome-Wide Protein Quantification.” Nature Biotechnology 26 (12): 1367–72. https://www.nature.com/articles/nbt.1511.

Law, CW, K Zeglinski, X Dong, M Alhamdoosh, GK Smyth, and ME Ritchie. 2020. “A Guide to Creating Design Matrices for Gene Expression Experiments.” F1000Research 9: 1444. https://f1000research.com/articles/9-1444.

Phipson, B, S Lee, IJ Majewski, WS Alexander, and GK Smyth. 2016. “Robust Hyperparameter Estimation Protects Against Hypervariable Genes and Improves Power to Detect Differential Expression.” Annals of Applied Statistics 10 (2): 946–63. http://projecteuclid.org/euclid.aoas/1469199900.

Ritchie, ME, B Phipson, D Wu, Y Hu, CW Law, W Shi, and GK Smyth. 2015. “Limma Powers Differential Expression Analyses for RNA-sequencing and Microarray Studies.” Nucleic Acids Research 43 (7): e47. https://academic.oup.com/nar/article/43/7/e47/2414268.

Rue-Albrecht, Kevin, Federico Marini, Charlotte Soneson, and Aaron T L Lun. 2018. “iSEE: Interactive SummarizedExperiment Explorer.” F1000Res. 7: 741.

Soneson, C, F Marini, F Geier, MI Love, and MB Stadler. 2020. “ExploreModelMatrix: Interactive Exploration for Improved Understanding of Design Matrices and Linear Models in R.” F1000Research 9: 512. https://f1000research.com/articles/9-512.

Szklarczyk, D, AL Gable, KC Nastou, D Lyon, R Kirsch, S Pyysalo, NT Doncheva, et al. 2021. “The STRING Database in 2021: Customizable Protein-Protein Networks, and Functional Characterization of User-Uploaded Gene/Measurement Sets.” Nucleic Acids Research 49 (D1): D605–12. https://academic.oup.com/nar/article/49/D1/D605/6006194.

Wu, D, and GK Smyth. 2012. “Camera: A Competitive Gene Set Test Accounting for Inter-Gene Correlation.” Nucleic Acids Research 40 (17): e133. https://academic.oup.com/nar/article/40/17/e133/2411151.

IonStar Human/E.coli spike-in

MaxQuant/LFQ analysis

2023-07-05 15:49:04 CEST (einprot v0.7.4)

Experiment details

MaxQuant analysis summary

Settings

Read MaxQuant output

Add sample annotations

Overview of the workflow

Overall distribution of protein intensities

Filter out contaminants, reverse hits, and proteins with low confidence

Modify feature names

Prepare feature collections for later testing

Apply a log2 transformation

Visualize missing value patterns

Imputation

Overall distribution of log2 protein intensities

Statistical testing

Comparisons and design

b_vs_a_explfc0.58

c_vs_a_explfc1

e_vs_d_explfc0.26

SA plots

Volcano plots

b_vs_a_explfc0.58

c_vs_a_explfc1

e_vs_d_explfc0.26

Interactive volcano plots

Result export

Overlap among sets of significant features

Most significant feature sets

b_vs_a_explfc0.58

c_vs_a_explfc1

e_vs_d_explfc0.26

Table with direct database links to sequences, functional information and predicted structures

Assemble SingleCellExperiment object

Run PCA

Heatmap with hierarchical clustering

Correlation plot

Save SingleCellExperiment object

Explore the data interactively

Session info

References