diff --git a/chromatoR/app.R b/chromatoR/app.R index 390fdcf..29cb029 100644 --- a/chromatoR/app.R +++ b/chromatoR/app.R @@ -1,25 +1,26 @@ -########################################################## -# # -# This app has been written by Marcel Costa-García, # -# and has reused some internal functions from sangerseqR # -# package, including getPeaks and peakvalues # -# # -########################################################## +############################################################ +# # +# This app has been written by Marcel Costa-García, # +# and has reused some internal functions from sangerseqR # +# package, including getPeaks and peakvalues # +# # +############################################################ +# Load of dependencies library(shiny) library(tidyverse) library(sangerseqR) library(msa) -# Define UI for application that draws a histogram -ui <- fluidPage( +# Define UI --------------------------------------------------------------- +ui <- fluidPage( # Application title titlePanel("ChromatoR"), - # Sidebar with a slider input for number of bins - navbarPage("ChromatoR", + navbarPage("Apps", + tabPanel("Detección de Bases", sidebarPanel( fileInput("file1", "Sube fichero ab1", multiple = FALSE), @@ -31,14 +32,13 @@ ui <- fluidPage( textInput("old", label="Referencia"), actionButton("calab", "Analizar") ), - - # Show a plot of the generated distribution mainPanel( tags$head(tags$style(HTML("pre,.wrap { white-space: pre-wrap; word-break: break-all; }"))), verbatimTextOutput("Primseq") %>% tagAppendAttributes(class="wrap"), verbatimTextOutput("align") ) ), + tabPanel("Visor de Cromatograma", sidebarPanel( numericInput("visStart", label="Inicio", value=0), @@ -55,29 +55,28 @@ ui <- fluidPage( ) ) -# Define server logic required to draw a histogram + +# Define server ----------------------------------------------------------- + server <- function(input, output) { - # observe({ - # - # obj<<-readsangerseq(input$file1) - # } - # }) + + # Define and initialize reactive variables obj<-reactiveValues() obj$aborig<-NULL obj_ab<-NULL obj$seq<-NULL + # File Input observe({ - if (!is.null(input$file1)){ + if (!is.null(input$file1)){ # This ensures that the reading is only tried when File selected obj$aborig<-readsangerseq(input$file1$datapath) } }) + # This generates the threshold selector calculating the median of the peaks output$thr<-renderUI({ - if (!is.null(obj$aborig)){ - print(1) + if (!is.null(obj$aborig)){ # Only when we have a sangerseq object obj_ab<-obj$aborig - ## Functions getpeaks <- function(trace) { r <- rle(trace) indexes <- which(rep(diff(sign(diff(c(-Inf, r$values, -Inf)))) == -2, @@ -94,15 +93,19 @@ server <- function(input, output) { Apeaks, Tpeaks, Cpeaks) - thr_calc<-quantile(peakCusMatrix[,2], 0.5) + + thr_calc<-quantile(peakCusMatrix[,2], 0.5) # Percentile 50 + numericInput("thr", label = "Peak Threshold", value = thr_calc) } }) + # Peak detection and Base asignation observeEvent(input$calab, { if (!is.null(obj$aborig)){ - obj_ab<-obj$aborig - ## Functions + obj_ab<-obj$aborig # We will work in a local variable + + ## Function definition getpeaks <- function(trace) { r <- rle(trace) indexes <- which(rep(diff(sign(diff(c(-Inf, r$values, -Inf)))) == -2, @@ -131,34 +134,40 @@ server <- function(input, output) { } return(pos) } + + # Progress notification progress <- shiny::Progress$new(min=0, max=3) progress$set(message = "Start", value = 1) print("Start") + # Peak detection for each channel Apeaks <- getpeaks(obj_ab@traceMatrix[,1]) Cpeaks <- getpeaks(obj_ab@traceMatrix[,2]) Gpeaks <- getpeaks(obj_ab@traceMatrix[,3]) Tpeaks <- getpeaks(obj_ab@traceMatrix[,4]) + + # Peaks joining and filtering by defined threshold peakthr<-input$thr peakCusMatrix<-rbind(Gpeaks[Gpeaks[,2] > peakthr,], Apeaks[Apeaks[,2] > peakthr,], Tpeaks[Tpeaks[,2] > peakthr,], Cpeaks[Cpeaks[,2] > peakthr,]) - peakCusMatrix<-peakCusMatrix %>% as.data.frame() %>% dplyr::rename(Int=V2) %>% group_by(indexes) %>% summarise(Int=max(Int)[1]) + # In case there is more than one peak by position, we keep the higher + peakCusMatrix<-peakCusMatrix %>% as.data.frame() %>% + dplyr::rename(Int=V2) %>% group_by(indexes) %>% summarise(Int=max(Int)[1]) peakCusMatrix<-as.matrix(peakCusMatrix) + # Using the custom function, we aggregate peaks defining two distance thresholds pos1<-redDiffs(peakCusMatrix, input$dist1) - pos<-redDiffs(pos1, input$dist2) - print(nrow(pos)) - - pos1<-pos1[,1] - pos<-pos[,1] - + pos<-redDiffs(pos1, input$dist2)[,1] + progress$set(message = "Peaks Detected", value = 2) print("Peaks Detected") + # This is taken from sangerseqR + # Defining starts and stops of peak windows primarypeaks <- pos diffs <- diff(c(0,primarypeaks)) starts <- primarypeaks - 0.5*diffs @@ -172,11 +181,12 @@ server <- function(input, output) { tempPosMatrix <- matrix(nrow=length(starts), ncol=4) tempAmpMatrix <- matrix(nrow=length(starts), ncol=4) + # Ratio min to include a second peak as secondary sequence ratio<-input$ratio - length(starts) - + # Base detection for each peak for(i in 1:length(starts)) { + # Detection of peak position and signal for each peak window Apeak <- peakvalues(Apeaks, starts[i], stops[i]) Cpeak <- peakvalues(Cpeaks, starts[i], stops[i]) Gpeak <- peakvalues(Gpeaks, starts[i], stops[i]) @@ -185,42 +195,59 @@ server <- function(input, output) { is.na(Cpeak[2]) & is.na(Gpeak[2]) & is.na(Tpeak[2])) next #rare case where no peak found + + # Signal of each channel join signals <- c(Apeak[1], Cpeak[1], Gpeak[1], Tpeak[1]) tempAmpMatrix[i,] <- signals + + # Peak position of each channel join positions <- c(Apeak[2], Cpeak[2], Gpeak[2], Tpeak[2]) tempPosMatrix[i,] <- positions + + # Ratio to the maximum by channel signalratios <- signals/max(signals, na.rm=TRUE) + + # Channel order definition Bases <- c("A", "C", "G", "T") - # Bases <- c("G", "A", "T", "G") + + # Discarding bases (same order as channel) that doesn't reach the minimum ratio to max Bases[signalratios < ratio] <- NA - #sort by decreasing signal strength + + # Sort by decreasing signal strength Bases <- Bases[order(signals, decreasing=TRUE)] + + # Definition of primary base and secondary base positions <- positions[order(signals, decreasing=TRUE)] - if(length(Bases[!is.na(Bases)]) == 4 + if(length(Bases[!is.na(Bases)]) == 4 #if there are 4 bases or none, "N" assignation | length(Bases[!is.na(Bases)]) == 0) { primary <- c(primary, "N") secondary <- c(secondary, "N") } - else if(length(Bases[!is.na(Bases)]) > 1) { + else if(length(Bases[!is.na(Bases)]) > 1) { #if there is more than 1 base primary <- c(primary, Bases[1]) Bases2 <- Bases[2:4] secondary <- c(secondary, mergeIUPACLetters(paste(sort(Bases2[!is.na(Bases2)]), collapse=""))) } - else { + else { #if there is only one base, primary and secondary are the same primary <- c(primary, Bases[1]) secondary <- c(secondary, Bases[1]) } } + # Redefining sangerseq object with the new sequence and matrices obj_ab@peakPosMatrix <- tempPosMatrix[rowSums(!is.na(tempPosMatrix)) > 0,] obj_ab@peakAmpMatrix <- tempAmpMatrix[rowSums(!is.na(tempPosMatrix)) > 0,] obj_ab@primarySeqID <- "sangerseq package primary basecalls" obj_ab@primarySeq <- DNAString(paste(primary, collapse="")) obj_ab@secondarySeqID <- "sangerseq package secondary basecalls" obj_ab@secondarySeq <- DNAString(paste(secondary, collapse="")) + + # We convert obj_ab into a global variable that other functions will be able to use obj_ab<<-obj_ab + + # This is a triggering for other observing events if (is.null(obj$seq)){ obj$seq<-1 }else{ @@ -234,25 +261,26 @@ server <- function(input, output) { } }) + # Predicted sequence printing output$Primseq <- renderText({ - observeEvent(obj$seq, {}) + observeEvent(obj$seq, {}) #This is triggered when the sequence is predicted if (!is.null(obj$seq)){ - print(1) as.character(obj_ab@primarySeq) } }) + # Alignament with a reference sequence output$align <- renderText({ - observeEvent(obj$seq, {}) + observeEvent(obj$seq, {}) #This is triggered when the sequence is predicted - if (!is.null(obj$seq) & input$old != ""){ - print(2) + if (!is.null(obj$seq) & input$old != ""){ #Only if there is a predicted sequence AND a reference sequence defined by the user alignament<-msaClustalW(c("Reference"=toupper(input$old),"Predicted"=obj_ab@primarySeq %>% as.character), type="dna") paste(capture.output(print(alignament, show="complete")), collapse="\n") } }) + # This is a minimal and maximal filter definition for the Visor Table output$tabmin<-renderUI({ if (!is.null(obj$seq) & input$old != ""){ numericInput("tabmin_num", label = "Mínimo en tabla", value = 0) @@ -265,46 +293,56 @@ server <- function(input, output) { } }) + # A table showing the predicted sequence discrepancies with a reference sequenc. output$visTab <- renderTable({ observeEvent(obj$seq, {}) - if (!is.null(obj$seq) & input$old != "" & !is.null(input$tabmin_num)){ - print(3) + if (!is.null(obj$seq) & input$old != "" & !is.null(input$tabmin_num)){ #Only if there is a predicted sequence,a reference sequence defined by the user AND the min and max filters have been constructed alignament<-msaClustalW(c("Reference"=toupper(input$old),"Predicted"=obj_ab@primarySeq %>% as.character), type="dna") + # We get each aligned sequence separately pred<-(alignament@unmasked[2] %>% as.character() %>% strsplit(""))[[1]] ref<-(alignament@unmasked[1] %>% as.character() %>% strsplit(""))[[1]] - cons<-pred == ref + cons<-pred == ref # T or F vector - aln<-data.frame(pred, ref, cons, PosAln=1:length(pred)) + aln<-data.frame(pred, ref, cons, PosAln=1:length(pred)) #We generate the Alignament Index aln<-mutate(aln, cons=case_when( cons == T~"", TRUE~"?" )) - aln %>% filter(pred != "-") %>% add_column("PosChrom"=1:nrow(.)) %>% - merge(aln, all=T) %>% arrange(PosAln) %>% add_column("PosChromAnterior"=c(NA,.$PosChrom[1:(nrow(.)-1)])) %>% + aln %>% filter(pred != "-") %>% add_column("PosChrom"=1:nrow(.)) %>% #We generate the Chromatogram Index + merge(aln, all=T) %>% arrange(PosAln) %>% + add_column("PosChromAnterior"=c(NA,.$PosChrom[1:(nrow(.)-1)])) %>% #After merging, we assign in new column the previous index filter(cons == "?") %>% filter(PosAln >= input$tabmin_num & PosAln <= input$tabmax_num) } }) + # Function to generate a plot object of the chromatogram with the peaks highlighted and the predicted sequence plotvis<-eventReactive( input$butvis, { if (!is.null(obj$seq)){ print("Inicio imagen") + + # Definition of the width (number of bases), min (start base index) and max (end base index) of the chromatogram width<-input$visWidth min<-input$visStart max<-min+width + # Defining the bases to be plotted let<-obj_ab@primarySeq %>% as.character %>% strsplit("") - peakpos<-apply(obj_ab@peakAmpMatrix,1,function(x) which(x == max(x,na.rm=T))[1]) - picks_x<-sapply(1:length(peakpos), function(i) obj_ab@peakPosMatrix[i, peakpos[i]]) + # Defining the peak position + peakpos<-apply(obj_ab@peakAmpMatrix,1,function(x) which(x == max(x,na.rm=T))[1]) #Getting the column position with the maximum signal by row + picks_x<-sapply(1:length(peakpos), function(i) obj_ab@peakPosMatrix[i, peakpos[i]]) #Obtaining the position of the peaks defined by the previous line + + # Construction of a data.frame with peak position, base letter and base index picks<-data.frame(rows=picks_x, Base=let[[1]], num=1:length(let[[1]])) %>% - filter(num >= min & num <= max) #%>% mutate(rows=rows-min) + filter(num >= min & num <= max) ranPeaks<-range(picks$rows) - obj$plot<-obj_ab@traceMatrix %>% as.data.frame() %>% dplyr::rename(A=V1,C=V2,G=V3,T=V4) %>% add_column(rows=1:nrow(.)) %>% + obj$plot<-obj_ab@traceMatrix %>% as.data.frame() %>% + dplyr::rename(A=V1,C=V2,G=V3,T=V4) %>% add_column(rows=1:nrow(.)) %>% filter(rows >= ranPeaks[1] & rows <= ranPeaks[2]) %>% gather(Base, Value, -rows) %>% mutate(Base=factor(Base, levels=c("G","A","T","C"))) %>% ggplot(aes(rows, Value))+ @@ -319,6 +357,7 @@ server <- function(input, output) { } }) + # Render of the chromatogram plot output$visor <- renderPlot({ observeEvent(input$butvis, {plotvis()}) obj$plot