Explicant i comentant l'script.

3 years ago · 6fcf75f887
1 changed files with 94 additions and 55 deletions
--- a/chromatoR/app.R
+++ b/chromatoR/app.R
@ -1,25 +1,26 @@
 ##########################################################
 #                                                        #
 # This app has been written by Marcel Costa-García,      #
 # and has reused some internal functions from sangerseqR #
 # package, including getPeaks and peakvalues             #
 #                                                        #
 ##########################################################
 ############################################################
 #                                                          #
 #  This app has been written by Marcel Costa-García,       #
 #  and has reused some internal functions from sangerseqR  #
 #  package, including getPeaks and peakvalues              #
 #                                                          #
 ############################################################

 # Load of dependencies

 library(shiny)
 library(tidyverse)
 library(sangerseqR)
 library(msa)

 # Define UI for application that draws a histogram
 ui <- fluidPage(
 # Define UI ---------------------------------------------------------------

 ui <- fluidPage(
    # Application title
    titlePanel("ChromatoR"),

    # Sidebar with a slider input for number of bins 
    navbarPage("ChromatoR",
    navbarPage("Apps",
               
        tabPanel("Detección de Bases",
          sidebarPanel(
            fileInput("file1", "Sube fichero ab1", multiple = FALSE),
@ -31,14 +32,13 @@ ui <- fluidPage(
            textInput("old", label="Referencia"),
            actionButton("calab", "Analizar")
          ),
  
          # Show a plot of the generated distribution
          mainPanel(
            tags$head(tags$style(HTML("pre,.wrap { white-space: pre-wrap; word-break: break-all; }"))),
            verbatimTextOutput("Primseq") %>% tagAppendAttributes(class="wrap"),
            verbatimTextOutput("align")
          )
      ),
      
      tabPanel("Visor de Cromatograma",
          sidebarPanel(
            numericInput("visStart", label="Inicio", value=0),
@ -55,29 +55,28 @@ ui <- fluidPage(
    )
 )

 # Define server logic required to draw a histogram

 # Define server -----------------------------------------------------------

 server <- function(input, output) {
    # observe({
    #   
    #     obj<<-readsangerseq(input$file1)
    #   }
    # })

  # Define and initialize reactive variables
  obj<-reactiveValues()
  obj$aborig<-NULL
  obj_ab<-NULL
  obj$seq<-NULL
  
  # File Input
  observe({
    if (!is.null(input$file1)){
    if (!is.null(input$file1)){ # This ensures that the reading is only tried when File selected
      obj$aborig<-readsangerseq(input$file1$datapath)
    }
  })
  
  # This generates the threshold selector calculating the median of the peaks
  output$thr<-renderUI({
    if (!is.null(obj$aborig)){
      print(1)
    if (!is.null(obj$aborig)){ # Only when we have a sangerseq object
      obj_ab<-obj$aborig
      ## Functions
      getpeaks <- function(trace) {
        r <- rle(trace)
        indexes <- which(rep(diff(sign(diff(c(-Inf, r$values, -Inf)))) == -2,
@ -94,15 +93,19 @@ server <- function(input, output) {
                           Apeaks, 
                           Tpeaks, 
                           Cpeaks)
      thr_calc<-quantile(peakCusMatrix[,2], 0.5)
      
      thr_calc<-quantile(peakCusMatrix[,2], 0.5) # Percentile 50
      
      numericInput("thr", label = "Peak Threshold", value = thr_calc)
    }
  })
  
  # Peak detection and Base asignation
  observeEvent(input$calab, {
    if (!is.null(obj$aborig)){
      obj_ab<-obj$aborig
      ## Functions
      obj_ab<-obj$aborig # We will work in a local variable
      
      ## Function definition
      getpeaks <- function(trace) {
        r <- rle(trace)
        indexes <- which(rep(diff(sign(diff(c(-Inf, r$values, -Inf)))) == -2,
@ -131,34 +134,40 @@ server <- function(input, output) {
        }
        return(pos)
      }
      
      # Progress notification
      progress <- shiny::Progress$new(min=0, max=3)
      progress$set(message = "Start", value = 1)
      print("Start")
      
      # Peak detection for each channel
      Apeaks <- getpeaks(obj_ab@traceMatrix[,1])
      Cpeaks <- getpeaks(obj_ab@traceMatrix[,2])
      Gpeaks <- getpeaks(obj_ab@traceMatrix[,3])
      Tpeaks <- getpeaks(obj_ab@traceMatrix[,4])
      
      
      # Peaks joining and filtering by defined threshold
      peakthr<-input$thr
      peakCusMatrix<-rbind(Gpeaks[Gpeaks[,2] > peakthr,], 
                           Apeaks[Apeaks[,2] > peakthr,], 
                           Tpeaks[Tpeaks[,2] > peakthr,], 
                           Cpeaks[Cpeaks[,2] > peakthr,])
      
      peakCusMatrix<-peakCusMatrix %>% as.data.frame() %>% dplyr::rename(Int=V2) %>% group_by(indexes) %>% summarise(Int=max(Int)[1])
      # In case there is more than one peak by position, we keep the higher
      peakCusMatrix<-peakCusMatrix %>% as.data.frame() %>% 
        dplyr::rename(Int=V2) %>% group_by(indexes) %>% summarise(Int=max(Int)[1])
      peakCusMatrix<-as.matrix(peakCusMatrix)

      # Using the custom function, we aggregate peaks defining two distance thresholds
      pos1<-redDiffs(peakCusMatrix, input$dist1)
      pos<-redDiffs(pos1, input$dist2)
      print(nrow(pos))
      
      pos1<-pos1[,1]
      pos<-pos[,1]
      
      pos<-redDiffs(pos1, input$dist2)[,1]

      progress$set(message = "Peaks Detected", value = 2)
      print("Peaks Detected")
      
      # This is taken from sangerseqR
      # Defining starts and stops of peak windows
      primarypeaks <- pos
      diffs <- diff(c(0,primarypeaks))
      starts <- primarypeaks - 0.5*diffs
@ -172,11 +181,12 @@ server <- function(input, output) {
      tempPosMatrix <- matrix(nrow=length(starts), ncol=4)
      tempAmpMatrix <- matrix(nrow=length(starts), ncol=4)
      
      # Ratio min to include a second peak as secondary sequence
      ratio<-input$ratio
      
      length(starts)
      
      # Base detection for each peak
      for(i in 1:length(starts)) {
        # Detection of peak position and signal for each peak window
        Apeak <- peakvalues(Apeaks, starts[i], stops[i])
        Cpeak <- peakvalues(Cpeaks, starts[i], stops[i])
        Gpeak <- peakvalues(Gpeaks, starts[i], stops[i])
@ -185,42 +195,59 @@ server <- function(input, output) {
           is.na(Cpeak[2]) & 
           is.na(Gpeak[2]) & 
           is.na(Tpeak[2])) next #rare case where no peak found 
        
        # Signal of each channel join
        signals <- c(Apeak[1], Cpeak[1], Gpeak[1], Tpeak[1])
        tempAmpMatrix[i,] <- signals
        
        # Peak position of each channel join
        positions <- c(Apeak[2], Cpeak[2], Gpeak[2], Tpeak[2])
        tempPosMatrix[i,] <- positions
        
        # Ratio to the maximum by channel
        signalratios <- signals/max(signals, na.rm=TRUE)
        
        # Channel order definition
        Bases <- c("A", "C", "G", "T")
        # Bases <- c("G", "A", "T", "G")
        
        # Discarding bases (same order as channel) that doesn't reach the minimum ratio to max
        Bases[signalratios < ratio] <- NA
        #sort by decreasing signal strength
        
        # Sort by decreasing signal strength
        Bases <- Bases[order(signals, decreasing=TRUE)] 
        
        # Definition of primary base and secondary base
        positions <- positions[order(signals, decreasing=TRUE)]
        if(length(Bases[!is.na(Bases)]) == 4 
        if(length(Bases[!is.na(Bases)]) == 4 #if there are 4 bases or none, "N" assignation
           | length(Bases[!is.na(Bases)]) == 0) {
          primary <- c(primary, "N")
          secondary <- c(secondary, "N")
        }
        else if(length(Bases[!is.na(Bases)]) > 1) {
        else if(length(Bases[!is.na(Bases)]) > 1) { #if there is more than 1 base
          primary <- c(primary, Bases[1]) 
          Bases2 <- Bases[2:4]
          secondary <- c(secondary, 
                         mergeIUPACLetters(paste(sort(Bases2[!is.na(Bases2)]), 
                                                 collapse="")))
        }
        else {
        else { #if there is only one base, primary and secondary are the same
          primary <- c(primary, Bases[1])
          secondary <- c(secondary, Bases[1])
        }
      }  
      
      # Redefining sangerseq object with the new sequence and matrices
      obj_ab@peakPosMatrix <- tempPosMatrix[rowSums(!is.na(tempPosMatrix)) > 0,]
      obj_ab@peakAmpMatrix <- tempAmpMatrix[rowSums(!is.na(tempPosMatrix)) > 0,]
      obj_ab@primarySeqID <- "sangerseq package primary basecalls"
      obj_ab@primarySeq <- DNAString(paste(primary, collapse=""))
      obj_ab@secondarySeqID <- "sangerseq package secondary basecalls"
      obj_ab@secondarySeq <- DNAString(paste(secondary, collapse=""))
      
      # We convert obj_ab into a global variable that other functions will be able to use
      obj_ab<<-obj_ab
      
      # This is a triggering for other observing events
      if (is.null(obj$seq)){
        obj$seq<-1
      }else{
@ -234,25 +261,26 @@ server <- function(input, output) {
    }
  })
  
  # Predicted sequence printing
  output$Primseq <- renderText({
    observeEvent(obj$seq, {})
    observeEvent(obj$seq, {}) #This is triggered when the sequence is predicted

    if (!is.null(obj$seq)){
      print(1)
      as.character(obj_ab@primarySeq)
    }
  })
  
  # Alignament with a reference sequence
  output$align <- renderText({
    observeEvent(obj$seq, {})
    observeEvent(obj$seq, {}) #This is triggered when the sequence is predicted
    
    if (!is.null(obj$seq) & input$old != ""){
      print(2)
    if (!is.null(obj$seq) & input$old != ""){ #Only if there is a predicted sequence AND a reference sequence defined by the user
      alignament<-msaClustalW(c("Reference"=toupper(input$old),"Predicted"=obj_ab@primarySeq %>% as.character), type="dna")
      paste(capture.output(print(alignament, show="complete")), collapse="\n")
    }
  })
  
  # This is a minimal and maximal filter definition for the Visor Table
  output$tabmin<-renderUI({
    if (!is.null(obj$seq) & input$old != ""){
      numericInput("tabmin_num", label = "Mínimo en tabla", value = 0)
@ -265,46 +293,56 @@ server <- function(input, output) {
    }
  })
  
  # A table showing the predicted sequence discrepancies with a reference sequenc.
  output$visTab <- renderTable({
    observeEvent(obj$seq, {})
    
    if (!is.null(obj$seq) & input$old != "" & !is.null(input$tabmin_num)){
      print(3)
    if (!is.null(obj$seq) & input$old != "" & !is.null(input$tabmin_num)){ #Only if there is a predicted sequence,a reference sequence defined by the user AND the min and max filters have been constructed
      alignament<-msaClustalW(c("Reference"=toupper(input$old),"Predicted"=obj_ab@primarySeq %>% as.character), type="dna")
      
      # We get each aligned sequence separately
      pred<-(alignament@unmasked[2] %>% as.character() %>% strsplit(""))[[1]]
      ref<-(alignament@unmasked[1] %>% as.character() %>% strsplit(""))[[1]]
      cons<-pred == ref
      cons<-pred == ref # T or F vector
      
      aln<-data.frame(pred, ref, cons, PosAln=1:length(pred))
      aln<-data.frame(pred, ref, cons, PosAln=1:length(pred)) #We generate the Alignament Index
      aln<-mutate(aln, cons=case_when(
        cons == T~"",
        TRUE~"?"
      ))
      aln %>% filter(pred != "-") %>% add_column("PosChrom"=1:nrow(.)) %>% 
        merge(aln, all=T) %>% arrange(PosAln) %>% add_column("PosChromAnterior"=c(NA,.$PosChrom[1:(nrow(.)-1)])) %>% 
      aln %>% filter(pred != "-") %>% add_column("PosChrom"=1:nrow(.)) %>% #We generate the Chromatogram Index
        merge(aln, all=T) %>% arrange(PosAln) %>% 
        add_column("PosChromAnterior"=c(NA,.$PosChrom[1:(nrow(.)-1)])) %>% #After merging, we assign in new column the previous index
        filter(cons == "?") %>% 
        filter(PosAln >= input$tabmin_num & PosAln <= input$tabmax_num)
    }
  })
  
  # Function to generate a plot object of the chromatogram with the peaks highlighted and the predicted sequence
  plotvis<-eventReactive( input$butvis, {
    if (!is.null(obj$seq)){
      print("Inicio imagen")
      
      # Definition of the width (number of bases), min (start base index) and max (end base index) of the chromatogram
      width<-input$visWidth
      min<-input$visStart
      max<-min+width
      
      # Defining the bases to be plotted
      let<-obj_ab@primarySeq %>% as.character %>% strsplit("")
      peakpos<-apply(obj_ab@peakAmpMatrix,1,function(x) which(x == max(x,na.rm=T))[1])
      picks_x<-sapply(1:length(peakpos), function(i) obj_ab@peakPosMatrix[i, peakpos[i]])
      
      # Defining the peak position
      peakpos<-apply(obj_ab@peakAmpMatrix,1,function(x) which(x == max(x,na.rm=T))[1]) #Getting the column position with the maximum signal by row
      picks_x<-sapply(1:length(peakpos), function(i) obj_ab@peakPosMatrix[i, peakpos[i]]) #Obtaining the position of the peaks defined by the previous line
      
      # Construction of a data.frame with peak position, base letter and base index
      picks<-data.frame(rows=picks_x, Base=let[[1]], num=1:length(let[[1]])) %>%
        filter(num >= min & num <= max) #%>% mutate(rows=rows-min)
        filter(num >= min & num <= max)
      
      ranPeaks<-range(picks$rows)
      
      obj$plot<-obj_ab@traceMatrix %>% as.data.frame() %>% dplyr::rename(A=V1,C=V2,G=V3,T=V4) %>% add_column(rows=1:nrow(.)) %>% 
      obj$plot<-obj_ab@traceMatrix %>% as.data.frame() %>% 
        dplyr::rename(A=V1,C=V2,G=V3,T=V4) %>% add_column(rows=1:nrow(.)) %>% 
        filter(rows >= ranPeaks[1] & rows <= ranPeaks[2]) %>% gather(Base, Value, -rows) %>% 
        mutate(Base=factor(Base, levels=c("G","A","T","C"))) %>% 
        ggplot(aes(rows, Value))+
@ -319,6 +357,7 @@ server <- function(input, output) {
    }
  })
  
  # Render of the chromatogram plot
  output$visor <- renderPlot({
    observeEvent(input$butvis, {plotvis()})
    obj$plot