@ -1,25 +1,26 @@
##########################################################
# #
# This app has been written by Marcel Costa-García, #
# and has reused some internal functions from sangerseqR #
# package, including getPeaks and peakvalues #
# #
##########################################################
############################################################
# #
# This app has been written by Marcel Costa-García, #
# and has reused some internal functions from sangerseqR #
# package, including getPeaks and peakvalues #
# #
############################################################
# Load of dependencies
library ( shiny )
library ( tidyverse )
library ( sangerseqR )
library ( msa )
# Define UI for application that draws a histogram
ui <- fluidPage (
# Define UI ---------------------------------------------------------------
ui <- fluidPage (
# Application title
titlePanel ( " ChromatoR" ) ,
# Sidebar with a slider input for number of bins
navbarPage ( " ChromatoR" ,
navbarPage ( " Apps" ,
tabPanel ( " Detección de Bases" ,
sidebarPanel (
fileInput ( " file1" , " Sube fichero ab1" , multiple = FALSE ) ,
@ -31,14 +32,13 @@ ui <- fluidPage(
textInput ( " old" , label = " Referencia" ) ,
actionButton ( " calab" , " Analizar" )
) ,
# Show a plot of the generated distribution
mainPanel (
tags $ head ( tags $ style ( HTML ( " pre,.wrap { white-space: pre-wrap; word-break: break-all; }" ) ) ) ,
verbatimTextOutput ( " Primseq" ) %>% tagAppendAttributes ( class = " wrap" ) ,
verbatimTextOutput ( " align" )
)
) ,
tabPanel ( " Visor de Cromatograma" ,
sidebarPanel (
numericInput ( " visStart" , label = " Inicio" , value = 0 ) ,
@ -55,29 +55,28 @@ ui <- fluidPage(
)
)
# Define server logic required to draw a histogram
# Define server -----------------------------------------------------------
server <- function ( input , output ) {
# observe({
#
# obj<<-readsangerseq(input$file1)
# }
# })
# Define and initialize reactive variables
obj <- reactiveValues ( )
obj $ aborig <- NULL
obj_ab <- NULL
obj $ seq <- NULL
# File Input
observe ( {
if ( ! is.null ( input $ file1 ) ) {
if ( ! is.null ( input $ file1 ) ) { # This ensures that the reading is only tried when File selected
obj $ aborig <- readsangerseq ( input $ file1 $ datapath )
}
} )
# This generates the threshold selector calculating the median of the peaks
output $ thr <- renderUI ( {
if ( ! is.null ( obj $ aborig ) ) {
print ( 1 )
if ( ! is.null ( obj $ aborig ) ) { # Only when we have a sangerseq object
obj_ab <- obj $ aborig
## Functions
getpeaks <- function ( trace ) {
r <- rle ( trace )
indexes <- which ( rep ( diff ( sign ( diff ( c ( - Inf , r $ values , - Inf ) ) ) ) == -2 ,
@ -94,15 +93,19 @@ server <- function(input, output) {
Apeaks ,
Tpeaks ,
Cpeaks )
thr_calc <- quantile ( peakCusMatrix [ , 2 ] , 0.5 )
thr_calc <- quantile ( peakCusMatrix [ , 2 ] , 0.5 ) # Percentile 50
numericInput ( " thr" , label = " Peak Threshold" , value = thr_calc )
}
} )
# Peak detection and Base asignation
observeEvent ( input $ calab , {
if ( ! is.null ( obj $ aborig ) ) {
obj_ab <- obj $ aborig
## Functions
obj_ab <- obj $ aborig # We will work in a local variable
## Function definition
getpeaks <- function ( trace ) {
r <- rle ( trace )
indexes <- which ( rep ( diff ( sign ( diff ( c ( - Inf , r $ values , - Inf ) ) ) ) == -2 ,
@ -131,34 +134,40 @@ server <- function(input, output) {
}
return ( pos )
}
# Progress notification
progress <- shiny :: Progress $ new ( min = 0 , max = 3 )
progress $ set ( message = " Start" , value = 1 )
print ( " Start" )
# Peak detection for each channel
Apeaks <- getpeaks ( obj_ab @ traceMatrix [ , 1 ] )
Cpeaks <- getpeaks ( obj_ab @ traceMatrix [ , 2 ] )
Gpeaks <- getpeaks ( obj_ab @ traceMatrix [ , 3 ] )
Tpeaks <- getpeaks ( obj_ab @ traceMatrix [ , 4 ] )
# Peaks joining and filtering by defined threshold
peakthr <- input $ thr
peakCusMatrix <- rbind ( Gpeaks [Gpeaks [ , 2 ] > peakthr , ] ,
Apeaks [Apeaks [ , 2 ] > peakthr , ] ,
Tpeaks [Tpeaks [ , 2 ] > peakthr , ] ,
Cpeaks [Cpeaks [ , 2 ] > peakthr , ] )
peakCusMatrix <- peakCusMatrix %>% as.data.frame ( ) %>% dplyr :: rename ( Int = V2 ) %>% group_by ( indexes ) %>% summarise ( Int = max ( Int ) [1 ] )
# In case there is more than one peak by position, we keep the higher
peakCusMatrix <- peakCusMatrix %>% as.data.frame ( ) %>%
dplyr :: rename ( Int = V2 ) %>% group_by ( indexes ) %>% summarise ( Int = max ( Int ) [1 ] )
peakCusMatrix <- as.matrix ( peakCusMatrix )
# Using the custom function, we aggregate peaks defining two distance thresholds
pos1 <- redDiffs ( peakCusMatrix , input $ dist1 )
pos <- redDiffs ( pos1 , input $ dist2 )
print ( nrow ( pos ) )
pos1 <- pos1 [ , 1 ]
pos <- pos [ , 1 ]
pos <- redDiffs ( pos1 , input $ dist2 ) [ , 1 ]
progress $ set ( message = " Peaks Detected" , value = 2 )
print ( " Peaks Detected" )
# This is taken from sangerseqR
# Defining starts and stops of peak windows
primarypeaks <- pos
diffs <- diff ( c ( 0 , primarypeaks ) )
starts <- primarypeaks - 0.5 * diffs
@ -172,11 +181,12 @@ server <- function(input, output) {
tempPosMatrix <- matrix ( nrow = length ( starts ) , ncol = 4 )
tempAmpMatrix <- matrix ( nrow = length ( starts ) , ncol = 4 )
# Ratio min to include a second peak as secondary sequence
ratio <- input $ ratio
length ( starts )
# Base detection for each peak
for ( i in 1 : length ( starts ) ) {
# Detection of peak position and signal for each peak window
Apeak <- peakvalues ( Apeaks , starts [i ] , stops [i ] )
Cpeak <- peakvalues ( Cpeaks , starts [i ] , stops [i ] )
Gpeak <- peakvalues ( Gpeaks , starts [i ] , stops [i ] )
@ -185,42 +195,59 @@ server <- function(input, output) {
is.na ( Cpeak [2 ] ) &
is.na ( Gpeak [2 ] ) &
is.na ( Tpeak [2 ] ) ) next #rare case where no peak found
# Signal of each channel join
signals <- c ( Apeak [1 ] , Cpeak [1 ] , Gpeak [1 ] , Tpeak [1 ] )
tempAmpMatrix [i , ] <- signals
# Peak position of each channel join
positions <- c ( Apeak [2 ] , Cpeak [2 ] , Gpeak [2 ] , Tpeak [2 ] )
tempPosMatrix [i , ] <- positions
# Ratio to the maximum by channel
signalratios <- signals / max ( signals , na.rm = TRUE )
# Channel order definition
Bases <- c ( " A" , " C" , " G" , " T" )
# Bases <- c("G", "A", "T", "G")
# Discarding bases (same order as channel) that doesn't reach the minimum ratio to max
Bases [signalratios < ratio ] <- NA
#sort by decreasing signal strength
# Sort by decreasing signal strength
Bases <- Bases [order ( signals , decreasing = TRUE ) ]
# Definition of primary base and secondary base
positions <- positions [order ( signals , decreasing = TRUE ) ]
if ( length ( Bases [ ! is.na ( Bases ) ] ) == 4
if ( length ( Bases [ ! is.na ( Bases ) ] ) == 4 #if there are 4 bases or none, "N" assignation
| length ( Bases [ ! is.na ( Bases ) ] ) == 0 ) {
primary <- c ( primary , " N" )
secondary <- c ( secondary , " N" )
}
else if ( length ( Bases [ ! is.na ( Bases ) ] ) > 1 ) {
else if ( length ( Bases [ ! is.na ( Bases ) ] ) > 1 ) { #if there is more than 1 base
primary <- c ( primary , Bases [1 ] )
Bases2 <- Bases [2 : 4 ]
secondary <- c ( secondary ,
mergeIUPACLetters ( paste ( sort ( Bases2 [ ! is.na ( Bases2 ) ] ) ,
collapse = " " ) ) )
}
else {
else { #if there is only one base, primary and secondary are the same
primary <- c ( primary , Bases [1 ] )
secondary <- c ( secondary , Bases [1 ] )
}
}
# Redefining sangerseq object with the new sequence and matrices
obj_ab @ peakPosMatrix <- tempPosMatrix [rowSums ( ! is.na ( tempPosMatrix ) ) > 0 , ]
obj_ab @ peakAmpMatrix <- tempAmpMatrix [rowSums ( ! is.na ( tempPosMatrix ) ) > 0 , ]
obj_ab @ primarySeqID <- " sangerseq package primary basecalls"
obj_ab @ primarySeq <- DNAString ( paste ( primary , collapse = " " ) )
obj_ab @ secondarySeqID <- " sangerseq package secondary basecalls"
obj_ab @ secondarySeq <- DNAString ( paste ( secondary , collapse = " " ) )
# We convert obj_ab into a global variable that other functions will be able to use
obj_ab <<- obj_ab
# This is a triggering for other observing events
if ( is.null ( obj $ seq ) ) {
obj $ seq <- 1
} else {
@ -234,25 +261,26 @@ server <- function(input, output) {
}
} )
# Predicted sequence printing
output $ Primseq <- renderText ( {
observeEvent ( obj $ seq , { } )
observeEvent ( obj $ seq , { } ) #This is triggered when the sequence is predicted
if ( ! is.null ( obj $ seq ) ) {
print ( 1 )
as.character ( obj_ab @ primarySeq )
}
} )
# Alignament with a reference sequence
output $ align <- renderText ( {
observeEvent ( obj $ seq , { } )
observeEvent ( obj $ seq , { } ) #This is triggered when the sequence is predicted
if ( ! is.null ( obj $ seq ) & input $ old != " " ) {
print ( 2 )
if ( ! is.null ( obj $ seq ) & input $ old != " " ) { #Only if there is a predicted sequence AND a reference sequence defined by the user
alignament <- msaClustalW ( c ( " Reference" = toupper ( input $ old ) , " Predicted" = obj_ab @ primarySeq %>% as.character ) , type = " dna" )
paste ( capture.output ( print ( alignament , show = " complete" ) ) , collapse = " \n" )
}
} )
# This is a minimal and maximal filter definition for the Visor Table
output $ tabmin <- renderUI ( {
if ( ! is.null ( obj $ seq ) & input $ old != " " ) {
numericInput ( " tabmin_num" , label = " Mínimo en tabla" , value = 0 )
@ -265,46 +293,56 @@ server <- function(input, output) {
}
} )
# A table showing the predicted sequence discrepancies with a reference sequenc.
output $ visTab <- renderTable ( {
observeEvent ( obj $ seq , { } )
if ( ! is.null ( obj $ seq ) & input $ old != " " & ! is.null ( input $ tabmin_num ) ) {
print ( 3 )
if ( ! is.null ( obj $ seq ) & input $ old != " " & ! is.null ( input $ tabmin_num ) ) { #Only if there is a predicted sequence,a reference sequence defined by the user AND the min and max filters have been constructed
alignament <- msaClustalW ( c ( " Reference" = toupper ( input $ old ) , " Predicted" = obj_ab @ primarySeq %>% as.character ) , type = " dna" )
# We get each aligned sequence separately
pred <- ( alignament @ unmasked [2 ] %>% as.character ( ) %>% strsplit ( " " ) ) [ [1 ] ]
ref <- ( alignament @ unmasked [1 ] %>% as.character ( ) %>% strsplit ( " " ) ) [ [1 ] ]
cons <- pred == ref
cons <- pred == ref # T or F vector
aln <- data.frame ( pred , ref , cons , PosAln = 1 : length ( pred ) )
aln <- data.frame ( pred , ref , cons , PosAln = 1 : length ( pred ) ) #We generate the Alignament Index
aln <- mutate ( aln , cons = case_when (
cons == T ~ " " ,
TRUE ~ " ?"
) )
aln %>% filter ( pred != " -" ) %>% add_column ( " PosChrom" = 1 : nrow ( .) ) %>%
merge ( aln , all = T ) %>% arrange ( PosAln ) %>% add_column ( " PosChromAnterior" = c ( NA , .$PosChrom [1 : ( nrow ( .) -1 ) ] ) ) %>%
aln %>% filter ( pred != " -" ) %>% add_column ( " PosChrom" = 1 : nrow ( .) ) %>% #We generate the Chromatogram Index
merge ( aln , all = T ) %>% arrange ( PosAln ) %>%
add_column ( " PosChromAnterior" = c ( NA , .$PosChrom [1 : ( nrow ( .) -1 ) ] ) ) %>% #After merging, we assign in new column the previous index
filter ( cons == " ?" ) %>%
filter ( PosAln >= input $ tabmin_num & PosAln <= input $ tabmax_num )
}
} )
# Function to generate a plot object of the chromatogram with the peaks highlighted and the predicted sequence
plotvis <- eventReactive ( input $ butvis , {
if ( ! is.null ( obj $ seq ) ) {
print ( " Inicio imagen" )
# Definition of the width (number of bases), min (start base index) and max (end base index) of the chromatogram
width <- input $ visWidth
min <- input $ visStart
max <- min + width
# Defining the bases to be plotted
let <- obj_ab @ primarySeq %>% as.character %>% strsplit ( " " )
peakpos <- apply ( obj_ab @ peakAmpMatrix , 1 , function ( x ) which ( x == max ( x , na.rm = T ) ) [1 ] )
picks_x <- sapply ( 1 : length ( peakpos ) , function ( i ) obj_ab @ peakPosMatrix [i , peakpos [i ] ] )
# Defining the peak position
peakpos <- apply ( obj_ab @ peakAmpMatrix , 1 , function ( x ) which ( x == max ( x , na.rm = T ) ) [1 ] ) #Getting the column position with the maximum signal by row
picks_x <- sapply ( 1 : length ( peakpos ) , function ( i ) obj_ab @ peakPosMatrix [i , peakpos [i ] ] ) #Obtaining the position of the peaks defined by the previous line
# Construction of a data.frame with peak position, base letter and base index
picks <- data.frame ( rows = picks_x , Base = let [ [1 ] ] , num = 1 : length ( let [ [1 ] ] ) ) %>%
filter ( num >= min & num <= max ) #%>% mutate(rows=rows-min)
filter ( num >= min & num <= max )
ranPeaks <- range ( picks $ rows )
obj $ plot <- obj_ab @ traceMatrix %>% as.data.frame ( ) %>% dplyr :: rename ( A = V1 , C = V2 , G = V3 , T = V4 ) %>% add_column ( rows = 1 : nrow ( .) ) %>%
obj $ plot <- obj_ab @ traceMatrix %>% as.data.frame ( ) %>%
dplyr :: rename ( A = V1 , C = V2 , G = V3 , T = V4 ) %>% add_column ( rows = 1 : nrow ( .) ) %>%
filter ( rows >= ranPeaks [1 ] & rows <= ranPeaks [2 ] ) %>% gather ( Base , Value , - rows ) %>%
mutate ( Base = factor ( Base , levels = c ( " G" , " A" , " T" , " C" ) ) ) %>%
ggplot ( aes ( rows , Value ) ) +
@ -319,6 +357,7 @@ server <- function(input, output) {
}
} )
# Render of the chromatogram plot
output $ visor <- renderPlot ( {
observeEvent ( input $ butvis , { plotvis ( ) } )
obj $ plot