Browse Source

Explicant i comentant l'script.

main
marcelcosta 1 year ago
parent
commit
6fcf75f887
1 changed files with 94 additions and 55 deletions
  1. +94
    -55
      chromatoR/app.R

+ 94
- 55
chromatoR/app.R

@ -1,25 +1,26 @@
##########################################################
# #
# This app has been written by Marcel Costa-García, #
# and has reused some internal functions from sangerseqR #
# package, including getPeaks and peakvalues #
# #
##########################################################
############################################################
# #
# This app has been written by Marcel Costa-García, #
# and has reused some internal functions from sangerseqR #
# package, including getPeaks and peakvalues #
# #
############################################################
# Load of dependencies
library(shiny)
library(tidyverse)
library(sangerseqR)
library(msa)
# Define UI for application that draws a histogram
ui <- fluidPage(
# Define UI ---------------------------------------------------------------
ui <- fluidPage(
# Application title
titlePanel("ChromatoR"),
# Sidebar with a slider input for number of bins
navbarPage("ChromatoR",
navbarPage("Apps",
tabPanel("Detección de Bases",
sidebarPanel(
fileInput("file1", "Sube fichero ab1", multiple = FALSE),
@ -31,14 +32,13 @@ ui <- fluidPage(
textInput("old", label="Referencia"),
actionButton("calab", "Analizar")
),
# Show a plot of the generated distribution
mainPanel(
tags$head(tags$style(HTML("pre,.wrap { white-space: pre-wrap; word-break: break-all; }"))),
verbatimTextOutput("Primseq") %>% tagAppendAttributes(class="wrap"),
verbatimTextOutput("align")
)
),
tabPanel("Visor de Cromatograma",
sidebarPanel(
numericInput("visStart", label="Inicio", value=0),
@ -55,29 +55,28 @@ ui <- fluidPage(
)
)
# Define server logic required to draw a histogram
# Define server -----------------------------------------------------------
server <- function(input, output) {
# observe({
#
# obj<<-readsangerseq(input$file1)
# }
# })
# Define and initialize reactive variables
obj<-reactiveValues()
obj$aborig<-NULL
obj_ab<-NULL
obj$seq<-NULL
# File Input
observe({
if (!is.null(input$file1)){
if (!is.null(input$file1)){ # This ensures that the reading is only tried when File selected
obj$aborig<-readsangerseq(input$file1$datapath)
}
})
# This generates the threshold selector calculating the median of the peaks
output$thr<-renderUI({
if (!is.null(obj$aborig)){
print(1)
if (!is.null(obj$aborig)){ # Only when we have a sangerseq object
obj_ab<-obj$aborig
## Functions
getpeaks <- function(trace) {
r <- rle(trace)
indexes <- which(rep(diff(sign(diff(c(-Inf, r$values, -Inf)))) == -2,
@ -94,15 +93,19 @@ server <- function(input, output) {
Apeaks,
Tpeaks,
Cpeaks)
thr_calc<-quantile(peakCusMatrix[,2], 0.5)
thr_calc<-quantile(peakCusMatrix[,2], 0.5) # Percentile 50
numericInput("thr", label = "Peak Threshold", value = thr_calc)
}
})
# Peak detection and Base asignation
observeEvent(input$calab, {
if (!is.null(obj$aborig)){
obj_ab<-obj$aborig
## Functions
obj_ab<-obj$aborig # We will work in a local variable
## Function definition
getpeaks <- function(trace) {
r <- rle(trace)
indexes <- which(rep(diff(sign(diff(c(-Inf, r$values, -Inf)))) == -2,
@ -131,34 +134,40 @@ server <- function(input, output) {
}
return(pos)
}
# Progress notification
progress <- shiny::Progress$new(min=0, max=3)
progress$set(message = "Start", value = 1)
print("Start")
# Peak detection for each channel
Apeaks <- getpeaks(obj_ab@traceMatrix[,1])
Cpeaks <- getpeaks(obj_ab@traceMatrix[,2])
Gpeaks <- getpeaks(obj_ab@traceMatrix[,3])
Tpeaks <- getpeaks(obj_ab@traceMatrix[,4])
# Peaks joining and filtering by defined threshold
peakthr<-input$thr
peakCusMatrix<-rbind(Gpeaks[Gpeaks[,2] > peakthr,],
Apeaks[Apeaks[,2] > peakthr,],
Tpeaks[Tpeaks[,2] > peakthr,],
Cpeaks[Cpeaks[,2] > peakthr,])
peakCusMatrix<-peakCusMatrix %>% as.data.frame() %>% dplyr::rename(Int=V2) %>% group_by(indexes) %>% summarise(Int=max(Int)[1])
# In case there is more than one peak by position, we keep the higher
peakCusMatrix<-peakCusMatrix %>% as.data.frame() %>%
dplyr::rename(Int=V2) %>% group_by(indexes) %>% summarise(Int=max(Int)[1])
peakCusMatrix<-as.matrix(peakCusMatrix)
# Using the custom function, we aggregate peaks defining two distance thresholds
pos1<-redDiffs(peakCusMatrix, input$dist1)
pos<-redDiffs(pos1, input$dist2)
print(nrow(pos))
pos1<-pos1[,1]
pos<-pos[,1]
pos<-redDiffs(pos1, input$dist2)[,1]
progress$set(message = "Peaks Detected", value = 2)
print("Peaks Detected")
# This is taken from sangerseqR
# Defining starts and stops of peak windows
primarypeaks <- pos
diffs <- diff(c(0,primarypeaks))
starts <- primarypeaks - 0.5*diffs
@ -172,11 +181,12 @@ server <- function(input, output) {
tempPosMatrix <- matrix(nrow=length(starts), ncol=4)
tempAmpMatrix <- matrix(nrow=length(starts), ncol=4)
# Ratio min to include a second peak as secondary sequence
ratio<-input$ratio
length(starts)
# Base detection for each peak
for(i in 1:length(starts)) {
# Detection of peak position and signal for each peak window
Apeak <- peakvalues(Apeaks, starts[i], stops[i])
Cpeak <- peakvalues(Cpeaks, starts[i], stops[i])
Gpeak <- peakvalues(Gpeaks, starts[i], stops[i])
@ -185,42 +195,59 @@ server <- function(input, output) {
is.na(Cpeak[2]) &
is.na(Gpeak[2]) &
is.na(Tpeak[2])) next #rare case where no peak found
# Signal of each channel join
signals <- c(Apeak[1], Cpeak[1], Gpeak[1], Tpeak[1])
tempAmpMatrix[i,] <- signals
# Peak position of each channel join
positions <- c(Apeak[2], Cpeak[2], Gpeak[2], Tpeak[2])
tempPosMatrix[i,] <- positions
# Ratio to the maximum by channel
signalratios <- signals/max(signals, na.rm=TRUE)
# Channel order definition
Bases <- c("A", "C", "G", "T")
# Bases <- c("G", "A", "T", "G")
# Discarding bases (same order as channel) that doesn't reach the minimum ratio to max
Bases[signalratios < ratio] <- NA
#sort by decreasing signal strength
# Sort by decreasing signal strength
Bases <- Bases[order(signals, decreasing=TRUE)]
# Definition of primary base and secondary base
positions <- positions[order(signals, decreasing=TRUE)]
if(length(Bases[!is.na(Bases)]) == 4
if(length(Bases[!is.na(Bases)]) == 4 #if there are 4 bases or none, "N" assignation
| length(Bases[!is.na(Bases)]) == 0) {
primary <- c(primary, "N")
secondary <- c(secondary, "N")
}
else if(length(Bases[!is.na(Bases)]) > 1) {
else if(length(Bases[!is.na(Bases)]) > 1) { #if there is more than 1 base
primary <- c(primary, Bases[1])
Bases2 <- Bases[2:4]
secondary <- c(secondary,
mergeIUPACLetters(paste(sort(Bases2[!is.na(Bases2)]),
collapse="")))
}
else {
else { #if there is only one base, primary and secondary are the same
primary <- c(primary, Bases[1])
secondary <- c(secondary, Bases[1])
}
}
# Redefining sangerseq object with the new sequence and matrices
obj_ab@peakPosMatrix <- tempPosMatrix[rowSums(!is.na(tempPosMatrix)) > 0,]
obj_ab@peakAmpMatrix <- tempAmpMatrix[rowSums(!is.na(tempPosMatrix)) > 0,]
obj_ab@primarySeqID <- "sangerseq package primary basecalls"
obj_ab@primarySeq <- DNAString(paste(primary, collapse=""))
obj_ab@secondarySeqID <- "sangerseq package secondary basecalls"
obj_ab@secondarySeq <- DNAString(paste(secondary, collapse=""))
# We convert obj_ab into a global variable that other functions will be able to use
obj_ab<<-obj_ab
# This is a triggering for other observing events
if (is.null(obj$seq)){
obj$seq<-1
}else{
@ -234,25 +261,26 @@ server <- function(input, output) {
}
})
# Predicted sequence printing
output$Primseq <- renderText({
observeEvent(obj$seq, {})
observeEvent(obj$seq, {}) #This is triggered when the sequence is predicted
if (!is.null(obj$seq)){
print(1)
as.character(obj_ab@primarySeq)
}
})
# Alignament with a reference sequence
output$align <- renderText({
observeEvent(obj$seq, {})
observeEvent(obj$seq, {}) #This is triggered when the sequence is predicted
if (!is.null(obj$seq) & input$old != ""){
print(2)
if (!is.null(obj$seq) & input$old != ""){ #Only if there is a predicted sequence AND a reference sequence defined by the user
alignament<-msaClustalW(c("Reference"=toupper(input$old),"Predicted"=obj_ab@primarySeq %>% as.character), type="dna")
paste(capture.output(print(alignament, show="complete")), collapse="\n")
}
})
# This is a minimal and maximal filter definition for the Visor Table
output$tabmin<-renderUI({
if (!is.null(obj$seq) & input$old != ""){
numericInput("tabmin_num", label = "Mínimo en tabla", value = 0)
@ -265,46 +293,56 @@ server <- function(input, output) {
}
})
# A table showing the predicted sequence discrepancies with a reference sequenc.
output$visTab <- renderTable({
observeEvent(obj$seq, {})
if (!is.null(obj$seq) & input$old != "" & !is.null(input$tabmin_num)){
print(3)
if (!is.null(obj$seq) & input$old != "" & !is.null(input$tabmin_num)){ #Only if there is a predicted sequence,a reference sequence defined by the user AND the min and max filters have been constructed
alignament<-msaClustalW(c("Reference"=toupper(input$old),"Predicted"=obj_ab@primarySeq %>% as.character), type="dna")
# We get each aligned sequence separately
pred<-(alignament@unmasked[2] %>% as.character() %>% strsplit(""))[[1]]
ref<-(alignament@unmasked[1] %>% as.character() %>% strsplit(""))[[1]]
cons<-pred == ref
cons<-pred == ref # T or F vector
aln<-data.frame(pred, ref, cons, PosAln=1:length(pred))
aln<-data.frame(pred, ref, cons, PosAln=1:length(pred)) #We generate the Alignament Index
aln<-mutate(aln, cons=case_when(
cons == T~"",
TRUE~"?"
))
aln %>% filter(pred != "-") %>% add_column("PosChrom"=1:nrow(.)) %>%
merge(aln, all=T) %>% arrange(PosAln) %>% add_column("PosChromAnterior"=c(NA,.$PosChrom[1:(nrow(.)-1)])) %>%
aln %>% filter(pred != "-") %>% add_column("PosChrom"=1:nrow(.)) %>% #We generate the Chromatogram Index
merge(aln, all=T) %>% arrange(PosAln) %>%
add_column("PosChromAnterior"=c(NA,.$PosChrom[1:(nrow(.)-1)])) %>% #After merging, we assign in new column the previous index
filter(cons == "?") %>%
filter(PosAln >= input$tabmin_num & PosAln <= input$tabmax_num)
}
})
# Function to generate a plot object of the chromatogram with the peaks highlighted and the predicted sequence
plotvis<-eventReactive( input$butvis, {
if (!is.null(obj$seq)){
print("Inicio imagen")
# Definition of the width (number of bases), min (start base index) and max (end base index) of the chromatogram
width<-input$visWidth
min<-input$visStart
max<-min+width
# Defining the bases to be plotted
let<-obj_ab@primarySeq %>% as.character %>% strsplit("")
peakpos<-apply(obj_ab@peakAmpMatrix,1,function(x) which(x == max(x,na.rm=T))[1])
picks_x<-sapply(1:length(peakpos), function(i) obj_ab@peakPosMatrix[i, peakpos[i]])
# Defining the peak position
peakpos<-apply(obj_ab@peakAmpMatrix,1,function(x) which(x == max(x,na.rm=T))[1]) #Getting the column position with the maximum signal by row
picks_x<-sapply(1:length(peakpos), function(i) obj_ab@peakPosMatrix[i, peakpos[i]]) #Obtaining the position of the peaks defined by the previous line
# Construction of a data.frame with peak position, base letter and base index
picks<-data.frame(rows=picks_x, Base=let[[1]], num=1:length(let[[1]])) %>%
filter(num >= min & num <= max) #%>% mutate(rows=rows-min)
filter(num >= min & num <= max)
ranPeaks<-range(picks$rows)
obj$plot<-obj_ab@traceMatrix %>% as.data.frame() %>% dplyr::rename(A=V1,C=V2,G=V3,T=V4) %>% add_column(rows=1:nrow(.)) %>%
obj$plot<-obj_ab@traceMatrix %>% as.data.frame() %>%
dplyr::rename(A=V1,C=V2,G=V3,T=V4) %>% add_column(rows=1:nrow(.)) %>%
filter(rows >= ranPeaks[1] & rows <= ranPeaks[2]) %>% gather(Base, Value, -rows) %>%
mutate(Base=factor(Base, levels=c("G","A","T","C"))) %>%
ggplot(aes(rows, Value))+
@ -319,6 +357,7 @@ server <- function(input, output) {
}
})
# Render of the chromatogram plot
output$visor <- renderPlot({
observeEvent(input$butvis, {plotvis()})
obj$plot

Loading…
Cancel
Save