library(shiny)
|
|
library(coRdon)
|
|
library(Biostrings)
|
|
library(tidyverse)
|
|
library(pcaMethods)
|
|
library(ggpubr)
|
|
|
|
# Define UI for application that draws a histogram
|
|
ui <- fluidPage(
|
|
verticalLayout(
|
|
titlePanel("ONATRY-CIT"),
|
|
wellPanel(
|
|
textAreaInput("query", "Text in fasta format", width = "100%", height = "300px"),
|
|
actionButton("submit", label = "Submit")
|
|
),
|
|
plotOutput("result", width = "1200px", height = "800px")
|
|
)
|
|
)
|
|
|
|
# Define server logic required to draw a histogram
|
|
server <- function(input, output) {
|
|
|
|
|
|
process<-eventReactive(input$submit, {
|
|
fa=input$query
|
|
|
|
|
|
fasplit<-strsplit(gsub("^>","",fa), ">")[[1]]
|
|
txtQuery<-c()
|
|
names<-c()
|
|
for (i in 1:length(fasplit)){
|
|
query_split<-strsplit(fasplit[[i]], "\n|\r")[[1]]
|
|
names<-c(names, strsplit(query_split[1], " ")[[1]][1])
|
|
txtQuery<-c(txtQuery, paste0(query_split[2:length(query_split)], collapse = ""))
|
|
}
|
|
names(txtQuery)<-names
|
|
|
|
txtQuery<-toupper(gsub("\n","",txtQuery))
|
|
dnaQuery<-DNAStringSet(txtQuery)
|
|
|
|
Query<-codonTable(dnaQuery)
|
|
|
|
CodRef<-list(
|
|
Ala=c("GCG","GCA","GCT","GCC"),
|
|
Cys=c("TGT","TGC"),
|
|
Asp=c("GAT","GAC"),
|
|
Glu=c("GAG","GAA"),
|
|
Phe=c("TTT","TTC"),
|
|
Gly=c("GGG","GGA","GGT","GGC"),
|
|
His=c("CAT","CAC"),
|
|
Ile=c("ATA","ATT","ATC"),
|
|
Lys=c("AAG","AAA"),
|
|
Leu=c("TTG","TTA","CTG","CTA","CTT","CTC"),
|
|
Met=c("ATG"),
|
|
Asn=c("AAT","AAC"),
|
|
Pro=c("CCG","CCA","CCT","CCC"),
|
|
Gln=c("CAG","CAA"),
|
|
Arg=c("AGG","AGA","CGG","CGA","CGT","CGC"),
|
|
Ser=c("AGT","AGC","TCG","TCA","TCT","TCC"),
|
|
Thr=c("ACG","ACA","ACT","ACC"),
|
|
Val=c("GTG","GTA","GTT","GTC"),
|
|
Trp=c("TGG"),
|
|
Tyr=c("TAT","TAC"),
|
|
End=c("TGA","TAG","TAA")
|
|
)
|
|
|
|
UsedCodons<-unlist(CodRef[sapply(CodRef,length) > 1 & names(CodRef) != "End"])
|
|
|
|
list2data<-function(x, var.column="Names", val.column="Values"){
|
|
names<-c()
|
|
for (name in names(x)){
|
|
names<-c(names, rep(name, length(x[[name]])))
|
|
}
|
|
df<-data.frame(names, unlist(x), row.names = NULL)
|
|
colnames(df)<-c(var.column, val.column)
|
|
return(df)
|
|
}
|
|
|
|
Query_CU<-t(codonCounts(Query)) %>%
|
|
as.data.frame() %>%
|
|
setNames(names(txtQuery)) %>%
|
|
rownames_to_column("codon") %>%
|
|
# mutate(across(all_of(names(txtQuery)), function(x) x=1000*x/sum(x))) %>%
|
|
merge(list2data(CodRef, "AA","codon")) %>%
|
|
group_by(AA) %>%
|
|
mutate(across(all_of(names(txtQuery)), function(x) x=x/sum(x))) %>%
|
|
ungroup()
|
|
|
|
pca_ref<-read.table("../Scripts/pca_HAdV5_EGFP_MONO_EMM_BH_VCN_APIS_MOD_PDT_ADAPT_Tox.txt",sep="\t", dec = ",", header = T)
|
|
pca_ref$codon<-gsub(" ","",pca_ref$codon)
|
|
pca_tab<-merge(pca_ref,
|
|
Query_CU %>% select(-AA)
|
|
)
|
|
|
|
|
|
pcares<-pca(pca_tab %>% column_to_rownames("codon") %>% as.matrix %>% t(),
|
|
method="svd", nPCs=10) #comando para hacer el PCA
|
|
|
|
g_PCA<-pcares@scores %>%
|
|
as.data.frame %>%
|
|
rownames_to_column("NAME") %>%
|
|
add_column("TYPE"=c(rep("Structural", 20),rep("Early",16), rep("References", 6),
|
|
rep("Query",length(names(txtQuery))))) %>%
|
|
ggplot(aes(PC1,PC2, color=TYPE))+
|
|
geom_point(size=15, alpha=0.25)+
|
|
geom_point(data=function(x){filter(x, TYPE %in% c("References","Query"))},size=2, alpha=0.5)+
|
|
geom_text(aes(label=NAME),show.legend=FALSE)+
|
|
scale_color_manual(values=c("Early"="cornflowerblue","Structural"="darkgrey","References"="brown","Query"="black"))+
|
|
theme_gray(base_size = 15)+
|
|
ggtitle("PCA Codon Usage")+
|
|
theme(aspect.ratio = 1)
|
|
|
|
hcu<-read.table("../Scripts/Human.Codon.Usage", header = F, sep=" ")
|
|
hcu<-rbind(
|
|
hcu[,c("V1","V2")] %>% rename(Codon=V1, Usage=V2),
|
|
hcu[,c("V3","V4")] %>% rename(Codon=V3, Usage=V4),
|
|
hcu[,c("V5","V6")] %>% rename(Codon=V5, Usage=V6),
|
|
hcu[,c("V7","V8")] %>% rename(Codon=V7, Usage=V8)
|
|
) %>%
|
|
mutate(Usage=as.numeric(gsub("\\([0-9]*\\)","",Usage)))
|
|
|
|
w<-merge(
|
|
list2data(CodRef, "AA","Codon"),
|
|
hcu
|
|
) %>%
|
|
group_by(AA) %>%
|
|
mutate(Weight=Usage/max(Usage))
|
|
|
|
Query_codons<-sapply(txtQuery, function(x) substring(x,seq(1,(nchar(x)-2),3), seq(3,nchar(x),3)))
|
|
|
|
Query_CAI<-do.call(rbind, lapply(names(Query_codons), function(x){
|
|
merge(
|
|
data.frame(Position=1:length(Query_codons[[x]]),
|
|
Codon=Query_codons[[x]],
|
|
Name=x),
|
|
w[,c("Codon","AA","Weight")]
|
|
) %>% arrange(Position)
|
|
}))
|
|
|
|
Query_CAI[Query_CAI$AA %in% c("Met","End","Trp"),"Weight"]<-0
|
|
|
|
#CAI
|
|
CAI<-Query_CAI %>% group_by(Name) %>%
|
|
summarise(mean=round(mean(Weight),digits = 2))
|
|
lengths<-Query_CAI %>% group_by(Name) %>% summarise(lengths=length(Weight))
|
|
lowess<-Query_CAI %>% group_by(Name) %>%
|
|
summarise(Weight=tail(lowess(Weight, f=0.15)$y,1))
|
|
|
|
g_CAI<-ggplot(Query_CAI, aes(Position, Weight, color=Name))+
|
|
geom_line(color=rgb(1,1,1,0.0))+
|
|
geom_smooth(se=F)+
|
|
geom_text(data=data.frame(Position=lengths$lengths+3,
|
|
Weight=lowess$Weight,
|
|
CAI=CAI$mean,
|
|
Name=CAI$Name),
|
|
aes(label=CAI), hjust=0, vjust=0.5, show.legend = F)+
|
|
ggtitle("CAI along sequence")+
|
|
theme_gray(base_size = 15)
|
|
|
|
AApos3<-sapply(txtQuery, function(x) strsplit(x, "")[[1]][seq(3, nchar(x), 3)])
|
|
GC3<-sapply(AApos3, function(x) 100*length(x[x %in% c("G","C")])/length(x))
|
|
|
|
|
|
table<-rbind(
|
|
read.table("../Scripts/tabla_cai_human_proteins.txt",header=TRUE,sep="\t",dec=".") %>% add_column(Type="Human"),
|
|
read.table("../Scripts/tabla_cai_early_proteins.txt",header=TRUE,sep="\t",dec=".") %>% add_column(Type="Early"),
|
|
read.table("../Scripts/tabla_cai_late_proteins.txt",header=TRUE,sep="\t",dec=".") %>% add_column(Type="Late")
|
|
)
|
|
|
|
dtGC3<-data.frame(
|
|
Name=c("LGFP", "EGFP","BWT","BHU",names(GC3)),
|
|
X.G.C.3.=c(44.8,96.7,70.8,84.9,GC3),
|
|
Type=factor(c("Low Expression","High Expression","High Expression","High Expression",rep("Query",length(GC3))),
|
|
levels = c("Early","Late","Human","High Expression","Low Expression","Query"))
|
|
)
|
|
table$Type<-factor(table$Type, levels=c("Early","Late","Human","High Expression","Low Expression","Query"))
|
|
g_GC3<-ggplot(table, aes(`X.G.C.3.`, fill=Type))+
|
|
geom_density(alpha=0.5)+
|
|
geom_density(fill=NA, aes(color=Type))+
|
|
scale_fill_manual(values=c("Early"="grey80","Late"="grey30", "Human"="#FFFF99",
|
|
"Low Expression"="red","High Expression"="green","Query"="purple"),drop=F)+
|
|
scale_color_manual(values=c("Early"="grey30","Late"="grey30", "Human"="#FFFF99",
|
|
"Low Expression"="red","High Expression"="green","Query"="purple"),drop=F)+
|
|
geom_segment(data=dtGC3, aes(x = X.G.C.3.,xend = X.G.C.3., y=0, yend=0.025, color=Type), linetype=2, linewidth=1, show.legend = F)+
|
|
geom_text(data=dtGC3, aes(x = X.G.C.3., y=0.026, label=Name, color=Type), angle=90, vjust=0.5, hjust=0, size=6, show.legend = F)+
|
|
xlim(0,100)+
|
|
labs(x="%GC3",y="Density")+
|
|
theme_classic(base_size = 15)+
|
|
ggtitle("%GC3")+
|
|
theme(panel.border = element_rect(fill=NA), aspect.ratio = 1)
|
|
|
|
ggarrange(
|
|
ggarrange(g_PCA, g_GC3),
|
|
g_CAI, ncol = 1, heights = c(0.6, 0.4)
|
|
)
|
|
})
|
|
|
|
output$result <- renderPlot({
|
|
observeEvent(input$submit, {})
|
|
if(input$query != ""){
|
|
process()
|
|
}
|
|
})
|
|
}
|
|
|
|
# Run the application
|
|
shinyApp(ui = ui, server = server)
|