You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

207 lines
7.9 KiB

library(shiny)
library(coRdon)
library(Biostrings)
library(tidyverse)
library(pcaMethods)
library(ggpubr)
# Define UI for application that draws a histogram
ui <- fluidPage(
verticalLayout(
titlePanel("ONATRY-CIT"),
wellPanel(
textAreaInput("query", "Text in fasta format", width = "100%", height = "300px"),
actionButton("submit", label = "Submit")
),
plotOutput("result", width = "1200px", height = "800px")
)
)
# Define server logic required to draw a histogram
server <- function(input, output) {
process<-eventReactive(input$submit, {
fa=input$query
fasplit<-strsplit(gsub("^>","",fa), ">")[[1]]
txtQuery<-c()
names<-c()
for (i in 1:length(fasplit)){
query_split<-strsplit(fasplit[[i]], "\n|\r")[[1]]
names<-c(names, strsplit(query_split[1], " ")[[1]][1])
txtQuery<-c(txtQuery, paste0(query_split[2:length(query_split)], collapse = ""))
}
names(txtQuery)<-names
txtQuery<-toupper(gsub("\n","",txtQuery))
dnaQuery<-DNAStringSet(txtQuery)
Query<-codonTable(dnaQuery)
CodRef<-list(
Ala=c("GCG","GCA","GCT","GCC"),
Cys=c("TGT","TGC"),
Asp=c("GAT","GAC"),
Glu=c("GAG","GAA"),
Phe=c("TTT","TTC"),
Gly=c("GGG","GGA","GGT","GGC"),
His=c("CAT","CAC"),
Ile=c("ATA","ATT","ATC"),
Lys=c("AAG","AAA"),
Leu=c("TTG","TTA","CTG","CTA","CTT","CTC"),
Met=c("ATG"),
Asn=c("AAT","AAC"),
Pro=c("CCG","CCA","CCT","CCC"),
Gln=c("CAG","CAA"),
Arg=c("AGG","AGA","CGG","CGA","CGT","CGC"),
Ser=c("AGT","AGC","TCG","TCA","TCT","TCC"),
Thr=c("ACG","ACA","ACT","ACC"),
Val=c("GTG","GTA","GTT","GTC"),
Trp=c("TGG"),
Tyr=c("TAT","TAC"),
End=c("TGA","TAG","TAA")
)
UsedCodons<-unlist(CodRef[sapply(CodRef,length) > 1 & names(CodRef) != "End"])
list2data<-function(x, var.column="Names", val.column="Values"){
names<-c()
for (name in names(x)){
names<-c(names, rep(name, length(x[[name]])))
}
df<-data.frame(names, unlist(x), row.names = NULL)
colnames(df)<-c(var.column, val.column)
return(df)
}
Query_CU<-t(codonCounts(Query)) %>%
as.data.frame() %>%
setNames(names(txtQuery)) %>%
rownames_to_column("codon") %>%
# mutate(across(all_of(names(txtQuery)), function(x) x=1000*x/sum(x))) %>%
merge(list2data(CodRef, "AA","codon")) %>%
group_by(AA) %>%
mutate(across(all_of(names(txtQuery)), function(x) x=x/sum(x))) %>%
ungroup()
pca_ref<-read.table("../Scripts/pca_HAdV5_EGFP_MONO_EMM_BH_VCN_APIS_MOD_PDT_ADAPT_Tox.txt",sep="\t", dec = ",", header = T)
pca_ref$codon<-gsub(" ","",pca_ref$codon)
pca_tab<-merge(pca_ref,
Query_CU %>% select(-AA)
)
pcares<-pca(pca_tab %>% column_to_rownames("codon") %>% as.matrix %>% t(),
method="svd", nPCs=10) #comando para hacer el PCA
g_PCA<-pcares@scores %>%
as.data.frame %>%
rownames_to_column("NAME") %>%
add_column("TYPE"=c(rep("Structural", 20),rep("Early",16), rep("References", 6),
rep("Query",length(names(txtQuery))))) %>%
ggplot(aes(PC1,PC2, color=TYPE))+
geom_point(size=15, alpha=0.25)+
geom_point(data=function(x){filter(x, TYPE %in% c("References","Query"))},size=2, alpha=0.5)+
geom_text(aes(label=NAME),show.legend=FALSE)+
scale_color_manual(values=c("Early"="cornflowerblue","Structural"="darkgrey","References"="brown","Query"="black"))+
theme_gray(base_size = 15)+
ggtitle("PCA Codon Usage")+
theme(aspect.ratio = 1)
hcu<-read.table("../Scripts/Human.Codon.Usage", header = F, sep=" ")
hcu<-rbind(
hcu[,c("V1","V2")] %>% rename(Codon=V1, Usage=V2),
hcu[,c("V3","V4")] %>% rename(Codon=V3, Usage=V4),
hcu[,c("V5","V6")] %>% rename(Codon=V5, Usage=V6),
hcu[,c("V7","V8")] %>% rename(Codon=V7, Usage=V8)
) %>%
mutate(Usage=as.numeric(gsub("\\([0-9]*\\)","",Usage)))
w<-merge(
list2data(CodRef, "AA","Codon"),
hcu
) %>%
group_by(AA) %>%
mutate(Weight=Usage/max(Usage))
Query_codons<-sapply(txtQuery, function(x) substring(x,seq(1,(nchar(x)-2),3), seq(3,nchar(x),3)))
Query_CAI<-do.call(rbind, lapply(names(Query_codons), function(x){
merge(
data.frame(Position=1:length(Query_codons[[x]]),
Codon=Query_codons[[x]],
Name=x),
w[,c("Codon","AA","Weight")]
) %>% arrange(Position)
}))
Query_CAI[Query_CAI$AA %in% c("Met","End","Trp"),"Weight"]<-0
#CAI
CAI<-Query_CAI %>% group_by(Name) %>%
summarise(mean=round(mean(Weight),digits = 2))
lengths<-Query_CAI %>% group_by(Name) %>% summarise(lengths=length(Weight))
lowess<-Query_CAI %>% group_by(Name) %>%
summarise(Weight=tail(lowess(Weight, f=0.15)$y,1))
g_CAI<-ggplot(Query_CAI, aes(Position, Weight, color=Name))+
geom_line(color=rgb(1,1,1,0.0))+
geom_smooth(se=F)+
geom_text(data=data.frame(Position=lengths$lengths+3,
Weight=lowess$Weight,
CAI=CAI$mean,
Name=CAI$Name),
aes(label=CAI), hjust=0, vjust=0.5, show.legend = F)+
ggtitle("CAI along sequence")+
theme_gray(base_size = 15)
AApos3<-sapply(txtQuery, function(x) strsplit(x, "")[[1]][seq(3, nchar(x), 3)])
GC3<-sapply(AApos3, function(x) 100*length(x[x %in% c("G","C")])/length(x))
table<-rbind(
read.table("../Scripts/tabla_cai_human_proteins.txt",header=TRUE,sep="\t",dec=".") %>% add_column(Type="Human"),
read.table("../Scripts/tabla_cai_early_proteins.txt",header=TRUE,sep="\t",dec=".") %>% add_column(Type="Early"),
read.table("../Scripts/tabla_cai_late_proteins.txt",header=TRUE,sep="\t",dec=".") %>% add_column(Type="Late")
)
dtGC3<-data.frame(
Name=c("LGFP", "EGFP","BWT","BHU",names(GC3)),
X.G.C.3.=c(44.8,96.7,70.8,84.9,GC3),
Type=factor(c("Low Expression","High Expression","High Expression","High Expression",rep("Query",length(GC3))),
levels = c("Early","Late","Human","High Expression","Low Expression","Query"))
)
table$Type<-factor(table$Type, levels=c("Early","Late","Human","High Expression","Low Expression","Query"))
g_GC3<-ggplot(table, aes(`X.G.C.3.`, fill=Type))+
geom_density(alpha=0.5)+
geom_density(fill=NA, aes(color=Type))+
scale_fill_manual(values=c("Early"="grey80","Late"="grey30", "Human"="#FFFF99",
"Low Expression"="red","High Expression"="green","Query"="purple"),drop=F)+
scale_color_manual(values=c("Early"="grey30","Late"="grey30", "Human"="#FFFF99",
"Low Expression"="red","High Expression"="green","Query"="purple"),drop=F)+
geom_segment(data=dtGC3, aes(x = X.G.C.3.,xend = X.G.C.3., y=0, yend=0.025, color=Type), linetype=2, linewidth=1, show.legend = F)+
geom_text(data=dtGC3, aes(x = X.G.C.3., y=0.026, label=Name, color=Type), angle=90, vjust=0.5, hjust=0, size=6, show.legend = F)+
xlim(0,100)+
labs(x="%GC3",y="Density")+
theme_classic(base_size = 15)+
ggtitle("%GC3")+
theme(panel.border = element_rect(fill=NA), aspect.ratio = 1)
ggarrange(
ggarrange(g_PCA, g_GC3),
g_CAI, ncol = 1, heights = c(0.6, 0.4)
)
})
output$result <- renderPlot({
observeEvent(input$submit, {})
if(input$query != ""){
process()
}
})
}
# Run the application
shinyApp(ui = ui, server = server)