In [8]:
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
from Bio import SeqIO
from Bio.SeqUtils import GC
from Bio.Align.Applications import ClustalwCommandline
from Bio import AlignIO
from Bio import Phylo
import os
from bokeh.io import output_notebook, show
from bokeh.plotting import figure
from bokeh.models.ranges import FactorRange
# Configure Bokeh to render figures inline in the notebook; must run before any show() call below.
output_notebook()
In [9]:
# Accession IDs of the coronavirus genomes to compare; each one is expected
# to exist as "<accession>.fna" inside the genome directory below.
seq_ids = ['AY274119', 'AY278488.2', 'MG772933', 'MN908947', 'MN988668', 'MN988669']
genome_dir = "./Data/Coronavirus_genomes"

# Read one record per accession, preserving the order of seq_ids.
# A comprehension keeps the loop variable out of the notebook namespace
# (the previous loop variable was named `id`, which shadowed the builtin
# id() for every later cell).
fasta_sequences = [
    SeqIO.read(os.path.join(genome_dir, seq_id + ".fna"), "fasta")
    for seq_id in seq_ids
]
fasta_sequences
Out[9]:
In [10]:
# GC content comparison: one GC(...) value per loaded record,
# in the same order as fasta_sequences.
gcContents = [GC(record.seq) for record in fasta_sequences]
In [11]:
# Vertical bar chart of the GC values, with one categorical bar per accession ID.
gcPlot = figure(
    x_range=FactorRange(factors=seq_ids),
    height=400,
    width=700,
    title='GC content %',
)

# Title styling: centered, larger font.
gcPlot.title.align = 'center'
gcPlot.title.text_font_size = '16pt'

# Bars are keyed by accession ID; bar height is the matching entry of gcContents.
gcPlot.vbar(x=seq_ids, top=gcContents, width=0.75)
show(gcPlot)
In [12]:
# Write every loaded record into a single multi-record FASTA file;
# the value returned by SeqIO.write is displayed as the cell output.
SeqIO.write(
    sequences=fasta_sequences,
    handle='coronaviruses.fasta',
    format='fasta',
)
Out[12]:
In [13]:
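# Optional (disabled): build the ClustalW command that aligns coronaviruses.fasta.
# Uncomment to re-run the alignment; requires the `clustalw2` executable on PATH.
#cmd = ClustalwCommandline("clustalw2", infile="coronaviruses.fasta")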
In [14]:
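# Optional (disabled): execute the ClustalW command `cmd` from the previous cell.
# Uncomment together with that cell to re-run the alignment.
#import os
#stdout, stderr = cmd()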
In [15]:
# The alignment file is an artifact of running ClustalW on coronaviruses.fasta.
# Generate it only when it is missing, so a fresh "Restart & Run All" works
# while an existing alignment is reused instead of being recomputed.
# Requires the `clustalw2` executable to be available on PATH.
if not os.path.exists("coronaviruses.aln"):
    clustalw_cmd = ClustalwCommandline("clustalw2", infile="coronaviruses.fasta")
    stdout, stderr = clustalw_cmd()

# Parse the Clustal-format alignment and preview the first 100 columns of every row.
alignment = AlignIO.read("coronaviruses.aln", "clustal")
print(alignment[:, :100])
In [16]:
# Load the Newick tree stored in coronaviruses.dnd
# (presumably the guide tree ClustalW writes next to the alignment -- TODO confirm).
tree = Phylo.read("coronaviruses.dnd", "newick")

# Derive the genome count from the tree itself: a hard-coded number in the
# title goes stale whenever the list of accessions changes.
tree.name = 'Simple tree of {} related coronaviruses'.format(tree.count_terminals())
tree.name
Out[16]:
In [17]:
# Plain-text rendering of the tree, printed directly as the cell output.
Phylo.draw_ascii(tree=tree)
In [18]:
# Graphical rendering of the same tree.
# NOTE(review): Phylo.draw may not return a figure object, in which case `fig` is None -- confirm before relying on it.
fig = Phylo.draw(tree=tree)