Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
"""Plot per-column Shannon entropy for a window of a multiple sequence alignment.

The script reads a FASTA alignment, counts the symbols in every alignment
column, turns those counts into an entropy value, and draws the entropy
profile for a fixed window of columns with seaborn.
"""
import numpy as np
import pandas as pd
import seaborn as sns
from Bio import AlignIO, SeqIO
from scipy.stats import entropy

# Input alignment, read in FASTA format.
ALIGNMENT_PATH = "ALL_PL_ALIGNMENT.fasta"

# Row slice of the entropy table that gets plotted. The slice is 0-based and
# half-open, while the "Position" column is 1-based, so rows 14380..14439
# correspond to alignment positions 14381..14440.
WINDOW_START = 14380
WINDOW_STOP = 14440

sns.set_style("whitegrid")

records = list(AlignIO.read(ALIGNMENT_PATH, "fasta"))

# One row per aligned sequence, one column per alignment position; every cell
# holds a single character of the aligned sequence.
seq_arr = np.array([list(str(record.seq)) for record in records], dtype=object)
seq_df = pd.DataFrame(seq_arr)

# Entropy of the symbol distribution in each column. Every distinct character
# present in a column is counted as its own symbol; scipy's ``entropy``
# normalises the raw counts into probabilities before computing the value.
positions = list(range(seq_df.shape[1]))
entropies = [entropy(seq_df.iloc[:, j].value_counts()) for j in positions]

ent_df = pd.DataFrame({"Position": positions, "Entropy": entropies})
ent_df["Position"] += 1  # report 1-based alignment positions

# NOTE(review): if the alignment has fewer than WINDOW_STOP columns the slice
# is silently clipped (possibly to an empty frame) - confirm the window
# matches the alignment being analysed.
sns.lineplot(
    x="Position",
    y="Entropy",
    data=ent_df.iloc[WINDOW_START:WINDOW_STOP, :],
)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement