Advertisement
michal_k

Untitled

Sep 21st, 2020
1,405
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 0.79 KB | None | 0 0
  1. from Bio import AlignIO, SeqIO
  2. from scipy.stats import entropy
  3. import numpy as np
  4. import pandas as pd
  5. import seaborn as sns
  6. sns.set_style("whitegrid")
  7. records = list(AlignIO.read("ALL_PL_ALIGNMENT.fasta", "fasta"))
  8. seq_arr = np.zeros(shape=(len(records),len(records[0].seq.__str__())), dtype=object)
  9. for i in range(seq_arr.shape[0]):
  10.     sequence = list(records[i].seq.__str__())
  11.     seq_arr[i,:] = sequence
  12. seq_df = pd.DataFrame(seq_arr)
  13. entropies = []
  14. positions = list(range(seq_df.shape[1]))
  15. for j in range(seq_df.shape[1]):
  16.     counts = seq_df.iloc[:,j].value_counts()
  17.     entropies.append(entropy(counts))
  18. ent_df = pd.DataFrame({"Position": positions, "Entropy":entropies})
  19. ent_df.Position = ent_df.Position + 1
  20. sns.lineplot(x="Position", y="Entropy", data=ent_df.iloc[14380:14440,:])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement