michal_k

Untitled

Sep 21st, 2020
909
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. from Bio import AlignIO, SeqIO
  2. from scipy.stats import entropy
  3. import numpy as np
  4. import pandas as pd
  5. import seaborn as sns
  6. sns.set_style("whitegrid")
  7. records = list(AlignIO.read("ALL_PL_ALIGNMENT.fasta", "fasta"))
  8. seq_arr = np.zeros(shape=(len(records),len(records[0].seq.__str__())), dtype=object)
  9. for i in range(seq_arr.shape[0]):
  10.     sequence = list(records[i].seq.__str__())
  11.     seq_arr[i,:] = sequence
  12. seq_df = pd.DataFrame(seq_arr)
  13. entropies = []
  14. positions = list(range(seq_df.shape[1]))
  15. for j in range(seq_df.shape[1]):
  16.     counts = seq_df.iloc[:,j].value_counts()
  17.     entropies.append(entropy(counts))
  18. ent_df = pd.DataFrame({"Position": positions, "Entropy":entropies})
  19. ent_df.Position = ent_df.Position + 1
  20. sns.lineplot(x="Position", y="Entropy", data=ent_df.iloc[14380:14440,:])
RAW Paste Data