Advertisement
cdocpalao

pyMISA_LKMPalao

May 22nd, 2019
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.11 KB | None | 0 0
  ########
"""
Post-process the SSR datasets derived from primerpro.pl.

Reads three SSR result tables (Abuab, BC and Pacol), partitions the values
of their 'SSR Motif' column into dataset-specific, pairwise-shared and
shared-by-all groups, then writes the matching rows and the motif lists to
CSV files in the working directory.
"""
######## step 1
import os

import pandas as pd
from pandas import DataFrame
import matplotlib.pyplot as plt
import seaborn as sns
#import missingno as msno
#from pandas import DataFrame
#import numpy as np

# All relative input and output paths below resolve against this directory.
os.chdir('C:/Users/bmpal/OneDrive/Desktop/csv/2019Feb3')

csv1_ = 'Abuabcontigs.fasta.results_forDatabase_AbuSSRsdroppedNA.csv'
csv1 = os.path.abspath(csv1_)

csv2_ = 'contigbcmerged.fa.results_forDatabase_BCSSRsdroppedNA.csv'
csv2 = os.path.abspath(csv2_)

csv3_ = 'Pacolcontigs.fasta.results_forDatabase_PacolSSRdroppedNA.csv'
csv3 = os.path.abspath(csv3_)

#markers_used_ = '/Users/beatrizpalao/Documents/dna_seq/sequences/list_markers_mod.csv'
#markers_used = os.path.abspath(markers_used_)

# Load files into dataframes: df1 = Abuab, df2 = BC, df3 = Pacol
df1 = pd.read_csv(csv1, sep=',', header=0, encoding="ISO-8859-1")
df2 = pd.read_csv(csv2, sep=',', header=0, encoding="ISO-8859-1")
df3 = pd.read_csv(csv3, sep=',', header=0, encoding="ISO-8859-1")
#df_markers = pd.read_csv(markers_used,sep=',',header = 0, encoding = "ISO-8859-1")

SSR_Abu = tuple(df1['SSR Motif'])
SSR_BC = tuple(df2['SSR Motif'])
SSR_Pac = tuple(df3['SSR Motif'])

#abu_markers = list(df_markers['abu_specific'])
#pac_markers = list(df_markers['pacol_specific'])
#bc_markers = list(df_markers['bc_specific'])

# set processing
# Build each motif set once and reuse it in every expression below.
_abu_set = set(SSR_Abu)
_bc_set = set(SSR_BC)
_pac_set = set(SSR_Pac)

SSR_union1 = list(_bc_set | _pac_set)  # Combined Pacol OR BC
SSR_union2 = list(_bc_set | _abu_set)  # Combined Abuab OR BC
SSR_union3 = list(_abu_set | _pac_set)  # Combined Abuab OR Pacol

# A motif is "specific" when it appears in neither of the other two datasets.
SSR_Abu_spef = list(_abu_set - (_bc_set | _pac_set))  # specific to Abuab
SSR_Pac_spef = list(_pac_set - (_bc_set | _abu_set))  # specific to Pacol
SSR_BC_spef = list(_bc_set - (_abu_set | _pac_set))  # specific to BC

# Pairwise-common motifs exclude anything also present in the third dataset.
com_Abu_BC_ = list((_abu_set & _bc_set) - _pac_set)  # common to Abuab and BC
com_Pac_BC_ = list((_pac_set & _bc_set) - _abu_set)  # common to Pacol and BC
com_Abu_Pac_ = list((_abu_set & _pac_set) - _bc_set)  # common to Abuab and Pacol
com_ALL_ = list(_abu_set & _pac_set & _bc_set)  # common to ALL

# look up SSR to dataframe
Abu_spef = df1[df1['SSR Motif'].isin(SSR_Abu_spef)]
BC_spef = df2[df2['SSR Motif'].isin(SSR_BC_spef)]
Pac_spef = df3[df3['SSR Motif'].isin(SSR_Pac_spef)]

com_Abu_BC = df1[df1['SSR Motif'].isin(com_Abu_BC_)]
com_Pac_BC = df3[df3['SSR Motif'].isin(com_Pac_BC_)]
# Fixed: the closing bracket used to sit before .isin(), so df1 was indexed
# with the motif values as column labels instead of being filtered by a
# boolean row mask like the sibling lookups.
com_Abu_Pac = df1[df1['SSR Motif'].isin(com_Abu_Pac_)]

com_ALL_in_Abu = df1[df1['SSR Motif'].isin(com_ALL_)]
com_ALL_in_Pac = df3[df3['SSR Motif'].isin(com_ALL_)]
com_ALL_in_BC = df2[df2['SSR Motif'].isin(com_ALL_)]


def _save_with_dropna(frame, stem):
    """Write *frame* to '<stem>.csv' and its NA-free rows to '<stem>_dropNA.csv'.

    Both files are tab-separated despite the .csv extension.
    """
    frame.to_csv(stem + '.csv', sep='\t')
    frame.dropna(axis=0, how='any').to_csv(stem + '_dropNA.csv', sep='\t')


# save output as csv files
for _frame, _stem in (
    (Abu_spef, 'Abuab_SSR_spef'),
    (Pac_spef, 'Pacol_SSR_spef'),
    (BC_spef, 'BC_SSR_spef'),
    (com_Abu_BC, 'Abuab_SSR_common_to_BC'),
    (com_Pac_BC, 'Pacol_SSR_common_to_BC'),
    (com_Abu_Pac, 'Abuab_SSR_common_to_Pacol'),
    (com_ALL_in_Abu, 'common_ALL_lookup_Abuab'),
    (com_ALL_in_Pac, 'common_ALL_lookup_Pacol'),
    (com_ALL_in_BC, 'common_ALL_lookup_BC'),
):
    _save_with_dropna(_frame, _stem)

# output markers save to csv
# NOTE(review): the "Abuab_Pac_..." and "Abuab_BC_..." names hold the Pacol-
# and BC-specific motifs; the "Abuab_" prefix looks like a copy-paste slip.
# Names are kept unchanged so existing consumers still find the files.
for _motifs, _filename in (
    (SSR_Abu_spef, "Abuab_SSR_spef_markers.csv"),
    (SSR_Pac_spef, "Abuab_Pac_spef_markers.csv"),
    (SSR_BC_spef, "Abuab_BC_spef_markers.csv"),
    (com_Abu_BC_, "Abuab_commonto_BC_SSR_markers.csv"),
    (com_Pac_BC_, "Pacol_commonto_BC_SSR_markers.csv"),
    (com_Abu_Pac_, "Abuab_commonto_Pacol_SSR_markers.csv"),
    (com_ALL_, "common_ALL_SSR_markers.csv"),
):
    DataFrame(_motifs).to_csv(_filename, sep=",")

# Scratch notes kept as an inert string literal: never executed, and they
# refer to names (Counter, Ca_Pac2, Ca_Abu1, ...) not defined in this script.
"""
Counter(SSR_Abu).values()
dfx = Ca_Pac2
dfx[dfx['SSR type'].isin(['p1','p2','p3','p4','p5','p6'])]
dfx['SSR'].value_counts()
dfx['SSR'].value_counts().plot('barh',figsize=(10,20))
plt.savefig('test.png',bbox_inches='tight',pad_inches=1)
Counter(SSR_Abu).keys()
Abuab1_SSR = Ca_Abu1.to_csv('Abuab_SSR_spef.csv', sep='\t')
Pacol_SSR = Ca_Pac1.to_csv('Pacol_SSR_spef.csv', sep='\t')
BC_SSR = Ca_BC.to_csv('BC_SSR_spef.csv', sep='\t')
Abuab2_SSR = Ca_Abu2.to_csv('Abuab_SSR_common_to_BC.csv', sep='\t')
Paco2_SSR = Ca_Pac2.to_csv('Pacol_SSR_common_to_BC.csv', sep='\t')
"""
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement