Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
########
"""
Post-process the SSR datasets derived from primerpro.pl.

Reads three result tables (Abuab, BC and Pacol), partitions the values of
their 'SSR Motif' column into motifs found in only one table, in exactly two
tables, or in all three, then writes the matching rows (tab-separated) and
the motif lists (comma-separated) to CSV files in the working directory.
"""
######## step 1
import pandas as pd
from pandas import DataFrame
import matplotlib.pyplot as plt
import seaborn as sns
#import missingno as msno
#from pandas import DataFrame
#import numpy as np
import os

# All input and output paths below are relative to this directory.
os.chdir('C:/Users/bmpal/OneDrive/Desktop/csv/2019Feb3')

csv1_ = 'Abuabcontigs.fasta.results_forDatabase_AbuSSRsdroppedNA.csv'
csv1 = os.path.abspath(csv1_)
csv2_ = 'contigbcmerged.fa.results_forDatabase_BCSSRsdroppedNA.csv'
csv2 = os.path.abspath(csv2_)
csv3_ = 'Pacolcontigs.fasta.results_forDatabase_PacolSSRdroppedNA.csv'
csv3 = os.path.abspath(csv3_)
#markers_used_ = '/Users/beatrizpalao/Documents/dna_seq/sequences/list_markers_mod.csv'
#markers_used = os.path.abspath(markers_used_)

# Load files into dataframes: df1 = Abuab, df2 = BC, df3 = Pacol.
df1 = pd.read_csv(csv1, sep=',', header=0, encoding="ISO-8859-1")
df2 = pd.read_csv(csv2, sep=',', header=0, encoding="ISO-8859-1")
df3 = pd.read_csv(csv3, sep=',', header=0, encoding="ISO-8859-1")
#df_markers = pd.read_csv(markers_used,sep=',',header = 0, encoding = "ISO-8859-1")

SSR_Abu = tuple(df1['SSR Motif'])
SSR_BC = tuple(df2['SSR Motif'])
SSR_Pac = tuple(df3['SSR Motif'])
#abu_markers = list(df_markers['abu_specific'])
#pac_markers = list(df_markers['pacol_specific'])
#bc_markers = list(df_markers['bc_specific'])

# Set processing. Build each motif set once and reuse it below.
_abu_set = set(SSR_Abu)
_bc_set = set(SSR_BC)
_pac_set = set(SSR_Pac)

SSR_union1 = list(_bc_set | _pac_set)   # Combined Pacol OR BC
SSR_union2 = list(_bc_set | _abu_set)   # Combined Abuab OR BC
SSR_union3 = list(_abu_set | _pac_set)  # Combined Abuab OR Pacol

SSR_Abu_spef = list(_abu_set - (_bc_set | _pac_set))  # specific to Abuab
SSR_Pac_spef = list(_pac_set - (_bc_set | _abu_set))  # specific to Pacol
SSR_BC_spef = list(_bc_set - (_abu_set | _pac_set))   # specific to BC

com_Abu_BC_ = list((_abu_set & _bc_set) - _pac_set)   # common to Abuab and BC only
com_Pac_BC_ = list((_pac_set & _bc_set) - _abu_set)   # common to Pacol and BC only
com_Abu_Pac_ = list((_abu_set & _pac_set) - _bc_set)  # common to Abuab and Pacol only
com_ALL_ = list(_abu_set & _pac_set & _bc_set)        # common to ALL

# Look up the rows of each dataframe whose motif belongs to a given group.
Abu_spef = df1[df1['SSR Motif'].isin(SSR_Abu_spef)]
BC_spef = df2[df2['SSR Motif'].isin(SSR_BC_spef)]
Pac_spef = df3[df3['SSR Motif'].isin(SSR_Pac_spef)]
com_Abu_BC = df1[df1['SSR Motif'].isin(com_Abu_BC_)]
com_Pac_BC = df3[df3['SSR Motif'].isin(com_Pac_BC_)]
# Fixed: the closing bracket was misplaced (df1[df1['SSR Motif']].isin(...)),
# which indexed df1 by motif values as column labels instead of filtering rows.
com_Abu_Pac = df1[df1['SSR Motif'].isin(com_Abu_Pac_)]
com_ALL_in_Abu = df1[df1['SSR Motif'].isin(com_ALL_)]
com_ALL_in_Pac = df3[df3['SSR Motif'].isin(com_ALL_)]
com_ALL_in_BC = df2[df2['SSR Motif'].isin(com_ALL_)]

# Save each lookup as '<stem>.csv' plus '<stem>_dropNA.csv' (rows containing
# any missing value removed). Both files are tab-separated.
_lookup_outputs = (
    (Abu_spef, 'Abuab_SSR_spef'),
    (Pac_spef, 'Pacol_SSR_spef'),
    (BC_spef, 'BC_SSR_spef'),
    (com_Abu_BC, 'Abuab_SSR_common_to_BC'),
    (com_Pac_BC, 'Pacol_SSR_common_to_BC'),
    (com_Abu_Pac, 'Abuab_SSR_common_to_Pacol'),
    (com_ALL_in_Abu, 'common_ALL_lookup_Abuab'),
    (com_ALL_in_Pac, 'common_ALL_lookup_Pacol'),
    (com_ALL_in_BC, 'common_ALL_lookup_BC'),
)
for _frame, _stem in _lookup_outputs:
    _frame.to_csv(_stem + '.csv', sep='\t')
    _frame.dropna(axis=0, how='any').to_csv(_stem + '_dropNA.csv', sep='\t')

# Save the motif lists themselves (comma-separated).
# NOTE(review): the Pacol- and BC-specific lists are written to files whose
# names start with 'Abuab_'. The names are kept unchanged here; confirm
# whether that prefix is intended before renaming.
_marker_outputs = (
    (SSR_Abu_spef, "Abuab_SSR_spef_markers.csv"),
    (SSR_Pac_spef, "Abuab_Pac_spef_markers.csv"),
    (SSR_BC_spef, "Abuab_BC_spef_markers.csv"),
    (com_Abu_BC_, "Abuab_commonto_BC_SSR_markers.csv"),
    (com_Pac_BC_, "Pacol_commonto_BC_SSR_markers.csv"),
    (com_Abu_Pac_, "Abuab_commonto_Pacol_SSR_markers.csv"),
    (com_ALL_, "common_ALL_SSR_markers.csv"),
)
for _motifs, _filename in _marker_outputs:
    DataFrame(_motifs).to_csv(_filename, sep=",")

# Exploratory snippets kept from the original script; they were inside a bare
# string literal and never executed.
# NOTE(review): Counter and the Ca_* names are not defined in the script as
# shown, so these lines would fail if simply uncommented.
# Counter(SSR_Abu).values()
# dfx = Ca_Pac2
# dfx[dfx['SSR type'].isin(['p1','p2','p3','p4','p5','p6'])]
# dfx['SSR'].value_counts()
# dfx['SSR'].value_counts().plot('barh',figsize=(10,20))
# plt.savefig('test.png',bbox_inches='tight',pad_inches=1)
# Counter(SSR_Abu).keys()
# Abuab1_SSR = Ca_Abu1.to_csv('Abuab_SSR_spef.csv', sep='\t')
# Pacol_SSR = Ca_Pac1.to_csv('Pacol_SSR_spef.csv', sep='\t')
# BC_SSR = Ca_BC.to_csv('BC_SSR_spef.csv', sep='\t')
# Abuab2_SSR = Ca_Abu2.to_csv('Abuab_SSR_common_to_BC.csv', sep='\t')
# Paco2_SSR = Ca_Pac2.to_csv('Pacol_SSR_common_to_BC.csv', sep='\t')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement