Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- chr start stop geneID
- 0 chr13 32889584 32889814 BRCA2
- 1 chr13 32890536 32890737 BRCA2
- 2 chr13 32893194 32893307 BRCA2
- 3 chr13 32893282 32893400 BRCA2
- 4 chr13 32893363 32893466 BRCA2
- 5 chr13 32899127 32899242 BRCA2
- import pandas as pd
- import numpy as np
# Names assigned to the four fields of each record in the input file.
columns = ['chr', 'start', 'stop', 'geneID']
# Split fields on runs of whitespace. The previous separator 's' was the
# literal letter "s", which never splits a record into its four fields.
bed = pd.read_table('bedfile.txt', sep=r'\s+', names=columns, engine='python')
def bed_prepare(inp_bed):
    """Annotate every region with its relation to the region on the next row.

    Three columns are added to ``inp_bed`` itself (no copy is made) and the
    same object is returned:

    * ``next_start`` -- ``start`` of the following row (NaN on the last row).
    * ``distance_to_next`` -- ``next_start - stop``; negative when the
      following row starts before this one ends.
    * ``next_region_overlap`` -- True when the following row is on the same
      chromosome and starts strictly before this region's ``stop``.

    Rows are compared in their existing order, so the frame is assumed to be
    sorted by chromosome and start position.
    """
    inp_bed['next_start'] = inp_bed['start'].shift(periods=-1)
    inp_bed['distance_to_next'] = inp_bed['next_start'] - inp_bed['stop']

    # Regions on different chromosomes can never overlap, whatever their
    # coordinates are; without this guard the last region of one chromosome
    # could be flagged as overlapping the first region of the next one.
    next_chr = inp_bed['chr'].shift(periods=-1)
    same_chr = (inp_bed['chr'] == next_chr) & next_chr.notnull()

    inp_bed['next_region_overlap'] = (
        same_chr & (inp_bed['next_start'] < inp_bed['stop'])
    )
    return inp_bed
# Show the annotated table (bed_prepare also adds its columns to `bed` itself).
print(bed_prepare(bed))
- chr start stop geneID next_start distance_to_next next_region_overlap
- 0 chr13 32889584 32889814 BRCA2 32890536 722 False
- 1 chr13 32890536 32890737 BRCA2 32893194 2457 False
- 2 chr13 32893194 32893307 BRCA2 32893282 -25 True
- 3 chr13 32893282 32893400 BRCA2 32893363 -37 True
- 4 chr13 32893363 32893466 BRCA2 32899127 5661 False
# Zero-row frame with one column per entry of `columns`; it is handed to
# bed_collapse as the frame that collapsed regions are added to.
new_bed = pd.DataFrame(
    columns=columns,
    data=np.zeros(shape=(0, len(columns))),
)
def bed_collapse(intermediate_bed, new_bed):
    """Merge runs of overlapping regions into single regions.

    Rows of ``intermediate_bed`` are read in order from its ``chr``,
    ``start``, ``stop`` and ``geneID`` columns. A row is folded into the
    region currently being built when it is on the same chromosome and its
    ``start`` is strictly smaller than that region's ``stop``; the merged
    region keeps the first row's ``start`` and ``geneID`` and the largest
    ``stop`` seen. Any other row closes the current region and opens a new
    one. The input is assumed to be sorted by chromosome and start position.

    Parameters:
        intermediate_bed: DataFrame holding at least the four columns above;
            any additional columns are ignored.
        new_bed: DataFrame the collapsed regions are added to. It is not
            modified.

    Returns:
        A new DataFrame with a fresh 0..n-1 index: the rows of ``new_bed``
        followed by the collapsed regions. When ``new_bed`` has no rows, only
        the collapsed regions are returned.
    """
    fields = ['chr', 'start', 'stop', 'geneID']

    merged = []
    current = None  # [chr, start, stop, geneID] of the region being built
    for chrom, start, stop, gene in zip(*(intermediate_bed[f] for f in fields)):
        if current is not None and chrom == current[0] and start < current[2]:
            # Track the running maximum so that a short region nested inside
            # a longer one cannot shrink the merged region or end it early.
            if stop > current[2]:
                current[2] = stop
        else:
            if current is not None:
                merged.append(current)
            current = [chrom, start, stop, gene]
    if current is not None:
        merged.append(current)

    collapsed = pd.DataFrame(merged, columns=fields)
    if len(new_bed) == 0:
        # Skip concatenating an empty placeholder so that its dtypes do not
        # influence the dtypes of the result.
        return collapsed
    # DataFrame.append was removed in pandas 2.0; concat is the replacement.
    return pd.concat([new_bed, collapsed], ignore_index=True)
# Annotate the regions, then print the result of collapsing them.
int_bed = bed_prepare(bed)
print(bed_collapse(int_bed, new_bed))
- chr start stop geneID
- 0 chr13 32889584 32889814 BRCA2
- 1 chr13 32890536 32890737 BRCA2
- 2 chr13 32893194 32893466 BRCA2
- 5 chr13 32899127 32899242 BRCA2
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement