- #!/usr/bin/env python
- # Time-stamp: <2012-01-10 17:21:24 Tao Liu>
- # in the wiggle file from MA2C, the 1st column -- position is actually
- # the center of each probe. Some probes may overlap. In order to
- # convert them to appropriate bigwig, I need to create bedGraph file
- # with above two problems fixed.
- import os
- import sys
- # ------------------------------------
- # Main function
- # ------------------------------------
- def main():
- if len(sys.argv) < 3:
- sys.stderr.write("""need 2 paras: %s <shift> <MA2C wig file>
- shift: if the position in wig file is not the leftmost position of the represented region, use this option to fix. For example, if a probe is 50bp, and in wig, 1st column is
- the middle points of probes, use -25. Otherwise, use 0
- **Make sure your variableStep wig file is from MA2C!
- **In the output, gaps will be filled with 0 and if two regions are overlapped, the later value will be used.
- """ % sys.argv[0])
- sys.exit(1)
- ssize = int(sys.argv[1])
- wigfile = open(sys.argv[2],"r")
- #fs = bdgfile.readline().rstrip().split()
- #prev_region = (fs[0],int(fs[1]),int(fs[2]),float(fs[3]))
- prev_region = None
- for l in wigfile:
- if l.startswith("track"):
- continue
- if l.startswith("variableStep"):
- chrom = l.rstrip().split()[1].split("=")[1]
- span = int(l.rstrip().split()[2].split("=")[1])
- continue
- (pos,value) = l.rstrip().split()
- startpos = int(pos) + ssize
- endpos = startpos + span
- curr_region = (chrom,startpos,endpos,float(value))
- if not prev_region:
- prev_region = curr_region
- continue
- if prev_region[0] == curr_region[0]:
- # only if they are in the same chromosome
- right_pos_of_prev_region = min(curr_region[1],prev_region[1]+span)
- if right_pos_of_prev_region > prev_region[1]:
- print "%s\t%d\t%d\t%.4f" % (prev_region[0],prev_region[1],right_pos_of_prev_region,prev_region[3])
- # fill in gaps with 0
- if curr_region[1] > prev_region[1]+span:
- print "%s\t%d\t%d\t%.4f" % (prev_region[0],prev_region[1]+span,curr_region[1],0)
- else:
- right_pos_of_prev_region = prev_region[1]+span
- if right_pos_of_prev_region > prev_region[1]:
- print "%s\t%d\t%d\t%.4f" % (prev_region[0],prev_region[1],right_pos_of_prev_region,prev_region[3])
- prev_region = curr_region
- # last region
- right_pos_of_prev_region = prev_region[1]+span
- if right_pos_of_prev_region > prev_region[1]:
- print "%s\t%d\t%d\t%.4f" % (prev_region[0],prev_region[1],right_pos_of_prev_region,prev_region[3])
- if __name__ == '__main__':
- main()