Don't like ads? PRO users don't see any ads ;-)
Guest

Untitled

By: a guest on Aug 3rd, 2012  |  syntax: None  |  size: 2.74 KB  |  hits: 11  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. #!/usr/bin/env python
  2. # Time-stamp: <2012-01-10 17:21:24 Tao Liu>
  3.  
  4. # in the wiggle file from MA2C, the 1st column -- position is actually
  5. # the center of each probe. Some probes may overlap. In order to
  6. # convert them to appropriate bigwig, I need to create bedGraph file
  7. # with above two problems fixed.
  8.  
  9. import os
  10. import sys
  11. # ------------------------------------
  12. # Main function
  13. # ------------------------------------
  14. def main():
  15.     if len(sys.argv) < 3:
  16.         sys.stderr.write("""need 2 paras: %s <shift> <MA2C wig file>
  17.  
  18.         shift: if the position in wig file is not the leftmost position of the represented region, use this option to fix. For example, if a probe is 50bp, and in wig, 1st column is
  19.  the middle points of probes, use -25. Otherwise, use 0
  20.        
  21.         **Make sure your variableStep wig file is from MA2C!
  22.         **In the output, gaps will be filled with 0 and if two regions are overlapped, the later value will be used.
  23. """ % sys.argv[0])
  24.         sys.exit(1)
  25.  
  26.     ssize = int(sys.argv[1])
  27.     wigfile = open(sys.argv[2],"r")
  28.  
  29.     #fs = bdgfile.readline().rstrip().split()
  30.     #prev_region = (fs[0],int(fs[1]),int(fs[2]),float(fs[3]))
  31.     prev_region = None
  32.  
  33.  
  34.     for l in wigfile:
  35.         if l.startswith("track"):
  36.             continue
  37.         if l.startswith("variableStep"):
  38.             chrom = l.rstrip().split()[1].split("=")[1]
  39.             span = int(l.rstrip().split()[2].split("=")[1])
  40.             continue
  41.         (pos,value) = l.rstrip().split()
  42.         startpos = int(pos) + ssize
  43.         endpos = startpos + span
  44.         curr_region = (chrom,startpos,endpos,float(value))
  45.        
  46.         if not prev_region:
  47.             prev_region = curr_region
  48.             continue
  49.            
  50.         if prev_region[0] == curr_region[0]:
  51.             # only if they are in the same chromosome
  52.             right_pos_of_prev_region = min(curr_region[1],prev_region[1]+span)
  53.             if right_pos_of_prev_region > prev_region[1]:
  54.                 print "%s\t%d\t%d\t%.4f" % (prev_region[0],prev_region[1],right_pos_of_prev_region,prev_region[3])
  55.             # fill in gaps with 0
  56.             if curr_region[1] > prev_region[1]+span:
  57.                 print "%s\t%d\t%d\t%.4f" % (prev_region[0],prev_region[1]+span,curr_region[1],0)
  58.         else:
  59.             right_pos_of_prev_region = prev_region[1]+span
  60.             if right_pos_of_prev_region > prev_region[1]:
  61.                 print "%s\t%d\t%d\t%.4f" % (prev_region[0],prev_region[1],right_pos_of_prev_region,prev_region[3])
  62.         prev_region = curr_region
  63.  
  64.     # last region
  65.     right_pos_of_prev_region = prev_region[1]+span
  66.     if right_pos_of_prev_region > prev_region[1]:
  67.         print "%s\t%d\t%d\t%.4f" % (prev_region[0],prev_region[1],right_pos_of_prev_region,prev_region[3])
  68.  
  69.  
  70. if __name__ == '__main__':
  71.     main()