Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from Bio import SeqIO
- import numpy as np
- peaks = []
- with open("ENCFF091AYQbed.bed")as f:
- for line in f:
- peaks.append(line.strip().split())
- x = []
- for t in range(1, 23):
- data = list(SeqIO.parse("chroms/chr" + str(t) + ".fa", "fasta"))
- for item in peaks:
- mid = (int(item[1]) + int(item[2]))//2
- if (mid + 499 >= len(data[0].seq) or mid - 500 < 0):
- continue
- if (item[0] == "chr" + str(t) and data[0].seq[mid - 500].upper() != 'N' and data[0].seq[mid + 499].upper() != 'N'):
- k = 0;
- a = np.zeros((4, 1000))
- for i in range(mid - 500, mid + 500):
- if (data[0].seq[i].upper() == 'A'):
- a[0][k] = 1;
- if (data[0].seq[i].upper() == 'T'):
- a[1][k] = 1;
- if (data[0].seq[i].upper() == 'G'):
- a[2][k] = 1;
- if (data[0].seq[i].upper() == 'C'):
- a[3][k] = 1;
- k+=1;
- x.append(a);
- data = list(SeqIO.parse("chroms/chrX.fa", "fasta"))
- for item in peaks:
- mid = (int(item[1]) + int(item[2]))//2
- if (mid + 499 >= len(data[0].seq) or mid - 500 < 0):
- continue
- if (item[0] == "chrX" and data[0].seq[mid - 500].upper() != 'N' and data[0].seq[mid + 499].upper() != 'N'):
- k = 0;
- a = np.zeros((4, 1000))
- for i in range(mid - 500, mid + 500):
- if (data[0].seq[i].upper() == 'A'):
- a[0][k] = 1;
- if (data[0].seq[i].upper() == 'T'):
- a[1][k] = 1;
- if (data[0].seq[i].upper() == 'G'):
- a[2][k] = 1;
- if (data[0].seq[i].upper() == 'C'):
- a[3][k] = 1;
- k+=1;
- x.append(a);
- data = list(SeqIO.parse("chroms/chrY.fa", "fasta"))
- for item in peaks:
- mid = (int(item[1]) + int(item[2]))//2
- if (item[0] == "chrY" and data[0].seq[mid - 500].upper() != 'N' and data[0].seq[mid + 499].upper() != 'N'):
- k = 0;
- a = np.zeros((4, 1000))
- for i in range(mid - 500, mid + 500):
- if (data[0].seq[i].upper() == 'A'):
- a[0][k] = 1;
- if (data[0].seq[i].upper() == 'T'):
- a[1][k] = 1;
- if (data[0].seq[i].upper() == 'G'):
- a[2][k] = 1;
- if (data[0].seq[i].upper() == 'C'):
- a[3][k] = 1;
- k+=1;
- x.append(a);
- np.save('H3K4me3PeaksATGC', x)
- import random
- for t in range(1, 25):
- c = []
- if t == 23:
- t = 'X'
- if t == 24:
- t = 'Y'
- data = list(SeqIO.parse("chroms/chr" +str(t) + ".fa", "fasta"))
- m = 0
- l = len(data[0].seq) // 20000
- mid = 10001
- while (mid < len(data[0].seq) - l - 501):
- if (data[0].seq[mid - 500].upper() != 'N' and data[0].seq[mid + 499].upper() != 'N'):
- flag = 0
- for item in peaks:
- if ((item[0] == "chr" + str(t)) and ((mid - 500 < int(item[1]) and mid + 499 > int(item[1])) or (mid - 500 > int(item[1]) and mid - 500 < int(item[2])))):
- flag = 1
- if flag == 0:
- a = np.zeros((4, 1000))
- k = 0;
- for i in range(mid - 500, mid + 500):
- if (data[0].seq[i].upper() == 'A'):
- a[0][k] = 1;
- if (data[0].seq[i].upper() == 'T'):
- a[1][k] = 1;
- if (data[0].seq[i].upper() == 'G'):
- a[2][k] = 1;
- if (data[0].seq[i].upper() == 'C'):
- a[3][k] = 1;
- k+=1;
- c.append(a);
- m+=1;
- mid += l
- if (m >= 10000):
- break
- np.save('H3K4me3NotPeaksATGCChr'+str(t), c)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement