Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy
- import time
- from selenium import webdriver
# Scrape buffers: one 205x7 table of strings (at most 120 characters per cell)
# for each of the three forum listing pages.  The get_page_* functions fill
# the leading columns with the text of each row's <td> cells and column 5
# with the thread URL; the determine_spam* functions write "SPAM" to column 6.
cell_data, cell_data2, cell_data3 = (
    numpy.zeros(shape=(205, 7), dtype="<U120") for _ in range(3)
)

# Single Chrome session shared by every function that talks to the site.
browser = webdriver.Chrome('c:\\Users\\lenovo\\desktop\\chromedriver')
# Next free row in each page's table (advanced once per scraped <tr>).
row_count = row_count2 = row_count3 = 0

# Next row that receives a thread URL.  Starts at 1, so row 0 never gets one
# (presumably because row 0 is the table header -- confirm against the page).
url_count = url_count2 = url_count3 = 1

# Number of rows flagged as spam on each page.
spam_count = spam_count2 = spam_count3 = 0

# Most recently decoded captcha answer; written by decode_letters().
passcode = ""

# Case-sensitive substrings that mark a thread title as spam.
bad_words = [
    "BUY",
    "ONLINE",
    "Sticky:",
    "Pharmacy",
    "EARN CASH",
    "shop",
    "MEDS",
    "puppy",
    "Tramadol",
    "Beagles",
    "Remove warts",
    "for sale",
    "Essay service",
    "buy",
    "Buy",
]
# Characters the captcha can show.  dictionary_key[n] is the character whose
# ASCII-art glyph is stored in dictionary[n].
# dtype is the builtin ``str``: the ``numpy.str`` alias was deprecated in
# NumPy 1.20 and removed in 1.24, where it raises AttributeError.
dictionary_key = numpy.array(list("ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"), dtype=str)

# Glyph strings in dictionary_key order.  decode_letters() compares each entry
# with a glyph's cells read row by row (7 rows) and joined into one string.
# Single lower-case/digit entries ('a', 'i', 'o', ...) are placeholders for
# characters whose glyph has not been recorded; they cannot match a real glyph.
# NOTE(review): several strings look whitespace-collapsed (for example the 'L'
# entry is 26 characters long, which is not a multiple of 7 rows) -- verify
# them against the original file before relying on the lookup.
dictionary = numpy.array([
    'a',
    '******** ** **** ********** ** **** ********** ',
    ' ****** ** **** ** ** ** ** ****** ',
    '******** ** **** **** **** **** ********** ',
    '********** ** ****** ** ** ********',
    '********** ** ****** ** ** ** ',
    ' ****** ** ** ** ** ****** ** ** ** ****** ',
    '** **** **** ************* **** **** **',
    'i',
    ' ** ** ** **** **** ** ****** ',
    '** **** ** ** ** ***** ** ** ** ** ** **',
    '** ** ** ** ** ** ********',
    '** ***** ******* ****** *** **** **** **** **',
    '** ***** ****** **** ** **** ****** ***** **',
    'o',
    '******** ** **** ********** ** ** ** ',
    'q',
    'r',
    's',
    '******** ** ** ** ** ** ** ',
    '** **** **** **** **** **** ** ******* ',
    '** **** **** **** ** ** ** ** ** *** ',
    '** **** ** **** ** **** ** **** ** **** ** ** *** *** ',
    '** ** ** ** ** ** *** ** ** ** ** ** **',
    '** ** ** ** **** ** ** ** ** ',
    'z',
    '0',
    '1',
    '2',
    ' ******* ** ** ** ******* **** ** ******* ',
    '** ** ** ** ** ** ** ********* ** ** ',
    '5',
    ' ******* ** **** ******** ** **** ** ******* ',
    '********** ** ** ** ** ** ** ',
    '8',
    ' ******* ** **** ** ******** **** ** ******* ',
], dtype=str)
def _scrape_listing(url, table, next_row, next_url_row, size_label):
    """Load one forum listing page and copy its thread table into *table*.

    url          -- address of the listing page to open in the shared browser.
    table        -- 2-D string array that receives the scraped data.
    next_row     -- first row of *table* to fill with <td> text.
    next_url_row -- first row of *table* whose column 5 receives a thread URL.
    size_label   -- text printed in front of the table element's size.

    Returns ``(next_row, next_url_row)`` advanced past everything written, so
    the caller can store them back into its module-level counters.
    """
    browser.get(url)
    content = browser.find_element_by_class_name('PhorumStdTable')

    # One table row per <tr>; cell text goes to consecutive columns.
    for table_row in content.find_elements_by_tag_name('tr'):
        for column, cell in enumerate(table_row.find_elements_by_tag_name('td')):
            table[next_row][column] = cell.get_attribute('innerText')
        next_row += 1
    print(size_label, content.size)

    # Keep only links whose address contains "read" (thread links, as opposed
    # to profile, e-mail, etc.) and store them in column 5, one per row.
    links = browser.find_elements_by_xpath("//table[@class = 'PhorumStdTable']//td/a")
    for link in links:
        href = link.get_attribute("href")
        # An <a> without an href yields None; skip it instead of raising
        # TypeError on the substring test.
        if href and "read" in href:
            table[next_url_row][5] = href
            next_url_row += 1

    return next_row, next_url_row


def get_page_1():
    """Scrape listing page 1 into cell_data and advance row_count/url_count."""
    global row_count
    global url_count
    row_count, url_count = _scrape_listing(
        'http://www.fairfaxunderground.com/forum/list/2.html',
        cell_data, row_count, url_count, "Page 1 size: ")


def get_page_2():
    """Scrape listing page 2 into cell_data2 and advance row_count2/url_count2."""
    global row_count2
    global url_count2
    row_count2, url_count2 = _scrape_listing(
        'http://www.fairfaxunderground.com/forum/list/2/page-2.html',
        cell_data2, row_count2, url_count2, "Page 2 size: ")


def get_page_3():
    """Scrape listing page 3 into cell_data3 and advance row_count3/url_count3."""
    global row_count3
    global url_count3
    row_count3, url_count3 = _scrape_listing(
        'http://www.fairfaxunderground.com/forum/list/2/page-3.html',
        cell_data3, row_count3, url_count3, "Page 3 size: ")
def _flag_spam_rows(table, words):
    """Write "SPAM" to column 6 of every row whose title contains a bad word.

    table -- 2-D string array; column 0 holds the thread title.
    words -- substrings to look for (the match is case-sensitive).

    Returns the number of rows flagged by this call.
    """
    flagged = 0
    for row in table:  # each row is a view, so the write below lands in table
        title = row[0]
        if any(word in title for word in words):
            row[6] = "SPAM"
            flagged += 1
    return flagged


def determine_spam():
    """Flag spam titles in cell_data and add the hits to spam_count."""
    global spam_count
    spam_count = spam_count + _flag_spam_rows(cell_data, bad_words)


def determine_spam2():
    """Flag spam titles in cell_data2 and add the hits to spam_count2."""
    global spam_count2
    spam_count2 = spam_count2 + _flag_spam_rows(cell_data2, bad_words)


def determine_spam3():
    """Flag spam titles in cell_data3 and add the hits to spam_count3."""
    global spam_count3
    spam_count3 = spam_count3 + _flag_spam_rows(cell_data3, bad_words)
def print_all_rows():
    """Print every scraped row with a non-empty first cell, page 1 to page 3.

    Each output line is the row index within its page table, a single-space
    string, and then the row's seven cells.
    """
    for table in (cell_data, cell_data2, cell_data3):
        for index, row in enumerate(table):
            if row[0] != "":
                print(index, " ", *(row[column] for column in range(7)))
def bump_thread():
    """Post a "." reply to every scraped thread that is not flagged as spam.

    Pages are handled in the order 3, 2, 1, and within a page rows 102 down
    to 1.  A row is used only when its first cell is non-empty and its flag
    column (6) is empty.  For each such row the thread URL in column 5 is
    opened, decode_letters() refreshes the global ``passcode``, and the reply
    form is filled in and submitted with fixed pauses between the steps.
    """
    # The second item says whether the URL is echoed on its own line before
    # the page is opened; only the page-3 pass does that.
    passes = ((cell_data3, True), (cell_data2, False), (cell_data, False))

    for table, echo_url in passes:
        for r in range(102, 0, -1):
            row = table[r]
            if row[6] != "" or row[0] == "":
                continue

            thread_url = row[5]
            if echo_url:
                print(thread_url)
            browser.get(thread_url)
            print(r, row[0], row[3], thread_url, row[6])

            decode_letters()
            time.sleep(1)

            captcha_input = browser.find_element_by_id('spamhurdles_captcha_answer_input')
            captcha_input.send_keys(passcode)
            captcha_input.submit()
            time.sleep(0.5)

            # NOTE(review): the decoded passcode is also typed into the author
            # field -- confirm that this is intended.
            author_input = browser.find_element_by_xpath(
                "//table[@class='PhorumFormTable']//input[@name='author']")
            author_input.send_keys(passcode)
            author_input.submit()
            time.sleep(0.5)

            message_area = browser.find_element_by_id('phorum_textarea')
            message_area.send_keys('.')
            message_area.submit()
            time.sleep(0.5)

            finish_button = browser.find_element_by_name('finish')
            time.sleep(3)
            finish_button.click()
def decode_letters():
    """Decode the ASCII-art captcha on the current page into ``passcode``.

    Reads the text of the element with id ``spamhurdles_captcha_asciiart``,
    lays its first 7 lines out on a 7x60 character grid, splits the grid into
    5 glyphs at the all-space columns, and looks every glyph up in
    ``dictionary`` to obtain its character from ``dictionary_key``.  The five
    characters are stored in the global ``passcode`` and printed.

    Raises:
        RuntimeError: the captcha element is not on the page.
        ValueError: the art has fewer than 7 lines, a line is wider than 60
            characters, the number of blank separator columns is not 10, or
            a glyph is not present in ``dictionary``.
    """
    global passcode

    rows, columns, glyph_count = 7, 60, 5

    art = None
    for element in browser.find_elements_by_id('spamhurdles_captcha_asciiart'):
        art = element.get_attribute('innerText')  # the last match wins
    if art is None:
        raise RuntimeError("captcha ASCII art element not found on the current page")

    lines = art.splitlines()
    if len(lines) < rows:
        raise ValueError(
            "captcha art has {} lines, expected at least {}".format(len(lines), rows))

    # 7x60 grid of single characters; cells past the end of a line stay ''.
    grid = numpy.zeros(shape=(rows, columns), dtype="<U1")
    for r in range(rows):
        if len(lines[r]) > columns:
            raise ValueError(
                "captcha line {} is wider than {} characters".format(r, columns))
        for c, character in enumerate(lines[r]):
            grid[r][c] = character

    # A separator column is a space in all 7 rows.  The layout is one such
    # column on each side plus a 2-column gap between neighbouring glyphs:
    # 1 + 4 * 2 + 1 = 10 columns.
    blank_columns = [
        c for c in range(columns)
        if all(grid[r][c] == " " for r in range(rows))
    ]
    if len(blank_columns) != 2 * glyph_count:
        raise ValueError(
            "expected {} blank separator columns, found {}".format(
                2 * glyph_count, len(blank_columns)))

    # Glyph string -> character.  A later duplicate overrides an earlier one,
    # matching a full scan that keeps the last hit.
    lookup = {str(glyph): str(key) for glyph, key in zip(dictionary, dictionary_key)}

    decoded = []
    for n in range(glyph_count):
        left = blank_columns[2 * n] + 1    # first column of the glyph
        right = blank_columns[2 * n + 1]   # separator column just after it
        glyph = "".join(
            grid[r][c] for r in range(rows) for c in range(left, right))
        if glyph not in lookup:
            raise ValueError(
                "unrecognised captcha glyph at position {}: {!r}".format(n + 1, glyph))
        decoded.append(lookup[glyph])

    passcode = "".join(decoded)
    print(passcode)
# Script entry: scrape the three listing pages, flag spam titles, print the
# collected rows and per-page spam totals, then bump the remaining threads.
for _scrape in (get_page_1, get_page_2, get_page_3):
    _scrape()
for _flag in (determine_spam, determine_spam2, determine_spam3):
    _flag()
print_all_rows()
for _spam_total, _row_total in (
        (spam_count, row_count),
        (spam_count2, row_count2),
        (spam_count3, row_count3)):
    print("Spam count: ", _spam_total, " of ", _row_total, " records.")
bump_thread()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement