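# Collects "What / Why / Suggestion / Thread" requests from saved forum pages
# and writes them back out as batched text files.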

import json
import codecs
from bs4 import BeautifulSoup

# Placeholder for the middle part of the user-profile path; edit before running.
blah = 'put the rest of the path here'

# Forum thread URL, kept for reference only (presumably where the saved pages
# came from; nothing in this script fetches it).
#url = 'https://forum-en.guildwars2.com/forum/professions/thief/Nerf-Wish-list/page/'

# Working directory: the saved page_N.html inputs are read from here and the
# generated post_N.txt files are written here.
folder_path = ''.join(['C:/Users/', blah, '/Desktop/nerfwishlist/'])

# Field labels recognised in a post body, in the order they are tested when a
# single line mentions more than one of them.
_FIELD_NAMES = ('What', 'Why', 'Suggestion', 'Thread', 'Originally by')

# While one of these fields is being recorded, meeting an opening label means
# the post contains another, separate request.
_CLOSING_FIELDS = ('Why', 'Suggestion', 'Thread')

# Labels that start a new request once a closing field has been reached.
_OPENING_FIELDS = ('What', 'Originally by')


def _blank_request(post_date, permalink):
    """Return an empty request record tagged with its post's date and URL."""
    return {'Originally by': '', 'What': '', 'Why': '', 'Suggestion': '',
            'Thread': '', 'Post Date': post_date,
            'Resurrected from': permalink}


def _append_text(request, field, text):
    """Append text to a field; every field except 'What' is newline-separated."""
    if field == 'What':
        request[field] = request[field] + text
    else:
        request[field] = request[field] + text + '\n'


def _extract_requests(lines, author, post_date, permalink):
    """Split the text fragments of one post into JSON-encoded requests.

    lines     -- iterable of stripped text fragments from the post body
    author    -- name used for 'Originally by' when the post gives none
                 before its 'What:' label
    post_date -- text of the post's <time> element
    permalink -- absolute URL of the post

    Returns a list of JSON strings. The request still being built when the
    fragments run out is always emitted, so the list is never empty.
    """
    encoded = []
    request = _blank_request(post_date, permalink)
    active = None  # field currently receiving continuation lines, if any

    for line in lines:
        present = [name for name in _FIELD_NAMES if name + ':' in line]

        # A post may hold several requests back to back: close the current
        # one when a new opening label follows a Why/Suggestion/Thread part.
        if active in _CLOSING_FIELDS and any(
                name in present for name in _OPENING_FIELDS):
            encoded.append(json.dumps(request))
            request = _blank_request(post_date, permalink)
            active = None

        if not present:
            # Plain text belongs to whichever field was opened last; text
            # seen before any label is dropped.
            if active is not None:
                _append_text(request, active, line)
            continue

        # Label lines are never stored verbatim, only what follows the label,
        # so a repeated label cannot duplicate its own text.
        active = present[0]
        if active == 'What' and not request['Originally by']:
            request['Originally by'] = author

        # partition() keeps only the text after the label even when the label
        # is not at the very start of the line.
        remainder = line.partition(active + ':')[2].strip()
        if remainder:
            _append_text(request, active, remainder)

    encoded.append(json.dumps(request))
    return encoded


# JSON-encoded requests gathered from every saved page, in page/post order.
request_list = []

# Saved pages are numbered page_1.html .. page_14.html.
for x in range(1, 15):
    # The context manager closes the page file as soon as it has been read.
    with open(folder_path + "page_%d.html" % x, 'r') as target:
        page = target.read()

    soup = BeautifulSoup(page, "html.parser")
    for post in soup.findAll('div', {'class': 'post'}):
        header = post.find('div', {'class': 'post-header'})
        member_full = header.find('a').text

        message_content = post.find('div', {'class': 'message-content'})
        post_date = post.find('time').text
        permalink = post.find('a', {'class': 'permalink icon'})['href']
        permalink = 'https://forum-en.guildwars2.com' + permalink

        # Only posts that contain a 'What:' label carry requests.
        if 'What:' not in str(message_content):
            continue

        lines = (line
                 for chunk in message_content.findAll('p')
                 for line in chunk.stripped_strings)
        request_list.extend(
            _extract_requests(lines, member_full, post_date, permalink))

# Accumulated text is flushed to its own file once adding the next request
# would bring it to this many characters or more.
_MAX_POST_CHARS = 4500


def _format_request(fields):
    """Render one decoded request dict as the marked-up text of a post entry.

    'Suggestion' and 'Thread' are optional and are left out when empty; every
    entry ends with a blank line so entries stay separated when concatenated.
    """
    parts = [
        '*Originally by:* ' + fields['Originally by'].strip() + '\n',
        '*Post date:* ' + fields['Post Date'].strip() + '\n',
        '*Resurrected from:* ' + fields['Resurrected from'].strip() + '\n',
        '*What:* ' + fields['What'].strip() + '\n',
        '*Why:*\n' + fields['Why'].strip() + '\n',
    ]
    if fields['Suggestion']:
        parts.append('*Suggestion:*\n' + fields['Suggestion'].strip() + '\n')
    if fields['Thread']:
        parts.append('*Thread:* ' + fields['Thread'].strip() + '\n')
    parts.append('\n')
    return ''.join(parts)


def _write_post_file(index, text):
    """Write text to post_<index>.txt in folder_path as UTF-8 and close it."""
    file_path = folder_path + "post_%d.txt" % index
    # Mode 'w' already truncates; the context manager flushes and closes.
    with codecs.open(file_path, 'w', 'utf-8') as target:
        target.write(text)


postString = ''
count = 0            # index of the next output file
currentBigPost = ''  # text buffered for the output file being assembled
for request in request_list:
    try:
        j = json.loads(request)
    except ValueError:
        # Show the entry that could not be decoded, then abort with status -1.
        print(request)
        raise SystemExit(-1)

    postString = _format_request(j)

    # Flush before the buffer would reach the limit. An empty buffer is never
    # flushed, so a single oversized entry does not produce an empty file.
    if currentBigPost and len(currentBigPost) + len(postString) >= _MAX_POST_CHARS:
        _write_post_file(count, currentBigPost)
        currentBigPost = ''
        count = count + 1

    currentBigPost = currentBigPost + postString

# Whatever is still buffered goes to the next index, so file numbers are
# contiguous. This file is written even when the buffer is empty.
_write_post_file(count, currentBigPost)