Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # coding=utf-8
- """
- Алгоритм:
- - Добавляем колонку "_id" (nullable) в kposts
- - Добавляем колонку "_parentid" (nullable) в kposts
- Смысл этих операций в том, чтобы запомнить прошлые значения постов
- Заполняем колонки значениями _id=id, _parentid=parentid
- - Меняем boardid 6 на boardid 4
- - Обнуляем id у всех постов с boardid = 4
- Номера всем постам мы поставим заново, это проще чем рассчитывать номера для слияния
- - Проставляем новые номера постам с boardid = 4, алгоритм при этом такой:
- - Сортируем все посты по возрастанию timestamp и проставляем каждому id начиная с 1
- - Проходимся по каждому посту еще раз, меняем parentid на новый id поста (матчим по _parentid),
- если в посте есть >>ссылка, меняем ее на новую ссылку (матчим по _id)
- """
- import psycopg2
- from bs4 import BeautifulSoup
- DB_NAME = 'zadrawch_wip'
- DB_USER = 'zadrawch'
- DB_HOST = 'localhost'
- DB_PASSWORD = '12345'
- SRC_BOARD_ID = 6 # С какой доски переносим посты
- DST_BOARD_ID = 4 # В какую доску переносим посты
- SRC_BOARD_NAME = '2dwip'
- DST_BOARD_NAME = '2d'
- MERGE_SQL = '''
- ALTER TABLE kposts DROP CONSTRAINT kposts_pkey;
- ALTER TABLE kposts ADD COLUMN "_id" integer;
- ALTER TABLE kposts ADD COLUMN "_parentid" integer;
- ALTER TABLE kposts ADD COLUMN "_boardid" integer;
- UPDATE kposts SET _id=id, _parentid=parentid, _boardid=boardid WHERE boardid IN (%(src_board_id)s, %(dst_board_id)s);
- UPDATE kposts SET boardid = %(dst_board_id)s WHERE boardid = %(src_board_id)s;
- WITH upd as (
- SELECT row_number() OVER (ORDER BY timestamp) as new_id,
- _boardid,
- id as old_id
- FROM kposts
- WHERE boardid = %(dst_board_id)s
- ORDER BY timestamp ASC
- )
- UPDATE kposts a
- SET
- id = b.new_id
- FROM upd b
- WHERE
- a.id = b.old_id AND a._boardid = b._boardid;
- ALTER TABLE kposts ADD PRIMARY KEY (boardid, id);
- UPDATE kposts a
- SET parentid = CASE
- WHEN a._parentid = 0 THEN 0
- ELSE b.id
- END
- FROM kposts b
- WHERE b._id = a._parentid AND a._boardid = b._boardid;
- '''
- CLEANUP_SQL = '''
- ALTER TABLE kposts DROP COLUMN _boardid;
- ALTER TABLE kposts DROP COLUMN _id;
- ALTER TABLE kposts DROP COLUMN _parentid;
- '''
- def fix_refs(cur, row):
- """
- <a href=\"/2dwip/res/88.html#93\" onclick=\"return highlight(\'93\', true);\" class=\"ref|2dwip|88|93\">>>93</a>\r
- <br />
- пока не очень получается, попытался изобразить кровь\r
- <br />
- <a href=\"/2dwip/res/88.html#94\" onclick=\"return highlight(\'94\', true);\" class=\"ref|2dwip|88|94\">>>94</a>
- <a href=\"/2dwip/res/88.html#95\" onclick=\"return highlight(\'95\', true);\" class=\"ref|2dwip|88|95\">>>95</a>\r
- <br />
- хочется дать карандашу отдохнуть анончики
- <br />
- """
- post_id, boardid, message = row
- message = message.replace('\r', '')
- soup = BeautifulSoup(message)
- for ref in soup.findAll('a'):
- try:
- ref_class = ref['class'][0]
- except KeyError:
- continue
- ref_href = ref['href']
- old_thread_id = int(ref_href.split('/')[-1].split('.')[0])
- cur.execute("SELECT id FROM kposts WHERE _id = %(old_id)s AND _boardid = %(old_boardid)s", {
- 'old_id': old_thread_id,
- 'old_boardid': SRC_BOARD_ID if SRC_BOARD_NAME in ref_class else DST_BOARD_ID,
- })
- try:
- new_thread_id = cur.fetchone()[0]
- except TypeError:
- continue
- old_post_id = int(u''.join([s for s in ref_href.split('/')[-1].split('#')[-1] if s.isdigit()]))
- cur.execute("SELECT id FROM kposts WHERE _id = %(old_id)s AND _boardid = %(old_boardid)s", {
- 'old_id': old_post_id,
- 'old_boardid': SRC_BOARD_ID if SRC_BOARD_NAME in ref_class else DST_BOARD_ID,
- })
- try:
- new_post_id = cur.fetchone()[0]
- except TypeError:
- continue
- new_href = '/{board}/res/{thread_id}.html#{post_id}'.format(
- board=DST_BOARD_NAME,
- thread_id=new_thread_id,
- post_id=new_post_id,
- )
- ref['href'] = new_href
- ref['onclick'] = "return highlight('{new_post_id}, true')".format(new_post_id=new_post_id)
- fixed_class = "ref|{board}|{thread_id}|{post_id}".format(
- board=DST_BOARD_NAME,
- thread_id=new_thread_id,
- post_id=new_post_id,
- )
- ref['class'] = [fixed_class]
- del ref['highlight']
- del ref['true']
- ref.string = '>>{new_post_id}'.format(new_post_id=new_post_id)
- fixed_message = str(soup)
- fixed_message = fixed_message.replace('<html><body>', '').replace('</body></html>', '')
- cur.execute('UPDATE kposts SET message = %(fixed_message)s WHERE id = %(post_id)s', {
- 'fixed_message': fixed_message,
- 'post_id': post_id,
- })
- def merge_boards():
- with psycopg2.connect(
- dbname=DB_NAME,
- user=DB_USER,
- host=DB_HOST,
- password=DB_PASSWORD,
- ) as conn:
- with conn.cursor() as cur:
- print 'Merging boards...'
- cur.execute(MERGE_SQL, {
- 'src_board_id': SRC_BOARD_ID,
- 'dst_board_id': DST_BOARD_ID,
- })
- print 'Updating refs...'
- # Selecting all '>>1222'-like refs
- cur.execute("SELECT id, boardid, \"message\" FROM kposts WHERE boardid = %(dst_board_id)s AND \"message\" like '%%>>%%'", {
- 'dst_board_id': DST_BOARD_ID,
- })
- rows = cur.fetchall()
- for i, row in enumerate(rows):
- if i % 10 == 0:
- print 'Updated {} posts'.format(i)
- fix_refs(cur, row)
- print 'Cleaning...'
- cur.execute(CLEANUP_SQL)
- if __name__ == '__main__':
- merge_boards()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement