Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import { TBoardPageInfo, TThreadInfo } from './types.ts'
- import {Post, Thread, ThreadMetrics} from './models.ts'
// Database bootstrap: connect to Postgres, register the models and make sure
// their tables exist before the crawler starts writing.
// NOTE(review): credentials are hard-coded here — consider moving them to
// configuration. `PostgresConnector` and `Database` are not imported in the
// visible part of this file — confirm the import exists.
const connection = new PostgresConnector({
  host: 'localhost',
  username: 'root',
  password: 'root',
  database: 'test_db',
})
const db = new Database(connection)
db.link([Post, Thread, ThreadMetrics])
try {
  // drop: false keeps existing tables and their data.
  await db.sync({ drop: false })
} catch (e) {
  // Startup still continues exactly as before, but a failed sync is no
  // longer invisible.
  console.log('db.sync failed', e)
}
/**
 * Deliberately simple polling executor: on every tick it takes one URL off
 * the queue, fetches it, parses the body as JSON and passes the result to
 * every registered callback.
 *
 * @typeParam T - Shape the fetched JSON is assumed to have. The payload is
 *   cast to `T`, it is not validated at runtime.
 */
export class Grabber<T> {
  /** True between `start()` and `stop()`; the polling loop exits once it is false. */
  isRunning = false
  /** URLs waiting to be fetched; consumed from the front. */
  queue: string[] = []
  /** Listeners invoked with every successfully loaded, truthy payload. */
  callbacks: Array<(data: T) => void> = []
  /** Ensures at most one polling loop is alive at any time. */
  private loopActive = false

  /**
   * Paces the loop.
   * @returns A promise that resolves with the current queue after one second.
   */
  wait(): Promise<string[]> {
    return new Promise<string[]>((resolve) => {
      setTimeout(() => {
        resolve(this.queue)
      }, 1000)
    })
  }

  /** Marks the grabber as running and starts the polling loop if none is active. */
  start(): void {
    this.isRunning = true
    void this.loop()
  }

  /** Asks the polling loop to exit after its current tick. Queued URLs are kept. */
  stop(): void {
    this.isRunning = false
  }

  /**
   * Appends one URL or a list of URLs to the end of the queue.
   * @param url - A single URL or an array of URLs.
   */
  addItems(url: string | string[]): void {
    if (Array.isArray(url)) {
      this.queue.push(...url)
    } else {
      this.queue.push(url)
    }
  }

  /**
   * Registers a listener for loaded payloads.
   * @param callback - Called once per successfully loaded payload.
   */
  addCallback(callback: (data: T) => void): void {
    this.callbacks.push(callback)
  }

  /**
   * Polling loop: wait one tick, then process at most one queued URL.
   *
   * Runs until `stop()` is called. Calling it while a loop is already active
   * is a no-op, so repeated `start()` calls never create parallel loops.
   * Previously a stopped grabber re-entered the loop *and* kept processing,
   * which both ignored `stop()` and doubled the number of loops every tick.
   */
  async loop(): Promise<void> {
    if (this.loopActive) {
      return
    }
    this.loopActive = true
    try {
      while (true) {
        await this.wait()
        if (!this.isRunning) {
          break
        }
        const url = this.getUrl()
        const data = url ? await this.load(url) : undefined
        if (data) {
          this.dispatch(data)
        }
      }
    } finally {
      this.loopActive = false
    }
  }

  /** Removes and returns the next queued URL, or `undefined` when the queue is empty. */
  getUrl(): string | undefined {
    return this.queue.shift()
  }

  /**
   * Fetches `url` and parses the response body as JSON.
   *
   * Never throws: network errors, non-200 statuses and JSON parse errors are
   * logged and reported as `undefined`.
   *
   * @param url - Address to fetch.
   * @returns The parsed body cast to `T`, or `undefined` on any failure.
   */
  async load(url: string): Promise<T | undefined> {
    let response: Response
    try {
      response = await fetch(url)
    } catch (e) {
      console.log(e)
      return undefined
    }
    if (response.status !== 200) {
      console.log(response.status, url)
      return undefined
    }
    try {
      return (await response.json()) as T
    } catch (e) {
      console.log(e, response)
      return undefined
    }
  }

  /**
   * Delivers `data` to every callback. A throwing callback is logged and
   * skipped so it can neither starve the remaining callbacks nor terminate
   * the polling loop.
   */
  private dispatch(data: T): void {
    for (const callback of this.callbacks) {
      try {
        callback(data)
      } catch (e) {
        console.log(e)
      }
    }
  }
}
/**
 * Crawler: periodically enqueues board-page URLs, turns every loaded board
 * page into thread URLs, and persists every loaded thread through the
 * `Thread`, `ThreadMetrics` and `Post` models.
 *
 * Constructing a Robot has side effects: it starts both grabbers and
 * schedules the timers.
 */
export class Robot {
  /** Post cache is cleared once it grows beyond this many entries. */
  private static readonly MAX_CACHED_POSTS = 30000
  /** Thread cache is cleared once it grows beyond this many entries. */
  private static readonly MAX_CACHED_THREADS = 3000

  // Cache keys are `board:num`. Several boards are crawled and every stored
  // row carries its board next to the number, so a bare number is not a
  // unique identity: an equal number on another board used to be skipped.
  private _postsCache: Set<string> = new Set()
  private _threadsCache: Set<string> = new Set()
  private _boardGrabber: Grabber<TBoardPageInfo> = new Grabber<TBoardPageInfo>()
  private _threadGrabber: Grabber<TThreadInfo> = new Grabber<TThreadInfo>()

  constructor() {
    this._boardGrabber.addCallback(this.onBoardPageLoaded.bind(this))
    this._threadGrabber.addCallback(this.onThreadLoaded.bind(this))
    this._boardGrabber.start()
    this._threadGrabber.start()
    this.resentUrls()

    // Slow boards.
    // NOTE(review): this is a one-shot timer firing once after 100 minutes;
    // confirm a repeating interval was not intended.
    setTimeout(() => {
      this._threadGrabber.addItems([
        'https://2ch.hk/biz/res/1007022.json',
        'https://2ch.hk/cc/res/229275.json'
      ])
      this._boardGrabber.addItems(Robot.pageUrls('dev', 10))
    }, 1000 * 60 * 100)

    // Fast boards: re-enqueue their pages every 5 minutes.
    setInterval(this.resentUrls.bind(this), 1000 * 60 * 5)
  }

  /**
   * Logs the thread queue length, resets the caches when they exceed their
   * size limits, and enqueues the fast boards' pages again.
   */
  resentUrls(): void {
    const queueLen = this._threadGrabber.queue.length
    console.log('queueLen', queueLen)
    if (this._postsCache.size > Robot.MAX_CACHED_POSTS) {
      this._postsCache.clear()
    }
    if (this._threadsCache.size > Robot.MAX_CACHED_THREADS) {
      this._threadsCache.clear()
    }
    this._boardGrabber.addItems(this.getBoardPagesUrls())
  }

  /** @returns Page URLs of the fast boards: pages 1-10 of `b`, then pages 1-2 of `po`. */
  getBoardPagesUrls(): string[] {
    return [...Robot.pageUrls('b', 10), ...Robot.pageUrls('po', 2)]
  }

  /**
   * @param boardPageInfo - A loaded board page.
   * @returns One thread JSON URL per thread listed on that page.
   */
  getThreadsUrls(boardPageInfo: TBoardPageInfo): string[] {
    const boardId = boardPageInfo.board.id
    return boardPageInfo.threads.map(
      thread => `https://2ch.hk/${boardId}/res/${thread.thread_num}.json`
    )
  }

  /** Board-grabber callback: enqueues every thread found on the page. */
  onBoardPageLoaded(data: TBoardPageInfo): void {
    this._threadGrabber.addItems(this.getThreadsUrls(data))
  }

  /** Thread-grabber callback: stores the thread with its metrics, then its posts. */
  onThreadLoaded(data: TThreadInfo): void {
    this.saveThread(data)
    this.savePosts(data)
  }

  /** Builds the URLs of pages `1..count` of `board`. */
  private static pageUrls(board: string, count: number): string[] {
    return Array.from({ length: count }, (_, i) => `https://2ch.hk/${board}/${i + 1}.json`)
  }

  /**
   * Inserts the thread row the first time the thread is seen (per cache
   * lifetime) and always inserts a fresh metrics snapshot.
   */
  private saveThread(boardInfo: TThreadInfo): void {
    const { current_thread, unique_posters, posts_count } = boardInfo
    const threadKey = `${boardInfo.board.id}:${current_thread}`
    if (!this._threadsCache.has(threadKey)) {
      Thread.create({
        thread_num: current_thread,
        board: boardInfo.board.id,
      }).catch(() => {
        // Best effort; presumably fails when the row already exists after a
        // cache reset or restart — TODO confirm.
      })
      this._threadsCache.add(threadKey)
    }

    // A payload without a thread or without posts used to throw here.
    const thread = boardInfo.threads[0]
    const openingPost = thread?.posts?.[0]
    if (!thread || !openingPost) {
      console.log('thread payload has no posts', boardInfo.board.id, current_thread)
      return
    }

    let filesCount = 0
    for (const post of thread.posts) {
      filesCount += post.files?.length ?? 0
    }
    ThreadMetrics.create({
      thread_num: current_thread,
      board: boardInfo.board.id,
      unique_posters,
      posts_count,
      views: openingPost.views,
      files_count: filesCount
    }).catch(() => {
      // Best effort: a failed metrics insert is ignored.
    })
  }

  /** Inserts every post of the thread that is not yet in the post cache. */
  private savePosts(data: TThreadInfo): void {
    const { current_thread, board } = data
    // Tolerate a payload without a thread instead of throwing.
    const posts = data.threads[0]?.posts ?? []
    for (const post of posts) {
      const postKey = `${board.id}:${post.num}`
      if (this._postsCache.has(postKey)) {
        continue
      }
      Post.create({
        board: board.id,
        num: post.num,
        number: post.number,
        comment: post.comment,
        email: post.email,
        // post.timestamp is multiplied by 1000 as in the original, i.e. it
        // is treated as seconds.
        timestamp: new Date(post.timestamp * 1000),
        thread_num: current_thread,
        name: post.name,
        files: post.files?.length ?? 0,
        views: post.views,
      }).catch(() => {
        // Best effort: a failed post insert is ignored.
      })
      this._postsCache.add(postKey)
    }
  }
}
- const r = new Robot() // Entry point: constructing the Robot starts both grabbers and schedules its timers.
Add Comment
Please, Sign In to add comment