Guest User

Untitled

a guest
Feb 9th, 2023
106
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import {Database, PostgresConnector} from 'https://deno.land/x/[email protected]/mod.ts'
  2. import { TBoardPageInfo, TThreadInfo } from './types.ts'
  3. import {Post, Thread, ThreadMetrics} from './models.ts'
  4.  
  // Connection to the PostgreSQL instance that backs the Post / Thread / ThreadMetrics models.
  // NOTE(review): host and credentials are hard-coded; consider moving them to configuration.
  const connection = new PostgresConnector({
      host: 'localhost',
      username: 'root',
      password: 'root',
      database: 'test_db',
  })
  const db = new Database(connection)
  db.link([Post, Thread, ThreadMetrics])

  try {
      // `drop: false` keeps existing tables and their data.
      await db.sync({drop: false})
  } catch (e) {
      // Best effort: a failed sync must not abort start-up (presumably it fails when
      // the tables already exist — TODO confirm), but the reason is reported instead
      // of being silently discarded so real connection/schema problems stay visible.
      console.log('db.sync failed:', e)
  }
  17.  
  /**
   * Simple polling executor: once per tick it takes one URL from a FIFO queue,
   * fetches it as JSON and passes the parsed result to every registered callback.
   *
   * @typeParam T - Shape the fetched JSON is assumed to have (not validated at runtime).
   */
  export class Grabber<T> {
      isRunning = false
      queue: string[] = []
      callbacks: Array<(data: T) => void> = []

      /** True while a `loop()` call is alive; prevents `start()` from spawning duplicates. */
      private loopActive = false
      /** Delay between two polls of the queue, in milliseconds. */
      private readonly pollIntervalMs: number

      /**
       * @param pollIntervalMs - Delay between two polls of the queue; defaults to one second.
       */
      constructor(pollIntervalMs: number = 1000) {
          this.pollIntervalMs = pollIntervalMs
      }

      /**
       * Pauses for one poll interval.
       *
       * @returns A promise resolved with the current queue once the interval has elapsed.
       */
      wait(): Promise<string[]> {
          return new Promise((resolve) => {
              setTimeout(() => resolve(this.queue), this.pollIntervalMs)
          })
      }

      /** Marks the grabber as running and starts the polling loop unless one is already alive. */
      start(): void {
          this.isRunning = true
          if (!this.loopActive) {
              void this.loop()
          }
      }

      /** Asks the polling loop to finish; it exits at its next wake-up. Queued URLs are kept. */
      stop(): void {
          this.isRunning = false
      }

      /**
       * Appends one URL or a list of URLs to the end of the queue.
       *
       * @param url - A single URL or an array of URLs.
       */
      addItems(url: string | string[]): void {
          if (Array.isArray(url)) {
              this.queue.push(...url)
          } else {
              this.queue.push(url)
          }
      }

      /**
       * Registers a listener that receives every successfully loaded payload.
       *
       * @param callback - Invoked with the parsed JSON of each fetched URL.
       */
      addCallback(callback: (data: T) => void): void {
          this.callbacks.push(callback)
      }

      /**
       * Polling loop: wait one interval, take one URL, load it, notify the callbacks.
       * Runs only while `isRunning` is true and returns once the grabber is stopped.
       * Calling it while another loop is alive is a no-op, so at most one loop runs.
       */
      async loop(): Promise<void> {
          if (this.loopActive) {
              return
          }
          this.loopActive = true
          try {
              while (this.isRunning) {
                  await this.wait()
                  // stop() may have been called while we were sleeping.
                  if (!this.isRunning) {
                      break
                  }

                  const url = this.getUrl()
                  if (!url) {
                      continue
                  }

                  const data = await this.load(url)
                  if (data == null) {
                      continue
                  }

                  for (const callback of this.callbacks) {
                      // A failing listener must not terminate the polling loop.
                      try {
                          callback(data)
                      } catch (e) {
                          console.log(e)
                      }
                  }
              }
          } finally {
              this.loopActive = false
          }
      }

      /**
       * Removes and returns the oldest queued URL.
       *
       * @returns The next URL, or `undefined` when the queue is empty.
       */
      getUrl(): string | undefined {
          return this.queue.shift()
      }

      /**
       * Fetches `url` and parses the response body as JSON.
       * Failures are logged and reported as `undefined`; this method never throws.
       *
       * @param url - Address to fetch.
       * @returns The parsed body cast to `T`, or `undefined` on a network error,
       *          a non-200 status or a body that is not valid JSON.
       */
      async load(url: string): Promise<T | undefined> {
          let response
          try {
              response = await fetch(url)
          } catch (e) {
              console.log(e)
              return undefined
          }

          if (response.status !== 200) {
              console.log(response.status, url)
              // Discard the unread body so the underlying resource is released;
              // a failure to cancel is irrelevant because the response is dropped anyway.
              await response.body?.cancel().catch(() => {})
              return undefined
          }

          try {
              return (await response.json()) as T
          } catch (e) {
              console.log(e, response)
              return undefined
          }
      }
  }
  91.  
  /**
   * Crawler for 2ch.hk boards.
   *
   * Board pages are fetched by one grabber; every thread listed on a loaded page
   * is queued on a second grabber, and each loaded thread is stored through the
   * `Thread`, `ThreadMetrics` and `Post` models. Construction starts both
   * grabbers and the refresh timers immediately.
   */
  export class Robot {
      private static readonly BASE_URL = 'https://2ch.hk'
      /** Interval between refreshes of the fast boards (5 minutes). */
      private static readonly FAST_BOARDS_INTERVAL_MS = 1000 * 60 * 5
      /** Delay before the slow boards are queued (100 minutes). */
      private static readonly SLOW_BOARDS_DELAY_MS = 1000 * 60 * 100
      /** Cache sizes above which the in-memory "already stored" sets are reset. */
      private static readonly MAX_CACHED_POSTS = 30000
      private static readonly MAX_CACHED_THREADS = 3000

      private readonly _postsCache: Set<number> = new Set()
      private readonly _threadsCache: Set<number> = new Set()
      private readonly _boardGrabber: Grabber<TBoardPageInfo> = new Grabber<TBoardPageInfo>()
      private readonly _threadGrabber: Grabber<TThreadInfo> = new Grabber<TThreadInfo>()

      constructor() {
          this._boardGrabber.addCallback((page) => this.onBoardPageLoaded(page))
          this._threadGrabber.addCallback((thread) => this.onThreadLoaded(thread))

          this._boardGrabber.start()
          this._threadGrabber.start()

          this.resentUrls()

          // Slow boards.
          // NOTE(review): this is a one-shot timer, so these URLs are queued exactly
          // once, 100 minutes after start-up — confirm whether a periodic refresh
          // (setInterval) was intended.
          setTimeout(() => {
              this._threadGrabber.addItems([
                  `${Robot.BASE_URL}/biz/res/1007022.json`,
                  `${Robot.BASE_URL}/cc/res/229275.json`,
              ])
              this._boardGrabber.addItems(this.pageUrls('dev', 10))
          }, Robot.SLOW_BOARDS_DELAY_MS)

          // Fast boards: re-queue their pages every five minutes.
          setInterval(() => this.resentUrls(), Robot.FAST_BOARDS_INTERVAL_MS)
      }

      /**
       * Queues the fast-board pages for another fetch, logs the current length of
       * the thread queue and resets the de-duplication caches once they grow too large.
       * (The method name is kept as is for compatibility with existing callers.)
       */
      resentUrls(): void {
          const queueLen = this._threadGrabber.queue.length
          console.log('queueLen', queueLen)

          if (this._postsCache.size > Robot.MAX_CACHED_POSTS) {
              this._postsCache.clear()
          }
          if (this._threadsCache.size > Robot.MAX_CACHED_THREADS) {
              this._threadsCache.clear()
          }

          this._boardGrabber.addItems(this.getBoardPagesUrls())
      }

      /**
       * @returns URLs of the fast-board pages: pages 1-10 of `b` followed by pages 1-2 of `po`.
       */
      getBoardPagesUrls(): string[] {
          return [...this.pageUrls('b', 10), ...this.pageUrls('po', 2)]
      }

      /**
       * Builds the JSON URL of every thread listed on a board page.
       *
       * @param boardPageInfo - Loaded board page.
       * @returns One thread URL per entry of `boardPageInfo.threads`, in the same order.
       */
      getThreadsUrls(boardPageInfo: TBoardPageInfo): string[] {
          const boardId = boardPageInfo.board.id
          return boardPageInfo.threads.map(
              (thread) => `${Robot.BASE_URL}/${boardId}/res/${thread.thread_num}.json`,
          )
      }

      /** Board-grabber callback: queues every thread of the loaded page for fetching. */
      onBoardPageLoaded(data: TBoardPageInfo): void {
          this._threadGrabber.addItems(this.getThreadsUrls(data))
      }

      /** Thread-grabber callback: stores the thread, its metrics and its posts. */
      onThreadLoaded(data: TThreadInfo): void {
          this.saveThread(data)
          this.savePosts(data)
      }

      /** Builds the URLs of pages 1..`pages` of `board`. */
      private pageUrls(board: string, pages: number): string[] {
          return Array.from(
              {length: pages},
              (_, index) => `${Robot.BASE_URL}/${board}/${index + 1}.json`,
          )
      }

      /**
       * Stores the thread row (once per cache lifetime) and a new metrics row.
       * Nothing is awaited: writes are fire-and-forget and their failures are ignored.
       */
      private saveThread(boardInfo: TThreadInfo): void {
          const {current_thread, unique_posters, posts_count} = boardInfo
          if (!this._threadsCache.has(current_thread)) {
              Thread.create({
                  thread_num: current_thread,
                  board: boardInfo.board.id,
              }).catch(() => {
                  // Deliberately ignored; presumably mostly duplicate rows after a
                  // cache reset — TODO confirm.
              })

              this._threadsCache.add(current_thread)
          }

          const posts = boardInfo.threads[0]?.posts ?? []
          const openingPost = posts[0]
          if (openingPost === undefined) {
              // A payload without posts has no view counter to record; skip the
              // metrics instead of throwing inside the grabber callback.
              return
          }

          const filesCount = posts.reduce((total, post) => total + (post.files?.length ?? 0), 0)

          ThreadMetrics.create({
              thread_num: current_thread,
              board: boardInfo.board.id,
              unique_posters,
              posts_count,
              views: openingPost.views,
              files_count: filesCount,
          }).catch(() => {
              // Deliberately ignored (best effort).
          })
      }

      /**
       * Stores every post of the thread that is not in the posts cache yet.
       * Writes are fire-and-forget and their failures are ignored.
       */
      private savePosts(data: TThreadInfo): void {
          const {current_thread, board} = data
          const posts = data.threads[0]?.posts ?? []

          for (const post of posts) {
              if (this._postsCache.has(post.num)) {
                  continue
              }

              Post.create({
                  board: board.id,
                  num: post.num,
                  number: post.number,
                  comment: post.comment,
                  email: post.email,
                  // Multiplied by 1000 to get the milliseconds `Date` expects
                  // (assumes the API timestamp is in seconds — TODO confirm).
                  timestamp: new Date(post.timestamp * 1000),
                  thread_num: current_thread,
                  name: post.name,
                  files: post.files?.length ?? 0,
                  views: post.views,
              }).catch(() => {
                  // Deliberately ignored; presumably mostly duplicate rows after a
                  // cache reset — TODO confirm.
              })

              this._postsCache.add(post.num)
          }
      }
  }

  // Start crawling as soon as the module is loaded.
  const r = new Robot()
Add Comment
Please, Sign In to add comment