Advertisement
huutho_96

Script

Jan 17th, 2019
561
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  // Count the distinct crawled domains, broken down by the `author` value
  // stored on each matching medusa_domains record.
  db.getCollection("medusa_crawlUrls").aggregate(
    [
      // Collapse the crawled URLs to one document per distinct domainId.
      { $group: { _id: "$domainId" } },

      // Attach the matching domain record; $unwind drops ids with no match.
      {
        $lookup: {
          from: "medusa_domains",
          localField: "_id",
          foreignField: "_id",
          as: "domain",
        },
      },
      { $unwind: "$domain" },

      // Keep only the domain id and its author.
      { $project: { _id: "$domain._id", author: "$domain.author" } },

      // One result row per author: number of domains plus the domain rows.
      {
        $group: {
          _id: "$author",
          total: { $sum: 1 },
          rows: { $push: "$$ROOT" },
        },
      },
    ],
    { allowDiskUse: true }
  );
  // Recorded output (author => total): 2 => 114, 1 => 778, 3 => 3057
  10.  
  11.  
  12.  
  // Find URLs pointing at hacked.html / hac9.html pages for domains whose
  // author is 2 (per the original note, the list maintained by Duc).
  // NOTE(review): the original note says "URLs ending with" these names, but
  // the pattern below is not anchored with `$` - confirm whether that matters.
  // NOTE(review): the final `total` counts grouped domains, not individual
  // URLs - confirm which figure the recorded output refers to.
  db.getCollection("medusa_crawlUrls").aggregate(
    [
      // Keep only URLs containing one of the two defacement page names.
      { $match: { url: /hacked\.html|hac9\.html/ } },

      // Per domain: number of matching URLs and the URLs themselves.
      {
        $group: {
          _id: "$domainId",
          total: { $sum: 1 },
          rows: { $push: { url: "$url", _id: "$_id" } },
        },
      },

      // Domains with the most matching URLs first.
      { $sort: { total: -1 } },

      // Attach the domain record and keep only author 2.
      {
        $lookup: {
          from: "medusa_domains",
          localField: "_id",
          foreignField: "_id",
          as: "domain",
        },
      },
      { $unwind: "$domain" },
      { $match: { "domain.author": 2 } },

      // Drop the joined domain record from the output.
      { $project: { total: 1, rows: 1 } },

      // Single summary document: number of domains plus the per-domain rows.
      {
        $group: {
          _id: null,
          total: { $sum: 1 },
          rows: { $push: "$$ROOT" },
        },
      },
    ],
    { allowDiskUse: true }
  );
  // Recorded output: 104
  25.  
  26.  
  27.  
  28.  
  // The queries from here on are applied to the list of 5K Vietnamese sites.
  // In practice the collected statistics have not yet reached the 5K mark.

  // xCache: for every distinct xCache value, how many domains (author 2 or 3)
  // returned it, and which domains those are.
  // NOTE(review): xCache is presumably the X-Cache response header - confirm.
  db.getCollection("medusa_crawlUrls").aggregate(
    [
      // Ignore URLs with no xCache value.
      { $match: { xCache: { $ne: null } } },

      // De-duplicate to one document per (domain, xCache value) pair.
      { $group: { _id: { domainId: "$domainId", xCache: "$xCache" } } },

      // Attach the domain record and keep only authors 2 and 3.
      {
        $lookup: {
          from: "medusa_domains",
          localField: "_id.domainId",
          foreignField: "_id",
          as: "domain",
        },
      },
      { $unwind: "$domain" },
      { $match: { "domain.author": { $in: [2, 3] } } },

      // Flatten to { domain, xCache }.
      { $project: { domain: "$domain.domain", _id: 0, xCache: "$_id.xCache" } },

      // Count and list the domains for each xCache value, most common first.
      {
        $group: {
          _id: "$xCache",
          total: { $sum: 1 },
          domains: { $push: "$domain" },
        },
      },
      { $sort: { total: -1 } },
    ],
    { allowDiskUse: true }
  );
  41.  
  42.  
  // cacheControl: for every distinct cacheControl value, how many domains
  // (author 2 or 3) returned it, and which domains those are.
  // NOTE(review): cacheControl is presumably the Cache-Control response
  // header - confirm.
  db.getCollection("medusa_crawlUrls").aggregate(
    [
      // Ignore URLs with no cacheControl value.
      { $match: { cacheControl: { $ne: null } } },

      // De-duplicate to one document per (domain, cacheControl value) pair.
      {
        $group: {
          _id: { domainId: "$domainId", cacheControl: "$cacheControl" },
        },
      },

      // Attach the domain record and keep only authors 2 and 3.
      {
        $lookup: {
          from: "medusa_domains",
          localField: "_id.domainId",
          foreignField: "_id",
          as: "domain",
        },
      },
      { $unwind: "$domain" },
      { $match: { "domain.author": { $in: [2, 3] } } },

      // Flatten to { domain, cacheControl }.
      {
        $project: {
          domain: "$domain.domain",
          _id: 0,
          cacheControl: "$_id.cacheControl",
        },
      },

      // Count and list the domains for each value, most common first.
      {
        $group: {
          _id: "$cacheControl",
          total: { $sum: 1 },
          domains: { $push: "$domain" },
        },
      },
      { $sort: { total: -1 } },
    ],
    { allowDiskUse: true }
  );
  54.  
  55.  
  // server: for every distinct `server` value, how many domains (author 2 or
  // 3) returned it, and which domains those are.
  // NOTE(review): `server` is presumably the Server response header - confirm.
  db.getCollection("medusa_crawlUrls").aggregate(
    [
      // Ignore URLs with no server value.
      { $match: { server: { $ne: null } } },

      // De-duplicate to one document per (domain, server value) pair.
      { $group: { _id: { domainId: "$domainId", server: "$server" } } },

      // Attach the domain record and keep only authors 2 and 3.
      {
        $lookup: {
          from: "medusa_domains",
          localField: "_id.domainId",
          foreignField: "_id",
          as: "domain",
        },
      },
      { $unwind: "$domain" },
      { $match: { "domain.author": { $in: [2, 3] } } },

      // Flatten to { domain, server }.
      { $project: { domain: "$domain.domain", _id: 0, server: "$_id.server" } },

      // Count and list the domains for each server value, most common first.
      {
        $group: {
          _id: "$server",
          total: { $sum: 1 },
          domains: { $push: "$domain" },
        },
      },
      { $sort: { total: -1 } },
    ],
    { allowDiskUse: true }
  );
  67.  
  68.  
  69.  
  // Group the crawled URLs whose vector.iframe value is greater than 0 by the
  // domain they belong to, collecting the full URL documents per domain.
  //
  // The group key is `$domainId`: crawl-URL documents reference their domain
  // through that field. A `domain` sub-document only exists after a
  // `$lookup ... as: "domain"` stage, which this pipeline does not run, so
  // grouping on "$domain._id" would put every document into one `_id: null`
  // group.
  // NOTE(review): `rows` holds whole documents; a domain with very many
  // matching URLs could exceed the 16 MB BSON document limit - consider
  // pushing only the fields that are needed.
  db.getCollection("medusa_crawlUrls").aggregate(
    [
      { $match: { "vector.iframe": { $gt: 0 } } },
      { $group: { _id: "$domainId", rows: { $push: "$$ROOT" } } },
    ],
    { allowDiskUse: true }
  );
  74.  
  75.  
  76.  
  // Number of domains that contain sensitive images.
  // NOTE(review): an image is treated as sensitive when images.highlight is
  // true - confirm that this is what the flag means.
  //
  // Output: a single document { _id: null, total, urls } where
  //   total - number of distinct domains with at least one flagged image
  //   urls  - number of distinct crawled URLs with at least one flagged image
  // (`total` previously summed the per-domain URL counts, so it reported the
  // URL figure rather than the domain count; that figure is kept as `urls`.)
  db.getCollection("medusa_crawlUrls").aggregate(
    [
      // One document per image, keeping only the flagged ones.
      { $unwind: "$images" },
      { $match: { "images.highlight": true } },

      // Back to one document per crawled URL (a URL may have several images).
      { $group: { _id: "$_id", domainId: { $first: "$domainId" } } },

      // One document per domain, with its number of affected URLs.
      { $group: { _id: "$domainId", total: { $sum: 1 } } },

      // Summary: count the domains, and keep the overall URL count alongside.
      {
        $group: {
          _id: null,
          total: { $sum: 1 },
          urls: { $sum: "$total" },
        },
      },
    ],
    { allowDiskUse: true }
  );
  85.  
  86.  
  // Number of domains that contain sensitive keywords, broken down by the
  // `author` value of the domain.
  db.getCollection("medusa_crawlUrls").aggregate(
    [
      // One document per sensitive string; URLs whose sensitiveStrings array
      // is missing or empty produce no documents and drop out here.
      { $unwind: "$sensitiveStrings" },

      // Back to one document per crawled URL.
      { $group: { _id: "$_id", domainId: { $first: "$domainId" } } },

      // One document per domain, with its number of affected URLs.
      { $group: { _id: "$domainId", total: { $sum: 1 } } },

      // Attach the domain record; $unwind drops ids with no match.
      {
        $lookup: {
          from: "medusa_domains",
          localField: "_id",
          foreignField: "_id",
          as: "domain",
        },
      },
      { $unwind: "$domain" },

      // One result row per author: the domain records and how many there are.
      {
        $group: {
          _id: "$domain.author",
          records: { $push: "$$ROOT" },
          total: { $sum: 1 },
        },
      },
    ],
    { allowDiskUse: true }
  );
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement