Advertisement
Guest User

Untitled

a guest
Nov 7th, 2014
248
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 20.40 KB | None | 0 0
  1. <?php
  2.  
  3. /**************************************************************************
  4. * Add more spiders to your database *
  5. ***************************************************************************/
  6. /*MyBB Plugin: Add Spiders v 0.1
  7. ---------------------------------------------------------------------------------------------------------
  8. http://www.edidesign.ca
  9. ---------------------------------------------------------------------------------------------------------
  10. Author : Thor2705
  11. http://www.edidesign.ca
  12. Please do not remove the links from this file. Please report any sites that are not working on the MyBB community forum.
  13. */
  14.  
  // Stop immediately when the IN_MYBB constant is not defined, i.e. when this
  // file is loaded on its own rather than from code that defines IN_MYBB.
  if(defined("IN_MYBB") === false)
  {
      die("Direct initialization of this file is not allowed.<br /><br />Please make sure IN_MYBB is defined.");
  }

  // Register addbot() as the callback for the "admin_config_spiders_add" hook.
  $plugins->add_hook("admin_config_spiders_add", "addbot");
  21.  
  /**
   * Describe this plugin (name, author, version, compatibility, ...).
   *
   * @return array Associative array with the keys name, description, website,
   *               author, authorsite, version, guid and compatibility.
   */
  function addbot_info()
  {
      // The plugin website and the author's site are the same address.
      $homepage = "http://www.edidesign.ca";

      $details = array();
      $details["name"]          = "Add Spiders";
      $details["description"]   = "Add more spiders to your data base";
      $details["website"]       = $homepage;
      $details["author"]        = "Thor2705";
      $details["authorsite"]    = $homepage;
      $details["version"]       = "0.1";
      $details["guid"]          = "66cc039962c0412fadbb48be13e96e37";
      $details["compatibility"] = "14*";

      return $details;
  }
  40.  
  41.  
  42.  
  43.  
  44.  
  /**
   * Return the spiders this plugin manages.
   *
   * Shared by addbot_activate() and addbot_deactivate() so both always work on
   * exactly the same list.
   *
   * Googlebot, MsnBot and Slurp are deliberately not listed: the original
   * author marked them as "detected by MyBB by default".
   *
   * NOTE: the 'name' values (including the odd spellings 'Abdcatos' and
   * 'FyperSpider') are stored in the database and matched again on
   * deactivation, so they must not be "corrected" here.
   *
   * @return array List of array('name' => display name, 'useragent' => user agent string).
   */
  function addbot_spider_list()
  {
      return array(
          // MAJOR spiders
          array('name' => 'Ask', 'useragent' => 'Teoma'), // http://www.ask.com - Ask search engine
          array('name' => 'Baidu', 'useragent' => 'Baiduspider'), // http://www.baidu.com - Chinese search engine
          array('name' => 'GigaBot', 'useragent' => 'Gigabot'), // http://www.gigablast.com - heavily travelled spider
          array('name' => 'Google-AdSense', 'useragent' => 'Mediapartners-Google'), // http://www.google.com - Adsense/Adwords
          array('name' => 'Google-Adwords', 'useragent' => 'AdsBot-Google'), // http://www.google.com - Adwords
          array('name' => 'Google-SA', 'useragent' => 'gsa-crawler'), // http://www.google.com - Google Search Appliance
          array('name' => 'Google-Image', 'useragent' => 'Googlebot-Image'), // http://www.google.com - image search
          array('name' => 'InternetArchive', 'useragent' => 'ia_archiver-web.archive.org'), // http://www.archive.org - Wayback Machine
          array('name' => 'Alexa', 'useragent' => 'ia_archiver'), // http://www.alexa.com - must be detected after Internet Archive
          array('name' => 'Omgili', 'useragent' => 'omgilibot'), // http://www.omgili.com - aggressive message board/forum spider
          array('name' => 'Speedy Spider', 'useragent' => 'Speedy Spider'), // http://www.entireweb.com - Entireweb spider
          array('name' => 'Yahoo', 'useragent' => 'yahoo'), // http://www.yahoo.com - Yahoo Publisher Network (a variety in use)
          array('name' => 'Yahoo JP', 'useragent' => 'Y!J'), // http://www.yahoo.co.jp - Yahoo Japan

          // Checkers/Testers/Robots
          array('name' => 'DeadLinksChecker', 'useragent' => 'link validator'), // http://www.dead-links.com/ - dead/bad link checker
          array('name' => 'W3C Validator', 'useragent' => 'W3C_Validator'), // http://validator.w3.org - html/xhtml validity
          array('name' => 'W3C CSSValidator', 'useragent' => 'W3C_CSS_Validator'), // http://jigsaw.w3.org/css-validator/ - css validity
          array('name' => 'W3C FeedValidator', 'useragent' => 'FeedValidator'), // http://validator.w3.org/feed/ - atom/rss validity
          array('name' => 'W3C LinkChecker', 'useragent' => 'W3C-checklink'), // http://validator.w3.org/checklink - link checker
          array('name' => 'W3C mobileOK', 'useragent' => 'W3C-mobileOK'), // http://www.w3.org/2006/07/mobileok-ddc - mobile friendliness
          array('name' => 'W3C P3PValidator', 'useragent' => 'P3P Validator'), // http://www.w3.org/P3P/validator.html

          // Feed readers
          array('name' => 'Bloglines', 'useragent' => 'Bloglines'), // http://www.bloglines.com - blog/rich web content (owned by Ask)
          array('name' => 'Feedburner', 'useragent' => 'Feedburner'), // http://www.feedburner.com - RSS feed reader

          // Website Thumbnail/Snapshot/Thumbshot takers
          array('name' => 'SnapBot', 'useragent' => 'Snapbot'), // http://www.snap.com - snapshots provider
          array('name' => 'Picsearch', 'useragent' => 'psbot'), // http://www.picsearch.com - picture/image search engine
          array('name' => 'Websnapr', 'useragent' => 'Websnapr'), // http://www.websnapr.com - site screenshot taker

          // More MINOR Spiders/Robots
          array('name' => 'AllTheWeb', 'useragent' => 'FAST-WebCrawler'), // http://www.alltheweb.com - (now owned by Yahoo)
          array('name' => 'Altavista', 'useragent' => 'Scooter'), // http://www.altavista.com
          array('name' => 'Asterias', 'useragent' => 'asterias'), // http://www.aol.com - media spider
          array('name' => '192bot', 'useragent' => '192.comAgent'), // http://www.192.com
          array('name' => 'AbachoBot', 'useragent' => 'ABACHOBot'), // http://www.abacho.com - multi language search engine/translator
          array('name' => 'Abdcatos', 'useragent' => 'abcdatos'), // http://www.abcdatos.com/botlink/ - Italian search engine
          array('name' => 'Acoon', 'useragent' => 'Acoon'), // http://www.acoon.de - small search engine
          array('name' => 'Accoona', 'useragent' => 'Accoona'), // http://www.accoona.com
          array('name' => 'BecomeBot', 'useragent' => 'BecomeBot'), // http://www.become.com - shopping/products search engine
          array('name' => 'BlogRefsBot', 'useragent' => 'BlogRefsBot'), // http://www.blogrefs.com/about/bloggers - blogs related
          array('name' => 'Daumoa', 'useragent' => 'Daumoa'), // http://ws.daum.net/aboutkr.html - South Korean search engine
          array('name' => 'DuckDuckBot', 'useragent' => 'DuckDuckBot'), // http://duckduckgo.com/duckduckbot.html
          array('name' => 'Exabot', 'useragent' => 'Exabot'), // http://www.exalead.com - small search engine
          array('name' => 'Furl', 'useragent' => 'Furlbot'), // http://www.furl.net - Furl social bookmarking site
          array('name' => 'FyperSpider', 'useragent' => 'FyberSpider'), // http://www.fybersearch.com - small search engine
          array('name' => 'Geona', 'useragent' => 'GeonaBot'), // http://www.geona.com - small search engine
          array('name' => 'GirafaBot', 'useragent' => 'Girafabot'), // http://www.girafa.com/ - thumbshot provider
          array('name' => 'GoSeeBot', 'useragent' => 'GoSeeBot'), // http://www.gosee.com/bot.html - small search engine
          array('name' => 'Ichiro', 'useragent' => 'ichiro'), // http://help.goo.ne.jp/door/crawler.html - Japanese search engine
          array('name' => 'LapozzBot', 'useragent' => 'LapozzBot'), // http://www.lapozz.hu - Hungarian search engine
          array('name' => 'Looksmart', 'useragent' => 'WISENutbot'), // http://www.looksmart.com - advertising related
          array('name' => 'Lycos', 'useragent' => 'Lycos_Spider'), // http://www.lycos.com
          array('name' => 'Majestic12', 'useragent' => 'MJ12bot/v2'), // http://www.majestic12.co.uk/ - distributed search engine project
          array('name' => 'MLBot', 'useragent' => 'MLBot'), // http://www.metadatalabs.com/ - media indexing spider
          array('name' => 'MSRBOT', 'useragent' => 'msrbot'), // http://research.microsoft.com/research/sv/msrbot/ - Microsoft Research
          array('name' => 'MSR-ISRCCrawler', 'useragent' => 'MSR-ISRCCrawler'), // http://www.microsoft.com/research/ - Microsoft Research
          array('name' => 'Naver', 'useragent' => 'NaverBot'), // http://www.naver.com - South Korean search engine
          array('name' => 'Naver', 'useragent' => 'Yeti'), // http://www.naver.com - second Naver bot
          array('name' => 'NoxTrumBot', 'useragent' => 'noxtrumbot'), // http://www.noxtrum.com - Spanish search engine
          array('name' => 'OmniExplorer', 'useragent' => 'OmniExplorer_Bot'), // http://www.omni-explorer.com/
          array('name' => 'OnetSzukaj', 'useragent' => 'OnetSzukaj'), // http://szukaj.onet.pl - Polish search engine
          array('name' => 'ScrubTheWeb', 'useragent' => 'Scrubby'), // http://www.scrubtheweb.com
          array('name' => 'SearchSight', 'useragent' => 'SearchSight'), // http://www.searchsite.com
          array('name' => 'Seeqpod', 'useragent' => 'Seeqpod'), // http://www.seeqpod.com - mp3 search engine
          array('name' => 'Shablast', 'useragent' => 'ShablastBot'), // http://www.shablast.com - small search engine
          array('name' => 'SitiDiBot', 'useragent' => 'SitiDiBot'), // http://www.sitidi.net - Italian Sitidi search engine
          array('name' => 'Slider', 'useragent' => 'silk/1.0'), // http://www.slider.com - only spiders DMOZ entries
          array('name' => 'Sogou', 'useragent' => 'Sogou'), // http://www.sogou.com - Chinese search engine
          array('name' => 'Sosospider', 'useragent' => 'Sosospider'), // http://help.soso.com/webspider.htm - non-English search engine
          array('name' => 'StackRambler', 'useragent' => 'StackRambler'), // http://www.rambler.ru/doc/robots.shtml - Russian portal
          array('name' => 'SurveyBot', 'useragent' => 'SurveyBot'), // http://www.domaintools.com - website statistics probe
          array('name' => 'Touche', 'useragent' => 'Touche'), // http://www.touche.com.ve - small search engine
          array('name' => 'Walhello', 'useragent' => 'appie'), // http://www.wahello.com/
          array('name' => 'WebAlta', 'useragent' => 'WebAlta'), // http://www.webalta.net - Russian search engine
          array('name' => 'Wisponbot', 'useragent' => 'wisponbot'), // http://www.wispon.com - Korean search engine
          array('name' => 'YacyBot', 'useragent' => 'yacybot'), // http://www.yacy.com - distributed search engine crawler
          array('name' => 'YodaoBot', 'useragent' => 'YodaoBot'), // http://www.yodao.com - Chinese search engine

          // Google-Wanna-Be's - Spiders/Robots for Startups
          array('name' => 'Charlotte', 'useragent' => 'Charlotte'), // http://www.searchme.com/support/
          array('name' => 'DiscoBot', 'useragent' => 'DiscoBot'), // http://discoveryengine.com/discobot.html
          array('name' => 'EnaBot', 'useragent' => 'EnaBot'), // http://www.enaball.com/crawler.html - experimental spider
          array('name' => 'Gaisbot', 'useragent' => 'Gaisbot'), // http://gais.cs.ccu.edu.tw/robot.php
          array('name' => 'Kalooga', 'useragent' => 'kalooga'), // http://www.kalooga.com - media search engine
          array('name' => 'ScoutJet', 'useragent' => 'ScoutJet'), // http://www.scoutjet.com/ - (by the DMOZ founders)
          array('name' => 'TinEye', 'useragent' => 'TinEye'), // http://tineye.com/crawler.html
          array('name' => 'Twiceler', 'useragent' => 'twiceler'), // http://www.cuill.com/twiceler/robot.html - experimental, aggressive

          // Software. Detected because these tools can be very intensive;
          // seeing them in use lets an admin block them if necessary.
          array('name' => 'GSiteCrawler', 'useragent' => 'GSiteCrawler'), // http://www.gsitecrawler.com/ - sitemap generator
          array('name' => 'HTTrack', 'useragent' => 'HTTrack'), // http://www.httrack.com - website copier / offline browser
          array('name' => 'Wget', 'useragent' => 'Wget'), // http://www.gnu.org/software/wget/ - GNU file retriever
      );
  }

  /**
   * Ask MyBB to rebuild its cached copy of the spiders table, when possible.
   *
   * NOTE(review): MyBB core is believed to read spiders from its "spiders"
   * datacache rather than straight from the table - confirm against the
   * targeted MyBB version. The call is guarded, so nothing happens when the
   * global $cache object or its update_spiders() method is not available.
   *
   * @return void
   */
  function addbot_refresh_spider_cache()
  {
      global $cache;

      if(is_object($cache) && method_exists($cache, 'update_spiders'))
      {
          $cache->update_spiders();
      }
  }

  /**
   * Activation: insert every spider from addbot_spider_list() whose user agent
   * is not yet present in the spiders table.
   *
   * Every existing row is read before inserting, so activating the plugin
   * repeatedly never creates duplicate rows.
   *
   * @return void
   */
  function addbot_activate()
  {
      global $db;

      // Index all user agents already stored. fetch_array() yields one row per
      // call, so it has to be looped until the result set is exhausted.
      $known = array();
      $query = $db->query("SELECT useragent FROM ".TABLE_PREFIX."spiders");
      while($row = $db->fetch_array($query))
      {
          $known[$row['useragent']] = true;
      }

      foreach(addbot_spider_list() as $spider)
      {
          if(isset($known[$spider['useragent']]))
          {
              continue;
          }

          $db->insert_query("spiders", array(
              'name' => $db->escape_string($spider['name']),
              'useragent' => $db->escape_string($spider['useragent']),
          ));

          // Remember it so a repeated user agent in the list is inserted only once.
          $known[$spider['useragent']] = true;
      }

      addbot_refresh_spider_cache();
  }

  /**
   * Callback registered for the "admin_config_spiders_add" hook.
   *
   * Intentionally empty: the plugin does all of its work on activation and
   * deactivation.
   *
   * @return void
   */
  function addbot()
  {
  }

  /**
   * Deactivation: remove the spiders listed in addbot_spider_list().
   *
   * A row is deleted only when both its name and its user agent match a list
   * entry, so rows that merely share a display name with one of them are kept.
   *
   * @return void
   */
  function addbot_deactivate()
  {
      global $db;

      foreach(addbot_spider_list() as $spider)
      {
          $name = $db->escape_string($spider['name']);
          $useragent = $db->escape_string($spider['useragent']);

          $db->write_query(
              "DELETE FROM ".TABLE_PREFIX."spiders WHERE name='".$name."' AND useragent='".$useragent."'"
          );
      }

      addbot_refresh_spider_cache();
  }
  657.  
  658. ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement