Advertisement
Cyb3r_h4ck3r

#OpUk dailymail.co.uk robots.txt by Team IHC

Jan 25th, 2015
1,821
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.41 KB | None | 0 0
  1. #Team IHC #OpUk
  2. # Robots.txt for http://www.dailymail.co.uk/
  3. # TS-3260
  4.  
  5. # All Robots
  6. User-agent: *
  7.  
  8. # Begin Article Rules
  9. Disallow: /*/article-1051921/*
  10. Disallow: /*/article-1292332/*
  11. Disallow: /*/article-1311976/*
  12. Disallow: /*/article-1328592/*
  13. Disallow: /*/article-1342043/*
  14. Disallow: /*/article-1364992/*
  15. Disallow: /*/article-1367053/*
  16. Disallow: /*/article-1338655/*
  17. Disallow: /*/article-1251554/*
  18. Disallow: /*/article-1348967/*
  19. Disallow: /*/article-2016346/*
  20. Disallow: /*/article-2020536/*
  21. Disallow: /money/markets/article-2024243/Soci-t-G-n-rale.html
  22. Disallow: /*/article-2044583/*
  23. Disallow: /*/article-1363836/*
  24. Disallow: /*/article-2061072/*
  25. Disallow: /*/article-2056855/*
  26. Disallow: /*/article-2077814/*
  27. Disallow: /*/article-2102168/*
  28.  
  29.  
  30. # Begin Standard Rules
  31. Disallow: /*reportAbuseInComment.html?
  32. Disallow: /*pageOffset
  33. Disallow: /*previousThread.html
  34. Disallow: /*createThread.html
  35. Disallow: /*emailArticle.html$
  36. Disallow: /*nextThread.html
  37. Disallow: /*readLater.html$
  38. Disallow: /*myStories.html$
  39. Disallow: /*logout?redirectPath=
  40. Disallow: /*login?redirectPath=
  41. Disallow: /*createThread.html
  42. Disallow: /*refer_product.php?
  43. Disallow: /*startIndex=
  44. Disallow: /*pageSize
  45. Disallow: /*?start=
  46. Disallow: /SITE=DM/
  47. Disallow: /js
  48. Disallow: /*debateUserSearch.html
  49. Disallow: /*debateSearchResults.html
  50. Disallow: /*debateTagSearch.html
  51. Disallow: /*textbased/channel
  52. Disallow: /*goto.php?
  53. Disallow: /*?printingPage=true$
  54. Disallow: /news/ireland/
  55. Disallow: /home/scotland/
  56. Disallow: /*redirect.php?
  57. Disallow: /*searchPhrase=*
  58. Disallow: /*threadIndex=*
  59. Disallow: /*reportAbuse.html?messageId=*
  60. Disallow: /*questionId*
  61. Disallow: /*competitionId=*
  62. Disallow: /*no-text*
  63. Disallow: /*topGallery*
  64. Disallow: /*selectedImage*
  65. Disallow: /sudoku*
  66. Disallow: /ce/item.cms*
  67. Disallow: /mailshopQA/*
  68.  
  69. # Begin ACAP Rules
  70. ACAP-crawler: *
  71. ACAP-disallow-crawl: /*pageOffset
  72. ACAP-disallow-crawl: /*previousThread.html$
  73. ACAP-disallow-crawl: /*createThread.html$
  74. ACAP-disallow-crawl: /*emailArticle.html$
  75. ACAP-disallow-crawl: /*nextThread.html$
  76. ACAP-disallow-crawl: /*readLater.html$
  77. ACAP-disallow-crawl: /*myStories.html$
  78. ACAP-disallow-crawl: /*logout?redirectPath=
  79. ACAP-disallow-crawl: /*login?redirectPath=
  80. ACAP-disallow-crawl: /*createThread.html
  81. ACAP-disallow-crawl: /*reportAbuse.html
  82. ACAP-disallow-crawl: /*refer_product.php?
  83. ACAP-disallow-crawl: /*startIndex=
  84. ACAP-disallow-crawl: /*pageSize
  85. ACAP-disallow-crawl: /*?start=
  86. ACAP-disallow-crawl: /SITE=DM/
  87. ACAP-disallow-crawl: /js
  88. ACAP-disallow-crawl: /*debateUserSearch.html
  89. ACAP-disallow-crawl: /*debateSearchResults.html
  90. ACAP-disallow-crawl: /*debateTagSearch.html
  91. ACAP-disallow-crawl: /*textbased/channel
  92. ACAP-disallow-crawl: /*goto.php?
  93. ACAP-disallow-crawl: /*reportAbuseInComment.html?
  94. ACAP-disallow-crawl: /*?printingPage=true$
  95. ACAP-disallow-crawl: /news/ireland/
  96. ACAP-disallow-crawl: /home/scotland/
  97. ACAP-disallow-crawl: /*redirect.php?
  98. ACAP-disallow-crawl: /*searchPhrase=*
  99. ACAP-disallow-crawl: /*threadIndex=*
  100. ACAP-disallow-crawl: /*reportAbuse.html?messageId=*
  101. ACAP-disallow-crawl: /*questionId*
  102. ACAP-disallow-crawl: /*competitionId=*
  103. ACAP-disallow-crawl: /*no-text*
  104. ACAP-disallow-crawl: /*topGallery*
  105. ACAP-disallow-crawl: /*selectedImage*
  106. ACAP-disallow-crawl: /sudoku*
  107. ACAP-disallow-crawl: /ce/item.cms*
  108. ACAP-disallow-crawl: /mailshopQA/*
  109.  
  110. # Disallow Money
  111. Disallow: /money/
  112. ACAP-disallow-crawl: /money/
  113.  
  114. # Allow Adsense
  115. User-agent: Mediapartners-Google
  116. ACAP-crawler: Mediapartners-Google
  117. Disallow:
  118.  
  119. # Disallow Specific Robots
  120. User-agent: Meltwater
  121. Disallow: /
  122.  
  123. User-agent: omgilibot/0.3
  124. Disallow: /
  125.  
  126. User-agent: WebVac
  127. Disallow: /
  128.  
  129. User-agent: WebZip
  130. Disallow: /
  131.  
  132. User-agent: psbot
  133. Disallow: /
  134.  
  135.  
  136. # Disallow Specific Robots ACAP
  137. ACAP-crawler: Meltwater
  138. ACAP-disallow-crawl: /
  139.  
  140. ACAP-crawler: omgilibot/0.3
  141. ACAP-disallow-crawl: /
  142.  
  143. ACAP-crawler: WebVac
  144. ACAP-disallow-crawl: /
  145.  
  146. ACAP-crawler: WebZip
  147. ACAP-disallow-crawl: /
  148.  
  149. ACAP-crawler: psbot
  150. ACAP-disallow-crawl: /
  151.  
  152.  
  153. # Sitemap Files
  154. Sitemap: http://www.dailymail.co.uk/newssitemap.xml
  155. Sitemap: http://www.dailymail.co.uk/sitemap-articles-year~2012.xml
  156. Sitemap: http://www.dailymail.co.uk/sitemap-articles-year~2011.xml
  157. Sitemap: http://www.dailymail.co.uk/sitemap-articles-year~2010.xml
  158. Sitemap: http://www.dailymail.co.uk/sitemap-articles-year~2009.xml
  159. Sitemap: http://www.dailymail.co.uk/sitemap-articles-year~2008.xml
  160. Sitemap: http://www.dailymail.co.uk/sitemap-articles-year~2007.xml
  161. Sitemap: http://www.dailymail.co.uk/sitemap-articles-year~2006.xml
  162. Sitemap: http://www.dailymail.co.uk/sitemap-articles-year~2005.xml
  163. Sitemap: http://www.dailymail.co.uk/sitemap-articles-year~2004.xml
  164. Sitemap: http://www.dailymail.co.uk/sitemap-articles-year~2003.xml
  165. Sitemap: http://www.dailymail.co.uk/sitemap-articles-year~2002.xml
  166. Sitemap: http://www.dailymail.co.uk/sitemap-articles-year~2001.xml
  167. Sitemap: http://www.dailymail.co.uk/sitemap-articles-year~2000.xml
  168. Sitemap: http://www.dailymail.co.uk/sitemap-articles-year~1999.xml
  169. Sitemap: http://www.dailymail.co.uk/sitemap-articles-year~1998.xml
  170. Sitemap: http://www.dailymail.co.uk/sitemap-articles-year~1997.xml
  171. Sitemap: http://www.dailymail.co.uk/sitemap-articles-year~1996.xml
  172. Sitemap: http://www.dailymail.co.uk/sitemap-articles-year~1995.xml
  173. Sitemap: http://www.dailymail.co.uk/sitemap-articles-year~1994.xml
  174. Sitemap: http://www.dailymail.co.uk/sitemap-articles-year~1993.xml
  175. Sitemap: http://www.dailymail.co.uk/videositemap.xml
Advertisement
Add Comment
Please sign in to add a comment
Advertisement