Advertisement
Guest User

IMDB Scraper

a guest
Jun 3rd, 2010
260
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.74 KB | None | 0 0
  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <scraper framework="1.1" date="2010-02-24" name="IMDb.com" content="movies" thumb="imdb.png" language="en">
  3. <include>common/imdb.xml</include>
  4. <include>common/tmdb.xml</include>
  5. <include>common/movieposterdb.xml</include>
  6. <include>common/impa.xml</include>
  7. <include>common/dtrailer.xml</include>
  <!--
    GetSettings: builds the settings definition exposed by this scraper.
    Every inner RegExp writes one escaped <setting> element into buffer 5
    (the first with dest="5", all later ones with dest="5+"); the outer RegExp
    then wraps $$5 in <settings>...</settings> and stores it in buffer 3.
    NOTE(review): a trailing "+" on dest presumably means "append to the
    buffer" rather than overwrite; confirm against the scraper framework docs.
  -->
  <GetSettings dest="3">
    <RegExp input="$$5" dest="3"
            output="&lt;settings&gt;\1&lt;/settings&gt;">

      <!-- Boolean toggle "fullcredits" (default false); read by GetDetails. -->
      <RegExp input="$$1" dest="5"
              output="&lt;setting label=&quot;Enable full cast credits&quot; type=&quot;bool&quot; id=&quot;fullcredits&quot; default=&quot;false&quot;&gt;&lt;/setting&gt;">
        <expression/>
      </RegExp>

      <!-- Boolean toggle "fanart" (default true). -->
      <RegExp input="$$1" dest="5+"
              output="&lt;setting label=&quot;Enable fanart from themoviedb.org&quot; type=&quot;bool&quot; id=&quot;fanart&quot; default=&quot;true&quot;&gt;&lt;/setting&gt;">
        <expression/>
      </RegExp>

      <!-- Boolean toggle "tmdbthumbs" (default true). -->
      <RegExp input="$$1" dest="5+"
              output="&lt;setting label=&quot;Enable posters from themoviedb.org&quot; type=&quot;bool&quot; id=&quot;tmdbthumbs&quot; default=&quot;true&quot;&gt;&lt;/setting&gt;">
        <expression/>
      </RegExp>

      <!-- Boolean toggle "impawards" (default false). -->
      <RegExp input="$$1" dest="5+"
              output="&lt;setting label=&quot;Enable posters from IMPAwards&quot; type=&quot;bool&quot; id=&quot;impawards&quot; default=&quot;false&quot;&gt;&lt;/setting&gt;">
        <expression/>
      </RegExp>

      <!-- Boolean toggle "movieposterdb" (default false). -->
      <RegExp input="$$1" dest="5+"
              output="&lt;setting label=&quot;Enable posters from MoviePosterDB&quot; type=&quot;bool&quot; id=&quot;movieposterdb&quot; default=&quot;false&quot;&gt;&lt;/setting&gt;">
        <expression/>
      </RegExp>

      <!-- Boolean toggle "trailer" (default true). -->
      <RegExp input="$$1" dest="5+"
              output="&lt;setting label=&quot;Enable IMDb trailers&quot; type=&quot;bool&quot; id=&quot;trailer&quot; default=&quot;true&quot;&gt;&lt;/setting&gt;">
        <expression/>
      </RegExp>

      <!-- Boolean toggle "dtrailer" (default false). -->
      <RegExp input="$$1" dest="5+"
              output="&lt;setting label=&quot;Enable trailers from Dtrailer.com&quot; type=&quot;bool&quot; id=&quot;dtrailer&quot; default=&quot;false&quot;&gt;&lt;/setting&gt;">
        <expression/>
      </RegExp>

      <!--
        Enumerated setting "imdbscale" (default 512). The ampersand in the
        label is escaped twice on purpose: this output is itself XML text that
        is parsed a second time once the settings document is assembled.
      -->
      <RegExp input="$$1" dest="5+"
              output="&lt;setting label=&quot;IMDb Poster &amp;amp; Actor Thumb(s) Size&quot; type=&quot;labelenum&quot; values=&quot;192|256|384|512|1024&quot; id=&quot;imdbscale&quot; default=&quot;512&quot;&gt;&lt;/setting&gt;">
        <expression/>
      </RegExp>

      <expression noclean="1"/>
    </RegExp>
  </GetSettings>
  <!--
    NfoUrl: looks for an IMDb link in the input ($$1) and emits an escaped
    <url> plus <id> pair into buffer 3.

    Two link shapes are recognised:
      1. imdb.com/Title?1234567     (written with dest="3")
      2. imdb.com/title/tt1234567   (written with dest="3+")

    Changes from the previous patterns:
      * The dot in "imdb.com" is escaped. Unescaped it matched any character,
        and because the host is copied into the URL through \1, a string such
        as "imdbxcom/Title?1" produced a URL on a bogus host.
      * The id group requires at least one digit ([0-9]+). With [0-9]* a bare
        "imdb.com/Title?" or "imdb.com/title/tt" matched and produced a URL
        ending in "/title/tt/" and an <id> of just "tt".
  -->
  <NfoUrl dest="3">
    <!-- Legacy "Title?<digits>" links: \1 is the host, \2 the numeric id. -->
    <RegExp input="$$1" dest="3"
            output="&lt;url&gt;http://www.\1/title/tt\2/&lt;/url&gt;&lt;id&gt;tt\2&lt;/id&gt;">
      <expression clear="yes" noclean="1">(imdb\.com)/Title\?([0-9]+)</expression>
    </RegExp>

    <!-- "title/tt<digits>" links: \1 already ends in "tt", \2 is the numeric id. -->
    <RegExp input="$$1" dest="3+"
            output="&lt;url&gt;http://www.\1\2/&lt;/url&gt;&lt;id&gt;tt\2&lt;/id&gt;">
      <expression noclean="1">(imdb\.com/title/tt)([0-9]+)</expression>
    </RegExp>
  </NfoUrl>
  <!--
    CreateSearchUrl: builds the akas.imdb.com title search URL in buffer 3.
    The inner RegExp runs on $$2 and, when it is non-empty, stores
    "%20(<value>)" in buffer 4; clear="yes" is set on that expression so a
    non-match does not leave old content behind (presumed meaning; confirm).
    The outer RegExp appends $$4 to the query built from $$1.
    NOTE(review): $$1 is presumably the search title and $$2 the year; verify
    against the scraper framework documentation.
  -->
  <CreateSearchUrl dest="3" SearchStringEncoding="iso-8859-1">
    <RegExp input="$$1" dest="3"
            output="&lt;url&gt;http://akas.imdb.com/find?s=tt;q=\1$$4&lt;/url&gt;">

      <!-- Optional suffix, URL-encoded space followed by the value in parentheses. -->
      <RegExp input="$$2" dest="4"
              output="%20(\1)">
        <expression clear="yes">(.+)</expression>
      </RegExp>

      <expression noclean="1"/>
    </RegExp>
  </CreateSearchUrl>
  <!--
    GetSearchResults: converts the fetched search page ($$1) into an escaped
    <results> document written to buffer 8. The <entity> items are collected
    in buffer 5 and wrapped by the outer RegExp.

    Steps, in document order:
      1. Capture a title id from a "/title/<id>/faq" link into buffer 7.
      2. Build a single <entity> from the page's meta "title" tag, using $$7
         for the url and id (dest="5").
      3. Copy everything from the first '><a href="/title' onward into buffer 4.
      4. For each title link in $$4, add one <entity> (dest="5+").
    NOTE(review): steps 1 and 2 look like handling for a search that lands
    directly on a title page instead of a result list; confirm.
  -->
  <GetSearchResults dest="8">
    <RegExp input="$$5" dest="8"
            output="&lt;?xml version=&quot;1.0&quot; encoding=&quot;iso-8859-1&quot; standalone=&quot;yes&quot;?&gt;&lt;results&gt;\1&lt;/results&gt;">

      <!-- Step 1: title id taken from the faq link. -->
      <RegExp input="$$1" dest="7"
              output="\1">
        <expression clear="yes">/title/([t0-9]*)/faq</expression>
      </RegExp>

      <!-- Step 2: title and year from the meta tag, id and url from $$7. -->
      <RegExp input="$$1" dest="5"
              output="&lt;entity&gt;&lt;title&gt;\1&lt;/title&gt;&lt;year&gt;\2&lt;/year&gt;&lt;url&gt;http://akas.imdb.com/title/$$7/&lt;/url&gt;&lt;id&gt;$$7&lt;/id&gt;&lt;/entity&gt;">
        <expression clear="yes" noclean="1">&lt;meta name=&quot;title&quot; content=&quot;([^&quot;]*) \(([0-9]*)\)</expression>
      </RegExp>

      <!-- Step 3: narrow the page to the part that starts at the first title link. -->
      <RegExp input="$$1" dest="4"
              output="\1">
        <expression noclean="1">(&gt;&lt;a href=&quot;/title.*)</expression>
      </RegExp>

      <!-- Step 4: one entity per link; \1 is the id, \2 the link text, \3 the year. -->
      <RegExp input="$$4" dest="5+"
              output="&lt;entity&gt;&lt;title&gt;\2&lt;/title&gt;&lt;year&gt;\3&lt;/year&gt;&lt;url&gt;http://akas.imdb.com/title/\1/&lt;/url&gt;&lt;id&gt;\1&lt;/id&gt;&lt;/entity&gt;">
        <expression repeat="yes" noclean="1,2">&gt;&lt;a href=&quot;/title/([t0-9]*)/[^&gt;]*&gt;([^&lt;]*)&lt;/a&gt; *\(([0-9]*)</expression>
      </RegExp>

      <expression clear="yes" noclean="1"/>
    </RegExp>
  </GetSearchResults>
  <!--
    GetDetails: turns a fetched title page ($$1) into an escaped <details>
    document in buffer 3. Every inner RegExp contributes one or more elements
    to buffer 5 (the first with dest="5", the rest with dest="5+"), so the
    sibling order below is the order of elements in the result.

    Buffers as used here: $$2 supplies the <id> value and the cache file name
    prefix; $$3 is used as the base URL that "plotsummary", "fullcredits" and
    "posters" are appended to.
    The functions named in the emitted <url function="..."> elements are not
    defined in this block; presumably they come from the included common/*.xml
    files. The conditional attributes reference setting ids declared in
    GetSettings.
  -->
  <GetDetails dest="3">
    <RegExp input="$$5" dest="3"
            output="&lt;details&gt;\1&lt;/details&gt;">

      <!-- Fields scraped directly from the title page. -->

      <RegExp input="$$2" dest="5"
              output="&lt;id&gt;\1&lt;/id&gt;">
        <expression/>
      </RegExp>

      <RegExp input="$$1" dest="5+"
              output="&lt;title&gt;\1&lt;/title&gt;">
        <expression trim="1" noclean="1">&lt;h1&gt;([^&lt;]*)</expression>
      </RegExp>

      <RegExp input="$$1" dest="5+"
              output="&lt;year&gt;\1&lt;/year&gt;">
        <expression>a href=&quot;/year/([0-9]*)</expression>
      </RegExp>

      <RegExp input="$$1" dest="5+"
              output="&lt;top250&gt;\1&lt;/top250&gt;">
        <expression>Top 250: #([0-9]*)&lt;/a&gt;</expression>
      </RegExp>

      <RegExp input="$$1" dest="5+"
              output="&lt;mpaa&gt;\1&lt;/mpaa&gt;">
        <expression>MPAA&lt;/a&gt;:&lt;/h5&gt;\n&lt;div class=&quot;info-content&quot;&gt;\n(.[^&lt;]*)</expression>
      </RegExp>

      <!-- One element per certificate link; \3 is the optional italic note after it. -->
      <RegExp input="$$1" dest="5+"
              output="&lt;certification&gt;\1 \3&lt;/certification&gt;">
        <expression repeat="yes">&lt;a href=&quot;/search/title\?certificates=[^&quot;]*&quot;&gt;([^&lt;]*)&lt;/a&gt;[^&lt;]*(&lt;i&gt;([^&lt;]*)&lt;/i&gt;)?</expression>
      </RegExp>

      <RegExp input="$$1" dest="5+"
              output="&lt;tagline&gt;\1&lt;/tagline&gt;">
        <expression>&lt;h5&gt;Tagline:&lt;/h5&gt;\n&lt;div class="info-content"&gt;([^&lt;]*)</expression>
      </RegExp>

      <RegExp input="$$1" dest="5+"
              output="&lt;runtime&gt;\1&lt;/runtime&gt;">
        <expression trim="1">&lt;h5&gt;Runtime:&lt;/h5&gt;[^0-9]*([^&lt;]*)</expression>
      </RegExp>

      <RegExp input="$$1" dest="5+"
              output="&lt;rating&gt;\1&lt;/rating&gt;&lt;votes&gt;\2&lt;/votes&gt;">
        <expression>&lt;b&gt;([0-9.]+)/10&lt;/b&gt;[^&lt;]*&lt;a href=&quot;ratings&quot; class=&quot;tn15more&quot;&gt;([0-9,]+) votes&lt;/a&gt;</expression>
      </RegExp>

      <RegExp input="$$1" dest="5+"
              output="&lt;genre&gt;\1&lt;/genre&gt;">
        <expression repeat="yes">&quot;/Sections/Genres/[^/]*/&quot;&gt;([^&lt;]*)&lt;/a&gt;</expression>
      </RegExp>

      <RegExp input="$$1" dest="5+"
              output="&lt;studio&gt;\1&lt;/studio&gt;">
        <expression repeat="yes">&quot;/company/[^/]*/&quot;&gt;([^&lt;]*)&lt;/a&gt;</expression>
      </RegExp>

      <!-- The short plot text is emitted as both outline and plot. -->
      <RegExp input="$$1" dest="5+"
              output="&lt;outline&gt;\1&lt;/outline&gt;&lt;plot&gt;\1&lt;/plot&gt;">
        <expression>Plot:&lt;/h5&gt;\n&lt;div class=&quot;info-content&quot;&gt;\n(.*?) \| &lt;a class=&quot;tn15more</expression>
      </RegExp>

      <!-- Follow-up requests handled by named functions. -->

      <RegExp input="$$2" dest="5+"
              output="&lt;url function=&quot;GetIMDBPlot&quot;&gt;$$3plotsummary&lt;/url&gt;">
        <expression/>
      </RegExp>

      <!-- Credits taken from the base page when "fullcredits" is off. -->
      <RegExp conditional="!fullcredits" input="$$2" dest="5+"
              output="&lt;url cache=&quot;$$2-credits.html&quot; function=&quot;GetIMDBCast&quot;&gt;$$3&lt;/url&gt;">
        <expression/>
      </RegExp>
      <RegExp conditional="!fullcredits" input="$$2" dest="5+"
              output="&lt;url cache=&quot;$$2-credits.html&quot; function=&quot;GetIMDBDirectors&quot;&gt;$$3&lt;/url&gt;">
        <expression/>
      </RegExp>
      <RegExp conditional="!fullcredits" input="$$2" dest="5+"
              output="&lt;url cache=&quot;$$2-credits.html&quot; function=&quot;GetIMDBWriters&quot;&gt;$$3&lt;/url&gt;">
        <expression/>
      </RegExp>

      <!-- Credits taken from the "fullcredits" page when the setting is on. -->
      <RegExp conditional="fullcredits" input="$$2" dest="5+"
              output="&lt;url cache=&quot;$$2-fullcredits.html&quot; function=&quot;GetIMDBCast&quot;&gt;$$3fullcredits&lt;/url&gt;">
        <expression/>
      </RegExp>
      <RegExp conditional="fullcredits" input="$$2" dest="5+"
              output="&lt;url cache=&quot;$$2-fullcredits.html&quot; function=&quot;GetIMDBDirectors&quot;&gt;$$3fullcredits&lt;/url&gt;">
        <expression/>
      </RegExp>
      <RegExp conditional="fullcredits" input="$$2" dest="5+"
              output="&lt;url cache=&quot;$$2-fullcredits.html&quot; function=&quot;GetIMDBWriters&quot;&gt;$$3fullcredits&lt;/url&gt;">
        <expression/>
      </RegExp>

      <!-- Optional poster sources. -->
      <RegExp conditional="tmdbthumbs" input="$$2" dest="5+"
              output="&lt;url function=&quot;GetTMDBThumbsById&quot;&gt;$$3&lt;/url&gt;">
        <expression/>
      </RegExp>

      <RegExp conditional="impawards" input="$$2" dest="5+"
              output="&lt;url cache=&quot;$$2-posters.html&quot; function=&quot;GetIMPALink&quot;&gt;$$3posters&lt;/url&gt;">
        <expression/>
      </RegExp>

      <!--
        The ampersand before "query" is escaped twice on purpose: the output
        is XML text that is parsed again as part of the details document.
      -->
      <RegExp conditional="movieposterdb" input="$$1" dest="5+"
              output="&lt;url function=&quot;GetMoviePosterDBLink&quot;&gt;http://www.movieposterdb.com/browse/search?type=movies&amp;amp;query=\1&lt;/url&gt;">
        <expression>/title/tt([t0-9]*)/faq</expression>
      </RegExp>

      <!-- Optional trailer sources. -->
      <RegExp conditional="trailer" input="$$1" dest="5+"
              output="&lt;url function=&quot;GetIMDBTrailer&quot;&gt;http://akas.imdb.com/video/imdb/vi\1/player&lt;/url&gt;">
        <expression>/vi([0-9]*)/</expression>
      </RegExp>

      <!--
        Dtrailer search URL. The innermost RegExp copies the h1 text into
        buffer 4; the middle one rewrites every alphanumeric run in $$4 as
        "<run>-" into buffer 6; the outer one places $$6 in the search URL.
      -->
      <RegExp conditional="dtrailer" input="$$6" dest="5+"
              output="&lt;url function=&quot;GetDTrailerLink&quot;&gt;http://en.dtrailer.com/movies/search/\1&lt;/url&gt;">
        <RegExp input="$$4" dest="6"
                output="\1-">
          <RegExp input="$$1" dest="4"
                  output="\1">
            <expression trim="1" noclean="1">&lt;h1&gt;([^&lt;]*)</expression>
          </RegExp>
          <expression repeat="yes">([a-zA-Z0-9]+)</expression>
        </RegExp>
        <expression/>
      </RegExp>

      <!-- IMDb posters are requested unconditionally. -->
      <RegExp input="$$2" dest="5+"
              output="&lt;url cache=&quot;$$2-posters.html&quot; function=&quot;GetIMDBThumbs&quot;&gt;$$3posters&lt;/url&gt;">
        <expression/>
      </RegExp>

      <RegExp conditional="fanart" input="$$2" dest="5+"
              output="&lt;url function=&quot;GetTMDBFanartById&quot;&gt;$$3&lt;/url&gt;">
        <expression/>
      </RegExp>

      <expression noclean="1"/>
    </RegExp>
  </GetDetails>
  159. </scraper>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement