Advertisement
Guest User

xbmc jav heyzo scraper - modified from laoyang @ github

a guest
Oct 25th, 2014
203
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
XML 3.30 KB | None | 0 0
  1. <?xml version="1.0" encoding="UTF-8"?>
  2. <scraper framework="1.1" date="2014-10-25" name="jav" content="movies" language="en">
  3.     <CreateSearchUrl SearchStringEncoding="utf-8" dest="2">
  4.         <RegExp input="$$1" output="&lt;url&gt;http://www.heyzo.com/moviepages/\1/index.html&lt;/url&gt;" dest="2">
  5.             <expression clear="yes" noclean="1" encode="1">([0-9]{4})</expression>
  6.         </RegExp>
  7.     </CreateSearchUrl>
  8.     <GetSearchResults clearbuffers="no" dest="4">
  9.         <RegExp input="$$3" output="&lt;?xml version=&quot;1.0&quot; encoding=&quot;UTF-8&quot; standalone=&quot;yes&quot;?&gt;&lt;results&gt;&lt;entity&gt;\1&lt;/entity&gt;&lt;/results&gt;" dest="4">
  10.             <RegExp input="$$1" output="&lt;title&gt;\1&lt;/title&gt;" dest="3">
  11.                 <expression>&lt;title\b[^&gt;]*&gt;(.*?)&lt;/title&gt;</expression>
  12.             </RegExp>
  13.             <RegExp input="$$1" output="&lt;url&gt;$$2&lt;/url&gt;" dest="3+">
  14.                 <expression/>
  15.             </RegExp>
  16.             <expression noclean="1"/>
  17.         </RegExp>
  18.     </GetSearchResults>
  19.     <GetDetails dest="5">
  20.         <RegExp input="$$8" output="&lt;details&gt;\1&lt;/details&gt;" dest="5">
  21.             <RegExp input="$$1" output="&lt;title&gt;\1&lt;/title&gt;" dest="8">
  22.                 <expression trim="1">&lt;h1&gt;(.*?)\s*-.*?&lt;/h1&gt;</expression>
  23.             </RegExp>
  24.             <RegExp input="$$1" output="&lt;year&gt;\1&lt;/year&gt;" dest="8+">
  25.                 <expression>&lt;span class=&quot;dataInfo&quot;&gt;\s*?([0-9]{4})-[0-9]{2}-[0-9]{2}</expression>
  26.             </RegExp>
  27.             <RegExp input="$$1" output="&lt;thumb&gt;http://www.heyzo.com\1001.jpg&lt;/thumb&gt;&lt;thumb&gt;http://www.heyzo.com\1002.jpg&lt;/thumb&gt;&lt;thumb&gt;http://www.heyzo.com\1003.jpg&lt;/thumb&gt;&lt;thumb&gt;http://www.heyzo.com\1004.jpg&lt;/thumb&gt;&lt;thumb&gt;http://www.heyzo.com\1005.jpg&lt;/thumb&gt;" dest="8+">
  28.                 <expression>(/contents/3000/[0-9]{4}/gallery/)</expression>
  29.             </RegExp>
  30.             <RegExp input="$$1" output="&lt;studio&gt;Heyzo&lt;/studio&gt;" dest="8+">
  31.                 <expression/>
  32.             </RegExp>
  33.             <RegExp input="$$3" output="&lt;genre&gt;\1&lt;/genre&gt;" dest="8+">
  34.                 <RegExp input="$$1" output="\1" dest="3">
  35.                     <expression noclean="1">&lt;div class=&quot;tagkeyword&quot;&gt;&lt;span&gt;タグキーワード&lt;/span&gt;&lt;/div&gt;(.*?&lt;/ul&gt;)</expression>
  36.                 </RegExp>
  37.                 <expression repeat="yes">&lt;li&gt;(.*?)&lt;/li&gt;</expression>
  38.             </RegExp>
  39.             <RegExp input="$$1" output="&lt;actor&gt;&lt;thumb spoof=&quot;http://www.heyzo.com&quot;&gt;http://www.heyzo.com/actorprofile/3000/0\1/profile.jpg&lt;/thumb&gt;&lt;name&gt;\2 &lt;/name&gt;&lt;/actor&gt;" dest="8+">
  40.                 <expression>&lt;span class=&quot;dataInfo&quot;&gt;\s*&lt;a href=&quot;/listpages/actor_([0-9]+)_1.html\?sort=pop&quot; title=&quot;&quot;&gt;(.*?)&lt;/a&gt;</expression>
  41.             </RegExp>
  42.             <RegExp input="$$1" output="&lt;plot&gt;\1&lt;/plot&gt;" dest="8+">
  43.                 <expression trim="1">&lt;p class=&quot;memo&quot;&gt;(.*?)&lt;</expression>
  44.             </RegExp>
  45.             <RegExp input="$$1" output="&lt;fanart&gt;&lt;thumb&gt;\1.jpg&lt;/thumb&gt;&lt;/fanart&gt;" dest="8+">
  46.                 <expression>(http://www.heyzo.com/contents/3000/[0-9]+/images/player_thumbnail)_450.jpg</expression>
  47.             </RegExp>
  48.             <RegExp input="$$1" output="&lt;set&gt;\1&lt;/set&gt;" dest="9">
  49.                 <expression>&lt;a href=&quot;/listpages/series_[0-9]+_1.html&quot;&gt;(.*?)&lt;/a&gt;</expression>
  50.             </RegExp>
  51.             <expression noclean="1"/>
  52.         </RegExp>
  53.     </GetDetails>
  54. </scraper>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement