Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
/**
 * OFDB Parser
 *
 * Parses data from the OFDB
 *
 * @package Engines
 * @author Chinamann <chinamann@users.sourceforge.net>
 * @link http://www.ofdb.de
 * @version $Id: ofdb.php,v 1.20 2009/02/28 12:09:50 andig2 Exp $
 */
// Base URL of the OFDB site; every URL builder in this file prepends it.
$GLOBALS['ofdbServer'] = 'http://www.ofdb.de';
// Prefix that marks an external id as belonging to this engine.
$GLOBALS['ofdbIdPrefix'] = 'ofdb:';
/**
 * Get meta information about the engine
 *
 * @todo Include image search capabilities etc in meta information
 * @return array Engine descriptor with the keys 'name', 'stable' and 'supportsEANSearch'
 */
function ofdbMeta()
{
    return array(
        'name'              => 'OFDB (de)',
        'stable'            => 1,
        'supportsEANSearch' => 1,
    );
}
/**
 * Get search Url for OfDB
 *
 * @author Chinamann <chinamann@users.sourceforge.net>
 * @param string $title      The search string
 * @param string $searchType 'ean' selects the EAN category; any other value
 *                           searches all categories. Forced to 'ean' when
 *                           $title consists of exactly 13 digits.
 * @return string The search URL (GET)
 */
function ofdbSearchUrl($title, $searchType = 'title')
{
    global $ofdbServer;

    // auto switch to EAN mode if the title is exactly 13 digits
    if (preg_match('#^\s*[0-9]{13}\s*$#', $title)) {
        $searchType = 'ean';
        // the pattern tolerates surrounding whitespace; send only the digits
        $title = preg_replace('/\s+/', '', $title);
    }

    $category = ($searchType === 'ean') ? 'EAN' : 'All';

    return $ofdbServer.'/view.php?page=suchergebnis&SText='.urlencode($title).'&Kat='.$category;
}
/**
 * Get content overview URL
 *
 * @author Chinamann <chinamann@users.sourceforge.net>
 * @param string $id The movie's external id, optionally carrying the engine
 *                   prefix and a "-<version id>" suffix
 * @return string The visit URL
 */
function ofdbContentUrl($id)
{
    global $ofdbServer;
    global $ofdbIdPrefix;

    $id = preg_replace('/^'.preg_quote($ofdbIdPrefix, '/').'/', '', $id);

    // "<film id>-<version id>": only the film id is part of this URL.
    // explode() replaces split(), which no longer exists in PHP 7+.
    $parts = explode('-', $id, 2);

    return $ofdbServer.'/view.php?page=film&fid='.urlencode($parts[0]);
}
/**
 * Get content detail URL
 *
 * @author Chinamann <chinamann@users.sourceforge.net>
 * @param string $id The movie's external id (used as given; no prefix is stripped here)
 * @return string The visit URL
 */
function ofdbDetailUrl($id)
{
    global $ofdbServer;

    // urlencode() is a no-op for numeric ids and keeps the query string well-formed otherwise
    return $ofdbServer.'/view.php?page=film_detail&fid='.urlencode((string) $id);
}
/**
 * Get explicit version URL
 *
 * @author Chinamann <chinamann@users.sourceforge.net>
 * @param string $id  The movie's external id (used as given)
 * @param string $vid The movie's version id
 * @return string The visit URL
 */
function ofdbVersionUrl($id, $vid)
{
    global $ofdbServer;

    // urlencode() is a no-op for numeric ids and keeps the query string well-formed otherwise
    return $ofdbServer.'/view.php?page=fassung&fid='.urlencode((string) $id).'&vid='.urlencode((string) $vid);
}
/**
 * Get content description URL
 *
 * @author Chinamann <chinamann@users.sourceforge.net>
 * @param string $id  The movie's external id (used as given)
 * @param string $sid The movie's description id
 * @return string The visit URL
 */
function ofdbDescriptionUrl($id, $sid)
{
    global $ofdbServer;

    // urlencode() is a no-op for numeric ids and keeps the query string well-formed otherwise
    return $ofdbServer.'/view.php?page=inhalt&fid='.urlencode((string) $id).'&sid='.urlencode((string) $sid);
}
/**
 * Search a Movie
 *
 * Searches for a given title on the OfDB and returns the found links in
 * an array
 *
 * @author Chinamann <chinamann@users.sourceforge.net>
 * @param string $title      The search string
 * @param string $searchType Passed on to ofdbSearchUrl(); forced to 'ean'
 *                           when $title consists of exactly 13 digits
 * @return array Empty array when nothing was found; otherwise an array with
 *               an 'encoding' entry followed by numerically indexed hits,
 *               each an array with the keys 'id' and 'title'
 */
function ofdbSearch($title, $searchType = 'title')
{
    global $ofdbIdPrefix;
    global $CLIENTERROR;
    global $cache;

    // auto switch to EAN mode if the title is exactly 13 digits
    if (preg_match('#^\s*[0-9]{13}\s*$#', $title)) $searchType = 'ean';

    $resp = httpClient(ofdbSearchUrl($title, $searchType), $cache);
    if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";
    // a failed request may not carry any body; parse an empty page in that case
    $html = isset($resp['data']) ? $resp['data'] : '';

    $ary = array();
    // !! it seems OFDB is lying in this case - claiming UTF-8 but returning
    // ISO-8859-1; so the encoding is hard-wired instead of being detected
    $ary['encoding'] = 'iso-8859-1';

    // plain film hits: "<n>. <a href="film/<id>,...">Title<font>...</font> (year)</a>"
    if (preg_match_all('/<br>[0-9]+\.\s*<a href="film\/([0-9]+),[^"]*" onmouseover="[^"]*"[^>]*>([^<]*)<font.*?\/font> \(([\/\-0-9]+)\)<\/a>/', $html, $hits, PREG_SET_ORDER))
    {
        foreach ($hits as $row) {
            $ary[] = array(
                'id'    => $ofdbIdPrefix.$row[1],
                'title' => trim($row[2]).' ('.$row[3].')',
            );
        }
    }

    // hits that link to a specific version ("fassung"); the id becomes "<film id>-<version id>"
    if (preg_match_all('/<br>[0-9]+\.\s*<a href="film\/([0-9]+),[^"]*" onmouseover="[^"]*"><b>([^<]*)<.*?<a href="view\.php\?page=fassung.*?fid=[0-9]+.*?vid=([0-9]+)" onmouseover="[^"]*">([^<]*)</i', $html, $hits, PREG_SET_ORDER))
    {
        foreach ($hits as $row) {
            $ary[] = array(
                'id'    => $ofdbIdPrefix.$row[1]."-".$row[3],
                'title' => trim($row[2]).' - '.$row[4],
            );
        }
    }

    // do not return an array which contains only an encoding attribute
    if (count($ary) < 2) return array();

    return $ary;
}
/**
 * Convert ISO-8859-1 text to UTF-8, preferring mbstring over the deprecated utf8_encode().
 *
 * @param string $text ISO-8859-1 encoded text
 * @return string The same text encoded as UTF-8
 */
function ofdbToUtf8($text)
{
    $text = (string) $text;
    if (function_exists('mb_convert_encoding')) {
        return mb_convert_encoding($text, 'UTF-8', 'ISO-8859-1');
    }
    return utf8_encode($text);
}

/**
 * Fetches the data for a given OfDB id
 *
 * Requests the film overview page, the plot page (when linked), the detail
 * page and one version page, and scrapes the fields below out of the HTML.
 * Request failures are appended to the global $CLIENTERROR; parsing then
 * continues on whatever body was returned.
 *
 * @author Chinamann <chinamann@users.sourceforge.net>
 * @param string $id OfDB id, optionally with the engine prefix and a
 *                   "-<version id>" suffix
 * @return array Result data. 'encoding', 'imdbID', 'title' and 'subtitle' are
 *               always set; 'year', 'orgtitle', 'country', 'rating',
 *               'coverurl', 'plot', 'director', 'cast', 'genres', 'fsk',
 *               'language', 'runtime' and 'barcode' only when found.
 */
function ofdbData($id)
{
    global $CLIENTERROR;
    global $ofdbServer;
    global $ofdbIdPrefix;
    global $cache;

    $id = preg_replace('/^'.preg_quote($ofdbIdPrefix, '/').'/', '', $id);
    // "<film id>-<version id>"; explode() replaces split(), which no longer exists in PHP 7+
    $idParts = explode('-', $id, 2);
    $id      = $idParts[0];
    $vid     = isset($idParts[1]) ? $idParts[1] : '';

    $data = array(); // result

    // Fetch Mainpage
    $resp = httpClient(ofdbContentUrl($id), $cache);
    if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";

    // with utf8 encoding the parser will work with german mutated vowels
    $resp['data'] = ofdbToUtf8(isset($resp['data']) ? $resp['data'] : '');

    // add encoding
    $data['encoding'] = engine_get_encoding($resp);

    $mainHtml = preg_replace('/[\r\n\t]/', ' ', $resp['data']);

    // Titles / Year
    $title = '';
    if (preg_match('/<title>(.*?)<\/title>/i', $mainHtml, $match)) {
        $title = $match[1];
    }
    $title = preg_replace('/^OFDb[\s-]*/', '', $title);
    $title = preg_replace('/\[.*\]/', ' ', $title);
    if (preg_match('/\(([0-9]*)\)/i', $title, $match)) {
        $data['year'] = trim($match[1]);
    }
    $title = preg_replace('/\([0-9]*\)/', ' ', $title);
    $title = preg_replace('/\s{2,}/s', ' ', $title);

    // check if there is a comma separated article at the end ("Matrix, The" -> "The Matrix")
    if (preg_match('#(.*),\s*(A|The|Der|Die|Das|Ein|Eine|Einer)\s*$#i', $title, $match)) {
        $title = $match[2].' '.$match[1];
    }

    $titleParts       = explode(' - ', trim($title), 2);
    $data['title']    = trim($titleParts[0]);
    $data['subtitle'] = isset($titleParts[1]) ? trim($titleParts[1]) : '';

    // Original Title
    if (preg_match('/Originaltitel.*?<b>(.*?)</i', $mainHtml, $match)) {
        $data['orgtitle'] = trim($match[1]);
    }

    // Country
    if (preg_match('/>Herstellungsland:.*?<b><a.*?>(.*?)<\/a>/i', $mainHtml, $match)) {
        $data['country'] = trim($match[1]);
    }

    // Rating
    if (preg_match('/<br>Note:\s*([0-9\.]+)/', $mainHtml, $match)) {
        $data['rating'] = $match[1];
    }

    // Cover URL
    if (preg_match('#<img src="(http://img.ofdb.de/film/.*?\.jpg)"#i', $mainHtml, $match)) {
        $data['coverurl'] = trim($match[1]);
    }

    // Fetch first VID if none already selected
    if (!$vid) {
        $versionPattern = '/view\.php\?page=fassung&fid='.preg_quote($id, '/').'&vid=([0-9]+)".*?class="Klein">(.*?)</i';
        if (preg_match_all($versionPattern, $mainHtml, $versions, PREG_SET_ORDER)) {
            foreach ($versions as $row) {
                $marker = trim($row[2]);
                if ($marker === "K" || $marker === "KV") { // Check if there is a good result
                    $vid = $row[1];
                    break;
                }
            }
            if (!$vid) {
                // Still empty -> take the first one (index 0 under PREG_SET_ORDER)
                $vid = $versions[0][1];
            }
        }
    }

    // IMDB ID (engine id) -> important for non edit.php refetch
    $data['imdbID'] = $ofdbIdPrefix."$id-$vid";

    // Fetch Plot: cut the text between the heading end and the closing paragraph.
    // Only attempted when the main page really links to a plot page.
    if (preg_match('#href="(plot/[^"]+)"#i', $mainHtml, $match)) {
        $plotResp = httpClient($ofdbServer.'/'.$match[1], $cache);
        if (!$plotResp['success']) $CLIENTERROR .= $plotResp['error']."\n";
        $plotHtml = ofdbToUtf8(isset($plotResp['data']) ? $plotResp['data'] : '');

        $start = strpos($plotHtml, "</b><br><br>");
        $end   = ($start === false) ? false : strpos($plotHtml, "</font></p>", $start);
        if ($start !== false && $end !== false) {
            $plotContent  = substr($plotHtml, $start, $end - $start);
            $plotContent  = str_replace("</b>", "", $plotContent);
            $plotContent  = str_replace("<br>", "", $plotContent);
            $data['plot'] = trim($plotContent);
        }
    }

    // Fetch Details
    // NOTE(review): unlike the main and plot pages, the detail and version
    // pages are parsed without UTF-8 conversion - confirm this is intended.
    $resp = httpClient(ofdbDetailUrl($id), $cache);
    if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";
    $detailHtml = preg_replace('/[\r\n\t]/', ' ', isset($resp['data']) ? $resp['data'] : '');

    // Director
    if (preg_match('/<b><i>Regie<\/i><\/b>.*?<table.*?>(.*?)<\/table>/i', $detailHtml, $match)) {
        if (preg_match_all('/class="Daten"><a.*?>(.*?)<\/a>/i', $match[1], $rows, PREG_SET_ORDER)) {
            $directors = array();
            foreach ($rows as $row) {
                $directors[] = trim($row[1]);
            }
            $data['director'] = join(', ', $directors);
        }
    }

    // Cast
    if (preg_match('/<b><i>Darsteller<\/i><\/b>.*?<table.*?>(.*)<\/table>/', $detailHtml, $match)) {
        // dirty workaround for (.*?) failed on very long match groups issue (tested at PHP 5.2.5.5)
        // e.g.: ofdb:7749-111320 (Angel - Jäger der Finsternis)
        $castHtml = preg_replace('#</table.*#', '', $match[1]);
        if (preg_match_all('/class="Daten"><a(.*?)">(.*?)<\/a>.*?<\/td> <td.*?<\/td> <td[^>]*>(.*?)<\/td>/i', $castHtml, $rows, PREG_SET_ORDER)) {
            $data['cast'] = '';
            foreach ($rows as $row) {
                $actor = trim(strip_tags($row[2]));

                $actorid = "";
                if (!empty($row[1]) && preg_match('#href="view.php\?page=person&id=([0-9]*)#i', $row[1], $idAry)) {
                    $actorid = $ofdbIdPrefix.$idAry[1];
                }

                $character = "";
                if (!empty($row[3]) && preg_match('#class="Normal">... ([^<]*)<#i', $row[3], $charAry)) {
                    $character = trim(strip_tags($charAry[1]));
                }

                // one line per cast member: name::role::prefixed person id
                $data['cast'] .= "$actor::$character::$actorid\n";
            }
        }
    }

    // Genres (OFDB name => target name; an empty target means "drop this genre")
    $genres = array(
        'Amateur' => '',
        'Eastern' => '',
        'Experimentalfilm' => '',
        'Mondo' => '',
        'Kampfsport' => 'Sport',
        'Biographie' => 'Biography',
        'Katastrophen' => 'Thriller',
        'Krimi' => 'Crime',
        'Science-Fiction' => 'Sci-Fi',
        'Kinder-/Familienfilm' => 'Family',
        'Dokumentation' => 'Documentary',
        'Action' => 'Action',
        'Drama' => 'Drama',
        'Abenteuer' => 'Adventure',
        'Historienfilm' => 'History',
        'Kurzfilm' => 'Short',
        'Liebe/Romantik' => 'Romance',
        'Heimatfilm' => 'Romance',
        'Grusel' => 'Horror',
        'Horror' => 'Horror',
        'Erotik' => 'Adult',
        'Hardcore' => 'Adult',
        'Sex' => 'Adult',
        'Musikfilm' => 'Musical',
        'Animation' => 'Animation',
        'Fantasy' => 'Fantasy',
        'Trash' => 'Horror',
        'Komödie' => 'Comedy',
        'Krieg' => 'War',
        'Mystery' => 'Mystery',
        'Thriller' => 'Thriller',
        'Tierfilm' => 'Documentary',
        'Western' => 'Western',
        'TV-Serie' => '',
        'TV-Mini-Serie' => '',
        'Sportfilm' => 'Sport',
        'Splatter' => 'Horror',
        'Manga/Anime' => 'Animation'
    );
    if (preg_match('/>Genre\(s\)\:.*?<b>(.*?)<\/b>/i', $detailHtml, $match)) {
        if (preg_match_all('/<a.*?>(.*?)<\/a>/i', $match[1], $rows, PREG_SET_ORDER)) {
            foreach ($rows as $row) {
                $genre = trim(html_entity_decode($row[1]));
                $genre = strip_tags($genre);
                if (!$genre || !isset($genres[$genre])) continue;

                $mapped = $genres[$genre];
                // skip genres without a target name and genres already collected
                if ($mapped === '') continue;
                if (isset($data['genres']) && in_array($mapped, $data['genres'], true)) continue;
                $data['genres'][] = $mapped;
            }
        }
    }

    // Fetch Version
    $resp = httpClient(ofdbVersionUrl($id, $vid), $cache);
    if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";
    $versionHtml = preg_replace('/[\r\n\t]/', ' ', isset($resp['data']) ? $resp['data'] : '');

    // FSK (German age rating label => minimum age)
    $fsks = array(
        'FSK o.A.' => '0',
        'FSK 6' => '6',
        'FSK 12' => '12',
        'FSK 16' => '16',
        'FSK 18' => '18',
        'Keine Jugendfreigabe' => '18',
        'SPIO/JK' => '18',
        'juristisch geprüft' => '',
        'ungeprüft' => ''
    );
    if (preg_match('/>Freigabe:<.*?<b>(.*?)<\/tr>/i', $versionHtml, $match)) {
        $fsk = trim(html_entity_decode($match[1]));
        $fsk = strip_tags($fsk);
        if (isset($fsks[$fsk])) $data['fsk'] = $fsks[$fsk];
    }

    // Languages (German name => English name)
    $languages = array(
        'arabisch' => 'arabic',
        'bulgarisch' => 'bulgarian',
        'chinesisch' => 'chinese',
        'tschechisch' => 'czech',
        'dänisch' => 'danish',
        'holändisch' => 'dutch',
        'holländisch' => 'dutch', // correct spelling; the misspelt key above is kept for compatibility
        'englisch' => 'english',
        'französisch' => 'french',
        'deutsch' => 'german',
        'griechisch' => 'greek',
        'ungarisch' => 'hungarian',
        'isländisch' => 'icelandic',
        'indisch' => 'indian',
        'israelisch' => 'israeli',
        'italienisch' => 'italian',
        'japanisch' => 'japanese',
        'koreanisch' => 'korean',
        'norwegisch' => 'norwegian',
        'polnisch' => 'polish',
        'portugisisch' => 'portuguese',
        'portugiesisch' => 'portuguese', // correct spelling; the misspelt key above is kept for compatibility
        'rumänisch' => 'romanian',
        'russisch' => 'russian',
        'serbisch' => 'serbian',
        'spanisch' => 'spanish',
        'schwedisch' => 'swedish',
        'thailändisch' => 'thai',
        'türkisch' => 'turkish',
        'vietnamesisch' => 'vietnamese',
        'kantonesisch' => 'cantonese',
        'katalanisch' => 'catalan',
        'zypriotisch' => 'cypriot',
        'zyprisch' => 'cypriot',
        'esperanto' => 'esperanto',
        'gälisch' => 'gaelic',
        'hebräisch' => 'hebrew',
        'hindi' => 'hindi',
        'jüdisch' => 'jewish',
        'lateinisch' => 'latin',
        'mandarin' => 'mandarin',
        'serbokroatisch' => 'serbo-croatian',
        'somalisch' => 'somali'
    );
    $lang_list = array();
    if (preg_match('/>Tonformat:<.*?<b>(.*?)<\/b>/i', $versionHtml, $match) &&
        preg_match_all('/(\w+)(\s|<).*?br>/si', $match[1], $langs, PREG_PATTERN_ORDER))
    {
        foreach ($langs[1] as $language) {
            $language = trim(strtolower($language));
            $language = html_entity_decode(strip_tags($language));
            $language = preg_replace('/\s+$/', '', $language);
            // unknown languages and empty translations are dropped
            if (!$language || !isset($languages[$language])) continue;
            $language = $languages[$language];
            if (!$language) continue;
            $lang_list[] = $language;
        }
        $data['language'] = trim(join(', ', array_unique($lang_list)));
    }

    // Runtime (minutes; anything from a ':' onwards is cut off)
    if (preg_match('/>Laufzeit:<.*?<b>(.*?)\s*Min/i', $versionHtml, $match)) {
        $data['runtime'] = trim(preg_replace('/:.*/', '', $match[1]));
    }

    // EAN-Code
    if (preg_match('/>EAN\/UPC<\/a>:.*?<b>\s*([0-9]+)\s*<\/b>/i', $versionHtml, $match)) {
        $data['barcode'] = $match[1];
    }

    return $data;
}
/**
 * Get Url to visit OFDB for a specific actor
 *
 * @author Chinamann <chinamann@users.sourceforge.net>
 * @param string $name The actor's name; only used for a lookup when $id is empty
 * @param string $id   The actor's external id, with or without the engine prefix
 * @return string The visit URL, or an empty string when no id could be determined
 */
function ofdbActorUrl($name, $id)
{
    global $ofdbServer;
    global $ofdbIdPrefix;

    if ($id) {
        $id = preg_replace('/^'.preg_quote($ofdbIdPrefix, '/').'/', '', $id);
    } else {
        $id = ofdbGetActorId($name);
    }

    // ofdbGetActorId() reports "not found" as 0
    return ($id != 0) ? $ofdbServer.'/view.php?page=person&id='.$id : '';
}
/**
 * Parses Actor-Details
 *
 * Find image and detail URL for actor.
 *
 * @author Chinamann <chinamann@users.sourceforge.net>
 * @param string $name Name of the actor; only used for a lookup when $id is empty
 * @param string $id   Prefixed ofdb actor id
 * @return array Array holding one entry: array(actor URL, thumbnail URL)
 */
function ofdbActor($name, $id)
{
    global $ofdbServer;
    // without this declaration the prefix is undefined here and never stripped
    global $ofdbIdPrefix;

    if ($id) {
        $id = preg_replace('/^'.preg_quote($ofdbIdPrefix, '/').'/', '', $id);
    } else {
        $id = ofdbGetActorId($name);
    }

    // images are grouped in folders named after the id without its last three
    // digits; ids below 1000 live in folder 0
    $folderId = ($id < 1000) ? 0 : substr($id, 0, strlen($id) - 3);
    $imgUrl   = $ofdbServer.'/images/person/'.$folderId.'/'.$id.'.jpg';

    // skip ofdbActorUrl() for an unresolved id: it would repeat the name lookup
    // only to return an empty string again
    $actorUrl = ($id != 0) ? ofdbActorUrl($name, $id) : '';

    return array(array($actorUrl, $imgUrl));
}
/**
 * Guess the OFDB person id for an actor name
 *
 * Requests the OFDB person list for the name and takes the first person
 * link found in the response. Request failures are appended to the global
 * $CLIENTERROR.
 *
 * @param string $name The actor's name
 * @return string|int The numeric person id as a string, or 0 when none was found
 */
function ofdbGetActorId($name)
{
    global $ofdbServer;
    global $CLIENTERROR;
    global $cache;

    // try to guess the id -> first actor found with this name
    $url  = $ofdbServer.'/view.php?page=liste&Name='.urlencode(html_entity_decode_all($name));
    $resp = httpClient($url, $cache);
    if (!$resp['success']) $CLIENTERROR .= $resp['error']."\n";

    $html = preg_replace('/[\r\n\t]/', ' ', isset($resp['data']) ? $resp['data'] : '');

    // '.' and '?' must be escaped: unescaped, "php?page" can never match a literal "php?page"
    return preg_match('#view\.php\?page=person&id=([0-9]+)#i', $html, $ary) ? $ary[1] : 0;
}
/**
 * Get an array of all previous prefixes for the ImdbId
 *
 * @author Chinamann <chinamann@users.sourceforge.net>
 * @return array List holding the engine's id prefix as its only element
 */
function ofdbImdbIdPrefixes()
{
    global $ofdbIdPrefix;

    return array($ofdbIdPrefix);
}
/**
 * Debug helper: hands the text to file_append() with 'debug.txt' as target
 *
 * @param string $text   The text to log
 * @param bool   $append Passed through to file_append() unchanged
 */
function ofdbDbg($text, $append = true)
{
    file_append('debug.txt', $text, $append);
}
?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement