Advertisement
Guest User

Mobi to XDXF

a guest
Feb 11th, 2020
502
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
PHP 2.06 KB | None | 0 0
  <?php
  /**
   * Mobi to XDXF converter.
   *
   * Reads 'book.html' (the HTML of an unpacked Mobipocket dictionary), strips
   * some layout markup, saves the cleaned HTML as 'book2.html', then extracts
   * every <idx:entry> and writes it as an <ar> article into 'slownik.xdxf'.
   *
   * Only <br/>, <b> and <i> survive in a definition (<h2> is rendered as <b>);
   * every other tag is removed and the remaining text is XML-escaped.
   */

  /**
   * Reports a fatal problem on STDERR and stops with a non-zero exit code.
   */
  function xdxfFail(string $message): void
  {
      file_put_contents('php://stderr', $message . "\n");
      exit(1);
  }

  /**
   * Turns HTML-encoded text into text that is safe inside an XML element.
   *
   * Entities are decoded first so that e.g. "&amp;" is not escaped twice.
   * Flags and charset are explicit because the defaults differ between PHP
   * versions; ENT_SUBSTITUTE keeps the text when it contains an invalid UTF-8
   * byte instead of returning an empty string.
   */
  function xdxfEscape(string $text): string
  {
      $decoded = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');

      return htmlspecialchars($decoded, ENT_XML1 | ENT_NOQUOTES | ENT_SUBSTITUTE, 'UTF-8');
  }

  /**
   * Converts the inner HTML of one dictionary entry into XDXF <def> content.
   *
   * The tags that should survive are swapped for text placeholders, all other
   * markup is stripped, the text is escaped, and the placeholders are turned
   * back into tags.
   */
  function xdxfDefinition(string $html): string
  {
      // Applied in order, exactly like a chain of single str_replace() calls.
      $placeholders = [
          '<br/>' => '__br__',
          '<b>'   => '__b__',
          '</b>'  => '__/b__',
          '<i>'   => '__i__',
          '</i>'  => '__/i__',
          '<h2>'  => '__b__',
          '</h2>' => '__/b__',
      ];
      $text = str_replace(array_keys($placeholders), array_values($placeholders), $html);
      $text = xdxfEscape(strip_tags($text));
      $text = str_replace('__br__', '<br/>', $text);

      foreach (['b', 'i'] as $tag) {
          $open = "__{$tag}__";
          $close = "__/{$tag}__";
          // Restore the pair only when opening and closing markers match in
          // number; otherwise drop them so the placeholders do not leak into
          // the output and the XML is not left with an unclosed element.
          // NOTE(review): equal counts do not prove correct nesting.
          $balanced = substr_count($text, $open) === substr_count($text, $close);
          $text = str_replace(
              [$open, $close],
              $balanced ? ["<{$tag}>", "</{$tag}>"] : '',
              $text
          );
      }

      return $text;
  }

  // Captures: 1 = headword (idx:orth value), 2 = entry body.
  // NOTE(review): without the "s" modifier "." does not match newlines, so an
  // entry that spans several lines is not matched - confirm against the input.
  $re = '/<idx:entry.*?idx:orth value="([^"]+)">(.*?)<\/idx:entry>/m';

  $str = file_get_contents('book.html');
  if ($str === false) {
      xdxfFail('Cannot read book.html');
  }

  // Remove the language markers and blockquote wrappers.
  $str = str_replace(
      [
          '<font size="3">en&sup2;',
          '<font size="3">pl&sup1;',
          '<font size="3">pl&sup3;',
          '<blockquote>',
          '</blockquote>',
      ],
      '',
      $str
  );
  // Empty spacer divs become line breaks; the doubled form is handled first so
  // that it yields a single <br/>.
  $str = str_replace(
      ['<div height="0"></div> <div height="0"></div>', '<div height="0"></div>'],
      '<br/>',
      $str
  );

  // The cleaned HTML is not read again below, so a failed write is reported
  // but does not stop the conversion.
  if (file_put_contents('book2.html', $str) === false) {
      file_put_contents('php://stderr', "Warning: cannot write book2.html\n");
  }

  if (preg_match_all($re, $str, $matches, PREG_SET_ORDER) === false) {
      xdxfFail('Entry extraction failed, PCRE error code ' . preg_last_error());
  }

  $xml1 = implode("\n", [
      '<?xml version="1.0" encoding="UTF-8" ?>',
      '<xdxf lang_from="ENG" lang_to="POL" format="visual">',
      '   <meta_info>',
      '       <title>Dictionary ENG-POL</title>',
      '       <full_title>Dictionary ENG-POL</full_title>',
      '       <description>Slownik angielsko polski</description>',
      // The closing tag has to match the opening one for well-formed XML.
      '       <file_ver>001</file_ver>',
      '       <creation_date>09-02-2020</creation_date>',
      '   </meta_info>',
      '   <lexicon>',
  ]);
  $szab = "<ar><k>%s</k><def>%s</def></ar>\n";
  $xml2 = "</lexicon>\n</xdxf>";

  $fp = fopen('slownik.xdxf', 'w');
  if ($fp === false) {
      xdxfFail('Cannot open slownik.xdxf for writing');
  }

  fwrite($fp, $xml1);

  foreach ($matches as $m) {
      $slowo = $m[1];
      $opis = $m[2];

      // Compare against '' rather than empty(): the string "0" is a valid
      // headword or definition and must not be skipped.
      if ($slowo === '' || $opis === '') {
          continue;
      }

      // The headword comes from an HTML attribute, so it is escaped as well.
      fwrite($fp, sprintf($szab, xdxfEscape($slowo), xdxfDefinition($opis)));
  }

  fwrite($fp, $xml2);
  fclose($fp);
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement