Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
// Pick the include file by server name: 'alinc.php' when the script is served
// from localhost, 'statusparse.inc.php' for every other host.
require (($_SERVER["SERVER_NAME"]=='localhost') ? 'alinc.php' : 'statusparse.inc.php');
//---------------------------
/**
 * Logging shortcut: hands $instr to the logt() method of the global
 * $statusparse object.
 *
 * @param mixed $instr value forwarded unchanged to logt()
 */
function wr($instr)
{
    global $statusparse;

    $logger = $statusparse;
    $logger->logt($instr);
}
//-----------------------------------------------------------------
// parsing
//-----------------------------------------------------------------
/**
 * Turns a link captured from a page into an absolute URL.
 *
 * Links starting with "http" are returned untouched, links starting with "?"
 * are appended to host prefix + current path, and everything else is attached
 * directly to the host prefix (a "/" is inserted when the link lacks one).
 *
 * @param string $hoststr scheme://host[:port] of the page being parsed
 * @param string $pathstr path component of the page being parsed
 * @param string $link    link text captured by the pattern
 * @return string
 */
function parse_resolve_link($hoststr, $pathstr, $link) {
    if (substr($link, 0, 4) == 'http')
        return $link;
    $first = substr($link, 0, 1);
    if ($first == '?')
        return $hoststr.$pathstr.$link;
    return $hoststr.(($first == '/') ? '' : '/').$link;
}

/**
 * Processes the queue entry at $statusparse->urls[$statusparse->step].
 *
 * The page is downloaded, converted from windows-1251 to UTF-8 when a matching
 * <meta charset> tag is found, and matched against the pattern stored in
 * $statusparse->regarr for the entry's depth. Capture group 2 of every match
 * is treated as a link, so each pattern needs at least two capture groups.
 *
 * While deeper patterns remain, every link is appended to $statusparse->urls
 * with depth + 1. At the last pattern the links are appended to
 * $statusparse->elems and handed to $statusparse->log_res().
 *
 * Download or pattern failures are logged through wr() and leave the queue
 * untouched, so the caller can simply move on to the next step.
 */
function parse() {
    global $statusparse;
    $entry = $statusparse->urls[$statusparse->step];
    $url8 = $entry["url"];
    $deep8 = $entry["deep"];
    $regexp8 = $statusparse->regarr[$deep8];
    // The user-supplied expression is used verbatim between "/" delimiters.
    $pattern = "/".$regexp8."/ims";
    wr('steps down='.$deep8);
    wr('get address '.$url8);
    wr('current pattern '.$regexp8);
    //------------------------ base for relative links
    // parse_url() returns false for malformed URLs and omits absent parts,
    // so every component is read defensively.
    $ztmuri = parse_url($url8);
    if (!is_array($ztmuri))
        $ztmuri = array();
    $hoststr = (isset($ztmuri['scheme']) ? $ztmuri['scheme'] : '').'://'.
               (isset($ztmuri['host']) ? $ztmuri['host'] : '');
    // Keep an explicit port, otherwise relative links would point at the
    // default port of the host instead of the server that was crawled.
    if (isset($ztmuri['port']))
        $hoststr .= ':'.$ztmuri['port'];
    $pathstr = isset($ztmuri['path']) ? $ztmuri['path'] : '/';
    //------------------------ download
    $teststr = file_get_contents($url8);
    if ($teststr === false) {
        wr('error: cannot load '.$url8);
        return;
    }
    // Case-insensitive so that "charset=Windows-1251" is recognised as well.
    if (preg_match('/<meta[^>]*charset[^>]*win[^>]*1251[^>]*>/i', $teststr) > 0) {
        $converted = iconv('windows-1251', 'UTF-8', $teststr);
        if ($converted !== false)
            $teststr = $converted;
        else
            wr('error: charset conversion failed, using raw content');
    }
    //------------------------ matching
    $found = preg_match_all($pattern, $teststr, $out, PREG_SET_ORDER);
    if ($found === false) {
        wr('error: pattern failed '.$regexp8);
        return;
    }
    //------------------------ new urls / result elements
    $levels = count($statusparse->regarr);
    $next = $deep8 + 1;
    if ($next > $levels)
        return;
    foreach ($out as $match) {
        // With PREG_SET_ORDER trailing groups that did not take part in the
        // match are left out of the set.
        if (!isset($match[2])) {
            wr('skipped: match without capture group 2');
            continue;
        }
        $link = $match[2];
        $absolute = parse_resolve_link($hoststr, $pathstr, $link);
        if ($next < $levels) {
            wr('link: '.$link);
            $statusparse->urls[] = array("url" => $absolute, "deep" => $next);
        }
        else {
            wr('el: '.$link);
            $statusparse->elems[] = $absolute;
            $statusparse->log_res($absolute);
        }
    }
}
//-----------------------------------------------------------------
// output
//-----------------------------------------------------------------
/**
 * Prints the collected elements of the global $statusparse object.
 *
 * When the request carries "file8", the elements are first replaced by the
 * lines of that file. With $statusparse->formatted set, every element is
 * substituted for the '$uu$' placeholder in $statusparse->formatstring and
 * echoed (optionally preceded by a textarea holding $statusparse->initquery
 * when show_query is set). The JSON variant is not implemented and prints
 * nothing. Without formatting, the element list is dumped as plain text.
 */
function output() {
    global $statusparse;
    if (isset($_GET["file8"])) {
        // SECURITY NOTE(review): file8 comes straight from the request, so
        // any path or stream wrapper readable by PHP can be requested here.
        // It should be restricted to the directory where result files are
        // written; that location is not visible from this file.
        $lines = is_string($_GET["file8"]) ? file($_GET["file8"]) : false;
        // file() returns false on failure; fall back to an empty list so
        // the loops below still receive an array.
        $statusparse->elems = ($lines === false) ? array() : $lines;
    }
    //-------------------------------------------
    // formatted output
    //-------------------------------------------
    if (!empty($statusparse->formatted)) {
        if (!empty($statusparse->json_format)) {
            //--------------------------------------------
            // json output (not implemented, prints nothing)
            //--------------------------------------------
            return;
        }
        //--------------------------------------
        // html output
        //--------------------------------------
        if (!empty($statusparse->show_query)) {
            // initquery is built from request data, so escape it for HTML.
            echo '<textarea cols="40" rows="15">'.
                 htmlspecialchars($statusparse->initquery, ENT_QUOTES, 'UTF-8').
                 '</textarea>';
        }
        $frm_str = $statusparse->formatstring;
        $elems = (isset($statusparse->elems) && is_array($statusparse->elems))
            ? $statusparse->elems
            : array();
        foreach ($elems as $el) {
            echo str_replace('$uu$', $el, $frm_str);
        }
        return;
    }
    //--------------------------------
    // unformatted output
    //--------------------------------
    if (!headers_sent())
        header('Content-type:text/plain; charset=utf-8');
    // print_r() prints by itself; echoing its return value would append "1".
    print_r($statusparse->elems);
}
//-----------------------------------------------------------------
// main entrance
//-----------------------------------------------------------------
// Request dispatcher:
//   ?step            - process the next queued url, or print the results
//                      once the queue is exhausted
//   ?step&status     - report progress of the running parse
//   ?regexp8=..      - start a new parse (patterns separated by |ABC|)
//   ?file8=..        - format an existing result file
//   anything else    - show the input form
//
// A request without "step" starts from scratch, so the state of a previous
// run is discarded before the session is opened again.
if (!isset($_GET["step"])) {
    session_start();
    session_destroy();
    session_commit();
}
//---------------------------------
session_start();
//---------------------------------
$statusparse = isset($_SESSION["statusparse"]) ? $_SESSION["statusparse"] : null;
$self = $_SERVER["PHP_SELF"];
//---------------------------------- step requested without a running parse
if (isset($_GET["step"]) && !is_object($statusparse)) {
    // Expired session or a direct hit on ?step: there is nothing to read
    // the progress from, so answer without touching $statusparse.
    if (isset($_GET["status"]))
        echo 'no active parse';
    else
        show_form();
}
//---------------------------------- continue processing
elseif (isset($_GET["step"])) {
    //----------------async get current status
    if (isset($_GET["status"])) {
        if ($statusparse->step < count($statusparse->urls)) {
            $current = $statusparse->urls[$statusparse->step];
            echo 'current step: '.$statusparse->step.'<br />';
            // The url originates from scraped pages, so escape it.
            echo 'current el: '.
                 htmlspecialchars($current["url"], ENT_QUOTES, 'UTF-8').'<br />';
            echo 'current depth: '.$current["deep"].'<br />';
            echo 'found elems: '.count($statusparse->urls).'<br />';
        }
        else
            echo('parse complete');
        exit();
    }
    //----------------processing
    if ($statusparse->step < count($statusparse->urls)) {
        parse();
        $statusparse->step++;
        // Every 16th step is answered with a page instead of a redirect so
        // the browser does not abort the redirect chain.
        $statusparse->n_redirects++;
        if ($statusparse->n_redirects > 15)
            $statusparse->n_redirects = 0;
        //----------------------
        $_SESSION["statusparse"] = $statusparse;
        session_commit();
        if ($statusparse->n_redirects == 0) {
            // PHP_SELF can carry request-controlled path info, so it is
            // encoded for the script and the attribute context separately.
            $next_js = json_encode($self.'?step',
                JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT);
            $next_html = htmlspecialchars($self.'?step', ENT_QUOTES, 'UTF-8');
            echo '<head><script type="text/javascript">'.
                 'setTimeout(function() {location.replace('.$next_js.');},1000)'.
                 '</script></head><body>';
            echo 'parsing suspended to prevent error 310 <br />';
            echo '<a href="'.$next_html.'">continue</a></body>';
        }
        else
            header('Location: '.$self.'?step');
    }
    else {
        output();
    }
}
//--------------------------------- start processing
elseif (isset($_GET["regexp8"])) {
    $statusparse = new StatusParse(true);
    wr('start processing...');
    $statusparse->regarr = explode('|ABC|', $_GET["regexp8"]);
    $statusparse->base_url = isset($_GET["url8"]) ? $_GET["url8"] : '';
    $statusparse->formatstring = isset($_GET["formstr8"]) ? $_GET["formstr8"] : '';
    $statusparse->formatted = ($statusparse->formatstring != '');
    $statusparse->json_format = isset($_GET["json8"]);
    $statusparse->show_query = !isset($_GET["nta"]);
    // Query that re-renders the stored result file with the same format.
    $statusparse->initquery = 'http://'.$_SERVER["SERVER_NAME"].
        $self.'?file8='.
        urlencode($statusparse->resfile).'&formstr8='.
        urlencode($statusparse->formatstring);
    // Seed the queue with the start url at depth 0.
    $statusparse->urls[] = array("url" => $statusparse->base_url, "deep" => 0);
    //-------------------------------
    $_SESSION["statusparse"] = $statusparse;
    session_commit();
    header('Location: '.$self.'?step');
}
//--------------------------------- format an existing result file
elseif (isset($_GET["file8"])) {
    // The session was reset above, so no parser object exists here. A plain
    // object carries the properties output() reads (assigning properties to
    // an undefined variable is a fatal error on PHP 8).
    if (!is_object($statusparse))
        $statusparse = new stdClass();
    $statusparse->formatstring = isset($_GET["formstr8"]) ? $_GET["formstr8"] : '';
    $statusparse->formatted = ($statusparse->formatstring != '');
    output();
}
else
    show_form();
//-----------------------------------------------------------------
/**
 * Prints the parser start page: a form for the patterns, the target url and
 * the format string, plus an iframe that polls "?step&status" once a second
 * after the form has been submitted.
 *
 * The last path segment of $_SERVER['PHP_SELF'] is used by the page script
 * as the address of this script.
 */
function show_form() {
    // end() on a real variable avoids the "only variables should be passed
    // by reference" notice that array_pop(explode(...)) raises, which would
    // otherwise be printed into the middle of the inline script.
    $segments = explode('/', $_SERVER['PHP_SELF']);
    // JSON-encode so that quotes or tags in the name cannot break out of
    // the JavaScript string literal.
    $script_js = json_encode(end($segments),
        JSON_HEX_TAG | JSON_HEX_AMP | JSON_HEX_APOS | JSON_HEX_QUOT);
?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta http-equiv="Cache-Control" content="no-cache" />
<title>Parser</title>
<style type="text/css">
#formatted {
    position:absolute;
    width:390px;
    height:600px;
    left:10px;
    top:10px;
    border:solid 1px black;
    overflow:hidden;
    padding:10px;
    font-family:verdana;
    font-size:10px;
}
#zform {
    position:relative;
    left:65%;
    top:10px;
    margin-left:-400px;
    width:800px;
}
label {
    width: 6em;
    float:left;
    text-align:right;
    margin-right:1em;
    display:block;
    font-family:verdana;
    font-size:0.9em;
}
form p{
    padding:3px;
    margin:3px;
}
input {
    font-family:courier,monospace;
}
.submit input {
    position:relative;
    left:4.5em;
}
fieldset {
    border: solid 1px black;
    width:50em;
    padding:0em;
}
legend {
    border:solid 1px black;
    padding:2px 6px;
}
/*---------------------------*/
#statframe {
    position:relative;
    top:10px;
    left:65%;
    width:800px;
    height:300px;
    margin-top:20px;
    margin-left:-400px;
}
#statframe iframe {
    border:solid 1px black;
    width:100%;
    height:100%;
}
</style>
<script type="text/javascript" src="js/jquery-1.7.2.js"></script>
<script type="text/javascript">
var rr;
// Opens the parser in the window "zgoinf" and starts polling its status
// into the "statifr" frame until "parse complete" shows up there.
function go(el) {
    var inf=<?= $script_js ?>;
    zWin=open(inf+'?regexp8='+encodeURIComponent(el.regexp8.value)+'&'+
        'url8='+encodeURIComponent(el.url8.value)+'&'+
        'formstr8='+encodeURIComponent(el.formstr8.value),'zgoinf');
    var x=frames['statifr'];
    x.document.body.innerHTML='';
    rr=setInterval(function() {
        x.location.replace(inf+'?step&status');
        if (x.document.body.innerHTML.indexOf('parse complete')!=-1) {
            clearInterval(rr);
        }
    },1000)
}
//------------------------------------------
// Stops the status polling started by go().
function stop_refr() {
    clearInterval(rr);
}
</script>
</head>
<body>
<div id="formatted"></div>
<div id="zform">
<form onsubmit="go(this);return false;" name="parseform" method="post" enctype="application/x-www-form-urlencoded" >
<fieldset>
<legend>Parser go</legend>
<p><label for="regexp8">RegExp</label>
<textarea id="regexp8" name="regexp8" cols="68"
    autocomplete="off"
    rows="8"></textarea></p>
<p><label for="url8">Target url</label>
<input type="text" id="url8" name="url8" size="70" autocomplete="off" /></p>
<p><label for="formstr8">format string</label>
<textarea id="formstr8" name="formstr8" cols="68" autocomplete="off" rows="4"></textarea></p>
<p class="submit"><input type="submit" name="submit" value="parse" />
<input type="reset" name="reset" value="reset" />
<input type="button" name="zzu" value="stop" onclick="stop_refr();return false;" />
</p>
</fieldset>
</form>
</div>
<div id="statframe">
<iframe name="statifr"></iframe>
</div>
</body>
</html>
<?php
}
- ?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement