Дорг 1

<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<title></title>
</head>
<body>

<?php
// Debug settings: show every error directly in the page output.
//error_reporting(E_ALL | E_STRICT);
ini_set('display_errors', TRUE);
ini_set('display_startup_errors', TRUE);
// The HTML preamble above this PHP block is emitted first, so the header can
// only be set while output is still buffered; otherwise skip it instead of
// raising a "headers already sent" warning.
if (!headers_sent()) {
    header('Content-type: text/html; charset=UTF-8');
}

// Database connection.
// On failure there is no connection object, so the message has to come from
// mysqli_connect_error(); mysqli_error() cannot be called with a failed link.
// NOTE(review): credentials are hard-coded here — consider moving them to configuration.
    $link = mysqli_connect('localhost','mytest','qwerr','basat');
    if (!$link) {
        die('Ошибка соединения: '.mysqli_connect_error());
    }
    mysqli_set_charset($link, "utf8");

// Start of the timing measurement (reported at the end of the script).
    $g1 = microtime(true);

// Tables this run works with: the source of phrases/URLs and the temporary
// table that collects the extracted text sections.
    $source_table = 'xml_list_3_50k_12';
    $time_table1 = "time_table1";

# First database step
    #### HOW MANY PHRASES ARE LEFT IN THE SOURCE TABLE ####

    // $data keeps the fetched rows as before; $myc is the value that gets printed.
    // Both are initialised so that a failed query no longer feeds an undefined
    // variable into implode().
    $data = array();
    $myc = '0';
    $res = mysqli_query($link," SELECT COUNT(*) FROM `".$source_table."` ");
    if ($res) {
        while ($row = mysqli_fetch_assoc($res)) {
            $data[] = $row;
        }
        mysqli_free_result($res);
        if (isset($data[0])) {
            // the row has a single column, so implode() yields just the count
            $myc = implode("", $data[0]);
        }
    } else {
        echo mysqli_errno($link) . ": " . mysqli_error($link) . "<br>\n";
    }
    echo "Всего фраз в таблице: = $myc<br>\n\n";


    #### БЕРЁМ ИЗ БД ФРАЗУ И URL КОНКРЕТНОГО ID ####################################################

    mysqli_query($link," LOCK TABLES `".$source_table."` WRITE ") or die (mysqli_errno($link) . ": ошибка лока " . mysqli_error($link). " ошибка лока \n");
    $res = mysqli_query($link," SELECT `id`, `key`, `urls` FROM `".$source_table."`   LIMIT 1 ");
    if($res) { //если запрос успешный
        while($row = mysqli_fetch_assoc($res)) {
            $mytxt[] = $row;
        }
            //var_dump($mytxt);
            $lastid = $mytxt[0]['id'];
            $arttitle = $mytxt[0]['key'];

                $pieces = explode("|", $mytxt[0]['urls']);
                //var_dump($pieces);
                $num_pieces = count($pieces);

                    $ist = array();
                    ### !!!!! ВАЖНО ЧТО БЫ 50 URL НЕ ОБРАБАТЫВАЛОСЬ вместо $num_pieces ставим 20
                    for ($i6 = 0; $i6 < 20; $i6++) {
                        if(strlen($pieces[$i6]) < 1){unset($pieces[$i6]);}
                        else { $ist[] = $pieces[$i6]; }
                    }
                sort($ist);

                echo "<b>Запрос:</b> $arttitle<br><br>";

                echo "<pre>";
                var_dump($ist);
                echo "</pre>";

                echo "<br><br><br>";
### Удаляю значение
    $zzapros = "DELETE FROM `".$source_table."` WHERE `id`=".$lastid." ";
    mysqli_query($link," $zzapros ") or die (mysqli_errno($link) . ": ошDEL " . mysqli_error($link). "  ошDEL \n");
    mysqli_query($link," UNLOCK TABLES ") or die (mysqli_errno($link) . ": ошибка анлока " . mysqli_error($link). " ошибка анлока \n");

        }

############################ ПОГНАЛ САМ CURL #######################################################


#######################################################################################
#######################################################################################

if (isset($ist)) {
    // Отладка
    echo "ist существует<br>";

$maintext = "";
    foreach ($ist as $url_link) { // здесь правильно
        $url_link = trim($url_link);
        if ($url_link) {
            // echo "Fetching {$url_link}...<br/>\r\n";
            $main = curl_init($url_link);
            //шлем заголовки
                curl_setopt($main, CURLOPT_RETURNTRANSFER, 1);
                curl_setopt($main, CURLOPT_USERAGENT, 'Opera/9.80 (Windows NT 5.1; U; ru) Presto/2.7.62 Version/11.01');

            if ($main) {

                // Выполняем запрос и приводим к одной кодировке
                $html = curl_exec($main);

######################### Определение кодировки #############################################################
    @$str_cp1251 = iconv('UTF-8', 'Windows-1251', $html);
    $kod1 = preg_match('#.#u', $html);
    $kod2 = preg_match('#.#u', $str_cp1251);
    if ($kod1 == 0) { // если $kod1 равен нулю то это 1251, конверируем её
        $html = iconv("WINDOWS-1251", "UTF-8", $html);
    }
    else { /* если $kod1 не равен нулю то это UTF-8 подробнее о кодировках тут: http://habrahabr.ru/post/107945/#comment_3411725 */ }


// Counter of paragraphs accepted for the current page (incremented by the
// paragraph filter further down).
$n_mass = 0;
// NOTE(review): this file is not visible here; the code below and later reads
// $title_content_word, $stop_content_word and $clean_content_word, which are
// presumably defined by it — confirm.
include 'stop_content_word.php';

// Extract the page title; stays an empty string when the page has none.
    $my_title = '';
    $base_title = '/<title>(.*?)<\/title>/ism';
    $pars_title = preg_match_all($base_title, (string) $html, $title);
    if (isset($title[1][0])) {
        $my_title = $title[1][0];
    }
    // The title comes from a remote page, so it is escaped before being printed.
    echo "Title: <b>" . htmlspecialchars($my_title, ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8') . "</b><br>";

############### SKIP THE PAGE WHEN ITS TITLE CONTAINS A STOP WORD ###############
    // Strict comparison with false: a stop word at the very beginning of the
    // title is found at offset 0, which a loose "!= false" treats as "not found".
    $title_words = (isset($title_content_word) && is_array($title_content_word)) ? $title_content_word : array();
    foreach ($title_words as $word) {
        $word = (string) $word;
        if ($word !== '' && strpos($my_title, $word) !== false) {
            $html = ""; // an empty page yields no paragraphs downstream
            break;
        }
    }


// CR LF sequence; not used in this section, kept in case other code reads it.
$nl = chr(13).chr(10);

/*
 * Reduces a downloaded page to a small, attribute-free tag vocabulary so that
 * the later stages can split it into <p> paragraphs.
 *
 * The rules run strictly in the order listed below. Differences from the
 * previous rule list:
 *  - opening-tag patterns end at a word boundary, so "<b" no longer swallows
 *    <br>/<body>/<big>, "<a" no longer swallows <article>, "<li" no longer
 *    swallows <link>, etc.;
 *  - <select ...> is normalised to <select> (it used to become <div>, which
 *    kept the option texts in the output);
 *  - rules that could never match after the whitespace collapse (double/triple
 *    whitespace next to tags, triple newline) were dropped;
 *  - the pattern for a dangling </h6> was malformed and is fixed;
 *  - "Глава N." / "Раздел N." match a real dot and handle 10 before 1.
 *
 * @param mixed $markup page source (UTF-8 expected)
 * @return string cleaned markup; '' when a PCRE call fails, as before
 */
$cleanMarkup = function ($markup) {
    // Applies a list of array(pattern, replacement) rules one after another.
    $apply = function ($subject, array $rules) {
        foreach ($rules as $rule) {
            $subject = (string) preg_replace($rule[0], $rule[1], $subject);
        }
        return $subject;
    };
    // Pattern for an opening tag with any attributes, e.g. <div class="x">.
    $openTag = function ($tag) {
        return '~<' . $tag . '\b[^>]*>~i';
    };

    $text = (string) $markup;

    $text = $apply($text, array(
        // tables are dropped entirely (not used, saves time)
        array($openTag('table'), '<table>'),
        array('~<table>.*?</table>~is', ''),
        // images
        array($openTag('img'), ''),
        // link attributes are removed; the bare <a> markers are still needed later
        array($openTag('a'), '<a>'),
        array('~<a></a>~i', ''),
        // collapse whitespace runs and trim whitespace around tags
        array('/\s{2,}/', ' '),
        array('/\s</', '<'),
        array('/>\s/', '>'),
        // a double line break becomes a paragraph break
        array('/<br><br>/i', '</p><p>'),
        array('/<br\s\/><br\s\/>/i', '</p><p>'),
        array('/<br\/><br\/>/i', '</p><p>'),
        // iframes
        array($openTag('iframe'), '<iframe>'),
        array('~<iframe>.*?</iframe>~is', ''),
    ));

    // strip attributes from the tags that are kept or rewritten below
    $rules = array();
    foreach (array('b', 'small', 'big', 'p', 'br', 'font', 'div', 'select', 'span', 'ul', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6') as $tag) {
        $rules[] = array($openTag($tag), '<' . $tag . '>');
    }
    $text = $apply($text, $rules);

    $text = $apply($text, array(
        // div and span both become paragraphs
        array('~<(/?)(?:div|span)>~i', '<$1p>'),
        array('~</?font>~i', ' '),
        array('~</?(?:big|small|noindex)>~i', ''),
        // empty containers
        array('~<p></p>~i', ''),
        array('~<li></li>~i', ''),
        array('~<ul></ul>~i', ''),
        array('~<ol></ol>~i', ''),
        // elements removed together with their content
        array($openTag('script'), '<script>'),
        array('~<script>.*?</script>~is', ''),
        array('~<noscript>.*?</noscript>~is', ''),
    ));

    $rules = array();
    foreach (array('form', 'select', 'nav', 'style', 'ins') as $tag) {
        $rules[] = array($openTag($tag), '<' . $tag . '>');
        $rules[] = array('~<' . $tag . '>.*?</' . $tag . '>~is', '');
    }
    $rules[] = array('~<xml>.*?</xml>~is', '');
    $rules[] = array($openTag('input'), '<input>');
    // doctype / comments / other "<!...>" constructs
    $rules[] = array('/<!(.*?)>/is', '');
    $rules[] = array($openTag('li'), '<li>');
    $rules[] = array($openTag('link'), '');
    $text = $apply($text, $rules);

    // inline emphasis: drop empty pairs, merge adjacent pairs and put a space
    // around the tags (whitespace next to tags was removed above)
    $rules = array();
    foreach (array('em', 'strong', 'b', 'i') as $tag) {
        $rules[] = array("~<{$tag}></{$tag}>~i", ($tag === 'i') ? '' : ' ');
        $rules[] = array("~</{$tag}><{$tag}>~i", ' ');
        $rules[] = array("~<{$tag}>~i", " <{$tag}>");
        $rules[] = array("~</{$tag}>~i", "</{$tag}> ");
    }
    $text = $apply($text, $rules);

    $rules = array(
        array('~<p>&raquo;</p>~i', ''),
        array('~<li></li>~i', ''),
        array('~<ul></ul>~i', ''),
        array('~<ol></ol>~i', ''),
        array('~<p><p>~i', '<p>'),
        array('~</p></p>~i', '</p>'),
        array('~<hr\s/>~i', ''),
        array('~<li>&raquo;</li>~i', ''),
        array('~–<i>«~i', '– <i>«'),
        array('~–<b>«~i', '– <b>«'),
        array('~–<em>«~i', '– <em>«'),
    );
    // headings are wrapped in <p> so that the paragraph splitter picks them up
    for ($n = 1; $n <= 6; $n++) {
        $rules[] = array("~<h{$n}>~i", "<p><h{$n}>");
    }
    for ($n = 1; $n <= 6; $n++) {
        $rules[] = array("~</h{$n}>~i", "</h{$n}></p>");
    }
    // no whitespace before punctuation
    $rules[] = array('/\s([.,:;!?])/', '$1');
    // leftovers: leading breaks, empty and doubled paragraphs, stray <b>
    $rules[] = array('~<p><br\s/>~i', '<p>');
    $rules[] = array('~<p><br>~i', '<p>');
    $rules[] = array('~<p></p>~i', '');
    $rules[] = array('~<p><p>~i', '<p>');
    $rules[] = array('~<p>\s<p>~i', '<p>');
    $rules[] = array('~<b><p>~i', '<p>');
    $rules[] = array('~<b></p>~i', '</p>');
    // boilerplate words of Russian pages
    $rules[] = array('/Содержание:/i', '');
    $rules[] = array('/(?:Глава|Раздел) (?:10|[1-9])\./i', '');
    // headings that were opened or closed without their counterpart
    for ($n = 1; $n <= 6; $n++) {
        $rules[] = array("~<p><h{$n}><p>~i", '<p>');
    }
    for ($n = 1; $n <= 6; $n++) {
        $rules[] = array("~</p></h{$n}></p>~i", '</p>');
    }

    return $apply($text, $rules);
};

$text = $cleanMarkup(isset($html) ? $html : '');
// The cleaning stage can leave a non-string here when a PCRE call fails;
// normalise so the functions below always receive a string.
$text = isset($text) ? (string) $text : '';

// Length in bytes of the page text with all tags removed (debug output only).
$text_clear_long = strip_tags($text);
$long_text = strlen($text_clear_long);

// echo "$text<br>";
echo "Длина текста без тегов всего: $long_text<br>";


################### LINK STATISTICS: TEXT BETWEEN AND INSIDE <A> ###################

// These variables live in the global scope and this code runs once per URL,
// so they are reset here; otherwise a page without links would be judged by
// the numbers of the previously processed page.
    $max = 0;
    $m_sum = 0;
    $link_sum = 0;

// Text between a closing </a> and the next <a>
    $match_a = '/<\/a>(.+?)<a>/ism';
    $run_title = preg_match_all($match_a, $text, $i_a);
    $count_a = isset($i_a[0]) ? count($i_a[0]) : 0;

    $ma = array();
    foreach ((isset($i_a[1]) ? $i_a[1] : array()) as $fragment) {
        $ma[] = strlen(strip_tags($fragment)); // byte length without tags
    }
    $m_sum = array_sum($ma);
    $Count_ma = count($ma);

// Discard the page when even the longest run of text between links is shorter than 300 bytes
    if ($Count_ma > 0) {
        $max = max($ma);
        if ($max < 300) { $text = ""; }
    }
    echo "Самый большой кусрк между ссылок: $max<br>";
    echo "Длина между ссылок: $m_sum<br>";

// Text inside links; discard the page when it outweighs the text between links
    $match_a = '/<a>(.+?)<\/a>/ism';
    $run_title = preg_match_all($match_a, $text, $i_a);
    $count_a = isset($i_a[0]) ? count($i_a[0]) : 0;

    $sa = array();
    foreach ((isset($i_a[1]) ? $i_a[1] : array()) as $fragment) {
        $sa[] = strlen(strip_tags($fragment));
    }
    $link_sum = array_sum($sa);

    echo "Длина внутри ссылок: $link_sum<br>";
    if ($link_sum > $m_sum) { $text = ""; }

# ЕСЛИ НЕСКОЛЬКО СЛОВ КОММЕНТАРИЕВ ТО ЭТО ОГЛАВЛЕНИЕ ###################
    /*
    $match_num = '/омментариев/is';
    $run_title = preg_match_all($match_num, $text, $i_comm);
    $count_comm = count($i_comm[0]);
    echo "count_comm = $count_comm<br>";
    if ($count_comm > 4){$text=""; exit("На этой странице больше 4 слов омментариев");}
    */

################### SPLIT THE PAGE INTO <P> PARAGRAPHS ###################

$match_num = '/<p>(.+?)<\/p>/ism';
$run_title = preg_match_all($match_num, isset($text) ? (string) $text : '', $i_num);
$count_num = isset($i_num[0]) ? count($i_num[0]) : 0;
//var_dump($i_num);
echo "Всего абзацев Р обнаружено: $count_num<br>";
$only_echo = array();

################### FIRST PASS: DECIDE WHICH PARAGRAPHS ARE KEPT ###################

// Word lists; NOTE(review): presumably defined by the included
// stop_content_word.php, which is not visible here — confirm.
$terminator_words = (isset($stop_content_word) && is_array($stop_content_word)) ? $stop_content_word : array();
$reject_words = (isset($clean_content_word) && is_array($clean_content_word)) ? $clean_content_word : array();

// True when any non-empty word of the list occurs in the text.
$containsAny = function ($haystack, array $words) {
    foreach ($words as $word) {
        $word = (string) $word;
        if ($word !== '' && strpos($haystack, $word) !== false) {
            return true;
        }
    }
    return false;
};

/*
 * Checks one "<p>...</p>" fragment.
 *
 * @param string $paragraph   fragment including its <p> tags
 * @param array  $rejectWords words that disqualify the paragraph
 * @return string|null the paragraph to output, or null when it is dropped
 */
$filterParagraph = function ($paragraph, array $rejectWords) use ($containsAny) {
    // paragraphs containing a listed word
    if ($containsAny($paragraph, $rejectWords)) {
        return null;
    }
    // something that looks like a URL / file name (letter.letter)
    if (preg_match('/[a-zA-Z]\.[a-zA-Z]/', $paragraph)) {
        return null;
    }
    // a dd.mm.yyyy date
    if (preg_match('/([0-2]\d|3[01])\.(0\d|1[012])\.(\d{4})/', $paragraph)) {
        return null;
    }
    // an opening <b> without any closing </b> is removed
    if (preg_match('/<b>/i', $paragraph) && !preg_match('/<\/b>/i', $paragraph)) {
        $paragraph = (string) preg_replace('/<b>/i', '', $paragraph);
    }
    // at least one Cyrillic letter is required; /u makes this a character
    // test (a byte-range class also matched bytes of other scripts)
    if (preg_match('/[А-Яа-яЁё]/u', $paragraph) !== 1) {
        return null;
    }
    // paragraphs without a heading must contain at least 200 bytes of text
    $long_id = strlen(strip_tags($paragraph));
    if (!preg_match('/<h[1-6]>/', $paragraph) && $long_id < 200) {
        return null;
    }
    if ($long_id === 0) {
        return null;
    }
    // drop a leading "N. " list number
    return (string) preg_replace('/<p>[0-9]\.\s/i', '<p>', $paragraph);
};

foreach ((isset($i_num[0]) ? $i_num[0] : array()) as $id1 => $paragraph) {
    // A terminator word (e.g. the start of a comment section) ends the page:
    // this paragraph and everything after it is ignored.
    // NOTE(review): the previous code also unset the paragraph *before* the
    // match, but that paragraph had already been emitted, so it had no effect
    // on the output; that effective behaviour is kept here.
    if ($containsAny($paragraph, $terminator_words)) {
        break;
    }

    $kept = $filterParagraph($paragraph, $reject_words);
    if ($kept !== null) {
        $n_mass++;
        $only_echo[] = $kept;
    }
}

$oe_num = count($only_echo);
// Debug output
echo "oe_num = $oe_num<br>";
// var_dump($only_echo);
// var_dump($only_echo);

################### ОБРАБОТКА <Р> ВО ВТОРОМ МАССИВЕ ###################


// Pages with fewer than five accepted paragraphs are ignored.
if ($n_mass > 4) {

    # 1. Heading chunks: remove the <p> wrapper, turn every heading level into
    #    <h1> and put a <z1> marker in front of it (the marker is the section
    #    delimiter used below).
    foreach ($only_echo as $i => $chunk) {
        if (preg_match('/<h[1-6]>/is', $chunk)) {
            $chunk = preg_replace('/<\/?p>/is', '', $chunk);
            $chunk = preg_replace('/<(\/?)h[2-6]>/is', '<$1h1>', $chunk);
            $only_echo[$i] = preg_replace('/<h1>/is', '<z1><h1>', $chunk);
        }
    }

    # 2. Of two consecutive headings only the later one is kept; a heading
    #    that is the very last chunk (no text after it) is removed as well.
    for ($i2 = 1; $i2 < $oe_num; $i2++) {
        if (!isset($only_echo[$i2]) || !preg_match('/<h[1-6]>/is', $only_echo[$i2])) {
            continue;
        }
        $i_pred = $i2 - 1;
        $i_nex = $i2 + 1;
        if (isset($only_echo[$i_pred]) && preg_match('/<h[1-6]>/is', $only_echo[$i_pred])) {
            unset($only_echo[$i_pred]);
        }
        if (!isset($only_echo[$i_nex])) {
            unset($only_echo[$i2]);
        }
    }

    # 3. Join the remaining chunks; the trailing marker closes the last section.
    $content = implode('', $only_echo) . "<z1>";

    # 4. Split into sections at the <z1> markers.
    #    The text before the first marker (paragraphs that precede the first
    #    heading) is not a section and is dropped, as before. Splitting on the
    #    marker keeps every section; the previous pattern /<z1>(.+?)<z1>/
    #    consumed the closing marker and therefore skipped every second one.
    $sections = explode('<z1>', $content);
    array_shift($sections);
    $final = array(array(), array());
    foreach ($sections as $section) {
        if ($section !== '') {
            $final[0][] = '<z1>' . $section . '<z1>';
            $final[1][] = $section;
        }
    }
    $count_num = count($final[0]);
    $f_bd = count($final[1]);

    # 5. Store the sections in the temporary table.
    #    The text comes from remote pages, so it is passed as a bound parameter
    #    instead of being concatenated into the SQL string.
    $insert = mysqli_prepare($link, "INSERT INTO `".$time_table1."` SET `text`=?");
    if ($insert) {
        $mytext = '';
        mysqli_stmt_bind_param($insert, 's', $mytext); // bound by reference
        foreach ($final[1] as $section) {
            $mytext = $section;
            if (!mysqli_stmt_execute($insert)) {
                echo mysqli_stmt_errno($insert) . ": " . mysqli_stmt_error($insert) . "<br>\n";
            }
        }
        mysqli_stmt_close($insert);
    } else {
        echo mysqli_errno($link) . ": " . mysqli_error($link) . "<br>\n";
    }

    $only_echo = array();
    $content = "";

// reading back from the DB and writing the file happens after the URL loop

} // end of $n_mass > 4

############################# ЗАВЕРШАБЩИЙ БЛОК #########################


            } else {
                echo "curl_init failure! <br/>\r\n";
            }
            curl_close($main);
        }

    } // конец foreach

############################# РАБОТАЕМ ПОСЛЕ foreach ТОЛЬКО С $maintext
}


# Read the collected sections back from the temporary table to build the article
    // Start from an empty list: $mytxt was used earlier for the row of the
    // source table, and appending to it would put a row without a `text`
    // column in front of the sections.
    $mytxt = array();
    $gottext = "";
    $m_long = 0;
    $narast_long = 0;

    $res = mysqli_query($link," SELECT `text` FROM `".$time_table1."` ");
    if ($res) {
        while ($row = mysqli_fetch_assoc($res)) {
            $mytxt[] = $row;
        }
        mysqli_free_result($res);

        $m_long = count($mytxt);
        // Debug output
        var_dump($mytxt);

        // Concatenate sections until the article exceeds 20000 bytes; the
        // section that crosses the limit is still included.
        for ($i5 = 0; $i5 < $m_long; $i5++) {
            $gottext .= $mytxt[$i5]['text'];
            $longelement = strlen($mytxt[$i5]['text']);
            $narast_long = $narast_long + $longelement;
            echo "longelement = $longelement<br>";
            echo "narast_long = $narast_long<br>";
            if ($narast_long > 20000) { break; } // maximum article size
        }
    }

# Output stage: choose the target file name, hand over to the design template,
# empty the temporary table and report the elapsed time.
$fname = microtime(true); // the current timestamp is used as the new file name
$file="html/$fname.html";

// Design/template include.
// NOTE(review): this file is not visible here; presumably it reads $file and
// the article text prepared above and writes the page — confirm.
include 'grabber_site_xml_list_design.php';

// The temporary table is emptied so the next run starts clean.
mysqli_query($link, " TRUNCATE `".$time_table1."` ");

$g2 = microtime(true);
$g3 = $g2 - $g1; // elapsed time since $g1 was taken at the start of the script
echo "\n<br>Время регулярок: ". $g3 ."<br>\n";

?>

<form action="" method="POST">
    <textarea name="text" cols="50">http://updiet.info/kak-poxudet-v-domashnix-usloviyax.html</textarea>
    <button type="submit" name="add">Add</button>
</form>

</body>
</html>