Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
/**
 * Implements hook_menu().
 *
 * Registers a single "mymodule" path whose page callback is pull_data().
 * The route takes no page arguments and is gated by the "access content"
 * permission.
 *
 * @return array
 *   Menu item definitions keyed by path.
 */
function mymodule_menu() {
  // Only one route is exposed, so the definition is returned as a literal.
  return array(
    'mymodule' => array(
      'title' => 'mymodule',
      'page callback' => 'pull_data',
      'page arguments' => array(),
      'access arguments' => array('access content'),
    ),
  );
}
/**
 * Returns the term with the given name in a vocabulary, creating it if needed.
 *
 * @param string $term_name
 *   Human-readable term name. Surrounding whitespace is ignored.
 * @param string $vocabulary_name
 *   Machine name of the vocabulary the term belongs to.
 *
 * @return object|null
 *   The existing or newly saved term object, or NULL when the term name is
 *   empty or the vocabulary does not exist (nothing is created in that case).
 */
function insert_term_if_not_exists($term_name, $vocabulary_name) {
  // Normalise before both the lookup and the save: the name lookup compares
  // against a trimmed value, so saving an untrimmed name would never be found
  // again and would be re-created on every call.
  $term_name = trim((string) $term_name);
  if ($term_name === '') {
    // A nameless term is never wanted; bail out before touching the database.
    return NULL;
  }

  $vocabulary = taxonomy_vocabulary_machine_name_load($vocabulary_name);
  if (empty($vocabulary)) {
    // Without a vocabulary there is no vid to attach the term to.
    watchdog('mymodule', 'Vocabulary %vocabulary does not exist; term %term was not created.', array(
      '%vocabulary' => $vocabulary_name,
      '%term' => $term_name,
    ), WATCHDOG_WARNING);
    return NULL;
  }

  $matched_terms = taxonomy_get_term_by_name($term_name, $vocabulary->machine_name);
  if (!empty($matched_terms)) {
    // Several terms may share a name; use the first one found.
    return reset($matched_terms);
  }

  $term = new stdClass();
  $term->vid = $vocabulary->vid;
  $term->name = $term_name;
  taxonomy_term_save($term);

  return $term;
}
/**
 * Page callback: imports new articles from the remote news site as nodes.
 *
 * Downloads the remote listing page, extracts every "?st=<id>" link and, for
 * each id that does not already equal the "created" value of an existing node,
 * downloads the article, scrapes its fields with regular expressions and saves
 * an unpublished "article" node.
 *
 * @return string
 *   HTML markup: a heading followed by one line per processed remote URL.
 */
function pull_data() {
  $base_url = 'http://www.remotesite.tld/';
  $content = '<h3>The following remote URLs have been processed:</h3>';

  $listing = file_get_contents($base_url . 'news.php?S=left');
  if ($listing === FALSE) {
    watchdog('mymodule', 'The remote article list could not be retrieved.', array(), WATCHDOG_ERROR);
    return $content . 'The remote article list could not be retrieved.<br>';
  }

  preg_match_all('/\?st=\K\S+(?=\s*target)/', $listing, $matches);
  $ids = empty($matches[0]) ? array() : $matches[0];

  foreach ($ids as $match) {
    // Tolerate quoted href attributes in the listing markup.
    $match = trim($match, "\"'");
    $article_url = $base_url . 'news2.php?st=' . $match;

    // The id is stored in the integer "created" column, so anything that is
    // not purely numeric cannot be imported (or safely queried).
    if (!ctype_digit($match)) {
      $content .= check_plain($article_url) . ' was skipped (invalid article id).<br>';
      continue;
    }

    // fetchField() yields FALSE when no row matches; compare strictly so an
    // id of "0" is not mistaken for "already imported".
    $existing = db_query('SELECT created FROM {node} WHERE created = :created', array(':created' => $match))->fetchField();
    if ($existing !== FALSE) {
      $content .= 'http://www.remotesite.tld/news2.php?st=' . $match . ' had been previously added.<br>';
      continue;
    }

    $raw = file_get_contents($article_url);
    $article = ($raw === FALSE) ? FALSE : iconv('windows-1251', 'utf-8', $raw);
    if ($article === FALSE) {
      $content .= check_plain($article_url) . ' could not be retrieved.<br>';
      continue;
    }

    $country = _mymodule_first_match('/(?<=target=left class=datn>).*?(?=<\/a> )/', $article);
    $title = _mymodule_first_match('/(?<=\<title>).*?(?= \| Europe)/', $article);
    $body = _mymodule_first_match('/(?<=\<p align=justify>).*?(?=<p>)/', $article);
    $source = _mymodule_first_match('/(?<=target=_blank class=datn>).*?(?=<\/a><br>Permalink)/', $article);
    $image_path = _mymodule_first_match('/(?<=\<td><IMG src=).*?(?=border=1)/', $article);

    if ($country === 'Europe') {
      $country = 'Planet';
    }

    // Strip the media markers the remote site appends to titles.
    $title = ($title === NULL) ? '' : trim(str_replace(array('(фото)', '(фрейм)'), '', $title));
    if ($title === '') {
      // A node cannot be saved without a title.
      $content .= check_plain($article_url) . ' was skipped (no title found).<br>';
      continue;
    }
    // Keep the title within the 255-character node title column.
    $title = drupal_substr($title, 0, 255);

    $node = new stdClass();
    $node->type = 'article';
    $node->language = 'en';
    node_object_prepare($node);
    // node_object_prepare() assigns the current user and the request time to
    // new nodes, so the intended author and timestamps are set afterwards.
    $node->uid = 1;
    $node->created = (int) $match;
    $node->changed = (int) $match;
    $node->title = $title;
    $node->status = 0;
    $node->promote = 0;
    $node->sticky = 0;
    $node->flash = ($source === 'Europe') ? 1 : 0;

    if ($body !== NULL) {
      $node->body[LANGUAGE_NONE][0]['value'] = $body;
      $node->body[LANGUAGE_NONE][0]['format'] = 'filtered_html';
    }

    _mymodule_attach_term($node, 'field_tags', $country, 'tags');
    _mymodule_attach_term($node, 'field_source', $source, 'source');

    if ($image_path !== NULL) {
      // The src attribute may be quoted and is followed by whitespace.
      $image_path = trim($image_path, " \t\n\r\0\x0B\"'");
      if ($image_path !== '') {
        _mymodule_attach_image($node, $base_url . $image_path);
      }
    }

    node_save($node);
    // The title comes from a remote page; escape it before printing.
    $content .= check_plain($title) . '<br>';
  }

  return $content;
}

/**
 * Returns the first full match of a pattern in a string, or NULL if none.
 */
function _mymodule_first_match($pattern, $subject) {
  return preg_match($pattern, $subject, $found) === 1 ? $found[0] : NULL;
}

/**
 * Ensures a term exists in a vocabulary and references it from a node field.
 *
 * The tid is taken from the term returned by insert_term_if_not_exists(), so
 * a same-named term in another vocabulary can never be attached by mistake.
 */
function _mymodule_attach_term($node, $field_name, $term_name, $vocabulary_name) {
  if (!is_string($term_name) || trim($term_name) === '') {
    return;
  }
  $term = insert_term_if_not_exists($term_name, $vocabulary_name);
  if (empty($term->tid)) {
    return;
  }
  $langcode = field_language('node', $node, $field_name);
  $node->{$field_name}[$langcode][0]['tid'] = $term->tid;
}

/**
 * Downloads a remote image and attaches it to the node's field_image.
 *
 * Any failure (download, directory creation, file save) leaves the node
 * without an image instead of aborting the import.
 */
function _mymodule_attach_image($node, $image_url) {
  $data = file_get_contents($image_url);
  if ($data === FALSE) {
    return;
  }
  $directory = file_default_scheme() . '://field/image';
  if (!file_prepare_directory($directory, FILE_CREATE_DIRECTORY)) {
    return;
  }
  $file = file_save_data($data, $directory . '/' . basename($image_url));
  if ($file) {
    $file->status = FILE_STATUS_PERMANENT;
    $node->field_image[LANGUAGE_NONE][0] = (array) $file;
  }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement