Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
- /**
- * Class Geodata
- */
- class Geodata extends MX_Controller
- {
- private $countries = [
- 'RU',
- 'KZ',
- 'UA',
- 'BY',
- ];
- private $currency = [
- 'RU' => 'RUB',
- 'KZ' => 'KZT',
- 'UA' => 'UAH',
- 'BY' => 'BYN',
- ];
- /*
- RUB 810 (вообще 643 но у нас 810)
- BY 933
- UA 980
- KZ 398
- */
- private $currency_codes = [
- 'RU' => 810,
- 'KZ' => 398,
- 'UA' => 960,
- 'BY' => 933,
- ];
- private $path = FCPATH . '../data/';
- /**
- * @var \PDO
- */
- private $pdo = null;
- /**
- * @var int
- */
- private $insertChunk = 5000;
- /**
- * Geodata constructor.
- */
- public function __construct()
- {
- exit;
- header('Content-type: text/plain; charset=utf-8');
- ini_set('display_errors', 'yes');
- ini_set('error_reporting', 'E_ALL');
- //ini_set('max_execution_time', 10);
- $this->pdo = $this->db->conn_id;
- $this->pdo->setAttribute(\PDO::ATTR_ERRMODE, \PDO::ERRMODE_EXCEPTION);
- $this->pdo->setAttribute(\PDO::ATTR_DEFAULT_FETCH_MODE, \PDO::FETCH_ASSOC);
- $this->pdo->setAttribute(\PDO::ATTR_EMULATE_PREPARES, FALSE);
- parent::__construct();
- }
- /**
- *
- */
- public function __destruct()
- {
- echo bytesToKbOrMb(memory_get_peak_usage(true)), "\n";
- }
- /**
- *
- */
- public function countries()
- {
- header('Content-type: text/plain; charset=utf-8');
- $data = file(FCPATH . '../data/BY.txt');
- $cities = [];
- $i = 0;
- foreach ($data as $line) {
- $rows = explode("\t", $line);
- /* exit($rows[7]);*/
- $first = mb_substr($rows[7], 0, 1);
- if ($first != 'P' && $first != 'A') {
- // print_r($rows);
- continue;
- }
- $cities[] = [
- 'geonameid' => $rows[0],
- 'aliases' => array_unique(array_map('trim', explode(',', $rows[1] . ',' . $rows[3]))),
- 'lat' => round($rows[4] * 1000000),
- 'lon' => round($rows[5] * 1000000),
- 'timezone' => $rows[17],
- 'original' => $rows,
- ];
- ++$i;
- if ($i > 100) {
- break;
- }
- }
- print_r($cities);
- }
- /**
- *
- */
- public function cities()
- {
- // количество колонок в исходной таблице geonames (RU.txt KZ.txt etc.)
- $record_fields_count = 19;
- $this->pdo->query('SET NAMES \'utf8mb4\';');
- // все source файлы долэжны быть на месте
- foreach ($this->countries as $country) {
- if (!file_exists($this->path . $country . '.txt')) {
- exit('File ' . $country . '.txt Not found');
- }
- }
- $insertData = [];
- foreach ($this->countries as $country) {
- echo 'Country: ' . $country . "\n";
- $linesCount = 0;
- $realInsertLines = 0;
- // use generator
- $gline = $this->readFileByLines($this->path . $country . '.txt');
- while ($gline->valid()) {
- $data = explode("\t", $gline->current());
- if (count($data) !== $record_fields_count) {
- $gline->next();
- continue;
- }
- $data[7] = mb_strtoupper($data[7]);
- $first = mb_substr($data[7], 0, 3);
- if ($first !== 'PPL' && $first !== 'ADM') {
- $gline->next();
- continue;
- }
- // сколько строк надо вставить
- ++$linesCount;
- $tmp = [
- (int)$data[0],
- $this->pdo->quote($data[1]),
- round($data[4] * 1000000),
- round($data[5] * 1000000),
- $this->pdo->quote($data[6]),
- $this->pdo->quote($data[7]),
- $this->pdo->quote($data[8]),
- $this->pdo->quote($data[9]),
- $this->pdo->quote($data[10]),
- $this->pdo->quote($data[11]),
- $this->pdo->quote($data[12]),
- $this->pdo->quote($data[13]),
- $data[14],
- (int)$data[15],
- $this->pdo->quote($data[17]),
- $this->pdo->quote($data[18]),
- ];
- // сохраняем готовую для вставки строчку
- $insertData[] = $tmp;
- if (count($insertData) === $this->insertChunk) {
- $realInsertLines += $this->InsertBatchTo__geonames_cities($insertData);
- $insertData = [];
- }
- $gline->next();
- }
- // добавляем данные, если остались
- if (count($insertData)) {
- $realInsertLines += $this->InsertBatchTo__geonames_cities($insertData);
- }
- unset($insertData);
- echo "Lines $country ", $linesCount, "\n";
- echo "realInsertLines ", $realInsertLines, "\n";
- }
- }
- /**
- *
- */
- public function admin1Codes()
- {
- // количество колонок в исходной таблице geonames (RU.txt KZ.txt etc.)
- $record_fields_count = 4;
- $insertData = [];
- $linesCount = 0;
- $realInsertLines = 0;
- $key_geonameId = 1;
- $this->pdo->query('SET NAMES \'utf8mb4\';');
- // use generator
- $gline = $this->readFileByLines($this->path . 'admin1CodesASCII.txt');
- while ($gline->valid()) {
- $data = explode("\t", $gline->current());
- if (count($data) !== $record_fields_count) {
- $gline->next();
- continue;
- }
- ++$linesCount;
- $tmp = [
- $this->pdo->quote($data[0]),
- // $key_geonameId = 1 (array index)
- (int)$data[3],
- $this->pdo->quote($data[1]),
- $this->pdo->quote($data[2]),
- ];
- // сохраняем готовую для вставки строчку
- $insertData[] = $tmp;
- if (count($insertData) === $this->insertChunk) {
- $realInsertLines += $this->InsertBatchTo__geonames_admin1Codes($insertData, $key_geonameId);
- $insertData = [];
- }
- $gline->next();
- }
- // добавляем данные, если остались
- if (count($insertData)) {
- $realInsertLines += $this->InsertBatchTo__geonames_admin1Codes($insertData, $key_geonameId);
- }
- unset($insertData);
- echo "Lines ", $linesCount, "\n";
- echo "realInsertLines ", $realInsertLines, "\n";
- }
- /**
- *
- */
- public function admin2Codes()
- {
- // количество колонок в исходной таблице geonames (RU.txt KZ.txt etc.)
- $record_fields_count = 4;
- $insertData = [];
- $linesCount = 0;
- $realInsertLines = 0;
- $key_geonameId = 1;
- $this->pdo->query('SET NAMES \'utf8mb4\';');
- // use generator
- $gline = $this->readFileByLines($this->path . 'admin2Codes.txt');
- while ($gline->valid()) {
- $data = explode("\t", $gline->current());
- if (count($data) !== $record_fields_count) {
- $gline->next();
- continue;
- }
- ++$linesCount;
- $tmp = [
- $this->pdo->quote($data[0]),
- # $key_geonameId = 1 (array index)
- (int)$data[3],
- $this->pdo->quote($data[1]),
- $this->pdo->quote($data[2]),
- ];
- // сохраняем готовую для вставки строчку
- $insertData[] = $tmp;
- if (count($insertData) === $this->insertChunk) {
- $realInsertLines += $this->InsertBatchTo__geonames_admin2Codes($insertData, $key_geonameId);
- $insertData = [];
- }
- $gline->next();
- }
- // добавляем данные, если остались
- if (count($insertData)) {
- $realInsertLines += $this->InsertBatchTo__geonames_admin2Codes($insertData, $key_geonameId);
- }
- unset($insertData);
- echo "Lines ", $linesCount, "\n";
- echo "realInsertLines ", $realInsertLines, "\n";
- }
- /**
- *
- */
- public function altnames()
- {
- $record_fields_count = 8;
- $linesCount = 0;
- $realInsertLines = 0;
- $insertData = [];
- $key_geonameId = 1;
- $this->pdo->query('SET NAMES \'utf8mb4\';');
- // use generator
- $gline = $this->readFileByLines($this->path . 'alternateNames.txt');
- while ($gline->valid()) {
- $data = explode("\t", $gline->current());
- if (count($data) !== $record_fields_count) {
- $gline->next();
- continue;
- }
- ++$linesCount;
- $tmp = [
- // alternateNameId (primory)
- (int)$data[0],
- // $key_geonameId = 1 (array index)
- (int)$data[1],
- // isolanguage
- $this->pdo->quote(mb_strtolower($data[2])),
- // alternate nam
- $this->pdo->quote($data[3]),
- // isPreferredNam
- (int)$data[4],
- // isShortName
- (int)$data[5],
- // isColloquial
- (int)$data[6],
- // isHistoric
- (int)$data[7],
- ];
- $insertData[] = $tmp;
- if (count($insertData) === $this->insertChunk) {
- echo '--- INSERT CHUNK ---', "\n";
- $realInsertLines += $this->InsertBatchTo__geonames_altnames($insertData, $key_geonameId);
- $insertData = [];
- }
- $gline->next();
- }
- // добавляем данные, если остались
- if (count($insertData)) {
- echo '--- INSERT LAST CHUNK ---', "\n";
- $realInsertLines += $this->InsertBatchTo__geonames_altnames($insertData, $key_geonameId);
- }
- unset($insertData);
- echo "\n", "\n", '--- END ---', "\n";
- echo "Lines ", $linesCount, "\n";
- echo "realInsertLines ", $realInsertLines, "\n";
- }
- public function sphinx()
- {
- $chunkSize = 3000;
- $start = 0;
- $records = 0;
- do {
- $sql = 'SELECT `geonameid`, `name`, `country_code`, `admin1_code`, `admin2_code` FROM `_geonames_cities` WHERE `feature_class`="P" ORDER BY `geonameid` ASC LIMIT '
- . $start . ',' . $chunkSize . ';';
- $data = $this->pdo->query($sql)->fetchAll(\PDO::FETCH_ASSOC);
- $this->citiesToFullPath($data);
- $records += count($data);
- $start += $chunkSize;
- } while (is_array($data) && count($data) > 0);
- echo 'Records: ' . $records, "\n";
- }
- /**
- * @param string $file
- *
- * @return Generator|null
- */
- private function readFileByLines($file)
- {
- // 1Mb
- $bufSize = 2.5 * 1024 * 1024;
- $buf = '';
- if (!file_exists($file) || !is_readable($file)) {
- return null;
- }
- $fp = fopen($file, 'rb');
- while (is_resource($fp) && !feof($fp)) {
- $buf = fread($fp, $bufSize);
- if (is_resource($fp) && !feof($fp)) {
- $buf .= fgets($fp);
- }
- $lines = explode("\n", $buf);
- foreach ($lines as $line) {
- yield $line;
- }
- }
- if (is_resource($fp)) {
- fclose($fp);
- }
- }
- /**
- * @param array $data
- *
- * @return int
- */
- private function InsertBatchTo__geonames_cities(array $data)
- {
- $sql = 'INSERT INTO `_geonames_cities`
- (`geonameid`, `name`, `latitude`, `longitude`, `feature_class`, `feature_code`, `country_code`, `cc2`,
- `admin1_code`, `admin2_code`, `admin3_code`, `admin4_code`, `population`, `elevation`, `timezone`, `modification_date`)';
- return $this->InsertBatch($sql, $data);
- }
- /**
- * @param array $data
- * @param integer $key_geonameId
- *
- * @return int
- */
- private function InsertBatchTo__geonames_altnames(array $data, $key_geonameId)
- {
- $data = $this->filterGeonamesId($data, $key_geonameId);
- if (count($data) == 0) {
- return 0;
- }
- $sql = 'INSERT INTO `_geonames_altnames`
- (`alternateNameId`, `geonameid`, `isolanguage`, `alternate_name`, `isPreferredNam`, `isShortName`, `isColloquial`, `isHistoric`)';
- return $this->InsertBatch($sql, $data);
- }
- /**
- * @param array $data
- * @param integer $key_geonameId
- *
- * @return int
- */
- private function InsertBatchTo__geonames_admin1Codes(array $data, $key_geonameId)
- {
- $this->filterCountry($data, 0);
- if (count($data) == 0) {
- return 0;
- }
- $sql = 'INSERT INTO `_geonames_admin1Codes` (`code`, `geonameid`, `name1`, `name2`)';
- return $this->InsertBatch($sql, $data);
- }
- /**
- * @param array $data
- * @param integer $key_geonameId
- *
- * @return int
- */
- private function InsertBatchTo__geonames_admin2Codes(array $data, $key_geonameId)
- {
- $this->filterCountry($data, 0);
- if (count($data) == 0) {
- return 0;
- }
- $sql = 'INSERT INTO `_geonames_admin2Codes` (`code`, `geonameid`, `name1`, `name2`)';
- return $this->InsertBatch($sql, $data);
- }
- /**
- * @param string $sql
- * @param array $data
- *
- * @return int
- */
- private function InsertBatch(&$sql, array &$data)
- {
- $data = $this->joinForInsert($data);
- $chunks_insert_row_count = count($data);
- $stmt = $this->pdo->query($sql . ' VALUES (' . join('),(', $data) . ');');
- if (is_object($stmt) && $stmt->rowCount() !== $chunks_insert_row_count) {
- echo('Must: ' . $chunks_insert_row_count . ' REAL: ' . $stmt->rowCount() . "\n");
- var_dump($stmt->rowCount());
- exit('BAD INSERT #1');
- } elseif (!is_object($stmt)) {
- var_dump($stmt);
- print_r($this->pdo->errorInfo());
- //echo($sql);
- exit('BAD INSERT #2');
- }
- return $stmt->rowCount();
- }
- /**
- * @param array $data
- * @param integer $key_geonameId
- *
- * @return array
- */
- private function filterGeonamesId(array &$data, $key_geonameId)
- {
- $geonamesID = array_unique(array_column($data, $key_geonameId));
- $foundGeonamesId = $this->pdo
- ->query('SELECT `geonameid` FROM `_geonames_cities` WHERE `geonameid` IN (' . join(',', $geonamesID) . ');')
- ->fetchAll(PDO::FETCH_COLUMN);
- $foundGeonamesId1 = $this->pdo
- ->query('SELECT `geonameid` FROM `_geonames_admin1Codes` WHERE `geonameid` IN (' . join(',', $geonamesID) . ');')
- ->fetchAll(PDO::FETCH_COLUMN);
- $foundGeonamesId2 = $this->pdo
- ->query('SELECT `geonameid` FROM `_geonames_admin2Codes` WHERE `geonameid` IN (' . join(',', $geonamesID) . ');')
- ->fetchAll(PDO::FETCH_COLUMN);
- $foundGeonamesId = array_unique(array_merge($foundGeonamesId, $foundGeonamesId1, $foundGeonamesId2));
- if (count($foundGeonamesId) == 0) {
- return [];
- }
- foreach ($data as $k => $v) {
- if (!in_array($v[$key_geonameId], $foundGeonamesId)) {
- unset($data[$k]);
- }
- }
- return $data;
- }
- /**
- * @param array $data
- *
- * @return array
- */
- private function joinForInsert(array $data)
- {
- foreach ($data as $k => $v) {
- $data[$k] = join(',', $v);
- }
- return $data;
- }
- /**
- * @param array $data
- * @param integer $key
- */
- private function filterCountry(array &$data, $key)
- {
- foreach ($data as $k => $v) {
- list($country) = explode('.', $v[$key]);
- $country = trim($country, ' \'');
- if (!in_array($country, $this->countries)) {
- unset($data[$k]);
- }
- }
- }
- public function find()
- {
- $q = $this->input->get('q');
- $this->load->library('MY_Sphinx');
- $sphinx = $this->my_sphinx->init();
- $data = $sphinx->searchCity($q);
- //print_r($data);
- $order_field = array_column($data, 'id');
- $result = $this->pdo->query('SELECT * FROM `_geonames_altnames` WHERE `alternateNameId` IN (' . join(',', array_column($data, 'id')) . ')
- ORDER BY FIELD(`alternateNameId`,' . join(',', $order_field) . ') ASC;')->fetchAll(\PDO::FETCH_ASSOC);
- print_r($result);
- }
- private function citiesToFullPath(array &$data)
- {
- if (!isset($this->_geonames_admin1Codes)) {
- $this->_geonames_admin1Codes = $this->pdo->query('SELECT * FROM `_geonames_admin1Codes`;')->fetchAll();
- $this->_geonames_admin2Codes = $this->pdo->query('SELECT * FROM `_geonames_admin2Codes`;')->fetchAll();
- keyBy($this->_geonames_admin1Codes, 'code');
- keyBy($this->_geonames_admin2Codes, 'code');
- }
- foreach ($data as $k => $city) {
- $geonamesForAlt = [$city['geonameid']];
- $admin1_key = ($city['country_code'] . '.' . $city['admin1_code']);
- $admin2_key = ($city['country_code'] . '.' . $city['admin1_code'] . '.' . $city['admin2_code']);
- if (mb_strlen($city['admin1_code']) && isset($this->_geonames_admin1Codes[$admin1_key])) {
- $data[$k]['admin1'] = $this->_geonames_admin1Codes[$admin1_key];
- $geonamesForAlt[] = $data[$k]['admin1']['geonameid'];
- }
- if (mb_strlen($city['admin1_code']) && mb_strlen($city['admin2_code']) && isset($this->_geonames_admin2Codes[$admin2_key])) {
- $data[$k]['admin2'] = $this->_geonames_admin1Codes[$admin2_key];
- $geonamesForAlt[] = $data[$k]['admin2']['geonameid'];
- }
- $altNmaes = $this->getAltNames($geonamesForAlt);
- $this->setTopNames($data[$k], $altNmaes);
- print_r($data[$k]);
- print_r($altNmaes);
- exit;
- }
- }
- private function getAltNames($geonameId)
- {
- $out = [];
- if (!is_array($geonameId)) {
- $geonameId[] = $geonameId;
- }
- $data = $this->pdo->query('SELECT * FROM `_geonames_altnames` WHERE `geonameid` IN (' . join(',', $geonameId) . ')
- AND `isolanguage` IN (\'\', \'en\', \'ru\', \'be\', \'kk\', \'uk\');')
- ->fetchAll();
- foreach ($geonameId as $id)
- {
- $out[$id] = [];
- }
- foreach ($data as $row)
- {
- $out[$row['geonameid']][] = $row;
- }
- return $out;
- }
- private function setTopNames(array &$city, array &$altNmaes)
- {
- $city['topName'] = $city['name'];
- if (isset($altNmaes[$city['geonameid']]) && count($altNmaes[$city['geonameid']]))
- {
- }
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement