Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
- /**
- * Script to create the language data in JSON format for ULS.
- *
- * Copyright (C) 2012 Alolita Sharma, Amir Aharoni, Arun Ganesh, Brandon Harris,
- * Niklas Laxström, Pau Giner, Santhosh Thottingal, Siebrand Mazeland and other
- * contributors. See CREDITS for a list.
- *
- * UniversalLanguageSelector is dual licensed GPLv2 or later and MIT. You don't
- * have to do anything special to choose one license or the other and you don't
- * have to notify anyone which license you are using. You are free to use
- * UniversalLanguageSelector in commercial projects as long as the copyright
- * header is left intact. See files GPL-LICENSE and MIT-LICENSE for details.
- *
- * @file
- * @ingroup Extensions
- * @licence GNU General Public Licence 2.0 or later
- * @licence MIT License
- */
// Standard boilerplate to define $IP: prefer the MW_INSTALL_PATH environment
// variable, otherwise use the directory three levels above this script.
$IP = getenv('MW_INSTALL_PATH');
if ($IP === false) {
    $IP = __DIR__ . '/../../..';
}
require_once("$IP/maintenance/commandLine.inc");

// Iterated below as language code => language name.
$languages = Language::fetchLanguageNames(null, 'all');

// $all:     language code => set of lower-cased names (the name is the array
//           key, the value is always true, so duplicates collapse).
// $buckets: bucket id => (lower-cased name => language code).
$all = array();
$buckets = array();

foreach ($languages as $code => $name) {
    $all[$code][strtolower($name)] = true;

    // NOTE(review): presumably the names of other languages as written in
    // language $code - confirm against LanguageNames::getNames().
    $langnames = LanguageNames::getNames($code, 0, 2);

    // Separate variable names so the outer $code/$name are not clobbered.
    foreach ($langnames as $translatedCode => $translatedName) {
        // Stored as a key, exactly like the name above, so that the bucket
        // loop below sees every name in the same shape.
        $all[$translatedCode][strtolower($translatedName)] = true;
    }
}

foreach ($all as $code => $names) {
    foreach (array_keys($names) as $name) {
        // PHP turns integer-like string keys into ints; restore the string.
        $name = (string)$name;
        if ($name === '') {
            // getBucket() has no bucket id for an empty name.
            continue;
        }
        $bucket = getBucket($name);
        $buckets[$bucket][$name] = $code;
    }
}
/**
 * Compute the id of the bucket a language name belongs to.
 *
 * The id is the code point returned by getCodepoint() for the name, reduced
 * modulo 1000. Code points below 1000 are therefore used as-is.
 *
 * This function only computes the id. It does not create or modify any
 * entry in the global $buckets table.
 *
 * @param string $name Language name to classify.
 * @return int|null Bucket id, or null when getCodepoint() yields no code
 *                  point for $name (for example an empty string).
 */
function getBucket($name) {
    $codepoint = getCodepoint($name);
    if ($codepoint === null) {
        return null;
    }
    return $codepoint % 1000;
}
/**
 * Decode the Unicode code point of the first character of a UTF-8 string.
 *
 * Handles 1-, 2-, 3- and 4-byte sequences. The input is assumed to be
 * well-formed UTF-8: continuation bytes are not validated.
 *
 * @param string $str UTF-8 encoded text.
 * @return int|null Code point of the first character, or null when $str is
 *                  empty or ends before the first sequence is complete.
 */
function getCodepoint($str) {
    $length = strlen($str);
    if ($length === 0) {
        return null;
    }

    $lead = ord($str[0]);
    if ($lead < 0x80) {
        // Single-byte (ASCII) character.
        return $lead;
    }

    // The lead byte determines the sequence length and contributes the
    // highest payload bits of the code point.
    if ($lead < 0xE0) {
        $size = 2;
        $codepoint = $lead & 0x1F;
    } elseif ($lead < 0xF0) {
        $size = 3;
        $codepoint = $lead & 0x0F;
    } else {
        // Four-byte sequence: characters outside the Basic Multilingual Plane.
        $size = 4;
        $codepoint = $lead & 0x07;
    }

    if ($length < $size) {
        // Truncated sequence: nothing sensible to decode.
        return null;
    }

    // Every following byte carries six payload bits.
    for ($i = 1; $i < $size; $i++) {
        $codepoint = ($codepoint << 6) | (ord($str[$i]) & 0x3F);
    }

    return $codepoint;
}
- /*
- foreach ($buckets as $bucketId => $names) {
- echo "Bucket $bucketId = " . count($buckets[$bucketId]) . "\n";
- }*/
echo "Total Buckets " . count($buckets) . "\n";

// Persist the bucket table with PHP serialize() and read it back, so the
// lookup below runs against exactly what was written to disk.
$serializedFile = 'langnames.ser';
if (file_put_contents($serializedFile, serialize($buckets)) === false) {
    fwrite(STDERR, "Could not write $serializedFile\n");
    exit(1);
}
$serializedBuckets = file_get_contents($serializedFile);
if ($serializedBuckets === false) {
    fwrite(STDERR, "Could not read $serializedFile\n");
    exit(1);
}
$buckets = unserialize($serializedBuckets);
if (!is_array($buckets)) {
    fwrite(STDERR, "Could not unserialize $serializedFile\n");
    exit(1);
}

// ===========================
// Lookup demo: resolve one language name to its code and time the lookup.
// microtime(true) returns a float number of seconds; the string form that
// microtime() returns by default cannot be subtracted meaningfully.
$start = microtime(true);
$search = "ഹിന്ദി";
$bucketId = getBucket($search);
// An absent bucket is treated as empty rather than read as an undefined index.
$bucket = isset($buckets[$bucketId]) ? $buckets[$bucketId] : array();
// Each bucket maps name => code, so the name can be looked up directly.
if (isset($bucket[$search])) {
    $code = $bucket[$search];
    $end = microtime(true);
    echo "$search is language " . $code;
    // Convert seconds to milliseconds to match the printed unit.
    echo " (Time taken: " . (($end - $start) * 1000) . "ms)\n";
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement