Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
<?php
/*
(c) Decker, 2012
*/

// Report every error class except notices.
error_reporting(E_ALL & ~E_NOTICE);

// Switch all locale categories to a Russian CP1251 locale. The candidate
// names are tried in order until the platform accepts one of them.
setlocale(
    LC_ALL,
    array('ru_RU.CP1251', 'rus_RUS.CP1251', 'Russian_Russia.1251')
);
/**
 * Return a sorted copy of an array, leaving the caller's array untouched.
 *
 * The copy is ordered with sort() using its default flags, so the result is
 * re-indexed from 0 and any original keys are discarded.
 *
 * @param array $arr Values to sort.
 * @return array The sorted, re-indexed values.
 */
function sort_array($arr)
{
    $sorted = $arr;
    sort($sorted);

    return $sorted;
}
// ---------------------------------------------------------------------------
// Count the words of a CP1251-encoded text that have at least one anagram
// among the other words of the same text.
//
// Every distinct (upper-cased) word is mapped to a "signature": its bytes in
// sorted order. Words sharing a signature are anagrams of each other. The
// script prints the number of distinct words whose signature is shared by
// more than one word, is at least 3 bytes long and consists only of bytes
// 0x7F-0xFF, "_" or "-".
// ---------------------------------------------------------------------------

$inputPath = 'H:\war.cp1251.txt';

// The second argument keeps the original include_path lookup behaviour.
$string = file_get_contents($inputPath, true);
if ($string === false) {
    // Previously a failed read was silently treated as empty input.
    throw new RuntimeException('Cannot read input file: ' . $inputPath);
}

// Drop all punctuation, then turn byte 0xA0 (the non-breaking space in
// CP1251) into an ordinary space so that it acts as a word separator.
$string = preg_replace('#[[:punct:]]+#', '', $string);
if ($string === null) {
    throw new RuntimeException('Punctuation stripping failed (PCRE error ' . preg_last_error() . ')');
}
$string = str_replace(chr(0x0A0), ' ', $string);

// One delimiter set for every strtok() call. The original used a narrower
// set for the first call, so the first token could keep "\r" or "\f".
// Backslash was also listed but can never occur after punctuation removal.
$delimiters = " \t\r\f\n";

// strtoupper() is ASCII-only as of PHP 8.2 regardless of the locale, so use
// mbstring with an explicit encoding whenever it is available.
$hasMbstring = function_exists('mb_strtoupper');

// Must start as an array: with no tokens the loop never runs and ksort()
// would otherwise receive an undefined variable.
$data = array();

$tok = strtok($string, $delimiters);
while ($tok !== false) {
    $word = trim($tok);
    $text = $hasMbstring ? mb_strtoupper($word, 'Windows-1251') : strtoupper($word);

    if ($text !== '') {
        // Signature = the word's bytes in sorted order. sort_array() takes a
        // single argument; the sort flag formerly passed here was ignored.
        $data[$text] = implode('', sort_array(str_split($text)));
    }

    $tok = strtok($delimiters);
}

// Order by word so the signatures come out in a stable order, then count how
// many distinct words map to each signature.
ksort($data);
$count = array_count_values($data);

// Loop-invariant: signatures made only of bytes 0x7F-0xFF, "_" or "-".
$signaturePattern = '/^[\x7F-\xFF_-]+$/';

$sum = 0;
foreach ($count as $signature => $cnt) {
    // All-digit signatures become integer array keys; normalise to string.
    $signature = (string) $signature;

    if ($cnt > 1 && strlen($signature) >= 3 && preg_match($signaturePattern, $signature)) {
        $sum += $cnt;
        // Uncomment to dump each signature and its word count as CSV:
        // echo $signature . ";" . $cnt . "\r\n";
    }
}

echo "Total: " . $sum;
?>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement