Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- "use strict";
/**
 * WordsCombo searches a word list for a small set of short fragments from
 * which many whole words can be assembled.
 *
 * Every usable word is cut into consecutive fragments of one randomly chosen
 * length (frag_min_len..frag_max_len). A word "matches" a fragment set when
 * all of its fragments belong to that set. findMatchingWords() starts from a
 * random fragment set and keeps swapping the least useful fragment for an
 * untried one until enough words match.
 *
 * Depends on a global `Class.extend` helper defined outside this file.
 * NOTE(review): `init` is presumably invoked by that helper as the
 * constructor -- confirm against the helper's implementation.
 */
var WordsCombo = Class.extend(
{
    // When true, progress information is written with console.log.
    debug: true,
    // Words shorter than this are ignored by splitWords().
    min_word_len: 4,
    // Inclusive bounds of the fragment length chosen at random for each word.
    frag_min_len: 2,
    frag_max_len: 3,
    // Size of the starting fragment set built by getRandomFragments().
    num_fragments: 12,
    // Lower-cased copy of the word list given to init().
    all_words: null,
    // word -> array of its distinct fragments (built by splitWords()).
    words_map: null,
    // fragment -> array of words containing it (built by splitWords()).
    all_fragments: null,
    // Array of {s: fragment, count: number of words using it}, most used first.
    all_fragments_sorted: null,
    // Fragment strings of the currently selected set.
    fragments: null,
    // Words whose fragments are all contained in `fragments`.
    matching_words: null,

    /**
     * Stores a lower-cased copy of the word list.
     *
     * @param {string[]} words Source words. The array is copied, so the
     *     caller's array is left untouched.
     */
    init: function(words)
    {
        var source = words || [];
        var lowered = [];
        for (var i = 0; i < source.length; i++)
        {
            lowered.push(source[i].toLowerCase());
        }
        this.all_words = lowered;
    },

    /**
     * Builds a fragment set for which at least `min_count` words match.
     *
     * Re-splits all words, draws a random starting set and then repeatedly
     * replaces the least used fragment with the most common fragment that has
     * not been tried yet. The result is left in `this.fragments` and
     * `this.matching_words`.
     *
     * @param {number} min_count Minimum number of matching words wanted.
     * @returns {boolean} true when `min_count` was reached, false when the
     *     search ran out of fragments to try.
     */
    findMatchingWords: function(min_count)
    {
        this.splitWords();
        this.getRandomFragments();
        this.getMatchingWordsFromFragmentsList();
        if (this.matching_words.length >= min_count)
        {
            if (this.debug) console.log('words=', this.matching_words.length);
            return true;
        }

        var t = new Date().getTime();
        var n = 0;
        // Fragments that were already thrown out; they are never re-added,
        // which guarantees that the loop below terminates.
        var except_frags = [];
        while (this.matching_words.length < min_count)
        {
            var except_frag = this.removeLeastUsedFragment(this.getFragmentsWordCount());
            if (except_frag === null) break; // fragment set is empty
            except_frags.push(except_frag);
            var added = this.addFragment(except_frags);
            this.getMatchingWordsFromFragmentsList();
            n++;
            // With no replacement left, further removals can only lose words.
            if (!added) break;
        }
        if (this.debug) console.log('words=', this.matching_words.length, 'n=', n, 't=', ( new Date().getTime() - t ) );
        if (this.debug) console.log(this.fragments, this.matching_words);
        return this.matching_words.length >= min_count;
    },

    /**
     * Appends the first entry of `all_fragments_sorted` that is neither
     * excluded nor already selected to `this.fragments`.
     *
     * @param {string[]} except_frags Fragments that must not be added.
     * @returns {boolean} true when a fragment was added, false when none was
     *     available.
     */
    addFragment: function(except_frags)
    {
        for (var i = 0; i < this.all_fragments_sorted.length; i++)
        {
            var candidate = this.all_fragments_sorted[i].s;
            if (except_frags.indexOf(candidate) === -1 && this.fragments.indexOf(candidate) === -1)
            {
                if (this.debug) console.log('+frag:', candidate);
                this.fragments.push(candidate);
                return true;
            }
        }
        return false;
    },

    /**
     * Removes the fragment with the lowest word count from `this.fragments`
     * and drops every matching word that used it. On ties the first entry of
     * `frag_words_count` wins.
     *
     * @param {Array<{frag: string, count: number}>} frag_words_count Usage
     *     counts as returned by getFragmentsWordCount().
     * @returns {?string} The removed fragment, or null when
     *     `frag_words_count` is empty.
     */
    removeLeastUsedFragment: function(frag_words_count)
    {
        var least_frag = null;
        for (var i = 0; i < frag_words_count.length; i++)
        {
            if (least_frag === null || frag_words_count[i].count < least_frag.count)
                least_frag = frag_words_count[i];
        }
        if (least_frag === null) return null;

        if (this.debug) console.log('-frag:', least_frag.frag);
        // Guard the index: splice(-1, 1) would silently remove the last entry.
        var pos = this.fragments.indexOf(least_frag.frag);
        if (pos !== -1) this.fragments.splice(pos, 1);
        if (least_frag.count === 0) return least_frag.frag; // no word is attached to it

        var kept = [];
        for (var w = 0; w < this.matching_words.length; w++)
        {
            var word = this.matching_words[w];
            if (this.words_map[word].indexOf(least_frag.frag) >= 0)
            {
                if (this.debug) console.log('-word:', word);
            }
            else
            {
                kept.push(word);
            }
        }
        this.matching_words = kept;
        return least_frag.frag;
    },

    /**
     * Counts, for every fragment, how many of the current matching words
     * contain it. Selected fragments used by no matching word are reported
     * with a count of 0.
     *
     * @returns {Array<{frag: string, count: number}>} One entry per fragment:
     *     fragments of matching words first, unused selected fragments last.
     */
    getFragmentsWordCount: function()
    {
        var counts = Object.create(null);
        var frag;
        for (var j = 0; j < this.matching_words.length; j++)
        {
            var word_frags = this.words_map[this.matching_words[j]];
            for (var k = 0; k < word_frags.length; k++)
            {
                frag = word_frags[k];
                counts[frag] = (counts[frag] || 0) + 1;
            }
        }
        for (var i = 0; i < this.fragments.length; i++)
        {
            frag = this.fragments[i];
            if (counts[frag] === undefined) counts[frag] = 0;
        }

        var frag_words_count = [];
        for (frag in counts)
        {
            frag_words_count.push({
                frag: frag,
                count: counts[frag]
            });
        }
        return frag_words_count;
    },

    /**
     * Fills `this.fragments` with up to `num_fragments` distinct fragments
     * drawn at random from the first 100 entries of `all_fragments_sorted`.
     * Fewer fragments are selected when the pool is smaller than that.
     */
    getRandomFragments: function()
    {
        var pool = this.all_fragments_sorted.slice(0, 100);
        var wanted = Math.min(this.num_fragments, pool.length);
        var picked = [];
        // Partial Fisher-Yates shuffle: every draw is distinct and the loop
        // always terminates, unlike retry-until-unique sampling.
        for (var i = 0; i < wanted; i++)
        {
            var j = i + Math.floor(Math.random() * (pool.length - i));
            var swap = pool[i];
            pool[i] = pool[j];
            pool[j] = swap;
            picked.push(pool[i].s);
        }
        this.fragments = picked;
    },

    /**
     * Recomputes `this.matching_words`: all words for which every fragment is
     * part of `this.fragments`. Only words containing at least one selected
     * fragment are examined.
     */
    getMatchingWordsFromFragmentsList: function()
    {
        var words_map = this.words_map;
        var selected = Object.create(null); // lookup table of selected fragments
        for (var f = 0; f < this.fragments.length; f++)
        {
            selected[this.fragments[f]] = true;
        }

        var checked = Object.create(null); // words that were already evaluated
        var matching_words = [];
        var iter = 0;
        for (var k = 0; k < this.fragments.length; k++)
        {
            var candidates = this.all_fragments[this.fragments[k]] || [];
            for (var j = 0; j < candidates.length; j++)
            {
                var word = candidates[j];
                iter++;
                if (checked[word]) continue;
                checked[word] = true;

                var word_frags = words_map[word];
                var matches = true;
                for (var i = 0; i < word_frags.length; i++)
                {
                    if (!selected[word_frags[i]])
                    {
                        matches = false;
                        break;
                    }
                }
                if (matches) matching_words.push(word);
            }
        }
        if (this.debug) console.log('mw=', matching_words.length, 'iter=', iter);
        this.matching_words = matching_words;
    },

    /**
     * Rebuilds `words_map`, `all_fragments` and `all_fragments_sorted` from
     * `all_words`.
     *
     * Each word of at least `min_word_len` characters is cut into consecutive
     * chunks of one random length between `frag_min_len` and `frag_max_len`.
     * A word is skipped when its last chunk would be shorter than
     * `frag_min_len`. Repeated words are indexed once.
     */
    splitWords: function()
    {
        // Prototype-less maps: a word such as "constructor" must not collide
        // with a property inherited from Object.prototype.
        var words_map = Object.create(null);
        var fragments = Object.create(null);
        this.words_map = words_map;
        this.all_fragments = fragments;

        var words = this.all_words;
        var num_words = 0;
        for (var i = 0; i < words.length; i++)
        {
            var word = words[i];
            if (word.length < this.min_word_len) continue;
            if (words_map[word] !== undefined) continue; // already indexed

            var len = this.frag_min_len + Math.floor(Math.random() * (1 + this.frag_max_len - this.frag_min_len));
            // A chunk length below 1 (or NaN) would never consume the word.
            if (!(len >= 1)) continue;

            var parts = [];
            var usable = true;
            for (var pos = 0; pos < word.length; pos += len)
            {
                var part = word.slice(pos, pos + len);
                if (part.length < this.frag_min_len)
                {
                    usable = false;
                    break;
                }
                if (parts.indexOf(part) === -1) parts.push(part);
            }
            if (!usable || parts.length === 0) continue;

            num_words++;
            words_map[word] = parts;
            for (var j = 0; j < parts.length; j++)
            {
                if (fragments[parts[j]] === undefined) fragments[parts[j]] = [];
                fragments[parts[j]].push(word);
            }
        }

        var frag_list = [];
        for (var k in fragments)
        {
            frag_list.push({
                s: k,
                // Numeric usage count; the comparator below needs a number.
                count: fragments[k].length
            });
        }
        frag_list.sort(function(a, b)
        {
            return b.count - a.count;
        });
        this.all_fragments_sorted = frag_list;
        if (this.debug) console.log('num_words: ', num_words);
    }
});
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement