Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
/**
 * Demo script: groups look-alike product names and prints the resulting
 * map of shared prefix -> matching names.
 *
 * @see https://stackoverflow.com/a/58510782/1762224
 */
const data = [
  'apple ipad air 32gb', 'apple ipad air 64gb',
  'apple ipad mini 32gb', 'apple ipad mini 64gb',
  'apple iphone 65',
  'panasonic gh4',
  'samsung s2 galaxy', 'samsung s2 galaxy red',
  'samsung s3 galaxy'
];

// Sorted in place on purpose: groupData() reads this same array, so it has to
// be in the same order as the chunked list that is passed in below.
data.sort();

console.log(groupData(chunkData(data)));
/**
 * Builds an order-insensitive key for a phrase: its whitespace-separated
 * words, sorted (default code-unit order) and re-joined with single spaces.
 * Two phrases made of the same words in any order produce the same key.
 *
 * @param {string} str - Phrase to normalise.
 * @returns {string} The sorted words joined by a single space.
 */
function sortedValue(str) {
  // Trim and split on whitespace runs so that stray or repeated spaces do not
  // introduce empty "words" that would make otherwise equal phrases differ.
  return str.trim().split(/\s+/).sort().join(' ');
}
/**
 * Splits every phrase into its space-separated tokens and reorders each token
 * list so that "key" tokens come first, followed by the remaining tokens in
 * localeCompare order. A key is any token that is the first word of at least
 * one phrase in `data`.
 *
 * The input array and its strings are not modified.
 *
 * @param {string[]} data - Phrases to split.
 * @returns {string[][]} One reordered token list per input phrase.
 */
function chunkData(data) {
  const matrix = data.map(d => d.split(' '));
  // Set lookup instead of indexOf: the comparator runs many times per row.
  const keys = new Set(matrix.map(row => row[0]));
  matrix.forEach(row => {
    row.sort((a, b) => {
      const aIsKey = keys.has(a);
      const bIsKey = keys.has(b);
      if (aIsKey !== bIsKey) return aIsKey ? -1 : 1;
      // Two keys tie, so the (stable) sort keeps them in their original order
      // and the comparator stays consistent for every pair of tokens.
      if (aIsKey) return 0;
      return a.localeCompare(b);
    });
  });
  return matrix;
}
/**
 * Groups phrases whose token lists are judged similar by findSimilar().
 *
 * For each visited token list, the similar lists that follow it are collected,
 * mapped back to the original phrases in `source` (matching on the
 * order-insensitive key from sortedValue()), and stored under the trimmed
 * common prefix of those phrases.
 *
 * @param {string[][]} list - Token lists, e.g. the output of chunkData().
 * @param {string[]} [source=data] - Original phrases the token lists were
 *   derived from. Defaults to the module-level `data` array, which is what
 *   this function always read before the parameter existed.
 * @returns {Object<string, string[]>} Map of shared prefix -> phrases.
 *   A later group with the same prefix overwrites an earlier one.
 */
function groupData(list, source = data) {
  const groups = {};
  let i = 0;
  while (i < list.length) {
    const curr = list[i];
    const similar = findSimilar(curr, list.slice(i + 1), 0.75, 0.20);
    const joined = similar.map(x => x.join(' ')).concat(curr.join(' '));
    // Normalise the candidates once, then test membership per source phrase,
    // instead of re-normalising every candidate for every phrase.
    const wanted = new Set(joined.map(sortedValue));
    const final = source.filter(item => wanted.has(sortedValue(item)));
    // sharedStart() has nothing to work with when no source phrase matched.
    if (final.length > 0) {
      groups[sharedStart(final).trim()] = final;
    }
    // NOTE(review): advancing by the number of matches assumes they sit
    // contiguously from position i; entries in between that did not match are
    // never visited. Confirm this is intended.
    i += final.length || 1;
  }
  return groups;
}
/**
 * Returns the token lists in `words` that are similar to `word`.
 *
 * Token lists are compared position by position. For each pair of tokens the
 * score is the average of (shared prefix length / token length) over both
 * tokens. Comparison stops after the first pair that scores 0 or when the
 * shorter list is exhausted. A candidate is kept when either
 *   - the fraction of positions compared (including the one that stopped the
 *     walk) out of the longer list's length is >= macroThreshold, or
 *   - the score of the last compared pair is >= microThreshold.
 *
 * @param {string[]} word - Reference token list.
 * @param {string[][]} words - Candidate token lists.
 * @param {number} macroThreshold - Minimum fraction of positions compared.
 * @param {number} microThreshold - Minimum score of the last compared pair.
 * @returns {string[][]} The candidates that passed, in their original order.
 */
function findSimilar(word, words, macroThreshold, microThreshold) {
  // An empty token shares nothing; score it 0 rather than dividing by zero,
  // which would yield NaN and slip past the `wordDiff !== 0` stop condition.
  const overlap = (shared, token) =>
    (token.length === 0 ? 0 : shared.length / token.length);

  return words.filter(w => {
    const chunks = Math.max(word.length, w.length);
    const len = Math.min(word.length, w.length);
    let wordDiff = -1; // stays -1 when no pair was compared at all
    let step = 0;
    while (wordDiff !== 0 && step < len) {
      const x = word[step];
      const y = w[step];
      const z = sharedStart([x, y]);
      wordDiff = (overlap(z, x) + overlap(z, y)) / 2;
      step++;
    }
    const chunkDiff = step / chunks;
    return chunkDiff >= macroThreshold || wordDiff >= microThreshold;
  });
}
// https://stackoverflow.com/a/1917041/1762224
/**
 * Returns the longest prefix shared by every string in `array`.
 *
 * @param {string[]} array - Strings to compare; not modified.
 * @returns {string} The common prefix, or '' when there is none or the array
 *   is empty.
 */
function sharedStart(array) {
  if (array.length === 0) {
    return '';
  }
  // After a lexicographic sort the first and last entries differ the most, so
  // whatever prefix those two share is shared by everything in between.
  const sorted = array.concat().sort();
  const first = sorted[0];
  const last = sorted[sorted.length - 1];
  let i = 0;
  while (i < first.length && first.charAt(i) === last.charAt(i)) {
    i++;
  }
  return first.substring(0, i);
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement