Advertisement
MrPolywhirl

Chunk and Group

Oct 23rd, 2019
296
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. /**
  2.  * @see https://stackoverflow.com/a/58510782/1762224
  3.  */
  // Sample product names used to demonstrate the chunk-and-group pipeline.
  var data = [
    'apple ipad air 32gb',
    'apple ipad air 64gb',
    'apple ipad mini 32gb',
    'apple ipad mini 64gb',
    'apple iphone 65',
    'panasonic gh4',
    'samsung s2 galaxy',
    'samsung s2 galaxy red',
    'samsung s3 galaxy'
  ];

  // Sort in place on purpose: groupData() filters this same `data` array, so
  // it must be in the same order as the rows handed to chunkData().
  data.sort();
  console.log(groupData(chunkData(data)));
  14.  
  /**
   * Builds an order-independent form of a space-separated string by sorting
   * its tokens with the default (code-unit) ordering and re-joining them.
   *
   * @param {string} str - Space-separated tokens.
   * @returns {string} The same tokens joined by single spaces, in sorted order.
   */
  function sortedValue(str) {
    const tokens = str.split(' ');
    tokens.sort();
    return tokens.join(' ');
  }
  18.  
  /**
   * Splits every entry into space-separated tokens and reorders the tokens of
   * each row so that "key" tokens come first, followed by the remaining tokens
   * in locale order. A key is any token that is the first token of some entry.
   *
   * The input array and its strings are not modified; new token arrays are
   * created and returned.
   *
   * @param {string[]} data - Space-separated entries.
   * @returns {string[][]} One token array per entry, in the input order.
   */
  function chunkData(data) {
    const matrix = data.map(entry => entry.split(' '));
    // A Set gives constant-time lookups inside the comparator.
    const keys = new Set(matrix.map(row => row[0]));
    const rank = token => (keys.has(token) ? 0 : 1);
    for (const row of matrix) {
      // Compare ranks first and fall back to locale order on ties. This keeps
      // the comparator consistent (compare(a, b) === -compare(b, a)) even when
      // both tokens are keys, so the result does not depend on the engine's
      // sort algorithm.
      row.sort((a, b) => rank(a) - rank(b) || a.localeCompare(b));
    }
    return matrix;
  }
  31.  
  /**
   * Walks the token rows and buckets the original entries into groups keyed by
   * their shared leading text.
   *
   * For each visited row, the similar rows that follow it are found with
   * findSimilar(). Those rows are mapped back to the original strings by
   * comparing order-independent forms (sortedValue) against the module-level
   * `data` array. The group key is the trimmed common prefix of the matched
   * strings. The cursor then skips as many rows as were matched (at least one).
   *
   * @param {string[][]} list - Token rows, e.g. as produced by chunkData().
   * @returns {Object<string, string[]>} Map from shared prefix to the matching
   *   entries of `data`; a later group with the same key replaces an earlier one.
   */
  function groupData(list) {
    const groups = {};
    for (let cursor = 0; cursor < list.length; ) {
      const current = list[cursor];
      const neighbours = findSimilar(current, list.slice(cursor + 1), 0.75, 0.20);
      // Order-independent signatures of the current row and its similar rows.
      const signatures = new Set(
        neighbours.concat([current]).map(tokens => sortedValue(tokens.join(' ')))
      );
      const members = data.filter(item => signatures.has(sortedValue(item)));
      const key = sharedStart(members).trim();
      groups[key] = members;
      // Always advance, even if nothing matched.
      cursor += members.length || 1;
    }
    return groups;
  }
  50.  
  /**
   * Filters `words` down to the rows considered similar to `word`.
   *
   * Rows are compared position by position. For each pair of tokens the score
   * is the average share of each token covered by their common prefix.
   * Comparison stops at the first pair with a score of exactly 0, or when the
   * shorter row runs out. A row is kept when either
   *   - the fraction of positions visited (relative to the longer row) reaches
   *     `macroThreshold`, or
   *   - the score of the last compared pair reaches `microThreshold`.
   *
   * @param {string[]} word - Reference row of tokens.
   * @param {string[][]} words - Candidate rows.
   * @param {number} macroThreshold - Minimum fraction of positions visited.
   * @param {number} microThreshold - Minimum score of the last compared pair.
   * @returns {string[][]} The candidate rows that passed either threshold.
   */
  function findSimilar(word, words, macroThreshold, microThreshold) {
    return words.filter(candidate => {
      const longest = Math.max(word.length, candidate.length);
      const shortest = Math.min(word.length, candidate.length);
      let score = -1; // stays -1 when no position could be compared
      let visited = 0;
      for (; score !== 0 && visited < shortest; visited++) {
        const left = word[visited];
        const right = candidate[visited];
        const prefixLength = sharedStart([left, right]).length;
        const leftShare = prefixLength / left.length;
        const rightShare = prefixLength / right.length;
        score = (leftShare + rightShare) / 2;
      }
      const coverage = visited / longest;
      return coverage >= macroThreshold || score >= microThreshold;
    });
  }
  71.  
  72. // https://stackoverflow.com/a/1917041/1762224
  /**
   * Returns the longest prefix shared by every string in `array`.
   *
   * After sorting a copy, only the first and last strings need comparing: the
   * smallest and largest strings bound the common prefix of the whole set.
   * The input array is not modified.
   *
   * @param {string[]} array - Strings to compare.
   * @returns {string} The common prefix, or '' when the array is empty or the
   *   strings share no leading characters.
   */
  function sharedStart(array) {
    // Without this guard an empty array would throw on `undefined.length`.
    if (array.length === 0) {
      return '';
    }
    const sorted = array.concat().sort();
    const first = sorted[0];
    const last = sorted[sorted.length - 1];
    let end = 0;
    while (end < first.length && first.charAt(end) === last.charAt(end)) {
      end++;
    }
    return first.substring(0, end);
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement