irapilguy

Untitled

Dec 15th, 2021
902
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import { Tokenizer } from "../tokenizer";
  2. import { Report, Occurrence } from "./report";
  3. import { WinnowwingFilter } from "../WinnowwingFilter";
  4. import { Options } from "../options";
  5. import { HashFilter } from "../hashFilter";
  6.  
  /**
   * Builds a hash index over the fingerprints of tokenized files and reports
   * every fingerprint hash that has been seen before.
   *
   * The index is stored on the instance, so occurrences recorded by one
   * `cloneDetector` call are still matched against by later calls on the
   * same `Indexer`.
   *
   * NOTE(review): `Hash`, `Range`, `Region` and the element type of `files`
   * are not imported in the visible part of this file. Confirm they are
   * brought into scope; the global DOM `File` / `Range` are presumably not
   * the intended types.
   */
  export class Indexer {
    private readonly options: Options;
    private readonly tokenizer: Tokenizer | undefined;
    private readonly hashFilter: HashFilter;
    private readonly index: Map<Hash, Array<Occurrence>> = new Map();

    /**
     * @param options Settings for this indexer. `k` and `windowSize` are
     *   passed to the winnowing filter; the whole object is handed to every
     *   `Report` created by `cloneDetector`.
     * @param tokenizer Tokenizer used by `cloneDetector`. Optional so that
     *   existing `new Indexer(options)` call sites keep compiling, but
     *   `cloneDetector` throws when it is missing.
     */
    constructor(options: Options = new Options(), tokenizer?: Tokenizer) {
      // Keep the options around: cloneDetector needs them to build a Report.
      this.options = options;
      this.tokenizer = tokenizer;
      // Use the class this file actually imports; `WinnowFilter` was never
      // in scope.
      this.hashFilter = new WinnowwingFilter(options.k, options.windowSize);
    }

    /**
     * Tokenizes the given files, fingerprints each of them and records every
     * fingerprint in the index. Whenever a hash is already present in the
     * index, the new occurrence is reported together with all earlier
     * occurrences of that hash.
     *
     * @param files Files to tokenize and fingerprint.
     * @returns The report, after `finish()` has been called on it.
     * @throws Error when no tokenizer was supplied to the constructor.
     */
    public async cloneDetector(files: File[]): Promise<Report> {
      const tokenizer = this.tokenizer;
      if (tokenizer === undefined) {
        throw new Error(
          "Indexer.cloneDetector requires a tokenizer; pass one to the constructor"
        );
      }

      const tokenizedFiles = files.map(f => tokenizer.tokenizeFile(f));
      const report = new Report(this.options);

      for (const file of tokenizedFiles) {
        // Position of the current fingerprint within this file.
        let kgram = 0;
        for await (
          const { data, hash, start, stop }
          of this.hashFilter.fingerprints(file.Ast)
        ) {
          file.kgrams.push(new Range(start, stop));

          // NOTE(review): the original literal was not valid syntax
          // (`data, Region.merge(...)` with no key for the region). This
          // assumes each fingerprint carries a `data` field and that the
          // merged region belongs in a `location` property. Confirm against
          // the `Occurrence` type in ./report.
          const location = Region.merge(
            file.mapping[start],
            file.mapping[stop]
          );
          const part: Occurrence = {
            file,
            side: { index: kgram, start, stop, data, location }
          };

          // Look up whether the index already contains this hash.
          const matches = this.index.get(hash);
          if (matches) {
            // Report against every earlier occurrence first, then store the
            // new one so it is not matched with itself.
            report.addOccurrences(hash, part, ...matches);
            matches.push(part);
          } else {
            this.index.set(hash, [part]);
          }
          kgram += 1;
        }
      }

      report.finish();
      return report;
    }
  }
Advertisement
Add Comment
Please, Sign In to add comment