Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import { Tokenizer } from "../tokenizer";
- import { Report, Occurrence } from "./report";
- import { WinnowwingFilter } from "../WinnowwingFilter";
- import { Options } from "../options";
- import { HashFilter } from "../hashFilter";
/**
 * Builds an index from fingerprint hashes to the places where they occur and
 * reports every hash that is seen more than once.
 *
 * The index is stored on the instance and is never cleared by this class, so
 * occurrences recorded during one `cloneDetector` call are also matched
 * against by later calls on the same instance.
 *
 * NOTE(review): `Hash`, `Range`, `Region` and `File` are used below but are
 * not among the imports visible next to this class — confirm they are
 * imported (or declared) elsewhere in the file.
 */
export class Indexer {
  /** Options this indexer was created with; also handed to every `Report`. */
  private readonly options: Options;
  /** Tokenizer used to turn input files into tokenized files, if supplied. */
  private readonly tokenizer?: Tokenizer;
  /** Produces the fingerprints (hash + start/stop positions) that get indexed. */
  private readonly hashFilter: HashFilter;
  /** All occurrences recorded so far, grouped by fingerprint hash. */
  private readonly index: Map<Hash, Array<Occurrence>> = new Map();

  /**
   * @param options   Supplies `k` and `windowSize` for the hash filter.
   * @param tokenizer Tokenizer used by `cloneDetector`. Optional only to stay
   *                  compatible with callers that pass just `options`;
   *                  analysing a non-empty file list without one throws.
   */
  constructor(options: Options = new Options(), tokenizer?: Tokenizer) {
    this.options = options;
    this.tokenizer = tokenizer;
    this.hashFilter = new WinnowwingFilter(options.k, options.windowSize);
  }

  /**
   * Tokenizes the given files, fingerprints each of them and records every
   * fingerprint in the index. Whenever a hash is already present in the
   * index, the new occurrence is reported together with all earlier ones.
   *
   * @param files Files to analyse.
   * @returns The finished report for this run.
   * @throws Error if a file has to be tokenized but no tokenizer was given to
   *         the constructor.
   */
  public async cloneDetector(files: File[]): Promise<Report> {
    const tokenizer = this.tokenizer;
    const tokenizedFiles = files.map(f => {
      // Checked lazily so that an empty `files` array still yields an empty
      // report instead of failing.
      if (tokenizer === undefined) {
        throw new Error("Indexer: no tokenizer was provided to the constructor");
      }
      return tokenizer.tokenizeFile(f);
    });

    const report = new Report(this.options);

    for (const file of tokenizedFiles) {
      // Position of the current fingerprint within this file.
      let kgram = 0;

      // NOTE(review): `Ast` is capitalised unlike `kgrams` / `mapping` —
      // confirm the property name against the tokenizer's return type.
      for await (
        const { data, hash, start, stop } of this.hashFilter.fingerprints(file.Ast)
      ) {
        file.kgrams.push(new Range(start, stop));

        // NOTE(review): the original object literal read
        // `data, Region.merge(...)`, which does not parse and never bound
        // `data`. It is assumed here that `data` comes from the fingerprint
        // and that the merged region belongs under `location` — confirm
        // against the `Occurrence` declaration.
        const location = Region.merge(file.mapping[start], file.mapping[stop]);
        const part: Occurrence = {
          file,
          side: { index: kgram, start, stop, data, location }
        };

        // Look whether the index already contains the given hash.
        const matches = this.index.get(hash);
        if (matches) {
          // Report before pushing so `part` is not paired with itself.
          report.addOccurrences(hash, part, ...matches);
          matches.push(part);
        } else {
          this.index.set(hash, [part]);
        }

        kgram += 1;
      }
    }

    report.finish();
    return report;
  }
}
Advertisement
Add Comment
Please, Sign In to add comment