Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- using System;
- using System.Collections.Generic;
- using System.Linq;
- using System.Text;
- using System.IO;
- namespace SearchEngineWF.Data_Access_Layer
- {
/// <summary>
/// Builds a term index from the data held in <c>Archive</c> and persists it to two text files:
/// "Posting.txt" (one line per term/document pair) and "Index.txt" (one line per term).
/// </summary>
/// <remarks>
/// The methods read and write static state on <c>Archive</c>, which is declared outside this file.
/// <see cref="CreatePosting"/> looks up the IDF stored by <see cref="CalculateIDF"/>, which in turn
/// counts the entries produced by <see cref="CalculateTerms"/>, so they are expected to be called
/// in that order. Numbers are written and parsed with the invariant culture so that the
/// comma-separated files stay well-formed regardless of the machine's regional settings.
/// </remarks>
class Indexer
{
    private const string PostingFileName = "Posting.txt";
    private const string IndexFileName = "Index.txt";

    // Fully qualified so the block does not depend on an extra using directive.
    private static readonly System.Globalization.CultureInfo Invariant =
        System.Globalization.CultureInfo.InvariantCulture;

    /// <summary>
    /// Temporary dictionary: term name -> (document name -> term frequency).
    /// Term keys are compared case-insensitively (ordinal).
    /// </summary>
    public static Dictionary<string, Dictionary<string, double>> dic_Indexer;

    static Indexer()
    {
        dic_Indexer = new Dictionary<string, Dictionary<string, double>>(StringComparer.OrdinalIgnoreCase);
    }

    /// <summary>
    /// Fills <see cref="dic_Indexer"/> with the term frequency of every term in every document.
    /// </summary>
    /// <remarks>
    /// Documents are visited by consecutive number starting at 0 until
    /// <c>Archive.m_dTerms</c> has no entry for the next number. The frequency stored is the
    /// value recorded for the term divided by the number of entries (distinct terms) in that
    /// document's term table.
    /// </remarks>
    public static void CalculateTerms()
    {
        int docNum = 0;
        while (Archive.m_dTerms.ContainsKey(docNum))
        {
            string docName = Archive.m_dicDocNames[docNum];
            var docTerms = Archive.m_dTerms[docNum];
            double distinctTerms = (double)docTerms.Count;

            foreach (string term in docTerms.Keys)
            {
                double tf = (double)docTerms[term] / distinctTerms;

                // Single lookup instead of ContainsKey followed by the indexer.
                Dictionary<string, double> postings;
                if (!dic_Indexer.TryGetValue(term, out postings))
                {
                    postings = new Dictionary<string, double>();
                    dic_Indexer[term] = postings;
                }

                postings[docName] = tf;
            }

            docNum++;
        }
    }

    /// <summary>
    /// Computes log2(number of documents / number of documents containing the term) for every
    /// term in <see cref="dic_Indexer"/> and stores it in <c>Archive.m_dicIndexer</c> together
    /// with a placeholder file position of -1.
    /// </summary>
    public static void CalculateIDF()
    {
        int numOfDocs = Archive.m_dicDocNames.Count();
        foreach (KeyValuePair<string, Dictionary<string, double>> termEntry in dic_Indexer)
        {
            // Divide as doubles: integer division would truncate the ratio before the logarithm
            // (e.g. 3 / 2 == 1, giving an IDF of 0 instead of log2(1.5)).
            double idf = Math.Log((double)numOfDocs / termEntry.Value.Count, 2);
            Archive.m_dicIndexer[termEntry.Key] = new Tuple<double, long>(idf, -1); // default position in file is -1.
        }
    }

    /// <summary>
    /// Writes "Posting.txt" with one line per term/document pair in the form
    /// <c>term,doc,tf,weight,positions</c>, where positions are joined with '-', and records in
    /// <c>Archive.m_dicIndexer</c> the zero-based line index at which each term's postings start.
    /// </summary>
    /// <remarks>
    /// weight = tf * idf * (1 / (1 + sum of the term's positions in the document)).
    /// </remarks>
    public static void CreatePosting()
    {
        long pointer = 0; // line index of the next posting line to be written

        // 'using' guarantees the file is flushed and closed even if a lookup below throws.
        using (StreamWriter sw = new StreamWriter(PostingFileName))
        {
            foreach (KeyValuePair<string, Dictionary<string, double>> termEntry in dic_Indexer)
            {
                string term = termEntry.Key;
                double idf = Archive.m_dicIndexer[term].Item1;
                Archive.m_dicIndexer[term] = new Tuple<double, long>(idf, pointer);

                foreach (KeyValuePair<string, double> docEntry in termEntry.Value)
                {
                    string doc = docEntry.Key;
                    double termFreq = docEntry.Value;
                    var termPositions = Archive.m_TermsPointers[doc][term];

                    double totalOfPositions = 1; // starts at 1 so the divisor below is never zero for non-negative positions
                    foreach (int position in termPositions)
                    {
                        totalOfPositions += position;
                    }

                    double weight = termFreq * idf * (1 / totalOfPositions);
                    string positions = string.Join("-", termPositions);

                    // Invariant culture keeps ',' out of the formatted numbers, since ',' is the field separator.
                    sw.WriteLine(string.Format(Invariant, "{0},{1},{2},{3},{4}", term, doc, termFreq, weight, positions));
                    pointer++;
                }
            }
        }
    }

    /// <summary>
    /// Writes "Index.txt" with one line per entry of <c>Archive.m_dicIndexer</c> in the form
    /// <c>term,idf,position</c>.
    /// </summary>
    public static void CreateIndex()
    {
        using (StreamWriter sw = new StreamWriter(IndexFileName))
        {
            foreach (string term in Archive.m_dicIndexer.Keys)
            {
                var entry = Archive.m_dicIndexer[term];
                sw.WriteLine(string.Format(Invariant, "{0},{1},{2}", term, entry.Item1, entry.Item2));
            }
        }
    }

    /// <summary>
    /// Loads "Index.txt" into <c>Archive.m_dicIndexer</c>.
    /// </summary>
    /// <returns>
    /// <c>true</c> when every line was read and parsed; <c>false</c> when the file could not be
    /// read or a line is malformed. Entries loaded before a failure remain in the dictionary.
    /// </returns>
    public static bool LoadIndexFromFile()
    {
        try
        {
            foreach (string line in File.ReadAllLines(IndexFileName))
            {
                string[] values = line.Split(',');
                if (values.Length < 3)
                {
                    return false;
                }

                // The last two fields are the numbers; everything before them is the term,
                // so a term that itself contains ',' is still recovered intact.
                string term = string.Join(",", values, 0, values.Length - 2);

                double idf;
                long position;
                if (!double.TryParse(values[values.Length - 2], System.Globalization.NumberStyles.Float, Invariant, out idf) ||
                    !long.TryParse(values[values.Length - 1], System.Globalization.NumberStyles.Integer, Invariant, out position))
                {
                    return false;
                }

                // Key by the term (first field), not by the IDF text.
                Archive.m_dicIndexer[term] = new Tuple<double, long>(idf, position);
            }

            return true;
        }
        catch (Exception)
        {
            // Deliberate best-effort: a missing or unreadable index file is reported as "not loaded".
            return false;
        }
    }

    /// <summary>
    /// Empties the temporary <see cref="dic_Indexer"/> dictionary.
    /// </summary>
    public static void ClearMemory()
    {
        dic_Indexer.Clear();
    }
}
- }
Add Comment
Please, Sign In to add comment