Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #define _CRT_SECURE_NO_DEPRECATE
- #include <stdio.h>
- #include <cstdlib>
- #include <iostream>
- #include <string>
- #include <fstream>
- #include <vector>
- #include <map>
- #include <set>
- #include <algorithm>
- #include <exception>
- using namespace std;
- class InvalidUsageException : public exception{
- virtual const char* what() const throw() {
- return "Invalid regular expression syntax";
- }
- };
- class InvalidExpressionFileException : public exception{
- virtual const char* what() const throw() {
- return "Invalid expression file";
- }
- };
- class RegularExpression{
- static const int BEGIN_STATE = 1;
- static const int NULL_STATE = 0;
- vector<bool> terminalState;
- map <int, map<char, int> > nextState;
- vector <vector <int> > epsEdges;
- int stateNumber;
- int SecondAutIndex(int ind) {
- return ind + stateNumber - 1;
- }
- void DfsEpsEdges(int st, set <int> &states) {
- states.insert(st);
- for (int i = 0, l = epsEdges[st].size(); i < l; ++i) {
- int w = epsEdges[st][i];
- if (states.find(w) == states.end())
- DfsEpsEdges(w, states);
- }
- }
- void CopyAutStructure(const RegularExpression &that) {
- for (map <int, map<char, int> >::const_iterator it = that.nextState.begin(), end = that.nextState.end(); it != end; it++)
- for (map <char, int>::const_iterator it1 = (it->second).begin(), end1 = (it->second).end(); it1 != end1; it1++) {
- int from = it->first;
- int to = it1->second;
- char symb = it1->first;
- nextState[SecondAutIndex(from)][symb] = SecondAutIndex(to);
- }
- epsEdges.resize(SecondAutIndex(that.stateNumber));
- for (int i = 0, len = that.epsEdges.size(); i < len; ++i)
- for (int j = 0, len2 = that.epsEdges[i].size(); j < len2; ++j)
- epsEdges[SecondAutIndex(i)].push_back(SecondAutIndex(that.epsEdges[i][j]));
- terminalState.resize(SecondAutIndex(that.stateNumber));
- for (int i = 1; i < that.stateNumber; ++i)
- terminalState[SecondAutIndex(i)] = that.terminalState[i];
- }
- bool TerminalMultistateQ(const set<int> &states){
- for (set <int>::iterator it = states.begin(), end = states.end(); it != end; it++)
- if (terminalState[*it])
- return true;
- return false;
- }
- bool ReserveCharacter(char c){
- return c == '(' || c == ')'|| c == '+' || c == '*';
- }
- RegularExpression Parse(int l, int r, string &s) {
- if (r - l == 1) {
- if (!ReserveCharacter(s[l])) {
- return RegularExpression(s[l]);
- }
- else {
- throw InvalidUsageException();
- }
- }
- if (!(s[l] == '(' && s[r - 1] == ')')){
- throw InvalidUsageException();
- }
- l++; r--;
- if (s[l] == '+')
- return +(Parse(l + 1, r, s));
- int opSymbPos = -1;
- int balance = 0;
- for (int i = l; i < r; ++i) {
- if (balance < 0)
- throw InvalidUsageException();
- if (balance == 0 && (s[i] == '+' || s[i] == '*')) {
- opSymbPos = i;
- break;
- }
- if (s[i] == '(') {
- balance++;
- }
- if (s[i] == ')') {
- balance--;
- }
- }
- if (opSymbPos == -1 || opSymbPos >= r) {
- throw InvalidUsageException();
- }
- if (s[opSymbPos] == '+')
- return Parse(l, opSymbPos, s) + Parse(opSymbPos + 1, r, s);
- if (s[opSymbPos] == '*')
- return Parse(l, opSymbPos, s) * Parse(opSymbPos + 1, r, s);
- throw InvalidUsageException();
- }
- void Unify(const RegularExpression &that){
- CopyAutStructure(that);
- epsEdges[BEGIN_STATE].push_back(SecondAutIndex(BEGIN_STATE));
- stateNumber += that.stateNumber - 1;
- }
- void Concat(const RegularExpression &that) {
- CopyAutStructure(that);
- for (int st = 0; st < stateNumber; ++st)
- if (terminalState[st])
- epsEdges[st].push_back(SecondAutIndex(BEGIN_STATE));
- for (int i = 0; i < stateNumber; ++i)
- terminalState[i] = 0;
- stateNumber += that.stateNumber - 1;
- }
- void Star(){
- for (int st = 0; st < stateNumber; ++st)
- if (terminalState[st])
- epsEdges[st].push_back(BEGIN_STATE);
- }
- public:
- RegularExpression(){
- stateNumber = 2;
- terminalState.resize(2);
- epsEdges.resize(2);
- }
- RegularExpression(char symb){
- stateNumber = 3;
- terminalState.resize(3);
- epsEdges.resize(3);
- terminalState[2] = true;
- nextState[BEGIN_STATE][symb] = 2;
- }
- RegularExpression(string str){
- RegularExpression temp;
- try {
- temp = Parse(0, str.size(), str);
- *this = temp;
- }
- catch (const InvalidUsageException &e){
- *this = RegularExpression();
- throw e;
- }
- }
- vector < pair<int, string> > FindOccurences(string text){
- vector <bool> isRegularOccurence(text.size());
- vector <pair <int, string> > result;
- for (int i = 0, l = text.length(); i < l; ++i){
- set <int> states;
- DfsEpsEdges(BEGIN_STATE, states);
- int maxCorrectJ = -1;
- for (int j = i; j < l; ++j) {
- set <int> newStates;
- char c = text[j];
- for (set <int>::iterator it = states.begin(), end = states.end(); it != end; it++){
- int w = *it;
- if (nextState[w][c])
- DfsEpsEdges(nextState[w][c], newStates);
- }
- states = newStates;
- if (TerminalMultistateQ(states))
- maxCorrectJ = j;
- newStates.clear();
- }
- bool isFine = false;
- for (int j = i; j <= maxCorrectJ; ++j) {
- if (!isRegularOccurence[j])
- isFine = true;
- }
- if (isFine) {
- result.push_back(make_pair(i, text.substr(i, maxCorrectJ - i + 1)));
- for (int j = i; j <= maxCorrectJ; ++j)
- isRegularOccurence[j] = true;
- }
- states.clear();
- }
- return result;
- }
- bool Match(string str) {
- vector <pair<int, string> > res = FindOccurences(str);
- for (int i = 0, l = res.size(); i < l; ++i)
- if (res[i].second == str)
- return true;
- return false;
- }
- RegularExpression operator*(const RegularExpression &that) {
- RegularExpression res = *this;
- res.Concat(that);
- return res;
- }
- RegularExpression operator+(const RegularExpression &that) {
- RegularExpression res = *this;
- res.Unify(that);
- return res;
- }
- RegularExpression operator+() {
- RegularExpression res = *this;
- res.Star();
- return res;
- }
- };
- class Menu{
- static const int MAX_NUM = 10000;
- typedef vector < vector <pair<int, string> > > AnswerData;
- vector <RegularExpression> allRegExp;
- vector <string> files;
- int AddNewDocument(string path) {
- FILE * pFile;
- pFile = fopen(path.c_str(), "r");
- char fileText[MAX_NUM];
- if (pFile == NULL){
- throw InvalidExpressionFileException();
- }
- else{
- string res = "";
- while (fgets(fileText, MAX_NUM, pFile)) {
- res += string(fileText, fileText + strlen(fileText));
- }
- files.push_back(res);
- return files.size();
- }
- }
- AnswerData FindMaxAppearances(RegularExpression ®) {
- AnswerData occurences(files.size());
- AnswerData answer(files.size());
- for (int i = 0, l = files.size(); i < l; ++i)
- occurences[i] = reg.FindOccurences(files[i]);
- int maxVal = 0;
- for (int i = 0, l = occurences.size(); i < l; ++i) {
- if (maxVal < occurences[i].size())
- maxVal = occurences[i].size();
- }
- for (int i = 0, l = answer.size(); i < l; ++i) {
- if (occurences[i].size() == maxVal)
- answer[i] = occurences[i];
- }
- return answer;
- }
- AnswerData FindLongestAppearances(RegularExpression ®) {
- AnswerData occurences(files.size());
- AnswerData longestWords(files.size());
- int maxVal = 0;
- for (int i = 0, l = files.size(); i < l; ++i)
- occurences[i] = reg.FindOccurences(files[i]);
- for (int i = 0, l = occurences.size(); i < l; ++i) {
- for (int j = 0, l2 = occurences[i].size(); j < l2; ++j){
- int p = occurences[i][j].second.size();
- if (p > maxVal)
- maxVal = p;
- }
- }
- for (int i = 0, l = occurences.size(); i < l; ++i) {
- for (int j = 0, l2 = occurences[i].size(); j < l2; ++j) {
- if (occurences[i][j].second.size() == maxVal) {
- longestWords[i].push_back(occurences[i][j]);
- }
- }
- }
- return longestWords;
- }
- AnswerData FindAllAppearances(RegularExpression ®) {
- vector < vector <pair<int, string> > > allAppearances(files.size());
- for (int i = 0, l = files.size(); i < l; ++i)
- allAppearances[i] = reg.FindOccurences(files[i]);
- return allAppearances;
- }
- AnswerData FindSuperRelevant() {
- AnswerData answer(files.size());
- for (int i = 0, l = files.size(); i < l; ++i) {
- for (int j = 0, l2 = allRegExp.size(); j < l2; ++j) {
- vector < pair<int, string> > occurences = allRegExp[j].FindOccurences(files[i]);
- if (occurences.empty()) {
- answer[i].clear();
- break;
- }
- answer[i].push_back(occurences[0]);
- }
- }
- return answer;
- }
- void PrintAnswer(AnswerData data) {
- bool noRelDoc = true;
- for (int i = 0, l = data.size(); i < l; ++i) {
- if (data[i].empty()) continue;
- cout << "Document " << i + 1 << ":" << endl;
- for (int j = 0, l2 = data[i].size(); j < l2; ++j) {
- cout << data[i][j].first << ": " << data[i][j].second << endl;
- noRelDoc = false;
- }
- }
- if (noRelDoc)
- cout << "There are no relevant documents" << endl;
- }
- void PrintMenuDesc(){
- cout << "1) Add new document to the list of documents" << endl;
- cout << "2) Find max appearances" << endl;
- cout << "3) Find longest appearance" << endl;
- cout << "4) Find all documents" << endl;
- cout << "5) Find super - relevant documents" << endl;
- cout << "6) Exit" << endl;
- }
- public:
- void drive() {
- PrintMenuDesc();
- char c;
- while (cin >> c) {
- try{
- if (c == '1') {
- cout << "Please insert path for document" << endl;
- string path;
- cin >> path;
- int num = AddNewDocument(path);
- cout << "Document number ";
- cout << num;
- cout << " added" << endl;
- }
- if (c == '2') {
- cout << "Please insert regular expression" << endl;
- string regularExpString;
- cin >> regularExpString;
- allRegExp.push_back(RegularExpression(regularExpString));
- PrintAnswer(FindMaxAppearances(RegularExpression(regularExpString)));
- }
- if (c == '3') {
- cout << "Please insert regular expression" << endl;
- string regularExpString;
- cin >> regularExpString;
- allRegExp.push_back(RegularExpression(regularExpString));
- PrintAnswer(FindLongestAppearances(RegularExpression(regularExpString)));
- }
- if (c == '4') {
- cout << "Please insert regular expression" << endl;
- string regularExpString;
- cin >> regularExpString;
- allRegExp.push_back(RegularExpression(regularExpString));
- PrintAnswer(FindAllAppearances(RegularExpression(regularExpString)));
- }
- if (c == '5') {
- PrintAnswer(FindSuperRelevant());
- }
- if (c == '6') {
- cout << "Goodbye" << endl;
- return;
- }
- PrintMenuDesc();
- }
- catch (const exception &e) {
- cout << e.what() << endl;
- PrintMenuDesc();
- }
- }
- PrintMenuDesc();
- }
- Menu(){}
- };
- int main(int argc, char *argv[]) {
- Menu menu;
- menu.drive();
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement