Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include <fstream>
- #include <string>
- #include <queue>
- #include <vector>
- #include <cstdlib>
- #include <ctime>
- #include <cstdarg>
- using namespace std;
// Availability states stored in MotifNode::availability.
// Typed constants are used instead of preprocessor macros so the names obey
// normal scoping rules and are visible to a debugger.
const int UNAVAILABLE      = 0;  // initial state; also set by resetAvailable () when the parent is not chosen
const int AVAILABLE        = 1;  // collected by getAvailable () as a candidate for selection
const int CHOSEN           = 2;  // selected; reported by printChosen ()
const int PERM_UNAVAILABLE = 3;  // set by choose () on nodes exclusive with a chosen node

// Lines destined for the output file; flushed to disk by finish ().
std::vector<std::string> output;
// Diagnostic and progress messages destined for the error file; flushed by finish ().
std::vector<std::string> errors;
/**
 * One node of the motif classification tree.
 *
 * A node is identified among its siblings by `classification`; the dotted
 * path from the root (e.g. "A.1.b") names it globally. Each node OWNS its
 * children (the destructor deletes them), while `parent` and `exclusiveWith`
 * are non-owning links to other nodes in the same tree.
 */
class MotifNode {
public:
    /** Creates a node with the given parent (NULL for the root), classification and description. */
    MotifNode (MotifNode*, std::string, std::string);
    /** Deletes every child node recursively. */
    ~MotifNode ();

    /** Inserts (or re-describes) the node at classification path `cls`, relative to this node. */
    bool insertNode (std::queue<std::string>, std::string);
    /** Deletes the node at classification path `cls`, relative to this node. */
    bool deleteNode (std::queue<std::string>);
    /** Appends "<full classification>: <description>" for this subtree to `output`. */
    void printNode (int);
    /** Randomly chooses available nodes until num * 100 story weight has been spent. */
    bool selectRandom (int);
    /** Resets availability for this subtree: root CHOSEN, its children AVAILABLE, the rest UNAVAILABLE. */
    void resetAvailable ();
    /** Appends the descriptions of the chosen nodes in this subtree to `output`. */
    void printChosen ();
    /** Sets randomWeight on every node matching path `cls` ("*" matches all children). */
    void setRandomWeight (std::queue<std::string>, int);
    /** Sets storyWeight on every node matching path `cls` ("*" matches all children). */
    void setStoryWeight (std::queue<std::string>, int);
    /** Makes all nodes matched by the given paths mutually exclusive. */
    void makeExclusive (std::vector< std::queue<std::string> >);

private:
    // Not copyable: the destructor deletes `children`, so a member-wise copy
    // would delete every child twice. Declared private and intentionally left
    // undefined (C++98 idiom) so an accidental copy fails to build.
    MotifNode (const MotifNode&);
    MotifNode& operator= (const MotifNode&);

    /** Appends every AVAILABLE node reachable from this node to the given list and returns it. */
    std::vector<MotifNode*> getAvailable (std::vector<MotifNode*>);
    /** Returns the dotted classification path from the root to this node ("" for the root). */
    std::string getFullClassification ();
    /** Marks this node CHOSEN, opens up its children and shuts out exclusive nodes. */
    bool choose ();
    /** Appends every node matching path `cls` to the given list and returns it. */
    std::vector<MotifNode*> classificationToPointer
        (std::queue<std::string>, std::vector<MotifNode*>);

    MotifNode *parent;                     // non-owning; NULL for the root
    std::vector<MotifNode*> children;      // owned; deleted by the destructor
    std::string classification;            // this node's path component
    std::string description;               // text reported for this node
    int availability;                      // UNAVAILABLE / AVAILABLE / CHOSEN / PERM_UNAVAILABLE
    int randomWeight;                      // relative chance of being picked (constructor default 100)
    int storyWeight;                       // amount deducted from the selection budget when picked
    std::vector<MotifNode*> exclusiveWith; // non-owning; nodes shut out when this one is chosen
};
- MotifNode::MotifNode (MotifNode *p, string cls, string desc) {
- parent = p;
- classification = cls;
- description = desc;
- availability = UNAVAILABLE;
- randomWeight = 100;
- storyWeight = 100;
- }
- MotifNode::~MotifNode () {
- // cout << "Destructor: deleting children (" << description << ")" << endl;
- for (int x = 0; x < children.size (); x++)
- delete children[x];
- }
- bool MotifNode::insertNode (queue<string> cls, string desc) {
- if (cls.empty ())
- return false;
- int next = -1;
- for (int x = 0; x < children.size (); x++)
- if (children[x]->classification == cls.front ()) {
- next = x;
- break;
- }
- if (cls.size () == 1) {
- if (next >= 0) {
- errors.push_back ("Replacing node with new description (" + desc + ")");
- children[next]->description = desc;
- return true;
- }
- // cout << "Adding new node (" << desc << ")" << endl;
- MotifNode *newNode = new MotifNode (this, cls.front (), desc);
- children.push_back (newNode);
- return true;
- }
- if (next < 0) {
- errors.push_back ("Adding unknown intermediate node");
- next = children.size ();
- MotifNode *newNode = new MotifNode (this, cls.front (), "(Unknown)");
- children.push_back (newNode);
- }
- cls.pop ();
- return children[next]->insertNode (cls, desc);
- }
- bool MotifNode::deleteNode (queue<string> cls) {
- if (cls.empty ())
- return false;
- int victim = -1;
- for (int x = 0; x < children.size (); x++)
- if (children[x]->classification == cls.front ()) {
- victim = x;
- break;
- }
- if (victim < 0) {
- errors.push_back ("Error: Motif node does not exist");
- return false;
- }
- if (cls.size () == 1) {
- errors.push_back ("Deleting node (" + children[victim]->description + ")");
- delete children[victim];
- return true;
- }
- cls.pop ();
- return children[victim]->deleteNode (cls);
- }
- void MotifNode::printNode (int indent) {
- // for (int x = 0; x < indent; x++)
- // cout << " ";
- if (parent != NULL)
- output.push_back (getFullClassification () + ": " + description);
- for (int x = 0; x < children.size (); x++)
- children[x]->printNode (indent + 2);
- }
- bool MotifNode::selectRandom (int num) {
- // cout << "Selecting " << num << " random motifs.\n";
- int x = num * 100;
- // cout << "total story: " << x << endl;
- while (x > 0) {
- vector<MotifNode*> choices = getAvailable (vector<MotifNode*>());
- if (choices.empty ()) break;
- int totalRandom = 0;
- for (int r = 0; r < choices.size (); r++)
- totalRandom += choices[r]->randomWeight;
- int roll = rand () % totalRandom;
- int index = 0;
- int threshold = 0;
- for (int i = 0; i < choices.size (); i++) {
- threshold += choices[i]->randomWeight;
- if (threshold > roll) {
- index = i;
- break;
- }
- }
- // cout << "chose " << choices[index]->storyWeight << " worth of story"
- // << endl;
- choices[index]->choose ();
- x -= choices[index]->storyWeight;
- // cout << x << " left to choose" << endl;
- }
- return true;
- }
- void MotifNode::resetAvailable () {
- if (parent == NULL)
- availability = CHOSEN;
- else if (parent->availability == CHOSEN)
- availability = AVAILABLE;
- else availability = UNAVAILABLE;
- for (int x = 0; x < children.size (); x++)
- children[x]->resetAvailable ();
- }
- void MotifNode::printChosen () {
- if (availability != CHOSEN)
- return;
- if (parent != NULL)
- output.push_back (description);
- for (int x = 0; x < children.size (); x++)
- children[x]->printChosen ();
- }
- void MotifNode::setRandomWeight (queue<string> cls, int weight) {
- if (cls.empty ())
- randomWeight = weight;
- else if (cls.front () == "*") {
- cls.pop ();
- for (int x = 0; x < children.size (); x++)
- children[x]->setRandomWeight (cls, weight);
- }
- else for (int x = 0; x < children.size (); x++)
- if (children[x]->classification == cls.front ()) {
- cls.pop ();
- children[x]->setRandomWeight (cls, weight);
- break;
- }
- }
- void MotifNode::setStoryWeight (queue<string> cls, int weight) {
- if (cls.empty ())
- storyWeight = weight;
- else if (cls.front () == "*") {
- cls.pop ();
- for (int x = 0; x < children.size (); x++)
- children[x]->setStoryWeight (cls, weight);
- }
- else for (int x = 0; x < children.size (); x++)
- if (children[x]->classification == cls.front ()) {
- cls.pop ();
- children[x]->setStoryWeight (cls, weight);
- break;
- }
- }
- void MotifNode::makeExclusive (vector< queue<string> > clslist) {
- vector<MotifNode*> nodelist;
- for (int x = 0; x < clslist.size (); x++) {
- vector<MotifNode*> tempnodelist =
- classificationToPointer (clslist[x], vector<MotifNode*>());
- for (int t = 0; t < tempnodelist.size (); t++)
- nodelist.push_back (tempnodelist[t]);
- }
- for (int x = 0; x < nodelist.size (); x++)
- for (int n = 0; n < nodelist.size (); n++) {
- if (nodelist[n] == NULL || x == n)
- continue;
- nodelist[x]->exclusiveWith.push_back (nodelist[n]);
- }
- }
- vector<MotifNode*> MotifNode::getAvailable (vector<MotifNode*> avail) {
- if (availability == AVAILABLE)
- avail.push_back (this);
- if (availability != UNAVAILABLE && availability != PERM_UNAVAILABLE)
- for (int x = 0; x < children.size (); x++)
- avail = children[x]->getAvailable (avail);
- return avail;
- }
- string MotifNode::getFullClassification () {
- if (parent == NULL)
- return "";
- string cls = parent->getFullClassification ();
- if (cls != "") cls += ".";
- return cls + classification;
- }
- bool MotifNode::choose () {
- // cout << "Choosing " << description << endl;
- availability = CHOSEN;
- for (int x = 0; x < children.size (); x++)
- if (children[x]->availability != PERM_UNAVAILABLE)
- children[x]->availability = AVAILABLE;
- for (int x = 0; x < exclusiveWith.size (); x++)
- exclusiveWith[x]->availability = PERM_UNAVAILABLE;
- return true;
- }
- vector<MotifNode*> MotifNode::classificationToPointer
- (queue<string> cls, vector<MotifNode*> nodelist) {
- if (cls.empty ())
- nodelist.push_back (this);
- else if (cls.front () == "*") {
- cls.pop ();
- for (int x = 0; x < children.size (); x++)
- nodelist = children[x]->classificationToPointer (cls, nodelist);
- }
- else for (int x = 0; x < children.size (); x++)
- if (children[x]->classification == cls.front ()) {
- cls.pop ();
- nodelist = children[x]->classificationToPointer (cls, nodelist);
- }
- return nodelist;
- }
- void readOptionsFile (string, MotifNode*);
- void readMotifFile (string, MotifNode*);
- void chooseRandomMotifs (MotifNode*);
- void finish ();
- vector<string> tokenize (istream&, bool, int, ...);
- queue<string> tokenize (string, char);
- int count = 10;
- string outFile = "out.txt";
- string errorFile = "error.txt";
- int main (int argc, char *argv[]) {
- srand (time (0));
- MotifNode *root = new MotifNode (NULL, "", "(Root)");
- readOptionsFile ("options.txt", root);
- output.push_back ("Printing a random collection of motifs:");
- chooseRandomMotifs (root);
- finish ();
- return 0;
- }
- void readOptionsFile (string filename, MotifNode *root) {
- if (root == NULL) return;
- ifstream optFile (filename.c_str ());
- if (optFile.fail ()) {
- errors.push_back ("Error: could not read option file: " + filename);
- return;
- }
- errors.push_back ("Reading option file: " + filename);
- vector<string> subFiles;
- // parse filenames and count
- vector<string> tokens = tokenize (optFile, true, 2, ' ');
- for (; !tokens.empty (); tokens = tokenize (optFile, true, 2, ' ')) {
- if (tokens[0] == "motif-file")
- for (int x = 1; x < tokens.size (); x++)
- readMotifFile (tokens[x], root);
- else if (tokens[0] == "options-file")
- for (int x = 1; x < tokens.size (); x++)
- subFiles.push_back (tokens[x]);
- else if (tokens[0] == "out-file") {
- if (tokens.size () > 1)
- outFile = tokens[1];
- }
- else if (tokens[0] == "error-file") {
- if (tokens.size () > 1)
- errorFile = tokens[1];
- }
- else if (tokens[0] == "count") {
- if (tokens.size () > 1)
- count = atoi (tokens[1].c_str ());
- }
- }
- optFile.close ();
- for (int x = 0; x < subFiles.size (); x++)
- readOptionsFile (subFiles[x], root);
- // read weights
- optFile.open (filename.c_str ());
- if (optFile.fail ()) {
- errors.push_back ("Error: could not reopen option file: " + filename);
- return;
- }
- tokens = tokenize (optFile, true, 2, ' ');
- for (; !tokens.empty (); tokens = tokenize (optFile, true, 2, ' ')) {
- if (tokens[0] == "weight") {
- if (tokens.size () != 3) {
- errors.push_back
- ("Error: \"weight\" takes exactly two arguments (a classification and a weight)");
- continue;
- }
- queue<string> cls = tokenize (tokens[1], '.');
- int weight = atoi (tokens[2].c_str ());
- root->setRandomWeight (cls, weight);
- }
- else if (tokens[0] == "value") {
- if (tokens.size () != 3) {
- errors.push_back
- ("Error: \"value\" takes exactly two arguments (a classification and a value)");
- continue;
- }
- queue<string> cls = tokenize (tokens[1], '.');
- int weight = atoi (tokens[2].c_str ());
- root->setStoryWeight (cls, weight);
- }
- else if (tokens[0] == "exclusive") {
- vector< queue<string> > clslist;
- for (int c = 1; c < tokens.size (); c++)
- clslist.push_back (tokenize (tokens[c], '.'));
- root->makeExclusive (clslist);
- }
- }
- }
- void readMotifFile (string filename, MotifNode *root) {
- if (root == NULL) return;
- ifstream motifFile (filename.c_str ());
- if (motifFile.fail ()) {
- errors.push_back ("Error: could not open motif file");
- return;
- }
- errors.push_back ("Reading motif file: " + filename);
- vector<string> tokens = tokenize (motifFile, false, 2, ' ');
- for (; !tokens.empty (); tokens = tokenize (motifFile, false, 2, ' ')) {
- if (tokens.size () < 2)
- errors.push_back ("Error: did not find description for " + tokens[0]);
- queue<string> cls = tokenize (tokens[0], '.');
- string description = tokens.size () >= 2 ? tokens[1] : "";
- if (cls.size ())
- root->insertNode (cls, description);
- else errors.push_back ("Error: invalid classification: " + tokens[0]);
- }
- motifFile.close ();
- }
- void chooseRandomMotifs (MotifNode *root) {
- root->resetAvailable ();
- root->selectRandom (count);
- root->printChosen ();
- }
- void finish () {
- ofstream out (outFile.c_str (), ios_base::app);
- if (!out.fail ())
- for (int x = 0; x < output.size (); x++)
- out << output[x] << endl;
- out.close ();
- ofstream error (errorFile.c_str (), ios_base::out | ios_base::trunc);
- if (!error.fail ())
- for (int x = 0; x < errors.size (); x++)
- error << errors[x] << endl;
- error.close ();
- }
// Reads the next meaningful line from `in` and splits it into tokens.
//
// Blank lines, whitespace-only lines and comment lines (first non-whitespace
// character is '#') are skipped.
//
//   in   - stream to read from
//   cont - false: produce at most `num` tokens; the last token is the
//                 unsplit remainder of the line;
//          true:  after the first num-1 delimiters have been used, keep
//                 splitting on the LAST delimiter until the line is exhausted,
//                 dropping empty tokens in that phase
//   num  - number of tokens wanted; num == 1 returns the whole line
//   ...  - num-1 delimiter characters, applied in order
//
// Returns an empty vector at end of input, on a stream that is not good (),
// or when num < 1.
std::vector<std::string> tokenize (std::istream &in, bool cont, int num, ...) {
    // Positions are kept as size_type and compared with npos directly; they
    // were previously squeezed into int, relying on -1 happening to equal npos.
    typedef std::string::size_type Pos;
    const Pos npos = std::string::npos;

    std::vector<std::string> tokens;
    if (!in.good () || num < 1) return tokens;

    // Read lines until one is worth tokenizing.
    std::string line;
    bool foundLine = false;
    while (in.good ()) {
        line.clear ();
        while (line.empty () && in.good ()) // skip blank lines
            std::getline (in, line, '\n');
        if (line.empty ()) break; // eof

        const Pos firstChar = line.find_first_not_of (" \t");
        if (firstChar == npos) // line contains only whitespace
            continue;
        if (line[firstChar] == '#') // comment
            continue;
        foundLine = true;
        break;
    }
    if (!foundLine) // eof
        return tokens;

    // Tokenizing to a vector of size 1 is easy.
    if (num == 1) {
        tokens.push_back (line);
        return tokens;
    }

    // Split on each supplied delimiter in turn.
    va_list delimList;
    va_start (delimList, num);
    char current = '\0';    // last-used delimiter, reused when `cont` is set
    Pos start = 0;          // index where the next token begins
    bool exhausted = false; // true once a delimiter was not found
    for (int x = 0; x < (num - 1); x++) {
        // char arguments are promoted to int when passed through "..."
        current = static_cast<char> (va_arg (delimList, int));
        const Pos hit = line.find (current, start);
        if (hit == npos) {
            tokens.push_back (line.substr (start)); // rest of the line
            exhausted = true;
            break;
        }
        tokens.push_back (line.substr (start, hit - start));
        start = hit + 1;
    }
    va_end (delimList);

    // Don't continue - we want exactly num tokens (or fewer).
    if (!cont) {
        // Add the final token, unless we've already run out of line.
        if (!exhausted)
            tokens.push_back (line.substr (start));
        return tokens;
    }

    // Continue tokenizing on the final delimiter until the line is exhausted.
    while (!exhausted) {
        const Pos hit = line.find (current, start);
        const std::string tok =
            (hit == npos) ? line.substr (start) : line.substr (start, hit - start);
        if (!tok.empty ()) tokens.push_back (tok);
        if (hit == npos)
            exhausted = true;
        else
            start = hit + 1;
    }
    return tokens;
}
// Splits `orig` on every occurrence of `delim` and returns the non-empty
// pieces in order. Leading, trailing and repeated delimiters therefore
// produce no tokens; an empty string yields an empty queue.
std::queue<std::string> tokenize (std::string orig, char delim) {
    // Positions stay size_type and are compared with npos directly; they were
    // previously stored in int, relying on -1 happening to equal npos.
    typedef std::string::size_type Pos;
    const Pos npos = std::string::npos;

    std::queue<std::string> tokens;
    Pos start = 0; // index where the next piece begins
    while (true) {
        const Pos hit = orig.find (delim, start);
        const std::string tok =
            (hit == npos) ? orig.substr (start) : orig.substr (start, hit - start);
        if (!tok.empty ()) tokens.push (tok);
        if (hit == npos) break; // that was the last piece
        start = hit + 1;
    }
    return tokens;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement