Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <stdlib.h>
- #include <string.h>
- #include "invertedindex.h"
/*
 * Tells whether c is an ASCII letter.
 *
 * Returns 1 when c lies in 'A'..'Z' or 'a'..'z', and 0 for every other value.
 * (Author's original note: strlen apparently does not work on unsigned char.)
 */
int is_letter(char c)
{
    int upper = (c >= 'A' && c <= 'Z');
    int lower = (c >= 'a' && c <= 'z');

    return (upper || lower) ? 1 : 0;
}
- Map_t *Alocare_Mapare()
- {
- Map_t *map;
- map=(Map_t*)malloc(sizeof(Map_t));
- map->size = 1;
- map->buckets=(Node_t**)malloc(map->size * sizeof(Node_t));
- //map->buckets=(Node_t*)malloc(sizeof(Node_t));
- //list=(ANode_t)malloc(rest*sizeof(Node_t));
- return map;
- }
- void Realocare_Mapare(Map_t *map, int l)
- {
- map->size = l+1;
- map->buckets=(Node_t**)realloc(map->buckets, map->size * sizeof(Node_t));
- if(!map->buckets)
- printf("Nasol"), exit(5);
- }
- int IsWordInTable( Map_t map, int hs, char *s )
- {
- Node_t *nod;
- int cnt=0;
- for(nod=map.buckets[hs]; nod; nod=nod->next, ++cnt)
- {
- if( strcmp(nod->data.word, s) == 0)
- return cnt;
- }
- return -1;
- }
- void AddWord( Map_t *map, int hs, char *s, int docID )
- {
- Node_t *node=(Node_t*)malloc(sizeof(Node_t));
- node->data.word=strdup(s);
- node->data.documents.v = (int*)malloc(node->data.documents.cap * sizeof(int));
- node->data.documents.v[0]=docID;
- node->data.documents.n=1;
- node->next = map->buckets[hs];
- map->buckets[hs] = node;
- }
- void AddFile( Map_t *map, int hs, char *s, int docID, int loc_cuvant)
- {
- Node_t *node= map->buckets[hs];
- while(loc_cuvant--)
- node=node->next;
- if( node->data.documents.v[node->data.documents.n-1] != docID )
- node->data.documents.v[node->data.documents.n++] = docID;
- }
- void put_doc(Map_t *map, char *key, int docID)
- {
- int hs=hash((unsigned char*)key) % strlen(key);
- if(hs > map->size)
- Realocare_Mapare(map, hs);
- int z=IsWordInTable(*map, hs, key);
- if(z==-1) //Nu exista cuvant
- AddWord(map, hs, key, docID);
- else
- AddFile(map, hs, key, docID, z);
- }
- Array_t get_docs(Map_t *map, char *key)
- {
- int hs=hash((unsigned char*)key) % strlen(key);
- Node_t *nod = map->buckets[hs];
- while(nod)
- {
- if(strcmp(key, nod->data.word) == 0)
- return nod->data.documents;
- nod=nod->next;
- }
- Array_t nul;
- return nul;
- }
- Array_t reunion(const Array_t files1, const Array_t files2)
- {
- Array_t *array;
- array= (Array_t*)malloc(sizeof(Array_t));
- array->cap = 500;
- array->n = 0;
- array->v = (int*)malloc(array->cap * sizeof(int));
- int i=0, j=0, k=0;
- while (i<files1.n && j<files2.n)
- {
- while(files1.v[i] < files2.v[j])
- {
- if(i==files1.n)
- break;
- array->v[k++] = files1.v[i++];
- }
- while(files2.v[j] < files1.v[i])
- {
- if(j==files2.n)
- break;
- array->v[k++] = files2.v[j++];
- }
- if(files2.v[j] == files1.v[i])
- {
- int nr=files2.v[j];
- array->v[k++]=files2.v[j];
- while(files1.v[i] == nr)
- {
- if(i==files1.n)
- break;
- i++;
- }
- while(files2.v[j] == nr)
- {
- if(j==files2.n)
- break;
- j++;
- }
- }
- }
- while(i < files1.n)
- array->v[k++] = files1.v[i++];
- while(j < files2.n)
- array->v[k++] = files2.v[j++];
- array->n=k;
- return *array;
- }
- Array_t intersection(const Array_t files1, const Array_t files2)
- {
- Array_t *array;
- array= (Array_t*)malloc(sizeof(Array_t));
- array->cap = 500;
- array->n = 0;
- array->v = (int*)malloc(array->cap * sizeof(int));
- int i=0, j=0, k=0;
- while (i<files1.n && j<files2.n)
- {
- while(files1.v[i] < files2.v[j])
- {
- i++;
- if(i==files1.n)
- break;
- }
- while(files2.v[j] < files1.v[i])
- {
- j++;
- if(j==files2.n)
- break;
- }
- if(files2.v[j] == files1.v[i])
- {
- int nr=files2.v[j];
- array->v[k++]=files2.v[j];
- while(files1.v[i] == nr)
- {
- if(i==files1.n)
- break;
- i++;
- }
- while(files2.v[j] == nr)
- {
- if(j==files2.n)
- break;
- j++;
- }
- }
- }
- array->n=k;
- return *array;
- }
- void solve()
- {
- FILE *f[500], *in, *out;
- in=fopen("input.in", "rt");
- out=fopen("output.out", "wt");
- int nrFisiere, nrTokeni, i;
- Map_t *map=Alocare_Mapare();
- //Deschidere n fisiere
- fscanf(in, "%i", &nrFisiere);
- int cnt=0;
- for(i=0;i<nrFisiere;i++)
- {
- char nume[250];
- fscanf(in, "%s", nume);
- f[i]=fopen(nume, "rt");
- if(!f[i])
- printf("eroare la deschiderea fisierului %i\n", i+1);
- while(!feof(f[i])) // Ciudat, ultimul cuvant il citeste de 2 ori (unharmful)
- {
- char s[500];
- fscanf(f[i], "%s", s);
- int l=strlen(s);
- //Cazuri Exceptie
- if(s[l-3] == '.' && s[l-2] == '.' && s[l-1] == '.')
- s[l-3] = 0;
- if(!is_letter(s[0])) // (Ceva
- strcpy(s, s+1);
- if(!is_letter(s[0]))
- strcpy(s, s+1);
- if(!is_letter(s[l-2]) && l-3>0) //Asd).
- s[l-2]=0;
- else if(!is_letter(s[l-1]) && l-2>0) //Asd. sau Asd) sau asd! etc.
- s[l-1]=0;
- if( strcmp("subject", s) == 0)
- printf("%i\n", hash((unsigned char*)s)%strlen(s));
- if(s[0] == ' ' || strlen(s) == 0)
- continue;
- put_doc(map, s, i);
- }
- fclose(f[i]);
- }
- //Citire m tokeni
- fscanf(in, "%i", &nrTokeni);
- char token[500], ctoken[500];
- fgets(token, 500, in);
- for(i=0;i<nrTokeni;i++)
- {
- char *p;
- fgets(token, 500, in);
- Array_t files1, files2;
- if(token[strlen(token)-2] < 'A')
- token[strlen(token)-2]=0;
- else if(token[strlen(token)-1] < 'A')
- token[strlen(token)-1]=0;
- strcpy(ctoken, token);
- p=strtok(token, "!?; ");
- while(p!=NULL)
- {
- if(p[strlen(p)-2] < 'A') //Random char?
- p[strlen(p)-2]=0;
- else if(p[strlen(p)-1] < 'A' )
- p[strlen(p)-1]=0;
- if(strcmp(p, "&") == 0)
- {
- p=strtok(NULL, " !?;");
- if(p[strlen(p)-2] < 'A') //Random char?
- p[strlen(p)-2]=0;
- else if(p[strlen(p)-1] < 'A' )
- p[strlen(p)-1]=0;
- files2 = get_docs(map, p);
- files1 = intersection(files1, files2);
- }
- else if(strcmp(p, "|") == 0)
- {
- p=strtok(NULL, " !?;");
- if(p[strlen(p)-2] < 'A') //Random char?
- p[strlen(p)-2]=0;
- else if(p[strlen(p)-1] < 'A' )
- p[strlen(p)-1]=0;
- files2 = get_docs(map, p);
- files1 = reunion(files1, files2);
- }
- else
- files1 = get_docs(map, p);
- p=strtok(NULL, " !?;");
- }
- fprintf(out, "%s:", ctoken);
- int j;
- for(j=0;j<files1.n;j++)
- fprintf(out, " %i", files1.v[j]);
- fprintf(out, "\n");
- }
- }
/*
 * Program entry point: runs solve() and reports success.
 * The command-line arguments are not used.
 */
int main (int argc, char *argv[])
{
    (void)argc;
    (void)argv;

    solve();

    return 0;
}
- /*
- int ind=0;
- for(ind=0; ind<=map->size; ind++)
- {
- fprintf(out, "%i\n", ind);
- Node_t *nodut=map->buckets[ind];
- while(nodut)
- {
- fprintf(out, "%s ", nodut->data.word);
- int j;
- for(j=0;j<nodut->data.documents.n;j++)
- fprintf(out," %i", nodut->data.documents.v[j]);
- nodut=nodut->next;
- fprintf(out, "\n");
- }
- }
- */
Advertisement
Add Comment
Please, Sign In to add comment