Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- public static long doFreq(int n, long df, long tf){
- long returnValue = (long) (1+Math.log(tf)*Math.log(n/df));
- return returnValue;
- }
- public static void midTerms2 (Document doc, IndexReader reader, String field, int min, int max){
- ArrayList <DocInfo> docsInfo = new ArrayList<>();
- int i = 0;
- long freq = 0;
- long tf;
- long df;
- String [] content = doc.getValues(field);
- for (String term : content){
- Term termToSearch = new Term(field, term);
- try {
- tf = TermFreqInDoc(doc,field, term);
- df = reader.totalTermFreq(termToSearch);
- freq = doFreq(reader.maxDoc(),tf,df);
- } catch (IOException e) {
- e.printStackTrace();
- }
- docsInfo.add(new DocInfo(term,i,freq));
- content = remove(content,term);
- }
- Arrays.sort((DocInfo [])docsInfo.toArray(), new Comparator<DocInfo>() {
- @Override
- public int compare(DocInfo o1, DocInfo o2) {
- return Long.compare(o1.getTermFreq(), o2.getTermFreq());
- }
- });
- for (i = min; i<docsInfo.size() && i < max; i++){
- System.out.println("Termino "+docsInfo.get(i).getTerm()+" en el documento "+docsInfo.get(i).getDocId()+" con frecuencia "+docsInfo.get(i).getTermFreq());
- }
- }
- public static void midTerms (Document[] docs, IndexReader reader, String field, int min, int max){
- ArrayList <DocInfo> docsInfo = new ArrayList<>();
- int i = 0;
- long freq = 0;
- long tf;
- long df;
- for (Document doc : docs ){
- String [] content = doc.getValues(field);
- for (String term : content){
- Term termToSearch = new Term(field, term);
- try {
- tf = TermFreqInDoc(doc,field, term);
- df = reader.totalTermFreq(termToSearch);
- freq = doFreq(reader.maxDoc(),tf,df);
- } catch (IOException e) {
- e.printStackTrace();
- }
- docsInfo.add(new DocInfo(term,i,freq));
- content = remove(content,term);
- }
- }
- Arrays.sort((DocInfo [])docsInfo.toArray(), new Comparator<DocInfo>() {
- @Override
- public int compare(DocInfo o1, DocInfo o2) {
- return Long.compare(o1.getTermFreq(), o2.getTermFreq());
- }
- });
- for (i = min; i<docsInfo.size() && i < max; i++){
- System.out.println("Termino "+docsInfo.get(i).getTerm()+" en el documento "+docsInfo.get(i).getDocId()+" con frecuencia "+docsInfo.get(i).getTermFreq());
- }
- }
- public static void bottomTerms2 (Document doc, IndexReader reader,String field, int number){
- ArrayList <DocInfo> docsInfo = new ArrayList<>();
- int i = 0;
- long freq = 0;
- long tf;
- long df;
- String [] content = doc.getValues(field);
- for (String term : content){
- Term termToSearch = new Term(field, term);
- try {
- tf = TermFreqInDoc(doc,field, term);
- df = reader.totalTermFreq(termToSearch);
- freq = doFreq(reader.maxDoc(),tf,df);
- } catch (IOException e) {
- e.printStackTrace();
- }
- docsInfo.add(new DocInfo(term,i,freq));
- content = remove(content,term);
- }
- Arrays.sort((DocInfo [])docsInfo.toArray(), new Comparator<DocInfo>() {
- @Override
- public int compare(DocInfo o1, DocInfo o2) {
- return Long.compare(o1.getTermFreq(), o2.getTermFreq());
- }
- });
- for (i = 0; i<docsInfo.size() && i < number; i++){
- System.out.println("Termino "+docsInfo.get(i).getTerm()+" en el documento "+docsInfo.get(i).getDocId()+" con frecuencia "+docsInfo.get(i).getTermFreq());
- }
- }
- public static void bottomTerms (Document[] docs, IndexReader reader,String field, int number){
- ArrayList <DocInfo> docsInfo = new ArrayList<>();
- int i = 0;
- long freq = 0;
- long tf;
- long df;
- for (Document doc : docs ){
- String [] content = doc.getValues(field);
- for (String term : content){
- Term termToSearch = new Term(field, term);
- try {
- tf = TermFreqInDoc(doc,field, term);
- df = reader.totalTermFreq(termToSearch);
- freq = doFreq(reader.maxDoc(),tf,df);
- } catch (IOException e) {
- e.printStackTrace();
- }
- docsInfo.add(new DocInfo(term,i,freq));
- content = remove(content,term);
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment