Advertisement
Guest User

Untitled

a guest
Aug 21st, 2014
190
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.40 KB | None | 0 0
  1. /*
  2. * Terrier - Terabyte Retriever
  3. * Webpage: http://terrier.org
  4. * Contact: terrier{a.}dcs.gla.ac.uk
  5. * University of Glasgow - School of Computing Science
  6. * http://www.gla.ac.uk/
  7. *
  8. * The contents of this file are subject to the Mozilla Public License
  9. * Version 1.1 (the "License"); you may not use this file except in
  10. * compliance with the License. You may obtain a copy of the License at
  11. * http://www.mozilla.org/MPL/
  12. *
  13. * Software distributed under the License is distributed on an "AS IS"
  14. * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
  15. * the License for the specific language governing rights and limitations
  16. * under the License.
  17. *
  18. * The Original Code is BM25.java.
  19. *
  20. * The Original Code is Copyright (C) 2004-2011 the University of Glasgow.
  21. * All Rights Reserved.
  22. *
  23. * Contributor(s):
  24. * Gianni Amati <gba{a.}fub.it> (original author)
  25. * Ben He <ben{a.}dcs.gla.ac.uk>
  26. * Vassilis Plachouras <vassilis{a.}dcs.gla.ac.uk>
  27. */
  28. package org.terrier.matching.models;
  29. /**
  30. * This class implements the Okapi BM25 weighting model. The
  31. * default parameters used are:<br>
  32. * k_1 = 1.2d<br>
  33. * k_3 = 8d<br>
  34. * b = 0.75d<br> The b parameter can be altered by using the setParameter method.
  35. * @author Gianni Amati, Ben He, Vassilis Plachouras
  36. */
  37. public class BM25 extends WeightingModel {
  38. private static final long serialVersionUID = 1L;
  39.  
  40. /** The constant k_1.*/
  41. private double k_1 = 1.2d;
  42.  
  43. /** The constant k_3.*/
  44. private double k_3 = 8d;
  45.  
  46. /** The parameter b.*/
  47. private double b;
  48.  
  49. /** A default constructor.*/
  50. public BM25() {
  51. super();
  52. b=0.75d;
  53. }
  54. /**
  55. * Returns the name of the model.
  56. * @return the name of the model
  57. */
  58. public final String getInfo() {
  59. return "BM25b"+b;
  60. }
  61. /**
  62. * Uses BM25 to compute a weight for a term in a document.
  63. * @param tf The term frequency in the document
  64. * @param docLength the document's length
  65. * @return the score assigned to a document with the given
  66. * tf and docLength, and other preset parameters
  67. */
  68. public double score(double tf, double docLength) {
  69. double K = k_1 * ((1 - b) + b * docLength / averageDocumentLength) + tf;
  70. return (tf * (k_3 + 1d) * keyFrequency / ((k_3 + keyFrequency) * K))
  71. * Idf.log((numberOfDocuments - documentFrequency + 0.5d) / (documentFrequency + 0.5d));
  72. }
  73. /**
  74. * Uses BM25 to compute a weight for a term in a document.
  75. * @param tf The term frequency in the document
  76. * @param docLength the document's length
  77. * @param n_t The document frequency of the term
  78. * @param F_t the term frequency in the collection
  79. * @param keyFrequency the term frequency in the query
  80. * @return the score assigned by the weighting model BM25.
  81. */
  82. public double score(
  83. double tf,
  84. double docLength,
  85. double n_t,
  86. double F_t,
  87. double keyFrequency) {
  88. double K = k_1 * ((1 - b) + b * docLength / averageDocumentLength) + tf;
  89. return Idf.log((numberOfDocuments - n_t + 0.5d) / (n_t+ 0.5d)) *
  90. ((k_1 + 1d) * tf / (K + tf)) *
  91. ((k_3+1)*keyFrequency/(k_3+keyFrequency));
  92. }
  93.  
  94. /**
  95. * Sets the b parameter to BM25 ranking formula
  96. * @param _b the b parameter value to use.
  97. */
  98. public void setParameter(double _b) {
  99. this.b = _b;
  100. }
  101.  
  102.  
  103. /**
  104. * Returns the b parameter to the BM25 ranking formula as set by setParameter()
  105. */
  106. public double getParameter() {
  107. return this.b;
  108. }
  109.  
  110. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement