Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- Factor {
- Index: 0
- CppName: "RF_MAX_FI_HOPS"
- Name: "RF_Max_Hops"
- Description: "Number of url hops to bypass (such as less - closer to the snout, the smaller the value (0 - snout, 1 - can't be reached from the snout, 0 < can be reached from the snout < 1). The normal value for the nose root is 0.0039)."
- Authors: "denplusplus"
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MAX]
- DependsOn: [{Feature: ["Hops"], Slice: "web_production"}]
- }
- Factor {
- Index: 1
- CppName: "RF_MAX_FI_QUERY_DOWNER_YABAR_AVG_TIME"
- Name: "RF_Max_QueryDOwnerYabarAvgTime"
- Description: "User average active continuous time spent by a user (in seconds) on host pages after clicking on a query from a search engine (the factor depends on the (query,domAttr) pair)."
- Authors: "akhropov"
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MAX]
- DependsOn: [{Feature: ["QueryDOwnerYabarAvgTime"], Slice: "web_production"}]
- }
- Factor {
- Index: 2
- CppName: "RF_MEAN_FI_COMM_LINKS_SEO_HOSTS"
- Name: "RF_Mean_CommLinksSEOHosts"
- Description: "Percentage of incoming sales links. An algorithm for recognizing commercial links has been implemented. The factor is remapped to [0,1] if the share of such links is > 50%, otherwise 0. ((http://wiki.yandex-team.ru/SvetlanaShorina/topseolinks a selection of cheated sites))"
- Authors: "kvn"
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MEAN]
- DependsOn: [{Feature: ["CommLinksSEOHosts"], Slice: "web_production"}]
- }
- Factor {
- Index: 3
- CppName: "RF_MEAN_FI_PERCENT_FREQ_WORDS"
- Name: "RF_Mean_PercentFreqWords"
- Description: "Percentage of the number of words that are the 200 most frequent words of the language, out of the total number of words in the text"
- Authors: ""
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MEAN]
- DependsOn: [{Feature: ["PercentFreqWords"], Slice: "web_production"}]
- }
- Factor {
- Index: 4
- CppName: "RF_MIN_FI_TRIGRAMS_COND_PROB"
- Name: "RF_Min_TrigramsCondProb"
- Description: "Logarithm of the geometric mean of the conditional probabilities of trigrams. The conditional probability of a trigram is its probability divided by the probability of a bigram from the first two words"
- Authors: ""
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MIN]
- DependsOn: [{Feature: ["TrigramsCondProb"], Slice: "web_production"}]
- }
- Factor {
- Index: 5
- CppName: "RF_MAX_FI_TEXT_WEIGHTED_FORMS"
- Name: "RF_Max_TextWeightedForms"
- Description: "Word-weighted sum of the number of word forms - sum over all query words of number_of_forms_for_word/64*word_weight; remap of the form x/(1 + x)."
- Authors: ""
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MAX]
- DependsOn: [{Feature: ["TextWeightedForms"], Slice: "web_production"}]
- }
- Factor {
- Index: 6
- CppName: "RF_MEAN_FI_ADV_PRONOUNS_PORTION"
- Name: "RF_Mean_AdvPronounsPortion"
- Description: "share of pronominal nouns"
- Authors: ""
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MEAN]
- DependsOn: [{Feature: ["AdvPronounsPortion"], Slice: "web_production"}]
- }
- Factor {
- Index: 7
- CppName: "RF_MIN_FI_ADV_PRONOUNS_PORTION"
- Name: "RF_Min_AdvPronounsPortion"
- Description: "share of pronominal nouns"
- Authors: ""
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MIN]
- DependsOn: [{Feature: ["AdvPronounsPortion"], Slice: "web_production"}]
- }
- Factor {
- Index: 8
- CppName: "RF_MAX_FI_FEM_MAS_NOUNS_PORTION"
- Name: "RF_Max_FemAndMasNounsPortion"
- Description: "the proportion of words that can be both masculine and feminine nouns, but not neuter, among all nouns (examples: 'hummingbird' is an example of an indefinite gender that can be defined in two ways, 'Alexandra' is a homonymy )."
- Authors: ""
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MAX]
- DependsOn: [{Feature: ["FemAndMasNounsPortion"], Slice: "web_production"}]
- }
- Factor {
- Index: 9
- CppName: "RF_MEAN_FI_LONGEST_TEXT"
- Name: "RF_Mean_LongestText"
- Description: "Size of the largest text segment on the page (from factor [18] PureText)"
- Authors: "denplusplus"
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MEAN]
- DependsOn: [{Feature: ["LongestText"], Slice: "web_production"}]
- }
- Factor {
- Index: 10
- CppName: "RF_FI_QCLASS_KAK"
- Name: "RF_QClassKak"
- Description: "question"
- Authors: "agorodilov"
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_FIRST_DOC]
- DependsOn: [{Feature: ["QClassKak"], Slice: "web_production"}]
- }
- Factor {
- Index: 11
- CppName: "RF_REMOVED_11"
- Name: "RF_Removed_11"
- Description: ""
- Responsibles: ""
- Tags: [TG_REMOVED]
- }
- Factor {
- Index: 12
- CppName: "RF_REMOVED_12"
- Name: "RF_Removed_12"
- Description: ""
- Responsibles: ""
- Tags: [TG_REMOVED]
- }
- Factor {
- Index: 13
- CppName: "RF_REMOVED_13"
- Name: "RF_Removed_13"
- Description: ""
- Responsibles: ""
- Tags: [TG_REMOVED]
- }
- Factor {
- Index: 14
- CppName: "RF_MEAN_FI_NUM_SLASHES"
- Name: "RF_Mean_NumSlashes"
- Description: "Number of slashes in url"
- Authors: "denplusplus"
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MEAN]
- DependsOn: [{Feature: ["NumSlashes"], Slice: "web_production"}]
- }
- Factor {
- Index: 15
- CppName: "RF_MAX_FI_TITLE_TRIGRAMS_T"
- Name: "RF_Max_TitleTrigramsTitle"
- Description: "Calculates the heading coverage by letter trigrams of the document heading"
- Authors: "akorsun"
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MAX]
- DependsOn: [{Feature: ["TitleTrigramsTitle"], Slice: "web_production"}]
- }
- Factor {
- Index: 16
- CppName: "RF_MAX_FI_NUM_LINKS_FROM_SEGMENT_CONTENT"
- Name: "RF_Max_NumLinksFromSegmentContent"
- Description: ""
- Authors: "vvp"
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MAX]
- DependsOn: [{Feature: ["NumLinksFromSegmentContent"], Slice: "web_production"}]
- }
- Factor {
- Index: 17
- CppName: "RF_MEAN_FI_SEO_IN_PAY_LINKS"
- Name: "RF_Mean_SeoInPayLinks"
- Description: "Number of incoming seo trash links between hosts"
- Authors: "suncpp"
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MEAN]
- DependsOn: [{Feature: ["SeoInPayLinks"], Slice: "web_production"}]
- }
- Factor {
- Index: 18
- CppName: "RF_FI_QUERY_MAXONE"
- Name: "RF_MaxOne"
- Description: "Returns, under the name wmaxone, the maximum degree of commonality of encountered objects in the request. (See ((http://wiki.yandex-team.ru/AlekseySokirko/QueryObjects som markup))).((http://wiki.yandex-team.ru/ArsenGadzhikurbanov/Wares#maxone More))"
- Authors: "arseny"
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_FIRST_DOC]
- DependsOn: [{Feature: ["MaxOne"], Slice: "web_production"}]
- }
- Factor {
- Index: 19
- CppName: "RF_MAX_FI_METRIKA_URL_AVG_TIME"
- Name: "RF_Max_MetrikaUrlAvgTime"
- Description: "Similar to YabarUrlAvgTime"
- Authors: "smikler"
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MAX]
- DependsOn: [{Feature: ["MetrikaUrlAvgTime"], Slice: "web_production"}]
- }
- Factor {
- Index: 20
- CppName: "RF_MIN_FI_DBM40"
- Name: "RF_Min_DBM40"
- Description: "Variation on the theme ((http://wiki.yandex-team.ru/JandeksPoisk/KachestvoPoiska/ObshayaFormula/TekushhieKomponenty/DBM25 DBM25)), see ysite/yandex/relevance/dbm25.cpp"
- Authors: "denplusplus"
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MIN]
- DependsOn: [{Feature: ["DBM40"], Slice: "web_production"}]
- }
- Factor {
- Index: 21
- CppName: "RF_MAX_FI_NAV_LINEAR"
- Name: "RF_Max_NavLinear"
- Description: "((http://wiki.yandex-team.ru/JandeksPoisk/Antispam/polunavigacionnyezaprosy#faktornavigacionnostiparyurl-zapros classifier)) pair of vitals [request-url], vital url for the request, if the value on it is >0.5"
- Authors: ["burangulov", "kustarev"]
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MAX]
- DependsOn: [{Feature: ["NavLinear"], Slice: "web_production"}]
- }
- Factor {
- Index: 22
- CppName: "RF_FI_QUERY_TH3561"
- Name: "RF_QueryThEncyclopedic"
- Description: "The result of the lexical query classifier, which predicts the probability of a click on the topic page 3561"
- Authors: "esoloviev"
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_FIRST_DOC]
- DependsOn: [{Feature: ["QueryThEncyclopedic"], Slice: "web_production"}]
- }
- Factor {
- Index: 23
- CppName: "RF_FI_YABAR_WORD_DNGI"
- Name: "RF_YabarWordDepthNodesGradientMin"
- Description: "Angle in Depth Nodes space, counted by words only (Min by all)"
- Authors: ["atolstikov", "smikler"]
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_FIRST_DOC]
- DependsOn: [{Feature: ["YabarWordDepthNodesGradientMin"], Slice: "web_production"}]
- }
- Factor {
- Index: 24
- CppName: "RF_MEAN_FI_SEGMENT_WORD_PORTION_FROM_MAINCONTENT"
- Name: "RF_Mean_SegmentWordPortionFromMainContent"
- Description: "Proportion of document words from segments with score > 2."
- Authors: "nordic"
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MEAN]
- DependsOn: [{Feature: ["SegmentWordPortionFromMainContent"], Slice: "web_production"}]
- }
- Factor {
- Index: 25
- CppName: "RF_MEAN_FI_NHOP_IS_FINAL"
- Name: "RF_Mean_NHopIsFinal"
- Description: "The number of threads in which the url was last, normalized by the total number of threads in which this url was."
- Authors: "vvp"
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MEAN]
- DependsOn: [{Feature: ["NHopIsFinal"], Slice: "web_production"}]
- }
- Factor {
- Index: 26
- CppName: "RF_MIN_FI_BCLMF"
- Name: "RF_Min_Bclmf"
- Description: "BCLM for Annotation index, doc text and links."
- Authors: "alsafr"
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MIN]
- DependsOn: [{Feature: ["Bclmf"], Slice: "web_production"}]
- }
- Factor {
- Index: 27
- CppName: "RF_MEAN_FI_URL_CLICKS_MAX_GEO_CITY_FRC_WEIGHT"
- Name: "RF_Mean_URLClicksMaxGeoCityFRCWeight"
- Description: "Normalized corrected clicks count by query with user's city(gc=) mentioned"
- Authors: "esoloviev"
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MEAN]
- DependsOn: [{Feature: ["URLClicksMaxGeoCityFRCWeight"], Slice: "web_production"}]
- }
- Factor {
- Index: 28
- CppName: "RF_MEAN_FI_YABAR_URL_REVISITS"
- Name: "RF_Mean_YabarUrlRevisits"
- Description: "Returning user to url"
- Authors: ["shpilman", "atolstikov"]
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MEAN]
- DependsOn: [{Feature: ["YabarUrlRevisits"], Slice: "web_production"}]
- }
- Factor {
- Index: 29
- CppName: "RF_MAX_FI_YABAR_URL_REVISITS"
- Name: "RF_Max_YabarUrlRevisits"
- Description: "Returning user to url"
- Authors: ["shpilman", "atolstikov"]
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MAX]
- DependsOn: [{Feature: ["YabarUrlRevisits"], Slice: "web_production"}]
- }
- Factor {
- Index: 30
- CppName: "RF_MAX_FI_CORRECTED_CTR_XFACTOR_VALUE_WCM_AVG"
- Name: "RF_Max_CorrectedCtrXfactorValueWcmAvg"
- Description: "CorrectedCtrXfactor in annotation index, ValueWcmAvg factor"
- Authors: ["vvp", "alsafr", "avatar"]
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MAX]
- DependsOn: [{Feature: ["CorrectedCtrXfactorValueWcmAvg"], Slice: "web_production"}]
- }
- Factor {
- Index: 31
- CppName: "RF_MAX_FI_DOUBLE_FRC_QUERY_MATCH_PREDICTION"
- Name: "RF_Max_DoubleFrcQueryMatchPrediction"
- Description: "DoubleFrc in annotation index, QueryMatchPrediction factor"
- Authors: ["alsafr", "yustuken"]
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MAX]
- DependsOn: [{Feature: ["DoubleFrcQueryMatchPrediction"], Slice: "web_production"}]
- }
- Factor {
- Index: 32
- CppName: "RF_MAX_FI_XF_DT_SHOW_ALL_MAX_F_TEXT_COSINE_MATCH_MAX_PREDICTION"
- Name: "RF_Max_XfDtShowAllMaxFTextCosineMatchMaxPrediction"
- Description: "Linguistic boosting factor. Extension type: XfDtShow. Factor: CosineMaxMatchPrediction by text and title. Maximum factor value by extensions."
- Authors: ["gotmanov", "alsafr"]
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MAX]
- DependsOn: [{Feature: ["XfDtShowAllMaxFTextCosineMatchMaxPrediction"], Slice: "web_production"}]
- }
- Factor {
- Index: 33
- CppName: "RF_MAX_FI_ONE_CLICK_FRC_XF_SP_SUFFIX_MATCH_COUNT"
- Name: "RF_Max_OneClickFrcXfSpSuffixMatchCount"
- Description: "OneClickFrc calculated by sampled period and collaboratively expanded, SuffixMatchCount factor"
- Authors: ["avatar", "yustuken"]
- Responsibles: "nkireev"
- Tags: [TG_SRC_WEB, TG_AGGR_MAX]
- DependsOn: [{Feature: ["OneClickFrcXfSpSuffixMatchCount"], Slice: "web_production"}]
- }
- Factor {
- Index: 34
- CppName: "SF_MEAN_A2_TSIM"
- Name: "SF_Mean_tsim"
- Description: ""
- Authors: "divankov"
- Responsibles: "nkireev"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MEAN]
- DependsOn: [{Feature: ["tsim"]}]
- }
- Factor {
- Index: 35
- CppName: "SF_MEAN_A2_END"
- Name: "SF_Mean_end"
- Description: ""
- Authors: "divankov"
- Responsibles: "nkireev"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MEAN]
- DependsOn: [{Feature: ["end"]}]
- }
- Factor {
- Index: 36
- CppName: "SF_MEAN_A2_BLOCKS"
- Name: "SF_Mean_blocks"
- Description: ""
- Authors: "divankov"
- Responsibles: "nkireev"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MEAN]
- DependsOn: [{Feature: ["blocks"]}]
- }
- Factor {
- Index: 37
- CppName: "SF_MEAN_A2_MTCH2"
- Name: "SF_Mean_mtch2"
- Description: ""
- Authors: "divankov"
- Responsibles: "nkireev"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MEAN]
- DependsOn: [{Feature: ["mtch2"]}]
- }
- Factor {
- Index: 38
- CppName: "SF_MAX_A2_SEG_WEIGHT_SUM"
- Name: "SF_Max_seg_weight_sum"
- Description: ""
- Authors: "divankov"
- Responsibles: "nkireev"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MAX]
- DependsOn: [{Feature: ["seg_weight_sum"]}]
- }
- Factor {
- Index: 39
- CppName: "BF_FI_WEB_CTR_0123"
- Name: "BF_WebCTR0123"
- Description: "Sum of web CTR of first 4 elements"
- Authors: "gilazhev"
- Responsibles: "nkireev"
- Tags: [TG_SRC_BLENDER, TG_AGGR_FIRST_DOC]
- DependsOn: [{Feature: ["WebCTR0123"], Slice: "blender_production"}]
- }
- Factor {
- Index: 40
- CppName: "BF_FI_ERF_ALL_WORDS_TRFM_5HOURS"
- Name: "BF_HasAllWordsTRFmHisto5hFraction"
- Description: "Similar to HasAllWordsTRFmHisto3dFraction, the numerator is the number of documents in the last 5 hours"
- Authors: "nkmakarov"
- Responsibles: "nkireev"
- Tags: [TG_SRC_BLENDER, TG_AGGR_FIRST_DOC]
- DependsOn: [{Feature: ["HasAllWordsTRFmHisto5hFraction"], Slice: "blender_production"}]
- }
- Factor {
- Index: 41
- CppName: "BF_FI_AUTO_HOST_CLASSIFIER"
- Name: "BF_AutoHostClassifier"
- Description: "host classifier for auto vertical"
- Authors: "r-vetrov"
- Responsibles: "nkireev"
- Tags: [TG_SRC_BLENDER, TG_AGGR_FIRST_DOC]
- DependsOn: [{Feature: ["AutoHostClassifier"], Slice: "blender_production"}]
- }
- Factor {
- Index: 42
- CppName: "BF_FI_CLASSIFICATION_KAK"
- Name: "BF_ClassificationKak"
- Description: "Classification wizard rule class Kak"
- Authors: "agorodilov"
- Responsibles: "nkireev"
- Tags: [TG_SRC_BLENDER, TG_AGGR_FIRST_DOC]
- DependsOn: [{Feature: ["ClassificationKak"], Slice: "blender_production"}]
- }
- Factor {
- Index: 43
- CppName: "BF_FI_VIDEO_MAX_WORDS_CSTR"
- Name: "BF_VideoMaxWordsCSTR"
- Description: "VideoMaxWordsCSTR"
- Authors: "orepin"
- Responsibles: "nkireev"
- Tags: [TG_SRC_BLENDER, TG_AGGR_FIRST_DOC]
- DependsOn: [{Feature: ["VideoMaxWordsCSTR"], Slice: "blender_production"}]
- }
- Factor {
- Index: 44
- CppName: "FF_FI_QUERY_MODEL"
- Name: "FF_FI_query_model"
- Description: "score of a unigram text classifier trained on fact-checking queries from cleanup"
- Authors: "nkireev"
- Responsibles: "nkireev"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 45
- CppName: "FF_MAX_FI_QUERYFACT_W2V_SIM"
- Name: "FF_Max_FI_queryfact_w2v_sim"
- Description: "resemblance of the snippet to the query, calculated based on the non-lemmatized word2vec runet"
- Authors: "nkireev"
- Responsibles: "nkireev"
- Tags: [TG_SRC_FACTOID, TG_AGGR_MAX]
- }
- Factor {
- Index: 46
- CppName: "FF_FI_QUERY_HOST2VEC"
- Name: "FF_FI_query_host2vec_weight"
- Description: "the value of the logistic regression that predicts the questionability of the query by the average vector of hosts in the output"
- Authors: "nkireev"
- Responsibles: "nkireev"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 47
- CppName: "FF_MEAN_FI_QUERYDOC_HOST2VEC"
- Name: "FF_Mean_FI_querydoc_host2vec"
- Description: "the value of the logistic regression predicting that the answer is correct from the concatenation of the host vector and the average host vector in the output"
- Authors: "nkireev"
- Responsibles: "nkireev"
- Tags: [TG_SRC_FACTOID, TG_AGGR_MEAN]
- }
- Factor {
- Index: 48
- CppName: "FF_MIN_FI_QUERYDOC_HOST2VEC"
- Name: "FF_Min_FI_querydoc_host2vec"
- Description: "the value of the logistic regression predicting that the answer is correct from the concatenation of the host vector and the average host vector in the output"
- Authors: "nkireev"
- Responsibles: "nkireev"
- Tags: [TG_SRC_FACTOID, TG_AGGR_MIN]
- }
- Factor {
- Index: 49
- CppName: "FF_MAX_FI_QUERYDOC_HOST2VEC"
- Name: "FF_Max_FI_querydoc_host2vec"
- Description: "the value of the logistic regression predicting that the answer is correct from the concatenation of the host vector and the average host vector in the output"
- Authors: "nkireev"
- Responsibles: "nkireev"
- Tags: [TG_SRC_FACTOID, TG_AGGR_MAX]
- }
- Factor {
- Index: 50
- CppName: "FF_FI_QUERY_IS_ENCYC"
- Name: "FF_FI_query_is_encyc"
- Description: "checks if the Encyc wizard rule about encyclopedic query is triggered"
- Authors: "nkireev"
- Responsibles: "nkireev"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 51
- CppName: "FF_FI_CLUSTER_7"
- Name: "FF_FI_cluster_7"
- Description: "proximity of the total vector of hosts to one of the clusters built on the basis of answers from cleanup"
- Authors: "stasd07"
- Responsibles: "nkireev"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 52
- CppName: "FF_FI_CLUSTER_8"
- Name: "FF_FI_cluster_8"
- Description: "proximity of the total vector of hosts to one of the clusters built on the basis of answers from cleanup"
- Authors: "stasd07"
- Responsibles: "nkireev"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 53
- CppName: "FF_MEAN_FI_CROSS_MODEL"
- Name: "FF_Mean_FI_cross_model"
- Description: "the value of the regression trained on bigrams, in which the first word is taken from the query, the second from the answer"
- Authors: "nkireev"
- Responsibles: "nkireev"
- Tags: [TG_SRC_FACTOID, TG_AGGR_MEAN]
- }
- Factor {
- Index: 54
- CppName: "FF_MAX_FI_CROSS_MODEL"
- Name: "FF_Max_FI_cross_model"
- Description: "the value of the regression trained on bigrams, in which the first word is taken from the query, the second from the answer"
- Authors: "nkireev"
- Responsibles: "nkireev"
- Tags: [TG_SRC_FACTOID, TG_AGGR_MAX]
- }
- Factor {
- Index: 55
- CppName: "FF_MAX_FI_SNIPPET_UNIGRAM_WEIGHT"
- Name: "FF_Max_FI_snippet_unigram_weight"
- Description: "the value of a regression trained on the frequencies of the words in the snippet, predicting that the snippet contains an answer"
- Authors: "nkireev"
- Responsibles: "nkireev"
- Tags: [TG_SRC_FACTOID, TG_AGGR_MAX]
- }
- Factor {
- Index: 56
- CppName: "SF_MEAN_A2_FQ_SCHEMA_IS_QUESTION"
- Name: "SF_Mean_fq_schema_is_question"
- Description: "page has markup schema.org question (mean)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MEAN]
- }
- Factor {
- Index: 57
- CppName: "SF_MAX_A2_FQ_SCHEMA_IS_QUESTION"
- Name: "SF_Max_fq_schema_is_question"
- Description: "page has markup schema.org question (max)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MAX]
- }
- Factor {
- Index: 58
- CppName: "SF_MEAN_A2_FQ_SCHEMA_HAS_APPROVED_ANSWER"
- Name: "SF_Mean_fq_schema_has_approved_answer"
- Description: "page has schema.org question markup and best answer selected (mean)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MEAN]
- }
- Factor {
- Index: 59
- CppName: "SF_MAX_A2_FQ_SCHEMA_HAS_APPROVED_ANSWER"
- Name: "SF_Max_fq_schema_has_approved_answer"
- Description: "page has schema.org question markup and best answer selected (max)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MAX]
- }
- Factor {
- Index: 60
- CppName: "SF_MEAN_A2_FQ_SCHEMA_BEST_ANS_WORD_COUNT"
- Name: "SF_Mean_fq_schema_best_ans_word_count"
- Description: "length of the best answer of the schema.org question in words (mean)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MEAN]
- }
- Factor {
- Index: 61
- CppName: "SF_MAX_A2_FQ_SCHEMA_BEST_ANS_WORD_COUNT"
- Name: "SF_Max_fq_schema_best_ans_word_count"
- Description: "best answer length of schema.org question in words (max)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MAX]
- }
- Factor {
- Index: 62
- CppName: "SF_MEAN_A2_FQ_SCHEMA_BEST_ANS_UPVOTE_COUNT"
- Name: "SF_Mean_fq_schema_best_ans_upvote_count"
- Description: "votes for the best schema.org question answer (mean)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MEAN]
- }
- Factor {
- Index: 63
- CppName: "SF_MAX_A2_FQ_SCHEMA_BEST_ANS_UPVOTE_COUNT"
- Name: "SF_Max_fq_schema_best_ans_upvote_count"
- Description: "votes for the best schema.org question answer (max)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MAX]
- }
- Factor {
- Index: 64
- CppName: "SF_MEAN_A2_FQ_SCHEMA_BEST_ANS_MAX_SPAN_LCSWC_DIV_SPAN_WC"
- Name: "SF_Mean_fq_schema_best_ans_max_span_lcswc_div_span_wc"
- Description: "length of longest common substring of schema.org question and snippet best answer (percentage of snippet words) (mean)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MEAN]
- }
- Factor {
- Index: 65
- CppName: "SF_MAX_A2_FQ_SCHEMA_BEST_ANS_MAX_SPAN_LCSWC_DIV_SPAN_WC"
- Name: "SF_Max_fq_schema_best_ans_max_span_lcswc_div_span_wc"
- Description: "length of longest common substring of best schema.org question answer and snippet (percentage of snippet words) (max)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MAX]
- }
- Factor {
- Index: 66
- CppName: "SF_MEAN_A2_FQ_SCHEMA_BEST_ANS_MAX_SPAN_LCSWC_DIV_ANS_WC"
- Name: "SF_Mean_fq_schema_best_ans_max_span_lcswc_div_ans_wc"
- Description: "length of longest common substring of the best answer schema.org question and snippet (percentage of words of the best answer) (mean)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MEAN]
- }
- Factor {
- Index: 67
- CppName: "SF_MAX_A2_FQ_SCHEMA_BEST_ANS_MAX_SPAN_LCSWC_DIV_ANS_WC"
- Name: "SF_Max_fq_schema_best_ans_max_span_lcswc_div_ans_wc"
- Description: "length of longest common substring of best answer schema.org question and snippet (best answer word share) (max)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MAX]
- }
- Factor {
- Index: 68
- CppName: "SF_MEAN_A2_FQ_SCHEMA_BEST_ANS_LCSW_POS_RATIO_IN_SNIP"
- Name: "SF_Mean_fq_schema_best_ans_lcsw_pos_ratio_in_snip"
- Description: "position of the greatest common substring of the best answer of the schema.org question and the snippet in the snippet (mean)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MEAN]
- }
- Factor {
- Index: 69
- CppName: "SF_MAX_A2_FQ_SCHEMA_BEST_ANS_LCSW_POS_RATIO_IN_SNIP"
- Name: "SF_Max_fq_schema_best_ans_lcsw_pos_ratio_in_snip"
- Description: "greatest common substring position of best schema.org question answer and snippet in snippet (max)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MAX]
- }
- Factor {
- Index: 70
- CppName: "SF_MEAN_A2_FQ_SCHEMA_BEST_ANS_LCSW_POS_RATIO_IN_ANSWER"
- Name: "SF_Mean_fq_schema_best_ans_lcsw_pos_ratio_in_ans"
- Description: "greatest common substring position of best answer schema.org question and snippet in best answer (mean)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MEAN]
- }
- Factor {
- Index: 71
- CppName: "SF_MAX_A2_FQ_SCHEMA_BEST_ANS_LCSW_POS_RATIO_IN_ANSWER"
- Name: "SF_Max_fq_schema_best_ans_lcsw_pos_ratio_in_ans"
- Description: "greatest common substring position of best answer schema.org question and snippet in best answer (max)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MAX]
- }
- Factor {
- Index: 72
- CppName: "SF_MEAN_A2_FQ_SCHEMA_MATCHED_ANS_WORD_COUNT"
- Name: "SF_Mean_fq_schema_matched_ans_word_count"
- Description: "word length of the schema.org question most similar to the snippet (mean)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MEAN]
- }
- Factor {
- Index: 73
- CppName: "SF_MAX_A2_FQ_SCHEMA_MATCHED_ANS_WORD_COUNT"
- Name: "SF_Max_fq_schema_matched_ans_word_count"
- Description: "word length of the schema.org question most similar to the snippet (max)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MAX]
- }
- Factor {
- Index: 74
- CppName: "SF_MEAN_A2_FQ_SCHEMA_MATCHED_ANS_UPVOTE_COUNT"
- Name: "SF_Mean_fq_schema_matched_ans_upvote_count"
- Description: "number of votes for the schema.org question most similar to the snippet (mean)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MEAN]
- }
- Factor {
- Index: 75
- CppName: "SF_MAX_A2_FQ_SCHEMA_MATCHED_ANS_UPVOTE_COUNT"
- Name: "SF_Max_fq_schema_matched_ans_upvote_count"
- Description: "votes per schema.org question most similar to the snippet (max)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MAX]
- }
- Factor {
- Index: 76
- CppName: "SF_MEAN_A2_FQ_SCHEMA_MATCHED_ANS_MAX_SPAN_LCSWC_DIV_SPAN_WC"
- Name: "SF_Mean_fq_schema_matched_ans_max_span_lcswc_div_span_wc"
- Description: "length of longest common substring of schema.org question most similar to snippet and snippet (percentage of snippet words) (mean)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MEAN]
- }
- Factor {
- Index: 77
- CppName: "SF_MAX_A2_FQ_SCHEMA_MATCHED_ANS_MAX_SPAN_LCSWC_DIV_SPAN_WC"
- Name: "SF_Max_fq_schema_matched_ans_max_span_lcswc_div_span_wc"
- Description: "longest common substring of schema.org question most similar to snippet and snippet (percentage of snippet words) (max)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MAX]
- }
- Factor {
- Index: 78
- CppName: "SF_MEAN_A2_FQ_SCHEMA_MATCHED_ANS_MAX_SPAN_LCSWC_DIV_ANS_WC"
- Name: "SF_Mean_fq_schema_matched_ans_max_span_lcswc_div_ans_wc"
- Description: "length of longest common substring of schema.org question most similar to snippet and snippet (response word ratio) (mean)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MEAN]
- }
- Factor {
- Index: 79
- CppName: "SF_MAX_A2_FQ_SCHEMA_MATCHED_ANS_MAX_SPAN_LCSWC_DIV_ANS_WC"
- Name: "SF_Max_fq_schema_matched_ans_max_span_lcswc_div_ans_wc"
- Description: "longest common substring of schema.org question answer most similar to snippet and snippet (response word ratio) (max)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MAX]
- }
- Factor {
- Index: 80
- CppName: "SF_MEAN_A2_FQ_SCHEMA_MATCHED_ANS_LCSW_POS_RATIO_IN_SNIP"
- Name: "SF_Mean_fq_schema_matched_ans_lcsw_pos_ratio_in_snip"
- Description: "the position of the largest common substring of the schema.org question most similar to the snippet and the snippet in the snippet (mean)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MEAN]
- }
- Factor {
- Index: 81
- CppName: "SF_MAX_A2_FQ_SCHEMA_MATCHED_ANS_LCSW_POS_RATIO_IN_SNIP"
- Name: "SF_Max_fq_schema_matched_ans_lcsw_pos_ratio_in_snip"
- Description: "position of the largest common substring of the schema.org question most similar to the snippet and the snippet in the snippet (max)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MAX]
- }
- Factor {
- Index: 82
- CppName: "SF_MEAN_A2_FQ_SCHEMA_MATCHED_ANS_LCSW_POS_RATIO_IN_ANSWER"
- Name: "SF_Mean_fq_schema_matched_ans_lcsw_pos_ratio_in_ans"
- Description: "position of the largest common substring of the schema.org question most similar to the snippet and the snippet in the response (mean)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MEAN]
- }
- Factor {
- Index: 83
- CppName: "SF_MAX_A2_FQ_SCHEMA_MATCHED_ANS_LCSW_POS_RATIO_IN_ANSWER"
- Name: "SF_Max_fq_schema_matched_ans_lcsw_pos_ratio_in_ans"
- Description: "position of the largest common substring of the schema.org question answer most similar to the snippet and the snippet in the answer (max)"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MAX]
- }
- Factor {
- Index: 84
- CppName: "SF_MEDIAN_A2_FQ_RU_FACT_SNIPPET_DSSM_FACTOID_SCORE"
- Name: "Sf_Median_fq_ru_fact_snippet_dssm_factoid_score"
- Description: "Median of factoid DSSM scores of <query, snippet> pairs for first issue documents FACTS-747, FACTS-19"
- Authors: "bogolubsky"
- Responsibles: "bogolubsky"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MEDIAN]
- }
- Factor {
- Index: 85
- CppName: "SF_MIN_A2_FQ_RU_FACT_SNIPPET_DSSM_FACTOID_SCORE"
- Name: "Sf_Min_fq_ru_fact_snippet_dssm_factoid_score"
- Description: "Minimum factoid DSSM scores of <query, snippet> pairs for the first FACTS-747, FACTS-19 issuance documents"
- Authors: "bogolubsky"
- Responsibles: "bogolubsky"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MIN]
- }
- Factor {
- Index: 86
- CppName: "SF_STD_A2_FQ_RU_FACT_SNIPPET_DSSM_FACTOID_SCORE"
- Name: "Sf_Std_fq_ru_fact_snippet_dssm_factoid_score"
- Description: "Root-mean-square deviation of the vector of factoid DSSM scores of <query, snippet> pairs for the first issuance documents FACTS-747, FACTS-19"
- Authors: "bogolubsky"
- Responsibles: "bogolubsky"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_STD]
- }
- Factor {
- Index: 87
- CppName: "FF_FI_IS_ASSISTANT"
- Name: "FF_FI_is_assistant"
- Description: "an indication that the request came from an assistant"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 88
- CppName: "SF_MEDIAN_A2_FQ_TOMATO_DSSM_FACTOID_SCORE"
- Name: "Sf_Median_fq_tomato_dssm_factoid_score"
- Description: "Median of factoid DSSM scores for the new tomato dssm formula. FACTS-2545"
- Authors: "antonio"
- Responsibles: "antonio"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MEDIAN]
- }
- Factor {
- Index: 89
- CppName: "SF_MIN_A2_FQ_TOMATO_DSSM_FACTOID_SCORE"
- Name: "Sf_Min_fq_tomato_dssm_factoid_score"
- Description: "Minimum factoid DSSM scores by new tomato dssm formula. FACTS-2545"
- Authors: "antonio"
- Responsibles: "antonio"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MIN]
- }
- Factor {
- Index: 90
- CppName: "SF_STD_A2_FQ_TOMATO_DSSM_FACTOID_SCORE"
- Name: "Sf_Std_fq_tomato_dssm_factoid_score"
- Description: "Standard deviation of DSSM scores by the new tomato dssm formula. FACTS-2545"
- Authors: "antonio"
- Responsibles: "antonio"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_STD]
- }
- Factor {
- Index: 91
- CppName: "FF_FI_NEOCORTEX_SERP_ITEMS_WIZ_IMAGES"
- Name: "FF_FI_neocortex_serp_items_wiz_images"
- Description: "Cosine between request embeddings and presence of wiz-images on the SERP"
- Authors: "antonio"
- Responsibles: "antonio"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 92
- CppName: "FF_FI_NEOCORTEX_SERP_ITEMS_WIZ_VIDEO"
- Name: "FF_FI_neocortex_serp_items_wiz_video"
- Description: "Cosine between request embeddings and presence of wiz-video on the SERP"
- Authors: "antonio"
- Responsibles: "antonio"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 93
- CppName: "FF_FI_NEOCORTEX_SERP_ITEMS_UNION_FACTS"
- Name: "FF_FI_neocortex_serp_items_union_facts"
- Description: "Cosine between request embeddings and presence of union-facts on the SERP"
- Authors: "antonio"
- Responsibles: "antonio"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 94
- CppName: "FF_FI_NEOCORTEX_SERP_ITEMS_WIZ_MUSICPLAYER"
- Name: "FF_FI_neocortex_serp_items_wiz_musicplayer"
- Description: "Cosine between query embeddings and presence of wiz-musicplayer on the SERP"
- Authors: "antonio"
- Responsibles: "antonio"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 95
- CppName: "FF_FI_NEOCORTEX_SERP_ITEMS_WIZ_MAPS"
- Name: "FF_FI_neocortex_serp_items_wiz_maps"
- Description: "Cosine between query embeddings and presence of wiz-maps on the SERP"
- Authors: "antonio"
- Responsibles: "antonio"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 96
- CppName: "FF_FI_NEOCORTEX_SERP_ITEMS_POSITIVE_QUERY_MX"
- Name: "FF_FI_neocortex_serp_items_positive_query_mx"
- Description: "Cosine between query embeddings and presence of positive query mx"
- Authors: "antonio"
- Responsibles: "antonio"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 97
- CppName: "SF_MAX_A2_FQ_TOMATO_DSSM_FACTOID_SCORE"
- Name: "Sf_Max_fq_tomato_dssm_factoid_score"
- Description: "Maximum DSSM score with new tomato dssm formula. FACTS-2545"
- Authors: "antonio"
- Responsibles: "antonio"
- Tags: [TG_SRC_SNIPPET, TG_AGGR_MAX]
- }
- Factor {
- Index: 98
- CppName: "FF_MAX_FI_BERT_FACTSNIP_ANSWER_DSSM"
- Name: "Ff_Max_fi_bert_factsnip_answer_dssm"
- Description: "Maximum DSSM scores based on bert_factsnip_answer"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_FACTOID, TG_AGGR_MAX]
- }
- Factor {
- Index: 99
- CppName: "FF_MEAN_FI_BERT_FACTSNIP_ANSWER_DSSM"
- Name: "Ff_Mean_fi_bert_factsnip_answer_dssm"
- Description: "Mean of DSSM scores based on bert_factsnip_answer"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_FACTOID, TG_AGGR_MEAN]
- }
- Factor {
- Index: 100
- CppName: "FF_FI_FACT_WORD_MIN_FREQUENCY"
- Name: "FF_FI_fact_word_min_frequency"
- Description: "Minimum frequency of a word from the query, based on word frequencies in fact queries"
- Authors: "agroshev"
- Responsibles: "agroshev"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 101
- CppName: "FF_FI_FACT_WORD_MAX_FREQUENCY"
- Name: "FF_FI_fact_word_max_frequency"
- Description: "Maximum frequency of a word from the query, based on word frequencies in fact queries"
- Authors: "agroshev"
- Responsibles: "agroshev"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 102
- CppName: "FF_FI_FACT_WORD_MED_SMOOTH_INVERSE_FREQUENCY"
- Name: "FF_FI_fact_word_med_smooth_inverse_frequency"
- Description: "Smoothed inverted median of word frequencies from the query, based on word frequencies in fact queries"
- Authors: "agroshev"
- Responsibles: "agroshev"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 103
- CppName: "FF_FI_FACT_WORD_RELATIVE_MIN_FREQUENCY"
- Name: "FF_FI_fact_word_relative_min_frequency"
- Description: "Minimum frequency of a word from a query, by word frequencies in fact queries, relative to word frequency in general texts"
- Authors: "agroshev"
- Responsibles: "agroshev"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 104
- CppName: "FF_FI_FACT_WORD_RELATIVE_MEAN_FREQUENCY"
- Name: "FF_FI_fact_word_relative_mean_frequency"
- Description: "Average frequency of words from the query, based on word frequencies in fact queries, relative to word frequencies in general texts"
- Authors: "agroshev"
- Responsibles: "agroshev"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 105
- CppName: "FF_FI_FACT_WORD_RELATIVE_MED_SMOOTH_INVERSE_FREQUENCY"
- Name: "FF_FI_fact_word_relative_med_smooth_inverse_frequency"
- Description: "Smoothed inverted median of word frequencies from the query, over word frequencies in fact queries, relative to word frequencies in general texts"
- Authors: "agroshev"
- Responsibles: "agroshev"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 106
- CppName: "FF_FI_FACT_BIGRAM_MAX_FREQUENCY"
- Name: "FF_FI_fact_bigram_max_frequency"
- Description: "Maximum frequency of bigrams from the query, based on bigram frequencies in fact queries"
- Authors: "agroshev"
- Responsibles: "agroshev"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 107
- CppName: "FF_FI_NORM_QUERY_CHAR_LEN"
- Name: "FF_FI_norm_query_char_len"
- Description: "Query length in characters after NormalizeText"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 108
- CppName: "FF_FI_NORM_QUERY_WORD_LEN"
- Name: "FF_FI_norm_query_word_len"
- Description: "Query length in words after NormalizeText"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 109
- CppName: "FF_FI_QUESTION_WORD_COUNT"
- Name: "FF_FI_question_word_count"
- Description: "Number of question words in the query"
- Authors: "zhenek"
- Responsibles: "zhenek"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 110
- CppName: "FF_FI_FIRST_ADVPRO_HASH"
- Name: "FF_FI_first_advpro_hash"
- Description: "Hash of the first pronominal adverb in the query"
- Authors: "darui99"
- Responsibles: "darui99"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 111
- CppName: "FF_FI_FIRST_PREPOSITION_HASH"
- Name: "FF_FI_first_preposition_hash"
- Description: "Hash of the first preposition in the query"
- Authors: "darui99"
- Responsibles: "darui99"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 112
- CppName: "FF_FI_LONG_WORD_COUNT"
- Name: "FF_FI_long_word_count"
- Description: "Number of words with length greater than 3 in the normalized query"
- Authors: "darui99"
- Responsibles: "darui99"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 113
- CppName: "FF_FI_QUERY_NORMALIZIED_LENGTH_DIFF"
- Name: "FF_FI_query_normalizied_length_diff"
- Description: "(length_of_query_without_normalization - length_of_all_normalized_words) / length_of_non-normalized_query (word lengths have coefficients of arithmetic progression)"
- Authors: "darui99"
- Responsibles: "darui99"
- Tags: [TG_SRC_FACTOID, TG_AGGR_FIRST_DOC]
- }
- Factor {
- Index: 114
- CppName: "RF_MEAN_FI_BQPRSAMPLE_MIX_MATCH_WEIGHTED_VALUE"
- Name: "RF_MEAN_FI_BQPRSampleMixMatchWeightedValue"
- Description: "average from MixMatchWeightedValue factor over hits from BQPRSample stream"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MEAN]
- }
- Factor {
- Index: 115
- CppName: "RF_MEAN_FI_SAMPLE_PERIOD_DAY_FRC_FULL_MATCH_VALUE"
- Name: "RF_MEAN_FI_SamplePeriodDayFrcFullMatchValue"
- Description: "average of FullMatchValue factor over hits from SamplePeriodDayFrc stream"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MEAN]
- }
- Factor {
- Index: 116
- CppName: "RF_MEAN_FI_SAMPLE_PERIOD_DAY_FRC_MIX_MATCH_WEIGHTED_VALUE"
- Name: "RF_MEAN_FI_SamplePeriodDayFrcMixMatchWeightedValue"
- Description: "average of SamplePeriodDayFrcMixMatchWeightedValue"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MEAN]
- }
- Factor {
- Index: 117
- CppName: "RF_STD_FI_DOUBLE_FRC_CM_MATCH_TOP5_AVG_MATCH"
- Name: "RF_STD_FI_DoubleFrcCMMatchTop5AvgMatch"
- Description: "standard deviation of DoubleFrcCMMatchTop5AvgMatch"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_STD]
- }
- Factor {
- Index: 118
- CppName: "RF_STD_FI_ONE_CLICK_FRC_XF_SP_PER_WORD_CM_MAX_MATCH_MIN"
- Name: "RF_STD_FI_OneClickFrcXfSpPerWordCMaxMatchMin"
- Description: "standard deviation of OneClickFrcXfSpPerWordCMaxMatchMin"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_STD]
- }
- Factor {
- Index: 119
- CppName: "RF_MEAN_FI_AVG_DT_WEIGHTED_BY_RANK_MOBILE_FULL_MATCH_VALUE"
- Name: "RF_MEAN_FI_AvgDTWeightedByRankMobileFullMatchValue"
- Description: "average of AvgDTWeightedByRankMobileFullMatchValue"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MEAN]
- }
- Factor {
- Index: 120
- CppName: "RF_MIN_FI_QFUF_ALL_AVG_W"
- Name: "RF_MIN_FI_QfufAllAvgW"
- Description: "minimum from QfufAllAvgW, which is: Linguistic boosting factor. Average weight of extensions of type Qfuf."
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MIN]
- }
- Factor {
- Index: 121
- CppName: "RF_MAX_FI_QFUF_ALL_TOTAL_W"
- Name: "RF_MAX_FI_QfufAllTotalW"
- Description: "maximum from QfufAllTotalW, which is: Linguistic boosting factor. Extension type: Qfuf. Renormalized total extension weight."
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MAX]
- }
- Factor {
- Index: 122
- CppName: "RF_MIN_FI_RANDOM_LOG_QUERY_AVG_ADD_TIME"
- Name: "RF_MIN_FI_RandomLogQueryAvgAddTime"
- Description: "Minimum from RandomLogQueryAvgAddTime: Average AddTime value for query over the year. Calculated offline."
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MIN]
- }
- Factor {
- Index: 123
- CppName: "RF_MIN_FI_RANDOM_LOG_QUERY_AVG_TEXT_HI_RELEV_SYN"
- Name: "RF_MIN_FI_RandomLogQueryAvgTxtHiRelSy"
- Description: "Minimum from RandomLogQueryAvgTxtHiRelSy: Average value of TxtHiRelSy per query over the year. Calculated offline."
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MIN]
- }
- Factor {
- Index: 124
- CppName: "RF_MIN_FI_RANDOM_LOG_QUERY_AVG_TEXT_LIKE"
- Name: "RF_MIN_FI_RandomLogQueryAvgTextLike"
- Description: "Minimum from RandomLogQueryAvgTextLike: Average value of TextLike by query over the year. Calculated offline."
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MIN]
- }
- Factor {
- Index: 125
- CppName: "RF_MAX_FI_RANDOM_LOG_QUERY_AVG_HAS_NO_ALL_WORDS_TR_SYN"
- Name: "RF_MAX_FI_RandomLogQueryAvgHasNoAllWordsTRSy"
- Description: "Maximum from RandomLogQueryAvgHasNoAllWordsTRSy: Average of HasNoAllWordsTRSy per query over the year. Calculated offline."
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MAX]
- }
- Factor {
- Index: 126
- CppName: "RF_MIN_FI_RANDOM_LOG_QUERY_AVG_YABAR_HOST_AVG_TIME2"
- Name: "RF_MIN_FI_RandomLogQueryAvgIsForum"
- Description: "minimum from RandomLogQueryAvgIsForum"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MIN]
- }
- Factor {
- Index: 127
- CppName: "RF_MIN_FI_RANDOM_LOG_QUERY_AVG_QUERY_DOWNER_ONLY_CLICK_RATE"
- Name: "RF_MIN_FI_RandomLogQueryAvgQueryDOwnerOnlyClickRate"
- Description: "minimum from RandomLogQueryAvgQueryDOwnerOnlyClickRate"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MIN]
- }
- Factor {
- Index: 128
- CppName: "RF_MIN_FI_RANDOM_LOG_QUERY_AVG_LONGEST_TEXT"
- Name: "RF_MIN_FI_RandomLogQueryAvgLongestText"
- Description: "minimum from RandomLogQueryAvgLongestText"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MIN]
- }
- Factor {
- Index: 129
- CppName: "RF_MEAN_FI_RANDOM_LOG_QUERY_AVG_DIFFERENT_INTERNAL_LINKS"
- Name: "RF_MEAN_FI_RandomLogQueryAvgDifferentInternalLinks"
- Description: "mean of RandomLogQueryAvgDifferentInternalLinks"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MEAN]
- }
- Factor {
- Index: 130
- CppName: "RF_MIN_FI_RANDOM_LOG_QUERY_AVG_QUERY_DOWNER_ONLY_CLICK_RATE_REG"
- Name: "RF_MIN_FI_RandomLogQueryAvgQueryDOwnerOnlyClickRate_Reg"
- Description: "minimum from RandomLogQueryAvgQueryDOwnerOnlyClickRate_Reg"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MIN]
- }
- Factor {
- Index: 131
- CppName: "RF_MIN_FI_RANDOM_LOG_QUERY_AVG_BM25_0"
- Name: "RF_MIN_FI_RandomLogQueryAvgBM25_0"
- Description: "minimum from RandomLogQueryAvgBM25_0"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MIN]
- }
- Factor {
- Index: 132
- CppName: "RF_MIN_FI_RANDOM_LOG_QUERY_AVG_QUERIES_AVG_CM2"
- Name: "RF_MIN_FI_RandomLogQueryAvgQueriesAvgCM2"
- Description: "minimum from RandomLogQueryAvgQueriesAvgCM2"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MIN]
- }
- Factor {
- Index: 133
- CppName: "RF_MIN_FI_RANDOM_LOG_QUERY_AVG_REG_BROWSER_USER_HUB"
- Name: "RF_MIN_FI_RandomLogQueryAvgRegBrowserUserHub"
- Description: "minimum from RandomLogQueryAvgRegBrowserUserHub"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MIN]
- }
- Factor {
- Index: 134
- CppName: "RF_MAX_FI_RANDOM_LOG_QUERY_AVG_QUERY_URL_CORRECTED_CTR_XFACTOR"
- Name: "RF_MAX_FI_RandomLogQueryAvgQueryUrlCorrectedCtrXfactor"
- Description: "maximum from RandomLogQueryAvgQueryUrlCorrectedCtrXfactor"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MAX]
- }
- Factor {
- Index: 135
- CppName: "RF_MIN_FI_RANDOM_LOG_QUERY_AVG_XF_DT_SHOW_ALL_SUM_WF_SUM_W_BODY_MIN_WINDOW_SIZE"
- Name: "RF_MIN_FI_RandomLogQueryAvgXfDtShowAllSumWFSumWBodyMinWindowSize"
- Description: "minimum from RandomLogQueryAvgXfDtShowAllSumWFSumWBodyMinWindowSize"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MIN]
- }
- Factor {
- Index: 136
- CppName: "RF_MAX_FI_RANDOM_LOG_QUERY_CLICKS_WEIGHTED_AVG_YABAR_URL_AVG_TIME"
- Name: "RF_MAX_FI_RandomLogQueryClicksWeightedAvgYabarUrlAvgTime"
- Description: "Maximum from RandomLogQueryClicksWeightedAvgYabarUrlAvgTime: Click-weighted average of YabarUrlAvgTime per query over the year. Calculated offline."
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MAX]
- }
- Factor {
- Index: 137
- CppName: "RF_MAX_FI_RANDOM_LOG_QUERY_CLICKS_WEIGHTED_AVG_DIFFERENT_INTERNAL_LINKS"
- Name: "RF_MAX_FI_RandomLogQueryClicksWeightedAvgDifferentInternalLinks"
- Description: "maximum from RandomLogQueryClicksWeightedAvgDifferentInternalLinks"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MAX]
- }
- Factor {
- Index: 138
- CppName: "RF_STD_FI_VPCG_CORRECTED_CLICKS_SLP_PER_WORD_CM_MAX_PREDICTION_MIN"
- Name: "RF_STD_FI_VpcgCorrectedClicksSLPPerWordCMMaxPredictionMin"
- Description: "standard deviation of VpcgCorrectedClicksSLPPerWordCMMaxPredictionMin"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_STD]
- }
- Factor {
- Index: 139
- CppName: "RF_MEAN_FI_VPCG_CORRECTED_CLICKS_SLP_MIX_MATCH_WEIGHTED_VALUE"
- Name: "RF_MEAN_FI_VpcgCorrectedClicksSLPMixMatchWeightedValue"
- Description: "average of VpcgCorrectedClicksSLPMixMatchWeightedValue"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MEAN]
- }
- Factor {
- Index: 140
- CppName: "RF_STD_FI_VPCG_CORRECTED_CLICKS_SLP_CM_MATCH_TOP5_AVG_PREDICTION"
- Name: "RF_STD_FI_VpcgCorrectedClicksSLPCMMatchTop5AvgPrediction"
- Description: "standard deviation of VpcgCorrectedClicksSLPCMMatchTop5AvgPrediction"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_STD]
- }
- Factor {
- Index: 141
- CppName: "RF_MAX_FI_QUERY_DOPP_MEDIAN_DWELLTIME"
- Name: "RF_MAX_FI_QueryDoppMedianDwelltime"
- Description: "maximum from QueryDoppMedianDwelltime"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MAX]
- }
- Factor {
- Index: 142
- CppName: "RF_MEAN_FI_QUERY_DOPP_MULTIPLE_CLICKS_SHOWS"
- Name: "RF_MEAN_FI_QueryDoppMultipleClicksShows"
- Description: "average of QueryDoppMultipleClicksShows"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MEAN]
- }
- Factor {
- Index: 143
- CppName: "RF_MAX_FI_QUERY_DOPP_MULTIPLE_CLICKS_PROBABILITY"
- Name: "RF_MAX_FI_QueryDoppMultipleClicksProbability"
- Description: "maximum from QueryDoppMultipleClicksProbability"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MAX]
- }
- Factor {
- Index: 144
- CppName: "RF_MAX_FI_XFDT_SHOW_ALL_TOTAL_W"
- Name: "RF_MAX_FI_XfDtShowAllTotalW"
- Description: "maximum from XfDtShowAllTotalW"
- Authors: "nouret"
- Responsibles: "zhenek"
- Tags: [TG_SRC_WEB, TG_AGGR_MAX]
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement