Advertisement
Guest User

Weight of Evidence Transformation - RapidMiner

a guest
Jul 29th, 2015
209
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
XML 18.99 KB | None | 0 0
  1. <?xml version="1.0" encoding="UTF-8" standalone="no"?>
  2. <process version="6.4.000">
  3.   <context>
  4.     <input/>
  5.     <output/>
  6.     <macros/>
  7.   </context>
  8.   <operator activated="true" class="process" compatibility="6.4.000" expanded="true" name="Process">
  9.     <process expanded="true">
  <!-- Get Data ("retrieve"): entry point of the process. Reads the repository entry
       named by repository_entry, here the relative path ../data/large_set.
       Change that value to run the process on a different data set. -->
  10.       <operator activated="true" class="retrieve" compatibility="6.4.000" expanded="true" height="60" name="Get Data" width="90" x="45" y="30">
  11.         <parameter key="repository_entry" value="../data/large_set"/>
  12.       </operator>
  <!-- Discretize ("discretize_by_entropy"): no parameters are set in this file, so the
       operator runs with whatever defaults the tool applies.
       NOTE(review): the attribute loop further down only visits nominal attributes
       (value_type = nominal), so presumably this step exists to turn numeric
       attributes into nominal bins first; confirm. -->
  13.       <operator activated="true" class="discretize_by_entropy" compatibility="6.4.000" expanded="true" height="94" name="Discretize" width="90" x="179" y="30">
  14.         <description align="center" color="transparent" colored="false" width="126">Testing Entropy Discretization.</description>
  15.       </operator>
  <!-- Generates two 0/1 indicator attributes from the label:
         label_pos = 1 when LABELFIELD equals the string "1.0", otherwise 0
         label_neg = 1 when LABELFIELD equals the string "0.0", otherwise 0
       NOTE(review): LABELFIELD looks like a placeholder for the name of the real label
       attribute, and the compared values are strings ("1.0" / "0.0"); both probably
       need adapting to the data set in use. Confirm before running. -->
  16.       <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true" height="76" name="Create Pos &amp; Neg labels as attributes" width="90" x="380" y="30">
  17.         <list key="function_descriptions">
  18.           <parameter key="label_pos" value="if(LABELFIELD == &quot;1.0&quot;,1,0)"/>
  19.           <parameter key="label_neg" value="if(LABELFIELD == &quot;0.0&quot;,1,0)"/>
  20.         </list>
  21.         <description align="center" color="transparent" colored="false" width="126">If not created already create two attributes holding values for positive and negative labels.</description>
  22.       </operator>
  <!-- Filter Examples: optional clean-up hook placed before the WoE computation.
       As saved, condition_class is "all" and the filters_list holds one entry,
       "CTRYCD.equals." (attribute CTRYCD, comparison "equals", empty comparison value).
       NOTE(review): with condition_class set to "all" the filters_list entry is
       presumably not applied and every example passes through; confirm, and set
       the condition class and filter entry to match your data if rows must be removed. -->
  23.       <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="94" name="Filter Examples" width="90" x="514" y="30">
  24.         <parameter key="condition_class" value="all"/>
  25.         <list key="filters_list">
  26.           <parameter key="filters_entry_key" value="CTRYCD.equals."/>
  27.         </list>
  28.         <description align="center" color="transparent" colored="false" width="126">Use this filter to remove example values you do not want to appear in the WoE calculations. For example, values that deliver infinite results.</description>
  29.       </operator>
  <!-- Defines two macros, label_pos and label_neg, whose values are the names of the
       positive and negative indicator attributes (here identical to the macro names).
       Later operators refer to them as %{label_pos} and %{label_neg}. The example set
       itself is only passed along (ports "through 1" in the root wiring). -->
  30.       <operator activated="true" class="set_macros" compatibility="6.4.000" expanded="true" height="76" name="Set Macros for Loop Att" width="90" x="648" y="30">
  31.         <list key="macros">
  32.           <parameter key="label_pos" value="label_pos"/>
  33.           <parameter key="label_neg" value="label_neg"/>
  34.         </list>
  35.         <description align="center" color="transparent" colored="false" width="126">Set the macro values for your positive and negative labels.</description>
  36.       </operator>
  37.       <operator activated="true" class="subprocess" compatibility="6.4.000" expanded="true" height="112" name="Weight of Evidence Calculator" width="90" x="782" y="30">
  38.         <process expanded="true">
  39.           <operator activated="true" class="loop_attributes" compatibility="6.4.000" expanded="true" height="112" name="WoE Table" width="90" x="45" y="30">
  40.             <parameter key="attribute_filter_type" value="value_type"/>
  41.             <parameter key="value_type" value="nominal"/>
  42.             <parameter key="iteration_macro" value="woeAttribute"/>
  43.             <process expanded="true">
  <!-- First step of each loop iteration: keeps only the attribute currently being
       looped over (%{woeAttribute}) plus the two indicator attributes named by
       %{label_pos} and %{label_neg}. include_special_attributes is true, so the
       subset filter is also applied to attributes that carry a special role. -->
  44.               <operator activated="true" class="select_attributes" compatibility="6.4.000" expanded="true" height="76" name="Select Attributes" width="90" x="45" y="30">
  45.                 <parameter key="attribute_filter_type" value="subset"/>
  46.                 <parameter key="attributes" value="%{woeAttribute}|%{label_pos}|%{label_neg}"/>
  47.                 <parameter key="include_special_attributes" value="true"/>
  48.               </operator>
  49.               <operator activated="true" class="subprocess" compatibility="6.4.000" expanded="true" height="76" name="WoE Steps" width="90" x="179" y="120">
  50.                 <process expanded="true">
  <!-- Normalises the indicator attribute names: whatever names the macros
       %{label_pos} and %{label_neg} hold are renamed to the fixed names label_pos
       and label_neg, which the following operators reference literally. -->
  51.                   <operator activated="true" class="rename" compatibility="6.4.000" expanded="true" height="76" name="Rename label classes" width="90" x="45" y="30">
  52.                     <parameter key="old_name" value="%{label_pos}"/>
  53.                     <parameter key="new_name" value="label_pos"/>
  54.                     <list key="rename_additional_attributes">
  55.                       <parameter key="%{label_neg}" value="label_neg"/>
  56.                     </list>
  57.                     <description align="center" color="transparent" colored="false" width="126">Rename label classes to label_pos &amp;amp; label_neg</description>
  58.                   </operator>
  <!-- ForceAgg: groups the rows by the current loop attribute (%{woeAttribute}) and
       sums label_neg and label_pos per group, i.e. one row per attribute value with
       its count of negative and positive examples (the indicators are 0/1 flags). -->
  59.                   <operator activated="true" class="aggregate" compatibility="6.4.000" expanded="true" height="76" name="ForceAgg" width="90" x="179" y="30">
  60.                     <list key="aggregation_attributes">
  61.                       <parameter key="label_neg" value="sum"/>
  62.                       <parameter key="label_pos" value="sum"/>
  63.                     </list>
  64.                     <parameter key="group_by_attributes" value="%{woeAttribute}"/>
  65.                   </operator>
  <!-- Restores the plain names after the aggregation: sum(label_pos) becomes
       label_pos and sum(label_neg) becomes label_neg, so the WoE Calculation
       subprocess can keep referring to label_pos / label_neg. -->
  66.                   <operator activated="true" class="rename" compatibility="6.4.000" expanded="true" height="76" name="Rename (2)" width="90" x="313" y="30">
  67.                     <parameter key="old_name" value="sum(label_pos)"/>
  68.                     <parameter key="new_name" value="label_pos"/>
  69.                     <list key="rename_additional_attributes">
  70.                       <parameter key="sum(label_neg)" value="label_neg"/>
  71.                     </list>
  72.                   </operator>
  73.                   <operator activated="true" class="subprocess" compatibility="6.4.000" expanded="true" height="76" name="WoE Calculation" width="90" x="447" y="30">
  74.                     <process expanded="true">
  <!-- Generate ID: adds an id to the aggregated rows (no parameters set; defaults apply).
       The id is what the two branches below are grouped and joined on.
       Multiply: duplicates the example set. Per the connect elements of this
       subprocess, output 1 feeds the left input of the join "WoE Table (2)" and
       output 2 feeds "Aggregate (2)", the start of the WoE computation branch. -->
  75.                       <operator activated="true" class="generate_id" compatibility="6.4.000" expanded="true" height="76" name="Generate ID" width="90" x="45" y="30">
  76.                         <description align="center" color="transparent" colored="false" width="126">target attribute made into IDs to allow for easy join following WoE calculation.</description>
  77.                       </operator>
  78.                       <operator activated="true" class="multiply" compatibility="6.4.000" expanded="true" height="94" name="Multiply" width="90" x="179" y="30"/>
  <!-- Aggregate (2): applies the "sum (fractional)" aggregation to label_neg and
       label_pos, grouped by id.
       NOTE(review): presumably this yields, per row, that row's share of the total
       negative and positive counts (the "distribution" values used by the WoE
       formula further down); confirm against the Aggregate operator reference. -->
  79.                       <operator activated="true" class="aggregate" compatibility="6.4.000" expanded="true" height="76" name="Aggregate (2)" width="90" x="447" y="75">
  80.                         <list key="aggregation_attributes">
  81.                           <parameter key="label_neg" value="sum (fractional)"/>
  82.                           <parameter key="label_pos" value="sum (fractional)"/>
  83.                         </list>
  84.                         <parameter key="group_by_attributes" value="id"/>
  85.                       </operator>
  <!-- tidy names 1: in attribute names, replaces the text "fractional_sum" with
       "distribution_".
       tidy names 2: a second rename_by_replacing with no parameters in this file,
       so it runs on the operator defaults.
       NOTE(review): the next operator expects the names distribution_label_pos and
       distribution_label_neg, so the defaults of tidy names 2 presumably strip the
       parentheses left over from the aggregation names; confirm. -->
  86.                       <operator activated="true" class="rename_by_replacing" compatibility="6.4.000" expanded="true" height="76" name="tidy names 1" width="90" x="581" y="75">
  87.                         <parameter key="replace_what" value="fractional_sum"/>
  88.                         <parameter key="replace_by" value="distribution_"/>
  89.                       </operator>
  90.                       <operator activated="true" class="rename_by_replacing" compatibility="6.4.000" expanded="true" height="76" name="tidy names 2" width="90" x="715" y="75"/>
  <!-- WoE Generation: creates two attributes per row (one row per attribute value):
         WoE = ln(distribution_label_pos / distribution_label_neg)
         %{woeAttribute}_MarginalContribution
             = (distribution_label_pos - distribution_label_neg) * WoE
       The second expression reads the WoE attribute created by the first entry, so
       the order of the two list entries matters.
       A value whose positive or negative share is 0 makes the ratio 0 or undefined,
       so ln() is not finite; this is the case the "Filter Examples" description
       warns about ("values that deliver infinite results"). -->
  91.                       <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true" height="76" name="WoE Generation" width="90" x="849" y="75">
  92.                         <list key="function_descriptions">
  93.                           <parameter key="WoE" value="ln(distribution_label_pos/distribution_label_neg)"/>
  94.                           <parameter key="%{woeAttribute}_MarginalContribution" value="(distribution_label_pos-distribution_label_neg)*WoE"/>
  95.                         </list>
  96.                       </operator>
  <!-- Set Role: gives the attribute named id the role id on the WoE branch, with no
       additional role assignments (set_additional_roles is empty).
       NOTE(review): presumably needed because the preceding aggregation grouped by id
       and the join below matches on the id role; confirm. -->
  97.                       <operator activated="true" class="set_role" compatibility="6.4.000" expanded="true" height="76" name="Set Role" width="90" x="983" y="75">
  98.                         <parameter key="attribute_name" value="id"/>
  99.                         <parameter key="target_role" value="id"/>
  100.                         <list key="set_additional_roles"/>
  101.                         <description align="center" color="transparent" colored="false" width="126">Set id as id</description>
  102.                       </operator>
  <!-- Rename WoE: renames the generic WoE attribute to %{woeAttribute}_WoE, i.e. it
       is prefixed with the name of the attribute currently being looped over.
       Per the description below, the formula step uses the fixed name WoE and the
       attribute-specific name is applied only here, so that awkward attribute names
       do not break the calculation. -->
  103.                       <operator activated="true" class="rename" compatibility="6.4.000" expanded="true" height="76" name="Rename WoE" width="90" x="1117" y="75">
  104.                         <parameter key="old_name" value="WoE"/>
  105.                         <parameter key="new_name" value="%{woeAttribute}_WoE"/>
  106.                         <list key="rename_additional_attributes"/>
  107.                         <description align="center" color="transparent" colored="false" width="126">Rename here rather than calculation to ensure bad attribute names do not cause errors in the process.</description>
  108.                       </operator>
  <!-- Select Attributes (2): the subset lists distribution_label_neg and
       distribution_label_pos and invert_selection is true, so these two helper
       attributes are dropped and everything else on the WoE branch is kept. -->
  109.                       <operator activated="true" class="select_attributes" compatibility="6.4.000" expanded="true" height="76" name="Select Attributes (2)" width="90" x="1251" y="75">
  110.                         <parameter key="attribute_filter_type" value="subset"/>
  111.                         <parameter key="attributes" value="distribution_label_neg|distribution_label_pos"/>
  112.                         <parameter key="invert_selection" value="true"/>
  113.                         <description align="center" color="transparent" colored="false" width="126">Remove unnecessary attributes</description>
  114.                       </operator>
  <!-- WoE Table (2) ("join"): merges the per-value counts (left input, from Multiply
       output 1) with the computed WoE columns (right input, from Select Attributes (2)).
       key_attributes is empty.
       NOTE(review): with no key attributes listed the join presumably matches rows on
       the id role set on both branches; join type and other options are defaults.
       Confirm against the Join operator reference. -->
  115.                       <operator activated="true" class="join" compatibility="6.4.000" expanded="true" height="76" name="WoE Table (2)" width="90" x="1385" y="30">
  116.                         <list key="key_attributes"/>
  117.                       </operator>
  <!-- Wiring of the "WoE Calculation" subprocess.
       Common start: in 1, then Generate ID, then Multiply.
       Branch A: Multiply output 1 goes directly to the left input of the join.
       Branch B: Multiply output 2 runs through Aggregate (2), tidy names 1,
                 tidy names 2, WoE Generation, Set Role, Rename WoE and
                 Select Attributes (2), ending at the right input of the join.
       The join result leaves the subprocess on out 1.
       The portSpacing entries set a spacing of 0 for the listed source and sink ports. -->
  118.                       <connect from_port="in 1" to_op="Generate ID" to_port="example set input"/>
  119.                       <connect from_op="Generate ID" from_port="example set output" to_op="Multiply" to_port="input"/>
  120.                       <connect from_op="Multiply" from_port="output 1" to_op="WoE Table (2)" to_port="left"/>
  121.                       <connect from_op="Multiply" from_port="output 2" to_op="Aggregate (2)" to_port="example set input"/>
  122.                       <connect from_op="Aggregate (2)" from_port="example set output" to_op="tidy names 1" to_port="example set input"/>
  123.                       <connect from_op="tidy names 1" from_port="example set output" to_op="tidy names 2" to_port="example set input"/>
  124.                       <connect from_op="tidy names 2" from_port="example set output" to_op="WoE Generation" to_port="example set input"/>
  125.                       <connect from_op="WoE Generation" from_port="example set output" to_op="Set Role" to_port="example set input"/>
  126.                       <connect from_op="Set Role" from_port="example set output" to_op="Rename WoE" to_port="example set input"/>
  127.                       <connect from_op="Rename WoE" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
  128.                       <connect from_op="Select Attributes (2)" from_port="example set output" to_op="WoE Table (2)" to_port="right"/>
  129.                       <connect from_op="WoE Table (2)" from_port="join" to_port="out 1"/>
  130.                       <portSpacing port="source_in 1" spacing="0"/>
  131.                       <portSpacing port="source_in 2" spacing="0"/>
  132.                       <portSpacing port="sink_out 1" spacing="0"/>
  133.                       <portSpacing port="sink_out 2" spacing="0"/>
  134.                     </process>
  135.                     <description align="center" color="transparent" colored="false" width="126">Requirements.&lt;br&gt;Dataset containing target variable name (define by macro), a label.</description>
  136.                   </operator>
  <!-- Wiring of the "WoE Steps" subprocess, a single linear chain:
       in 1, Rename label classes, ForceAgg, Rename (2), WoE Calculation (in 1 / out 1),
       then out 1 of this subprocess.
       The portSpacing entries set a spacing of 0 for the listed source and sink ports. -->
  137.                   <connect from_port="in 1" to_op="Rename label classes" to_port="example set input"/>
  138.                   <connect from_op="Rename label classes" from_port="example set output" to_op="ForceAgg" to_port="example set input"/>
  139.                   <connect from_op="ForceAgg" from_port="example set output" to_op="Rename (2)" to_port="example set input"/>
  140.                   <connect from_op="Rename (2)" from_port="example set output" to_op="WoE Calculation" to_port="in 1"/>
  141.                   <connect from_op="WoE Calculation" from_port="out 1" to_port="out 1"/>
  142.                   <portSpacing port="source_in 1" spacing="0"/>
  143.                   <portSpacing port="source_in 2" spacing="0"/>
  144.                   <portSpacing port="sink_out 1" spacing="0"/>
  145.                   <portSpacing port="sink_out 2" spacing="0"/>
  146.                 </process>
  147.                 <description align="center" color="transparent" colored="false" width="126">This process takes a dataset of the form:&lt;br&gt;&lt;br&gt;[ attribute | class_pos | class_neg ]&lt;br&gt;and calculates the Weight of Evidence &amp;amp; Information Value for this attribute. This is a process that would normally be called inside a loop attributes and with additional aggregation steps applied prior.&lt;br&gt;</description>
  148.               </operator>
  <!-- Aggregate: sums %{woeAttribute}_MarginalContribution. No group_by_attributes
       parameter is set here.
       NOTE(review): without grouping this presumably collapses the WoE table to a
       single total, which the "Tidy up" step later labels InformationValue; confirm.
       The operator's "original" port (the unaggregated WoE table) is also exported,
       see the loop-body connect elements. -->
  149.               <operator activated="true" class="aggregate" compatibility="6.4.000" expanded="true" height="76" name="Aggregate" width="90" x="380" y="120">
  150.                 <list key="aggregation_attributes">
  151.                   <parameter key="%{woeAttribute}_MarginalContribution" value="sum"/>
  152.                 </list>
  153.               </operator>
  <!-- Tidy up: reshapes the aggregated total into a row that can be appended to the
       results of the other loop iterations. Linear chain:
       in 1, Rename by Replacing, Transpose, tidy att1 name, out 1. -->
  154.               <operator activated="true" class="subprocess" compatibility="6.4.000" expanded="true" height="76" name="Tidy up" width="90" x="581" y="75">
  155.                 <process expanded="true">
                    <!-- The pattern (^.*) matches an entire attribute name, so each matched
                         name is replaced as a whole by the current loop attribute name. -->
  156.                   <operator activated="true" class="rename_by_replacing" compatibility="6.4.000" expanded="true" height="76" name="Rename by Replacing" width="90" x="45" y="30">
  157.                     <parameter key="replace_what" value="(^.*)"/>
  158.                     <parameter key="replace_by" value="%{woeAttribute}"/>
  159.                   </operator>
                    <!-- Transpose runs with defaults (no parameters set in this file). -->
  160.                   <operator activated="true" class="transpose" compatibility="6.4.000" expanded="true" height="76" name="Transpose" width="90" x="179" y="30"/>
                    <!-- NOTE(review): att_1 is presumably the name Transpose gives to the first
                         transposed column; it is renamed to InformationValue. Confirm. -->
  161.                   <operator activated="true" class="rename" compatibility="6.4.000" expanded="true" height="76" name="tidy att1 name" width="90" x="313" y="30">
  162.                     <parameter key="old_name" value="att_1"/>
  163.                     <parameter key="new_name" value="InformationValue"/>
  164.                     <list key="rename_additional_attributes"/>
  165.                   </operator>
  166.                   <connect from_port="in 1" to_op="Rename by Replacing" to_port="example set input"/>
  167.                   <connect from_op="Rename by Replacing" from_port="example set output" to_op="Transpose" to_port="example set input"/>
  168.                   <connect from_op="Transpose" from_port="example set output" to_op="tidy att1 name" to_port="example set input"/>
  169.                   <connect from_op="tidy att1 name" from_port="example set output" to_port="out 1"/>
  170.                   <portSpacing port="source_in 1" spacing="0"/>
  171.                   <portSpacing port="source_in 2" spacing="0"/>
  172.                   <portSpacing port="sink_out 1" spacing="0"/>
  173.                   <portSpacing port="sink_out 2" spacing="0"/>
  174.                 </process>
  175.               </operator>
  <!-- Wiring of the loop body of "WoE Table" (executed once per selected attribute).
       Main chain: example set, Select Attributes, WoE Steps, Aggregate, Tidy up, result 1.
       Side outputs:
         Select Attributes "original" goes back to the loop's "example set" sink, so
           the unreduced input is what the loop hands on.
         Aggregate "original" (the WoE table before summing) goes to result 2.
       A sink_result 3 port is listed under portSpacing but nothing is connected to it. -->
  176.               <connect from_port="example set" to_op="Select Attributes" to_port="example set input"/>
  177.               <connect from_op="Select Attributes" from_port="example set output" to_op="WoE Steps" to_port="in 1"/>
  178.               <connect from_op="Select Attributes" from_port="original" to_port="example set"/>
  179.               <connect from_op="WoE Steps" from_port="out 1" to_op="Aggregate" to_port="example set input"/>
  180.               <connect from_op="Aggregate" from_port="example set output" to_op="Tidy up" to_port="in 1"/>
  181.               <connect from_op="Aggregate" from_port="original" to_port="result 2"/>
  182.               <connect from_op="Tidy up" from_port="out 1" to_port="result 1"/>
  183.               <portSpacing port="source_example set" spacing="0"/>
  184.               <portSpacing port="sink_example set" spacing="0"/>
  185.               <portSpacing port="sink_result 1" spacing="0"/>
  186.               <portSpacing port="sink_result 2" spacing="0"/>
  187.               <portSpacing port="sink_result 3" spacing="0"/>
  188.             </process>
  189.             <description align="center" color="transparent" colored="false" width="126">This will work for every nominal attribute in your exampleset.</description>
  190.           </operator>
  <!-- InformationValues ("append"): receives the loop's result 1 output (the tidied
       per-attribute totals) on "example set 1" and emits a single merged set.
       NOTE(review): presumably the loop delivers one result per iterated attribute as
       a collection and Append stacks them into one table of Information Values; confirm. -->
  191.           <operator activated="true" class="append" compatibility="6.4.000" expanded="true" height="76" name="InformationValues" width="90" x="380" y="75">
  192.             <description align="center" color="transparent" colored="false" width="126">Appends IV into a table</description>
  193.           </operator>
  <!-- Wiring of the "Weight of Evidence Calculator" subprocess:
         in 1 feeds the attribute loop "WoE Table".
         out 1: the loop's "example set" output.
         out 2: the loop's result 1 after merging in InformationValues.
         out 3: the loop's result 2 (the per-attribute WoE tables), passed on unchanged.
       portSpacing sets non-zero spacing for sink_out 2 (54) and sink_out 3 (126);
       the remaining listed ports use 0. -->
  194.           <connect from_port="in 1" to_op="WoE Table" to_port="example set"/>
  195.           <connect from_op="WoE Table" from_port="example set" to_port="out 1"/>
  196.           <connect from_op="WoE Table" from_port="result 1" to_op="InformationValues" to_port="example set 1"/>
  197.           <connect from_op="WoE Table" from_port="result 2" to_port="out 3"/>
  198.           <connect from_op="InformationValues" from_port="merged set" to_port="out 2"/>
  199.           <portSpacing port="source_in 1" spacing="0"/>
  200.           <portSpacing port="source_in 2" spacing="0"/>
  201.           <portSpacing port="sink_out 1" spacing="0"/>
  202.           <portSpacing port="sink_out 2" spacing="54"/>
  203.           <portSpacing port="sink_out 3" spacing="126"/>
  204.           <portSpacing port="sink_out 4" spacing="0"/>
  205.         </process>
  206.       </operator>
  <!-- Wiring of the root process, one linear chain:
       Get Data, Discretize, Create Pos and Neg labels as attributes, Filter Examples,
       Set Macros for Loop Att (ports "through 1"), Weight of Evidence Calculator (in 1).
       The calculator's out 1, out 2 and out 3 are delivered as process results
       result 1, result 2 and result 3 respectively.
       The two trailing description elements have no operator parent: an empty yellow
       box (757 x 415 at 10,10) and a red note telling the user to edit inside the
       yellow area. -->
  207.       <connect from_op="Get Data" from_port="output" to_op="Discretize" to_port="example set input"/>
  208.       <connect from_op="Discretize" from_port="example set output" to_op="Create Pos &amp; Neg labels as attributes" to_port="example set input"/>
  209.       <connect from_op="Create Pos &amp; Neg labels as attributes" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
  210.       <connect from_op="Filter Examples" from_port="example set output" to_op="Set Macros for Loop Att" to_port="through 1"/>
  211.       <connect from_op="Set Macros for Loop Att" from_port="through 1" to_op="Weight of Evidence Calculator" to_port="in 1"/>
  212.       <connect from_op="Weight of Evidence Calculator" from_port="out 1" to_port="result 1"/>
  213.       <connect from_op="Weight of Evidence Calculator" from_port="out 2" to_port="result 2"/>
  214.       <connect from_op="Weight of Evidence Calculator" from_port="out 3" to_port="result 3"/>
  215.       <portSpacing port="source_input 1" spacing="0"/>
  216.       <portSpacing port="sink_result 1" spacing="36"/>
  217.       <portSpacing port="sink_result 2" spacing="0"/>
  218.       <portSpacing port="sink_result 3" spacing="0"/>
  219.       <portSpacing port="sink_result 4" spacing="0"/>
  220.       <description align="center" color="yellow" colored="false" height="415" resized="true" width="757" x="10" y="10"/>
  221.       <description align="center" color="red" colored="true" height="105" resized="false" width="180" x="10" y="303">Make edits in the yellow area.</description>
  222.     </process>
  223.   </operator>
  224. </process>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement