Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner 6.4 process: Weight of Evidence (WoE) and Information Value calculator.
  Pipeline, as wired by the connect elements below:
    Get Data, Discretize (by entropy), generate the label_pos / label_neg indicator
    attributes, Filter Examples, Set Macros, then the "Weight of Evidence Calculator"
    subprocess, which loops over every nominal attribute.
  Per attribute: WoE = ln(distribution_label_pos / distribution_label_neg) and
  marginal contribution = (distribution_label_pos - distribution_label_neg) * WoE;
  the summed marginal contributions are renamed to InformationValue.
-->
<process version="6.4.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="6.4.000" expanded="true" name="Process">
    <process expanded="true">
      <operator activated="true" class="retrieve" compatibility="6.4.000" expanded="true" height="60" name="Get Data" width="90" x="45" y="30">
        <parameter key="repository_entry" value="../data/large_set"/>
      </operator>
      <operator activated="true" class="discretize_by_entropy" compatibility="6.4.000" expanded="true" height="94" name="Discretize" width="90" x="179" y="30">
        <description align="center" color="transparent" colored="false" width="126">Testing Entropy Discretization.</description>
      </operator>
      <!-- NOTE(review): LABELFIELD looks like a placeholder for the real label attribute name; confirm before running. -->
      <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true" height="76" name="Create Pos &amp; Neg labels as attributes" width="90" x="380" y="30">
        <list key="function_descriptions">
          <parameter key="label_pos" value="if(LABELFIELD == &quot;1.0&quot;,1,0)"/>
          <parameter key="label_neg" value="if(LABELFIELD == &quot;0.0&quot;,1,0)"/>
        </list>
        <description align="center" color="transparent" colored="false" width="126">If not created already create two attributes holding values for positive and negative labels.</description>
      </operator>
      <operator activated="true" class="filter_examples" compatibility="6.4.000" expanded="true" height="94" name="Filter Examples" width="90" x="514" y="30">
        <parameter key="condition_class" value="all"/>
        <list key="filters_list">
          <parameter key="filters_entry_key" value="CTRYCD.equals."/>
        </list>
        <description align="center" color="transparent" colored="false" width="126">Use this filter to remove example values you do not want to appear in the WoE calculations. For example, values that deliver infinite results.</description>
      </operator>
      <operator activated="true" class="set_macros" compatibility="6.4.000" expanded="true" height="76" name="Set Macros for Loop Att" width="90" x="648" y="30">
        <list key="macros">
          <parameter key="label_pos" value="label_pos"/>
          <parameter key="label_neg" value="label_neg"/>
        </list>
        <description align="center" color="transparent" colored="false" width="126">Set the macro values for your positive and negative labels.</description>
      </operator>
      <operator activated="true" class="subprocess" compatibility="6.4.000" expanded="true" height="112" name="Weight of Evidence Calculator" width="90" x="782" y="30">
        <process expanded="true">
          <operator activated="true" class="loop_attributes" compatibility="6.4.000" expanded="true" height="112" name="WoE Table" width="90" x="45" y="30">
            <parameter key="attribute_filter_type" value="value_type"/>
            <parameter key="value_type" value="nominal"/>
            <parameter key="iteration_macro" value="woeAttribute"/>
            <process expanded="true">
              <operator activated="true" class="select_attributes" compatibility="6.4.000" expanded="true" height="76" name="Select Attributes" width="90" x="45" y="30">
                <parameter key="attribute_filter_type" value="subset"/>
                <parameter key="attributes" value="%{woeAttribute}|%{label_pos}|%{label_neg}"/>
                <parameter key="include_special_attributes" value="true"/>
              </operator>
              <operator activated="true" class="subprocess" compatibility="6.4.000" expanded="true" height="76" name="WoE Steps" width="90" x="179" y="120">
                <process expanded="true">
                  <operator activated="true" class="rename" compatibility="6.4.000" expanded="true" height="76" name="Rename label classes" width="90" x="45" y="30">
                    <parameter key="old_name" value="%{label_pos}"/>
                    <parameter key="new_name" value="label_pos"/>
                    <list key="rename_additional_attributes">
                      <parameter key="%{label_neg}" value="label_neg"/>
                    </list>
                    <description align="center" color="transparent" colored="false" width="126">Rename label classes to label_pos &amp; label_neg</description>
                  </operator>
                  <operator activated="true" class="aggregate" compatibility="6.4.000" expanded="true" height="76" name="ForceAgg" width="90" x="179" y="30">
                    <list key="aggregation_attributes">
                      <parameter key="label_neg" value="sum"/>
                      <parameter key="label_pos" value="sum"/>
                    </list>
                    <parameter key="group_by_attributes" value="%{woeAttribute}"/>
                  </operator>
                  <operator activated="true" class="rename" compatibility="6.4.000" expanded="true" height="76" name="Rename (2)" width="90" x="313" y="30">
                    <parameter key="old_name" value="sum(label_pos)"/>
                    <parameter key="new_name" value="label_pos"/>
                    <list key="rename_additional_attributes">
                      <parameter key="sum(label_neg)" value="label_neg"/>
                    </list>
                  </operator>
                  <operator activated="true" class="subprocess" compatibility="6.4.000" expanded="true" height="76" name="WoE Calculation" width="90" x="447" y="30">
                    <process expanded="true">
                      <operator activated="true" class="generate_id" compatibility="6.4.000" expanded="true" height="76" name="Generate ID" width="90" x="45" y="30">
                        <description align="center" color="transparent" colored="false" width="126">target attribute made into IDs to allow for easy join following WoE calculation.</description>
                      </operator>
                      <operator activated="true" class="multiply" compatibility="6.4.000" expanded="true" height="94" name="Multiply" width="90" x="179" y="30"/>
                      <operator activated="true" class="aggregate" compatibility="6.4.000" expanded="true" height="76" name="Aggregate (2)" width="90" x="447" y="75">
                        <list key="aggregation_attributes">
                          <parameter key="label_neg" value="sum (fractional)"/>
                          <parameter key="label_pos" value="sum (fractional)"/>
                        </list>
                        <parameter key="group_by_attributes" value="id"/>
                      </operator>
                      <operator activated="true" class="rename_by_replacing" compatibility="6.4.000" expanded="true" height="76" name="tidy names 1" width="90" x="581" y="75">
                        <parameter key="replace_what" value="fractional_sum"/>
                        <parameter key="replace_by" value="distribution_"/>
                      </operator>
                      <!-- NOTE(review): "tidy names 2" has no replace_what / replace_by parameters here, yet the expressions below expect names such as distribution_label_pos; presumably it strips the parentheses left by the aggregation. Confirm against the original process. -->
                      <operator activated="true" class="rename_by_replacing" compatibility="6.4.000" expanded="true" height="76" name="tidy names 2" width="90" x="715" y="75"/>
                      <operator activated="true" class="generate_attributes" compatibility="6.4.000" expanded="true" height="76" name="WoE Generation" width="90" x="849" y="75">
                        <list key="function_descriptions">
                          <parameter key="WoE" value="ln(distribution_label_pos/distribution_label_neg)"/>
                          <parameter key="%{woeAttribute}_MarginalContribution" value="(distribution_label_pos-distribution_label_neg)*WoE"/>
                        </list>
                      </operator>
                      <operator activated="true" class="set_role" compatibility="6.4.000" expanded="true" height="76" name="Set Role" width="90" x="983" y="75">
                        <parameter key="attribute_name" value="id"/>
                        <parameter key="target_role" value="id"/>
                        <list key="set_additional_roles"/>
                        <description align="center" color="transparent" colored="false" width="126">Set id as id</description>
                      </operator>
                      <operator activated="true" class="rename" compatibility="6.4.000" expanded="true" height="76" name="Rename WoE" width="90" x="1117" y="75">
                        <parameter key="old_name" value="WoE"/>
                        <parameter key="new_name" value="%{woeAttribute}_WoE"/>
                        <list key="rename_additional_attributes"/>
                        <description align="center" color="transparent" colored="false" width="126">Rename here rather than calculation to ensure bad attribute names do not cause errors in the process.</description>
                      </operator>
                      <operator activated="true" class="select_attributes" compatibility="6.4.000" expanded="true" height="76" name="Select Attributes (2)" width="90" x="1251" y="75">
                        <parameter key="attribute_filter_type" value="subset"/>
                        <parameter key="attributes" value="distribution_label_neg|distribution_label_pos"/>
                        <parameter key="invert_selection" value="true"/>
                        <description align="center" color="transparent" colored="false" width="126">Remove unnecessary attributes</description>
                      </operator>
                      <operator activated="true" class="join" compatibility="6.4.000" expanded="true" height="76" name="WoE Table (2)" width="90" x="1385" y="30">
                        <list key="key_attributes"/>
                      </operator>
                      <connect from_port="in 1" to_op="Generate ID" to_port="example set input"/>
                      <connect from_op="Generate ID" from_port="example set output" to_op="Multiply" to_port="input"/>
                      <connect from_op="Multiply" from_port="output 1" to_op="WoE Table (2)" to_port="left"/>
                      <connect from_op="Multiply" from_port="output 2" to_op="Aggregate (2)" to_port="example set input"/>
                      <connect from_op="Aggregate (2)" from_port="example set output" to_op="tidy names 1" to_port="example set input"/>
                      <connect from_op="tidy names 1" from_port="example set output" to_op="tidy names 2" to_port="example set input"/>
                      <connect from_op="tidy names 2" from_port="example set output" to_op="WoE Generation" to_port="example set input"/>
                      <connect from_op="WoE Generation" from_port="example set output" to_op="Set Role" to_port="example set input"/>
                      <connect from_op="Set Role" from_port="example set output" to_op="Rename WoE" to_port="example set input"/>
                      <connect from_op="Rename WoE" from_port="example set output" to_op="Select Attributes (2)" to_port="example set input"/>
                      <connect from_op="Select Attributes (2)" from_port="example set output" to_op="WoE Table (2)" to_port="right"/>
                      <connect from_op="WoE Table (2)" from_port="join" to_port="out 1"/>
                      <portSpacing port="source_in 1" spacing="0"/>
                      <portSpacing port="source_in 2" spacing="0"/>
                      <portSpacing port="sink_out 1" spacing="0"/>
                      <portSpacing port="sink_out 2" spacing="0"/>
                    </process>
                    <description align="center" color="transparent" colored="false" width="126">Requirements.&lt;br&gt;Dataset containing target variable name (define by macro), a label.</description>
                  </operator>
                  <connect from_port="in 1" to_op="Rename label classes" to_port="example set input"/>
                  <connect from_op="Rename label classes" from_port="example set output" to_op="ForceAgg" to_port="example set input"/>
                  <connect from_op="ForceAgg" from_port="example set output" to_op="Rename (2)" to_port="example set input"/>
                  <connect from_op="Rename (2)" from_port="example set output" to_op="WoE Calculation" to_port="in 1"/>
                  <connect from_op="WoE Calculation" from_port="out 1" to_port="out 1"/>
                  <portSpacing port="source_in 1" spacing="0"/>
                  <portSpacing port="source_in 2" spacing="0"/>
                  <portSpacing port="sink_out 1" spacing="0"/>
                  <portSpacing port="sink_out 2" spacing="0"/>
                </process>
                <description align="center" color="transparent" colored="false" width="126">This process takes a dataset of the form:&lt;br&gt;&lt;br&gt;[ attribute | class_pos | class_neg ]&lt;br&gt;and calculates the Weight of Evidence &amp; Information Value for this attribute. This is a process that would normally be called inside a loop attributes and with additional aggregation steps applied prior.&lt;br&gt;</description>
              </operator>
              <operator activated="true" class="aggregate" compatibility="6.4.000" expanded="true" height="76" name="Aggregate" width="90" x="380" y="120">
                <list key="aggregation_attributes">
                  <parameter key="%{woeAttribute}_MarginalContribution" value="sum"/>
                </list>
              </operator>
              <operator activated="true" class="subprocess" compatibility="6.4.000" expanded="true" height="76" name="Tidy up" width="90" x="581" y="75">
                <process expanded="true">
                  <operator activated="true" class="rename_by_replacing" compatibility="6.4.000" expanded="true" height="76" name="Rename by Replacing" width="90" x="45" y="30">
                    <parameter key="replace_what" value="(^.*)"/>
                    <parameter key="replace_by" value="%{woeAttribute}"/>
                  </operator>
                  <operator activated="true" class="transpose" compatibility="6.4.000" expanded="true" height="76" name="Transpose" width="90" x="179" y="30"/>
                  <operator activated="true" class="rename" compatibility="6.4.000" expanded="true" height="76" name="tidy att1 name" width="90" x="313" y="30">
                    <parameter key="old_name" value="att_1"/>
                    <parameter key="new_name" value="InformationValue"/>
                    <list key="rename_additional_attributes"/>
                  </operator>
                  <connect from_port="in 1" to_op="Rename by Replacing" to_port="example set input"/>
                  <connect from_op="Rename by Replacing" from_port="example set output" to_op="Transpose" to_port="example set input"/>
                  <connect from_op="Transpose" from_port="example set output" to_op="tidy att1 name" to_port="example set input"/>
                  <connect from_op="tidy att1 name" from_port="example set output" to_port="out 1"/>
                  <portSpacing port="source_in 1" spacing="0"/>
                  <portSpacing port="source_in 2" spacing="0"/>
                  <portSpacing port="sink_out 1" spacing="0"/>
                  <portSpacing port="sink_out 2" spacing="0"/>
                </process>
              </operator>
              <connect from_port="example set" to_op="Select Attributes" to_port="example set input"/>
              <connect from_op="Select Attributes" from_port="example set output" to_op="WoE Steps" to_port="in 1"/>
              <connect from_op="Select Attributes" from_port="original" to_port="example set"/>
              <connect from_op="WoE Steps" from_port="out 1" to_op="Aggregate" to_port="example set input"/>
              <connect from_op="Aggregate" from_port="example set output" to_op="Tidy up" to_port="in 1"/>
              <connect from_op="Aggregate" from_port="original" to_port="result 2"/>
              <connect from_op="Tidy up" from_port="out 1" to_port="result 1"/>
              <portSpacing port="source_example set" spacing="0"/>
              <portSpacing port="sink_example set" spacing="0"/>
              <portSpacing port="sink_result 1" spacing="0"/>
              <portSpacing port="sink_result 2" spacing="0"/>
              <portSpacing port="sink_result 3" spacing="0"/>
            </process>
            <description align="center" color="transparent" colored="false" width="126">This will work for every nominal attribute in your exampleset.</description>
          </operator>
          <operator activated="true" class="append" compatibility="6.4.000" expanded="true" height="76" name="InformationValues" width="90" x="380" y="75">
            <description align="center" color="transparent" colored="false" width="126">Appends IV into a table</description>
          </operator>
          <connect from_port="in 1" to_op="WoE Table" to_port="example set"/>
          <connect from_op="WoE Table" from_port="example set" to_port="out 1"/>
          <connect from_op="WoE Table" from_port="result 1" to_op="InformationValues" to_port="example set 1"/>
          <connect from_op="WoE Table" from_port="result 2" to_port="out 3"/>
          <connect from_op="InformationValues" from_port="merged set" to_port="out 2"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="54"/>
          <portSpacing port="sink_out 3" spacing="126"/>
          <portSpacing port="sink_out 4" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Get Data" from_port="output" to_op="Discretize" to_port="example set input"/>
      <connect from_op="Discretize" from_port="example set output" to_op="Create Pos &amp; Neg labels as attributes" to_port="example set input"/>
      <connect from_op="Create Pos &amp; Neg labels as attributes" from_port="example set output" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Set Macros for Loop Att" to_port="through 1"/>
      <connect from_op="Set Macros for Loop Att" from_port="through 1" to_op="Weight of Evidence Calculator" to_port="in 1"/>
      <connect from_op="Weight of Evidence Calculator" from_port="out 1" to_port="result 1"/>
      <connect from_op="Weight of Evidence Calculator" from_port="out 2" to_port="result 2"/>
      <connect from_op="Weight of Evidence Calculator" from_port="out 3" to_port="result 3"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="36"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <description align="center" color="yellow" colored="false" height="415" resized="true" width="757" x="10" y="10"/>
      <description align="center" color="red" colored="true" height="105" resized="false" width="180" x="10" y="303">Make edits in the yellow area.</description>
    </process>
  </operator>
</process>
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement