Pages: [1]
  Print  
Author Topic: weighting operator discards zero-valued attributes - is there a workaround?  (Read 682 times)
ollestrat
Newbie
*
Posts: 9


« on: January 21, 2011, 12:10:18 PM »

Hi All,

the operator "Weight by Tree Importance" outputs only those attributes that actually have a weight. That means attributes with zero weights (those which did not reach the minimum number of occurrences at the Random Forest splitting nodes) are NOT listed.

Do you have an idea how to get the complete list of attributes, including those with zero weight? Otherwise it would mean quite a lot of manual work for me afterwards in loads of exported Excel sheets. Could you point me in the right direction here?

Thanks in advance,

ollestrat
Logged
Sebastian Land
Administrator
Hero Member
*****
Posts: 2426


« Reply #1 on: February 10, 2011, 12:23:02 PM »

Hi,
this is of course a bug in the Weights operator. Could you file a bug for this on our bugtracker?

In the meantime, here is a process that will help you avoid any manual work:

Code:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Workaround process for "Weight by Tree Importance" omitting attributes
  that received no weight.

  Data flow, as wired by the <connect> elements below:
    1. "Generate Data" feeds "Multiply", which hands the same example set to
       two branches.
    2. Branch A: "Random Forest" -> "Weight by Tree Importance" ->
       "Weights to Data".
    3. Branch B: "Weight by User Specification" (empty regex list,
       default_weight 0.0) -> "Weights to Data (2)".
    4. "Append" stacks both weight tables and "Aggregate" sums "Weight" per
       "Attribute".

  Intended effect: every attribute shows up in the final table, and those
  missing from branch A end up with a summed weight of 0.
-->
<process version="5.1.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.1.001" expanded="true" name="Process">
    <process expanded="true" height="449" width="681">
      <!-- Sample data source. NOTE(review): presumably a stand-in for the user's own data set; confirm before reuse. -->
      <operator activated="true" class="generate_data" compatibility="5.1.001" expanded="true" height="60" name="Generate Data" width="90" x="45" y="30">
        <parameter key="target_function" value="simple polynomial classification"/>
      </operator>
      <!-- Fans the example set out to the Random Forest branch (output 1) and the baseline-weights branch (output 2). -->
      <operator activated="true" class="multiply" compatibility="5.1.001" expanded="true" height="94" name="Multiply" width="90" x="179" y="30"/>
      <!-- Branch A: trains the forest model whose weights are extracted further down; no parameters are overridden here. -->
      <operator activated="true" class="random_forest" compatibility="5.1.001" expanded="true" height="76" name="Random Forest" width="90" x="313" y="30"/>
      <!--
        Branch B: baseline weights. The regex list is empty and default_weight
        is 0.0, so presumably every attribute receives weight 0.0 (verify
        against the operator documentation). normalize_weights is off.
        NOTE(review): breakpoints="after" on this operator and on
        "Weights to Data (2)" looks like an inspection aid that pauses the run;
        confirm whether it should stay for unattended execution.
      -->
      <operator activated="true" breakpoints="after" class="weight_by_user_specification" compatibility="5.1.001" expanded="true" height="76" name="Weight by User Specification" width="90" x="313" y="165">
        <parameter key="normalize_weights" value="false"/>
        <list key="name_regex_to_weights"/>
        <parameter key="default_weight" value="0.0"/>
      </operator>
      <!-- Branch B: converts the baseline weights into an example set so they can be appended. -->
      <operator activated="true" breakpoints="after" class="weights_to_data" compatibility="5.1.001" expanded="true" height="60" name="Weights to Data (2)" width="90" x="447" y="165"/>
      <!-- Branch A: derives attribute weights from the trained forest; this is the operator reported to drop unweighted attributes. -->
      <operator activated="true" class="weight_by_forest" compatibility="5.1.001" expanded="true" height="76" name="Weight by Tree Importance" width="90" x="447" y="30"/>
      <!-- Branch A: converts the forest-derived weights into an example set. -->
      <operator activated="true" class="weights_to_data" compatibility="5.1.001" expanded="true" height="60" name="Weights to Data" width="90" x="574" y="33"/>
      <!-- Stacks the two weight tables (forest weights as example set 1, baseline weights as example set 2). -->
      <operator activated="true" class="append" compatibility="5.1.001" expanded="true" height="94" name="Append" width="90" x="447" y="300"/>
      <!--
        Collapses the appended table to one row per attribute by summing
        "Weight" grouped by "Attribute". Presumably these are the column names
        emitted by "Weights to Data"; verify if the output looks wrong.
      -->
      <operator activated="true" class="aggregate" compatibility="5.1.001" expanded="true" height="76" name="Aggregate" width="90" x="581" y="300">
        <list key="aggregation_attributes">
          <parameter key="Weight" value="sum"/>
        </list>
        <parameter key="group_by_attributes" value="Attribute"/>
      </operator>
      <!-- Wiring: each <connect> links one operator's output port to another operator's input port. -->
      <connect from_op="Generate Data" from_port="output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Random Forest" to_port="training set"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Weight by User Specification" to_port="example set"/>
      <connect from_op="Random Forest" from_port="model" to_op="Weight by Tree Importance" to_port="random forest"/>
      <connect from_op="Weight by User Specification" from_port="weights" to_op="Weights to Data (2)" to_port="attribute weights"/>
      <connect from_op="Weights to Data (2)" from_port="example set" to_op="Append" to_port="example set 2"/>
      <connect from_op="Weight by Tree Importance" from_port="weights" to_op="Weights to Data" to_port="attribute weights"/>
      <connect from_op="Weights to Data" from_port="example set" to_op="Append" to_port="example set 1"/>
      <connect from_op="Append" from_port="merged set" to_op="Aggregate" to_port="example set input"/>
      <!-- The aggregated weight table is the only result delivered by the process (result 1). -->
      <connect from_op="Aggregate" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>



Hope that helps until the next release :)

Greetings,
  Sebastian
Logged
Pages: [1]
  Print  
 
Jump to: