Pages: [1]
Author Topic: weighting operator discards zero-valued attributes - is there a workaround?  (Read 865 times)
Posts: 9

« on: January 21, 2011, 12:10:18 PM »

Hi All,

the operator "Weight by Tree Importance" outputs only those attributes that actually have a weight. That means attributes with zero weights (those which did not reach the minimum number of occurrences at the Random Forest splitting nodes) are NOT listed.

Do you have an idea how to get the complete list of attributes, including those with zero values? Otherwise it would mean a lot of manual work for me afterwards in loads of exported Excel sheets. Could you point me in the right direction here?

Thanks in advance,

Sebastian Land
Hero Member
Posts: 2426

« Reply #1 on: February 10, 2011, 12:23:02 PM »

This is of course a bug in the Weights operator. Could you file a bug for this on our bug tracker?

I am posting a process that will help you avoid any manual work:

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Workaround process: produce a weight table that also lists attributes
  missing from the "Weight by Tree Importance" output.

  Data flow (see the <connect> elements below):
    1. "Generate Data" feeds "Multiply", which hands one copy to each branch.
    2. Branch A: "Random Forest" model, then "Weight by Tree Importance",
       then "Weights to Data".
    3. Branch B: "Weight by User Specification" with an empty regex list and
       default_weight 0.0, then "Weights to Data (2)".
    4. "Append" merges both tables; "Aggregate" sums "Weight" grouped by
       "Attribute" and delivers the result to result port 1.
-->
<process version="5.1.001">
  <operator activated="true" class="process" compatibility="5.1.001" expanded="true" name="Process">
    <process expanded="true" height="449" width="681">
      <operator activated="true" class="generate_data" compatibility="5.1.001" expanded="true" height="60" name="Generate Data" width="90" x="45" y="30">
        <parameter key="target_function" value="simple polynomial classification"/>
      </operator>
      <operator activated="true" class="multiply" compatibility="5.1.001" expanded="true" height="94" name="Multiply" width="90" x="179" y="30"/>
      <operator activated="true" class="random_forest" compatibility="5.1.001" expanded="true" height="76" name="Random Forest" width="90" x="313" y="30"/>
      <!-- Branch B: no name regex is listed, and the default weight is 0.0. -->
      <operator activated="true" breakpoints="after" class="weight_by_user_specification" compatibility="5.1.001" expanded="true" height="76" name="Weight by User Specification" width="90" x="313" y="165">
        <parameter key="normalize_weights" value="false"/>
        <list key="name_regex_to_weights"/>
        <parameter key="default_weight" value="0.0"/>
      </operator>
      <operator activated="true" breakpoints="after" class="weights_to_data" compatibility="5.1.001" expanded="true" height="60" name="Weights to Data (2)" width="90" x="447" y="165"/>
      <!-- Branch A: weights derived from the trained Random Forest model. -->
      <operator activated="true" class="weight_by_forest" compatibility="5.1.001" expanded="true" height="76" name="Weight by Tree Importance" width="90" x="447" y="30"/>
      <operator activated="true" class="weights_to_data" compatibility="5.1.001" expanded="true" height="60" name="Weights to Data" width="90" x="574" y="33"/>
      <operator activated="true" class="append" compatibility="5.1.001" expanded="true" height="94" name="Append" width="90" x="447" y="300"/>
      <!-- Sum the "Weight" column per "Attribute" over the appended rows of both branches. -->
      <operator activated="true" class="aggregate" compatibility="5.1.001" expanded="true" height="76" name="Aggregate" width="90" x="581" y="300">
        <list key="aggregation_attributes">
          <parameter key="Weight" value="sum"/>
        </list>
        <parameter key="group_by_attributes" value="Attribute"/>
      </operator>
      <connect from_op="Generate Data" from_port="output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Random Forest" to_port="training set"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Weight by User Specification" to_port="example set"/>
      <connect from_op="Random Forest" from_port="model" to_op="Weight by Tree Importance" to_port="random forest"/>
      <connect from_op="Weight by User Specification" from_port="weights" to_op="Weights to Data (2)" to_port="attribute weights"/>
      <connect from_op="Weights to Data (2)" from_port="example set" to_op="Append" to_port="example set 2"/>
      <connect from_op="Weight by Tree Importance" from_port="weights" to_op="Weights to Data" to_port="attribute weights"/>
      <connect from_op="Weights to Data" from_port="example set" to_op="Append" to_port="example set 1"/>
      <connect from_op="Append" from_port="merged set" to_op="Aggregate" to_port="example set input"/>
      <connect from_op="Aggregate" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

Hope that helps until the next release Smiley


Old World Computing - Expert Consulting and Training for RapidMiner
Pages: [1]
Jump to: