Pages: [1]
  Print  
Author Topic: [SOLVED] Reduce number of nominal classes in attribute  (Read 423 times)
ScottD
Newbie
*
Posts: 10


« on: May 06, 2013, 10:24:44 AM »

I have a nominal attribute with a lot of infrequently used classes in it.

How can I include only the ten most frequently occurring classes and classify the rest as "Other"?

Thanks.
« Last Edit: May 23, 2013, 10:13:17 AM by Marius » Logged
Marcin
Global Moderator
Full Member
*****
Posts: 165


« Reply #1 on: May 07, 2013, 08:31:57 AM »

Hi,

There is no single operator for this task, but you can do it with a combination of several operators. See my attached process for an example of how to do this.

Have fun
  Marcin

Code:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Example process: keep the ten most frequent values of the nominal attribute
  "att1" and replace every other value with the literal "Other".
  Approach: count rows per value, sort the counts in decreasing order, number
  the sorted rows, join that number back onto the data, then overwrite att1
  wherever the number exceeds 10.
  To reuse on real data, replace the sample data generator and adapt the
  attribute name "att1" and the threshold 10 where they occur below.
-->
<process version="5.3.008">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.008" expanded="true" name="Process">
    <process expanded="true">
      <!-- Sample input only: one nominal attribute with 20 distinct values. -->
      <operator activated="true" class="generate_nominal_data" compatibility="5.3.008" expanded="true" height="60" name="Generate Nominal Data" width="90" x="45" y="30">
        <parameter key="number_of_attributes" value="1"/>
        <parameter key="number_of_values" value="20"/>
      </operator>
      <!-- The whole reduction is wrapped in one subprocess: data in on "in 1", reduced data out on "out 1". -->
      <operator activated="true" class="subprocess" compatibility="5.3.008" expanded="true" height="76" name="Reduce values" width="90" x="246" y="30">
        <process expanded="true">
          <!-- Step 1: count the rows for each distinct att1 value; later steps refer to the result as "count(att1)". -->
          <operator activated="true" class="aggregate" compatibility="5.3.008" expanded="true" height="76" name="Aggregate" width="90" x="45" y="30">
            <list key="aggregation_attributes">
              <parameter key="att1" value="count"/>
            </list>
            <parameter key="group_by_attributes" value="|att1"/>
          </operator>
          <!-- Step 2: order the per-value counts so the most frequent values come first. -->
          <operator activated="true" class="sort" compatibility="5.3.008" expanded="true" height="76" name="Sort" width="90" x="246" y="30">
            <parameter key="attribute_name" value="count(att1)"/>
            <parameter key="sorting_direction" value="decreasing"/>
          </operator>
          <!-- Step 3: add the "id" attribute used as a frequency rank below.
               NOTE(review): presumably ids are assigned sequentially from 1 in the sorted row order; confirm.
               Values with equal counts are ranked in whatever order Sort leaves them. -->
          <operator activated="true" class="generate_id" compatibility="5.3.008" expanded="true" height="76" name="Generate ID" width="90" x="380" y="30"/>
          <!-- Step 4: attach the rank to every original row by matching on att1.
               Left input is the ranked count table, right input is the data from Aggregate's "original" port.
               No join type is set here, so the operator's default applies (TODO confirm which one). -->
          <operator activated="true" class="join" compatibility="5.3.008" expanded="true" height="76" name="Join" width="90" x="313" y="210">
            <parameter key="use_id_attribute_as_key" value="false"/>
            <list key="key_attributes">
              <parameter key="att1" value="att1"/>
            </list>
          </operator>
          <!-- Step 5: overwrite att1 with "Other" when id > 10, otherwise keep the original value.
               The 10 here is the number of values to keep. -->
          <operator activated="true" class="generate_attributes" compatibility="5.3.008" expanded="true" height="76" name="Generate Attributes" width="90" x="447" y="210">
            <list key="function_descriptions">
              <parameter key="att1" value="if(id&gt;10, &quot;Other&quot;, att1)"/>
            </list>
          </operator>
          <!-- Step 6: drop the helper attributes "count(att1)" and "id" (inverted subset selection).
               include_special_attributes is enabled, presumably so that "id" can be removed as well; confirm. -->
          <operator activated="true" class="select_attributes" compatibility="5.3.008" expanded="true" height="76" name="Select Attributes" width="90" x="581" y="30">
            <parameter key="attribute_filter_type" value="subset"/>
            <parameter key="attributes" value="|count(att1)|id"/>
            <parameter key="invert_selection" value="true"/>
            <parameter key="include_special_attributes" value="true"/>
          </operator>
          <!-- Wiring: Aggregate feeds two branches, the counts (to Sort, then Generate ID) and the
               untouched data (from its "original" port); both meet again in Join. -->
          <connect from_port="in 1" to_op="Aggregate" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="example set output" to_op="Sort" to_port="example set input"/>
          <connect from_op="Aggregate" from_port="original" to_op="Join" to_port="right"/>
          <connect from_op="Sort" from_port="example set output" to_op="Generate ID" to_port="example set input"/>
          <connect from_op="Generate ID" from_port="example set output" to_op="Join" to_port="left"/>
          <connect from_op="Join" from_port="join" to_op="Generate Attributes" to_port="example set input"/>
          <connect from_op="Generate Attributes" from_port="example set output" to_op="Select Attributes" to_port="example set input"/>
          <connect from_op="Select Attributes" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Top-level wiring: sample data into the subprocess, reduced data out as the process result. -->
      <connect from_op="Generate Nominal Data" from_port="output" to_op="Reduce values" to_port="in 1"/>
      <connect from_op="Reduce values" from_port="out 1" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Logged
ScottD
Newbie
*
Posts: 10


« Reply #2 on: May 13, 2013, 08:54:54 AM »

Wow, thanks so much for this Marcin...

I figured there was a way to do it, I just need to get a better handle on how to use all of the RM operators. 

I really appreciate the help.

Logged
Pages: [1]
  Print  
 
Jump to: