Pages: [1]
  Print  
Author Topic: [SOLVED] Remove Records with more than 3 missing values (in attributes)  (Read 406 times)
faridehbagherzadeh
Newbie
*
Posts: 24


« on: May 18, 2013, 07:48:33 PM »

Hello
I'm new to RapidMiner and I've run into a problem.
Could you please let me know how I can remove the records with more than 3 missing values?
I was trying to use the parameter string in the "Filter Examples" operator, but I couldn't write the right condition.
Thanks in advance
Farideh
« Last Edit: July 22, 2013, 12:33:15 PM by Marius » Logged
Marcin
Global Moderator
Full Member
*****
Posts: 165


« Reply #1 on: May 21, 2013, 09:01:30 AM »

Hey,

The "Filter Examples" operator is the right choice, but first you need to create a new attribute that counts the missing values of each example, and then filter on this single attribute. See the process below for an example.

To get this count you have to use a few operators, which I have collected within a subprocess called "Count missings". It is easy to get the count of non-missings, but to get the number of missings you have to subtract that count from the total number of attributes, which you can get via the "Extract Macro" operator.

Code:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  RapidMiner process: remove every example (record) that has more than 3
  missing values.

  Pipeline (see the connections at the end of the top-level process):
    Input -> Count missings -> Filter Examples -> result 1
-->
<process version="5.3.009">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.009" expanded="true" name="Process">
    <process expanded="true">
      <!-- Step 1: build demo data. Replace this subprocess with your own data source. -->
      <operator activated="true" class="subprocess" compatibility="5.3.009" expanded="true" height="76" name="Input" width="90" x="45" y="30">
        <process expanded="true">
          <!-- Generate a nominal example set with number_of_values set to 3. -->
          <operator activated="true" class="generate_nominal_data" compatibility="5.3.009" expanded="true" height="60" name="Generate Nominal Data" width="90" x="45" y="30">
            <parameter key="number_of_values" value="3"/>
          </operator>
          <!-- Treat the nominal value "value0" as missing so that the demo data contains missing values. -->
          <operator activated="true" class="declare_missing_value" compatibility="5.3.009" expanded="true" height="76" name="Declare Missing Value" width="90" x="179" y="30">
            <parameter key="mode" value="nominal"/>
            <parameter key="nominal_value" value="value0"/>
          </operator>
          <connect from_op="Generate Nominal Data" from_port="output" to_op="Declare Missing Value" to_port="example set input"/>
          <connect from_op="Declare Missing Value" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Step 2: add an attribute "missings" holding the number of missing values of each example. -->
      <operator activated="true" class="subprocess" compatibility="5.3.009" expanded="true" height="76" name="Count missings" width="90" x="179" y="30">
        <process expanded="true">
          <!-- Aggregate per example with the "count" function into a new attribute named "count"
               (the number of non-missing values); special attributes are included. -->
          <operator activated="true" class="generate_aggregation" compatibility="5.3.009" expanded="true" height="76" name="Count non-missings" width="90" x="45" y="30">
            <parameter key="attribute_name" value="count"/>
            <parameter key="include_special_attributes" value="true"/>
            <parameter key="aggregation_function" value="count"/>
          </operator>
          <!-- Store the number of attributes of the example set in the macro "attributes". -->
          <!-- NOTE(review): the macro is extracted after "count" has been added, while the
               aggregation above also covers special attributes. Confirm that the macro value
               equals the number of attributes that were actually counted; otherwise "missings"
               is off by a constant. -->
          <operator activated="true" class="extract_macro" compatibility="5.3.009" expanded="true" height="60" name="Extract attribute count" width="90" x="179" y="30">
            <parameter key="macro" value="attributes"/>
            <parameter key="macro_type" value="number_of_attributes"/>
            <list key="additional_macros"/>
          </operator>
          <!-- missings = (value of macro "attributes", parsed as a number) - count -->
          <operator activated="true" class="generate_attributes" compatibility="5.3.009" expanded="true" height="76" name="Compute missings" width="90" x="313" y="30">
            <list key="function_descriptions">
              <parameter key="missings" value="parse(macro(&quot;attributes&quot;)) - count"/>
            </list>
          </operator>
          <!-- Drop the helper attribute "count": it is selected as a single attribute and the selection is inverted. -->
          <operator activated="true" class="select_attributes" compatibility="5.3.009" expanded="true" height="76" name="Remove count" width="90" x="447" y="30">
            <parameter key="attribute_filter_type" value="single"/>
            <parameter key="attribute" value="count"/>
            <parameter key="invert_selection" value="true"/>
          </operator>
          <connect from_port="in 1" to_op="Count non-missings" to_port="example set input"/>
          <connect from_op="Count non-missings" from_port="example set output" to_op="Extract attribute count" to_port="example set"/>
          <connect from_op="Extract attribute count" from_port="example set" to_op="Compute missings" to_port="example set input"/>
          <connect from_op="Compute missings" from_port="example set output" to_op="Remove count" to_port="example set input"/>
          <connect from_op="Remove count" from_port="example set output" to_port="out 1"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="source_in 2" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Step 3: the condition matches examples with missings > 3; invert_filter is true,
           so exactly those examples are removed and examples with at most 3 missings are kept.
           Change the 3 in parameter_string to use a different threshold. -->
      <operator activated="true" class="filter_examples" compatibility="5.3.009" expanded="true" height="76" name="Filter Examples" width="90" x="313" y="30">
        <parameter key="condition_class" value="attribute_value_filter"/>
        <parameter key="parameter_string" value="missings&gt;3"/>
        <parameter key="invert_filter" value="true"/>
      </operator>
      <connect from_op="Input" from_port="out 1" to_op="Count missings" to_port="in 1"/>
      <connect from_op="Count missings" from_port="out 1" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
Logged
faridehbagherzadeh
Newbie
*
Posts: 24


« Reply #2 on: July 16, 2013, 12:42:37 PM »

Hey!
Thanks for the great help!
I used the code and that was a wonderful one!
Logged
Pages: [1]
  Print  
 
Jump to: