Pages: [1]
  Print  
Author Topic: [SOLVED] Model from Generate Script generates no values  (Read 518 times)
aborg
Jr. Member
**
Posts: 62


WWW
« on: January 19, 2013, 09:59:08 PM »

Hi,

Here is my process:
Code:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<process version="5.2.008">
  <!--
    Test process: builds a PCAModel by hand in an Execute Script (Groovy) operator and
    compares it with the model produced by the built-in PCA operator. Both models are
    applied to copies of the same input example set (//Samples/data/Polynomial).
  -->
  <context>
    <input>
      <location>//Samples/data/Polynomial</location>
    </input>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.2.008" expanded="true" name="Process">
    <process expanded="true" height="251" width="681">
      <!-- Copies the process input to three consumers: Execute Script, Apply Model and PCA. -->
      <operator activated="true" class="multiply" compatibility="5.2.008" expanded="true" height="112" name="Multiply" width="90" x="45" y="30"/>
      <!--
        Hand-built model. The active part of the script takes input[0], computes its
        covariance matrix, takes the eigenvectors and real eigenvalues of that matrix,
        constructs a PCAModel from them, sets several parameters and returns the model.
        The two block-commented sections of the script (macro example set, synthetic
        eigenvalues/eigenvectors) are inactive.
        NOTE(review): the script never asks the example set to compute attribute
        statistics before calling the PCAModel constructor. The follow-up post in this
        thread reports that the constructor only saves the attribute means, and that
        Tools#onlyNumericalAttributes / Tools#onlyNonMissingValues compute them;
        presumably that is why applying this model yields only missing values. Confirm
        against the RapidMiner sources.
        NOTE(review): the parameter key "keep_attribues" looks like a misspelling of
        "keep_attributes"; verify against the parameter names the model expects.
        NOTE(review): m.eig() is invoked twice (once for the eigenvectors, once for the
        eigenvalues).
      -->
      <operator activated="true" class="execute_script" compatibility="5.2.008" expanded="true" height="76" name="Execute Script" width="90" x="246" y="30">
        <parameter key="script" value="import com.rapidminer.operator.features.transformation.PCAModel;&#10;/*String macroName = &quot;temp_path&quot;;&#10;Attribute macroValueAttribute = AttributeFactory.createAttribute(&quot;macroValue&quot;, com.rapidminer.tools.Ontology.NOMINAL);&#10;String macroValue = operator.getProcess().macroHandler.getMacro(macroName);&#10;&#10;macroValueAttribute.setMapping(&#10;     new com.rapidminer.example.table.PolynominalMapping(&#10;          //Collections.singletonMap(Integer.valueOf(0), macroValue)&#10;          [0:macroValue]&#10;          ));&#10;ExampleTable table = new MemoryExampleTable(macroValueAttribute);&#10;table.addDataRow(new IntArrayDataRow(0));&#10;ExampleSet ret = new SimpleExampleSet(table);&#10;&#10;//String macroValue = operator.getProcess().macroHandler.getMacro(macroName);&#10;return [ret] as ExampleSet[];*/&#10;ExampleSet exampleSet = input[0];&#10;int dim = exampleSet.getAttributes().size();&#10;/*double[] eigenValues = new double[dim];&#10;double[][] eigenVectors = new double[dim][dim];&#10;Random r = new Random(2);&#10;for (int i = dim; i--&gt;0;)&#10;{&#10;&#9;eigenValues[i] = 1.0;//11.0 - i;&#10;&#9;for (int j = dim; j--&gt;0;)&#10;&#9;&#9;eigenVectors[i][j] = 0.0;//r.nextDouble() - 0.5;&#10;&#9;eigenVectors[i][dim - i - 1] = 1;&#10;}*/&#10;Jama.Matrix m = com.rapidminer.tools.math.matrix.CovarianceMatrix.&#10;   getCovarianceMatrix(exampleSet);&#10;double[][] v = m.eig().getV().getArray();&#10;Model model = new PCAModel(exampleSet, /*eigenValues*/m.eig().getRealEigenvalues(), /*eigenVectors*/v);&#10;model.setParameter(&quot;keep_attribues&quot;, &quot;true&quot;);&#10;model.setParameter(&quot;dimensionality_reduction&quot;, &quot;none&quot;);&#10;model.setParameter(&quot;number_of_components&quot;, Integer.toString(dim));&#10;model.setParameter(&quot;variance_threshold&quot;, &quot;1.0&quot;);&#10;model.setNumberOfComponents(dim);&#10;return [model] as Model[];"/>
      </operator>
      <!-- Reference model: the built-in PCA operator, run on the third copy of the input. -->
      <operator activated="true" class="principal_component_analysis" compatibility="5.2.008" expanded="true" height="94" name="PCA" width="90" x="246" y="120"/>
      <!-- Applies the PCA operator's preprocessing model to the PCA operator's "original" output. -->
      <operator activated="true" class="apply_model" compatibility="5.2.008" expanded="true" height="76" name="Apply Model (2)" width="90" x="380" y="165">
        <list key="application_parameters"/>
      </operator>
      <!-- Applies the script-built model to the second copy of the input. -->
      <operator activated="true" class="apply_model" compatibility="5.2.008" expanded="true" height="76" name="Apply Model" width="90" x="380" y="30">
        <list key="application_parameters"/>
      </operator>
      <!-- Wiring: process input, then Multiply, then the script branch and the PCA branch. -->
      <connect from_port="input 1" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Execute Script" to_port="input 1"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Apply Model" to_port="unlabelled data"/>
      <connect from_op="Multiply" from_port="output 3" to_op="PCA" to_port="example set input"/>
      <connect from_op="Execute Script" from_port="output 1" to_op="Apply Model" to_port="model"/>
      <!--
        Results: 1 = data after the script-built model, 2 = PCA operator's example set output,
        3 = data after the PCA operator's model, 4 = script-built model, 5 = PCA operator's model.
      -->
      <connect from_op="PCA" from_port="example set output" to_port="result 2"/>
      <connect from_op="PCA" from_port="original" to_op="Apply Model (2)" to_port="unlabelled data"/>
      <connect from_op="PCA" from_port="preprocessing model" to_op="Apply Model (2)" to_port="model"/>
      <connect from_op="Apply Model (2)" from_port="labelled data" to_port="result 3"/>
      <connect from_op="Apply Model (2)" from_port="model" to_port="result 5"/>
      <connect from_op="Apply Model" from_port="labelled data" to_port="result 1"/>
      <connect from_op="Apply Model" from_port="model" to_port="result 4"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="source_input 2" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
      <portSpacing port="sink_result 4" spacing="0"/>
      <portSpacing port="sink_result 5" spacing="0"/>
      <portSpacing port="sink_result 6" spacing="0"/>
    </process>
  </operator>
</process>
Sorry, it is intentionally large for testing purposes. So, I create a PCAModel in Groovy. It seems to be OK in the model output (I see the same values as in the model from the normal PCA operator). But when I try to apply that model to the same dataset, I get nothing but missing values. I guess I did something obviously wrong, but I do not see where the problem is. Do you have any idea?
Thanks, gabor

PS: RM 5.2 Community edition; I was not sure whether this is Development topic or not, so no hard feelings if this gets moved.
« Last Edit: January 19, 2013, 10:39:52 PM by aborg » Logged
aborg
Jr. Member
**
Posts: 62


WWW
« Reply #1 on: January 19, 2013, 10:39:34 PM »

Never mind... After some debugging I found a solution. I thought the Tools#onlyNumericalAttributes and Tools#onlyNonMissingValues calls were just checking some invariants, but it turned out that they compute some statistics too, so they have to be called before the model is constructed.
It is interesting that those stats (attribute means) are not checked in the model constructor, just saved. (Maybe a check for NaNs would not take too long and could report errors. Also, some Javadoc would help a bit.)
Logged
Pages: [1]
  Print  
 
Jump to: