Pages: [1]
  Print  
Author Topic: [SOLVED] Can someone here help me with the "pivot" operator?  (Read 436 times)
njjzb2006
Newbie
*
Posts: 4


« on: August 14, 2013, 04:03:53 AM »

1. My original data looks like this:
FORMULA_ID   HERB_ID
15   65
15   152
16   96
16   185
17   96
17   44
18   29
19   59
19   92
20   33
20   44
...     ...
2. I want to use the "pivot" operator to transform the data ("FORMULA_ID" as the group attribute, "HERB_ID" as the index attribute) for subsequent association mining. The attribute "HERB_ID" has more than 50 different values, and the "pivot" operator does not work as expected: the result contains only one regular attribute, which is the group attribute.

However, the "pivot" description says:
The resultant ExampleSet has m regular attributes in addition to the group attribute where m is the number of unique values of the index attribute.  

Why? Is there a limit on the number of unique index attribute values in this version of RapidMiner? (My RapidMiner version is 5.3.012 for students. Huh)

Code:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- RapidMiner process: retrieves a stored example set from the repository and pivots it. -->
<process version="5.3.012">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.012" expanded="true" name="Process">
    <process expanded="true">
      <!-- Loads the example set stored at the repository entry named in the parameter below. -->
      <operator activated="true" class="retrieve" compatibility="5.3.012" expanded="true" height="60" name="伤寒方" width="90" x="45" y="75">
        <parameter key="repository_entry" value="//Local Repository/data/伤寒方"/>
      </operator>
      <!-- Pivots with FORMULA_ID as the group attribute and HERB_ID as the index attribute.
           NOTE(review): the sample data shown in this thread has only these two columns, and the
           reply in this thread points out that a further attribute is needed to supply the values
           placed in the pivoted columns. The poster reports that only the group attribute appears
           in the result of this process. -->
      <operator activated="true" class="pivot" compatibility="5.3.012" expanded="true" height="76" name="Pivot" width="90" x="179" y="75">
        <parameter key="group_attribute" value="FORMULA_ID"/>
        <parameter key="index_attribute" value="HERB_ID"/>
      </operator>
      <!-- Wiring: the retrieved example set feeds Pivot; the Pivot example set output is
           delivered to the first result port of the process. -->
      <connect from_op="伤寒方" from_port="output" to_op="Pivot" to_port="example set input"/>
      <connect from_op="Pivot" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
I hope you can help. Thanks!

The whole data set:
FORMULA_ID,HERB_ID
15,65
15,63
15,64
15,152
16,96
16,185
17,96
17,44
17,45
17,149
18,24
18,25
18,153
18,29
19,59
19,92
20,33
20,44
20,147
20,29
20,59
21,59
23,59
23,45
23,44
22,59
24,24
24,147
24,153
24,28
24,96
24,25
25,25
25,153
25,147
25,29
26,59
26,92
27,33
27,44
27,149
27,153
27,29
28,45
29,74
29,75
29,151
29,73
29,25
30,28
30,96
30,45
30,158
30,186
30,83
30,25
30,149
30,46
31,86
32,151
32,29
32,153
33,153
33,96
34,153
35,153
35,151
35,25
36,153
36,44
36,96
36,45
37,153
37,96
37,149
38,153
38,96
39,153
39,102
39,147
39,33
40,29
40,153
40,149
40,96
40,44
40,45
41,54
41,46
41,45
41,48
42,39
42,37
42,153
42,149
43,39
43,37
43,153
44,96
45,96
46,135
46,62
46,127
47,178
47,81
48,44
48,96
48,149
48,153
48,45
49,125
49,25
49,153
50,147
50,153
51,147
51,153
52,158
52,147
52,153
52,72
52,25
52,28
52,29
53,158
53,25
53,147
53,28
53,153
53,72
54,43
54,37
54,149
54,153
55,174
55,173
56,149
56,29
57,148
57,75
57,140
57,137
57,138
57,40
57,187
58,73
58,149
58,151
58,147
59,59
59,45
59,44
61,120
61,121
61,107
61,59
62,120
62,121
62,107
62,59
63,153
63,29
63,149
63,25
63,159
63,188
64,77
64,59
65,25
66,73
66,149
66,153
66,96
65,29
65,73
65,153
67,25
67,153
67,73
68,73
68,25
68,151
68,153
70,42
70,96
71,42
71,153
71,189
72,42
72,29
72,189
73,42
73,92
73,102
74,42
74,189
75,153
75,46
75,190
76,92
76,29
76,153
76,149
77,73
77,147
77,29
77,151
78,25
78,147
78,24
78,29
78,130
78,153
78,152
79,25
79,147
79,24
79,153
79,29
79,37
80,25
80,153
80,151
80,149
80,96
81,25
81,153
82,25
82,153
82,29
82,148
82,140
82,142
83,25
83,153
84,147
84,153
84,29
84,151
84,73
85,25
85,153
85,148
85,142
86,25
86,153
87,25
87,59
87,147
87,29
87,153
88,25
88,147
88,153
88,149
88,29
89,25
89,147
89,153
90,25
90,147
90,153
91,25
91,153
91,147
91,92
92,25
92,147
92,29
92,153
93,35
93,147
93,153
93,25
94,25
94,147
94,153
94,29
95,25
95,153
96,25
96,147
96,153
96,24
97,135
97,153
98,173
99,59
99,25
99,153
99,60
100,33
100,142
100,44
100,181
100,149
100,25
100,73
100,59
100,148
101,33
101,44
101,149
101,153
101,125
101,60
102,33
102,25
102,96
102,40
102,44
102,148
102,153
103,25
103,44
103,149
103,153
103,147
103,33
104,191
105,59
105,153
106,153
106,96
108,149
108,96
108,153
108,151
107,153
107,96
109,44
109,147
109,153
110,44
110,147
110,153
111,45
111,153
111,96
111,25
111,149
112,45
112,44
112,147
112,159
113,74
113,73
113,75
113,159
113,71
114,169
115,147
115,102
115,59
116,24
116,39
116,44
116,147
116,161
116,25
116,73
116,153
116,37
116,151
116,96
116,164
117,24
117,25
117,153
118,24
118,153
118,37
119,24
119,153
119,37
120,24
120,29
120,153
121,153
121,24
122,24
122,28
123,149
123,29
123,153
123,192
124,35
124,24
124,153
124,147
124,25
125,35
125,24
125,25
125,153
125,147
126,35
126,153
126,44
126,45
« Last Edit: August 23, 2013, 01:25:27 PM by Marius » Logged
marcin.blachnik
Newbie
*
Posts: 35


« Reply #1 on: August 14, 2013, 04:38:42 PM »

Hi
You are missing one more attribute. The group attribute creates the rows, the index attribute defines the columns, and you are missing the attribute whose values should be placed inside the example set after pivoting.

For example you can do something like this:
Code:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- RapidMiner process: reads the two-column CSV, adds a constant attribute, then pivots. -->
<process version="5.3.008">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.008" expanded="true" name="Process">
    <process expanded="true">
      <!-- Reads the comma-separated data file and declares both columns as integer attributes.
           NOTE(review): csv_file is an absolute path on the author's machine; point it at your
           own copy of the data before running. -->
      <operator activated="true" class="read_csv" compatibility="5.3.008" expanded="true" height="60" name="Read CSV" width="90" x="112" y="130">
        <parameter key="csv_file" value="D:\Marcin\Desktop\data.csv"/>
        <parameter key="column_separators" value=","/>
        <parameter key="first_row_as_names" value="false"/>
        <!-- Row 0 is annotated as "Name"; presumably this makes the header row supply the
             attribute names even though first_row_as_names is false. TODO confirm. -->
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <parameter key="encoding" value="windows-1250"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="FORMULA_ID.true.integer.attribute"/>
          <parameter key="1" value="HERB_ID.true.integer.attribute"/>
        </list>
      </operator>
      <!-- Adds an attribute named "Const" defined by the expression 1. Per the accompanying
           explanation, this is the extra attribute whose values fill the pivoted columns. -->
      <operator activated="true" class="generate_attributes" compatibility="5.3.008" expanded="true" height="76" name="Generate Attributes" width="90" x="241" y="132">
        <list key="function_descriptions">
          <parameter key="Const" value="1"/>
        </list>
      </operator>
      <!-- Pivots with FORMULA_ID as the group attribute and HERB_ID as the index attribute.
           Weights are not considered and constant attributes are not skipped. -->
      <operator activated="true" class="pivot" compatibility="5.3.008" expanded="true" height="76" name="Pivot" width="90" x="514" y="120">
        <parameter key="group_attribute" value="FORMULA_ID"/>
        <parameter key="index_attribute" value="HERB_ID"/>
        <parameter key="consider_weights" value="false"/>
        <parameter key="skip_constant_attributes" value="false"/>
      </operator>
      <!-- Wiring: Read CSV feeds Generate Attributes, which feeds Pivot. The pivoted example
           set goes to result 1 and the "original" port of Pivot goes to result 2. -->
      <connect from_op="Read CSV" from_port="output" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Pivot" to_port="example set input"/>
      <connect from_op="Pivot" from_port="example set output" to_port="result 1"/>
      <connect from_op="Pivot" from_port="original" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="90"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>


best Marcin
Logged
njjzb2006
Newbie
*
Posts: 4


« Reply #2 on: August 14, 2013, 05:01:44 PM »

Hi
You are missing one more attribute. The group attribute creates the rows, the index attribute defines the columns, and you are missing the attribute whose values should be placed inside the example set after pivoting.

For example you can do something like this:
Code:
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- RapidMiner process (quoted in this reply): reads a two-column CSV, adds a constant
     attribute, then pivots the data. -->
<process version="5.3.008">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="5.3.008" expanded="true" name="Process">
    <process expanded="true">
      <!-- Reads the comma-separated file and declares FORMULA_ID and HERB_ID as integer
           attributes. NOTE(review): csv_file is an absolute, machine-specific path. -->
      <operator activated="true" class="read_csv" compatibility="5.3.008" expanded="true" height="60" name="Read CSV" width="90" x="112" y="130">
        <parameter key="csv_file" value="D:\Marcin\Desktop\data.csv"/>
        <parameter key="column_separators" value=","/>
        <parameter key="first_row_as_names" value="false"/>
        <list key="annotations">
          <parameter key="0" value="Name"/>
        </list>
        <parameter key="encoding" value="windows-1250"/>
        <list key="data_set_meta_data_information">
          <parameter key="0" value="FORMULA_ID.true.integer.attribute"/>
          <parameter key="1" value="HERB_ID.true.integer.attribute"/>
        </list>
      </operator>
      <!-- Generates an attribute named "Const" from the expression 1, giving Pivot a third
           attribute besides the group and index attributes. -->
      <operator activated="true" class="generate_attributes" compatibility="5.3.008" expanded="true" height="76" name="Generate Attributes" width="90" x="241" y="132">
        <list key="function_descriptions">
          <parameter key="Const" value="1"/>
        </list>
      </operator>
      <!-- Pivot grouped by FORMULA_ID and indexed by HERB_ID. -->
      <operator activated="true" class="pivot" compatibility="5.3.008" expanded="true" height="76" name="Pivot" width="90" x="514" y="120">
        <parameter key="group_attribute" value="FORMULA_ID"/>
        <parameter key="index_attribute" value="HERB_ID"/>
        <parameter key="consider_weights" value="false"/>
        <parameter key="skip_constant_attributes" value="false"/>
      </operator>
      <!-- Operator chain: Read CSV, then Generate Attributes, then Pivot. Both Pivot outputs
           (the pivoted example set and the "original" port) are exposed as process results. -->
      <connect from_op="Read CSV" from_port="output" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Pivot" to_port="example set input"/>
      <connect from_op="Pivot" from_port="example set output" to_port="result 1"/>
      <connect from_op="Pivot" from_port="original" to_port="result 2"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="90"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>


best Marcin
Thank you very much! It works well now!
 :)jzb
Logged
Pages: [1]
  Print  
 
Jump to: