Rapid Miner 行最大值

Rapid Miner Row Maximum

抱歉,我是 RapidMiner 的新手,只做了基础教程。

我有一个如下所示的数据集:
MatchID   Value1   Value2   Value3
1            5        1        2
1           4.5      1.5       2
...

我想知道是否可以获取每一列(例如 Value1)的最大值,并将其用于进一步计算(Generate Attributes)。

谢谢。

其实有很多种方法。下面是其中一种:先用 Aggregate 运算符求出最大值,再用 Join 将其与原始数据合并,最后用 Generate Attributes 进行计算。

<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process: compute attribute maxima of the Iris sample data with
  Aggregate, join them back onto the original examples, and derive the
  difference between each maximum and the attribute value (a1 to a4).
-->
<process version="7.2.003">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.2.003" expanded="true" name="Process">
    <process expanded="true">
      <!-- Load the Iris sample data from the repository. -->
      <operator activated="true" class="retrieve" compatibility="7.2.003" expanded="true"
                height="68" name="Retrieve Iris" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <!-- Use "maximum" as the default aggregation; no per-attribute aggregations are listed. -->
      <operator activated="true" class="aggregate" compatibility="7.2.003" expanded="true"
                height="82" name="Aggregate" width="90" x="179" y="34">
        <parameter key="use_default_aggregation" value="true"/>
        <parameter key="default_aggregation_function" value="maximum"/>
        <list key="aggregation_attributes"/>
      </operator>
      <!--
        Outer join with an empty key list and the ID attribute not used as key.
        NOTE(review): presumably this attaches the aggregated maxima to every
        original example; confirm the result in RapidMiner.
      -->
      <operator activated="true" class="join" compatibility="7.2.003" expanded="true"
                height="82" name="Join" width="90" x="313" y="34">
        <parameter key="join_type" value="outer"/>
        <parameter key="use_id_attribute_as_key" value="false"/>
        <list key="key_attributes"/>
      </operator>
      <!-- deltaAn = [maximum(an)] - an, for n = 1..4; [maximum(an)] names the aggregated attribute. -->
      <operator activated="true" class="generate_attributes" compatibility="7.2.003" expanded="true"
                height="82" name="Generate Attributes" width="90" x="447" y="34">
        <list key="function_descriptions">
          <parameter key="deltaA1" value="[maximum(a1)]-a1"/>
          <parameter key="deltaA2" value="[maximum(a2)]-a2"/>
          <parameter key="deltaA3" value="[maximum(a3)]-a3"/>
          <parameter key="deltaA4" value="[maximum(a4)]-a4"/>
        </list>
      </operator>
      <!--
        Wiring: Retrieve Iris feeds Aggregate; Aggregate's aggregated output goes to
        Join (left) and its original output to Join (right); Join feeds
        Generate Attributes, whose output is delivered as result 1.
      -->
      <connect from_op="Retrieve Iris" from_port="output"
               to_op="Aggregate" to_port="example set input"/>
      <connect from_op="Aggregate" from_port="example set output"
               to_op="Join" to_port="left"/>
      <connect from_op="Aggregate" from_port="original"
               to_op="Join" to_port="right"/>
      <connect from_op="Join" from_port="join"
               to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output"
               to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

另一种方法是使用 Extract Macro 运算符,将 macro_type 设置为 statistics,并将 statistics 设置为 max。这样会把给定属性的最大值存储为一个宏,之后就可以在其他运算符(例如 Generate Attributes)中使用它。

优点是您无需修改原始数据集,也不必使用 Join 或 Multiply 运算符。

<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process: store the maximum of attribute a1 of the Iris sample data
  in the macro maxA1, then use that macro in Generate Attributes to compute the
  difference between the maximum and each a1 value.
-->
<process version="7.5.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.5.000" expanded="true" name="Process">
    <process expanded="true">
      <!-- Load the Iris sample data from the repository. -->
      <operator activated="true" class="retrieve" compatibility="7.5.000" expanded="true"
                height="68" name="Retrieve Iris" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <!-- Macro maxA1 receives the "max" statistic of attribute a1. -->
      <operator activated="true" class="extract_macro" compatibility="7.5.000" expanded="true"
                height="68" name="Extract Macro" width="90" x="179" y="34">
        <parameter key="macro" value="maxA1"/>
        <parameter key="macro_type" value="statistics"/>
        <parameter key="statistics" value="max"/>
        <parameter key="attribute_name" value="a1"/>
        <list key="additional_macros"/>
        <description align="center" color="transparent" colored="false"
                     width="126">extract maximum of attribute a1 and store it in a macro</description>
      </operator>
      <!-- DifferenceA1 = parse(%{maxA1}) - a1; the macro value is passed through parse() before the subtraction. -->
      <operator activated="true" class="generate_attributes" compatibility="7.5.000" expanded="true"
                height="82" name="Generate Attributes" width="90" x="313" y="34">
        <list key="function_descriptions">
          <parameter key="DifferenceA1" value="parse(%{maxA1})-a1"/>
        </list>
        <description align="center" color="transparent" colored="false"
                     width="126">calculate the difference of a1 from the maximum using the macro value</description>
      </operator>
      <!-- Wiring: Retrieve Iris to Extract Macro to Generate Attributes to result 1. -->
      <connect from_op="Retrieve Iris" from_port="output"
               to_op="Extract Macro" to_port="example set"/>
      <connect from_op="Extract Macro" from_port="example set"
               to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output"
               to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

提示: 由于宏的值是以文本形式存储的,必须先用 parse 函数将其转换为数值,才能参与数值计算。

第三种方法是先用 Sort 运算符对示例集排序,再用 Filter Example Range 运算符只保留取得最大值的那条示例。如果您最关心的是某个属性取最大值时其他属性的值,这种方法会很方便。

<?xml version="1.0" encoding="UTF-8"?>
<!--
  RapidMiner process: sort the Iris sample data by a1 in decreasing order and
  keep only the first example, i.e. the one holding the maximum of a1.
-->
<process version="7.5.000">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.5.000" expanded="true" name="Process">
    <process expanded="true">
      <!-- Load the Iris sample data from the repository. -->
      <operator activated="true" class="retrieve" compatibility="7.5.000" expanded="true"
                height="68" name="Retrieve Iris" width="90" x="45" y="34">
        <parameter key="repository_entry" value="//Samples/data/Iris"/>
      </operator>
      <!-- Order the examples by a1, largest value first. -->
      <operator activated="true" class="sort" compatibility="7.5.000" expanded="true"
                height="82" name="Sort" width="90" x="179" y="34">
        <parameter key="attribute_name" value="a1"/>
        <parameter key="sorting_direction" value="decreasing"/>
        <description align="center" color="transparent" colored="false"
                     width="126">sorting the example set on a1 decreasing</description>
      </operator>
      <!-- Keep the range from example 1 to example 1, i.e. only the first example after sorting. -->
      <operator activated="true" class="filter_example_range" compatibility="7.5.000" expanded="true"
                height="82" name="Filter Example Range" width="90" x="313" y="34">
        <parameter key="first_example" value="1"/>
        <parameter key="last_example" value="1"/>
        <description align="center" color="transparent" colored="false"
                     width="126">only keeping the first example, which has the maximum for a1</description>
      </operator>
      <!-- Wiring: Retrieve Iris to Sort to Filter Example Range to result 1. -->
      <connect from_op="Retrieve Iris" from_port="output"
               to_op="Sort" to_port="example set input"/>
      <connect from_op="Sort" from_port="example set output"
               to_op="Filter Example Range" to_port="example set input"/>
      <connect from_op="Filter Example Range" from_port="example set output"
               to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>