根据分类器结果使用 rapidminer 将文件分类到文件夹中

Sorting files into folders using rapidminer based on a classifier results

我需要帮助:如何把 RapidMiner 已分类的文件按其标签归入对应的文件夹?这个任务能否在 RapidMiner 中完成,或者通过读取结果示例集的 Java 代码完成?这是结果表

即:我只想将文件拆分到代表其标签的文件夹中

这是示例集

    Data: SimpleExampleSet: 15 examples, 31988 regular attributes, 
special attributes = { label = #0: label (polynominal/single_value)/values=[test1] 
metadata_file = #1: metadata_file (polynominal/single_value)/values=[0.txt, 1.txt, 10.txt, 11.txt, 12.txt, 13.txt, 14.txt, 2.txt, 3.txt, 4.txt, 5.txt, 6.txt, 7.txt, 8.txt, 9.txt] 
metadata_path = #2: metadata_path (polynominal/single_value)/values=[D:\Finaltests\test1\0.txt, D:\Finaltests\test1\1.txt, D:\Finaltests\test1\10.txt, D:\Finaltests\test1\11.txt, D:\Finaltests\test1\12.txt, D:\Finaltests\test1\13.txt, D:\Finaltests\test1\14.txt, D:\Finaltests\test1\2.txt, D:\Finaltests\test1\3.txt, D:\Finaltests\test1\4.txt, D:\Finaltests\test1\5.txt, D:\Finaltests\test1\6.txt, D:\Finaltests\test1\7.txt, D:\Finaltests\test1\8.txt, D:\Finaltests\test1\9.txt] 
metadata_date = #3: metadata_date (date_time/single_value) 
confidence_sport = #31993: confidence(sport) (real/single_value) 
confidence_places = #31994: confidence(places) (real/single_value) 
prediction = #31992: prediction(label) (binominal/single_value) }

谢谢。

这个过程比我最初所说的稍微复杂一些,所以我在下面提供了一个示例。它假定使用 Linux,并将 /tmp/old 中的所有文件复制到 /tmp/new/A 或 /tmp/new/B,其中 A 或 B 由标签决定。

<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Example RapidMiner process: lists the files found in /tmp/old, gives each
  one a label, and copies it to /tmp/new/LABEL/FILE_NAME.
  The label is generated randomly ("A" or "B") purely for demonstration;
  NOTE(review): in a real process it would presumably come from the
  classifier's prediction instead - confirm before reuse.
-->
<process version="7.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.0.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Stage 1: loop over the files in /tmp/old and log two macros per file. -->
      <operator activated="true" class="loop_files" compatibility="7.0.001" expanded="true" height="82" name="Loop Files" width="90" x="45" y="34">
        <parameter key="directory" value="/tmp/old"/>
        <process expanded="true">
          <!-- Expose the parent_path macro as a loggable value.
               NOTE(review): presumably set by Loop Files for the current file - confirm. -->
          <operator activated="true" class="provide_macro_as_log_value" compatibility="7.0.001" expanded="true" height="82" name="parent_path" width="90" x="179" y="34">
            <parameter key="macro_name" value="parent_path"/>
          </operator>
          <!-- Expose the file_name macro as a loggable value. -->
          <operator activated="true" class="provide_macro_as_log_value" compatibility="7.0.001" expanded="true" height="82" name="file_name" width="90" x="179" y="136">
            <parameter key="macro_name" value="file_name"/>
          </operator>
          <!-- Record both values under the log columns parent_path and file_name. -->
          <operator activated="true" class="log" compatibility="7.0.001" expanded="true" height="82" name="Log" width="90" x="380" y="34">
            <list key="log">
              <parameter key="parent_path" value="operator.parent_path.value.macro_value"/>
              <parameter key="file_name" value="operator.file_name.value.macro_value"/>
            </list>
          </operator>
          <connect from_op="parent_path" from_port="through 1" to_op="file_name" to_port="through 1"/>
          <connect from_op="file_name" from_port="through 1" to_op="Log" to_port="through 1"/>
          <connect from_op="Log" from_port="through 1" to_port="out 1"/>
          <portSpacing port="source_file object" spacing="0"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Stage 2: turn the collected log into an example set. -->
      <operator activated="true" class="log_to_data" compatibility="7.0.001" expanded="true" height="103" name="Log to Data" width="90" x="179" y="34"/>
      <!-- Stage 3: create the label attribute; "A" when rand() exceeds 0.5, otherwise "B". -->
      <operator activated="true" class="generate_attributes" compatibility="7.0.001" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="313" y="34">
        <list key="function_descriptions">
          <parameter key="label" value="if(rand()&gt;0.5, &quot;A&quot;, &quot;B&quot;)"/>
        </list>
      </operator>
      <!-- Stage 4: build the source path (old) and the label-specific target path (new). -->
      <operator activated="true" class="generate_attributes" compatibility="7.0.001" expanded="true" height="82" name="Generate Attributes" width="90" x="313" y="187">
        <list key="function_descriptions">
          <parameter key="old" value="parent_path + &quot;/&quot; + file_name"/>
          <parameter key="new" value="&quot;/tmp/new/&quot; + label+ &quot;/&quot; + file_name"/>
        </list>
      </operator>
      <!-- Stage 5: for every example, read old/new into macros and copy the file. -->
      <operator activated="true" class="loop_examples" compatibility="7.0.001" expanded="true" height="82" name="Loop Examples" width="90" x="514" y="187">
        <process expanded="true">
          <!-- Read the "old" attribute of the example at index %{example} into the macro "old". -->
          <operator activated="true" class="extract_macro" compatibility="7.0.001" expanded="true" height="68" name="old" width="90" x="112" y="34">
            <parameter key="macro" value="old"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="attribute_name" value="old"/>
            <parameter key="example_index" value="%{example}"/>
            <list key="additional_macros"/>
          </operator>
          <!-- Read the "new" attribute of the same example into the macro "new". -->
          <operator activated="true" class="extract_macro" compatibility="7.0.001" expanded="true" height="68" name="new" width="90" x="112" y="136">
            <parameter key="macro" value="new"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="attribute_name" value="new"/>
            <parameter key="example_index" value="%{example}"/>
            <list key="additional_macros"/>
          </operator>
          <!-- Copy (not move) the file from %{old} to %{new}. -->
          <operator activated="true" class="copy_file" compatibility="7.0.001" expanded="true" height="82" name="Copy File" width="90" x="380" y="34">
            <parameter key="source_file" value="%{old}"/>
            <parameter key="new_file" value="%{new}"/>
          </operator>
          <connect from_port="example set" to_op="old" to_port="example set"/>
          <connect from_op="old" from_port="example set" to_op="new" to_port="example set"/>
          <connect from_op="new" from_port="example set" to_op="Copy File" to_port="through 1"/>
          <connect from_op="Copy File" from_port="through 1" to_port="example set"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
        </process>
      </operator>
      <!-- Top-level wiring: Loop Files, Log to Data, both Generate Attributes, Loop Examples, result. -->
      <connect from_op="Loop Files" from_port="out 1" to_op="Log to Data" to_port="through 1"/>
      <connect from_op="Log to Data" from_port="exampleSet" to_op="Generate Attributes (2)" to_port="example set input"/>
      <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>

我使用的是 Copy File(复制而不是移动文件),以免因移动文件而造成损坏,希望您能看明白它是如何工作的。

总之,如果您生成了保存旧路径和新路径的属性,就必须使用 Loop Examples 遍历每个示例。在这个循环运算符内部,需要用 Extract Macro 把这两个值提取为宏,再把宏传给 Copy File 运算符。

希望对您有所帮助。