根据分类器结果使用 rapidminer 将文件分类到文件夹中
Sorting files into folders using rapidminer based on a classifier results
我需要帮助:如何把经 rapidminer 分类后的文件按其标签归入对应的文件夹?这个任务能否在 rapidminer 中完成,或者通过读取结果示例集的 java 代码来完成?
这是结果 table
即:我只想将文件拆分到代表其标签的文件夹中
这是示例集
Data: SimpleExampleSet: 15 examples, 31988 regular attributes,
special attributes = { label = #0: label (polynominal/single_value)/values=[test1]
metadata_file = #1: metadata_file (polynominal/single_value)/values=[0.txt, 1.txt, 10.txt, 11.txt, 12.txt, 13.txt, 14.txt, 2.txt, 3.txt, 4.txt, 5.txt, 6.txt, 7.txt, 8.txt, 9.txt]
metadata_path = #2: metadata_path (polynominal/single_value)/values=[D:\Finaltests\test1\0.txt, D:\Finaltests\test1\1.txt, D:\Finaltests\test1\10.txt, D:\Finaltests\test1\11.txt, D:\Finaltests\test1\12.txt, D:\Finaltests\test1\13.txt, D:\Finaltests\test1\14.txt, D:\Finaltests\test1\2.txt, D:\Finaltests\test1\3.txt, D:\Finaltests\test1\4.txt, D:\Finaltests\test1\5.txt, D:\Finaltests\test1\6.txt, D:\Finaltests\test1\7.txt, D:\Finaltests\test1\8.txt, D:\Finaltests\test1\9.txt]
metadata_date = #3: metadata_date (date_time/single_value)
confidence_sport = #31993: confidence(sport) (real/single_value)
confidence_places = #31994: confidence(places) (real/single_value)
prediction = #31992: prediction(label) (binominal/single_value) }
谢谢。
这个过程比我最初说的要繁琐一些,所以我在下面提供了一个示例。它假定运行环境为 Linux,并将 /tmp/old 中的所有文件复制到 /tmp/new/A 或 /tmp/new/B,其中 A 和 B 由标签决定。
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Example RapidMiner process: list the files in /tmp/old, give each file a
  label ("A" or "B"), and copy it to /tmp/new/<label>/<file name>.

  Double quotes that belong to an expression inside an attribute value are
  written as &quot; so that the document is well-formed XML.
-->
<process version="7.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.0.001" expanded="true" name="Process">
    <process expanded="true">
      <!--
        Step 1: loop over the directory /tmp/old. Inside the loop the macros
        "parent_path" and "file_name" are exposed as log values and written
        to the log by the Log operator.
        NOTE(review): these macro names are presumably the ones Loop Files
        sets by default for each file; confirm against the operator settings.
      -->
      <operator activated="true" class="loop_files" compatibility="7.0.001" expanded="true" height="82" name="Loop Files" width="90" x="45" y="34">
        <parameter key="directory" value="/tmp/old"/>
        <process expanded="true">
          <operator activated="true" class="provide_macro_as_log_value" compatibility="7.0.001" expanded="true" height="82" name="parent_path" width="90" x="179" y="34">
            <parameter key="macro_name" value="parent_path"/>
          </operator>
          <operator activated="true" class="provide_macro_as_log_value" compatibility="7.0.001" expanded="true" height="82" name="file_name" width="90" x="179" y="136">
            <parameter key="macro_name" value="file_name"/>
          </operator>
          <!-- Logs two columns, parent_path and file_name, read from the two operators above. -->
          <operator activated="true" class="log" compatibility="7.0.001" expanded="true" height="82" name="Log" width="90" x="380" y="34">
            <list key="log">
              <parameter key="parent_path" value="operator.parent_path.value.macro_value"/>
              <parameter key="file_name" value="operator.file_name.value.macro_value"/>
            </list>
          </operator>
          <connect from_op="parent_path" from_port="through 1" to_op="file_name" to_port="through 1"/>
          <connect from_op="file_name" from_port="through 1" to_op="Log" to_port="through 1"/>
          <connect from_op="Log" from_port="through 1" to_port="out 1"/>
          <portSpacing port="source_file object" spacing="0"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Step 2: turn the logged values into an example set (output port "exampleSet"). -->
      <operator activated="true" class="log_to_data" compatibility="7.0.001" expanded="true" height="103" name="Log to Data" width="90" x="179" y="34"/>
      <!--
        Step 3: generate a "label" attribute that is randomly "A" or "B".
        NOTE(review): this looks like a stand-in for a real classifier
        prediction; replace it with the actual label when adapting the process.
      -->
      <operator activated="true" class="generate_attributes" compatibility="7.0.001" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="313" y="34">
        <list key="function_descriptions">
          <parameter key="label" value="if(rand()>0.5, &quot;A&quot;, &quot;B&quot;)"/>
        </list>
      </operator>
      <!--
        Step 4: build the source path ("old") and the destination path ("new")
        for every file; the destination folder is /tmp/new/ followed by the label.
      -->
      <operator activated="true" class="generate_attributes" compatibility="7.0.001" expanded="true" height="82" name="Generate Attributes" width="90" x="313" y="187">
        <list key="function_descriptions">
          <parameter key="old" value="parent_path + &quot;/&quot; + file_name"/>
          <parameter key="new" value="&quot;/tmp/new/&quot; + label+ &quot;/&quot; + file_name"/>
        </list>
      </operator>
      <!--
        Step 5: for each example, extract the "old" and "new" attribute values
        into macros of the same names and pass them to Copy File.
        NOTE(review): %{example} is presumably the iteration macro provided by
        Loop Examples; confirm the operator's iteration macro name.
      -->
      <operator activated="true" class="loop_examples" compatibility="7.0.001" expanded="true" height="82" name="Loop Examples" width="90" x="514" y="187">
        <process expanded="true">
          <operator activated="true" class="extract_macro" compatibility="7.0.001" expanded="true" height="68" name="old" width="90" x="112" y="34">
            <parameter key="macro" value="old"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="attribute_name" value="old"/>
            <parameter key="example_index" value="%{example}"/>
            <list key="additional_macros"/>
          </operator>
          <operator activated="true" class="extract_macro" compatibility="7.0.001" expanded="true" height="68" name="new" width="90" x="112" y="136">
            <parameter key="macro" value="new"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="attribute_name" value="new"/>
            <parameter key="example_index" value="%{example}"/>
            <list key="additional_macros"/>
          </operator>
          <!-- Copies %{old} to %{new}; the source file is left in place. -->
          <operator activated="true" class="copy_file" compatibility="7.0.001" expanded="true" height="82" name="Copy File" width="90" x="380" y="34">
            <parameter key="source_file" value="%{old}"/>
            <parameter key="new_file" value="%{new}"/>
          </operator>
          <connect from_port="example set" to_op="old" to_port="example set"/>
          <connect from_op="old" from_port="example set" to_op="new" to_port="example set"/>
          <connect from_op="new" from_port="example set" to_op="Copy File" to_port="through 1"/>
          <connect from_op="Copy File" from_port="through 1" to_port="example set"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Loop Files" from_port="out 1" to_op="Log to Data" to_port="through 1"/>
      <connect from_op="Log to Data" from_port="exampleSet" to_op="Generate Attributes (2)" to_port="example set input"/>
      <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
我使用 Copy File 而不是移动文件,以免移动文件造成损坏;希望您能看出它是如何工作的。
综上所述,如果您把旧文件名和新文件名生成为属性,就必须使用 Loop Examples 遍历每个示例。在这个循环运算符内部,您必须把要传递给 Copy File 运算符的值提取为宏。
希望对您有所帮助。
我需要帮助:如何把经 rapidminer 分类后的文件按其标签归入对应的文件夹?这个任务能否在 rapidminer 中完成,或者通过读取结果示例集的 java 代码来完成?这是结果表(table):
即:我只想将文件拆分到代表其标签的文件夹中
这是示例集
Data: SimpleExampleSet: 15 examples, 31988 regular attributes,
special attributes = { label = #0: label (polynominal/single_value)/values=[test1]
metadata_file = #1: metadata_file (polynominal/single_value)/values=[0.txt, 1.txt, 10.txt, 11.txt, 12.txt, 13.txt, 14.txt, 2.txt, 3.txt, 4.txt, 5.txt, 6.txt, 7.txt, 8.txt, 9.txt]
metadata_path = #2: metadata_path (polynominal/single_value)/values=[D:\Finaltests\test1\0.txt, D:\Finaltests\test1\1.txt, D:\Finaltests\test1\10.txt, D:\Finaltests\test1\11.txt, D:\Finaltests\test1\12.txt, D:\Finaltests\test1\13.txt, D:\Finaltests\test1\14.txt, D:\Finaltests\test1\2.txt, D:\Finaltests\test1\3.txt, D:\Finaltests\test1\4.txt, D:\Finaltests\test1\5.txt, D:\Finaltests\test1\6.txt, D:\Finaltests\test1\7.txt, D:\Finaltests\test1\8.txt, D:\Finaltests\test1\9.txt]
metadata_date = #3: metadata_date (date_time/single_value)
confidence_sport = #31993: confidence(sport) (real/single_value)
confidence_places = #31994: confidence(places) (real/single_value)
prediction = #31992: prediction(label) (binominal/single_value) }
谢谢。
这个过程比我最初说的要繁琐一些,所以我在下面提供了一个示例。它假定运行环境为 Linux,并将 /tmp/old 中的所有文件复制到 /tmp/new/A 或 /tmp/new/B,其中 A 和 B 由标签决定。
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!--
  Example RapidMiner process: list the files in /tmp/old, give each file a
  label ("A" or "B"), and copy it to /tmp/new/<label>/<file name>.

  Double quotes that belong to an expression inside an attribute value are
  written as &quot; so that the document is well-formed XML.
-->
<process version="7.0.001">
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="7.0.001" expanded="true" name="Process">
    <process expanded="true">
      <!--
        Step 1: loop over the directory /tmp/old. Inside the loop the macros
        "parent_path" and "file_name" are exposed as log values and written
        to the log by the Log operator.
        NOTE(review): these macro names are presumably the ones Loop Files
        sets by default for each file; confirm against the operator settings.
      -->
      <operator activated="true" class="loop_files" compatibility="7.0.001" expanded="true" height="82" name="Loop Files" width="90" x="45" y="34">
        <parameter key="directory" value="/tmp/old"/>
        <process expanded="true">
          <operator activated="true" class="provide_macro_as_log_value" compatibility="7.0.001" expanded="true" height="82" name="parent_path" width="90" x="179" y="34">
            <parameter key="macro_name" value="parent_path"/>
          </operator>
          <operator activated="true" class="provide_macro_as_log_value" compatibility="7.0.001" expanded="true" height="82" name="file_name" width="90" x="179" y="136">
            <parameter key="macro_name" value="file_name"/>
          </operator>
          <!-- Logs two columns, parent_path and file_name, read from the two operators above. -->
          <operator activated="true" class="log" compatibility="7.0.001" expanded="true" height="82" name="Log" width="90" x="380" y="34">
            <list key="log">
              <parameter key="parent_path" value="operator.parent_path.value.macro_value"/>
              <parameter key="file_name" value="operator.file_name.value.macro_value"/>
            </list>
          </operator>
          <connect from_op="parent_path" from_port="through 1" to_op="file_name" to_port="through 1"/>
          <connect from_op="file_name" from_port="through 1" to_op="Log" to_port="through 1"/>
          <connect from_op="Log" from_port="through 1" to_port="out 1"/>
          <portSpacing port="source_file object" spacing="0"/>
          <portSpacing port="source_in 1" spacing="0"/>
          <portSpacing port="sink_out 1" spacing="0"/>
          <portSpacing port="sink_out 2" spacing="0"/>
        </process>
      </operator>
      <!-- Step 2: turn the logged values into an example set (output port "exampleSet"). -->
      <operator activated="true" class="log_to_data" compatibility="7.0.001" expanded="true" height="103" name="Log to Data" width="90" x="179" y="34"/>
      <!--
        Step 3: generate a "label" attribute that is randomly "A" or "B".
        NOTE(review): this looks like a stand-in for a real classifier
        prediction; replace it with the actual label when adapting the process.
      -->
      <operator activated="true" class="generate_attributes" compatibility="7.0.001" expanded="true" height="82" name="Generate Attributes (2)" width="90" x="313" y="34">
        <list key="function_descriptions">
          <parameter key="label" value="if(rand()>0.5, &quot;A&quot;, &quot;B&quot;)"/>
        </list>
      </operator>
      <!--
        Step 4: build the source path ("old") and the destination path ("new")
        for every file; the destination folder is /tmp/new/ followed by the label.
      -->
      <operator activated="true" class="generate_attributes" compatibility="7.0.001" expanded="true" height="82" name="Generate Attributes" width="90" x="313" y="187">
        <list key="function_descriptions">
          <parameter key="old" value="parent_path + &quot;/&quot; + file_name"/>
          <parameter key="new" value="&quot;/tmp/new/&quot; + label+ &quot;/&quot; + file_name"/>
        </list>
      </operator>
      <!--
        Step 5: for each example, extract the "old" and "new" attribute values
        into macros of the same names and pass them to Copy File.
        NOTE(review): %{example} is presumably the iteration macro provided by
        Loop Examples; confirm the operator's iteration macro name.
      -->
      <operator activated="true" class="loop_examples" compatibility="7.0.001" expanded="true" height="82" name="Loop Examples" width="90" x="514" y="187">
        <process expanded="true">
          <operator activated="true" class="extract_macro" compatibility="7.0.001" expanded="true" height="68" name="old" width="90" x="112" y="34">
            <parameter key="macro" value="old"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="attribute_name" value="old"/>
            <parameter key="example_index" value="%{example}"/>
            <list key="additional_macros"/>
          </operator>
          <operator activated="true" class="extract_macro" compatibility="7.0.001" expanded="true" height="68" name="new" width="90" x="112" y="136">
            <parameter key="macro" value="new"/>
            <parameter key="macro_type" value="data_value"/>
            <parameter key="attribute_name" value="new"/>
            <parameter key="example_index" value="%{example}"/>
            <list key="additional_macros"/>
          </operator>
          <!-- Copies %{old} to %{new}; the source file is left in place. -->
          <operator activated="true" class="copy_file" compatibility="7.0.001" expanded="true" height="82" name="Copy File" width="90" x="380" y="34">
            <parameter key="source_file" value="%{old}"/>
            <parameter key="new_file" value="%{new}"/>
          </operator>
          <connect from_port="example set" to_op="old" to_port="example set"/>
          <connect from_op="old" from_port="example set" to_op="new" to_port="example set"/>
          <connect from_op="new" from_port="example set" to_op="Copy File" to_port="through 1"/>
          <connect from_op="Copy File" from_port="through 1" to_port="example set"/>
          <portSpacing port="source_example set" spacing="0"/>
          <portSpacing port="sink_example set" spacing="0"/>
          <portSpacing port="sink_output 1" spacing="0"/>
        </process>
      </operator>
      <connect from_op="Loop Files" from_port="out 1" to_op="Log to Data" to_port="through 1"/>
      <connect from_op="Log to Data" from_port="exampleSet" to_op="Generate Attributes (2)" to_port="example set input"/>
      <connect from_op="Generate Attributes (2)" from_port="example set output" to_op="Generate Attributes" to_port="example set input"/>
      <connect from_op="Generate Attributes" from_port="example set output" to_op="Loop Examples" to_port="example set"/>
      <connect from_op="Loop Examples" from_port="example set" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
    </process>
  </operator>
</process>
我使用 Copy File 而不是移动文件,以免移动文件造成损坏;希望您能看出它是如何工作的。
综上所述,如果您把旧文件名和新文件名生成为属性,就必须使用 Loop Examples 遍历每个示例。在这个循环运算符内部,您必须把要传递给 Copy File 运算符的值提取为宏。
希望对您有所帮助。