RapidMiner:根据日期搜索推文
RapidMiner: Search Tweets according to date
我是一名工商管理专业的学生,目前正在为一个研究项目学习社交媒体分析的基础知识。我目前的目标是跟踪推文中关键字的使用。我下载了 RapidMiner 并弄清楚了如何搜索关键字。但是,是否有可能弄清楚关键字在特定时间范围内的使用频率?我可以过滤结果,例如,只显示包含我的关键字的 2017 年 12 月的推文吗?
非常感谢您考虑我的问题。
如果您已将数据提取为 RapidMiner ExampleSet,则可以使用 Aggregate 操作符(Aggregate operator)来统计各个关键字的使用次数。或者,您也可以直接使用 Filter Examples 操作符,只显示包含该关键字的推文。
有关简单示例,请参见下面的流程。只需将该 XML 复制并粘贴到 RapidMiner 的流程视图(Process view)中即可。
您也可以在 RapidMiner 社区论坛(community forum)中进一步提问,或重新发布您的问题。
<?xml version="1.0" encoding="UTF-8"?>
<process version="8.0.001">
  <!--
    Example RapidMiner process: counts how often values of the "sports"
    attribute occur in generated sample data. The "sports" attribute stands
    in for key words. Two alternative branches are shown:
      result 1: Aggregate grouped by "sports" (one count per distinct value)
      result 2: Filter Examples for one value, then Aggregate (2) counts it
  -->
  <context>
    <input/>
    <output/>
    <macros/>
  </context>
  <operator activated="true" class="process" compatibility="8.0.001" expanded="true" name="Process">
    <process expanded="true">
      <!-- Data source: generated sample data that contains the "sports" attribute. -->
      <operator activated="true" class="generate_direct_mailing_data" compatibility="8.0.001" expanded="true" height="68" name="Generate Direct Mailing Data" width="90" x="45" y="34">
        <!-- The line break markup is escaped so that the description stays plain text and the document stays well-formed. -->
        <description align="center" color="transparent" colored="false" width="126">Generic sample data.&lt;br&gt;We use the &quot;sports&quot; Attribute as key words</description>
      </operator>
      <!-- Duplicates the example set so both counting approaches receive the same input. -->
      <operator activated="true" class="multiply" compatibility="8.0.001" expanded="true" height="103" name="Multiply" width="90" x="246" y="34"/>
      <!-- Branch 2, step 1: keep only the examples where sports equals "athletics". -->
      <operator activated="true" class="filter_examples" compatibility="8.0.001" expanded="true" height="103" name="Filter Examples" width="90" x="447" y="340">
        <list key="filters_list">
          <parameter key="filters_entry_key" value="sports.equals.athletics"/>
        </list>
        <description align="center" color="yellow" colored="true" width="126">Alternatively we can filter for a specific sport and then count.</description>
      </operator>
      <!-- Branch 2, step 2: default aggregation "count" on the single attribute "sports"; no group-by is configured. -->
      <operator activated="true" class="aggregate" compatibility="8.0.001" expanded="true" height="82" name="Aggregate (2)" width="90" x="715" y="340">
        <parameter key="use_default_aggregation" value="true"/>
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="sports"/>
        <parameter key="default_aggregation_function" value="count"/>
        <list key="aggregation_attributes"/>
        <description align="center" color="yellow" colored="true" width="126">Type your comment</description>
      </operator>
      <!-- Branch 1: count of "sports", grouped by "sports". -->
      <operator activated="true" class="aggregate" compatibility="8.0.001" expanded="true" height="82" name="Aggregate" width="90" x="447" y="34">
        <parameter key="attribute_filter_type" value="single"/>
        <parameter key="attribute" value="sports"/>
        <parameter key="default_aggregation_function" value="count"/>
        <list key="aggregation_attributes">
          <parameter key="sports" value="count"/>
        </list>
        <parameter key="group_by_attributes" value="sports"/>
        <description align="center" color="green" colored="true" width="126">The &quot;group by&quot; and the &quot;aggregation&quot; attributes are both set to &quot;sports&quot;</description>
      </operator>
      <!-- Wiring: source data goes to Multiply, which feeds both branches; each branch ends at its own result port. -->
      <connect from_op="Generate Direct Mailing Data" from_port="output" to_op="Multiply" to_port="input"/>
      <connect from_op="Multiply" from_port="output 1" to_op="Aggregate" to_port="example set input"/>
      <connect from_op="Multiply" from_port="output 2" to_op="Filter Examples" to_port="example set input"/>
      <connect from_op="Filter Examples" from_port="example set output" to_op="Aggregate (2)" to_port="example set input"/>
      <connect from_op="Aggregate (2)" from_port="example set output" to_port="result 2"/>
      <connect from_op="Aggregate" from_port="example set output" to_port="result 1"/>
      <portSpacing port="source_input 1" spacing="0"/>
      <portSpacing port="sink_result 1" spacing="0"/>
      <portSpacing port="sink_result 2" spacing="0"/>
      <portSpacing port="sink_result 3" spacing="0"/>
    </process>
  </operator>
</process>
我是一名工商管理专业的学生,目前正在为一个研究项目学习社交媒体分析的基础知识。我目前的目标是跟踪推文中关键字的使用。我下载了 RapidMiner 并弄清楚了如何搜索关键字。但是,是否有可能弄清楚关键字在特定时间范围内的使用频率?我可以过滤结果,例如,只显示包含我的关键字的 2017 年 12 月的推文吗?
非常感谢您考虑我的问题。
如果您已将数据提取为 RapidMiner ExampleSet,则可以使用 Aggregate 操作符(Aggregate operator)来统计各个关键字的使用次数。或者,您也可以直接使用 Filter Examples 操作符,只显示包含该关键字的推文。有关简单示例,请参见下面的流程。只需将该 XML 复制并粘贴到 RapidMiner 的流程视图(Process view)中即可。
您也可以在 RapidMiner 社区论坛(community forum)中进一步提问,或重新发布您的问题。
<?xml version="1.0" encoding="UTF-8"?><process version="8.0.001">
<!-- Example RapidMiner process: counts occurrences of values of the "sports" attribute in generated sample data, using two alternative branches that end at result 1 and result 2. -->
<context>
<input/>
<output/>
<macros/>
</context>
<operator activated="true" class="process" compatibility="8.0.001" expanded="true" name="Process">
<process expanded="true">
<!-- Data source: generated sample data; its description states that the "sports" attribute is used as key words. -->
<!-- NOTE(review): the <br> inside the description below is not escaped and has no closing tag, so a strict XML parser will reject it; consider &lt;br&gt;. -->
<operator activated="true" class="generate_direct_mailing_data" compatibility="8.0.001" expanded="true" height="68" name="Generate Direct Mailing Data" width="90" x="45" y="34">
<description align="center" color="transparent" colored="false" width="126">Generic sample data.<br>We use the &quot;sports&quot; Attribute as key words</description>
</operator>
<!-- Duplicates the example set: output 1 feeds "Aggregate", output 2 feeds "Filter Examples" (see the connect elements below). -->
<operator activated="true" class="multiply" compatibility="8.0.001" expanded="true" height="103" name="Multiply" width="90" x="246" y="34"/>
<!-- Branch 2, step 1: filter entry "sports.equals.athletics". -->
<operator activated="true" class="filter_examples" compatibility="8.0.001" expanded="true" height="103" name="Filter Examples" width="90" x="447" y="340">
<list key="filters_list">
<parameter key="filters_entry_key" value="sports.equals.athletics"/>
</list>
<description align="center" color="yellow" colored="true" width="126">Alternatively we can filter for a specific sport and then count.</description>
</operator>
<!-- Branch 2, step 2: default aggregation function "count" applied to the single attribute "sports"; the aggregation list is empty and no group-by is set. -->
<operator activated="true" class="aggregate" compatibility="8.0.001" expanded="true" height="82" name="Aggregate (2)" width="90" x="715" y="340">
<parameter key="use_default_aggregation" value="true"/>
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="sports"/>
<parameter key="default_aggregation_function" value="count"/>
<list key="aggregation_attributes"/>
<description align="center" color="yellow" colored="true" width="126">Type your comment</description>
</operator>
<!-- Branch 1: aggregation entry "sports" with function "count", grouped by "sports". -->
<operator activated="true" class="aggregate" compatibility="8.0.001" expanded="true" height="82" name="Aggregate" width="90" x="447" y="34">
<parameter key="attribute_filter_type" value="single"/>
<parameter key="attribute" value="sports"/>
<parameter key="default_aggregation_function" value="count"/>
<list key="aggregation_attributes">
<parameter key="sports" value="count"/>
</list>
<parameter key="group_by_attributes" value="sports"/>
<description align="center" color="green" colored="true" width="126">The &quot;group by&quot; and the &quot;aggregation&quot; attributes are both set to &quot;sports&quot;</description>
</operator>
<!-- Wiring between the operators and the process result ports. -->
<connect from_op="Generate Direct Mailing Data" from_port="output" to_op="Multiply" to_port="input"/>
<connect from_op="Multiply" from_port="output 1" to_op="Aggregate" to_port="example set input"/>
<connect from_op="Multiply" from_port="output 2" to_op="Filter Examples" to_port="example set input"/>
<connect from_op="Filter Examples" from_port="example set output" to_op="Aggregate (2)" to_port="example set input"/>
<connect from_op="Aggregate (2)" from_port="example set output" to_port="result 2"/>
<connect from_op="Aggregate" from_port="example set output" to_port="result 1"/>
<portSpacing port="source_input 1" spacing="0"/>
<portSpacing port="sink_result 1" spacing="0"/>
<portSpacing port="sink_result 2" spacing="0"/>
<portSpacing port="sink_result 3" spacing="0"/>
</process>
<!-- NOTE(review): the closing tags for the enclosing operator "Process" and for the root process element are not visible in this span; confirm that they follow. -->