RapidMiner:根据日期搜索推文

RapidMiner: Search Tweets according to date

我是一名工商管理专业的学生,目前正在为一个研究项目学习社交媒体分析的基础知识。我目前的目标是跟踪推文中关键字的使用情况。我下载了 RapidMiner,并弄清楚了如何搜索关键字。但是,是否有可能弄清楚关键字在特定时间范围内的使用频率?例如,我能否过滤结果,只显示 2017 年 12 月发布的、包含我的关键字的推文?

非常感谢您考虑我的问题。

如果您已将数据提取为 RapidMiner ExampleSet,则可以使用 Aggregate 操作符(operator)来统计各个关键字的出现次数。或者,您也可以直接使用 Filter Examples 操作符,只显示包含关键字的推文。有关简单示例,请参见下面的流程:只需将 XML 复制并粘贴到 RapidMiner 的 Process(流程)视图中即可。

您也可以在 RapidMiner 社区论坛(RapidMiner community forum)中进一步提问,或在那里重新发布您的问题。
<?xml version="1.0" encoding="UTF-8"?><process version="8.0.001">
<!-- Process context: the input, output and macros entries are all empty. -->
<context>
  <input/>
  <output/>
  <macros/>
</context>
<operator activated="true" class="process" compatibility="8.0.001" expanded="true" name="Process">
<process expanded="true">
  <!-- Data source: its "output" port feeds the Multiply operator, so every
       later step works on this generated sample data. -->
  <operator name="Generate Direct Mailing Data"
            class="generate_direct_mailing_data"
            activated="true"
            compatibility="8.0.001"
            expanded="true"
            x="45" y="34"
            width="90" height="68">
    <description align="center" color="transparent" colored="false" width="126">Generic sample data.&lt;br&gt;We use the &amp;quot;sports&amp;quot; Attribute as key words</description>
  </operator>
  <!-- Fan-out point: "output 1" is wired to Aggregate and "output 2" to
       Filter Examples, so both branches start from the same data. -->
  <operator name="Multiply"
            class="multiply"
            activated="true"
            compatibility="8.0.001"
            expanded="true"
            x="246" y="34"
            width="90" height="103"/>
  <!-- Second branch, step 1: a single filter entry, sports equals athletics.
       The filtered example set is passed on to "Aggregate (2)". -->
  <operator name="Filter Examples"
            class="filter_examples"
            activated="true"
            compatibility="8.0.001"
            expanded="true"
            x="447" y="340"
            width="90" height="103">
    <list key="filters_list">
      <parameter key="filters_entry_key" value="sports.equals.athletics"/>
    </list>
    <description align="center" color="yellow" colored="true" width="126">Alternatively we can filter for a specific sport and then count.</description>
  </operator>
  <!-- Second branch, step 2: receives the output of "Filter Examples" and
       applies the default aggregation function "count" to the single
       attribute "sports". No group-by attribute is set here. The annotation
       below replaces the leftover "Type your comment" placeholder. -->
  <operator activated="true" class="aggregate" compatibility="8.0.001" expanded="true" height="82" name="Aggregate (2)" width="90" x="715" y="340">
    <parameter key="use_default_aggregation" value="true"/>
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="sports"/>
    <parameter key="default_aggregation_function" value="count"/>
    <list key="aggregation_attributes"/>
    <description align="center" color="yellow" colored="true" width="126">Counts the examples left after filtering, using the default count aggregation on the sports attribute.</description>
  </operator>
  <!-- First branch: takes Multiply "output 1", groups by "sports" and
       aggregates "sports" with the count function. -->
  <operator name="Aggregate"
            class="aggregate"
            activated="true"
            compatibility="8.0.001"
            expanded="true"
            x="447" y="34"
            width="90" height="82">
    <parameter key="attribute_filter_type" value="single"/>
    <parameter key="attribute" value="sports"/>
    <parameter key="default_aggregation_function" value="count"/>
    <list key="aggregation_attributes">
      <parameter key="sports" value="count"/>
    </list>
    <parameter key="group_by_attributes" value="sports"/>
    <description align="center" color="green" colored="true" width="126">The &amp;quot;group by&amp;quot; and the &amp;quot;aggregation&amp;quot; attributes are both set to &amp;quot;sports&amp;quot;</description>
  </operator>
  <!-- Wiring. The generated data goes into Multiply, which feeds two branches. -->
  <connect from_op="Generate Direct Mailing Data" from_port="output" to_op="Multiply" to_port="input"/>
  <!-- Branch 1: Multiply "output 1" to Aggregate. -->
  <connect from_op="Multiply" from_port="output 1" to_op="Aggregate" to_port="example set input"/>
  <!-- Branch 2: Multiply "output 2" to Filter Examples, then to Aggregate (2). -->
  <connect from_op="Multiply" from_port="output 2" to_op="Filter Examples" to_port="example set input"/>
  <connect from_op="Filter Examples" from_port="example set output" to_op="Aggregate (2)" to_port="example set input"/>
  <!-- Process results: "result 1" comes from Aggregate, "result 2" from Aggregate (2). -->
  <connect from_op="Aggregate (2)" from_port="example set output" to_port="result 2"/>
  <connect from_op="Aggregate" from_port="example set output" to_port="result 1"/>
  <!-- Port spacing entries for the process source and sink ports (all 0). -->
  <portSpacing port="source_input 1" spacing="0"/>
  <portSpacing port="sink_result 1" spacing="0"/>
  <portSpacing port="sink_result 2" spacing="0"/>
  <portSpacing port="sink_result 3" spacing="0"/>
</process>