icCube ETL - Java 视图 - 按多列分组(group by)并获取最大值和最小值
icCube ETL - Java View - group by on more than 1 column + retrieve max and min value
在 icCube Builder ETL 中,我想在多个字段上对数据进行分组。另外,作为聚合函数,我想使用 MAX 和 MIN。
示例数据:
(文本中的相同数据)
groupId phase startDate endDate
100 start 1-May-2018 5-May-2018
100 start 4-May-2018 7-May-2018
100 start 28-Apr-2018 1-May-2018
100 middle 4-May-2018 11-May-2018
100 middle 1-May-2018 10-May-2018
100 end 12-May-2018 15-May-2018
100 end 11-May-2018 13-May-2018
100 end 13-May-2018 14-May-2018
100 end 9-May-2018 12-May-2018
200 start 4-Apr-2018 2-May-2018
200 middle 18-Apr-2018 3-May-2018
200 middle 1-May-2018 1-May-2018
300 end 21-Apr-2018 24-Apr-2018
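我想根据 groupId 和 phase 两列对这些数据进行分组,并获得每组的最小开始日期(startDate)和最大结束日期(endDate)。例如,groupId 为 100、phase 为 start 的分组应得到 startDate = 28-Apr-2018、endDate = 7-May-2018。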
在 icCube ETL 中实现这一点的最佳方式是什么?
我们正在 ETL 层中添加新版本的 groupBy View 来支持这一点。但是,您可以创建一个 Java 视图来执行 groupBy。
类似于:
package iccube.pub;
import java.util.*;
import java.lang.*;
import org.joda.time.*;
import crazydev.iccube.pub.view.*;
/**
 * Java view that groups the rows of the main table by the "phase" and "groupId"
 * columns and emits, for each group, the smallest "startDate" and the largest
 * "endDate".
 *
 * Rows are accumulated in memory by {@code onNewRow}; the aggregated rows are
 * fired once from {@code onProcessingCompleted}.
 */
public class CustomJavaView implements IOlapBuilderViewLogic
{
    // Column names read from the main table and written to the fired rows.
    private static final String PHASE = "phase";
    private static final String GROUP_ID = "groupId";
    private static final String START_DATE = "startDate";
    private static final String END_DATE = "endDate";

    // Group key (phase, groupId) -> [min(startDate) aggregator, max(endDate) aggregator].
    // A LinkedHashMap keeps the groups in first-seen order, so the fired rows come out
    // in a reproducible order instead of HashMap's unspecified one.
    // Initialized eagerly so that onNewRow() never sees a null map.
    // NOTE(review): the raw Comparable element type is kept as in the original because
    // the exact value type of the row API is not visible here.
    private Map<List<Comparable>, List<Agg>> cached = new LinkedHashMap<>();

    public CustomJavaView()
    {
    }

    /**
     * Resets the accumulated groups.
     *
     * @param cachedTables not used by this view
     * @param mainTable    not used by this view
     */
    public void onInitMainTable(Map<String, IOlapCachedTable> cachedTables, IOlapDataTableDef mainTable)
    {
        cached = new LinkedHashMap<>();
    }

    /**
     * Folds one row of the main table into the aggregators of its group.
     *
     * @param context      not used by this method
     * @param cachedTables not used by this method
     * @param mainTable    not used by this method
     * @param mainTableRow the row whose "phase", "groupId", "startDate" and "endDate" values are read
     * @return always {@code true} (the original notes that {@code false} stops the processing)
     */
    public boolean onNewRow(IOlapViewContext context, Map<String, IOlapCachedTable> cachedTables, IOlapDataTableDef mainTable, IOlapReadOnlyDataRow mainTableRow)
    {
        // The group-by key is the list of grouped values; List equality/hashing is
        // element-wise, which makes it usable as a map key.
        final List<Comparable> groupBy = Arrays.asList(mainTableRow.get(PHASE), mainTableRow.get(GROUP_ID));

        // Fetch the aggregators of this group, creating them on first sight:
        // index 0 tracks the minimum, index 1 the maximum.
        final List<Agg> aggs = cached.computeIfAbsent(groupBy, key -> Arrays.asList(new Agg(true), new Agg(false)));

        aggs.get(0).add(mainTableRow.getAsDateTime(START_DATE));
        aggs.get(1).add(mainTableRow.getAsDateTime(END_DATE));

        return true; // false to stop
    }

    /**
     * Fires one row per group with the group values and the aggregated dates.
     *
     * @param context      used to create and fire the result rows
     * @param cachedTables not used by this method
     */
    public void onProcessingCompleted(IOlapViewContext context, Map<String, IOlapCachedTable> cachedTables)
    {
        for (Map.Entry<List<Comparable>, List<Agg>> entry : cached.entrySet())
        {
            final List<Comparable> groupByKey = entry.getKey();
            final List<Agg> aggs = entry.getValue();

            // Key order matches the one used when building the key in onNewRow().
            final IOlapDataTableRow row = context.newRow();
            row.set(PHASE, groupByKey.get(0));
            row.set(GROUP_ID, groupByKey.get(1));
            // The aggregated date stays null when a group only had null dates.
            row.set(START_DATE, aggs.get(0).date);
            row.set(END_DATE, aggs.get(1).date);
            context.fireRow(row);
        }
    }

    /**
     * Minimal min/max aggregator over dates; something more elaborate could be
     * plugged in here.
     */
    static class Agg
    {
        // Sign applied to the compareTo() result: -1 keeps the smallest value,
        // +1 keeps the largest one.
        final int isMin;

        // Current extreme value; null until a first non-null value has been added.
        LocalDateTime date;

        /**
         * @param isMin {@code true} to track the minimum, {@code false} to track the maximum
         */
        Agg(boolean isMin)
        {
            this.isMin = isMin ? -1 : 1;
        }

        /**
         * Takes a new candidate value into account; null candidates are ignored.
         *
         * @param ndate the candidate value, may be null
         */
        void add(LocalDateTime ndate)
        {
            if (ndate == null)
            {
                return;
            }
            // Keep the current value only when it strictly beats the candidate
            // in the tracked direction; otherwise the candidate takes over.
            if (date == null || date.compareTo(ndate) * isMin <= 0)
            {
                date = ndate;
            }
        }
    }
}
在 icCube Builder ETL 中,我想在多个字段上对数据进行分组。另外,作为聚合函数,我想使用 MAX 和 MIN。
示例数据:
(文本中的相同数据)
groupId phase startDate endDate
100 start 1-May-2018 5-May-2018
100 start 4-May-2018 7-May-2018
100 start 28-Apr-2018 1-May-2018
100 middle 4-May-2018 11-May-2018
100 middle 1-May-2018 10-May-2018
100 end 12-May-2018 15-May-2018
100 end 11-May-2018 13-May-2018
100 end 13-May-2018 14-May-2018
100 end 9-May-2018 12-May-2018
200 start 4-Apr-2018 2-May-2018
200 middle 18-Apr-2018 3-May-2018
200 middle 1-May-2018 1-May-2018
300 end 21-Apr-2018 24-Apr-2018
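我想根据 groupId 和 phase 两列对这些数据进行分组,并获得每组的最小开始日期(startDate)和最大结束日期(endDate)。例如,groupId 为 100、phase 为 start 的分组应得到 startDate = 28-Apr-2018、endDate = 7-May-2018。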
在 icCube ETL 中实现这一点的最佳方式是什么?
我们正在 ETL 层中添加新版本的 groupBy View 来支持这一点。但是,您可以创建一个 Java 视图来执行 groupBy。
类似于:
package iccube.pub;
import java.util.*;
import java.lang.*;
import org.joda.time.*;
import crazydev.iccube.pub.view.*;
/**
 * Java view that groups the rows of the main table by the "phase" and "groupId"
 * columns and emits, for each group, the smallest "startDate" and the largest
 * "endDate".
 *
 * Rows are accumulated in memory by {@code onNewRow}; the aggregated rows are
 * fired once from {@code onProcessingCompleted}.
 */
public class CustomJavaView implements IOlapBuilderViewLogic
{
    // Column names read from the main table and written to the fired rows.
    private static final String PHASE = "phase";
    private static final String GROUP_ID = "groupId";
    private static final String START_DATE = "startDate";
    private static final String END_DATE = "endDate";

    // Group key (phase, groupId) -> [min(startDate) aggregator, max(endDate) aggregator].
    // A LinkedHashMap keeps the groups in first-seen order, so the fired rows come out
    // in a reproducible order instead of HashMap's unspecified one.
    // Initialized eagerly so that onNewRow() never sees a null map.
    // NOTE(review): the raw Comparable element type is kept as in the original because
    // the exact value type of the row API is not visible here.
    private Map<List<Comparable>, List<Agg>> cached = new LinkedHashMap<>();

    public CustomJavaView()
    {
    }

    /**
     * Resets the accumulated groups.
     *
     * @param cachedTables not used by this view
     * @param mainTable    not used by this view
     */
    public void onInitMainTable(Map<String, IOlapCachedTable> cachedTables, IOlapDataTableDef mainTable)
    {
        cached = new LinkedHashMap<>();
    }

    /**
     * Folds one row of the main table into the aggregators of its group.
     *
     * @param context      not used by this method
     * @param cachedTables not used by this method
     * @param mainTable    not used by this method
     * @param mainTableRow the row whose "phase", "groupId", "startDate" and "endDate" values are read
     * @return always {@code true} (the original notes that {@code false} stops the processing)
     */
    public boolean onNewRow(IOlapViewContext context, Map<String, IOlapCachedTable> cachedTables, IOlapDataTableDef mainTable, IOlapReadOnlyDataRow mainTableRow)
    {
        // The group-by key is the list of grouped values; List equality/hashing is
        // element-wise, which makes it usable as a map key.
        final List<Comparable> groupBy = Arrays.asList(mainTableRow.get(PHASE), mainTableRow.get(GROUP_ID));

        // Fetch the aggregators of this group, creating them on first sight:
        // index 0 tracks the minimum, index 1 the maximum.
        final List<Agg> aggs = cached.computeIfAbsent(groupBy, key -> Arrays.asList(new Agg(true), new Agg(false)));

        aggs.get(0).add(mainTableRow.getAsDateTime(START_DATE));
        aggs.get(1).add(mainTableRow.getAsDateTime(END_DATE));

        return true; // false to stop
    }

    /**
     * Fires one row per group with the group values and the aggregated dates.
     *
     * @param context      used to create and fire the result rows
     * @param cachedTables not used by this method
     */
    public void onProcessingCompleted(IOlapViewContext context, Map<String, IOlapCachedTable> cachedTables)
    {
        for (Map.Entry<List<Comparable>, List<Agg>> entry : cached.entrySet())
        {
            final List<Comparable> groupByKey = entry.getKey();
            final List<Agg> aggs = entry.getValue();

            // Key order matches the one used when building the key in onNewRow().
            final IOlapDataTableRow row = context.newRow();
            row.set(PHASE, groupByKey.get(0));
            row.set(GROUP_ID, groupByKey.get(1));
            // The aggregated date stays null when a group only had null dates.
            row.set(START_DATE, aggs.get(0).date);
            row.set(END_DATE, aggs.get(1).date);
            context.fireRow(row);
        }
    }

    /**
     * Minimal min/max aggregator over dates; something more elaborate could be
     * plugged in here.
     */
    static class Agg
    {
        // Sign applied to the compareTo() result: -1 keeps the smallest value,
        // +1 keeps the largest one.
        final int isMin;

        // Current extreme value; null until a first non-null value has been added.
        LocalDateTime date;

        /**
         * @param isMin {@code true} to track the minimum, {@code false} to track the maximum
         */
        Agg(boolean isMin)
        {
            this.isMin = isMin ? -1 : 1;
        }

        /**
         * Takes a new candidate value into account; null candidates are ignored.
         *
         * @param ndate the candidate value, may be null
         */
        void add(LocalDateTime ndate)
        {
            if (ndate == null)
            {
                return;
            }
            // Keep the current value only when it strictly beats the candidate
            // in the tracked direction; otherwise the candidate takes over.
            if (date == null || date.compareTo(ndate) * isMin <= 0)
            {
                date = ndate;
            }
        }
    }
}