如何在kusto中将多个记录更新转换为定期快照
How to convert multiple record updates into periodic snapshot in kusto
我有一种机制,每当源中的记录发生更改时,它就会向 Azure 数据资源管理器 (ADX) 发布一条更新,因此数据在 ADX 中最终表现为同一条记录的一系列版本。我想把它转换成每日快照,每个快照取快照时刻的最新版本。我已经用下面的查询做到了比较接近的结果:
// Sample data: five successive versions (row 1-5) of a single visit record.
let visits = datatable(id:guid, timestamp:datetime, category:string, start:datetime, end:datetime, row:int)
[
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-01T01:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 1,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-02T02:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 2,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-02T02:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 3,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-04T04:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 4,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-05T07:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(2021-10-01T07:00:00), 5
];
// Snapshot granularity: one bucket per day.
let binsize = 1d;
// Window covering every update: start of the earliest day through end of the latest day.
let min_date_time = toscalar(visits | summarize startofday(min(timestamp)));
let max_date_time = toscalar(visits | summarize endofday(max(timestamp)));
// Generate one row per bucket (the column is called `hour` although the step is one day),
// then left-join the latest version recorded inside each bucket.
// NOTE: a bucket with no update finds no match in the join, so its columns stay empty;
// nothing here carries the previous version forward into that bucket.
range hour from min_date_time to max_date_time step binsize
| join kind=leftouter (
visits
// Latest version per record within each bucket.
| summarize arg_max(timestamp, *) by id, bin(timestamp, binsize)
// Join key: the bucket the version belongs to.
| extend hour = bin(timestamp, binsize)
) on hour
// Drop the duplicated join key coming from the right-hand side.
| project-away hour1
这给出了以下内容:
hour | id | timestamp | timestamp1 | category | start | end | row |
---|---|---|---|---|---|---|---|
2021-10-01 00:00:00.0000000 | b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-01 00:00:00.0000000 | 2021-10-01 01:02:03.0000000 | SRU | 2021-09-30 01:02:03.0000000 | | 1 |
2021-10-02 00:00:00.0000000 | b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-02 00:00:00.0000000 | 2021-10-02 02:05:03.0000000 | SRU | 2021-09-30 01:02:03.0000000 | | 3 |
2021-10-03 00:00:00.0000000 | | | | | | | |
2021-10-04 00:00:00.0000000 | b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-04 00:00:00.0000000 | 2021-10-04 04:05:03.0000000 | SRU | 2021-09-30 01:02:03.0000000 | | 4 |
2021-10-05 00:00:00.0000000 | b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-05 00:00:00.0000000 | 2021-10-05 07:05:03.0000000 | SRU | 2021-09-30 01:02:03.0000000 | 2021-10-01 07:00:00.0000000 | 5 |
这个结果基本正确:它正确地选择了 2021 年 10 月 2 日的最新值(第 3 行);但它没有把第 3 行延续到下一个时间段(10 月 3 日),所以那一天得到的是空白。
我被最后一部分难住了。
如果有帮助,难题的下一部分是按类别聚合分组,结果类似于
day | category | total | started | ended |
---|---|---|---|---|
2021-10-01 | SRU | 1 | 1 | 0 |
2021-10-02 | SRU | 1 | 1 | 0 |
2021-10-03 | SRU | 1 | 1 | 0 |
2021-10-04 | SRU | 1 | 1 | 0 |
2021-10-05 | SRU | 1 | 0 | 1 |
这是第一个 table 的解决方案,取消注释最后一行以获得第二个 table:
// Sample data: every row is one published version of a visit record (two records, rows 1-7).
let visits = datatable(id:guid, timestamp:datetime, category:string, start:datetime, end:datetime, row:int)
[
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-01T01:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 1,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-02T02:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 2,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-02T02:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 3,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-04T04:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 4,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-05T07:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(2021-10-01T07:00:00), 5,
"8acaffa4-3ab8-479c-8f13-191c016bff70", datetime(2021-10-01T01:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 6,
"8acaffa4-3ab8-479c-8f13-191c016bff70", datetime(2021-10-02T02:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(2021-10-02T07:00:00), 7
];
// Snapshot granularity; drives both the per-day bucketing and the series step.
let binsize = 1d;
// Snapshot window. The upper bound of make-series is exclusive, so the last snapshot is 2021-10-05.
let StartDate = datetime(2021-10-01);
let EndDate = datetime(2021-10-06);
visits
// Keep only the latest version of each record within each day.
| summarize arg_max(timestamp, *) by ['id'], Day = bin(timestamp, binsize)
| partition hint.strategy=native by ['id']
(
    // Build one series per record across the window. Datetimes are carried as long values
    // inside the series and converted back after mv-expand.
    // Every aggregate uses default=long(null): make-series otherwise fills absent days with 0,
    // and series_fill_forward only replaces nulls, so a 0 default would never be forward-filled.
    make-series timestamp = take_any(tolong(timestamp)) default=long(null),
                start = take_any(tolong(start)) default=long(null),
                end = take_any(tolong(end)) default=long(null),
                row = take_any(row) default=long(null),
                total = count() default=long(null),
                started = countif(isnull(end)) default=long(null),
                ended = countif(isnotnull(end)) default=long(null)
        on Day from StartDate to EndDate step binsize by category, ['id']
    // Carry the most recent known version forward into days that had no update.
    | extend timestamp = series_fill_forward(timestamp),
             start = series_fill_forward(start),
             end = series_fill_forward(end),
             row = series_fill_forward(row),
             total = series_fill_forward(total),
             started = series_fill_forward(started),
             ended = series_fill_forward(ended)
)
// Turn the per-record arrays back into one row per record per day.
| mv-expand timestamp to typeof(long), start to typeof(long), end to typeof(long), Day to typeof(datetime), row to typeof(int), total to typeof(int), started to typeof(int), ended to typeof(int)
| extend timestamp = todatetime(timestamp), start = todatetime(start), end = todatetime(end)
| project-reorder id, Day, timestamp, start, end, row, category
// Uncomment the next line to aggregate the snapshots per day and category:
//| summarize Total = sum(total), sum(started), sum(ended) by Day, category
id | Day | timestamp | start | end | row | category | total | started | ended |
---|---|---|---|---|---|---|---|---|---|
b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-01 00:00:00.0000000 | 2021-10-01 01:02:03.0000000 | 2021-09-30 01:02:03.0000000 | | 1 | SRU | 1 | 1 | 0 |
8acaffa4-3ab8-479c-8f13-191c016bff70 | 2021-10-01 00:00:00.0000000 | 2021-10-01 01:02:03.0000000 | 2021-09-30 01:02:03.0000000 | | 6 | SRU | 1 | 1 | 0 |
8acaffa4-3ab8-479c-8f13-191c016bff70 | 2021-10-02 00:00:00.0000000 | 2021-10-02 02:02:03.0000000 | 2021-09-30 01:02:03.0000000 | 2021-10-02 07:00:00.0000000 | 7 | SRU | 1 | 0 | 1 |
b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-02 00:00:00.0000000 | 2021-10-02 02:05:03.0000000 | 2021-09-30 01:02:03.0000000 | | 3 | SRU | 1 | 1 | 0 |
8acaffa4-3ab8-479c-8f13-191c016bff70 | 2021-10-03 00:00:00.0000000 | 2021-10-02 02:02:03.0000000 | 2021-09-30 01:02:03.0000000 | 2021-10-02 07:00:00.0000000 | 7 | SRU | 1 | 0 | 1 |
b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-03 00:00:00.0000000 | 2021-10-02 02:05:03.0000000 | 2021-09-30 01:02:03.0000000 | | 3 | SRU | 1 | 1 | 0 |
8acaffa4-3ab8-479c-8f13-191c016bff70 | 2021-10-04 00:00:00.0000000 | 2021-10-02 02:02:03.0000000 | 2021-09-30 01:02:03.0000000 | 2021-10-02 07:00:00.0000000 | 7 | SRU | 1 | 0 | 1 |
b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-04 00:00:00.0000000 | 2021-10-04 04:05:03.0000000 | 2021-09-30 01:02:03.0000000 | | 4 | SRU | 1 | 1 | 0 |
8acaffa4-3ab8-479c-8f13-191c016bff70 | 2021-10-05 00:00:00.0000000 | 2021-10-02 02:02:03.0000000 | 2021-09-30 01:02:03.0000000 | 2021-10-02 07:00:00.0000000 | 7 | SRU | 1 | 0 | 1 |
b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-05 00:00:00.0000000 | 2021-10-05 07:05:03.0000000 | 2021-09-30 01:02:03.0000000 | 2021-10-01 07:00:00.0000000 | 5 | SRU | 1 | 0 | 1 |
我有一种机制,每当源中的记录发生更改时,它就会向 Azure 数据资源管理器 (ADX) 发布一条更新,因此数据在 ADX 中最终表现为同一条记录的一系列版本。我想把它转换成每日快照,每个快照取快照时刻的最新版本。我已经用下面的查询做到了比较接近的结果:
let visits = datatable(id:guid, timestamp:datetime, category:string, start:datetime, end:datetime, row:int)
[
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-01T01:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 1,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-02T02:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 2,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-02T02:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 3,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-04T04:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 4,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-05T07:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(2021-10-01T07:00:00), 5
];
// Snapshot granularity: one bucket per day.
let binsize = 1d;
// Window covering every update: start of the earliest day through end of the latest day.
let min_date_time = toscalar(visits | summarize startofday(min(timestamp)));
let max_date_time = toscalar(visits | summarize endofday(max(timestamp)));
// Generate one row per bucket (the column is called `hour` although the step is one day),
// then left-join the latest version recorded inside each bucket.
// NOTE: a bucket with no update finds no match in the join, so its columns stay empty;
// nothing here carries the previous version forward into that bucket.
range hour from min_date_time to max_date_time step binsize
| join kind=leftouter (
visits
// Latest version per record within each bucket.
| summarize arg_max(timestamp, *) by id, bin(timestamp, binsize)
// Join key: the bucket the version belongs to.
| extend hour = bin(timestamp, binsize)
) on hour
// Drop the duplicated join key coming from the right-hand side.
| project-away hour1
这给出了以下内容:
hour | id | timestamp | timestamp1 | category | start | end | row |
---|---|---|---|---|---|---|---|
2021-10-01 00:00:00.0000000 | b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-01 00:00:00.0000000 | 2021-10-01 01:02:03.0000000 | SRU | 2021-09-30 01:02:03.0000000 | | 1 |
2021-10-02 00:00:00.0000000 | b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-02 00:00:00.0000000 | 2021-10-02 02:05:03.0000000 | SRU | 2021-09-30 01:02:03.0000000 | | 3 |
2021-10-03 00:00:00.0000000 | | | | | | | |
2021-10-04 00:00:00.0000000 | b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-04 00:00:00.0000000 | 2021-10-04 04:05:03.0000000 | SRU | 2021-09-30 01:02:03.0000000 | | 4 |
2021-10-05 00:00:00.0000000 | b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-05 00:00:00.0000000 | 2021-10-05 07:05:03.0000000 | SRU | 2021-09-30 01:02:03.0000000 | 2021-10-01 07:00:00.0000000 | 5 |
这个结果基本正确:它正确地选择了 2021 年 10 月 2 日的最新值(第 3 行);但它没有把第 3 行延续到下一个时间段(10 月 3 日),所以那一天得到的是空白。
我被最后一部分难住了。
如果有帮助,难题的下一部分是按类别聚合分组,结果类似于
day | category | total | started | ended |
---|---|---|---|---|
2021-10-01 | SRU | 1 | 1 | 0 |
2021-10-02 | SRU | 1 | 1 | 0 |
2021-10-03 | SRU | 1 | 1 | 0 |
2021-10-04 | SRU | 1 | 1 | 0 |
2021-10-05 | SRU | 1 | 0 | 1 |
这是第一个 table 的解决方案,取消注释最后一行以获得第二个 table:
// Sample data: every row is one published version of a visit record (two records, rows 1-7).
let visits = datatable(id:guid, timestamp:datetime, category:string, start:datetime, end:datetime, row:int)
[
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-01T01:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 1,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-02T02:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 2,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-02T02:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 3,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-04T04:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 4,
"b5ce180e-ce11-4936-b3f1-c817a261622e", datetime(2021-10-05T07:05:03), "SRU", datetime(2021-09-30T01:02:03), datetime(2021-10-01T07:00:00), 5,
"8acaffa4-3ab8-479c-8f13-191c016bff70", datetime(2021-10-01T01:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(null), 6,
"8acaffa4-3ab8-479c-8f13-191c016bff70", datetime(2021-10-02T02:02:03), "SRU", datetime(2021-09-30T01:02:03), datetime(2021-10-02T07:00:00), 7
];
// Snapshot granularity; drives both the per-day bucketing and the series step.
let binsize = 1d;
// Snapshot window. The upper bound of make-series is exclusive, so the last snapshot is 2021-10-05.
let StartDate = datetime(2021-10-01);
let EndDate = datetime(2021-10-06);
visits
// Keep only the latest version of each record within each day.
| summarize arg_max(timestamp, *) by ['id'], Day = bin(timestamp, binsize)
| partition hint.strategy=native by ['id']
(
    // Build one series per record across the window. Datetimes are carried as long values
    // inside the series and converted back after mv-expand.
    // Every aggregate uses default=long(null): make-series otherwise fills absent days with 0,
    // and series_fill_forward only replaces nulls, so a 0 default would never be forward-filled.
    make-series timestamp = take_any(tolong(timestamp)) default=long(null),
                start = take_any(tolong(start)) default=long(null),
                end = take_any(tolong(end)) default=long(null),
                row = take_any(row) default=long(null),
                total = count() default=long(null),
                started = countif(isnull(end)) default=long(null),
                ended = countif(isnotnull(end)) default=long(null)
        on Day from StartDate to EndDate step binsize by category, ['id']
    // Carry the most recent known version forward into days that had no update.
    | extend timestamp = series_fill_forward(timestamp),
             start = series_fill_forward(start),
             end = series_fill_forward(end),
             row = series_fill_forward(row),
             total = series_fill_forward(total),
             started = series_fill_forward(started),
             ended = series_fill_forward(ended)
)
// Turn the per-record arrays back into one row per record per day.
| mv-expand timestamp to typeof(long), start to typeof(long), end to typeof(long), Day to typeof(datetime), row to typeof(int), total to typeof(int), started to typeof(int), ended to typeof(int)
| extend timestamp = todatetime(timestamp), start = todatetime(start), end = todatetime(end)
| project-reorder id, Day, timestamp, start, end, row, category
// Uncomment the next line to aggregate the snapshots per day and category:
//| summarize Total = sum(total), sum(started), sum(ended) by Day, category
id | Day | timestamp | start | end | row | category | total | started | ended |
---|---|---|---|---|---|---|---|---|---|
b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-01 00:00:00.0000000 | 2021-10-01 01:02:03.0000000 | 2021-09-30 01:02:03.0000000 | | 1 | SRU | 1 | 1 | 0 |
8acaffa4-3ab8-479c-8f13-191c016bff70 | 2021-10-01 00:00:00.0000000 | 2021-10-01 01:02:03.0000000 | 2021-09-30 01:02:03.0000000 | | 6 | SRU | 1 | 1 | 0 |
8acaffa4-3ab8-479c-8f13-191c016bff70 | 2021-10-02 00:00:00.0000000 | 2021-10-02 02:02:03.0000000 | 2021-09-30 01:02:03.0000000 | 2021-10-02 07:00:00.0000000 | 7 | SRU | 1 | 0 | 1 |
b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-02 00:00:00.0000000 | 2021-10-02 02:05:03.0000000 | 2021-09-30 01:02:03.0000000 | | 3 | SRU | 1 | 1 | 0 |
8acaffa4-3ab8-479c-8f13-191c016bff70 | 2021-10-03 00:00:00.0000000 | 2021-10-02 02:02:03.0000000 | 2021-09-30 01:02:03.0000000 | 2021-10-02 07:00:00.0000000 | 7 | SRU | 1 | 0 | 1 |
b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-03 00:00:00.0000000 | 2021-10-02 02:05:03.0000000 | 2021-09-30 01:02:03.0000000 | | 3 | SRU | 1 | 1 | 0 |
8acaffa4-3ab8-479c-8f13-191c016bff70 | 2021-10-04 00:00:00.0000000 | 2021-10-02 02:02:03.0000000 | 2021-09-30 01:02:03.0000000 | 2021-10-02 07:00:00.0000000 | 7 | SRU | 1 | 0 | 1 |
b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-04 00:00:00.0000000 | 2021-10-04 04:05:03.0000000 | 2021-09-30 01:02:03.0000000 | | 4 | SRU | 1 | 1 | 0 |
8acaffa4-3ab8-479c-8f13-191c016bff70 | 2021-10-05 00:00:00.0000000 | 2021-10-02 02:02:03.0000000 | 2021-09-30 01:02:03.0000000 | 2021-10-02 07:00:00.0000000 | 7 | SRU | 1 | 0 | 1 |
b5ce180e-ce11-4936-b3f1-c817a261622e | 2021-10-05 00:00:00.0000000 | 2021-10-05 07:05:03.0000000 | 2021-09-30 01:02:03.0000000 | 2021-10-01 07:00:00.0000000 | 5 | SRU | 1 | 0 | 1 |