Cosmos DB 分页返回的记录数成倍增加
Cosmos DB Pagination giving multiplied page records
我有一个场景,需要根据文档中数组里存在的元素来过滤集合。谁能建议如何在文档中将 OFFSET 和 LIMIT 与嵌套数组一起使用?
{
"id": "abcd",
"pqrs": 1,
"xyz": "UNKNOWN_594",
"arrayList": [
{
"Id": 2,
"def": true
},
{
"Id": 302,
"def": true
}
]
}
现在我需要过滤集合,并每次从中取出 10 条记录(每页 10 条)。我尝试了以下查询:
SELECT * FROM collections c
WHERE ARRAY_CONTAINS(c.arrayList , {"Id":302 },true) or ARRAY_CONTAINS(c.arrayList , {"Id":2 },true)
ORDER BY c._ts DESC
OFFSET 10 LIMIT 10
现在,当我运行这个查询时,它返回了 40 条记录。
使用 OFFSET 时,每向后翻一页,消耗的 RU 都会继续增加;你可以改用 ContinuationToken(延续标记):
/// <summary>
/// Shows two ways of paging through a document query with a page size of one item:
/// first by draining every page in a loop on a single query object, then by capturing
/// the continuation token of one page and handing it to a brand-new query.
/// </summary>
/// <param name="collectionUri">Link of the collection the queries are issued against.</param>
private static async Task QueryWithPagingAsync(Uri collectionUri)
{
    // --- Part 1: read every page through one query object -------------------------
    // MaxItemCount = 1 asks for a single record per page.
    var singleItemPages = new FeedOptions { MaxItemCount = 1, EnableCrossPartitionQuery = true };
    var allFamilies = new List<Family>();

    // AsDocumentQuery exposes HasMoreResults / ExecuteNextAsync, so paging can be
    // driven explicitly. No continuation token is passed by hand in this loop; the
    // same query object is simply asked for its next page until none are left.
    var query = client.CreateDocumentQuery<Family>(collectionUri, singleItemPages).AsDocumentQuery();
    while (query.HasMoreResults)
    {
        var page = await query.ExecuteNextAsync();
        foreach (Family member in page)
        {
            allFamilies.Add(member);
        }
    }

    // --- Part 2: resume from a saved position with a fresh query ------------------
    // When the next page is needed later (for example in a different session), the
    // continuation token of the page just read has to be kept and passed back in.
    query = client
        .CreateDocumentQuery<Family>(
            collectionUri,
            new FeedOptions { MaxItemCount = 1, EnableCrossPartitionQuery = true })
        .AsDocumentQuery();

    var feedResponse = await query.ExecuteNextAsync<Family>();
    string resumeToken = feedResponse.ResponseContinuation;

    var firstPageById = feedResponse.AsEnumerable().OrderBy(entry => entry.Id);
    foreach (var entry in firstPageById)
    {
        // Intentionally empty: the page is only enumerated in Id order here.
    }

    // Second round: build a new query that starts at the saved continuation token.
    var resumeOptions = new FeedOptions
    {
        MaxItemCount = 1,
        RequestContinuation = resumeToken,
        EnableCrossPartitionQuery = true
    };
    query = client.CreateDocumentQuery<Family>(collectionUri, resumeOptions).AsDocumentQuery();

    feedResponse = await query.ExecuteNextAsync<Family>();

    var secondPageById = feedResponse.AsEnumerable().OrderBy(entry => entry.Id);
    foreach (var entry in secondPageById)
    {
        // Intentionally empty: the page is only enumerated in Id order here.
    }
}
要跳转到特定页面,请参见下面的代码:
/// <summary>
/// Reads the query results one page at a time, printing a header for each page, until
/// the continuation token returned for a page is <c>null</c>.
/// </summary>
/// <param name="currentPageNumber">
/// Number shown for the first page and passed to <c>QueryDocumentsByPage</c>; it is
/// incremented after each page.
/// </param>
/// <param name="documentNumber">
/// Starting value of the running document counter; it is incremented once for every
/// document of every page.
/// </param>
private static async Task QueryPageByPage(int currentPageNumber = 1, int documentNumber = 1)
{
    // Number of documents per page (configurable).
    // Fix: the terminating semicolon used to sit inside the trailing comment, which
    // left the declaration unterminated and the method uncompilable.
    const int PageSize = 3;

    // Continuation token for subsequent queries (null for the very first request/page).
    string continuationToken = null;

    do
    {
        Console.WriteLine($"----- PAGE {currentPageNumber} -----");

        // Loads ALL documents for the current page.
        KeyValuePair<string, IEnumerable<Family>> currentPage =
            await QueryDocumentsByPage(currentPageNumber, PageSize, continuationToken);

        foreach (Family family in currentPage.Value)
        {
            // Only the running counter is advanced here; per-document handling
            // (e.g. printing the document) would go in this loop.
            documentNumber++;
        }

        // Keep the continuation token for the next page query; null ends the loop.
        continuationToken = currentPage.Key;
        currentPageNumber++;
    } while (continuationToken != null);

    Console.WriteLine("\n--- END: Finished Querying ALL Dcuments ---");
}
QueryDocumentsByPage 函数如下:
/// <summary>
/// Fetches a single page of <c>Family</c> documents, starting at the given continuation
/// token, and returns the documents together with the token for the following page.
/// </summary>
/// <param name="pageNumber">Page number supplied by the caller; not used by the query itself.</param>
/// <param name="pageSize">Maximum number of documents requested for the page.</param>
/// <param name="continuationToken">
/// Token returned with the previous page, or <c>null</c> for the very first request.
/// </param>
/// <returns>
/// A pair whose key is the response continuation token and whose value holds the
/// documents of this page.
/// </returns>
private static async Task<KeyValuePair<string, IEnumerable<Family>>> QueryDocumentsByPage(int pageNumber, int pageSize, string continuationToken)
{
    var feedOptions = new FeedOptions {
        MaxItemCount = pageSize,
        EnableCrossPartitionQuery = true,
        // IMPORTANT: Set the continuation token (NULL for the first ever request/page)
        RequestContinuation = continuationToken
    };

    // Fix: the client and the query are disposable and were never released. They are
    // scoped to this call now; the page is copied into a list before they are disposed.
    // NOTE(review): reusing one long-lived DocumentClient instead of creating one per
    // page is generally preferable - consider hoisting it out of this method.
    using (var documentClient = new DocumentClient(new Uri("https://{CosmosDB/SQL Account Name}.documents.azure.com:443/"), "{CosmosDB/SQL Account Key}"))
    using (IDocumentQuery<Family> query = documentClient
        .CreateDocumentQuery<Family>("dbs/{Database Name}/colls/{Collection Name}", feedOptions)
        .AsDocumentQuery())
    {
        FeedResponse<Family> feedResponse = await query.ExecuteNextAsync<Family>();

        // Fix: the page used to be iterated as CeleryTask, a type that does not match
        // the FeedResponse<Family> / List<Family> used everywhere else in this method.
        List<Family> documents = new List<Family>(feedResponse);

        // IMPORTANT: Ensure the continuation token is kept for the next requests
        return new KeyValuePair<string, IEnumerable<Family>>(feedResponse.ResponseContinuation, documents);
    }
}
您在结果中实际收到了 40 个元素吗?还是说返回的是 10 个文档,而 Cosmos 中符合此查询条件的文档共有 40 个?
使用 ORDER BY 子句时,数据库会先检索符合查询条件的所有文档并对其排序,然后再应用 OFFSET 和 LIMIT 值得出最终结果。
我已经从下面的快照中说明了这一点。
- 我的 Cosmos 帐户中有 14 个文档符合查询条件,这与检索到的文档数一致。
- 输出文档数是 10,因为数据库必须先跳过前 5 个文档,再交付接下来的 5 个。
- 但我的实际结果只有 5 个文档,因为这正是我所请求的数量。
延续标记对分页很有效,但有局限性。如果您想直接跳页(比如从第 1 页跳到第 10 页),则不能使用它们:您需要从第一个文档开始逐页遍历,并不断使用令牌转到下一页。尽管存在这些限制,如果单个查询会匹配大量文档,通常仍建议使用延续标记。
另一个建议是:在使用 ORDER BY 时利用索引来优化 RU/s 的消耗。请参阅此链接。
我有一个场景,需要根据文档中数组里存在的元素来过滤集合。谁能建议如何在文档中将 OFFSET 和 LIMIT 与嵌套数组一起使用?
{
"id": "abcd",
"pqrs": 1,
"xyz": "UNKNOWN_594",
"arrayList": [
{
"Id": 2,
"def": true
},
{
"Id": 302,
"def": true
}
]
}
现在我需要过滤集合,并每次从中取出 10 条记录(每页 10 条)。我尝试了以下查询:
SELECT * FROM collections c
WHERE ARRAY_CONTAINS(c.arrayList , {"Id":302 },true) or ARRAY_CONTAINS(c.arrayList , {"Id":2 },true)
ORDER BY c._ts DESC
OFFSET 10 LIMIT 10
现在,当我运行这个查询时,它返回了 40 条记录。
使用 OFFSET 时,每向后翻一页,消耗的 RU 都会继续增加;你可以改用 ContinuationToken(延续标记):
/// <summary>
/// Shows two ways of paging through a document query with a page size of one item:
/// first by draining every page in a loop on a single query object, then by capturing
/// the continuation token of one page and handing it to a brand-new query.
/// </summary>
/// <param name="collectionUri">Link of the collection the queries are issued against.</param>
private static async Task QueryWithPagingAsync(Uri collectionUri)
{
    // --- Part 1: read every page through one query object -------------------------
    // MaxItemCount = 1 asks for a single record per page.
    var singleItemPages = new FeedOptions { MaxItemCount = 1, EnableCrossPartitionQuery = true };
    var allFamilies = new List<Family>();

    // AsDocumentQuery exposes HasMoreResults / ExecuteNextAsync, so paging can be
    // driven explicitly. No continuation token is passed by hand in this loop; the
    // same query object is simply asked for its next page until none are left.
    var query = client.CreateDocumentQuery<Family>(collectionUri, singleItemPages).AsDocumentQuery();
    while (query.HasMoreResults)
    {
        var page = await query.ExecuteNextAsync();
        foreach (Family member in page)
        {
            allFamilies.Add(member);
        }
    }

    // --- Part 2: resume from a saved position with a fresh query ------------------
    // When the next page is needed later (for example in a different session), the
    // continuation token of the page just read has to be kept and passed back in.
    query = client
        .CreateDocumentQuery<Family>(
            collectionUri,
            new FeedOptions { MaxItemCount = 1, EnableCrossPartitionQuery = true })
        .AsDocumentQuery();

    var feedResponse = await query.ExecuteNextAsync<Family>();
    string resumeToken = feedResponse.ResponseContinuation;

    var firstPageById = feedResponse.AsEnumerable().OrderBy(entry => entry.Id);
    foreach (var entry in firstPageById)
    {
        // Intentionally empty: the page is only enumerated in Id order here.
    }

    // Second round: build a new query that starts at the saved continuation token.
    var resumeOptions = new FeedOptions
    {
        MaxItemCount = 1,
        RequestContinuation = resumeToken,
        EnableCrossPartitionQuery = true
    };
    query = client.CreateDocumentQuery<Family>(collectionUri, resumeOptions).AsDocumentQuery();

    feedResponse = await query.ExecuteNextAsync<Family>();

    var secondPageById = feedResponse.AsEnumerable().OrderBy(entry => entry.Id);
    foreach (var entry in secondPageById)
    {
        // Intentionally empty: the page is only enumerated in Id order here.
    }
}
要跳转到特定页面,请参见下面的代码:
/// <summary>
/// Reads the query results one page at a time, printing a header for each page, until
/// the continuation token returned for a page is <c>null</c>.
/// </summary>
/// <param name="currentPageNumber">
/// Number shown for the first page and passed to <c>QueryDocumentsByPage</c>; it is
/// incremented after each page.
/// </param>
/// <param name="documentNumber">
/// Starting value of the running document counter; it is incremented once for every
/// document of every page.
/// </param>
private static async Task QueryPageByPage(int currentPageNumber = 1, int documentNumber = 1)
{
    // Number of documents per page (configurable).
    // Fix: the terminating semicolon used to sit inside the trailing comment, which
    // left the declaration unterminated and the method uncompilable.
    const int PageSize = 3;

    // Continuation token for subsequent queries (null for the very first request/page).
    string continuationToken = null;

    do
    {
        Console.WriteLine($"----- PAGE {currentPageNumber} -----");

        // Loads ALL documents for the current page.
        KeyValuePair<string, IEnumerable<Family>> currentPage =
            await QueryDocumentsByPage(currentPageNumber, PageSize, continuationToken);

        foreach (Family family in currentPage.Value)
        {
            // Only the running counter is advanced here; per-document handling
            // (e.g. printing the document) would go in this loop.
            documentNumber++;
        }

        // Keep the continuation token for the next page query; null ends the loop.
        continuationToken = currentPage.Key;
        currentPageNumber++;
    } while (continuationToken != null);

    Console.WriteLine("\n--- END: Finished Querying ALL Dcuments ---");
}
QueryDocumentsByPage 函数如下:
/// <summary>
/// Fetches a single page of <c>Family</c> documents, starting at the given continuation
/// token, and returns the documents together with the token for the following page.
/// </summary>
/// <param name="pageNumber">Page number supplied by the caller; not used by the query itself.</param>
/// <param name="pageSize">Maximum number of documents requested for the page.</param>
/// <param name="continuationToken">
/// Token returned with the previous page, or <c>null</c> for the very first request.
/// </param>
/// <returns>
/// A pair whose key is the response continuation token and whose value holds the
/// documents of this page.
/// </returns>
private static async Task<KeyValuePair<string, IEnumerable<Family>>> QueryDocumentsByPage(int pageNumber, int pageSize, string continuationToken)
{
    var feedOptions = new FeedOptions {
        MaxItemCount = pageSize,
        EnableCrossPartitionQuery = true,
        // IMPORTANT: Set the continuation token (NULL for the first ever request/page)
        RequestContinuation = continuationToken
    };

    // Fix: the client and the query are disposable and were never released. They are
    // scoped to this call now; the page is copied into a list before they are disposed.
    // NOTE(review): reusing one long-lived DocumentClient instead of creating one per
    // page is generally preferable - consider hoisting it out of this method.
    using (var documentClient = new DocumentClient(new Uri("https://{CosmosDB/SQL Account Name}.documents.azure.com:443/"), "{CosmosDB/SQL Account Key}"))
    using (IDocumentQuery<Family> query = documentClient
        .CreateDocumentQuery<Family>("dbs/{Database Name}/colls/{Collection Name}", feedOptions)
        .AsDocumentQuery())
    {
        FeedResponse<Family> feedResponse = await query.ExecuteNextAsync<Family>();

        // Fix: the page used to be iterated as CeleryTask, a type that does not match
        // the FeedResponse<Family> / List<Family> used everywhere else in this method.
        List<Family> documents = new List<Family>(feedResponse);

        // IMPORTANT: Ensure the continuation token is kept for the next requests
        return new KeyValuePair<string, IEnumerable<Family>>(feedResponse.ResponseContinuation, documents);
    }
}
您在结果中实际收到了 40 个元素吗?还是说返回的是 10 个文档,而 Cosmos 中符合此查询条件的文档共有 40 个?
使用 ORDER BY 子句时,数据库会先检索符合查询条件的所有文档并对其排序,然后再应用 OFFSET 和 LIMIT 值得出最终结果。
我已经从下面的快照中说明了这一点。
- 我的 Cosmos 帐户中有 14 个文档符合查询条件,这与检索到的文档数一致。
- 输出文档数是 10,因为数据库必须先跳过前 5 个文档,再交付接下来的 5 个。
- 但我的实际结果只有 5 个文档,因为这正是我所请求的数量。
延续标记对分页很有效,但有局限性。如果您想直接跳页(比如从第 1 页跳到第 10 页),则不能使用它们:您需要从第一个文档开始逐页遍历,并不断使用令牌转到下一页。尽管存在这些限制,如果单个查询会匹配大量文档,通常仍建议使用延续标记。
另一个建议是:在使用 ORDER BY 时利用索引来优化 RU/s 的消耗。请参阅此链接。