如何通过c#代码删除DocumentDB中的所有文档
How to delete all the documents in DocumentDB through c# code
我正在使用 Microsoft 的一个名为 DocumentDB 的新数据库。现在我想按 ID 删除文档,但我不知道该怎么做。 DocumentDB 中的删除操作需要 self-links 并且它们与我自己的 id 不同。
不过,我可以先查询一次文档,从而得到它的 self-link。
有了这个 self-link,我就可以删除该文档。
现在我想删除 collection 中的所有文档(大约 5 万多个)。
是必须先逐个获取文档再删除,还是有更简单的方法可以做到同样的事情?
这可能吗?
您说得对,删除文档需要引用文档的 _self link。
如果您要删除集合中的 ALL 文档 - 删除并重新创建集合可能更简单、更快捷。唯一需要注意的是,服务器端脚本(例如 sprocs、udfs、触发器)也属于该集合,可能也需要重新创建。
更新: 我编写了一个快速存储过程,用于执行给定查询的批量删除。这使您可以在更少的网络请求中执行批量删除操作。
/**
 * DocumentDB stored procedure: deletes every document matched by a query.<br/>
 * One execution may not get through all matches before the server stops accepting
 * requests, so callers should re-run the sproc while the returned `continuation` is true.
 *
 * @function
 * @param {string} query - Query selecting the documents to delete (e.g. "SELECT * FROM c WHERE c.founded_year = 2008")
 * @returns {Object.<number, boolean>} The response body is an object with two properties:<br/>
 *   deleted - how many documents this execution deleted<br/>
 *   continuation - true if the sproc should be executed again; false once the query finds nothing more.
 */
function bulkDeleteSproc(query) {
    var coll = getContext().getCollection();
    var selfLink = coll.getSelfLink();
    var resp = getContext().getResponse();

    // Running tally reported back to the caller; stays "continuation: true" until
    // a query comes back with no documents and no continuation token.
    var result = {
        deleted: 0,
        continuation: true
    };

    // Reject a missing or empty query up front.
    if (!query) {
        throw new Error("The query is undefined or null.");
    }

    runQuery();

    // Writes the current tally into the sproc response.
    function publishResult() {
        resp.setBody(result);
    }

    // Issues the query (optionally resuming from a continuation token).
    function runQuery(token) {
        var options = {continuation: token};
        var accepted = coll.queryDocuments(selfLink, query, options, onQueryPage);

        // Request refused: report what has been done so far, with continuation still true.
        if (!accepted) {
            publishResult();
        }
    }

    // Handles one page of query results.
    function onQueryPage(err, docs, pageOptions) {
        if (err) throw err;

        if (docs.length > 0) {
            // Delete right away rather than paging further; deleteHead() re-runs the
            // query from scratch once this page has been emptied (writes before reads).
            deleteHead(docs);
            return;
        }

        if (pageOptions.continuation) {
            // Empty page but more to scan: follow the continuation token.
            runQuery(pageOptions.continuation);
            return;
        }

        // Empty page and no token: nothing is left to delete.
        result.continuation = false;
        publishResult();
    }

    // Deletes the first document of `pending`, then recurses on the remainder.
    // Once `pending` is empty, goes back to querying.
    function deleteHead(pending) {
        if (!(pending.length > 0)) {
            runQuery();
            return;
        }

        var accepted = coll.deleteDocument(pending[0]._self, {}, function (err, _options) {
            if (err) throw err;

            result.deleted += 1;
            pending.shift();
            deleteHead(pending);
        });

        // Request refused: report what has been done so far, with continuation still true.
        if (!accepted) {
            publishResult();
        }
    }
}
这是使用 C# SDK 删除文档的解决方案。下面的代码假设只有一个数据库和一个集合。它会遍历集合中的所有文档,并逐个删除它们。要删除特定的数据库、集合或文档,请修改相应的 "CreateQuery" 方法以包含 SQL select 语法。例如,要选择某个特定的数据库:
// Select the database whose Id is "MyDocDb"; First() throws if the query returns nothing.
db = client.CreateDatabaseQuery()
    .Where(database => database.Id == "MyDocDb")
    .ToList()
    .First();
用于删除具有单个数据库和单个集合的 DocumentDB 实例中所有文档的示例代码:
using Microsoft.Azure.Documents;
using Microsoft.Azure.Documents.Client;
using Microsoft.Azure.Documents.Linq;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Util
{
    /// <summary>
    /// Console utility that deletes every document in the first collection of the first
    /// database returned by the configured DocumentDB account.
    /// </summary>
    class Program
    {
        // Account endpoint and key. Both are placeholders that must be filled in before running.
        private readonly Uri _docDbUri = new Uri("https://<nameofyourdocdb>.documents.azure.com:443/");
        private readonly string _docDbKey = "<your primary key>";

        /// <summary>
        /// Enumerates all documents of the first collection in the first database and
        /// deletes them one at a time by self-link.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// The account has no database, or the database has no collection.
        /// </exception>
        private async Task DeleteDocsAsync()
        {
            using (var client = new DocumentClient(_docDbUri, _docDbKey))
            {
                try
                {
                    // FirstOrDefault + explicit check instead of First(): same exception type,
                    // but the message says what is actually missing.
                    var db = client.CreateDatabaseQuery().ToList().FirstOrDefault();
                    if (db == null)
                    {
                        throw new InvalidOperationException("No database was found in the DocumentDB account.");
                    }

                    var coll = client.CreateDocumentCollectionQuery(db.CollectionsLink).ToList().FirstOrDefault();
                    if (coll == null)
                    {
                        throw new InvalidOperationException("No collection was found in the database.");
                    }

                    var docs = client.CreateDocumentQuery(coll.DocumentsLink);
                    foreach (var doc in docs)
                    {
                        await client.DeleteDocumentAsync(doc.SelfLink);
                    }
                }
                catch (Exception ex)
                {
                    // Record the failure, then let it propagate unchanged.
                    Trace.WriteLine(ex);
                    throw;
                }
            }
        }

        /// <summary>
        /// Entry point: runs the deletion and blocks until it has finished.
        /// </summary>
        static void Main(string[] args)
        {
            Program p = new Program();

            // Main is synchronous, so it has to block here. GetAwaiter().GetResult()
            // rethrows the original exception, whereas Wait() would wrap it in an
            // AggregateException and hide the real failure one level down.
            p.DeleteDocsAsync().GetAwaiter().GetResult();
        }
    }
}
这是一种在启用 AllowBulkExecution 的情况下删除文档的方法。此方法以每批 100 个的方式删除文档,因为目前 Cosmos DB 一次最多只支持 100 个操作。它接收分区键属性的名称,并使用反射来获取其值。
/// <summary>
/// Deletes the given entities, awaiting the delete operations in batches.
/// </summary>
/// <param name="entities">Entities to delete; each entity's <c>Id</c> is passed to <c>DeleteDocumentAsync</c>.</param>
/// <param name="partitionKeyPropertyName">
/// Name of the property, read through reflection on each entity, whose value is used as the partition key.
/// Entities where the property is missing or its value is null are skipped.
/// </param>
/// <returns>A task that completes once every started delete has finished and been logged.</returns>
/// <remarks>
/// Failures of individual deletes are logged and do not fault the returned task.
/// </remarks>
public async Task BulkDeleteDocumentsAsync(IEnumerable<T> entities, string partitionKeyPropertyName)
{
    // Number of delete operations started before waiting for them to finish.
    const int BatchSize = 100;

    List<Task> tasks = new List<Task>(BatchSize);
    foreach (var entity in entities)
    {
        var partitionKey = entity.GetType().GetProperty(partitionKeyPropertyName)?.GetValue(entity, null)?.ToString();
        if (partitionKey == null)
        {
            // No usable partition key for this entity, so it cannot be addressed.
            continue;
        }

        tasks.Add(DeleteDocumentAsync(entity.Id!, partitionKey)
            .ContinueWith(itemResponse =>
            {
                if (itemResponse.IsCompletedSuccessfully)
                {
                    return;
                }

                // A canceled task is "not successful" but carries no exception; dereferencing
                // Exception here would throw inside the continuation and fault the whole batch.
                if (itemResponse.Exception is null)
                {
                    _logger.LogError($"Deleting {entity.Id} was canceled.");
                    return;
                }

                AggregateException innerExceptions = itemResponse.Exception.Flatten();
                if (innerExceptions.InnerExceptions.FirstOrDefault(innerEx => innerEx is CosmosException) is CosmosException cosmosException)
                {
                    _logger.LogError($"Cosmos Exception deleting {entity.Id} {cosmosException.StatusCode} ({cosmosException.Message}).");
                }
                else
                {
                    _logger.LogError($"Exception deleting {entity.Id} {innerExceptions.InnerExceptions.FirstOrDefault()}.");
                }
            }));

        // Drain a full batch before starting more work.
        if (tasks.Count == BatchSize)
        {
            await Task.WhenAll(tasks);
            tasks.Clear();
        }
    }

    // Wait for the final, possibly partial, batch.
    await Task.WhenAll(tasks);
}
我正在使用 Microsoft 的一个名为 DocumentDB 的新数据库。现在我想按 ID 删除文档,但我不知道该怎么做。DocumentDB 中的删除操作需要 self-link,而它们与我自己的 id 不同。不过,我可以先查询一次文档,从而得到它的 self-link;有了这个 self-link,我就可以删除该文档。
现在我想删除 collection 中的所有文档(大约 5 万多个)。
是必须先逐个获取文档再删除,还是有更简单的方法可以做到同样的事情?
这可能吗?
您说得对,删除文档需要引用文档的 _self link。
如果您要删除集合中的 ALL 文档 - 删除并重新创建集合可能更简单、更快捷。唯一需要注意的是,服务器端脚本(例如 sprocs、udfs、触发器)也属于该集合,可能也需要重新创建。
更新: 我编写了一个快速存储过程,用于执行给定查询的批量删除。这使您可以在更少的网络请求中执行批量删除操作。
/**
 * DocumentDB stored procedure: deletes every document matched by a query.<br/>
 * One execution may not get through all matches before the server stops accepting
 * requests, so callers should re-run the sproc while the returned `continuation` is true.
 *
 * @function
 * @param {string} query - Query selecting the documents to delete (e.g. "SELECT * FROM c WHERE c.founded_year = 2008")
 * @returns {Object.<number, boolean>} The response body is an object with two properties:<br/>
 *   deleted - how many documents this execution deleted<br/>
 *   continuation - true if the sproc should be executed again; false once the query finds nothing more.
 */
function bulkDeleteSproc(query) {
    var coll = getContext().getCollection();
    var selfLink = coll.getSelfLink();
    var resp = getContext().getResponse();

    // Running tally reported back to the caller; stays "continuation: true" until
    // a query comes back with no documents and no continuation token.
    var result = {
        deleted: 0,
        continuation: true
    };

    // Reject a missing or empty query up front.
    if (!query) {
        throw new Error("The query is undefined or null.");
    }

    runQuery();

    // Writes the current tally into the sproc response.
    function publishResult() {
        resp.setBody(result);
    }

    // Issues the query (optionally resuming from a continuation token).
    function runQuery(token) {
        var options = {continuation: token};
        var accepted = coll.queryDocuments(selfLink, query, options, onQueryPage);

        // Request refused: report what has been done so far, with continuation still true.
        if (!accepted) {
            publishResult();
        }
    }

    // Handles one page of query results.
    function onQueryPage(err, docs, pageOptions) {
        if (err) throw err;

        if (docs.length > 0) {
            // Delete right away rather than paging further; deleteHead() re-runs the
            // query from scratch once this page has been emptied (writes before reads).
            deleteHead(docs);
            return;
        }

        if (pageOptions.continuation) {
            // Empty page but more to scan: follow the continuation token.
            runQuery(pageOptions.continuation);
            return;
        }

        // Empty page and no token: nothing is left to delete.
        result.continuation = false;
        publishResult();
    }

    // Deletes the first document of `pending`, then recurses on the remainder.
    // Once `pending` is empty, goes back to querying.
    function deleteHead(pending) {
        if (!(pending.length > 0)) {
            runQuery();
            return;
        }

        var accepted = coll.deleteDocument(pending[0]._self, {}, function (err, _options) {
            if (err) throw err;

            result.deleted += 1;
            pending.shift();
            deleteHead(pending);
        });

        // Request refused: report what has been done so far, with continuation still true.
        if (!accepted) {
            publishResult();
        }
    }
}
这是使用 C# SDK 删除文档的解决方案。下面的代码假设只有一个数据库和一个集合。它会遍历集合中的所有文档,并逐个删除它们。要删除特定的数据库、集合或文档,请修改相应的 "CreateQuery" 方法以包含 SQL select 语法。例如,要选择某个特定的数据库:
// Select the database whose Id is "MyDocDb"; First() throws if the query returns nothing.
db = client.CreateDatabaseQuery()
    .Where(database => database.Id == "MyDocDb")
    .ToList()
    .First();
用于删除具有单个数据库和单个集合的 DocumentDB 实例中所有文档的示例代码:
using Microsoft.Azure.Documents;
using Microsoft.Azure.Documents.Client;
using Microsoft.Azure.Documents.Linq;
using System;
using System.Collections.Generic;
using System.Diagnostics;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
namespace Util
{
    /// <summary>
    /// Console utility that deletes every document in the first collection of the first
    /// database returned by the configured DocumentDB account.
    /// </summary>
    class Program
    {
        // Account endpoint and key. Both are placeholders that must be filled in before running.
        private readonly Uri _docDbUri = new Uri("https://<nameofyourdocdb>.documents.azure.com:443/");
        private readonly string _docDbKey = "<your primary key>";

        /// <summary>
        /// Enumerates all documents of the first collection in the first database and
        /// deletes them one at a time by self-link.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// The account has no database, or the database has no collection.
        /// </exception>
        private async Task DeleteDocsAsync()
        {
            using (var client = new DocumentClient(_docDbUri, _docDbKey))
            {
                try
                {
                    // FirstOrDefault + explicit check instead of First(): same exception type,
                    // but the message says what is actually missing.
                    var db = client.CreateDatabaseQuery().ToList().FirstOrDefault();
                    if (db == null)
                    {
                        throw new InvalidOperationException("No database was found in the DocumentDB account.");
                    }

                    var coll = client.CreateDocumentCollectionQuery(db.CollectionsLink).ToList().FirstOrDefault();
                    if (coll == null)
                    {
                        throw new InvalidOperationException("No collection was found in the database.");
                    }

                    var docs = client.CreateDocumentQuery(coll.DocumentsLink);
                    foreach (var doc in docs)
                    {
                        await client.DeleteDocumentAsync(doc.SelfLink);
                    }
                }
                catch (Exception ex)
                {
                    // Record the failure, then let it propagate unchanged.
                    Trace.WriteLine(ex);
                    throw;
                }
            }
        }

        /// <summary>
        /// Entry point: runs the deletion and blocks until it has finished.
        /// </summary>
        static void Main(string[] args)
        {
            Program p = new Program();

            // Main is synchronous, so it has to block here. GetAwaiter().GetResult()
            // rethrows the original exception, whereas Wait() would wrap it in an
            // AggregateException and hide the real failure one level down.
            p.DeleteDocsAsync().GetAwaiter().GetResult();
        }
    }
}
这是一种在启用 AllowBulkExecution 的情况下删除文档的方法。此方法以每批 100 个的方式删除文档,因为目前 Cosmos DB 一次最多只支持 100 个操作。它接收分区键属性的名称,并使用反射来获取其值。
/// <summary>
/// Deletes the given entities, awaiting the delete operations in batches.
/// </summary>
/// <param name="entities">Entities to delete; each entity's <c>Id</c> is passed to <c>DeleteDocumentAsync</c>.</param>
/// <param name="partitionKeyPropertyName">
/// Name of the property, read through reflection on each entity, whose value is used as the partition key.
/// Entities where the property is missing or its value is null are skipped.
/// </param>
/// <returns>A task that completes once every started delete has finished and been logged.</returns>
/// <remarks>
/// Failures of individual deletes are logged and do not fault the returned task.
/// </remarks>
public async Task BulkDeleteDocumentsAsync(IEnumerable<T> entities, string partitionKeyPropertyName)
{
    // Number of delete operations started before waiting for them to finish.
    const int BatchSize = 100;

    List<Task> tasks = new List<Task>(BatchSize);
    foreach (var entity in entities)
    {
        var partitionKey = entity.GetType().GetProperty(partitionKeyPropertyName)?.GetValue(entity, null)?.ToString();
        if (partitionKey == null)
        {
            // No usable partition key for this entity, so it cannot be addressed.
            continue;
        }

        tasks.Add(DeleteDocumentAsync(entity.Id!, partitionKey)
            .ContinueWith(itemResponse =>
            {
                if (itemResponse.IsCompletedSuccessfully)
                {
                    return;
                }

                // A canceled task is "not successful" but carries no exception; dereferencing
                // Exception here would throw inside the continuation and fault the whole batch.
                if (itemResponse.Exception is null)
                {
                    _logger.LogError($"Deleting {entity.Id} was canceled.");
                    return;
                }

                AggregateException innerExceptions = itemResponse.Exception.Flatten();
                if (innerExceptions.InnerExceptions.FirstOrDefault(innerEx => innerEx is CosmosException) is CosmosException cosmosException)
                {
                    _logger.LogError($"Cosmos Exception deleting {entity.Id} {cosmosException.StatusCode} ({cosmosException.Message}).");
                }
                else
                {
                    _logger.LogError($"Exception deleting {entity.Id} {innerExceptions.InnerExceptions.FirstOrDefault()}.");
                }
            }));

        // Drain a full batch before starting more work.
        if (tasks.Count == BatchSize)
        {
            await Task.WhenAll(tasks);
            tasks.Clear();
        }
    }

    // Wait for the final, possibly partial, batch.
    await Task.WhenAll(tasks);
}