从 Gmail API 检索超过 100 个邮件 ID
Retrieving more than 100 message ids from Gmail API
我的 gmail 帐户中有 3000 封电子邮件。我想创建所有发件人的汇总列表,以便更有效地清理收件箱。我不需要下载邮件正文或附件。
我是从这个示例开始的 (https://developers.google.com/gmail/api/quickstart/dotnet),但现在我不知道如何在执行此代码时返回超过 100 个消息 ID:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Google.Apis.Auth.OAuth2;
using Google.Apis.Gmail.v1;
using Google.Apis.Gmail.v1.Data;
using Google.Apis.Requests;
using Google.Apis.Services;
using Google.Apis.Util;
using Google.Apis.Util.Store;
namespace GmailQuickstart
{
/// <summary>
/// Console program that summarizes the senders of the messages in the Gmail inbox.
/// It lists the inbox message ids page by page, downloads the messages in batches,
/// extracts the From/Date/Subject headers, and writes a per-sender count to disk.
/// Each intermediate result is serialized to a JSON file and read back, so later
/// stages can be re-run from those files without calling the API again.
/// </summary>
class Program
{
    static string[] Scopes = { GmailService.Scope.GmailReadonly };
    static string ApplicationName = "Gmail API .NET Quickstart";

    // Gmail's batching guide limits a single batch request to 100 calls; larger
    // batches tend to come back with rate-limit errors instead of messages.
    const int GmailBatchLimit = 100;

    // NOTE(review): the @"C:[=11=]0\..." paths used below look garbled by text
    // extraction; confirm the intended output directory before running.

    /// <summary>
    /// Authorizes against Gmail, then runs the list / download / extract / aggregate pipeline.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        UserCredential credential;
        using (var stream = new FileStream("credentials.json", FileMode.Open, FileAccess.Read))
        {
            string credPath = "token.json";
            credential = GoogleWebAuthorizationBroker.AuthorizeAsync(
                GoogleClientSecrets.Load(stream).Secrets,
                Scopes,
                "user",
                CancellationToken.None,
                new FileDataStore(credPath, true)).Result;
            Console.WriteLine("Credential file saved to: " + credPath);
        }

        // Create Gmail API service.
        var service = new GmailService(new BaseClientService.Initializer()
        {
            HttpClientInitializer = credential,
            ApplicationName = ApplicationName,
        });

        // Get all of the message ids for the messages in the inbox, one page at a time.
        var messageRequest = service.Users.Messages.List("me");
        messageRequest.LabelIds = "INBOX";
        var messageList = new List<Message>();
        var k = 0;
        do
        {
            ListMessagesResponse messageResponse1 = messageRequest.Execute();

            // A page without results carries no Messages collection (null rather than
            // empty), which would make AddRange throw, e.g. for an empty inbox.
            if (messageResponse1.Messages != null)
            {
                messageList.AddRange(messageResponse1.Messages);
            }

            var output = $"Request {k} - Message Count: {messageList.Count} Page Token: {messageRequest.PageToken} - Next Page Token: {messageResponse1.NextPageToken}";
            Console.WriteLine(output);
            // Terminate each entry so successive log lines do not run together.
            System.IO.File.AppendAllText(@"C:[=11=]0\log.txt", output + Environment.NewLine);

            // Feed the token of the next page into the same request object;
            // a missing token means the last page has been read.
            messageRequest.PageToken = messageResponse1.NextPageToken;
            k++;
        } while (!String.IsNullOrEmpty(messageRequest.PageToken));

        // Checkpoint: persist the id list so the next stages can be tested without re-listing.
        var serializedMessageIdList = Newtonsoft.Json.JsonConvert.SerializeObject(messageList);
        System.IO.File.WriteAllText(@"C:[=11=]0\MessageIds.json", serializedMessageIdList);
        var mIdList = Newtonsoft.Json.JsonConvert.DeserializeObject<List<Message>>(System.IO.File.ReadAllText(@"C:[=11=]0\MessageIds.json"));

        // Fetch the message object for every id, GmailBatchLimit ids per batch request.
        var messages = BatchDownloadEmails(service, mIdList.Select(m => m.Id), GmailBatchLimit);

        // Checkpoint: persist the downloaded messages and read them back.
        var serializedMessageList = Newtonsoft.Json.JsonConvert.SerializeObject(messages);
        System.IO.File.WriteAllText(@"C:[=11=]0\Messages.json", serializedMessageList);
        var mList = Newtonsoft.Json.JsonConvert.DeserializeObject<IList<Message>>(System.IO.File.ReadAllText(@"C:[=11=]0\Messages.json"));

        // Pull the headers of interest out of each message payload.
        var emailList = new List<EmailItem>();
        foreach (var message in mList)
        {
            if (message != null)
            {
                emailList.Add(new EmailItem()
                {
                    From = GetHeaderValue(message, "From"),
                    Subject = GetHeaderValue(message, "Subject"),
                    Date = GetHeaderValue(message, "Date"),
                });
            }
        }

        // Checkpoint: persist the extracted header values and read them back.
        var serializedEmailItemList = Newtonsoft.Json.JsonConvert.SerializeObject(emailList);
        System.IO.File.WriteAllText(@"C:[=11=]0\EmailItems.json", serializedEmailItemList);
        var eiList = Newtonsoft.Json.JsonConvert.DeserializeObject<List<EmailItem>>(System.IO.File.ReadAllText(@"C:[=11=]0\EmailItems.json"));

        // Aggregate: number of messages per sender, most frequent sender first.
        var senderSummary = eiList.GroupBy(g => g.From).Select(g => new { Sender = g.Key, Count = g.Count() }).OrderByDescending(g => g.Count);

        // Serialize and output the results.
        var serializedSummaryList = Newtonsoft.Json.JsonConvert.SerializeObject(senderSummary);
        System.IO.File.WriteAllText(@"C:[=11=]0\SenderSummary.json", serializedSummaryList);
    }

    /// <summary>
    /// Returns the value of the first header called <paramref name="name"/>, or null
    /// when the message has no payload, no headers, or no such header.
    /// </summary>
    /// <param name="message">Message whose payload headers are searched.</param>
    /// <param name="name">Header name, matched without regard to case.</param>
    private static string GetHeaderValue(Message message, string name)
    {
        var headers = message.Payload?.Headers;
        if (headers == null)
        {
            return null;
        }

        // FirstOrDefault rather than SingleOrDefault: a message that repeats a header
        // must not abort the whole run with an InvalidOperationException.
        return headers.FirstOrDefault(h => string.Equals(h.Name, name, StringComparison.OrdinalIgnoreCase))?.Value;
    }

    /// <summary>
    /// Downloads the messages with the given ids using batch requests of at most
    /// <paramref name="chunkSize"/> calls each, pausing five seconds after every batch.
    /// </summary>
    /// <param name="service">Authorized Gmail service used to build and send the requests.</param>
    /// <param name="messageIds">Ids of the messages to download.</param>
    /// <param name="chunkSize">Maximum number of get-requests queued into one batch.</param>
    /// <returns>
    /// The messages that were returned successfully. Requests that came back with an
    /// error are reported on standard error and left out of the result.
    /// </returns>
    public static IList<Message> BatchDownloadEmails(GmailService service, IEnumerable<string> messageIds, int chunkSize)
    {
        var messages = new List<Message>();

        // ChunkBy is a deferred LINQ query; materialize it once so the loop below
        // does not regroup the whole id list on every iteration.
        var batches = messageIds.ChunkBy(chunkSize).ToList();

        for (int i = 0; i < batches.Count; i++)
        {
            Console.WriteLine($"list: {i}...");
            var request = new BatchRequest(service);
            foreach (var messageId in batches[i])
            {
                var messageBodyRequest = service.Users.Messages.Get("me", messageId);
                request.Queue<Message>(messageBodyRequest,
                    (content, error, index, message) =>
                    {
                        if (error != null)
                        {
                            // Surface the failure instead of silently storing a null entry.
                            Console.Error.WriteLine($"message {messageId} failed: {error.Message}");
                            return;
                        }

                        if (content != null)
                        {
                            messages.Add(content);
                        }
                    });
            }

            Console.WriteLine("");
            Console.WriteLine("ExecuteAsync");
            // Execute all the requests in the queue and block until the batch completes.
            request.ExecuteAsync().Wait();
            // Crude pause between batches, kept from the original code.
            System.Threading.Thread.Sleep(5000);
        }

        return messages;
    }
}
/// <summary>
/// Header values extracted from one message; the unit the sender summary is built from.
/// All three values are kept as the raw header strings.
/// </summary>
public class EmailItem
{
    /// <summary>Value of the message's "From" header.</summary>
    public string From
    {
        get;
        set;
    }

    /// <summary>Value of the message's "Subject" header.</summary>
    public string Subject
    {
        get;
        set;
    }

    /// <summary>Value of the message's "Date" header.</summary>
    public string Date
    {
        get;
        set;
    }
}
/// <summary>
/// LINQ-style helpers for <see cref="IEnumerable{T}"/>.
/// </summary>
public static class IEnumerableExtensions
{
    /// <summary>
    /// Splits <paramref name="source"/> into consecutive chunks of at most
    /// <paramref name="chunkSize"/> elements, preserving element order.
    /// The last chunk holds the remainder and may be shorter; an empty source yields no chunks.
    /// </summary>
    /// <typeparam name="T">Element type of the sequence.</typeparam>
    /// <param name="source">Sequence to split.</param>
    /// <param name="chunkSize">Maximum number of elements per chunk; must be positive.</param>
    /// <returns>
    /// A deferred query: the grouping is re-evaluated each time the result is enumerated,
    /// so callers that iterate more than once should materialize it first.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is zero or negative.</exception>
    public static IEnumerable<IEnumerable<T>> ChunkBy<T>(this IEnumerable<T> source, int chunkSize)
    {
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        // Reject bad sizes at the call site; otherwise a size of 0 only fails later,
        // as a DivideByZeroException raised from inside the deferred query.
        if (chunkSize <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(chunkSize), chunkSize, "Chunk size must be greater than zero.");
        }

        return source
            .Select((x, i) => new { Index = i, Value = x })
            // Integer division maps indices 0..chunkSize-1 to group 0, the next chunkSize to group 1, ...
            .GroupBy(x => x.Index / chunkSize)
            .Select(x => x.Select(v => v.Value));
    }
}
}
我所做的研究表明我需要使用批处理请求,并且根据我发现的信息,我无法调整它以适应我想要完成的任务。我的理解是,我会使用批处理请求获取所有邮件 ID,然后进行 3000 次单独调用以获取收件箱中每封电子邮件的实际发件人、主题和日期??
您可以使用分页获取完整列表。
将上一页返回的页面令牌传递给下一次 Users.Messages.List 调用(第一次调用不需要传递令牌)。当结果中不再包含消息时,即可判定已到达末尾。
这样您就可以获取邮箱中的所有邮件。
注意。我建议您将代码设为异步:如果要阅读的消息不止几条,则可能需要相当长的时间才能全部读取。
您也可以使用 PageStreamer 来获取剩余的结果。
// Walks every page of users.messages.list and prints each message id.
// The three delegates tell PageStreamer how to put a page token on the request,
// how to read the next page token from a response, and which items a response holds.
var pageStreamer = new PageStreamer<Google.Apis.Gmail.v1.Data.Message, UsersResource.MessagesResource.ListRequest, ListMessagesResponse, string>(
    (request, token) => request.PageToken = token,
    response => response.NextPageToken,
    response => response.Messages);
var req = service.Users.Messages.List("me");
// users.messages.list documents 500 as the largest allowed maxResults (default 100),
// so ask for the documented maximum instead of an out-of-range 1000.
req.MaxResults = 500;
foreach (var result in pageStreamer.Fetch(req))
{
    Console.WriteLine(result.Id);
}
只要还有更多结果可请求,此代码就会继续运行。批处理(batching)在这里其实帮不上忙,因为无法预先知道下一页的令牌是什么。
我的 gmail 帐户中有 3000 封电子邮件。我想创建所有发件人的汇总列表,以便更有效地清理收件箱。我不需要下载邮件正文或附件。
我是从这个示例开始的 (https://developers.google.com/gmail/api/quickstart/dotnet),但现在我不知道如何在执行此代码时返回超过 100 个消息 ID:
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
using Google.Apis.Auth.OAuth2;
using Google.Apis.Gmail.v1;
using Google.Apis.Gmail.v1.Data;
using Google.Apis.Requests;
using Google.Apis.Services;
using Google.Apis.Util;
using Google.Apis.Util.Store;
namespace GmailQuickstart
{
/// <summary>
/// Console program that summarizes the senders of the messages in the Gmail inbox.
/// It lists the inbox message ids page by page, downloads the messages in batches,
/// extracts the From/Date/Subject headers, and writes a per-sender count to disk.
/// Each intermediate result is serialized to a JSON file and read back, so later
/// stages can be re-run from those files without calling the API again.
/// </summary>
class Program
{
    static string[] Scopes = { GmailService.Scope.GmailReadonly };
    static string ApplicationName = "Gmail API .NET Quickstart";

    // Gmail's batching guide limits a single batch request to 100 calls; larger
    // batches tend to come back with rate-limit errors instead of messages.
    const int GmailBatchLimit = 100;

    // NOTE(review): the @"C:[=11=]0\..." paths used below look garbled by text
    // extraction; confirm the intended output directory before running.

    /// <summary>
    /// Authorizes against Gmail, then runs the list / download / extract / aggregate pipeline.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        UserCredential credential;
        using (var stream = new FileStream("credentials.json", FileMode.Open, FileAccess.Read))
        {
            string credPath = "token.json";
            credential = GoogleWebAuthorizationBroker.AuthorizeAsync(
                GoogleClientSecrets.Load(stream).Secrets,
                Scopes,
                "user",
                CancellationToken.None,
                new FileDataStore(credPath, true)).Result;
            Console.WriteLine("Credential file saved to: " + credPath);
        }

        // Create Gmail API service.
        var service = new GmailService(new BaseClientService.Initializer()
        {
            HttpClientInitializer = credential,
            ApplicationName = ApplicationName,
        });

        // Get all of the message ids for the messages in the inbox, one page at a time.
        var messageRequest = service.Users.Messages.List("me");
        messageRequest.LabelIds = "INBOX";
        var messageList = new List<Message>();
        var k = 0;
        do
        {
            ListMessagesResponse messageResponse1 = messageRequest.Execute();

            // A page without results carries no Messages collection (null rather than
            // empty), which would make AddRange throw, e.g. for an empty inbox.
            if (messageResponse1.Messages != null)
            {
                messageList.AddRange(messageResponse1.Messages);
            }

            var output = $"Request {k} - Message Count: {messageList.Count} Page Token: {messageRequest.PageToken} - Next Page Token: {messageResponse1.NextPageToken}";
            Console.WriteLine(output);
            // Terminate each entry so successive log lines do not run together.
            System.IO.File.AppendAllText(@"C:[=11=]0\log.txt", output + Environment.NewLine);

            // Feed the token of the next page into the same request object;
            // a missing token means the last page has been read.
            messageRequest.PageToken = messageResponse1.NextPageToken;
            k++;
        } while (!String.IsNullOrEmpty(messageRequest.PageToken));

        // Checkpoint: persist the id list so the next stages can be tested without re-listing.
        var serializedMessageIdList = Newtonsoft.Json.JsonConvert.SerializeObject(messageList);
        System.IO.File.WriteAllText(@"C:[=11=]0\MessageIds.json", serializedMessageIdList);
        var mIdList = Newtonsoft.Json.JsonConvert.DeserializeObject<List<Message>>(System.IO.File.ReadAllText(@"C:[=11=]0\MessageIds.json"));

        // Fetch the message object for every id, GmailBatchLimit ids per batch request.
        var messages = BatchDownloadEmails(service, mIdList.Select(m => m.Id), GmailBatchLimit);

        // Checkpoint: persist the downloaded messages and read them back.
        var serializedMessageList = Newtonsoft.Json.JsonConvert.SerializeObject(messages);
        System.IO.File.WriteAllText(@"C:[=11=]0\Messages.json", serializedMessageList);
        var mList = Newtonsoft.Json.JsonConvert.DeserializeObject<IList<Message>>(System.IO.File.ReadAllText(@"C:[=11=]0\Messages.json"));

        // Pull the headers of interest out of each message payload.
        var emailList = new List<EmailItem>();
        foreach (var message in mList)
        {
            if (message != null)
            {
                emailList.Add(new EmailItem()
                {
                    From = GetHeaderValue(message, "From"),
                    Subject = GetHeaderValue(message, "Subject"),
                    Date = GetHeaderValue(message, "Date"),
                });
            }
        }

        // Checkpoint: persist the extracted header values and read them back.
        var serializedEmailItemList = Newtonsoft.Json.JsonConvert.SerializeObject(emailList);
        System.IO.File.WriteAllText(@"C:[=11=]0\EmailItems.json", serializedEmailItemList);
        var eiList = Newtonsoft.Json.JsonConvert.DeserializeObject<List<EmailItem>>(System.IO.File.ReadAllText(@"C:[=11=]0\EmailItems.json"));

        // Aggregate: number of messages per sender, most frequent sender first.
        var senderSummary = eiList.GroupBy(g => g.From).Select(g => new { Sender = g.Key, Count = g.Count() }).OrderByDescending(g => g.Count);

        // Serialize and output the results.
        var serializedSummaryList = Newtonsoft.Json.JsonConvert.SerializeObject(senderSummary);
        System.IO.File.WriteAllText(@"C:[=11=]0\SenderSummary.json", serializedSummaryList);
    }

    /// <summary>
    /// Returns the value of the first header called <paramref name="name"/>, or null
    /// when the message has no payload, no headers, or no such header.
    /// </summary>
    /// <param name="message">Message whose payload headers are searched.</param>
    /// <param name="name">Header name, matched without regard to case.</param>
    private static string GetHeaderValue(Message message, string name)
    {
        var headers = message.Payload?.Headers;
        if (headers == null)
        {
            return null;
        }

        // FirstOrDefault rather than SingleOrDefault: a message that repeats a header
        // must not abort the whole run with an InvalidOperationException.
        return headers.FirstOrDefault(h => string.Equals(h.Name, name, StringComparison.OrdinalIgnoreCase))?.Value;
    }

    /// <summary>
    /// Downloads the messages with the given ids using batch requests of at most
    /// <paramref name="chunkSize"/> calls each, pausing five seconds after every batch.
    /// </summary>
    /// <param name="service">Authorized Gmail service used to build and send the requests.</param>
    /// <param name="messageIds">Ids of the messages to download.</param>
    /// <param name="chunkSize">Maximum number of get-requests queued into one batch.</param>
    /// <returns>
    /// The messages that were returned successfully. Requests that came back with an
    /// error are reported on standard error and left out of the result.
    /// </returns>
    public static IList<Message> BatchDownloadEmails(GmailService service, IEnumerable<string> messageIds, int chunkSize)
    {
        var messages = new List<Message>();

        // ChunkBy is a deferred LINQ query; materialize it once so the loop below
        // does not regroup the whole id list on every iteration.
        var batches = messageIds.ChunkBy(chunkSize).ToList();

        for (int i = 0; i < batches.Count; i++)
        {
            Console.WriteLine($"list: {i}...");
            var request = new BatchRequest(service);
            foreach (var messageId in batches[i])
            {
                var messageBodyRequest = service.Users.Messages.Get("me", messageId);
                request.Queue<Message>(messageBodyRequest,
                    (content, error, index, message) =>
                    {
                        if (error != null)
                        {
                            // Surface the failure instead of silently storing a null entry.
                            Console.Error.WriteLine($"message {messageId} failed: {error.Message}");
                            return;
                        }

                        if (content != null)
                        {
                            messages.Add(content);
                        }
                    });
            }

            Console.WriteLine("");
            Console.WriteLine("ExecuteAsync");
            // Execute all the requests in the queue and block until the batch completes.
            request.ExecuteAsync().Wait();
            // Crude pause between batches, kept from the original code.
            System.Threading.Thread.Sleep(5000);
        }

        return messages;
    }
}
/// <summary>
/// Header values extracted from one message; the unit the sender summary is built from.
/// All three values are kept as the raw header strings.
/// </summary>
public class EmailItem
{
    /// <summary>Value of the message's "From" header.</summary>
    public string From
    {
        get;
        set;
    }

    /// <summary>Value of the message's "Subject" header.</summary>
    public string Subject
    {
        get;
        set;
    }

    /// <summary>Value of the message's "Date" header.</summary>
    public string Date
    {
        get;
        set;
    }
}
/// <summary>
/// LINQ-style helpers for <see cref="IEnumerable{T}"/>.
/// </summary>
public static class IEnumerableExtensions
{
    /// <summary>
    /// Splits <paramref name="source"/> into consecutive chunks of at most
    /// <paramref name="chunkSize"/> elements, preserving element order.
    /// The last chunk holds the remainder and may be shorter; an empty source yields no chunks.
    /// </summary>
    /// <typeparam name="T">Element type of the sequence.</typeparam>
    /// <param name="source">Sequence to split.</param>
    /// <param name="chunkSize">Maximum number of elements per chunk; must be positive.</param>
    /// <returns>
    /// A deferred query: the grouping is re-evaluated each time the result is enumerated,
    /// so callers that iterate more than once should materialize it first.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is zero or negative.</exception>
    public static IEnumerable<IEnumerable<T>> ChunkBy<T>(this IEnumerable<T> source, int chunkSize)
    {
        if (source == null)
        {
            throw new ArgumentNullException(nameof(source));
        }

        // Reject bad sizes at the call site; otherwise a size of 0 only fails later,
        // as a DivideByZeroException raised from inside the deferred query.
        if (chunkSize <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(chunkSize), chunkSize, "Chunk size must be greater than zero.");
        }

        return source
            .Select((x, i) => new { Index = i, Value = x })
            // Integer division maps indices 0..chunkSize-1 to group 0, the next chunkSize to group 1, ...
            .GroupBy(x => x.Index / chunkSize)
            .Select(x => x.Select(v => v.Value));
    }
}
}
我所做的研究表明我需要使用批处理请求,并且根据我发现的信息,我无法调整它以适应我想要完成的任务。我的理解是,我会使用批处理请求获取所有邮件 ID,然后进行 3000 次单独调用以获取收件箱中每封电子邮件的实际发件人、主题和日期??
您可以使用分页获取完整列表。
将上一页返回的页面令牌传递给下一次 Users.Messages.List 调用(第一次调用不需要传递令牌)。当结果中不再包含消息时,即可判定已到达末尾。
这样您就可以获取邮箱中的所有邮件。
注意。我建议您将代码设为异步:如果要阅读的消息不止几条,则可能需要相当长的时间才能全部读取。
您也可以使用 PageStreamer 来获取剩余的结果。
// Walks every page of users.messages.list and prints each message id.
// The three delegates tell PageStreamer how to put a page token on the request,
// how to read the next page token from a response, and which items a response holds.
var pageStreamer = new PageStreamer<Google.Apis.Gmail.v1.Data.Message, UsersResource.MessagesResource.ListRequest, ListMessagesResponse, string>(
    (request, token) => request.PageToken = token,
    response => response.NextPageToken,
    response => response.Messages);
var req = service.Users.Messages.List("me");
// users.messages.list documents 500 as the largest allowed maxResults (default 100),
// so ask for the documented maximum instead of an out-of-range 1000.
req.MaxResults = 500;
foreach (var result in pageStreamer.Fetch(req))
{
    Console.WriteLine(result.Id);
}
只要还有更多结果可请求,此代码就会继续运行。批处理(batching)在这里其实帮不上忙,因为无法预先知道下一页的令牌是什么。