从 Gmail API 检索超过 100 个邮件 ID

Retrieving more than 100 message ids from Gmail API

我的 gmail 帐户中有 3000 封电子邮件。我想创建所有发件人的汇总列表,以便更有效地清理收件箱。我不需要下载邮件正文或附件。

我是从这个示例开始的 (https://developers.google.com/gmail/api/quickstart/dotnet),但现在我不知道如何让这段代码在执行时返回超过 100 个邮件 ID:

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Net.Http;
using System.Runtime.InteropServices;
using System.Text;
using System.Threading;
using System.Threading.Tasks;

using Google.Apis.Auth.OAuth2;
using Google.Apis.Gmail.v1;
using Google.Apis.Gmail.v1.Data;
using Google.Apis.Requests;
using Google.Apis.Services;
using Google.Apis.Util;
using Google.Apis.Util.Store;

namespace GmailQuickstart
{
    class Program
    {
        // OAuth scopes requested when authorizing the user (Gmail read-only scope).
        static string[] Scopes = new string[] { GmailService.Scope.GmailReadonly };

        // Application name handed to the Gmail service initializer.
        static string ApplicationName = "Gmail API .NET Quickstart";

        /// <summary>
        /// Authorizes against the Gmail API, pages through every message id in the inbox,
        /// downloads the messages in batches, extracts the From/Date/Subject headers and
        /// writes a per-sender message count (plus the intermediate results) to JSON files.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            // NOTE(review): the directory part of these paths looks garbled by text extraction;
            // confirm the intended output folder. The literals are kept exactly as found and
            // gathered here so they only need to be corrected in one place.
            const string logPath = @"C:[=11=]0\log.txt";
            const string messageIdsPath = @"C:[=11=]0\MessageIds.json";
            const string messagesPath = @"C:[=11=]0\Messages.json";
            const string emailItemsPath = @"C:[=11=]0\EmailItems.json";
            const string senderSummaryPath = @"C:[=11=]0\SenderSummary.json";

            UserCredential credential;

            using (var stream = new FileStream("credentials.json", FileMode.Open, FileAccess.Read))
            {
                string credPath = "token.json";
                credential = GoogleWebAuthorizationBroker.AuthorizeAsync(
                    GoogleClientSecrets.Load(stream).Secrets,
                    Scopes,
                    "user",
                    CancellationToken.None,
                    new FileDataStore(credPath, true)).Result;
                Console.WriteLine("Credential file saved to: " + credPath);
            }

            // Create Gmail API service.
            var service = new GmailService(new BaseClientService.Initializer()
            {
                HttpClientInitializer = credential,
                ApplicationName = ApplicationName,
            });

            // Get all of the message ids for the messages in the inbox, one page at a time.
            var messageRequest = service.Users.Messages.List("me");
            messageRequest.LabelIds = "INBOX";
            // The API returns 100 ids per page by default and accepts at most 500;
            // asking for the maximum keeps the number of round trips down.
            messageRequest.MaxResults = 500;

            var messageList = new List<Message>();
            var pageNumber = 0;

            do
            {
                ListMessagesResponse page = messageRequest.Execute();

                // A page without results carries no Messages collection, so guard against null.
                if (page.Messages != null)
                {
                    messageList.AddRange(page.Messages);
                }

                var output = $"Request {pageNumber} - Message Count: {messageList.Count} Page Token: {messageRequest.PageToken} - Next Page Token: {page.NextPageToken}";
                Console.WriteLine(output);
                // Terminate each entry so the log stays one line per request.
                System.IO.File.AppendAllText(logPath, output + Environment.NewLine);

                // Feed the token of the next page back into the same request object.
                messageRequest.PageToken = page.NextPageToken;
                pageNumber++;
            } while (!String.IsNullOrEmpty(messageRequest.PageToken));

            // Once the list of all the message ids is built it is serialized to JSON and written to a file
            // so the next portions can be tested without having to repeat the calls above each time.
            var serializedMessageIdList = Newtonsoft.Json.JsonConvert.SerializeObject(messageList);
            System.IO.File.WriteAllText(messageIdsPath, serializedMessageIdList);

            // Read in the serialized list and rehydrate it to test the next portion.
            var mIdList = Newtonsoft.Json.JsonConvert.DeserializeObject<List<Message>>(System.IO.File.ReadAllText(messageIdsPath));

            // Take those message ids and get the message object from the API for each of them.
            // The Gmail batching guide limits a single batch request to 100 calls.
            var messages = BatchDownloadEmails(service, mIdList.Select(m => m.Id), 100);

            // Again serialize the message list and write it to a file...
            var serializedMessageList = Newtonsoft.Json.JsonConvert.SerializeObject(messages);
            System.IO.File.WriteAllText(messagesPath, serializedMessageList);

            // ...and then read it back in and rehydrate the list to test the next portion.
            var mList = Newtonsoft.Json.JsonConvert.DeserializeObject<IList<Message>>(System.IO.File.ReadAllText(messagesPath));

            // Loop through each message and pull the wanted values out of the payload headers.
            var emailList = new List<EmailItem>();

            foreach (var message in mList)
            {
                if (message != null)
                {
                    emailList.Add(new EmailItem()
                    {
                        From = GetHeaderValue(message, "From"),
                        Subject = GetHeaderValue(message, "Subject"),
                        Date = GetHeaderValue(message, "Date")
                    });
                }
            }

            // Serialize this list as well.
            var serializedEmailItemList = Newtonsoft.Json.JsonConvert.SerializeObject(emailList);
            System.IO.File.WriteAllText(emailItemsPath, serializedEmailItemList);

            // Rehydrate for testing.
            var eiList = Newtonsoft.Json.JsonConvert.DeserializeObject<List<EmailItem>>(System.IO.File.ReadAllText(emailItemsPath));

            // The actual aggregation: count messages per sender, most frequent sender first.
            var senderSummary = eiList.GroupBy(g => g.From).Select(g => new { Sender = g.Key, Count = g.Count() }).OrderByDescending(g => g.Count);

            // Serialize and output the results.
            var serializedSummaryList = Newtonsoft.Json.JsonConvert.SerializeObject(senderSummary);
            System.IO.File.WriteAllText(senderSummaryPath, serializedSummaryList);
        }

        /// <summary>
        /// Returns the value of the first payload header whose name matches <paramref name="name"/>,
        /// ignoring case, or null when the message has no payload, no headers or no such header.
        /// </summary>
        /// <param name="message">Message whose payload headers are searched; may be null.</param>
        /// <param name="name">Header name to look for, e.g. "From".</param>
        /// <returns>The header value, or null when it is not present.</returns>
        private static string GetHeaderValue(Message message, string name)
        {
            var headers = message?.Payload?.Headers;
            if (headers == null)
            {
                return null;
            }

            // FirstOrDefault instead of SingleOrDefault: a repeated header must not throw,
            // and header names are compared case-insensitively.
            return headers.FirstOrDefault(h => string.Equals(h.Name, name, StringComparison.OrdinalIgnoreCase))?.Value;
        }

        /// <summary>
        /// Downloads the messages with the given ids by queuing one "get" call per id into
        /// batch requests of at most <paramref name="chunkSize"/> calls each.
        /// </summary>
        /// <param name="service">Gmail service used to build and send the requests.</param>
        /// <param name="messageIds">Ids of the messages to download.</param>
        /// <param name="chunkSize">Maximum number of calls queued into one batch request; must be greater than zero.</param>
        /// <returns>
        /// The messages that were retrieved. Calls that reported an error (or returned no content)
        /// are written to standard error and left out of the result.
        /// </returns>
        /// <exception cref="ArgumentNullException"><paramref name="service"/> or <paramref name="messageIds"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is zero or negative.</exception>
        public static IList<Message> BatchDownloadEmails(GmailService service, IEnumerable<string> messageIds, int chunkSize)
        {
            if (service == null)
            {
                throw new ArgumentNullException(nameof(service));
            }

            if (messageIds == null)
            {
                throw new ArgumentNullException(nameof(messageIds));
            }

            if (chunkSize <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(chunkSize), chunkSize, "Chunk size must be greater than zero.");
            }

            var messages = new List<Message>();

            // ChunkBy returns a lazily evaluated sequence; materialize it once so that counting
            // and indexing the batches does not re-run the chunking for every iteration.
            var batches = messageIds.ChunkBy(chunkSize).Select(chunk => chunk.ToList()).ToList();

            for (int i = 0; i < batches.Count; i++)
            {
                Console.WriteLine($"list: {i}...");
                var request = new BatchRequest(service);

                // Queue one "get" call per message id into the current batch.
                foreach (var messageId in batches[i])
                {
                    var messageBodyRequest = service.Users.Messages.Get("me", messageId);

                    request.Queue<Message>(messageBodyRequest,
                        (content, error, index, message) =>
                        {
                            // A failed call has no usable content; report it instead of storing null.
                            if (error != null || content == null)
                            {
                                Console.Error.WriteLine($"Failed to download message {messageId}: {error?.Message}");
                                return;
                            }

                            messages.Add(content);
                        });
                }

                Console.WriteLine("");
                Console.WriteLine("ExecuteAsync");
                // Execute all the requests in the queue and block until the batch has completed.
                request.ExecuteAsync().Wait();

                // Pause between batches only; there is nothing to wait for after the last one.
                if (i < batches.Count - 1)
                {
                    System.Threading.Thread.Sleep(5000);
                }
            }

            return messages;
        }
    }

    /// <summary>
    /// Header values extracted from one message; serialized to JSON and grouped by
    /// <see cref="From"/> to build the sender summary.
    /// </summary>
    public class EmailItem
    {
        /// <summary>Value of the message's "From" header, or null when it was not found.</summary>
        public string From { get; set; }
        /// <summary>Value of the message's "Subject" header, or null when it was not found.</summary>
        public string Subject { get; set; }
        /// <summary>Value of the message's "Date" header as the raw header string, or null when it was not found.</summary>
        public string Date { get; set; }
    }

    /// <summary>
    /// Extension methods for <see cref="IEnumerable{T}"/>.
    /// </summary>
    public static class IEnumerableExtensions
    {
        /// <summary>
        /// Splits a sequence into consecutive chunks of at most <paramref name="chunkSize"/> elements,
        /// preserving the original order. The last chunk may be shorter; no empty chunk is produced.
        /// </summary>
        /// <typeparam name="T">Element type of the sequence.</typeparam>
        /// <param name="source">Sequence to split.</param>
        /// <param name="chunkSize">Maximum number of elements per chunk; must be greater than zero.</param>
        /// <returns>A lazily evaluated sequence of chunks.</returns>
        /// <exception cref="ArgumentNullException"><paramref name="source"/> is null.</exception>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="chunkSize"/> is zero or negative.</exception>
        public static IEnumerable<IEnumerable<T>> ChunkBy<T>(this IEnumerable<T> source, int chunkSize)
        {
            // Validate here rather than inside the iterator so bad arguments fail at the call
            // site instead of on first enumeration.
            if (source == null)
            {
                throw new ArgumentNullException(nameof(source));
            }

            if (chunkSize <= 0)
            {
                throw new ArgumentOutOfRangeException(nameof(chunkSize), chunkSize, "Chunk size must be greater than zero.");
            }

            return ChunkIterator(source, chunkSize);
        }

        /// <summary>
        /// Streams <paramref name="source"/> once, yielding each chunk as soon as it is full.
        /// </summary>
        private static IEnumerable<IEnumerable<T>> ChunkIterator<T>(IEnumerable<T> source, int chunkSize)
        {
            var chunk = new List<T>();

            foreach (var item in source)
            {
                chunk.Add(item);

                if (chunk.Count == chunkSize)
                {
                    yield return chunk;
                    // Start a fresh list; the yielded one now belongs to the consumer.
                    chunk = new List<T>();
                }
            }

            // Emit the trailing partial chunk, if any.
            if (chunk.Count > 0)
            {
                yield return chunk;
            }
        }
    }
}

我所做的研究表明我需要使用批处理请求,但根据我找到的信息,我无法把它调整为适合我想要完成的任务。我的理解是,我要先使用批处理请求获取所有邮件 ID,然后再进行 3000 次单独调用,以获取收件箱中每封电子邮件的实际发件人、主题和日期,对吗?

您可以使用分页获取完整列表。

把上一页返回的页面令牌传给对 Users.Messages.List 的下一次调用(第一次调用用于启动操作,不要传递令牌)。当结果中不再包含邮件时,即表示已到达末尾。

这样您就可以获取邮箱中的所有邮件。

注意。我建议您将代码设为异步:如果要阅读的消息不止几条,则可能需要相当长的时间才能全部读取。

您也可以使用 PageStreamer 来获取剩余的结果。

// PageStreamer wiring: how to put a page token on the request, how to read the next
// token from a response, and how to get the items out of a response.
var pageStreamer = new PageStreamer<Google.Apis.Gmail.v1.Data.Message, UsersResource.MessagesResource.ListRequest, ListMessagesResponse, string>(
    (request, token) => request.PageToken = token,
    response => response.NextPageToken,
    response => response.Messages);
var req = service.Users.Messages.List("me");
// users.messages.list accepts at most 500 results per page (the default is 100).
req.MaxResults = 500;
// Fetch yields the messages of every page in turn, so this prints all message ids.
foreach (var result in pageStreamer.Fetch(req))
{
    Console.WriteLine(result.Id);
}

只要还有其他结果需要请求,此代码就会继续运行。批处理(Batching)在这里并不能真正帮助您,因为无法提前知道下一页的令牌是什么。