From 49dd8a6dd383688e12a2665a9dbe22c34bbcd37b Mon Sep 17 00:00:00 2001
From: Will Duff
Date: Tue, 9 Feb 2016 22:19:20 -0800
Subject: [PATCH] Support loading more than 500 Google Blogger posts

The Google Blogger v3 API returns at most 500 items per request. Each
Blogger response returns a `NextPageToken` to get the next 500 items, so
I added support to loop and download all posts and pages, or stop early
at a given maximum.

You can see this by going to *File* > *Open recent post* and then
selecting a Google Blogger blog.
---
Review changes folded into this patch:
 - ListPages now forwards the previous response's NextPageToken; without it
   every follow-up request returned the first page again.
 - ListAllPages keeps paging when maxPages is null; the lifted comparison
   `count < maxPages` is false for null and ended the loop after one request.
 - Results are accumulated in a List instead of a growing Concat chain that
   was re-enumerated by Count() on every iteration.
 - The post-image blob id on the `index` line below was not regenerated.

 .../Clients/GoogleBloggerv3Client.cs          | 110 +++++++++++++++---
 1 file changed, 95 insertions(+), 15 deletions(-)

diff --git a/src/managed/OpenLiveWriter.BlogClient/Clients/GoogleBloggerv3Client.cs b/src/managed/OpenLiveWriter.BlogClient/Clients/GoogleBloggerv3Client.cs
index 4027ebfc..907e44c1 100644
--- a/src/managed/OpenLiveWriter.BlogClient/Clients/GoogleBloggerv3Client.cs
+++ b/src/managed/OpenLiveWriter.BlogClient/Clients/GoogleBloggerv3Client.cs
@@ -37,6 +37,11 @@ namespace OpenLiveWriter.BlogClient.Clients
         public static string BloggerServiceScope = BloggerService.Scope.Blogger;
         public static char LabelDelimiter = ',';
 
+        /// <summary>
+        /// Maximum number of results the Google Blogger v3 API will return in one request.
+        /// </summary>
+        public static int MaxResultsPerRequest = 500;
+
         public static Task<UserCredential> GetOAuth2AuthorizationAsync(string blogId, CancellationToken taskCancellationToken)
         {
             // This async task will either find cached credentials in the IDataStore provided, or it will pop open a
@@ -337,8 +342,18 @@ namespace OpenLiveWriter.BlogClient.Clients
             return new BlogPostKeyword[] { };
         }
 
-        private PostList ListRecentPosts(string blogId, int maxPosts, DateTime? now, PostsResource.ListRequest.StatusEnum status)
+        /// <summary>
+        /// Fetches one page of posts with the given status, continuing after <paramref name="previousPage"/>
+        /// when one is supplied. Returns an empty list once the previous page was the last one.
+        /// </summary>
+        private PostList ListRecentPosts(string blogId, int maxPosts, DateTime? now, PostsResource.ListRequest.StatusEnum status, PostList previousPage)
         {
+            if (previousPage != null && string.IsNullOrWhiteSpace(previousPage.NextPageToken))
+            {
+                // The previous page was also the last page, so do nothing and return an empty list.
+                return new PostList();
+            }
+
             var recentPostsRequest = GetService().Posts.List(blogId);
             if (now.HasValue)
             {
@@ -348,21 +363,46 @@ namespace OpenLiveWriter.BlogClient.Clients
             recentPostsRequest.MaxResults = maxPosts;
             recentPostsRequest.OrderBy = PostsResource.ListRequest.OrderByEnum.Published;
             recentPostsRequest.Status = status;
+            recentPostsRequest.PageToken = previousPage?.NextPageToken;
 
             return recentPostsRequest.Execute();
         }
 
         public BlogPost[] GetRecentPosts(string blogId, int maxPosts, bool includeCategories, DateTime? now)
         {
-            var draftRecentPostsList = ListRecentPosts(blogId, maxPosts, now, PostsResource.ListRequest.StatusEnum.Draft);
-            var liveRecentPostsList = ListRecentPosts(blogId, maxPosts, now, PostsResource.ListRequest.StatusEnum.Live);
-            var scheduledRecentPostsList = ListRecentPosts(blogId, maxPosts, now, PostsResource.ListRequest.StatusEnum.Scheduled);
+            // Blogger requires separate API calls to get drafts vs. live vs. scheduled posts. We aggregate each
+            // type of post separately.
+            IList<Post> draftRecentPosts = new List<Post>();
+            IList<Post> liveRecentPosts = new List<Post>();
+            IList<Post> scheduledRecentPosts = new List<Post>();
+            var allPosts = new List<Post>();
 
-            var draftRecentPosts = draftRecentPostsList.Items ?? new List<Post>();
-            var liveRecentPosts = liveRecentPostsList.Items ?? new List<Post>();
-            var scheduledRecentPosts = scheduledRecentPostsList.Items ?? new List<Post>();
+            // We keep around the PostList returned by each request to support pagination.
+            PostList draftRecentPostsList = null;
+            PostList liveRecentPostsList = null;
+            PostList scheduledRecentPostsList = null;
 
-            var allPosts = draftRecentPosts.Concat(liveRecentPosts).Concat(scheduledRecentPosts);
+            // Google has a per-request results limit on their API.
+            var maxResultsPerRequest = Math.Min(maxPosts, MaxResultsPerRequest);
+
+            // We break out of the following loop depending on which one of these two cases we hit:
+            //  (a) the number of all blog posts ever posted to this blog is greater than maxPosts, so eventually
+            //      allPosts.Count will reach maxPosts and we can stop making requests.
+            //  (b) the number of all blog posts ever posted to this blog is less than maxPosts, so eventually our
+            //      calls to ListRecentPosts() will return 0 results and we need to stop making requests.
+            do
+            {
+                draftRecentPostsList = ListRecentPosts(blogId, maxResultsPerRequest, now, PostsResource.ListRequest.StatusEnum.Draft, draftRecentPostsList);
+                liveRecentPostsList = ListRecentPosts(blogId, maxResultsPerRequest, now, PostsResource.ListRequest.StatusEnum.Live, liveRecentPostsList);
+                scheduledRecentPostsList = ListRecentPosts(blogId, maxResultsPerRequest, now, PostsResource.ListRequest.StatusEnum.Scheduled, scheduledRecentPostsList);
+
+                draftRecentPosts = draftRecentPostsList?.Items ?? new List<Post>();
+                liveRecentPosts = liveRecentPostsList?.Items ?? new List<Post>();
+                scheduledRecentPosts = scheduledRecentPostsList?.Items ?? new List<Post>();
+                allPosts.AddRange(draftRecentPosts.Concat(liveRecentPosts).Concat(scheduledRecentPosts));
+
+            } while (allPosts.Count < maxPosts && (draftRecentPosts.Count > 0 || liveRecentPosts.Count > 0 || scheduledRecentPosts.Count > 0));
+
             return allPosts
                 .OrderByDescending(p => p.Published)
                 .Take(maxPosts)
@@ -430,22 +470,62 @@ namespace OpenLiveWriter.BlogClient.Clients
             return ConvertToBlogPost(getPageRequest.Execute());
         }
 
-        private PageList ListPages(string blogId, int? maxPages, PagesResource.ListRequest.StatusEnum status)
+        /// <summary>
+        /// Fetches one page of blog pages with the given status, continuing after <paramref name="previousPage"/>
+        /// when one is supplied. Returns an empty list once the previous page was the last one.
+        /// </summary>
+        private PageList ListPages(string blogId, int? maxPages, PagesResource.ListRequest.StatusEnum status, PageList previousPage)
         {
+            if (previousPage != null && string.IsNullOrWhiteSpace(previousPage.NextPageToken))
+            {
+                // The previous page was also the last page, so do nothing and return an empty list.
+                return new PageList();
+            }
+
             var getPagesRequest = GetService().Pages.List(blogId);
-            getPagesRequest.MaxResults = maxPages;
+            if (maxPages.HasValue)
+            {
+                // Google has a per-request results limit on their API.
+                getPagesRequest.MaxResults = Math.Min(maxPages.Value, MaxResultsPerRequest);
+            }
             getPagesRequest.Status = status;
+            getPagesRequest.PageToken = previousPage?.NextPageToken;
             return getPagesRequest.Execute();
         }
 
+        /// <summary>
+        /// Downloads draft and live pages for the blog, one request page at a time, until every page has been
+        /// fetched or at least <paramref name="maxPages"/> pages (when a maximum is given) have been collected.
+        /// </summary>
         private IEnumerable<Page> ListAllPages(string blogId, int? maxPages)
         {
-            var draftPageList = ListPages(blogId, maxPages, PagesResource.ListRequest.StatusEnum.Draft);
-            var livePageList = ListPages(blogId, maxPages, PagesResource.ListRequest.StatusEnum.Live);
+            // Blogger requires separate API calls to get draft vs. live pages. We aggregate each type of page
+            // separately.
+            IList<Page> draftPages = new List<Page>();
+            IList<Page> livePages = new List<Page>();
+            var allPages = new List<Page>();
 
-            var draftPages = draftPageList.Items ?? new List<Page>();
-            var livePages = livePageList.Items ?? new List<Page>();
-            return draftPages.Concat(livePages);
+            // We keep around the PageList returned by each request to support pagination.
+            PageList draftPagesList = null;
+            PageList livePagesList = null;
+
+            // We break out of the following loop depending on which one of these two cases we hit:
+            //  (a) a maximum was requested and the blog has at least that many pages, so eventually
+            //      allPages.Count reaches maxPages and we can stop making requests.
+            //  (b) no maximum was requested (maxPages is null) or the blog has fewer pages than maxPages, so
+            //      eventually our calls to ListPages() return 0 results and we need to stop making requests.
+            do
+            {
+                draftPagesList = ListPages(blogId, maxPages, PagesResource.ListRequest.StatusEnum.Draft, draftPagesList);
+                livePagesList = ListPages(blogId, maxPages, PagesResource.ListRequest.StatusEnum.Live, livePagesList);
+
+                draftPages = draftPagesList?.Items ?? new List<Page>();
+                livePages = livePagesList?.Items ?? new List<Page>();
+                allPages.AddRange(draftPages.Concat(livePages));
+
+            } while ((!maxPages.HasValue || allPages.Count < maxPages.Value) && (draftPages.Count > 0 || livePages.Count > 0));
+
+            return allPages;
         }
 
         public PageInfo[] GetPageList(string blogId)