Update requests to use new Amazon pagination tokens properly

This commit is contained in:
Tristan Homsi
2018-10-03 01:17:14 -04:00
parent c0b8248689
commit 5a8ac083dd
4 changed files with 49 additions and 27 deletions

6
dist/bookcision.js vendored

File diff suppressed because one or more lines are too long

2
npm-shrinkwrap.json generated
View File

@@ -1,6 +1,6 @@
{
"name": "bookcision",
"version": "2.2.0",
"version": "2.2.1",
"dependencies": {
"abbrev": {
"version": "1.1.0",

View File

@@ -29,7 +29,10 @@ function BookScraper(options) {
asin: options.asin,
title: null,
authors: null,
highlights: []
highlights: [],
contentLimitState: null,
nextPageStartToken: null,
};
this.queueNextChunkScraping();
@@ -44,13 +47,26 @@ BookScraper.prototype.queueNextChunkScraping = function() {
amazonScraper.getAmazonDeviceType()
);
var url = util.format(
'https://%s/kp/notebook?purpose=NOTEBOOK&amazonDeviceType=%s&appName=notebook&asin=%s&contentLimitState=%s',
this.host,
amazonScraper.getAmazonDeviceType(),
this.options.asin,
amazonScraper.getContentLimitState()
);
var url;
if (this.scrapedData.nextPageStartToken) {
url = util.format(
'https://%s/kp/notebook?asin=%s&contentLimitState=%s&token=%s',
this.host,
this.options.asin,
this.scrapedData.contentLimitState,
this.scrapedData.nextPageStartToken
);
}
else {
url = util.format(
'https://%s/kp/notebook?purpose=NOTEBOOK&amazonDeviceType=%s&appName=notebook&asin=%s&contentLimitState=&',
this.host,
amazonScraper.getAmazonDeviceType(),
this.options.asin
);
}
if (index !== 0) {
url += '&index=' + index;
@@ -63,11 +79,15 @@ BookScraper.prototype.onChunkScraped = function(
asin,
title,
authors,
highlights
highlights,
contentLimitState,
nextPageStartToken
) {
var that = this;
that.scrapedData.title = that.scrapedData.title || title;
that.scrapedData.authors = that.scrapedData.authors || authors;
that.scrapedData.contentLimitState = contentLimitState;
that.scrapedData.nextPageStartToken = nextPageStartToken;
if (highlights && highlights.length > 0) {
log(
@@ -149,6 +169,7 @@ BookScraper.prototype.scrapeChunk = function(index, url) {
function(scraper) {
var highlights = [];
var title, authors;
var contentLimitState, nextPageStartToken;
if (options.failCallback) {
scraper.on('error', function(err) {
@@ -157,7 +178,7 @@ BookScraper.prototype.scrapeChunk = function(index, url) {
}
scraper.on('dom', function() {
that.onChunkScraped(options.asin, title, authors, highlights);
that.onChunkScraped(options.asin, title, authors, highlights, contentLimitState, nextPageStartToken);
});
// <h3 class="a-spacing-top-small a-color-base kp-notebook-selectable kp-notebook-metadata">The Blank Slate: The Modern Denial of Human Nature</h3>
@@ -191,6 +212,19 @@ BookScraper.prototype.scrapeChunk = function(index, url) {
}
);
scraper.select(
'.kp-notebook-content-limit-state',
function(clsInput) {
contentLimitState = clsInput.attribs['value'];
}
);
scraper.select(
'.kp-notebook-annotations-next-page-start',
function(clsInput) {
nextPageStartToken = clsInput.attribs['value'];
}
);
// div#kp-notebook-annotations div
// Location:
// - <input type="hidden" name="" value="6498" id="kp-annotation-location">
@@ -260,6 +294,7 @@ BookScraper.prototype.scrapeChunk = function(index, url) {
}
}
});
}.bind(this)
);
};

View File

@@ -68,22 +68,9 @@ module.exports = (function() {
return deviceType;
};
/**
 * Reads the content-limit state from the current page.
 *
 * Looks up every `.kp-notebook-content-limit-state` element through the
 * global `$` (presumably jQuery -- confirm against the page environment),
 * takes the last match and returns whatever its `.val()` yields.
 *
 * @returns {*} `null` when `TEST` is truthy or when no global `$` is
 *   available; otherwise the result of `.last().val()` on the selection.
 */
var extractContentLimitState = function() {
  if (TEST) {
    return null;
  }
  // `typeof` always evaluates to a string, so it has to be compared with the
  // string 'undefined'. Comparing with the value `undefined` is always true,
  // which made the `null` fallback unreachable and let `$(...)` throw a
  // ReferenceError whenever `$` was not defined.
  return typeof $ !== 'undefined'
    ? $('.kp-notebook-content-limit-state')
        .last()
        .val()
    : null;
};
return {
findASIN: extractASIN,
findUser: functional.memoize(extractUser),
getAmazonDeviceType: functional.memoize(extractAmazonDeviceType),
getContentLimitState: extractContentLimitState,
};
})();