diff --git a/README.md b/README.md index 796db82d..866e9aaf 100644 --- a/README.md +++ b/README.md @@ -89,7 +89,7 @@ function launch() { onSuccess: (result => { console.log('onSuccess', result); }), - ensureClearCache: false, // Set false so that cache won't be cleared when closing the crawler + persistCache: true, // Set true so that cache won't be cleared when closing the crawler    cache,  }); } @@ -154,7 +154,7 @@ HCCrawler provides method to launch or connect to a HeadlessChrome/Chromium. * `maxConcurrency` <[number]> Maximum number of pages to open concurrently, defaults to `10`. * `maxRequest` <[number]> Maximum number of requests, defaults to `0`. Pass `0` to disable the limit. * `cache` <[Cache]> A cache object which extends [BaseCache](#class-basecache) to remember and skip duplicate requests, defaults to [SessionCache](#class-sessioncache). Pass `null` if you don't want to skip duplicate requests. - * `ensureClearCache` <[boolean]> Whether to clear cache on closing or disconnecting from the browser, defaults to `true`. + * `persistCache` <[boolean]> Whether to persist cache on closing or disconnecting from the browser, defaults to `false`. * returns: > Promise which resolves to HCCrawler instance. This method connects to an existing Chromium instance. The following options are passed straight to [puppeteer.connect([options])](https://github.com/GoogleChrome/puppeteer/blob/master/docs/api.md#puppeteerconnectoptions). @@ -177,7 +177,7 @@ url, timeout, priority, delay, retryCount, retryDelay, jQuery, device, username, * `maxConcurrency` <[number]> Maximum number of pages to open concurrently, defaults to `10`. * `maxRequest` <[number]> Maximum number of requests, defaults to `0`. Pass `0` to disable the limit. * `cache` <[Cache]> A cache object which extends [BaseCache](#class-basecache) to remember and skip duplicate requests, defaults to [SessionCache](#class-sessioncache). Pass `null` if you don't want to skip duplicate requests. 
- * `ensureClearCache` <[boolean]> Whether to clear cache on closing or disconnecting from the browser, defaults to `true`. + * `persistCache` <[boolean]> Whether to persist cache on closing or disconnecting from the browser, defaults to `false`. * returns: > Promise which resolves to HCCrawler instance. The method launches a HeadlessChrome/Chromium instance. The following options are passed straight to [puppeteer.launch([options])](https://github.com/GoogleChrome/puppeteer/blob/master/docs/api.md#puppeteerlaunchoptions). @@ -309,7 +309,7 @@ HCCrawler.launch({ cache: null }); ### class: RedisCache -Passing a `RedisCache` object to the [HCCrawler.connect([options])](#hccrawlerconnectoptions)'s `cache` options allows you to persist requested urls in Redis and prevents from requesting same urls in a distributed servers' environment. It also works well with its `ensureClearCache` option to be false. +Passing a `RedisCache` object to the [HCCrawler.connect([options])](#hccrawlerconnectoptions)'s `cache` options allows you to persist requested urls in Redis and prevents from requesting same urls in a distributed servers' environment. It also works well with its `persistCache` option set to `true`. Its constructing options are passed to [NodeRedis's redis.createClient([options])](https://github.com/NodeRedis/node_redis#rediscreateclient)'s options. @@ -320,7 +320,7 @@ const RedisCache = require('headless-chrome-crawler/cache/redis'); const cache = new SessionRedis({ host: '127.0.0.1', port: 6379 }); HCCrawler.launch({ - ensureClearCache: false, // Set false so that cache won't be cleared when closing the crawler + persistCache: true, // Set true so that cache won't be cleared when closing the crawler cache, }); // ... 
@@ -328,7 +328,7 @@ HCCrawler.launch({ ### class: BaseCache -You can create your own cache by extending the [BaseCache's interfaces](https://github.com/yujiosaka/headless-chrome-crawler/blob/master/cache/base.js) and pass its object to the [HCCrawler.connect([options])](#hccrawlerconnectoptions)'s `cache` options. +You can create your own cache by extending the [BaseCache's interfaces](https://github.com/yujiosaka/headless-chrome-crawler/blob/master/cache/base.js) and pass its object to the [HCCrawler.connect([options])](#hccrawlerconnectoptions)'s `cache` options. Here is an example of creating a file based cache. diff --git a/examples/redis-cache.js b/examples/redis-cache.js index dd28909a..54143ef8 100644 --- a/examples/redis-cache.js +++ b/examples/redis-cache.js @@ -12,7 +12,7 @@ function launch() { onSuccess: (result => { console.log('onSuccess', result); }), - ensureClearCache: false, // Set false so that cache won't be cleared when closing the crawler + persistCache: true, // Set true so that cache won't be cleared when closing the crawler cache: new RedisCache(), // Passing no options expects Redis to be run in the local machine. }); } diff --git a/lib/hccrawler.js b/lib/hccrawler.js index 91c4d11c..4e27dfff 100644 --- a/lib/hccrawler.js +++ b/lib/hccrawler.js @@ -30,7 +30,7 @@ const HCCRAWLER_OPTIONS = [ 'maxConcurrency', 'maxRequest', 'cache', - 'clearCacheOnEnd', + 'persistCache', ]; const deviceNames = Object.keys(devices); @@ -84,7 +84,7 @@ class HCCrawler { retryDelay: 10000, jQuery: true, cache: new SessionCache(), - ensureClearCache: true, + persistCache: false, }, options); this._pQueue = new PQueue({ concurrency: this._options.maxConcurrency, @@ -342,7 +342,7 @@ class HCCrawler { * @private */ _clearCacheOnEnd() { - if (this._options.ensureClearCache) return this._clearCache(); + if (!this._options.persistCache) return this._clearCache(); return Promise.resolve(); }