From 59355609ecb3c2e396a289b28f34d5116fc89b8e Mon Sep 17 00:00:00 2001 From: Eric Bidelman Date: Tue, 18 Dec 2018 12:38:20 -0800 Subject: [PATCH] Use CDP Page.setDownloadBehavior and headless for demo --- verify_download.js | 129 ++++++++++++---------------------- verify_download2.js | 168 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 214 insertions(+), 83 deletions(-) create mode 100644 verify_download2.js diff --git a/verify_download.js b/verify_download.js index a27880f..6f3b2c7 100644 --- a/verify_download.js +++ b/verify_download.js @@ -28,12 +28,12 @@ * node verify_download.js */ -const puppeteer = require('puppeteer'); const fs = require('fs'); -const path = require('path'); const os = require('os'); +const path = require('path'); +const puppeteer = require('puppeteer'); -const DOWNLOADS_FOLDER = `${os.homedir()}/Downloads`; +const DOWNLOAD_PATH = path.resolve(__dirname, 'downloads'); /** * From @xprudhomme. @@ -43,9 +43,9 @@ const DOWNLOADS_FOLDER = `${os.homedir()}/Downloads`; * @param {string} filePath * @param {integer} timeout * @returns {!Promise} Resolves when file has been created. Rejects - * if timout is reached. + * if timeout is reached. */ -function checkFileExists(filePath, timeout=15000) { +function waitForFileExists(filePath, timeout=15000) { return new Promise((resolve, reject) => { const dir = path.dirname(filePath); const basename = path.basename(filePath); @@ -73,96 +73,59 @@ function checkFileExists(filePath, timeout=15000) { }); } -/** - * @param {!Browser} browser - * @param {string} url The URL of the download file to wait for. - * @returns {!Promise} Metadata about the latest file in Download Manager. - */ -async function waitForFileToDownload(browser, url) { - const downloadPage = await browser.newPage(); - // Note: navigating to this page only works in headful chrome. - await downloadPage.goto('chrome://downloads/'); - - // Wait for our download to show up in the list by matching on its url. 
- const jsHandle = await downloadPage.waitForFunction(downloadUrl => { - const manager = document.querySelector('downloads-manager'); - const downloads = manager.items_.length; - const lastDownload = manager.items_[0]; - if (downloads && lastDownload.url === downloadUrl && - lastDownload.state === 'COMPLETE') { - return manager.items_[0]; - } - }, {polling: 100}, url); - - const fileMeta = await jsHandle.jsonValue(); - - await downloadPage.close(); - - return fileMeta; -} - -/** - * @param {!Browser} browser - * @param {string} url The url of the page to navigate to. - * @param {string} text The link with this text to find and click on the page. - * @returns {!Promise} The download resource's url. - */ -async function clickDownloadLink(browser, url, text) { - const page = await browser.newPage(); - await page.goto(url, {waitUntil: 'networkidle2'}); - - const downloadUrl = await page.evaluate((text) => { - const link = document.evaluate(`//a[text()="${text}"]`, document, - null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue; - if (link) { - link.click(); - return link.href; - } - return null; - }, text); - - await page.close(); - - return downloadUrl; -} - (async() => { -const browser = await puppeteer.launch({ - headless: false, - // dumpio: true, -}); +const browser = await puppeteer.launch(); -// TODO: setDownloadBehavior would be a good approach, as we could check -// that the file shows up in the location specified by downloadPath. Howeverm -// that arg doesn't currently work. -// const client = await page.target().createCDPSession(); -// await client.send('Page.setDownloadBehavior', { -// behavior: 'allow', -// downloadPath: path.resolve(__dirname, 'downloads'), -// }); +const page = await browser.newPage(); -// await client.detach(); +// Change from the default ~/Downloads folder to our own. 
+const client = await page.target().createCDPSession(); +await client.send('Page.setDownloadBehavior', { + behavior: 'allow', + downloadPath: DOWNLOAD_PATH, +}); -// 1. navigate to a page with a bunch links to download. -// 2. click the "Short Selling (csv)" link on the page. The browser force downloads the file. const url = 'https://www.nseindia.com/products/content/equities/equities/homepage_eq.htm'; -const downloadUrl = await clickDownloadLink(browser, url, 'Short Selling (csv)'); +await page.goto(url); +// Wait for main content area to have list of links. +await page.waitForSelector('.main_content', {visible: true, timeout: 5000}); + +const downloadUrl = await page.evaluate(() => { + const link = document.evaluate(`//a[text()="Short Selling (csv)"]`, document, + null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue; + if (link) { + // Prevent link from opening up in a new tab. Puppeteer won't respect + // the Page.setDownloadBehavior on the new tab and the file ends up in the + // default download folder. + link.target = ''; + link.click(); + return link.href; + } + return null; +}); if (!downloadUrl) { - console.error('Did not find download link!'); + console.warn('Did not find link to download!'); + await browser.close(); return; } -// 3. Open chrome:downloads and wait for the file to be downloaded. -const fileMeta = await waitForFileToDownload(browser, downloadUrl); -console.log(`"${fileMeta.file_name}" was downloaded`); +// Wait for file response to complete. NOTE(review): this listener is attached after link.click() has already run above, so a response that arrived earlier would be missed and this wait would never resolve (confirm). +await new Promise(resolve => { + page.on('response', async resp => { + if (resp.url() === downloadUrl) { + resolve(); + } + }); +}); + +console.log('Downloaded.'); -// 4. Optionally check that the file really ends up in the expected location -// on the filesystem. -const exists = await checkFileExists(`${DOWNLOADS_FOLDER}/${fileMeta.file_name}`); -console.assert(exists, `${fileMeta.file_name} was not downloaded to correct location.`); +// Verify it's on the file system. 
+await waitForFileExists(`${DOWNLOAD_PATH}/ShortSelling.csv`); +console.log('Exists!'); await browser.close(); -})(); \ No newline at end of file +})(); diff --git a/verify_download2.js b/verify_download2.js new file mode 100644 index 0000000..f00879d --- /dev/null +++ b/verify_download2.js @@ -0,0 +1,168 @@ +/** + * Copyright 2018 Google Inc. All rights reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * @author ebidel@ (Eric Bidelman) + */ + +/** + * Note: this approach only works in headful Chrome. + * Another approach to verifying a file gets downloaded. Shows how to click a + * file download link and verify that the file gets downloaded in the + * chrome:downloads page. + * + * Install: + * npm i puppeteer + * Run: + * node verify_download2.js + */ + +const puppeteer = require('puppeteer'); +const fs = require('fs'); +const path = require('path'); +const os = require('os'); + +const DOWNLOADS_FOLDER = `${os.homedir()}/Downloads`; + +/** + * From @xprudhomme. + * Check if file exists, watching containing directory meanwhile. + * Resolve if the file exists, or if the file is created before the timeout + * occurs. + * @param {string} filePath + * @param {number} timeout Milliseconds to wait before rejecting. + * @returns {!Promise} Resolves when file has been created. Rejects + * if timeout is reached. 
+ */ +function checkFileExists(filePath, timeout=15000) { + return new Promise((resolve, reject) => { + const dir = path.dirname(filePath); + const basename = path.basename(filePath); + + const watcher = fs.watch(dir, (eventType, filename) => { + if (eventType === 'rename' && filename === basename) { + clearTimeout(timer); + watcher.close(); + resolve(); + } + }); + + const timer = setTimeout(() => { + watcher.close(); + reject(new Error(' [checkFileExists] File does not exist, and was not created during the timeout delay.')); + }, timeout); + + fs.access(filePath, fs.constants.R_OK, err => { + if (!err) { + clearTimeout(timer); + watcher.close(); + resolve(); + } + }); + }); +} + +/** + * @param {!Browser} browser + * @param {string} url The URL of the download file to wait for. + * @returns {!Promise} Metadata about the latest file in Download Manager. + */ +async function waitForFileToDownload(browser, url) { + const downloadPage = await browser.newPage(); + // Note: navigating to this page only works in headful chrome. + await downloadPage.goto('chrome://downloads/'); + + // Wait for our download to show up in the list by matching on its url. + const jsHandle = await downloadPage.waitForFunction(downloadUrl => { + const manager = document.querySelector('downloads-manager'); + const downloads = manager.items_.length; + const lastDownload = manager.items_[0]; + if (downloads && lastDownload.url === downloadUrl && + lastDownload.state === 'COMPLETE') { + return manager.items_[0]; + } + }, {polling: 100}, url); + + const fileMeta = await jsHandle.jsonValue(); + + await downloadPage.close(); + + return fileMeta; +} + +/** + * @param {!Browser} browser + * @param {string} url The url of the page to navigate to. + * @param {string} text The link with this text to find and click on the page. + * @returns {!Promise} The download resource's url. 
+ */ +async function clickDownloadLink(browser, url, text) { + const page = await browser.newPage(); + await page.goto(url, {waitUntil: 'networkidle2'}); + + const downloadUrl = await page.evaluate((text) => { + const link = document.evaluate(`//a[text()="${text}"]`, document, + null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue; + if (link) { + link.click(); + return link.href; + } + return null; + }, text); + + await page.close(); + + return downloadUrl; +} + +(async() => { + +const browser = await puppeteer.launch({ + headless: false, + // dumpio: true, +}); + +// TODO: setDownloadBehavior would be a good approach, as we could check +// that the file shows up in the location specified by downloadPath. However, +// that arg doesn't currently work. +// const client = await page.target().createCDPSession(); +// await client.send('Page.setDownloadBehavior', { +// behavior: 'allow', +// downloadPath: path.resolve(__dirname, 'downloads'), +// }); + +// await client.detach(); + +// 1. navigate to a page with a bunch of links to download. +// 2. click the "Short Selling (csv)" link on the page. The browser force downloads the file. +const url = 'https://www.nseindia.com/products/content/equities/equities/homepage_eq.htm'; +const downloadUrl = await clickDownloadLink(browser, url, 'Short Selling (csv)'); + +if (!downloadUrl) { + console.error('Did not find download link!'); + return; +} + +// 3. Open chrome:downloads and wait for the file to be downloaded. +const fileMeta = await waitForFileToDownload(browser, downloadUrl); +console.log(`"${fileMeta.file_name}" was downloaded`); + +// 4. Optionally check that the file really ends up in the expected location +// on the filesystem. NOTE(review): checkFileExists() resolves with no value, so `exists` is always undefined when it reaches the assert below. +const exists = await checkFileExists(`${DOWNLOADS_FOLDER}/${fileMeta.file_name}`); +console.assert(exists, `${fileMeta.file_name} was not downloaded to correct location.`); + +await browser.close(); + +})(); \ No newline at end of file