Skip to content

Commit

Permalink
Seen URLs are now ignored until they have actually been reached
Browse files Browse the repository at this point in the history
  • Loading branch information
Tobias Hinz committed Apr 28, 2017
1 parent e398aa0 commit b5f5007
Showing 1 changed file with 10 additions and 16 deletions.
26 changes: 10 additions & 16 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -93,16 +93,16 @@ RemoteSitemapGenerator.prototype.createSitemap = function () {
});

this.crawler.on('fetch404', (item) => {
// mark url to be ignored later
this.ignoreUrl(item.url);

console.log('Not found: ' + item.url);
});

this.crawler.on('fetcherror', (item) => {
// mark url to be ignored later
this.ignoreUrl(item.url);
this.crawler.on('fetchcomplete', (item) => {
if (item.stateData.code === 200) {
this.unIgnoreUrl(item.url);
}
});

this.crawler.on('fetcherror', (item) => {
console.log('Fetch error: ' + item.url);
});

Expand All @@ -112,11 +112,6 @@ RemoteSitemapGenerator.prototype.createSitemap = function () {
process.exit(1);
}

// remove blacklisted urls by setting them as ignored ones
_.forEach(this.options.blacklist, (blacklistedUrl) => {
this.ignoreUrl(blacklistedUrl);
});

this.write((err, path) => {
if (err) {
console.error(err);
Expand Down Expand Up @@ -203,15 +198,14 @@ RemoteSitemapGenerator.prototype.write = function (callback) {
};

/**
* Set a URL to be ignored
* Set a URL to be unignored
* @param {String} url the URL
*/
RemoteSitemapGenerator.prototype.ignoreUrl = function (url) {
// check if url is already known
RemoteSitemapGenerator.prototype.unIgnoreUrl = function (url) {
// get location of item
var index = _.findIndex(this.seenUrls, 'url', url);
if (index >= 0) {
this.seenUrls[index].ignore = true;
this.seenUrls[index].ignore = false;
}
};

Expand All @@ -238,7 +232,7 @@ RemoteSitemapGenerator.prototype.addSeenUrl = function (url) {
}

if (allowed) {
this.seenUrls.push({url: url, counter: 1, ignore: false});
this.seenUrls.push({url: url, counter: 1, ignore: true});
console.log('Found: ' + url);
} else {
console.log('Ignored: ' + url);
Expand Down

0 comments on commit b5f5007

Please sign in to comment.