-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathindex.js
129 lines (111 loc) · 4.88 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
var SpotifyWebApi = require("spotify-web-api-node");
var _ = require("lodash");
var bb = require("bluebird");
var fs = require("fs");
// Pull the client secret from a gitignored file. The value is trimmed so that a
// trailing newline left by an editor does not become part of the credential.
const clientSecret = fs.readFileSync("clientSecret.txt", 'utf8').trim();
const spotifyApi = new SpotifyWebApi({
    clientId : 'f58828e1e3e044989aef82999ced5027',
    clientSecret : clientSecret
});
const searchIndex = 0; // page index the first batch of searches starts at
let trackJSON = []; // processed track records accumulated across batches; reassigned when de-duplicated
const unPROCESSED = []; // holder for raw track data; no active code in this file writes to it
// Genres to loop through; every batch issues one search per genre. TODO add genres
const genres = ["Metal", "pop", "folk", "country",
    "rock", "hip hop", "reggae", "jazz",
    "edm", "classical", "blues",
    "indie", "r&b", "alternative rock",
    "rap"];
// Start collecting from the first page (function declarations are hoisted, so
// preformSearch is already defined here).
preformSearch(searchIndex);
// Performs one batch of genre searches and, after a one-second pause, calls
// itself again until more than 5000 track records have been collected. The
// records are then de-duplicated and written to ./JSON/tracks.json.
// Params:
// index: the page index (50 tracks per page) that this batch of searches starts at
function preformSearch(index){
    const pagesPerBatch = 1; // result pages requested per genre in each batch
    const nextIndex = index + pagesPerBatch; // first page of the following batch
    spotifyApi.clientCredentialsGrant().then(function(data){
        // Retrieve access token from SpotifyWebApi endpoint
        spotifyApi.setAccessToken(data.body['access_token']); // save access token to api object
        const searches = [];
        // Save one raw search response to disk as an example of the unprocessed format
        spotifyApi.searchTracks("genre:rock", {limit : 1, offset:0}).then(function(data){
            // The response is an object, so count the tracks it carries rather than reading data.length
            const exampleCount = data.body.tracks.items.length;
            fs.writeFile("./JSON/unprocessedTrackExample.json", JSON.stringify(data), function(err){
                if (err) {return console.log("an error occurred while writing JSON file:", err)}
                console.log("successfully wrote JSON array of " + exampleCount + " length.");
            });
        }, function(err){
            console.log("an error occurred while querying", err);
        });
        // Query spotify servers for songs by genre, one request per genre per page
        for (let page = index; page < nextIndex; page++){
            genres.forEach(function(genre){
                searches.push(spotifyApi.searchTracks("genre:" + genre, {limit : 50, offset:(page*50)}).then(function(data){
                    data.body.tracks.items.forEach(function(element){
                        trackJSON.push({
                            name: nameArrayifyer(element.name)
                        });
                    });
                }, function(err){
                    // A failed search is logged and skipped so the rest of the batch still counts
                    console.log("an error occurred while querying", err);
                }));
            });
        }
        bb.all(searches).done(function(){
            console.log("Length with duplicates: " + trackJSON.length);
            if (trackJSON.length > 5000){
                // NOTE(review): the threshold is checked before de-duplication, so the
                // written file can hold fewer than 5000 records.
                trackJSON = _.uniqWith(trackJSON, _.isEqual); // remove duplicate search values
                fs.writeFile("./JSON/tracks.json", JSON.stringify(trackJSON), function(err){
                    if (err) {return console.log("an error occurred while writing JSON file:", err)}
                    console.log("successfully wrote JSON array of " + trackJSON.length + " length.");
                });
            } else {
                // NOTE(review): trackJSON only grows when a search succeeds, so if every
                // search keeps failing this branch recurses indefinitely; consider a retry cap.
                return delay(1000).then(function(){
                    preformSearch(nextIndex);
                });
            }
        });
    }, function(err){
        console.log('Something went wrong when retrieving an access token', err);
    });
}
// Returns a promise that settles (with no value) once a timer of t milliseconds fires.
// Params:
// t: how long to wait, in milliseconds
function delay(t) {
    const armTimer = (finish) => {
        setTimeout(finish, t);
    };
    return new Promise(armTimer);
}
// Categorizes a popularity score into a labelled bucket for better frequent pattern matching
// Params:
// popularity: popularity of a track
// returns: the label of the category the popularity falls into
function popCat(popularity){
    // Ordered from highest floor to lowest; the first floor the score reaches wins.
    const categories = [
        [90, "most popular (90 - 100)"],
        [80, "very popular (80 - 89)"],
        [70, "fairly popular (70 - 79)"],
        [60, "somewhat popular (60 - 69)"],
        [50, "popular (50 - 59)"],
        [40, "not very popular (40 - 49)"],
        [30, "not popular (30 - 39)"]
    ];
    for (const [floor, label] of categories){
        if (popularity >= floor){
            return label;
        }
    }
    // Scores below 30 fall through to here, as do values such as undefined or NaN
    // for which every comparison above is false.
    return "unpopular (<30)";
}
// Removes punctuation from a track name and splits it into its distinct words
// Params:
// name: name of the track to split and remove punctuation from
// returns: an array of the lower-cased words in the song title, in first-seen
//          order, with duplicates and empty tokens removed
function nameArrayifyer(name){
    const words = name
        .toLowerCase()
        // remove every extraneous hyphen, e.g. "data-mining" keeps its hyphen but "rocky - radio edit" does not
        .replace(/ -/g, "")
        // replace all punctuation besides -
        .replace(/[\u2000-\u206F\u2E00-\u2E7F\\'!"#$%&()*+,.\/:;<=>?@\[\]^_`{|}~]/g, "")
        .split(/[ ,]+/)
        // leading/trailing separators (or an all-punctuation title) leave empty tokens behind
        .filter(function(word){ return word.length > 0; });
    // A Set keeps the first occurrence of each word and preserves insertion order
    return Array.from(new Set(words));
}