-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinginious-git.js
585 lines (502 loc) · 21.5 KB
/
inginious-git.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
const simpleGitModule = require('simple-git/promise');
const FileHound = require('filehound');
const partition = require('lodash.partition');
const groupBy = require('lodash.groupby');
const yaml = require('js-yaml');
const {readFileSync, existsSync} = require("fs");
const child_process = require("child_process");
const path = require("path");
const dirname = path.dirname;
const slash = require('slash');
const isAbsoluteUrl = require('is-absolute-url');
/**
 * Tell whether something exists on disk at the given location.
 * Any error raised while probing is reported as "does not exist".
 *
 * @param {string} dir - path to probe
 * @returns {boolean} true when the path exists, false otherwise
 */
function exists(dir) {
    let found;
    try {
        found = existsSync(dir);
    } catch (probe_error) {
        found = false;
    }
    return found;
}
/**
 * Convert a reStructuredText snippet by piping it through the `pandoc` executable.
 * Despite its name, the function asks pandoc for HTML5 output (`--to=html5`), not Markdown.
 *
 * @param {string} str - reStructuredText source, sent to pandoc on stdin
 * @returns {string} the converted markup, or a fixed error message when the conversion fails
 *                   (pandoc missing, non-zero exit status, invalid input, ...)
 */
const rst2md = (str) => {
    const failure_message = "An error occurs during conversion from RST TO Markdown : please retry or give up";
    try {
        const result = child_process.spawnSync(
            "pandoc",
            ["--from=rst", "--to=html5", "--no-highlight"],
            {input: str, encoding: "utf-8"}
        );
        // spawnSync does not throw when the process cannot be started or fails:
        // it reports the problem through `error` / `status` and leaves `stdout` null or partial.
        if (result.error || result.status !== 0 || typeof result.stdout !== "string") {
            console.log(result.error || result.stderr);
            return failure_message;
        }
        return result.stdout;
    } catch (e) {
        // Reached when spawnSync itself rejects its arguments (e.g. a non-string input)
        console.log(e);
        return failure_message;
    }
};
// Handle tag crawling for INGINIOUS tasks on one GIT
module.exports = async function (options) {
const {
workingDirectory,
url: gitURL
} = options;
// git clone the given folder in workingDir
const simpleGit = simpleGitModule(workingDirectory);
const folderName = gitURL.substr(gitURL.lastIndexOf("/") + 1).replace(/\.git/, "");
const gitFolder = path.resolve(workingDirectory, "./" + folderName);
try {
// check if given git folder already exist, to prevent stupid re cloning
const folder_exists = exists(gitFolder);
if (!folder_exists) {
// Clone the given git
await simpleGit.clone(gitURL);
}
// Find course.yaml and task.yaml files
const files = await FileHound
.create()
.paths(gitFolder)
.ext("yaml")
// some inginious tasks use config files like "feedback_settings.yaml", exclude them from result)
.glob("*course.yaml", "*task.yaml")
.find();
// separate the course(s) and task(s) data
const [courses_data, exercises_data] = partition(files, (file) => file.endsWith("course.yaml"));
// the hard part here
// As Inginious uses at least 2 versions for their tag system, this script tries to handle both :
// In V1, tags can be directly found in their task.yaml
// In V2, they must be first present in course.yaml , then referenced in task.yaml
// 1. Get a dictionary of course metadata
const courses = groupBy(
courses_data
.map(course_data => {
// parse the given yaml file
const doc = yaml.safeLoad(readFileSync(course_data, 'utf8'));
// if we can find some tags, extract them
return {
"all_tags": extract_tags(doc, "tags"), // all the given tags
"name": doc.name,
"path": course_data, // useful for later
};
}
)
, "path");
// 2. Get an array of tags metadata
// As said before, V1 and V2 have a few differences :
// for a task, V2 only use the tags under the "categories" key of a task.yaml
// for V1
// Copied from https://github.com/UCL-INGI/INGInious/blob/master/inginious/common/tags.py
// as it is referred nowhere in INGINIOUS docs :
/*
The 'type' represents the behaviour of the tag:
- 0: Skill tags. It appear in blue. When the tag is activated it becomes green.
We can perform tasks search on this tag.
- 1: Misconception tags. It does not appear until it is activated. It appear in red when activated.
We can NOT perform tasks search on this tag.
The tags are useful to highlight errors or misconceptions.
- 2: Category tags. Never appear. Only used for organisation and when we perform tasks search.
*/
const exercises = exercises_data.map(exercise_data => {
// parse the given yaml file
const doc = yaml.safeLoad(readFileSync(exercise_data, 'utf8'));
// if we can find the related course metadata , use that to get more info on this exercise
// Warning : as path , we must take the most specialized one ( aka the longest string )
const course_match = Object
.keys(courses)
.filter(s => exercise_data.includes(dirname(s)))
.reduce((a, b) => a.length > b.length ? a : b, '');
const course_data = (course_match.length > 0) ? courses[course_match][0] : {};
// To distinguish categories, I must have a criteria :
// If the tag has the "autoGenerated" flag set to "true", it means the category is coming from the generator
// For security, I use _ around the name to prevent match(s) with others
let auto_tags = auto_generate_tags_for_exercise(gitURL, options, course_data, doc);
// all the given tags by users
// as Inginious also have "garbage" tags (the one with category 1)
let found_tags = found_tags_for_exercise(course_data, doc)
.filter(tag => (tag["category"] !== 1));
let exercise = {
"title": doc.name,
// We must improve description before serve that into a json file
"description": clean_inginious_links(
rst2md(
(doc.hasOwnProperty("context")) ? doc.context : "" // it is optional on Inginious
),
// the optional inginious URL given by properties
(options.hasOwnProperty("inginiousURL"))
? options.inginiousURL
: ""
),
"tags": auto_tags.concat(found_tags) // merge them in a single array
};
// if the base course inginious url was given , try to infer the inginious task link
// Only valid for github that have a single course : for multiple, it is too rare (and much more complex )
if (options.hasOwnProperty("inginiousURL")) {
const folder_exercise = path.relative(gitFolder, dirname(exercise_data));
exercise["url"] = `${options.inginiousURL}/${folder_exercise}`
}
// To allow file uploads, we need some extra metadata
if (course_match.length > 0) {
exercise["archive_properties"] = {
// relative path, in order to be cross platform and handle multiple cases
// So I used / instead of the ugly \\ of Windows for path
"folders": [slash(path.relative(gitFolder, dirname(exercise_data)))],
"files": [slash(path.relative(gitFolder, course_match))]
}
}
// Time to clean the title and maybe add a new tag
return handle_exercise_title(exercise);
});
return {
"exercises": exercises,
// if the given platform has its own categories for tag, use them
"own_categories": {
0: "thématique",
1: "Misconception",
2: "autres"
},
// date of creation
"extraction_date": new Date(),
"url": gitURL
}
} catch (e) {
console.error(e);
return {};
}
};
/**
 * Extract the tags declared under the `rootLey` key of a parsed yaml document.
 *
 * Only the id, the name (returned as `text`) and the type (returned as `category`) of each tag are kept.
 * Both tag layouts are supported: when a tag carries its own `id` attribute it is used,
 * otherwise the key under which the tag is stored is its id.
 *
 * An empty document, a missing / empty (`null`) tag section and malformed (non-object) tag entries
 * are tolerated: they simply contribute no tag.
 *
 * @param {Object} doc - parsed yaml document (course.yaml or task.yaml)
 * @param {string} rootLey - key of the document holding the tags
 * @returns {Object} dictionary `id -> {id, text, category}` (empty when nothing can be extracted)
 */
function extract_tags(doc, rootLey) {
    const has_own = (obj, key) => Object.prototype.hasOwnProperty.call(obj, key);
    const is_object = (value) => value !== null && typeof value === "object";

    // an empty yaml file is parsed as undefined / null
    if (!is_object(doc) || !has_own(doc, rootLey)) {
        return {};
    }
    // an empty "tags:" key is parsed as null
    const raw_tags = doc[rootLey];
    if (!is_object(raw_tags)) {
        return {};
    }

    return Object
        .entries(raw_tags)
        .reduce((acc, [key, value]) => {
            // a tag without any attribute cannot be used : skip it
            if (!is_object(value)) {
                return acc;
            }
            // to handle both V1 and V2 case as tags can be found in course.yaml and task.yaml
            const real_key = has_own(value, "id") ? value.id : key;
            // rest like "visible", "description", etc.. aren't useful (yet)
            acc[real_key] = {
                id: real_key,
                text: value.name,
                category: value.type
            };
            return acc;
        }, {});
}
/**
 * Build the auto generated tags of an exercise.
 *
 * Every produced tag has the shape `{text, autoGenerated: true, category_id}` where `category_id`
 * is a reserved identifier surrounded by "_" (to prevent clashes with user defined categories):
 * platform, source url, license, course name, author(s) and, when the task declares problems,
 * the exercise type / programming language(s) computed by `handle_code_problem` / `handle_text_problem`.
 *
 * @param {string} gitURL - URL of the repository the exercise comes from
 * @param {Object} options - crawler options ; only `license` is read here
 * @param {Object} course_data - metadata of the related course ; only `name` is read here
 * @param {Object} exercise_data - parsed task.yaml ; `author` and `problems` are read here
 * @returns {Object[]} the auto generated tags
 */
function auto_generate_tags_for_exercise(gitURL, options, course_data, exercise_data) {
    const make_tag = (category_id, text) => ({
        text: text,
        autoGenerated: true,
        category_id: category_id
    });

    const tags = [
        // platform of this exercise : INGINIOUS
        make_tag("_PLATFORM_", "INGINIOUS"),
        // source url of this exercise
        make_tag("_SOURCE_", gitURL)
    ];

    // If given, add the license SPDX Identifier
    if (options.hasOwnProperty("license") && options.license.length > 0) {
        tags.push(make_tag("_LICENSE_", options.license));
    }

    // If we can retrieve the course name, we could add course as tag
    if (course_data.hasOwnProperty("name") && course_data.name.length > 0) {
        tags.push(make_tag("_COURSE_", course_data.name));
    }

    // If we can retrieve the author(s), we could add author as tag
    if (exercise_data.hasOwnProperty("author")) {
        // a yaml list of authors is handled like a comma separated string
        const raw_authors = Array.isArray(exercise_data.author)
            ? exercise_data.author.join(",")
            : exercise_data.author;
        // anything else than text (empty key, number, ...) cannot be split into names
        if (typeof raw_authors === "string") {
            // handle multiple authors
            // example(s) :
            // Author 1 , Author 2
            // Author 1 && Author 2
            // Author 1 & Author 2
            raw_authors
                .split(/,|&{1,2}/)
                // drop the credits between parentheses some tasks add after a name, then the spaces around
                .map(s => s.replace(/\s*\(.*?\)\s*/g, '').trim())
                // no empty author (checked last, so that a name only made of credits is dropped too)
                .filter(s => s.length > 0)
                .forEach((author) => {
                    tags.push(make_tag("_AUTHOR_", author));
                });
        }
    }

    // If we can correctly predict the language and kind of exercise, it will be a huge improvement
    if (exercise_data.hasOwnProperty("problems")) {
        // Two major problem categories can be found : text or code
        const text_type = ["multiple_choice", "match"];
        const code_type = ["code", "code_single_line"];

        // to correctly determine what kind of problem we have inside this exercise
        const metadata = Object
            .values(exercise_data.problems || {})
            .map(currentProblem => {
                if (currentProblem === null || typeof currentProblem !== "object") {
                    return null;
                }
                // For text, it is easy
                if (text_type.includes(currentProblem.type)) {
                    return {"category": "text", "type": currentProblem.type};
                }
                // For code, least easy
                if (code_type.includes(currentProblem.type)) {
                    return {
                        "category": "code",
                        "type": currentProblem.type,
                        // language can be present (or not), if not, try to infer that
                        "language": find_language_of_a_problem(currentProblem)
                    };
                }
                // unsupported kind of problem (file upload, ...) : skipped
                return null;
            })
            // comparing with a `{}` literal is always true (object identity), hence the null marker :
            // otherwise unsupported problems would be counted as text problems
            .filter(obj => obj !== null);

        // Now, it is decision time
        const code_problems = metadata.filter(obj => obj.category === "code");
        const text_problems = metadata.filter(obj => obj.category !== "code");

        // Takes everything (maybe later a more advanced mechanism)
        if (code_problems.length > 0) {
            tags.push(...handle_code_problem(code_problems));
        }
        if (text_problems.length > 0) {
            tags.push(...handle_text_problem(text_problems));
        }
    }

    return tags;
}
/**
 * Retrieve the tags given by the authors of a task, whatever the version of the tag system.
 *
 * - V2 : the task has a "categories" key holding an array of tag ids ; the full tag data
 *        is looked up in the tags of the course (`course_data.all_tags`).
 *        Ids unknown to the course (or a task without matching course) contribute no tag.
 * - V1 : the tags are directly described under the "tags" key of the task.
 *
 * The returned tags are copies without their `id` property, so the tags stored in `course_data`
 * (shared by all the tasks of the course) are never modified.
 *
 * @param {Object} course_data - metadata of the related course (`{}` when none was found)
 * @param {Object} exercise_data - parsed task.yaml
 * @returns {Object[]} tags shaped like `{text, category}`
 */
function found_tags_for_exercise(course_data, exercise_data) {
    let tags;

    if (exercise_data.hasOwnProperty("categories")) {
        // V2 : it is only an array of ids so we must retrieve the full tag data from course.yaml
        const known_tags = (course_data && course_data.all_tags) || {};
        const category_ids = Array.isArray(exercise_data.categories) ? exercise_data.categories : [];
        tags = category_ids
            .filter(categoryId => Object.prototype.hasOwnProperty.call(known_tags, categoryId))
            .map(categoryId => known_tags[categoryId])
            .filter(tag => tag !== null && typeof tag === "object");
    } else {
        // V1 : tags can be found in "tags" key in task.yaml
        tags = Object.values(extract_tags(exercise_data, "tags"));
    }

    // Remove unused properties ( id ), working on copies to leave the course tags untouched
    return tags.map(tag => {
        const copy = Object.assign({}, tag);
        delete copy.id;
        return copy;
    });
}
// Marker used when the programming language of a problem cannot be determined
const DEFAULT_LANGUAGE = "UNKNOWN";

/**
 * Find the programming language of a code problem of a task.
 *
 * Sources are tried in this order, a source being used only while no language is known :
 * 1. the "language" attribute of the problem (non-empty string) ;
 * 2. the first reStructuredText directive ".. code-block:: <language>" found in its "header" ;
 * 3. the "language" attribute (non-empty string) of the first of its "boxes" declaring one.
 *
 * @param {Object} problem - one entry of the "problems" section of a task.yaml
 * @returns {string} the language as written in the task, or DEFAULT_LANGUAGE when nothing was found
 */
function find_language_of_a_problem(problem) {
    const own = (obj, key) => (Object.prototype.hasOwnProperty.call(obj, key) ? obj[key] : undefined);
    const is_text = (value) => typeof value === "string" && value.length > 0;
    // A regex literal : built from a string, "\." would lose its backslash and match any character.
    // "+", "#" and "-" are accepted in the name to not truncate languages such as "c++".
    const header_regex = /\.\. code-block:: ([\w+#-]+)/;

    let language = DEFAULT_LANGUAGE;

    // the most simple case : directly found the language
    const declared = own(problem, "language");
    if (is_text(declared)) {
        language = declared;
    }

    // We could try extract possible language in header
    const header = own(problem, "header");
    if (language === DEFAULT_LANGUAGE && typeof header === "string") {
        const header_match = header_regex.exec(header);
        if (header_match !== null) {
            language = header_match[1];
        }
    }

    // Least try : hope we can found something in boxes
    const boxes = own(problem, "boxes");
    if (language === DEFAULT_LANGUAGE && boxes !== null && typeof boxes === "object") {
        // Takes the first result : should be probably the good language
        const first_box_language = Object
            .values(boxes)
            .filter(box_problem => box_problem !== null && typeof box_problem === "object")
            .map(box_problem => own(box_problem, "language"))
            .find(is_text);
        if (first_box_language !== undefined) {
            language = first_box_language;
        }
    }

    // If none of the previous sources gave something, the task simply does not tell its language
    return language;
}
/**
 * Build the exercise type tag of a task holding text problems.
 * The type is "multiple_choice" as soon as one problem has that type, "match" otherwise.
 *
 * @param {Object[]} text_problems - problems metadata ; only their `type` is read
 * @returns {Object[]} an array holding the single auto generated "_EXERCISE-TYPE_" tag
 */
function handle_text_problem(text_problems) {
    let has_multiple_choice = false;
    for (const problem of text_problems) {
        if (problem.type === "multiple_choice") {
            has_multiple_choice = true;
        }
    }

    const type_tag = {
        autoGenerated: true,
        category_id: "_EXERCISE-TYPE_",
        text: has_multiple_choice ? "multiple_choice" : "match"
    };
    return [type_tag];
}
/**
 * Build the tags of a task holding code problems : the "code" exercise type,
 * followed by one programming language tag per distinct language (lower cased, in order of appearance).
 * When no problem has a known language, a single language tag holding DEFAULT_LANGUAGE is added instead.
 *
 * @param {Object[]} code_problems - problems metadata ; only their `language` is read
 * @returns {Object[]} the auto generated tags
 */
function handle_code_problem(code_problems) {
    const auto_tag = (category_id, text) => ({
        autoGenerated: true,
        category_id: category_id,
        text: text
    });

    // distinct known languages ; a Set remembers the order of first insertion
    const known_languages = new Set();
    for (const problem of code_problems) {
        if (problem.language !== DEFAULT_LANGUAGE) {
            known_languages.add(problem.language.toLowerCase());
        }
    }

    // at least the kind of exercise
    const result = [auto_tag("_EXERCISE-TYPE_", "code")];
    if (known_languages.size === 0) {
        // nothing found : fall back on the default language
        result.push(auto_tag("_PROGRAMMING-LANGUAGE_", DEFAULT_LANGUAGE));
    } else {
        for (const language of known_languages) {
            result.push(auto_tag("_PROGRAMMING-LANGUAGE_", language));
        }
    }
    return result;
}
/**
 * Clean the title of an exercise from the prefixes commonly seen on INGInious
 * and turn the information carried by these prefixes into tags.
 *
 * The first matching rule wins :
 * 1. dated exam, "[Juin 2018] Title"      : "exam" type tag + category 2 tag holding the session
 * 2. exam, "EXAM ... - Title"             : "exam" type tag
 * 3. midterm, "midterm ..."               : "midterm" type tag
 * 4. mission, "Mission 10: Title"         : category 2 tag "Mission <number>"
 * 5. categorized, "[S2] Title"            : category 2 tag holding the text between brackets
 * 6. "PART 3 - Title"                     : title cleaned, no tag
 * A tag is only added when the exercise has no tag of the same kind and category with the same text
 * (case insensitive).
 *
 * @param {Object} exercise - exercise with its `title` and `tags` ; it is modified in place
 * @returns {Object} the given exercise
 */
function handle_exercise_title(exercise) {
    const is_auto = (tag) => tag.hasOwnProperty("autoGenerated");

    // to find match, to prevent duplicate tag entries
    const existOne = (tag) => {
        // auto generated tags and user tags do not store their category under the same key
        const category_field = is_auto(tag) ? "category_id" : "category";
        return exercise.tags.some(tag2 =>
            is_auto(tag) === is_auto(tag2)
            && tag[category_field] === tag2[category_field]
            // an existing tag without text cannot be a duplicate (and must not crash the comparison)
            && typeof tag2["text"] === "string"
            && tag["text"].toLowerCase() === tag2["text"].toLowerCase()
        );
    };

    const exercise_type_tag = (text) => ({
        "autoGenerated": true,
        "category_id": "_EXERCISE-TYPE_",
        "text": text
    });
    const category_tag = (text) => ({
        "category": 2,
        "text": text
    });

    // If someone wants to add/update/improve them ; good luck ;)
    // to match exam - like exercise
    const dated_exam_pattern = /\[((?:Septembre|Juin)\s[0-9]{4,}).*\](.+)$/im;
    const exam_pattern = /EXAM.+\s-(.+)$/im;
    // to match midterm - like exercise
    const midterm_pattern = /mid\-?term(.+)$/im;
    // to match mission - like exercise
    // the quantifier is INSIDE the group : "([0-9])+" only captures the last digit ("Mission 10" -> "0")
    const mission_pattern = /(?:Bilan\sM|Mission)\s?([0-9]+)[\s|\-|:]*(.+)$/im;
    // to match some categorized - like exercise ( often by week : S2)
    const categorized_pattern = /\[(.+)\]\s(.+)$/im;
    // to clean a garbage title commonly seen
    const garbage_pattern = /PART\s[0-9]+\s-(.+)$/im;

    const title = exercise.title;
    // exec() converts its argument to a string, so a missing title matches nothing instead of crashing
    const dated_exam = dated_exam_pattern.exec(title);
    const exam = exam_pattern.exec(title);
    const midterm = midterm_pattern.exec(title);
    const mission = mission_pattern.exec(title);
    const categorized = categorized_pattern.exec(title);
    const garbage = garbage_pattern.exec(title);

    // array to store new tags proposal, if they don't already exist
    const tag_proposals = [];

    if (dated_exam !== null) {
        tag_proposals.push(exercise_type_tag("exam"));
        tag_proposals.push(category_tag(dated_exam[1].trim()));
        exercise.title = dated_exam[2].trim();
    } else if (exam !== null) {
        tag_proposals.push(exercise_type_tag("exam"));
        exercise.title = exam[1].trim();
    } else if (midterm !== null) {
        exercise.title = midterm[1].trim();
        tag_proposals.push(exercise_type_tag("midterm"));
    } else if (mission !== null) {
        exercise.title = mission[2].trim();
        tag_proposals.push(category_tag("Mission " + mission[1].trim()));
    } else if (categorized !== null) {
        exercise.title = categorized[2].trim();
        tag_proposals.push(category_tag(categorized[1].trim()));
    } else if (garbage !== null) {
        exercise.title = garbage[1].trim();
    }
    // else : nothing recognized, the title is kept as it is

    // insert the new tags if they don't already exist
    exercise["tags"] = exercise["tags"].concat(
        tag_proposals.filter(tag => !existOne(tag))
    );
    return exercise;
}
// regex useful to clean inginious links
// <img ... src="url" : group 1 is everything between "<img" and "src" (kept as is), group 2 is the url
const img_html_regex = /<img([^>]*?\s)src="([^">]+)"/gm;
// <a href="url" : group 1 is the url
const links_regex = /<a href="([^">]+)"/gm;

/**
 * Fix the links (`<a href>`) and image sources (`<img src>`) of an HTML description coming from INGInious.
 *
 * - links starting by "%" (broken reST links such as "%60printf%20%3Chttps://...") are cut to their "http" part ;
 * - absolute links are kept as they are ;
 * - links relative to the course ("PART3Bst/bst.png") are appended to `inginious_link` ;
 * - links relative to the root ("/course/LEPL1402/a.png") are appended to the root of `inginious_link`.
 * Without `inginious_link`, relative links cannot be resolved and are left untouched.
 *
 * @param {string} description - HTML description of the exercise
 * @param {string} inginious_link - base URL of the course on INGInious, or "" when unknown
 * @returns {string} the description with fixed links ("" when the description is not a string)
 */
function clean_inginious_links(description, inginious_link) {
    // nothing to clean (the conversion of the description gave nothing usable)
    if (typeof description !== "string") {
        return "";
    }

    // a trailing "/" on the base url would produce "//" in the resolved links
    const course_url = inginious_link.replace(/\/+$/, "");
    // scheme + host part of the base url
    const root_url = inginious_link.replace(/^(.*\/\/[^\/?#]*).*$/, "$1");

    // To solve nasty links found
    const url_solver = (givenUrl) => {
        // corner case for links of LSINF1252 - some links are literally broken
        // like "%60printf%20%3Chttps://sites.uclouvain.be/SystInfo/manpages/man3/sprintf.3.html"
        if (givenUrl.startsWith("%")) {
            givenUrl = givenUrl.replace(/\%.*http/, "http");
        }
        // if already an absolute link, no work to do or
        // if no inginiousURL in settings, cannot infer the real link
        if (isAbsoluteUrl(givenUrl) || inginious_link.length === 0) {
            return givenUrl;
        }
        // Two kinds of relative links exists on inginious
        // Type 1 : like ( "PART3Bst/bst.png" ) : relative to a course url
        // Type 2 : like ( "/course/LEPL1402/BoundedBuffer/BoundedBuffer.png" ) : relative to root path
        return (!givenUrl.startsWith("/"))
            ? course_url + "/" + givenUrl
            : root_url + givenUrl;
    };

    // purge description of broken inginious links
    return description
        .replace(links_regex, (_match, url) => `<a href="${url_solver(url)}"`)
        // the attributes found before "src" are written back, only the url changes
        .replace(img_html_regex, (_match, before_src, url) => `<img${before_src}src="${url_solver(url)}"`);
}