Skip to content

Commit

Permalink
sketching reports.ts
Browse files Browse the repository at this point in the history
  • Loading branch information
R. S. Doiel committed Oct 30, 2024
1 parent c6fa389 commit 702a881
Show file tree
Hide file tree
Showing 15 changed files with 208 additions and 53 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ logs/*
groups_rpt.sql
*_final.csv
in_feeds.csv
htdocs/rpt/*
4 changes: 2 additions & 2 deletions cold_admin.1.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
%cold(1) user manual | 0.0.9 2024-10-24 ea5aeff
%cold(1) user manual | 0.0.9 2024-10-29 c6fa389
% R. S.Doiel
% 2024-10-24 ea5aeff
% 2024-10-29 c6fa389

# NAME

Expand Down
6 changes: 3 additions & 3 deletions deno.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,11 @@
"dev": "deno run --allow-import --allow-read --allow-net --watch cold_admin.ts --htdocs=./htdocs",
"reload_dev": "deno run --allow-import --allow-read --allow-net --reload --write-lock --watch cold_admin.ts --htdocs=./htdocs",
"start": "deno run --allow-read --allow-net cold_admin.ts",
"build": "deno compile --allow-import --allow-read --allow-net --output bin/cold_admin cold_admin.ts ; deno compile --allow-import --allow-read --allow-net --output bin/ds_importer ds_importer.ts ; deno compile --allow-import --allow-read --allow-net --output bin/directory_sync directory_sync.ts ; deno compile --allow-import --allow-read --allow-net --output bin/set_include_in_feeds set_include_in_feeds.ts ; deno compile --allow-import --allow-read --allow-net --output bin/reports reports.ts",
"build": "deno compile --allow-import --allow-read --allow-net --output bin/cold_admin cold_admin.ts ; deno compile --allow-import --allow-read --allow-net --output bin/ds_importer ds_importer.ts ; deno compile --allow-import --allow-read --allow-net --output bin/directory_sync directory_sync.ts ; deno compile --allow-import --allow-read --allow-net --output bin/set_include_in_feeds set_include_in_feeds.ts ; deno compile --allow-all --output bin/reports reports.ts",
"htdocs": "deno run --allow-import --allow-read --allow-write=htdocs build.ts",
"test": "dsquery test.ds 'delete from test' ; deno test --allow-net dataset_test.ts ; deno test options_test.ts ; deno test config_test.ts ; deno test people_test.ts ; deno test groups_test.ts ; deno test cold_admin_test.ts ",
"import_people_csv": "deno run --allow-read --allow-net ds_importer.ts people.ds people.csv",
"import_groups_csv": "deno run --allow-read --allow-net ds_importer.ts groups.ds groups.csv",
"import_people_csv": "deno run --allow-import --allow-read --allow-net ds_importer.ts people.ds people.csv",
"import_groups_csv": "deno run --allow-import --allow-read --allow-net ds_importer.ts groups.ds groups.csv",
"set_include_in_feeds": "deno run --allow-read --allow-net set_include_in_feeds.ts people.ds people_in_feeds.csv",
"import_issn_journal_publisher_csv": "deno run --allow-read --allow-net ds_importer.ts issn.ds issn_journal_publisher.csv",
"esm:add": "deno run -A https://esm.sh/v135 add",
Expand Down
4 changes: 2 additions & 2 deletions directory_sync.1.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
%directory_sync(1) user manual | 0.0.9 2024-10-24 ea5aeff
%directory_sync(1) user manual | 0.0.9 2024-10-29 c6fa389
% R. S.Doiel
% 2024-10-24 ea5aeff
% 2024-10-29 c6fa389

# NAME

Expand Down
26 changes: 26 additions & 0 deletions groups_rpt.sql
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
-- groups_rpt.sql
--
-- Purpose: emit one JSON object per row of the `groups` table, built from the
-- row's `src` JSON column, ordered by the group's name.
--
-- The object keys are the column names consumed by
--   dsquery -csv "key,name,alternative,...,approx_end,...,Scope" -sql groups_rpt.sql groups.ds
-- so every key below must be spelled exactly as in that column list.
-- (The key for `is_approx_end` was previously misspelled 'appox_end', which
-- did not match the 'approx_end' CSV column.)
--
-- 'alternative' is stored as a JSON array; the nested replace() calls turn
-- its serialized form into a "; " delimited string, e.g. ["A","B"] -> A; B
-- and [] -> empty string. This assumes the array is serialized without
-- whitespace between elements -- TODO confirm against the stored data.
select json_object(
    'key', src->'clgid',
    'name', src->'name',
    'alternative', replace(replace(replace(replace(json_quote(src->'alternative'), '","', '; '), '["', ''), '"]', ''), '[]', ''),
    'email', src->'email',
    'date', src->'date',
    'description', src->'description',
    'start', src->'start_date',
    'approx_start', src->'is_approx_start',
    'activity', src->'activity',
    'end', src->'end_date',
    'approx_end', src->'is_approx_end',
    'website', src->'website',
    'pi', src->'pi',
    'parent', src->'parent',
    'prefix', src->'prefix',
    'grid', src->'grid',
    'isni', src->'isni',
    'ringold', src->'ringold',
    'viaf', src->'viaf',
    'ror', src->'ror',
    'updated', src->'updated',
    'Scope', src->'Scope'
)
from groups
order by src->'name'
3 changes: 3 additions & 0 deletions htdocs/rpt/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@

This is a temporary place for report output during development.

8 changes: 3 additions & 5 deletions report_design_choices.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,9 @@ If the request is valid then several things need to happen.
- if the report executes successfully the request record should be updated with the status of "available" and the link
- if there are email(s) associated with the request a message needs to be sent out with the report name, final status and link if available or error message if not

Reports are programs or scripts (e.g. Bash, Python) that write their results to standard output, the runner reads that and takes care of saving the results and contacting the email addresses.

I've prototyped the report runner in TypeScript but am not happy with it. I am deciding if I should write this in Go or give Python a try.

I'm thinking reports could be simple scripts written in any language that write their output to standard out. The runner should be responsible for taking that output and writing it to an appropriate place (e.g. Google Drive).

Expand Down Expand Up @@ -66,9 +68,5 @@ The problem is the NAS might be mounted in different ways on each person's compu

Example "file://datawork.library.caltech.edu/Sites/feeds_v1.6/htdocs/people/people.csv" while eventually it'll get to "https://feeds.library.caltech.edu/people/people.csv" that isn't helpful when an end user wants a current, clickable link to see the report right at the point of creation.

I will need to decide where to write the initial report data before I proceed.

The report runner server should run as a systemd service.

I've prototyped the report runner in TypeScript but am not happy with it.

I've written a prototype in TypeScript compiled with Deno. Creating web services that take advantage of concurrency is more convoluted in TypeScript (e.g. need to use service workers) than in Go. The report runner server should run as a systemd service. It is easy to implement a sequential report runner in TypeScript but since reports can sometimes take hours to complete this isn't ideal. Taking advantage of concurrency in TypeScript means using service workers. Given that, it makes more sense to write the report runner in Go and take advantage of Go's maturity in concurrency and as a service platform.
4 changes: 2 additions & 2 deletions reports.1.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
%directory_sync(1) user manual | 0.0.9 2024-10-24 ea5aeff
%reports(1) user manual | 0.0.9 2024-10-29 c6fa389
% R. S.Doiel
% 2024-10-24 ea5aeff
% 2024-10-29 c6fa389

# NAME

Expand Down
103 changes: 81 additions & 22 deletions reports.ts
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,6 @@ export interface ReportInterface {
id: string;
report_name: string;
options: string[];
content_type: string;
emails: string;
requested: string;
updated: string;
Expand All @@ -126,7 +125,6 @@ export class Report implements ReportInterface {
id: string = "";
report_name: string = "";
options: string[] = [];
content_type: string = "";
emails: string = "";
requested: string = "";
updated: string = "";
Expand All @@ -145,7 +143,6 @@ export class Report implements ReportInterface {
const content_type = parts.length > 1 ? parts[1].trim() : "text/plain";

this.report_name = report_name;
this.content_type = content_type;
this.options = "options" in o ? o.options as unknown[] as string[] : [];
this.emails = "emails" in o ? `${o.emails}` : ``;
const now = new Date();
Expand All @@ -171,7 +168,6 @@ export class Report implements ReportInterface {
updated: this.updated,
expire: this.expire,
status: this.status,
content_type: this.content_type,
link: this.link,
};
}
Expand Down Expand Up @@ -347,36 +343,93 @@ async function handleReportRequest(
interface RunnableInterface {
cmd: string;
options: string[];
basename: string;
content_type: string;
append_datestamp: boolean;
final_status: string;
link: string;
}

class Runnable implements RunnableInterface {
readonly cmd: string;
cmd: string;
options: string[];
basename: string;
content_type: string;
append_datestamp: boolean;
final_status: string;
link: string;

constructor(cmd: string) {
constructor(
cmd: string,
basename: string,
append_datestamp: boolean,
content_type: string,
) {
this.cmd = cmd;
this.options = [];
this.final_status = "";
this.link = "";
this.basename = basename;
this.append_datestamp = append_datestamp;
this.content_type = content_type;
}

// run() executes the program implementing the report by calling out to the operating system.
// The report program is expected to return a link written to standard out on success. Otherwise return an
// empty string or short error message using the protocol `error://`.
async run(options: string[]): Promise<string> {
//FIXME: Need to execute command line program and capture result link or error message from standard out then hand it back.
console.log(`Running: ${this.cmd}`);
//console.log(`Running: ${this.cmd}`);
let txt: string;
try {
txt = await $`${this.cmd}`.lines();
} catch(err) {
txt = await $`${this.cmd}`.text();
} catch (err) {
txt = "error://" + err;
}
return txt;

// the URL would be returned by the runner when the final destination is available.
let filename: string = this.basename;
let ext: string = ".txt";
switch (this.content_type) {
case "text/plain":
ext = ".txt";
break;
case "text/csv":
ext = ".csv";
break;
case "application/json":
ext = ".json";
break;
case "text/markdown":
ext = ".md";
break;
case "application/yaml":
ext = ".yaml";
break;
default:
ext = "";
break;
}
console.log("DEBUG file extension set to ", ext, this.content_type);
if (this.append_datestamp) {
let datestamp = (new Date()).toJSON().substring(0, 10);
filename = `${this.basename}_${datestamp}${ext}`;
} else {
filename = `${this.basename}${ext}`;
}
console.log("DEBUG filename should be", filename);

// FIXME: the report's output should be read in by the runner so that the report can be rendered to a URL location, then written out to the file.
const basedir: string = "./htdocs/rpt";
const base_url: string = "rpt";
const utf8Encoder = new TextEncoder();
const data = utf8Encoder.encode(txt);
try {
await Deno.writeFile(`${basedir}/${filename}`, data, { create: true });
} catch (err) {
return "error://" + err;
}
return `${base_url}/${filename}`;
}
}

Expand All @@ -389,18 +442,25 @@ class Runner implements RunnerInterface {

constructor(config_yaml: string) {
const src = Deno.readTextFileSync(config_yaml);
const cfg = yaml.parse(src) as { [key: string]: { [key: string]: string } };
const cfg = yaml.parse(src) as {
[key: string]: { [key: string]: Runnable };
};
console.log(`DEBUG cfg.reports ${typeof cfg.reports}:\n\t`, cfg.reports);
if (cfg.reports !== undefined) {
for (const [k, v] of Object.entries(cfg.reports)) {
console.log(
`DEBUG cfg.reports ${typeof cfg.reports[k]}:\n\t`,
cfg.reports[k],
);
if (v === "") {
if (v === undefined) {
continue;
}
this.report_map[k] = new Runnable(v);
this.report_map[k] = new Runnable(
v.cmd,
v.basename,
v.append_datestamp,
v.content_type,
);
}
}
}
Expand Down Expand Up @@ -434,14 +494,10 @@ async function process_request(
request.link = link; /*link.replace("error://", "");*/
request.status = "error";
request.updated = (new Date()).toJSON();
} else if (link.indexOf("://") > -1 ) {
} else {
request.link = link;
request.status = "completed";
request.updated = (new Date()).toJSON();
} else {
request.link = "unknown error";
request.status = "error";
request.updated = (new Date()).toJSON();
}
return (await ds.update(id, request));
}
Expand Down Expand Up @@ -485,7 +541,7 @@ async function servicing_requests(runner: Runner): Promise<void> {
async function report_runner(config_yaml: string): Promise<number> {
try {
await Deno.lstat(config_yaml);
} catch(err) {
} catch (err) {
console.log(err);
return 1;
}
Expand Down Expand Up @@ -536,9 +592,12 @@ async function main(): Promise<void> {
config_yaml = "reports.yaml";
}
// Start up the service.
setInterval(await (async function() {
await report_runner(config_yaml);
}), 10000);
setInterval(
await (async function () {
await report_runner(config_yaml);
}),
10000,
);
}

// Run main()
Expand Down
11 changes: 11 additions & 0 deletions reports.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Report definitions read by the report runner (reports.ts).
# Each entry under `reports` maps a report name to the four settings the
# runner passes to a Runnable: cmd, basename, append_datestamp, content_type.
reports:
run_people_csv:
# cmd: the program the runner executes; its standard output is the report text.
cmd: ./run_people_csv.bash
# basename: output file name without extension (the runner adds the extension).
basename: people
# append_datestamp: when true the runner appends _YYYY-MM-DD to the basename.
append_datestamp: false
# content_type: selects the file extension (text/csv -> .csv).
content_type: text/csv
run_groups_csv:
cmd: ./run_groups_csv.bash
basename: groups
append_datestamp: false
content_type: text/csv
12 changes: 10 additions & 2 deletions reports.yaml-example
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
reports:
run_people_csv: ./run_people_csv.bash
run_groups_csv: ./run_groups_csv.bash
# Each report entry needs cmd, basename, append_datestamp and content_type.
# The runner reads `basename` (not `name`) and appends the file extension
# itself based on content_type, so basename must not include ".csv".
run_people_csv:
cmd: ./run_people_csv.bash
basename: people
append_datestamp: false
content_type: text/csv
run_groups_csv:
cmd: ./run_groups_csv.bash
basename: groups
append_datestamp: false
content_type: text/csv
61 changes: 57 additions & 4 deletions run_groups_csv.bash
Original file line number Diff line number Diff line change
@@ -1,8 +1,61 @@
#!/bin/bash
#

# FIXME: this report needs to SSH to the data processing system and then run the rpt_groups_csv.bash script.
# {
# "Scope": "",
# "activity": "active",
# "alternative": [
# "RSI"
# ],
# "clgid": "Resnick-Sustainability-Institute",
# "date": "10/23/18",
# "description": "",
# "email": "joy@caltech.edu",
# "end_date": "",
# "grid": "",
# "include_in_feeds": true,
# "is_approx_end": false,
# "is_approx_start": false,
# "isni": "",
# "name": "Resnick Sustainability Institute",
# "parent": "Jonas Peters",
# "pi": "",
# "prefix": "",
# "ringold": "",
# "ror": "",
# "start_date": "",
# "updated": "3/26/20",
# "viaf": "",
# "website": "https://resnick.caltech.edu/"
# }

# Mockup report being run
sleep 5;
echo "files://dataset.library.caltech.edu/Sites/feeds_v1.6/groups.csv"
### cat <<SQL>groups_rpt.sql
### select json_object(
### 'key', src->'clgid',
### 'name', src->'name',
### 'alternative', replace(replace(replace(replace(json_quote(src->'alternative'), '","', '; '), '["', ''), '"]', ''), '[]', ''),
### 'email', src->'email',
### 'date', src->'date',
### 'description', src->'description',
### 'start', src->'start_date',
### 'approx_start', src->'is_approx_start',
### 'activity', src->'activity',
### 'end', src->'end_date',
### 'approx_end', src->'is_approx_end',
### 'website', src->'website',
### 'pi', src->'pi',
### 'parent', src->'parent',
### 'prefix', src->'prefix',
### 'grid', src->'grid',
### 'isni', src->'isni',
### 'ringold', src->'ringold',
### 'viaf', src->'viaf',
### 'ror', src->'ror',
### 'updated', src->'updated',
### 'Scope', src->'Scope'
### )
### from groups
### order by src->'name'
### SQL
###
dsquery -csv "key,name,alternative,email,date,description,start,approx_start,activity,end,approx_end,website,pi,parent,prefix,grid,isni,ringold,viaf,ror,updated,Scope" -sql groups_rpt.sql groups.ds
Loading

0 comments on commit 702a881

Please sign in to comment.