forked from linked-statistics/COOS
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathcoos.html
797 lines (647 loc) · 48.4 KB
/
coos.html
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
<!DOCTYPE html>
<html>
<head>
<title>COOS</title>
<meta http-equiv='Content-Type' content='text/html;charset=utf-8'/>
<script src='https://www.w3.org/Tools/respec/respec-w3c' class='remove' defer></script>
<script class='remove'>
// ReSpec configuration for the COOS specification.
// NOTE: declared with `var` on purpose. ReSpec looks this object up as a
// global (`respecConfig`), so it must not become `const`/`let` or be moved
// into a module or function scope.
var respecConfig = {
  // Document identification and status.
  specStatus: "unofficial",
  noRecTrack: true,
  shortName: "COOS",
  subtitle: "A Core Ontology for Official Statistics",
  latestVersion: "https://linked-statistics.github.io/COOS/coos.html",
  // publishDate: "2022-xx-xx",

  // People credited in the document header.
  editors: [
    { name: "Franck Cotton", company: "Insee", companyURL: "http://www.insee.fr/" }
  ],
  authors: [
    { name: "Flavio Rizzolo", company: "Statistics Canada", companyURL: "https://www.statcan.gc.ca/" },
    { name: "Daniel Gillman", company: "Bureau of Labor Statistics", companyURL: "https://www.bls.gov/" },
    { name: "Florian Vucko", company: "Insee", companyURL: "http://www.insee.fr/" }
  ],

  // Group responsible for the document and related links.
  wg: "High-Level Group for the Modernisation of Official Statistics",
  wgURI: "https://statswiki.unece.org/display/hlgbas/Modernisation+Groups",
  wgPatentURI: "https://creativecommons.org/licenses/by/3.0/igo/",

  // Source repository.
  github: {
    repoURL: "https://github.com/linked-statistics/COOS",
    branch: " ", // avoids default 'gh-pages'
  },

  // Logo shown in the document header (a local copy is used instead of the
  // remote image referenced in the commented-out line).
  logos: [{
    // src: "https://unece.org/themes/custom/unece/logo.png",
    src: "img/unece-logo.png",
    url: "https://unece.org/",
    alt: "Unece",
    width: 267,
    height: 66,
    id: "unece-logo"
  }],

  // Bibliography entries defined locally in this document. The keys are the
  // identifiers used in [[KEY]] citations in the document body.
  localBiblio: {
    "CSDA": {
      // Month name spelled in English, like every other entry below.
      "date": "November 2018",
      "href": "https://statswiki.unece.org/display/DA",
      "publisher": "Unece",
      "title": "Common Statistical Data Architecture (CSDA)"
    },
    "DDI-CDI": {
      "date": "July 2021",
      "href": "https://ddialliance.org/announcement/public-review-ddi-cross-domain-integration-ddi-cdi",
      "publisher": "DDI Alliance",
      "title": "DDI - Cross Domain Integration"
    },
    "GAMSO": {
      "date": "January 2019",
      "href": "https://statswiki.unece.org/display/gamso",
      "publisher": "Unece",
      "title": "Generic Activity Model for Statistical Organizations, version 1.2"
    },
    "GSBPM": {
      "date": "January 2019",
      "href": "https://statswiki.unece.org/display/gsbpm",
      "publisher": "Unece",
      "title": "Generic Statistical Business Process Model, version 5.1"
    },
    "GSBPM-LM": {
      "authors": [
        "Franck Cotton",
        "Daniel Gillman"
      ],
      "date": "October 2015",
      "href": "http://ceur-ws.org/Vol-1551/article-06.pdf",
      "title": "Modeling the Statistical Process with Linked Metadata"
    },
    "GSIM": {
      "href": "https://statswiki.unece.org/display/gsim",
      "publisher": "Unece",
      "title": "Generic Statistical Information Model, version 1.2"
    },
    "GSIM-LM": {
      "authors": [
        "Monica Scannapieco",
        "Laura Tosco",
        "Daniel Gillman",
        "Antoine Dreyer",
        "Guillaume Duffes"
      ],
      "date": "October 2016",
      "href": "http://ceur-ws.org/Vol-1654/article-03.pdf",
      "title": "An OWL Ontology for the Generic Statistical Information Model (GSIM): Design and Implementation"
    },
    "PAV": {
      "authors": [
        "Paolo Ciccarese",
        "Stian Soiland-Reyes"
      ],
      "date": "16 March 2015",
      "href": "https://pav-ontology.github.io/pav/",
      "title": "PAV ontology: provenance, authoring and versioning"
    },
    "PROV-RL": {
      "authors": [
        "Franck Cotton",
        "Guillaume Duffes",
        "Flavio Rizzolo"
      ],
      "date": "October 2019",
      "href": "http://ceur-ws.org/Vol-2549/article-08.pdf",
      "title": "Using PROV-O to Represent Lineage in Statistical Processes: A Record Linkage Example"
    },
    "SDMX": {
      "date": "September 2021",
      "href": "https://sdmx.org/?page_id=5008",
      "publisher": "SDMX Sponsor Organisations",
      "title": "Statistical Data and Metadata eXchange Technical Specifications, version 3.0"
    }
  }
};
</script>
</head>
<body>
<p class='copyright'>Copyright © 2022 <a href='https://www.unece.org/'>Unece</a>, All Rights Reserved</p >
<p style="font-size:80%"><em>(Authors' affiliations were recorded at the beginning of the writing of this document and might have changed since.)</em></p>
<div class="issue" title="Remaining issues">
<ul>
<li>Layout, copyright and SOTD section to adapt to the Unece context</li>
</ul>
</div>
<section id='abstract'>
<p>This document presents an RDF/OWL vocabulary based on different business models used in the field of Official Statistics. It provides vocabulary terms that can be used to describe statistical processes, products and organizations. When appropriate, COOS also connects the artifacts it defines to other well-known models and vocabularies.</p>
</section>
<section id='sotd'>
<p>This is a draft document and may be updated, replaced or obsoleted by other documents at any time. It is inappropriate to cite this document as other than work in progress.</p>
</section>
<section id="intro">
<h2>Introduction</h2>
<section id="motivation">
<h3>Motivation</h3>
<p>This paper introduces COOS, the Core Ontology for Official Statistics. The main purpose of COOS is to serve as an integration model for the core set of ModernStats standards backed by elements of well-known standard vocabularies. These ModernStats standards, mostly developed under the auspices of the UNECE High-Level Group for the Modernisation of Official Statistics, include the Generic Statistical Business Process Model (GSBPM), the Generic Activity Model for Statistical Organisations (GAMSO), the Generic Statistical Information Model (GSIM), and the Common Statistical Data Architecture (CSDA).</p>
<p>As more statistical offices are turning to semantic standards to formalize their data and metadata, it became necessary to establish common foundations on which the different standards can develop in a coherent way using a formal framework that allows interoperability, machine-actionability and globally unique identification.</p>
<p>ModernStats standards have been developed independently over the course of more than a decade by a diverse group of specialists with different viewpoints, stakeholders and ideas. This created misalignments and impedance mismatches between the underlying models that should otherwise work well together and complement each other: Information objects (GSIM) describe the data and metadata necessary to produce statistics where capabilities (CSDA) are the essential building blocks enabling activities (GAMSO) to be implemented via business processes (GSBPM).</p>
<p>COOS defines a conceptual integration framework to provide semantic coherence across these models based on a common vocabulary of terms, definitions and a well-defined set of inter- and intra-model relationships formalized in RDF/OWL using standard vocabularies, e.g. SKOS, PROV, DCAT, DC, ORG, etc. COOS provides a powerful mechanism to describe complex aspects of statistical production to support business discussions and technical solution implementations.</p>
<p>Model management is a big part of the standards integration story: the underlying models evolve and to maintain alignment the COOS needs to evolve with them. COOS includes an initial governance framework complementing each model’s own governance processes. This governance framework includes a core set of principles and a process for managing change.</p>
</section>
<section id="background">
<h3>Background</h3>
<p>ModernStats standards consist of conceptual models and supporting documentation developed for the official statistics domain describing information entities and capabilities together with business processes and activities. The essence of this core set of standards is described below.</p>
<p>GSBPM provides a framework to describe the building blocks of statistical production in terms of sub-processes. Its main goal is to help statistical organizations standardize their statistical production processes. It was the first ModernStats model to be published, back in 2008, and has been widely used by national and international statistical agencies since then.</p>
<p>GAMSO provides a framework to describe the building blocks of statistical production in terms of activities. It complements the GSBPM in two ways: (i) by covering areas beyond the scope of GSBPM, and (ii) by providing a business capability view of statistical production itself.</p>
<p>GSIM complements both GSBPM and GAMSO by providing a catalogue of information objects to describe statistical data and metadata. It functions as a reference framework consisting of a set of standardised information objects to be used in statistical production.</p>
<p>CSDA provides a capability framework cataloguing the major abilities a statistical organization has to use, produce, share and manage data and metadata. CSDA integrates with the GSBPM and GAMSO by enabling processes and activities related to the lifecycle management of GSIM information objects.</p>
</section>
<section id="overview">
<h3>Overview</h3>
<p>The ontology provides classes and properties to allow statistical activities to be described, as well as the organizations that undertake them, the inputs they use and the outputs they produce.</p>
<p>All activities undertaken by organizations that produce Official Statistics may be represented using the ontology. A distinction is made between activities directly linked to the production of statistical content, and activities required to perform those. Several classes of activities are also introduced to allow different levels of granularity to be expressed. The ontology also provides instances of GAMSO/GSBPM's activities and the possibility to map these to user defined activities. See <a href="#activities">Section 3. Activities</a>.</p>
<p>In the context of the ontology, organizations are distinguished by whether they produce Official Statistics at national or international level. See <a href="#organizations">Section 4. Organizations</a>.</p>
<p>In the course of a statistical production process, different kinds of information objects flow from one activity to the other. The ontology focuses on the core information object “DataSet”. See <a href="#products">Section 5. Products</a>.</p>
</section>
<section id="coos-docs">
<h3>COOS Documents</h3>
<p>In addition to the present specification, the COOS family of documents includes:</p>
<ul>
<li>The <a href="coos.ttl">formal vocabulary</a> expressed with the Terse RDF Triple Language (Turtle) [[turtle]] syntax.</li>
<li>The <a href="governance.html">governance document</a> which describes the principles and processes used for the evolution of COOS.</li>
<li>The <a href="uri-policy.html">URI policy</a> which describes the principles used for the naming and identification of the COOS artifacts.</li>
</ul>
<p>If there are any ambiguities or contradictions between the documents, the formal vocabulary will be authoritative.</p>
</section>
</section>
<section id="nsvoc">
<h2>COOS Namespace and Vocabulary</h2>
<p>The COOS namespace URI is:</p>
<ul>
<li><strong>http://id.unece.org/def/coos#</strong></li>
</ul>
<p>The prefix <code>coos</code> will be associated to this namespace in this document.</p>
<p>The COOS vocabulary is a set of URIs, given in the left-hand column in Table 1 below. Note that we use upper camel case for naming classes and lower camel case for properties. The right-hand column indicates in which section of this document the corresponding term is explained in more detail.</p>
<table class="simple">
<caption>Table 1. COOS Vocabulary</caption>
<thead>
<tr>
<th>URI</th>
<th>Definition</th>
</tr>
</thead>
<tbody>
<tr>
<td>coos:Activity</td>
<td><a href="#activities">Section 3. Activities</a></td>
</tr>
<tr>
<td>coos:ActivityArea</td>
<td><a href="#activities">Section 3. Activities</a></td>
</tr>
<tr>
<td>coos:ActivityCategory</td>
<td><a href="#activities">Section 3. Activities</a></td>
</tr>
<tr>
<td>coos:StatisticalActivity</td>
<td><a href="#activities">Section 3. Activities</a></td>
</tr>
<tr>
<td>coos:ProductionActivity</td>
<td><a href="#activities">Section 3. Activities</a></td>
</tr>
<tr>
<td>coos:OverarchingActivity</td>
<td><a href="#activities">Section 3. Activities</a></td>
</tr>
<tr>
<td>coos:Phase</td>
<td><a href="#activities">Section 3. Activities</a></td>
</tr>
<tr>
<td>coos:SubProcess</td>
<td><a href="#activities">Section 3. Activities</a></td>
</tr>
<tr>
<td>coos:StatisticalProgram</td>
<td><a href="#activities">Section 3. Activities</a></td>
</tr>
<tr>
<td>coos:StatisticalProgramCycle</td>
<td><a href="#activities">Section 3. Activities</a></td>
</tr>
<tr>
<td>coos:Task</td>
<td><a href="#activities">Section 3. Activities</a></td>
</tr>
<tr>
<td>coos:Capability</td>
<td><a href="#activities">Section 3. Activities</a></td>
</tr>
<tr>
<td>coos:CoreCapability</td>
<td><a href="#activities">Section 3. Activities</a></td>
</tr>
<tr>
<td>coos:CrossCuttingCapability</td>
<td><a href="#activities">Section 3. Activities</a></td>
</tr>
<tr>
<td>coos:uses</td>
<td><a href="#activities">Section 3. Activities</a></td>
</tr>
<tr>
<td>coos:supports</td>
<td><a href="#activities">Section 3. Activities</a></td>
</tr>
<tr>
<td>coos:enables</td>
<td><a href="#activities">Section 3. Activities</a></td>
</tr>
<tr>
<td>coos:StatisticalOrganization</td>
<td><a href="#organizations">Section 4. Organizations</a></td>
</tr>
<tr>
<td>coos:NationalStatisticalInstitute</td>
<td><a href="#organizations">Section 4. Organizations</a></td>
</tr>
<tr>
<td>coos:InternationalAgency</td>
<td><a href="#organizations">Section 4. Organizations</a></td>
</tr>
<tr>
<td>coos:InformationObject</td>
<td><a href="#products">Section 5. Products</a></td>
</tr>
<tr>
<td>coos:StatisticalInformationObject</td>
<td><a href="#products">Section 5. Products</a></td>
</tr>
<tr>
<td>coos:Entity</td>
<td><a href="#products">Section 5. Products</a></td>
</tr>
<tr>
<td>coos:Product</td>
<td><a href="#products">Section 5. Products</a></td>
</tr>
<tr>
<td>coos:Dataset</td>
<td><a href="#products">Section 5. Products</a></td>
</tr>
<tr>
<td>coos:DimensionalDataset</td>
<td><a href="#products">Section 5. Products</a></td>
</tr>
<tr>
<td>coos:GraphDataset</td>
<td><a href="#products">Section 5. Products</a></td>
</tr>
<tr>
<td>coos:KeyValueDataset</td>
<td><a href="#products">Section 5. Products</a></td>
</tr>
<tr>
<td>coos:RectangularDataset</td>
<td><a href="#products">Section 5. Products</a></td>
</tr>
<tr>
<td>coos:TransposedDataset</td>
<td><a href="#products">Section 5. Products</a></td>
</tr>
<tr>
<td>coos:ProductPresentation</td>
<td><a href="#products">Section 5. Products</a></td>
</tr>
<tr>
<td>coos:ProductContent</td>
<td><a href="#products">Section 5. Products</a></td>
</tr>
<tr>
<td>coos:metadataFor</td>
<td><a href="#products">Section 5. Products</a></td>
</tr>
<tr>
<td>coos:content</td>
<td><a href="#products">Section 5. Products</a></td>
</tr>
<tr>
<td>coos:presentation</td>
<td><a href="#products">Section 5. Products</a></td>
</tr>
<tr>
<td>coos:ReferenceDocument</td>
<td><a href="#other-terms">Section 6. Other terms</a></td>
</tr>
<tr>
<td>coos:informs</td>
<td><a href="#other-terms">Section 6. Other terms</a></td>
</tr>
</tbody>
</table>
<p>COOS defines a number of individuals. The majority of these are instances of the classes above and are described in more detail in the relevant sections.</p>
<p>Other vocabularies used in this document or in the RDF specification are listed in Table 2 below, with their namespaces and associated prefixes.</p>
<table class="simple">
<caption>Table 2. Other vocabularies used in this document</caption>
<thead>
<tr>
<th>Prefix</th>
<th>URI</th>
<th>Description</th>
</tr>
</thead>
<tbody>
<tr>
<td>cc</td>
<td>https://creativecommons.org/ns</td>
<td>Describing Copyright in RDF: Creative Commons Rights Expression Language ([[!CC-ABOUT]])</td>
</tr>
<tr>
<td>dc</td>
<td>http://purl.org/dc/elements/1.1/</td>
<td>Dublin Core Metadata Element Set, Version 1.1 ([[!DC11]])</td>
</tr>
<tr>
<td>dcat</td>
<td>http://www.w3.org/ns/dcat#</td>
<td>Data Catalog Vocabulary ([[!vocab-dcat-2]])</td>
</tr>
<tr>
<td>dcterms</td>
<td>http://purl.org/dc/terms/</td>
<td>Dublin Core Metadata Initiative Metadata Terms ([[!DCTERMS]])</td>
</tr>
<tr>
<td>foaf</td>
<td>http://xmlns.com/foaf/0.1/</td>
<td>FOAF Vocabulary Specification 0.99 ([[!FOAF]])</td>
</tr>
<tr>
<td>org</td>
<td>http://www.w3.org/ns/org#</td>
<td>The Organization Ontology ([[!vocab-org]])</td>
</tr>
<tr>
<td>pav</td>
<td>http://purl.org/pav/</td>
<td>PAV - Provenance, Authoring and Versioning ([[!PAV]])</td>
</tr>
<tr>
<td>prov</td>
<td>http://www.w3.org/ns/prov#</td>
<td>PROV-O: The PROV Ontology ([[!prov-o]])</td>
</tr>
<tr>
<td>skos</td>
<td>http://www.w3.org/2004/02/skos/core#</td>
<td>SKOS Simple Knowledge Organization System Reference ([[!skos-reference]])</td>
</tr>
<tr>
<td>vann</td>
<td>http://purl.org/vocab/vann/</td>
<td>VANN: A vocabulary for annotating vocabulary descriptions ([[!vann]])</td>
</tr>
<tr>
<td>voaf</td>
<td>http://purl.org/vocommons/voaf#</td>
<td>Vocabulary of a Friend (VOAF) ([[!voaf]])</td>
</tr>
</tbody>
</table>
<p>RDF, RDFS, OWL and XSD vocabularies or namespaces are also used, with their usual URIs and prefixes.</p>
<p>The RDF examples are expressed in Turtle. Unless otherwise specified, these examples use the http://example.org/ns/ domain, which will be represented by the <code>ex:</code> prefix. Note however that individual resource names used as examples are entirely fictitious.</p>
</section>
<section id="activities">
<h2>Activities</h2>
<p>A large part of the COOS is related to the activities performed by statistical organizations. Indeed, the GSBPM model was one of the first to be standardized at the international level. It remains the most widely used of the UNECE models. The GAMSO appeared later, and was subsequently articulated with the GSBPM in a coherent scheme.</p>
<p>In this section, this timeline is followed: we first describe the COOS artefacts that originate from the GSBPM, then add the GAMSO view. Other useful COOS constructs related to Official Statistics activities are listed in a third sub-section (Section 3.3).</p>
<section id="activities-gsbpm">
<h3>Base GSBPM model</h3>
<p>The GSBPM comprises three levels:</p>
<ul>
<li>Level 0, the statistical business process;</li>
<li>Level 1, the phases of the statistical business process;</li>
<li>Level 2, the sub-processes within each phase.</li>
</ul>
<p>A first approach to OWL modelling of statistical activities based on the GSBPM is presented in [[GSBPM-LM]].</p>
<p>In COOS, a sub-class of <code>skos:Concept</code> named <code>ActivityCategory</code> is created. Each GSBPM phase and sub-process is defined as an instance of <code>ActivityCategory</code> and included (with a <code>skos:inScheme</code> relation) in a <code>skos:ConceptScheme</code> named <code>GSBPM</code>: the scheme of all the GSBPM phase and sub-process categories.</p>
<p>In addition, the <code>statisticalProductionProcess</code> individual, representing the statistical production process as a whole, is also declared as an instance of <code>ActivityCategory</code> and included in the <code>GSBPM</code> concept scheme.</p>
<p>The links between the different individuals are represented by the usual SKOS properties: all the sub-processes of a phase have a <code>skos:broader</code> link to the phase, and each phase has a <code>skos:broader</code> link to the <code>statisticalProductionProcess</code> individual.</p>
<pre class="example nohighlight" title="Example of GSBPM phase and sub-process">
<http://id.unece.org/activities/phase/2> a coos:ActivityCategory ;
skos:notation "2" ;
skos:prefLabel "Design"@en ;
skos:narrower <http://id.unece.org/activities/subProcess/2.3> .
<http://id.unece.org/activities/subProcess/2.3> a coos:ActivityCategory ;
skos:notation "2.3" ;
skos:prefLabel "Design collection"@en ;
skos:broader <http://id.unece.org/activities/phase/2> .
</pre>
<p>To materialize a clear distinction between the model itself and the statistical process which is modeled, the ontology defines classes that inherit from <code>prov:Activity</code>. Using PROV makes it possible to describe an activity that really happens at a certain time, and to specify who undertakes the activity and what entities it consumes or produces.</p>
<p>Basically, three classes are defined in this ontology: <code>ProductionActivity</code>, <code>Phase</code> and <code>SubProcess</code>.</p>
<p><code>ProductionActivity</code> is a sub-class of <code>prov:Activity</code>. Instances of this class are intended to be larger than a GSBPM phase. If they correspond to a statistical production process they are typed using the <code>dcterms:type</code> property with the <code>statisticalProductionProcess</code> individual.</p>
<pre class="example nohighlight" title="Examples of statistical production activities">
ex:produce-study-2021 a coos:ProductionActivity ;
rdfs:label "Produce study on legal population 2021"@en .
ex:produce-census-2021 a coos:ProductionActivity ;
rdfs:label "Produce census 2021"@en ;
dcterms:type coos:statisticalProductionProcess .
</pre>
<p><code>Phase</code> and <code>SubProcess</code> are both sub-classes of <code>ProductionActivity</code>. A link to the GSBPM taxonomy using the property <code>dcterms:type</code> is made on instances of these classes to formalize to what part of the GSBPM the activity that happened corresponds. Additionally, the idea that each phase is made up of its different sub-processes can be represented by using the Dublin Core <code>dcterms:hasPart</code> and <code>dcterms:isPartOf</code> properties.</p>
<pre class="example nohighlight" title="Examples of instances phase and sub-process">
ex:collect-census-2021 a coos:Phase ;
rdfs:label "Collect census 2021"@en ;
dcterms:type <http://id.unece.org/activities/phase/4> .
ex:run-collection-census-2021 a coos:SubProcess ;
rdfs:label "Run collection of census 2021"@en ;
dcterms:isPartOf ex:collect-census-2021 ;
dcterms:type <http://id.unece.org/activities/subProcess/4.3> .
</pre>
<p>[[[#image-gsbpm]]] summarizes the vocabulary terms defined so far in this document.</p>
<figure id="image-gsbpm">
<img src="img/coos-gsbpm.png" alt="GSBPM vocabulary overview" title="GSBPM vocabulary overview"/>
<figcaption>Overview of COOS vocabulary for GSBPM</figcaption>
</figure>
<p>In addition, the ontology defines three individuals of <code>ActivityCategory</code> to model the types of overarching activities provided by GSBPM: “Quality Management”, “Data Management” and “Metadata Management”.</p>
<pre class="example nohighlight" title="Examples of GSBPM overarching activity">
<http://id.unece.org/activities/overarchingActivity/0.2> a coos:ActivityCategory ;
skos:notation "0.2" ;
skos:prefLabel "Metadata Management"@en .
</pre>
</section>
<section id="activities-gamso">
<h3>Adding GAMSO</h3>
<p>Introducing GAMSO in this framework is not very difficult. The terminology used in the model shows that the main concepts are activity and activity areas. GAMSO describes two kinds of activities: those that are of statistical nature, for example “Manage Statistical Methodology”, and those that are in support of statistical activities, like “Manage Finances”. To account for this distinction, COOS defines two different classes: <code>StatisticalActivity</code> and <code>Activity</code>. <code>Activity</code> is a subclass of <code>prov:Activity</code> and a superclass of <code>StatisticalActivity</code>, which in turn is a superclass of the <code>ProductionActivity</code> introduced previously.</p>
<p>The GAMSO activity areas are not really activities by themselves, but rather “boxes” used to classify the activities. This makes them instances of <code>skos:Concept</code> to represent this “taxonomic” nature, or more precisely of a specific <code>ActivityArea</code> class, which is a sub-class of <code>skos:Concept</code>.</p>
<p>As with GSBPM, COOS defines individuals for each GAMSO activity and activity area, and the model itself is a <code>skos:ConceptScheme</code>. At the top of the scheme, an instance of <code>ActivityCategory</code> represents the whole activity of statistical organizations and is named <code>officialStatistics</code>. All four activity areas have <code>skos:broader</code> links to it and <code>skos:narrower</code> links to the categories of activity they contain. </p>
<pre class="example nohighlight" title="Examples of GAMSO activity area and activity category">
<http://id.unece.org/activities/activityArea/1> a coos:ActivityArea ;
skos:notation "1" ;
skos:prefLabel "Strategy and Leadership"@en .
<http://id.unece.org/activities/activity/2.3> a coos:ActivityCategory ;
skos:notation "2.3" ;
skos:prefLabel "Monitor Capability Improvements"@en .
</pre>
<p>In addition, COOS defines the <code>OverarchingActivity</code> class, which represents a supporting activity that applies to all production phases, e.g. metadata management, data management, statistical methodology management, etc. These activities correspond to the overarching processes in the GSBPM and the statistical activities in Corporate Support in GAMSO. These activities support the execution of phases and sub-processes effectively. When harmonized and standardized, overarching activities enable the efficient execution of the entire statistical production process.</p>
<p>Finally, two properties are needed to describe how an <code>Activity</code> classified with GAMSO relates to a <code>ProductionActivity</code> classified with GSBPM:</p>
<ul>
<li><code>supports</code>: an <code>Activity</code> helps to perform a <code>ProductionActivity</code>, e.g. a sub-process;</li>
<li><code>uses</code>: conversely, a statistical production activity will require one or several activities in order to function.</li>
</ul>
<p>Properties <code>uses</code> and <code>supports</code> are inverses of each other. They represent weaker (looser) forms of dependencies than partitive relationships: <code>uses/supports</code> are about function whereas partitive relations are about constitution. An obvious example of <code>uses</code>/<code>supports</code> relation is included in the ontology for reference:</p>
<pre class="example nohighlight" title="Examples of “uses” and “supports” properties">
ex:quality-management a coos:OverarchingActivity ;
skos:prefLabel "Quality Management"@en ;
dcterms:type <http://id.unece.org/activities/overarchingActivity/0.1> ;
coos:uses ex:manage-quality .
ex:manage-quality a coos:Activity ;
skos:prefLabel "Manage Quality"@en ;
dcterms:type <http://id.unece.org/activities/activity/3.3> ;
coos:supports ex:quality-management .
</pre>
<p>[[[#image-gamso]]] gives an overview of the COOS elements defined so far in this document.</p>
<figure id="image-gamso">
<img src="img/coos-gamso.png" alt="GAMSO vocabulary overview" title="GAMSO vocabulary overview"/>
<figcaption>Overview of COOS vocabulary for GAMSO and GSBPM</figcaption>
</figure>
<p><em>Note</em>:</p>
<p>In COOS, the relation between the “Production” <code>ActivityArea</code> of GAMSO and the GSBPM as a concept scheme is expressed by a simple pair of <code>rdfs:seeAlso</code> properties. No GAMSO <code>ActivityCategory</code> is defined in the “Production” area, as that would be redundant with the “Statistical Production Process” individual, but a <code>skos:exactMatch</code> relation is made between the GAMSO activity area and the GSBPM individual. It is possible that GAMSO and GSBPM evolve in the future towards a totally integrated view. In such an approach, all GSBPM and GAMSO artefacts would go into a single <code>skos:ConceptScheme</code> and the GSBPM would not be a concept scheme anymore but rather a <code>skos:Collection</code>.</p>
</section>
<section id="activities-other">
<h3>Adding GSIM and CSDA</h3>
<p>Adding the Generic Statistical Information Model (GSIM) in this framework allows us to refine <code>StatisticalActivity</code> even further. In parallel to <code>ProductionActivity</code>, two additional sub-classes of <code>StatisticalActivity</code> can be defined:</p>
<ul>
<li><code>StatisticalProgram</code>: it is essentially a set of activities carried out to produce statistics. These statistics are about the set of units in scope for the program, e.g. “All persons with a university degree”, within a given subject field, e.g. income statistics, tourism, etc.</li>
<li><code>StatisticalProgramCycle</code>: statistical program activities are often repeated over time in iterations called cycles. A <code>StatisticalProgramCycle</code> is one of those iterations for a specific time and geography. To link a cycle to the corresponding program, it is recommended to use <code>dcterms:isPartOf</code>. Nevertheless, this property does not convey aspects like temporality, so a more specific property might be defined in a future version of COOS.</li>
</ul>
<p>Another dimension to consider is information capabilities. A capability is an ability a statistical organization possesses to undertake a specific activity. It is achieved through the integration of all relevant capability elements (e.g. methods, processes, standards and frameworks, IT systems and people skills). The notion of capability is often used in enterprise architecture approaches like <a href="https://www.opengroup.org/togaf">TOGAF</a> (The Open Group Architecture Framework) or, in the statistical domain, the Common Statistical Data Architecture [[CSDA]]. Capabilities can be mapped to strategic goals and objectives and provide a useful starting point to map lower level elements such as business process and functions, applications and technology assets.</p>
<p>Following CSDA, two types of capabilities are distinguished:</p>
<ul>
<li><code>Core capabilities</code>: capabilities the organization needs to execute its core business, i.e. the production of statistics. They generally map to phases in GSBPM.</li>
<li><code>Cross-cutting capabilities</code>: capabilities used to formulate and implement the policies that the organization chooses for its internal operations. They generally map to corporate support in GAMSO and overarching processes in GSBPM.</li>
</ul>
<p>Examples of capabilities relevant to statistical activities can be found in CSDA, but also in other references like the Global Statistical Geospatial Framework (<a href="https://www.efgs.info/wp-content/uploads/geostat/3/GEOSTAT3_GSGF_EuropeanImplementationGuide_v1.0.pdf">GSGF</a>):</p>
<pre class="example" title="Example of capabilities">
ex:csda-data-integration a coos:CoreCapability ;
rdfs:label "Data integration"@en ;
skos:definition "The ability to combine, link, relate and/or align different data sets in order to create an integrated information set."@en .
ex:geostat-integration-services a coos:Capability ;
rdfs:label "Develop and apply services for smarter statistical-geospatial integration"@en ;
rdfs:isDefinedBy <https://www.efgs.info/geostat/> .
</pre>
<p>Also, the European Statistical System has published a <a href="https://joinup.ec.europa.eu/collection/statistical-enterprise-architecture/document/ess-earf-business-capabilities-model">Business Capabilities Model</a> that defines a number of capabilities organized in three levels.</p>
<p>To represent the link between a capability and the activities that it makes possible, COOS defines the <code>enables</code> property. [[[#image-csda-ex]]] below shows an example using this property.</p>
<figure id="image-csda-ex">
<img src="img/coos-csda-ex.png" alt="Linking COOS capabilities and activities" title="Linking COOS capabilities and activities"/>
<figcaption>Linking COOS capabilities and activities</figcaption>
</figure>
<p><em>Note</em>:</p>
<p>There is an obvious relation between capabilities and the “Capability Development” GAMSO activity area, but this link is not formally modeled in COOS.</p>
</section>
<section id="activities-task">
<h3>More detailed activities</h3>
<p>The previous definitions stay in the framework of the GAMSO/GSBPM/GSIM/CSDA, but more precise notions will be needed by the statistical organizations for the more detailed modelling of their statistical activities. COOS users can adopt different names for these more specific activities that are scoped by a given GSBPM sub-process or overarching activity, but COOS defines the generic <code>Task</code> class as a common term for better interoperability. Tasks can be more or less granular and form hierarchies.</p>
<pre class="example" title="Example of tasks">
ex:calculate-mean-wages a coos:Task ;
rdfs:label "Calculate mean wages"@en .
ex:produce-study prov:wasInformedBy ex:calculate-mean-wages .
</pre>
<p>[[[#image-ex1]]] shows how PROV can be used to represent links between activities: here it is supposed that the “Produce study” activity uses the results of the “Calculate mean wages” task.</p>
<figure id="image-ex1">
<img src="img/coos-ex1.png" alt="Using PROV with COOS constructs" title="Using PROV with COOS constructs"/>
<figcaption>Using PROV with COOS constructs</figcaption>
</figure>
<p>As mentioned previously, the <code>Task</code> class can be used by statistical organizations to create instances for their own needs. Two statistical organizations can create their own "Calculate mean wages" task, and only their labels would indicate that they do similar things. In certain cases however, it could be useful to define standard sub-classes of <code>Task</code> for some widely used and specific types of tasks, for example record linkage or hot-deck imputation. This would make it possible, for example, to refer to the relevant methodology.</p>
</section>
</section>
<section id="organizations">
<h2>Organizations</h2>
<p>So far, this document has discussed how the ontology represents what the official statistics community does in the form of activities, capabilities and processes. This section describes the ontology part that captures how this community is organized.</p>
<p>Essentially, what is needed is the notion of a collection of people organized as a group in some formal structure, which is represented using <code>org:Organization</code> and <code>prov:Organization</code>. Then a <code>StatisticalOrganization</code> is defined as an organization, or unit within an organization, whose primary role is the production of official statistics. This <code>StatisticalOrganization</code> class is further specialized into two sub-classes:</p>
<ul>
<li><code>NationalStatisticalInstitute</code>: The main producer of official statistics in a country and/or the organization responsible for coordinating all activities related to the development, production, and dissemination of official statistics in the national statistical system. The actual name given to the national statistical office in a country may be National Statistical Institute (NSI), National Bureau of Statistics (NBS), Central Bureau of Statistics (CBS), National Statistical Agency (NSA), Central Statistical Agency (CSA), Central Statistics Agency (CSA), etc.</li>
<li><code>InternationalAgency</code>: A body with an international membership, scope, or presence whose primary role is the production of official statistics.</li>
</ul>
<figure id="image-org">
<img src="img/coos-org.png" alt="Organizations vocabulary overview" title="Organizations vocabulary overview"/>
<figcaption>Organizations vocabulary overview</figcaption>
</figure>
<p>The use of <code>org:Organization</code> as a parent class makes it possible to benefit from all the constructs of the ORG ontology, for example the <code>org:hasUnit</code>/<code>org:unitOf</code> properties for the representation of the hierarchical links between organizations. The use of <code>prov:Organization</code> makes it possible to capture the relations between organizations, activities and products, for example <code>prov:wasAssociatedWith</code> can link a statistical activity to the statistical organization that conducts it.</p>
<p>It would also be useful to include in the base ontology instances of <code>StatisticalOrganization</code> representing the existing national statistical institutes and international statistical organizations. This would provide in particular a shared global identifier for each of these organizations.</p>
</section>
<section id="products">
<h2>Products</h2>
<p>Finally, the information that activities, capabilities and processes use and produce needs to be described. This is captured by an <code>InformationObject</code> class that aligns with the notion of “information object” in GSIM, i.e. all GSIM classes are going to be sub-classes of <code>InformationObject</code>. A <code>StatisticalInformationObject</code> is essentially an <code>InformationObject</code> representing statistical information, i.e. the inputs and outputs in the design and production of statistics. Some of those statistical information objects are also entities in the <code>PROV</code> sense, which are captured by the <code>Product</code> and <code>Dataset</code> classes, corresponding to the GSIM Product and GSIM Data Set, respectively.</p>
<p>Here again, the PROV vocabulary is useful, in particular to provide provenance information on products and to link them to activities and organizations (for example <code>prov:wasGeneratedBy</code> from a product to an activity).</p>
<p>The <code>Dataset</code> class is also declared as a daughter of <code>dcat:Dataset</code>, which allows the reuse of DCAT (or StatDCAT-AP), in particular to document datasets and their different distributions, group them in catalogues, etc.</p>
<figure id="image-prod">
<img src="img/coos-prod.png" alt="Products vocabulary main classes" title="Products vocabulary main classes"/>
<figcaption>Products vocabulary main classes</figcaption>
</figure>
<p>In DDI-CDI ([[DDI-CDI]]), four basic structural types of organizing data sets have been defined: rectangular, event history, key-value pair, and dimensional. Here the 'graph' type is added, which also covers tree-like data, and 'event history' is renamed 'transposed'. Several of the types could be used to structure the same data. There is no canonical structure in all cases, though some data is much more amenable to one structure over the others.</p>
<p>The types are defined roughly as follows:</p>
<ul>
<li>rectangular (or wide) - rows are units and columns are variables</li>
<li>transposed (or event history, tall or long) - rows are based on the value for each variable, one unit at a time</li>
<li>dimensional - a pre-defined set of cells defined by the combination of categories, one from each of a set of dimensions (category sets), used to handle the value of some measure (variable) restricted to the cell</li>
<li>key-value - a set of values, each associated with some key</li>
<li>graph - datapoints are nodes and relationships between them are edges in a graph structure</li>
</ul>
<p>Dimensional data are usually associated with aggregates. Key-value data are often taken from scraping the web. Event-history (transposed) is used to describe events over some time period.</p>
<p>The nominal, ordinal, interval and ratio categories are not used to differentiate datasets. Rather, they are families of datatypes used to describe variables. Nominal data are those conforming to a finite set of categories with no other conditions (sex categories). Ordinal data are those conforming to an ordered finite set of categories, but the difference between adjacent categories is not necessarily uniform (Likert scale measures of satisfaction). Interval data are numeric with no zero (absence of quantity) defined (Celsius temperature). Ratio data are numeric with a defined zero (Kelvin temperature). These apply to any kind of statistical data.</p>
<p>The distinction between aggregate and unit data is based on the definition of the variables in the dataset. A dataset can contain both unit and aggregate data.</p>
<p>Access restrictions on data (e.g., public, restricted, private) are assigned by the business and can change over the life-cycle of the dataset.</p>
<p>The domain for a dataset is defined by the subject field that the data apply to. However, some datasets are merged from others, so a merged dataset can have the combined subject fields of its constituents. There seems to be no restriction on the number of subject fields.</p>
<p>Mode of transmission is not definitional for a dataset, as a single dataset can be obtained multiple ways. The phases of GSBPM may not be useful, as a single dataset can pass through a phase without change. Further, the phases impose a usage criterion (data for collection; data for editing; etc.) that seems arbitrary and would be useless in another domain (outside statistics).</p>
<p>Similarly, the explorative, temporary, and organizational categorization is based on intent, rather than the data per se. Plus, the categorization could change without any change to the data. If the organizational structure described above (rectangular, etc.) is changed, then it should be called a new dataset.</p>
<figure id="image-ds">
<img src="img/coos-ds.png" alt="Dataset types" title="Dataset types"/>
<figcaption>Dataset types</figcaption>
</figure>
<p>No specific type of data set is defined for metadata, since sets of metadata can have any of the structures defined. However, COOS specifies the <code>metadataFor</code> property that can be used to associate a set of metadata to the dataset (or other resource) that it qualifies. For example, the [[SDMX]] standard defines the notion of a <code>MetadataSet</code> which conforms to a <code>MetadataStructureDefinition</code> and can be attached to different types of targets. Note that the notion of metadata is contextual: a table of exchange rates can be viewed as a data set in the framework of a financial study but as metadata for a set of international consumer prices. The range of the <code>metadataFor</code> property is intentionally left open in order to allow for cases where metadata sets are attached to studies or other kinds of products.</p>
<p>To further qualify statistical products, COOS defines two properties: <code>content</code> and <code>presentation</code>, that take their values in controlled vocabularies (represented as SKOS concept schemes). More specifically, COOS defines the following values for these properties:</p>
<ul>
<li><code>content</code> can be “Data”, “Metadata”, “Analysis” or “Model”</li>
<li><code>presentation</code> can be “Dataset”, “Publication”, “Visualization”, “Infographic”, “Thematic Map” or “Interactive”</li>
</ul>
<p>Implementors can define additional instances of <code>coos:ProductContent</code> or <code>coos:ProductPresentation</code> for their specific needs.</p>
</section>
<section id="additional-terms">
<h2>Additional terms</h2>
<p>Statistical activities often rely on methodologies, guidelines, legal texts, etc. In order to capture this, COOS defines the <code>ReferenceDocument</code> class and the <code>informs</code> property.</p>
<p>As an example, statistical organisations regularly produce seasonally adjusted time series for Gross domestic product (GDP). For the creation of these statistical datasets, the unadjusted (raw) time series data and methodological guidelines and tools supporting seasonal adjustment are used. The end product is seasonally and calendar (effects) adjusted and reconciled GDP time series data. A well-known methodological reference for seasonal adjustment is the Handbook on Seasonal Adjustment published by Eurostat.</p>
<p>This example is illustrated in the following figure.</p>
<figure id="image-informs">
<img src="img/coos-informs.png" alt="Example for informs and reference document" title="Example for informs and reference document"/>
<figcaption>The Handbook on Seasonal Adjustment informs the corresponding activity</figcaption>
</figure>
</section>
<section id="conclusion">
<h2>Conclusion</h2>
<p>This paper introduced COOS, an ontology that serves as an integration model for the core set of ModernStats standards. During the development of COOS, it became evident that the natural misalignments and inconsistencies found among models developed independently are often diminished, and occasionally eliminated altogether, by the presence of a solid integration framework.</p>
<p>Future work includes investigating the feasibility and benefits of integrating other aspects of standards already included, e.g. objects of the GSIM Concept Group, and looking into new standards and architectures, e.g. the Common Statistical Production Architecture (CSPA), the ESS Enterprise Architecture Reference Framework, the European Interoperability Framework (EIF) and Reference Architecture (EIRA), and the Single Integrated Metadata Structure (SIMS), among others.</p>
</section>
<section class='appendix'>
<h2>Acknowledgements</h2>
<p>This is a placeholder for now.</p>
<p>In memory of Thérèse Lalor.</p>
</section>
<section class="appendix">
<h2>Full copyright</h2>
<p>Copyright © 2021 Unece, <i>All Rights Reserved</i><br/><a href="http://www.unece.org/">http://www.unece.org/</a></p>
<p>Content of this document is licensed under a Creative Commons License:<br/>Attribution 3.0 IGO (CC BY 3.0 IGO)</p>
<p>This is a human-readable summary of the Legal Code (the full license).<br/><a href="https://creativecommons.org/licenses/by/3.0/igo/">https://creativecommons.org/licenses/by/3.0/igo/</a></p>
<p>You are free to:</p>
<ul>
<li>Share - copy and redistribute the material in any medium or format</li>
<li>Adapt - remix, transform, and build upon the material for any purpose, even commercially.</li>
</ul>
<p>The licensor cannot revoke these freedoms as long as you follow the license terms.</p>
<p>Under the following terms:</p>
<ul>
<li>Attribution. You must give appropriate credit, provide a link to the license, and indicate if changes were made. You may do so in any reasonable manner, but not in any way that suggests the licensor endorses you or your use.</li>
<li>No additional restrictions. You may not apply legal terms or technological measures that legally restrict others from doing anything the license permits.</li>
</ul>
<div style="font-size:80%; margin-top:40px; margin-bottom:40px">
<p>Disclaimer</p>
<p>This deed highlights only some of the key features and terms of the actual license. It is not a license and has no legal value. You should carefully review all of the terms and conditions of the actual license before using the licensed material.</p>
<p>Creative Commons is not a law firm and does not provide legal services. Distributing, displaying, or linking to this deed or the license that it summarizes does not create a lawyer-client or any other relationship.</p>
</div>
<p>Legal Code:<br/><a href="https://creativecommons.org/licenses/by/3.0/igo/legalcode">https://creativecommons.org/licenses/by/3.0/igo/legalcode</a></p>
</section>
</body>
</html>