// MySAX2Handler.h
/*
* Created by Damir Cavar on 7/28/15.
*
* Part of the elan2split project.
*
* Xerces-C++ handler for SAX2 parser events.
*
* (C) 2015 by Damir Cavar <damir@linguistlist.org>
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
#ifndef TUTORIAL1_MYSAX2HANDLER_H
#define TUTORIAL1_MYSAX2HANDLER_H
#include <string.h>

#include <fstream>
#include <iostream>
#include <sstream>
#include <stdexcept>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

#include <boost/filesystem.hpp>
#include <xercesc/sax2/Attributes.hpp>
#include <xercesc/sax2/DefaultHandler.hpp>

#include "Interval.h"
/**
 * Numeric identifiers for the tag names the handler distinguishes.
 *
 * Values start at 1 and increase by one in declaration order, so 0 is never
 * a valid tag. Every enumerator is spelled out with its value to make the
 * numbering visible at a glance.
 */
enum Tags {
    ALIGNABLE_ANNOTATION = 1,
    ANNOTATION           = 2,
    ANNOTATION_ID        = 3,
    ANNOTATION_REF       = 4,
    ANNOTATION_VALUE     = 5,
    HEADER               = 6,
    LINGUISTIC_TYPE_REF  = 7,
    MEDIA_DESCRIPTOR     = 8,
    PARTICIPANT          = 9,
    REF_ANNOTATION       = 10,
    RELATIVE_MEDIA_URL   = 11,
    TIER                 = 12,
    TIER_ANNOTATOR       = 13,
    TIER_ID              = 14,
    TIME_ORDER           = 15,
    TIME_SLOT            = 16,
    TIME_SLOT_ID         = 17,
    TIME_SLOT_REF1       = 18,
    TIME_SLOT_REF2       = 19,
    TIME_UNITS           = 20,
    TIME_VALUE           = 21
};
// NOTE(review): these using-directives are at header scope, so they also
// apply to every file that includes this header. Other translation units may
// have come to depend on that; audit all includers before removing them.
using namespace std;
using namespace xercesc;
// Short alias for the Boost filesystem namespace.
namespace fs = boost::filesystem;
class MySAX2Handler : public DefaultHandler {
public:
void startElement(
const XMLCh *const uri,
const XMLCh *const localname,
const XMLCh *const qname,
const Attributes &attrs
);
void endElement(
const XMLCh *const uri,
const XMLCh *const localname,
const XMLCh *const qname);
void characters(const XMLCh *const chars, const XMLSize_t length);
void fatalError(const SAXParseException &);
// a map for time slot IDs and time values
unordered_map<string, int> time_slots;
// store map for referenced time slot IDs and time values in dependent tiers
unordered_map<string, pair<string,string>> ref_slots;
// time units (e.g. milliseconds or other)
string time_units;
// relative media URL
vector<string> rel_media_urls;
// initializing a hash-map
unordered_map<string, int> myTags_{
{"ALIGNABLE_ANNOTATION", ALIGNABLE_ANNOTATION},
{"ANNOTATION", ANNOTATION},
{"ANNOTATION_ID", ANNOTATION_ID},
{"ANNOTATION_REF", ANNOTATION_REF},
{"ANNOTATION_VALUE", ANNOTATION_VALUE},
{"ANNOTATOR", TIER_ANNOTATOR},
{"HEADER", HEADER},
{"LINGUISTIC_TYPE_REF", LINGUISTIC_TYPE_REF},
{"MEDIA_DESCRIPTOR", MEDIA_DESCRIPTOR},
{"PARTICIPANT", PARTICIPANT},
{"REF_ANNOTATION", REF_ANNOTATION},
{"RELATIVE_MEDIA_URL", RELATIVE_MEDIA_URL},
{"TIER", TIER},
{"TIER_ID", TIER_ID},
{"TIME_ORDER", TIME_ORDER},
{"TIME_SLOT", TIME_SLOT},
{"TIME_SLOT_ID", TIME_SLOT_ID},
{"TIME_SLOT_REF1", TIME_SLOT_REF1},
{"TIME_SLOT_REF2", TIME_SLOT_REF2},
{"TIME_UNITS", TIME_UNITS},
{"TIME_VALUE", TIME_VALUE}
};
// file path
string file_path;
// quite mode on/off
bool quiet = false;
// add tier name to text-output file names
bool add_tier_name = false;
// tier selected is on when the parsed tier is selected,
// either the first one, or the one named via command line
bool tier_selected = false;
// state flag for processing ELAN file
bool parse_tier_annotation = false;
// another state flag
bool collect_annotation_value = false;
// force overwrite of existing files
bool force_overwrite = false;
// tier name to be processed
string target_tier_name;
// temporary Interval
Interval myInt;
// stringbuf for characters from tag-values
string buffer;
// output folder
string output_folder;
// text file suffix
string text_suffix;
MySAX2Handler();
};
#endif //TUTORIAL1_MYSAX2HANDLER_H