Skip to content

Commit 9e08d52

Browse files
Merge pull request #990 from mouhamadalmounayar/custom-upsampling-resolution
user-defined upsampling resolution
2 parents 3d37db2 + 621423d commit 9e08d52

File tree

5 files changed

+284
-43
lines changed

5 files changed

+284
-43
lines changed

src/__tests__/if-run/builtins/time-sync.test.ts

Lines changed: 156 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@ const {
1919
} = ERRORS;
2020

2121
const {
22+
INCOMPATIBLE_RESOLUTION_WITH_INTERVAL,
23+
INCOMPATIBLE_RESOLUTION_WITH_GAPS,
2224
INVALID_OBSERVATION_OVERLAP,
2325
INVALID_TIME_NORMALIZATION,
2426
AVOIDING_PADDING_BY_EDGES,
@@ -34,17 +36,17 @@ jest.mock('luxon', () => {
3436
fromDateTimes: jest.fn((start, end) => ({
3537
start,
3638
end,
37-
splitBy: jest.fn(() => {
39+
splitBy: jest.fn(duration => {
3840
const intervals = [];
3941
let current = start;
4042

4143
while (current < end) {
4244
intervals.push({
4345
start: process.env.MOCK_INTERVAL === 'true' ? null : current,
44-
end: current.plus({seconds: 1}),
46+
end: current.plus(duration),
4547
});
4648

47-
current = current.plus({seconds: 1});
49+
current = current.plus(duration);
4850
}
4951

5052
return intervals;
@@ -942,6 +944,157 @@ describe('builtins/time-sync:', () => {
942944

943945
expect(result).toStrictEqual(expectedResult);
944946
});
947+
948+
it('should throw an error if the upsampling resolution is not compatible with the interval', async () => {
949+
const basicConfig = {
950+
'start-time': '2023-12-12T00:00:00.000Z',
951+
'end-time': '2023-12-12T00:00:03.000Z',
952+
interval: 3,
953+
'allow-padding': true,
954+
'upsampling-resolution': 2,
955+
};
956+
const timeModel = TimeSync(basicConfig, parametersMetadata, {});
957+
expect.assertions(1);
958+
try {
959+
await timeModel.execute([
960+
{
961+
timestamp: '2023-12-12T00:00:02.000Z',
962+
duration: 10,
963+
'cpu/utilization': 10,
964+
},
965+
]);
966+
} catch (error) {
967+
expect(error).toStrictEqual(
968+
new ConfigError(INCOMPATIBLE_RESOLUTION_WITH_INTERVAL)
969+
);
970+
}
971+
});
972+
973+
it('should throw an error if the upsampling resolution is not compatible with paddings', async () => {
974+
const basicConfig = {
975+
'start-time': '2023-12-12T00:00:00.000Z',
976+
'end-time': '2023-12-12T00:00:12.000Z',
977+
interval: 2,
978+
'allow-padding': true,
979+
'upsampling-resolution': 2,
980+
};
981+
const timeModel = TimeSync(basicConfig, parametersMetadata, {});
982+
expect.assertions(1);
983+
try {
984+
await timeModel.execute([
985+
{
986+
timestamp: '2023-12-12T00:00:05.000Z',
987+
duration: 10,
988+
'cpu/utilization': 10,
989+
},
990+
]);
991+
} catch (error) {
992+
expect(error).toStrictEqual(
993+
new ConfigError(INCOMPATIBLE_RESOLUTION_WITH_GAPS)
994+
);
995+
}
996+
});
997+
998+
it('should throw an error if the upsampling resolution is not compatible with gaps', async () => {
999+
const basicConfig = {
1000+
'start-time': '2023-12-12T00:00:00.000Z',
1001+
'end-time': '2023-12-12T00:00:12.000Z',
1002+
interval: 5,
1003+
'allow-padding': true,
1004+
'upsampling-resolution': 5,
1005+
};
1006+
const timeModel = TimeSync(basicConfig, parametersMetadata, {});
1007+
expect.assertions(1);
1008+
try {
1009+
await timeModel.execute([
1010+
{
1011+
timestamp: '2023-12-12T00:00:00.000Z',
1012+
duration: 5,
1013+
},
1014+
{
1015+
timestamp: '2023-12-12T00:00:07.000Z',
1016+
duration: 5,
1017+
},
1018+
]);
1019+
} catch (error) {
1020+
expect(error).toStrictEqual(
1021+
new ConfigError(INCOMPATIBLE_RESOLUTION_WITH_GAPS)
1022+
);
1023+
}
1024+
});
1025+
1026+
it('should upsample and resample correctly with a custom upsampling resolution given', async () => {
1027+
const basicConfig = {
1028+
'start-time': '2023-12-12T00:00:00.000Z',
1029+
'end-time': '2023-12-12T00:00:20.000Z',
1030+
interval: 5,
1031+
'allow-padding': true,
1032+
'upsampling-resolution': 5,
1033+
};
1034+
const timeModel = TimeSync(basicConfig, parametersMetadata, {});
1035+
const result = await timeModel.execute([
1036+
{
1037+
timestamp: '2023-12-12T00:00:00.000Z',
1038+
duration: 15,
1039+
},
1040+
]);
1041+
const expected = [
1042+
{
1043+
timestamp: '2023-12-12T00:00:00.000Z',
1044+
duration: 5,
1045+
},
1046+
{
1047+
timestamp: '2023-12-12T00:00:05.000Z',
1048+
duration: 5,
1049+
},
1050+
{
1051+
timestamp: '2023-12-12T00:00:10.000Z',
1052+
duration: 5,
1053+
},
1054+
{
1055+
timestamp: '2023-12-12T00:00:15.000Z',
1056+
duration: 5,
1057+
},
1058+
];
1059+
expect(result).toEqual(expected);
1060+
});
1061+
1062+
it('checks that metric carbon with aggregation == sum is properly spread over interpolated time points with custom upsampling resolution given', async () => {
1063+
const basicConfig = {
1064+
'start-time': '2023-12-12T00:00:00.000Z',
1065+
'end-time': '2023-12-12T00:00:15.000Z',
1066+
interval: 5,
1067+
'allow-padding': true,
1068+
'upsampling-resolution': 5,
1069+
};
1070+
const timeModel = TimeSync(basicConfig, parametersMetadata, {});
1071+
const result = await timeModel.execute([
1072+
{
1073+
timestamp: '2023-12-12T00:00:00.000Z',
1074+
duration: 15,
1075+
carbon: 3,
1076+
},
1077+
]);
1078+
1079+
const expected = [
1080+
{
1081+
timestamp: '2023-12-12T00:00:00.000Z',
1082+
duration: 5,
1083+
carbon: 1,
1084+
},
1085+
{
1086+
timestamp: '2023-12-12T00:00:05.000Z',
1087+
duration: 5,
1088+
carbon: 1,
1089+
},
1090+
{
1091+
timestamp: '2023-12-12T00:00:10.000Z',
1092+
duration: 5,
1093+
carbon: 1,
1094+
},
1095+
];
1096+
expect(result).toEqual(expected);
1097+
});
9451098
});
9461099
});
9471100
});

src/if-run/builtins/time-sync/README.md

Lines changed: 28 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -9,9 +9,10 @@ Time sync standardizes the start time, end time and temporal resolution of all o
99
The following should be defined in the plugin initialization:
1010

1111
- `start-time`: global start time as ISO 8601 string
12-
- `stop`: global end time as ISO 8061 string
12+
- `end-time`: global end time as ISO 8601 string
1313
- `interval`: temporal resolution in seconds
14-
- `error-on-padding`: avoid zero/'zeroish' padding (if needed) and error out instead. `False` by defult.
14+
- `allow-padding`: whether zero/'zeroish' padding may be applied when needed. When `false`, no padding is performed and the plugin errors out instead.
15+
- `upsampling-resolution`: temporal resolution at which observations will be upsampled, in seconds. Defaults to 1.
1516

1617
#### Inputs:
1718

@@ -28,7 +29,7 @@ A manifest file for a tree might contain many nodes each representing some diffe
2829
We do this by implementing the following logic:
2930

3031
- Shift readings to nearest whole seconds
31-
- Upsample the time series to a base resolution (1s)
32+
- Upsample the time series to a base resolution.
3233
- Resample to desired resolution by batching 1s entries
3334
- Extrapolate or trim to ensure all time series share global start and end dates
3435

@@ -39,6 +40,7 @@ The next section explains each stage in more detail.
3940
##### Upsampling rules
4041

4142
A set of `inputs` is naturally a time series because all `observations` include a `timestamp` and a `duration`, measured in seconds.
43+
4244
For each `observation` in `inputs` we check whether the duration is greater than 1 second. If `duration` is greater than 1 second, we create N new `observation` objects, where N is equal to `duration`. This means we have an `observation` for every second between the initial timestamp and the end of the observation period. Each new object receives a timestamp incremented by one second.
4345

4446
This looks as follows:
@@ -54,6 +56,7 @@ This looks as follows:
5456
{timestamp: '2023-12-12T00:00:04.000Z', duration: 1}
5557
{timestamp: '2023-12-12T00:00:05.000Z', duration: 1}
5658
]
59+
5760
```
5861

5962
Each `observation` actually includes many key-value pairs. The precise content of the `observation` is not known until runtime because it depends on which plugins have been included in the pipeline. Different values have to be treated differently when we upsample in time. The method we use to upsample depends on the `aggregation-method` defined for each key in `units.yml`.
@@ -151,12 +154,31 @@ For example, for `startTime = 2023-12-12T00:00:00.000Z` and `endTime = 2023-12-1
151154
]
152155
```
153156

154-
Note that when `error-on-padding` is `true` no padding is performed and the plugin will error out instead.
157+
Note that when `allow-padding` is `false` no padding is performed and the plugin will error out instead.
155158

156159
##### Resampling rules
157160

158161
Now we have synchronized, continuous, high resolution time series data, we can resample. To achieve this, we use `interval`, which sets the global temporal resolution for the final, processed time series. `interval` is expressed in units of seconds, which means we can simply batch `observations` together in groups of size `interval`. For each value in each object we either sum, average or copy the values into one single summary object representing each time bucket of size `interval` depending on their `aggregation-method` defined in `aggregation` section in the manifest file. The returned array is the final, synchronized time series at the desired temporal resolution.
159162

163+
#### Setting a custom upsampling resolution
164+
165+
The model defaults to upsampling observations to a 1-second resolution. However, this can lead to unnecessary effort, as upsampling at a coarser resolution is often sufficient, provided it doesn't interfere with the accuracy of resampling. To optimize performance, we can set the `upsampling-resolution` parameter in the configuration to a more appropriate value. The chosen value should meet the following criteria:
166+
167+
- It should evenly divide all observation durations within the dataset.
168+
- It must be a divisor of the `interval`.
169+
- It should also divide any gaps between observations, as well as the start and end paddings.
170+
171+
For example, for `interval = 10` and this time-series
172+
173+
```ts
174+
[
175+
{timestamp: '2023-12-12T00:00:00.000Z', duration: 300},
176+
]
177+
```
178+
setting the `upsampling-resolution` to `10s` is preferable to the default behavior.
179+
If the default behavior were used, the model would create `300` samples of `1s` each, which would be inefficient. By setting a custom `upsampling-resolution` of `10s`, the model only generates `30` samples, each representing `10s`.
180+
181+
160182
#### Assumptions and limitations
161183

162184
To do time synchronization, we assume:
@@ -173,7 +195,8 @@ Then, you can call `execute()`.
173195
const config = {
174196
'start-time': '2023-12-12T00:00:00.000Z',
175197
'end-time': '2023-12-12T00:00:30.000Z',
176-
interval: 10
198+
interval: 10,
199+
'allow-padding': true,
177200
}
178201
const timeSync = TimeSync(config);
179202
const results = timeSync.execute([

0 commit comments

Comments
 (0)