/*
 * Unless explicitly stated otherwise all files in this repository are licensed under the Apache License Version 2.0.
 * This product includes software developed at Datadog (https://www.datadoghq.com/).
 * Copyright 2019-2020 Datadog, Inc.
 */

@testable import Datadog
class KronosE2ETests: E2ETests {
    /// The logger sending logs on Kronos execution. These logs are available in the Mobile Integrations org.
    private var logger: Logger! // swiftlint:disable:this implicitly_unwrapped_optional
    /// The logger sending telemetry on internal Kronos execution. These logs are available in the Mobile Integrations org.
    private var telemetryLogger: Logger! // swiftlint:disable:this implicitly_unwrapped_optional
    private let queue = DispatchQueue(label: "kronos-monitor-queue")

    override func setUp() {
        super.setUp()
        logger = Logger
            .builder
            .set(loggerName: "kronos-e2e")
            .build()
        telemetryLogger = Logger.builder
            .set(loggerName: "kronos-e2e-internal-telemetry")
            .sendNetworkInfo(true)
            .build()
    }

    override func tearDown() {
        logger = nil
        telemetryLogger = nil
        super.tearDown()
    }

    /// Creates a Kronos monitor that checks connections to all IPs resolved from the NTP pool and sends additional telemetry on their statuses.
    private func createKronosMonitor() -> KronosMonitor? {
        if #available(iOS 14.2, *) {
            let monitor = KronosInternalMonitor(
                queue: queue,
                connectionMonitor: IPConnectionMonitor(queue: queue)
            )
            // Here we redirect the Internal Monitor's logger to the E2E Kronos logger (`telemetryLogger`)
            // so that data is sent to the Mobile Integrations org, not the IM org.
            monitor.export(to: InternalMonitor(sdkLogger: telemetryLogger))
            return monitor
        } else {
            return nil
        }
    }

    /// TODO: RUMM-1859: Add E2E tests for monitoring Kronos in nightly tests
    func test_kronos_clock_performs_sync_using_datadog_ntp_pool() { // E2E:wip
        /// The result of `KronosClock.sync()`.
        struct KronosSyncResult {
            /// First received server date.
            var firstReceivedDate: Date? = nil
            /// First received server offset.
            var firstReceivedOffset: TimeInterval? = nil
            /// Last received server date.
            var lastReceivedDate: Date? = nil
            /// Last received server offset.
            var lastReceivedOffset: TimeInterval? = nil
            /// Device date measured at the moment of receiving any server date. Used for additional debugging and comparison.
            var measuredDeviceDate = Date()
        }

        func performKronosSync(using pool: String) -> KronosSyncResult {
            KronosClock.reset()
            defer { KronosClock.reset() }

            // Given
            let numberOfSamplesForEachIP = 2 // exchange only 2 samples with each resolved IP - to keep the test quick

            // Each IP (each server) is queried in parallel, but samples are obtained sequentially.
            // Here we compute the test timeout to ensure that all (parallel) servers complete querying their (sequential)
            // samples within `testTimeout`, assuming a +50% margin. This should guarantee no flakiness on test timeout.
            let testTimeout = kronosDefaultTimeout * Double(numberOfSamplesForEachIP) * 1.5
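            // For illustration, assuming `kronosDefaultTimeout` is 6s (the actual value is defined elsewhere
            // in the SDK, so 6s is an assumption here): 6s * 2 samples * 1.5 = 18s of test timeout.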

            // When
            let completionExpectation = expectation(description: "KronosClock.sync() calls completion closure")
            var result = KronosSyncResult()

            KronosClock.sync(
                from: pool,
                samples: numberOfSamplesForEachIP,
                monitor: createKronosMonitor(),
                first: { date, offset in // this closure may not be called at all if all samples to all servers fail
                    result.firstReceivedDate = date
                    result.firstReceivedOffset = offset
                    result.measuredDeviceDate = Date()
                },
                completion: { date, offset in // this closure should always be called
                    result.lastReceivedDate = date
                    result.lastReceivedOffset = offset
                    result.measuredDeviceDate = Date()
                    completionExpectation.fulfill()
                }
            )

            // Then

            // We don't expect a timeout on `completionExpectation`. The number of samples and the individual sample
            // timeout are configured in a way that lets `KronosNTPClient` always fulfill the `completionExpectation`.
            waitForExpectations(timeout: testTimeout)

            return result
        }

        // Run the test for each Datadog NTP pool:
        DateCorrector.datadogNTPServers.forEach { ddNTPPool in
            let result = measure(resourceName: DD.PerfSpanName.fromCurrentMethodName()) {
                performKronosSync(using: ddNTPPool)
            }

            // Report the result for this pool:
            if result.firstReceivedDate != nil,
               result.firstReceivedOffset != nil,
               let serverDate = result.lastReceivedDate,
               let serverOffset = result.lastReceivedOffset {
                // We consider the `KronosClock.sync()` result to be consistent only if it has both `first` and `last` time values set.
                // We log a consistent result as an INFO log that can be seen in the Mobile Integrations org.
                logger.info("KronosClock.sync() completed with consistent result for \(ddNTPPool)", attributes: [
                    "serverOffset_measured": serverDate.timeIntervalSince(result.measuredDeviceDate),
                    "serverOffset_received": serverOffset,
                    "serverDate_received": iso8601DateFormatter.string(from: serverDate),
                ])
            } else {
                // An inconsistent result may indicate a flaky execution, e.g. if the network was unreachable or if **all** NTP calls timed out.
                // We track an inconsistent result as a WARN log that will be watched by the E2E monitor.
                logger.warn("KronosClock.sync() completed with inconsistent result for \(ddNTPPool)", attributes: [
                    "serverDate_firstReceived": result.firstReceivedDate.flatMap { iso8601DateFormatter.string(from: $0) },
                    "serverDate_lastReceived": result.lastReceivedDate.flatMap { iso8601DateFormatter.string(from: $0) },
                    "serverOffset_firstReceived": result.firstReceivedOffset,
                    "serverOffset_lastReceived": result.lastReceivedOffset,
                ])
            }
        }
    }

    /// TODO: RUMM-1859: Add E2E tests for monitoring Kronos in nightly tests
    func test_kronos_ntp_client_queries_both_ipv4_and_ipv6_ips() { // E2E:wip
        /// The result of `KronosNTPClient.query(pool:)`.
        struct KronosNTPClientQueryResult {
            /// Partial offsets received for each NTP packet sent to each resolved IP.
            var receivedOffsets: [TimeInterval?] = []
            /// Expected number of NTP packets to send.
            var expectedNumberOfSamples = 0
            /// Actual number of NTP packets that completed.
            var numberOfCompletedSamples = 0
        }

        func performKronosNTPClientQuery() -> KronosNTPClientQueryResult {
            let testTimeout: TimeInterval = 30
            let monitor = createKronosMonitor()

            // Given
            let pool = "2.datadog.pool.ntp.org" // a pool resolving to multiple IPv4 and IPv6 addresses (e.g. 4 + 4)
            let numberOfSamplesForEachIP = 2 // exchange only 2 samples with each resolved IP - to keep the test quick
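            // For illustration, with the example counts above (8 resolved IPs: 4 IPv4 + 4 IPv6, and 2 samples each),
            // the query would exchange up to 8 * 2 = 16 NTP packets in total.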

            // Each IP (each server) is queried in parallel, but samples are obtained sequentially.
            // Here we compute the individual sample timeout to ensure that all (parallel) servers complete querying
            // their (sequential) samples within `testTimeout`, assuming a -30% margin. This should guarantee no flakiness
            // on test timeout.
            let timeoutForEachSample = (testTimeout / Double(numberOfSamplesForEachIP)) * 0.7
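            // For illustration, with the values above: (30s / 2 samples) * 0.7 = 10.5s of timeout for each sample.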

            // When
            let completionExpectation = expectation(description: "It completes all samples for all IPs")
            var result = KronosNTPClientQueryResult()

            monitor?.notifySyncStart(from: pool) // must be notified manually because normally it's called from `KronosClock.sync()`

            KronosNTPClient()
                .query(
                    pool: pool,
                    numberOfSamples: numberOfSamplesForEachIP,
                    maximumServers: .max, // query all resolved IPs in the pool - to include both IPv4 and IPv6
                    timeout: timeoutForEachSample,
                    monitor: monitor
                ) { offset, completed, total in
                    result.receivedOffsets.append(offset)
                    result.numberOfCompletedSamples = completed
                    result.expectedNumberOfSamples = total

                    if completed == total {
                        monitor?.notifySyncEnd(serverOffset: offset) // must be notified manually because normally it's called from `KronosClock.sync()`
                        completionExpectation.fulfill()
                    }
                }

            // Then

            // We don't expect a timeout on `completionExpectation`. The number of samples and the individual sample
            // timeout are configured in a way that lets `KronosNTPClient` always fulfill the `completionExpectation`.
            // In the worst case it can fulfill it while recording only `nil` offsets, which means that all NTP queries
            // timed out or failed.
            waitForExpectations(timeout: testTimeout)

            return result
        }

        // Run the test:
        let result = measure(resourceName: DD.PerfSpanName.fromCurrentMethodName()) {
            performKronosNTPClientQuery()
        }

        // Report the result:
        if result.receivedOffsets.contains(where: { offset in offset != nil }) {
            // We consider the `KronosNTPClient.query(pool:)` result to be consistent if it received at least one offset.
            let receivedOffsets: [String] = result.receivedOffsets.map { offset in
                if let offset = offset {
                    return "\(offset)"
                } else {
                    return "(nil)"
                }
            }
            logger.info(
                "KronosNTPClient.query(pool:) completed with consistent result receiving \(result.numberOfCompletedSamples)/\(result.expectedNumberOfSamples) NTP packets",
                attributes: [
                    "offsets_received": receivedOffsets
                ]
            )
        } else {
            // An inconsistent result may indicate a flaky execution, e.g. if the network was unreachable or if **all** NTP calls timed out.
            // We track an inconsistent result as a WARN log that will be watched by the E2E monitor.
            logger.warn(
                "KronosNTPClient.query(pool:) completed with inconsistent result receiving \(result.numberOfCompletedSamples)/\(result.expectedNumberOfSamples) NTP packets"
            )
        }
    }
}