Skip to content

Commit 0fbb431

Browse files
committed
update temporal integration to support temporal version 1.27
1 parent 5c02fc9 commit 0fbb431

File tree

9 files changed

+1071
-582
lines changed

9 files changed

+1071
-582
lines changed

temporal/datadog_checks/temporal/metrics.py

Lines changed: 213 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
# E501: line too long (XXX > 120 characters)
66
# flake8: noqa: E501
7-
# Source for metric names: https://github.com/temporalio/temporal/blob/master/common/metrics/metric_defs.go
7+
# Source for metric names: https://github.com/temporalio/temporal/blob/17e06952d471f86d106af0158419d9e288371585/common/metrics/metric_defs.go
88
METRIC_MAP = {
99
# General
1010
'service_requests': 'service.requests',
@@ -13,6 +13,17 @@
1313
'service_error_with_type': 'service.error_with_type',
1414
'service_errors_critical': 'service.errors.critical',
1515
'service_errors_resource_exhausted': 'service.errors.resource_exhausted',
16+
'service_panics': 'service.panics',
17+
'service_errors_invalid_argument': 'service.errors.invalid_argument',
18+
'service_errors_namespace_not_active': 'service.errors.namespace_not_active',
19+
'service_errors_entity_not_found': 'service.errors.entity_not_found',
20+
'service_errors_execution_already_started': 'service.errors.execution_already_started',
21+
'service_errors_context_timeout': 'service.errors.context_timeout',
22+
'service_errors_retry_task': 'service.errors.retry_task',
23+
'service_errors_incomplete_history': 'service.errors.incomplete_history',
24+
'service_errors_nondeterministic': 'service.errors.nondeterministic',
25+
'service_errors_unauthorized': 'service.errors.unauthorized',
26+
'service_errors_authorize_failed': 'service.errors.authorize_failed',
1627
'service_latency': 'service.latency',
1728
'service_latency_nouserlatency': 'service.latency.nouserlatency',
1829
'service_latency_userlatency': 'service.latency.userlatency',
@@ -58,6 +69,12 @@
5869
'cache_errors': 'cache.errors',
5970
'cache_latency': 'cache.latency',
6071
'cache_miss': 'cache.miss',
72+
'cache_size': 'cache.size',
73+
'cache_usage': 'cache.usage',
74+
'cache_pinned_usage': 'cache.pinned_usage',
75+
'cache_ttl': 'cache.ttl',
76+
'cache_entry_age_on_get': 'cache.entry_age_on_get',
77+
'cache_entry_age_on_eviction': 'cache.entry_age_on_eviction',
6178
'history_event_notification_queueing_latency': 'history.event_notification.queueing_latency',
6279
'history_event_notification_fanout_latency': 'history.event_notification.fanout_latency',
6380
'history_event_notification_inflight_message_gauge': 'history.event_notification.inflight_message',
@@ -111,6 +128,10 @@
111128
'task_errors_namespace_handover': 'task.errors.namespace_handover',
112129
'task_errors_throttled': 'task.errors.throttled',
113130
'task_errors_corruption': 'task.errors.corruption',
131+
'task_terminal_failures': 'task.terminal_failures',
132+
'task_dlq_failures': 'task.dlq_failures',
133+
'task_dlq_latency': 'task.dlq_latency',
134+
'task_internal_error': 'task.internal_error',
114135
'task_schedule_to_start_latency': 'task.schedule_to_start_latency',
115136
'transfer_task_missing_event_counter': 'transfer_task.missing_event_counter',
116137
'task_batch_complete_counter': 'task.batch_complete_counter',
@@ -141,6 +162,18 @@
141162
'accept_workflow_update_message': 'accept_workflow_update_message',
142163
'complete_workflow_update_message': 'complete_workflow_update_message',
143164
'reject_workflow_update_message': 'reject_workflow_update_message',
165+
'workflow_execution_update_registry_size': 'workflow.execution.update_registry_size',
166+
'workflow_execution_update_registry_size_limited': 'workflow.execution.update_registry_size_limited',
167+
'workflow_execution_update_request_rate_limited': 'workflow.execution.update_request_rate_limited',
168+
'workflow_execution_update_too_many': 'workflow.execution.update_too_many',
169+
'workflow_execution_update_aborted': 'workflow.execution.update_aborted',
170+
'workflow_execution_update_continue_as_new_suggestions': 'workflow.execution.update_continue_as_new_suggestions',
171+
'workflow_execution_update_sent_to_worker': 'workflow.execution.update_sent_to_worker',
172+
'workflow_execution_update_sent_to_worker_again': 'workflow.execution.update_sent_to_worker_again',
173+
'workflow_execution_update_wait_stage_accepted': 'workflow.execution.update_wait_stage_accepted',
174+
'workflow_execution_update_wait_stage_completed': 'workflow.execution.update_wait_stage_completed',
175+
'workflow_execution_update_client_timeout': 'workflow.execution.update_client_timeout',
176+
'workflow_execution_update_server_timeout': 'workflow.execution.update_server_timeout',
144177
'activity_eager_execution': 'activity.eager_execution',
145178
'workflow_eager_execution': 'workflow.eager_execution',
146179
'workflow_eager_execution_denied': 'workflow.eager_execution.denied',
@@ -407,4 +440,182 @@
407440
'visibility_persistence_errors': 'visibility.persistence.errors',
408441
'visibility_persistence_resource_exhausted': 'visibility.persistence.resource_exhausted',
409442
'visibility_persistence_latency': 'visibility.persistence.latency',
410-
}
443+
# HTTP/Nexus
444+
'http_service_requests': 'http.service_requests',
445+
'nexus_request_preprocess_errors': 'nexus.request_preprocess_errors',
446+
'nexus_completion_request_preprocess_errors': 'nexus.completion_request_preprocess_errors',
447+
# Dynamic Worker Pool
448+
'dynamic_worker_pool_scheduler_dequeued_tasks': 'dynamic_worker_pool.scheduler_dequeued_tasks',
449+
'dynamic_worker_pool_scheduler_rejected_tasks': 'dynamic_worker_pool.scheduler_rejected_tasks',
450+
# Host/Rate Limit
451+
'host_rps_limit': 'host.rps_limit',
452+
'namespace_host_rps_limit': 'namespace_host.rps_limit',
453+
'handover_wait_latency': 'handover_wait_latency',
454+
# Deadlock detector metrics
455+
'dd_cluster_metadata_lock_latency': 'dd.cluster_metadata.lock_latency',
456+
'dd_cluster_metadata_callback_lock_latency': 'dd.cluster_metadata.callback.lock_latency',
457+
'dd_shard_controller_lock_latency': 'dd.shard_controller.lock_latency',
458+
'dd_shard_lock_latency': 'dd.shard.lock_latency',
459+
'dd_shard_io_semaphore_latency': 'dd.shard.io_semaphore_latency',
460+
'dd_namespace_registry_lock_latency': 'dd.namespace_registry.lock_latency',
461+
# Task metrics
462+
'task_errors_internal': 'task.errors.internal',
463+
'task_dispatch_latency': 'task.dispatch_latency',
464+
'task_rewrites': 'task.rewrites',
465+
'batchable_task_batch_count': 'batchable_task.batch_count',
466+
# Workflow metrics
467+
'speculative_workflow_task_commits': 'workflow.speculative_task.commits',
468+
'speculative_workflow_task_rollbacks': 'workflow.speculative_task.rollbacks',
469+
'workflow_update_registry_size': 'workflow.update.registry_size',
470+
'workflow_update_registry_size_limited': 'workflow.update.registry_size_limited',
471+
'workflow_update_request_rate_limited': 'workflow.update.request_rate_limited',
472+
'workflow_update_request_too_many': 'workflow.update.request_too_many',
473+
'workflow_update_aborted': 'workflow.update.aborted',
474+
'workflow_update_continue_as_new_suggestions': 'workflow.update.continue_as_new_suggestions',
475+
'workflow_update_sent_to_worker': 'workflow.update.sent_to_worker',
476+
'workflow_update_sent_to_worker_again': 'workflow.update.sent_to_worker_again',
477+
'workflow_update_wait_stage_accepted': 'workflow.update.wait_stage_accepted',
478+
'workflow_update_wait_stage_completed': 'workflow.update.wait_stage_completed',
479+
'workflow_update_client_timeout': 'workflow.update.client_timeout',
480+
'workflow_update_server_timeout': 'workflow.update.server_timeout',
481+
'workflow_delayed_start_backoff_timer': 'workflow.delayed_start_backoff.timer',
482+
'workflow_backoff_timer': 'workflow.backoff.timer',
483+
# Replication metrics
484+
'replication_task_transmission_latency': 'replication.task.transmission_latency',
485+
'replication_task_send_attempt': 'replication.task.send_attempt',
486+
'replication_task_send_backlog': 'replication.task.send_backlog',
487+
'replication_task_send_error': 'replication.task.send_error',
488+
'replication_task_send_latency': 'replication.task.send_latency',
489+
'replication_task_processing_latency': 'replication.task.processing_latency',
490+
'replication_task_generation_latency': 'replication.task.generation_latency',
491+
'replication_task_load_latency': 'replication.task.load_latency',
492+
'replication_task_load_size': 'replication.task.load_size',
493+
'replication_tasks_send': 'replication.tasks.send',
494+
'replication_tasks_recv': 'replication.tasks.recv',
495+
'replication_tasks_recv_backlog': 'replication.tasks.recv_backlog',
496+
'replication_tasks_skipped': 'replication.tasks.skipped',
497+
'replication_dlq_non_empty': 'replication.dlq.non_empty',
498+
'replication_service_error': 'replication.service_error',
499+
'replication_stream_error': 'replication.stream_error',
500+
'replication_stream_panic': 'replication.stream_panic',
501+
'replication_stream_stuck': 'replication.stream_stuck',
502+
'replication_duplicated_task': 'replication.duplicated_task',
503+
'replication_outlier_namespace': 'replication.outlier_namespace',
504+
# Verification metrics
505+
'verify_replication_task_success': 'verify.replication_task.success',
506+
'verify_replication_task_not_found': 'verify.replication_task.not_found',
507+
'verify_replication_task_failed': 'verify.replication_task.failed',
508+
'verify_replication_tasks_latency': 'verify.replication_tasks.latency',
509+
'verify_describe_mutable_state_latency': 'verify.describe_mutable_state.latency',
510+
'generate_replication_tasks_latency': 'generate.replication_tasks.latency',
511+
# Encounter metrics
512+
'encounter_zombie_workflow_count': 'encounter.zombie_workflow',
513+
'encounter_not_found_workflow_count': 'encounter.not_found_workflow',
514+
'encounter_pass_retention_workflow_count': 'encounter.pass_retention_workflow',
515+
# Matching metrics
516+
'invalid_task_queue_partition': 'invalid_task_queue_partition',
517+
'loaded_task_queue_family_count': 'loaded_task_queue_family',
518+
'loaded_task_queue_partition_count': 'loaded_task_queue_partition',
519+
'force_loaded_task_queue_partitions_count': 'force_loaded_task_queue_partitions',
520+
'force_loaded_task_queue_partition_unnecessarily_count': 'force_loaded_task_queue_partition_unnecessarily',
521+
'unknown_build_polls': 'unknown_build_polls',
522+
'unknown_build_tasks': 'unknown_build_tasks',
523+
'poll_latency': 'poll.latency',
524+
'respond_nexus_failed': 'respond_nexus.failed',
525+
# State machine metrics
526+
'state_machine_timer_processing_failures': 'state_machine.timer_processing_failures',
527+
'state_machine_timer_skips': 'state_machine.timer_skips',
528+
# Shard metrics
529+
'shard_linger_success': 'shard.linger_success',
530+
'shard_linger_timeouts': 'shard.linger_timeouts',
531+
# Command metrics
532+
'command': 'command',
533+
# Circuit breaker metrics
534+
'circuit_breaker_executable_blocked': 'circuit_breaker.executable_blocked',
535+
# Rate limiter metrics
536+
'rate_limited_task_runnable_wait_time': 'rate_limited_task.runnable_wait_time',
537+
# Dynamic worker pool metrics
538+
'dynamic_worker_pool_scheduler_buffer_size': 'dynamic_worker_pool.scheduler_buffer_size',
539+
'dynamic_worker_pool_scheduler_active_workers': 'dynamic_worker_pool.scheduler_active_workers',
540+
'dynamic_worker_pool_scheduler_enqueued_tasks': 'dynamic_worker_pool.scheduler_enqueued_tasks',
541+
# Persistence metrics
542+
'persistence_shard_rps': 'persistence.shard_rps',
543+
'persistence_session_refresh_failures': 'persistence.session_refresh_failures',
544+
'persistence_session_refresh_attempts': 'persistence.session_refresh_attempts',
545+
# Memory metrics
546+
'memory_allocated': 'memory.allocated',
547+
'memory_heap': 'memory.heap',
548+
'memory_heap_objects': 'memory.heap_objects',
549+
'memory_heapidle': 'memory.heapidle',
550+
'memory_heapinuse': 'memory.heapinuse',
551+
'memory_stack': 'memory.stack',
552+
'memory_mallocs': 'memory.mallocs',
553+
'memory_frees': 'memory.frees',
554+
'memory_num_gc': 'memory.num_gc',
555+
'memory_gc_pause_ms': 'memory.gc_pause_ms',
556+
'memory_num_gc_last': 'memory.num_gc_last',
557+
'memory_pause_total_ns_last': 'memory.pause_total_ns_last',
558+
# Process metrics
559+
'restarts': 'restarts',
560+
'num_goroutines': 'num_goroutines',
561+
'gomaxprocs': 'gomaxprocs',
562+
# Finalizer metrics
563+
'finalizer_items_completed': 'finalizer.items_completed',
564+
'finalizer_items_unfinished': 'finalizer.items_unfinished',
565+
'finalizer_latency': 'finalizer.latency',
566+
# Activity metrics
567+
'paused_activities': 'paused_activities',
568+
# Signal metrics
569+
'signal_request_id_size': 'signal_request_id.size',
570+
'signal_request_id_count': 'signal_request_id',
571+
# Total metrics
572+
'total_activity_count': 'total.activity',
573+
'total_user_timer_count': 'total.user_timer',
574+
'total_signal_count': 'total.signal',
575+
'total_signal_external_count': 'total.signal_external',
576+
'total_request_cancel_external_count': 'total.request_cancel_external',
577+
'total_child_execution_count': 'total.child_execution',
578+
# Chasm metrics
579+
'chasm_total_size': 'chasm.total_size',
580+
# Reachability metrics
581+
'reachability_exit_point_count': 'reachability.exit_point',
582+
# Start workflow metrics
583+
'start_workflow_request_deduped': 'start_workflow.request_deduped',
584+
# Out of order metrics
585+
'out_of_order_buffered_events': 'out_of_order.buffered_events',
586+
# Approximate backlog metrics
587+
'approximate_backlog_count': 'approximate.backlog_count',
588+
'approximate_backlog_age_seconds': 'approximate.backlog_age_seconds',
589+
# Shard info update metrics
590+
'time_between_shardinfo_update': 'time_between_shardinfo.update',
591+
'tasks_per_shardinfo_update': 'tasks_per_shardinfo.update',
592+
# Elasticsearch metrics
593+
'elasticsearch_custom_order_by_clause_counter': 'elasticsearch.custom_order_by_clause',
594+
'elasticsearch_bulk_processor_bulk_request_took_latency': 'elasticsearch.bulk_processor.bulk_request_took.latency',
595+
# Workflow update metrics
596+
'request_workflow_update_message': 'request_workflow.update_message',
597+
'respond_workflow_update_message': 'respond_workflow.update_message',
598+
'invalid_state_transition_workflow_update_message': 'invalid_state_transition_workflow.update_message',
599+
# Signal-with-start metrics
600+
'signal_with_start_skip_delay_count': 'signal_with_start.skip_delay',
601+
# Cassandra metrics
602+
'cassandra_session_refresh_failures': 'cassandra.session_refresh_failures',
603+
'cassandra_init_session_latency': 'cassandra.init_session_latency',
604+
# Replication backfill metrics
605+
'replication_tasks_back_fill': 'replication.tasks.back_fill',
606+
'replication_tasks_back_fill_latency': 'replication.tasks.back_fill_latency',
607+
# Semaphore metrics
608+
'semaphore_latency': 'semaphore.latency',
609+
'semaphore_requests': 'semaphore.requests',
610+
'semaphore_failures': 'semaphore.failures',
611+
# Task queue metrics
612+
'loaded_physical_task_queue_count': 'loaded_physical_task_queue',
613+
# Rate limit metrics
614+
'dynamic_rate_limit_multiplier': 'dynamic_rate_limit.multiplier',
615+
# Namespace metrics
616+
'read_namespace_errors': 'read_namespace.errors',
617+
# Mutable state metrics
618+
'mutable_state_dirty': 'mutable_state.dirty',
619+
# History metrics
620+
'history_workflow_execution_cache_lock_hold_duration': 'history.workflow_execution_cache_lock_hold_duration',
621+
}

0 commit comments

Comments
 (0)