From 028e843a1724317d5cb3b8a3131bc3c2cad0fd7b Mon Sep 17 00:00:00 2001
From: Andrew Pease <7442091+peasead@users.noreply.github.com>
Date: Tue, 5 Nov 2024 12:16:50 -0600
Subject: [PATCH] added all available gen_ai fields from otel and estc integrations

---
 rfcs/text/0000-gen_ai-security-fields.md | 144 +++++++++++++++++++----
 1 file changed, 121 insertions(+), 23 deletions(-)

diff --git a/rfcs/text/0000-gen_ai-security-fields.md b/rfcs/text/0000-gen_ai-security-fields.md
index 8eb8118b58..5811a968f5 100644
--- a/rfcs/text/0000-gen_ai-security-fields.md
+++ b/rfcs/text/0000-gen_ai-security-fields.md
@@ -59,32 +59,126 @@ llm.model.parameters | keyword | Parameters used to confirm the LLM model.
 
 The `gen_ai` fields proposed are: [WIP]
 
-Field | Type | Description /Usage
-- | -- | --
-gen_ai | group | This defines the attributes used to describe telemetry in the context of Generative Artificial Intelligence (GenAI) Models requests and responses.
-gen_ai.system | keyword | The Generative AI product as identified by the client or server instrumentation.
-gen_ai.request.model | keyword |
-gen_ai.request.max_tokens | integer |
-gen_ai.request.temperature | integer |
-gen_ai.request.top_p | integer |
-gen_ai.request.top_k | integer |
-gen_ai.request.stop_sequences | keyword |
-gen_ai.request.frequency_penalty | integer |
-gen_ai.request.presence_penalty | integer |
-gen_ai.response.id | keyword |
-gen_ai.response.model | keyword |
-gen_ai.response.finish_reasons | keyword |
-gen_ai.usage.input_tokens | integer |
-gen_ai.usage.output_tokens | integer |
-gen_ai.token.type | keyword |
-gen_ai.operation.name | keword |
-
+**Note:** a `*` denotes that this field is not currently part of OTel or ECS, but is being used somewhere, most commonly in an Elastic integration.
+
+Field | Type | Description / Usage | Example
+-- | -- | -- | --
+gen_ai | nested | This defines the attributes used to describe telemetry in the context of Generative Artificial Intelligence (GenAI) model requests and responses. |
+gen_ai.analysis | nested | |
+gen_ai.analysis.action_recommended | keyword | Recommended actions based on the analysis. |
+gen_ai.analysis.finding | keyword | Detailed findings from security tools. |
+gen_ai.analysis.function | keyword | Name of the security or analysis function used. |
+gen_ai.analysis.tool_names | keyword | Name of the security or analysis tools used. |
+gen_ai.assistant | nested | |
+gen_ai.assistant.message | nested | |
+gen_ai.assistant.message.role | keyword | The actual role of the message author as passed in the message. | `assistant` or `bot`
+gen_ai.assistant.message.content | keyword | The contents of the assistant message. | `Spans, events, metrics defined by the GenAI semantic conventions.`
+gen_ai.assistant.message.tool_calls | nested | The tool calls generated by the model, such as function calls. |
+gen_ai.assistant.message.tool_calls.id | text | The id of the tool call | `call_mszuSIzqtI65i1wAUOE8w5H4`
+gen_ai.assistant.message.tool_calls.type | keyword | The type of the tool | `function`
+gen_ai.assistant.message.tool_calls.function | nested | |
+gen_ai.assistant.message.tool_calls.function.name | keyword | The name of the function to call | `get_link_to_otel_semconv`
+gen_ai.assistant.message.tool_calls.function.arguments | keyword | The arguments to pass to the function | `{"semconv": "gen_ai"}`
+gen_ai.choice | nested | This event describes an individual model-generated chat response. |
+gen_ai.choice.finish_reason | keyword | The reason the model stopped generating tokens. | `stop`, `tool_calls`, `content_filter`
+gen_ai.choice.index | integer | The index of the choice in the list of choices. | `1`
+gen_ai.choice.message | nested | GenAI response message. |
+gen_ai.choice.message.role | keyword | The actual role of the message author as passed in the message. | `assistant` or `bot`
+gen_ai.choice.message.content | keyword | The contents of the choice message. | `Spans, events, metrics defined by the GenAI semantic conventions.`
+gen_ai.choice.message.tool_calls | nested | The tool calls generated by the model, such as function calls. |
+gen_ai.choice.message.tool_calls.id | text | The id of the tool call | `call_mszuSIzqtI65i1wAUOE8w5H4`
+gen_ai.choice.message.tool_calls.type | keyword | The type of the tool | `function`
+gen_ai.choice.message.tool_calls.function | nested | |
+gen_ai.choice.message.tool_calls.function.name | keyword | The name of the function to call | `get_link_to_otel_semconv`
+gen_ai.choice.message.tool_calls.function.arguments | keyword | The arguments to pass to the function | `{"semconv": "gen_ai"}`
+gen_ai.compliance | nested | |
+gen_ai.compliance.request_triggered | keyword | Lists compliance-related filters that were triggered during the processing of the request, such as data privacy filters or regulatory compliance checks. |
+gen_ai.compliance.response_triggered | keyword | Lists compliance-related filters that were triggered during the processing of the response, such as data privacy filters or regulatory compliance checks. |
+gen_ai.compliance.violation_code | keyword | Code identifying the specific compliance rule that was violated. |
+gen_ai.compliance.violation_detected | boolean | Indicates if any compliance violation was detected during the interaction. |
+gen_ai.operation.name | keyword | The name of the operation being performed. | `chat`, `text_completion`
+gen_ai.openai | nested | This group defines attributes for OpenAI. |
+gen_ai.openai.request.response_format | keyword | The response format that is requested. | `json_object`, `json_schema`, `auto`
+gen_ai.openai.request.seed | integer | Requests with the same seed value are more likely to return the same result. | `100`
+gen_ai.openai.request.service_tier | keyword | The service tier requested. May be a specific tier, default, or auto. | `auto`, `default`
+gen_ai.openai.response.service_tier | keyword | The service tier used for the response. | `scale`, `default`
+gen_ai.owasp | nested | |
+gen_ai.owasp.description | text | Description of the OWASP risk triggered. |
+gen_ai.owasp.id | keyword | Identifier for the OWASP risk addressed. |
+gen_ai.performance | nested | |
+gen_ai.performance.request_size | long | Size of the request payload in bytes. |
+gen_ai.performance.response_size | long | Size of the response payload in bytes. |
+gen_ai.performance.response_time | long | Time taken by the LLM to generate a response, in milliseconds. |
+gen_ai.performance.start_response_time | long | Time taken by the LLM to send the first response byte, in milliseconds. |
+gen_ai.policy | nested | |
+gen_ai.policy.action | keyword | Action taken due to a policy violation, such as blocking, alerting, or modifying the content. |
+gen_ai.policy.confidence | keyword | Confidence level in the policy match that triggered the action, quantifying how closely the identified content matched the policy criteria. |
+gen_ai.policy.match_detail | nested | |
+gen_ai.policy.name | keyword | Name of the specific policy that was triggered. |
+gen_ai.policy.violation | boolean | Specifies if a security policy was violated. |
+gen_ai.request | nested | This group defines attributes for GenAI request actions. |
+gen_ai.request.id | keyword | Unique identifier for the LLM request. |
+gen_ai.request.max_tokens | integer | The maximum number of tokens the model generates for a request. | `100`
+gen_ai.request.model | nested | |
+gen_ai.request.model.description | text | Description of the LLM model. |
+gen_ai.request.model.instructions | text | Custom instructions for the LLM model. |
+gen_ai.request.model.role | keyword | Role of the LLM model in the interaction. |
+gen_ai.request.model.type | keyword | Type of LLM model. |
+gen_ai.request.model.version | keyword | Version of the LLM model used to generate the response. |
+gen_ai.request.model.id | keyword | The name of the GenAI model a request is being made to. | `gpt-4`
+gen_ai.request.temperature | float | The temperature setting for the GenAI request. | `0.0`
+gen_ai.request.timestamp | date | Timestamp when the request was made. |
+gen_ai.request.top_p | float | The top_p sampling setting for the GenAI request. | `1.0`
+gen_ai.request.top_k | float | The top_k sampling setting for the GenAI request. | `1.0`
+gen_ai.request.stop_sequences | keyword | List of sequences that the model will use to stop generating further tokens. | `["forest", "lived"]`
+gen_ai.request.frequency_penalty | float | The frequency penalty setting for the GenAI request. | `0.1`
+gen_ai.request.presence_penalty | float | The presence penalty setting for the GenAI request. | `0.1`
+gen_ai.response | nested | This group defines attributes for GenAI response actions. |
+gen_ai.response.error_code | keyword | Error code returned in the LLM response. |
+gen_ai.response.finish_reasons | keyword | Array of reasons the model stopped generating tokens, corresponding to each generation received. | `["stop", "stop", "length"]`
+gen_ai.response.id | keyword | The unique identifier for the completion. | `chatcmpl-123`
+gen_ai.response.model | keyword | The name of the model that generated the response. | `gpt-4-0613`
+gen_ai.response.timestamp | date | Timestamp when the response was received. |
+gen_ai.security | nested | |
+gen_ai.security.hallucination_consistency | integer | Consistency check between multiple responses. |
+gen_ai.security.jailbreak_score | integer | Measures similarity to known jailbreak attempts. |
+gen_ai.security.prompt_injection_score | integer | Measures similarity to known prompt injection attacks. |
+gen_ai.security.refusal_score | integer | Measures similarity to known LLM refusal responses. |
+gen_ai.security.regex_pattern_count | integer | Counts occurrences of strings matching user-defined regex patterns. |
+gen_ai.sentiment | nested | |
+gen_ai.sentiment.content_categories | keyword | Categories of content identified as sensitive or requiring moderation. |
+gen_ai.sentiment.content_inappropriate | keyword | Whether the content was flagged as inappropriate or sensitive. |
+gen_ai.sentiment.score | integer | Sentiment analysis score. |
+gen_ai.sentiment.toxicity_score | integer | Toxicity analysis score. |
+gen_ai.system | nested | |
+gen_ai.system.product | keyword | The Generative AI product as identified by the client or server instrumentation. | `openai`
+gen_ai.system.message | nested | This event describes the instructions passed to the GenAI model. |
+gen_ai.system.message.role | keyword | The actual role of the message author as passed in the message. | `system` or `instructions`
+gen_ai.system.message.content | keyword | The contents of the system message. | `You're a friendly bot that answers questions about GenAI.`
+gen_ai.text | nested | |
+gen_ai.text.complexity_score | integer | Evaluates the complexity of the text. |
+gen_ai.text.readability_score | integer | Measures the readability level of the text. |
+gen_ai.text.similarity_score | integer | Measures the similarity between the prompt and response. |
+gen_ai.token.type | keyword | The type of token being counted. | `input`, `output`
+gen_ai.tool | nested | |
+gen_ai.tool.message | nested | |
+gen_ai.tool.message.role | keyword | The actual role of the message author as passed in the message. | `tool` or `function`
+gen_ai.tool.message.content | keyword | The contents of the tool message. | `elastic.co`
+gen_ai.tool.message.id | text | Tool call that this message is responding to. | `call_mszuSIzqtI65i1wAUOE8w5H4`
+gen_ai.usage | nested | This group defines attributes for GenAI usage. |
+gen_ai.usage.completion_tokens | integer | Number of tokens in the LLM's response. |
+gen_ai.usage.prompt_tokens | integer | Number of tokens in the user's request. |
+gen_ai.usage.input_tokens | integer | The number of tokens used in the GenAI input (prompt). | `100`
+gen_ai.usage.output_tokens | integer | The number of tokens used in the GenAI response (completion). | `180`
+gen_ai.user | nested | This event describes the prompt message specified by the user. |
+gen_ai.user.content | keyword | The contents of the user message. | `What telemetry is reported by OpenAI instrumentations?`
+gen_ai.user.id | keyword | Unique identifier for the user. |
+gen_ai.user.rn | keyword | Unique resource name for the user. |
+gen_ai.user.role | keyword | The actual role of the message author as passed in the message. | `user` or `customer`
 
 Reuse fields:
-* AWS - https://www.elastic.co/docs/current/integrations/aws
-* GCP - https://www.elastic.co/docs/current/integrations/gcp
-* Azure - https://www.elastic.co/docs/current/integrations/azure
 * Threat - https://www.elastic.co/guide/en/ecs/current/ecs-threat.html
+* Rule - https://www.elastic.co/guide/en/ecs/current/ecs-rule.html
+* [OpenTelemetry GenAI attributes registry docs](https://github.com/open-telemetry/semantic-conventions/blob/main/docs/attributes-registry/gen-ai.md)
+* [OpenTelemetry GenAI docs](https://github.com/open-telemetry/semantic-conventions/tree/main/docs/gen-ai)
+* [OpenTelemetry GenAI registry YAML](https://github.com/open-telemetry/semantic-conventions/blob/main/model/gen-ai/registry.yaml)
+
 
 ### RFC Pull Requests
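To make the proposed field layout above concrete, here is a minimal Python sketch (not part of the RFC itself) that reshapes an OpenAI-style chat completion payload into a document using a handful of the proposed `gen_ai.*` fields. The `to_gen_ai_event` helper and the trimmed payload are hypothetical illustrations; only the mapping of field names follows the table.

```python
import json

def to_gen_ai_event(completion: dict) -> dict:
    """Map an OpenAI-style chat completion payload onto a subset of the
    proposed gen_ai.* fields. Illustrative sketch only."""
    choice = completion["choices"][0]
    return {
        "gen_ai": {
            "system": {"product": "openai"},
            "operation": {"name": "chat"},
            "response": {
                "id": completion["id"],
                "model": completion["model"],
                # One finish reason per generation received.
                "finish_reasons": [c["finish_reason"] for c in completion["choices"]],
            },
            "choice": {
                "index": choice["index"],
                "finish_reason": choice["finish_reason"],
                "message": {
                    "role": choice["message"]["role"],
                    "content": choice["message"]["content"],
                },
            },
            "usage": {
                "input_tokens": completion["usage"]["prompt_tokens"],
                "output_tokens": completion["usage"]["completion_tokens"],
            },
        }
    }

# A trimmed payload in the shape OpenAI's chat completions API returns.
completion = {
    "id": "chatcmpl-123",
    "model": "gpt-4-0613",
    "choices": [
        {
            "index": 0,
            "finish_reason": "stop",
            "message": {"role": "assistant", "content": "Hello!"},
        }
    ],
    "usage": {"prompt_tokens": 100, "completion_tokens": 180},
}

event = to_gen_ai_event(completion)
print(json.dumps(event, indent=2))
```

A mapping like this is where the distinction between OTel-sourced fields (e.g. `gen_ai.usage.input_tokens`) and integration-sourced fields would surface in practice.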