From 6329ba355b074cb95ceba7cb33a8c8912d4d492d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Istv=C3=A1n=20Zolt=C3=A1n=20Szab=C3=B3?= Date: Tue, 4 Feb 2025 12:48:20 +0100 Subject: [PATCH] [E&A] Refine anomaly detection docs set part 1. (#304) --- explore-analyze/machine-learning.md | 25 ++-- .../machine-learning/anomaly-detection.md | 16 +-- .../anomaly-detection-scale.md | 46 +++----- .../anomaly-detection/anomaly-how-tos.md | 19 +--- .../anomaly-detection/ml-ad-algorithms.md | 3 +- .../anomaly-detection/ml-ad-concepts.md | 8 +- .../anomaly-detection/ml-ad-explain.md | 6 - .../ml-ad-finding-anomalies.md | 12 +- .../anomaly-detection/ml-ad-forecast.md | 1 - .../anomaly-detection/ml-ad-plan.md | 4 - .../anomaly-detection/ml-ad-run-jobs.md | 49 ++------ .../anomaly-detection/ml-ad-view-results.md | 10 +- .../ml-anomaly-detection-job-types.md | 25 ++-- .../anomaly-detection/ml-api-quickref.md | 1 - .../ml-configuring-aggregation.md | 15 --- .../ml-configuring-alerts.md | 10 +- .../ml-delayed-data-detection.md | 7 +- .../anomaly-detection/ml-getting-started.md | 107 +++++++----------- .../machine-learning-in-kibana.md | 4 +- .../xpack-ml-aiops.md | 12 +- .../xpack-ml-anomalies.md | 8 +- .../xpack-ml-dfanalytics.md | 7 +- .../setting-up-machine-learning.md | 26 +---- .../kibana/kibana/xpack-ml-anomalies.md | 41 ------- .../machine-learning/ml-ad-overview.md | 11 -- raw-migrated-files/toc.yml | 2 - 26 files changed, 115 insertions(+), 360 deletions(-) delete mode 100644 raw-migrated-files/kibana/kibana/xpack-ml-anomalies.md delete mode 100644 raw-migrated-files/stack-docs/machine-learning/ml-ad-overview.md diff --git a/explore-analyze/machine-learning.md b/explore-analyze/machine-learning.md index aae54209f..c02154bc0 100644 --- a/explore-analyze/machine-learning.md +++ b/explore-analyze/machine-learning.md @@ -8,27 +8,21 @@ mapped_urls: # What is Elastic Machine Learning? 
[machine-learning-intro] -{{ml-cap}} features analyze your data and generate models for its patterns of behavior. -The type of analysis that you choose depends on the questions or problems you want to address and the type of data you have available. +{{ml-cap}} features analyze your data and generate models for its patterns of behavior. The type of analysis that you choose depends on the questions or problems you want to address and the type of data you have available. ## Unsupervised {{ml}} [machine-learning-unsupervised] There are two types of analysis that can deduce the patterns and relationships within your data without training or intervention: *{{anomaly-detect}}* and *{{oldetection}}*. -[{{anomaly-detect-cap}}](machine-learning/anomaly-detection.md) requires time series data. -It constructs a probability model and can run continuously to identify unusual events as they occur. The model evolves over time; you can use its insights to forecast future behavior. +[{{anomaly-detect-cap}}](machine-learning/anomaly-detection.md) requires time series data. It constructs a probability model and can run continuously to identify unusual events as they occur. The model evolves over time; you can use its insights to forecast future behavior. -[{{oldetection-cap}}](machine-learning/data-frame-analytics/ml-dfa-finding-outliers.md) does not require time series data. -It is a type of {{dfanalytics}} that identifies unusual points in a data set by analyzing how close each data point is to others and the density of the cluster of points around it. -It does not run continuously; it generates a copy of your data set where each data point is annotated with an {{olscore}}. -The score indicates the extent to which a data point is an outlier compared to other data points. +[{{oldetection-cap}}](machine-learning/data-frame-analytics/ml-dfa-finding-outliers.md) does not require time series data. 
It is a type of {{dfanalytics}} that identifies unusual points in a data set by analyzing how close each data point is to others and the density of the cluster of points around it. It does not run continuously; it generates a copy of your data set where each data point is annotated with an {{olscore}}. The score indicates the extent to which a data point is an outlier compared to other data points. ## Supervised {{ml}} [machine-learning-supervised] There are two types of {{dfanalytics}} that require training data sets: *{{classification}}* and *{{regression}}*. -In both cases, the result is a copy of your data set where each data point is annotated with predictions and a trained model, which you can deploy to make predictions for new data. -For more information, refer to [Introduction to supervised learning](machine-learning/data-frame-analytics/ml-dfa-overview.md#ml-supervised-workflow). +In both cases, the result is a copy of your data set where each data point is annotated with predictions and a trained model, which you can deploy to make predictions for new data. For more information, refer to [Introduction to supervised learning](machine-learning/data-frame-analytics/ml-dfa-overview.md#ml-supervised-workflow). [{{classification-cap}}](machine-learning/data-frame-analytics/ml-dfa-classification.md) learns relationships between your data points in order to predict discrete categorical values, such as whether a DNS request originates from a malicious or benign domain. @@ -44,18 +38,13 @@ The {{ml-features}} that are available vary by project type: ## Synchronize saved objects [machine-learning-synchronize-saved-objects] -Before you can view your {{ml}} {dfeeds}, jobs, and trained models in {{kib}}, they must have saved objects. -For example, if you used APIs to create your jobs, wait for automatic synchronization or go to the **{{ml-app}}** page and click **Synchronize saved objects**. 
+Before you can view your {{ml}} {dfeeds}, jobs, and trained models in {{kib}}, they must have saved objects. For example, if you used APIs to create your jobs, wait for automatic synchronization or go to the **{{ml-app}}** page and click **Synchronize saved objects**. ## Export and import jobs [machine-learning-export-and-import-jobs] -You can export and import your {{ml}} job and {{dfeed}} configuration details on the **{{ml-app}}** page. -For example, you can export jobs from your test environment and import them in your production environment. +You can export and import your {{ml}} job and {{dfeed}} configuration details on the **{{ml-app}}** page. For example, you can export jobs from your test environment and import them in your production environment. -The exported file contains configuration details; it does not contain the {{ml}} models. -For {{anomaly-detect}}, you must import and run the job to build a model that is accurate for the new environment. -For {{dfanalytics}}, trained models are portable; you can import the job then transfer the model to the new cluster. -Refer to [Exporting and importing {{dfanalytics}} trained models](machine-learning/data-frame-analytics/ml-trained-models.md#export-import). +The exported file contains configuration details; it does not contain the {{ml}} models. For {{anomaly-detect}}, you must import and run the job to build a model that is accurate for the new environment. For {{dfanalytics}}, trained models are portable; you can import the job then transfer the model to the new cluster. Refer to [Exporting and importing {{dfanalytics}} trained models](machine-learning/data-frame-analytics/ml-trained-models.md#export-import). 
There are some additional actions that you must take before you can successfully import and run your jobs: diff --git a/explore-analyze/machine-learning/anomaly-detection.md b/explore-analyze/machine-learning/anomaly-detection.md index b67ef2405..ddbb65256 100644 --- a/explore-analyze/machine-learning/anomaly-detection.md +++ b/explore-analyze/machine-learning/anomaly-detection.md @@ -4,13 +4,13 @@ mapped_urls: - https://www.elastic.co/guide/en/kibana/current/xpack-ml-anomalies.html --- -# Anomaly detection +# Anomaly detection [ml-ad-overview] -% What needs to be done: Align serverless/stateful +You can use {{stack}} {{ml-features}} to analyze time series data and identify anomalous patterns in your data set. -% Scope notes: Colleen McGinnis removed "https://www.elastic.co/guide/en/serverless/current/observability-machine-learning.html" and "All children" because this page is also used below in "AIOps Labs" with "All children" selected. We can't copy all children to two places. - -% Use migrated content from existing pages that map to this page: - -% - [ ] ./raw-migrated-files/stack-docs/machine-learning/ml-ad-overview.md -% - [ ] ./raw-migrated-files/kibana/kibana/xpack-ml-anomalies.md \ No newline at end of file +* [Finding anomalies](../../../explore-analyze/machine-learning/anomaly-detection/ml-ad-finding-anomalies.md) +* [Tutorial: Getting started with {{anomaly-detect}}](../../../explore-analyze/machine-learning/anomaly-detection/ml-getting-started.md) +* [*Advanced concepts*](../../../explore-analyze/machine-learning/anomaly-detection/ml-ad-concepts.md) +* [*API quick reference*](../../../explore-analyze/machine-learning/anomaly-detection/ml-api-quickref.md) +* [How-tos](../../../explore-analyze/machine-learning/anomaly-detection/anomaly-how-tos.md) +* [*Resources*](../../../explore-analyze/machine-learning/anomaly-detection/ml-ad-resources.md) \ No newline at end of file diff --git 
a/explore-analyze/machine-learning/anomaly-detection/anomaly-detection-scale.md b/explore-analyze/machine-learning/anomaly-detection/anomaly-detection-scale.md index 04fd13836..df26ce710 100644 --- a/explore-analyze/machine-learning/anomaly-detection/anomaly-detection-scale.md +++ b/explore-analyze/machine-learning/anomaly-detection/anomaly-detection-scale.md @@ -17,8 +17,7 @@ Prerequisites: The following recommendations are not sequential – the numbers just help to navigate between the list items; you can take action on one or more of them in any order. You can implement some of these changes on existing jobs; others require you to clone an existing job or create a new one. - -## 1. Consider autoscaling, node sizing, and configuration [node-sizing] +## 1. Consider autoscaling, node sizing, and configuration [node-sizing] An {{anomaly-job}} runs on a single node and requires sufficient resources to hold its model in memory. When a job is opened, it will be placed on the node with the most available memory at that time. @@ -32,20 +31,17 @@ Increasing the number of nodes will allow distribution of job processing as well In {{ecloud}}, you can enable [autoscaling](../../../deploy-manage/autoscaling.md) so that the {{ml}} nodes in your cluster scale up or down based on current {{ml}} memory and CPU requirements. The {{ecloud}} infrastructure allows you to create {{ml-jobs}} up to the size that fits on the maximum node size that the cluster can scale to (usually somewhere between 58GB and 64GB) rather than what would fit in the current cluster. If you attempt to use autoscaling outside of {{ecloud}}, then set `xpack.ml.max_ml_node_size` to define the maximum possible size of a {{ml}} node. Creating {{ml-jobs}} with model memory limits larger than the maximum node size can support is not allowed, as autoscaling cannot add a node big enough to run the job. 
On a self-managed deployment, you can set `xpack.ml.max_model_memory_limit` according to the available resources of the {{ml}} node. This prevents you from creating jobs with model memory limits too high to open in your cluster. - -## 2. Use dedicated results indices [dedicated-results-index] +## 2. Use dedicated results indices [dedicated-results-index] For large jobs, use a dedicated results index. This ensures that results from a single large job do not dominate the shared results index. It also ensures that the job and results (if `results_retention_days` is set) can be deleted more efficiently and improves renormalization performance. By default, {{anomaly-job}} results are stored in a shared index. To change to use a dedicated result index, you need to clone or create a new job. - -## 3. Disable model plot [model-plot] +## 3. Disable model plot [model-plot] By default, model plot is enabled when you create jobs in {{kib}}. If you have a large job, however, consider disabling it. You can disable model plot for existing jobs by using the [Update {{anomaly-jobs}} API](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-update-job.html). Model plot calculates and stores the model bounds for each analyzed entity, including both anomalous and non-anomalous entities. These bounds are used to display the shaded area in the Single Metric Viewer charts. Model plot creates one result document per bucket per split field value. If you have high cardinality fields and/or a short bucket span, disabling model plot reduces processing workload and results stored. - -## 4. Understand how detector configuration can impact model memory [detector-configuration] +## 4. 
Understand how detector configuration can impact model memory [detector-configuration] The following factors are most significant in increasing the memory required for a job: @@ -59,8 +55,7 @@ If you have high cardinality `by` or `partition` fields, ensure you have suffici To change partitioning fields, influencers and/or detectors, you need to clone or create a new job. - -## 5. Optimize the bucket span [optimize-bucket-span] +## 5. Optimize the bucket span [optimize-bucket-span] Short bucket spans and high cardinality detectors are resource intensive and require more system resources. @@ -68,27 +63,23 @@ Bucket span is typically between 15m and 1h. The recommended value always depend If your use case is suitable, consider increasing the bucket span to reduce processing workload. To change the bucket span, you need to clone or create a new job. - -## 6. Set the `scroll_size` of the {{dfeed}} [set-scroll-size] +## 6. Set the `scroll_size` of the {{dfeed}} [set-scroll-size] This consideration only applies to {{dfeeds}} that **do not** use aggregations. The `scroll_size` parameter of a {{dfeed}} specifies the number of hits to return from {{es}} searches. The higher the `scroll_size` the more results are returned by a single search. When your {{anomaly-job}} has a high throughput, increasing `scroll_size` may decrease the time the job needs to analyze incoming data, however may also increase the pressure on your cluster. You cannot increase `scroll_size` to more than the value of `index.max_result_window` which is 10,000 by default. If you update the settings of a {{dfeed}}, you must stop and start the {{dfeed}} for the change to be applied. - -## 7. Set the model memory limit [set-model-memory-limit] +## 7. Set the model memory limit [set-model-memory-limit] The `model_memory_limit` job configuration option sets the approximate maximum amount of memory resources required for analytical processing. 
When you create an {{anomaly-job}} in {{kib}}, it provides an estimate for this limit. The estimate is based on the analysis configuration details for the job and cardinality estimates, which are derived by running aggregations on the source indices as they exist at that specific point in time. If you change the resources available on your {{ml}} nodes or make significant changes to the characteristics or cardinality of your data, the model memory requirements might also change. You can update the model memory limit for a job while it is closed. If you want to decrease the limit below the current model memory usage, however, you must clone and re-run the job. -::::{tip} +::::{tip} You can view the current model size statistics with the [get {{anomaly-job}} stats](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-get-job-stats.html) and [get model snapshots](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-get-snapshot.html) APIs. You can also obtain a model memory limit estimate at any time by running the [estimate {{anomaly-jobs}} model memory API](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-estimate-model-memory.html). However, you must provide your own cardinality estimates. :::: - As a job approaches its model memory limit, the memory status is `soft_limit` and older models are more aggressively pruned to free up space. If you have categorization jobs, no further examples are stored. When a job exceeds its limit, the memory status is `hard_limit` and the job no longer models new entities. It is therefore important to have appropriate memory model limits for each job. If you reach the hard limit and are concerned about the missing data, ensure that you have adequate resources then clone and re-run the job with a larger model memory limit. - -## 8. Pre-aggregate your data [pre-aggregate-data] +## 8. 
Pre-aggregate your data [pre-aggregate-data] You can speed up the analysis by summarizing your data with aggregations. @@ -100,22 +91,19 @@ In certain cases, you cannot do aggregations to increase performance. For exampl Please consult [Aggregating data for faster performance](ml-configuring-aggregation.md) to learn more. - -## 9. Optimize the results retention [results-retention] +## 9. Optimize the results retention [results-retention] Set a results retention window to reduce the amount of results stored. {{anomaly-detect-cap}} results are retained indefinitely by default. Results build up over time, and your result index may be quite large. A large results index is slow to query and takes up significant space on your cluster. Consider how long you wish to retain the results and set `results_retention_days` accordingly – for example, to 30 or 60 days – to avoid unnecessarily large result indices. Deleting old results does not affect the model behavior. You can change this setting for existing jobs. - -## 10. Optimize the renormalization window [renormalization-window] +## 10. Optimize the renormalization window [renormalization-window] Reduce the renormalization window to reduce processing workload. When a new anomaly has a much higher score than any anomaly in the past, the anomaly scores are adjusted on a range from 0 to 100 based on the new data. This is called renormalization. It can mean rewriting a large number of documents in the results index. Renormalization happens for results from the last 30 days or 100 bucket spans (depending on which is the longer) by default. When you are working at scale, set `renormalization_window_days` to a lower value, so the workload is reduced. You can change this setting for existing jobs and changes will take effect after the job has been reopened. - -## 11. Optimize the model snapshot retention [model-snapshot-retention] +## 11. 
Optimize the model snapshot retention [model-snapshot-retention] Model snapshots are taken periodically, to ensure resilience in the event of a system failure and to allow you to manually revert to a specific point in time. These are stored in a compressed format in an internal index and kept according to the configured retention policy. Load is placed on the cluster when indexing a model snapshot and index size is increased as multiple snapshots are retained. @@ -125,20 +113,17 @@ Also consider how long you wish to retain snapshots using `model_snapshot_retent For more information, refer to [Model snapshots](https://www.elastic.co/guide/en/machine-learning/current/ml-model-snapshots.html). - -## 12. Optimize your search queries [search-queries] +## 12. Optimize your search queries [search-queries] If you are operating on a big scale, make sure that your {{dfeed}} query is as efficient as possible. There are different ways to write {{es}} queries and some of them are more efficient than others. Please consult [Tune for search speed](../../../deploy-manage/production-guidance/optimize-performance/search-speed.md) to learn more about {{es}} performance tuning. You need to clone or recreate an existing job if you want to optimize its search query. - -## 13. Consider using population analysis [population-analysis] +## 13. Consider using population analysis [population-analysis] Population analysis is more memory efficient than individual analysis of each series. It builds a profile of what a "typical" entity does over a specified time period and then identifies when one is behaving abnormally compared to the population. Use population analysis for analyzing high cardinality fields if you expect that the entities of the population generally behave in the same way. - -## 14. Reduce the cost of forecasting [forecasting] +## 14. 
Reduce the cost of forecasting [forecasting] There are two main performance factors to consider when you create a forecast: indexing load and memory usage. Check the cluster monitoring data to learn the indexing rate and the memory usage. @@ -147,4 +132,3 @@ Forecasting writes a new document to the result index for every forecasted eleme To reduce indexing load, consider a shorter forecast duration and/or try to avoid concurrent forecast requests. Further performance gains can be achieved by reviewing the job configuration; for example by using a dedicated results index, increasing the bucket span and/or by having lower cardinality partitioning fields. The memory usage of a forecast is restricted to 20 MB by default. From 7.9, you can extend this limit by setting `max_model_memory` to a higher value. The maximum value is 40% of the memory limit of the {{anomaly-job}} or 500 MB. If the forecast needs more memory than the provided value, it spools to disk. Forecasts that spool to disk generally run slower. If you need to speed up forecasts, increase the available memory for the forecast. Forecasts that would take more than 500 MB to run won’t start because this is the maximum limit of disk space that a forecast is allowed to use. - diff --git a/explore-analyze/machine-learning/anomaly-detection/anomaly-how-tos.md b/explore-analyze/machine-learning/anomaly-detection/anomaly-how-tos.md index 53e019934..ff3bd723c 100644 --- a/explore-analyze/machine-learning/anomaly-detection/anomaly-how-tos.md +++ b/explore-analyze/machine-learning/anomaly-detection/anomaly-how-tos.md @@ -4,11 +4,8 @@ mapped_pages: - https://www.elastic.co/guide/en/machine-learning/current/anomaly-how-tos.html --- - - # How-tos [anomaly-how-tos] - Though it is quite simple to analyze your data and provide quick {{ml}} results, gaining deep insights might require some additional planning and configuration. 
The guides in this section describe some best practices for generating useful {{ml}} results and insights from your data. * [Generating alerts for {{anomaly-jobs}}](ml-configuring-alerts.md) @@ -24,8 +21,7 @@ Though it is quite simple to analyze your data and provide quick {{ml}} results, * [{{anomaly-jobs-cap}} from visualizations](ml-jobs-from-lens.md) * [Exporting and importing {{ml}} jobs](move-jobs.md) - -## {{anomaly-detect-cap}} examples in blog posts [anomaly-examples-blog-posts] +## {{anomaly-detect-cap}} examples in blog posts [anomaly-examples-blog-posts] The blog posts listed below show how to get the most out of Elastic {{ml}} {anomaly-detect}. @@ -41,16 +37,3 @@ The blog posts listed below show how to get the most out of Elastic {{ml}} {anom * [Interpretability in {{ml}}: Identifying anomalies, influencers, and root causes](https://www.elastic.co/blog/interpretability-in-ml-identifying-anomalies-influencers-root-causes) There are also some examples in the {{ml}} folder in the [examples repository](https://github.com/elastic/examples). - - - - - - - - - - - - - diff --git a/explore-analyze/machine-learning/anomaly-detection/ml-ad-algorithms.md b/explore-analyze/machine-learning/anomaly-detection/ml-ad-algorithms.md index c72b0c734..e5ff20fbf 100644 --- a/explore-analyze/machine-learning/anomaly-detection/ml-ad-algorithms.md +++ b/explore-analyze/machine-learning/anomaly-detection/ml-ad-algorithms.md @@ -5,9 +5,8 @@ mapped_pages: # Anomaly detection algorithms [ml-ad-algorithms] -The {{anomaly-detect}} {ml-features} use a bespoke amalgamation of different techniques such as clustering, various types of time series decomposition, Bayesian distribution modeling, and correlation analysis. These analytics provide sophisticated real-time automated {{anomaly-detect}} for time series data. 
+The {{anomaly-detect}} {{ml-features}} use a bespoke amalgamation of different techniques such as clustering, various types of time series decomposition, Bayesian distribution modeling, and correlation analysis. These analytics provide sophisticated real-time automated {{anomaly-detect}} for time series data. The {{ml}} analytics statistically model the time-based characteristics of your data by observing historical behavior and adapting to new data. The model represents a baseline of normal behavior and can therefore be used to determine how anomalous new events are. {{anomaly-detect-cap}} results are written for each [bucket span](ml-ad-run-jobs.md#ml-ad-bucket-span). These results include scores that are aggregated in order to reduce noise and normalized in order to rank the most mathematically significant anomalies. - diff --git a/explore-analyze/machine-learning/anomaly-detection/ml-ad-concepts.md b/explore-analyze/machine-learning/anomaly-detection/ml-ad-concepts.md index e22abe135..043b89a73 100644 --- a/explore-analyze/machine-learning/anomaly-detection/ml-ad-concepts.md +++ b/explore-analyze/machine-learning/anomaly-detection/ml-ad-concepts.md @@ -5,16 +5,10 @@ mapped_pages: # Advanced concepts [ml-ad-concepts] -This section explains the more complex concepts of the Elastic {{ml}} {anomaly-detect} feature. +This section explains the more complex concepts of the Elastic {{ml}} {{anomaly-detect}} feature. 
* [{{anomaly-detect-cap}} algorithms](ml-ad-algorithms.md) * [Anomaly explanation](ml-ad-explain.md) * [Job types](ml-anomaly-detection-job-types.md) * [Working with {{anomaly-detect}} at scale](anomaly-detection-scale.md) * [Delayed data](ml-delayed-data-detection.md) - - - - - - diff --git a/explore-analyze/machine-learning/anomaly-detection/ml-ad-explain.md b/explore-analyze/machine-learning/anomaly-detection/ml-ad-explain.md index 6b4ed75da..58b42c533 100644 --- a/explore-analyze/machine-learning/anomaly-detection/ml-ad-explain.md +++ b/explore-analyze/machine-learning/anomaly-detection/ml-ad-explain.md @@ -7,7 +7,6 @@ mapped_pages: Every anomaly has an anomaly score assigned to it. That score indicates how anomalous the data point is, which makes it possible to define its severity compared to other anomalies. This page gives you a high-level explanation of the critical factors considered for calculating anomaly scores, how the scores are calculated, and how renormalization works. - ## Anomaly score impact factors [score-impact-factors] {{anomaly-jobs-cap}} split the time series data into time buckets. The data within a bucket is aggregated using functions. Anomaly detection is happening on the bucket values. Three factors can affect the initial anomaly score of a record: @@ -16,26 +15,22 @@ Every anomaly has an anomaly score assigned to it. That score indicates how anom * multi-bucket impact, * anomaly characteristics impact. - ### Single bucket impact [single-bucket-impact] The probability of the actual value in the bucket is calculated first. This probability depends on how many similar values were seen in the past. It often relates to the difference between actual and typical values. The typical value is the median value of the probability distribution for the bucket. This probability leads to the single bucket impact. It usually dominates the initial anomaly score of a short spike or dip. 
- ### Multi-bucket impact [multi-bucket-impact] The probabilities of the values in the current bucket and the preceding 11 buckets contribute to the multi-bucket impact. The accumulated differences between the actual and typical values result in the multi-bucket impact on the initial anomaly score of the current bucket. High multi-bucket impact indicates unusual behavior in the interval preceding the current bucket, even if the value of this bucket may be within the 95% confidence interval. Different signs mark the anomalies with high multi-bucket impact to highlight the distinction. A cross sign "+" represents these anomalies in {{kib}}, instead of a circle. - ### Anomaly characteristics impact [anomaly-characteristics-impact] The impact of the anomaly characteristics considers the different features of the anomaly, such as its length and size. The total duration of the anomaly is considered, and not a fixed interval as in the case of the multi-bucket impact calculation. The length might be only one bucket or thirty (or more) buckets. Comparing the length and size of the anomaly to the historical averages makes it possible to adapt to your domain and the patterns in data. The default behavior of the algorithm is to score longer anomalies higher than short-lived spikes. In practice, short anomalies often turn out to be errors in data, while long anomalies are something you might need to react to. Combining multi-bucket impact and anomaly characteristics impact leads to more reliable detection of abnormal behavior over various domains. - ## Record score reduction (renormalization) [record-score-reduction] Anomaly scores are in the range of 0 and 100. The values close to 100 signify the biggest anomalies the job has seen to date. For this reason, when an anomaly bigger than any other before is detected, the scores of previous anomalies need to be reduced. 
@@ -47,7 +42,6 @@ The process when the anomaly detection algorithm adjusts the anomaly scores of p :class: screenshot ::: - ## Other factors for score reduction [other-factors] Two more factors may lead to a reduction of the initial score: a high variance interval and an incomplete bucket. diff --git a/explore-analyze/machine-learning/anomaly-detection/ml-ad-finding-anomalies.md b/explore-analyze/machine-learning/anomaly-detection/ml-ad-finding-anomalies.md index 3fe9022f2..bae164d94 100644 --- a/explore-analyze/machine-learning/anomaly-detection/ml-ad-finding-anomalies.md +++ b/explore-analyze/machine-learning/anomaly-detection/ml-ad-finding-anomalies.md @@ -4,21 +4,13 @@ mapped_pages: - https://www.elastic.co/guide/en/machine-learning/current/ml-ad-finding-anomalies.html --- - - # Finding anomalies [ml-ad-finding-anomalies] - -The {{ml}} {anomaly-detect} features automate the analysis of time series data by creating accurate baselines of normal behavior in your data. These baselines then enable you to identify anomalous events or patterns. Data is pulled from {{es}} for analysis and anomaly results are displayed in {{kib}} dashboards. For example, the **{{ml-app}}** app provides charts that illustrate the actual data values, the bounds for the expected values, and the anomalies that occur outside these bounds. +The {{ml}} {{anomaly-detect}} features automate the analysis of time series data by creating accurate baselines of normal behavior in your data. These baselines then enable you to identify anomalous events or patterns. Data is pulled from {{es}} for analysis and anomaly results are displayed in {{kib}} dashboards. For example, the **{{ml-app}}** app provides charts that illustrate the actual data values, the bounds for the expected values, and the anomalies that occur outside these bounds. 
The typical workflow for performing {{anomaly-detect}} is as follows: * [Plan your analysis](ml-ad-plan.md) * [Run a job](ml-ad-run-jobs.md) * [View the results](ml-ad-view-results.md) -* [Forecast future behavior](ml-ad-forecast.md) - - - - - +* [Forecast future behavior](ml-ad-forecast.md) \ No newline at end of file diff --git a/explore-analyze/machine-learning/anomaly-detection/ml-ad-forecast.md b/explore-analyze/machine-learning/anomaly-detection/ml-ad-forecast.md index 5cbded3dc..958a3b779 100644 --- a/explore-analyze/machine-learning/anomaly-detection/ml-ad-forecast.md +++ b/explore-analyze/machine-learning/anomaly-detection/ml-ad-forecast.md @@ -23,4 +23,3 @@ The yellow line in the chart represents the predicted data values. The shaded ye When you create a forecast, you specify its *duration*, which indicates how far the forecast extends beyond the last record that was processed. By default, the duration is 1 day. Typically the farther into the future that you forecast, the lower the confidence levels become (that is to say, the bounds increase). Eventually if the confidence levels are too low, the forecast stops. For more information about limitations that affect your ability to create a forecast, see [Unsupported forecast configurations](ml-limitations.md#ml-forecast-config-limitations). You can also optionally specify when the forecast expires. By default, it expires in 14 days and is deleted automatically thereafter. You can specify a different expiration period by using the `expires_in` parameter in the [forecast {{anomaly-jobs}} API](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-forecast.html). 
- diff --git a/explore-analyze/machine-learning/anomaly-detection/ml-ad-plan.md b/explore-analyze/machine-learning/anomaly-detection/ml-ad-plan.md index 8d275553a..8c8d503dc 100644 --- a/explore-analyze/machine-learning/anomaly-detection/ml-ad-plan.md +++ b/explore-analyze/machine-learning/anomaly-detection/ml-ad-plan.md @@ -4,11 +4,8 @@ mapped_pages: - https://www.elastic.co/guide/en/machine-learning/current/ml-ad-plan.html --- - - # Plan your analysis [ml-ad-plan] - The {{ml-features}} in {{stack}} enable you to seek anomalies in your data in many different ways. Using [proprietary {{ml}} algorithms](ml-ad-algorithms.md), the following circumstances are detected: * Anomalies related to temporal deviations in values, counts, or frequencies @@ -23,4 +20,3 @@ When you are deciding which type of {{anomaly-detect}} to use, the most importan If you are uncertain where to begin, {{kib}} can recognize certain types of data and suggest useful {{anomaly-jobs}}. Likewise, some {{agent}} integrations include {{anomaly-job}} configuration information, dashboards, searches, and visualizations that are customized to help you analyze your data. For the full list of functions that you can use in your {{anomaly-jobs}}, see [*Function reference*](ml-functions.md). For a list of the preconfigured jobs, see [Supplied configurations](ootb-ml-jobs.md). - diff --git a/explore-analyze/machine-learning/anomaly-detection/ml-ad-run-jobs.md b/explore-analyze/machine-learning/anomaly-detection/ml-ad-run-jobs.md index 9f05f2f6c..4c86917cc 100644 --- a/explore-analyze/machine-learning/anomaly-detection/ml-ad-run-jobs.md +++ b/explore-analyze/machine-learning/anomaly-detection/ml-ad-run-jobs.md @@ -4,14 +4,10 @@ mapped_pages: - https://www.elastic.co/guide/en/machine-learning/current/ml-ad-run-jobs.html --- - - # Run a job [ml-ad-run-jobs] - {{anomaly-jobs-cap}} contain the configuration information and metadata necessary to perform the {{ml}} analysis. 
They can run for a specific time period or continuously against incoming data. - ## Set up the environment [ml-ad-setup] Before you can use the {{stack-ml-features}}, there are some configuration requirements (such as security privileges) that must be addressed. Refer to [Setup and security](../setting-up-machine-learning.md). @@ -21,8 +17,6 @@ If your data is located outside of {{es}}, you cannot use {{kib}} to create your :::: - - ## Create an {{anomaly-job}} [ml-ad-create-job] You can create {{anomaly-jobs}} by using the [create {{anomaly-jobs}} API](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-put-job.html). {{kib}} also provides wizards to simplify the process, which vary depending on whether you are using the {{ml-app}} app, {{security-app}} or {{observability}} apps. To open **Anomaly Detection**, find **{{ml-app}}** in the main menu, or use the [global search field](../../overview/kibana-quickstart.md#_finding_your_apps_and_objects). @@ -38,12 +32,10 @@ You can create {{anomaly-jobs}} by using the [create {{anomaly-jobs}} API](https * The categorization wizard creates jobs that group log messages into categories and use `count` or `rare` functions to detect anomalies within them. * The advanced wizard creates jobs that can have multiple detectors and enables you to configure all job settings. -{{kib}} can also recognize certain types of data and provide specialized wizards for that context. For example, there are {{anomaly-jobs}} for the sample eCommerce orders and sample web logs data sets, as well as for data generated by the {{elastic-sec}} and {{observability}} solutions, {{beats}}, and {{agent}} {integrations}. For a list of all the preconfigured jobs, see [Supplied configurations](ootb-ml-jobs.md). +{{kib}} can also recognize certain types of data and provide specialized wizards for that context. 
For example, there are {{anomaly-jobs}} for the sample eCommerce orders and sample web logs data sets, as well as for data generated by the {{elastic-sec}} and {{observability}} solutions, {{beats}}, and {{agent}} {{integrations}}. For a list of all the preconfigured jobs, see [Supplied configurations](ootb-ml-jobs.md). -$$$ml-ad-job-tips$$$ When you create an {{anomaly-job}} in {{kib}}, the job creation wizards can provide advice based on the characteristics of your data. By heeding these suggestions, you can create jobs that are more likely to produce insightful {{ml}} results. The most important concepts are covered here; for a description of all the job properties, see the [create {{anomaly-jobs}} API](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-put-job.html). - ### Bucket span [ml-ad-bucket-span] The {{ml-features}} use the concept of a *bucket* to divide the time series into batches for processing. @@ -52,7 +44,6 @@ The *bucket span* is part of the configuration information for an {{anomaly-job} The bucket span must contain a valid [time interval](https://www.elastic.co/guide/en/elasticsearch/reference/current/api-conventions.html#time-units). When you create an {{anomaly-job}} in {{kib}}, you can choose to estimate a bucket span value based on your data characteristics. If you choose a value that is larger than one day or is significantly different than the estimated value, you receive an informational message. - ### Detectors [ml-ad-detectors] Each {{anomaly-job}} must have one or more *detectors*. A detector defines the type of analysis that will occur and which fields to analyze. @@ -61,14 +52,14 @@ Detectors can also contain properties that affect which types of entities or eve If your job does not contain a detector or the detector does not contain a [valid function](ml-functions.md), you receive an error. If a job contains duplicate detectors, you also receive an error. 
Detectors are duplicates if they have the same `function`, `field_name`, `by_field_name`, `over_field_name` and `partition_field_name`. - ### Influencers [ml-ad-influencers] When anomalous events occur, we want to know why. To determine the cause, however, you often need a broader knowledge of the domain. If you have suspicions about which entities in your data set are likely causing irregularities, you can identify them as influencers in your {{anomaly-jobs}}. That is to say, *influencers* are fields that you suspect contain information about someone or something that influences or contributes to anomalies in your data. Influencers can be any field in your data. You can pick influencers when you create your {{anomaly-job}} by using the **Advanced job wizard**. -::::{dropdown} **Requirements when using the {{ml}} APIs to pick influencers** +::::{dropdown} Requirements when using the machine learning APIs to pick influencers + * The influencer field must exist in your {{dfeed}} query or aggregation; otherwise it is not included in the job analysis. * If you use a query in your {{dfeed}}: influencer fields must exist in the query results in the same hit as the detector fields. {{dfeeds-cap}} process data by paging through the query results; since search hits cannot span multiple indices or documents, {{dfeeds}} have the same limitation. * If you use aggregations in your {{dfeed}}, it is possible to use influencers that come from different indices than the detector fields. However, both indices must have a date field with the same name, which you specify in the `data_description`.`time_field` property for the {{dfeed}}. @@ -76,7 +67,6 @@ You can pick influencers when you create your {{anomaly-job}} by using the **Adv :::: - Picking an influencer is strongly recommended for the following reasons: * It allows you to more easily assign blame for anomalies. 
@@ -90,17 +80,14 @@ The **Anomaly Explorer** in {{kib}} lists the top influencers for a job and show Do not pick too many influencers. For example, you generally do not need more than three. If you pick many influencers, the results can be overwhelming and there is a small overhead to the analysis. :::: - Refer to [this blog post](https://www.elastic.co/blog/interpretability-in-ml-identifying-anomalies-influencers-root-causes) for further details on influencers. - ### Cardinality [ml-ad-cardinality] If there are logical groupings of related entities in your data, {{ml}} analytics can make data models and generate results that take these groupings into consideration. For example, you might choose to split your data by user ID and detect when users are accessing resources differently than they usually do. If the field that you use to split your data has many different values, the job uses more memory resources. In {{kib}}, if the cardinality of the `by_field_name`, `over_field_name`, or `partition_field_name` is greater than 1000, the job creation wizards advise that there might be high memory usage. Likewise if you are performing population analysis and the cardinality of the `over_field_name` is below 10, you are advised that this might not be a suitable field to use. - ### Model memory limits [ml-ad-model-memory-limits] For each {{anomaly-job}}, you can optionally specify a `model_memory_limit`, which is the approximate maximum amount of memory resources that are required for analytical processing. The default value is 1 GB. Once this limit is approached, data pruning becomes more aggressive. Upon exceeding this limit, new entities are not modeled. @@ -111,24 +98,20 @@ You can also optionally specify the `xpack.ml.max_model_memory_limit` setting. B If you set the `model_memory_limit` too high, it will be impossible to open the job; jobs cannot be allocated to nodes that have insufficient memory to run them. 
:::: - If the estimated model memory limit for an {{anomaly-job}} is greater than the model memory limit for the job or the maximum model memory limit for the cluster, the job creation wizards in {{kib}} generate a warning. If the estimated memory requirement is only a little higher than the `model_memory_limit`, the job will probably produce useful results. Otherwise, the actions you take to address these warnings vary depending on the resources available in your cluster: * If you are using the default value for the `model_memory_limit` and the {{ml}} nodes in the cluster have lots of memory, the best course of action might be to simply increase the job’s `model_memory_limit`. Before doing this, however, double-check that the chosen analysis makes sense. The default `model_memory_limit` is relatively low to avoid accidentally creating a job that uses a huge amount of memory. * If the {{ml}} nodes in the cluster do not have sufficient memory to accommodate a job of the estimated size, the only options are: - * Add bigger {{ml}} nodes to the cluster, or - * Accept that the job will hit its memory limit and will not necessarily find all the anomalies it could otherwise find. - + * Add bigger {{ml}} nodes to the cluster, or + * Accept that the job will hit its memory limit and will not necessarily find all the anomalies it could otherwise find. If you are using {{ece}} or the hosted Elasticsearch Service on Elastic Cloud, `xpack.ml.max_model_memory_limit` is set to prevent you from creating jobs that cannot be allocated to any {{ml}} nodes in the cluster. If you find that you cannot increase `model_memory_limit` for your {{ml}} jobs, the solution is to increase the size of the {{ml}} nodes in your cluster. - ### Dedicated indices [ml-ad-dedicated-indices] For each {{anomaly-job}}, you can optionally specify a dedicated index to store the {{anomaly-detect}} results. 
As {{anomaly-jobs}} may produce a large amount of results (for example, jobs with many time series, small bucket span, or with long running period), it is recommended to use a dedicated results index by choosing the **Use dedicated index** option in {{kib}} or specifying the `results_index_name` via the [Create {{anomaly-jobs}} API](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-put-job.html). - ### {{dfeeds-cap}} [ml-ad-datafeeds] If you create {{anomaly-jobs}} in {{kib}}, you *must* use {{dfeeds}} to retrieve data from {{es}} for analysis. When you create an {{anomaly-job}}, you select a {{data-source}} and {{kib}} configures the {{dfeed}} for you under the covers. @@ -138,16 +121,14 @@ You can associate only one {{dfeed}} with each {{anomaly-job}}. The {{dfeed}} co {{dfeeds-cap}} can also aggregate data before sending it to the {{anomaly-job}}. There are some limitations, however, and aggregations should generally be used only for low cardinality data. See [Aggregating data for faster performance](ml-configuring-aggregation.md). ::::{important} -When the {{es}} {security-features} are enabled, a {{dfeed}} stores the roles of the user who created or updated the {{dfeed}} at that time. This means that if those roles are updated, the {{dfeed}} subsequently runs with the new permissions that are associated with the roles. However, if the user’s roles are adjusted after creating or updating the {{dfeed}}, the {{dfeed}} continues to run with the permissions that were associated with the original roles. +When the {{es}} {{security-features}} are enabled, a {{dfeed}} stores the roles of the user who created or updated the {{dfeed}} at that time. This means that if those roles are updated, the {{dfeed}} subsequently runs with the new permissions that are associated with the roles. 
However, if the user’s roles are adjusted after creating or updating the {{dfeed}}, the {{dfeed}} continues to run with the permissions that were associated with the original roles. One way to update the roles that are stored within the {{dfeed}} without changing any other settings is to submit an empty JSON document ({}) to the [update {{dfeed}} API](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-update-datafeed.html). :::: - If the data that you want to analyze is not stored in {{es}}, you cannot use {{dfeeds}}. You can however send batches of data directly to the job by using the [post data to jobs API](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-post-data.html). [7.11.0] - ## Open the job [ml-ad-open-job] An {{anomaly-job}} must be opened in order for it to be ready to receive and analyze data. It can be opened and closed multiple times throughout its lifecycle. @@ -156,17 +137,15 @@ After you start the job, you can start the {{dfeed}}, which retrieves data from You can perform both these tasks in {{kib}} or use the [open {{anomaly-jobs}}](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-open-job.html) and [start {{dfeeds}}](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-start-datafeed.html) APIs. - ## Tune the job [ml-ad-tune] Typically after you open a job, the next step is to [view the results](ml-ad-view-results.md). You might find that you need to alter the job configuration or settings. - ### Calendars and scheduled events [ml-ad-calendars] Sometimes there are periods when you expect unusual activity to take place, such as bank holidays, "Black Friday", or planned system outages. If you identify these events in advance, no anomalies are generated during that period. The {{ml}} model is not ill-affected and you do not receive spurious results. 
-You can create calendars and scheduled events in the **Settings** pane on the **Machine Learning** page in {{kib}} or by using [{{ml-cap}} {anomaly-detect} APIs](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-ad-apis.html). +You can create calendars and scheduled events in the **Settings** pane on the **Machine Learning** page in {{kib}} or by using [{{ml-cap}} {{anomaly-detect}} APIs](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-ad-apis.html). A scheduled event must have a start time, end time, and description. In general, scheduled events are short in duration (typically lasting from a few hours to a day) and occur infrequently. If you have regularly occurring events, such as weekly maintenance periods, you do not need to create scheduled events for these circumstances; they are already handled by the {{ml}} analytics. @@ -175,6 +154,7 @@ You can identify zero or more scheduled events in a calendar. {{anomaly-jobs-cap If you want to add multiple scheduled events at once, you can import an iCalendar (`.ics`) file in {{kib}} or a JSON file in the [add events to calendar API](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-post-calendar-event.html). ::::{note} + * You must identify scheduled events before your {{anomaly-job}} analyzes the data for that time period. Machine learning results are not updated retroactively. * If your iCalendar file contains recurring events, only the first occurrence is imported. * [Bucket results](https://www.elastic.co/guide/en/machine-learning/current/ml-bucket-results.html) are generated during scheduled events but they have an anomaly score of zero. @@ -182,8 +162,6 @@ If you want to add multiple scheduled events at once, you can import an iCalenda :::: - - ### Custom rules [ml-ad-rules] By default, {{anomaly-detect}} is unsupervised and the {{ml}} models have no awareness of the domain of your data. 
As a result, {{anomaly-jobs}} might identify events that are statistically significant but are uninteresting when you know the larger context. {{ml-cap}} custom rules enable you to customize {{anomaly-detect}}. @@ -198,10 +176,9 @@ If you are analyzing web traffic, you might create a filter that contains a list For more information, see [Customizing detectors with custom rules](ml-configuring-detector-custom-rules.md). - ### Model snapshots [ml-ad-model-snapshots] -{{stack}} {ml-features} calculate baselines of normal behavior then extrapolate anomalous events. These baselines are accomplished by generating models of your data. +{{stack}} {{ml-features}} calculate baselines of normal behavior then extrapolate anomalous events. These baselines are accomplished by generating models of your data. To ensure resilience in the event of a system failure, snapshots of the {{ml}} model for each {{anomaly-job}} are saved to an internal index within the {{es}} cluster. The amount of time necessary to save these snapshots is proportional to the size of the model in memory. By default, snapshots are captured approximately every 3 to 4 hours. You can change this interval (`background_persist_interval`) when you create or update a job. @@ -218,8 +195,6 @@ You can see the list of model snapshots for each job with the [get model snapsho There are situations other than system failures where you might want to [revert](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-revert-snapshot.html) to using a specific model snapshot. The {{ml-features}} react quickly to anomalous input and new behaviors in data. Highly anomalous input increases the variance in the models and {{ml}} analytics must determine whether it is a new step-change in behavior or a one-off event. In the case where you know this anomalous input is a one-off, it might be appropriate to reset the model state to a time before this event. 
For example, after a Black Friday sales day you might consider reverting to a saved snapshot. If you know about such events in advance, however, you can use [calendars and scheduled events](https://www.elastic.co/guide/en/machine-learning/current/ml-calendars.html) to avoid impacting your model. :::: - - ## Close the job [ml-ad-close-job] When historical data is analyzed, there is no need to stop the {{dfeed}} and/or close the job as they are stopped and closed automatically when the end time is reached. @@ -235,7 +210,6 @@ If you need to stop your {{anomaly-job}}, an orderly shutdown ensures that: This process ensures that jobs are in a consistent state in case you want to subsequently re-open them. - ### Stopping {{dfeeds}} [stopping-ml-datafeeds] When you stop a {{dfeed}}, it ceases to retrieve data from {{es}}. You can stop a {{dfeed}} by using {{kib}} or the [stop {{dfeeds}} API](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-stop-datafeed.html). For example, the following request stops the `feed1` {{dfeed}}: @@ -248,10 +222,8 @@ POST _ml/datafeeds/feed1/_stop You must have `manage_ml`, or `manage` cluster privileges to stop {{dfeeds}}. For more information, see [Security privileges](../../../deploy-manage/users-roles/cluster-or-deployment-auth/elasticsearch-privileges.md). :::: - A {{dfeed}} can be started and stopped multiple times throughout its lifecycle. - ### Stopping all {{dfeeds}} [stopping-all-ml-datafeeds] If you are upgrading your cluster, you can use the following request to stop all {{dfeeds}}: @@ -260,7 +232,6 @@ If you are upgrading your cluster, you can use the following request to stop all POST _ml/datafeeds/_all/_stop ``` - ### Closing {{anomaly-jobs}} [closing-ml-jobs] When you close an {{anomaly-job}}, it cannot receive data or perform analysis operations. You can close a job by using the [close {{anomaly-job}} API](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-close-job.html). 
For example, the following request closes the `job1` job: @@ -273,12 +244,10 @@ POST _ml/anomaly_detectors/job1/_close You must have `manage_ml`, or `manage` cluster privileges to stop {{anomaly-jobs}}. For more information, see [Security privileges](../../../deploy-manage/users-roles/cluster-or-deployment-auth/elasticsearch-privileges.md). :::: - If you submit a request to close an {{anomaly-job}} and its {{dfeed}} is running, the request first tries to stop the {{dfeed}}. This behavior is equivalent to calling the [stop {{dfeeds}} API](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-stop-datafeed.html) with the same `timeout` and `force` parameters as the close job request. {{anomaly-jobs-cap}} can be opened and closed multiple times throughout their lifecycle. - ### Closing all {{anomaly-jobs}} [closing-all-ml-jobs] If you are upgrading your cluster, you can use the following request to close all open {{anomaly-jobs}} on the cluster: diff --git a/explore-analyze/machine-learning/anomaly-detection/ml-ad-view-results.md b/explore-analyze/machine-learning/anomaly-detection/ml-ad-view-results.md index 51db06e49..2dd2dda5c 100644 --- a/explore-analyze/machine-learning/anomaly-detection/ml-ad-view-results.md +++ b/explore-analyze/machine-learning/anomaly-detection/ml-ad-view-results.md @@ -4,21 +4,16 @@ mapped_pages: - https://www.elastic.co/guide/en/machine-learning/current/ml-ad-view-results.html --- - - # View the results [ml-ad-view-results] - After the {{anomaly-job}} has processed some data, you can view the results in {{kib}}. ::::{tip} Depending on the capacity of your machine, you might need to wait a few seconds for the {{ml}} analysis to generate initial results. :::: - There are two tools for examining the results from {{anomaly-jobs}} in {{kib}}: the **Anomaly Explorer** and the **Single Metric Viewer**. - ## Bucket results [ml-ad-bucket-results] When you view your {{ml}} results, each bucket has an anomaly score. 
This score is a statistically aggregated and normalized view of the combined anomalousness of all the record results in the bucket. @@ -32,9 +27,9 @@ The {{ml}} analytics enhance the anomaly score for each bucket by considering co In this example, you can see that some of the anomalies fall within the shaded blue area, which represents the bounds for the expected values. The bounds are calculated per bucket, but multi-bucket analysis is not limited by that scope. -Both the **Anomaly Explorer** and the **Single Metric Viewer*** contain an ***Anomalies** table that shows key details about each anomaly such as time, typical and actual values, and probability. The **Anomaly explanation** section helps you to interpret a given anomaly by providing further insights about its type, impact, and score. +Both the **Anomaly Explorer** and the **Single Metric Viewer** contain an **Anomalies** table that shows key details about each anomaly such as time, typical and actual values, and probability. The **Anomaly explanation** section helps you to interpret a given anomaly by providing further insights about its type, impact, and score. -If you have [{{anomaly-detect-cap}} alert rules](https://www.elastic.co/guide/en/machine-learning/current/creating-anomaly-alert-rules.html) applied to an {{anomaly-job}} and an alert has occured for the rule, you can view how the alert correlates with the {{anomaly-detect}} results in the **Anomaly Explorer** by using the **Anomaly timeline*** swimlane and the ***Alerts** panel. The **Alerts** panel contains a line chart with the alerts count over time. The cursor on the line chart is in sync with the anomaly swimlane making it easier to review anomalous buckets with the spike produced by the alerts. The panel also contains aggregated information for each alert rule associated with the job selection such as the total number of active, recovered, and untracked alerts for the selected job and time range. 
An alert context menu is displayed when an anomaly swimlane cell is selected with alerts in the chosen time range. The context menu contains the alert counters for the selected time buckets. +If you have [{{anomaly-detect-cap}} alert rules](https://www.elastic.co/guide/en/machine-learning/current/creating-anomaly-alert-rules.html) applied to an {{anomaly-job}} and an alert has occurred for the rule, you can view how the alert correlates with the {{anomaly-detect}} results in the **Anomaly Explorer** by using the **Anomaly timeline** swimlane and the **Alerts** panel. The **Alerts** panel contains a line chart with the alerts count over time. The cursor on the line chart is in sync with the anomaly swimlane making it easier to review anomalous buckets with the spike produced by the alerts. The panel also contains aggregated information for each alert rule associated with the job selection such as the total number of active, recovered, and untracked alerts for the selected job and time range. An alert context menu is displayed when an anomaly swimlane cell is selected with alerts in the chosen time range. The context menu contains the alert counters for the selected time buckets.  :::{image} ../../../images/machine-learning-anomaly-explorer-alerts.png :alt: Alerts table in the Anomaly Explorer @@ -45,7 +40,6 @@ If you have more than one {{anomaly-job}}, you can also obtain *overall bucket*  Bucket results provide the top level, overall view of the {{anomaly-job}} and are ideal for alerts. For example, the bucket results might indicate that at 16:05 the system was unusual. This information is a summary of all the anomalies, pinpointing when they occurred. When you identify an anomalous bucket, you can investigate further by examining the pertinent records. - ## Influencer results [ml-ad-influencer-results]  The influencer results show which entities were anomalous and when.
One influencer result is written per bucket for each influencer that affects the anomalousness of the bucket. The {{ml}} analytics determine the impact of an influencer by performing a series of experiments that remove all data points with a specific influencer value and check whether the bucket is still anomalous. That means that only influencers with statistically significant impact on the anomaly are reported in the results. For jobs with more than one detector, influencer scores provide a powerful view of the most anomalous entities. diff --git a/explore-analyze/machine-learning/anomaly-detection/ml-anomaly-detection-job-types.md b/explore-analyze/machine-learning/anomaly-detection/ml-anomaly-detection-job-types.md index 5339d85e5..eca366bd7 100644 --- a/explore-analyze/machine-learning/anomaly-detection/ml-anomaly-detection-job-types.md +++ b/explore-analyze/machine-learning/anomaly-detection/ml-anomaly-detection-job-types.md @@ -8,7 +8,6 @@ mapped_pages: # Job types [ml-anomaly-detection-job-types] - {{anomaly-jobs-cap}} have many possible configuration options which enable you to fine-tune the jobs and cover your use case as much as possible. This page provides a quick overview of different types of {{anomaly-jobs}} and their capabilities. The job types available in {{kib}} are: * single metric jobs, @@ -19,13 +18,11 @@ mapped_pages: * rare jobs, * geo jobs. - -## Single metric jobs [singe-metric-jobs] +## Single metric jobs [singe-metric-jobs] Every {{anomaly-job}} has at least one detector. A detector defines the type of analysis that occurs (for example, using `max`, `average`, or `high` functions) and the field in your data that is analyzed. Single metric jobs have exactly one detector. These jobs are best for detecting anomalies in one aspect of your time series data. For example, you can monitor the request rate in your log data with the `low_count` function to find unusually low request rates that might be a sign of an error. 
Refer to the [*Function reference*](ml-functions.md) to learn more about the available functions. - -## Multi-metric jobs [multi-metric-jobs] +## Multi-metric jobs [multi-metric-jobs] Multi-metric jobs can have more than one detector configured and optionally split the analysis by a field. Conceptually, multi-metric jobs can be considered as multiple independent single metric jobs. Binding the jobs together into a multi-metric job has the advantage of an overall anomaly score (instead of an independent anomaly score for each job) and influencers that apply to all metrics in the job. Multi-metrics jobs provide better results when the influencers are shared across the detectors. @@ -33,8 +30,7 @@ Splitting the analysis by a field enables you to model each value of that field Multi-metric jobs are recommended for complex use cases where you want to detect anomalous behavior in multiple aspects of your data or analyze the data in the context of distinct values of a field. - -## Population jobs [population-jobs] +## Population jobs [population-jobs] In the case of the population jobs, the analyzed data is split by the distinct values of a field. This field defines what is called a population. The splits are analyzed in the context of all the splits to find unusual values in the population. In other words, the population analysis is a comparison of an individual entity against a collective model of all members in the population as witnessed over time. @@ -42,13 +38,11 @@ For example, if you want to detect IP addresses with unusual request rates compa Refer to [Performing population analysis](https://www.elastic.co/guide/en/machine-learning/current/ml-configuring-populations.html) to learn more. +## Advanced jobs [advanced-jobs] -## Advanced jobs [advanced-jobs] - -Advanced jobs give you all the flexibility that’s possible in the [create {{anomaly-jobs}} API](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-put-job.html). 
At the extreme, you can switch to directly edit the JSON that will be sent to this endpoint. All the other types of jobs described in this page *can* be created as advanced jobs, but the more specialized wizards make it easier to create jobs for common situations. You can create an advanced job if you are familiar with all the functionality that {{ml}} {anomaly-detect} provides and want to do something that the more specialized wizards do not allow you to do. +Advanced jobs give you all the flexibility that’s possible in the [create {{anomaly-jobs}} API](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-put-job.html). At the extreme, you can switch to directly edit the JSON that will be sent to this endpoint. All the other types of jobs described in this page *can* be created as advanced jobs, but the more specialized wizards make it easier to create jobs for common situations. You can create an advanced job if you are familiar with all the functionality that {{ml}} {{anomaly-detect}} provides and want to do something that the more specialized wizards do not allow you to do. - -## Categorization jobs [categorization-jobs] +## Categorization jobs [categorization-jobs] Categorization jobs cluster similar text values together, classify them into categories, and detect anomalies within the categories. Categorization works best on machine-written text like log messages that typically contains repeated strings of text; it does not work well on human-generated text because of its high variability. @@ -56,13 +50,10 @@ The model learns the normal volume and pattern of a category over time so the jo Refer to [Detecting anomalous categories of data](ml-configuring-categories.md) to learn more. - -## Rare jobs [rare-jobs] +## Rare jobs [rare-jobs] Rare {{anomaly-jobs}} detect rare occurrences in time series data. Rare jobs use the `rare` or `freq_rare` functions and detect such events in populations as well. 
A *rare* job finds events in simple time series data that occur rarely compared to what the model observed over time. A *rare in a population* job finds members of a population that have rare values over time compared to the other members of the population. The *frequently rare in a population* job detects rare events that frequently occur for a member of a population compared to other members. As an example of this last type of rare job, you can create one that models URI paths and client IP interactions and detects a rare URI path that is visited by very few client IPs in the population (this is the reason why it’s rare). The client IPs that have many interactions with this URI path are anomalous compared to the rest of the population that rarely interact with the URI path. - -## Geo jobs [geo-jobs] +## Geo jobs [geo-jobs] Geo {{anomaly-jobs}} detect unusual occurrences in the geographic locations of your data. Your data set must contain geo data to be able to use the `lat_long` function in the detector to detect anomalous geo data. Geo jobs can identify, for example, transactions that are initiated from locations that are unusual compared to the locations of the rest of the transactions. - diff --git a/explore-analyze/machine-learning/anomaly-detection/ml-api-quickref.md b/explore-analyze/machine-learning/anomaly-detection/ml-api-quickref.md index 43245dbbf..0640971f6 100644 --- a/explore-analyze/machine-learning/anomaly-detection/ml-api-quickref.md +++ b/explore-analyze/machine-learning/anomaly-detection/ml-api-quickref.md @@ -21,4 +21,3 @@ The main resources can be accessed with a variety of endpoints: * [`/model_snapshots/`](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-ad-apis.html#ml-api-snapshot-endpoint): Manage model snapshots For a full list, see [{{ml-cap}} {anomaly-detect} APIs](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-ad-apis.html). 
- diff --git a/explore-analyze/machine-learning/anomaly-detection/ml-configuring-aggregation.md b/explore-analyze/machine-learning/anomaly-detection/ml-configuring-aggregation.md index ffb4e5bbd..48aca370d 100644 --- a/explore-analyze/machine-learning/anomaly-detection/ml-configuring-aggregation.md +++ b/explore-analyze/machine-learning/anomaly-detection/ml-configuring-aggregation.md @@ -7,12 +7,10 @@ mapped_pages: When you aggregate data, {{es}} automatically distributes the calculations across your cluster. Then you can feed this aggregated data into the {{ml-features}} instead of raw results. It reduces the volume of data that must be analyzed. - ## Requirements [aggs-requs-dfeeds] There are a number of requirements for using aggregations in {{dfeeds}}. - ### Aggregations [aggs-aggs] * Your aggregation must include a `date_histogram` aggregation or a top level `composite` aggregation, which in turn must contain a `max` aggregation on the time field. It ensures that the aggregated data is a time series and the timestamp of each bucket is the time of the last record in the bucket. @@ -23,21 +21,18 @@ There are a number of requirements for using aggregations in {{dfeeds}}. * If you set the `summary_count_field_name` property to a non-null value, the {{anomaly-job}} expects to receive aggregated input. The property must be set to the name of the field that contains the count of raw data points that have been aggregated. It applies to all detectors in the job. * The influencers or the partition fields must be included in the aggregation of your {{dfeed}}, otherwise they are not included in the job analysis. For more information on influencers, refer to [Influencers](ml-ad-run-jobs.md#ml-ad-influencers). - ### Intervals [aggs-interval] * The bucket span of your {{anomaly-job}} must be divisible by the value of the `calendar_interval` or `fixed_interval` in your aggregation (with no remainder). 
* If you specify a `frequency` for your {{dfeed}}, it must be divisible by the `calendar_interval` or the `fixed_interval`. * {{anomaly-jobs-cap}} cannot use `date_histogram` or `composite` aggregations with an interval measured in months because the length of the month is not fixed; they can use weeks or smaller units. - ## Limitations [aggs-limits-dfeeds] * If your [{{dfeed}} uses aggregations with nested `terms` aggs](#aggs-dfeeds) and model plot is not enabled for the {{anomaly-job}}, neither the **Single Metric Viewer** nor the **Anomaly Explorer** can plot and display an anomaly chart. In these cases, an explanatory message is shown instead of the chart. * Your {{dfeed}} can contain multiple aggregations, but only the ones with names that match values in the job configuration are fed to the job. * Using [scripted metric](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-metrics-scripted-metric-aggregation.html) aggregations is not supported in {{dfeeds}}. - ## Recommendations [aggs-recommendations-dfeeds] * When your detectors use [metric](https://www.elastic.co/guide/en/machine-learning/current/ml-metric-functions.html) or [sum](https://www.elastic.co/guide/en/machine-learning/current/ml-sum-functions.html) analytical functions, it’s recommended to set the `date_histogram` or `composite` aggregation interval to a tenth of the bucket span. This creates finer, more granular time buckets, which are ideal for this type of analysis. @@ -59,8 +54,6 @@ There are a number of requirements for using aggregations in {{dfeeds}}. } ``` - - ## Including aggregations in {{anomaly-jobs}} [aggs-using-date-histogram] When you create or update an {{anomaly-job}}, you can include aggregated fields in the analysis configuration. In the {{dfeed}} configuration object, you can define the aggregations. @@ -119,7 +112,6 @@ PUT _ml/anomaly_detectors/kibana-sample-data-flights 4. 
The `term` aggregation is named `airline` and its field is also named `airline`. 5. The `avg` aggregation is named `responsetime` and its field is also named `responsetime`. - Use the following format to define a `date_histogram` aggregation to bucket by time in your {{dfeed}}: ```js @@ -153,7 +145,6 @@ Use the following format to define a `date_histogram` aggregation to bucket by t } ``` - ## Composite aggregations [aggs-using-composite] Composite aggregations are optimized for queries that are either `match_all` or `range` filters. Use composite aggregations in your {{dfeeds}} for these cases. Other types of queries may cause the `composite` aggregation to be inefficient. @@ -224,7 +215,6 @@ PUT _ml/anomaly_detectors/kibana-sample-data-flights-composite 4. The required `max` aggregation whose name is the time field in the job analysis config. 5. The `avg` aggregation is named `responsetime` and its field is also named `responsetime`. - Use the following format to define a composite aggregation in your {{dfeed}}: ```js @@ -257,7 +247,6 @@ Use the following format to define a composite aggregation in your {{dfeed}}: } ``` - ## Nested aggregations [aggs-dfeeds] You can also use complex nested aggregations in {{dfeeds}}. @@ -268,7 +257,6 @@ The next example uses the [`derivative` pipeline aggregation](https://www.elasti `derivative` or other pipeline aggregations may not work within `composite` aggregations. See [composite aggregations and pipeline aggregations](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-composite-aggregation.html#search-aggregations-bucket-composite-aggregation-pipeline-aggregations). :::: - ```js "aggregations": { "beat.name": { @@ -304,7 +292,6 @@ The next example uses the [`derivative` pipeline aggregation](https://www.elasti } ``` - ## Single bucket aggregations [aggs-single-dfeeds] You can also use single bucket aggregations in {{dfeeds}}. 
The following example shows two `filter` aggregations, each gathering the number of unique entries for the `error` field. @@ -350,14 +337,12 @@ You can also use single bucket aggregations in {{dfeeds}}. The following example } ``` - ## Using `aggregate_metric_double` field type in {{dfeeds}} [aggs-amd-dfeeds] ::::{note} It is not currently possible to use `aggregate_metric_double` type fields in {{dfeeds}} without aggregations. :::: - You can use fields with the [`aggregate_metric_double`](https://www.elastic.co/guide/en/elasticsearch/reference/current/aggregate-metric-double.html) field type in a {{dfeed}} with aggregations. It is required to retrieve the `value_count` of the `aggregate_metric_double` filed in an aggregation and then use it as the `summary_count_field_name` to provide the correct count that represents the aggregation value. In the following example, `presum` is an `aggregate_metric_double` type field that has all the possible metrics: `[ min, max, sum, value_count ]`. To use an `avg` aggregation on this field, you need to perform a `value_count` aggregation on `presum` and then set the field that contains the aggregated values `my_count` as the `summary_count_field_name`: diff --git a/explore-analyze/machine-learning/anomaly-detection/ml-configuring-alerts.md b/explore-analyze/machine-learning/anomaly-detection/ml-configuring-alerts.md index 052a1c14a..593a85926 100644 --- a/explore-analyze/machine-learning/anomaly-detection/ml-configuring-alerts.md +++ b/explore-analyze/machine-learning/anomaly-detection/ml-configuring-alerts.md @@ -5,7 +5,7 @@ mapped_pages: # Generating alerts for anomaly detection jobs [ml-configuring-alerts] -{{kib}} {alert-features} include support for {{ml}} rules, which run scheduled checks for anomalies in one or more {{anomaly-jobs}} or check the health of the job with certain conditions. If the conditions of the rule are met, an alert is created and the associated action is triggered. 
For example, you can create a rule to check an {{anomaly-job}} every fifteen minutes for critical anomalies and to notify you in an email. To learn more about {{kib}} {alert-features}, refer to [Alerting](../../alerts/kibana.md#alerting-getting-started). +{{kib}} {{alert-features}} include support for {{ml}} rules, which run scheduled checks for anomalies in one or more {{anomaly-jobs}} or check the health of the job with certain conditions. If the conditions of the rule are met, an alert is created and the associated action is triggered. For example, you can create a rule to check an {{anomaly-job}} every fifteen minutes for critical anomalies and to notify you in an email. To learn more about {{kib}} {{alert-features}}, refer to [Alerting](../../alerts/kibana.md#alerting-getting-started). The following {{ml}} rules are available: @@ -15,14 +15,8 @@ The following {{ml}} rules are available: {{anomaly-jobs-cap}} health : Monitors job health and alerts if an operational issue occurred that may prevent the job from detecting anomalies. -::::{tip} +::::{tip} If you have created rules for specific {{anomaly-jobs}} and you want to monitor whether these jobs work as expected, {{anomaly-jobs}} health rules are ideal for this purpose. :::: - In **{{stack-manage-app}} > {{rules-ui}}**, you can create both types of {{ml}} rules. In the **{{ml-app}}** app, you can create only {{anomaly-detect}} alert rules; create them from the {{anomaly-job}} wizard after you start the job or from the {{anomaly-job}} list. - - - - - diff --git a/explore-analyze/machine-learning/anomaly-detection/ml-delayed-data-detection.md b/explore-analyze/machine-learning/anomaly-detection/ml-delayed-data-detection.md index af0023a31..e5d0cc072 100644 --- a/explore-analyze/machine-learning/anomaly-detection/ml-delayed-data-detection.md +++ b/explore-analyze/machine-learning/anomaly-detection/ml-delayed-data-detection.md @@ -9,11 +9,6 @@ Delayed data are documents that are indexed late. 
That is to say, it is data rel When you create a {{dfeed}}, you can specify a [`query_delay`](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-put-datafeed.html#ml-put-datafeed-request-body) setting. This setting enables the {{dfeed}} to wait for some time past real-time, which means any "late" data in this period is fully indexed before the {{dfeed}} tries to gather it. However, if the setting is set too low, the {{dfeed}} may query for data before it has been indexed and consequently miss that document. Conversely, if it is set too high, analysis drifts farther away from real-time. The balance that is struck depends upon each use case and the environmental factors of the cluster. -::::{important} +::::{important} If you get an error that says `Datafeed missed XXXX documents due to ingest latency`, consider increasing the value of query_delay. If it doesn’t help, investigate the ingest latency and its cause. You can do this by comparing event and ingest timestamps. High latency is often caused by bursts of ingested documents, misconfiguration of the ingest pipeline, or misalignment of system clocks. :::: - - - - - diff --git a/explore-analyze/machine-learning/anomaly-detection/ml-getting-started.md b/explore-analyze/machine-learning/anomaly-detection/ml-getting-started.md index 5c6e3751a..19a807f9d 100644 --- a/explore-analyze/machine-learning/anomaly-detection/ml-getting-started.md +++ b/explore-analyze/machine-learning/anomaly-detection/ml-getting-started.md @@ -4,11 +4,8 @@ mapped_pages: - https://www.elastic.co/guide/en/machine-learning/current/ml-getting-started.html --- - - # Tutorial: Getting started with anomaly detection [ml-getting-started] - Ready to take {{anomaly-detect}} for a test drive? Follow this tutorial to: * Try out the **{{data-viz}}** @@ -19,35 +16,32 @@ At the end of this tutorial, you should have a good idea of what {{ml}} is and w Need more context? 
Check out the [{{es}} introduction](https://www.elastic.co/guide/en/elasticsearch/reference/current/elasticsearch-intro.html) to learn the lingo and understand the basics of how {{es}} works. - ## Try it out [get-started-prereqs] 1. Before you can play with the {{ml-features}}, you must install {{es}} and {{kib}}. {{es}} stores the data and the analysis results. {{kib}} provides a helpful user interface for creating and viewing jobs. - ::::{tip} +::::{tip} You can run {{es}} and {{kib}} on your own hardware, or use our [hosted {{ess}}](https://www.elastic.co/cloud/elasticsearch-service) on {{ecloud}}. The {{ess}} is available on both AWS and GCP. [Try out the {{ess}} for free](https://www.elastic.co/cloud/elasticsearch-service/signup). - :::: +:::: -2. Verify that your environment is set up properly to use the {{ml-features}}. If the {{es}} {security-features} are enabled, to complete this tutorial you need a user that has authority to manage {{anomaly-jobs}}. See [Setup and security](../setting-up-machine-learning.md). +2. Verify that your environment is set up properly to use the {{ml-features}}. If the {{es}} {{security-features}} are enabled, to complete this tutorial you need a user that has authority to manage {{anomaly-jobs}}. See [Setup and security](../setting-up-machine-learning.md). 3. [Add the sample data sets that ship with {{kib}}](../../overview/kibana-quickstart.md#gs-get-data-into-kibana). 1. From the {{kib}} home page, click **Try sample data**, then open the **Other sample data sets** section. 2. Pick a data set. In this tutorial, you’ll use the **Sample web logs**. While you’re here, feel free to click **Add data** on all of the available sample data sets. - These data sets are now ready be analyzed in {{ml}} jobs in {{kib}}. - ## Explore the data in {{kib}} [sample-data-visualizer] To get the best results from {{ml}} analytics, you must understand your data. You must know its data types and the range and distribution of values. 
The {{data-viz}} enables you to explore the fields in your data: 1. Open {{kib}} in your web browser. If you are running {{kib}} locally, go to `http://localhost:5601/`. - ::::{tip} - The {{kib}} {ml-features} use pop-ups. You must configure your web browser so that it does not block pop-up windows or create an exception for your {{kib}} URL. - :::: +::::{tip} + The {{kib}} {{ml-features}} use pop-ups. You must configure your web browser so that it does not block pop-up windows or create an exception for your {{kib}} URL. +:::: 2. Open **Machine Learning** from the main menu, or use the [global search field](../../overview/kibana-quickstart.md#_finding_your_apps_and_objects). 3. Select the **{{data-viz}}** tab. @@ -60,22 +54,21 @@ To get the best results from {{ml}} analytics, you must understand your data. Yo In particular, look at the `clientip`, `response.keyword`, and `url.keyword` fields, since we’ll use them in our {{anomaly-jobs}}. For these fields, the {{data-viz}} provides the number of distinct values, a list of the top values, and the number and percentage of documents that contain the field. For example: - :::{image} ../../../images/machine-learning-ml-gs-data-keyword.jpg - :alt: {{data-viz}} output for ip and keyword fields - :class: screenshot - ::: +:::{image} ../../../images/machine-learning-ml-gs-data-keyword.jpg +:alt: {{data-viz}} output for ip and keyword fields +:class: screenshot +::: For numeric fields, the {{data-viz}} provides information about the minimum, median, maximum, and top values, the number of distinct values, and their distribution. You can use the distribution chart to get a better idea of how the values in the data are clustered. For example: - :::{image} ../../../images/machine-learning-ml-gs-data-metric.jpg - :alt: {{data-viz}} for sample web logs - :class: screenshot - ::: - - ::::{tip} - Make note of the range of dates in the `@timestamp` field. 
They are relative to when you added the sample data and you’ll need that information later in the tutorial. - :::: +:::{image} ../../../images/machine-learning-ml-gs-data-metric.jpg +:alt: {{data-viz}} for sample web logs +:class: screenshot +::: +::::{tip} +Make note of the range of dates in the `@timestamp` field. They are relative to when you added the sample data and you’ll need that information later in the tutorial. +:::: Now that you’re familiar with the data in the `kibana_sample_data_logs` index, you can create some {{anomaly-jobs}} to analyze it. @@ -83,15 +76,12 @@ Now that you’re familiar with the data in the `kibana_sample_data_logs` index, You can view the statistics of the selectable fields in the {{anomaly-detect}} wizard. The field statistics displayed in a flyout provide more meaningful context to help you select relevant fields. :::: - - ## Create sample {{anomaly-jobs}} in {{kib}} [sample-data-jobs] ::::{important} The results on this page might be different than the actual values you get when using the sample data sets. This behavior is expected as the data points in the data sets might change over time. :::: - The {{kib}} sample data sets include some pre-configured {{anomaly-jobs}} for you to play with. You can use either of the following methods to add the jobs: * After you load the sample web logs data set on the {{kib}} home page, click **View data** > **ML jobs**. @@ -111,7 +101,6 @@ For more information, see [{{dfeeds-cap}}](ml-ad-run-jobs.md#ml-ad-datafeeds), [ :::: - If you want to see all of the configuration details for your jobs and {{dfeeds}}, you can do so on the **Machine Learning** > **Anomaly Detection** > **Jobs** page. Alternatively, you can see the configuration files in [GitHub ](https://github.com/elastic/kibana/tree/master/x-pack/plugins/ml/server/models/data_recognizer/modules/sample_data_weblogs). 
For the purposes of this tutorial, however, here’s a quick overview of the goal of each job: * `low_request_rate` uses the `low_count` function to find unusually low request rates @@ -120,7 +109,6 @@ If you want to see all of the configuration details for your jobs and {{dfeeds}} The next step is to view the results and see what types of insights these jobs have generated! - ## View {{anomaly-detect}} results [sample-data-results] After the {{dfeeds}} are started and the {{anomaly-jobs}} have processed some data, you can view the results in {{kib}}. @@ -129,7 +117,6 @@ After the {{dfeeds}} are started and the {{anomaly-jobs}} have processed some da Depending on the capacity of your machine, you might need to wait a few seconds for the {{ml}} analysis to generate initial results. :::: - :::{image} ../../../images/machine-learning-ml-gs-web-results.jpg :alt: Create jobs for the sample web logs :class: screenshot @@ -139,7 +126,6 @@ The {{ml-features}} analyze the input stream of data, model its behavior, and pe There are two tools for examining the results from {{anomaly-jobs}} in {{kib}}: the **Anomaly Explorer** and the **Single Metric Viewer**. You can switch between these tools by clicking the icons in the top left corner. You can also edit the job selection to examine a different subset of {{anomaly-jobs}}. - ### Single metric job results [ml-gs-results-smv] One of the sample jobs (`low_request_rate`), is a *single metric {{anomaly-job}}*. It has a single detector that uses the `low_count` function and limited job properties. You might use a job like this if you want to determine when the request rate on your web site drops significantly. @@ -166,14 +152,12 @@ Any data points outside the range that was predicted by the model are marked as :::: - Slide the time selector to a section of the time series that contains a red anomaly data point. If you hover over the point, you can see more information. ::::{note} You might notice a high spike in the time series. 
It’s not highlighted as an anomaly, however, since this job looks for low counts only. :::: - For each anomaly, you can see key details such as the time, the actual and expected ("typical") values, and their probability in the **Anomalies** section of the viewer. For example: :::{image} ../../../images/machine-learning-ml-gs-job1-anomalies.jpg @@ -196,7 +180,6 @@ You can optionally annotate your job results by drag-selecting a period of time After you have identified anomalies, often the next step is to try to determine the context of those situations. For example, are there other factors that are contributing to the problem? Are the anomalies confined to particular applications or servers? You can begin to troubleshoot these situations by layering additional jobs or creating multi-metric jobs. - ### Advanced or multi-metric job results [ml-gs-results-ae] Conceptually, you can think of *multi-metric {{anomaly-jobs}}* as running multiple independent single metric jobs. By bundling them together in a multi-metric job, however, you can see an overall score and shared influencers for all the metrics and all the entities in the job. Multi-metric jobs therefore scale better than having many independent single metric jobs. They also provide better results when you have influencers that are shared across the detectors. @@ -208,7 +191,6 @@ As a best practice, do not pick too many influencers. For example, you generally :::: - You can also configure your {{anomaly-jobs}} to split a single time series into multiple time series based on a categorical field. For example, the `response_code_rates` job has a single detector that splits the data based on the `response.keyword` and then uses the `count` function to determine when the number of events is anomalous. You might use a job like this if you want to look at both high and low request rates partitioned by response code. 
Let’s start by looking at the `response_code_rates` job in the **Anomaly Explorer**: @@ -258,8 +240,6 @@ In this sample data, the spike in the 404 response codes is influenced by a spec The anomaly scores that you see in each section of the **Anomaly Explorer** might differ slightly. This disparity occurs because for each job there are bucket results, influencer results, and record results. Anomaly scores are generated for each type of result. The anomaly timeline uses the bucket-level anomaly scores. The list of top influencers uses the influencer-level anomaly scores. The list of anomalies uses the record-level anomaly scores. :::: - - ### Population job results [ml-gs-results-population] The final sample job (`url_scanning`) is a *population {{anomaly-job}}*. As we saw in the `response_code_rates` job results, there are some clients that seem to be accessing unusually high numbers of URLs. The `url_scanning` sample job provides another method for investigating that type of problem. It has a single detector that uses the `high_distinct_count` function on the `url.keyword` to detect unusually high numbers of distinct values in that field. It then analyzes whether that behavior differs over the population of clients, as defined by the `clientip` field. @@ -280,7 +260,6 @@ If you want to play with another example of a population {{anomaly-job}}, add th :class: screenshot ::: - ## Create forecasts [sample-data-forecasts] In addition to detecting anomalous behavior in your data, you can use the {{ml-features}} to predict future behavior. @@ -290,46 +269,44 @@ To create a forecast in {{kib}}: 1. View your job results (for example, for the `low_request_rate` job) in the **Single Metric Viewer**. To find that view, click the **View series*** button in the ***Actions** column on the **Anomaly Detection** page. 2. Click **Forecast**. 
- :::{image} ../../../images/machine-learning-ml-gs-forecast.png - :alt: Create a forecast from the Single Metric Viewer - :class: screenshot - ::: - -3. Specify a duration for your forecast. This value indicates how far to extrapolate beyond the last record that was processed. You must use [time units](https://www.elastic.co/guide/en/elasticsearch/reference/current/api-conventions.html#time-units). In this example, the duration is one week (`1w`):
+:::{image} ../../../images/machine-learning-ml-gs-forecast.png +:alt: Create a forecast from the Single Metric Viewer +:class: screenshot +::: - :::{image} ../../../images/machine-learning-ml-gs-duration.png - :alt: Specify a duration of 1w - :class: screenshot - ::: +3. Specify a duration for your forecast. This value indicates how far to extrapolate beyond the last record that was processed. You must use [time units](https://www.elastic.co/guide/en/elasticsearch/reference/current/api-conventions.html#time-units). In this example, the duration is one week (`1w`): -4. View the forecast in the **Single Metric Viewer**:
+:::{image} ../../../images/machine-learning-ml-gs-duration.png +:alt: Specify a duration of 1w +:class: screenshot +::: - :::{image} ../../../images/machine-learning-ml-gs-forecast-results.png - :alt: View a forecast from the Single Metric Viewer - :class: screenshot - ::: +4. View the forecast in the **Single Metric Viewer**: - The yellow line in the chart represents the predicted data values. The shaded yellow area represents the bounds for the predicted values, which also gives an indication of the confidence of the predictions. Note that the bounds generally increase with time (that is to say, the confidence levels decrease), since you are forecasting further into the future. Eventually if the confidence levels are too low, the forecast stops. +:::{image} ../../../images/machine-learning-ml-gs-forecast-results.png +:alt: View a forecast from the Single Metric Viewer +:class: screenshot +::: -5. Optional: Compare the forecast to actual data.
+The yellow line in the chart represents the predicted data values. The shaded yellow area represents the bounds for the predicted values, which also gives an indication of the confidence of the predictions. Note that the bounds generally increase with time (that is to say, the confidence levels decrease), since you are forecasting further into the future. Eventually if the confidence levels are too low, the forecast stops. - :::{image} ../../../images/machine-learning-ml-gs-forecast-actual.png - :alt: View a forecast over actual data in the Single Metric Viewer - :class: screenshot - ::: +5. Optional: Compare the forecast to actual data. - As the job processes more data, you can click the **Forecast** button again and choose to see one of your forecasts overlaid on the actual data. The chart then contains the actual data values, the bounds for the expected values, the anomalies, the forecast data values, and the bounds for the forecast. This combination of actual and forecast data gives you an indication of how well the {{ml-features}} can extrapolate the future behavior of the data. +:::{image} ../../../images/machine-learning-ml-gs-forecast-actual.png +:alt: View a forecast over actual data in the Single Metric Viewer +:class: screenshot +::: - If you want to see this type of comparison for the {{kib}} sample data, which has a finite number of documents, you can reset the job and analyze only a subset of the data before you create a forecast. For example, reset one of your {{anomaly-jobs}} from the **Job Management** page in {{kib}} or use the [reset {{anomaly-jobs}} API](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-reset-job.html). When you restart the {{dfeed}} for this job, choose a date part way through your sample data as the search end date. By default, the {{dfeed}} stops and the {{anomaly-job}} closes when it reaches that date. Create the forecast. 
You can then restart the {{dfeed}} to process the remaining data and generate the type of results shown here. +As the job processes more data, you can click the **Forecast** button again and choose to see one of your forecasts overlaid on the actual data. The chart then contains the actual data values, the bounds for the expected values, the anomalies, the forecast data values, and the bounds for the forecast. This combination of actual and forecast data gives you an indication of how well the {{ml-features}} can extrapolate the future behavior of the data. - ::::{tip} - The {{kib}} sample data sets have timestamps that are relative to when you added the data sets. However, some of these dates are in the future. Therefore, for the purposes of this tutorial, when you restart your {{dfeed}} do not use the **No end time (Real-time search)** option. Specify the appropriate end dates so that it processes all of the data immediately. - :::: +If you want to see this type of comparison for the {{kib}} sample data, which has a finite number of documents, you can reset the job and analyze only a subset of the data before you create a forecast. For example, reset one of your {{anomaly-jobs}} from the **Job Management** page in {{kib}} or use the [reset {{anomaly-jobs}} API](https://www.elastic.co/guide/en/elasticsearch/reference/current/ml-reset-job.html). When you restart the {{dfeed}} for this job, choose a date part way through your sample data as the search end date. By default, the {{dfeed}} stops and the {{anomaly-job}} closes when it reaches that date. Create the forecast. You can then restart the {{dfeed}} to process the remaining data and generate the type of results shown here. +::::{tip} +The {{kib}} sample data sets have timestamps that are relative to when you added the data sets. However, some of these dates are in the future. Therefore, for the purposes of this tutorial, when you restart your {{dfeed}} do not use the **No end time (Real-time search)** option. 
Specify the appropriate end dates so that it processes all of the data immediately. +:::: Now that you have seen how easy it is to create forecasts with the sample data, consider what type of events you might want to predict in your own data. For more information and ideas, see [Forecast future behavior](ml-ad-forecast.md). - ## Next steps [sample-data-next] By completing this tutorial, you’ve learned how you can detect anomalous behavior in a simple set of sample data. You created {{anomaly-jobs}} in {{kib}}, which opens jobs and creates and starts {{dfeeds}} for you under the covers. You examined the results of the {{ml}} analysis in the **Single Metric Viewer** and **Anomaly Explorer** in {{kib}}. You also extrapolated the future behavior of a job by creating a forecast. diff --git a/explore-analyze/machine-learning/machine-learning-in-kibana.md b/explore-analyze/machine-learning/machine-learning-in-kibana.md index 3835460f0..c655ab5db 100644 --- a/explore-analyze/machine-learning/machine-learning-in-kibana.md +++ b/explore-analyze/machine-learning/machine-learning-in-kibana.md @@ -1,4 +1,5 @@ --- +navigation_title: ML in Kibana mapped_pages: - https://www.elastic.co/guide/en/kibana/current/xpack-ml.html --- @@ -39,15 +40,12 @@ If {{stack-security-features}} are enabled, users must have the necessary privil There are limitations in {{ml-features}} that affect {{kib}}. For more information, refer to [{{ml-cap}}](anomaly-detection/ml-limitations.md). :::: - - ## Data drift [data-drift-view] ::::{warning} This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features. :::: - You can find the data drift view in **{{ml-app}}** > **{{data-viz}}** in {{kib}} or by using the [global search field](../../get-started/the-stack.md#kibana-navigation-search). 
The data drift view shows you the differences in each field for two different time ranges in a given {{data-source}}. The view helps you to visualize the changes in your data over time and enables you to understand its behavior better. :::{image} ../../images/kibana-ml-data-drift.png diff --git a/explore-analyze/machine-learning/machine-learning-in-kibana/xpack-ml-aiops.md b/explore-analyze/machine-learning/machine-learning-in-kibana/xpack-ml-aiops.md index 6d50d0985..b774405ca 100644 --- a/explore-analyze/machine-learning/machine-learning-in-kibana/xpack-ml-aiops.md +++ b/explore-analyze/machine-learning/machine-learning-in-kibana/xpack-ml-aiops.md @@ -7,19 +7,18 @@ mapped_pages: AIOps Labs is a part of {{ml-app}} in {{kib}} which provides features that use advanced statistical methods to help you interpret your data and its behavior. - ## Log rate analysis [log-rate-analysis] Log rate analysis uses advanced statistical methods to identify reasons for increases or decreases in log rates and displays the statistically significant data in a tabular format. It makes it easy to find and investigate causes of unusual spikes or drops by using the analysis workflow view. Examine the histogram chart of the log rates for a given {{data-source}}, and find the reason behind a particular change possibly in millions of log events across multiple fields and values. -You can find log rate analysis embedded in multiple applications. In {{kib}}, you can find it under **{{ml-app}}*** > ***AIOps Labs** or by using the [global search field](../../../get-started/the-stack.md#kibana-navigation-search). Here, you can select the {{data-source}} or saved Discover session that you want to analyze. +You can find log rate analysis embedded in multiple applications. In {{kib}}, you can find it under **{{ml-app}}** > **AIOps Labs** or by using the [global search field](../../../get-started/the-stack.md#kibana-navigation-search). 
Here, you can select the {{data-source}} or saved Discover session that you want to analyze. :::{image} ../../../images/kibana-ml-log-rate-analysis-before.png :alt: Log event histogram chart :class: screenshot ::: -Select a spike or drop in the log event histogram chart to start the analysis. It identifies statistically significant field-value combinations that contribute to the spike or drop and displays them in a table. You can optionally choose to summarize the results into groups. The table also shows an indicator of the level of impact and a sparkline showing the shape of the impact in the chart. Hovering over a row displays the impact on the histogram chart in more detail. You can inspect a field in **Discover***, further investigate in ***Log pattern analysis***, or copy the table row information as a query filter to the clipboard by selecting the corresponding option under the ***Actions** column. You can also pin a table row by clicking on it then move the cursor to the histogram chart. It displays a tooltip with exact count values for the pinned field which enables closer investigation. +Select a spike or drop in the log event histogram chart to start the analysis. It identifies statistically significant field-value combinations that contribute to the spike or drop and displays them in a table. You can optionally choose to summarize the results into groups. The table also shows an indicator of the level of impact and a sparkline showing the shape of the impact in the chart. Hovering over a row displays the impact on the histogram chart in more detail. You can inspect a field in **Discover**, further investigate in **Log pattern analysis**, or copy the table row information as a query filter to the clipboard by selecting the corresponding option under the **Actions** column. You can also pin a table row by clicking on it then move the cursor to the histogram chart. 
It displays a tooltip with exact count values for the pinned field which enables closer investigation. Brushes in the chart show the baseline time range and the deviation in the analyzed data. You can move the brushes to redefine both the baseline and the deviation and rerun the analysis with the modified values. @@ -28,12 +27,11 @@ Brushes in the chart show the baseline time range and the deviation in the analy :class: screenshot ::: - ## Log pattern analysis [log-pattern-analysis] Log pattern analysis helps you to find patterns in unstructured log messages and makes it easier to examine your data. It performs categorization analysis on a selected field of a {{data-source}}, creates categories based on the data and displays them together with a chart that shows the distribution of each category and an example document that matches the category. -You can find log pattern analysis under **{{ml-app}}*** > ***AIOps Labs*** or by using the [global search field](../../../get-started/the-stack.md#kibana-navigation-search). Here, you can select the {{data-source}} or saved Discover session that you want to analyze, or in ***Discover** as an available action for any text field. +You can find log pattern analysis under **{{ml-app}}** > **AIOps Labs** or by using the [global search field](../../../get-started/the-stack.md#kibana-navigation-search). Here, you can select the {{data-source}} or saved Discover session that you want to analyze, or in **Discover** as an available action for any text field. :::{image} ../../../images/kibana-ml-log-pattern-analysis.png :alt: Log pattern analysis UI @@ -42,17 +40,15 @@ You can find log pattern analysis under **{{ml-app}}*** > ***AIOps Labs*** or by Select a field for categorization and optionally apply any filters that you want, then start the analysis. The analysis uses the same algorithms as a {{ml}} categorization job. 
The results of the analysis are shown in a table that makes it possible to open **Discover** and show or filter out the given category there, which helps you to further examine your log messages. - ## Change point detection [change-point-detection] ::::{warning} This functionality is in technical preview and may be changed or removed in a future release. Elastic will work to fix any issues, but features in technical preview are not subject to the support SLA of official GA features. :::: - Change point detection uses the [change point aggregation](https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-change-point-aggregation.html) to detect distribution changes, trend changes, and other statistically significant change points in a metric of your time series data. -You can find change point detection under **{{ml-app}}*** > ***AIOps Labs** or by using the [global search field](../../../get-started/the-stack.md#kibana-navigation-search). Here, you can select the {{data-source}} or saved Discover session that you want to analyze. +You can find change point detection under **{{ml-app}}** > **AIOps Labs** or by using the [global search field](../../../get-started/the-stack.md#kibana-navigation-search). Here, you can select the {{data-source}} or saved Discover session that you want to analyze. 
:::{image} ../../../images/kibana-ml-change-point-detection.png :alt: Change point detection UI diff --git a/explore-analyze/machine-learning/machine-learning-in-kibana/xpack-ml-anomalies.md b/explore-analyze/machine-learning/machine-learning-in-kibana/xpack-ml-anomalies.md index 4c226ad53..12f2b348c 100644 --- a/explore-analyze/machine-learning/machine-learning-in-kibana/xpack-ml-anomalies.md +++ b/explore-analyze/machine-learning/machine-learning-in-kibana/xpack-ml-anomalies.md @@ -5,7 +5,7 @@ mapped_pages: # Anomaly detection [xpack-ml-anomalies] -The Elastic {{ml}} {anomaly-detect} feature automatically models the normal behavior of your time series data — learning trends, periodicity, and more — in real time to identify anomalies, streamline root cause analysis, and reduce false positives. {{anomaly-detect-cap}} runs in and scales with {{es}}, and includes an intuitive UI on the {{kib}} **Machine Learning** page for creating {{anomaly-jobs}} and understanding results. +The Elastic {{ml}} {{anomaly-detect}} feature automatically models the normal behavior of your time series data — learning trends, periodicity, and more — in real time to identify anomalies, streamline root cause analysis, and reduce false positives. {{anomaly-detect-cap}} runs in and scales with {{es}}, and includes an intuitive UI on the {{kib}} **Machine Learning** page for creating {{anomaly-jobs}} and understanding results. If you have a license that includes the {{ml-features}}, you can create {{anomaly-jobs}} and manage jobs and {{dfeeds}} from the **Job Management** pane: @@ -38,9 +38,7 @@ You can optionally add annotations by drag-selecting a period of time in the **S In some circumstances, annotations are also added automatically. For example, if the {{anomaly-job}} detects that there is missing data, it annotates the affected time period. For more information, see [Handling delayed data](../anomaly-detection/ml-delayed-data-detection.md). 
The **Job Management** pane shows the full list of annotations for each job. ::::{note} -The {{kib}} {ml-features} use pop-ups. You must configure your web browser so that it does not block pop-up windows or create an exception for your {{kib}} URL. +The {{kib}} {{ml-features}} use pop-ups. You must configure your web browser so that it does not block pop-up windows or create an exception for your {{kib}} URL. :::: - -For more information about the {{anomaly-detect}} feature, see [{{ml-cap}} in the {{stack}}](https://www.elastic.co/what-is/elastic-stack-machine-learning) and [{{ml-cap}} {anomaly-detect}](../anomaly-detection.md). - +For more information about the {{anomaly-detect}} feature, see [{{ml-cap}} in the {{stack}}](https://www.elastic.co/what-is/elastic-stack-machine-learning) and [{{ml-cap}} {{anomaly-detect}}](../anomaly-detection.md). diff --git a/explore-analyze/machine-learning/machine-learning-in-kibana/xpack-ml-dfanalytics.md b/explore-analyze/machine-learning/machine-learning-in-kibana/xpack-ml-dfanalytics.md index 1f35119d3..4178b7ac6 100644 --- a/explore-analyze/machine-learning/machine-learning-in-kibana/xpack-ml-dfanalytics.md +++ b/explore-analyze/machine-learning/machine-learning-in-kibana/xpack-ml-dfanalytics.md @@ -5,14 +5,13 @@ mapped_pages: # Data frame analytics [xpack-ml-dfanalytics] -The Elastic {{ml}} {dfanalytics} feature enables you to analyze your data using {{classification}}, {{oldetection}}, and {{regression}} algorithms and generate new indices that contain the results alongside your source data. +The Elastic {{ml}} {{dfanalytics}} feature enables you to analyze your data using {{classification}}, {{oldetection}}, and {{regression}} algorithms and generate new indices that contain the results alongside your source data. -If you have a license that includes the {{ml-features}}, you can create {{dfanalytics-jobs}} and view their results on the **Data Frame Analytics** page in {{kib}}. 
For example: +If you have a license that includes the {{ml-features}}, you can create {{dfanalytics}} jobs and view their results on the **Data Frame Analytics** page in {{kib}}. For example: :::{image} ../../../images/kibana-classification.png :alt: {{classification-cap}} results in {kib} :class: screenshot ::: -For more information about the {{dfanalytics}} feature, see [{{ml-cap}} {dfanalytics}](../data-frame-analytics.md). - +For more information about the {{dfanalytics}} feature, see [{{ml-cap}} {{dfanalytics}}](../data-frame-analytics.md). diff --git a/explore-analyze/machine-learning/setting-up-machine-learning.md b/explore-analyze/machine-learning/setting-up-machine-learning.md index 04f40cc8c..461e1a974 100644 --- a/explore-analyze/machine-learning/setting-up-machine-learning.md +++ b/explore-analyze/machine-learning/setting-up-machine-learning.md @@ -4,12 +4,8 @@ mapped_pages: - https://www.elastic.co/guide/en/machine-learning/current/setup.html --- - - # Setting up machine learning [setup] - - ## Requirements overview [requirements-overview] To use the {{stack}} {ml-features}, you must have: @@ -20,16 +16,13 @@ To use the {{stack}} {ml-features}, you must have: * {{ml}} features visible in the {{kib}} space * security privileges assigned to the user that: - * grant use of {{ml-features}}, and - * grant access to source and destination indices. - + * grant use of {{ml-features}}, and + * grant access to source and destination indices. ::::{tip} The fastest way to get started with {{ml-features}} is to [start a free 14-day trial of {{ess}}](https://cloud.elastic.co/registration?page=docs&placement=docs-body) in the cloud. :::: - - ## Security privileges [setup-privileges] Assigning security privileges affects how users access {{ml-features}}. Consider the two main categories: @@ -42,7 +35,6 @@ You can configure these privileges * under **Security**. 
To open Security, find **{{stack-manage-app}}** in the main menu or use the [global search field](../overview/kibana-quickstart.md#_finding_your_apps_and_objects). * via the respective {{es}} security APIs. - ### {{es}} API user [es-security-privileges] If you use {{ml}} APIs, you must have the following cluster and index privileges: @@ -63,35 +55,30 @@ For read-only access: The `machine_learning_admin` and `machine_learning_user` built-in roles give access to the results of *all* {{anomaly-jobs}}, irrespective of whether the user has access to the source indices. You must carefully consider who is given these roles, as {{anomaly-job}} results may propagate field values that contain sensitive information from the source indices to the results. :::: - - ### {{kib}} security [kib-security] ::::{important} Granting `All` or `Read` {{kib}} feature privilege for {{ml-app}} will also grant the role the equivalent feature privileges to certain types of {{kib}} saved objects, namely index patterns, dashboards, saved searches, and visualizations as well as {{ml}} job, trained model and module saved objects. :::: - - #### Feature visibility in Spaces [kib-visibility-spaces] -In {{kib}}, the {{ml-features}} must be visible in your [space](../../deploy-manage/manage-spaces.md#spaces-control-feature-visibility). To manage which features are visible in your space, go to **{{stack-manage-app}}** > **{{kib}}*** > ***Spaces** or use the [global search field](../overview/kibana-quickstart.md#_finding_your_apps_and_objects) to locate **Spaces** directly. +In {{kib}}, the {{ml-features}} must be visible in your [space](../../deploy-manage/manage-spaces.md#spaces-control-feature-visibility). To manage which features are visible in your space, go to **{{stack-manage-app}}** > **{{kib}}** > **Spaces** or use the [global search field](../overview/kibana-quickstart.md#_finding_your_apps_and_objects) to locate **Spaces** directly. 
:::{image} ../../images/machine-learning-spaces.jpg :alt: Manage spaces in {kib} :class: screenshot ::: -In addition to index privileges, source {{data-sources}} must also exist in the same space as your {{ml}} jobs. You can configure these under **{{data-sources-caps}}**. To open **{{data-sources-caps}}***, find ***{{stack-manage-app}}** > **{{kib}}** in the main menu, or use the [global search field](../overview/kibana-quickstart.md#_finding_your_apps_and_objects). +In addition to index privileges, source {{data-sources}} must also exist in the same space as your {{ml}} jobs. You can configure these under **{{data-sources-caps}}**. To open **{{data-sources-caps}}**, find **{{stack-manage-app}}** > **{{kib}}** in the main menu, or use the [global search field](../overview/kibana-quickstart.md#_finding_your_apps_and_objects). -Each {{ml}} job and trained model can be assigned to all, one, or multiple spaces. This can be configured in **Machine Learning**. To open **Machine Learning***, find ***{{stack-manage-app}} > Alerts and Insights** in the main menu, or use the [global search field](../overview/kibana-quickstart.md#_finding_your_apps_and_objects). You can edit the spaces that a job or model is assigned to by clicking the icons in the **Spaces** column. +Each {{ml}} job and trained model can be assigned to all, one, or multiple spaces. This can be configured in **Machine Learning**. To open **Machine Learning**, find **{{stack-manage-app}} > Alerts and Insights** in the main menu, or use the [global search field](../overview/kibana-quickstart.md#_finding_your_apps_and_objects). You can edit the spaces that a job or model is assigned to by clicking the icons in the **Spaces** column. 
:::{image} ../../images/machine-learning-assign-job-spaces.jpg :alt: Assign machine learning jobs to spaces :class: screenshot ::: - #### {{kib}} user [kib-security-privileges] Within a {{kib}} space, for full access to the {{ml-features}}, you must have: @@ -113,15 +100,12 @@ Within a {{kib}} space, for read-only access to the {{ml-features}}, you must ha A user who has full or read-only access to {{ml-features}} within a given {{kib}} space can view the results of *all* {{anomaly-jobs}} that are visible in that space, even if they do not have access to the source indices of those jobs. You must carefully consider who is given access to {{ml-features}}, as {{anomaly-job}} results may propagate field values that contain sensitive information from the source indices to the results. :::: - ::::{note} {{data-sources-cap}} can be automatically created when creating a {{dfanalytics-job}}. :::: - For access to use {{ml}} APIs via *Dev Tools* in {{kib}}, set the {{es}} security privileges and grant access to `machine_learning_admin` or `machine_learning_user` built-in roles. - #### {{data-viz}} feature [upload-file-security-privileges] Within a {{kib}} space, to upload and import files in the **{{data-viz}}**, you must have: diff --git a/raw-migrated-files/kibana/kibana/xpack-ml-anomalies.md b/raw-migrated-files/kibana/kibana/xpack-ml-anomalies.md deleted file mode 100644 index c77ea27e6..000000000 --- a/raw-migrated-files/kibana/kibana/xpack-ml-anomalies.md +++ /dev/null @@ -1,41 +0,0 @@ -# {{anomaly-detect-cap}} [xpack-ml-anomalies] - -The Elastic {{ml}} {anomaly-detect} feature automatically models the normal behavior of your time series data — learning trends, periodicity, and more — in real time to identify anomalies, streamline root cause analysis, and reduce false positives. {{anomaly-detect-cap}} runs in and scales with {{es}}, and includes an intuitive UI on the {{kib}} **Machine Learning** page for creating {{anomaly-jobs}} and understanding results. 
- -If you have a license that includes the {{ml-features}}, you can create {{anomaly-jobs}} and manage jobs and {{dfeeds}} from the **Job Management** pane: - -:::{image} ../../../images/kibana-ml-job-management.png -:alt: Job Management -:class: screenshot -::: - -You can use the **Settings** pane to create and edit calendars and the filters that are used in custom rules: - -:::{image} ../../../images/kibana-ml-settings.png -:alt: Calendar Management -:class: screenshot -::: - -The **Anomaly Explorer** and **Single Metric Viewer** display the results of your {{anomaly-jobs}}. For example: - -:::{image} ../../../images/kibana-ml-single-metric-viewer.png -:alt: Single Metric Viewer -:class: screenshot -::: - -You can optionally add annotations by drag-selecting a period of time in the **Single Metric Viewer** and adding a description. For example, you can add an explanation for anomalies in that time period or provide notes about what is occurring in your operational environment at that time: - -:::{image} ../../../images/kibana-ml-annotations-list.png -:alt: Single Metric Viewer with annotations -:class: screenshot -::: - -In some circumstances, annotations are also added automatically. For example, if the {{anomaly-job}} detects that there is missing data, it annotates the affected time period. For more information, see [Handling delayed data](../../../explore-analyze/machine-learning/anomaly-detection/ml-delayed-data-detection.md). The **Job Management** pane shows the full list of annotations for each job. - -::::{note} -The {{kib}} {ml-features} use pop-ups. You must configure your web browser so that it does not block pop-up windows or create an exception for your {{kib}} URL. -:::: - - -For more information about the {{anomaly-detect}} feature, see [{{ml-cap}} in the {{stack}}](https://www.elastic.co/what-is/elastic-stack-machine-learning) and [{{ml-cap}} {anomaly-detect}](../../../explore-analyze/machine-learning/anomaly-detection.md). 
- diff --git a/raw-migrated-files/stack-docs/machine-learning/ml-ad-overview.md b/raw-migrated-files/stack-docs/machine-learning/ml-ad-overview.md deleted file mode 100644 index 203aec5ba..000000000 --- a/raw-migrated-files/stack-docs/machine-learning/ml-ad-overview.md +++ /dev/null @@ -1,11 +0,0 @@ -# {{anomaly-detect-cap}} [ml-ad-overview] - -You can use {{stack}} {ml-features} to analyze time series data and identify anomalous patterns in your data set. - -* [Finding anomalies](../../../explore-analyze/machine-learning/anomaly-detection/ml-ad-finding-anomalies.md) -* [Tutorial: Getting started with {{anomaly-detect}}](../../../explore-analyze/machine-learning/anomaly-detection/ml-getting-started.md) -* [*Advanced concepts*](../../../explore-analyze/machine-learning/anomaly-detection/ml-ad-concepts.md) -* [*API quick reference*](../../../explore-analyze/machine-learning/anomaly-detection/ml-api-quickref.md) -* [How-tos](../../../explore-analyze/machine-learning/anomaly-detection/anomaly-how-tos.md) -* [*Resources*](../../../explore-analyze/machine-learning/anomaly-detection/ml-ad-resources.md) - diff --git a/raw-migrated-files/toc.yml b/raw-migrated-files/toc.yml index ee50348eb..2f31a9c97 100644 --- a/raw-migrated-files/toc.yml +++ b/raw-migrated-files/toc.yml @@ -732,7 +732,6 @@ toc: - file: kibana/kibana/using-kibana-with-security.md - file: kibana/kibana/watcher-ui.md - file: kibana/kibana/xpack-ml-aiops.md - - file: kibana/kibana/xpack-ml-anomalies.md - file: kibana/kibana/xpack-ml-dfanalytics.md - file: kibana/kibana/xpack-security-authorization.md - file: kibana/kibana/xpack-security-fips-140-2.md @@ -1033,7 +1032,6 @@ toc: - file: stack-docs/elastic-stack/upgrading-kibana.md - file: stack-docs/machine-learning/index.md children: - - file: stack-docs/machine-learning/ml-ad-overview.md - file: stack-docs/machine-learning/ml-dfanalytics.md - file: tech-content/starting-with-the-elasticsearch-platform-and-its-solutions/index.md children: