diff --git a/lib/compositor_utils.ex b/lib/compositor_utils.ex
index d3181126..f6225a11 100644
--- a/lib/compositor_utils.ex
+++ b/lib/compositor_utils.ex
@@ -2,6 +2,7 @@ defmodule RecordingConverter.Compositor do
   @moduledoc false
 
   alias Membrane.LiveCompositor.Request
+  alias RecordingConverter.ReportParser
 
   @text_margin 10
   @letter_width 12
@@ -10,6 +11,8 @@ defmodule RecordingConverter.Compositor do
   @video_output_id "video_output_1"
   @audio_output_id "audio_output_1"
 
+  @avatar_threshold_ns 1_000_000_000
+
   @spec server_setup(binary) :: :start_locally | {:start_locally, String.t()}
   def server_setup(compositor_path) do
     compositor_path = compositor_path
@@ -39,10 +42,10 @@ defmodule RecordingConverter.Compositor do
   @spec video_output_id() :: String.t()
   def video_output_id(), do: @video_output_id
 
-  @spec generate_output_update(map(), number()) :: [struct()]
-  def generate_output_update(tracks, timestamp),
+  @spec generate_output_update(map(), number(), map()) :: [struct()]
+  def generate_output_update(tracks, timestamp, video_tracks_offset),
     do: [
-      generate_video_output_update(tracks, timestamp),
+      generate_video_output_update(tracks, timestamp, video_tracks_offset),
       generate_audio_output_update(tracks, timestamp)
     ]
 
@@ -78,11 +81,17 @@ defmodule RecordingConverter.Compositor do
 
   defp generate_video_output_update(
          %{"video" => video_tracks, "audio" => audio_tracks},
-         timestamp
+         timestamp,
+         video_tracks_offset
        )
        when is_list(video_tracks) do
-    video_tracks_id = Enum.map(video_tracks, fn track -> track["origin"] end)
-    avatar_tracks = Enum.reject(audio_tracks, fn track -> track["origin"] in video_tracks_id end)
+    video_tracks_origin = Enum.map(video_tracks, fn track -> track["origin"] end)
+
+    avatar_tracks =
+      Enum.filter(
+        audio_tracks,
+        &should_have_avatar?(&1, timestamp, video_tracks_origin, video_tracks_offset)
+      )
 
     avatars_config = Enum.map(avatar_tracks, &avatar_view/1)
     video_tracks_config = Enum.map(video_tracks, &video_input_source_view/1)
@@ -103,6 +112,32 @@ defmodule RecordingConverter.Compositor do
     }
   end
 
+  # An audio track gets an avatar only when its origin has no active video, the audio
+  # outlasts the threshold and no video from the same origin is about to start.
+  defp should_have_avatar?(
+         %{"origin" => origin} = track,
+         timestamp,
+         video_tracks_origin,
+         video_tracks_offset
+       ) do
+    origin not in video_tracks_origin and
+      longer_than_threshold?(track, timestamp) and
+      not has_video_in_threshold?(origin, video_tracks_offset, timestamp)
+  end
+
+  # True when the track ends more than @avatar_threshold_ns after `timestamp`.
+  defp longer_than_threshold?(%{"offset" => offset} = track, timestamp) do
+    ReportParser.calculate_track_end(track, offset) - timestamp > @avatar_threshold_ns
+  end
+
+  # True when a video of `origin` starts after `timestamp` but before the threshold passes.
+  defp has_video_in_threshold?(origin, video_tracks_offset, timestamp) do
+    threshold = timestamp + @avatar_threshold_ns
+    offsets = Map.get(video_tracks_offset, origin, [])
+
+    Enum.any?(offsets, &(&1 > timestamp and &1 < threshold))
+  end
+
   defp video_input_source_view(track) do
     %{
       type: :view,
diff --git a/lib/report_parser.ex b/lib/report_parser.ex
index 992e5a2d..5a24b0bf 100644
--- a/lib/report_parser.ex
+++ b/lib/report_parser.ex
@@ -19,13 +19,40 @@ defmodule RecordingConverter.ReportParser do
   @spec get_all_track_actions(tracks :: list()) :: list()
   def get_all_track_actions(tracks) do
     tracks_actions = get_track_actions(tracks)
+    video_tracks_offset = get_video_tracks_offset(tracks)
+
+    update_scene_notifications =
+      create_update_scene_notifications(tracks_actions, video_tracks_offset)
 
-    update_scene_notifications = create_update_scene_notifications(tracks_actions)
     unregister_output_actions = generate_unregister_output_actions(tracks_actions)
 
     update_scene_notifications ++ unregister_output_actions
   end
 
+  # Returns the point in time at which `track` ends: `offset` plus the track duration,
+  # which is derived from the track's start/end timestamps and its clock rate.
+  @spec calculate_track_end(map(), non_neg_integer()) :: non_neg_integer()
+  def calculate_track_end(track, offset) do
+    ticks_per_ms = div(track["clock_rate"], 1_000)
+    elapsed_ticks = track["end_timestamp"] - track["start_timestamp"]
+
+    # A negative difference is treated as a wrap-around of the timestamp counter.
+    elapsed_ticks =
+      if elapsed_ticks < 0 do
+        elapsed_ticks + @max_timestamp_value
+      else
+        elapsed_ticks
+      end
+
+    # The duration is shortened by @delta_timestamp_milliseconds before conversion.
+    elapsed_ns =
+      (div(elapsed_ticks, ticks_per_ms) - @delta_timestamp_milliseconds)
+      |> Membrane.Time.milliseconds()
+      |> Membrane.Time.as_nanoseconds(:round)
+
+    offset + elapsed_ns
+  end
+
   defp get_report(bucket_name, report_path) do
     bucket_name
     |> ExAws.S3.download_file(report_path, :memory)
@@ -34,6 +61,16 @@ defmodule RecordingConverter.ReportParser do
     |> Jason.decode!()
   end
 
+  # Maps every origin to the ascending list of offsets at which its video tracks start.
+  # TODO: remove screenshare tracks, after adding necessary info to reports
+  defp get_video_tracks_offset(tracks) do
+    tracks
+    |> Enum.filter(&(&1["type"] == "video"))
+    # Map.fetch!/2 keeps the lookup assertive: a video track without these keys still raises.
+    |> Enum.group_by(&Map.fetch!(&1, "origin"), &Map.fetch!(&1, "offset"))
+    |> Map.new(fn {origin, offsets} -> {origin, Enum.sort(offsets)} end)
+  end
+
   defp get_track_actions(tracks) do
     tracks
     |> Enum.flat_map(fn track ->
@@ -41,22 +78,22 @@ defmodule RecordingConverter.ReportParser do
 
       [
         {:start, track, offset},
-        {:end, track, offset + calculate_track_duration(track)}
+        {:end, track, calculate_track_end(track, offset)}
       ]
     end)
     |> Enum.sort_by(fn {_atom, _track, timestamp} -> timestamp end)
   end
 
-  defp create_update_scene_notifications(track_actions) do
+  defp create_update_scene_notifications(track_actions, video_tracks_offset) do
     track_actions
     |> Enum.map_reduce(%{"audio" => [], "video" => []}, fn
       {:start, %{"type" => type} = track, timestamp}, acc ->
         acc = Map.update!(acc, type, &[track | &1])
-        {Compositor.generate_output_update(acc, timestamp), acc}
+        {Compositor.generate_output_update(acc, timestamp, video_tracks_offset), acc}
 
       {:end, %{"type" => type} = track, timestamp}, acc ->
         acc = Map.update!(acc, type, fn tracks -> Enum.reject(tracks, &(&1 == track)) end)
-        {Compositor.generate_output_update(acc, timestamp), acc}
+        {Compositor.generate_output_update(acc, timestamp, video_tracks_offset), acc}
     end)
     |> then(fn {actions, _acc} -> actions end)
     |> List.flatten()
@@ -101,26 +138,6 @@ defmodule RecordingConverter.ReportParser do
     {audio_end_timestamp, video_end_timestamp}
   end
 
-  defp calculate_track_duration(track) do
-    clock_rate_ms = div(track["clock_rate"], 1_000)
-
-    end_timestamp = track["end_timestamp"]
-    start_timestamp = track["start_timestamp"]
-
-    timestamp_difference =
-      if end_timestamp < start_timestamp do
-        end_timestamp + @max_timestamp_value - start_timestamp
-      else
-        end_timestamp - start_timestamp
-      end
-
-    difference_in_milliseconds = div(timestamp_difference, clock_rate_ms)
-
-    (difference_in_milliseconds - @delta_timestamp_milliseconds)
-    |> Membrane.Time.milliseconds()
-    |> Membrane.Time.as_nanoseconds(:round)
-  end
-
   defp calculate_end_timestamp(tracks) do
     if Enum.count(tracks) > 0 do
       {_atom, _video_track, timestamp} = Enum.at(tracks, -1)
diff --git a/test/fixtures/long-video/report.json b/test/fixtures/long-video/report.json
index d460e4e5..f65d7edb 100644
--- a/test/fixtures/long-video/report.json
+++ b/test/fixtures/long-video/report.json
@@ -7,7 +7,8 @@
       "encoding": "H264",
       "clock_rate": 90000,
       "start_timestamp": 1252237519,
-      "end_timestamp": 1255786759
+      "end_timestamp": 1255786759,
+      "origin": "origin"
     },
     "4a0bbdc0-c610-46a7-a574-1d0858aa65ca.msr": {
       "offset": 0,
@@ -16,7 +17,8 @@
       "encoding": "H264",
       "clock_rate": 90000,
       "start_timestamp": 4135275928,
-      "end_timestamp": 4143092158
+      "end_timestamp": 4143092158,
+      "origin": "origin"
     }
   },
   "recording_id": "64"
diff --git a/test/fixtures/multiple-audios-and-videos/report.json b/test/fixtures/multiple-audios-and-videos/report.json
index 99af6e12..9ddf9ea9 100644
--- a/test/fixtures/multiple-audios-and-videos/report.json
+++ b/test/fixtures/multiple-audios-and-videos/report.json
@@ -10,7 +10,8 @@
       },
       "clock_rate": 90000,
       "start_timestamp": 714157743,
-      "end_timestamp": 715056243
+      "end_timestamp": 715056243,
+      "origin": 1
     },
     "a89421bc-8466-485d-ad52-7f574acc4084.msr": {
       "offset": 18052166,
@@ -19,7 +20,8 @@
       "metadata": null,
       "clock_rate": 48000,
       "start_timestamp": 775938176,
-      "end_timestamp": 776421056
+      "end_timestamp": 776421056,
+      "origin": 1
     },
     "b96043e6-abaa-40bd-9267-12794f6e5529.msr": {
       "offset": 0,
@@ -31,7 +33,8 @@
       },
       "clock_rate": 90000,
       "start_timestamp": 3637718073,
-      "end_timestamp": 3638616573
+      "end_timestamp": 3638616573,
+      "origin": 2
     },
     "dd78e5bd-aa5a-4e01-abbc-354227ab7529.msr": {
       "offset": 12473666,
@@ -40,7 +43,8 @@
       "metadata": null,
       "clock_rate": 48000,
       "start_timestamp": 2095834478,
-      "end_timestamp": 2096317358
+      "end_timestamp": 2096317358,
+      "origin": 2
     }
   },
   "recording_id": "recording_id"
diff --git a/test/fixtures/one-audio-one-video/report.json b/test/fixtures/one-audio-one-video/report.json
index ca1f15c0..a23beb16 100644
--- a/test/fixtures/one-audio-one-video/report.json
+++ b/test/fixtures/one-audio-one-video/report.json
@@ -8,7 +8,8 @@
       "metadata": null,
       "start_timestamp": 3620631514,
       "clock_rate": 48000,
-      "end_timestamp": 3621114394
+      "end_timestamp": 3621114394,
+      "origin": 1
     },
     "video_15D5A19A045095D9.msr": {
       "offset": 0,
@@ -20,7 +21,8 @@
       },
       "start_timestamp": 3257040625,
       "clock_rate": 90000,
-      "end_timestamp": 3257939125
+      "end_timestamp": 3257939125,
+      "origin": 1
     }
   }
 }
\ No newline at end of file
diff --git a/test/fixtures/one-audio/report.json b/test/fixtures/one-audio/report.json
index 0460d939..c37e842a 100644
--- a/test/fixtures/one-audio/report.json
+++ b/test/fixtures/one-audio/report.json
@@ -8,7 +8,8 @@
       "metadata": null,
       "start_timestamp": 3620631514,
       "clock_rate": 48000,
-      "end_timestamp": 3621114394
+      "end_timestamp": 3621114394,
+      "origin": 1
     }
   }
 }
diff --git a/test/fixtures/one-video/report.json b/test/fixtures/one-video/report.json
index 6325a1b9..1d7892d0 100644
--- a/test/fixtures/one-video/report.json
+++ b/test/fixtures/one-video/report.json
@@ -11,7 +11,8 @@
       },
       "start_timestamp": 3257040625,
       "clock_rate": 90000,
-      "end_timestamp": 3257939125
+      "end_timestamp": 3257939125,
+      "origin": 1
     }
   }
 }
diff --git a/test/fixtures/report_parser/audio_multiple_video.json b/test/fixtures/report_parser/audio_multiple_video.json
new file mode 100644
index 00000000..480c088f
--- /dev/null
+++ b/test/fixtures/report_parser/audio_multiple_video.json
@@ -0,0 +1,45 @@
+{
+  "tracks": {
+    "audio.msr": {
+      "offset": 0,
+      "type": "audio",
+      "encoding": "OPUS",
+      "metadata": null,
+      "clock_rate": 48000,
+      "start_timestamp": 0,
+      "end_timestamp": 400000,
+      "origin": "origin"
+    },
+    "video_1.msr": {
+      "offset": 50000000000,
+      "type": "video",
+      "metadata": null,
+      "encoding": "H264",
+      "clock_rate": 90000,
+      "start_timestamp": 4135275928,
+      "end_timestamp": 4143092158,
+      "origin": "origin"
+    },
+    "video_2.msr": {
+      "offset": 5000000000,
+      "type": "video",
+      "metadata": null,
+      "encoding": "H264",
+      "clock_rate": 90000,
+      "start_timestamp": 4135275928,
+      "end_timestamp": 4143092158,
+      "origin": "origin"
+    },
+    "video_3.msr": {
+      "offset": 500000000,
+      "type": "video",
+      "metadata": null,
+      "encoding": "H264",
+      "clock_rate": 90000,
+      "start_timestamp": 4135275928,
+      "end_timestamp": 4143092158,
+      "origin": "origin"
+    }
+  },
+  "recording_id": "recording_id"
+}
\ No newline at end of file
diff --git a/test/fixtures/report_parser/audio_video.json b/test/fixtures/report_parser/audio_video.json
new file mode 100644
index 00000000..ea713520
--- /dev/null
+++ b/test/fixtures/report_parser/audio_video.json
@@ -0,0 +1,25 @@
+{
+  "tracks": {
+    "audio.msr": {
+      "offset": 0,
+      "type": "audio",
+      "encoding": "OPUS",
+      "metadata": null,
+      "clock_rate": 48000,
+      "start_timestamp": 0,
+      "end_timestamp": 480000,
+      "origin": "origin"
+    },
+    "video_1.msr": {
+      "offset": 2000000000,
+      "type": "video",
+      "metadata": null,
+      "encoding": "H264",
+      "clock_rate": 90000,
+      "start_timestamp": 0,
+      "end_timestamp": 540000,
+      "origin": "origin"
+    }
+  },
+  "recording_id": "recording_id"
+}
\ No newline at end of file
diff --git a/test/fixtures/report_parser/audio_video_in_threshold.json b/test/fixtures/report_parser/audio_video_in_threshold.json
new file mode 100644
index 00000000..7cc4d3d0
--- /dev/null
+++ b/test/fixtures/report_parser/audio_video_in_threshold.json
@@ -0,0 +1,25 @@
+{
+  "tracks": {
+    "audio.msr": {
+      "offset": 0,
+      "type": "audio",
+      "encoding": "OPUS",
+      "metadata": null,
+      "clock_rate": 48000,
+      "start_timestamp": 0,
+      "end_timestamp": 400000,
+      "origin": "origin"
+    },
+    "video.msr": {
+      "offset": 500000000,
+      "type": "video",
+      "metadata": null,
+      "encoding": "H264",
+      "clock_rate": 90000,
+      "start_timestamp": 4135275928,
+      "end_timestamp": 4143092158,
+      "origin": "origin"
+    }
+  },
+  "recording_id": "recording_id"
+}
\ No newline at end of file
diff --git a/test/fixtures/report_parser/audio_video_not_in_threshold.json b/test/fixtures/report_parser/audio_video_not_in_threshold.json
new file mode 100644
index 00000000..ac1fde3c
--- /dev/null
+++ b/test/fixtures/report_parser/audio_video_not_in_threshold.json
@@ -0,0 +1,26 @@
+{
+    "tracks": {
+      "audio.msr": {
+        "offset": 0,
+        "type": "audio",
+        "encoding": "OPUS",
+        "metadata": null,
+        "clock_rate": 48000,
+        "start_timestamp": 0,
+        "end_timestamp": 400000,
+        "origin": "origin"
+      },
+      "video.msr": {
+        "offset": 5000000000,
+        "type": "video",
+        "metadata": null,
+        "encoding": "H264",
+        "clock_rate": 90000,
+        "start_timestamp": 4135275928,
+        "end_timestamp": 4143092158,
+        "origin": "origin"
+      }
+    },
+    "recording_id": "recording_id"
+  }
+  
\ No newline at end of file
diff --git a/test/fixtures/report_parser/long_audio.json b/test/fixtures/report_parser/long_audio.json
new file mode 100644
index 00000000..29f91ee6
--- /dev/null
+++ b/test/fixtures/report_parser/long_audio.json
@@ -0,0 +1,16 @@
+{
+    "tracks": {
+      "audio.msr": {
+        "offset": 0,
+        "type": "audio",
+        "encoding": "OPUS",
+        "metadata": null,
+        "clock_rate": 48000,
+        "start_timestamp": 0,
+        "end_timestamp": 400000,
+        "origin": "origin"
+      }
+    },
+    "recording_id": "recording_id"
+  }
+  
\ No newline at end of file
diff --git a/test/fixtures/report_parser/short_audio.json b/test/fixtures/report_parser/short_audio.json
new file mode 100644
index 00000000..fd2efe40
--- /dev/null
+++ b/test/fixtures/report_parser/short_audio.json
@@ -0,0 +1,16 @@
+{
+    "tracks": {
+      "audio.msr": {
+        "offset": 0,
+        "type": "audio",
+        "encoding": "OPUS",
+        "metadata": null,
+        "clock_rate": 48000,
+        "start_timestamp": 0,
+        "end_timestamp": 40000,
+        "origin": "origin"
+      }
+    },
+    "recording_id": "recording_id"
+  }
+  
\ No newline at end of file
diff --git a/test/report_parser_test.exs b/test/report_parser_test.exs
new file mode 100644
index 00000000..1e79047d
--- /dev/null
+++ b/test/report_parser_test.exs
@@ -0,0 +1,52 @@
+defmodule RecordingConverter.ReportParserTest do
+  use ExUnit.Case, async: true
+
+  alias Membrane.LiveCompositor.Request.UpdateVideoOutput
+  alias RecordingConverter.ReportParser
+
+  @fixtures "test/fixtures/report_parser/"
+
+  # Each entry pairs a fixture report with the expected number of scenes showing an avatar.
+  entries = [
+    %{report: "short_audio.json", avatars: 0},
+    %{report: "long_audio.json", avatars: 1},
+    %{report: "audio_video_in_threshold.json", avatars: 0},
+    %{report: "audio_video_not_in_threshold.json", avatars: 1},
+    %{report: "audio_multiple_video.json", avatars: 0},
+    %{report: "audio_video.json", avatars: 2}
+  ]
+
+  for entry <- entries do
+    test "recording with report #{entry.report} has #{entry.avatars} scenes with avatars" do
+      avatar_scenes =
+        @fixtures
+        |> Path.join(unquote(entry.report))
+        |> get_tracks()
+        |> ReportParser.get_all_track_actions()
+        |> get_scenes_with_avatars()
+
+      assert length(avatar_scenes) == unquote(entry.avatars)
+    end
+  end
+
+  # Reads the report's tracks and stores each track's report key under the `:id` key.
+  defp get_tracks(path) do
+    path
+    |> File.read!()
+    |> Jason.decode!()
+    |> Map.fetch!("tracks")
+    |> Enum.map(fn {key, value} -> Map.put(value, :id, key) end)
+  end
+
+  defp get_scenes_with_avatars(actions) do
+    Enum.filter(actions, fn
+      %UpdateVideoOutput{root: %{children: children}} -> has_avatar?(children)
+      _action -> false
+    end)
+  end
+
+  # Treats a child as an avatar when it wraps exactly one nested child holding an image.
+  defp has_avatar?(children) do
+    Enum.any?(children, &match?(%{children: [%{child: %{type: :image}}]}, &1))
+  end
+end