 
 from dotenv import load_dotenv
 from livekit.agents import (
+    Agent,
+    AgentSession,
     AutoSubscribe,
     JobContext,
     JobProcess,
     WorkerOptions,
     cli,
-    llm,
     metrics,
+    RoomInputOptions,
 )
-from livekit.agents.pipeline import VoicePipelineAgent
 from livekit.plugins import (
     cartesia,
     openai,
     deepgram,
     noise_cancellation,
     silero,
-    turn_detector,
 )
+from livekit.plugins.turn_detector.multilingual import MultilingualModel
 
 
 load_dotenv(dotenv_path=".env.local")
 logger = logging.getLogger("voice-agent")
 
 
+class Assistant(Agent):
+    def __init__(self) -> None:
+        # This project is configured to use Deepgram STT, OpenAI LLM and Cartesia TTS plugins
+        # Other great providers exist like Cerebras, ElevenLabs, Groq, Play.ht, Rime, and more
+        # Learn more and pick the best one for your app:
+        # https://docs.livekit.io/agents/plugins
+        super().__init__(
+            instructions="You are a voice assistant created by LiveKit. Your interface with users will be voice. "
+            "You should use short and concise responses, and avoid usage of unpronounceable punctuation. "
+            "You were created as a demo to showcase the capabilities of LiveKit's agents framework.",
+            stt=deepgram.STT(),
+            llm=openai.LLM(model="gpt-4o-mini"),
+            tts=cartesia.TTS(),
+            # use LiveKit's transformer-based turn detector
+            turn_detection=MultilingualModel(),
+        )
+
+    async def on_enter(self):
+        # The agent should be polite and greet the user when it joins :)
+        self.session.generate_reply(
+            instructions="Hey, how can I help you today?", allow_interruptions=True
+        )
+
+
 def prewarm(proc: JobProcess):
     proc.userdata["vad"] = silero.VAD.load()
 
 
 async def entrypoint(ctx: JobContext):
-    initial_ctx = llm.ChatContext().append(
-        role="system",
-        text=(
-            "You are a voice assistant created by LiveKit. Your interface with users will be voice. "
-            "You should use short and concise responses, and avoid usage of unpronounceable punctuation. "
-            "You were created as a demo to showcase the capabilities of LiveKit's agents framework."
-        ),
-    )
-
     logger.info(f"connecting to room {ctx.room.name}")
     await ctx.connect(auto_subscribe=AutoSubscribe.AUDIO_ONLY)
 
     # Wait for the first participant to connect
     participant = await ctx.wait_for_participant()
     logger.info(f"starting voice assistant for participant {participant.identity}")
 
-    # This project is configured to use Deepgram STT, OpenAI LLM and Cartesia TTS plugins
-    # Other great providers exist like Cerebras, ElevenLabs, Groq, Play.ht, Rime, and more
-    # Learn more and pick the best one for your app:
-    # https://docs.livekit.io/agents/plugins
-    agent = VoicePipelineAgent(
+    usage_collector = metrics.UsageCollector()
+
+    # Log metrics and collect usage data
+    def on_metrics_collected(agent_metrics: metrics.AgentMetrics):
+        metrics.log_metrics(agent_metrics)
+        usage_collector.collect(agent_metrics)
+
+    session = AgentSession(
         vad=ctx.proc.userdata["vad"],
-        stt=deepgram.STT(),
-        llm=openai.LLM(model="gpt-4o-mini"),
-        tts=cartesia.TTS(),
-        # use LiveKit's transformer-based turn detector
-        turn_detector=turn_detector.EOUModel(),
         # minimum delay for endpointing, used when the turn detector believes the user is done with their turn
         min_endpointing_delay=0.5,
         # maximum delay for endpointing, used when the turn detector does not believe the user is done with their turn
         max_endpointing_delay=5.0,
-        # enable background voice & noise cancellation, powered by Krisp
-        # included at no additional cost with LiveKit Cloud
-        noise_cancellation=noise_cancellation.BVC(),
-        chat_ctx=initial_ctx,
     )
 
-    usage_collector = metrics.UsageCollector()
+    # Trigger the on_metrics_collected function when metrics are collected
+    session.on("metrics_collected", on_metrics_collected)
 
-    @agent.on("metrics_collected")
-    def on_metrics_collected(agent_metrics: metrics.AgentMetrics):
-        metrics.log_metrics(agent_metrics)
-        usage_collector.collect(agent_metrics)
-
-    agent.start(ctx.room, participant)
-
-    # The agent should be polite and greet the user when it joins :)
-    await agent.say("Hey, how can I help you today?", allow_interruptions=True)
+    await session.start(
+        room=ctx.room,
+        agent=Assistant(),
+        room_input_options=RoomInputOptions(
+            # enable background voice & noise cancellation, powered by Krisp
+            # included at no additional cost with LiveKit Cloud
+            noise_cancellation=noise_cancellation.BVC(),
+        ),
+    )
 
 
 if __name__ == "__main__":
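
Note: the diff view is truncated here, so the body of the __main__ guard is not shown. In LiveKit's agent examples this block conventionally hands the entrypoint and prewarm functions to the worker CLI; a minimal sketch of what the remainder most likely looks like, offered as an assumption since the actual lines are cut off above:

    if __name__ == "__main__":
        # Hand the entrypoint and prewarm hooks to the worker CLI
        # (assumed continuation; not visible in this diff)
        cli.run_app(
            WorkerOptions(
                entrypoint_fnc=entrypoint,
                prewarm_fnc=prewarm,
            )
        )

With that in place, the worker runs locally through the CLI that cli.run_app provides, e.g. the dev subcommand, after fetching the turn-detector model weights with the download-files subcommand (the filename agent.py is an assumption here): python agent.py download-files, then python agent.py dev.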
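One more note on the metrics wiring: the new code creates a UsageCollector and registers on_metrics_collected, but nothing in this diff reads the collected totals back. The upstream examples typically log an aggregate at shutdown; a minimal sketch of that pattern, placed inside entrypoint after the handler is registered (not part of this commit):

    async def log_usage():
        # Summarize the LLM/STT/TTS usage gathered by the UsageCollector
        summary = usage_collector.get_summary()
        logger.info(f"Usage: {summary}")

    # Run the summary logger when the job shuts down
    ctx.add_shutdown_callback(log_usage)

Also worth knowing: depending on the livekit-agents 1.x release, the metrics_collected event may deliver a MetricsCollectedEvent wrapper rather than an AgentMetrics object directly, in which case the handler should accept the event and read its .metrics attribute.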