telephony-agent:support[twilio]

This commit is contained in:
sosumit001
2025-06-17 18:47:28 +05:30
parent ffed783583
commit 50dcb7c9b4
17 changed files with 790 additions and 20 deletions

56
.gitignore vendored Normal file
View File

@@ -0,0 +1,56 @@
# Environment variables
.env
.env.local
.env.*.local
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
# Virtual environments
.venv/
venv/
ENV/
env/
# IDE
.vscode/
.idea/
*.swp
*.swo
*~
# OS
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
# Logs
*.log
logs/
# Temporary files
*.tmp
*.temp

21
LICENSE Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2025 VideoSDK Community
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -1,12 +1,31 @@
# VideoSDK AI Telephony Agent
<div align="left">
# AI Telephony Agent
<div align="left" style="margin:0px 12px;">
Make INBOUND and OUTBOUND calls with AI agents using VideoSDK. Supports multiple SIP providers and AI agents with a clean, extensible architecture for VoIP telephony solutions.
</div>
<div align="center">
![Architecture : Connecting Voice Agent to Telephony Agent](https://assets.videosdk.live/images/sip-telephony-agent.png)
<a href="https://docs.videosdk.live/ai_agents/introduction" target="_blank"><img src="https://img.shields.io/badge/_Documentation-4285F4?style=for-the-badge" alt="Documentation"></a>
<a href="https://www.youtube.com/playlist?list=PLrujdOR6BS_1fMqsHd9tynAg0foSRX5ti" target="_blank"><img src="https://img.shields.io/badge/_Tutorials-FF0000?style=for-the-badge&logo=youtube&logoColor=white" alt="Video Tutorials"></a>
<a href="https://dub.sh/o59dJJB" target="_blank"><img src="https://img.shields.io/badge/_Get_Started-4285F4?style=for-the-badge" alt="Get Started"></a>
<a href="https://discord.gg/f2WsNDN9S5" target="_blank"><img src="https://img.shields.io/badge/_Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord Community"></a>
<a href="https://pypi.org/project/videosdk-agents/" target="_blank"><img src="https://img.shields.io/badge/_pip_install-3776AB?style=for-the-badge&logo=python&logoColor=white" alt="PyPI Package"></a>
</div>
</div>
## Installation
### Prerequisites
- Python 3.21+
- Python 3.11+
- VideoSDK account
- Twilio account (SIP trunking provider)
- Google API key (for Gemini AI)
@@ -47,7 +66,7 @@ TWILIO_NUMBER=your_twilio_number
4. **Run the server**
```bash
python server_modular.py
python server.py
```
The server will start on `http://localhost:8000`
@@ -90,7 +109,7 @@ POST /configure-provider?provider_name=twilio
Switch SIP providers at runtime (currently supports: `twilio`).
## 🔌 Adding New SIP Providers
## Adding New SIP Providers
The modular architecture makes it easy to add new SIP providers and SIP trunking services. Here's how to add a new provider:
@@ -163,7 +182,7 @@ class Config:
# ... rest of validation
```
## 🤖 Adding New AI Agents
## Adding New AI Agents
Similarly, you can add new AI agents for intelligent call handling:
@@ -342,19 +361,4 @@ For additional SIP providers, add their specific environment variables to `confi
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
## Support
- **Issues**: [GitHub Issues](https://github.com/yourusername/ai-agent-telephony/issues)
- **Documentation**: [Wiki](https://github.com/yourusername/ai-agent-telephony/wiki)
- **Discussions**: [GitHub Discussions](https://github.com/yourusername/ai-agent-telephony/discussions)
## Acknowledgments
- [VideoSDK](https://videosdk.live/) for the real-time communication platform
- [Twilio](https://www.twilio.com/) for SIP trunking capabilities
- [Google Gemini](https://ai.google.dev/) for AI agent capabilities
- [FastAPI](https://fastapi.tiangolo.com/) for the web framework
---
**Made with ❤️ for the developer community**

15
ai/__init__.py Normal file
View File

@@ -0,0 +1,15 @@
from .base_agent import AIAgent
from .gemini_agent import GeminiAgent
def get_ai_agent(agent_name: str = "gemini") -> AIAgent:
    """Build the AI agent registered under ``agent_name``.

    Args:
        agent_name: Registry key of the agent implementation. ``"gemini"``
            is the only key registered here.

    Returns:
        A newly constructed instance of the matching agent class.

    Raises:
        ValueError: If ``agent_name`` is not a registered key.
    """
    # Name -> agent class; extend this mapping to register more agents.
    registry = {
        "gemini": GeminiAgent,
    }
    agent_cls = registry.get(agent_name)
    if agent_cls is None:
        raise ValueError(f"Unsupported AI agent: {agent_name}. Available agents: {list(registry.keys())}")
    return agent_cls()
__all__ = ["AIAgent", "GeminiAgent", "get_ai_agent"]

21
ai/base_agent.py Normal file
View File

@@ -0,0 +1,21 @@
from abc import ABC, abstractmethod
from typing import Dict, Any, Optional
from videosdk.agents import AgentSession, RealTimePipeline
class AIAgent(ABC):
    """Abstract contract every AI agent implementation must fulfil.

    Concrete agents supply a pipeline, a session factory and a short
    identifying name.
    """

    @abstractmethod
    def create_pipeline(self) -> RealTimePipeline:
        """Build and return the real-time pipeline this agent runs on."""

    @abstractmethod
    def create_session(self, room_id: str, context: Dict[str, Any]) -> AgentSession:
        """Build and return an agent session for ``room_id``.

        Args:
            room_id: Identifier of the room the session is created for.
            context: Extra key/value data supplied by the caller.
        """

    @abstractmethod
    def get_agent_name(self) -> str:
        """Return the short name identifying this agent implementation."""

45
ai/gemini_agent.py Normal file
View File

@@ -0,0 +1,45 @@
from typing import Dict, Any
from videosdk.agents import AgentSession, RealTimePipeline
from videosdk.plugins.google import GeminiRealtime, GeminiLiveConfig
from .base_agent import AIAgent
from voice_agent import VoiceAgent
from config import Config
class GeminiAgent(AIAgent):
    """``AIAgent`` implementation backed by the Gemini realtime model."""

    def create_pipeline(self) -> RealTimePipeline:
        """Build a ``RealTimePipeline`` around a ``GeminiRealtime`` model.

        The model name, voice and response modalities are fixed here; the
        API key is read from ``Config.GOOGLE_API_KEY``.
        """
        live_config = GeminiLiveConfig(
            voice="Leda",
            response_modalities=["AUDIO"],
        )
        realtime_model = GeminiRealtime(
            model="gemini-2.0-flash-live-001",
            api_key=Config.GOOGLE_API_KEY,
            config=live_config,
        )
        return RealTimePipeline(model=realtime_model)

    def create_session(self, room_id: str, context: Dict[str, Any]) -> AgentSession:
        """Build an ``AgentSession`` pairing a ``VoiceAgent`` with a new pipeline.

        Args:
            room_id: Room identifier, stored under the ``meetingId`` key.
            context: Caller-supplied entries merged over the defaults, so a
                caller key replaces a default with the same name.

        Returns:
            The constructed (not yet started) session.
        """
        pipeline = self.create_pipeline()

        # Defaults first, then caller entries so the caller wins on conflicts.
        agent_context: Dict[str, Any] = {
            "name": "VideoSDK Gemini Agent",
            "meetingId": room_id,
            "videosdk_auth": Config.VIDEOSDK_AUTH_TOKEN,
        }
        agent_context.update(context)

        return AgentSession(
            agent=VoiceAgent(context=agent_context),
            pipeline=pipeline,
            context=agent_context,
        )

    def get_agent_name(self) -> str:
        """Return the registry name of this agent (``"gemini"``)."""
        return "gemini"

52
config.py Normal file
View File

@@ -0,0 +1,52 @@
import os
import logging
from typing import Dict, Any
from dotenv import load_dotenv
# Load environment variables via python-dotenv before the Config class body
# below reads them with os.getenv.
load_dotenv()
# Configure logging: INFO level, with timestamp, logger name and level
# prefixed to every message.
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
# Module-level logger used by Config.validate().
logger = logging.getLogger(__name__)
class Config:
    """Single place where environment-driven settings are read and checked.

    Every attribute is read from the process environment once, when this
    class body executes; an unset variable yields ``None``.
    """

    # VideoSDK Configuration
    VIDEOSDK_AUTH_TOKEN = os.environ.get("VIDEOSDK_AUTH_TOKEN")
    VIDEOSDK_SIP_USERNAME = os.environ.get("VIDEOSDK_SIP_USERNAME")
    VIDEOSDK_SIP_PASSWORD = os.environ.get("VIDEOSDK_SIP_PASSWORD")

    # AI Configuration
    GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")

    # Twilio Configuration (note: the account SID is read from TWILIO_SID)
    TWILIO_ACCOUNT_SID = os.environ.get("TWILIO_SID")
    TWILIO_AUTH_TOKEN = os.environ.get("TWILIO_AUTH_TOKEN")
    TWILIO_NUMBER = os.environ.get("TWILIO_NUMBER")

    @classmethod
    def validate(cls) -> None:
        """Check that every required setting has a non-empty value.

        Each missing variable is logged at error level before raising.

        Raises:
            ValueError: Naming every environment variable that is unset or
                empty.
        """
        # Keys are the environment variable names reported to the operator.
        required: Dict[str, Any] = {
            "VIDEOSDK_AUTH_TOKEN": cls.VIDEOSDK_AUTH_TOKEN,
            "VIDEOSDK_SIP_USERNAME": cls.VIDEOSDK_SIP_USERNAME,
            "VIDEOSDK_SIP_PASSWORD": cls.VIDEOSDK_SIP_PASSWORD,
            "GOOGLE_API_KEY": cls.GOOGLE_API_KEY,
            "TWILIO_SID": cls.TWILIO_ACCOUNT_SID,
            "TWILIO_AUTH_TOKEN": cls.TWILIO_AUTH_TOKEN,
            "TWILIO_NUMBER": cls.TWILIO_NUMBER,
        }
        missing = [name for name, value in required.items() if not value]

        if not missing:
            logger.info("All required environment variables are set.")
            return

        for name in missing:
            logger.error(f"Error: Missing environment variable: {name}")
        raise ValueError(f"Missing required environment variables: {', '.join(missing)}")


# Validate configuration on import, so a misconfigured environment fails as
# soon as this module is loaded.
Config.validate()

20
models.py Normal file
View File

@@ -0,0 +1,20 @@
from typing import Optional
from pydantic import BaseModel
class OutboundCallRequest(BaseModel):
    """Request model for initiating outbound calls."""

    # Destination phone number to dial (required).
    to_number: str
    # Optional greeting override; omitted or null means "not provided".
    initial_greeting: Optional[str] = None
class CallResponse(BaseModel):
    """Response model for call operations."""

    # Human-readable outcome of the operation (required).
    message: str
    # Call identifier reported by the SIP provider, when available.
    twilio_call_sid: Optional[str] = None
    # Identifier of the VideoSDK room tied to the call, when available.
    videosdk_room_id: Optional[str] = None
class SessionInfo(BaseModel):
    """Model for session information."""

    # All four fields are required strings.
    room_id: str
    call_type: str
    agent_type: str
    status: str

15
providers/__init__.py Normal file
View File

@@ -0,0 +1,15 @@
from .base import SIPProvider
from .twilio_provider import TwilioProvider
def get_provider(provider_name: str = "twilio") -> SIPProvider:
    """Build the SIP provider registered under ``provider_name``.

    Args:
        provider_name: Registry key of the provider implementation.
            ``"twilio"`` is the only key registered here.

    Returns:
        A newly constructed instance of the matching provider class.

    Raises:
        ValueError: If ``provider_name`` is not a registered key.
    """
    # Name -> provider class; extend this mapping to register more providers.
    registry = {
        "twilio": TwilioProvider,
    }
    provider_cls = registry.get(provider_name)
    if provider_cls is None:
        raise ValueError(f"Unsupported provider: {provider_name}. Available providers: {list(registry.keys())}")
    return provider_cls()
__all__ = ["SIPProvider", "TwilioProvider", "get_provider"]

26
providers/base.py Normal file
View File

@@ -0,0 +1,26 @@
from abc import ABC, abstractmethod
from typing import Dict, Any, Optional
from twilio.twiml.voice_response import VoiceResponse
class SIPProvider(ABC):
    """Abstract contract every SIP provider integration must fulfil."""

    @abstractmethod
    def create_client(self) -> Any:
        """Build and return the provider's API client object."""

    @abstractmethod
    def generate_twiml(self, sip_endpoint: str, **kwargs) -> str:
        """Return the call-control markup that dials ``sip_endpoint``.

        Args:
            sip_endpoint: SIP URI the call should be connected to.
            **kwargs: Provider-specific options.
        """

    @abstractmethod
    def initiate_outbound_call(self, to_number: str, twiml: str) -> Dict[str, Any]:
        """Place an outbound call to ``to_number`` driven by ``twiml``.

        Returns:
            A dictionary describing the call that was placed.
        """

    @abstractmethod
    def get_provider_name(self) -> str:
        """Return the short name identifying this provider."""

View File

@@ -0,0 +1,44 @@
from typing import Dict, Any
from twilio.rest import Client as TwilioClient
from twilio.twiml.voice_response import VoiceResponse, Dial
from .base import SIPProvider
from config import Config
class TwilioProvider(SIPProvider):
    """``SIPProvider`` implementation that talks to Twilio."""

    def __init__(self):
        # One client is created up front and reused for every outbound call.
        self.client = self.create_client()

    def create_client(self) -> TwilioClient:
        """Build a Twilio REST client from the credentials in ``Config``."""
        account_sid = Config.TWILIO_ACCOUNT_SID
        auth_token = Config.TWILIO_AUTH_TOKEN
        return TwilioClient(account_sid, auth_token)

    def generate_twiml(self, sip_endpoint: str, **kwargs) -> str:
        """Return TwiML that dials ``sip_endpoint`` over SIP.

        The VideoSDK SIP username and password from ``Config`` are attached
        to the SIP target. ``**kwargs`` is accepted but not used here.
        """
        dial_verb = Dial()
        dial_verb.sip(
            sip_endpoint,
            username=Config.VIDEOSDK_SIP_USERNAME,
            password=Config.VIDEOSDK_SIP_PASSWORD,
        )

        twiml_doc = VoiceResponse()
        twiml_doc.append(dial_verb)
        return str(twiml_doc)

    def initiate_outbound_call(self, to_number: str, twiml: str) -> Dict[str, Any]:
        """Place an outbound call from ``Config.TWILIO_NUMBER`` to ``to_number``.

        Args:
            to_number: Destination number to dial.
            twiml: TwiML document handed to Twilio for the call.

        Returns:
            A dict with the keys ``call_sid``, ``status`` and ``provider``.
        """
        placed_call = self.client.calls.create(
            to=to_number,
            from_=Config.TWILIO_NUMBER,
            twiml=twiml,
        )
        return dict(
            call_sid=placed_call.sid,
            status=placed_call.status,
            provider="twilio",
        )

    def get_provider_name(self) -> str:
        """Return the registry name of this provider (``"twilio"``)."""
        return "twilio"

121
requirements.txt Normal file
View File

@@ -0,0 +1,121 @@
aiohappyeyeballs==2.6.1
aiohttp==3.12.13
aiohttp-retry==2.9.1
aioice==0.10.1
aiosignal==1.3.2
annotated-types==0.7.0
anyio==4.9.0
attrs==25.3.0
audioop-lts==0.2.1
audioread==3.0.1
av==13.1.0
cachetools==5.5.2
certifi==2025.6.15
cffi==1.17.1
charset-normalizer==3.4.2
click==8.2.1
cryptography==45.0.4
decorator==5.2.1
distro==1.9.0
dnspython==2.7.0
docstring-parser==0.16
fastapi==0.115.12
frozenlist==1.7.0
google-api-core==2.25.1
google-auth==2.40.3
google-cloud-speech==2.33.0
google-cloud-texttospeech==2.27.0
google-crc32c==1.7.1
google-genai==1.20.0
googleapis-common-protos==1.70.0
grpcio==1.73.0
grpcio-status==1.73.0
h11==0.16.0
h264-profile-level-id==1.0.0
httpcore==1.0.9
httpx==0.28.1
httpx-sse==0.4.0
idna==3.10
ifaddr==0.2.0
jiter==0.10.0
joblib==1.5.1
lazy-loader==0.4
librosa==0.11.0
llvmlite==0.44.0
markdown-it-py==3.0.0
mcp==1.9.4
mdurl==0.1.2
msgpack==1.1.1
multidict==6.4.4
numba==0.61.2
numpy==2.2.6
openai==1.88.0
packaging==25.0
pillow==10.4.0
platformdirs==4.3.8
pooch==1.8.2
propcache==0.3.2
proto-plus==1.26.1
protobuf==6.31.1
pyasn1==0.6.1
pyasn1-modules==0.4.2
pycparser==2.22
pycryptodome==3.20.0
pydantic==2.11.7
pydantic-core==2.33.2
pydantic-settings==2.9.1
pyee==11.1.0
pygments==2.19.1
pyjwt==2.10.1
pylibsrtp==0.12.0
pyopenssl==25.1.0
python-dotenv==1.1.0
python-multipart==0.0.20
pyyaml==6.0.2
requests==2.31.0
rich==14.0.0
rsa==4.9.1
scikit-learn==1.7.0
scipy==1.15.3
sdp-transform==1.1.0
sniffio==1.3.1
soundfile==0.13.1
soxr==0.5.0.post1
sse-starlette==2.3.6
standard-aifc==3.13.0
standard-chunk==3.13.0
standard-sunau==3.13.0
starlette==0.46.2
threadpoolctl==3.6.0
tqdm==4.67.1
twilio==9.6.3
typing-extensions==4.14.0
typing-inspection==0.4.1
urllib3==2.4.0
uvicorn==0.34.3
videosdk==0.1.0
videosdk-agents==0.0.14
videosdk-plugins-google==0.0.6
videosdk-plugins-openai==0.0.8
vonage==4.4.3
vonage-account==1.1.1
vonage-application==2.0.1
vonage-http-client==1.5.1
vonage-jwt==1.1.5
vonage-messages==1.4.0
vonage-network-auth==1.0.2
vonage-network-number-verification==1.0.2
vonage-network-sim-swap==1.1.2
vonage-number-insight==1.0.7
vonage-numbers==1.0.4
vonage-sms==1.1.6
vonage-subaccounts==1.0.4
vonage-users==1.2.1
vonage-utils==1.1.4
vonage-verify==2.1.0
vonage-verify-legacy==1.0.1
vonage-video==1.2.0
vonage-voice==1.4.0
vsaiortc==0.0.8
websockets==15.0.1
yarl==1.20.1

160
server.py Normal file
View File

@@ -0,0 +1,160 @@
import logging
from typing import Optional
from fastapi import FastAPI, Request, Form, BackgroundTasks, HTTPException
from fastapi.responses import PlainTextResponse
# Import our modular components
from config import Config
from models import OutboundCallRequest, CallResponse, SessionInfo
from providers import get_provider
from services import VideoSDKService, SessionManager
# Module-level logger for this server module.
logger = logging.getLogger(__name__)
# --- FastAPI App Initialization ---
# Title, description and version are passed to FastAPI as application metadata.
app = FastAPI(
title="VideoSDK AI Agent Call Server (Modular)",
description="Modular FastAPI server for inbound/outbound calls with VideoSDK AI Agent using different providers.",
version="2.0.0"
)
# --- Initialize Services ---
# Module-level instances shared by every endpoint in this module.
videosdk_service = VideoSDKService()
session_manager = SessionManager()
# Rebound at runtime by the /configure-provider endpoint (`global sip_provider`).
sip_provider = get_provider("twilio") # Default to Twilio
# --- FastAPI Endpoints ---
@app.get("/health", response_class=PlainTextResponse)
async def health_check():
    """Health check endpoint."""
    # The count is read live from the session manager on every request.
    return f"Server is healthy. Active sessions: {session_manager.get_active_sessions_count()}"
@app.get("/sessions", response_class=PlainTextResponse)
async def get_active_sessions():
    """Get information about active sessions."""
    sessions = session_manager.get_session_info()
    if not sessions:
        return "No active sessions"

    # One text line per session, in the order the manager reports them.
    return "\n".join(
        f"Room: {entry['room_id']}, Agent: {entry['agent_type']}, Status: {entry['status']}"
        for entry in sessions
    )
@app.post("/inbound-call", response_class=PlainTextResponse)
async def inbound_call(
    request: Request,
    background_tasks: BackgroundTasks,
    CallSid: str = Form(...),
    From: str = Form(...),
    To: str = Form(...),
):
    """
    Handles incoming calls from SIP provider.
    1. Creates a VideoSDK room.
    2. Creates an AI Agent session for the room.
    3. Starts the session in a background task.
    4. Generates TwiML to connect the call to the VideoSDK SIP endpoint.
    """
    # Function-scope import: used only to escape error text placed inside XML.
    from xml.sax.saxutils import escape

    # Twilio only interprets the body as TwiML when it is served with an XML
    # content type; a text/plain body is not executed as instructions.
    twiml_media_type = "application/xml"

    logger.info(f"Inbound call received from {From} to {To}. CallSid: {CallSid}")
    try:
        # Create VideoSDK room
        room_id = await videosdk_service.create_room()

        # Create the AI agent session
        session = await session_manager.create_session(room_id, "inbound")

        # Start the session in a background task (runs after the response is sent)
        background_tasks.add_task(session_manager.run_session, session, room_id)

        # Generate TwiML to connect the call to VideoSDK's SIP gateway
        sip_endpoint = videosdk_service.get_sip_endpoint(room_id)
        twiml = sip_provider.generate_twiml(sip_endpoint)
        logger.info(f"Responding to {sip_provider.get_provider_name()} inbound call {CallSid} with TwiML to dial SIP: {sip_endpoint}")
        return PlainTextResponse(twiml, media_type=twiml_media_type)
    except HTTPException as e:
        logger.error(f"Failed to handle inbound call {CallSid}: {e.detail}")
        # Escape the detail so characters such as '<' or '&' cannot produce a
        # malformed XML document.
        safe_detail = escape(str(e.detail))
        return PlainTextResponse(
            f"<Response><Say>An error occurred: {safe_detail}</Say></Response>",
            status_code=500,
            media_type=twiml_media_type,
        )
    except Exception as e:
        logger.error(f"Unhandled error in inbound call {CallSid}: {e}", exc_info=True)
        return PlainTextResponse(
            "<Response><Say>An unexpected error occurred. Please try again later.</Say></Response>",
            status_code=500,
            media_type=twiml_media_type,
        )
@app.post("/outbound-call")
async def outbound_call(request_body: OutboundCallRequest, background_tasks: BackgroundTasks):
    """
    Initiates an outbound call using SIP provider, connecting to an AI Agent in a VideoSDK room.
    """
    # Function-scope import: only this endpoint needs it.
    import asyncio

    to_number = request_body.to_number
    initial_greeting = request_body.initial_greeting
    logger.info(f"Request to initiate outbound call to: {to_number}")

    if not to_number:
        raise HTTPException(status_code=400, detail="'to_number' is required.")

    # Set once a room exists, so the failure paths know which session
    # registration (if any) has to be undone.
    room_id = None
    try:
        # Create VideoSDK room
        room_id = await videosdk_service.create_room()

        # Create the AI agent session
        session = await session_manager.create_session(
            room_id,
            "outbound",
            initial_greeting
        )

        # Start the session in a background task
        background_tasks.add_task(session_manager.run_session, session, room_id)

        # Generate TwiML for connecting to SIP endpoint
        sip_endpoint = videosdk_service.get_sip_endpoint(room_id)
        twiml = sip_provider.generate_twiml(sip_endpoint)
        logger.info(f"Outbound call SIP endpoint: {sip_endpoint}")

        # The provider call is synchronous network I/O; run it in a worker
        # thread so the event loop is not blocked while it completes.
        call_result = await asyncio.to_thread(
            sip_provider.initiate_outbound_call, to_number, twiml
        )
        logger.info(f"Outbound call initiated via {sip_provider.get_provider_name()} to {to_number}. "
                    f"Call SID: {call_result['call_sid']}. VideoSDK Room: {room_id}")

        return CallResponse(
            message="Outbound call initiated successfully",
            twilio_call_sid=call_result['call_sid'],
            videosdk_room_id=room_id
        )
    except HTTPException as e:
        logger.error(f"Failed to initiate outbound call to {to_number}: {e.detail}")
        # Background tasks are not executed when the request fails, so the
        # run_session cleanup would never happen; drop the registration here.
        if room_id is not None:
            session_manager.cleanup_session(room_id)
        raise
    except Exception as e:
        logger.error(f"Unhandled error initiating outbound call to {to_number}: {e}", exc_info=True)
        if room_id is not None:
            session_manager.cleanup_session(room_id)
        raise HTTPException(status_code=500, detail=f"Failed to initiate outbound call: {e}") from e
# --- Configuration Endpoints ---
@app.post("/configure-provider")
async def configure_provider(provider_name: str):
    """Configure the SIP provider to use."""
    global sip_provider

    # Build the replacement first; the module-level provider is only rebound
    # once construction has succeeded.
    try:
        replacement = get_provider(provider_name)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    sip_provider = replacement
    logger.info(f"SIP provider changed to: {provider_name}")
    return {"message": f"Provider changed to {provider_name}"}
if __name__ == "__main__":
    # Imported here so uvicorn is only loaded when this file runs as a script.
    import uvicorn

    # Listen on all interfaces, port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000)

4
services/__init__.py Normal file
View File

@@ -0,0 +1,4 @@
from .videosdk_service import VideoSDKService
from .session_manager import SessionManager
__all__ = ["VideoSDKService", "SessionManager"]

View File

@@ -0,0 +1,81 @@
import logging
import asyncio
from typing import Dict, Any, Optional
from videosdk.agents import AgentSession
from ai import get_ai_agent
from config import Config
logger = logging.getLogger(__name__)
class SessionManager:
    """Registry of AI agent sessions, keyed by room id.

    ``create_session`` registers a session, and ``cleanup_session`` removes
    it. ``run_session`` always calls ``cleanup_session`` when the session
    returns or fails.
    """

    def __init__(self):
        # room_id -> session currently registered for that room.
        self.active_sessions: Dict[str, AgentSession] = {}

    async def create_session(
        self,
        room_id: str,
        call_type: str = "inbound",
        initial_greeting: Optional[str] = None,
        ai_agent_name: str = "gemini"
    ) -> AgentSession:
        """Create a session for ``room_id`` and register it as active.

        Args:
            room_id: Room the session belongs to; also the registry key.
            call_type: Stored in the session context under ``call_type``.
            initial_greeting: When truthy, stored in the context under
                ``initial_greeting``; otherwise the key is omitted.
            ai_agent_name: Name passed to ``get_ai_agent``.

        Returns:
            The newly created session (not yet started).

        Raises:
            Exception: Any error from agent lookup or session construction
                is logged and re-raised unchanged.
        """
        logger.info(f"Creating AI agent session for {call_type} call in room: {room_id}")

        try:
            # Get the AI agent
            ai_agent = get_ai_agent(ai_agent_name)

            # Prepare context
            context = {
                "call_type": call_type,
            }
            if initial_greeting:
                context["initial_greeting"] = initial_greeting

            # Create session
            session = ai_agent.create_session(room_id, context)

            # Register only after construction succeeded, so a failed
            # creation never leaves a stale entry behind.
            self.active_sessions[room_id] = session

            logger.info(f"Session created for room {room_id} using {ai_agent.get_agent_name()}")
            return session

        except Exception as e:
            logger.error(f"Error creating AI agent session for room {room_id}: {e}", exc_info=True)
            raise

    async def run_session(self, session: AgentSession, room_id: str):
        """Start ``session`` and unregister it once ``start()`` returns or fails.

        Errors raised by the session are logged and swallowed, so this is
        safe to run as a fire-and-forget background task.
        """
        # NOTE(review): the registry entry is removed as soon as
        # `session.start()` returns. Confirm that `start()` blocks for the
        # whole call in the installed videosdk-agents version; otherwise
        # sessions are unregistered while still live.
        try:
            logger.info(f"Starting session for room {room_id}...")
            await session.start()
            logger.info(f"AI Agent session for room {room_id} has ended.")
        except Exception as session_error:
            logger.error(f"Session error for room {room_id}: {session_error}", exc_info=True)
        finally:
            # Clean up the session
            self.cleanup_session(room_id)

    def cleanup_session(self, room_id: str):
        """Remove ``room_id`` from the registry; a no-op if it is not registered."""
        if room_id in self.active_sessions:
            del self.active_sessions[room_id]
            logger.info(f"Session cleaned up for room {room_id}")

    def get_active_sessions_count(self) -> int:
        """Return how many sessions are currently registered."""
        return len(self.active_sessions)

    def get_session_info(self) -> list[Dict[str, Any]]:
        """Return one summary dict per registered session.

        Each dict has the keys ``room_id``, ``agent_type`` (class name of the
        session's agent) and ``status`` (always ``"active"``). The result is
        an empty list when nothing is registered.
        """
        return [
            {
                "room_id": room_id,
                "agent_type": session.agent.__class__.__name__,
                "status": "active",
            }
            for room_id, session in self.active_sessions.items()
        ]

View File

@@ -0,0 +1,53 @@
import logging
import httpx
from typing import Dict, Any
from fastapi import HTTPException
from config import Config
logger = logging.getLogger(__name__)
class VideoSDKService:
    """Thin client for the VideoSDK REST API (room creation, SIP addressing)."""

    def __init__(self):
        self.auth_token = Config.VIDEOSDK_AUTH_TOKEN
        self.base_url = "https://api.videosdk.live/v2"

    async def create_room(self, geo_fence: str = "us002") -> str:
        """Create a VideoSDK room and return its id.

        Args:
            geo_fence: Currently unused: the request is sent without a body,
                so this value never reaches the API.
                NOTE(review): confirm whether it was meant to be sent.

        Returns:
            The ``roomId`` value from the API response.

        Raises:
            HTTPException: With status 500 when the API returns an error
                status, the response has no ``roomId``, or any other error
                occurs while making the request.
        """
        rooms_url = f"{self.base_url}/rooms"
        request_headers = {
            "Content-Type": "application/json",
            "Authorization": self.auth_token,
        }

        async with httpx.AsyncClient() as http:
            try:
                api_response = await http.post(rooms_url, headers=request_headers)
                api_response.raise_for_status()

                room_id = api_response.json().get("roomId")
                if not room_id:
                    # Caught by the generic handler below and reported as a 500.
                    raise ValueError("roomId not found in VideoSDK response.")

                logger.info(f"VideoSDK Room created: {room_id}")
                return room_id
            except httpx.HTTPStatusError as e:
                logger.error(f"HTTP error creating VideoSDK room: {e.response.status_code} - {e.response.text}")
                raise HTTPException(
                    status_code=500,
                    detail=f"Failed to create VideoSDK room: HTTP error {e.response.status_code}"
                )
            except Exception as e:
                logger.error(f"Error creating VideoSDK room: {e}")
                raise HTTPException(
                    status_code=500,
                    detail=f"Failed to create VideoSDK room: {e}"
                )

    def get_sip_endpoint(self, room_id: str) -> str:
        """Return the SIP URI for ``room_id`` on ``sip.videosdk.live``."""
        return f"sip:{room_id}@sip.videosdk.live"

32
voice_agent.py Normal file
View File

@@ -0,0 +1,32 @@
import logging
from typing import Optional, List, Any
from videosdk.agents import Agent
logger = logging.getLogger(__name__)
class VoiceAgent(Agent):
    """Voice agent whose default prompt is a medical appointment scheduling assistant.

    The instructions and the opening greeting can both be overridden: the
    former through the constructor, the latter through the
    ``initial_greeting`` context key.
    """

    def __init__(
        self,
        instructions: str = "You are a medical appointment scheduling assistant. Your goal is to confirm upcoming appointments (5th June 2025 at 11:00 AM) and reschedule if needed.",
        tools: Optional[List[Any]] = None,
        context: Optional[dict] = None,
    ) -> None:
        """Initialize the agent.

        Args:
            instructions: Prompt passed to the base ``Agent``.
            tools: Tools passed to the base ``Agent``; an empty list is used
                when none are supplied.
            context: Per-call data; an empty dict is used when none is
                supplied.
        """
        agent_tools = tools if tools else []
        super().__init__(instructions=instructions, tools=agent_tools)

        self.context = context if context else {}
        self.logger = logging.getLogger(__name__)

    async def on_enter(self) -> None:
        """Speak the opening greeting when the agent enters the session."""
        self.logger.info("Agent entered the session.")

        # Used only when the context carries no "initial_greeting" key.
        fallback_greeting = "Hello, this is Neha, calling from City Medical Center regarding your upcoming appointment. Is this a good time to speak?"
        greeting = self.context.get("initial_greeting", fallback_greeting)
        await self.session.say(greeting)

    async def on_exit(self) -> None:
        """Log that the call has ended."""
        self.logger.info("Call ended")