diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f94585c --- /dev/null +++ b/.gitignore @@ -0,0 +1,56 @@ +# Environment variables +.env +.env.local +.env.*.local + +# Python +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg + +# Virtual environments +.venv/ +venv/ +ENV/ +env/ + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +.DS_Store? +._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# Logs +*.log +logs/ + +# Temporary files +*.tmp +*.temp \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..a8a2540 --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2025 VideoSDK Community + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/README.md b/README.md index a2f13ab..4763b21 100644 --- a/README.md +++ b/README.md @@ -1,12 +1,31 @@ -# VideoSDK AI Telephony Agent +
+ +# AI Telephony Agent + +
Make INBOUND and OUTBOUND calls with AI agents using VideoSDK. Supports multiple SIP providers and AI agents with a clean, extensible architecture for VoIP telephony solutions. +
+
+ +![Architecture : Connecting Voice Agent to Telephony Agent](https://assets.videosdk.live/images/sip-telephony-agent.png) + +Documentation +Video Tutorials +Get Started +Discord Community +PyPI Package + +
+ +
+ ## Installation ### Prerequisites -- Python 3.21+ +- Python 3.11+ - VideoSDK account - Twilio account (SIP trunking provider) - Google API key (for Gemini AI) @@ -47,7 +66,7 @@ TWILIO_NUMBER=your_twilio_number 4. **Run the server** ```bash -python server_modular.py +python server.py ``` The server will start on `http://localhost:8000` @@ -90,7 +109,7 @@ POST /configure-provider?provider_name=twilio Switch SIP providers at runtime (currently supports: `twilio`). -## 🔌 Adding New SIP Providers +## Adding New SIP Providers The modular architecture makes it easy to add new SIP providers and SIP trunking services. Here's how to add a new provider: @@ -163,7 +182,7 @@ class Config: # ... rest of validation ``` -## 🤖 Adding New AI Agents +## Adding New AI Agents Similarly, you can add new AI agents for intelligent call handling: @@ -342,19 +361,4 @@ For additional SIP providers, add their specific environment variables to `confi This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. 
-## Support - -- **Issues**: [GitHub Issues](https://github.com/yourusername/ai-agent-telephony/issues) -- **Documentation**: [Wiki](https://github.com/yourusername/ai-agent-telephony/wiki) -- **Discussions**: [GitHub Discussions](https://github.com/yourusername/ai-agent-telephony/discussions) - -## Acknowledgments - -- [VideoSDK](https://videosdk.live/) for the real-time communication platform -- [Twilio](https://www.twilio.com/) for SIP trunking capabilities -- [Google Gemini](https://ai.google.dev/) for AI agent capabilities -- [FastAPI](https://fastapi.tiangolo.com/) for the web framework - ---- - **Made with ❤️ for the developer community** diff --git a/ai/__init__.py b/ai/__init__.py new file mode 100644 index 0000000..1414312 --- /dev/null +++ b/ai/__init__.py @@ -0,0 +1,15 @@ +from .base_agent import AIAgent +from .gemini_agent import GeminiAgent + +def get_ai_agent(agent_name: str = "gemini") -> AIAgent: + """Factory function to get the appropriate AI agent.""" + agents = { + "gemini": GeminiAgent, + } + + if agent_name not in agents: + raise ValueError(f"Unsupported AI agent: {agent_name}. 
Available agents: {list(agents.keys())}") + + return agents[agent_name]() + +__all__ = ["AIAgent", "GeminiAgent", "get_ai_agent"] \ No newline at end of file diff --git a/ai/base_agent.py b/ai/base_agent.py new file mode 100644 index 0000000..93ffb54 --- /dev/null +++ b/ai/base_agent.py @@ -0,0 +1,21 @@ +from abc import ABC, abstractmethod +from typing import Dict, Any, Optional +from videosdk.agents import AgentSession, RealTimePipeline + +class AIAgent(ABC): + """Base interface for AI agents.""" + + @abstractmethod + def create_pipeline(self) -> RealTimePipeline: + """Create and return the AI pipeline.""" + pass + + @abstractmethod + def create_session(self, room_id: str, context: Dict[str, Any]) -> AgentSession: + """Create and return an agent session.""" + pass + + @abstractmethod + def get_agent_name(self) -> str: + """Return the agent name.""" + pass \ No newline at end of file diff --git a/ai/gemini_agent.py b/ai/gemini_agent.py new file mode 100644 index 0000000..9af046e --- /dev/null +++ b/ai/gemini_agent.py @@ -0,0 +1,45 @@ +from typing import Dict, Any +from videosdk.agents import AgentSession, RealTimePipeline +from videosdk.plugins.google import GeminiRealtime, GeminiLiveConfig +from .base_agent import AIAgent +from voice_agent import VoiceAgent +from config import Config + +class GeminiAgent(AIAgent): + """Gemini AI agent implementation.""" + + def create_pipeline(self) -> RealTimePipeline: + """Create and return the Gemini pipeline.""" + model = GeminiRealtime( + model="gemini-2.0-flash-live-001", + api_key=Config.GOOGLE_API_KEY, + config=GeminiLiveConfig( + voice="Leda", + response_modalities=["AUDIO"], + ) + ) + return RealTimePipeline(model=model) + + def create_session(self, room_id: str, context: Dict[str, Any]) -> AgentSession: + """Create and return a Gemini agent session.""" + pipeline = self.create_pipeline() + + # Context for the agent + agent_context = { + "name": "VideoSDK Gemini Agent", + "meetingId": room_id, + "videosdk_auth": 
Config.VIDEOSDK_AUTH_TOKEN, + **context + } + + session = AgentSession( + agent=VoiceAgent(context=agent_context), + pipeline=pipeline, + context=agent_context + ) + + return session + + def get_agent_name(self) -> str: + """Return the agent name.""" + return "gemini" \ No newline at end of file diff --git a/config.py b/config.py new file mode 100644 index 0000000..82a4594 --- /dev/null +++ b/config.py @@ -0,0 +1,52 @@ +import os +import logging +from typing import Dict, Any +from dotenv import load_dotenv + +# Load environment variables +load_dotenv() + +# Configure logging +logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s') +logger = logging.getLogger(__name__) + +class Config: + """Centralized configuration management.""" + + # VideoSDK Configuration + VIDEOSDK_AUTH_TOKEN = os.getenv("VIDEOSDK_AUTH_TOKEN") + VIDEOSDK_SIP_USERNAME = os.getenv("VIDEOSDK_SIP_USERNAME") + VIDEOSDK_SIP_PASSWORD = os.getenv("VIDEOSDK_SIP_PASSWORD") + + # AI Configuration + GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY") + + # Twilio Configuration + TWILIO_ACCOUNT_SID = os.getenv("TWILIO_SID") + TWILIO_AUTH_TOKEN = os.getenv("TWILIO_AUTH_TOKEN") + TWILIO_NUMBER = os.getenv("TWILIO_NUMBER") + + @classmethod + def validate(cls) -> None: + """Validate that all required environment variables are set.""" + required_vars = { + "VIDEOSDK_AUTH_TOKEN": cls.VIDEOSDK_AUTH_TOKEN, + "VIDEOSDK_SIP_USERNAME": cls.VIDEOSDK_SIP_USERNAME, + "VIDEOSDK_SIP_PASSWORD": cls.VIDEOSDK_SIP_PASSWORD, + "GOOGLE_API_KEY": cls.GOOGLE_API_KEY, + "TWILIO_SID": cls.TWILIO_ACCOUNT_SID, + "TWILIO_AUTH_TOKEN": cls.TWILIO_AUTH_TOKEN, + "TWILIO_NUMBER": cls.TWILIO_NUMBER, + } + + missing_vars = [var_name for var_name, var_value in required_vars.items() if not var_value] + + if missing_vars: + for var_name in missing_vars: + logger.error(f"Error: Missing environment variable: {var_name}") + raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}") 
+ + logger.info("All required environment variables are set.") + +# Validate configuration on import +Config.validate() \ No newline at end of file diff --git a/models.py b/models.py new file mode 100644 index 0000000..33b7213 --- /dev/null +++ b/models.py @@ -0,0 +1,20 @@ +from typing import Optional +from pydantic import BaseModel + +class OutboundCallRequest(BaseModel): + """Request model for initiating outbound calls.""" + to_number: str + initial_greeting: Optional[str] = None + +class CallResponse(BaseModel): + """Response model for call operations.""" + message: str + twilio_call_sid: Optional[str] = None + videosdk_room_id: Optional[str] = None + +class SessionInfo(BaseModel): + """Model for session information.""" + room_id: str + call_type: str + agent_type: str + status: str \ No newline at end of file diff --git a/providers/__init__.py b/providers/__init__.py new file mode 100644 index 0000000..4ef910c --- /dev/null +++ b/providers/__init__.py @@ -0,0 +1,15 @@ +from .base import SIPProvider +from .twilio_provider import TwilioProvider + +def get_provider(provider_name: str = "twilio") -> SIPProvider: + """Factory function to get the appropriate SIP provider.""" + providers = { + "twilio": TwilioProvider, + } + + if provider_name not in providers: + raise ValueError(f"Unsupported provider: {provider_name}. 
Available providers: {list(providers.keys())}") + + return providers[provider_name]() + +__all__ = ["SIPProvider", "TwilioProvider", "get_provider"] \ No newline at end of file diff --git a/providers/base.py b/providers/base.py new file mode 100644 index 0000000..e8bf566 --- /dev/null +++ b/providers/base.py @@ -0,0 +1,26 @@ +from abc import ABC, abstractmethod +from typing import Dict, Any, Optional +from twilio.twiml.voice_response import VoiceResponse + +class SIPProvider(ABC): + """Base interface for SIP providers.""" + + @abstractmethod + def create_client(self) -> Any: + """Create and return the provider's client instance.""" + pass + + @abstractmethod + def generate_twiml(self, sip_endpoint: str, **kwargs) -> str: + """Generate TwiML for connecting to SIP endpoint.""" + pass + + @abstractmethod + def initiate_outbound_call(self, to_number: str, twiml: str) -> Dict[str, Any]: + """Initiate an outbound call using the provider.""" + pass + + @abstractmethod + def get_provider_name(self) -> str: + """Return the provider name.""" + pass \ No newline at end of file diff --git a/providers/twilio_provider.py b/providers/twilio_provider.py new file mode 100644 index 0000000..1a1ce3b --- /dev/null +++ b/providers/twilio_provider.py @@ -0,0 +1,44 @@ +from typing import Dict, Any +from twilio.rest import Client as TwilioClient +from twilio.twiml.voice_response import VoiceResponse, Dial +from .base import SIPProvider +from config import Config + +class TwilioProvider(SIPProvider): + """Twilio SIP provider implementation.""" + + def __init__(self): + self.client = self.create_client() + + def create_client(self) -> TwilioClient: + """Create and return Twilio client instance.""" + return TwilioClient(Config.TWILIO_ACCOUNT_SID, Config.TWILIO_AUTH_TOKEN) + + def generate_twiml(self, sip_endpoint: str, **kwargs) -> str: + """Generate TwiML for connecting to SIP endpoint.""" + response = VoiceResponse() + dial = Dial() + dial.sip( + sip_endpoint, + 
username=Config.VIDEOSDK_SIP_USERNAME, + password=Config.VIDEOSDK_SIP_PASSWORD, + ) + response.append(dial) + return str(response) + + def initiate_outbound_call(self, to_number: str, twiml: str) -> Dict[str, Any]: + """Initiate an outbound call using Twilio.""" + call = self.client.calls.create( + to=to_number, + from_=Config.TWILIO_NUMBER, + twiml=twiml + ) + return { + "call_sid": call.sid, + "status": call.status, + "provider": "twilio" + } + + def get_provider_name(self) -> str: + """Return the provider name.""" + return "twilio" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..1fe1148 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,121 @@ +aiohappyeyeballs==2.6.1 +aiohttp==3.12.13 +aiohttp-retry==2.9.1 +aioice==0.10.1 +aiosignal==1.3.2 +annotated-types==0.7.0 +anyio==4.9.0 +attrs==25.3.0 +audioop-lts==0.2.1 +audioread==3.0.1 +av==13.1.0 +cachetools==5.5.2 +certifi==2025.6.15 +cffi==1.17.1 +charset-normalizer==3.4.2 +click==8.2.1 +cryptography==45.0.4 +decorator==5.2.1 +distro==1.9.0 +dnspython==2.7.0 +docstring-parser==0.16 +fastapi==0.115.12 +frozenlist==1.7.0 +google-api-core==2.25.1 +google-auth==2.40.3 +google-cloud-speech==2.33.0 +google-cloud-texttospeech==2.27.0 +google-crc32c==1.7.1 +google-genai==1.20.0 +googleapis-common-protos==1.70.0 +grpcio==1.73.0 +grpcio-status==1.73.0 +h11==0.16.0 +h264-profile-level-id==1.0.0 +httpcore==1.0.9 +httpx==0.28.1 +httpx-sse==0.4.0 +idna==3.10 +ifaddr==0.2.0 +jiter==0.10.0 +joblib==1.5.1 +lazy-loader==0.4 +librosa==0.11.0 +llvmlite==0.44.0 +markdown-it-py==3.0.0 +mcp==1.9.4 +mdurl==0.1.2 +msgpack==1.1.1 +multidict==6.4.4 +numba==0.61.2 +numpy==2.2.6 +openai==1.88.0 +packaging==25.0 +pillow==10.4.0 +platformdirs==4.3.8 +pooch==1.8.2 +propcache==0.3.2 +proto-plus==1.26.1 +protobuf==6.31.1 +pyasn1==0.6.1 +pyasn1-modules==0.4.2 +pycparser==2.22 +pycryptodome==3.20.0 +pydantic==2.11.7 +pydantic-core==2.33.2 +pydantic-settings==2.9.1 +pyee==11.1.0 
+pygments==2.19.1 +pyjwt==2.10.1 +pylibsrtp==0.12.0 +pyopenssl==25.1.0 +python-dotenv==1.1.0 +python-multipart==0.0.20 +pyyaml==6.0.2 +requests==2.31.0 +rich==14.0.0 +rsa==4.9.1 +scikit-learn==1.7.0 +scipy==1.15.3 +sdp-transform==1.1.0 +sniffio==1.3.1 +soundfile==0.13.1 +soxr==0.5.0.post1 +sse-starlette==2.3.6 +standard-aifc==3.13.0 +standard-chunk==3.13.0 +standard-sunau==3.13.0 +starlette==0.46.2 +threadpoolctl==3.6.0 +tqdm==4.67.1 +twilio==9.6.3 +typing-extensions==4.14.0 +typing-inspection==0.4.1 +urllib3==2.4.0 +uvicorn==0.34.3 +videosdk==0.1.0 +videosdk-agents==0.0.14 +videosdk-plugins-google==0.0.6 +videosdk-plugins-openai==0.0.8 +vonage==4.4.3 +vonage-account==1.1.1 +vonage-application==2.0.1 +vonage-http-client==1.5.1 +vonage-jwt==1.1.5 +vonage-messages==1.4.0 +vonage-network-auth==1.0.2 +vonage-network-number-verification==1.0.2 +vonage-network-sim-swap==1.1.2 +vonage-number-insight==1.0.7 +vonage-numbers==1.0.4 +vonage-sms==1.1.6 +vonage-subaccounts==1.0.4 +vonage-users==1.2.1 +vonage-utils==1.1.4 +vonage-verify==2.1.0 +vonage-verify-legacy==1.0.1 +vonage-video==1.2.0 +vonage-voice==1.4.0 +vsaiortc==0.0.8 +websockets==15.0.1 +yarl==1.20.1 diff --git a/server.py b/server.py new file mode 100644 index 0000000..9257ddc --- /dev/null +++ b/server.py @@ -0,0 +1,160 @@ +import logging +from typing import Optional +from fastapi import FastAPI, Request, Form, BackgroundTasks, HTTPException +from fastapi.responses import PlainTextResponse + +# Import our modular components +from config import Config +from models import OutboundCallRequest, CallResponse, SessionInfo +from providers import get_provider +from services import VideoSDKService, SessionManager + +# Configure logging +logger = logging.getLogger(__name__) + +# --- FastAPI App Initialization --- +app = FastAPI( + title="VideoSDK AI Agent Call Server (Modular)", + description="Modular FastAPI server for inbound/outbound calls with VideoSDK AI Agent using different providers.", + version="2.0.0" +) + +# --- 
Initialize Services ---
+videosdk_service = VideoSDKService()
+session_manager = SessionManager()
+sip_provider = get_provider("twilio")  # Default to Twilio
+
+# --- FastAPI Endpoints ---
+
+@app.get("/health", response_class=PlainTextResponse)
+async def health_check():
+    """Health check endpoint."""
+    active_sessions = session_manager.get_active_sessions_count()
+    return f"Server is healthy. Active sessions: {active_sessions}"
+
+@app.get("/sessions", response_class=PlainTextResponse)
+async def get_active_sessions():
+    """Get information about active sessions."""
+    session_info = session_manager.get_session_info()
+
+    if not session_info:
+        return "No active sessions"
+
+    session_details = []
+    for session in session_info:
+        session_details.append(
+            f"Room: {session['room_id']}, "
+            f"Agent: {session['agent_type']}, "
+            f"Status: {session['status']}"
+        )
+
+    return "\n".join(session_details)
+
+@app.post("/inbound-call", response_class=PlainTextResponse)
+async def inbound_call(
+    request: Request,
+    background_tasks: BackgroundTasks,
+    CallSid: str = Form(...),
+    From: str = Form(...),
+    To: str = Form(...),
+):
+    """
+    Handles incoming calls from SIP provider.
+    1. Creates a VideoSDK room.
+    2. Creates an AI Agent session for the room.
+    3. Starts the session in a background task.
+    4. Returns TwiML (as application/xml) that dials the VideoSDK SIP endpoint; failures return a plain-text 500.
+    """
+    logger.info(f"Inbound call received from {From} to {To}. CallSid: {CallSid}")
+
+    try:
+        # Create VideoSDK room
+        room_id = await videosdk_service.create_room()
+
+        # Create the AI agent session
+        session = await session_manager.create_session(room_id, "inbound")
+
+        # Start the session in a background task
+        background_tasks.add_task(session_manager.run_session, session, room_id)
+
+        # Generate TwiML to connect the call to VideoSDK's SIP gateway
+        sip_endpoint = videosdk_service.get_sip_endpoint(room_id)
+        twiml = sip_provider.generate_twiml(sip_endpoint)
+
+        logger.info(f"Responding to {sip_provider.get_provider_name()} inbound call {CallSid} with TwiML to dial SIP: {sip_endpoint}")
+        return PlainTextResponse(twiml, media_type="application/xml")  # XML content type so Twilio executes the TwiML instead of treating it as plain text
+
+    except HTTPException as e:
+        logger.error(f"Failed to handle inbound call {CallSid}: {e.detail}")
+        return PlainTextResponse(f"An error occurred: {e.detail}", status_code=500)
+    except Exception as e:
+        logger.error(f"Unhandled error in inbound call {CallSid}: {e}", exc_info=True)
+        return PlainTextResponse("An unexpected error occurred. Please try again later.", status_code=500)
+
+@app.post("/outbound-call")
+async def outbound_call(request_body: OutboundCallRequest, background_tasks: BackgroundTasks):
+    """
+    Initiates an outbound call using SIP provider, connecting to an AI Agent in a VideoSDK room.
+ """ + to_number = request_body.to_number + initial_greeting = request_body.initial_greeting + logger.info(f"Request to initiate outbound call to: {to_number}") + + if not to_number: + raise HTTPException(status_code=400, detail="'to_number' is required.") + + try: + # Create VideoSDK room + room_id = await videosdk_service.create_room() + + # Create the AI agent session + session = await session_manager.create_session( + room_id, + "outbound", + initial_greeting + ) + + # Start the session in a background task + background_tasks.add_task(session_manager.run_session, session, room_id) + + # Generate TwiML for connecting to SIP endpoint + sip_endpoint = videosdk_service.get_sip_endpoint(room_id) + twiml = sip_provider.generate_twiml(sip_endpoint) + + logger.info(f"Outbound call SIP endpoint: {sip_endpoint}") + + # Create the outbound call via SIP provider + call_result = sip_provider.initiate_outbound_call(to_number, twiml) + + logger.info(f"Outbound call initiated via {sip_provider.get_provider_name()} to {to_number}. " + f"Call SID: {call_result['call_sid']}. 
VideoSDK Room: {room_id}")
+
+        return CallResponse(
+            message="Outbound call initiated successfully",
+            twilio_call_sid=call_result['call_sid'],
+            videosdk_room_id=room_id
+        )
+
+    except HTTPException as e:
+        logger.error(f"Failed to initiate outbound call to {to_number}: {e.detail}")
+        raise e
+    except Exception as e:
+        logger.error(f"Unhandled error initiating outbound call to {to_number}: {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Failed to initiate outbound call: {e}")
+
+# --- Configuration Endpoints ---
+
+@app.post("/configure-provider")
+async def configure_provider(provider_name: str):
+    """Configure the SIP provider to use."""
+    global sip_provider
+    try:
+        sip_provider = get_provider(provider_name)
+        logger.info(f"SIP provider changed to: {provider_name}")
+        return {"message": f"Provider changed to {provider_name}"}
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
\ No newline at end of file
diff --git a/services/__init__.py b/services/__init__.py
new file mode 100644
index 0000000..c0429e8
--- /dev/null
+++ b/services/__init__.py
@@ -0,0 +1,4 @@
+from .videosdk_service import VideoSDKService
+from .session_manager import SessionManager
+
+__all__ = ["VideoSDKService", "SessionManager"]
\ No newline at end of file
diff --git a/services/session_manager.py b/services/session_manager.py
new file mode 100644
index 0000000..8ad0d01
--- /dev/null
+++ b/services/session_manager.py
@@ -0,0 +1,81 @@
+import logging
+import asyncio
+from typing import Dict, Any, List, Optional
+from videosdk.agents import AgentSession
+from ai import get_ai_agent
+from config import Config
+
+logger = logging.getLogger(__name__)
+
+class SessionManager:
+    """Tracks active AI agent sessions in a dict keyed by VideoSDK room ID."""
+
+    def __init__(self):
+        self.active_sessions: Dict[str, AgentSession] = {}
+
+    async def create_session(
+        self,
+        room_id: str,
+        call_type: str = "inbound",
+        initial_greeting: Optional[str] = None,
+        ai_agent_name: str = "gemini"
+    ) -> AgentSession:
+        """Create a session via the named AI agent, store it under room_id, and return it (errors are logged and re-raised)."""
+        logger.info(f"Creating AI agent session for {call_type} call in room: {room_id}")
+
+        try:
+            # Get the AI agent
+            ai_agent = get_ai_agent(ai_agent_name)
+
+            # Prepare context
+            context = {
+                "call_type": call_type,
+            }
+            if initial_greeting:
+                context["initial_greeting"] = initial_greeting
+
+            # Create session
+            session = ai_agent.create_session(room_id, context)
+
+            # Store the session
+            self.active_sessions[room_id] = session
+
+            logger.info(f"Session created for room {room_id} using {ai_agent.get_agent_name()}")
+            return session
+
+        except Exception as e:
+            logger.error(f"Error creating AI agent session for room {room_id}: {e}", exc_info=True)
+            raise
+
+    async def run_session(self, session: AgentSession, room_id: str):
+        """Await session.start(), log any error, and always remove the room's entry afterwards."""
+        try:
+            logger.info(f"Starting session for room {room_id}...")
+            await session.start()
+            logger.info(f"AI Agent session for room {room_id} has ended.")
+        except Exception as session_error:
+            logger.error(f"Session error for room {room_id}: {session_error}", exc_info=True)
+        finally:
+            # Clean up the session
+            self.cleanup_session(room_id)
+
+    def cleanup_session(self, room_id: str):
+        """Remove the session stored for room_id, if any."""
+        if room_id in self.active_sessions:
+            del self.active_sessions[room_id]
+            logger.info(f"Session cleaned up for room {room_id}")
+
+    def get_active_sessions_count(self) -> int:
+        """Get the number of active sessions."""
+        return len(self.active_sessions)
+
+    def get_session_info(self) -> List[Dict[str, Any]]:
+        """Return one dict (room_id, agent_type, status) per active session."""
+        session_info = []
+        for room_id, session in self.active_sessions.items():
+            session_info.append({
+                "room_id": room_id,
+                "agent_type": session.agent.__class__.__name__,
+                "status": "active"
+            })
+        return session_info
\ No newline at end of file
diff --git
a/services/videosdk_service.py b/services/videosdk_service.py
new file mode 100644
index 0000000..24cff3d
--- /dev/null
+++ b/services/videosdk_service.py
@@ -0,0 +1,53 @@
+import logging
+import httpx
+from typing import Dict, Any
+from fastapi import HTTPException
+from config import Config
+
+logger = logging.getLogger(__name__)
+
+class VideoSDKService:
+    """Service for managing VideoSDK rooms and operations."""
+
+    def __init__(self):
+        self.auth_token = Config.VIDEOSDK_AUTH_TOKEN
+        self.base_url = "https://api.videosdk.live/v2"
+
+    async def create_room(self, geo_fence: str = "us002") -> str:
+        """POST to {base_url}/rooms and return the new room's "roomId"; any failure is re-raised as HTTPException(500)."""
+        url = f"{self.base_url}/rooms"
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": self.auth_token
+        }
+
+        # NOTE(review): geo_fence is accepted but never sent; the POST below has no body. Confirm the API field name before wiring it in.
+        async with httpx.AsyncClient() as client:
+            try:
+                response = await client.post(url, headers=headers)
+                response.raise_for_status()
+                room_data = response.json()
+
+                room_id = room_data.get("roomId")
+                if not room_id:
+                    raise ValueError("roomId not found in VideoSDK response.")
+
+                logger.info(f"VideoSDK Room created: {room_id}")
+                return room_id
+
+            except httpx.HTTPStatusError as e:
+                logger.error(f"HTTP error creating VideoSDK room: {e.response.status_code} - {e.response.text}")
+                raise HTTPException(
+                    status_code=500,
+                    detail=f"Failed to create VideoSDK room: HTTP error {e.response.status_code}"
+                ) from e
+            except Exception as e:
+                logger.error(f"Error creating VideoSDK room: {e}", exc_info=True)
+                raise HTTPException(
+                    status_code=500,
+                    detail=f"Failed to create VideoSDK room: {e}"
+                ) from e
+
+    def get_sip_endpoint(self, room_id: str) -> str:
+        """Return the SIP URI for a room: sip:<room_id>@sip.videosdk.live."""
+        return f"sip:{room_id}@sip.videosdk.live"
\ No newline at end of file
diff --git a/voice_agent.py b/voice_agent.py
new file mode 100644
index 0000000..5be07c5
--- /dev/null
+++ b/voice_agent.py
@@ -0,0 +1,32 @@
+import logging
+from typing import Optional, List, Any
+from videosdk.agents import Agent
+
+logger =
logging.getLogger(__name__)
+
+class VoiceAgent(Agent):
+    """Voice agent whose default instructions and greeting target medical appointment scheduling calls."""
+
+    def __init__(
+        self,
+        instructions: str = "You are a medical appointment scheduling assistant. Your goal is to confirm upcoming appointments (5th June 2025 at 11:00 AM) and reschedule if needed.",
+        tools: Optional[List[Any]] = None,
+        context: Optional[dict] = None,
+    ) -> None:
+        """Initialize the VoiceAgent with instructions, optional tools (default: none), and a call context dict (default: empty)."""
+        super().__init__(
+            instructions=instructions,
+            tools=tools or []
+        )
+        self.context = context or {}
+        self.logger = logging.getLogger(__name__)
+
+    async def on_enter(self) -> None:
+        """Log entry, then speak the context's "initial_greeting" or, if absent, the built-in default greeting."""
+        self.logger.info("Agent entered the session.")
+        initial_greeting = self.context.get("initial_greeting", "Hello, this is Neha, calling from City Medical Center regarding your upcoming appointment. Is this a good time to speak?")
+        await self.session.say(initial_greeting)
+
+    async def on_exit(self) -> None:
+        """Log that the call has ended."""
+        self.logger.info("Call ended")
\ No newline at end of file