telephony-agent:support[twilio]

2025-08-02 04:19:31 +03:00 · 2025-06-17 18:47:28 +05:30
parent ffed783583
commit 50dcb7c9b4
17 changed files with 790 additions and 20 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,56 @@
 # Environment variables
 .env
 .env.local
 .env.*.local
 # Python
 __pycache__/
 *.py[cod]
 *$py.class
 *.so
 .Python
 build/
 develop-eggs/
 dist/
 downloads/
 eggs/
 .eggs/
 lib/
 lib64/
 parts/
 sdist/
 var/
 wheels/
 *.egg-info/
 .installed.cfg
 *.egg
 # Virtual environments
 .venv/
 venv/
 ENV/
 env/
 # IDE
 .vscode/
 .idea/
 *.swp
 *.swo
 *~
 # OS
 .DS_Store
 .DS_Store?
 ._*
 .Spotlight-V100
 .Trashes
 ehthumbs.db
 Thumbs.db
 # Logs
 *.log
 logs/
 # Temporary files
 *.tmp
 *.temp 
--- a/LICENSE
+++ b/LICENSE
@@ -0,0 +1,21 @@
 MIT License
 Copyright (c) 2025 VideoSDK Community
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all
 copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 SOFTWARE.
--- a/README.md
+++ b/README.md
@@ -1,12 +1,31 @@
-# VideoSDK AI Telephony Agent
+<div align="left">
 # AI Telephony Agent
 <div align="left" style="margin:0px 12px;">
 Make INBOUND and OUTBOUND calls with AI agents using VideoSDK. Supports multiple SIP providers and AI agents with a clean, extensible architecture for VoIP telephony solutions.
 </div>
 <div align="center">
 ![Architecture : Connecting Voice Agent to Telephony Agent](https://assets.videosdk.live/images/sip-telephony-agent.png)
 <a href="https://docs.videosdk.live/ai_agents/introduction" target="_blank"><img src="https://img.shields.io/badge/_Documentation-4285F4?style=for-the-badge" alt="Documentation"></a>
 <a href="https://www.youtube.com/playlist?list=PLrujdOR6BS_1fMqsHd9tynAg0foSRX5ti" target="_blank"><img src="https://img.shields.io/badge/_Tutorials-FF0000?style=for-the-badge&logo=youtube&logoColor=white" alt="Video Tutorials"></a>
 <a href="https://dub.sh/o59dJJB" target="_blank"><img src="https://img.shields.io/badge/_Get_Started-4285F4?style=for-the-badge" alt="Get Started"></a>
 <a href="https://discord.gg/f2WsNDN9S5" target="_blank"><img src="https://img.shields.io/badge/_Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord Community"></a>
 <a href="https://pypi.org/project/videosdk-agents/" target="_blank"><img src="https://img.shields.io/badge/_pip_install-3776AB?style=for-the-badge&logo=python&logoColor=white" alt="PyPI Package"></a>
 </div>
 </div>
 ## Installation
 ### Prerequisites
- Python 3.21+
+- Python 3.11+
 - VideoSDK account
 - Twilio account (SIP trunking provider)
 - Google API key (for Gemini AI)
@@ -47,7 +66,7 @@ TWILIO_NUMBER=your_twilio_number
 4. **Run the server**
 ```bash
-python server_modular.py
+python server.py
 ```
 The server will start on `http://localhost:8000`
@@ -90,7 +109,7 @@ POST /configure-provider?provider_name=twilio
 Switch SIP providers at runtime (currently supports: `twilio`).
-## 🔌 Adding New SIP Providers
+## Adding New SIP Providers
 The modular architecture makes it easy to add new SIP providers and SIP trunking services. Here's how to add a new provider:
@@ -163,7 +182,7 @@ class Config:
        # ... rest of validation
 ```
-## 🤖 Adding New AI Agents
+## Adding New AI Agents
 Similarly, you can add new AI agents for intelligent call handling:
@@ -342,19 +361,4 @@ For additional SIP providers, add their specific environment variables to `confi
 This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
 ## Support
 - **Issues**: [GitHub Issues](https://github.com/yourusername/ai-agent-telephony/issues)
 - **Documentation**: [Wiki](https://github.com/yourusername/ai-agent-telephony/wiki)
 - **Discussions**: [GitHub Discussions](https://github.com/yourusername/ai-agent-telephony/discussions)
 ## Acknowledgments
 - [VideoSDK](https://videosdk.live/) for the real-time communication platform
 - [Twilio](https://www.twilio.com/) for SIP trunking capabilities
 - [Google Gemini](https://ai.google.dev/) for AI agent capabilities
 - [FastAPI](https://fastapi.tiangolo.com/) for the web framework
 ---
 **Made with ❤️ for the developer community**
--- a/ai/__init__.py
+++ b/ai/__init__.py
@@ -0,0 +1,15 @@
 from .base_agent import AIAgent
 from .gemini_agent import GeminiAgent
 def get_ai_agent(agent_name: str = "gemini") -> AIAgent:
    """Instantiate the AI agent registered under ``agent_name``.

    Args:
        agent_name: Registry key of the agent to build. Only "gemini" is
            registered here.

    Returns:
        A new instance of the matching agent class.

    Raises:
        ValueError: If ``agent_name`` is not a registered agent.
    """
    # Name -> agent class registry; add new agent implementations here.
    registry = {"gemini": GeminiAgent}
    agent_cls = registry.get(agent_name)
    if agent_cls is None:
        raise ValueError(f"Unsupported AI agent: {agent_name}. Available agents: {list(registry.keys())}")
    return agent_cls()
 # Names exported by this package: the agent interface, the Gemini
 # implementation, and the factory function.
 __all__ = ["AIAgent", "GeminiAgent", "get_ai_agent"] 
--- a/ai/base_agent.py
+++ b/ai/base_agent.py
@@ -0,0 +1,21 @@
 from abc import ABC, abstractmethod
 from typing import Dict, Any, Optional
 from videosdk.agents import AgentSession, RealTimePipeline
 class AIAgent(ABC):
    """Abstract contract that every AI agent implementation must fulfil."""
    @abstractmethod
    def create_pipeline(self) -> RealTimePipeline:
        """Build the pipeline this agent runs on and return it."""
    @abstractmethod
    def create_session(self, room_id: str, context: Dict[str, Any]) -> AgentSession:
        """Build an agent session for ``room_id`` from ``context`` and return it."""
    @abstractmethod
    def get_agent_name(self) -> str:
        """Return the name that identifies this agent."""
--- a/ai/gemini_agent.py
+++ b/ai/gemini_agent.py
@@ -0,0 +1,45 @@
 from typing import Dict, Any
 from videosdk.agents import AgentSession, RealTimePipeline
 from videosdk.plugins.google import GeminiRealtime, GeminiLiveConfig
 from .base_agent import AIAgent
 from voice_agent import VoiceAgent
 from config import Config
 class GeminiAgent(AIAgent):
    """AI agent backed by the Gemini realtime model."""
    def create_pipeline(self) -> RealTimePipeline:
        """Build a realtime pipeline around a Gemini live model.

        The model uses the "Leda" voice, audio-only responses, and the API key
        from ``Config.GOOGLE_API_KEY``.
        """
        live_config = GeminiLiveConfig(
            voice="Leda",
            response_modalities=["AUDIO"],
        )
        gemini = GeminiRealtime(
            model="gemini-2.0-flash-live-001",
            api_key=Config.GOOGLE_API_KEY,
            config=live_config,
        )
        return RealTimePipeline(model=gemini)
    def create_session(self, room_id: str, context: Dict[str, Any]) -> AgentSession:
        """Build an ``AgentSession`` that runs a ``VoiceAgent`` on a Gemini pipeline.

        Args:
            room_id: Room identifier, stored in the agent context as "meetingId".
            context: Extra context entries; these override the default
                "name", "meetingId" and "videosdk_auth" entries on key clash.

        Returns:
            The newly constructed session (not started).
        """
        pipeline = self.create_pipeline()
        # Defaults first, caller-supplied entries last so the caller wins.
        agent_context: Dict[str, Any] = {
            "name": "VideoSDK Gemini Agent",
            "meetingId": room_id,
            "videosdk_auth": Config.VIDEOSDK_AUTH_TOKEN,
        }
        agent_context.update(context)
        return AgentSession(
            agent=VoiceAgent(context=agent_context),
            pipeline=pipeline,
            context=agent_context,
        )
    def get_agent_name(self) -> str:
        """Return the registry name of this agent ("gemini")."""
        return "gemini"
--- a/config.py
+++ b/config.py
@@ -0,0 +1,52 @@
 import os
 import logging
 from typing import Dict, Any
 from dotenv import load_dotenv
 # Load environment variables
 # (runs at import time, before the Config class below reads os.getenv)
 load_dotenv()
 # Configure logging
 # (basicConfig on the root logger: INFO level; each record is formatted as
 # "time - logger name - level - message")
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
 logger = logging.getLogger(__name__)
 class Config:
    """Application settings, read from environment variables when the class is defined."""
    # VideoSDK credentials
    VIDEOSDK_AUTH_TOKEN = os.getenv("VIDEOSDK_AUTH_TOKEN")
    VIDEOSDK_SIP_USERNAME = os.getenv("VIDEOSDK_SIP_USERNAME")
    VIDEOSDK_SIP_PASSWORD = os.getenv("VIDEOSDK_SIP_PASSWORD")
    # AI credentials
    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
    # Twilio credentials (note: the account SID is read from TWILIO_SID)
    TWILIO_ACCOUNT_SID = os.getenv("TWILIO_SID")
    TWILIO_AUTH_TOKEN = os.getenv("TWILIO_AUTH_TOKEN")
    TWILIO_NUMBER = os.getenv("TWILIO_NUMBER")
    @classmethod
    def validate(cls) -> None:
        """Check that every required setting has a non-empty value.

        Each missing variable is logged as an error before raising; when
        nothing is missing a single info message is logged.

        Raises:
            ValueError: If one or more required variables are unset or empty.
        """
        # (environment variable name, value captured on the class)
        required = (
            ("VIDEOSDK_AUTH_TOKEN", cls.VIDEOSDK_AUTH_TOKEN),
            ("VIDEOSDK_SIP_USERNAME", cls.VIDEOSDK_SIP_USERNAME),
            ("VIDEOSDK_SIP_PASSWORD", cls.VIDEOSDK_SIP_PASSWORD),
            ("GOOGLE_API_KEY", cls.GOOGLE_API_KEY),
            ("TWILIO_SID", cls.TWILIO_ACCOUNT_SID),
            ("TWILIO_AUTH_TOKEN", cls.TWILIO_AUTH_TOKEN),
            ("TWILIO_NUMBER", cls.TWILIO_NUMBER),
        )
        missing = [name for name, value in required if not value]
        if not missing:
            logger.info("All required environment variables are set.")
            return
        for name in missing:
            logger.error(f"Error: Missing environment variable: {name}")
        raise ValueError(f"Missing required environment variables: {', '.join(missing)}")
 # Validate configuration on import
 # NOTE: Config.validate() raises ValueError when a required variable is
 # missing, so importing this module fails in that case.
 Config.validate() 
--- a/models.py
+++ b/models.py
@@ -0,0 +1,20 @@
 from typing import Optional
 from pydantic import BaseModel
 class OutboundCallRequest(BaseModel):
    """Request model for initiating outbound calls."""
    # Number to call; required.
    to_number: str
    # Optional greeting for the call; defaults to None when omitted.
    initial_greeting: Optional[str] = None
 class CallResponse(BaseModel):
    """Response model for call operations."""
    # Human-readable outcome message; required.
    message: str
    # Call SID returned by the provider, when available.
    twilio_call_sid: Optional[str] = None
    # ID of the VideoSDK room associated with the call, when available.
    videosdk_room_id: Optional[str] = None
 class SessionInfo(BaseModel):
    """Model for session information."""
    # All four fields are required strings.
    room_id: str
    # presumably "inbound" or "outbound" — TODO confirm against callers
    call_type: str
    agent_type: str
    status: str 
--- a/providers/__init__.py
+++ b/providers/__init__.py
@@ -0,0 +1,15 @@
 from .base import SIPProvider
 from .twilio_provider import TwilioProvider
 def get_provider(provider_name: str = "twilio") -> SIPProvider:
    """Instantiate the SIP provider registered under ``provider_name``.

    Args:
        provider_name: Registry key of the provider to build. Only "twilio"
            is registered here.

    Returns:
        A new instance of the matching provider class.

    Raises:
        ValueError: If ``provider_name`` is not a registered provider.
    """
    # Name -> provider class registry; add new SIP providers here.
    registry = {"twilio": TwilioProvider}
    provider_cls = registry.get(provider_name)
    if provider_cls is None:
        raise ValueError(f"Unsupported provider: {provider_name}. Available providers: {list(registry.keys())}")
    return provider_cls()
 # Names exported by this package: the provider interface, the Twilio
 # implementation, and the factory function.
 __all__ = ["SIPProvider", "TwilioProvider", "get_provider"] 
--- a/providers/base.py
+++ b/providers/base.py
@@ -0,0 +1,26 @@
 from abc import ABC, abstractmethod
 from typing import Dict, Any, Optional
 from twilio.twiml.voice_response import VoiceResponse
 class SIPProvider(ABC):
    """Abstract contract that every SIP provider implementation must fulfil."""
    @abstractmethod
    def create_client(self) -> Any:
        """Build the provider's client object and return it."""
    @abstractmethod
    def generate_twiml(self, sip_endpoint: str, **kwargs) -> str:
        """Return TwiML that connects a call to ``sip_endpoint``."""
    @abstractmethod
    def initiate_outbound_call(self, to_number: str, twiml: str) -> Dict[str, Any]:
        """Place an outbound call to ``to_number`` driven by ``twiml``; return call details."""
    @abstractmethod
    def get_provider_name(self) -> str:
        """Return the name that identifies this provider."""
--- a/providers/twilio_provider.py
+++ b/providers/twilio_provider.py
@@ -0,0 +1,44 @@
 from typing import Dict, Any
 from twilio.rest import Client as TwilioClient
 from twilio.twiml.voice_response import VoiceResponse, Dial
 from .base import SIPProvider
 from config import Config
 class TwilioProvider(SIPProvider):
    """SIP provider implemented on top of the Twilio REST client and TwiML builders."""
    def __init__(self):
        # One REST client per provider instance, reused for every outbound call.
        self.client = self.create_client()
    def create_client(self) -> TwilioClient:
        """Build a Twilio REST client from the account SID and auth token in ``Config``."""
        return TwilioClient(Config.TWILIO_ACCOUNT_SID, Config.TWILIO_AUTH_TOKEN)
    def generate_twiml(self, sip_endpoint: str, **kwargs) -> str:
        """Return TwiML that dials ``sip_endpoint`` with the VideoSDK SIP credentials.

        Extra keyword arguments are accepted but not used.
        """
        dial_verb = Dial()
        dial_verb.sip(
            sip_endpoint,
            username=Config.VIDEOSDK_SIP_USERNAME,
            password=Config.VIDEOSDK_SIP_PASSWORD,
        )
        twiml_doc = VoiceResponse()
        twiml_doc.append(dial_verb)
        return str(twiml_doc)
    def initiate_outbound_call(self, to_number: str, twiml: str) -> Dict[str, Any]:
        """Place a call from ``Config.TWILIO_NUMBER`` to ``to_number`` that executes ``twiml``.

        Returns:
            A dict with keys "call_sid", "status" and "provider".
        """
        placed_call = self.client.calls.create(
            to=to_number,
            from_=Config.TWILIO_NUMBER,
            twiml=twiml,
        )
        return dict(
            call_sid=placed_call.sid,
            status=placed_call.status,
            provider="twilio",
        )
    def get_provider_name(self) -> str:
        """Return the registry name of this provider ("twilio")."""
        return "twilio"
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,121 @@
 aiohappyeyeballs==2.6.1
 aiohttp==3.12.13
 aiohttp-retry==2.9.1
 aioice==0.10.1
 aiosignal==1.3.2
 annotated-types==0.7.0
 anyio==4.9.0
 attrs==25.3.0
 audioop-lts==0.2.1
 audioread==3.0.1
 av==13.1.0
 cachetools==5.5.2
 certifi==2025.6.15
 cffi==1.17.1
 charset-normalizer==3.4.2
 click==8.2.1
 cryptography==45.0.4
 decorator==5.2.1
 distro==1.9.0
 dnspython==2.7.0
 docstring-parser==0.16
 fastapi==0.115.12
 frozenlist==1.7.0
 google-api-core==2.25.1
 google-auth==2.40.3
 google-cloud-speech==2.33.0
 google-cloud-texttospeech==2.27.0
 google-crc32c==1.7.1
 google-genai==1.20.0
 googleapis-common-protos==1.70.0
 grpcio==1.73.0
 grpcio-status==1.73.0
 h11==0.16.0
 h264-profile-level-id==1.0.0
 httpcore==1.0.9
 httpx==0.28.1
 httpx-sse==0.4.0
 idna==3.10
 ifaddr==0.2.0
 jiter==0.10.0
 joblib==1.5.1
 lazy-loader==0.4
 librosa==0.11.0
 llvmlite==0.44.0
 markdown-it-py==3.0.0
 mcp==1.9.4
 mdurl==0.1.2
 msgpack==1.1.1
 multidict==6.4.4
 numba==0.61.2
 numpy==2.2.6
 openai==1.88.0
 packaging==25.0
 pillow==10.4.0
 platformdirs==4.3.8
 pooch==1.8.2
 propcache==0.3.2
 proto-plus==1.26.1
 protobuf==6.31.1
 pyasn1==0.6.1
 pyasn1-modules==0.4.2
 pycparser==2.22
 pycryptodome==3.20.0
 pydantic==2.11.7
 pydantic-core==2.33.2
 pydantic-settings==2.9.1
 pyee==11.1.0
 pygments==2.19.1
 pyjwt==2.10.1
 pylibsrtp==0.12.0
 pyopenssl==25.1.0
 python-dotenv==1.1.0
 python-multipart==0.0.20
 pyyaml==6.0.2
 requests==2.31.0
 rich==14.0.0
 rsa==4.9.1
 scikit-learn==1.7.0
 scipy==1.15.3
 sdp-transform==1.1.0
 sniffio==1.3.1
 soundfile==0.13.1
 soxr==0.5.0.post1
 sse-starlette==2.3.6
 standard-aifc==3.13.0
 standard-chunk==3.13.0
 standard-sunau==3.13.0
 starlette==0.46.2
 threadpoolctl==3.6.0
 tqdm==4.67.1
 twilio==9.6.3
 typing-extensions==4.14.0
 typing-inspection==0.4.1
 urllib3==2.4.0
 uvicorn==0.34.3
 videosdk==0.1.0
 videosdk-agents==0.0.14
 videosdk-plugins-google==0.0.6
 videosdk-plugins-openai==0.0.8
 vonage==4.4.3
 vonage-account==1.1.1
 vonage-application==2.0.1
 vonage-http-client==1.5.1
 vonage-jwt==1.1.5
 vonage-messages==1.4.0
 vonage-network-auth==1.0.2
 vonage-network-number-verification==1.0.2
 vonage-network-sim-swap==1.1.2
 vonage-number-insight==1.0.7
 vonage-numbers==1.0.4
 vonage-sms==1.1.6
 vonage-subaccounts==1.0.4
 vonage-users==1.2.1
 vonage-utils==1.1.4
 vonage-verify==2.1.0
 vonage-verify-legacy==1.0.1
 vonage-video==1.2.0
 vonage-voice==1.4.0
 vsaiortc==0.0.8
 websockets==15.0.1
 yarl==1.20.1
--- a/server.py
+++ b/server.py
@@ -0,0 +1,160 @@
 import logging
 from typing import Optional
 from fastapi import FastAPI, Request, Form, BackgroundTasks, HTTPException
 from fastapi.responses import PlainTextResponse
 # Import our modular components
 from config import Config
 from models import OutboundCallRequest, CallResponse, SessionInfo
 from providers import get_provider
 from services import VideoSDKService, SessionManager
 # Configure logging
 logger = logging.getLogger(__name__)
 # --- FastAPI App Initialization ---
 app = FastAPI(
    title="VideoSDK AI Agent Call Server (Modular)",
    description="Modular FastAPI server for inbound/outbound calls with VideoSDK AI Agent using different providers.",
    version="2.0.0"
 )
 # --- Initialize Services ---
 # Module-level objects shared by every request handler in this module.
 videosdk_service = VideoSDKService()
 session_manager = SessionManager()
 # Rebound at runtime by the /configure-provider endpoint.
 sip_provider = get_provider("twilio")  # Default to Twilio
 # --- FastAPI Endpoints ---
@app.get("/health", response_class=PlainTextResponse)
async def health_check():
    """Report that the server is up, along with the number of active sessions."""
    return f"Server is healthy. Active sessions: {session_manager.get_active_sessions_count()}"
@app.get("/sessions", response_class=PlainTextResponse)
async def get_active_sessions():
    """List active sessions as plain text, one line per session.

    Returns "No active sessions" when the session manager reports none.
    """
    sessions = session_manager.get_session_info()
    if not sessions:
        return "No active sessions"
    return "\n".join(
        f"Room: {entry['room_id']}, Agent: {entry['agent_type']}, Status: {entry['status']}"
        for entry in sessions
    )
@app.post("/inbound-call", response_class=PlainTextResponse)
async def inbound_call(
    request: Request,
    background_tasks: BackgroundTasks,
    CallSid: str = Form(...),
    From: str = Form(...),
    To: str = Form(...),
):
    """
    Handles incoming calls from SIP provider.
    1. Creates a VideoSDK room.
    2. Creates an AI Agent session for the room.
    3. Starts the session in a background task.
    4. Generates TwiML to connect the call to the VideoSDK SIP endpoint.

    Args:
        request: The incoming request (not used by the handler body).
        background_tasks: Task queue used to run the agent session after the
            response has been sent.
        CallSid, From, To: Form fields posted by the provider; used for logging.

    Returns:
        A response whose body is TwiML, served with an XML content type. On
        failure the body is a ``<Say>`` error message and the status is 500.
    """
    # Local import: only this handler needs XML escaping.
    from xml.sax.saxutils import escape

    # Twilio executes TwiML only when it is served as XML; a text/plain body
    # is read to the caller as text-to-speech instead of being interpreted.
    xml_media_type = "application/xml"
    logger.info(f"Inbound call received from {From} to {To}. CallSid: {CallSid}")
    try:
        # Create VideoSDK room
        room_id = await videosdk_service.create_room()
        # Create the AI agent session
        session = await session_manager.create_session(room_id, "inbound")
        # Start the session in a background task
        background_tasks.add_task(session_manager.run_session, session, room_id)
        # Generate TwiML to connect the call to VideoSDK's SIP gateway
        sip_endpoint = videosdk_service.get_sip_endpoint(room_id)
        twiml = sip_provider.generate_twiml(sip_endpoint)
        logger.info(f"Responding to {sip_provider.get_provider_name()} inbound call {CallSid} with TwiML to dial SIP: {sip_endpoint}")
        return PlainTextResponse(twiml, media_type=xml_media_type)
    except HTTPException as e:
        logger.error(f"Failed to handle inbound call {CallSid}: {e.detail}")
        # Escape the detail so characters such as '<' or '&' cannot break the XML.
        return PlainTextResponse(
            f"<Response><Say>An error occurred: {escape(str(e.detail))}</Say></Response>",
            status_code=500,
            media_type=xml_media_type,
        )
    except Exception as e:
        logger.error(f"Unhandled error in inbound call {CallSid}: {e}", exc_info=True)
        return PlainTextResponse(
            "<Response><Say>An unexpected error occurred. Please try again later.</Say></Response>",
            status_code=500,
            media_type=xml_media_type,
        )
@app.post("/outbound-call")
async def outbound_call(request_body: OutboundCallRequest, background_tasks: BackgroundTasks):
    """
    Initiates an outbound call using SIP provider, connecting to an AI Agent in a VideoSDK room.

    Args:
        request_body: Target number and optional initial greeting.
        background_tasks: Task queue used to run the agent session after the
            response has been sent.

    Returns:
        A ``CallResponse`` with the provider call SID and the VideoSDK room ID.

    Raises:
        HTTPException: 400 when ``to_number`` is empty; 500 (or the status of
            an HTTPException raised by a service) when the call cannot be set up.
    """
    # Local import: only this handler needs the thread offload helper.
    import asyncio

    to_number = request_body.to_number
    initial_greeting = request_body.initial_greeting
    logger.info(f"Request to initiate outbound call to: {to_number}")
    if not to_number:
        raise HTTPException(status_code=400, detail="'to_number' is required.")
    # Tracked outside the try so the failure paths know whether a session may
    # already have been registered for this room.
    room_id = None
    try:
        # Create VideoSDK room
        room_id = await videosdk_service.create_room()
        # Create the AI agent session
        session = await session_manager.create_session(
            room_id,
            "outbound",
            initial_greeting
        )
        # Start the session in a background task
        background_tasks.add_task(session_manager.run_session, session, room_id)
        # Generate TwiML for connecting to SIP endpoint
        sip_endpoint = videosdk_service.get_sip_endpoint(room_id)
        twiml = sip_provider.generate_twiml(sip_endpoint)
        logger.info(f"Outbound call SIP endpoint: {sip_endpoint}")
        # The provider call is a synchronous network request; run it in a
        # worker thread so it does not block the event loop.
        call_result = await asyncio.to_thread(
            sip_provider.initiate_outbound_call, to_number, twiml
        )
        logger.info(f"Outbound call initiated via {sip_provider.get_provider_name()} to {to_number}. "
                   f"Call SID: {call_result['call_sid']}. VideoSDK Room: {room_id}")
        return CallResponse(
            message="Outbound call initiated successfully",
            twilio_call_sid=call_result['call_sid'],
            videosdk_room_id=room_id
        )
    except HTTPException as e:
        logger.error(f"Failed to initiate outbound call to {to_number}: {e.detail}")
        # Background tasks do not run when the handler raises, so run_session
        # would never clean up; drop the registered session here instead.
        if room_id is not None:
            session_manager.cleanup_session(room_id)
        raise
    except Exception as e:
        logger.error(f"Unhandled error initiating outbound call to {to_number}: {e}", exc_info=True)
        if room_id is not None:
            session_manager.cleanup_session(room_id)
        raise HTTPException(status_code=500, detail=f"Failed to initiate outbound call: {e}") from e
 # --- Configuration Endpoints ---
@app.post("/configure-provider")
async def configure_provider(provider_name: str):
    """Replace the module-level SIP provider with the one named ``provider_name``.

    Raises:
        HTTPException: 400 when the provider name is not supported.
    """
    global sip_provider
    try:
        replacement = get_provider(provider_name)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))
    # Rebind only after the new provider was built successfully.
    sip_provider = replacement
    logger.info(f"SIP provider changed to: {provider_name}")
    return {"message": f"Provider changed to {provider_name}"}
 # Script entry point: serve the app with uvicorn when this file is run directly.
 if __name__ == "__main__":
    import uvicorn
    # host 0.0.0.0 listens on all network interfaces, port 8000.
    uvicorn.run(app, host="0.0.0.0", port=8000) 
--- a/services/__init__.py
+++ b/services/__init__.py
@@ -0,0 +1,4 @@
 from .videosdk_service import VideoSDKService
 from .session_manager import SessionManager
 # Names exported by this package: the two service classes imported above.
 __all__ = ["VideoSDKService", "SessionManager"] 
--- a/services/session_manager.py
+++ b/services/session_manager.py
@@ -0,0 +1,81 @@
 import logging
 import asyncio
 from typing import Dict, Any, Optional
 from videosdk.agents import AgentSession
 from ai import get_ai_agent
 from config import Config
 logger = logging.getLogger(__name__)
 class SessionManager:
    """Creates, runs and tracks AI agent sessions, keyed by room ID."""
    def __init__(self):
        # room_id -> session currently registered for that room
        self.active_sessions: Dict[str, AgentSession] = {}
    async def create_session(
        self,
        room_id: str,
        call_type: str = "inbound",
        initial_greeting: Optional[str] = None,
        ai_agent_name: str = "gemini"
    ) -> AgentSession:
        """Create an AI agent session for ``room_id`` and register it as active.

        Args:
            room_id: Room the session belongs to; also the registry key.
            call_type: Stored in the session context under "call_type".
            initial_greeting: When truthy, stored in the context under
                "initial_greeting".
            ai_agent_name: Name passed to ``get_ai_agent`` to pick the agent.

        Returns:
            The newly created session (not started).

        Raises:
            Exception: Any error raised while building the session is logged
                and re-raised unchanged.
        """
        logger.info(f"Creating AI agent session for {call_type} call in room: {room_id}")
        try:
            ai_agent = get_ai_agent(ai_agent_name)
            context: Dict[str, Any] = {"call_type": call_type}
            if initial_greeting:
                context["initial_greeting"] = initial_greeting
            session = ai_agent.create_session(room_id, context)
            # Register only after the session was built successfully.
            self.active_sessions[room_id] = session
            logger.info(f"Session created for room {room_id} using {ai_agent.get_agent_name()}")
            return session
        except Exception as e:
            logger.error(f"Error creating AI agent session for room {room_id}: {e}", exc_info=True)
            raise
    async def run_session(self, session: AgentSession, room_id: str):
        """Await ``session.start()`` and always unregister the session afterwards.

        Errors raised by the session are logged and swallowed so a failing
        session does not propagate out of the background task.
        """
        try:
            logger.info(f"Starting session for room {room_id}...")
            await session.start()
            logger.info(f"AI Agent session for room {room_id} has ended.")
        except Exception as session_error:
            logger.error(f"Session error for room {room_id}: {session_error}", exc_info=True)
        finally:
            # Clean up the session
            self.cleanup_session(room_id)
    def cleanup_session(self, room_id: str):
        """Unregister the session for ``room_id``; a no-op if none is registered."""
        # pop() removes and checks in one step, with no separate membership test.
        if self.active_sessions.pop(room_id, None) is not None:
            logger.info(f"Session cleaned up for room {room_id}")
    def get_active_sessions_count(self) -> int:
        """Return the number of currently registered sessions."""
        return len(self.active_sessions)
    def get_session_info(self) -> list[Dict[str, Any]]:
        """Return one summary dict per registered session.

        Each dict has the keys "room_id", "agent_type" (class name of the
        session's agent) and "status" (always "active").
        """
        return [
            {
                "room_id": room_id,
                "agent_type": session.agent.__class__.__name__,
                "status": "active",
            }
            for room_id, session in self.active_sessions.items()
        ]
--- a/services/videosdk_service.py
+++ b/services/videosdk_service.py
@@ -0,0 +1,53 @@
 import logging
 import httpx
 from typing import Dict, Any
 from fastapi import HTTPException
 from config import Config
 logger = logging.getLogger(__name__)
 class VideoSDKService:
    """Thin client for the VideoSDK REST API (room creation and SIP addressing)."""
    def __init__(self):
        self.auth_token = Config.VIDEOSDK_AUTH_TOKEN
        self.base_url = "https://api.videosdk.live/v2"
    async def create_room(self, geo_fence: str = "us002") -> str:
        """Create a VideoSDK room and return its ID.

        Sends a POST to ``{base_url}/rooms`` with the auth token in the
        Authorization header. ``geo_fence`` is accepted but is not included in
        the request sent here.

        Raises:
            HTTPException: Status 500 when the API answers with an error
                status, the response has no "roomId", or any other error occurs.
        """
        endpoint = f"{self.base_url}/rooms"
        request_headers = {
            "Content-Type": "application/json",
            "Authorization": self.auth_token,
        }
        async with httpx.AsyncClient() as client:
            try:
                response = await client.post(endpoint, headers=request_headers)
                response.raise_for_status()
                room_id = response.json().get("roomId")
                if room_id:
                    logger.info(f"VideoSDK Room created: {room_id}")
                    return room_id
                # Caught by the generic handler below and reported as a 500.
                raise ValueError("roomId not found in VideoSDK response.")
            except httpx.HTTPStatusError as e:
                status = e.response.status_code
                logger.error(f"HTTP error creating VideoSDK room: {status} - {e.response.text}")
                raise HTTPException(
                    status_code=500,
                    detail=f"Failed to create VideoSDK room: HTTP error {status}",
                )
            except Exception as e:
                logger.error(f"Error creating VideoSDK room: {e}")
                raise HTTPException(
                    status_code=500,
                    detail=f"Failed to create VideoSDK room: {e}",
                )
    def get_sip_endpoint(self, room_id: str) -> str:
        """Return the SIP URI for ``room_id`` on ``sip.videosdk.live``."""
        return "sip:" + f"{room_id}" + "@sip.videosdk.live"
--- a/voice_agent.py
+++ b/voice_agent.py
@@ -0,0 +1,32 @@
 import logging
 from typing import Optional, List, Any
 from videosdk.agents import Agent
 logger = logging.getLogger(__name__)
 class VoiceAgent(Agent):
    """Voice agent whose default persona is a medical appointment scheduling assistant."""
    def __init__(
        self,
        instructions: str = "You are a medical appointment scheduling assistant. Your goal is to confirm upcoming appointments (5th June 2025 at 11:00 AM) and reschedule if needed.",
        tools: Optional[List[Any]] = None,
        context: Optional[dict] = None,
    ) -> None:
        """Set up the agent.

        Args:
            instructions: Instructions handed to the base ``Agent``.
            tools: Tools handed to the base ``Agent``; a falsy value becomes ``[]``.
            context: Per-call context; a falsy value becomes ``{}``. The
                "initial_greeting" key is read when the agent enters the session.
        """
        agent_tools = tools if tools else []
        super().__init__(instructions=instructions, tools=agent_tools)
        self.context = context if context else {}
        self.logger = logging.getLogger(__name__)
    async def on_enter(self) -> None:
        """Speak the greeting when the agent enters the session.

        Uses ``context["initial_greeting"]`` when present, otherwise the
        built-in default greeting.
        """
        self.logger.info("Agent entered the session.")
        default_greeting = "Hello, this is Neha, calling from City Medical Center regarding your upcoming appointment. Is this a good time to speak?"
        greeting = self.context.get("initial_greeting", default_greeting)
        await self.session.say(greeting)
    async def on_exit(self) -> None:
        """Log that the call has ended."""
        self.logger.info("Call ended")