telephony-agent:support[twilio]

2025-08-02 04:19:31 +03:00 · 2025-06-17 18:47:28 +05:30
parent ffed783583
commit 50dcb7c9b4
17 changed files with 790 additions and 20 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,56 @@
+# Environment variables
+.env
+.env.local
+.env.*.local
+
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual environments
+.venv/
+venv/
+ENV/
+env/
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# OS
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+
+# Logs
+*.log
+logs/
+
+# Temporary files
+*.tmp
+*.temp 
--- a/21
+++ b/21
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 VideoSDK Community
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
--- a/README.md
+++ b/README.md
@@ -1,12 +1,31 @@
-# VideoSDK AI Telephony Agent
+<div align="left">
+
+# AI Telephony Agent
+
+<div align="left" style="margin:0px 12px;">

 Make INBOUND and OUTBOUND calls with AI agents using VideoSDK. Supports multiple SIP providers and AI agents with a clean, extensible architecture for VoIP telephony solutions.

+</div>
+<div align="center">
+
+![Architecture : Connecting Voice Agent to Telephony Agent](https://assets.videosdk.live/images/sip-telephony-agent.png)
+
+<a href="https://docs.videosdk.live/ai_agents/introduction" target="_blank"><img src="https://img.shields.io/badge/_Documentation-4285F4?style=for-the-badge" alt="Documentation"></a>
+<a href="https://www.youtube.com/playlist?list=PLrujdOR6BS_1fMqsHd9tynAg0foSRX5ti" target="_blank"><img src="https://img.shields.io/badge/_Tutorials-FF0000?style=for-the-badge&logo=youtube&logoColor=white" alt="Video Tutorials"></a>
+<a href="https://dub.sh/o59dJJB" target="_blank"><img src="https://img.shields.io/badge/_Get_Started-4285F4?style=for-the-badge" alt="Get Started"></a>
+<a href="https://discord.gg/f2WsNDN9S5" target="_blank"><img src="https://img.shields.io/badge/_Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord Community"></a>
+<a href="https://pypi.org/project/videosdk-agents/" target="_blank"><img src="https://img.shields.io/badge/_pip_install-3776AB?style=for-the-badge&logo=python&logoColor=white" alt="PyPI Package"></a>
+
+</div>
+
+</div>
+
 ## Installation

 ### Prerequisites

- Python 3.21+
+- Python 3.11+
 - VideoSDK account
 - Twilio account (SIP trunking provider)
 - Google API key (for Gemini AI)
@@ -47,7 +66,7 @@ TWILIO_NUMBER=your_twilio_number
 4. **Run the server**

 ```bash
-python server_modular.py
+python server.py
 ```

 The server will start on `http://localhost:8000`
@@ -90,7 +109,7 @@ POST /configure-provider?provider_name=twilio

 Switch SIP providers at runtime (currently supports: `twilio`).

-## 🔌 Adding New SIP Providers
+## Adding New SIP Providers

 The modular architecture makes it easy to add new SIP providers and SIP trunking services. Here's how to add a new provider:

@@ -163,7 +182,7 @@ class Config:
        # ... rest of validation
 ```

-## 🤖 Adding New AI Agents
+## Adding New AI Agents

 Similarly, you can add new AI agents for intelligent call handling:

@@ -342,19 +361,4 @@ For additional SIP providers, add their specific environment variables to `confi

 This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.

-## Support
-
- **Issues**: [GitHub Issues](https://github.com/yourusername/ai-agent-telephony/issues)
- **Documentation**: [Wiki](https://github.com/yourusername/ai-agent-telephony/wiki)
- **Discussions**: [GitHub Discussions](https://github.com/yourusername/ai-agent-telephony/discussions)
-
-## Acknowledgments
-
- [VideoSDK](https://videosdk.live/) for the real-time communication platform
- [Twilio](https://www.twilio.com/) for SIP trunking capabilities
- [Google Gemini](https://ai.google.dev/) for AI agent capabilities
- [FastAPI](https://fastapi.tiangolo.com/) for the web framework
-
---
-
 **Made with ❤️ for the developer community**
--- a/ai/init.py
+++ b/ai/init.py
@@ -0,0 +1,15 @@
+from .base_agent import AIAgent
+from .gemini_agent import GeminiAgent
+
+def get_ai_agent(agent_name: str = "gemini") -> AIAgent:
+    """Factory function to get the appropriate AI agent."""
+    agents = {
+        "gemini": GeminiAgent,
+    }
+    
+    if agent_name not in agents:
+        raise ValueError(f"Unsupported AI agent: {agent_name}. Available agents: {list(agents.keys())}")
+    
+    return agents[agent_name]()
+
+__all__ = ["AIAgent", "GeminiAgent", "get_ai_agent"] 
--- a/ai/base_agent.py
+++ b/ai/base_agent.py
@@ -0,0 +1,21 @@
+from abc import ABC, abstractmethod
+from typing import Dict, Any, Optional
+from videosdk.agents import AgentSession, RealTimePipeline
+
+class AIAgent(ABC):
+    """Base interface for AI agents."""
+    
+    @abstractmethod
+    def create_pipeline(self) -> RealTimePipeline:
+        """Create and return the AI pipeline."""
+        pass
+    
+    @abstractmethod
+    def create_session(self, room_id: str, context: Dict[str, Any]) -> AgentSession:
+        """Create and return an agent session."""
+        pass
+    
+    @abstractmethod
+    def get_agent_name(self) -> str:
+        """Return the agent name."""
+        pass 
--- a/ai/gemini_agent.py
+++ b/ai/gemini_agent.py
@@ -0,0 +1,45 @@
+from typing import Dict, Any
+from videosdk.agents import AgentSession, RealTimePipeline
+from videosdk.plugins.google import GeminiRealtime, GeminiLiveConfig
+from .base_agent import AIAgent
+from voice_agent import VoiceAgent
+from config import Config
+
+class GeminiAgent(AIAgent):
+    """Gemini AI agent implementation."""
+    
+    def create_pipeline(self) -> RealTimePipeline:
+        """Create and return the Gemini pipeline."""
+        model = GeminiRealtime(
+            model="gemini-2.0-flash-live-001",
+            api_key=Config.GOOGLE_API_KEY,
+            config=GeminiLiveConfig(
+                voice="Leda",
+                response_modalities=["AUDIO"],
+            )
+        )
+        return RealTimePipeline(model=model)
+    
+    def create_session(self, room_id: str, context: Dict[str, Any]) -> AgentSession:
+        """Create and return a Gemini agent session."""
+        pipeline = self.create_pipeline()
+        
+        # Context for the agent
+        agent_context = {
+            "name": "VideoSDK Gemini Agent",
+            "meetingId": room_id,
+            "videosdk_auth": Config.VIDEOSDK_AUTH_TOKEN,
+            **context
+        }
+        
+        session = AgentSession(
+            agent=VoiceAgent(context=agent_context),
+            pipeline=pipeline,
+            context=agent_context
+        )
+        
+        return session
+    
+    def get_agent_name(self) -> str:
+        """Return the agent name."""
+        return "gemini" 
--- a/config.py
+++ b/config.py
@@ -0,0 +1,52 @@
+import os
+import logging
+from typing import Dict, Any
+from dotenv import load_dotenv
+
+# Load environment variables via python-dotenv. This runs before the Config
+# class body below reads its values with os.getenv().
+load_dotenv()
+
+# Configure root logging at INFO level with a timestamp/name/level prefix.
+# This happens as an import-time side effect of this module.
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+# Module-level logger used by Config.validate().
+logger = logging.getLogger(__name__)
+
+class Config:
+    """Centralized configuration management."""
+    
+    # VideoSDK Configuration
+    VIDEOSDK_AUTH_TOKEN = os.getenv("VIDEOSDK_AUTH_TOKEN")
+    VIDEOSDK_SIP_USERNAME = os.getenv("VIDEOSDK_SIP_USERNAME")
+    VIDEOSDK_SIP_PASSWORD = os.getenv("VIDEOSDK_SIP_PASSWORD")
+    
+    # AI Configuration
+    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+    
+    # Twilio Configuration
+    TWILIO_ACCOUNT_SID = os.getenv("TWILIO_SID")
+    TWILIO_AUTH_TOKEN = os.getenv("TWILIO_AUTH_TOKEN")
+    TWILIO_NUMBER = os.getenv("TWILIO_NUMBER")
+    
+    @classmethod
+    def validate(cls) -> None:
+        """Validate that all required environment variables are set."""
+        required_vars = {
+            "VIDEOSDK_AUTH_TOKEN": cls.VIDEOSDK_AUTH_TOKEN,
+            "VIDEOSDK_SIP_USERNAME": cls.VIDEOSDK_SIP_USERNAME,
+            "VIDEOSDK_SIP_PASSWORD": cls.VIDEOSDK_SIP_PASSWORD,
+            "GOOGLE_API_KEY": cls.GOOGLE_API_KEY,
+            "TWILIO_SID": cls.TWILIO_ACCOUNT_SID,
+            "TWILIO_AUTH_TOKEN": cls.TWILIO_AUTH_TOKEN,
+            "TWILIO_NUMBER": cls.TWILIO_NUMBER,
+        }
+        
+        missing_vars = [var_name for var_name, var_value in required_vars.items() if not var_value]
+        
+        if missing_vars:
+            for var_name in missing_vars:
+                logger.error(f"Error: Missing environment variable: {var_name}")
+            raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")
+        
+        logger.info("All required environment variables are set.")
+
+# Validate configuration on import
+Config.validate() 
--- a/models.py
+++ b/models.py
@@ -0,0 +1,20 @@
+from typing import Optional
+from pydantic import BaseModel
+
+class OutboundCallRequest(BaseModel):
+    """Request model for initiating outbound calls."""
+    # Phone number to call.
+    to_number: str
+    # Optional greeting for the agent to open the call with; None when not supplied.
+    initial_greeting: Optional[str] = None
+
+class CallResponse(BaseModel):
+    """Response model for call operations."""
+    # Human-readable outcome of the operation.
+    message: str
+    # Call SID reported by the SIP provider, when a call was created.
+    twilio_call_sid: Optional[str] = None
+    # ID of the VideoSDK room associated with the call, when one was created.
+    videosdk_room_id: Optional[str] = None
+
+class SessionInfo(BaseModel):
+    """Model for session information."""
+    # NOTE(review): field meanings below are inferred from the names; confirm
+    # against whichever code populates this model.
+    # Room the session belongs to.
+    room_id: str
+    # Presumably the call direction (e.g. "inbound" / "outbound") - TODO confirm.
+    call_type: str
+    # Presumably the agent implementation serving the session - TODO confirm.
+    agent_type: str
+    # Current session state as a free-form string.
+    status: str
--- a/providers/init.py
+++ b/providers/init.py
@@ -0,0 +1,15 @@
+from .base import SIPProvider
+from .twilio_provider import TwilioProvider
+
+def get_provider(provider_name: str = "twilio") -> SIPProvider:
+    """Factory function to get the appropriate SIP provider."""
+    providers = {
+        "twilio": TwilioProvider,
+    }
+    
+    if provider_name not in providers:
+        raise ValueError(f"Unsupported provider: {provider_name}. Available providers: {list(providers.keys())}")
+    
+    return providers[provider_name]()
+
+__all__ = ["SIPProvider", "TwilioProvider", "get_provider"] 
--- a/providers/base.py
+++ b/providers/base.py
@@ -0,0 +1,26 @@
+from abc import ABC, abstractmethod
+from typing import Dict, Any, Optional
+from twilio.twiml.voice_response import VoiceResponse
+
+class SIPProvider(ABC):
+    """Base interface for SIP providers."""
+    
+    @abstractmethod
+    def create_client(self) -> Any:
+        """Create and return the provider's client instance."""
+        pass
+    
+    @abstractmethod
+    def generate_twiml(self, sip_endpoint: str, **kwargs) -> str:
+        """Generate TwiML for connecting to SIP endpoint."""
+        pass
+    
+    @abstractmethod
+    def initiate_outbound_call(self, to_number: str, twiml: str) -> Dict[str, Any]:
+        """Initiate an outbound call using the provider."""
+        pass
+    
+    @abstractmethod
+    def get_provider_name(self) -> str:
+        """Return the provider name."""
+        pass 
--- a/providers/twilio_provider.py
+++ b/providers/twilio_provider.py
@@ -0,0 +1,44 @@
+from typing import Dict, Any
+from twilio.rest import Client as TwilioClient
+from twilio.twiml.voice_response import VoiceResponse, Dial
+from .base import SIPProvider
+from config import Config
+
+class TwilioProvider(SIPProvider):
+    """Twilio SIP provider implementation."""
+    
+    def __init__(self):
+        self.client = self.create_client()
+    
+    def create_client(self) -> TwilioClient:
+        """Create and return Twilio client instance."""
+        return TwilioClient(Config.TWILIO_ACCOUNT_SID, Config.TWILIO_AUTH_TOKEN)
+    
+    def generate_twiml(self, sip_endpoint: str, **kwargs) -> str:
+        """Generate TwiML for connecting to SIP endpoint."""
+        response = VoiceResponse()
+        dial = Dial()
+        dial.sip(
+            sip_endpoint,
+            username=Config.VIDEOSDK_SIP_USERNAME,
+            password=Config.VIDEOSDK_SIP_PASSWORD,
+        )
+        response.append(dial)
+        return str(response)
+    
+    def initiate_outbound_call(self, to_number: str, twiml: str) -> Dict[str, Any]:
+        """Initiate an outbound call using Twilio."""
+        call = self.client.calls.create(
+            to=to_number,
+            from_=Config.TWILIO_NUMBER,
+            twiml=twiml
+        )
+        return {
+            "call_sid": call.sid,
+            "status": call.status,
+            "provider": "twilio"
+        }
+    
+    def get_provider_name(self) -> str:
+        """Return the provider name."""
+        return "twilio" 
--- a/requirements.txt
+++ b/requirements.txt
@@ -0,0 +1,121 @@
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.13
+aiohttp-retry==2.9.1
+aioice==0.10.1
+aiosignal==1.3.2
+annotated-types==0.7.0
+anyio==4.9.0
+attrs==25.3.0
+audioop-lts==0.2.1
+audioread==3.0.1
+av==13.1.0
+cachetools==5.5.2
+certifi==2025.6.15
+cffi==1.17.1
+charset-normalizer==3.4.2
+click==8.2.1
+cryptography==45.0.4
+decorator==5.2.1
+distro==1.9.0
+dnspython==2.7.0
+docstring-parser==0.16
+fastapi==0.115.12
+frozenlist==1.7.0
+google-api-core==2.25.1
+google-auth==2.40.3
+google-cloud-speech==2.33.0
+google-cloud-texttospeech==2.27.0
+google-crc32c==1.7.1
+google-genai==1.20.0
+googleapis-common-protos==1.70.0
+grpcio==1.73.0
+grpcio-status==1.73.0
+h11==0.16.0
+h264-profile-level-id==1.0.0
+httpcore==1.0.9
+httpx==0.28.1
+httpx-sse==0.4.0
+idna==3.10
+ifaddr==0.2.0
+jiter==0.10.0
+joblib==1.5.1
+lazy-loader==0.4
+librosa==0.11.0
+llvmlite==0.44.0
+markdown-it-py==3.0.0
+mcp==1.9.4
+mdurl==0.1.2
+msgpack==1.1.1
+multidict==6.4.4
+numba==0.61.2
+numpy==2.2.6
+openai==1.88.0
+packaging==25.0
+pillow==10.4.0
+platformdirs==4.3.8
+pooch==1.8.2
+propcache==0.3.2
+proto-plus==1.26.1
+protobuf==6.31.1
+pyasn1==0.6.1
+pyasn1-modules==0.4.2
+pycparser==2.22
+pycryptodome==3.20.0
+pydantic==2.11.7
+pydantic-core==2.33.2
+pydantic-settings==2.9.1
+pyee==11.1.0
+pygments==2.19.1
+pyjwt==2.10.1
+pylibsrtp==0.12.0
+pyopenssl==25.1.0
+python-dotenv==1.1.0
+python-multipart==0.0.20
+pyyaml==6.0.2
+requests==2.31.0
+rich==14.0.0
+rsa==4.9.1
+scikit-learn==1.7.0
+scipy==1.15.3
+sdp-transform==1.1.0
+sniffio==1.3.1
+soundfile==0.13.1
+soxr==0.5.0.post1
+sse-starlette==2.3.6
+standard-aifc==3.13.0
+standard-chunk==3.13.0
+standard-sunau==3.13.0
+starlette==0.46.2
+threadpoolctl==3.6.0
+tqdm==4.67.1
+twilio==9.6.3
+typing-extensions==4.14.0
+typing-inspection==0.4.1
+urllib3==2.4.0
+uvicorn==0.34.3
+videosdk==0.1.0
+videosdk-agents==0.0.14
+videosdk-plugins-google==0.0.6
+videosdk-plugins-openai==0.0.8
+vonage==4.4.3
+vonage-account==1.1.1
+vonage-application==2.0.1
+vonage-http-client==1.5.1
+vonage-jwt==1.1.5
+vonage-messages==1.4.0
+vonage-network-auth==1.0.2
+vonage-network-number-verification==1.0.2
+vonage-network-sim-swap==1.1.2
+vonage-number-insight==1.0.7
+vonage-numbers==1.0.4
+vonage-sms==1.1.6
+vonage-subaccounts==1.0.4
+vonage-users==1.2.1
+vonage-utils==1.1.4
+vonage-verify==2.1.0
+vonage-verify-legacy==1.0.1
+vonage-video==1.2.0
+vonage-voice==1.4.0
+vsaiortc==0.0.8
+websockets==15.0.1
+yarl==1.20.1
--- a/server.py
+++ b/server.py
@@ -0,0 +1,160 @@
+import logging
+from typing import Optional
+from fastapi import FastAPI, Request, Form, BackgroundTasks, HTTPException
+from fastapi.responses import PlainTextResponse
+
+# Import our modular components
+from config import Config
+from models import OutboundCallRequest, CallResponse, SessionInfo
+from providers import get_provider
+from services import VideoSDKService, SessionManager
+
+# Configure logging
+logger = logging.getLogger(__name__)
+
+# --- FastAPI App Initialization ---
+# ASGI application object; the route decorators below register handlers on it.
+app = FastAPI(
+    title="VideoSDK AI Agent Call Server (Modular)",
+    description="Modular FastAPI server for inbound/outbound calls with VideoSDK AI Agent using different providers.",
+    version="2.0.0"
+)
+
+# --- Initialize Services ---
+# Module-level singletons shared by every request handler in this file.
+videosdk_service = VideoSDKService()
+session_manager = SessionManager()
+# Rebound at runtime by the /configure-provider endpoint.
+sip_provider = get_provider("twilio")  # Default to Twilio
+
+# --- FastAPI Endpoints ---
+
+@app.get("/health", response_class=PlainTextResponse)
+async def health_check():
+    """Health check endpoint."""
+    active_sessions = session_manager.get_active_sessions_count()
+    return f"Server is healthy. Active sessions: {active_sessions}"
+
+@app.get("/sessions", response_class=PlainTextResponse)
+async def get_active_sessions():
+    """Get information about active sessions."""
+    session_info = session_manager.get_session_info()
+    
+    if not session_info:
+        return "No active sessions"
+    
+    session_details = []
+    for session in session_info:
+        session_details.append(
+            f"Room: {session['room_id']}, "
+            f"Agent: {session['agent_type']}, "
+            f"Status: {session['status']}"
+        )
+    
+    return "\n".join(session_details)
+
+@app.post("/inbound-call", response_class=PlainTextResponse)
+async def inbound_call(
+    request: Request,
+    background_tasks: BackgroundTasks,
+    CallSid: str = Form(...),
+    From: str = Form(...),
+    To: str = Form(...),
+):
+    """
+    Handles incoming calls from SIP provider.
+    1. Creates a VideoSDK room.
+    2. Creates an AI Agent session for the room.
+    3. Starts the session in a background task.
+    4. Generates TwiML to connect the call to the VideoSDK SIP endpoint.
+    """
+    logger.info(f"Inbound call received from {From} to {To}. CallSid: {CallSid}")
+
+    try:
+        # Create VideoSDK room
+        room_id = await videosdk_service.create_room()
+
+        # Create the AI agent session
+        session = await session_manager.create_session(room_id, "inbound")
+        
+        # Start the session in a background task
+        background_tasks.add_task(session_manager.run_session, session, room_id)
+
+        # Generate TwiML to connect the call to VideoSDK's SIP gateway
+        sip_endpoint = videosdk_service.get_sip_endpoint(room_id)
+        twiml = sip_provider.generate_twiml(sip_endpoint)
+
+        logger.info(f"Responding to {sip_provider.get_provider_name()} inbound call {CallSid} with TwiML to dial SIP: {sip_endpoint}")
+        return twiml
+
+    except HTTPException as e:
+        logger.error(f"Failed to handle inbound call {CallSid}: {e.detail}")
+        return PlainTextResponse(f"<Response><Say>An error occurred: {e.detail}</Say></Response>", status_code=500)
+    except Exception as e:
+        logger.error(f"Unhandled error in inbound call {CallSid}: {e}", exc_info=True)
+        return PlainTextResponse("<Response><Say>An unexpected error occurred. Please try again later.</Say></Response>", status_code=500)
+
+@app.post("/outbound-call")
+async def outbound_call(request_body: OutboundCallRequest, background_tasks: BackgroundTasks):
+    """
+    Initiates an outbound call using SIP provider, connecting to an AI Agent in a VideoSDK room.
+    """
+    to_number = request_body.to_number
+    initial_greeting = request_body.initial_greeting
+    logger.info(f"Request to initiate outbound call to: {to_number}")
+
+    if not to_number:
+        raise HTTPException(status_code=400, detail="'to_number' is required.")
+
+    try:
+        # Create VideoSDK room
+        room_id = await videosdk_service.create_room()
+
+        # Create the AI agent session
+        session = await session_manager.create_session(
+            room_id, 
+            "outbound", 
+            initial_greeting
+        )
+        
+        # Start the session in a background task
+        background_tasks.add_task(session_manager.run_session, session, room_id)
+
+        # Generate TwiML for connecting to SIP endpoint
+        sip_endpoint = videosdk_service.get_sip_endpoint(room_id)
+        twiml = sip_provider.generate_twiml(sip_endpoint)
+
+        logger.info(f"Outbound call SIP endpoint: {sip_endpoint}")
+
+        # Create the outbound call via SIP provider
+        call_result = sip_provider.initiate_outbound_call(to_number, twiml)
+
+        logger.info(f"Outbound call initiated via {sip_provider.get_provider_name()} to {to_number}. "
+                   f"Call SID: {call_result['call_sid']}. VideoSDK Room: {room_id}")
+        
+        return CallResponse(
+            message="Outbound call initiated successfully",
+            twilio_call_sid=call_result['call_sid'],
+            videosdk_room_id=room_id
+        )
+
+    except HTTPException as e:
+        logger.error(f"Failed to initiate outbound call to {to_number}: {e.detail}")
+        raise e
+    except Exception as e:
+        logger.error(f"Unhandled error initiating outbound call to {to_number}: {e}", exc_info=True)
+        raise HTTPException(status_code=500, detail=f"Failed to initiate outbound call: {e}")
+
+# --- Configuration Endpoints ---
+
+@app.post("/configure-provider")
+async def configure_provider(provider_name: str):
+    """Configure the SIP provider to use."""
+    global sip_provider
+    try:
+        sip_provider = get_provider(provider_name)
+        logger.info(f"SIP provider changed to: {provider_name}")
+        return {"message": f"Provider changed to {provider_name}"}
+    except ValueError as e:
+        raise HTTPException(status_code=400, detail=str(e))
+
+# Script entry point: serve the app with uvicorn when run directly.
+if __name__ == "__main__":
+    # Imported here so uvicorn is only needed when the file is run as a script.
+    import uvicorn
+    # Binds to all network interfaces on port 8000.
+    uvicorn.run(app, host="0.0.0.0", port=8000)
--- a/services/init.py
+++ b/services/init.py
@@ -0,0 +1,4 @@
+from .videosdk_service import VideoSDKService
+from .session_manager import SessionManager
+
+__all__ = ["VideoSDKService", "SessionManager"] 
--- a/services/session_manager.py
+++ b/services/session_manager.py
@@ -0,0 +1,81 @@
+import logging
+import asyncio
+from typing import Dict, Any, Optional
+from videosdk.agents import AgentSession
+from ai import get_ai_agent
+from config import Config
+
+logger = logging.getLogger(__name__)
+
+class SessionManager:
+    """Manages AI agent sessions."""
+    
+    def __init__(self):
+        self.active_sessions: Dict[str, AgentSession] = {}
+    
+    async def create_session(
+        self, 
+        room_id: str, 
+        call_type: str = "inbound",
+        initial_greeting: Optional[str] = None,
+        ai_agent_name: str = "gemini"
+    ) -> AgentSession:
+        """Create and store a new AI agent session."""
+        logger.info(f"Creating AI agent session for {call_type} call in room: {room_id}")
+        
+        try:
+            # Get the AI agent
+            ai_agent = get_ai_agent(ai_agent_name)
+            
+            # Prepare context
+            context = {
+                "call_type": call_type,
+            }
+            if initial_greeting:
+                context["initial_greeting"] = initial_greeting
+            
+            # Create session
+            session = ai_agent.create_session(room_id, context)
+            
+            # Store the session
+            self.active_sessions[room_id] = session
+            
+            logger.info(f"Session created for room {room_id} using {ai_agent.get_agent_name()}")
+            return session
+            
+        except Exception as e:
+            logger.error(f"Error creating AI agent session for room {room_id}: {e}", exc_info=True)
+            raise
+    
+    async def run_session(self, session: AgentSession, room_id: str):
+        """Run the agent session and keep it alive."""
+        try:
+            logger.info(f"Starting session for room {room_id}...")
+            await session.start()
+            logger.info(f"AI Agent session for room {room_id} has ended.")
+        except Exception as session_error:
+            logger.error(f"Session error for room {room_id}: {session_error}", exc_info=True)
+        finally:
+            # Clean up the session
+            self.cleanup_session(room_id)
+    
+    def cleanup_session(self, room_id: str):
+        """Clean up a session."""
+        if room_id in self.active_sessions:
+            del self.active_sessions[room_id]
+            logger.info(f"Session cleaned up for room {room_id}")
+    
+    def get_active_sessions_count(self) -> int:
+        """Get the number of active sessions."""
+        return len(self.active_sessions)
+    
+    def get_session_info(self) -> Dict[str, Any]:
+        """Get information about all active sessions."""
+        session_info = []
+        for room_id, session in self.active_sessions.items():
+            session_info.append({
+                "room_id": room_id,
+                "agent_type": session.agent.__class__.__name__,
+                "status": "active"
+            })
+        return session_info 
--- a/services/videosdk_service.py
+++ b/services/videosdk_service.py
@@ -0,0 +1,53 @@
+import logging
+import httpx
+from typing import Dict, Any
+from fastapi import HTTPException
+from config import Config
+
+logger = logging.getLogger(__name__)
+
+class VideoSDKService:
+    """Service for managing VideoSDK rooms and operations."""
+    
+    def __init__(self):
+        self.auth_token = Config.VIDEOSDK_AUTH_TOKEN
+        self.base_url = "https://api.videosdk.live/v2"
+    
+    async def create_room(self, geo_fence: str = "us002") -> str:
+        """Creates a new VideoSDK room and returns its ID."""
+        url = f"{self.base_url}/rooms"
+        headers = {
+            "Content-Type": "application/json",
+            "Authorization": self.auth_token
+        }
+       
+        
+        async with httpx.AsyncClient() as client:
+            try:
+                response = await client.post(url, headers=headers)
+                response.raise_for_status()
+                room_data = response.json()
+                
+                room_id = room_data.get("roomId")
+                if not room_id:
+                    raise ValueError("roomId not found in VideoSDK response.")
+                
+                logger.info(f"VideoSDK Room created: {room_id}")
+                return room_id
+                
+            except httpx.HTTPStatusError as e:
+                logger.error(f"HTTP error creating VideoSDK room: {e.response.status_code} - {e.response.text}")
+                raise HTTPException(
+                    status_code=500, 
+                    detail=f"Failed to create VideoSDK room: HTTP error {e.response.status_code}"
+                )
+            except Exception as e:
+                logger.error(f"Error creating VideoSDK room: {e}")
+                raise HTTPException(
+                    status_code=500, 
+                    detail=f"Failed to create VideoSDK room: {e}"
+                )
+    
+    def get_sip_endpoint(self, room_id: str) -> str:
+        """Generate SIP endpoint for a room."""
+        return f"sip:{room_id}@sip.videosdk.live" 
--- a/voice_agent.py
+++ b/voice_agent.py
@@ -0,0 +1,32 @@
+import logging
+from typing import Optional, List, Any
+from videosdk.agents import Agent
+
+logger = logging.getLogger(__name__)
+
+class VoiceAgent(Agent):
+    """An outbound call agent specialized for medical appointment scheduling."""
+
+    def __init__(
+        self,
+        instructions: str = "You are a medical appointment scheduling assistant. Your goal is to confirm upcoming appointments (5th June 2025 at 11:00 AM) and reschedule if needed.",
+        tools: Optional[List[Any]] = None,
+        context: Optional[dict] = None,
+    ) -> None:
+        """Initialize the AppointmentSchedulingAgent."""
+        super().__init__(
+            instructions=instructions,
+            tools=tools or []
+        )
+        self.context = context or {}
+        self.logger = logging.getLogger(__name__)
+        
+    async def on_enter(self) -> None:
+        """Handle agent entry into the session."""
+        self.logger.info("Agent entered the session.")
+        initial_greeting = self.context.get("initial_greeting", "Hello, this is Neha, calling from City Medical Center regarding your upcoming appointment. Is this a good time to speak?")
+        await self.session.say(initial_greeting)
+
+    async def on_exit(self) -> None:
+        """Handle call termination."""
+        self.logger.info("Call ended")