diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..f94585c
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,56 @@
+# Environment variables
+.env
+.env.local
+.env.*.local
+
+# Python
+__pycache__/
+*.py[cod]
+*$py.class
+*.so
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+
+# Virtual environments
+.venv/
+venv/
+ENV/
+env/
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+*~
+
+# OS
+.DS_Store
+.DS_Store?
+._*
+.Spotlight-V100
+.Trashes
+ehthumbs.db
+Thumbs.db
+
+# Logs
+*.log
+logs/
+
+# Temporary files
+*.tmp
+*.temp
\ No newline at end of file
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..a8a2540
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,21 @@
+MIT License
+
+Copyright (c) 2025 VideoSDK Community
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
diff --git a/README.md b/README.md
index a2f13ab..4763b21 100644
--- a/README.md
+++ b/README.md
@@ -1,12 +1,31 @@
-# VideoSDK AI Telephony Agent
+
+
+# AI Telephony Agent
+
+
Make INBOUND and OUTBOUND calls with AI agents using VideoSDK. Supports multiple SIP providers and AI agents with a clean, extensible architecture for VoIP telephony solutions.
+
+
+
+
+
+

+

+

+

+

+
+
+
+
+
## Installation
### Prerequisites
-- Python 3.21+
+- Python 3.11+
- VideoSDK account
- Twilio account (SIP trunking provider)
- Google API key (for Gemini AI)
@@ -47,7 +66,7 @@ TWILIO_NUMBER=your_twilio_number
4. **Run the server**
```bash
-python server_modular.py
+python server.py
```
The server will start on `http://localhost:8000`
@@ -90,7 +109,7 @@ POST /configure-provider?provider_name=twilio
Switch SIP providers at runtime (currently supports: `twilio`).
-## 🔌 Adding New SIP Providers
+## Adding New SIP Providers
The modular architecture makes it easy to add new SIP providers and SIP trunking services. Here's how to add a new provider:
@@ -163,7 +182,7 @@ class Config:
# ... rest of validation
```
-## 🤖 Adding New AI Agents
+## Adding New AI Agents
Similarly, you can add new AI agents for intelligent call handling:
@@ -342,19 +361,4 @@ For additional SIP providers, add their specific environment variables to `confi
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
-## Support
-
-- **Issues**: [GitHub Issues](https://github.com/yourusername/ai-agent-telephony/issues)
-- **Documentation**: [Wiki](https://github.com/yourusername/ai-agent-telephony/wiki)
-- **Discussions**: [GitHub Discussions](https://github.com/yourusername/ai-agent-telephony/discussions)
-
-## Acknowledgments
-
-- [VideoSDK](https://videosdk.live/) for the real-time communication platform
-- [Twilio](https://www.twilio.com/) for SIP trunking capabilities
-- [Google Gemini](https://ai.google.dev/) for AI agent capabilities
-- [FastAPI](https://fastapi.tiangolo.com/) for the web framework
-
----
-
**Made with ❤️ for the developer community**
diff --git a/ai/__init__.py b/ai/__init__.py
new file mode 100644
index 0000000..1414312
--- /dev/null
+++ b/ai/__init__.py
@@ -0,0 +1,15 @@
+from .base_agent import AIAgent
+from .gemini_agent import GeminiAgent
+
+def get_ai_agent(agent_name: str = "gemini") -> AIAgent:
+ """Factory function to get the appropriate AI agent."""
+ agents = {
+ "gemini": GeminiAgent,
+ }
+
+ if agent_name not in agents:
+ raise ValueError(f"Unsupported AI agent: {agent_name}. Available agents: {list(agents.keys())}")
+
+ return agents[agent_name]()
+
+__all__ = ["AIAgent", "GeminiAgent", "get_ai_agent"]
\ No newline at end of file
diff --git a/ai/base_agent.py b/ai/base_agent.py
new file mode 100644
index 0000000..93ffb54
--- /dev/null
+++ b/ai/base_agent.py
@@ -0,0 +1,21 @@
+from abc import ABC, abstractmethod
+from typing import Dict, Any, Optional
+from videosdk.agents import AgentSession, RealTimePipeline
+
+class AIAgent(ABC):
+ """Base interface for AI agents."""
+
+ @abstractmethod
+ def create_pipeline(self) -> RealTimePipeline:
+ """Create and return the AI pipeline."""
+ pass
+
+ @abstractmethod
+ def create_session(self, room_id: str, context: Dict[str, Any]) -> AgentSession:
+ """Create and return an agent session."""
+ pass
+
+ @abstractmethod
+ def get_agent_name(self) -> str:
+ """Return the agent name."""
+ pass
\ No newline at end of file
diff --git a/ai/gemini_agent.py b/ai/gemini_agent.py
new file mode 100644
index 0000000..9af046e
--- /dev/null
+++ b/ai/gemini_agent.py
@@ -0,0 +1,45 @@
+from typing import Dict, Any
+from videosdk.agents import AgentSession, RealTimePipeline
+from videosdk.plugins.google import GeminiRealtime, GeminiLiveConfig
+from .base_agent import AIAgent
+from voice_agent import VoiceAgent
+from config import Config
+
+class GeminiAgent(AIAgent):
+ """Gemini AI agent implementation."""
+
+ def create_pipeline(self) -> RealTimePipeline:
+ """Create and return the Gemini pipeline."""
+ model = GeminiRealtime(
+ model="gemini-2.0-flash-live-001",
+ api_key=Config.GOOGLE_API_KEY,
+ config=GeminiLiveConfig(
+ voice="Leda",
+ response_modalities=["AUDIO"],
+ )
+ )
+ return RealTimePipeline(model=model)
+
+ def create_session(self, room_id: str, context: Dict[str, Any]) -> AgentSession:
+ """Create and return a Gemini agent session."""
+ pipeline = self.create_pipeline()
+
+ # Context for the agent
+ agent_context = {
+ "name": "VideoSDK Gemini Agent",
+ "meetingId": room_id,
+ "videosdk_auth": Config.VIDEOSDK_AUTH_TOKEN,
+ **context
+ }
+
+ session = AgentSession(
+ agent=VoiceAgent(context=agent_context),
+ pipeline=pipeline,
+ context=agent_context
+ )
+
+ return session
+
+ def get_agent_name(self) -> str:
+ """Return the agent name."""
+ return "gemini"
\ No newline at end of file
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..82a4594
--- /dev/null
+++ b/config.py
@@ -0,0 +1,52 @@
+import os
+import logging
+from typing import Dict, Any
+from dotenv import load_dotenv
+
+# Load variables from a .env file into the environment before Config reads them below
+load_dotenv()
+
+# Configure root logging once for the process: INFO level, timestamped records
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+
+class Config:
+ """Centralized configuration management."""
+
+ # VideoSDK Configuration
+ VIDEOSDK_AUTH_TOKEN = os.getenv("VIDEOSDK_AUTH_TOKEN")
+ VIDEOSDK_SIP_USERNAME = os.getenv("VIDEOSDK_SIP_USERNAME")
+ VIDEOSDK_SIP_PASSWORD = os.getenv("VIDEOSDK_SIP_PASSWORD")
+
+ # AI Configuration
+ GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")
+
+ # Twilio Configuration
+ TWILIO_ACCOUNT_SID = os.getenv("TWILIO_SID")
+ TWILIO_AUTH_TOKEN = os.getenv("TWILIO_AUTH_TOKEN")
+ TWILIO_NUMBER = os.getenv("TWILIO_NUMBER")
+
+ @classmethod
+ def validate(cls) -> None:
+ """Validate that all required environment variables are set."""
+ required_vars = {
+ "VIDEOSDK_AUTH_TOKEN": cls.VIDEOSDK_AUTH_TOKEN,
+ "VIDEOSDK_SIP_USERNAME": cls.VIDEOSDK_SIP_USERNAME,
+ "VIDEOSDK_SIP_PASSWORD": cls.VIDEOSDK_SIP_PASSWORD,
+ "GOOGLE_API_KEY": cls.GOOGLE_API_KEY,
+ "TWILIO_SID": cls.TWILIO_ACCOUNT_SID,
+ "TWILIO_AUTH_TOKEN": cls.TWILIO_AUTH_TOKEN,
+ "TWILIO_NUMBER": cls.TWILIO_NUMBER,
+ }
+
+ missing_vars = [var_name for var_name, var_value in required_vars.items() if not var_value]
+
+ if missing_vars:
+ for var_name in missing_vars:
+ logger.error(f"Error: Missing environment variable: {var_name}")
+ raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")
+
+ logger.info("All required environment variables are set.")
+
+# Fail fast: importing this module raises ValueError when a required variable is missing
+Config.validate()
\ No newline at end of file
diff --git a/models.py b/models.py
new file mode 100644
index 0000000..33b7213
--- /dev/null
+++ b/models.py
@@ -0,0 +1,20 @@
+from typing import Optional
+from pydantic import BaseModel
+
+class OutboundCallRequest(BaseModel):
+ """Body of an outbound-call request: the number to dial and an optional greeting."""
+ to_number: str
+ initial_greeting: Optional[str] = None
+
+class CallResponse(BaseModel):
+ """Result of a call operation: a message plus optional Twilio call SID and VideoSDK room id."""
+ message: str
+ twilio_call_sid: Optional[str] = None
+ videosdk_room_id: Optional[str] = None
+
+class SessionInfo(BaseModel):
+ """Summary of a session: room id, call type, agent type and status (all required strings)."""
+ room_id: str
+ call_type: str
+ agent_type: str
+ status: str
\ No newline at end of file
diff --git a/providers/__init__.py b/providers/__init__.py
new file mode 100644
index 0000000..4ef910c
--- /dev/null
+++ b/providers/__init__.py
@@ -0,0 +1,15 @@
+from .base import SIPProvider
+from .twilio_provider import TwilioProvider
+
+def get_provider(provider_name: str = "twilio") -> SIPProvider:
+ """Factory function to get the appropriate SIP provider."""
+ providers = {
+ "twilio": TwilioProvider,
+ }
+
+ if provider_name not in providers:
+ raise ValueError(f"Unsupported provider: {provider_name}. Available providers: {list(providers.keys())}")
+
+ return providers[provider_name]()
+
+__all__ = ["SIPProvider", "TwilioProvider", "get_provider"]
\ No newline at end of file
diff --git a/providers/base.py b/providers/base.py
new file mode 100644
index 0000000..e8bf566
--- /dev/null
+++ b/providers/base.py
@@ -0,0 +1,26 @@
+from abc import ABC, abstractmethod
+from typing import Dict, Any, Optional
+from twilio.twiml.voice_response import VoiceResponse
+
+class SIPProvider(ABC):
+ """Base interface for SIP providers."""
+
+ @abstractmethod
+ def create_client(self) -> Any:
+ """Create and return the provider's client instance."""
+ pass
+
+ @abstractmethod
+ def generate_twiml(self, sip_endpoint: str, **kwargs) -> str:
+ """Generate TwiML for connecting to SIP endpoint."""
+ pass
+
+ @abstractmethod
+ def initiate_outbound_call(self, to_number: str, twiml: str) -> Dict[str, Any]:
+ """Initiate an outbound call using the provider."""
+ pass
+
+ @abstractmethod
+ def get_provider_name(self) -> str:
+ """Return the provider name."""
+ pass
\ No newline at end of file
diff --git a/providers/twilio_provider.py b/providers/twilio_provider.py
new file mode 100644
index 0000000..1a1ce3b
--- /dev/null
+++ b/providers/twilio_provider.py
@@ -0,0 +1,44 @@
+from typing import Dict, Any
+from twilio.rest import Client as TwilioClient
+from twilio.twiml.voice_response import VoiceResponse, Dial
+from .base import SIPProvider
+from config import Config
+
+class TwilioProvider(SIPProvider):
+ """Twilio SIP provider implementation."""
+
+ def __init__(self):
+ self.client = self.create_client()
+
+ def create_client(self) -> TwilioClient:
+ """Create and return Twilio client instance."""
+ return TwilioClient(Config.TWILIO_ACCOUNT_SID, Config.TWILIO_AUTH_TOKEN)
+
+ def generate_twiml(self, sip_endpoint: str, **kwargs) -> str:
+ """Generate TwiML for connecting to SIP endpoint."""
+ response = VoiceResponse()
+ dial = Dial()
+ dial.sip(
+ sip_endpoint,
+ username=Config.VIDEOSDK_SIP_USERNAME,
+ password=Config.VIDEOSDK_SIP_PASSWORD,
+ )
+ response.append(dial)
+ return str(response)
+
+ def initiate_outbound_call(self, to_number: str, twiml: str) -> Dict[str, Any]:
+ """Initiate an outbound call using Twilio."""
+ call = self.client.calls.create(
+ to=to_number,
+ from_=Config.TWILIO_NUMBER,
+ twiml=twiml
+ )
+ return {
+ "call_sid": call.sid,
+ "status": call.status,
+ "provider": "twilio"
+ }
+
+ def get_provider_name(self) -> str:
+ """Return the provider name."""
+ return "twilio"
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..1fe1148
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,121 @@
+aiohappyeyeballs==2.6.1
+aiohttp==3.12.13
+aiohttp-retry==2.9.1
+aioice==0.10.1
+aiosignal==1.3.2
+annotated-types==0.7.0
+anyio==4.9.0
+attrs==25.3.0
+audioop-lts==0.2.1
+audioread==3.0.1
+av==13.1.0
+cachetools==5.5.2
+certifi==2025.6.15
+cffi==1.17.1
+charset-normalizer==3.4.2
+click==8.2.1
+cryptography==45.0.4
+decorator==5.2.1
+distro==1.9.0
+dnspython==2.7.0
+docstring-parser==0.16
+fastapi==0.115.12
+frozenlist==1.7.0
+google-api-core==2.25.1
+google-auth==2.40.3
+google-cloud-speech==2.33.0
+google-cloud-texttospeech==2.27.0
+google-crc32c==1.7.1
+google-genai==1.20.0
+googleapis-common-protos==1.70.0
+grpcio==1.73.0
+grpcio-status==1.73.0
+h11==0.16.0
+h264-profile-level-id==1.0.0
+httpcore==1.0.9
+httpx==0.28.1
+httpx-sse==0.4.0
+idna==3.10
+ifaddr==0.2.0
+jiter==0.10.0
+joblib==1.5.1
+lazy-loader==0.4
+librosa==0.11.0
+llvmlite==0.44.0
+markdown-it-py==3.0.0
+mcp==1.9.4
+mdurl==0.1.2
+msgpack==1.1.1
+multidict==6.4.4
+numba==0.61.2
+numpy==2.2.6
+openai==1.88.0
+packaging==25.0
+pillow==10.4.0
+platformdirs==4.3.8
+pooch==1.8.2
+propcache==0.3.2
+proto-plus==1.26.1
+protobuf==6.31.1
+pyasn1==0.6.1
+pyasn1-modules==0.4.2
+pycparser==2.22
+pycryptodome==3.20.0
+pydantic==2.11.7
+pydantic-core==2.33.2
+pydantic-settings==2.9.1
+pyee==11.1.0
+pygments==2.19.1
+pyjwt==2.10.1
+pylibsrtp==0.12.0
+pyopenssl==25.1.0
+python-dotenv==1.1.0
+python-multipart==0.0.20
+pyyaml==6.0.2
+requests==2.31.0
+rich==14.0.0
+rsa==4.9.1
+scikit-learn==1.7.0
+scipy==1.15.3
+sdp-transform==1.1.0
+sniffio==1.3.1
+soundfile==0.13.1
+soxr==0.5.0.post1
+sse-starlette==2.3.6
+standard-aifc==3.13.0
+standard-chunk==3.13.0
+standard-sunau==3.13.0
+starlette==0.46.2
+threadpoolctl==3.6.0
+tqdm==4.67.1
+twilio==9.6.3
+typing-extensions==4.14.0
+typing-inspection==0.4.1
+urllib3==2.4.0
+uvicorn==0.34.3
+videosdk==0.1.0
+videosdk-agents==0.0.14
+videosdk-plugins-google==0.0.6
+videosdk-plugins-openai==0.0.8
+vonage==4.4.3
+vonage-account==1.1.1
+vonage-application==2.0.1
+vonage-http-client==1.5.1
+vonage-jwt==1.1.5
+vonage-messages==1.4.0
+vonage-network-auth==1.0.2
+vonage-network-number-verification==1.0.2
+vonage-network-sim-swap==1.1.2
+vonage-number-insight==1.0.7
+vonage-numbers==1.0.4
+vonage-sms==1.1.6
+vonage-subaccounts==1.0.4
+vonage-users==1.2.1
+vonage-utils==1.1.4
+vonage-verify==2.1.0
+vonage-verify-legacy==1.0.1
+vonage-video==1.2.0
+vonage-voice==1.4.0
+vsaiortc==0.0.8
+websockets==15.0.1
+yarl==1.20.1
diff --git a/server.py b/server.py
new file mode 100644
index 0000000..9257ddc
--- /dev/null
+++ b/server.py
@@ -0,0 +1,160 @@
+import logging
+from typing import Optional
+from fastapi import FastAPI, Request, Form, BackgroundTasks, HTTPException
+from fastapi.responses import PlainTextResponse
+
+# Import our modular components
+from config import Config
+from models import OutboundCallRequest, CallResponse, SessionInfo
+from providers import get_provider
+from services import VideoSDKService, SessionManager
+
+# Module-level logger; this line only obtains it, it does not configure logging
+logger = logging.getLogger(__name__)
+
+# --- FastAPI App Initialization ---
+app = FastAPI(
+ title="VideoSDK AI Agent Call Server (Modular)",
+ description="Modular FastAPI server for inbound/outbound calls with VideoSDK AI Agent using different providers.",
+ version="2.0.0"
+)
+
+# --- Initialize Services (module-level objects shared by every endpoint below) ---
+videosdk_service = VideoSDKService()
+session_manager = SessionManager()
+sip_provider = get_provider("twilio") # Default to Twilio; /configure-provider rebinds this global
+
+# --- FastAPI Endpoints ---
+
+@app.get("/health", response_class=PlainTextResponse)
+async def health_check():
+ """Health check endpoint."""
+ active_sessions = session_manager.get_active_sessions_count()
+ return f"Server is healthy. Active sessions: {active_sessions}"
+
+@app.get("/sessions", response_class=PlainTextResponse)
+async def get_active_sessions():
+ """Get information about active sessions."""
+ session_info = session_manager.get_session_info()
+
+ if not session_info:
+ return "No active sessions"
+
+ session_details = []
+ for session in session_info:
+ session_details.append(
+ f"Room: {session['room_id']}, "
+ f"Agent: {session['agent_type']}, "
+ f"Status: {session['status']}"
+ )
+
+ return "\n".join(session_details)
+
+@app.post("/inbound-call", response_class=PlainTextResponse)
+async def inbound_call(
+ request: Request,
+ background_tasks: BackgroundTasks,
+ CallSid: str = Form(...),
+ From: str = Form(...),
+ To: str = Form(...),
+):
+ """
+ Handles incoming calls from SIP provider.
+ 1. Creates a VideoSDK room.
+ 2. Creates an AI Agent session for the room.
+ 3. Starts the session in a background task.
+ 4. Generates TwiML to connect the call to the VideoSDK SIP endpoint.
+ """
+ logger.info(f"Inbound call received from {From} to {To}. CallSid: {CallSid}")
+
+ try:
+ # Create VideoSDK room
+ room_id = await videosdk_service.create_room()
+
+ # Create the AI agent session
+ session = await session_manager.create_session(room_id, "inbound")
+
+ # Start the session in a background task
+ background_tasks.add_task(session_manager.run_session, session, room_id)
+
+ # Generate TwiML to connect the call to VideoSDK's SIP gateway
+ sip_endpoint = videosdk_service.get_sip_endpoint(room_id)
+ twiml = sip_provider.generate_twiml(sip_endpoint)
+
+ logger.info(f"Responding to {sip_provider.get_provider_name()} inbound call {CallSid} with TwiML to dial SIP: {sip_endpoint}")
+ return twiml
+
+ except HTTPException as e:
+ logger.error(f"Failed to handle inbound call {CallSid}: {e.detail}")
+ return PlainTextResponse(f"An error occurred: {e.detail}", status_code=500)
+ except Exception as e:
+ logger.error(f"Unhandled error in inbound call {CallSid}: {e}", exc_info=True)
+ return PlainTextResponse("An unexpected error occurred. Please try again later.", status_code=500)
+
+@app.post("/outbound-call")
+async def outbound_call(request_body: OutboundCallRequest, background_tasks: BackgroundTasks):
+ """
+ Initiates an outbound call using SIP provider, connecting to an AI Agent in a VideoSDK room.
+ """
+ to_number = request_body.to_number
+ initial_greeting = request_body.initial_greeting
+ logger.info(f"Request to initiate outbound call to: {to_number}")
+
+ if not to_number:
+ raise HTTPException(status_code=400, detail="'to_number' is required.")
+
+ try:
+ # Create VideoSDK room
+ room_id = await videosdk_service.create_room()
+
+ # Create the AI agent session
+ session = await session_manager.create_session(
+ room_id,
+ "outbound",
+ initial_greeting
+ )
+
+ # Start the session in a background task
+ background_tasks.add_task(session_manager.run_session, session, room_id)
+
+ # Generate TwiML for connecting to SIP endpoint
+ sip_endpoint = videosdk_service.get_sip_endpoint(room_id)
+ twiml = sip_provider.generate_twiml(sip_endpoint)
+
+ logger.info(f"Outbound call SIP endpoint: {sip_endpoint}")
+
+ # Create the outbound call via SIP provider
+ call_result = sip_provider.initiate_outbound_call(to_number, twiml)
+
+ logger.info(f"Outbound call initiated via {sip_provider.get_provider_name()} to {to_number}. "
+ f"Call SID: {call_result['call_sid']}. VideoSDK Room: {room_id}")
+
+ return CallResponse(
+ message="Outbound call initiated successfully",
+ twilio_call_sid=call_result['call_sid'],
+ videosdk_room_id=room_id
+ )
+
+ except HTTPException as e:
+ logger.error(f"Failed to initiate outbound call to {to_number}: {e.detail}")
+ raise e
+ except Exception as e:
+ logger.error(f"Unhandled error initiating outbound call to {to_number}: {e}", exc_info=True)
+ raise HTTPException(status_code=500, detail=f"Failed to initiate outbound call: {e}")
+
+# --- Configuration Endpoints ---
+
+@app.post("/configure-provider")
+async def configure_provider(provider_name: str):
+ """Configure the SIP provider to use."""
+ global sip_provider
+ try:
+ sip_provider = get_provider(provider_name)
+ logger.info(f"SIP provider changed to: {provider_name}")
+ return {"message": f"Provider changed to {provider_name}"}
+ except ValueError as e:
+ raise HTTPException(status_code=400, detail=str(e))
+
+if __name__ == "__main__":  # only when executed as a script, not on import
+ import uvicorn
+ uvicorn.run(app, host="0.0.0.0", port=8000)  # serve the app on all interfaces, port 8000
\ No newline at end of file
diff --git a/services/__init__.py b/services/__init__.py
new file mode 100644
index 0000000..c0429e8
--- /dev/null
+++ b/services/__init__.py
@@ -0,0 +1,4 @@
+from .videosdk_service import VideoSDKService
+from .session_manager import SessionManager
+
+__all__ = ["VideoSDKService", "SessionManager"]
\ No newline at end of file
diff --git a/services/session_manager.py b/services/session_manager.py
new file mode 100644
index 0000000..8ad0d01
--- /dev/null
+++ b/services/session_manager.py
@@ -0,0 +1,81 @@
+import logging
+import asyncio
+from typing import Dict, Any, Optional
+from videosdk.agents import AgentSession
+from ai import get_ai_agent
+from config import Config
+
+logger = logging.getLogger(__name__)
+
+class SessionManager:
+ """Manages AI agent sessions."""
+
+ def __init__(self):
+ self.active_sessions: Dict[str, AgentSession] = {}
+
+ async def create_session(
+ self,
+ room_id: str,
+ call_type: str = "inbound",
+ initial_greeting: Optional[str] = None,
+ ai_agent_name: str = "gemini"
+ ) -> AgentSession:
+ """Create and store a new AI agent session."""
+ logger.info(f"Creating AI agent session for {call_type} call in room: {room_id}")
+
+ try:
+ # Get the AI agent
+ ai_agent = get_ai_agent(ai_agent_name)
+
+ # Prepare context
+ context = {
+ "call_type": call_type,
+ }
+ if initial_greeting:
+ context["initial_greeting"] = initial_greeting
+
+ # Create session
+ session = ai_agent.create_session(room_id, context)
+
+ # Store the session
+ self.active_sessions[room_id] = session
+
+ logger.info(f"Session created for room {room_id} using {ai_agent.get_agent_name()}")
+ return session
+
+ except Exception as e:
+ logger.error(f"Error creating AI agent session for room {room_id}: {e}", exc_info=True)
+ raise
+
+ async def run_session(self, session: AgentSession, room_id: str):
+ """Run the agent session and keep it alive."""
+ try:
+ logger.info(f"Starting session for room {room_id}...")
+ await session.start()
+ logger.info(f"AI Agent session for room {room_id} has ended.")
+ except Exception as session_error:
+ logger.error(f"Session error for room {room_id}: {session_error}", exc_info=True)
+ finally:
+ # Clean up the session
+ self.cleanup_session(room_id)
+
+ def cleanup_session(self, room_id: str):
+ """Clean up a session."""
+ if room_id in self.active_sessions:
+ del self.active_sessions[room_id]
+ logger.info(f"Session cleaned up for room {room_id}")
+
+ def get_active_sessions_count(self) -> int:
+ """Get the number of active sessions."""
+ return len(self.active_sessions)
+
+ def get_session_info(self) -> Dict[str, Any]:
+ """Get information about all active sessions."""
+ session_info = []
+ for room_id, session in self.active_sessions.items():
+ session_info.append({
+ "room_id": room_id,
+ "agent_type": session.agent.__class__.__name__,
+ "status": "active"
+ })
+ return session_info
\ No newline at end of file
diff --git a/services/videosdk_service.py b/services/videosdk_service.py
new file mode 100644
index 0000000..24cff3d
--- /dev/null
+++ b/services/videosdk_service.py
@@ -0,0 +1,53 @@
+import logging
+import httpx
+from typing import Dict, Any
+from fastapi import HTTPException
+from config import Config
+
+logger = logging.getLogger(__name__)
+
+class VideoSDKService:
+ """Service for managing VideoSDK rooms and operations."""
+
+ def __init__(self):
+ self.auth_token = Config.VIDEOSDK_AUTH_TOKEN
+ self.base_url = "https://api.videosdk.live/v2"
+
+ async def create_room(self, geo_fence: str = "us002") -> str:
+ """Creates a new VideoSDK room and returns its ID."""
+ url = f"{self.base_url}/rooms"
+ headers = {
+ "Content-Type": "application/json",
+ "Authorization": self.auth_token
+ }
+
+
+ async with httpx.AsyncClient() as client:
+ try:
+ response = await client.post(url, headers=headers)
+ response.raise_for_status()
+ room_data = response.json()
+
+ room_id = room_data.get("roomId")
+ if not room_id:
+ raise ValueError("roomId not found in VideoSDK response.")
+
+ logger.info(f"VideoSDK Room created: {room_id}")
+ return room_id
+
+ except httpx.HTTPStatusError as e:
+ logger.error(f"HTTP error creating VideoSDK room: {e.response.status_code} - {e.response.text}")
+ raise HTTPException(
+ status_code=500,
+ detail=f"Failed to create VideoSDK room: HTTP error {e.response.status_code}"
+ )
+ except Exception as e:
+ logger.error(f"Error creating VideoSDK room: {e}")
+ raise HTTPException(
+ status_code=500,
+ detail=f"Failed to create VideoSDK room: {e}"
+ )
+
+ def get_sip_endpoint(self, room_id: str) -> str:
+ """Generate SIP endpoint for a room."""
+ return f"sip:{room_id}@sip.videosdk.live"
\ No newline at end of file
diff --git a/voice_agent.py b/voice_agent.py
new file mode 100644
index 0000000..5be07c5
--- /dev/null
+++ b/voice_agent.py
@@ -0,0 +1,32 @@
+import logging
+from typing import Optional, List, Any
+from videosdk.agents import Agent
+
+logger = logging.getLogger(__name__)
+
+class VoiceAgent(Agent):
+ """An outbound call agent specialized for medical appointment scheduling."""
+
+ def __init__(
+ self,
+ instructions: str = "You are a medical appointment scheduling assistant. Your goal is to confirm upcoming appointments (5th June 2025 at 11:00 AM) and reschedule if needed.",
+ tools: Optional[List[Any]] = None,
+ context: Optional[dict] = None,
+ ) -> None:
+ """Initialize the AppointmentSchedulingAgent."""
+ super().__init__(
+ instructions=instructions,
+ tools=tools or []
+ )
+ self.context = context or {}
+ self.logger = logging.getLogger(__name__)
+
+ async def on_enter(self) -> None:
+ """Handle agent entry into the session."""
+ self.logger.info("Agent entered the session.")
+ initial_greeting = self.context.get("initial_greeting", "Hello, this is Neha, calling from City Medical Center regarding your upcoming appointment. Is this a good time to speak?")
+ await self.session.say(initial_greeting)
+
+ async def on_exit(self) -> None:
+ """Handle call termination."""
+ self.logger.info("Call ended")
\ No newline at end of file