mirror of
https://github.com/videosdk-community/ai-telephony-demo.git
synced 2025-08-02 04:19:31 +03:00
telephony-agent:support[twilio]
This commit is contained in:
56
.gitignore
vendored
Normal file
56
.gitignore
vendored
Normal file
@@ -0,0 +1,56 @@
|
|||||||
|
# Environment variables
|
||||||
|
.env
|
||||||
|
.env.local
|
||||||
|
.env.*.local
|
||||||
|
|
||||||
|
# Python
|
||||||
|
__pycache__/
|
||||||
|
*.py[cod]
|
||||||
|
*$py.class
|
||||||
|
*.so
|
||||||
|
.Python
|
||||||
|
build/
|
||||||
|
develop-eggs/
|
||||||
|
dist/
|
||||||
|
downloads/
|
||||||
|
eggs/
|
||||||
|
.eggs/
|
||||||
|
lib/
|
||||||
|
lib64/
|
||||||
|
parts/
|
||||||
|
sdist/
|
||||||
|
var/
|
||||||
|
wheels/
|
||||||
|
*.egg-info/
|
||||||
|
.installed.cfg
|
||||||
|
*.egg
|
||||||
|
|
||||||
|
# Virtual environments
|
||||||
|
.venv/
|
||||||
|
venv/
|
||||||
|
ENV/
|
||||||
|
env/
|
||||||
|
|
||||||
|
# IDE
|
||||||
|
.vscode/
|
||||||
|
.idea/
|
||||||
|
*.swp
|
||||||
|
*.swo
|
||||||
|
*~
|
||||||
|
|
||||||
|
# OS
|
||||||
|
.DS_Store
|
||||||
|
.DS_Store?
|
||||||
|
._*
|
||||||
|
.Spotlight-V100
|
||||||
|
.Trashes
|
||||||
|
ehthumbs.db
|
||||||
|
Thumbs.db
|
||||||
|
|
||||||
|
# Logs
|
||||||
|
*.log
|
||||||
|
logs/
|
||||||
|
|
||||||
|
# Temporary files
|
||||||
|
*.tmp
|
||||||
|
*.temp
|
||||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
MIT License
|
||||||
|
|
||||||
|
Copyright (c) 2025 VideoSDK Community
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||||
|
of this software and associated documentation files (the "Software"), to deal
|
||||||
|
in the Software without restriction, including without limitation the rights
|
||||||
|
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
copies of the Software, and to permit persons to whom the Software is
|
||||||
|
furnished to do so, subject to the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included in all
|
||||||
|
copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||||
|
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||||
|
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||||
|
SOFTWARE.
|
||||||
44
README.md
44
README.md
@@ -1,12 +1,31 @@
|
|||||||
# VideoSDK AI Telephony Agent
|
<div align="left">
|
||||||
|
|
||||||
|
# AI Telephony Agent
|
||||||
|
|
||||||
|
<div align="left" style="margin:0px 12px;">
|
||||||
|
|
||||||
Make INBOUND and OUTBOUND calls with AI agents using VideoSDK. Supports multiple SIP providers and AI agents with a clean, extensible architecture for VoIP telephony solutions.
|
Make INBOUND and OUTBOUND calls with AI agents using VideoSDK. Supports multiple SIP providers and AI agents with a clean, extensible architecture for VoIP telephony solutions.
|
||||||
|
|
||||||
|
</div>
|
||||||
|
<div align="center">
|
||||||
|
|
||||||
|

|
||||||
|
|
||||||
|
<a href="https://docs.videosdk.live/ai_agents/introduction" target="_blank"><img src="https://img.shields.io/badge/_Documentation-4285F4?style=for-the-badge" alt="Documentation"></a>
|
||||||
|
<a href="https://www.youtube.com/playlist?list=PLrujdOR6BS_1fMqsHd9tynAg0foSRX5ti" target="_blank"><img src="https://img.shields.io/badge/_Tutorials-FF0000?style=for-the-badge&logo=youtube&logoColor=white" alt="Video Tutorials"></a>
|
||||||
|
<a href="https://dub.sh/o59dJJB" target="_blank"><img src="https://img.shields.io/badge/_Get_Started-4285F4?style=for-the-badge" alt="Get Started"></a>
|
||||||
|
<a href="https://discord.gg/f2WsNDN9S5" target="_blank"><img src="https://img.shields.io/badge/_Discord-5865F2?style=for-the-badge&logo=discord&logoColor=white" alt="Discord Community"></a>
|
||||||
|
<a href="https://pypi.org/project/videosdk-agents/" target="_blank"><img src="https://img.shields.io/badge/_pip_install-3776AB?style=for-the-badge&logo=python&logoColor=white" alt="PyPI Package"></a>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
|
</div>
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
### Prerequisites
|
### Prerequisites
|
||||||
|
|
||||||
- Python 3.21+
|
- Python 3.11+
|
||||||
- VideoSDK account
|
- VideoSDK account
|
||||||
- Twilio account (SIP trunking provider)
|
- Twilio account (SIP trunking provider)
|
||||||
- Google API key (for Gemini AI)
|
- Google API key (for Gemini AI)
|
||||||
@@ -47,7 +66,7 @@ TWILIO_NUMBER=your_twilio_number
|
|||||||
4. **Run the server**
|
4. **Run the server**
|
||||||
|
|
||||||
```bash
|
```bash
|
||||||
python server_modular.py
|
python server.py
|
||||||
```
|
```
|
||||||
|
|
||||||
The server will start on `http://localhost:8000`
|
The server will start on `http://localhost:8000`
|
||||||
@@ -90,7 +109,7 @@ POST /configure-provider?provider_name=twilio
|
|||||||
|
|
||||||
Switch SIP providers at runtime (currently supports: `twilio`).
|
Switch SIP providers at runtime (currently supports: `twilio`).
|
||||||
|
|
||||||
## 🔌 Adding New SIP Providers
|
## Adding New SIP Providers
|
||||||
|
|
||||||
The modular architecture makes it easy to add new SIP providers and SIP trunking services. Here's how to add a new provider:
|
The modular architecture makes it easy to add new SIP providers and SIP trunking services. Here's how to add a new provider:
|
||||||
|
|
||||||
@@ -163,7 +182,7 @@ class Config:
|
|||||||
# ... rest of validation
|
# ... rest of validation
|
||||||
```
|
```
|
||||||
|
|
||||||
## 🤖 Adding New AI Agents
|
## Adding New AI Agents
|
||||||
|
|
||||||
Similarly, you can add new AI agents for intelligent call handling:
|
Similarly, you can add new AI agents for intelligent call handling:
|
||||||
|
|
||||||
@@ -342,19 +361,4 @@ For additional SIP providers, add their specific environment variables to `confi
|
|||||||
|
|
||||||
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
|
||||||
|
|
||||||
## Support
|
|
||||||
|
|
||||||
- **Issues**: [GitHub Issues](https://github.com/yourusername/ai-agent-telephony/issues)
|
|
||||||
- **Documentation**: [Wiki](https://github.com/yourusername/ai-agent-telephony/wiki)
|
|
||||||
- **Discussions**: [GitHub Discussions](https://github.com/yourusername/ai-agent-telephony/discussions)
|
|
||||||
|
|
||||||
## Acknowledgments
|
|
||||||
|
|
||||||
- [VideoSDK](https://videosdk.live/) for the real-time communication platform
|
|
||||||
- [Twilio](https://www.twilio.com/) for SIP trunking capabilities
|
|
||||||
- [Google Gemini](https://ai.google.dev/) for AI agent capabilities
|
|
||||||
- [FastAPI](https://fastapi.tiangolo.com/) for the web framework
|
|
||||||
|
|
||||||
---
|
|
||||||
|
|
||||||
**Made with ❤️ for the developer community**
|
**Made with ❤️ for the developer community**
|
||||||
|
|||||||
15
ai/__init__.py
Normal file
15
ai/__init__.py
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
from .base_agent import AIAgent
|
||||||
|
from .gemini_agent import GeminiAgent
|
||||||
|
|
||||||
|
def get_ai_agent(agent_name: str = "gemini") -> AIAgent:
    """Build the AI agent registered under ``agent_name``.

    Args:
        agent_name: Registry key of the agent to create.

    Returns:
        A newly constructed instance of the matching agent class.

    Raises:
        ValueError: If no agent is registered under ``agent_name``.
    """
    # Registry of selectable agents; add an entry here to expose a new one.
    agents = {"gemini": GeminiAgent}

    agent_cls = agents.get(agent_name)
    if agent_cls is None:
        raise ValueError(f"Unsupported AI agent: {agent_name}. Available agents: {list(agents.keys())}")

    return agent_cls()
|
||||||
|
|
||||||
|
__all__ = ["AIAgent", "GeminiAgent", "get_ai_agent"]
|
||||||
21
ai/base_agent.py
Normal file
21
ai/base_agent.py
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import Dict, Any, Optional
|
||||||
|
from videosdk.agents import AgentSession, RealTimePipeline
|
||||||
|
|
||||||
|
class AIAgent(ABC):
    """Abstract contract implemented by every AI agent backend.

    Subclasses must override all three methods below before they can be
    instantiated.
    """

    @abstractmethod
    def create_pipeline(self) -> RealTimePipeline:
        """Build and return the pipeline used by this agent."""

    @abstractmethod
    def create_session(self, room_id: str, context: Dict[str, Any]) -> AgentSession:
        """Build and return an agent session.

        Args:
            room_id: Identifier of the room the session is created for.
            context: Extra key/value data supplied by the caller.
        """

    @abstractmethod
    def get_agent_name(self) -> str:
        """Return the name identifying this agent."""
|
||||||
45
ai/gemini_agent.py
Normal file
45
ai/gemini_agent.py
Normal file
@@ -0,0 +1,45 @@
|
|||||||
|
from typing import Dict, Any
|
||||||
|
from videosdk.agents import AgentSession, RealTimePipeline
|
||||||
|
from videosdk.plugins.google import GeminiRealtime, GeminiLiveConfig
|
||||||
|
from .base_agent import AIAgent
|
||||||
|
from voice_agent import VoiceAgent
|
||||||
|
from config import Config
|
||||||
|
|
||||||
|
class GeminiAgent(AIAgent):
    """AIAgent implementation backed by the Gemini realtime model."""

    def create_pipeline(self) -> RealTimePipeline:
        """Build a realtime pipeline around a Gemini live model.

        Returns:
            A ``RealTimePipeline`` wrapping a ``GeminiRealtime`` model that is
            configured with the "Leda" voice and audio-only responses.
        """
        live_config = GeminiLiveConfig(
            voice="Leda",
            response_modalities=["AUDIO"],
        )
        realtime_model = GeminiRealtime(
            model="gemini-2.0-flash-live-001",
            api_key=Config.GOOGLE_API_KEY,
            config=live_config,
        )
        return RealTimePipeline(model=realtime_model)

    def create_session(self, room_id: str, context: Dict[str, Any]) -> AgentSession:
        """Build an agent session bound to ``room_id``.

        Args:
            room_id: Room identifier, passed to the session as ``meetingId``.
            context: Caller-supplied entries; they override the defaults
                below when the same key is present.

        Returns:
            An ``AgentSession`` combining a ``VoiceAgent`` with a new pipeline.
        """
        pipeline = self.create_pipeline()

        # Defaults come first so that caller-provided keys take precedence.
        defaults = {
            "name": "VideoSDK Gemini Agent",
            "meetingId": room_id,
            "videosdk_auth": Config.VIDEOSDK_AUTH_TOKEN,
        }
        agent_context = {**defaults, **context}

        voice_agent = VoiceAgent(context=agent_context)
        return AgentSession(
            agent=voice_agent,
            pipeline=pipeline,
            context=agent_context,
        )

    def get_agent_name(self) -> str:
        """Return the registry name of this agent ("gemini")."""
        return "gemini"
|
||||||
52
config.py
Normal file
52
config.py
Normal file
@@ -0,0 +1,52 @@
|
|||||||
|
import os
|
||||||
|
import logging
|
||||||
|
from typing import Dict, Any
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
# Read a local .env file (if present) into the process environment.
load_dotenv()

# Process-wide logging setup: INFO level with timestamp, logger name and level.
_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class Config:
    """Application settings read from environment variables.

    Each attribute is resolved once, when this class body executes. A
    variable that is not set leaves its attribute as ``None``.
    """

    # VideoSDK credentials
    VIDEOSDK_AUTH_TOKEN = os.getenv("VIDEOSDK_AUTH_TOKEN")
    VIDEOSDK_SIP_USERNAME = os.getenv("VIDEOSDK_SIP_USERNAME")
    VIDEOSDK_SIP_PASSWORD = os.getenv("VIDEOSDK_SIP_PASSWORD")

    # AI credentials
    GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

    # Twilio credentials (note: the account SID is read from TWILIO_SID)
    TWILIO_ACCOUNT_SID = os.getenv("TWILIO_SID")
    TWILIO_AUTH_TOKEN = os.getenv("TWILIO_AUTH_TOKEN")
    TWILIO_NUMBER = os.getenv("TWILIO_NUMBER")

    @classmethod
    def validate(cls) -> None:
        """Check that every required setting has a non-empty value.

        Logs an info message when everything is present. Otherwise logs one
        error per missing variable and raises.

        Raises:
            ValueError: If one or more required variables are unset or empty;
                the message lists their environment variable names.
        """
        # (environment variable name, resolved value) pairs, in report order.
        checks = (
            ("VIDEOSDK_AUTH_TOKEN", cls.VIDEOSDK_AUTH_TOKEN),
            ("VIDEOSDK_SIP_USERNAME", cls.VIDEOSDK_SIP_USERNAME),
            ("VIDEOSDK_SIP_PASSWORD", cls.VIDEOSDK_SIP_PASSWORD),
            ("GOOGLE_API_KEY", cls.GOOGLE_API_KEY),
            ("TWILIO_SID", cls.TWILIO_ACCOUNT_SID),
            ("TWILIO_AUTH_TOKEN", cls.TWILIO_AUTH_TOKEN),
            ("TWILIO_NUMBER", cls.TWILIO_NUMBER),
        )

        missing_vars = []
        for var_name, var_value in checks:
            if var_value:
                continue
            missing_vars.append(var_name)

        if not missing_vars:
            logger.info("All required environment variables are set.")
            return

        for var_name in missing_vars:
            logger.error(f"Error: Missing environment variable: {var_name}")
        raise ValueError(f"Missing required environment variables: {', '.join(missing_vars)}")


# Fail fast: importing this module raises if the configuration is incomplete.
Config.validate()
|
||||||
20
models.py
Normal file
20
models.py
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
from typing import Optional
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
class OutboundCallRequest(BaseModel):
    """Request body accepted when starting an outbound call."""

    # Destination phone number to dial (required).
    to_number: str
    # Optional first sentence for the agent; omitted means None.
    initial_greeting: Optional[str] = None
|
||||||
|
|
||||||
|
class CallResponse(BaseModel):
    """Response body returned by call operations."""

    # Human-readable outcome description (required).
    message: str
    # Provider call identifier, when one is available.
    twilio_call_sid: Optional[str] = None
    # Identifier of the VideoSDK room tied to the call, when available.
    videosdk_room_id: Optional[str] = None
|
||||||
|
|
||||||
|
class SessionInfo(BaseModel):
    """Description of a single agent session; all fields are required strings."""

    # Room the session belongs to.
    room_id: str
    # Kind of call handled by the session.
    call_type: str
    # Kind of agent serving the session.
    agent_type: str
    # Current state of the session.
    status: str
|
||||||
15
providers/__init__.py
Normal file
15
providers/__init__.py
Normal file
@@ -0,0 +1,15 @@
|
|||||||
|
from .base import SIPProvider
|
||||||
|
from .twilio_provider import TwilioProvider
|
||||||
|
|
||||||
|
def get_provider(provider_name: str = "twilio") -> SIPProvider:
    """Build the SIP provider registered under ``provider_name``.

    Args:
        provider_name: Registry key of the provider to create.

    Returns:
        A newly constructed instance of the matching provider class.

    Raises:
        ValueError: If no provider is registered under ``provider_name``.
    """
    # Registry of selectable providers; add an entry here to expose a new one.
    providers = {"twilio": TwilioProvider}

    provider_cls = providers.get(provider_name)
    if provider_cls is None:
        raise ValueError(f"Unsupported provider: {provider_name}. Available providers: {list(providers.keys())}")

    return provider_cls()
|
||||||
|
|
||||||
|
__all__ = ["SIPProvider", "TwilioProvider", "get_provider"]
|
||||||
26
providers/base.py
Normal file
26
providers/base.py
Normal file
@@ -0,0 +1,26 @@
|
|||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import Dict, Any, Optional
|
||||||
|
from twilio.twiml.voice_response import VoiceResponse
|
||||||
|
|
||||||
|
class SIPProvider(ABC):
    """Abstract contract implemented by every SIP provider backend.

    Subclasses must override all four methods below before they can be
    instantiated.
    """

    @abstractmethod
    def create_client(self) -> Any:
        """Build and return the provider's client object."""

    @abstractmethod
    def generate_twiml(self, sip_endpoint: str, **kwargs) -> str:
        """Return TwiML text that connects a call to ``sip_endpoint``.

        Args:
            sip_endpoint: SIP address the call should be connected to.
            **kwargs: Provider-specific options.
        """

    @abstractmethod
    def initiate_outbound_call(self, to_number: str, twiml: str) -> Dict[str, Any]:
        """Place an outbound call through the provider.

        Args:
            to_number: Number to dial.
            twiml: Call instructions to run for the call.

        Returns:
            A dictionary describing the created call.
        """

    @abstractmethod
    def get_provider_name(self) -> str:
        """Return the name identifying this provider."""
|
||||||
44
providers/twilio_provider.py
Normal file
44
providers/twilio_provider.py
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
from typing import Dict, Any
|
||||||
|
from twilio.rest import Client as TwilioClient
|
||||||
|
from twilio.twiml.voice_response import VoiceResponse, Dial
|
||||||
|
from .base import SIPProvider
|
||||||
|
from config import Config
|
||||||
|
|
||||||
|
class TwilioProvider(SIPProvider):
    """SIPProvider implementation that places and bridges calls via Twilio."""

    def __init__(self):
        # One client per provider instance, built from the configured credentials.
        self.client = self.create_client()

    def create_client(self) -> TwilioClient:
        """Return a Twilio REST client built from the configured SID and token."""
        account_sid = Config.TWILIO_ACCOUNT_SID
        auth_token = Config.TWILIO_AUTH_TOKEN
        return TwilioClient(account_sid, auth_token)

    def generate_twiml(self, sip_endpoint: str, **kwargs) -> str:
        """Return TwiML that dials ``sip_endpoint`` over SIP.

        Args:
            sip_endpoint: SIP address to dial.
            **kwargs: Accepted for interface compatibility; not used here.

        Returns:
            The serialized ``<Response>`` document containing one ``<Dial>``
            with a ``<Sip>`` noun that carries the configured VideoSDK SIP
            username and password.
        """
        dial_verb = Dial()
        dial_verb.sip(
            sip_endpoint,
            username=Config.VIDEOSDK_SIP_USERNAME,
            password=Config.VIDEOSDK_SIP_PASSWORD,
        )

        twiml_document = VoiceResponse()
        twiml_document.append(dial_verb)
        return str(twiml_document)

    def initiate_outbound_call(self, to_number: str, twiml: str) -> Dict[str, Any]:
        """Create an outbound Twilio call from the configured number.

        Args:
            to_number: Number to dial.
            twiml: Inline TwiML executed for the call.

        Returns:
            A dict with the keys ``call_sid``, ``status`` and ``provider``.
        """
        created_call = self.client.calls.create(
            to=to_number,
            from_=Config.TWILIO_NUMBER,
            twiml=twiml,
        )

        result: Dict[str, Any] = {}
        result["call_sid"] = created_call.sid
        result["status"] = created_call.status
        result["provider"] = "twilio"
        return result

    def get_provider_name(self) -> str:
        """Return the registry name of this provider ("twilio")."""
        return "twilio"
|
||||||
121
requirements.txt
Normal file
121
requirements.txt
Normal file
@@ -0,0 +1,121 @@
|
|||||||
|
aiohappyeyeballs==2.6.1
|
||||||
|
aiohttp==3.12.13
|
||||||
|
aiohttp-retry==2.9.1
|
||||||
|
aioice==0.10.1
|
||||||
|
aiosignal==1.3.2
|
||||||
|
annotated-types==0.7.0
|
||||||
|
anyio==4.9.0
|
||||||
|
attrs==25.3.0
|
||||||
|
audioop-lts==0.2.1
|
||||||
|
audioread==3.0.1
|
||||||
|
av==13.1.0
|
||||||
|
cachetools==5.5.2
|
||||||
|
certifi==2025.6.15
|
||||||
|
cffi==1.17.1
|
||||||
|
charset-normalizer==3.4.2
|
||||||
|
click==8.2.1
|
||||||
|
cryptography==45.0.4
|
||||||
|
decorator==5.2.1
|
||||||
|
distro==1.9.0
|
||||||
|
dnspython==2.7.0
|
||||||
|
docstring-parser==0.16
|
||||||
|
fastapi==0.115.12
|
||||||
|
frozenlist==1.7.0
|
||||||
|
google-api-core==2.25.1
|
||||||
|
google-auth==2.40.3
|
||||||
|
google-cloud-speech==2.33.0
|
||||||
|
google-cloud-texttospeech==2.27.0
|
||||||
|
google-crc32c==1.7.1
|
||||||
|
google-genai==1.20.0
|
||||||
|
googleapis-common-protos==1.70.0
|
||||||
|
grpcio==1.73.0
|
||||||
|
grpcio-status==1.73.0
|
||||||
|
h11==0.16.0
|
||||||
|
h264-profile-level-id==1.0.0
|
||||||
|
httpcore==1.0.9
|
||||||
|
httpx==0.28.1
|
||||||
|
httpx-sse==0.4.0
|
||||||
|
idna==3.10
|
||||||
|
ifaddr==0.2.0
|
||||||
|
jiter==0.10.0
|
||||||
|
joblib==1.5.1
|
||||||
|
lazy-loader==0.4
|
||||||
|
librosa==0.11.0
|
||||||
|
llvmlite==0.44.0
|
||||||
|
markdown-it-py==3.0.0
|
||||||
|
mcp==1.9.4
|
||||||
|
mdurl==0.1.2
|
||||||
|
msgpack==1.1.1
|
||||||
|
multidict==6.4.4
|
||||||
|
numba==0.61.2
|
||||||
|
numpy==2.2.6
|
||||||
|
openai==1.88.0
|
||||||
|
packaging==25.0
|
||||||
|
pillow==10.4.0
|
||||||
|
platformdirs==4.3.8
|
||||||
|
pooch==1.8.2
|
||||||
|
propcache==0.3.2
|
||||||
|
proto-plus==1.26.1
|
||||||
|
protobuf==6.31.1
|
||||||
|
pyasn1==0.6.1
|
||||||
|
pyasn1-modules==0.4.2
|
||||||
|
pycparser==2.22
|
||||||
|
pycryptodome==3.20.0
|
||||||
|
pydantic==2.11.7
|
||||||
|
pydantic-core==2.33.2
|
||||||
|
pydantic-settings==2.9.1
|
||||||
|
pyee==11.1.0
|
||||||
|
pygments==2.19.1
|
||||||
|
pyjwt==2.10.1
|
||||||
|
pylibsrtp==0.12.0
|
||||||
|
pyopenssl==25.1.0
|
||||||
|
python-dotenv==1.1.0
|
||||||
|
python-multipart==0.0.20
|
||||||
|
pyyaml==6.0.2
|
||||||
|
requests==2.31.0
|
||||||
|
rich==14.0.0
|
||||||
|
rsa==4.9.1
|
||||||
|
scikit-learn==1.7.0
|
||||||
|
scipy==1.15.3
|
||||||
|
sdp-transform==1.1.0
|
||||||
|
sniffio==1.3.1
|
||||||
|
soundfile==0.13.1
|
||||||
|
soxr==0.5.0.post1
|
||||||
|
sse-starlette==2.3.6
|
||||||
|
standard-aifc==3.13.0
|
||||||
|
standard-chunk==3.13.0
|
||||||
|
standard-sunau==3.13.0
|
||||||
|
starlette==0.46.2
|
||||||
|
threadpoolctl==3.6.0
|
||||||
|
tqdm==4.67.1
|
||||||
|
twilio==9.6.3
|
||||||
|
typing-extensions==4.14.0
|
||||||
|
typing-inspection==0.4.1
|
||||||
|
urllib3==2.4.0
|
||||||
|
uvicorn==0.34.3
|
||||||
|
videosdk==0.1.0
|
||||||
|
videosdk-agents==0.0.14
|
||||||
|
videosdk-plugins-google==0.0.6
|
||||||
|
videosdk-plugins-openai==0.0.8
|
||||||
|
vonage==4.4.3
|
||||||
|
vonage-account==1.1.1
|
||||||
|
vonage-application==2.0.1
|
||||||
|
vonage-http-client==1.5.1
|
||||||
|
vonage-jwt==1.1.5
|
||||||
|
vonage-messages==1.4.0
|
||||||
|
vonage-network-auth==1.0.2
|
||||||
|
vonage-network-number-verification==1.0.2
|
||||||
|
vonage-network-sim-swap==1.1.2
|
||||||
|
vonage-number-insight==1.0.7
|
||||||
|
vonage-numbers==1.0.4
|
||||||
|
vonage-sms==1.1.6
|
||||||
|
vonage-subaccounts==1.0.4
|
||||||
|
vonage-users==1.2.1
|
||||||
|
vonage-utils==1.1.4
|
||||||
|
vonage-verify==2.1.0
|
||||||
|
vonage-verify-legacy==1.0.1
|
||||||
|
vonage-video==1.2.0
|
||||||
|
vonage-voice==1.4.0
|
||||||
|
vsaiortc==0.0.8
|
||||||
|
websockets==15.0.1
|
||||||
|
yarl==1.20.1
|
||||||
160
server.py
Normal file
160
server.py
Normal file
@@ -0,0 +1,160 @@
|
|||||||
|
import logging
|
||||||
|
from typing import Optional
|
||||||
|
from fastapi import FastAPI, Request, Form, BackgroundTasks, HTTPException
|
||||||
|
from fastapi.responses import PlainTextResponse
|
||||||
|
|
||||||
|
# Import our modular components
|
||||||
|
from config import Config
|
||||||
|
from models import OutboundCallRequest, CallResponse, SessionInfo
|
||||||
|
from providers import get_provider
|
||||||
|
from services import VideoSDKService, SessionManager
|
||||||
|
|
||||||
|
# Module-level logger for the API layer.
logger = logging.getLogger(__name__)

# --- FastAPI App Initialization ---
app = FastAPI(
    version="2.0.0",
    title="VideoSDK AI Agent Call Server (Modular)",
    description="Modular FastAPI server for inbound/outbound calls with VideoSDK AI Agent using different providers.",
)

# --- Initialize Services ---
# Objects shared by all endpoints in this module.
videosdk_service = VideoSDKService()
session_manager = SessionManager()
# Active SIP provider: Twilio by default, replaceable via /configure-provider.
sip_provider = get_provider("twilio")
|
||||||
|
|
||||||
|
# --- FastAPI Endpoints ---
|
||||||
|
|
||||||
|
@app.get("/health", response_class=PlainTextResponse)
async def health_check():
    """Report liveness together with the number of tracked agent sessions."""
    return f"Server is healthy. Active sessions: {session_manager.get_active_sessions_count()}"
|
||||||
|
|
||||||
|
@app.get("/sessions", response_class=PlainTextResponse)
async def get_active_sessions():
    """Return a plain-text listing of active sessions, one per line.

    Returns:
        "No active sessions" when nothing is tracked; otherwise one
        "Room: ..., Agent: ..., Status: ..." line per session.
    """
    entries = session_manager.get_session_info()
    if not entries:
        return "No active sessions"

    return "\n".join(
        f"Room: {entry['room_id']}, Agent: {entry['agent_type']}, Status: {entry['status']}"
        for entry in entries
    )
|
||||||
|
|
||||||
|
@app.post("/inbound-call", response_class=PlainTextResponse)
async def inbound_call(
    request: Request,
    background_tasks: BackgroundTasks,
    CallSid: str = Form(...),
    From: str = Form(...),
    To: str = Form(...),
):
    """Answer an incoming-call webhook by bridging the call to an AI agent.

    Steps:
        1. Create a VideoSDK room.
        2. Create an AI agent session for that room.
        3. Schedule the session to run as a background task.
        4. Reply with TwiML that dials the room's VideoSDK SIP endpoint.

    Args:
        request: Raw incoming request (not used by this handler).
        background_tasks: FastAPI task queue that runs the agent session.
        CallSid: Call identifier taken from the webhook form data.
        From: Caller number taken from the webhook form data.
        To: Dialled number taken from the webhook form data.

    Returns:
        An ``application/xml`` response holding the TwiML. On failure the
        body is a ``<Say>`` error message and the status code is 500.
    """
    # Local import: only this handler needs XML escaping.
    from xml.sax.saxutils import escape

    # TwiML has to be served as XML: Twilio treats a text/plain body as text
    # to read aloud rather than as instructions to execute.
    xml_media_type = "application/xml"

    logger.info(f"Inbound call received from {From} to {To}. CallSid: {CallSid}")

    try:
        # Create VideoSDK room
        room_id = await videosdk_service.create_room()

        # Create the AI agent session
        session = await session_manager.create_session(room_id, "inbound")

        # Start the session in a background task
        background_tasks.add_task(session_manager.run_session, session, room_id)

        # Generate TwiML to connect the call to VideoSDK's SIP gateway
        sip_endpoint = videosdk_service.get_sip_endpoint(room_id)
        twiml = sip_provider.generate_twiml(sip_endpoint)

        logger.info(f"Responding to {sip_provider.get_provider_name()} inbound call {CallSid} with TwiML to dial SIP: {sip_endpoint}")
        return PlainTextResponse(twiml, media_type=xml_media_type)

    except HTTPException as e:
        logger.error(f"Failed to handle inbound call {CallSid}: {e.detail}")
        # Escape the detail so characters such as "<" or "&" cannot break the XML.
        # NOTE(review): a 5xx status may stop the provider from speaking this
        # <Say>; confirm whether 200 is the intended status here.
        return PlainTextResponse(
            f"<Response><Say>An error occurred: {escape(str(e.detail))}</Say></Response>",
            status_code=500,
            media_type=xml_media_type,
        )
    except Exception as e:
        logger.error(f"Unhandled error in inbound call {CallSid}: {e}", exc_info=True)
        return PlainTextResponse(
            "<Response><Say>An unexpected error occurred. Please try again later.</Say></Response>",
            status_code=500,
            media_type=xml_media_type,
        )
|
||||||
|
|
||||||
|
@app.post("/outbound-call")
async def outbound_call(request_body: OutboundCallRequest, background_tasks: BackgroundTasks):
    """Start an outbound call that connects the callee to an AI agent.

    A VideoSDK room and an agent session are created first; the SIP provider
    is then asked to dial ``to_number`` with TwiML that bridges the call to
    the room's SIP endpoint.

    Args:
        request_body: Target number and optional initial greeting.
        background_tasks: FastAPI task queue that runs the agent session.

    Returns:
        A ``CallResponse`` carrying the provider call SID and the room id.

    Raises:
        HTTPException: 400 when ``to_number`` is empty; 500 (or the status of
            an HTTPException raised by a service) when the call cannot be
            initiated.
    """
    # Local import: only this handler needs to offload blocking work.
    import asyncio

    to_number = request_body.to_number
    initial_greeting = request_body.initial_greeting
    logger.info(f"Request to initiate outbound call to: {to_number}")

    if not to_number:
        raise HTTPException(status_code=400, detail="'to_number' is required.")

    # Remembered so a failure can drop the session registered for this room.
    room_id = None
    try:
        # Create VideoSDK room
        room_id = await videosdk_service.create_room()

        # Create the AI agent session
        session = await session_manager.create_session(
            room_id,
            "outbound",
            initial_greeting,
        )

        # Start the session in a background task
        background_tasks.add_task(session_manager.run_session, session, room_id)

        # Generate TwiML for connecting to SIP endpoint
        sip_endpoint = videosdk_service.get_sip_endpoint(room_id)
        twiml = sip_provider.generate_twiml(sip_endpoint)

        logger.info(f"Outbound call SIP endpoint: {sip_endpoint}")

        # The provider call is synchronous; run it in a worker thread so the
        # event loop is not blocked while it completes.
        call_result = await asyncio.to_thread(
            sip_provider.initiate_outbound_call, to_number, twiml
        )

        logger.info(f"Outbound call initiated via {sip_provider.get_provider_name()} to {to_number}. "
                    f"Call SID: {call_result['call_sid']}. VideoSDK Room: {room_id}")

        return CallResponse(
            message="Outbound call initiated successfully",
            twilio_call_sid=call_result['call_sid'],
            videosdk_room_id=room_id,
        )

    except HTTPException as e:
        logger.error(f"Failed to initiate outbound call to {to_number}: {e.detail}")
        # The queued background task does not run when the request fails, so
        # remove the session here or it would be reported as active forever.
        if room_id is not None:
            session_manager.cleanup_session(room_id)
        raise
    except Exception as e:
        logger.error(f"Unhandled error initiating outbound call to {to_number}: {e}", exc_info=True)
        if room_id is not None:
            session_manager.cleanup_session(room_id)
        raise HTTPException(status_code=500, detail=f"Failed to initiate outbound call: {e}") from e
|
||||||
|
|
||||||
|
# --- Configuration Endpoints ---
|
||||||
|
|
||||||
|
@app.post("/configure-provider")
async def configure_provider(provider_name: str):
    """Replace the module-wide SIP provider with the one named ``provider_name``.

    Args:
        provider_name: Registry key of the provider to activate.

    Returns:
        A dict with a confirmation ``message``.

    Raises:
        HTTPException: 400 when the provider name is not supported.
    """
    global sip_provider

    try:
        replacement = get_provider(provider_name)
    except ValueError as e:
        raise HTTPException(status_code=400, detail=str(e))

    sip_provider = replacement
    logger.info(f"SIP provider changed to: {provider_name}")
    return {"message": f"Provider changed to {provider_name}"}
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Script entry point: serve the app with uvicorn on all interfaces, port 8000.
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=8000)
|
||||||
4
services/__init__.py
Normal file
4
services/__init__.py
Normal file
@@ -0,0 +1,4 @@
|
|||||||
|
from .videosdk_service import VideoSDKService
|
||||||
|
from .session_manager import SessionManager
|
||||||
|
|
||||||
|
__all__ = ["VideoSDKService", "SessionManager"]
|
||||||
81
services/session_manager.py
Normal file
81
services/session_manager.py
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
import logging
|
||||||
|
import asyncio
|
||||||
|
from typing import Dict, Any, Optional
|
||||||
|
from videosdk.agents import AgentSession
|
||||||
|
from ai import get_ai_agent
|
||||||
|
from config import Config
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class SessionManager:
    """Creates, runs and tracks AI agent sessions keyed by room id."""

    def __init__(self):
        # room_id -> session, for every session created and not yet cleaned up.
        self.active_sessions: Dict[str, AgentSession] = {}

    async def create_session(
        self,
        room_id: str,
        call_type: str = "inbound",
        initial_greeting: Optional[str] = None,
        ai_agent_name: str = "gemini",
    ) -> AgentSession:
        """Create an agent session for ``room_id`` and register it.

        Args:
            room_id: Room the session is created for; also the registry key.
            call_type: Stored in the session context under ``call_type``.
            initial_greeting: When truthy, stored in the session context
                under ``initial_greeting``.
            ai_agent_name: Name passed to ``get_ai_agent`` to pick the agent.

        Returns:
            The newly created session.

        Raises:
            Exception: Any error raised while building the session is logged
                and re-raised unchanged.
        """
        logger.info(f"Creating AI agent session for {call_type} call in room: {room_id}")

        try:
            # Get the AI agent
            ai_agent = get_ai_agent(ai_agent_name)

            # Prepare context
            context = {"call_type": call_type}
            if initial_greeting:
                context["initial_greeting"] = initial_greeting

            # Create and register the session
            session = ai_agent.create_session(room_id, context)
            self.active_sessions[room_id] = session

            logger.info(f"Session created for room {room_id} using {ai_agent.get_agent_name()}")
            return session

        except Exception as e:
            logger.error(f"Error creating AI agent session for room {room_id}: {e}", exc_info=True)
            raise

    async def run_session(self, session: AgentSession, room_id: str):
        """Run ``session`` until it ends, then remove it from the registry.

        Errors raised by the session are logged and not propagated; the
        registry entry is removed in every case.
        """
        try:
            logger.info(f"Starting session for room {room_id}...")
            await session.start()
            logger.info(f"AI Agent session for room {room_id} has ended.")
        except Exception as session_error:
            logger.error(f"Session error for room {room_id}: {session_error}", exc_info=True)
        finally:
            # Always drop the registry entry, whether the session ended or failed.
            self.cleanup_session(room_id)

    def cleanup_session(self, room_id: str):
        """Remove the session registered for ``room_id``; no-op when absent."""
        if room_id in self.active_sessions:
            del self.active_sessions[room_id]
            logger.info(f"Session cleaned up for room {room_id}")

    def get_active_sessions_count(self) -> int:
        """Return how many sessions are currently registered."""
        return len(self.active_sessions)

    def get_session_info(self) -> list[Dict[str, Any]]:
        """Describe every registered session.

        Returns:
            One dict per session with the keys ``room_id``, ``agent_type``
            (class name of the session's agent) and ``status`` (always
            ``"active"``). Empty list when nothing is registered.
        """
        return [
            {
                "room_id": room_id,
                "agent_type": session.agent.__class__.__name__,
                "status": "active",
            }
            for room_id, session in self.active_sessions.items()
        ]
|
||||||
53
services/videosdk_service.py
Normal file
53
services/videosdk_service.py
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
import logging
|
||||||
|
import httpx
|
||||||
|
from typing import Dict, Any
|
||||||
|
from fastapi import HTTPException
|
||||||
|
from config import Config
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class VideoSDKService:
    """Service for creating VideoSDK rooms and deriving their SIP endpoints."""

    def __init__(self):
        self.auth_token = Config.VIDEOSDK_AUTH_TOKEN
        self.base_url = "https://api.videosdk.live/v2"

    async def create_room(self, geo_fence: str = "us002") -> str:
        """Create a new VideoSDK room and return its id.

        Sends ``POST {base_url}/rooms`` with the auth token in the
        ``Authorization`` header and reads ``roomId`` from the JSON reply.

        Args:
            geo_fence: Accepted but not included in the request.
                NOTE(review): confirm whether it should be sent to the API.

        Returns:
            The ``roomId`` value from the response.

        Raises:
            HTTPException: Status 500 when the API answers with an error
                status, when ``roomId`` is missing, or on any other failure.
                The original exception is attached as the cause.
        """
        url = f"{self.base_url}/rooms"
        headers = {
            "Content-Type": "application/json",
            "Authorization": self.auth_token,
        }

        async with httpx.AsyncClient() as client:
            try:
                response = await client.post(url, headers=headers)
                response.raise_for_status()
                room_data = response.json()

                room_id = room_data.get("roomId")
                if not room_id:
                    raise ValueError("roomId not found in VideoSDK response.")

                logger.info(f"VideoSDK Room created: {room_id}")
                return room_id

            except httpx.HTTPStatusError as e:
                logger.error(f"HTTP error creating VideoSDK room: {e.response.status_code} - {e.response.text}")
                # Chain the cause so the upstream failure stays in the traceback.
                raise HTTPException(
                    status_code=500,
                    detail=f"Failed to create VideoSDK room: HTTP error {e.response.status_code}",
                ) from e
            except Exception as e:
                # Include the traceback: this branch covers unexpected failures.
                logger.error(f"Error creating VideoSDK room: {e}", exc_info=True)
                raise HTTPException(
                    status_code=500,
                    detail=f"Failed to create VideoSDK room: {e}",
                ) from e

    def get_sip_endpoint(self, room_id: str) -> str:
        """Return the SIP URI for ``room_id`` (``sip:<room_id>@sip.videosdk.live``)."""
        return f"sip:{room_id}@sip.videosdk.live"
|
||||||
32
voice_agent.py
Normal file
32
voice_agent.py
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
import logging
|
||||||
|
from typing import Optional, List, Any
|
||||||
|
from videosdk.agents import Agent
|
||||||
|
|
||||||
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
class VoiceAgent(Agent):
    """Call agent specialised for medical appointment scheduling."""

    def __init__(
        self,
        instructions: str = "You are a medical appointment scheduling assistant. Your goal is to confirm upcoming appointments (5th June 2025 at 11:00 AM) and reschedule if needed.",
        tools: Optional[List[Any]] = None,
        context: Optional[dict] = None,
    ) -> None:
        """Initialize the VoiceAgent.

        Args:
            instructions: Prompt handed to the base ``Agent``.
            tools: Tools handed to the base ``Agent``; ``None`` means no tools.
            context: Per-call data kept on ``self.context``; ``None`` means
                an empty dict.
        """
        resolved_tools = tools or []
        super().__init__(instructions=instructions, tools=resolved_tools)

        self.context = context or {}
        self.logger = logging.getLogger(__name__)

    async def on_enter(self) -> None:
        """Speak the opening greeting when the agent enters the session.

        Uses ``context["initial_greeting"]`` when that key exists, otherwise
        a built-in default sentence.
        """
        self.logger.info("Agent entered the session.")

        fallback_greeting = "Hello, this is Neha, calling from City Medical Center regarding your upcoming appointment. Is this a good time to speak?"
        opening_line = self.context.get("initial_greeting", fallback_greeting)
        await self.session.say(opening_line)

    async def on_exit(self) -> None:
        """Log that the call has ended."""
        self.logger.info("Call ended")
|
||||||
Reference in New Issue
Block a user