To bring a custom LLM server, set up a compatible server endpoint using the OpenAI style, specifically targeting `create_chat_completion`. Here's an example server implementation using FastAPI and OpenAI's Python SDK:
```python
import json
import logging
import os
from typing import List, Optional

import fastapi
import uvicorn
from dotenv import load_dotenv
from fastapi.responses import StreamingResponse
from openai import AsyncOpenAI
from pydantic import BaseModel

# Load environment variables from .env file
load_dotenv()

# Retrieve API key from environment
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
if not OPENAI_API_KEY:
    raise ValueError("OPENAI_API_KEY not found in environment variables")

app = fastapi.FastAPI()
oai_client = AsyncOpenAI(api_key=OPENAI_API_KEY)


class Message(BaseModel):
    role: str
    content: str


class ChatCompletionRequest(BaseModel):
    messages: List[Message]
    model: str
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = None
    stream: Optional[bool] = False
    user_id: Optional[str] = None


@app.post("/v1/chat/completions")
async def create_chat_completion(request: ChatCompletionRequest) -> StreamingResponse:
    oai_request = request.dict(exclude_none=True)
    if "user_id" in oai_request:
        oai_request["user"] = oai_request.pop("user_id")

    chat_completion_coroutine = await oai_client.chat.completions.create(**oai_request)

    async def event_stream():
        try:
            async for chunk in chat_completion_coroutine:
                # Convert the ChatCompletionChunk to a dictionary before JSON serialization
                chunk_dict = chunk.model_dump()
                yield f"data: {json.dumps(chunk_dict)}\n\n"
            yield "data: [DONE]\n\n"
        except Exception as e:
            logging.error("An error occurred: %s", str(e))
            yield f"data: {json.dumps({'error': 'Internal error occurred!'})}\n\n"

    return StreamingResponse(event_stream(), media_type="text/event-stream")


if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8013)
```
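Before wiring the server into ElevenLabs, you can sanity-check it locally by streaming a completion against it with the OpenAI SDK pointed at the local port. This is a minimal sketch, assuming the server above is running on port 8013; the model name and placeholder API key are illustrative assumptions, not part of the example above:

```python
# Minimal local test of the custom LLM endpoint (assumes the server runs on port 8013).
# The model name and the dummy API key below are placeholder assumptions.
from openai import OpenAI

client = OpenAI(
    base_url="http://localhost:8013/v1",  # point the SDK at the local FastAPI server
    api_key="unused",                     # the example server reads its own key from .env
)

stream = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "Say hello in one short sentence."}],
    stream=True,
)

# Print the streamed tokens as they arrive over SSE
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
print()
```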
Now let's make the changes in ElevenLabs. Direct your server URL to the ngrok endpoint, set "Limit token usage" to 5000, and enable "Custom LLM extra body" if you want to pass additional parameters to your custom LLM implementation at conversation start (see the sketch below).

You can now start interacting with Conversational AI using your own LLM server.
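When "Custom LLM extra body" is enabled, the request body your server receives may contain fields beyond the standard chat-completion parameters. The following is a hedged sketch of one way to tolerate such fields; the extra-field handling shown is an assumption for illustration, not a documented ElevenLabs schema:

```python
# Sketch: accept extra fields in the request body instead of rejecting them, then
# strip anything the OpenAI client does not understand before forwarding upstream.
# This is an assumption-based variation of the ChatCompletionRequest model above.
from typing import List, Optional

from pydantic import BaseModel


class Message(BaseModel):
    role: str
    content: str


class ChatCompletionRequest(BaseModel):
    messages: List[Message]
    model: str
    temperature: Optional[float] = 0.7
    max_tokens: Optional[int] = None
    stream: Optional[bool] = False
    user_id: Optional[str] = None

    class Config:
        extra = "allow"  # keep unknown fields instead of failing validation


ALLOWED_OPENAI_FIELDS = {"messages", "model", "temperature", "max_tokens", "stream", "user"}


def to_openai_request(request: ChatCompletionRequest) -> dict:
    """Map the incoming body to kwargs accepted by the OpenAI client."""
    body = request.dict(exclude_none=True)
    if "user_id" in body:
        body["user"] = body.pop("user_id")
    # Any extra fields are still available in `body` here for your own logic
    # (logging, routing, prompt shaping) before being filtered out of the upstream call.
    return {k: v for k, v in body.items() if k in ALLOWED_OPENAI_FIELDS}
```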