Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from flask import Flask, request, Response, jsonify
- import requests
- import os
- import time
- from functools import lru_cache
app = Flask(__name__)

# Configuration
# Configure with your specific Azure endpoint and model.

# Model name sent upstream as the "model" field; must match exactly what Azure expects.
AZURE_MODEL_NAME = "DeepSeek-R1"

# Base URL of the Azure endpoint, overridable through the AZURE_ENDPOINT
# environment variable. Trailing slashes are stripped so that URLs built as
# f"{AZURE_BASE_URL}/..." never contain a double slash.
AZURE_BASE_URL = os.getenv("AZURE_ENDPOINT", "https://<SERVER_NAME>.services.ai.azure.com").rstrip("/")

# API key sent in the "api-key" header. Set AZURE_API_KEY in your environment;
# the default value is only a placeholder.
AZURE_API_KEY = os.getenv("AZURE_API_KEY", "<API_KEY>")
# Chat-completion proxy endpoint (the favicon handler is defined separately).
@app.route('/v1/chat/completions/deployments/<path:model>/chat/completions', methods=['POST'])
def chat_completion(model):
    """Forward a chat-completion request to Azure and stream the reply back.

    The incoming JSON object is sent to ``{AZURE_BASE_URL}/models/chat/completions``
    with its "model" field overwritten by ``AZURE_MODEL_NAME``.

    Args:
        model: Path segment captured by the route. It is not used; the
            upstream model is always ``AZURE_MODEL_NAME``.

    Returns:
        A streaming ``Response`` carrying the upstream status code, headers
        and body; ``({"error": ...}, 400)`` when the request body is not a
        JSON object; ``({"error": ...}, 500)`` when the upstream request
        fails or returns an HTTP error status.
    """
    # silent=True yields None for a missing, mistyped or malformed body instead
    # of raising; anything that is not a JSON object cannot be merged below.
    original_body = request.get_json(silent=True)
    if not isinstance(original_body, dict):
        return jsonify({"error": "Invalid JSON body"}), 400

    azure_url = f"{AZURE_BASE_URL}/models/chat/completions"

    # Only these headers go upstream; the client's own headers are not forwarded.
    headers = {
        "Content-Type": "application/json",
        "api-key": AZURE_API_KEY,
        "extra-parameters": "pass-through",
    }

    modified_body = {
        **original_body,
        "model": AZURE_MODEL_NAME,
    }

    try:
        azure_response = requests.post(
            azure_url,
            headers=headers,
            params={"api-version": "2024-05-01-preview"},
            json=modified_body,
            stream=True,
            # (connect, read) timeouts so a dead upstream cannot block the
            # worker forever; the read timeout is generous for slow generations.
            timeout=(10, 600),
        )
    except requests.exceptions.RequestException as e:
        return jsonify({"error": str(e)}), 500

    try:
        azure_response.raise_for_status()
    except requests.exceptions.RequestException as e:
        # With stream=True the connection stays checked out until the body is
        # consumed or the response is closed.
        azure_response.close()
        return jsonify({"error": str(e)}), 500

    def generate():
        """Yield the upstream body in chunks and always release the connection."""
        try:
            yield from azure_response.iter_content(chunk_size=1024)
        finally:
            azure_response.close()

    # iter_content() yields the decoded body, so the upstream encoding, framing
    # and length headers no longer describe what is sent to the client.
    # Header names are compared case-insensitively.
    dropped = {"content-encoding", "transfer-encoding", "content-length", "connection"}
    response_headers = {
        name: value
        for name, value in azure_response.headers.items()
        if name.lower() not in dropped
    }

    return Response(
        generate(),
        status=azure_response.status_code,
        headers=response_headers,
        content_type=azure_response.headers.get('Content-Type', 'application/json'),
    )
@app.route('/favicon.ico')
def favicon():
    """Answer favicon requests with an empty 204 response."""
    no_content = Response(status=204)
    return no_content
def get_azure_models():
    """Fetch the list of models from Azure.

    Returns:
        The list stored under the "data" key of the JSON response, or an
        empty list when the request fails, Azure returns an HTTP error
        status, or the body is not a JSON object with a "data" list.
    """
    # NOTE(review): results are not cached. An lru_cache decorator was disabled
    # here; re-enabling it as-is would also cache the empty-list fallback.
    try:
        response = requests.get(
            f"{AZURE_BASE_URL}/openai/models",
            headers={"api-key": AZURE_API_KEY},
            params={"api-version": "2024-10-21"},
            timeout=5,
        )
        app.logger.debug("Azure model list response: %s", response)
        response.raise_for_status()
        payload = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a non-JSON body on requests versions whose
        # JSON decode error is not a RequestException.
        app.logger.error(f"Azure model fetch failed: {str(e)}")
        return []

    if not isinstance(payload, dict):
        app.logger.error("Azure model fetch failed: unexpected response payload")
        return []
    return payload.get('data') or []
def map_azure_model(azure_model):
    """Map an Azure model description to an OpenAI-compatible model object.

    Args:
        azure_model: Mapping describing one Azure model. The keys read are
            "id", "created_at", "capabilities" (with "inference" and
            "fine_tune") and "model"; all of them are optional.

    Returns:
        A dict with the keys "id", "object", "created", "owned_by",
        "permission" (a one-element list), "root" and "parent".
    """
    # Resolve the id once so the permission id uses the same 'unknown'
    # fallback as the top-level id instead of raising KeyError.
    model_id = azure_model.get('id', 'unknown')
    # Single timestamp so both "created" fields always agree.
    created = azure_model.get('created_at', int(time.time()))
    # Treat an explicit null the same as a missing capabilities object.
    capabilities = azure_model.get('capabilities') or {}

    return {
        "id": model_id,
        "object": "model",
        "created": created,
        "owned_by": "azure-ai",
        "permission": [{
            "id": f"modelperm-{str(model_id).lower()}",
            "object": "model_permission",
            "created": created,
            "allow_create_engine": False,
            "allow_sampling": capabilities.get('inference', False),
            "allow_logprobs": True,
            "allow_search_indices": False,
            "allow_view": True,
            "allow_fine_tuning": capabilities.get('fine_tune', False),
            "organization": "*",
            "group": None,
            "is_blocking": False,
        }],
        "root": azure_model.get('model', model_id),
        "parent": azure_model.get('model', None),
    }
@app.route('/v1/models', methods=['GET'])
@app.route('/v1/chat/completions/models', methods=['GET'])
def list_models():
    """Return the Azure models as an OpenAI-style model list.

    Responds with ``{"object": "list", "data": [...]}`` on success. Any
    exception raised while fetching or mapping the models is logged and
    answered with a ``server_error`` JSON payload and HTTP status 500.
    """
    try:
        fetched = get_azure_models()
        converted = list(map(map_azure_model, fetched))
        listing = {
            "object": "list",
            "data": converted,
        }
        return jsonify(listing)
    except Exception as err:
        app.logger.error(f"Model list error: {str(err)}")
        failure = {
            "message": "Failed to fetch models",
            "type": "server_error",
            "code": 500,
        }
        return jsonify({"error": failure}), 500
# Script entry point: serve the proxy on localhost:8080 with debug mode off.
if __name__ == "__main__":
    app.run(debug=False, port=8080, host="localhost")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement