我们以qwen为例,qwen也是兼容openai协议的,搭建一个api代理后,访问本地localhost就能流式输出结果。

以下是完整的 Python 代理代码(main.py):
# main.py
import os
import time
import hashlib
import json
from typing import List, Optional
import httpx
from fastapi import FastAPI, Request, HTTPException, Depends
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from starlette.responses import StreamingResponse
# Read credentials/endpoint from the environment so the secret is not
# hard-coded in source; the original placeholder values remain as fallbacks,
# so existing behavior is unchanged when the variables are unset.
API_KEY = os.environ.get("DASHSCOPE_API_KEY", "千问的apikey,去百炼申请")
BASE_URL = os.environ.get("DASHSCOPE_BASE_URL", "https://dashscope.aliyuncs.com/compatible-mode/v1")
# --- 2. Pydantic models (OpenAI-API compatible) ---
# The proxy must understand the request structure it forwards, so the
# standard OpenAI request schema is reproduced here.
class ChatMessage(BaseModel):
    """A single chat message in an OpenAI-style conversation."""
    # Sender role, e.g. "system" / "user" / "assistant" (not validated here).
    role: str
    # The message text.
    content: str
class ChatCompletionRequest(BaseModel):
    """Request body for POST /v1/chat/completions (OpenAI-compatible subset)."""
    # Backend model identifier, forwarded verbatim.
    model: str
    # Conversation history, oldest first.
    messages: List[ChatMessage]
    # When True the backend streams SSE chunks; default is a single JSON reply.
    stream: Optional[bool] = False
    # Other OpenAI parameters...
    temperature: Optional[float] = 0.7
# --- 3. FastAPI application and proxy endpoint ---
app = FastAPI(title="OpenAI-Compatible Proxy")
app.add_middleware(
    CORSMiddleware,
    # NOTE(review): wildcard origins combined with allow_credentials=True is
    # rejected by browsers per the CORS spec and is unsafe for production —
    # restrict origins or drop credentials; confirm deployment requirements.
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
# A single shared httpx.AsyncClient gives connection reuse (and HTTP/2
# support when enabled) — the recommended way to make async API calls.
client = httpx.AsyncClient(base_url=BASE_URL)
@app.post("/v1/chat/completions")
async def chat_completions_proxy(request: Request):
    """Proxy OpenAI-compatible chat-completion requests to the backend.

    Forwards the raw request body unchanged, injects the backend API key,
    and streams the backend's response (SSE or plain JSON) back to the
    caller.

    Raises:
        HTTPException: 503 when the backend is unreachable; otherwise the
            backend's own non-200 status code and error body are passed
            through to the client.
    """
    # FIX(review): the original decorator had
    # `dependencies=[Depends(verify_signature)]`, but `verify_signature` is
    # never defined anywhere in the file, so the module failed to import
    # with a NameError. The dependency is removed; re-add request
    # authentication here once a verifier is actually implemented.
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {API_KEY}",
        # Preserve the caller's Accept so streaming (SSE) negotiation works.
        "Accept": request.headers.get("Accept", "text/event-stream"),
    }
    body = await request.body()
    # The path is joined onto the client's base_url (compatible-mode root),
    # so only the endpoint path is given here.
    endpoint_path = "/chat/completions"
    try:
        backend_req = client.build_request(
            method="POST",
            url=endpoint_path,
            headers=headers,
            content=body,
            timeout=300.0,
        )
        # Debug aid: print exactly what is forwarded to the backend.
        print("--- Forwarding to Backend ---")
        print(f"URL: {backend_req.url}")
        print(f"Headers: {backend_req.headers}")
        print(f"Body: {backend_req.content.decode()}")
        print("---------------------------")
        backend_resp = await client.send(backend_req, stream=True)
    except httpx.RequestError as e:
        raise HTTPException(status_code=503, detail=f"后端服务请求失败: {e}")
    if backend_resp.status_code != 200:
        error_content = await backend_resp.aread()
        # FIX(review): release the streamed connection before raising —
        # the original leaked it on the error path.
        await backend_resp.aclose()
        # Print the raw backend error for easier debugging.
        print(f"Backend Error ({backend_resp.status_code}): {error_content.decode()}")
        raise HTTPException(
            status_code=backend_resp.status_code,  # pass the backend code through
            detail=f"后端服务返回错误: {error_content.decode()}",
        )
    # Stream the backend bytes straight through; `aclose` runs as a
    # background task after the response finishes.
    return StreamingResponse(
        backend_resp.aiter_bytes(),
        status_code=backend_resp.status_code,
        media_type=backend_resp.headers.get("Content-Type"),
        background=backend_resp.aclose,
    )
# --- 5. Run the server ---
if __name__ == "__main__":
    import uvicorn
    # FIX(review): the file is main.py (see the header comment), so the
    # import string must be "main:app" — the original "chatproxy:app"
    # would fail to load the application.
    uvicorn.run("main:app", host="127.0.0.1", port=8100, reload=False)

网友回复
有没有不依赖embedding向量的RAG技术?
有没有支持实时打断语音通话并后台帮你执行任何的ai模型?
开源ai大模型文件格式GGUF、MLX、Safetensors、 ONNX 有什么区别?
出海挣钱支付收款PayPal、Wise 、PingPong、Stripe如何选择?
如何实现类似google的图片隐形水印添加和识别技术?
linux上如何运行任意windows程序?
ai能写出比黑客还厉害的零日漏洞等攻击工具攻击任意软件系统工程?
js如何获取浏览器的音频上下文指纹、Canvas指纹、WebGL渲染特征?
为啥ai开始抛弃markdown文本,重新偏好html文本了?
网站有没有办法鉴别访问请求是由ai操控chrome-devtools-mcp发出的?


