Flask流式调用
2024-08-20
python
import os
from collections.abc import Generator
from flask import Flask
from flask import Response
from flask import request
from flask import stream_with_context
from openai import OpenAI
from openai import Stream
from flask_cors import CORS
from openai.types.chat import ChatCompletionChunk
# Flask application; CORS is enabled for every route with the extension's defaults.
app = Flask(__name__)
CORS(app)

# Single module-level OpenAI client shared by all requests.
# The API key and base URL are read from the environment at import time.
openai_client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
    base_url=os.environ.get("OPENAI_BASE_URL"),
)
def generate(text: str) -> Generator[str, None, None]:
    """Stream the model's reply to ``text`` as incremental text fragments.

    Sends ``text`` as a single user message to the ``yi-medium`` model with
    streaming enabled and yields each non-empty content delta as it arrives.

    Args:
        text: The user message to send to the model.

    Yields:
        Non-empty content fragments of the reply, in arrival order.
    """
    stream: Stream[ChatCompletionChunk] = openai_client.chat.completions.create(
        model="yi-medium",
        messages=[{"role": "user", "content": text}],
        stream=True,
    )
    for event in stream:
        # A streamed chunk can carry an empty ``choices`` list; indexing it
        # blindly would raise IndexError and abort the response mid-stream.
        if not event.choices:
            continue
        current_response = event.choices[0].delta.content
        # Deltas without text (None or "") carry nothing to forward.
        if current_response:
            yield current_response
@app.route("/chat", methods=["POST"])
def chat() -> Response:
    """Handle ``POST /chat`` by streaming the model's reply to the client.

    Expects a JSON object body with a non-empty string field ``text``.

    Returns:
        A streaming ``text/event-stream`` response fed by ``generate(text)``,
        or a 400 JSON error response when the body is not a JSON object or
        ``text`` is missing, empty, or not a string.
    """
    # silent=True yields None instead of raising on a missing or invalid JSON
    # body, so every malformed request takes the same 400 path below.
    payload = request.get_json(silent=True)
    text = payload.get("text") if isinstance(payload, dict) else None
    if not isinstance(text, str) or not text:
        return Response(
            '{"error": "request body must be JSON with a non-empty string field named text"}',
            status=400,
            mimetype="application/json",
        )

    return Response(
        # stream_with_context keeps the request context alive while the
        # generator is consumed after this view function has returned.
        stream_with_context(generate(text)),
        mimetype="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            # Asks nginx-style reverse proxies not to buffer the stream.
            "X-Accel-Buffering": "no",
        },
    )
if __name__ == "__main__":
    # The server binds to all interfaces, and the Werkzeug debugger allows
    # arbitrary code execution from the browser, so debug mode must not be on
    # by default. Opt in for local development with FLASK_DEBUG=1.
    debug_enabled = os.getenv("FLASK_DEBUG", "").lower() in {"1", "true", "yes"}
    app.run(host="0.0.0.0", port=8000, debug=debug_enabled)