Skip to content

Flask流式调用

2024-08-20
python
import os
from collections.abc import Generator

from flask import Flask
from flask import Response
from flask import request
from flask import stream_with_context
from openai import OpenAI
from openai import Stream
from flask_cors import CORS
from openai.types.chat import ChatCompletionChunk

# Flask application object; CORS(app) applies Flask-CORS to it with the
# extension's default settings.
app = Flask(__name__)
CORS(app)

# Shared OpenAI client, built once at import time and reused by every
# request. Both settings are read from the environment; each lookup yields
# None when the variable is unset.
openai_client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
    base_url=os.environ.get("OPENAI_BASE_URL"),
)


def generate(text: str) -> Generator[str, None, None]:
    """Stream the model's reply to ``text`` as plain-text fragments.

    Sends ``text`` as a single user message to the ``yi-medium`` model via the
    module-level ``openai_client`` with streaming enabled, then yields each
    non-empty ``delta.content`` fragment of the first choice as it arrives.

    Args:
        text: The user's prompt.

    Yields:
        Non-empty content fragments, in arrival order.
    """
    stream: Stream[ChatCompletionChunk] = openai_client.chat.completions.create(
        model="yi-medium",
        messages=[{"role": "user", "content": text}],
        stream=True,
    )
    for event in stream:
        # A chunk can arrive with an empty ``choices`` list (e.g. usage-only
        # or provider bookkeeping chunks). Indexing [0] on it would raise
        # IndexError and abort the response mid-stream, so skip it instead.
        if not event.choices:
            continue
        current_response = event.choices[0].delta.content
        # ``content`` is None/empty on role-only and final chunks.
        if current_response:
            yield current_response


@app.route("/chat", methods=["POST"])
def chat() -> Response:
    """Handle ``POST /chat`` by streaming the model's reply to the client.

    Expects a JSON object body with a string ``"text"`` field, which is
    forwarded to ``generate``.

    Returns:
        A streaming ``Response`` whose body is the sequence of fragments
        yielded by ``generate(text)``, or a 400 JSON error response when the
        body is not an object carrying a string ``"text"`` field.
    """
    # Content-type and JSON-syntax errors are still rejected by Flask itself
    # when ``request.json`` is accessed; only the payload shape is checked here.
    payload = request.json
    text = payload.get("text") if isinstance(payload, dict) else None
    if not isinstance(text, str):
        # Previously a missing field or non-object body raised KeyError /
        # TypeError and surfaced as a 500; report it as a client error.
        return Response(
            '{"error": "JSON body must be an object with a string text field"}',
            status=400,
            mimetype="application/json",
        )

    # NOTE: fragments are sent as raw text, not ``data:``-framed SSE events,
    # even though the mimetype is text/event-stream; kept as-is so existing
    # clients keep working.
    return Response(
        # stream_with_context keeps the request context alive while the
        # generator is consumed after this view has returned.
        stream_with_context(generate(text)),
        mimetype="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            # Asks nginx-style reverse proxies not to buffer the stream.
            "X-Accel-Buffering": "no",
        },
    )


if __name__ == "__main__":
    # Development entry point: listen on all interfaces, port 8000.
    # The Werkzeug debugger allows arbitrary code execution from the browser,
    # so it must not be switched on unconditionally for a server bound to
    # 0.0.0.0. Opt in explicitly with FLASK_DEBUG=1 (or "true").
    debug_enabled = os.getenv("FLASK_DEBUG", "").lower() in {"1", "true"}
    app.run(host="0.0.0.0", port=8000, debug=debug_enabled)