import os

import yaml
from flask import Flask, request, jsonify
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load environment variables
api_key = os.getenv('HF_API_KEY')
model_path = os.getenv('MODEL_PATH')

# Initialize Flask app
app = Flask(__name__)

# Load configuration
with open('config.yaml', 'r') as file:
    config = yaml.safe_load(file)

# Load the model and tokenizer
def load_model():
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForCausalLM.from_pretrained(model_path)
    return model, tokenizer

model, tokenizer = load_model()

def generate_text(prompt):
    # Tokenize the prompt and generate a continuation. Unpacking the full
    # tokenizer output forwards the attention mask along with the input IDs,
    # which avoids the padding-related warning from generate().
    inputs = tokenizer(prompt, return_tensors="pt")
    outputs = model.generate(**inputs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

@app.route('/generate', methods=['POST'])
def generate():
    # silent=True returns None instead of raising on a missing or
    # malformed JSON body, so we can answer with a clean error below.
    data = request.get_json(silent=True)
    prompt = data.get('prompt') if data else None
    if prompt:
        response_text = generate_text(prompt)
        return jsonify({"response": response_text})
    # Return a 400 status so clients can tell bad requests from successes.
    return jsonify({"error": "No prompt provided"}), 400

if __name__ == '__main__':
    # Debug mode is convenient during development; disable it in production.
    app.run(debug=True)
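
# Example request (a sketch, assuming the server is running on Flask's
# default host and port, 127.0.0.1:5000; adjust for your deployment):
#
#   curl -X POST http://127.0.0.1:5000/generate \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Once upon a time"}'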