# embedding.py from haystack.components.embedders import OpenAITextEmbedder, HuggingFaceAPITextEmbedder from haystack.utils import Secret # 从 config 导入新的变量名 from config import ( OPENAI_EMBEDDING_MODEL, OPENAI_API_KEY_FROM_CONFIG, # 使用配置中的 Key OPENAI_API_BASE_URL_CONFIG, # 使用配置中的 Base URL OPENAI_EMBEDDING_KEY, OPENAI_EMBEDDING_BASE, HUGGINGFACE_KEY, HUGGINGFACE_EMBEDDING_MODEL, ) def initialize_text_embedder() -> OpenAITextEmbedder: """ Initializes the Haystack OpenAITextEmbedder component. Reads API Key and Base URL directly from config.py. """ # 不再需要检查环境变量 # api_key = os.getenv("OPENAI_API_KEY") # if not api_key: # raise ValueError("OPENAI_API_KEY environment variable not set.") # 检查从配置加载的 key 是否有效 (基础检查) if not OPENAI_API_KEY_FROM_CONFIG or "YOUR_API_KEY" in OPENAI_API_KEY_FROM_CONFIG: print("警告: OpenAI API Key 未在 config.py 中有效配置。") # Consider raising an error here if the key is mandatory # raise ValueError("OpenAI API Key not configured correctly in config.py") print(f"Initializing OpenAI Text Embedder with model: {OPENAI_EMBEDDING_MODEL}") # 使用配置中的 Base URL if OPENAI_API_BASE_URL_CONFIG: print(f"Using custom API base URL from config: {OPENAI_API_BASE_URL_CONFIG}") else: print("Using default OpenAI API base URL (None specified in config).") text_embedder = OpenAITextEmbedder( # 直接使用从 config.py 导入的 key 和 base_url api_key=Secret.from_token(OPENAI_EMBEDDING_KEY), api_base_url=OPENAI_EMBEDDING_BASE, model=OPENAI_EMBEDDING_MODEL, ) print("Text Embedder initialized.") return text_embedder # __main__ 部分也需要调整以反映不依赖环境变量 # Example usage if __name__ == "__main__": embedder = initialize_text_embedder() sample_text = "这是一个示例文本,用于测试 huggingface 嵌入功能。" try: result = embedder.run(text=sample_text) embedding = result["embedding"] print(f"Sample text: '{sample_text}'") # print(f"Generated embedding (first 5 dims): {embedding[:5]}") print(f"Generated embedding dimension: {len(embedding)}") print(f"Tokens used: {result['meta']['usage']['total_tokens']}") except Exception as e: print(f"Error during huggingface API call: {e}")