2026最新5款AI编程平替学生党实测合集|Cursor中文vibe coding迁移深度对比
2026/7/6 6:46:02
Embedding(向量嵌入):将自然语言文本转化为固定长度的浮点数向量;语义相近的文本,其向量在空间中的距离更近。
base_url与key即可复用代码。
常用两种计算方式:
大模型单次Embedding存在输入长度限制,长文档必须拆分片段:
Meta开源轻量向量检索库,适合本地测试、小型知识库,无需部署服务:
index_id -> {原文文本、来源、标题},检索拿到id后反向取出原文。
用户提问 → 提问文本向量化 → FAISS检索TopN相似向量 → 通过id匹配原文片段 → 拼接进Prompt交给LLM
两条能力互补:
解决方案:
解决方案:
解决方案:
解决方案:
# Install dependencies first:
#   pip install faiss-cpu numpy aiohttp pydantic fastapi uvicorn

# =============================================================================
# rag_store.py -- text chunking, async embedding client, in-memory FAISS store
# =============================================================================
import aiohttp
import asyncio
import faiss
import numpy as np
import json
import os
from typing import List, Dict, Tuple

# Model configuration (carried over from the previous two days of the course).
# API keys can be supplied through environment variables so that real secrets
# never have to be written into the source file; the original placeholders
# remain as the fallback values.
# NOTE(review): confirm that the deepseek endpoint actually serves embeddings
# and which model name it expects; set "embedding_model" in that entry if so.
MODEL_CONFIG = {
    "qwen-turbo": {
        "base_url": "https://dashscope.aliyuncs.com/compatible-mode/v1/chat/completions",
        "embedding_url": "https://dashscope.aliyuncs.com/compatible-mode/v1/embeddings",
        "api_key": os.getenv("DASHSCOPE_API_KEY", "你的通义千问key"),
    },
    "deepseek-chat": {
        "base_url": "https://api.deepseek.com/v1/chat/completions",
        "embedding_url": "https://api.deepseek.com/v1/embeddings",
        "api_key": os.getenv("DEEPSEEK_API_KEY", "你的deepseek key"),
    },
}


def split_text_chunk(text: str, chunk_size: int = 400, overlap: int = 80) -> List[str]:
    """Split *text* into overlapping, whitespace-stripped chunks.

    Args:
        text: The document text to split.
        chunk_size: Maximum number of characters per chunk (must be > 0).
        overlap: Number of characters shared by consecutive chunks
            (must satisfy ``0 <= overlap < chunk_size``).

    Returns:
        The non-empty chunks in document order. An empty or whitespace-only
        *text* yields an empty list.

    Raises:
        ValueError: If ``chunk_size``/``overlap`` would prevent the window
            from advancing (which previously caused an infinite loop).
    """
    if chunk_size <= 0:
        raise ValueError("chunk_size must be a positive integer")
    if not 0 <= overlap < chunk_size:
        raise ValueError("overlap must satisfy 0 <= overlap < chunk_size")

    step = chunk_size - overlap
    text_len = len(text)
    chunks: List[str] = []
    start = 0
    while start < text_len:
        end = min(start + chunk_size, text_len)
        piece = text[start:end].strip()
        # Whitespace-only windows carry no content worth embedding.
        if piece:
            chunks.append(piece)
        # Once a window reaches the end of the text, any further window would
        # be fully contained in this one, so stop instead of emitting a
        # redundant tail chunk.
        if end >= text_len:
            break
        start += step
    return chunks


class AsyncEmbeddingClient:
    """Async client for an OpenAI-compatible ``/embeddings`` endpoint."""

    def __init__(self, model_name: str = "qwen-turbo"):
        """Load the endpoint configuration for *model_name*.

        Raises:
            KeyError: If *model_name* is not a key of ``MODEL_CONFIG``.
        """
        self.conf = MODEL_CONFIG[model_name]
        # Caps the number of embedding requests in flight at once.
        self.semaphore = asyncio.Semaphore(3)
        self.timeout = aiohttp.ClientTimeout(total=30)

    async def batch_embedding(
        self, texts: List[str], batch_size: int = 25
    ) -> List[List[float]]:
        """Embed *texts* and return one vector per input, in input order.

        The inputs are sent in requests of at most *batch_size* texts; the
        requests run concurrently, limited by ``self.semaphore``.

        Args:
            texts: Texts to embed. An empty list returns ``[]`` without
                touching the network.
            batch_size: Maximum number of texts per HTTP request.
                NOTE(review): 25 is the per-request row limit believed to
                apply to DashScope ``text-embedding-v1``; confirm against the
                provider documentation.

        Raises:
            ValueError: If *batch_size* is not positive.
            aiohttp.ClientResponseError: If the endpoint answers with an HTTP
                error status.
            RuntimeError: If the response body does not contain one embedding
                per input text.
        """
        if not texts:
            return []
        if batch_size <= 0:
            raise ValueError("batch_size must be a positive integer")

        headers = {
            "Authorization": f"Bearer {self.conf['api_key']}",
            "Content-Type": "application/json",
        }
        batches = [
            texts[offset:offset + batch_size]
            for offset in range(0, len(texts), batch_size)
        ]
        # One session is shared by all batches of this call.
        async with aiohttp.ClientSession(timeout=self.timeout) as session:
            per_batch = await asyncio.gather(
                *(self._embed_one_batch(session, headers, batch) for batch in batches)
            )
        # gather() preserves argument order, so flattening keeps input order.
        return [vec for vecs in per_batch for vec in vecs]

    async def _embed_one_batch(
        self,
        session: "aiohttp.ClientSession",
        headers: Dict[str, str],
        batch: List[str],
    ) -> List[List[float]]:
        """Send a single embeddings request for *batch* and return its vectors."""
        payload = {
            "input": batch,
            # Per-provider override; falls back to the original hard-coded model.
            "model": self.conf.get("embedding_model", "text-embedding-v1"),
        }
        async with self.semaphore:
            # The context manager releases the connection back to the pool.
            async with session.post(
                self.conf["embedding_url"], json=payload, headers=headers
            ) as resp:
                resp.raise_for_status()
                res_data = await resp.json()

        data = res_data.get("data") if isinstance(res_data, dict) else None
        if not isinstance(data, list) or len(data) != len(batch):
            raise RuntimeError(f"Unexpected embedding response: {res_data!r}")
        # Items carry an "index" field in the OpenAI-compatible schema; sorting
        # on it pins the output order. The sort is stable, so a response
        # without "index" keeps the order it arrived in.
        ordered = sorted(data, key=lambda item: item.get("index", 0))
        return [item["embedding"] for item in ordered]


class FaissVectorStore:
    """In-memory FAISS index plus a side table mapping row id -> metadata."""

    def __init__(self, vec_dim: int = 1536):
        """Create an empty exact-search L2 index for vectors of size *vec_dim*."""
        self.index = faiss.IndexFlatL2(vec_dim)
        self.meta_map: Dict[int, Dict] = {}
        # Next row id to assign; mirrors the insertion order of the index.
        self.current_id = 0

    def add_batch(self, vectors: List[List[float]], metas: List[Dict]):
        """Insert *vectors* together with their metadata records.

        ``metas[i]`` describes ``vectors[i]``. Inputs are validated before the
        index is touched so that row ids and metadata cannot drift apart.

        Raises:
            ValueError: If the two lists differ in length or the vectors do
                not match the index dimension.
        """
        if len(vectors) != len(metas):
            raise ValueError(
                f"vectors and metas must have the same length "
                f"({len(vectors)} != {len(metas)})"
            )
        if not vectors:
            return

        arr = np.asarray(vectors, dtype=np.float32)
        if arr.ndim != 2 or arr.shape[1] != self.index.d:
            raise ValueError(
                f"expected vectors of shape (n, {self.index.d}), got {arr.shape}"
            )

        self.index.add(arr)
        for meta in metas:
            # Store a copy so later changes by the caller do not alter the store.
            self.meta_map[self.current_id] = dict(meta)
            self.current_id += 1

    def search(
        self,
        query_vec: List[float],
        top_k: int = 3,
        score_threshold: float = 600,
    ) -> List[Dict]:
        """Return metadata of the nearest stored vectors to *query_vec*.

        Args:
            query_vec: The query embedding.
            top_k: Maximum number of hits to return.
            score_threshold: Hits whose distance exceeds this value are
                dropped. ``IndexFlatL2`` reports squared L2 distances.

        Returns:
            A list of new dicts, each a copy of the stored metadata plus a
            ``"distance"`` key. Stored metadata is never modified.
        """
        if top_k <= 0 or self.index.ntotal == 0:
            return []

        arr = np.array([query_vec], dtype=np.float32)
        distances, ids = self.index.search(arr, top_k)

        results: List[Dict] = []
        for dist, idx in zip(distances[0], ids[0]):
            # FAISS pads with -1 when fewer than top_k vectors are available.
            if idx == -1:
                continue
            if dist > score_threshold:
                continue
            meta = self.meta_map.get(int(idx))
            if meta is None:
                # No metadata recorded for this row: nothing useful to return.
                continue
            results.append({**meta, "distance": float(dist)})
        return results


class RAGService:
    """Glue between the chunker, the embedding client and the vector store."""

    def __init__(self, model_name="qwen-turbo"):
        self.emb_client = AsyncEmbeddingClient(model_name)
        self.vec_store = FaissVectorStore(vec_dim=1536)

    async def add_document(self, doc_text: str, source: str = "默认文档"):
        """Chunk *doc_text*, embed the chunks and add them to the vector store."""
        chunks = split_text_chunk(doc_text)
        if not chunks:
            return
        vecs = await self.emb_client.batch_embedding(chunks)
        metas = [{"text": c, "source": source} for c in chunks]
        self.vec_store.add_batch(vecs, metas)

    async def retrieve(self, query: str, top_k=3):
        """Return the stored chunks most similar to *query*.

        A blank query returns ``[]`` without calling the embedding API.
        """
        if not query or not query.strip():
            return []
        query_vec = await self.emb_client.batch_embedding([query])
        if not query_vec:
            return []
        return self.vec_store.search(query_vec[0], top_k=top_k)


async def test_rag():
    """Smoke test: index a small document and print what a query recalls."""
    rag = RAGService()
    # Simulated knowledge-base document.
    doc = """AI Agent 是具备工具调用、记忆、规划能力的智能体。
Agent可以读取私有知识库,结合计算器、数据库等外部工具完成复杂任务。
RAG检索增强生成用于解决大模型知识滞后、幻觉问题,先检索文档再回答用户问题。
Function Calling允许模型自主调用外部函数,完成数学计算、实时数据查询。"""
    await rag.add_document(doc, source="Agent基础文档")

    # Recall chunks for a user question.
    res = await rag.retrieve("RAG有什么作用")
    print("召回知识库片段:")
    for item in res:
        print(f"相似度距离:{item['distance']} 内容:{item['text']}")


if __name__ == "__main__":
    asyncio.run(test_rag())


# =============================================================================
# llm_rag_agent.py -- agent combining RAG recall with tool calling
# =============================================================================
import asyncio
from pydantic import BaseModel, Field, ValidationError
import json
from rag_store import RAGService
# Reuse the Day 2 LLM client and tool definitions.
from llm_client_v2 import AsyncLLMClientV2, TOOLS, TOOL_MAP, CalcToolParams


class MixedRAGAgent:
    """Agent that injects retrieved context into the system prompt and then
    runs a tool-calling chat through ``AsyncLLMClientV2``."""

    def __init__(self, model_name="qwen-turbo"):
        self.llm = AsyncLLMClientV2(model_name)
        self.rag = RAGService(model_name)
        # System prompt template; {rag_context} is filled in per request.
        self.system_base = """
你是智能AI助手,回答严格遵守以下规则:
1. 优先参考下方知识库检索内容作答,禁止编造不存在信息;
2. 如果知识库无相关内容,再判断是否需要调用计算器工具;
3. 数学计算必须调用calculator工具,禁止手动计算;
4. 无资料、无可用工具时,直接回复暂无相关信息。
【知识库参考资料】:
{rag_context}
"""

    async def chat(self, user_query: str):
        """Answer *user_query* using RAG recall followed by a tool-calling chat.

        Returns:
            A dict with the keys ``answer``, ``reference_doc_count`` and
            ``reference_content``.
        """
        # 1. Recall related chunks from the knowledge base.
        retrieve_list = await self.rag.retrieve(user_query, top_k=3)
        rag_context = "\n".join(item["text"] for item in retrieve_list)

        # 2. Build the system prompt.
        system_prompt = self.system_base.format(rag_context=rag_context)
        messages = [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_query},
        ]

        # 3. Run the tool-calling conversation.
        answer = await self.llm.chat_with_tools(messages, TOOLS)
        return {
            "answer": answer,
            "reference_doc_count": len(retrieve_list),
            "reference_content": rag_context,
        }


async def test_mixed_agent():
    """Smoke test: one knowledge-base question and one calculator question."""
    agent = MixedRAGAgent()
    # Seed the knowledge base.
    doc = """RAG全称检索增强生成,作用是接入私有文档减少模型幻觉。
Function Calling是大模型调用外部工具的能力,支持计算器、接口查询。"""
    await agent.rag.add_document(doc, source="Agent知识库")

    # Test 1: question answerable from the knowledge base.
    print("问题1:什么是RAG?")
    res1 = await agent.chat("什么是RAG?")
    print("回答:", res1["answer"])
    print("参考文档:", res1["reference_content"])

    # Test 2: arithmetic, expected to trigger a tool call.
    print("\n问题2:365*2等于多少?")
    res2 = await agent.chat("365*2等于多少?")
    print("回答:", res2["answer"])


if __name__ == "__main__":
    asyncio.run(test_mixed_agent())


# =============================================================================
# main_rag.py -- FastAPI service exposing the mixed agent
# =============================================================================
from fastapi import FastAPI, Query
import asyncio
from llm_rag_agent import MixedRAGAgent

app = FastAPI(title="Day3 RAG+Function Calling混合Agent")
agent = MixedRAGAgent()


# NOTE(review): recent FastAPI releases deprecate on_event in favour of a
# lifespan handler; consider migrating once the FastAPI version is pinned.
@app.on_event("startup")
async def load_knowledge():
    """Preload the knowledge-base document when the application starts."""
    doc = """
AI Agent 30天速成课程Day3学习内容:
1. Embedding向量嵌入将文本转为数字向量,用于相似度检索;
2. FAISS本地向量库存储向量,实现私有知识库检索;
3. RAG检索增强生成解决大模型幻觉、知识滞后问题;
4. RAG结合Function Calling,同时支持知识库查询与外部工具调用;
5. 文本分片采用重叠切割,避免语义断裂。
"""
    await agent.rag.add_document(doc, source="Day3学习文档")


@app.get("/chat/rag")
async def chat_rag(prompt: str = Query(..., description="用户提问")):
    """Answer *prompt* through the mixed RAG + tool-calling agent."""
    result = await agent.chat(prompt)
    return result


if __name__ == "__main__":
    import uvicorn

    uvicorn.run("main_rag:app", reload=True)

# Suggested exercises (from the article):
#   - rag_store.py: test document chunking, vector insertion and similarity
#     search.
#   - chunk_size / overlap parameters: observe how the completeness of the
#     recalled content changes.
#   - score_threshold: observe how well unrelated chunks are filtered out.