Reinforcement Learning and Large Language Model Alignment: A Practical Guide from RLHF to DPO
Text generation has evolved from rule-based methods to deep learning:
The text generation technology roadmap:

- Rule templates: filling in hand-written templates
- Statistical language models: n-gram (a minimal sketch follows the table below)
- Neural language models: RNN/LSTM
- Transformer: GPT/T5

| Model | Architecture | Characteristics | Representative Model |
|---|---|---|---|
| RNN/LSTM | Recurrent structure | Sequence modeling | Seq2Seq |
| Transformer | Attention mechanism | Parallel computation | GPT |
| T5 | Unified framework | Multi-task | T5 |
| BERT | Bidirectional encoding | Understanding-oriented | BERT |
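The roadmap lists n-gram statistical language models as the step before neural generators. As a point of reference, here is a minimal bigram sketch with a sampling loop; the `BigramLM` class, the toy corpus, and the smoothing constant are illustrative assumptions, not from the original post.

```python
import random
from collections import defaultdict, Counter

class BigramLM:
    """Minimal bigram language model: counts word pairs and samples the next word."""
    def __init__(self, smoothing=1e-6):
        self.counts = defaultdict(Counter)
        self.smoothing = smoothing  # avoids zero probability for unseen pairs

    def fit(self, sentences):
        for sent in sentences:
            tokens = ["<s>"] + sent.split() + ["</s>"]
            for prev, curr in zip(tokens, tokens[1:]):
                self.counts[prev][curr] += 1

    def sample_next(self, prev):
        counter = self.counts[prev]
        words = list(counter) or ["</s>"]
        weights = [counter[w] + self.smoothing for w in words]
        return random.choices(words, weights=weights, k=1)[0]

    def generate(self, max_len=20):
        token, out = "<s>", []
        for _ in range(max_len):
            token = self.sample_next(token)
            if token == "</s>":
                break
            out.append(token)
        return " ".join(out)

# Usage on a toy corpus (illustrative)
lm = BigramLM()
lm.fit(["the cat sat on the mat", "the dog sat on the log"])
print(lm.generate())
```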
Generation strategies:

- Greedy: pick the highest-probability token at each step
- Beam Search: keep several candidate sequences in parallel
- Sampling: draw the next token at random from the model's distribution
- Top-K: restrict sampling to the K most likely tokens
- Top-P (Nucleus): restrict sampling to the smallest set of tokens whose cumulative probability exceeds a threshold

An LSTM-based generator with temperature sampling:

```python
import math

import torch
import torch.nn as nn
import torch.nn.functional as F


class RNNGenerator(nn.Module):
    """Autoregressive LSTM language model."""
    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_layers=2, end_token=0):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=num_layers)
        self.fc = nn.Linear(hidden_dim, vocab_size)
        # end_token was referenced but never defined in the original snippet;
        # it is assumed to be a constructor argument here
        self.end_token = end_token

    def forward(self, x, hidden=None):
        x = self.embedding(x)                 # (seq, batch, embedding_dim)
        output, hidden = self.lstm(x, hidden)
        logits = self.fc(output)              # (seq, batch, vocab_size)
        return logits, hidden

    def generate(self, start_token, max_len=100, temperature=1.0):
        self.eval()
        generated = [start_token]
        hidden = None
        for _ in range(max_len):
            input_ids = torch.tensor([generated[-1]]).unsqueeze(0)   # (1, 1)
            with torch.no_grad():
                logits, hidden = self.forward(input_ids, hidden)
            logits = logits.squeeze() / temperature                  # (vocab_size,)
            probabilities = F.softmax(logits, dim=-1)
            next_token = torch.multinomial(probabilities, num_samples=1).item()
            generated.append(next_token)
            if next_token == self.end_token:
                break
        return generated
```

A Transformer decoder generator with top-k sampling:

```python
class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding. This module was used but not defined in the
    original snippet; a standard implementation is assumed here."""
    def __init__(self, d_model, max_len=5000):
        super().__init__()
        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(position * div_term)
        pe[:, 0, 1::2] = torch.cos(position * div_term)
        self.register_buffer('pe', pe)

    def forward(self, x):
        # x: (seq_len, batch, d_model)
        return x + self.pe[:x.size(0)]


class TransformerGenerator(nn.Module):
    """Decoder-only Transformer language model."""
    def __init__(self, vocab_size, d_model=512, num_heads=8, d_ff=2048, num_layers=6, end_token=0):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model)
        decoder_layer = nn.TransformerDecoderLayer(d_model, num_heads, d_ff)
        self.decoder = nn.TransformerDecoder(decoder_layer, num_layers)
        self.fc = nn.Linear(d_model, vocab_size)
        self.end_token = end_token  # assumed constructor argument, as above

    def forward(self, tgt, memory=None, tgt_mask=None):
        tgt = self.embedding(tgt) * math.sqrt(self.embedding.embedding_dim)
        tgt = self.positional_encoding(tgt)
        if memory is None:
            # Decoder-only usage: attend to the target sequence itself
            memory = tgt
        output = self.decoder(tgt, memory, tgt_mask=tgt_mask, memory_mask=tgt_mask)
        return self.fc(output)

    def generate(self, start_token, max_len=100, temperature=1.0, top_k=50):
        self.eval()
        generated = [start_token]
        for _ in range(max_len):
            input_ids = torch.tensor([generated]).T       # (seq, 1)
            tgt_mask = nn.Transformer.generate_square_subsequent_mask(len(input_ids)).to(input_ids.device)
            with torch.no_grad():
                logits = self.forward(input_ids, tgt_mask=tgt_mask)
            logits = logits[-1].squeeze(0) / temperature  # (vocab_size,)
            if top_k > 0:
                # Top-K filtering: mask out everything below the K-th largest logit
                v, _ = torch.topk(logits, top_k)
                logits[logits < v[-1]] = float('-inf')
            probabilities = F.softmax(logits, dim=-1)
            next_token = torch.multinomial(probabilities, num_samples=1).item()
            generated.append(next_token)
            if next_token == self.end_token:
                break
        return generated
```

A GPT-style generator with nucleus (top-p) sampling:

```python
class GPTGenerator(nn.Module):
    """GPT-style generator built on nn.Transformer with a decoder-only stack."""
    def __init__(self, vocab_size, d_model=768, num_heads=12, d_ff=3072, num_layers=12):
        super().__init__()
        self.transformer = nn.Transformer(
            d_model=d_model,
            nhead=num_heads,
            num_encoder_layers=0,      # no encoder layers: decoder-only
            num_decoder_layers=num_layers,
            dim_feedforward=d_ff
        )
        self.embedding = nn.Embedding(vocab_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model)
        self.fc = nn.Linear(d_model, vocab_size)

    def forward(self, x):
        x = self.embedding(x) * math.sqrt(self.embedding.embedding_dim)
        x = self.positional_encoding(x)
        mask = nn.Transformer.generate_square_subsequent_mask(x.size(0)).to(x.device)
        # Apply the causal mask to both self- and cross-attention so future tokens stay hidden
        output = self.transformer(x, x, tgt_mask=mask, memory_mask=mask)
        return self.fc(output)

    def generate(self, prompt, tokenizer, max_len=100, temperature=1.0, top_p=0.9):
        self.eval()
        input_ids = tokenizer.encode(prompt, return_tensors='pt').T   # (seq, 1)
        for _ in range(max_len):
            with torch.no_grad():
                logits = self.forward(input_ids)
            logits = logits[-1].squeeze(0) / temperature              # (vocab_size,)
            if top_p < 1.0:
                # Nucleus filtering: keep the smallest set of tokens whose
                # cumulative probability exceeds top_p
                sorted_logits, sorted_indices = torch.sort(logits, descending=True)
                cumulative_probs = torch.cumsum(F.softmax(sorted_logits, dim=-1), dim=-1)
                sorted_indices_to_remove = cumulative_probs > top_p
                # Shift right so the first token above the threshold is kept
                sorted_indices_to_remove[1:] = sorted_indices_to_remove[:-1].clone()
                sorted_indices_to_remove[0] = False
                indices_to_remove = sorted_indices[sorted_indices_to_remove]
                logits[indices_to_remove] = float('-inf')
            probabilities = F.softmax(logits, dim=-1)
            next_token = torch.multinomial(probabilities, num_samples=1).item()
            input_ids = torch.cat([input_ids, torch.tensor([[next_token]])], dim=0)
            if next_token == tokenizer.eos_token_id:
                break
        return tokenizer.decode(input_ids.squeeze().tolist())
```

How the main generator families compare:

| Model | Generation Quality | Training Difficulty | Inference Speed | Use Case |
|---|---|---|---|---|
| RNN | Medium | Low | Fast | Simple generation |
| Transformer | High | Medium | Medium | Mid-complexity generation |
| GPT-2 | Very high | High | Medium | Complex generation |
| GPT-3 | Extremely high | Very high | Slow | High-quality generation |
Decoding strategy comparison:

| Strategy | Diversity | Coherence | Controllability |
|---|---|---|---|
| Greedy | Low | High | High |
| Beam Search | Low | Very high | Very high |
| Top-K | Medium | Medium | Medium |
| Top-P | High | High | Medium |
| Temperature | Tunable | Tunable | Tunable |
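The same strategies can also be exercised through the Hugging Face `transformers` generation API instead of the hand-rolled loops above. The snippet below is a minimal sketch; the gpt2 checkpoint, the prompt, and the sampling hyperparameters are illustrative choices, not from the original post.

```python
from transformers import GPT2LMHeadModel, GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')
input_ids = tokenizer.encode("The future of text generation", return_tensors='pt')

# Greedy: deterministic, picks the argmax token at every step
greedy = model.generate(input_ids, max_length=40)

# Beam search: keeps num_beams candidate sequences
beam = model.generate(input_ids, max_length=40, num_beams=5, early_stopping=True)

# Top-K sampling: sample only from the 50 most likely tokens
top_k = model.generate(input_ids, max_length=40, do_sample=True, top_k=50)

# Top-P (nucleus) sampling combined with temperature
top_p = model.generate(input_ids, max_length=40, do_sample=True,
                       top_p=0.9, temperature=0.8, top_k=0)

for name, output in [("greedy", greedy), ("beam", beam), ("top-k", top_k), ("top-p", top_p)]:
    print(name, "->", tokenizer.decode(output[0], skip_special_tokens=True))
```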
Model scale comparison:

| Model | Parameters | Generation Quality | Training Time |
|---|---|---|---|
| GPT-2 small | 124M | Medium | 1 week |
| GPT-2 medium | 355M | High | 2 weeks |
| GPT-2 large | 774M | Very high | 4 weeks |
| GPT-3 | 175B | Extremely high | Months |
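The parameter counts in the table can be checked directly by loading the checkpoints and summing tensor sizes. A minimal sketch, assuming the public Hugging Face GPT-2 checkpoints are available (GPT-3 has no public checkpoint, so it is omitted):

```python
from transformers import GPT2LMHeadModel

# Sum the element counts of every weight tensor in each model
for name in ['gpt2', 'gpt2-medium', 'gpt2-large']:
    model = GPT2LMHeadModel.from_pretrained(name)
    n_params = sum(p.numel() for p in model.parameters())
    print(f"{name}: {n_params / 1e6:.0f}M parameters")
```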
Helpers for choosing and constructing a generator:

```python
def select_generator(task_type, data_size):
    """Pick a generator architecture based on task complexity."""
    if task_type == 'simple':
        return RNNGenerator(10000, 256, 512)
    elif task_type == 'medium':
        return TransformerGenerator(10000, 512, 8, 2048, 6)
    else:
        from transformers import GPT2LMHeadModel
        return GPT2LMHeadModel.from_pretrained('gpt2')


class GeneratorFactory:
    """Build a generator from a configuration dict."""
    @staticmethod
    def create(config):
        if config['type'] == 'rnn':
            return RNNGenerator(**config['params'])
        elif config['type'] == 'transformer':
            return TransformerGenerator(**config['params'])
        elif config['type'] == 'gpt':
            from transformers import GPT2LMHeadModel
            return GPT2LMHeadModel.from_pretrained(config['model_name'])
        raise ValueError(f"unknown generator type: {config['type']}")
```

A minimal training and evaluation loop:

```python
class TextGenerationTrainer:
    """Runs next-token-prediction training and evaluation for the generators above."""
    def __init__(self, model, optimizer, scheduler, loss_fn):
        self.model = model
        self.optimizer = optimizer
        self.scheduler = scheduler
        self.loss_fn = loss_fn

    def train_step(self, batch):
        self.model.train()
        self.optimizer.zero_grad()
        input_ids = batch['input_ids']
        labels = batch['labels']
        output = self.model(input_ids)
        if isinstance(output, tuple):   # RNNGenerator returns (logits, hidden)
            output = output[0]
        # Flatten (seq, batch, vocab) so cross-entropy sees one prediction per token
        loss = self.loss_fn(output.reshape(-1, output.size(-1)), labels.reshape(-1))
        loss.backward()
        self.optimizer.step()
        self.scheduler.step()
        return loss.item()

    def evaluate(self, dataloader):
        self.model.eval()
        total_loss = 0
        with torch.no_grad():
            for batch in dataloader:
                input_ids = batch['input_ids']
                labels = batch['labels']
                output = self.model(input_ids)
                if isinstance(output, tuple):
                    output = output[0]
                loss = self.loss_fn(output.reshape(-1, output.size(-1)), labels.reshape(-1))
                total_loss += loss.item()
        return total_loss / len(dataloader)
```
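Wiring the trainer together might look like the sketch below. The synthetic batches, the small model configuration, and the AdamW / cosine-schedule choices are illustrative assumptions, not part of the original post.

```python
import torch
import torch.nn as nn

# Tiny synthetic corpus of random token ids, only to exercise the loop;
# labels are the inputs shifted by one position (next-token prediction)
vocab_size, seq_len, batch_size = 1000, 32, 4
batches = []
for _ in range(8):
    tokens = torch.randint(1, vocab_size, (seq_len + 1, batch_size))
    batches.append({'input_ids': tokens[:-1], 'labels': tokens[1:]})

model = TransformerGenerator(vocab_size, d_model=128, num_heads=4, d_ff=256, num_layers=2)
optimizer = torch.optim.AdamW(model.parameters(), lr=3e-4)
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=len(batches))
loss_fn = nn.CrossEntropyLoss()

trainer = TextGenerationTrainer(model, optimizer, scheduler, loss_fn)
for batch in batches:
    loss = trainer.train_step(batch)
print("average loss:", trainer.evaluate(batches))
```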
Text generation has entered the Transformer era: the tables above summarize how the main architectures, decoding strategies, and model scales compare.