回测流程与实践

1. Qlib 回测框架

1.1 回测架构

# Schematic overview of the backtest pipeline. This is illustrative pseudo-code
# (note the `...` placeholder below), not a runnable script.
# NOTE(review): names such as `qlib.data.get_data`, `qlib.backtest.run` and
# `qlib.backtest.analyze` should be confirmed against the Qlib API reference
# before being used verbatim.

# 1. Prepare data
data = qlib.data.get_data(start_date, end_date)
 
# 2. Train the model
model = train_model(train_data)
 
# 3. Define the strategy
strategy = Strategy(model)
 
# 4. Configure the backtest
config = {
    'executor': SimulatorExecutor(...),
    'account': 1000000,
    'benchmark': 'SPY'
}
 
# 5. Run the backtest
result = qlib.backtest.run(
    strategy=strategy,
    data=data,
    config=config
)
 
# 6. Analyse the results
metrics = qlib.backtest.analyze(result)

1.2 Qlib 回测流程

1. 初始化Qlib
   ↓
2. 加载数据
   ↓
3. 训练模型
   ↓
4. 定义策略
   ↓
5. 配置Executor
   ↓
6. 执行回测
   ↓
7. 分析结果
   ↓
8. 生成报告

2. 完整回测步骤

2.1 Step 1: 数据准备

import qlib
from qlib.constant import REG_CN
from qlib.data import D

# Initialise Qlib against the local China-market data bundle.
qlib.init(provider_uri='~/.qlib/qlib_data/cn_data', region=REG_CN)

# Build the CSI 300 stock-pool config. Instrument pools are exposed through
# the data API (`D.instruments`), not through the top-level `qlib` module.
instruments = D.instruments(market='csi300')

# Load the raw fields for the whole study window. Per the Qlib data API the
# result is a DataFrame indexed by (instrument, datetime) with one column
# per requested field.
data = D.features(
    instruments,
    fields=['$close', '$volume', '$factor'],
    start_time='2020-01-01',
    end_time='2022-12-31',
)

2.2 Step 2: 特征工程

# Feature expressions. In Qlib's expression language `Ref(x, N)` with N > 0
# reads the value N periods in the PAST, so the daily return is today's close
# divided by the previous close (not the other way round).
features = [
    '$close',
    '$volume',
    '$close/Ref($close, 1) - 1',  # daily return vs. previous close
    'Mean($close, 5)',  # 5-day moving average of close
    'Std($close, 20)',  # 20-day rolling std of close (volatility proxy)
]

# Evaluate the expressions for every instrument over the study window.
feature_data = D.features(
    instruments,
    fields=features,
    start_time='2020-01-01',
    end_time='2022-12-31',
)

2.3 Step 3: 模型训练

import lightgbm as lgb
import pandas as pd

# Label: the return realised AFTER the prediction date. `Ref(x, N)` with a
# negative N looks |N| periods ahead, so this is close[t+2] / close[t+1] - 1
# (buy at the next close, sell one day later). Positive offsets would point
# at past prices and give the model nothing to forecast.
label = 'Ref($close, -2)/Ref($close, -1) - 1'

# The label is not one of the feature columns, so it has to be fetched too.
label_data = D.features(
    instruments,
    fields=[label],
    start_time='2020-01-01',
    end_time='2022-12-31',
)
labelled = feature_data.join(label_data)

# Split on the datetime level. `D.features` returns an (instrument, datetime)
# MultiIndex, so a plain date-string row slice would act on the instrument
# level instead of the dates.
dates = labelled.index.get_level_values('datetime')
in_train = (dates >= pd.Timestamp('2020-01-01')) & (dates <= pd.Timestamp('2021-12-31'))
in_test = (dates >= pd.Timestamp('2022-01-01')) & (dates <= pd.Timestamp('2022-12-31'))

# Rows without a label (end of the window, suspended stocks) cannot be fitted.
# NOTE: labels of the last two training days use prices from the first test
# days; trim the training window by two days if a strict gap is required.
train_data = labelled[in_train].dropna(subset=[label])
test_data = labelled[in_test]

# Train the model on plain arrays (feature names contain characters such as
# commas that LightGBM does not accept as column names).
model = lgb.LGBMRegressor(
    n_estimators=100,
    max_depth=6,
    learning_rate=0.1,
    random_state=42,
)

model.fit(
    train_data[features].values,
    train_data[label].values,
)

# Generate predictions, keeping the (instrument, datetime) index so that the
# scores can be ranked per day and fed to a strategy.
predictions = pd.Series(
    model.predict(test_data[features].values),
    index=test_data.index,
)

2.4 Step 4: 策略定义

import pandas as pd


def topk_strategy(predictions, k=20):
    """Build an equal-weight Top-K portfolio from prediction scores.

    Args:
        predictions: pandas Series of prediction scores, one entry per
            instrument (a single cross-section).
        k: Maximum number of instruments to hold; must be at least 1.

    Returns:
        pandas Series of float weights on the same index as ``predictions``.
        The selected instruments share the weight equally and every other
        entry is 0.0. Entries whose score is NaN are never selected.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    # Start from a float Series: writing fractional weights into an integer
    # Series relies on an implicit upcast that pandas has deprecated.
    weights = pd.Series(0.0, index=predictions.index)

    # Rank valid scores from best to worst and keep the first k positions.
    ranked = predictions.dropna().sort_values(ascending=False)
    topk = ranked.iloc[:k]
    if topk.empty:
        return weights

    # Divide by the number actually selected so the weights still sum to 1
    # when fewer than k valid scores are available.
    weights.loc[topk.index] = 1.0 / len(topk)
    return weights

2.5 Step 5: 配置 Executor

from qlib.backtest.executor import SimulatorExecutor

# Executor settings: advance one trading day per step and record the
# portfolio metrics needed for the later analysis.
executor_config = {
    'time_per_step': 'day',
    'generate_portfolio_metrics': True,
}

# Exchange (market simulation) settings. These are NOT executor arguments;
# they are handed to `qlib.backtest.backtest(..., exchange_kwargs=exchange)`.
exchange = {
    'freq': 'day',
    'limit_threshold': 0.095,
    'deal_price': 'close',
    'open_cost': 0.0005,  # buy-side fee: 0.05%
    'close_cost': 0.0015,  # sell-side fee: 0.15%
    'min_cost': 5,  # minimum commission per trade: 5 CNY
}

# Create the executor from its own settings only.
executor = SimulatorExecutor(**executor_config)

2.6 Step 6: 执行回测

import pandas as pd
from qlib.backtest import backtest
from qlib.contrib.evaluate import risk_analysis
from qlib.contrib.strategy import WeightStrategyBase


class TopkWeightStrategy(WeightStrategyBase):
    """Adapter that exposes ``topk_strategy`` as a Qlib strategy object.

    Qlib's ``backtest`` expects a strategy instance rather than a plain
    callable; the base class turns the returned target weights into orders.
    """

    def generate_target_weight_position(self, score, current, trade_start_time, trade_end_time):
        """Return ``{instrument: weight}`` for the current trading step."""
        weights = topk_strategy(score, k=20)
        return weights[weights > 0].to_dict()


# Prediction scores indexed like the test set; works whether `predictions`
# is a bare array or already a Series on that index.
signal = pd.Series(predictions, index=test_data.index)

# Run the backtest over the test window.
# NOTE(review): the trading calendar must cover `end_time`; shorten the
# window if the local data bundle ends earlier.
portfolio_metric_dict, indicator_dict = backtest(
    start_time='2022-01-01',
    end_time='2022-12-31',
    strategy=TopkWeightStrategy(signal=signal),
    executor=executor,
    benchmark='SH000300',
    account=1000000,
    exchange_kwargs=exchange,
)

# Results are keyed by frequency; '1day' holds (report DataFrame, positions).
report, _positions = portfolio_metric_dict['1day']
net_return = report['return'] - report['cost']  # daily return after costs
cumulative_return = net_return.cumsum()
drawdown = cumulative_return - cumulative_return.cummax()
risk = risk_analysis(net_return, freq='day')['risk']

# Summary containers with the keys used by the report and plotting code.
portfolio_metrics = {
    'return': cumulative_return.iloc[-1],
    'annualized_return': risk['annualized_return'],
    'cumulative_return': cumulative_return,
}
indicators = {
    'sharpe_ratio': risk['information_ratio'],
    'max_drawdown': risk['max_drawdown'],
    'drawdown': drawdown,
}

# Print the headline numbers.
print("回测结果:")
print(f"总收益率: {portfolio_metrics['return']:.4f}")
print(f"年化收益率: {portfolio_metrics['annualized_return']:.4f}")
print(f"夏普比率: {indicators['sharpe_ratio']:.4f}")
print(f"最大回撤: {indicators['max_drawdown']:.4f}")

2.7 Step 7: 分析结果

import matplotlib.pyplot as plt

# Chart 1: cumulative return of the strategy over time.
cum_ret = portfolio_metrics['cumulative_return']
plt.figure(figsize=(12, 6))
plt.plot(cum_ret.index, cum_ret.values, label='Strategy')
plt.title('Cumulative Return')
plt.xlabel('Date')
plt.ylabel('Cumulative Return')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

# Chart 2: drawdown curve, drawn in red with the area down to zero shaded.
dd = indicators['drawdown']
plt.figure(figsize=(12, 6))
plt.plot(dd.index, dd.values, color='red')
plt.fill_between(dd.index, dd.values, 0, alpha=0.3, color='red')
plt.title('Drawdown')
plt.xlabel('Date')
plt.ylabel('Drawdown')
plt.grid(True, alpha=0.3)
plt.show()

3. 完整回测示例

import qlib
from qlib.constant import REG_CN
from qlib.data import D
from qlib.backtest.executor import SimulatorExecutor
from qlib.backtest import backtest
from qlib.contrib.evaluate import risk_analysis
from qlib.contrib.strategy import WeightStrategyBase
import lightgbm as lgb
import pandas as pd
import numpy as np

# 1. Initialise Qlib against the local China-market data bundle.
qlib.init(provider_uri='~/.qlib/qlib_data/cn_data', region=REG_CN)

# 2. Stock pool: instrument pools come from the data API.
instruments = D.instruments(market='csi300')

# 3. Features and label. `Ref(x, N)` looks N periods back for N > 0 and
#    |N| periods ahead for N < 0, so features only use past data while the
#    label is the return realised after the prediction date.
features = [
    '$close',
    '$close/Ref($close, 1) - 1',  # daily return vs. previous close
    'Mean($close, 5)',  # 5-day moving average of close
    'Std($close, 20)',  # 20-day rolling std of close
]
label = 'Ref($close, -2)/Ref($close, -1) - 1'

# 4. Load features and label together; rows are indexed by
#    (instrument, datetime).
data = D.features(
    instruments,
    fields=features + [label],
    start_time='2020-01-01',
    end_time='2022-12-31',
)

# 5. Split on the datetime level (a plain date-string row slice would act on
#    the instrument level). Training rows need a label; test rows do not.
dates = data.index.get_level_values('datetime')
in_train = (dates >= pd.Timestamp('2020-01-01')) & (dates <= pd.Timestamp('2021-12-31'))
in_test = (dates >= pd.Timestamp('2022-01-01')) & (dates <= pd.Timestamp('2022-12-31'))
train_data = data[in_train].dropna(subset=[label])
test_data = data[in_test]

# 6. Train on plain arrays: the target is the forward return, never a feature
#    column, and the expression strings are not valid LightGBM column names.
model = lgb.LGBMRegressor(
    n_estimators=100,
    max_depth=6,
    learning_rate=0.1,
)
model.fit(train_data[features].values, train_data[label].values)

# 7. Predict and keep the (instrument, datetime) index on the scores.
predictions = model.predict(test_data[features].values)
predictions = pd.Series(predictions, index=test_data.index)


# 8. Strategy: equal-weight the 20 best-scored instruments of a cross-section.
def strategy(pred):
    """Return float weights on ``pred``'s index; selected names share 1.0."""
    weights = pd.Series(0.0, index=pred.index)
    top20 = pred.dropna().sort_values(ascending=False).iloc[:20]
    if not top20.empty:
        # Divide by the number actually selected so weights sum to 1.
        weights.loc[top20.index] = 1.0 / len(top20)
    return weights


class Top20WeightStrategy(WeightStrategyBase):
    """Adapter that exposes ``strategy`` as a Qlib strategy object."""

    def generate_target_weight_position(self, score, current, trade_start_time, trade_end_time):
        """Return ``{instrument: weight}`` for the current trading step."""
        weights = strategy(score)
        return weights[weights > 0].to_dict()


# 9. Exchange settings (passed to `backtest`) and the daily executor.
exchange = {
    'freq': 'day',
    'limit_threshold': 0.095,
    'deal_price': 'close',
    'open_cost': 0.0005,
    'close_cost': 0.0015,
    'min_cost': 5,
}

executor = SimulatorExecutor(
    time_per_step='day',
    generate_portfolio_metrics=True,
)

# 10. Run the backtest over the test window.
# NOTE(review): the trading calendar must cover `end_time`; shorten the
# window if the local data bundle ends earlier.
portfolio_metric_dict, indicator_dict = backtest(
    start_time='2022-01-01',
    end_time='2022-12-31',
    strategy=Top20WeightStrategy(signal=predictions),
    executor=executor,
    benchmark='SH000300',
    account=1000000,
    exchange_kwargs=exchange,
)

# Results are keyed by frequency; '1day' holds (report DataFrame, positions).
report, _positions = portfolio_metric_dict['1day']
net_return = report['return'] - report['cost']  # daily return after costs
cumulative_return = net_return.cumsum()
risk = risk_analysis(net_return, freq='day')['risk']

portfolio_metrics = {
    'return': cumulative_return.iloc[-1],
    'annualized_return': risk['annualized_return'],
    'cumulative_return': cumulative_return,
}
indicators = {
    'sharpe_ratio': risk['information_ratio'],
    'max_drawdown': risk['max_drawdown'],
    'drawdown': cumulative_return - cumulative_return.cummax(),
}

# 11. Print the headline numbers.
print("回测完成!")
print(f"总收益率: {portfolio_metrics['return']:.4f}")
print(f"年化收益率: {portfolio_metrics['annualized_return']:.4f}")
print(f"夏普比率: {indicators['sharpe_ratio']:.4f}")
print(f"最大回撤: {indicators['max_drawdown']:.4f}")

4. 实践建议

4.1 回测原则

  1. 简单开始

    • 从简单策略开始
    • 逐步增加复杂度
    • 理解每个环节
  2. 严格验证

    • 使用样本外验证
    • 多时间段验证
    • 成本敏感性分析
  3. 风险控制

    • 设置止损机制
    • 分散投资
    • 限制单股权重

4.2 常见错误

  1. 未来函数

    • 使用未来数据
    • 数据泄露
    • 解决方案:检查数据对齐——特征只能使用 t 日及之前的数据(Ref 的偏移为正),标签才使用未来收益(Ref 的偏移为负)
  2. 成本低估

    • 忽略交易成本
    • 滑点和市场冲击
    • 解决方案:使用合理的成本参数
  3. 过拟合

    • 过度优化参数
    • 样本内表现好,样本外差
    • 解决方案:样本外验证

4.3 最佳实践

# ✅ Recommended practice

# 1. Validate out of sample: train on 2020-2021, test on 2022.
#    Assumes `data` carries a 'datetime' index level, as returned by
#    `D.features`; a plain date-string row slice would not select by date
#    on an (instrument, datetime) MultiIndex.
dates = data.index.get_level_values('datetime')
train_data = data[(dates.year >= 2020) & (dates.year <= 2021)]
test_data = data[dates.year == 2022]

# 2. Use realistic cost parameters.
exchange = {
    'open_cost': 0.0005,
    'close_cost': 0.0015,
    'min_cost': 5,
}


# 3. Diversify, and 4. control risk with a per-stock weight cap.
def strategy(pred, max_weight=0.1):
    """Equal-weight the 30 best-scored stocks, capped at ``max_weight`` each.

    Args:
        pred: pandas Series of prediction scores, one entry per instrument.
        max_weight: Upper bound for any single stock's weight.

    Returns:
        pandas Series of float weights on ``pred``'s index. The weights sum
        to at most 1; whatever the cap removes is left uninvested.
    """
    weights = pd.Series(0.0, index=pred.index)
    top30 = pred.dropna().nlargest(30)
    if not top30.empty:
        weights.loc[top30.index] = 1.0 / len(top30)
    # Cap only: renormalising after clipping could push weights back above
    # the cap when few stocks are selected.
    return weights.clip(upper=max_weight)
 
# ❌ Anti-patterns (intentionally wrong; shown for contrast, do not copy)
 
# 1. Training and testing on the full dataset
model = Model()
model.fit(data, labels)  # fits on all available data
predictions = model.predict(data)  # predicts on the very same data
 
# 2. Ignoring transaction costs
exchange = {
    'open_cost': 0,
    'close_cost': 0
}
 
# 3. Concentrated portfolio
def strategy(pred):
    top5 = pred.nlargest(5)  # only 5 stocks are selected
    weights = pd.Series(0, index=pred.index)
    weights[top5.index] = 1.0/5
    return weights  # 20% weight per stock

总结

完整的回测流程包括:

  1. 数据准备:加载和清洗数据
  2. 特征工程:计算特征和标签
  3. 模型训练:训练预测模型
  4. 策略定义:定义交易策略
  5. 回测配置:配置Executor
  6. 执行回测:运行回测
  7. 分析结果:评估策略表现

建议

  • 从简单策略开始
  • 严格验证
  • 控制风险
  • 避免常见错误