回测流程与实践
1. Qlib 回测框架
1.1 回测架构
# Schematic outline of a backtest run, from data loading to analysis.
# NOTE(review): this reads as pseudo-code — names such as qlib.data.get_data,
# train_model, Strategy and qlib.backtest.run are not defined in this document;
# confirm the real entry points against the Qlib documentation before use.
# 1. Prepare the data
data = qlib.data.get_data(start_date, end_date)
# 2. Train the model
model = train_model(train_data)
# 3. Define the strategy
strategy = Strategy(model)
# 4. Configure the backtest
config = {
'executor': SimulatorExecutor(...),
'account': 1000000,
'benchmark': 'SPY'
}
# 5. Run the backtest
result = qlib.backtest.run(
strategy=strategy,
data=data,
config=config
)
# 6. Analyse the results
metrics = qlib.backtest.analyze(result)
1.2 Qlib 回测流程
1. 初始化Qlib
↓
2. 加载数据
↓
3. 训练模型
↓
4. 定义策略
↓
5. 配置Executor
↓
6. 执行回测
↓
7. 分析结果
↓
8. 生成报告
2. 完整回测步骤
2.1 Step 1: 数据准备
import qlib
from qlib.constant import REG_CN
from qlib.data import D
# Initialise Qlib with the local CN data bundle
qlib.init(provider_uri='~/.qlib/qlib_data/cn_data', region=REG_CN)
# Select the stock universe. D.instruments builds the instrument config for a
# market; the top-level qlib package has no get_instruments helper.
instruments = D.instruments('csi300')
# Fetch the data
data = D.features(
instruments,
fields=['$close', '$volume', '$factor'],
start_time='2020-01-01',
end_time='2022-12-31'
)
2.2 Step 2: 特征工程
# Feature expressions in Qlib's expression syntax.
# Ref($close, 1) is the previous period's close, so the 1-day return is
# today's close divided by yesterday's close, minus one.
features = [
    '$close',
    '$volume',
    '$close/Ref($close, 1) - 1',  # 1-day return
    'Mean($close, 5)',  # 5-day moving average
    'Std($close, 20)',  # 20-day rolling standard deviation (volatility)
]
# 计算特征
feature_data = D.features(
instruments,
fields=features,
start_time='2020-01-01',
end_time='2022-12-31'
)
2.3 Step 3: 模型训练
# Label: the return from T+1 to T+2. Negative Ref offsets look forward in
# Qlib; positive offsets (as in Ref($close, 2)) would describe the past and
# give the model nothing to predict.
label = 'Ref($close, -2)/Ref($close, -1) - 1'
# feature_data only holds the feature columns, so load the label separately
# and align it on the shared (instrument, datetime) index.
label_data = D.features(
    instruments,
    fields=[label],
    start_time='2020-01-01',
    end_time='2022-12-31'
)
# Rows without a label (e.g. the last two trading days) cannot be trained on.
dataset = feature_data.join(label_data).dropna(subset=[label])
# Split by date. The index is (instrument, datetime), so the dates have to be
# taken from the 'datetime' level rather than sliced with df[start:end].
dates = dataset.index.get_level_values('datetime')
train_data = dataset[(dates >= '2020-01-01') & (dates <= '2021-12-31')]
test_data = dataset[(dates >= '2022-01-01') & (dates <= '2022-12-31')]
# Train the model
import lightgbm as lgb
model = lgb.LGBMRegressor(
    n_estimators=100,
    max_depth=6,
    learning_rate=0.1,
    random_state=42
)
model.fit(
    train_data[features].values,
    train_data[label].values
)
# Generate predictions
predictions = model.predict(test_data[features].values)
2.4 Step 4: 策略定义
def topk_strategy(predictions, k=20):
"""
Top-K投资组合策略
参数:
predictions: 预测分数
k: 选择股票数量
返回:
weights: 股票权重
"""
# 按预测分数排序
sorted_predictions = predictions.sort_values(ascending=False)
# 选择Top-K
topk = sorted_predictions[:k]
# 等权重
weight = 1.0 / k
# 分配权重
weights = pd.Series(0, index=predictions.index)
weights[topk.index] = weight
return weights2.5 Step 5: 配置 Executor
from qlib.backtest.executor import SimulatorExecutor
from qlib.backtest.backtest import backtest_executor
# Executor settings
executor_config = {
'time_per_step': 'day',
'generate_portfolio_metrics': True
}
# Exchange settings: trading frequency, price-limit threshold, deal price and costs
exchange = {
'freq': 'day',
'limit_threshold': 0.095,
'deal_price': 'close',
'open_cost': 0.0005, # buy-side fee rate: 0.05%
'close_cost': 0.0015, # sell-side fee rate: 0.15%
'min_cost': 5 # minimum commission of 5 yuan
}
# 创建Executor
executor = SimulatorExecutor(
exchange=exchange,
**executor_config
)
2.6 Step 6: 执行回测
from qlib.backtest import backtest
# Run the backtest with the Top-K strategy (k=20) on the test data.
# NOTE(review): the keyword names passed here (executor=, strategy=,
# test_data=) should be checked against qlib.backtest.backtest — TODO confirm.
portfolio_metrics, indicators = backtest(
executor=executor,
strategy=lambda x: topk_strategy(x, k=20),
test_data=test_data
)
# Print the headline results
print("回测结果:")
print(f"总收益率: {portfolio_metrics['return']:.4f}")
print(f"年化收益率: {portfolio_metrics['annualized_return']:.4f}")
print(f"夏普比率: {indicators['sharpe_ratio']:.4f}")
print(f"最大回撤: {indicators['max_drawdown']:.4f}")
2.7 Step 7: 分析结果
import matplotlib.pyplot as plt
# Cumulative return curve
cumulative = portfolio_metrics['cumulative_return']
plt.figure(figsize=(12, 6))
plt.plot(cumulative.index, cumulative.values, label='Strategy')
plt.title('Cumulative Return')
plt.xlabel('Date')
plt.ylabel('Cumulative Return')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()
# Drawdown curve, shaded down to the zero line
drawdown = indicators['drawdown']
plt.figure(figsize=(12, 6))
plt.plot(drawdown.index, drawdown.values, color='red')
plt.fill_between(drawdown.index, drawdown.values, 0, alpha=0.3, color='red')
plt.title('Drawdown')
plt.xlabel('Date')
plt.ylabel('Drawdown')
plt.grid(True, alpha=0.3)
plt.show()
3. 完整回测示例
import qlib
from qlib.constant import REG_CN
from qlib.data import D
from qlib.backtest.executor import SimulatorExecutor
from qlib.backtest import backtest
import lightgbm as lgb
import pandas as pd
import numpy as np
# 1. Initialise Qlib
qlib.init(provider_uri='~/.qlib/qlib_data/cn_data', region=REG_CN)
# 2. Select the stock universe (the qlib package has no get_instruments helper)
instruments = D.instruments('csi300')
# 3. Define the features and the label
features = [
    '$close',
    '$close/Ref($close, 1) - 1',  # 1-day return; Ref(x, 1) is the previous value
    'Mean($close, 5)',
    'Std($close, 20)',
]
# Forward return from T+1 to T+2. Negative Ref offsets look into the future,
# which is what a prediction target needs.
label = 'Ref($close, -2)/Ref($close, -1) - 1'
# 4. Load features and label together so they share one index
data = D.features(
    instruments,
    fields=features + [label],
    start_time='2020-01-01',
    end_time='2022-12-31'
)
# Rows without a label cannot be used for training or evaluation.
data = data.dropna(subset=[label])
# 5. Split by date. The index is (instrument, datetime), so select on the
# 'datetime' level instead of slicing the frame with date strings.
dates = data.index.get_level_values('datetime')
train_data = data[(dates >= '2020-01-01') & (dates <= '2021-12-31')]
test_data = data[(dates >= '2022-01-01') & (dates <= '2022-12-31')]
# 6. Train the model on the forward return, not on '$close', which is itself
# one of the input features.
model = lgb.LGBMRegressor(
    n_estimators=100,
    max_depth=6,
    learning_rate=0.1
)
model.fit(train_data[features], train_data[label])
# 7. Generate predictions, indexed like the test set
predictions = pd.Series(
    model.predict(test_data[features]),
    index=test_data.index
)
# 8. Define the strategy
def strategy(pred):
    """Equal-weight the 20 highest-scoring entries of ``pred``.

    Args:
        pred: pandas Series of prediction scores.

    Returns:
        A float Series on the index of ``pred``; the selected entries share
        the portfolio equally and all others are 0.0.
    """
    # Float zeros, so assigning fractional weights does not change the dtype.
    weights = pd.Series(0.0, index=pred.index)
    # nlargest skips NaN scores and returns fewer rows when fewer exist.
    top20 = pred.nlargest(20)
    if not top20.empty:
        # Divide by the real count so the weights sum to 1 even with < 20 names.
        weights[top20.index] = 1.0 / len(top20)
    return weights
# 9. Configure the executor: exchange settings with trading costs
exchange = {
'freq': 'day',
'limit_threshold': 0.095,
'deal_price': 'close',
'open_cost': 0.0005,
'close_cost': 0.0015,
'min_cost': 5
}
# NOTE(review): confirm that SimulatorExecutor accepts an `exchange` keyword
# holding a plain dict — TODO check against the Qlib API.
executor = SimulatorExecutor(exchange=exchange)
# 10. Run the backtest
# NOTE(review): keyword names (executor=, strategy=, test_data=) should be
# checked against qlib.backtest.backtest — TODO confirm.
portfolio_metrics, indicators = backtest(
executor=executor,
strategy=strategy,
test_data=test_data
)
# 11. Print the results
print("回测完成!")
print(f"总收益率: {portfolio_metrics['return']:.4f}")
print(f"年化收益率: {portfolio_metrics['annualized_return']:.4f}")
print(f"夏普比率: {indicators['sharpe_ratio']:.4f}")
print(f"最大回撤: {indicators['max_drawdown']:.4f}")
4. 实践建议
4.1 回测原则
- 简单开始
  - 从简单策略开始
  - 逐步增加复杂度
  - 理解每个环节
- 严格验证
  - 使用样本外验证
  - 多时间段验证
  - 成本敏感性分析
- 风险控制
  - 设置止损机制
  - 分散投资
  - 限制单股权重
4.2 常见错误
- 未来函数
  - 使用未来数据
  - 数据泄露
  - 解决方案:检查数据对齐
- 成本低估
  - 忽略交易成本
  - 滑点和市场冲击
  - 解决方案:使用合理的成本参数
- 过拟合
  - 过度优化参数
  - 样本内表现好,样本外差
  - 解决方案:样本外验证
4.3 最佳实践
# ✅ Recommended practice
# 1. Validate out of sample: train on 2020-2021, test on 2022.
# data['2022'] would be read as a column lookup on a DataFrame, so select the
# rows through the 'datetime' index level instead.
# NOTE(review): assumes `data` is indexed by (instrument, datetime), as
# returned by D.features — confirm.
dates = data.index.get_level_values('datetime')
train_data = data[(dates >= '2020-01-01') & (dates <= '2021-12-31')]
test_data = data[(dates >= '2022-01-01') & (dates <= '2022-12-31')]
# 2. Use realistic cost parameters
exchange = {
    'open_cost': 0.0005,
    'close_cost': 0.0015,
    'min_cost': 5,
}
# 3. Diversify: spread the portfolio over the 30 best-scoring names
def strategy(pred):
    """Equal-weight the 30 highest-scoring entries of ``pred``.

    Args:
        pred: pandas Series of prediction scores.

    Returns:
        A float Series on the index of ``pred``; the selected entries share
        the portfolio equally and all others are 0.0.
    """
    # Float zeros, so assigning fractional weights does not change the dtype.
    weights = pd.Series(0.0, index=pred.index)
    # nlargest skips NaN scores and returns fewer rows when fewer exist.
    top30 = pred.nlargest(30)
    if not top30.empty:
        # Divide by the real count so the weights sum to 1 even with < 30 names.
        weights[top30.index] = 1.0 / len(top30)
    return weights
# 4. Risk control: cap the weight of any single stock
# NOTE(review): `weights` is only assigned inside strategy() in this document,
# so this presumably operates on the Series that strategy(pred) returns — confirm.
max_weight = 0.1
weights = weights.clip(upper=max_weight)
# Rescale so the weights sum to 1 again. If clipping removed any weight, this
# rescaling lifts the clipped entries back above max_weight.
weights = weights / weights.sum()
# ❌ Anti-patterns (shown as examples of what NOT to do)
# 1. Training and testing on the same, full data set
model = Model()
model.fit(data, labels) # fits on all of the data
predictions = model.predict(data) # predicts on the very same data
# 2. Ignoring trading costs
exchange = {
'open_cost': 0,
'close_cost': 0
}
# 3. 集中投资
def strategy(pred):
top5 = pred.nlargest(5) # 只选5只
weights = pd.Series(0, index=pred.index)
weights[top5.index] = 1.0/5
return weights # 单股权重20%
总结
完整的回测流程包括:
- 数据准备:加载和清洗数据
- 特征工程:计算特征和标签
- 模型训练:训练预测模型
- 策略定义:定义交易策略
- 回测配置:配置Executor
- 执行回测:运行回测
- 分析结果:评估策略表现
建议:
- 从简单策略开始
- 严格验证
- 控制风险
- 避免常见错误