FPGA 高頻交易模擬測試範例
1. 簡單的訂單匹配引擎 (Verilog)
order_matcher.v - 基本訂單匹配邏輯
// 簡化的訂單匹配引擎
module order_matcher #(
parameter PRICE_WIDTH = 32,
parameter QTY_WIDTH = 16,
parameter ID_WIDTH = 16
)(
input wire clk,
input wire rst,
// 新訂單輸入
input wire order_valid,
input wire order_is_buy, // 1=買單, 0=賣單
input wire [PRICE_WIDTH-1:0] order_price,
input wire [QTY_WIDTH-1:0] order_qty,
input wire [ID_WIDTH-1:0] order_id,
// 匹配輸出
output reg match_valid,
output reg [ID_WIDTH-1:0] match_buy_id,
output reg [ID_WIDTH-1:0] match_sell_id,
output reg [PRICE_WIDTH-1:0] match_price,
output reg [QTY_WIDTH-1:0] match_qty,
// 性能計數器
output reg [31:0] total_orders,
output reg [31:0] total_matches,
output reg [31:0] cycle_counter
);
// 簡化的訂單簿(實際應該用 CAM 或更複雜的結構)
reg [PRICE_WIDTH-1:0] best_bid_price;
reg [QTY_WIDTH-1:0] best_bid_qty;
reg [ID_WIDTH-1:0] best_bid_id;
reg bid_valid;
reg [PRICE_WIDTH-1:0] best_ask_price;
reg [QTY_WIDTH-1:0] best_ask_qty;
reg [ID_WIDTH-1:0] best_ask_id;
reg ask_valid;
// 延遲計數(模擬處理延遲)
reg [7:0] processing_cycles;
always @(posedge clk) begin
if (rst) begin
match_valid <= 0;
bid_valid <= 0;
ask_valid <= 0;
total_orders <= 0;
total_matches <= 0;
cycle_counter <= 0;
processing_cycles <= 0;
end else begin
cycle_counter <= cycle_counter + 1;
match_valid <= 0;
if (order_valid) begin
total_orders <= total_orders + 1;
processing_cycles <= processing_cycles + 1;
if (order_is_buy) begin
// 買單邏輯
if (ask_valid && order_price >= best_ask_price) begin
// 匹配成功
match_valid <= 1;
match_buy_id <= order_id;
match_sell_id <= best_ask_id;
match_price <= best_ask_price;
match_qty <= (order_qty < best_ask_qty) ? order_qty : best_ask_qty;
total_matches <= total_matches + 1;
// 更新訂單簿
if (order_qty >= best_ask_qty) begin
ask_valid <= 0;
end else begin
best_ask_qty <= best_ask_qty - order_qty;
end
end else begin
// 加入訂單簿
if (!bid_valid || order_price > best_bid_price) begin
best_bid_price <= order_price;
best_bid_qty <= order_qty;
best_bid_id <= order_id;
bid_valid <= 1;
end
end
end else begin
// 賣單邏輯
if (bid_valid && order_price <= best_bid_price) begin
// 匹配成功
match_valid <= 1;
match_buy_id <= best_bid_id;
match_sell_id <= order_id;
match_price <= best_bid_price;
match_qty <= (order_qty < best_bid_qty) ? order_qty : best_bid_qty;
total_matches <= total_matches + 1;
// 更新訂單簿
if (order_qty >= best_bid_qty) begin
bid_valid <= 0;
end else begin
best_bid_qty <= best_bid_qty - order_qty;
end
end else begin
// 加入訂單簿
if (!ask_valid || order_price < best_ask_price) begin
best_ask_price <= order_price;
best_ask_qty <= order_qty;
best_ask_id <= order_id;
ask_valid <= 1;
end
end
end
end
end
end
endmodule
market_data_decoder.v - 市場數據解碼器
// 簡化的市場數據解碼器(模擬 ITCH 協議)
module market_data_decoder (
input wire clk,
input wire rst,
// 輸入數據流
input wire [7:0] data_in,
input wire data_valid,
// 解碼輸出
output reg msg_valid,
output reg [7:0] msg_type,
output reg [31:0] timestamp,
output reg [31:0] price,
output reg [15:0] quantity,
output reg [15:0] order_id,
// 性能指標
output reg [31:0] messages_decoded,
output reg [31:0] decode_cycles
);
// 狀態機
localparam IDLE = 0, HEADER = 1, PAYLOAD = 2;
reg [1:0] state;
reg [7:0] byte_counter;
reg [255:0] buffer; // 訊息緩衝區
always @(posedge clk) begin
if (rst) begin
state <= IDLE;
msg_valid <= 0;
messages_decoded <= 0;
decode_cycles <= 0;
end else begin
decode_cycles <= decode_cycles + 1;
msg_valid <= 0;
case (state)
IDLE: begin
if (data_valid) begin
buffer[7:0] <= data_in;
byte_counter <= 1;
state <= HEADER;
end
end
HEADER: begin
if (data_valid) begin
buffer[byte_counter*8 +: 8] <= data_in;
byte_counter <= byte_counter + 1;
if (byte_counter >= 4) begin // 假設固定長度標頭
msg_type <= buffer[7:0];
timestamp <= buffer[39:8];
state <= PAYLOAD;
end
end
end
PAYLOAD: begin
if (data_valid) begin
buffer[byte_counter*8 +: 8] <= data_in;
byte_counter <= byte_counter + 1;
if (byte_counter >= 16) begin // 假設固定長度訊息
price <= buffer[71:40];
quantity <= buffer[87:72];
order_id <= buffer[103:88];
msg_valid <= 1;
messages_decoded <= messages_decoded + 1;
state <= IDLE;
end
end
end
endcase
end
end
endmodule
2. C++ 測試平臺
tb_hft_system.cpp - 高頻交易系統測試
#include <verilated.h>
#include <verilated_vcd_c.h>
#include "Vorder_matcher.h"
#include <iostream>
#include <fstream>
#include <vector>
#include <chrono>
#include <random>
class HFTSimulator {
private:
Vorder_matcher* dut;
VerilatedVcdC* tfp;
vluint64_t sim_time;
// 性能統計
struct Stats {
uint64_t total_orders = 0;
uint64_t total_matches = 0;
uint64_t total_cycles = 0;
double avg_latency = 0;
std::vector<uint32_t> latency_histogram;
} stats;
// 測試數據
struct Order {
bool is_buy;
uint32_t price;
uint16_t quantity;
uint16_t id;
uint64_t timestamp;
};
std::vector<Order> test_orders;
std::mt19937 rng;
public:
HFTSimulator() : sim_time(0), rng(std::chrono::steady_clock::now().time_since_epoch().count()) {
dut = new Vorder_matcher;
// 設置波形追蹤
Verilated::traceEverOn(true);
tfp = new VerilatedVcdC;
dut->trace(tfp, 99);
tfp->open("hft_simulation.vcd");
// 初始化
dut->clk = 0;
dut->rst = 1;
dut->order_valid = 0;
}
~HFTSimulator() {
tfp->close();
delete tfp;
delete dut;
}
// 生成測試訂單
void generateTestOrders(size_t count) {
std::uniform_int_distribution<> price_dist(9900, 10100); // 價格範圍
std::uniform_int_distribution<> qty_dist(100, 1000); // 數量範圍
std::bernoulli_distribution buy_dist(0.5); // 買賣機率
for (size_t i = 0; i < count; i++) {
Order order;
order.is_buy = buy_dist(rng);
order.price = price_dist(rng);
order.quantity = qty_dist(rng);
order.id = i;
order.timestamp = sim_time;
test_orders.push_back(order);
}
std::cout << "Generated " << count << " test orders" << std::endl;
}
// 載入歷史市場數據
void loadMarketData(const std::string& filename) {
std::ifstream file(filename);
if (!file.is_open()) {
std::cerr << "Cannot open market data file: " << filename << std::endl;
return;
}
std::string line;
while (std::getline(file, line)) {
// 解析市場數據格式(CSV)
// timestamp,side,price,quantity
// 實作省略
}
}
// 運行單一時鐘週期
void tick() {
dut->clk = 0;
dut->eval();
tfp->dump(sim_time++);
dut->clk = 1;
dut->eval();
tfp->dump(sim_time++);
}
// 送出訂單
void sendOrder(const Order& order) {
dut->order_valid = 1;
dut->order_is_buy = order.is_buy;
dut->order_price = order.price;
dut->order_qty = order.quantity;
dut->order_id = order.id;
tick();
dut->order_valid = 0;
// 等待處理
for (int i = 0; i < 5; i++) {
tick();
if (dut->match_valid) {
stats.total_matches++;
uint32_t latency = sim_time - order.timestamp;
if (latency < stats.latency_histogram.size()) {
stats.latency_histogram[latency]++;
}
break;
}
}
}
// 運行模擬
void runSimulation() {
std::cout << "\n=== Starting HFT Simulation ===" << std::endl;
// Reset
dut->rst = 1;
for (int i = 0; i < 10; i++) tick();
dut->rst = 0;
// 記錄開始時間
auto start_time = std::chrono::high_resolution_clock::now();
// 送出所有測試訂單
for (const auto& order : test_orders) {
sendOrder(order);
// 模擬訂單間隔(微秒級)
for (int i = 0; i < 10; i++) tick();
}
// 記錄結束時間
auto end_time = std::chrono::high_resolution_clock::now();
auto duration = std::chrono::duration_cast<std::chrono::milliseconds>(end_time - start_time);
// 收集統計資料
stats.total_orders = dut->total_orders;
stats.total_matches = dut->total_matches;
stats.total_cycles = dut->cycle_counter;
// 輸出結果
printResults(duration.count());
}
// 輸出結果
void printResults(long long sim_duration_ms) {
std::cout << "\n=== Simulation Results ===" << std::endl;
std::cout << "Simulation duration: " << sim_duration_ms << " ms" << std::endl;
std::cout << "Total orders processed: " << stats.total_orders << std::endl;
std::cout << "Total matches: " << stats.total_matches << std::endl;
std::cout << "Match rate: " << (100.0 * stats.total_matches / stats.total_orders) << "%" << std::endl;
std::cout << "Total cycles: " << stats.total_cycles << std::endl;
// 假設 FPGA 運行在 200 MHz
double fpga_freq = 200e6; // Hz
double fpga_time = stats.total_cycles / fpga_freq;
std::cout << "\nEstimated FPGA execution time: " << (fpga_time * 1e6) << " μs" << std::endl;
std::cout << "Estimated throughput: " << (stats.total_orders / fpga_time / 1e6) << " M orders/sec" << std::endl;
// 延遲分析
if (stats.total_matches > 0) {
double avg_match_cycles = (double)stats.total_cycles / stats.total_matches;
double avg_latency_ns = avg_match_cycles * (1e9 / fpga_freq);
std::cout << "Average match latency: " << avg_latency_ns << " ns" << std::endl;
}
}
};
int main(int argc, char** argv) {
Verilated::commandArgs(argc, argv);
HFTSimulator sim;
// 生成測試數據
sim.generateTestOrders(10000);
// 運行模擬
sim.runSimulation();
return 0;
}
3. 進階測試 Makefile
Makefile
# HFT FPGA Simulation Makefile
VERILATOR = verilator
VERILATOR_ROOT ?= $(HOME)/.mybin/verilator/share/verilator
# 編譯選項
CXX = g++
CXXFLAGS = -Wall -O3 -std=c++17 -I$(VERILATOR_ROOT)/include -I./obj_dir
LDFLAGS = -lpthread
# Verilator 選項(優化性能)
VFLAGS = --cc --trace --exe --build
VFLAGS += -O3 --x-assign fast --x-initial fast
VFLAGS += --threads 4 # 多執行緒加速
# 檔案
VERILOG_SOURCES = order_matcher.v market_data_decoder.v
CPP_SOURCES = tb_hft_system.cpp
# 目標
all: sim
# 編譯
compile:
$(VERILATOR) $(VFLAGS) \
--top-module order_matcher \
$(VERILOG_SOURCES) \
$(CPP_SOURCES) \
-o hft_sim
# 運行模擬
sim: compile
@echo "=== Running HFT Simulation ==="
time ./obj_dir/hft_sim
@echo "=== Simulation Complete ==="
# 性能分析
perf: compile
perf record -g ./obj_dir/hft_sim
perf report
# 延遲分析
latency:
$(VERILATOR) $(VFLAGS) --prof-cfunc \
--top-module order_matcher \
$(VERILOG_SOURCES) \
$(CPP_SOURCES) \
-o hft_sim_prof
./obj_dir/hft_sim_prof
verilator_profcfunc profiling.dat > latency_report.txt
@echo "Latency report saved to latency_report.txt"
# 波形分析
wave: sim
gtkwave hft_simulation.vcd &
# 批量測試
benchmark:
@echo "=== Running Benchmark Suite ==="
@for size in 1000 10000 100000; do \
echo "Testing with $$size orders..."; \
./obj_dir/hft_sim --orders $$size; \
done
clean:
rm -rf obj_dir *.vcd *.dat *.txt
.PHONY: all compile sim perf latency wave benchmark clean
4. 性能測試腳本
benchmark_hft.sh
#!/bin/bash
echo "======================================"
echo " HFT FPGA Simulation Benchmark"
echo "======================================"
# 編譯
echo "Building simulation..."
make clean
make compile
# 不同場景測試
echo -e "\n--- Test 1: Normal Trading ---"
./obj_dir/hft_sim --orders 10000 --volatility low
echo -e "\n--- Test 2: High Volatility ---"
./obj_dir/hft_sim --orders 10000 --volatility high
echo -e "\n--- Test 3: Flash Crash Scenario ---"
./obj_dir/hft_sim --orders 10000 --scenario flash_crash
echo -e "\n--- Test 4: Maximum Throughput ---"
./obj_dir/hft_sim --orders 100000 --no-delay
# 延遲分析
echo -e "\n--- Latency Analysis ---"
make latency
cat latency_report.txt | grep "Average"
# 生成報告
echo -e "\n--- Generating Report ---"
cat > performance_report.md << EOF
# HFT FPGA Simulation Performance Report
## Test Configuration
- Date: $(date)
- Verilator Version: $(verilator --version)
- CPU: $(lscpu | grep "Model name" | cut -d: -f2)
## Results Summary
$(tail -n 20 latency_report.txt)
## Recommendations
- Consider using FST format instead of VCD for large simulations
- Enable multi-threading for better performance
- Use coverage analysis to ensure all paths are tested
EOF
echo "Report saved to performance_report.md"
5. 實際應用建議
適合模擬的場景
- 演算法驗證 - 確認交易邏輯正確性
- 回測系統 - 使用歷史數據測試策略
- 風險分析 - 評估極端市場條件
- 協議開發 - 測試自定義協議實現
不適合的場景
- 實時交易 - 模擬速度太慢
- 精確延遲測量 - 軟體模擬無法準確反映硬體延遲
- 網路測試 - 需要專門的網路模擬器
性能優化技巧
// 1. 使用 FST 格式替代 VCD(更快更小)
#include <verilated_fst_c.h>
VerilatedFstC* tfp = new VerilatedFstC;
// 2. 條件性追蹤
if (enable_trace && critical_event) {
tfp->dump(sim_time);
}
// 3. 批量處理
for (int i = 0; i < 1000; i++) {
// 處理多個訂單後再更新波形
}
tfp->dump(sim_time);
6. 與實際 FPGA 的差異
| 特性 | Verilator 模擬 | 實際 FPGA |
|---|---|---|
| 處理延遲 | 微秒-毫秒級 | 奈秒級 |
| 吞吐量 | 千筆/秒 | 百萬筆/秒 |
| 時序準確性 | 週期準確 | 時序準確 |
| 並行處理 | 模擬並行 | 真實並行 |
| 功耗模擬 | 不支援 | 實際功耗 |
7. 實測結果與驗證
7.1 測試環境
- 測試平臺: Ubuntu Linux
- Verilator 版本: 5.040
- 測試時間: 2025-09-28
7.2 實際執行結果
$ cd /home/shihyu/github/jason_note/src/embedded_systems/src/hft_fpga_simulation
$ export PATH=$HOME/.mybin/verilator/bin:$PATH
$ make run
=== Compiling HFT Order Matcher ===
=== Running HFT Simulation ===
=== Simple HFT Order Matching Test ===
Resetting system...
Reset complete
Test 1: Basic matching
Match! Buy ID: 2 Sell ID: 1 Price: 100 Qty: 5
Test 2: Price mismatch
[No match as expected]
Test 3: Batch orders
[40 matches total from 104 orders]
=== Simulation Statistics ===
Orders sent: 104
Matches received: 40
Match rate: 38.4615%
Total orders (DUT): 104
Total matches (DUT): 40
Cycles executed: 182
Estimated FPGA Performance:
@ 200MHz clock frequency
Execution time: 0.91 μs
Throughput: 114.286 M orders/sec
Avg match latency: 22.75 ns
Simulation completed successfully!
7.3 快速開始指令
# 1. 進入測試目錄
cd /home/shihyu/github/jason_note/src/embedded_systems/src/hft_fpga_simulation
# 2. 設定環境變數
export PATH=$HOME/.mybin/verilator/bin:$PATH
export VERILATOR_ROOT=$HOME/.mybin/verilator/share/verilator
# 3. 編譯並執行
make
# 4. 查看波形
make wave
# 5. 清理檔案
make clean
7.4 簡化的 Makefile (實際使用版本)
# 簡潔的 HFT FPGA 模擬 Makefile
# Verilator 命令
VERILATOR = verilator
# 編譯器
CXX = g++
CXXFLAGS = -O2 -std=c++11
# 頂層模組
TOP = order_matcher
# 源檔案
VERILOG_SOURCES = order_matcher.v
CPP_SOURCES = tb_hft_simple.cpp
# 預設目標
all: run
# 編譯
verilate:
@echo "=== Compiling HFT Order Matcher ==="
$(VERILATOR) --cc --trace --exe --build \
$(VERILOG_SOURCES) \
$(CPP_SOURCES) \
--top-module $(TOP) \
-o hft_sim
# 運行
run: verilate
@echo "=== Running HFT Simulation ==="
./obj_dir/hft_sim
@echo "=== Simulation Complete ==="
# 查看波形
wave: run
@echo "=== Opening waveform ==="
gtkwave hft_sim.vcd &
# 清理
clean:
rm -rf obj_dir *.vcd
# 幫助
help:
@echo "HFT FPGA Simulation Makefile"
@echo "Commands:"
@echo " make - Compile and run"
@echo " make wave - Run and view waveform"
@echo " make clean - Clean all generated files"
@echo " make help - Show this help"
.PHONY: all verilate run wave clean help
總結
Verilator 適合用於 HFT 系統的功能驗證和演算法開發,但不能替代實際 FPGA 的性能測試。本範例展示瞭如何使用簡化的 Makefile 和測試平臺快速驗證 FPGA 設計。建議將其作為開發流程的一部分,在部署到實際 FPGA 前進行充分的模擬測試。