u@home:~$

CPU7

The goal is to learn from the chiplab project, including how to use Verilator to establish a development and testing framework, how the chiplab CPU's modules are organized, and how to use the AXI and APB bus protocols to connect with memory and peripheral devices.

Step 1:

Move and rename the chiplab project to another directory and compile it with minimal components.


Define CPU7_HOME variable in ~/.bashrc

export CPU7_HOME=~/prjs/cpu7

sims/verilator/run_func/Makefile

include ../../../chip/config-generator.mak

config-generator.mak -> ../../../chip/config-generator.mak

The toolchains directory is necessary for running any tests: loongarch32-unknown-elf-objdump is needed when converting a binary into a RAM file.

./script.sh: line 21: ../../../toolchains/loongarch32_gnu/install/bin/loongarch32-unknown-elf-objdump: No such file or directory


verilator 做测试框架有点不方便,测试代码是用loongarch gcc编译后转成内存文件然后模拟的。

这种方式测已经写好的cpu还挺好的,得到的trace结果和qemu的trace结果做比较,看看有什么不同。

但我想从头写指令、一条一条地写测试用例,就不是很方便。有时候我不想带个qemu去生成golden trace,维护整个这套流程牵扯的代码太多了。

想为每条指令写测试例子,可以自己再手写个正确的log,或者就用qemu去生成,然后跟模拟生成的log做对比。这样不需要改testbench。

我还是想按之前的方式,用verilog的testbench写测试用例。用modelsim模拟,用quartus ii综合生成bitstream。

还有一个原因,就是模拟的这套cpu接口是简化了的。

chiplab有一套测试用的框架,包括CpuAxi, CpuRam,用c++代码模拟内存。

可以从verilator编译时给出的cpu接口看到,


// simu_top: simulation top-level wrapper around soc_top.
//
// - Selects DATA_WIDTH / BUS_WIDTH / CPU_WIDTH from the AXI128 / AXI64 / ADDR64
//   compile-time macros and passes them down to the soc_top instance.
// - Forwards clock, reset, RAM, debug, UART and confreg (LED / key / switch)
//   signals between its own ports and the soc_top instance `soc`.
// - When RAND_TEST is defined, additionally packs commit, exception and
//   general-register state (taken from macros defined outside this file)
//   into the rand_test_bus output.
module simu_top
#(
    // RAM data width: 128 with AXI128, 64 with AXI64, otherwise 32.
    `ifdef AXI128
        parameter   DATA_WIDTH = 128, 
    `elsif AXI64
        parameter   DATA_WIDTH = 64, 
    `else
        parameter   DATA_WIDTH = 32, 
    `endif


    // Address (BUS_WIDTH) and debug data (CPU_WIDTH) widths: 64 with ADDR64, otherwise 32.
    `ifdef ADDR64
        parameter   BUS_WIDTH  = 64,
        parameter   CPU_WIDTH  = 64
    `else
        parameter   BUS_WIDTH  = 32,
        parameter   CPU_WIDTH  = 32
    `endif


)(
    input                       aclk,
    input                       aresetn, 

    //input  [  7             :0] intrpt,
    // enable_delay / random_seed are passed straight through to soc_top.
    input                       enable_delay,
    input  [  22            :0] random_seed,
    // RAM interface (connected to soc_top's sram_* ports): separate read and
    // write addresses; ram_wen has DATA_WIDTH/8 bits.
    output                      ram_ren  ,
    output [BUS_WIDTH-1     :0] ram_raddr,
    input  [DATA_WIDTH-1    :0] ram_rdata,
    output [DATA_WIDTH/8-1  :0] ram_wen  ,
    output [BUS_WIDTH-1     :0] ram_waddr,
    output [DATA_WIDTH-1    :0] ram_wdata
    // debug0_* outputs, forwarded from soc_top
    
    ,
    output [CPU_WIDTH-1     :0] debug0_wb_pc      ,
    output                      debug0_wb_rf_wen  ,
    output [  4             :0] debug0_wb_rf_wnum ,
    output [CPU_WIDTH-1     :0] debug0_wb_rf_wdata
    
    // Second set of debug outputs, present only when CPU_2CMT is defined.
    `ifdef CPU_2CMT
    ,
    output [CPU_WIDTH-1     :0] debug1_wb_pc      ,
    output                      debug1_wb_rf_wen  ,
    output [  4             :0] debug1_wb_rf_wnum ,
    output [CPU_WIDTH-1     :0] debug1_wb_rf_wdata
    `endif
    
    // Packed state bus, present only when RAND_TEST is defined; its width
    // comes from the RAND_TEST_BUS_WD macro defined outside this file.
    `ifdef RAND_TEST
    ,
    output [`RAND_TEST_BUS_WD-1:0] rand_test_bus
    `endif

    ,

    // UART pins (connected to soc_top UART_RX / UART_TX)
    inout             uart_rx,
    inout             uart_tx,

    // UART signals connected to soc_top's uart0_* ports (marked there as
    // "use for simulation")
    output            uart_enab,
    output            uart_rw,
    output     [3 :0] uart_addr,
    output     [7 :0] uart_datai,

    // confreg I/O: LEDs, digit display, switches and keypad.
    // NOTE(review): num_csn / num_a_g are declared `reg` but are driven by the
    // `soc` instance output ports below; this presumably relies on the tool
    // parsing the file as SystemVerilog -- confirm before using another simulator.
    output     [15:0] led,          
    output     [1 :0] led_rg0,      
    output     [1 :0] led_rg1,      
    output reg [7 :0] num_csn,      
    output reg [6 :0] num_a_g,      
    input      [7 :0] switch,       
    output     [3 :0] btn_key_col,  
    input      [3 :0] btn_key_row,  
    input      [1 :0] btn_step      


);

// The SoC under simulation; the width parameters are forwarded unchanged.
soc_top #(
    .BUS_WIDTH(BUS_WIDTH),
    .DATA_WIDTH(DATA_WIDTH), 
    .CPU_WIDTH(CPU_WIDTH)
)
    soc(
    .aclk        (aclk        ),
    .aresetn     (aresetn     ), 

    //.intrpt      (intrpt      ),
    .enable_delay(enable_delay),
    .random_seed (random_seed ),
    
    // ram: soc_top names these ports sram_*
    .sram_ren  (ram_ren  ),
    .sram_raddr(ram_raddr),
    .sram_rdata(ram_rdata),
    .sram_wen  (ram_wen  ),
    .sram_waddr(ram_waddr),
    .sram_wdata(ram_wdata)

    ,
    .debug0_wb_pc      (debug0_wb_pc      ),// O, CPU_WIDTH
    .debug0_wb_rf_wen  (debug0_wb_rf_wen  ),// O, 1
    .debug0_wb_rf_wnum (debug0_wb_rf_wnum ),// O, 5
    .debug0_wb_rf_wdata(debug0_wb_rf_wdata) // O, CPU_WIDTH

    `ifdef CPU_2CMT
    ,
    .debug1_wb_pc      (debug1_wb_pc      ),// O, CPU_WIDTH
    .debug1_wb_rf_wen  (debug1_wb_rf_wen  ),// O, 1
    .debug1_wb_rf_wnum (debug1_wb_rf_wnum ),// O, 5
    .debug1_wb_rf_wdata(debug1_wb_rf_wdata) // O, CPU_WIDTH
    `endif
    ,
    .UART_RX             (uart_rx         ),
    .UART_TX             (uart_tx         ),

    // used for simulation
    .uart0_enab          (uart_enab       ),
    .uart0_rw            (uart_rw         ),
    .uart0_addr          (uart_addr       ),
    .uart0_datai         (uart_datai      ),

    // For confreg
    .led                 (led           ),
    .led_rg0             (led_rg0       ),
    .led_rg1             (led_rg1       ),
    .num_csn             (num_csn       ),
    .num_a_g             (num_a_g       ),
    .switch              (switch        ),
    .btn_key_col         (btn_key_col   ),
    .btn_key_row         (btn_key_row   ),
    .btn_step            (btn_step      )

);

// Random-test support: build rand_test_bus from commit / exception / register
// state. All CMTBUS_*, EXBUS_*, CR_BADVADDR and GR_RTL macros are defined
// outside this file (presumably hierarchical references into the core --
// TODO confirm).
`ifdef RAND_TEST
wire        cmtbus_valid0;
wire [3:0]  cmtbus_cmtnum0;

wire [3:0] commit_num;
wire       cmt_last_split;

assign cmtbus_valid0 = `CMTBUS_VALID0;
assign cmtbus_cmtnum0 = `CMTBUS_CMTNUM0;

`ifdef CPU_2CMT
// Two commit buses: commit_num is the sum of both counts; cmt_last_split is
// taken from bus 1 when it is valid, otherwise from bus 0, and is set when the
// selected bus reports a count of 0.
wire        cmtbus_valid1;
wire [3:0]  cmtbus_cmtnum1;
assign cmtbus_valid1 = `CMTBUS_VALID1;
assign cmtbus_cmtnum1 = `CMTBUS_CMTNUM1;
assign commit_num = cmtbus_cmtnum0 + cmtbus_cmtnum1;
assign cmt_last_split = cmtbus_valid1 ? (cmtbus_cmtnum1 == 0) :
                        cmtbus_valid0 ? (cmtbus_cmtnum0 == 0) :
                        1'b0;

`else
// Single commit bus: commit_num is its count; cmt_last_split is set when the
// bus is valid and reports a count of 0.
assign commit_num = cmtbus_cmtnum0;
assign cmt_last_split = cmtbus_valid0 ? (cmtbus_cmtnum0 == 0) :
                        1'b0;

`endif
// Field order, most-significant first: commit_num and cmt_last_split (each
// zero-extended to 32 bits), CR_BADVADDR, EXBUS_EPC, EXBUS_EXCODE (26 zero
// bits prepended), EXBUS_ERET and EXBUS_EX (31 zero bits prepended each),
// then GR_RTL[31] down to GR_RTL[0].
assign rand_test_bus ={
                        {28'b0,commit_num},
                        {31'b0,cmt_last_split},
                        {`CR_BADVADDR},
                        {`EXBUS_EPC},
                        {26'b0,`EXBUS_EXCODE},
                        {31'b0,`EXBUS_ERET},
                        {31'b0,`EXBUS_EX},
                        `GR_RTL[31],`GR_RTL[30],`GR_RTL[29],`GR_RTL[28],
                        `GR_RTL[27],`GR_RTL[26],`GR_RTL[25],`GR_RTL[24],
                        `GR_RTL[23],`GR_RTL[22],`GR_RTL[21],`GR_RTL[20],
                        `GR_RTL[19],`GR_RTL[18],`GR_RTL[17],`GR_RTL[16],
                        `GR_RTL[15],`GR_RTL[14],`GR_RTL[13],`GR_RTL[12],
                        `GR_RTL[11],`GR_RTL[10],`GR_RTL[ 9],`GR_RTL[ 8],
                        `GR_RTL[ 7],`GR_RTL[ 6],`GR_RTL[ 5],`GR_RTL[ 4],
                        `GR_RTL[ 3],`GR_RTL[ 2],`GR_RTL[ 1],`GR_RTL[ 0]
                         };



`endif
endmodule

simu_top是verilator编译时指定的模块入口。

SIMU_TOP_NAME=simu_top

...

verilator:${VERILATOR_SRC} 
	@echo "============================================================================================================="
	@echo "============================================================================================================="
	@echo "COMPILING verilog..."
	@echo "============================================================================================================="
	@echo "============================================================================================================="
	verilator ${VERILATOR_INCLUDE} ${WAVEOPTION} --savable --threads ${THREAD} -O3 -Wno-fatal -DSIMULATION=1 -Wall --trace -cc ${VFLAGS} ${SIMU_TOP_NAME}.v ${VERILATOR_SRC} 2>&1 | tee log/compile.log
	make -C ${OBJ_DIR} -f "V${SIMU_TOP_NAME}.mk"

内存接口和Altera的1-port block ram接口差不多(不过这里读地址和写地址是分开的)。


    // ram 
    output                      ram_ren  ,
    output [BUS_WIDTH-1     :0] ram_raddr,
    input  [DATA_WIDTH-1    :0] ram_rdata,
    output [DATA_WIDTH/8-1  :0] ram_wen  ,
    output [BUS_WIDTH-1     :0] ram_waddr,
    output [DATA_WIDTH-1    :0] ram_wdata

verilator testbench里的c++部分通过读内存文件满足这个接口。

里面的soc_top是一样的接口。

soc_top是在chip/soc_demo/sim/soc_top.v里定义的。


module soc_top#(
    parameter   BUS_WIDTH  = 32,
    parameter   DATA_WIDTH = 64, 
    parameter   CPU_WIDTH  = 32
)
(
    input  wire        aresetn           , 
    input  wire        aclk              ,
    input  wire        enable_delay      ,
    input  wire [22:0] random_seed       ,

    output wire [CPU_WIDTH-1:0] debug0_wb_pc      ,
    output wire [CPU_WIDTH-1:0] debug0_wb_rf_wdata,
    output wire                 debug0_wb_rf_wen  ,
    output wire [4 :0]          debug0_wb_rf_wnum ,

    `ifdef CPU_2CMT
    output wire [CPU_WIDTH-1:0] debug1_wb_pc      ,
    output wire [CPU_WIDTH-1:0] debug1_wb_rf_wdata,
    output wire                 debug1_wb_rf_wen  ,
    output wire [4 :0]          debug1_wb_rf_wnum ,
    `endif

    //------gpio----------------
    output [15:0] led,
    output [1 :0] led_rg0,
    output [1 :0] led_rg1,
    output [7 :0] num_csn,
    output [6 :0] num_a_g,
    input  [7 :0] switch, 
    output [3 :0] btn_key_col,
    input  [3 :0] btn_key_row,
    input  [1 :0] btn_step,

    //ram
    output wire [BUS_WIDTH -1:0] sram_raddr        ,
    input  wire [DATA_WIDTH-1:0] sram_rdata        ,
    output wire                  sram_ren          ,
    output wire [BUS_WIDTH -1:0] sram_waddr        ,
    output wire [DATA_WIDTH-1:0] sram_wdata        ,
    output wire [DATA_WIDTH/8-1:0] sram_wen        ,

    //use for simulation
    output         uart0_enab,
    output         uart0_rw,
    output [3 :0]  uart0_addr,
    output [7 :0]  uart0_datai,

    //------uart-------
    inout         UART_RX,
    inout         UART_TX
/*
    //------DDR3 interface------
    inout  [15:0] ddr3_dq,
    output [12:0] ddr3_addr,
    output [2 :0] ddr3_ba,
    output        ddr3_ras_n,
    output        ddr3_cas_n,
    output        ddr3_we_n,
    output        ddr3_odt,
    output        ddr3_reset_n,
    output        ddr3_cke,
    output [1:0]  ddr3_dm,
    inout  [1:0]  ddr3_dqs_p,
    inout  [1:0]  ddr3_dqs_n,
    output        ddr3_ck_p,
    output        ddr3_ck_n,

    //------mac controller-------
    //TX
    input         mtxclk_0,     
    output        mtxen_0,      
    output [3:0]  mtxd_0,       
    output        mtxerr_0,
    //RX
    input         mrxclk_0,      
    input         mrxdv_0,     
    input  [3:0]  mrxd_0,        
    input         mrxerr_0,
    input         mcoll_0,
    input         mcrs_0,
    // MIIM
    output        mdc_0,
    inout         mdio_0,
    
    output        phy_rstn,
 
    //------EJTAG-------
    input         EJTAG_TRST,
    input         EJTAG_TCK,
    input         EJTAG_TDI,
    input         EJTAG_TMS,
    output        EJTAG_TDO,

    //------nand-------
    output        NAND_CLE ,
    output        NAND_ALE ,
    input         NAND_RDY ,
    inout [7:0]   NAND_DATA,
    output        NAND_RD  ,
    output        NAND_CE  ,  //low active
    output        NAND_WR  ,  
       
    //------spi flash-------
    output        SPI_CLK,
    output        SPI_CS,
    inout         SPI_MISO,
    inout         SPI_MOSI
*/
);

可以看到soc的引脚应该是ddr3,uart,EJTAG,nand,spi flash这些。mac是网卡吗?

ram在soc的外面是为了模拟测试,模拟的是内存,而不是cache用的sram。

soc_top里面包含core_top,理解为一个核心,核心里面包括cache和processor core。

core_top cpu
(
    .intrpt            (interrupt         ),// I, 8  

    .aclk              (aclk              ),// I, 1  
    .aresetn           (aresetn           ),// I, 1  
    .arid              (cpu_arid          ),// O, 4  
    .araddr            (cpu_araddr        ),// O, 64 
    .arlen             (cpu_arlen         ),// O, 4  
    .arsize            (cpu_arsize        ),// O, 3  
    .arburst           (cpu_arburst       ),// O, 2  
    .arlock            (cpu_arlock        ),// O, 2  
    .arcache           (cpu_arcache       ),// O, 4  
    .arprot            (cpu_arprot        ),// O, 3  
    .arvalid           (cpu_arvalid       ),// O, 1  
    .arready           (cpu_arready       ),// I, 1  
    .awid              (cpu_awid          ),// O, 4  
    .awaddr            (cpu_awaddr        ),// O, 64 
    .awlen             (cpu_awlen         ),// O, 4  
    .awsize            (cpu_awsize        ),// O, 3  
    .awburst           (cpu_awburst       ),// O, 2  
    .awlock            (cpu_awlock        ),// O, 2  
    .awcache           (cpu_awcache       ),// O, 4  
    .awprot            (cpu_awprot        ),// O, 3  
    .awvalid           (cpu_awvalid       ),// O, 1  
    .awready           (cpu_awready       ),// I, 1  
    .rid               (cpu_rid           ),// I, 4  
    .rdata             (cpu_rdata         ),// I, 128
    .rresp             (cpu_rresp         ),// I, 2  
    .rlast             (cpu_rlast         ),// I, 1  
    .rvalid            (cpu_rvalid        ),// I, 1  
    .rready            (cpu_rready        ),// O, 1  
    .wid               (cpu_wid           ),// O, 4  
    .wdata             (cpu_wdata         ),// O, 128
    .wstrb             (cpu_wstrb         ),// O, 16 
    .wlast             (cpu_wlast         ),// O, 1  
    .wvalid            (cpu_wvalid        ),// O, 1  
    .wready            (cpu_wready        ),// I, 1  
    .bid               (cpu_bid           ),// I, 4  
    .bresp             (cpu_bresp         ),// I, 2  
    .bvalid            (cpu_bvalid        ),// I, 1  
    .bready            (cpu_bready        ) // O, 1  
    
    ,
    .debug0_wb_pc      (debug0_wb_pc      ),// O, 64 
    .debug0_wb_rf_wen  (debug0_wb_rf_wen  ),// O, 1  
    .debug0_wb_rf_wnum (debug0_wb_rf_wnum ),// O, 5  
    .debug0_wb_rf_wdata(debug0_wb_rf_wdata) // O, 64 
    `ifdef CPU_2CMT
    ,
    .debug1_wb_pc      (debug1_wb_pc      ),// O, 64 
    .debug1_wb_rf_wen  (debug1_wb_rf_wen  ),// O, 1  
    .debug1_wb_rf_wnum (debug1_wb_rf_wnum ),// O, 5  
    .debug1_wb_rf_wdata(debug1_wb_rf_wdata) // O, 64 
    `endif
);
+------------+
|            |                   +--------+                    +------------------+
|  core_top  |__master____slave__| random |__master_____slave__|                  |  
| axi_master |                   | delay  |                    |   axi_slave_mux  | (master)
|            |                   |        |                    |                  |							
+------------+                   +--------+                    +------------------+
  (master)                         (slave)                       | |     |    |  |
                                                                 | |     |    |  |
                                                               s0| |s1   |s2  |s3|s4
                                                                 | |     |    |  |________________________
                                                 ________________| |     |    |___________                |
                                                |                  |     |                |               |
                                          +------------+    +------+  +-------+    +------------+     +--------+  
                                          |soc_axi_sram|    |      |  |axi2apb|    |soc_axi_sram|     |        |
                                          |  _bridge   |    | spi  |  |_misc  |    |_bridge     |     |  mac   |  
                                          +------------+    |      |  +-------+    +------------+     |        |
                                                |           +------+     |                |           +--------+
                                          +------------+              +------+     +------------+
                                          |            |              |      |     |   conf     |
                                          |    sram    |              | uart |     |   sram     |
                                          |            |              |      |     +------------+
                                          +------------+              +------+

core_top对外的信号主要是一个axi master。

通过一个axi_slave_mux连接几个slave,这里是为了简单用了sram作为内存。

应该是对应一个ddr controller。

如果不考虑测试,不连这个random delay,core_top应该就直接连mux了。

          +----------------------------------------------------------------------------------------+
          |                                     soc_top                                            |
          |                                                                                        |    +------------+   
          |      +-------------------------------------------+   +------+        +------------+    |    |            |
          |      |              core_top                     |   |      |        |soc_axi_sram|---------|    sram    |
          |      |                                           |   |  a   |        |  _bridge   |    |    |            |
          |      |  +--------------------+    +--------+     |   |  x   |        +------------+    |    +------------+
          |      |  |     mycpu_top      |    |        |     |   |  i   |                          |
          |      |  |                    |    | icache |     |   |  _   |                          |
          |      |  |                    |    |        |     |   |  s   |        +------------+    |    +------------+
          |      |  |                    |    +--------+     |   |  l   |        |soc_axi_sram|---------|   conf     |
          |      |  |                    |                   |   |  a   |        |  _bridge   |    |    |   sram     |
          |      |  |                    |    +--------+     |   |  v   |        +------------+    |    +------------+
          |      |  |                    |    |        |     |   |  e   |                          |
          |      |  |                    |    | dcache |     |   |  _   |                          |
          |      |  +--------------------+    |        |     |   |  m   |                          |
          |      |                            +--------+     |   |  u   |        +-------+         |    +------+ 
          |      |                                           |   |  x   |        |axi2apb|--------------|      |
          |      +-------------------------------------------+   |      |        |_misc  |         |    | uart |
          |                                                      |      |        +-------+         |    |      |
          |      +-----------------------------------------------+      |                          |    +------+
          |      |                                                      |                          |
          |      |                                                      |                          |
          |      +------------------------------------------------------+                          |
          |                 |                 |                 |                                  |
          +-----------------|-----------------|-----------------|----------------------------------+
                            |                 |                 |
                            |                 |                 |
                   +----------------+      +------+        +--------+                     
                   |                |      |      |        |        |
                   | ddr controller |      | spi  |        |  mac   |
                   |                |      |      |        |        |
                   +----------------+      +------+        +--------+

没搞明白,从注释掉的代码上看,为什么spi,mac是直接连在axi_slave_mux,而uart和sram都需要再实现一个bridge。

ddr controller应该怎样连呢,需要看一个实现了ddr与core用axi连接的例子才行。