branch logic
exu/rtl/sparc_exu.v
sparc_exu_ecl ecl(
.so (short_so0),
.si (short_scan0_1),
.rst_tri_en (mux_drive_disable),
.byp_ecl_wrccr_data_w(byp_irf_rd_data_w[7:0]),
.alu_ecl_adder_out_31_e(exu_ifu_brpc_e[31]),
.byp_ecl_rd_data_3lsb_m(exu_tlu_wsr_data_m[2:0]),
.alu_ecl_adder_out_7_0_e(exu_ifu_brpc_e[7:0]),
...
sparc_exu_alu alu(
.byp_alu_rs3_data_e(exu_lsu_rs3_data_e[63:0]),
.so (scan0_2),
.si (scan0_1),
.ifu_lsu_casa_e (ecl_alu_casa_e),
/*AUTOINST*/
// Outputs
.alu_byp_rd_data_e (alu_byp_rd_data_e[63:0]),
.exu_ifu_brpc_e (exu_ifu_brpc_e[47:0]),
.exu_lsu_ldst_va_e (exu_lsu_ldst_va_e[47:0]),
.exu_lsu_early_va_e(exu_lsu_early_va_e[10:3]),
.exu_mmu_early_va_e(exu_mmu_early_va_e[7:0]),
.alu_ecl_add_n64_e (alu_ecl_add_n64_e),
.alu_ecl_add_n32_e (alu_ecl_add_n32_e),
.alu_ecl_log_n64_e (alu_ecl_log_n64_e),
.alu_ecl_log_n32_e (alu_ecl_log_n32_e),
...
exu_ifu_brpc_e是exu给ifu的branch地址,可以看到这部分是在_e得出的。
从文档里也看到branch address的计算也是复用了ALU,应该就是ALU里的exu_ifu_brpc_e。
而在sparc_ifu.v里却有这个
// Branch Logic
sparc_ifu_dcl dcl(
.so (scan0_3),
.si (scan0_2),
.dtu_dcl_opf2_d (dtu_inst_d[7]),
.fdp_dcl_op_s (fdp_dtu_inst_s[31:30]),
.fdp_dcl_op3_s (fdp_dtu_inst_s[24:19]),
在ifu里,branch就应该是在decode阶段。
在OpenSPARC T1 Microarchitecture Specification里也有这一句。
2.3.14 Instruction Decode
…
The branch condition is also evaluated in the D-stage, and the decision for annulling a delay slot is made in this stage as well.
sparc_ifu_dcl.v的注释里也说了这一点,这倒要好好看看opensparc里的branch是怎样实现的。
ifu/rtl/sparc_ifu_dcl.v
////////////////////////////////////////////////////////////////////////
/*
// Module Name: sparc_ifu_dcl
// Description:
// The decode control logic block does branch condition evaluation,
// delay slot management, and appropriate condition code
// selection. It also executes the tcc instruction and kills the E
// stage instruction if a move did not succeed. The DCL block is
// also responsible for generating the correct select signals to
// choose the branch offset and immediate operand.
//
*/
////////////////////////////////////////////////////////////////////////
还在Microarchitecture Specification里看到这一段。
2.3.10 Thread Selection Policy
…
A thread could become unavailable due to one of these reasons: 1. The thread is executing one of the long latency instructions, such as load, branch, multiplication, division, and so on.
branch也是long latency instructions?
load是,store看来不是。
sparc_ifu_dcl里面是cc指令和FP cc。
比如ifu_ffu_mvcnd_m,这个名字已经说得很清楚了。
ifu/rtl/sparc_ifu_dcl.v
// branch condition to FPU
dff_s #(1) fpcond_ff(.din (cond_brtaken_e),
.q (ifu_ffu_mvcnd_m),
.clk (clk),
.se (se), .si(), .so());
下面还有句,回头要仔细看看怎样kill write back and bypass。
// if mov didn't succeed kill write back and bypass
// need to check thread as well
// assign ifu_exu_kill_e = dtu_inst_anull_e |
// ~fcl_dtu_inst_vld_e; // don't need this anymore
assign ifu_exu_kill_e = dtu_inst_anull_e;
看来branch指令是应该放在_e。
ifu/rtl/sparc_ifu_fcl.v
assign load_tpc[3:0] = {4{trappc_vld_w2}} & trap_thr |
rb_w2 |
{4{rb_stg_w | ims_flush_coll_w}} & thr_w |
// {4{dec_fcl_kill4sta_e}} & thr_e |
{4{flush_sonly_qual_m}} & thr_m;
assign load_bpc[3:0] = {4{brtaken_buf_e}} & thr_e;
assign load_pcp4[3:0] = {4{~part_stall_thisthr_f &
~iferrto_thisthr_d1 |
arst_vld_f |
async_intr_vld_s}} & thr_f;
always @ (/*AUTOSENSE*/load_bpc or load_pcp4 or load_tpc)
begin
// if (fcl_reset)
// begin // RESET PC is loaded to T0
// fcl_fdp_tpcbf_sel_old_bf_l = 4'b0001;
// fcl_fdp_tpcbf_sel_pcp4_bf_l = 4'b1110;
// fcl_fdp_tpcbf_sel_trap_bf_l = 4'b1111;
// fcl_fdp_tpcbf_sel_brpc_bf_l = 4'b1111;
// end // if (reset)
// else
// begin
fcl_fdp_tpcbf_sel_old_bf_l = (load_bpc | load_tpc | load_pcp4);
fcl_fdp_tpcbf_sel_brpc_bf_l = ~load_bpc | load_tpc | load_pcp4;
fcl_fdp_tpcbf_sel_pcp4_bf_l = ~load_pcp4 | load_tpc;
fcl_fdp_tpcbf_sel_trap_bf_l = ~load_tpc;
end // always @ (...
看到用always的时候不多啊。
SPARC有两类branch指令。
The SPARC Architecture Manual V9
因为SPARC有状态寄存器,flags。
loongarch没有,所以可以参考Branch on Integer Register with Prediction (BPr)。
这个是寄存器里的简单比较,和0比。
在ALU里,
module sparc_exu_alu
(
/*AUTOARG*/
// Outputs
so, alu_byp_rd_data_e, exu_ifu_brpc_e, exu_lsu_ldst_va_e,
exu_lsu_early_va_e, exu_mmu_early_va_e, alu_ecl_add_n64_e,
alu_ecl_add_n32_e, alu_ecl_log_n64_e, alu_ecl_log_n32_e,
alu_ecl_zhigh_e, alu_ecl_zlow_e, exu_ifu_regz_e, exu_ifu_regn_e,
alu_ecl_adderin2_63_e, alu_ecl_adderin2_31_e,
alu_ecl_adder_out_63_e, alu_ecl_cout32_e, alu_ecl_cout64_e_l,
alu_ecl_mem_addr_invalid_e_l,
// Inputs
rclk, se, si, byp_alu_rs1_data_e, byp_alu_rs2_data_e_l,
byp_alu_rs3_data_e, byp_alu_rcc_data_e, ecl_alu_cin_e, ecl_alu_rd_e, // uty: test
ifu_exu_invert_d, ecl_alu_log_sel_and_e, ecl_alu_log_sel_or_e,
ecl_alu_log_sel_xor_e, ecl_alu_log_sel_move_e,
ecl_alu_out_sel_sum_e_l, ecl_alu_out_sel_rs3_e_l,
ecl_alu_out_sel_shift_e_l, ecl_alu_out_sel_logic_e_l,
shft_alu_shift_out_e, ecl_alu_sethi_inst_e, ifu_lsu_casa_e
);
...
output exu_ifu_regz_e; // rs1_data == 0
output exu_ifu_regn_e;
...
// Zero comparison for exu_ifu_regz_e
sparc_exu_aluzcmp64 regzcmp(.in(byp_alu_rcc_data_e[63:0]), .zero64(exu_ifu_regz_e));
assign exu_ifu_regn_e = byp_alu_rcc_data_e[63];
exu_ifu_regz_e和exu_ifu_regn_e能判断出rs1_data与0的关系。
ifu/rtl/sparc_ifu_fcl.v
//------------------------------
// Branch Control
//------------------------------
// final portion of branch evaluation
wire brtaken_e_l;
bw_u1_buf_20x UZsize_bcbf(.z(fcl_dcl_regz_e),
.a(exu_ifu_regz_e));
bw_u1_muxi21_6x UZsize_bcmux(.z(brtaken_e_l),
.d0(dcl_fcl_bcregz0_e),
.d1(dcl_fcl_bcregz1_e),
.s(exu_ifu_regz_e));
bw_u1_inv_15x UZsize_bcinv(.z(brtaken_e),
.a(brtaken_e_l));
// Branch is taken in the E stage to thr_e. Below we check to see
// if this is the same as the next thread we will switch to
// isolate non critical section
bw_u1_buf_5x UZsize_btbuf(.z (brtaken_unq_e),
.a (brtaken_e));
assign brtaken_buf_e = brtaken_unq_e & inst_vld_qual_e & ~kill_curr_e;
// assign thr_match_ne_norst = thr_match_ne & ~rst_sw_bf;
// assign brto_nxtthr_bf = thr_match_ne & brtaken_e;
bw_u1_nand2_4x UZsize_btkn_ntl(.a (brtaken_e),
.b (thr_match_ne),
.z (brto_nxtthr_bf_l));
// bw_u1_inv_8x UZsize_btkn_bf(.a (brto_nxtthr_bf_l),
// .z (brto_nxtthr_bf));
dff_s #(1) br_ff(.din (brtaken_buf_e),
.q (brtaken_m),
.clk (clk),
.se (se), .si(), .so());
...
assign load_bpc[3:0] = {4{brtaken_buf_e}} & thr_e;
assign load_pcp4[3:0] = {4{~part_stall_thisthr_f &
~iferrto_thisthr_d1 |
arst_vld_f |
async_intr_vld_s}} & thr_f;
always @ (/*AUTOSENSE*/load_bpc or load_pcp4 or load_tpc)
begin
// if (fcl_reset)
// begin // RESET PC is loaded to T0
// fcl_fdp_tpcbf_sel_old_bf_l = 4'b0001;
// fcl_fdp_tpcbf_sel_pcp4_bf_l = 4'b1110;
// fcl_fdp_tpcbf_sel_trap_bf_l = 4'b1111;
// fcl_fdp_tpcbf_sel_brpc_bf_l = 4'b1111;
// end // if (reset)
// else
// begin
fcl_fdp_tpcbf_sel_old_bf_l = (load_bpc | load_tpc | load_pcp4);
fcl_fdp_tpcbf_sel_brpc_bf_l = ~load_bpc | load_tpc | load_pcp4;
fcl_fdp_tpcbf_sel_pcp4_bf_l = ~load_pcp4 | load_tpc;
fcl_fdp_tpcbf_sel_trap_bf_l = ~load_tpc;
end // always @ (...
可以看到exu_ifu_regz_e直接影响到load_bpc,然后影响到fcl_fdp_tpcbf_sel_brpc_bf_l。
但为什么load_bpc是控制4个线程的,还没搞懂。
奇怪的是exu_ifu_regn_e并没有和exu_ifu_regz_e一起进sparc_ifu_fcl。
而是exu_ifu_regn_e和fcl_dcl_regz_e一起进sparc_ifu_dcl。
原来fcl里的dcl_fcl_bcregz0_e dcl_fcl_bcregz1_e都是来自dcl。
ifu/rtl/sparc_ifu_dcl.v
//////// Chandra ////////
wire temp0, temp1, cond_brtaken_e_l;
// limit loading on this signal
// wire regz_buf_e;
// bw_u1_buf_5x UZfix_regz_bf(.a (exu_ifu_regz_e),
// .z (regz_buf_e));
assign temp0 = (r_eval0 | ccbr_taken_e);
assign temp1 = (r_eval1 | ccbr_taken_e);
bw_u1_muxi21_6x UZsize_cbtmux(.z(cond_brtaken_e_l),
.d0(temp0),
.d1(temp1),
.s(fcl_dcl_regz_e));
bw_u1_inv_20x UZsize_cbtinv(.z(cond_brtaken_e),
.a(cond_brtaken_e_l));
////////////////////////
assign dcl_fcl_bcregz0_e = (temp0 & dbr_inst_e | ibr_inst_e |
call_inst_e) & ~dtu_inst_anull_e;
assign dcl_fcl_bcregz1_e = (temp1 & dbr_inst_e | ibr_inst_e |
call_inst_e) & ~dtu_inst_anull_e;
这几个信号在fcl dcl里来回传的有点乱。
最终的branch判断就在这句,
bw_u1_muxi21_6x UZsize_bcmux(.z(brtaken_e_l),
.d0(dcl_fcl_bcregz0_e),
.d1(dcl_fcl_bcregz1_e),
.s(exu_ifu_regz_e));
//--------------
// For BRZ
// -------------
// Calculate Cond Assuming Z=1 And Z=0. Then Mux
// assign r_eval1 = ((exu_ifu_regn_e | ~br_cond_e[1] | ~br_cond_e[0]) ^
// br_cond_e[2]) & ~cc_mvbr_e;
assign r_eval1 = exu_ifu_regn_e ? (~br_cond_e[2] & ~cc_mvbr_e) :
(((br_cond_e[1] & br_cond_e[0]) ^
~br_cond_e[2]) & ~cc_mvbr_e);
// assign r_eval0 = ((exu_ifu_regn_e & br_cond_e[1]) ^
// br_cond_e[2]) & ~cc_mvbr_e;
assign r_eval0 = exu_ifu_regn_e ? ((br_cond_e[1] ^ br_cond_e[2]) &
~cc_mvbr_e) :
(br_cond_e[2] & ~cc_mvbr_e);
chiplab里的branch,首先是没有复用ALU来计算跳转地址。整个模块参数跟ALU也很像。
为了简单, 我也先不复用ALU了,等到后面搞了bypass再说。
branch模块分别在ex1_stage ex2_stage里调用了一次,还没搞清楚为什么。
ex1_stage.v
branch branch(
.branch_valid (branch_valid ),
.branch_a (bru_a ),
.branch_b (bru_b ),
.branch_op (ex1_bru_op ),
.branch_pc (ex1_bru_pc ),
.branch_inst (ex1_bru_inst ),
.branch_taken (ex1_bru_br_taken ),
.branch_target(ex1_bru_br_target),
.branch_offset(ex1_bru_offset ),
.cancel_allow (cancel_allow ),
// pc interface
.bru_cancel (bru_cancel ),
.bru_target (bru_target ),
.bru_valid (bru_valid ),
.bru_taken (bru_taken ),
.bru_link_pc (bru_link_pc ),
.bru_pc (bru_pc )
);
ex2_stage.v
branch bru_s2(
.branch_valid (branch_valid ),
.branch_a (bru_a ),
.branch_b (bru_b ),
.branch_op (ex2_bru_op ),
.branch_pc (ex2_bru_pc ),
.branch_inst (ex2_bru_inst ),
.branch_taken (ex2_bru_br_taken ),
.branch_target(ex2_bru_br_target),
.branch_offset(ex2_bru_offset ),
.cancel_allow (cancel_allow ),
// pc interface
.bru_cancel (bru_cancel_ex2 ),
.bru_target (bru_target_ex2 ),
.bru_valid (bru_valid_ex2 ),
.bru_taken (bru_taken_ex2 ),
.bru_link_pc (bru_link_pc_ex2 ),
.bru_pc (bru_pc_ex2 )
);
lsoc1000_mainpipe.v
wire [`GRLEN-1:0] bru_target_input = ex1_bru_delay ? bru_target_ex2 : bru_target ;
wire [`GRLEN-1:0] bru_pc_input = ex1_bru_delay ? bru_pc_ex2 : bru_pc ;
wire [`LSOC1K_PRU_HINT:0] bru_hint_input = ex1_bru_delay ? bru_hint_ex2 : bru_hint ;
wire bru_sign_input = ex1_bru_delay ? bru_sign_ex2 : bru_sign ;
wire bru_taken_input = ex1_bru_delay ? bru_taken_ex2 : bru_taken ;
wire bru_brop_input = ex1_bru_delay ? bru_brop_ex2 : bru_brop ;
wire bru_jrop_input = ex1_bru_delay ? bru_jrop_ex2 : bru_jrop ;
wire bru_jrra_input = ex1_bru_delay ? bru_jrra_ex2 : bru_jrra ;
wire bru_link_input = ex1_bru_delay ? bru_link_ex2 : bru_link ;
wire [`GRLEN-1:0] bru_link_pc_input = ex1_bru_delay ? bru_link_pc_ex2 : bru_link_pc;
wire bru_cancel_input = bru_cancel_ex2 || bru_cancel;
bru_target_input就是送进front里的br_target。
ex1_bru_delay是bru_delay从issue stage传下去的。
lsoc1000_stage_issue.v
if(allow_in) begin
ex1_branch_valid <= ((port0_sr_ur == `EX_BRU && is_port0_valid) || (port1_sr_ur == `EX_BRU && is_port1_valid) || is_port2_valid) && !type_crash && !bru_cancel;
ex1_bru_delay <= bru_delay;
ex1_bru_link <= is_port2_valid ? 1'b0 : port0_bru_dispatch ? (is_port0_link && is_port0_valid) : (is_port1_link && is_port1_valid);
ex1_bru_jrra <= is_port2_valid ? is_port2_jrra : port0_bru_dispatch ? is_port0_jrra : is_port1_jrra;
ex1_bru_brop <= is_port2_valid ? is_port2_brop : port0_bru_dispatch ? is_port0_brop : is_port1_brop;
ex1_bru_jrop <= is_port2_valid ? is_port2_jrop : port0_bru_dispatch ? is_port0_jrop : is_port1_jrop;
end
而bru_delay又是从 ex1_stage生成的。
ex1_stage.v
//branch
reg first_trial;
wire port0_branch_valid = ex1_port0_src == `EX_BRU && ex1_port0_valid;
wire port1_branch_valid = ex1_port1_src == `EX_BRU && ex1_port1_valid;
wire branch_valid = port0_branch_valid || port1_branch_valid || ex1_bru_port[0];
wire port0_cancel_allow = (ex1_port0_src == `EX_BRU && ex1_port0_valid) && !ex1_port0_type && !ex1_bru_delay;
wire port1_cancel_allow = (ex1_port1_src == `EX_BRU && ex1_port1_valid) && !ex1_port1_type && !ex1_bru_delay;
wire port2_cancel_allow = ex1_bru_port[0] && !ex1_port2_type && !ex1_bru_delay;
wire cancel_allow = (port0_cancel_allow || port1_cancel_allow || port2_cancel_allow) && first_trial;
assign bru_port = port1_branch_valid;
wire [31:0] ex1_bru_inst = port0_branch_valid ? ex1_port0_inst : ex1_port1_inst;
wire [`GRLEN-1:0] bru_a = ex1_bru_a_lsu_fw ? ex1_lsu_fw_data : ex1_bru_a ;
wire [`GRLEN-1:0] bru_b = ex1_bru_b_lsu_fw ? ex1_lsu_fw_data : ex1_bru_b ;
assign bru_delay = branch_valid && !(port0_cancel_allow || port1_cancel_allow || port2_cancel_allow);
现在还没搞清楚branch这么复杂是为了处理什么样的情况。
发现从下面代码可以看出来在chiplab里,port2是专门用来运行branch指令的。
always @(posedge clk) begin
if (rst) begin
ex1_bru_br_taken <= 1'b0;
ex1_bru_link <= 1'b0;
ex1_bru_jrra <= 1'b0;
ex1_bru_brop <= 1'b0;
ex1_bru_jrop <= 1'b0;
end
else if(/*(port0_bru_dispatch || port1_bru_dispatch) &&*/ is_allow_in) begin
ex1_bru_op <= is_port2_valid ? is_port2_op[`LSOC1K_BRU_CODE] : port0_bru_dispatch ? is_port0_op[`LSOC1K_BRU_CODE] : is_port1_op[`LSOC1K_BRU_CODE];
ex1_bru_a <= is_port2_valid ? rdata2_0_input : port0_bru_dispatch ? rdata0_0_input : rdata1_0_input;
ex1_bru_b <= is_port2_valid ? rdata2_1_input : port0_bru_dispatch ? rdata0_1_input : rdata1_1_input;
ex1_bru_br_taken <= is_port2_valid ? is_port2_br_taken : port0_bru_dispatch ? is_port0_br_taken : is_port1_br_taken;
ex1_bru_br_target <= is_port2_valid ? {is_port2_br_target,2'b0} : port0_bru_dispatch ? {is_port0_br_target,2'b0} : {is_port1_br_target,2'b0};
ex1_bru_hint <= is_port2_valid ? is_port2_hint : port0_bru_dispatch ? is_port0_hint : is_port1_hint;
ex1_bru_offset <= is_port2_valid ? port2_offset : port0_bru_dispatch ? port0_offset : port1_offset;
ex1_bru_pc <= is_port2_valid ? is_port2_pc : port0_bru_dispatch ? is_port0_pc : is_port1_pc;
ex1_bru_a_lsu_fw <= is_port2_valid && rdata2_0_lsu_fw || port0_bru_dispatch && rdata0_0_lsu_fw || port1_bru_dispatch && rdata1_0_lsu_fw;
ex1_bru_b_lsu_fw <= is_port2_valid && rdata2_1_lsu_fw || port0_bru_dispatch && rdata0_1_lsu_fw || port1_bru_dispatch && rdata1_1_lsu_fw;
ex1_bru_port[1] <= is_port2_id || port1_bru_dispatch;
ex1_bru_port[2] <= is_port2_app && is_port2_valid;
end
chiplab里的branch也包括JIRL。
在cpu7里,让bru工作在_e,也就是说,在_e完成比较,计算出跳转地址后,直接在_e里传给ifu,改变pc_bf。
并且同时发出kill_d。
也要发出stallF,exu_ifu_stall_req。
其实exu_ifu_stall_req和kill_d作用的方法都一样,就是把当前流水线的valid与取反后的信号相与:valid & (~exu_ifu_stall_req),valid & (~kill_d)。
这样,传到_d _e的valid都是0, 这条指令就没意义了。
loongarch指令里,表示偏移的部分,有叫做si ui的,也有叫offs的。
offs是bru指令里的offset,si ui是其它指令里用的。
这两种偏移的编码是不同的,并且有交集,比如有si16又有offs16。
不过loongarch32里没有用到si16的,只在loongarch64里有,比如:addu16i.d rd, rj, si16
addu16i里的si16的位置,其实和jirl bne的offs16是一样的。而offs还有offs21 offs26,si最大是si20。
感觉编码可以统一起来。
但在代码里,这两部分是分开编码的。
11 //immediate operater prepare
12 wire [ 4:0] port0_i5 = `GET_I5(ifu_exu_inst_d);
13 wire [ 5:0] port0_i6 = `GET_I6(ifu_exu_inst_d);
14 wire [11:0] port0_i12 = `GET_I12(ifu_exu_inst_d);
15 wire [13:0] port0_i14 = `GET_I14(ifu_exu_inst_d);
16 wire [15:0] port0_i16 = `GET_I16(ifu_exu_inst_d);
17 wire [19:0] port0_i20 = `GET_I20(ifu_exu_inst_d);
18
19 wire [30:0] port0_i5_u = {27'b0,port0_i5};
20 wire [31:0] port0_i6_u = {26'b0,port0_i6};
21 wire [31:0] port0_i12_u = {20'b0,port0_i12};
22 wire [31:0] port0_i12_s = {{20{port0_i12[11]}},port0_i12};
23 wire [31:0] port0_i14_s = {{18{port0_i14[13]}},port0_i14};
24 wire [31:0] port0_i16_s = {{16{port0_i16[15]}},port0_i16};
25 wire [31:0] port0_i20_s = {{12{port0_i20[19]}},port0_i20};
26
27 wire [31:0] port0_i5_i = ifu_exu_op_d[`LSOC1K_DOUBLE_WORD] ? port0_i6_u : port0_i5_u;
28 wire [31:0] port0_i12_i = ifu_exu_op_d[`LSOC1K_UNSIGN] ? port0_i12_u : port0_i12_s;
lsoc1000_stage_issue.v
////BRU operater push
wire branch_dispatch = port0_bru_dispatch || port1_bru_dispatch;
wire [15:0] port0_offset16 = `GET_OFFSET16(is_port0_inst);
wire [15:0] port1_offset16 = `GET_OFFSET16(is_port1_inst);
wire [15:0] port2_offset16 = `GET_OFFSET16(is_port2_inst);
wire [20:0] port0_offset21 = `GET_OFFSET21(is_port0_inst);
wire [20:0] port1_offset21 = `GET_OFFSET21(is_port1_inst);
wire [20:0] port2_offset21 = `GET_OFFSET21(is_port2_inst);
wire [25:0] port0_offset26 = `GET_OFFSET26(is_port0_inst);
wire [25:0] port1_offset26 = `GET_OFFSET26(is_port1_inst);
wire [25:0] port2_offset26 = `GET_OFFSET26(is_port2_inst);
`ifdef LA64
wire [63:0] port0_offset = is_port0_op[`LSOC1K_RD_READ ] ? {{46{port0_offset16[15]}},port0_offset16,2'b0} :
is_port0_op[`LSOC1K_HIGH_TARGET] ? {{36{port0_offset26[25]}},port0_offset26,2'b0} :
{{41{port0_offset21[20]}},port0_offset21,2'b0} ;
wire [63:0] port1_offset = is_port1_op[`LSOC1K_RD_READ ] ? {{46{port1_offset16[15]}},port1_offset16,2'b0} :
is_port1_op[`LSOC1K_HIGH_TARGET] ? {{36{port1_offset26[25]}},port1_offset26,2'b0} :
{{41{port1_offset21[20]}},port1_offset21,2'b0} ;
wire [63:0] port2_offset = is_port2_op[`LSOC1K_RD_READ ] ? {{46{port2_offset16[15]}},port2_offset16,2'b0} :
is_port2_op[`LSOC1K_HIGH_TARGET] ? {{36{port2_offset26[25]}},port2_offset26,2'b0} :
{{41{port2_offset21[20]}},port2_offset21,2'b0} ;
`elsif LA32
wire [31:0] port0_offset = is_port0_op[`LSOC1K_RD_READ ] ? {{14{port0_offset16[15]}},port0_offset16,2'b0} :
is_port0_op[`LSOC1K_HIGH_TARGET] ? {{4{port0_offset26[25]}},port0_offset26,2'b0} :
{{9{port0_offset21[20]}},port0_offset21,2'b0} ;
wire [31:0] port1_offset = is_port1_op[`LSOC1K_RD_READ ] ? {{14{port1_offset16[15]}},port1_offset16,2'b0} :
is_port1_op[`LSOC1K_HIGH_TARGET] ? {{4{port1_offset26[25]}},port1_offset26,2'b0} :
{{9{port1_offset21[20]}},port1_offset21,2'b0} ;
wire [31:0] port2_offset = is_port2_op[`LSOC1K_RD_READ ] ? {{14{port2_offset16[15]}},port2_offset16,2'b0} :
is_port2_op[`LSOC1K_HIGH_TARGET] ? {{4{port2_offset26[25]}},port2_offset26,2'b0} :
{{9{port2_offset21[20]}},port2_offset21,2'b0} ;
`endif
我也先分开解码,以后看能不能再改进吧。