Skip to content

Instantly share code, notes, and snippets.

@tinyfpga
Created December 2, 2017 23:01
Show Gist options
  • Save tinyfpga/20f0d9ffa0879f5697a2e9c9eb243d9a to your computer and use it in GitHub Desktop.
Save tinyfpga/20f0d9ffa0879f5697a2e9c9eb243d9a to your computer and use it in GitHub Desktop.
// simple RV32I processor inspired by Berkeley CS152 notes and PicoRV32:
// * http://www-inst.eecs.berkeley.edu/~cs152/fa16/handouts/microcode.pdf
// * http://www-inst.eecs.berkeley.edu/~cs152/fa16/lectures/L02-SimpleImps.pdf
// * https://github.com/cliffordwolf/picorv32
// * https://content.riscv.org/wp-content/uploads/2017/05/riscv-spec-v2.2.pdf
//
// intention is for this to be smaller and higher frequency than PicoRV32, but
// at the cost of much lower overall performance. if used with the XIP SPI
// controller, the lower performance may not make much of a difference when
// coupled to the slow speed of executing instructions directly from SPI
// flash.
//
// the design is currently optimized for iCE40 FPGAs which have the
// following attributes along with the significant consequences:
// * no distributed RAM -> register file and PC are stored in block RAM
// * no dedicated muxes in slices -> wired-or is more efficient than muxing
//
// tinyrv32: microcoded RV32I datapath built around a single 32-bit wired-OR
// bus. Every source (immediate, ALU, register file, memory, reset address,
// counters) is gated by its own enable and OR-ed onto the bus; every sink
// (ir, reg_a, reg_b, mem_addr, register file) loads from the bus.
//
// The control signals (ld_*, en_*, *_sel, alu_op, alu_cmp_op, reg_rd, reg_wr,
// mem_read, mem_write, mem_read_instr, ...) and the REG_SEL_*, IMM_SEL_*,
// ALU_* macros are NOT declared in this file; they are expected to come from
// the textual include of "control.v" below.
module tinyrv32 #(
    ///////////////////////////////////////////////////////////////////////////
    //// PARAMETERS
    ///////////////////////////////////////////////////////////////////////////
    // NOTE: attempting to use same parameter naming as PicoRV32. The
    // commented-out entries are PicoRV32 parameters this module does not
    // expose (yet).
    // parameter [ 0:0] ENABLE_COUNTERS = 1,
    // parameter [ 0:0] ENABLE_COUNTERS64 = 1,
    // parameter [ 0:0] ENABLE_REGS_16_31 = 1,
    // parameter [ 0:0] ENABLE_REGS_DUALPORT = 1,
    // parameter [ 0:0] LATCHED_MEM_RDATA = 0,
    // parameter [ 0:0] TWO_STAGE_SHIFT = 1,
    // parameter [ 0:0] BARREL_SHIFTER = 0,
    // parameter [ 0:0] TWO_CYCLE_COMPARE = 0,
    // parameter [ 0:0] TWO_CYCLE_ALU = 0,
    // parameter [ 0:0] COMPRESSED_ISA = 0,
    // parameter [ 0:0] CATCH_MISALIGN = 1,
    // parameter [ 0:0] CATCH_ILLINSN = 1,
    // parameter [ 0:0] ENABLE_PCPI = 0,
    // parameter [ 0:0] ENABLE_MUL = 0,
    // parameter [ 0:0] ENABLE_FAST_MUL = 0,
    // parameter [ 0:0] ENABLE_DIV = 0,
    // parameter [ 0:0] ENABLE_IRQ = 0,
    // parameter [ 0:0] ENABLE_IRQ_QREGS = 1,
    // parameter [ 0:0] ENABLE_IRQ_TIMER = 1,
    // parameter [ 0:0] ENABLE_TRACE = 0,
    // parameter [ 0:0] REGS_INIT_ZERO = 0,
    // parameter [31:0] MASKED_IRQ = 32'h 0000_0000,
    // parameter [31:0] LATCHED_IRQ = 32'h ffff_ffff,
    parameter [31:0] PROGADDR_RESET = 32'h 0000_0000, // driven onto the bus by en_addr_rst
    parameter [31:0] PROGADDR_IRQ = 32'h 0000_0010    // not referenced in this file
    // parameter [31:0] STACKADDR = 32'h ffff_ffff
) (
    ///////////////////////////////////////////////////////////////////////////
    //// SYSTEM
    ///////////////////////////////////////////////////////////////////////////
    input clk,
    input resetn,
    ///////////////////////////////////////////////////////////////////////////
    //// CONTROL INPUTS
    ///////////////////////////////////////////////////////////////////////////
    // (kept as documentation of the control signals used by the datapath;
    //  they are not ports of this module)
    //input ld_ir, // load ir with data from bus
    //input [ 2:0] imm_sel, // select immediate operand encoding
    //input en_imm, // drive immediate operand onto bus
    //input ld_a, // load ALU reg A with bus data
    //input ld_b, // load ALU reg B with bus data
    //input [ 1:0] alu_op, // ALU operation to perform
    //input [ 2:0] alu_cmp_op, // ALU comparison operator to perform
    //input alu_rhs_sel, // select rhs value for ADD/SUB operations
    //input en_alu, // drive ALU output onto bus
    //input [ 2:0] reg_sel, // select register address
    //input reg_rd, // initiate register read (data avail next clock)
    //input reg_wr, // write register with bus data
    //input en_reg, // drive register data onto bus
    //input ld_ma, // load memory address register with bus data
    //input en_mem, // drive memory data onto bus
    //input en_addr_rst, // drive program reset addr onto bus
    //input en_addr_irq, // drive program irq addr onto bus
    ///////////////////////////////////////////////////////////////////////////
    //// CONTROL OUTPUTS
    ///////////////////////////////////////////////////////////////////////////
    ///*logic*/ reg alu_true, // alu test operation evaluates to true
    //output alu_valid, // alu output is valid
    ///////////////////////////////////////////////////////////////////////////
    //// EXTERNAL MEMORY INTERFACE
    ///////////////////////////////////////////////////////////////////////////
    // same memory interface as PicoRV32
    // https://github.com/cliffordwolf/picorv32
    // NOTE: control signals are on control unit
    output mem_valid, // valid memory transfer in progress, high until mem_ready is asserted
    output mem_instr, // memory transfer is an instruction fetch
    input mem_ready, // write is complete/read is complete and data is valid
    output reg [31:0] mem_addr = 0,
    output [31:0] mem_wdata,
    output [ 3:0] mem_wstrb, // zero value implies read, non-zero value indicates which bytes to write
    input [31:0] mem_rdata
);
    ///////////////////////////////////////////////////////////////////////////
    //// INTERNAL SIGNALS
    ///////////////////////////////////////////////////////////////////////////
    // 32 architectural registers plus one extra slot (index 32) for the PC.
    localparam REG_FILE_DEPTH = 33;

    reg [31:0] bus;                 // main datapath bus, all transfers go through here
    /*logic*/ reg [31:0] imm_value; // calculated immediate value (same width as the bus)
    reg [5:0] reg_addr;             // register file address
    wire reset = !resetn;

    ///////////////////////////////////////////////////////////////////////////
    //// DATAPATH STATE
    ///////////////////////////////////////////////////////////////////////////
    reg [31:0] ir = 0;              // instruction register
    reg [31:0] reg_a = 0;           // ALU operand register A
    reg [31:0] reg_b = 0;           // ALU operand register B
    //reg [31:0] mem_addr;          // memory address register (declared as an output port)
    reg [31:0] reg_file [REG_FILE_DEPTH-1:0]; // register file including PC
    reg [31:0] reg_out = 0;         // register file output flop stage

    // Zero every register file entry. The bound must match the declared
    // depth; indexing past the end of the array is out of range.
    integer i;
    initial begin
        for (i = 0; i < REG_FILE_DEPTH; i = i + 1) begin
            reg_file[i] = 32'h0;
        end
    end

    reg alu_valid;
    reg alu_true;

    // Textual include: must stay after the declarations above and before the
    // first use of any control signal or macro below.
`include "control.v"

    assign mem_valid = mem_read | mem_write;
    assign mem_instr = mem_read_instr;
    assign mem_wdata = bus;
    assign mem_wstrb = {4{mem_write}}; // all four byte lanes are written together

    // loading datapath registers with bus data
    always @(posedge clk) begin
        if (ld_ir) ir <= bus;
        if (ld_a) reg_a <= bus;
        if (ld_b) reg_b <= bus;
        if (ld_ma) mem_addr <= bus;
    end

    // register file address selection (combinational)
    always @* begin
        case (reg_sel)
            `REG_SEL_RD  : reg_addr = {1'b0, ir[11:7]};
            `REG_SEL_RS1 : reg_addr = {1'b0, ir[19:15]};
            `REG_SEL_RS2 : reg_addr = {1'b0, ir[24:20]};
            `REG_SEL_PC  : reg_addr = 6'd32;
            default      : reg_addr = 6'hXX;
        endcase
    end

    always @(posedge clk) begin
        // register file read path (registered: data is available next clock)
        if (reg_rd) reg_out <= reg_file[reg_addr];
        // register file write path
        if (reg_wr) reg_file[reg_addr] <= bus;
    end

    ///////////////////////////////////////////////////////////////////////////
    //// IMMEDIATE CALCULATION
    ///////////////////////////////////////////////////////////////////////////
    // riscv-spec-v2.2.pdf, figure 2.4
    // Every concatenation below is exactly 32 bits wide.
    always @* begin
        case (imm_sel)
            `IMM_SEL_I_IMM : imm_value = {{21{ir[31]}}, ir[30:25], ir[24:21], ir[20]};
            `IMM_SEL_S_IMM : imm_value = {{21{ir[31]}}, ir[30:25], ir[11:8], ir[7]};
            // B-type: imm[11] = ir[7], imm[10:5] = ir[30:25], imm[4:1] = ir[11:8]
            `IMM_SEL_B_IMM : imm_value = {{20{ir[31]}}, ir[7], ir[30:25], ir[11:8], 1'b0};
            `IMM_SEL_U_IMM : imm_value = {ir[31], ir[30:20], ir[19:12], 12'b000000000000};
            `IMM_SEL_J_IMM : imm_value = {{12{ir[31]}}, ir[19:12], ir[20], ir[30:25], ir[24:21], 1'b0};
            default        : imm_value = 32'hXXXXXXXX;
        endcase
    end

    ///////////////////////////////////////////////////////////////////////////
    //// ALU CALCULATION
    ///////////////////////////////////////////////////////////////////////////
    // supported operations (alu_op):
    // 1 cycle:
    // a & b            ALU_OP_AND
    // a | b            ALU_OP_OR
    // a ^ b            ALU_OP_XOR
    // a + rhs          ALU_OP_ADD
    // a - rhs          ALU_OP_SUB
    // a < rhs          ALU_OP_SLT
    // a <u rhs         ALU_OP_SLTU
    //
    // b cycles:
    // a << b           ALU_OP_SLL
    // a >> b           ALU_OP_SRL
    // a >>> b          ALU_OP_SRA
    //
    // comparison result on alu_true (alu_cmp_op):
    // a == b           ALU_CMP_OP_EQ
    // a != b           ALU_CMP_OP_NE
    // a < b            ALU_CMP_OP_LT
    // a <u b           ALU_CMP_OP_LTU
    // a >= b           ALU_CMP_OP_GE
    // a >=u b          ALU_CMP_OP_GEU
    //
    // rhs select (alu_rhs_sel):
    // b                ALU_RHS_SEL_B
    // 1                ALU_RHS_SEL_1
    // 2                ALU_RHS_SEL_2
    // 4                ALU_RHS_SEL_4
    //
    // ALU mux hierarchy
    // bus
    //   LOGIC (a, b)
    //     AND
    //     OR
    //     XOR
    //   ADD/SUB (a, rhs)
    //     ADD
    //     SUB
    //   CMP (a, rhs)
    //     SLT
    //     SLTU
    //   SHIFT (a, shamt)
    //     SLL
    //     SRL
    //     SRA
    //
    // alu_true
    //   EQ
    //   NE
    //   LT
    //   LTU
    //   GE
    //   GEU

    // logic unit theory of operation
    // ------------------------------
    // logic operations don't require use of the carry-chain. each bit
    // position can be calculated independently of the rest. this means we
    // have full use of all 4 inputs of each LUT. two of those inputs can be
    // used to select the logic operation, and the other two inputs are used
    // for the operation operands.
    /*logic*/ reg [31:0] alu_logic_output;
    always @* begin
        case (alu_op)
            `ALU_OP_AND : alu_logic_output = reg_a & reg_b;
            `ALU_OP_OR  : alu_logic_output = reg_a | reg_b;
            `ALU_OP_XOR : alu_logic_output = reg_a ^ reg_b;
            default     : alu_logic_output = 32'hXXXXXXXX;
        endcase
    end

    // add/sub/cmp theory of operation
    // ---------------------------
    // FPGAs use "high-speed" carry chains to implement addition, subtraction,
    // and comparison operations. if we simply use a case statement for all of
    // the operations there is a good chance the FPGA synthesis tools will not
    // create an optimal design for usage. instead, we can use just one
    // 32-bit carry chain to implement most of these operations.
    //
    // this carry chain implements an adder/subtractor. those two operations
    // can be used as is. we can then implement most of the comparison
    // operators by putting the adder/subtractor into subtraction mode and
    // looking at the sign bit of the operands and output.
    //
    // finally, we need to add an equality test to implement equals, not
    // equals, and help calculate greater-than tests.
    //
    /*logic*/ reg [31:0] alu_rhs;
    always @* begin
        case (alu_rhs_sel)
            `ALU_RHS_SEL_B : alu_rhs = reg_b;
            `ALU_RHS_SEL_4 : alu_rhs = 32'h4;
            `ALU_RHS_SEL_2 : alu_rhs = 32'h2;
            `ALU_RHS_SEL_1 : alu_rhs = 32'h1;
            default        : alu_rhs = 32'hXXXXXXXX;
        endcase
    end

    // Single adder/subtractor: anything other than ALU_OP_ADD subtracts, so
    // the comparison logic below sees (reg_a - alu_rhs).
    wire [31:0] alu_add_output = (alu_op == `ALU_OP_ADD) ? (reg_a + alu_rhs) : (reg_a - alu_rhs);

    wire alu_eq_out = reg_a == reg_b;
    wire alu_ne_out = !alu_eq_out;

    // NOTE: these two helpers are not referenced anywhere else in this file.
    wire alu_2_positive_operands = !reg_a[31] & !alu_rhs[31];
    wire alu_2_negative_operands = reg_a[31] & alu_rhs[31];

    // signed less-than: with equal sign bits the subtraction cannot overflow,
    // so its sign bit is the answer; otherwise a negative lhs is the smaller.
    wire alu_lt_out =
        ((reg_a[31] == alu_rhs[31]) & alu_add_output[31]) |
        (reg_a[31] & !alu_rhs[31]);
    wire alu_ge_out =
        !alu_lt_out;

    // unsigned less-than: same idea, but with differing top bits the operand
    // whose top bit is clear is the smaller one.
    wire alu_ltu_out =
        ((reg_a[31] == alu_rhs[31]) & alu_add_output[31]) |
        (!reg_a[31] & alu_rhs[31]);
    wire alu_geu_out =
        !alu_ltu_out;

    wire [31:0] alu_slt_output = {31'b0, ((alu_op == `ALU_OP_SLT) ? alu_lt_out : alu_ltu_out)};

    // comparison result selection for the control unit
    always @* begin
        case (alu_cmp_op)
            `ALU_CMP_OP_EQ  : alu_true = alu_eq_out;
            `ALU_CMP_OP_NE  : alu_true = alu_ne_out;
            `ALU_CMP_OP_LT  : alu_true = alu_lt_out;
            `ALU_CMP_OP_LTU : alu_true = alu_ltu_out;
            `ALU_CMP_OP_GE  : alu_true = alu_ge_out;
            `ALU_CMP_OP_GEU : alu_true = alu_geu_out;
            default         : alu_true = 1'bX;
        endcase
    end

    // shifter theory of operation
    // ---------------------------
    // when ld_b is asserted, shamt register is loaded with the lower
    // 5 bits of the data bus and shift_output is loaded with reg_a.
    //
    // while shamt is not equal to 0, the shift operation will be executed
    // by one bit position, the value stored in shift_output, and shamt
    // decremented by 1. shift_valid is asserted when shamt is equal to 0.
    //
    // WARNING: the shifter REQUIRES that A is loaded before B
    //
    reg [4:0] shamt;
    reg [31:0] shift_output;
    wire shift_valid = shamt == 0;
    always @(posedge clk) begin
        if (ld_b) begin
            shamt <= bus[4:0];
            shift_output <= reg_a;
        end else if (!shift_valid) begin
            shamt <= shamt - 1;
            case (alu_op)
                `ALU_OP_SLL : shift_output <= $unsigned(shift_output) << 1;
                `ALU_OP_SRL : shift_output <= $unsigned(shift_output) >> 1;
                `ALU_OP_SRA : shift_output <= $signed(shift_output) >>> 1; // FIXME: make this more efficient
                default     : shift_output <= 32'hXXXXXXXX;
            endcase
        end
    end

    // ALU valid output generation: only the multi-cycle shifts can be
    // not-yet-valid; every other operation is valid immediately.
    always @* begin
        case (alu_op)
            `ALU_OP_SLL : alu_valid = shift_valid;
            `ALU_OP_SRL : alu_valid = shift_valid;
            `ALU_OP_SRA : alu_valid = shift_valid;
            default     : alu_valid = 1'b1;
        endcase
    end

    // per-unit bus enables, decoded from alu_op and gated by en_alu
    wire en_alu_logic = ((alu_op == `ALU_OP_AND) | (alu_op == `ALU_OP_OR) | (alu_op == `ALU_OP_XOR)) & en_alu;
    wire en_alu_add = ((alu_op == `ALU_OP_ADD) | (alu_op == `ALU_OP_SUB)) & en_alu;
    wire en_alu_slt = ((alu_op == `ALU_OP_SLT) | (alu_op == `ALU_OP_SLTU)) & en_alu;
    wire en_alu_shift = ((alu_op == `ALU_OP_SLL) | (alu_op == `ALU_OP_SRL) | (alu_op == `ALU_OP_SRA)) & en_alu;

    // free-running cycle counter, cleared while reset is asserted
    reg [31:0] cycle_counter;
    always @(posedge clk) begin
        if (reset) begin
            cycle_counter <= 32'h0;
        end else begin
            cycle_counter <= cycle_counter + 1;
        end
    end

    // counts cycles in which an instruction fetch completes (mem_ready & mem_instr)
    reg [31:0] instret_counter;
    always @(posedge clk) begin
        if (reset) begin
            instret_counter <= 32'h0;
        end else if (mem_ready & mem_instr) begin
            instret_counter <= instret_counter + 1;
        end
    end

    ///////////////////////////////////////////////////////////////////////////
    //// BUS DRIVERS
    ///////////////////////////////////////////////////////////////////////////
    // wired-OR bus: each enabled source is OR-ed in; with no enable asserted
    // the bus reads as zero.
    always @* begin
        bus = 32'h00000000;
        if (en_imm) bus = bus | imm_value;
        if (en_alu_logic) bus = bus | alu_logic_output;
        if (en_alu_add) bus = bus | alu_add_output;
        if (en_alu_slt) bus = bus | alu_slt_output;
        if (en_alu_shift) bus = bus | shift_output;
        if (en_reg) bus = bus | reg_out;
        if (en_mem) bus = bus | mem_rdata;
        if (en_addr_rst) bus = bus | PROGADDR_RESET;
        if (en_cycle_counter) bus = bus | cycle_counter;
        if (en_instret_counter) bus = bus | instret_counter;
`ifndef SYNTHESIS
        // simulation-only trace; kept out of the synthesized netlist
        $display("bus = %08x", bus);
`endif
    end

    // alternative formulation as an explicitly balanced OR tree (stale: it
    // does not include the cycle/instret counter sources)
    /*
    assign bus =
    (
    (en_imm ? imm_value : 32'h00000000) |
    (en_alu_logic ? alu_logic_output : 32'h00000000)
    ) | (
    (en_alu_add ? alu_add_output : 32'h00000000) |
    (en_alu_slt ? alu_slt_output : 32'h00000000)
    ) | (
    (en_alu_shift ? shift_output : 32'h00000000) |
    (en_reg ? reg_out : 32'h00000000)
    ) | (
    (en_mem ? mem_rdata : 32'h00000000) |
    (en_addr_rst ? PROGADDR_RESET : 32'h00000000)
    );
    */
endmodule
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment