No. 17 / project of 147 on the ladder

RV32I external-memory arch-test runner

introduces — Exposed memory bus for running larger official RV32I arch-test images

harden state · last run 2026-04-30
signoff
  • DRC: PASS
  • LVS: PASS
  • antenna: PASS

P17 keeps the P16 RV32I core and UART loader, but moves the program/data memory outside the block. The testbench connects the exposed bus to a 256 KiB behavioral memory model so the official rv32i/I test images can actually run.

Status: Hardened logic shell, architectural tests still PARTIAL. Route DRC, Magic DRC, KLayout DRC, LVS, antenna, setup timing, and hold timing are PASS in RUN_2026-04-30_17-22-47. The official rv32i/I batch is still PARTIAL: I-nop-00.S and I-fence-00.S pass, while the other 37 tests execute and reach the framework fail tail (x5 = 31).

The Result

Run it:

make -C projects/17_rv32i_external_mem/test batch
| result  | count |
|---------|-------|
| PASS    | 2     |
| FAIL    | 37    |
| NOT RUN | 0     |

The passing tests are I-nop-00.S and I-fence-00.S. Every other official RV32I integer test in this batch reaches RVMODEL_HALT_FAIL, which the local environment exposes as x5 = 31.

This is a meaningful change from P16: P16 had 37 NOT RUN cases because the generated self-check images were larger than 8 KiB. P17 runs them.

Hardened Shell

Run it:

make harden PROJECT=17_rv32i_external_mem

This is a logic-only external-memory bus shell. The 256 KiB memory used by the official-test batch is a behavioral testbench model; it is not present in the GDS.

| check | result |
|-------|--------|
| Run directory | projects/17_rv32i_external_mem/librelane/runs/RUN_2026-04-30_17-22-47 |
| Final GDS | projects/17_rv32i_external_mem/librelane/runs/RUN_2026-04-30_17-22-47/final/gds/top.gds |
| Metrics | projects/17_rv32i_external_mem/librelane/runs/RUN_2026-04-30_17-22-47/final/metrics.json |
| Standard cells | 18430 |
| Route DRC | PASS (0) |
| Magic DRC | PASS (0) |
| KLayout DRC | PASS (0) |
| LVS | PASS (0) |
| Antenna | PASS (0) |
| Setup timing | PASS (0 violations, worst setup slack 10.306 ns) |
| Hold timing | PASS (0 violations, worst hold slack 0.105 ns) |
| Max slew checker | PARTIAL (2971 warnings) |
| Max capacitance checker | PARTIAL (29 warnings) |
| Floating-net checker | PARTIAL (2 warnings) |

The SDC now excludes clk from generic input delay constraints and gives STA the same sort of IO drive/load environment used by the previous SRAM-loader project. That removed the avoidable clock input-delay warning. The remaining slew/capacitance warnings are real cleanup work, not a DRC/LVS failure.

The Handoff

The top-level memory interface is the new thing:

projects/17_rv32i_external_mem/src/top.sv system-verilog · L9-122
// P17 top level: UART receiver, image loader and RV32I core sharing one
// externally exposed memory request/response bus.
//
// Bus ownership follows load_mode and loader_done:
//   * while load_mode is high, or loader_done is low, the loader's request
//     signals are forwarded to ext_mem_*;
//   * otherwise the core's request signals are forwarded.
// The core's rst_n port is driven low whenever the loader owns the bus, and
// whenever the top-level rst_n is low.
// ext_mem_rdata is routed to the core only; ext_mem_ready and ext_mem_error
// are routed to both the loader and the core.
module top (
    input  logic        clk,
    input  logic        rst_n,
    input  logic        load_mode,
    input  logic        uart_rx,
    input  logic [13:0] baud_div,
    output logic [31:0] pc_out,
    output logic [31:0] x5_out,
    output logic        halted,
    output logic        illegal,
    output logic        loader_done,
    output logic        loader_error,
    output logic        ext_mem_valid,
    output logic        ext_mem_we,
    output logic [1:0]  ext_mem_size,
    output logic [31:0] ext_mem_addr,
    output logic [31:0] ext_mem_wdata,
    output logic [3:0]  ext_mem_wstrb,
    input  logic [31:0] ext_mem_rdata,
    input  logic        ext_mem_ready,
    input  logic        ext_mem_error
);

  // Request signals produced by the RV32I core.
  logic        cpu_mem_valid;
  logic        cpu_mem_we;
  logic [1:0]  cpu_mem_size;
  logic [31:0] cpu_mem_addr;
  logic [31:0] cpu_mem_wdata;
  logic [3:0]  cpu_mem_wstrb;

  // Request signals produced by the UART image loader.
  logic        loader_mem_valid;
  logic        loader_mem_we;
  logic [1:0]  loader_mem_size;
  logic [31:0] loader_mem_addr;
  logic [31:0] loader_mem_wdata;
  logic [3:0]  loader_mem_wstrb;

  // Request signals after the loader/core selection.
  logic        mem_valid;
  logic        mem_we;
  logic [1:0]  mem_size;
  logic [31:0] mem_addr;
  logic [31:0] mem_wdata;
  logic [3:0]  mem_wstrb;

  // Response signals: plain aliases of the external bus inputs.
  logic [31:0] mem_rdata;
  logic        mem_ready;
  logic        mem_error;
  assign mem_rdata = ext_mem_rdata;
  assign mem_ready = ext_mem_ready;
  assign mem_error = ext_mem_error;

  // Byte stream from the UART receiver into the loader.
  logic [7:0] rx_data;
  logic       rx_valid;

  // The loader gives up the bus only once it reports done AND load_mode has
  // been released; the core leaves reset under exactly that condition.
  logic loader_owns_bus;
  logic cpu_rst_n;
  assign loader_owns_bus = !(loader_done && !load_mode);
  assign cpu_rst_n       = rst_n && !loader_owns_bus;

  // Select the active requester and drive the external request pins from it.
  always_comb begin
    mem_valid = loader_owns_bus ? loader_mem_valid : cpu_mem_valid;
    mem_we    = loader_owns_bus ? loader_mem_we    : cpu_mem_we;
    mem_size  = loader_owns_bus ? loader_mem_size  : cpu_mem_size;
    mem_addr  = loader_owns_bus ? loader_mem_addr  : cpu_mem_addr;
    mem_wdata = loader_owns_bus ? loader_mem_wdata : cpu_mem_wdata;
    mem_wstrb = loader_owns_bus ? loader_mem_wstrb : cpu_mem_wstrb;

    ext_mem_valid = mem_valid;
    ext_mem_we    = mem_we;
    ext_mem_size  = mem_size;
    ext_mem_addr  = mem_addr;
    ext_mem_wdata = mem_wdata;
    ext_mem_wstrb = mem_wstrb;
  end

  p17_uart_rx u_uart_rx (
    .clk       (clk),
    .rst_n     (rst_n),
    .baud_div  (baud_div),
    .rx        (uart_rx),
    .data_out  (rx_data),
    .valid_out (rx_valid)
  );

  // The loader has no read-data connection; it only issues requests and
  // observes ready/error.
  p17_sram_loader u_loader (
    .clk        (clk),
    .rst_n      (rst_n),
    .load_mode  (load_mode),
    .rx_data    (rx_data),
    .rx_valid   (rx_valid),
    .mem_valid  (loader_mem_valid),
    .mem_we     (loader_mem_we),
    .mem_size   (loader_mem_size),
    .mem_addr   (loader_mem_addr),
    .mem_wdata  (loader_mem_wdata),
    .mem_wstrb  (loader_mem_wstrb),
    .mem_ready  (mem_ready),
    .mem_error  (mem_error),
    .done       (loader_done),
    .error      (loader_error)
  );

  // The core sees the gated reset, not the raw top-level rst_n.
  p17_rv32i_arch_core u_core (
    .clk        (clk),
    .rst_n      (cpu_rst_n),
    .mem_valid  (cpu_mem_valid),
    .mem_we     (cpu_mem_we),
    .mem_size   (cpu_mem_size),
    .mem_addr   (cpu_mem_addr),
    .mem_wdata  (cpu_mem_wdata),
    .mem_wstrb  (cpu_mem_wstrb),
    .mem_rdata  (mem_rdata),
    .mem_ready  (mem_ready),
    .mem_error  (mem_error),
    .pc_out     (pc_out),
    .x5_out     (x5_out),
    .halted     (halted),
    .illegal    (illegal)
  );

  // AND-reduction that includes a constant 0 bit, so _unused is always 0.
  // Presumably kept as a lint sink for mem_rdata.
  wire _unused = &{1'b0, mem_rdata};

endmodule

The loader count is wider now:

projects/17_rv32i_external_mem/src/top.sv system-verilog · L125-291
// UART image loader: consumes a framed byte stream and writes the payload,
// one byte per bus request, to consecutive addresses starting at 0.
//
// Stream format as decoded below:
//   byte 0      0xA5 marker (any other byte is skipped while waiting for it)
//   bytes 1..3  payload length, least-significant byte first (24 bits)
//   bytes 4..   payload
// A length of 0, or a length above MEM_BYTES, ends the load with error = 1.
//
// done is set once the last payload byte's request completes with mem_ready
// high and mem_error low. done and error are cleared by rst_n, or when a new
// load starts (load_mode seen high while in L_WAIT_MODE).
module p17_sram_loader (
    input  logic        clk,
    input  logic        rst_n,
    input  logic        load_mode,
    input  logic [7:0]  rx_data,
    input  logic        rx_valid,
    output logic        mem_valid,
    output logic        mem_we,
    output logic [1:0]  mem_size,
    output logic [31:0] mem_addr,
    output logic [31:0] mem_wdata,
    output logic [3:0]  mem_wstrb,
    input  logic        mem_ready,
    input  logic        mem_error,
    output logic        done,
    output logic        error
);

  // Largest payload the loader accepts, in bytes.
  localparam logic [23:0] MEM_BYTES  = 24'd262144;
  // Byte value that opens a load stream.
  localparam logic [7:0]  LOAD_MAGIC = 8'ha5;

  typedef enum logic [3:0] {
    L_WAIT_MODE  = 4'd0,
    L_WAIT_MAGIC = 4'd1,
    L_COUNT_LO   = 4'd2,
    L_COUNT_MID  = 4'd3,
    L_COUNT_HI   = 4'd4,
    L_WAIT_BYTE  = 4'd5,
    L_WRITE      = 4'd6,
    L_DONE       = 4'd7,
    L_ERROR      = 4'd8
  } loader_state_t;

  loader_state_t state;
  logic [23:0] byte_count;   // accepted length; written but never read for control
  logic [23:0] bytes_left;   // payload bytes still to be written, including the current one
  logic [17:0] wr_addr;      // byte address of the current write
  logic [7:0]  byte_q;       // payload byte held while its write is pending
  logic [7:0]  count_lo_q;   // length bits [7:0]
  logic [7:0]  count_mid_q;  // length bits [15:8]

  // Full 24-bit length, valid while the third length byte is on rx_data.
  wire [23:0] count_next = {rx_data, count_mid_q, count_lo_q};
  wire        count_ok   = (count_next >= 24'd1) && (count_next <= MEM_BYTES);

  // States that fall back to L_WAIT_MODE as soon as load_mode is seen low.
  // L_WRITE is deliberately absent: a pending request is held until mem_ready.
  wire abort_on_release = (state == L_WAIT_MAGIC) ||
                          (state == L_COUNT_LO)   ||
                          (state == L_COUNT_MID)  ||
                          (state == L_COUNT_HI)   ||
                          (state == L_WAIT_BYTE)  ||
                          (state == L_ERROR);

  wire last_byte = (bytes_left == 24'd1);

  // Every request is a write of byte_q on data bits [7:0].
  // NOTE(review): mem_size = 0 presumably encodes a byte access, and the
  // strobe is always lane 0 regardless of wr_addr[1:0]. Confirm the attached
  // memory places byte writes from mem_addr/mem_size, not from the strobe lane.
  assign mem_valid = (state == L_WRITE);
  assign mem_we    = 1'b1;
  assign mem_size  = 2'd0;
  assign mem_addr  = {14'h0, wr_addr};
  assign mem_wdata = {24'h0, byte_q};
  assign mem_wstrb = 4'b0001;

  always_ff @(posedge clk or negedge rst_n) begin
    if (!rst_n) begin
      state       <= L_WAIT_MODE;
      byte_count  <= 24'd0;
      bytes_left  <= 24'd0;
      wr_addr     <= 18'd0;
      byte_q      <= 8'h00;
      count_lo_q  <= 8'h00;
      count_mid_q <= 8'h00;
      done        <= 1'b0;
      error       <= 1'b0;
    end else if (!load_mode && abort_on_release) begin
      // load_mode released mid-stream (or after an error): re-arm.
      // done and error keep their current values.
      state <= L_WAIT_MODE;
    end else begin
      case (state)
        // Idle until a load is requested, then clear status and counters.
        L_WAIT_MODE: begin
          if (load_mode) begin
            state      <= L_WAIT_MAGIC;
            wr_addr    <= 18'd0;
            bytes_left <= 24'd0;
            byte_count <= 24'd0;
            error      <= 1'b0;
            done       <= 1'b0;
          end
        end

        // Skip received bytes until the marker shows up.
        L_WAIT_MAGIC: begin
          if (rx_valid && rx_data == LOAD_MAGIC) state <= L_COUNT_LO;
        end

        L_COUNT_LO: begin
          if (rx_valid) begin
            count_lo_q <= rx_data;
            state      <= L_COUNT_MID;
          end
        end

        L_COUNT_MID: begin
          if (rx_valid) begin
            count_mid_q <= rx_data;
            state       <= L_COUNT_HI;
          end
        end

        // Third length byte: validate the assembled length before accepting it.
        L_COUNT_HI: begin
          if (rx_valid) begin
            if (count_ok) begin
              state      <= L_WAIT_BYTE;
              wr_addr    <= 18'd0;
              bytes_left <= count_next;
              byte_count <= count_next;
            end else begin
              state <= L_ERROR;
              error <= 1'b1;
            end
          end
        end

        // Latch the next payload byte and issue its write.
        L_WAIT_BYTE: begin
          if (rx_valid) begin
            byte_q <= rx_data;
            state  <= L_WRITE;
          end
        end

        // Hold the request until mem_ready; mem_error is only sampled then.
        L_WRITE: begin
          if (mem_ready) begin
            if (mem_error) begin
              state <= L_ERROR;
              error <= 1'b1;
            end else if (last_byte) begin
              state      <= L_DONE;
              done       <= 1'b1;
              bytes_left <= 24'd0;
            end else begin
              state      <= L_WAIT_BYTE;
              wr_addr    <= wr_addr + 18'd1;
              bytes_left <= bytes_left - 24'd1;
            end
          end
        end

        // Park here while load_mode stays high; re-arm once it is released.
        // The second assignment overrides the first when load_mode is high.
        L_DONE: begin
          state <= L_WAIT_MODE;
          if (load_mode) state <= L_DONE;
        end

        // Release is handled by the guard above; nothing else to do here.
        L_ERROR: begin
        end

        // Encodings outside the enum are treated as a fault.
        default: begin
          state <= L_ERROR;
          error <= 1'b1;
        end
      endcase
    end
  end

  // AND-reduction that includes a constant 0 bit, so _unused is always 0.
  // Presumably kept as a lint sink for byte_count.
  wire _unused = &{1'b0, byte_count};

endmodule

What This Proves

This still does not prove RV32I compliance. It proves the memory-size blocker is gone for this batch, and the remaining failures are architectural.

That is progress. NOT RUN means the setup could not ask the question. FAIL means the setup asked the question and the core gave the wrong answer. P17 gets us to the second kind of problem. The backend result adds one more useful fact: the exposed-bus version can be hardened, even though the real memory system still lives outside this GDS.