diff --git a/Documentation/01_Module_Docs/14_L1_Instruction_Cache.md b/Documentation/01_Module_Docs/14_L1_Instruction_Cache.md index 4273a27..f7b46a8 100644 --- a/Documentation/01_Module_Docs/14_L1_Instruction_Cache.md +++ b/Documentation/01_Module_Docs/14_L1_Instruction_Cache.md @@ -1,41 +1,49 @@ -# THIS MODULE IS NOT YET OUTLINED # - # L1 Instruction Cache # -(Any Notes would go here) ## Contents -* [Inputs](#inputs) -* [Outputs](#outputs) -* [Functionality](#functionality) - * [Registers](#registers) - * [Clk](#on-posedge-clk) - * [Active low reset](#asynchronous-active-low-reset) +* Inputs +* Outputs +* Functionality + * Registers + * Clk + * Active low reset ## Inputs -|Name|Bits wide| -|:---|:---:| -|```name```|#-bit| -|```name```|#-bit| +| Name | Bits wide | +|:--------------------|:----------------:| +| `clk` | 1-bit | +| `rstn` | 1-bit | +| `read_enable` | 1-bit | +| `request_address` | ADDR_WIDTH-bit | +| `mem_response_data` | DATA_WIDTH-bit | +| `mem_ready` | 1-bit | ## Outputs -|Name|Bits wide| -|:---|:---:| -|```name```|#-bit| -|```name```|#-bit| +| Name | Bits wide | +|:--------------------|:----------------:| +| `response_data` | DATA_WIDTH-bit | +| `mem_request` | 1-bit | +| `mem_address` | ADDR_WIDTH-bit | +| `c_state` | 2-bit | ## Functionality ### Registers - - #-bit ```name``` register - - #-bit ```name``` register +- `valid_bits`: Indicates whether a cache line is valid. +- `tag_array`: Holds the tag for each cache line. +- `lru_bits`: Maintains the least recently used (LRU) replacement policy. + ### On posedge clk - - ```somebranch = someval``` - - Use a table when necessary if statements are used: - - ```name``` - |Name|Bits wide| - |---|---| - |```name == 0```|```reg = val```| - |```name == 1```|```reg = val```| - +- If `read_enable` is high: + - Check whether the cache has a hit or miss. + - If it’s a hit, set `response_data` to the instruction stored in the cache. + - If it’s a miss, generate a memory request by setting `mem_request` to 1 and load data from the address stored in `mem_address`. + + - Use the LRU algorithm to select which cache line to replace: + | Name | Condition | + |----------------|-------------------------| + | `lru_bits == 0`| Replace the first cache line | + | `lru_bits == 1`| Replace the second cache line | ### Asynchronous active low reset - - Register values reset to 0 +- All `valid_bits` and `lru_bits` are reset to 0. +- The `tag_array` is cleared. diff --git a/dv/CMakeLists.txt b/dv/CMakeLists.txt index bb1bd39..cacd56c 100644 --- a/dv/CMakeLists.txt +++ b/dv/CMakeLists.txt @@ -3,30 +3,31 @@ find_package(nyu-util REQUIRED CONFIG) add_executable(tests) target_sources(tests PRIVATE - alu.cpp + #alu.cpp - branch_addr_calc.cpp - branch_eval.cpp - branch_manager.cpp - branch_predictor.cpp + #branch_addr_calc.cpp + #branch_eval.cpp + #branch_manager.cpp + #branch_predictor.cpp - con_branch_cont.cpp - con_ex.cpp - con_id.cpp + #con_branch_cont.cpp + #con_ex.cpp + #con_id.cpp - data_cache_manager.cpp - exmem.cpp + #data_cache_manager.cpp + #exmem.cpp - gpr.cpp - idex.cpp - ifid.cpp + #gpr.cpp + #idex.cpp + #ifid.cpp # Failing # l1_data_cache.cpp + l1_ins_cache.cpp - memwb.cpp - pc.cpp - pipeline_reset.cpp + #memwb.cpp + #pc.cpp + #pipeline_reset.cpp ) nyu_link_sv(tests PRIVATE core) @@ -54,6 +55,8 @@ nyu_target_verilate(tests # Failing # L1_Data_Cache + L1_Instruction_Cache + MEMWB PC Pipeline_Reset diff --git a/dv/l1_ins_cache.cpp b/dv/l1_ins_cache.cpp new file mode 100644 index 0000000..4306c52 --- /dev/null +++ b/dv/l1_ins_cache.cpp @@ -0,0 +1,202 @@ +#include + +#include +#include + +#include + +const int BLOCK_SIZE = 4; +const int CACHE_SIZE = 1024; // 1 KB +const int ASSOCIATIVITY = 2; +const int NUM_SETS = CACHE_SIZE / (BLOCK_SIZE * ASSOCIATIVITY); +const int DATA_WIDTH = 32; +const int ADDR_WIDTH = 32; +const int INDEX_WIDTH = 7; +const int TAG_WIDTH = ADDR_WIDTH-INDEX_WIDTH; + +struct ins_ram{ + std::uint32_t data[2048]={0}; + bool ready=1; + std::uint32_t res_data=0; + void read(uint32_t address){ + res_data=data[address]; + } + void reset(){ + for(size_t i {0};i<2048;i++){ + data[i]=0; + + } + res_data=0; + } +}; + + +class CacheSimulator { +public: + std::vector> memory; + std::vector> valid_bits; + std::vector> tags; + std::vector> lru_counters; + std::uint32_t output; + CacheSimulator() { + memory.resize(NUM_SETS, std::vector(ASSOCIATIVITY, 0)); + valid_bits.resize(NUM_SETS, std::vector(ASSOCIATIVITY, false)); + tags.resize(NUM_SETS, std::vector(ASSOCIATIVITY, 0)); + lru_counters.resize(NUM_SETS, std::vector(ASSOCIATIVITY, 0)); + } + + void reset() { + for (int i = 0; i < NUM_SETS; ++i) { + for (int j = 0; j < ASSOCIATIVITY; ++j) { + memory[i][j] = 0; + valid_bits[i][j] = false; + tags[i][j] = 0; + lru_counters[i][j] = j; + //lru_counters[i][j] = 1; + } + } + } + + void accessCache(uint32_t address, bool write,bool read, uint32_t write_data,ins_ram &mem) { + uint32_t tag = address >> (ADDR_WIDTH-TAG_WIDTH); + uint32_t index = address & ((1 << INDEX_WIDTH) - 1); + + bool hit = false; + int way = 0; + + // hit check + for (int i = 0; i < ASSOCIATIVITY; ++i) { + if (valid_bits[index][i] && tags[index][i] == tag) { + hit = true; + way = i; + break; + } + } + + if (hit) { + if (read) { + output=memory[index][way]; + } + updateLRU(index, way); + } else { + if (read) { + way = findLRUWay(index); + memory[index][way] = mem.data[address]; + tags[index][way] = tag; + valid_bits[index][way] = true; + updateLRU(index, way); + } + } + } + +private: + + uint32_t address; + uint32_t tag; + uint32_t index; + + int findLRUWay(int set_index) { + int lru_way = 0, max_usage = 0; + for (int i = 0; i < ASSOCIATIVITY; ++i) { + if (lru_counters[set_index][i] > max_usage) { + max_usage = lru_counters[set_index][i]; + lru_way = i; + } + } + return lru_way; + } + + void updateLRU(int set_index, int accessed_way) { + for (int i = 0; i < ASSOCIATIVITY; ++i) { + if (i == accessed_way) { + lru_counters[set_index][i] = 0; + } else { + lru_counters[set_index][i]++; + } + } + } +}; + +static void init(auto& L1) { + L1.rstn = 0; + L1.clk = 0; + nyu::eval(L1); + L1.rstn = 1; + nyu::eval(L1); +} + +static void read_eval(auto& L1, CacheSimulator& sim_cache, ins_ram& memory, std::uint32_t request_address) { + // Set up initial conditions + L1.read_enable = 1; + L1.request_address = request_address; + L1.mem_ready = 0; + L1.mem_response_data = 0; + L1.mem_request = 0; + L1.mem_write_enable = 0; + + // De-assert read_enable after one cycle + L1.clk = 1; + nyu::eval(L1); + L1.clk = 0; + nyu::eval(L1); + L1.read_enable = 0; + + bool response_valid = false; + int max_cycles = 100; // prevent infinite loops + int cycles = 0; + uint32_t response_data = 0; + + while (!response_valid && cycles < max_cycles) { + L1.clk = !L1.clk; // Toggle clock + nyu::eval(L1); + + // Simulate memory interaction + if (L1.mem_request) { + // Simulate memory latency if needed + L1.mem_ready = 1; + L1.mem_response_data = memory.data[L1.mem_address]; + } else { + L1.mem_ready = 0; + } + + // Check if response_data is valid + if (L1.c_state == 0 && !L1.read_enable && !L1.mem_request) { + response_data = L1.response_data; + response_valid = true; + } + + cycles++; + } + + // Use the CacheSimulator to get the expected output + sim_cache.accessCache(request_address, /*write=*/false, /*read=*/true, /*write_data=*/0, memory); + + // Compare outputs + REQUIRE(response_valid); // Ensure that we got a response + REQUIRE(response_data == sim_cache.output); +} + +// Test case using Catch2 framework +TEST_CASE("Cache read test") { + // Create instances + L1_Instruction_Cache L1; + CacheSimulator sim_cache; + ins_ram memory; + + // Initialize cache and memory + init(L1); + sim_cache.reset(); + memory.reset(); + + // Initialize RAM with some data + for (size_t i = 0; i < 2048; ++i) { + memory.data[i] = i * 4; // Sample data + } + + // Addresses to test + std::uint32_t addresses[] = {0x00000000, 0x00000004, 0x00000008, 0x0000000C}; + + for (auto address : addresses) { + read_eval(L1, sim_cache, memory, address); + } +} \ No newline at end of file diff --git a/rtl/CMakeLists.txt b/rtl/CMakeLists.txt index b65582e..580f083 100644 --- a/rtl/CMakeLists.txt +++ b/rtl/CMakeLists.txt @@ -1,5 +1,5 @@ nyu_add_sv(core -Con_ID.sv Con_EX.sv Alu.sv Con_Branch_Cont.sv Branch_Eval.sv PC.sv IFID.sv MEMWB.sv GPR.sv EXMEM.sv Branch_Addr_Calc.sv IDEX.sv Branch_Predictor.sv Pipeline_Reset.sv Branch_Manager.sv Data_Cache_Manager.sv L1_Data_Cache.sv +Con_ID.sv Con_EX.sv Alu.sv Con_Branch_Cont.sv Branch_Eval.sv PC.sv IFID.sv MEMWB.sv GPR.sv EXMEM.sv Branch_Addr_Calc.sv IDEX.sv Branch_Predictor.sv Pipeline_Reset.sv Branch_Manager.sv Data_Cache_Manager.sv L1_Data_Cache.sv L1_Instruction_Cache.sv ) diff --git a/rtl/L1_Instruction_Cache.sv b/rtl/L1_Instruction_Cache.sv new file mode 100644 index 0000000..61a0c0b --- /dev/null +++ b/rtl/L1_Instruction_Cache.sv @@ -0,0 +1,281 @@ +module Ins_sram_module( + input clk, + input write_enable, read_enable, + input [INDEX_WIDTH - 1:0] set_index, + input [WAY_WIDTH - 1:0] way_select, + input [31:0] write_data, + output logic [31:0] read_data +); + // Constants for cache configuration + localparam BLOCK_SIZE = 4; // 4 bytes since our Ins are 32-bit + localparam CACHE_SIZE = 4 * 256; // 1 KB + localparam ASSOCIATIVITY = 2; + localparam BLOCK_WIDTH = BLOCK_SIZE * 8; //32 bits + localparam NUM_SETS = CACHE_SIZE/(BLOCK_SIZE * ASSOCIATIVITY); //128 sets + localparam ADDR_WIDTH = 32; + localparam INDEX_WIDTH = $clog2(NUM_SETS); // 7 bits + localparam WAY_WIDTH = $clog2(ASSOCIATIVITY); // 1 bit + reg [BLOCK_WIDTH -1:0] memory_array [0:NUM_SETS * ASSOCIATIVITY - 1]; + wire [ADDR_WIDTH -1:0] actual_address = set_index * ASSOCIATIVITY + {31'b0, way_select}; + + + always @(posedge clk) begin + if (write_enable) begin + if (actual_address < (NUM_SETS * ASSOCIATIVITY)) + begin + memory_array[actual_address] <= write_data; + end + end + end + assign read_data = (read_enable && actual_address < (NUM_SETS * ASSOCIATIVITY)) ? memory_array[actual_address] : 32'bz; + +endmodule : Ins_sram_module + +module L1_Instruction_Cache( + input clk, + input rstn, + input read_enable, + input [31:0] request_address, + output logic [31:0] response_data, + output logic [1:0] c_state, + + // To LOWER MEMORY + output logic mem_request, + output logic [31:0] mem_address, + //output logic mem_write_enable, + input [31:0] mem_response_data, + input mem_ready +); + // Hardcoded parameters for the data cache + localparam CACHE_SIZE = 4 * 256; // Cache size: 1 KB + localparam BLOCK_SIZE = 4; // Block size: 4 bytes (32 bits) + localparam ASSOCIATIVITY = 2; // 2-way + localparam DATA_WIDTH = 32; + + localparam BLOCK_WIDTH = BLOCK_SIZE * 8; //32 bits + localparam NUM_SETS = CACHE_SIZE/(BLOCK_SIZE * ASSOCIATIVITY); // 128 sets + localparam ADDR_WIDTH = 32; + + // Calculating the number of bits for index, and tag + localparam INDEX_WIDTH = $clog2(NUM_SETS); // 7 bits + localparam TAG_WIDTH = ADDR_WIDTH - INDEX_WIDTH; // 25 bits + localparam WAY_WIDTH = $clog2(ASSOCIATIVITY); + localparam LRU_WIDTH = $clog2(ASSOCIATIVITY); // 1 bit + + // Internal Variables + + reg [TAG_WIDTH - 1:0] cache_tags [0:NUM_SETS-1][0:ASSOCIATIVITY-1]; + reg valid [0:NUM_SETS-1][0:ASSOCIATIVITY-1]; + reg [LRU_WIDTH - 1 : 0] lru_counter [0 : NUM_SETS - 1][0 : ASSOCIATIVITY - 1]; + + reg hit; + reg [WAY_WIDTH - 1:0] way, lru_way; + + reg sram_read_req =0; + + typedef enum integer {IDLE, CHECK_TAG, FILL} cache_state_t; + cache_state_t state = IDLE; + + typedef struct packed{ + logic [ADDR_WIDTH - 1:0] address; + logic [TAG_WIDTH-1:0] tag; + logic [INDEX_WIDTH-1:0] index; + }current_address_t; + current_address_t current_addr; + + reg [31:0] sram_read_data; + + typedef struct packed{ + logic [31:0] write_data; + logic write_enable; + logic read_enable; + logic way; + logic [INDEX_WIDTH-1:0] index; + + }sram_data_t; + + sram_data_t put_sram_data; + + Ins_sram_module cache_Ins_sram ( + .clk(clk), + .write_enable (put_sram_data.write_enable), + .read_enable (put_sram_data.read_enable), + .set_index (put_sram_data.index), + .way_select (put_sram_data.way), + .write_data (put_sram_data.write_data), + .read_data (sram_read_data) + ); + + // LRU Function + function [WAY_WIDTH - 1 : 0] get_lru_way(input [INDEX_WIDTH - 1 : 0] set_index); + integer i; + reg [LRU_WIDTH - 1 : 0] max_count; + begin + max_count = 0; + lru_way = 0; + for (i = 0; i < ASSOCIATIVITY; i = i + 1) begin + if (lru_counter[set_index][i] > max_count) begin + max_count = lru_counter[set_index][i]; + lru_way = i[WAY_WIDTH - 1 : 0]; + end + end + get_lru_way = lru_way; + end + endfunction + + // === === === Helper Tasks === === === + task handle_cache_hit; + begin + if(!sram_read_req) begin + set_sram_read_request(current_addr.index, way); + sram_read_req <= 1; + end else begin + response_data <= sram_read_data; + sram_read_req <=0; + state <= IDLE; + end + update_lru_counters(current_addr.index, way); + end + + endtask + + task handle_cache_miss; + begin + state <= FILL; + end + endtask + + task set_sram_read_request; + input integer index, way; + begin + put_sram_data.read_enable <= 1; + put_sram_data.write_enable <= 0; + put_sram_data.index <= index; + put_sram_data.way <= way; + end + endtask + + task set_sram_write_request; + input integer index, way, data; + begin + put_sram_data.write_enable <= 1; + put_sram_data.read_enable <= 0; + put_sram_data.index <= index; + put_sram_data.way <= way; + put_sram_data.write_data <= data; + end + endtask + + task set_mem_request; + input [31:0] address; + begin + mem_address <= address; + //mem_write_enable <= 0; + mem_request <= 1; + end + endtask + + task update_lru_counters(input [INDEX_WIDTH - 1 : 0] set_index, input [WAY_WIDTH - 1 : 0] accessed_way); + integer i; + begin + for (i = 0; i < ASSOCIATIVITY; i = i + 1) begin + if (i == accessed_way) begin + lru_counter[set_index][i] <= 0; + end else begin + lru_counter[set_index][i] <= lru_counter[set_index][i] + 1; + end + end + end + endtask + + + //=== === === Cache Operation Tasks === === === + task reset_cache; + integer i, j; + begin + state <= IDLE; + for (i = 0; i < NUM_SETS; i = i+1) begin + for (j = 0; j < ASSOCIATIVITY; j = j+1) begin + //set_sram_write_request(i, j, 0, 2); + cache_tags[i][j] <= 0; + valid[i][j] <= 0; + lru_counter[i][j] <= j[LRU_WIDTH - 1 : 0]; + //lru_counter[i][j] <= 1; + end + end + end + endtask + + task idle_state_logic; + begin + if ( read_enable) begin + current_addr.address <= request_address; + current_addr.tag <= request_address[ADDR_WIDTH-1 -: TAG_WIDTH]; + current_addr.index <= request_address[(ADDR_WIDTH - TAG_WIDTH - 1)-: INDEX_WIDTH]; + state <= CHECK_TAG; + end + end + endtask + + task check_tag_logic; + integer i; + begin + hit = 0; + lru_way = get_lru_way(current_addr.index); + for (i = 0; i < ASSOCIATIVITY; i = i + 1) begin + if (valid[current_addr.index][i] && cache_tags[current_addr.index][i] == current_addr.tag) begin + hit = 1; + way = i; + break; + end + end + if (hit) begin + handle_cache_hit(); + end else begin + handle_cache_miss(); + end + end + endtask + + + task fill_logic; + begin + if (!mem_request) begin + set_mem_request(current_addr.address); + end else if (mem_ready) begin + set_sram_write_request(current_addr.index, lru_way, mem_response_data); + cache_tags[current_addr.index][lru_way] <= current_addr.tag; + valid[current_addr.index][lru_way] <= 1; + update_lru_counters(current_addr.index, lru_way); + mem_request <= 0; + state <= CHECK_TAG; + end + end + endtask + + // Main Cache Operation + always @(posedge clk or negedge rstn) begin + if (rstn == 0) begin + reset_cache(); + end else begin + case (state) + IDLE: begin + idle_state_logic(); + c_state <= 0; + end + CHECK_TAG: begin + check_tag_logic(); + c_state <= 1; + end + FILL: begin + fill_logic(); + c_state <= 2; + end + default: begin + idle_state_logic(); + c_state <= 0; + end + endcase + end + end +endmodule