BlackParrot initial commit
authorsadullah <sadullahcanakci@gmail.com>
Fri, 11 Oct 2019 16:39:43 +0000 (12:39 -0400)
committersadullah <sadullahcanakci@gmail.com>
Fri, 17 Jan 2020 00:13:02 +0000 (19:13 -0500)
w/ Litex BIOS simulation including LiteDRAM
w/ Litex BIOS working on FPGA excluding LiteDRAM

26 files changed:
.gitmodules
litex/soc/cores/cpu/__init__.py
litex/soc/cores/cpu/blackparrot/README.md [new file with mode: 0644]
litex/soc/cores/cpu/blackparrot/__init__.py [new file with mode: 0644]
litex/soc/cores/cpu/blackparrot/bp_fpga/ExampleBlackParrotSystem.v [new file with mode: 0644]
litex/soc/cores/cpu/blackparrot/bp_fpga/bp2wb_convertor.v [new file with mode: 0644]
litex/soc/cores/cpu/blackparrot/bp_fpga/bsg_mem_1rw_sync_mask_write_bit.v [new file with mode: 0644]
litex/soc/cores/cpu/blackparrot/bp_hardware/bp_cce_mmio_cfg_loader.v [new file with mode: 0644]
litex/soc/cores/cpu/blackparrot/bp_hardware/bp_common_pkg.vh [new file with mode: 0644]
litex/soc/cores/cpu/blackparrot/bp_hardware/bp_nonsynth_host.v [new file with mode: 0644]
litex/soc/cores/cpu/blackparrot/bp_software/cce_ucode.mem [new file with mode: 0644]
litex/soc/cores/cpu/blackparrot/bp_software/udivmoddi4.c [new file with mode: 0644]
litex/soc/cores/cpu/blackparrot/core.py [new file with mode: 0644]
litex/soc/cores/cpu/blackparrot/flist_litex.verilator [new file with mode: 0644]
litex/soc/cores/cpu/blackparrot/pre-alpha-release [new submodule]
litex/soc/cores/cpu/blackparrot/setEnvironment.sh [new file with mode: 0755]
litex/soc/cores/cpu/blackparrot/update_BP.sh [new file with mode: 0755]
litex/soc/software/bios/Makefile
litex/soc/software/bios/boot-helper-blackparrot.S [new file with mode: 0644]
litex/soc/software/bios/isr.c
litex/soc/software/bios/main.c
litex/soc/software/bios/sdram.c
litex/soc/software/include/base/irq.h
litex/soc/software/include/base/system.h
litex/soc/software/libbase/crt0-blackparrot.S [new file with mode: 0644]
litex/soc/software/libbase/system.c

index 509b98ac295dea0e835b8e6ea6b6e24c7e2ac34b..1a2bf3a31256ed348386690edc14abe7f45d14a1 100644 (file)
@@ -25,3 +25,6 @@
 [submodule "litex/soc/cores/cpu/microwatt/sources"]
        path = litex/soc/cores/cpu/microwatt/sources
        url = https://github.com/antonblanchard/microwatt
+[submodule "litex/soc/cores/cpu/blackparrot/pre-alpha-release"]
+       path = litex/soc/cores/cpu/blackparrot/pre-alpha-release
+       url = https://github.com/black-parrot/pre-alpha-release.git
index afcf6a172391d3fe56d689a8f437361385d454f8..1503dfff09ff2e01bae6a3f0a712904eaa2e6e7c 100644 (file)
@@ -33,6 +33,7 @@ from litex.soc.cores.cpu.vexriscv import VexRiscv
 from litex.soc.cores.cpu.minerva import Minerva
 from litex.soc.cores.cpu.rocket import RocketRV64
 from litex.soc.cores.cpu.microwatt import Microwatt
+from litex.soc.cores.cpu.blackparrot import BlackParrotRV64
 
 CPUS = {
     "lm32"       : LM32,
@@ -42,6 +43,7 @@ CPUS = {
     "minerva"    : Minerva,
     "rocket"     : RocketRV64,
     "microwatt"  : Microwatt,
+    "blackparrot" : BlackParrotRV64,
 }
 
 # CPU Variants/Extensions Definition ---------------------------------------------------------------
diff --git a/litex/soc/cores/cpu/blackparrot/README.md b/litex/soc/cores/cpu/blackparrot/README.md
new file mode 100644 (file)
index 0000000..d320d4b
--- /dev/null
@@ -0,0 +1,12 @@
+TODO: Edit
+git submodule update --init --recursive (for blackparrot pre-alpha repo)
+cd pre-alpha-release
+follow getting_started to install blackparrot
+cd ..
+source ./setEnvironment.sh #should be sourced each time you open a terminal or just add this line to bashrc
+Add $BP_TOP/external/bin to $PATH for verilator and riscv-gnu tools
+./update_BP.sh #to modify some of the files in Blackparrot repo (one-time process)
+Currently, we can simulate the LiteX BIOS on the BlackParrot processor.
+
+[![asciicast](https://asciinema.org/a/286568.svg)](https://asciinema.org/a/286568)
+
diff --git a/litex/soc/cores/cpu/blackparrot/__init__.py b/litex/soc/cores/cpu/blackparrot/__init__.py
new file mode 100644 (file)
index 0000000..05c9fce
--- /dev/null
@@ -0,0 +1 @@
+from litex.soc.cores.cpu.blackparrot.core import BlackParrotRV64
diff --git a/litex/soc/cores/cpu/blackparrot/bp_fpga/ExampleBlackParrotSystem.v b/litex/soc/cores/cpu/blackparrot/bp_fpga/ExampleBlackParrotSystem.v
new file mode 100644 (file)
index 0000000..cdd1a95
--- /dev/null
@@ -0,0 +1,434 @@
+/**
+  *
+  * ExampleBlackParrotSystem For Simulating With Litex
+  *
+  */
+  
+`include "bsg_noc_links.vh"
+
+module ExampleBlackParrotSystem
+ import bp_common_pkg::*;
+ import bp_common_aviary_pkg::*;
+ import bp_be_pkg::*;
+ import bp_common_rv64_pkg::*;
+ import bp_cce_pkg::*;
+ import bp_cfg_link_pkg::*;
+ #(parameter bp_cfg_e cfg_p = e_bp_single_core_cfg // Replaced by the flow with a specific bp_cfg
+   `declare_bp_proc_params(cfg_p)
+   `declare_bp_me_if_widths(paddr_width_p, cce_block_width_p, num_lce_p, lce_assoc_p)
+
+   // Tracing parameters (cce_trace_p / dram_trace_p are only referenced by commented-out code below)
+   , parameter calc_trace_p                = 1
+   , parameter cce_trace_p                 = 0
+   , parameter cmt_trace_p                 = 0
+   , parameter dram_trace_p                = 0
+   , parameter skip_init_p                 = 0 // forwarded to cfg_loader.skip_ram_init_p
+
+   , parameter mem_load_p         = 1 // NOTE(review): the mem_* parameters are not referenced in this module body
+   , parameter mem_file_p         = "prog.mem"
+   , parameter mem_cap_in_bytes_p = 2**20
+   , parameter [paddr_width_p-1:0] mem_offset_p = paddr_width_p'(32'h8000_0000)
+
+   )
+  (input clk_i
+   , input reset_i
+   // Wishbone master interface (driven by the bp2wb_convertor instance below)
+   , input  [63:0]  wbm_dat_i
+   , output [63:0]  wbm_dat_o
+   , input          wbm_ack_i
+  // , input          wbm_err_i
+  // , input          wbm_rty_i
+   , output [36:0]  wbm_adr_o //TODO: parametrize this width
+   , output         wbm_stb_o
+   , output         wbm_cyc_o
+   , output         wbm_sel_o //TODO: how many bits? check Wishbone spec section 3.5, table 3-1 (1 bit here)
+   , output         wbm_we_o
+   , output [2:0]   wbm_cti_o //TODO: hardwire in Litex
+   , output [1:0]   wbm_bte_o //TODO: hardwire in Litex
+   , output         all_finished_debug_o // debug status from host_mmio (bp_nonsynth_host)
+   , output         core_passed_debug
+   , output         core_failed_debug
+   , input  [3:0]   interrupts // NOTE(review): not connected to anything in this module
+  );
+
+`declare_bsg_ready_and_link_sif_s(mem_noc_flit_width_p, bsg_ready_and_link_sif_s);
+`declare_bp_me_if(paddr_width_p, cce_block_width_p, num_lce_p, lce_assoc_p)
+
+bsg_ready_and_link_sif_s cmd_link_li, cmd_link_lo;
+bsg_ready_and_link_sif_s resp_link_li, resp_link_lo;
+
+bsg_ready_and_link_sif_s mem_cmd_link_li, mem_cmd_link_lo, mem_resp_link_li, mem_resp_link_lo;
+bsg_ready_and_link_sif_s cfg_cmd_link_li, cfg_cmd_link_lo, cfg_resp_link_li, cfg_resp_link_lo;
+
+assign mem_cmd_link_li = cmd_link_li; // commands leaving the chip go to client_link
+assign cfg_cmd_link_li = '{ready_and_rev: cmd_link_li.ready_and_rev, default: '0};
+assign cmd_link_lo = '{data: cfg_cmd_link_lo.data
+                       ,v  : cfg_cmd_link_lo.v
+                       ,ready_and_rev: mem_cmd_link_lo.ready_and_rev
+                       }; // into the chip: data/valid from master_link, ready from client_link
+
+assign mem_resp_link_li = '{ready_and_rev: resp_link_li.ready_and_rev, default: '0};
+assign cfg_resp_link_li = resp_link_li; // responses leaving the chip go to master_link
+assign resp_link_lo = '{data: mem_resp_link_lo.data
+                        ,v  : mem_resp_link_lo.v
+                        ,ready_and_rev: cfg_resp_link_lo.ready_and_rev
+                        }; // into the chip: data/valid from client_link, ready from master_link
+
+bp_cce_mem_msg_s       mem_resp_li;
+logic                  mem_resp_v_li, mem_resp_ready_lo;
+bp_cce_mem_msg_s       mem_cmd_lo;
+logic                  mem_cmd_v_lo, mem_cmd_yumi_li;
+
+bp_cce_mem_msg_s       dram_resp_lo;
+logic                  dram_resp_v_lo, dram_resp_ready_li;
+bp_cce_mem_msg_s       dram_cmd_li;
+logic                  dram_cmd_v_li, dram_cmd_yumi_lo;
+
+bp_cce_mem_msg_s       host_resp_lo;
+logic                  host_resp_v_lo, host_resp_ready_li;
+bp_cce_mem_msg_s       host_cmd_li;
+logic                  host_cmd_v_li, host_cmd_yumi_lo;
+
+bp_cce_mem_msg_s       cfg_cmd_lo;
+logic                  cfg_cmd_v_lo, cfg_cmd_ready_li;
+bp_cce_mem_msg_s       cfg_resp_li;
+logic                  cfg_resp_v_li, cfg_resp_ready_lo;
+
+logic [mem_noc_cord_width_p-1:0] dram_cord_lo, mmio_cord_lo, host_cord_lo;
+logic [num_core_p-1:0][mem_noc_cord_width_p-1:0] tile_cord_lo;
+logic [num_mem_p-1:0][mem_noc_cord_width_p-1:0] mem_cord_lo;
+
+assign mmio_cord_lo[0+:mem_noc_x_cord_width_p]                      = mmio_x_pos_p;
+assign mmio_cord_lo[mem_noc_x_cord_width_p+:mem_noc_y_cord_width_p] = '0;
+assign dram_cord_lo[0+:mem_noc_x_cord_width_p]                      = mem_noc_x_dim_p+2;
+assign dram_cord_lo[mem_noc_x_cord_width_p+:mem_noc_y_cord_width_p] = '0;
+assign host_cord_lo[0+:mem_noc_x_cord_width_p]                      = mem_noc_x_dim_p+2; // same coordinate as dram_cord_lo
+assign host_cord_lo[mem_noc_x_cord_width_p+:mem_noc_y_cord_width_p] = '0;
+
+for (genvar j = 0; j < mem_noc_y_dim_p; j++)
+  begin : y
+    for (genvar i = 0; i < mem_noc_x_dim_p; i++)
+      begin : x
+        localparam idx = j*mem_noc_x_dim_p + i; // row-major tile index
+        assign tile_cord_lo[idx][0+:mem_noc_x_cord_width_p] = i+1;
+        assign tile_cord_lo[idx][mem_noc_x_cord_width_p+:mem_noc_y_cord_width_p] = j+1;
+      end
+  end
+for (genvar i = 0; i < num_mem_p; i++)
+  begin : x
+    assign mem_cord_lo[i][0+:mem_noc_x_cord_width_p] = i;
+    assign mem_cord_lo[i][mem_noc_x_cord_width_p+:mem_noc_y_cord_width_p] = '0;
+  end
+
+// Chip: core, coherence and memory domains all share clk_i / reset_i
+bp_chip
+ #(.cfg_p(cfg_p))
+ chip
+  (.core_clk_i(clk_i)
+   ,.core_reset_i(reset_i)
+   
+   ,.coh_clk_i(clk_i)
+   ,.coh_reset_i(reset_i)
+
+   ,.mem_clk_i(clk_i)
+   ,.mem_reset_i(reset_i)
+
+   ,.mem_cord_i(mem_cord_lo)
+   ,.tile_cord_i(tile_cord_lo)
+   ,.dram_cord_i(dram_cord_lo)
+   ,.mmio_cord_i(mmio_cord_lo)
+   ,.host_cord_i(host_cord_lo)
+
+   ,.prev_cmd_link_i('0)
+   ,.prev_cmd_link_o()
+
+   ,.prev_resp_link_i('0)
+   ,.prev_resp_link_o()
+
+   ,.next_cmd_link_i(cmd_link_lo)
+   ,.next_cmd_link_o(cmd_link_li)
+
+   ,.next_resp_link_i(resp_link_lo)
+   ,.next_resp_link_o(resp_link_li)
+   );
+
+  bind bp_be_top
+    bp_nonsynth_commit_tracer
+     #(.cfg_p(cfg_p))
+     commit_tracer
+      (.clk_i(clk_i & (ExampleBlackParrotSystem.cmt_trace_p == 1)) // clock gated off unless cmt_trace_p == 1
+       ,.reset_i(reset_i)
+
+       ,.mhartid_i('0)
+
+       ,.commit_v_i(be_calculator.instret_mem3_o)
+       ,.commit_pc_i(be_calculator.pc_mem3_o)
+       ,.commit_instr_i(be_calculator.instr_mem3_o)
+
+       ,.rd_w_v_i(be_calculator.int_regfile.rd_w_v_i)
+       ,.rd_addr_i(be_calculator.int_regfile.rd_addr_i)
+       ,.rd_data_i(be_calculator.int_regfile.rd_data_i)
+       );
+
+
+  bind bp_be_top
+    bp_be_nonsynth_tracer
+     #(.cfg_p(cfg_p))
+     tracer
+       // Workaround for verilator binding by accident
+       // TODO: Figure out why tracing is always enabled (calc_trace_p defaults to 1 above)
+      (.clk_i(clk_i & (ExampleBlackParrotSystem.calc_trace_p == 1))
+       ,.reset_i(reset_i)
+  
+       ,.mhartid_i(be_calculator.proc_cfg.core_id)
+
+       ,.issue_pkt_i(be_calculator.issue_pkt)
+       ,.issue_pkt_v_i(be_calculator.issue_pkt_v_i)
+  
+       ,.fe_nop_v_i(be_calculator.fe_nop_v)
+       ,.be_nop_v_i(be_calculator.be_nop_v)
+       ,.me_nop_v_i(be_calculator.me_nop_v)
+       ,.dispatch_pkt_i(be_calculator.dispatch_pkt)
+  
+       ,.ex1_br_tgt_i(be_calculator.calc_status.int1_br_tgt)
+       ,.ex1_btaken_i(be_calculator.calc_status.int1_btaken)
+       ,.iwb_result_i(be_calculator.comp_stage_n[3])
+       ,.fwb_result_i(be_calculator.comp_stage_n[4])
+  
+       ,.cmt_trace_exc_i(be_calculator.exc_stage_n[1+:5])
+  
+       ,.trap_v_i(be_mem.csr.trap_v_o)
+       ,.mtvec_i(be_mem.csr.mtvec_n)
+       ,.mtval_i(be_mem.csr.mtval_n[0+:vaddr_width_p])
+       ,.ret_v_i(be_mem.csr.ret_v_o)
+       ,.mepc_i(be_mem.csr.mepc_n[0+:vaddr_width_p])
+       ,.mcause_i(be_mem.csr.mcause_n)
+  
+       ,.priv_mode_i(be_mem.csr.priv_mode_n)
+       ,.mpp_i(be_mem.csr.mstatus_n.mpp)
+       );
+
+/*bind bp_be_top
+  bp_be_nonsynth_perf
+   #(.cfg_p(cfg_p))
+   perf
+    (.clk_i(clk_i)
+     ,.reset_i(reset_i)
+
+     ,.mhartid_i(be_calculator.proc_cfg.core_id)
+
+     ,.fe_nop_i(be_calculator.exc_stage_r[2].fe_nop_v)
+     ,.be_nop_i(be_calculator.exc_stage_r[2].be_nop_v)
+     ,.me_nop_i(be_calculator.exc_stage_r[2].me_nop_v)
+     ,.poison_i(be_calculator.exc_stage_r[2].poison_v)
+     ,.roll_i(be_calculator.exc_stage_r[2].roll_v)
+     ,.instr_cmt_i(be_calculator.calc_status.mem3_cmt_v)
+
+     ,.program_finish_i(testbench.program_finish)
+     );
+*/
+/*if (dram_trace_p)
+  bp_mem_nonsynth_tracer
+   #(.cfg_p(cfg_p))
+   bp_mem_tracer
+    (.clk_i(clk_i & (testbench.dram_trace_p == 1))
+     ,.reset_i(reset_i)
+
+     ,.mem_cmd_i(dram_cmd_li)
+     ,.mem_cmd_v_i(dram_cmd_v_li)
+     ,.mem_cmd_yumi_i(dram_cmd_yumi_lo)
+
+     ,.mem_resp_i(dram_resp_lo)
+     ,.mem_resp_v_i(dram_resp_v_lo)
+     ,.mem_resp_ready_i(dram_resp_ready_li)
+     );
+
+if (cce_trace_p)
+  bind bp_cce_top
+    bp_cce_nonsynth_tracer
+      #(.cfg_p(cfg_p))
+      bp_cce_tracer
+       (.clk_i(clk_i & (testbench.cce_trace_p == 1))
+        ,.reset_i(reset_i)
+  
+        ,.cce_id_i(cce_id_i)
+  
+        // To CCE
+        ,.lce_req_i(lce_req_to_cce)
+        ,.lce_req_v_i(lce_req_v_to_cce)
+        ,.lce_req_yumi_i(lce_req_yumi_from_cce)
+        ,.lce_resp_i(lce_resp_to_cce)
+        ,.lce_resp_v_i(lce_resp_v_to_cce)
+        ,.lce_resp_yumi_i(lce_resp_yumi_from_cce)
+  
+        // From CCE
+        ,.lce_cmd_i(lce_cmd_o)
+        ,.lce_cmd_v_i(lce_cmd_v_o)
+        ,.lce_cmd_ready_i(lce_cmd_ready_i)
+  
+        // To CCE
+        ,.mem_resp_i(mem_resp_to_cce)
+        ,.mem_resp_v_i(mem_resp_v_to_cce)
+        ,.mem_resp_yumi_i(mem_resp_yumi_from_cce)
+  
+        // From CCE
+        ,.mem_cmd_i(mem_cmd_from_cce)
+        ,.mem_cmd_v_i(mem_cmd_v_from_cce)
+        ,.mem_cmd_ready_i(mem_cmd_ready_to_cce)
+        );
+*/
+// Memory-side link client: turns link traffic into mem_cmd_lo / accepts mem_resp_li
+bp_me_cce_to_wormhole_link_client
+ #(.cfg_p(cfg_p))
+  client_link
+  (.clk_i(clk_i)
+  ,.reset_i(reset_i)
+
+  ,.mem_cmd_o(mem_cmd_lo)
+  ,.mem_cmd_v_o(mem_cmd_v_lo)
+  ,.mem_cmd_yumi_i(mem_cmd_yumi_li)
+
+  ,.mem_resp_i(mem_resp_li)
+  ,.mem_resp_v_i(mem_resp_v_li)
+  ,.mem_resp_ready_o(mem_resp_ready_lo)
+
+  ,.my_cord_i(dram_cord_lo)
+  ,.my_cid_i(mem_noc_cid_width_p'(0))
+     
+  ,.cmd_link_i(mem_cmd_link_li)
+  ,.cmd_link_o(mem_cmd_link_lo)
+
+  ,.resp_link_i(mem_resp_link_li)
+  ,.resp_link_o(mem_resp_link_lo)
+  );
+
+bp2wb_convertor
+  #(.cfg_p(cfg_p))
+bp2wb
+  (.clk_i(clk_i)
+  ,.reset_i(reset_i)
+  ,.mem_cmd_i(dram_cmd_li)
+  ,.mem_cmd_v_i(dram_cmd_v_li)
+  ,.mem_cmd_yumi_o(dram_cmd_yumi_lo)
+  ,.mem_resp_o(dram_resp_lo)
+  ,.mem_resp_v_o(dram_resp_v_lo)
+  ,.mem_resp_ready_i(dram_resp_ready_li)
+  ,.dat_i(wbm_dat_i)
+  ,.dat_o(wbm_dat_o)
+  ,.ack_i(wbm_ack_i)
+  ,.adr_o(wbm_adr_o)
+  ,.stb_o(wbm_stb_o)
+  ,.cyc_o(wbm_cyc_o)
+  ,.sel_o(wbm_sel_o )
+  ,.we_o(wbm_we_o)
+  ,.cti_o(wbm_cti_o)
+  ,.bte_o(wbm_bte_o )
+  );
+
+logic [num_core_p-1:0] program_finish; // driven by host_mmio; not read elsewhere in this module
+
+bp_nonsynth_host
+ #(.cfg_p(cfg_p))
+ host_mmio
+  (.clk_i(clk_i)
+   ,.reset_i(reset_i)
+
+   ,.mem_cmd_i(host_cmd_li)
+   ,.mem_cmd_v_i(host_cmd_v_li)
+   ,.mem_cmd_yumi_o(host_cmd_yumi_lo)
+
+   ,.mem_resp_o(host_resp_lo)
+   ,.mem_resp_v_o(host_resp_v_lo)
+   ,.mem_resp_ready_i(host_resp_ready_li)
+
+   ,.program_finish_o(program_finish)
+   ,.all_finished_debug_o(all_finished_debug_o)
+   ,.core_passed_debug(core_passed_debug)
+   ,.core_failed_debug(core_failed_debug)
+   );
+
+/*bp_nonsynth_if_verif
+ #(.cfg_p(cfg_p))
+ if_verif
+  ();
+*/
+// MMIO arbitration: at most one command in flight between host_mmio and bp2wb
+//   Should this be on its own I/O router?
+logic req_outstanding_r; // set when a command is accepted, cleared when a response becomes valid
+bsg_dff_reset_en
+ #(.width_p(1))
+ req_outstanding_reg
+  (.clk_i(clk_i)
+   ,.reset_i(reset_i)
+   ,.en_i(mem_cmd_yumi_li | mem_resp_v_li)
+
+   ,.data_i(mem_cmd_yumi_li)
+   ,.data_o(req_outstanding_r)
+   );
+
+wire host_cmd_not_dram      = mem_cmd_v_lo & (mem_cmd_lo.addr < 39'h00_4000_0000 );// NOTE(review): literal differs from dram_base_addr_gp (32'h5000_0000 in bp_common_pkg.vh) - confirm intended split
+
+assign host_cmd_li          = mem_cmd_lo;
+assign host_cmd_v_li        = mem_cmd_v_lo & host_cmd_not_dram & ~req_outstanding_r;
+assign dram_cmd_li          = mem_cmd_lo;
+assign dram_cmd_v_li        = mem_cmd_v_lo & ~host_cmd_not_dram & ~req_outstanding_r;
+assign mem_cmd_yumi_li      = host_cmd_not_dram 
+                              ? host_cmd_yumi_lo 
+                              : dram_cmd_yumi_lo;
+
+assign mem_resp_li = host_resp_v_lo ? host_resp_lo : dram_resp_lo; // host response wins if both are valid
+assign mem_resp_v_li = host_resp_v_lo | dram_resp_v_lo;
+assign host_resp_ready_li = mem_resp_ready_lo;
+assign dram_resp_ready_li = mem_resp_ready_lo;
+
+// CFG loader + rom + link
+bp_me_cce_to_wormhole_link_master
+ #(.cfg_p(cfg_p))
+  master_link
+  (.clk_i(clk_i)
+  ,.reset_i(reset_i)
+
+  ,.mem_cmd_i(cfg_cmd_lo)
+  ,.mem_cmd_v_i(cfg_cmd_ready_li & cfg_cmd_v_lo)
+  ,.mem_cmd_ready_o(cfg_cmd_ready_li)
+
+  ,.mem_resp_o(cfg_resp_li)
+  ,.mem_resp_v_o(cfg_resp_v_li)
+  ,.mem_resp_yumi_i(cfg_resp_ready_lo & cfg_resp_v_li)
+
+  ,.my_cord_i(dram_cord_lo)
+  ,.my_cid_i(mem_noc_cid_width_p'(0))
+  ,.dram_cord_i(dram_cord_lo)
+  ,.mmio_cord_i(mmio_cord_lo)
+  ,.host_cord_i(host_cord_lo)
+  
+  ,.cmd_link_i(cfg_cmd_link_li)
+  ,.cmd_link_o(cfg_cmd_link_lo)
+
+  ,.resp_link_i(cfg_resp_link_li)
+  ,.resp_link_o(cfg_resp_link_lo)
+  );
+
+localparam cce_instr_ram_addr_width_lp = `BSG_SAFE_CLOG2(num_cce_instr_ram_els_p);
+bp_cce_mmio_cfg_loader
+  #(.cfg_p(cfg_p)
+    ,.inst_width_p(`bp_cce_inst_width)
+    ,.inst_ram_addr_width_p(cce_instr_ram_addr_width_lp)
+    ,.inst_ram_els_p(num_cce_instr_ram_els_p)
+    ,.skip_ram_init_p(skip_init_p)
+  )
+  cfg_loader
+  (.clk_i(clk_i)
+   ,.reset_i(reset_i)
+   
+   ,.mem_cmd_o(cfg_cmd_lo)
+   ,.mem_cmd_v_o(cfg_cmd_v_lo)
+   ,.mem_cmd_yumi_i(cfg_cmd_ready_li & cfg_cmd_v_lo)
+   
+   ,.mem_resp_i(cfg_resp_li)
+   ,.mem_resp_v_i(cfg_resp_v_li)
+   ,.mem_resp_ready_o(cfg_resp_ready_lo)
+  );
+
+endmodule
+
diff --git a/litex/soc/cores/cpu/blackparrot/bp_fpga/bp2wb_convertor.v b/litex/soc/cores/cpu/blackparrot/bp_fpga/bp2wb_convertor.v
new file mode 100644 (file)
index 0000000..3780fb8
--- /dev/null
@@ -0,0 +1,214 @@
+/**
+ * bp2wb_convertor.v
+ * DESCRIPTION: THIS MODULE ADAPTS BP MEMORY BUS TO 64-BIT WISHBONE
+ */
+
+module bp2wb_convertor
+  import bp_common_pkg::*;
+  import bp_common_aviary_pkg::*;
+  import bp_cce_pkg::*;
+  import bp_me_pkg::*;
+  #(parameter bp_cfg_e cfg_p = e_bp_single_core_cfg
+   `declare_bp_proc_params(cfg_p)
+   `declare_bp_me_if_widths(paddr_width_p, cce_block_width_p, num_lce_p, lce_assoc_p)
+
+//   , parameter [paddr_width_p-1:0] dram_offset_p = '0
+   , localparam num_block_words_lp   = cce_block_width_p / 64
+   , localparam num_block_bytes_lp   = cce_block_width_p / 8
+   , localparam num_word_bytes_lp    = dword_width_p / 8
+   , localparam block_offset_bits_lp = `BSG_SAFE_CLOG2(num_block_bytes_lp)
+   , localparam word_offset_bits_lp  = `BSG_SAFE_CLOG2(num_block_words_lp)
+   , localparam byte_offset_bits_lp  = `BSG_SAFE_CLOG2(num_word_bytes_lp)
+   , localparam wbone_data_width  = 64
+   , localparam wbone_addr_ubound = paddr_width_p
+   , localparam mem_granularity = 64 //TODO: adapt selection bit parametrized
+   , localparam wbone_addr_lbound = 3 //`BSG_SAFE_CLOG2(wbone_data_width / mem_granularity) //dword granularity
+   , localparam total_datafetch_cycle_lp   = cce_block_width_p / wbone_data_width // Wishbone beats per block
+   , localparam total_datafetch_cycle_width = `BSG_SAFE_CLOG2(total_datafetch_cycle_lp)
+   , localparam cached_addr_base =  32'h4000_4000//   32'h5000_0000 ; addresses below this take the uncached path
+   )
+  (input                                 clk_i
+   ,(* mark_debug = "true" *) input                               reset_i
+
+   // BP side
+   ,(* mark_debug = "true" *) input [cce_mem_msg_width_lp-1:0]    mem_cmd_i
+   ,(* mark_debug = "true" *) input                               mem_cmd_v_i
+   ,(* mark_debug = "true" *) output                              mem_cmd_yumi_o
+
+   , (* mark_debug = "true" *) output [cce_mem_msg_width_lp-1:0]   mem_resp_o
+   , (* mark_debug = "true" *) output                              mem_resp_v_o
+   , (* mark_debug = "true" *) input                               mem_resp_ready_i
+
+   // Wishbone side
+   , (* mark_debug = "true" *) input [63:0]                        dat_i
+   , (* mark_debug = "true" *) output logic [63:0]                 dat_o
+   , (* mark_debug = "true" *) input                               ack_i
+  // , input                               err_i
+  // , input                               rty_i
+   , (* mark_debug = "true" *) output logic [wbone_addr_ubound-wbone_addr_lbound-1:0] adr_o//TODO: Double check!!!    
+   , (* mark_debug = "true" *) output logic stb_o
+   , output                              cyc_o
+   , output                              sel_o //TODO: double check!!!
+   , (* mark_debug = "true" *) output                              we_o
+   , output [2:0]                        cti_o //TODO: hardwire in Litex
+   , output [1:0]                        bte_o //TODO: hardwire in Litex
+   );
+
+  `declare_bp_me_if(paddr_width_p, cce_block_width_p, num_lce_p, lce_assoc_p);
+  
+  //locals
+ (* mark_debug = "true" *) logic  [total_datafetch_cycle_width:0] ack_ctr  = 0;
+ (* mark_debug = "true" *) bp_cce_mem_msg_s  mem_cmd_cast_i, mem_resp_cast_o, mem_cmd_r;
+ (* mark_debug = "true" *) logic ready_li, v_li, stb_justgotack;
+  (* mark_debug = "true" *) logic [cce_block_width_p-1:0] data_lo; 
+  (* mark_debug = "true" *) logic  [cce_block_width_p-1:0] data_li;
+  (* mark_debug = "true" *) wire [paddr_width_p-1:0]  mem_cmd_addr_l;
+   (* mark_debug = "true" *) logic [paddr_width_p-1:0] addr_lo;
+  (* mark_debug = "true" *) logic set_stb;
+  (* mark_debug = "true" *) wire [63:0] data_little_end;
+
+
+  //reset
+  //ack_ctr is cleared by reset_i in the always_ff block below
+  //Handshaking between Wishbone and BlackParrot through convertor  
+  //3.1.3:At every rising edge of [CLK_I] the terminating signal(ACK) is sampled.  If it
+  //is asserted, then  [STB_O]  is  negated. 
+  assign ready_li = ( ack_ctr == 0 );
+  assign mem_cmd_yumi_o = mem_cmd_v_i && ready_li;//!stb_o then ready to take!
+ // assign v_li =  (ack_ctr == total_datafetch_cycle_lp-1);
+  assign mem_resp_v_o = mem_resp_ready_i & v_li;
+  assign stb_o = (set_stb) && !stb_justgotack; //the address comes from the registered command (mem_cmd_r), so it lags by one cycle
+  assign cyc_o = stb_o; 
+  assign sel_o = 0; 
+  assign cti_o = 0;
+  assign bte_o = 0;
+
+  // ack_ctr is initialised by its declaration and cleared by reset_i below.
+  // It is written only by the always_ff process: a separate initial block
+  // would be a second writer, which always_ff does not permit.
+  //stb_reset_lo =0;
+  
+/*  always_ff @(posedge clk_i) 
+    if ( mem_cmd_yumi_o )// || (ack_ctr > 0))
+    begin
+      data_li <= 0;
+      set_stb <= 1;
+    end
+*/
+
+
+//Flip stb after each ack--->RULE 3.20:
+
+// Every time we get an ACK from WB, increment counter until the counter reaches to total_datafetch_cycle_lp
+assign data_little_end = dat_i;
+  always_ff @(posedge clk_i)
+    begin
+      
+      if(reset_i)
+      begin
+        ack_ctr <= 0;
+        set_stb <= 0;
+        v_li <=0;
+      end
+      
+      else if (mem_cmd_yumi_o) // new command accepted: start a transfer
+      begin
+        data_li <= 0;
+        set_stb <= 1;
+        v_li <= 0;
+        stb_justgotack <= 0;
+      end
+      
+      else
+      begin
+        if (ack_i)//stb should be negated after ack
+        begin
+          stb_justgotack <= 1;
+          data_li[(ack_ctr*wbone_data_width) +: wbone_data_width] <= data_little_end;
+          if ((ack_ctr == total_datafetch_cycle_lp-1) || (mem_cmd_addr_l < cached_addr_base && mem_cmd_r.msg_type == e_cce_mem_uc_wr )) //if uncached store, just one cycle is fine
+          begin
+            ack_ctr <= 0;
+            v_li <=1; // transfer finished: raise response valid
+            set_stb <= 0;
+          end
+          else 
+            ack_ctr <= ack_ctr + 1; 
+        end
+        else
+        begin
+          stb_justgotack <= 0;
+          v_li <=0;
+        end
+      end
+    end
+
+  //Packet Pass from BP to BP2WB
+ assign mem_cmd_cast_i = mem_cmd_i;
+
+  bsg_dff_reset_en
+  #(.width_p(cce_mem_msg_width_lp))
+    mshr_reg
+     (.clk_i(clk_i)
+      ,.reset_i(reset_i)
+      ,.en_i(mem_cmd_yumi_o)//capture the command in the cycle it is accepted
+      ,.data_i(mem_cmd_i)
+      ,.data_o(mem_cmd_r)
+      );
+
+ //Addr && Data && Command  Pass from BP2WB to WB 
+  logic [wbone_addr_lbound-1:0] throw_away; 
+  assign mem_cmd_addr_l =  mem_cmd_r.addr;
+  assign data_lo = mem_cmd_r.data;
+  logic [39:0] mem_cmd_addr_l_zero64;
+  logic [7:0] partial;
+  always_comb begin
+    // Helper signals are assigned on every path so that no latches are inferred.
+    mem_cmd_addr_l_zero64 = mem_cmd_addr_l >> 6 << 6; // address with the low 6 bits cleared
+    throw_away = '0;
+    if( mem_cmd_addr_l < cached_addr_base )
+    begin 
+      adr_o = mem_cmd_addr_l[wbone_addr_ubound-1:wbone_addr_lbound];//no need to change address for uncached stores/loads
+      dat_o = data_lo[(0*wbone_data_width) +: wbone_data_width];//uncached data is stored in LS 64bits
+    end
+    else
+    begin
+      // Step 8 bytes per acknowledged beat; the low wbone_addr_lbound bits are dropped.
+      {adr_o,throw_away} =  mem_cmd_addr_l_zero64 + (ack_ctr*8);//TODO:careful
+      dat_o = data_lo[(ack_ctr*wbone_data_width) +: wbone_data_width];
+    end
+  end
+
+   assign we_o = (mem_cmd_r.msg_type inside {e_cce_mem_uc_wr, e_cce_mem_wb});
+
+//DEBUG
+
+wire [3:0] typean;
+assign typean = mem_cmd_r.msg_type;
+wire [2:0] debug1;
+assign debug1 = (mem_cmd_r.addr[5:0]>>3);
+
+//Data Pass from BP2WB to BP
+
+wire [cce_block_width_p-1:0]  rd_word_offset = mem_cmd_r.addr[3+:3];
+//wire [cce_block_width_p-1:0]  rd_byte_offset = mem_cmd_r.addr[0+:3];
+wire [cce_block_width_p-1:0]    rd_bit_shift = rd_word_offset*64; // We rely on receiver to adjust bits
+
+wire [cce_block_width_p-1:0] data_li_resp = (mem_cmd_r.msg_type == e_cce_mem_uc_rd)
+                                            ? data_li >> rd_bit_shift
+                                            : data_li;
+
+assign mem_resp_cast_o = '{data     : data_li_resp
+                                ,payload : mem_cmd_r.payload
+                                ,size    : mem_cmd_r.size
+                                ,addr    : mem_cmd_r.addr
+                                ,msg_type: mem_cmd_r.msg_type
+                                };
+assign mem_resp_o = mem_resp_cast_o;
+
+  
+endmodule
+
diff --git a/litex/soc/cores/cpu/blackparrot/bp_fpga/bsg_mem_1rw_sync_mask_write_bit.v b/litex/soc/cores/cpu/blackparrot/bp_fpga/bsg_mem_1rw_sync_mask_write_bit.v
new file mode 100644 (file)
index 0000000..a6fdae9
--- /dev/null
@@ -0,0 +1,55 @@
+/*
+* bsg_mem_1rw_sync_mask_write_bit.v
+*
+* distributed synchronous 1-port ram for xilinx ultrascale or ultrascale plus FPGA
+* Write mode: No-change | Read mode: No-change
+* Note:
+* There are 2 basic BRAM library primitives, RAMB18E2 and RAMB36E2 in Vivado.
+* But none of them support bit-wise mask. They have Byte-wide write enable ports though.
+* So we use the RAM_STYLE attribute to instruct the tool to infer distributed LUT RAM instead.
+*
+* To save resources, the code is written to be inferred as Single-port distributed RAM (RAM64X1S).
+* https://www.xilinx.com/support/documentation/user_guides/ug574-ultrascale-clb.pdf
+*
+*/
+
+
+module bsg_mem_1rw_sync_mask_write_bit #(
+  parameter width_p = "inv" // word width in bits; "inv" is a placeholder default
+  , parameter els_p = "inv" // number of words; "inv" is a placeholder default
+  , parameter latch_last_read_p=0 // not referenced in this implementation
+  , parameter enable_clock_gating_p=0 // not referenced in this implementation
+  , localparam addr_width_lp = `BSG_SAFE_CLOG2(els_p)
+) (
+  input                      clk_i
+  , input                      reset_i
+  , input  [      width_p-1:0] data_i
+  , input  [addr_width_lp-1:0] addr_i
+  , input                      v_i // access valid: qualifies both reads and writes
+  , input  [      width_p-1:0] w_mask_i // per-bit write mask (1 = write that bit)
+  , input                      w_i // 1 = write, 0 = read
+  , output [      width_p-1:0] data_o
+);
+
+  wire unused = reset_i; // reset_i has no effect on the memory; tied to a dummy wire
+
+  (* ram_style = "distributed" *) logic [width_p-1:0] mem [els_p-1:0];
+
+  logic [width_p-1:0] data_r; // registered read data; updated only on a read, so it holds its value otherwise
+  always_ff @(posedge clk_i) begin
+    if (v_i & ~w_i)
+      data_r <= mem[addr_i];
+  end
+
+  assign data_o = data_r;
+
+  for (genvar i=0; i<width_p; i=i+1) begin // one write process per data bit so each bit has its own write enable
+    always_ff @(posedge clk_i) begin
+      if (v_i)
+        if (w_i & w_mask_i[i])
+          mem[addr_i][i] <= data_i[i];
+    end
+  end
+
+endmodule
+
diff --git a/litex/soc/cores/cpu/blackparrot/bp_hardware/bp_cce_mmio_cfg_loader.v b/litex/soc/cores/cpu/blackparrot/bp_hardware/bp_cce_mmio_cfg_loader.v
new file mode 100644 (file)
index 0000000..84f0cc0
--- /dev/null
@@ -0,0 +1,231 @@
+/**
+ *
+ * Name:
+ *   bp_cce_mmio_cfg_loader.v
+ *
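+ * Description:
+ *   FSM that writes the CCE microcode (read from a memory file) and the core configuration registers as uncached stores on the config channel.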
+ */
+
+module bp_cce_mmio_cfg_loader
+  import bp_common_pkg::*;
+  import bp_common_aviary_pkg::*;
+  import bp_cce_pkg::*;
+  import bp_cfg_link_pkg::*;
+  import bp_be_pkg::*;
+  import bp_be_dcache_pkg::*;
+  import bp_me_pkg::*;
+  #(parameter bp_cfg_e cfg_p = e_bp_inv_cfg
+    `declare_bp_proc_params(cfg_p)
+    `declare_bp_me_if_widths(paddr_width_p, cce_block_width_p, num_lce_p, lce_assoc_p)
+
+    , parameter inst_width_p          = "inv"
+    , parameter inst_ram_addr_width_p = "inv"
+    , parameter inst_ram_els_p        = "inv"
+    , parameter cce_ucode_filename_p  = "/tmp/cce_ucode.mem" // file loaded with $readmemb below
+    , parameter skip_ram_init_p       = 0 // non-zero: go from RESET straight to BP_FREEZE_CLR
+    
+    , localparam bp_pc_entry_point_gp=39'h00_5000_0000 // start PC written in SEND_PC_LO / SEND_PC_HI
+    )
+  (input                                             clk_i
+   , input                                           reset_i
+
+   // Config channel
+   , output logic [cce_mem_msg_width_lp-1:0]         mem_cmd_o
+   , output logic                                    mem_cmd_v_o
+   , input                                           mem_cmd_yumi_i
+
+   // We don't need a response from the cfg network
+   , input [cce_mem_msg_width_lp-1:0]                mem_resp_i
+   , input                                           mem_resp_v_i
+   , output                                          mem_resp_ready_o
+   );
+
+  wire unused0 = &{mem_resp_i, mem_resp_v_i}; // responses are ignored
+  assign mem_resp_ready_o = 1'b1; // always ready, so responses are never back-pressured
+   
+ `declare_bp_me_if(paddr_width_p, cce_block_width_p, num_lce_p, lce_assoc_p);
+
+  bp_cce_mem_msg_s mem_cmd_cast_o;
+
+  assign mem_cmd_o = mem_cmd_cast_o;
+  
+  logic [`bp_cce_inst_width-1:0]    cce_inst_boot_rom [0:inst_ram_els_p-1];
+  logic [inst_ram_addr_width_p-1:0] cce_inst_boot_rom_addr;
+  logic [`bp_cce_inst_width-1:0]    cce_inst_boot_rom_data;
+  
+  initial $readmemb(cce_ucode_filename_p, cce_inst_boot_rom); // ROM contents come from a binary-text memory file
+
+  assign cce_inst_boot_rom_data = cce_inst_boot_rom[cce_inst_boot_rom_addr];
+
+  logic                        cfg_v_lo;
+  logic [cfg_core_width_p-1:0] cfg_core_lo;
+  logic [cfg_addr_width_p-1:0] cfg_addr_lo;
+  logic [cfg_data_width_p-1:0] cfg_data_lo;
+
+  (* mark_debug = "true" *) enum logic [3:0] {
+    RESET
+    ,BP_RESET_SET
+    ,BP_FREEZE_SET
+    ,BP_RESET_CLR
+    ,SEND_RAM_LO
+    ,SEND_RAM_HI
+    ,SEND_CCE_NORMAL
+    ,SEND_ICACHE_NORMAL
+    ,SEND_DCACHE_NORMAL
+    ,SEND_PC_LO
+    ,SEND_PC_HI
+    ,BP_FREEZE_CLR
+    ,DONE
+  } state_n, state_r;
+
+  logic [cfg_addr_width_p-1:0] ucode_cnt_r; // index of the microcode instruction currently being sent
+  logic ucode_cnt_clr, ucode_cnt_inc;
+  bsg_counter_clear_up
+   #(.max_val_p(2**cfg_addr_width_p-1)
+     ,.init_val_p(0)
+     )
+   ucode_counter
+    (.clk_i(clk_i)
+     ,.reset_i(reset_i)
+
+     ,.clear_i(ucode_cnt_clr)
+     ,.up_i(ucode_cnt_inc & mem_cmd_yumi_i)
+
+     ,.count_o(ucode_cnt_r)
+     );
+
+  wire ucode_prog_done = (ucode_cnt_r == cfg_addr_width_p'(inst_ram_els_p-1)); // counter is at the last instruction
+
+  always_ff @(posedge clk_i) 
+    begin
+      if (reset_i)
+        state_r <= RESET;
+      else if (mem_cmd_yumi_i || (state_r == RESET)) // advance only when the current command is accepted (RESET sends none)
+        state_r <= state_n;
+    end
+
+  wire [7:0] unused;
+  assign {unused, cce_inst_boot_rom_addr} = cfg_addr_lo >> 1'b1; // two cfg writes (LO, HI) per instruction, so ROM index = addr / 2
+
+  always_comb
+    begin
+      mem_cmd_v_o = cfg_v_lo;
+
+      // uncached store
+      mem_cmd_cast_o.msg_type      = e_cce_mem_uc_wr;
+      mem_cmd_cast_o.addr          = bp_cfg_base_addr_gp;
+      mem_cmd_cast_o.payload       = '0;
+      mem_cmd_cast_o.size          = e_mem_size_8;
+      mem_cmd_cast_o.data          = cce_block_width_p'({cfg_core_lo, cfg_addr_lo, cfg_data_lo});
+    end
+
+  always_comb 
+    begin
+      ucode_cnt_clr = 1'b0;
+      ucode_cnt_inc = 1'b0;
+
+      cfg_v_lo = '0;
+      cfg_core_lo = 8'hff;
+      cfg_addr_lo = '0;
+      cfg_data_lo = '0;
+
+      case (state_r)
+        RESET: begin
+          state_n = skip_ram_init_p ? BP_FREEZE_CLR : BP_RESET_SET;
+
+          ucode_cnt_clr = 1'b1;
+        end
+        BP_RESET_SET: begin
+          state_n = BP_FREEZE_SET;
+
+          cfg_v_lo = 1'b1;
+          cfg_addr_lo = bp_cfg_reg_reset_gp;
+          cfg_data_lo = cfg_data_width_p'(1);
+        end
+        BP_FREEZE_SET: begin
+          state_n = BP_RESET_CLR;
+
+          cfg_v_lo = 1'b1;
+          cfg_addr_lo = bp_cfg_reg_freeze_gp;
+          cfg_data_lo = cfg_data_width_p'(1);
+        end
+        BP_RESET_CLR: begin
+          state_n = SEND_RAM_LO;
+
+          cfg_v_lo = 1'b1;
+          cfg_addr_lo = bp_cfg_reg_reset_gp;
+          cfg_data_lo = cfg_data_width_p'(0);
+        end
+        SEND_RAM_LO: begin // low cfg_data_width_p bits of the instruction, at even offset 2*ucode_cnt_r
+          state_n = SEND_RAM_HI;
+
+          cfg_v_lo = 1'b1;
+          cfg_addr_lo = cfg_addr_width_p'(bp_cfg_mem_base_cce_ucode_gp) + (ucode_cnt_r << 1);
+          cfg_data_lo = cce_inst_boot_rom_data[0+:cfg_data_width_p];
+          // TODO: This is nonsynth, won't work on FPGA
+          cfg_data_lo = (|cfg_data_lo === 'X) ? '0 : cfg_data_lo;
+        end
+        SEND_RAM_HI: begin // remaining high bits, at odd offset 2*ucode_cnt_r + 1; then bump the counter
+          state_n = ucode_prog_done ? SEND_CCE_NORMAL : SEND_RAM_LO;
+
+          ucode_cnt_inc = 1'b1;
+
+          cfg_v_lo = 1'b1;
+          cfg_addr_lo = cfg_addr_width_p'(bp_cfg_mem_base_cce_ucode_gp) + (ucode_cnt_r << 1) + 1'b1;
+          cfg_data_lo = cfg_data_width_p'(cce_inst_boot_rom_data[inst_width_p-1:cfg_data_width_p]);
+          // TODO: This is nonsynth, won't work on FPGA
+          cfg_data_lo = (|cfg_data_lo === 'X) ? '0 : cfg_data_lo;
+        end
+        SEND_CCE_NORMAL: begin
+          state_n = SEND_ICACHE_NORMAL;
+
+          cfg_v_lo = 1'b1;
+          cfg_addr_lo = bp_cfg_reg_cce_mode_gp;
+          cfg_data_lo = cfg_data_width_p'(e_cce_mode_normal);
+        end
+        SEND_ICACHE_NORMAL: begin
+          state_n = SEND_DCACHE_NORMAL;
+
+          cfg_v_lo = 1'b1;
+          cfg_addr_lo = cfg_addr_width_p'(bp_cfg_reg_icache_mode_gp);
+          cfg_data_lo = cfg_data_width_p'(e_dcache_lce_mode_normal); // TODO: tapeout hack (dcache enum written to the icache register), change to icache
+        end
+        SEND_DCACHE_NORMAL: begin
+          state_n = SEND_PC_LO;
+
+          cfg_v_lo = 1'b1;
+          cfg_addr_lo = cfg_addr_width_p'(bp_cfg_reg_dcache_mode_gp);
+          cfg_data_lo = cfg_data_width_p'(e_dcache_lce_mode_normal);
+        end
+        SEND_PC_LO: begin
+          state_n = SEND_PC_HI;
+
+          cfg_v_lo = 1'b1;
+          cfg_addr_lo = cfg_addr_width_p'(bp_cfg_reg_start_pc_lo_gp);
+          cfg_data_lo = bp_pc_entry_point_gp[0+:cfg_data_width_p];
+        end
+        SEND_PC_HI: begin
+          state_n = BP_FREEZE_CLR;
+
+          cfg_v_lo = 1'b1;
+          cfg_addr_lo = cfg_addr_width_p'(bp_cfg_reg_start_pc_hi_gp);
+          cfg_data_lo = cfg_data_width_p'(bp_pc_entry_point_gp[vaddr_width_p-1:cfg_data_width_p]);
+        end
+        BP_FREEZE_CLR: begin // last write: clear the freeze register
+          state_n = DONE;
+
+          cfg_v_lo = 1'b1;
+          cfg_addr_lo = cfg_addr_width_p'(bp_cfg_reg_freeze_gp);
+          cfg_data_lo = cfg_data_width_p'(0);;
+        end
+        DONE: begin // terminal state: no further commands are issued
+          state_n = DONE;
+        end
+        default: begin
+          state_n = RESET;
+        end
+      endcase
+    end
+
+endmodule
diff --git a/litex/soc/cores/cpu/blackparrot/bp_hardware/bp_common_pkg.vh b/litex/soc/cores/cpu/blackparrot/bp_hardware/bp_common_pkg.vh
new file mode 100644 (file)
index 0000000..9500673
--- /dev/null
@@ -0,0 +1,55 @@
+/* 
+ * bp_common_pkg.vh
+ *
+ * Contains the interface structures used for communicating between FE, BE, ME in BlackParrot.
+ * Additionally contains global parameters used to configure the system. In the future, when 
+ *   multiple configurations are supported, these global parameters will belong to groups 
+ *   e.g. SV39, VM-disabled, ...
+ *
+ */
+
+// Package of global (suffix _gp) constants and shared interface definitions.
+// The four `include files below are textually pulled into the package scope.
+package bp_common_pkg;
+
+  `include "bsg_defines.v"
+  `include "bp_common_defines.vh"
+  `include "bp_common_fe_be_if.vh"
+  `include "bp_common_me_if.vh"
+
+  /*
+   * RV64 specifies a 64b effective address and 32b instruction.
+   * BlackParrot supports SV39 virtual memory, which specifies 39b virtual / 56b physical address.
+   * Effective addresses must have bits 39-63 match bit 38
+   *   or a page fault exception will occur during translation.
+   * Currently, we only support a very limited number of parameter configurations.
+   * Thought: We could have a `define surrounding core instantiations of each parameter and then
+   * when they import this package, `declare the if structs. No more casting!
+   */
+
+  // Effective-address and instruction widths, in bits.
+  localparam bp_eaddr_width_gp = 64;
+  localparam bp_instr_width_gp = 32;
+
+  // SV39 translation geometry (widths in bits, page size in bytes).
+  parameter bp_sv39_page_table_depth_gp = 3;
+  parameter bp_sv39_pte_width_gp = 64;
+  parameter bp_sv39_vaddr_width_gp = 39;
+  parameter bp_sv39_paddr_width_gp = 56;
+  parameter bp_sv39_ppn_width_gp = 44;
+  parameter bp_page_size_in_bytes_gp = 4096;
+  // Page-offset width is derived from the page size (clog2 of 4096 is 12).
+  parameter bp_page_offset_width_gp = `BSG_SAFE_CLOG2(bp_page_size_in_bytes_gp);
+
+  // Flit counts for data responses / data commands.
+  // NOTE(review): consumers of these two values are not visible in this file.
+  parameter bp_data_resp_num_flit_gp = 4;
+  parameter bp_data_cmd_num_flit_gp = 4;
+
+  // Address map. In the constants below every '?' digit is a z (don't-care)
+  // digit, so those values are match patterns rather than concrete addresses;
+  // presumably they are used as casez items -- confirm against the users.
+  // dram_base_addr_gp is the only fully specified base address in this group.
+  localparam dram_base_addr_gp         = 32'h5000_0000;
+  localparam cfg_link_dev_base_addr_gp = 32'h01??_????;
+  localparam clint_dev_base_addr_gp    = 32'h02??_????;
+  localparam host_dev_base_addr_gp     = 32'h03??_????;
+  localparam plic_dev_base_addr_gp     = 32'h0c??_????;
+  
+  // Register-level patterns; these fall inside the 32'h02??_???? (clint) and
+  // 32'h0c??_???? (plic) device patterns above. mtime is a single exact address.
+  localparam mipi_reg_base_addr_gp     = 32'h0200_0???;
+  localparam mtimecmp_reg_base_addr_gp = 32'h0200_4???;
+  localparam mtime_reg_addr_gp         = 32'h0200_bff8;
+  localparam plic_reg_base_addr_gp     = 32'h0c00_0???;
+
+endpackage : bp_common_pkg
+
diff --git a/litex/soc/cores/cpu/blackparrot/bp_hardware/bp_nonsynth_host.v b/litex/soc/cores/cpu/blackparrot/bp_hardware/bp_nonsynth_host.v
new file mode 100644 (file)
index 0000000..e64ce69
--- /dev/null
@@ -0,0 +1,190 @@
+
+// bp_nonsynth_host
+//
+// Host I/O device model. Incoming memory commands are classified by address
+// into three kinds (hex print, character print, finish), the target core is
+// taken from the address bits just above the 8-byte offset, and the request is
+// reported with $display. A sticky per-core "finished" bit is accumulated and,
+// once every core has finished, the simulation is terminated with $finish.
+// Every accepted command is answered with a response that echoes the command
+// header fields and carries all-zero data.
+//
+// Ports:
+//   mem_cmd_i / mem_cmd_v_i / mem_cmd_yumi_o      incoming command, valid, accept
+//   mem_resp_o / mem_resp_v_o / mem_resp_ready_i  outgoing response, valid, ready
+//   program_finish_o                              per-core sticky finish bits
+//   all_finished_debug_o, core_passed_debug,
+//   core_failed_debug                             debug flags (mark_debug attribute)
+module bp_nonsynth_host
+ import bp_common_pkg::*;
+ import bp_common_aviary_pkg::*;
+ import bp_be_pkg::*;
+ import bp_common_rv64_pkg::*;
+ import bp_cce_pkg::*;
+ import bsg_noc_pkg::*;
+ import bp_cfg_link_pkg::*;
+ #(parameter bp_cfg_e cfg_p = e_bp_inv_cfg
+   `declare_bp_proc_params(cfg_p)
+   `declare_bp_me_if_widths(paddr_width_p, cce_block_width_p, num_lce_p, lce_assoc_p)
+   )
+  (input clk_i
+   , input reset_i
+
+   , input [cce_mem_msg_width_lp-1:0]              mem_cmd_i
+   , input                                         mem_cmd_v_i
+   , output logic                                  mem_cmd_yumi_o
+
+   , output logic [cce_mem_msg_width_lp-1:0]       mem_resp_o
+   , output logic                                  mem_resp_v_o
+   , input                                         mem_resp_ready_i
+
+   , output [num_core_p-1:0]                       program_finish_o
+   ,(* mark_debug = "true" *) output logic                                  all_finished_debug_o //SC_add
+   , (* mark_debug = "true" *) output logic                                 core_passed_debug
+   , (* mark_debug = "true" *) output logic                                 core_failed_debug
+   );
+
+`declare_bp_me_if(paddr_width_p, cce_block_width_p, num_lce_p, lce_assoc_p);
+
+// HOST I/O mappings
+//localparam host_dev_base_addr_gp     = 32'h03??_????;
+
+// Host I/O mappings (arbitrarily decided for now)
+//   Overall host controls 32'h0300_0000-32'h03FF_FFFF
+
+// Match patterns for the casez below: the '?' digits are don't-care bits.
+// NOTE(review): the first literal is sized 32'h and the other two 64'h; all are
+// then cast to paddr_width_p bits.
+localparam hprint_base_addr_gp = paddr_width_p'(32'h0300_0???);
+localparam cprint_base_addr_gp = paddr_width_p'(64'h0300_1???);
+localparam finish_base_addr_gp = paddr_width_p'(64'h0300_2???);
+
+// Structured view of the flat command bus.
+bp_cce_mem_msg_s  mem_cmd_cast_i;
+
+assign mem_cmd_cast_i = mem_cmd_i;
+
+localparam lg_num_core_lp = `BSG_SAFE_CLOG2(num_core_p);
+
+// One-hot-or-zero classification of the current command.
+logic hprint_data_cmd_v;
+logic cprint_data_cmd_v;
+logic finish_data_cmd_v;
+
+// Address decode: each strobe follows mem_cmd_v_i when the address matches its
+// pattern; all strobes default to 0 so unmatched addresses are ignored.
+always_comb
+  begin
+    hprint_data_cmd_v = 1'b0;
+    cprint_data_cmd_v = 1'b0;
+    finish_data_cmd_v = 1'b0;
+
+    unique
+    casez (mem_cmd_cast_i.addr)
+      hprint_base_addr_gp: hprint_data_cmd_v = mem_cmd_v_i; 
+      cprint_base_addr_gp: cprint_data_cmd_v = mem_cmd_v_i;
+      finish_base_addr_gp: finish_data_cmd_v = mem_cmd_v_i;
+      default: begin end
+    endcase
+  end
+
+// Per-core versions of the three strobes (outputs of the decoders below).
+logic [num_core_p-1:0] hprint_w_v_li;
+logic [num_core_p-1:0] cprint_w_v_li;
+logic [num_core_p-1:0] finish_w_v_li;
+
+// Memory-mapped I/O is 64 bit aligned
+// The core index is therefore read from the address bits starting at bit 3.
+localparam byte_offset_width_lp = 3;
+wire [lg_num_core_lp-1:0] mem_cmd_core_enc =
+  mem_cmd_cast_i.addr[byte_offset_width_lp+:lg_num_core_lp];
+
+// Three identical decoders fan each strobe out to the addressed core.
+bsg_decode_with_v
+ #(.num_out_p(num_core_p))
+ hprint_data_cmd_decoder
+  (.v_i(hprint_data_cmd_v)
+   ,.i(mem_cmd_core_enc)
+   
+   ,.o(hprint_w_v_li)
+   );
+
+bsg_decode_with_v
+ #(.num_out_p(num_core_p))
+ cprint_data_cmd_decoder
+  (.v_i(cprint_data_cmd_v)
+   ,.i(mem_cmd_core_enc)
+
+   ,.o(cprint_w_v_li)
+   );
+
+bsg_decode_with_v
+ #(.num_out_p(num_core_p))
+ finish_data_cmd_decoder
+  (.v_i(finish_data_cmd_v)
+   ,.i(mem_cmd_core_enc)
+
+   ,.o(finish_w_v_li)
+   );
+
+// Sticky finish bits: the register input ORs its own output with the new
+// strobes, so a bit stays set until reset_i.
+logic [num_core_p-1:0] finish_r;
+bsg_dff_reset
+ #(.width_p(num_core_p))
+ finish_accumulator
+  (.clk_i(clk_i)
+   ,.reset_i(reset_i)
+
+   ,.data_i(finish_r | finish_w_v_li)
+   ,.data_o(finish_r)
+   );
+
+// Registered AND-reduction of the finish bits: set once every core has finished.
+logic all_finished_r;
+bsg_dff_reset
+ #(.width_p(1))
+ all_finished_reg
+  (.clk_i(clk_i)
+   ,.reset_i(reset_i)
+
+   ,.data_i(&finish_r)
+   ,.data_o(all_finished_r)
+   );
+
+assign program_finish_o = finish_r;
+
+// Reporting and debug flags, evaluated on the falling clock edge.
+// Only commands that are actually accepted (mem_cmd_yumi_o) are reported.
+// For finish commands, data bit 0 selects the outcome: 0 = PASS, 1 = FAIL.
+always_ff @(negedge clk_i)
+  begin
+    for (integer i = 0; i < num_core_p; i++)
+      begin
+        if (hprint_w_v_li[i] & mem_cmd_yumi_o)
+          $display("[CORE%0x PRT] %x", i, mem_cmd_cast_i.data[0+:8]);
+        if (cprint_w_v_li[i] & mem_cmd_yumi_o)
+          $display("[CORE%0x PRT] %c", i, mem_cmd_cast_i.data[0+:8]);
+        if (finish_w_v_li[i] & mem_cmd_yumi_o & ~mem_cmd_cast_i.data[0])
+        begin
+          $display("[CORE%0x FSH] PASS", i);
+          core_passed_debug <= 1;
+        end  
+        if (finish_w_v_li[i] & mem_cmd_yumi_o &  mem_cmd_cast_i.data[0])
+        begin
+          $display("[CORE%0x FSH] FAIL", i);
+          core_failed_debug <=1;
+        end
+      end
+
+    if (all_finished_r)
+      begin
+        $display("All cores finished! Terminating...");
+        // NOTE(review): the flag below is assigned after $finish(); confirm the
+        // update is observable in simulation before the run terminates.
+        $finish();
+        all_finished_debug_o <= 1;
+      end
+    // Written last so that, while reset_i is high, these assignments override
+    // any set performed earlier in the same block.
+    if (reset_i)
+    begin
+      all_finished_debug_o <= 0;
+      core_passed_debug <= 0;
+      core_failed_debug <= 0;
+    end
+  end
+
+// Response path. A command is accepted only when it is valid and the response
+// buffer reports ready, so each accepted command enqueues exactly one response.
+bp_cce_mem_msg_s mem_resp_lo;
+logic mem_resp_v_lo, mem_resp_ready_lo;
+assign mem_cmd_yumi_o = mem_cmd_v_i & mem_resp_ready_lo;
+// presumably a single-entry FIFO (going by the module name) -- confirm in bsg_one_fifo
+bsg_one_fifo
+ #(.width_p(cce_mem_msg_width_lp))
+ mem_resp_buffer
+  (.clk_i(clk_i)
+   ,.reset_i(reset_i)
+
+   ,.data_i(mem_resp_lo)
+   ,.v_i(mem_cmd_yumi_o)
+   ,.ready_o(mem_resp_ready_lo)
+
+   ,.data_o(mem_resp_o)
+   ,.v_o(mem_resp_v_lo)
+   ,.yumi_i(mem_resp_ready_i & mem_resp_v_lo)
+   );
+// The response is only presented as valid while the consumer signals ready.
+assign mem_resp_v_o = mem_resp_v_lo & mem_resp_ready_i;
+
+// Response contents: header fields copied from the command, data forced to zero.
+assign mem_resp_lo =
+  '{msg_type       : mem_cmd_cast_i.msg_type
+    ,addr          : mem_cmd_cast_i.addr
+    ,payload       : mem_cmd_cast_i.payload
+    ,size          : mem_cmd_cast_i.size
+    ,data          : '0
+    };
+
+
+endmodule : bp_nonsynth_host
+
diff --git a/litex/soc/cores/cpu/blackparrot/bp_software/cce_ucode.mem b/litex/soc/cores/cpu/blackparrot/bp_software/cce_ucode.mem
new file mode 100644 (file)
index 0000000..5815a73
--- /dev/null
@@ -0,0 +1,96 @@
+010001000001111100000000000000000000000001000000
+001000000001111100000000000010110000000000000000
+000001000000000011111000000000000000100000000000
+010001000011111100000000000000000000000000000010
+001000000011111100000000000000010000000000000000
+000001000010000111111000000000000000100000000000
+010001000101111100000000000000000000000000001000
+001000000101111100000000000001000000000000000000
+000001000100001011111000000000000000100000000000
+101001000000010010100110100000000000000000000000
+001111000000000000000000000001110000000000000000
+010001000001111100000000000000000000000000000000
+010001000011111100000000000000000000000000000010
+001101000010000000000000000110000000000000000000
+010001000101111100000000000000000000000000000000
+010001000111111100000000000000000000000001000000
+010001001001111100000000000000000000000000000000
+001101000110001000000000000101100000000000000000
+111001000001000001001100100100000000000000000000
+000000000100001011111000000000000000100000000000
+000000001000010011111000000000100000000000000000
+001111000000000000000000000100010000000000000000
+000000000000000011111000000000000000100000000000
+001111000000000000000000000011010000000000000000
+010001000001111100000000000000000000000000000000
+010001000011111100000000000000000000000000000010
+010001000111111100000000000000000000000000000000
+001101000010000000000000001000010000000000000000
+111001000000000010101100100100000000000000000000
+111010011001000000000000000000000000000000000000
+001001000110010000000000010111110000000000000000
+000000000000000011111000000000000000100000000000
+001111000000000000000000000110110000000000000000
+111000010000000000000000000000000000000000000000
+110001000000000000000000000000000000000000000000
+111011000000000000000000000000000000000000000000
+001010000011111100000000010110010000000000000001
+100000100000000000000000000000000000000000000000
+001010001001111100000000001000010000000000000001
+111010000000000000000000000000000000000000000000
+100001100010000000100000000000000000000000000000
+110000000000000000000000000000000000000000000000
+001010000001111100000000001100010000000000000001
+001010000101111100000000001011110000000000000001
+001010001101111100000000001011110000000000000001
+010100000001111100000000000000000000000000000010
+001111000000000000000000001100100000000000000000
+010100000001111100000000000000000000000000000001
+001111000000000000000000001100100000000000000000
+010100000001111100000000000000000000000000000110
+001010011011111100000000010000110000000000000000
+010001000001111100000000000000000000000000000000
+010001000011111100000000000000000000000000000010
+010001000101111100000000000000000000000000000000
+010001000111111100000000000000000000000000000001
+001101000010000000000000001111110000000000000000
+001011000001111100000000001111010000000000000000
+001011010000000000000000001111010000000000000000
+000000000100001011111000000000000000100000000000
+111001000110000010001010100100000000000000000000
+101010100000001010100100000000000000000000000000
+000000000000000011111000000000000000100000000000
+001111000000000000000000001101110000000000000000
+001000000101111100000000010000110000000000000000
+111010011001000000000000000000000000000000000000
+000001000100001011111000000000000000100000000000
+001111000000000000000000001111110000000000000000
+001010011001111100000000010001110000000000000000
+101010100010001000100000000000000000000000000000
+111001000101100010001000100100000000000000000000
+001111000000000000000000001000010000000000000000
+101001100010001001100010000000000000000000000000
+001010010111111100000000010011110000000000000000
+111001000011100010011011100100000000000000000000
+111011011000000000000000000000000000000000000000
+001010011101111100000000010011010000000000000001
+111001010100101010101100100000000000000000000000
+111010011000000000000000000000000000000000000000
+011000010110000000000000000000000000000000000000
+001010010101111100000000010101110000000000000000
+111001000010100110001001100100000000000000000000
+111001000011100110001001100100000000000000000000
+111011011000000000000000000000000000000000000000
+001010011101111100000000010101010000000000000001
+111001010100101010101100100100000000000000000000
+111010011000000000000000000000000000000000000000
+001111000000000000000000001000010000000000000000
+111001010000101010101100100100000000000000000000
+001111000000000000000000001000010000000000000000
+111010000000000000000000000000000000000000000000
+001010000001111100000000010111010000000000000001
+111001010000101010101100100100000000000000000000
+001111000000000000000000001000010000000000000000
+111001010000101010101100100100000000000000000000
+001111000000000000000000001000010000000000000000
+110111000000000000000000000000000000000000000000
diff --git a/litex/soc/cores/cpu/blackparrot/bp_software/udivmoddi4.c b/litex/soc/cores/cpu/blackparrot/bp_software/udivmoddi4.c
new file mode 100644 (file)
index 0000000..a57c6e0
--- /dev/null
@@ -0,0 +1,358 @@
+/* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===
+ *
+ *                     The LLVM Compiler Infrastructure
+ *
+ * This file is dual licensed under the MIT and the University of Illinois Open
+ * Source Licenses. See LICENSE.TXT for details.
+ *
+ * ===----------------------------------------------------------------------===
+ *
+ * This file implements __udivmoddi4 for the compiler_rt library.
+ *
+ * ===----------------------------------------------------------------------===
+ */
+
+#ifndef __blackparrot__
+#include "int_lib.h"
+
+/* Effects: if rem != 0, *rem = a % b
+ * Returns: a / b
+ */
+
+/* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
+
+/* compiler-rt flavour of __udivmoddi4; this half of the file is compiled only
+ * when __blackparrot__ is NOT defined (see the #ifndef above).
+ *
+ *   a, b : dividend and divisor, each handled as a high/low pair of su_int
+ *          halves through the udwords union.
+ *   rem  : optional out-parameter; when non-NULL it receives a % b.
+ *   returns a / b.
+ *
+ * Structure: a cascade of special cases that return early (the small diagrams
+ * show dividend-over-divisor as "high low" halves, 0 = zero half, K = non-zero
+ * half, X = any value), followed by a generic shift-and-subtract loop that
+ * runs sr times.
+ */
+COMPILER_RT_ABI du_int
+__udivmoddi4(du_int a, du_int b, du_int* rem)
+{
+    const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
+    const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
+    udwords n;
+    n.all = a;
+    udwords d;
+    d.all = b;
+    udwords q;
+    udwords r;
+    unsigned sr;
+    /* special cases, X is unknown, K != 0 */
+    if (n.s.high == 0)
+    {
+        if (d.s.high == 0)
+        {
+            /* 0 X
+             * ---
+             * 0 X
+             */
+            /* Both operands fit in one word: use the single-word operators. */
+            if (rem)
+                *rem = n.s.low % d.s.low;
+            return n.s.low / d.s.low;
+        }
+        /* 0 X
+         * ---
+         * K X
+         */
+        /* Divisor is larger than the dividend: quotient 0, remainder n. */
+        if (rem)
+            *rem = n.s.low;
+        return 0;
+    }
+    /* n.s.high != 0 */
+    if (d.s.low == 0)
+    {
+        if (d.s.high == 0)
+        {
+            /* K X
+             * ---
+             * 0 0
+             */
+            /* Both halves of the divisor are zero here, so the two
+             * operations below divide by zero (d.s.low == 0).
+             */
+            if (rem)
+                *rem = n.s.high % d.s.low;
+            return n.s.high / d.s.low;
+        }
+        /* d.s.high != 0 */
+        if (n.s.low == 0)
+        {
+            /* K 0
+             * ---
+             * K 0
+             */
+            /* Low halves are both zero: divide the high halves only. */
+            if (rem)
+            {
+                r.s.high = n.s.high % d.s.high;
+                r.s.low = 0;
+                *rem = r.all;
+            }
+            return n.s.high / d.s.high;
+        }
+        /* K K
+         * ---
+         * K 0
+         */
+        if ((d.s.high & (d.s.high - 1)) == 0)     /* if d is a power of 2 */
+        {
+            /* Power-of-two divisor: mask for the remainder, shift for the quotient. */
+            if (rem)
+            {
+                r.s.low = n.s.low;
+                r.s.high = n.s.high & (d.s.high - 1);
+                *rem = r.all;
+            }
+            return n.s.high >> __builtin_ctz(d.s.high);
+        }
+        /* K K
+         * ---
+         * K 0
+         */
+        sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
+        /* 0 <= sr <= n_uword_bits - 2 or sr large */
+        /* sr is unsigned, so a "negative" difference shows up as a large value. */
+        if (sr > n_uword_bits - 2)
+        {
+           if (rem)
+                *rem = n.all;
+            return 0;
+        }
+        ++sr;
+        /* 1 <= sr <= n_uword_bits - 1 */
+        /* q.all = n.all << (n_udword_bits - sr); */
+        q.s.low = 0;
+        q.s.high = n.s.low << (n_uword_bits - sr);
+        /* r.all = n.all >> sr; */
+        r.s.high = n.s.high >> sr;
+        r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+    }
+    else  /* d.s.low != 0 */
+    {
+        if (d.s.high == 0)
+        {
+            /* K X
+             * ---
+             * 0 K
+             */
+            if ((d.s.low & (d.s.low - 1)) == 0)     /* if d is a power of 2 */
+            {
+                if (rem)
+                    *rem = n.s.low & (d.s.low - 1);
+                /* Dividing by 1 is handled separately: sr would be 0 below. */
+                if (d.s.low == 1)
+                    return n.all;
+                sr = __builtin_ctz(d.s.low);
+                q.s.high = n.s.high >> sr;
+                q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+                return q.all;
+            }
+            /* K X
+             * ---
+             * 0 K
+             */
+            sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high);
+            /* 2 <= sr <= n_udword_bits - 1
+             * q.all = n.all << (n_udword_bits - sr);
+             * r.all = n.all >> sr;
+             */
+            /* The double-word shifts are spelled out per case on the halves. */
+            if (sr == n_uword_bits)
+            {
+                q.s.low = 0;
+                q.s.high = n.s.low;
+                r.s.high = 0;
+                r.s.low = n.s.high;
+            }
+            else if (sr < n_uword_bits)  // 2 <= sr <= n_uword_bits - 1
+            {
+                q.s.low = 0;
+                q.s.high = n.s.low << (n_uword_bits - sr);
+                r.s.high = n.s.high >> sr;
+                r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+            }
+            else              // n_uword_bits + 1 <= sr <= n_udword_bits - 1
+            {
+                q.s.low = n.s.low << (n_udword_bits - sr);
+                q.s.high = (n.s.high << (n_udword_bits - sr)) |
+                           (n.s.low >> (sr - n_uword_bits));
+                r.s.high = 0;
+                r.s.low = n.s.high >> (sr - n_uword_bits);
+            }
+        }
+        else
+        {
+            /* K X
+             * ---
+             * K K
+             */
+            sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
+            /* 0 <= sr <= n_uword_bits - 1 or sr large */
+            if (sr > n_uword_bits - 1)
+            {
+                if (rem)
+                    *rem = n.all;
+                return 0;
+            }
+            ++sr;
+            /* 1 <= sr <= n_uword_bits */
+            /*  q.all = n.all << (n_udword_bits - sr); */
+            q.s.low = 0;
+            if (sr == n_uword_bits)
+            {
+                q.s.high = n.s.low;
+                r.s.high = 0;
+                r.s.low = n.s.high;
+            }
+            else
+            {
+                q.s.high = n.s.low << (n_uword_bits - sr);
+                r.s.high = n.s.high >> sr;
+                r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
+            }
+        }
+    }
+    /* Not a special case
+     * q and r are initialized with:
+     * q.all = n.all << (n_udword_bits - sr);
+     * r.all = n.all >> sr;
+     * 1 <= sr <= n_udword_bits - 1
+     */
+    su_int carry = 0;
+    for (; sr > 0; --sr)
+    {
+        /* r:q = ((r:q)  << 1) | carry */
+        r.s.high = (r.s.high << 1) | (r.s.low  >> (n_uword_bits - 1));
+        r.s.low  = (r.s.low  << 1) | (q.s.high >> (n_uword_bits - 1));
+        q.s.high = (q.s.high << 1) | (q.s.low  >> (n_uword_bits - 1));
+        q.s.low  = (q.s.low  << 1) | carry;
+        /* carry = 0;
+         * if (r.all >= d.all)
+         * {
+         *      r.all -= d.all;
+         *      carry = 1;
+         * }
+         */
+        /* Branch-free form of the pseudo-code above: s is all ones when
+         * r.all >= d.all and zero otherwise, and is used as a mask.
+         */
+        const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
+        carry = s & 1;
+        r.all -= d.all & s;
+    }
+    /* Shift in the quotient bit produced by the final iteration. */
+    q.all = (q.all << 1) | carry;
+    if (rem)
+        *rem = r.all;
+    return q.all;
+}
+#else
+
+/* More subroutines needed by GCC output code on some machines.  */
+/* Compile this one with gcc.  */
+/* Copyright (C) 1989-2014 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+Under Section 7 of GPL version 3, you are granted additional
+permissions described in the GCC Runtime Library Exception, version
+3.1, as published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License and
+a copy of the GCC Runtime Library Exception along with this program;
+see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
+<http://www.gnu.org/licenses/>.  */
+
+/* This is extracted from gcc's libgcc/libgcc2.c with these typedefs added: */
+/* Word / double-word helper types used by the two routines below.
+ * NOTE(review): DWtype (int) is a narrower type than UDWtype
+ * (unsigned long long) here, and Wtype is short; confirm that this asymmetry
+ * between the signed and unsigned "double word" types is intended.
+ */
+typedef short Wtype;
+typedef int DWtype;
+typedef unsigned int UWtype;
+typedef unsigned long long UDWtype;
+/* Field order follows the byte order so that `high` always names the
+ * most-significant half of the double word.
+ */
+#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
+struct DWstruct {Wtype high, low;};
+#else
+struct DWstruct {Wtype low, high;};
+#endif
+/* Overlay giving access to a DWtype either whole (ll) or as two halves (s). */
+typedef union {
+  struct DWstruct s;
+  DWtype ll;
+} DWunion;
+
+/* Unsigned division with remainder (libgcc flavour, used when __blackparrot__
+ * is defined).
+ *
+ *   n  : dividend
+ *   d  : divisor
+ *   rp : optional out-parameter; when non-NULL it receives n % d
+ *   returns n / d. When d > n the loop is skipped entirely: the quotient is 0
+ *   and the remainder is n.
+ *
+ * NOTE: d == 0 satisfies the `y <= r` test and reaches __builtin_clzll (0),
+ * whose result is documented by GCC as undefined; callers must not pass 0.
+ */
+UDWtype
+__udivmoddi4 (UDWtype n, UDWtype d, UDWtype *rp)
+{
+  UDWtype q = 0, r = n, y = d;
+  UWtype lz1, lz2, i, k;
+
+  /* Implements align divisor shift dividend method. This algorithm
+     aligns the divisor under the dividend and then perform number of
+     test-subtract iterations which shift the dividend left. Number of
+     iterations is k + 1 where k is the number of bit positions the
+     divisor must be shifted left  to align it under the dividend.
+     quotient bits can be saved in the rightmost positions of the dividend
+     as it shifts left on each test-subtract iteration. */
+
+  if (y <= r)
+    {
+      lz1 = __builtin_clzll (d);
+      lz2 = __builtin_clzll (n);
+
+      /* k = how far the divisor must move left to line up its top set bit
+         with the dividend's top set bit. */
+      k = lz1 - lz2;
+      y = (y << k);
+
+      /* Dividend can exceed 2 ^ (width - 1) - 1 but still be less than the
+         aligned divisor. Normal iteration can drop the high order bit
+         of the dividend. Therefore, first test-subtract iteration is a
+         special case, saving its quotient bit in a separate location and
+         not shifting the dividend. */
+      if (r >= y)
+        {
+          r = r - y;
+          q =  (1ULL << k);
+        }
+
+      if (k > 0)
+        {
+          y = y >> 1;
+
+          /* k additional iterations where k regular test subtract shift
+            dividend iterations are done.  */
+          i = k;
+          do
+            {
+              /* A successful subtract records a 1 quotient bit in bit 0 of r
+                 as r shifts left; otherwise a 0 is shifted in. */
+              if (r >= y)
+                r = ((r - y) << 1) + 1;
+              else
+                r =  (r << 1);
+              i = i - 1;
+            } while (i != 0);
+
+          /* First quotient bit is combined with the quotient bits resulting
+             from the k regular iterations.  */
+          /* After the loop the low k bits of r hold quotient bits and the
+             bits above them hold the remainder; the three statements below
+             separate the two. */
+          q = q + r;
+          r = r >> k;
+          q = q - (r << k);
+        }
+    }
+
+  if (rp)
+    *rp = r;
+  return q;
+}
+
+/* Signed remainder: returns u % v, with the result taking the sign of the
+ * dividend u (the sign of v does not affect the result).
+ *
+ * The work is delegated to __udivmoddi4 on the operands' magnitudes.
+ *
+ * The remainder is received in a genuine UDWtype object. The previous code
+ * passed (UDWtype *)&w with w of type DWtype, which is a narrower type in this
+ * file, so __udivmoddi4 stored past the end of w.
+ *
+ * Magnitudes are formed in unsigned arithmetic, where negation is well defined
+ * for every input, including the most negative DWtype value.
+ */
+DWtype
+__moddi3 (DWtype u, DWtype v)
+{
+  /* Converting a negative value to UDWtype wraps modulo 2^N; negating the
+     wrapped value then yields the magnitude. */
+  UDWtype un = (UDWtype) u;
+  UDWtype vn = (UDWtype) v;
+  UDWtype r = 0;
+
+  if (u < 0)
+    un = -un;
+  if (v < 0)
+    vn = -vn;
+
+  (void) __udivmoddi4 (un, vn, &r);
+
+  /* r < |v| and r <= |u|, so it always fits back into DWtype. */
+  if (u < 0)
+    return -(DWtype) r;
+
+  return (DWtype) r;
+}
+
+#endif
diff --git a/litex/soc/cores/cpu/blackparrot/core.py b/litex/soc/cores/cpu/blackparrot/core.py
new file mode 100644 (file)
index 0000000..884b079
--- /dev/null
@@ -0,0 +1,174 @@
+# litex/soc/cores/cpu/blackparrot/core.py
+# BlackParrot Chip core support for the LiteX SoC.
+#
+# Authors: Sadullah Canakci & Cansu Demirkiran  <{scanakci,cansu}@bu.edu>
+# Copyright (c) 2019, Boston University
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met:
+#
+#     * Redistributions of source code must retain the above copyright
+#       notice, this list of conditions and the following disclaimer.
+#
+#     * Redistributions in binary form must reproduce the above
+#       copyright notice, this list of conditions and the following
+#       disclaimer in the documentation and/or other materials provided
+#       with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import os
+
+from migen import *
+
+from litex.soc.interconnect import axi
+from litex.soc.interconnect import wishbone
+from litex.soc.cores.cpu import CPU
+
+# Supported CPU variants. Only the keys are consulted in this file (by the
+# variant assertion in BlackParrotRV64.__init__); the value string is not read
+# here.
+# NOTE(review): the value names a rocketchip configuration -- presumably a
+# leftover from the Rocket wrapper this file was derived from; confirm.
+CPU_VARIANTS = {
+    "standard": "freechips.rocketchip.system.LitexConfig",
+#    "linux":    "freechips.rocketchip.system.LitexLinuxConfig",
+#    "full":     "freechips.rocketchip.system.LitexFullConfig",
+}
+
+# Per-variant compiler flags, appended by BlackParrotRV64.gcc_flags.
+# Each value ends with a space because further flags are concatenated after it.
+GCC_FLAGS = {
+    "standard": "-march=rv64ia   -mabi=lp64 -O0 ",
+#    "linux":    "-march=rv64imac   -mabi=lp64 ",
+#    "full":     "-march=rv64imafdc -mabi=lp64 ",
+}
+
+class BlackParrotRV64(Module):
+    name                 = "blackparrot"
+    data_width           = 64
+    endianness           = "little"
+    gcc_triple           = ("riscv64-unknown-elf")
+    linker_output_format = "elf64-littleriscv"
+ #   io_regions           = {0x10000000: 0x70000000} # origin, length
+    io_regions           = {0x30000000: 0x20000000} # origin, length
+   
+    @property
+    def mem_map(self):
+        return {
+            "ethmac"   : 0x30000000,
+            "csr"      : 0x40000000,
+            "rom"      : 0x50000000,
+            "sram"     : 0x51000000,
+            "main_ram" : 0x80000000,
+        }
+
+    @property
+    def gcc_flags(self):
+        flags =  "-mno-save-restore "
+        flags += GCC_FLAGS[self.variant]
+        flags += "-D__blackparrot__ "
+        return flags
+
+    def __init__(self, platform, variant="standard"):
+        assert variant in CPU_VARIANTS, "Unsupported variant %s" % variant
+        print("SC: Check how to get cpu_reset_addr properly!!!!!!!!")
+        #assert cpu_reset_addr == 0x10000000, "cpu_reset_addr hardcoded in Chisel elaboration!"
+
+        self.platform = platform
+        self.variant = variant
+        self.reset = Signal()
+        self.interrupt = Signal(4)#TODO: how interrupts work?
+#        print(self.interrupt)
+# old       self.wbone = wbn = wishbone.Interface(data_width=64, adr_width=40)
+        self.wbone = wbn = wishbone.Interface(data_width=64, adr_width=37)
+
+        self.interrupts = {}#TODO: Idk why this is necessary. Without this, soc_core.py raises error with no object attirubute "interrupts" 
+
+        self.buses     = [wbn]
+        # # #
+        # connect BP adaptor to Wishbone
+        self.cpu_params = dict(
+            # clock, reset
+            i_clk_i = ClockSignal(),
+            i_reset_i = ResetSignal() | self.reset,
+            # irq
+            i_interrupts = self.interrupt,
+            i_wbm_dat_i = wbn.dat_r,
+            o_wbm_dat_o = wbn.dat_w,
+            i_wbm_ack_i = wbn.ack,
+           # i_wbm_err_i = wbn.err,
+           # i_wbm_rty_i = wbn.try,
+            o_wbm_adr_o = wbn.adr,
+            o_wbm_stb_o = wbn.stb,
+            o_wbm_cyc_o = wbn.cyc,
+            o_wbm_sel_o = wbn.sel,
+            o_wbm_we_o = wbn.we,
+            o_wbm_cti_o = wbn.cti,
+            o_wbm_bte_o = wbn.bte,
+        )
+
+#        self.submodules += mem_a2w,  mmio_a2w #need to change most probably!
+           # add verilog sources
+        self.add_sources(platform, variant)
+
+    def set_reset_address(self, reset_address):#note sure if reset address needs to be changed for BB
+        assert not hasattr(self, "reset_address")
+        self.reset_address = reset_address
+        print(hex(reset_address))
+        #assert reset_address == 0x10000000, "cpu_reset_addr hardcoded in during elaboration!"
+
+
+    @staticmethod
+    def add_sources(platform, variant="standard"):
+        #Read from a file and use add_source function
+      #  vdir = os.path.join(
+        #os.path.abspath(os.path.dirname(__file__)),"pre-alpha-release", "verilog",variant)
+      #  incdir = os.path.join(
+        #os.path.abspath(os.path.dirname(__file__)),"pre-alpha-release", "verilog",variant)
+        print("Adding the sources")
+        #vdir = os.path.join(
+        #os.path.abspath(os.path.dirname(__file__)),"verilog")
+        #platform.add_source_dir(vdir)
+        filename= os.path.join(os.path.abspath(os.path.dirname(__file__)),"flist_litex.verilator")
+        print(filename)
+#        platform.add_source('/home/scanakci/Research_sado/litex/litex/litex/soc/cores/cpu/blackparrot/pre-alpha-release/bp_fpga/ExampleBlackParrotSystem.v')
+        with open(filename) as openfileobject:
+            for line in openfileobject:
+                temp = line
+        #        print(line)
+                if (temp[0] == '/' and temp[1] == '/'):
+                    continue
+                elif ("+incdir+" in temp) :
+                    s1 = line.find('$')
+                    s2 = line.find('/')
+                    dir_ = line[s1:s2]
+                    a = os.popen('echo '+ str(dir_))
+                    dir_start = a.read()
+                    vdir = dir_start[:-1] + line[s2:-1]
+                    print("INCDIR" + vdir)
+                    platform.add_verilog_include_path(vdir)  #this line might be changed
+                elif (temp[0]=='$') :
+                    s2 = line.find('/')
+                    dir_ = line[0:s2]
+                    a = os.popen('echo '+ str(dir_))
+                    dir_start = a.read()
+                    vdir = dir_start[:-1]+ line[s2:-1]
+                    print(vdir)
+                    platform.add_source(vdir) #this line might be changed
+                elif (temp[0] == '/'):
+                    assert("No support for absolute path for now")
+
+
+
+       
+    def do_finalize(self):
+        assert hasattr(self, "reset_address")
+        self.specials += Instance("ExampleBlackParrotSystem", **self.cpu_params)
+
+
diff --git a/litex/soc/cores/cpu/blackparrot/flist_litex.verilator b/litex/soc/cores/cpu/blackparrot/flist_litex.verilator
new file mode 100644 (file)
index 0000000..65e8e1c
--- /dev/null
@@ -0,0 +1,228 @@
+//// Includes
+// bsg_ip_cores includes
++incdir+$BASEJUMP_STL_DIR/bsg_dataflow
++incdir+$BASEJUMP_STL_DIR/bsg_mem
++incdir+$BASEJUMP_STL_DIR/bsg_misc
++incdir+$BASEJUMP_STL_DIR/bsg_test
++incdir+$BASEJUMP_STL_DIR/bsg_noc
+// common includes
++incdir+$BP_COMMON_DIR/src/include
+// fe includes
++incdir+$BP_FE_DIR/src/include
+// be includes
++incdir+$BP_BE_DIR/src/include
++incdir+$BP_BE_DIR/src/include/bp_be_dcache
+// me includes 
++incdir+$BP_ME_DIR/src/include/v
+// top includes
++incdir+$BP_TOP_DIR/src/include
+//// Packages
+// bsg_ip_cores packages
+$BASEJUMP_STL_DIR/bsg_noc/bsg_noc_pkg.v
+$BASEJUMP_STL_DIR/bsg_noc/bsg_wormhole_router_pkg.v
+// Interface packages
+$BP_COMMON_DIR/src/include/bp_common_rv64_pkg.vh
+$BP_COMMON_DIR/src/include/bp_common_pkg.vh
+$BP_COMMON_DIR/src/include/bp_common_aviary_pkg.vh
+// FE packages
+$BP_FE_DIR/src/include/bp_fe_icache_pkg.vh
+$BP_FE_DIR/src/include/bp_fe_pkg.vh
+// BE packages
+$BP_BE_DIR/src/include/bp_be_pkg.vh
+$BP_BE_DIR/src/include/bp_be_dcache/bp_be_dcache_pkg.vh
+// ME packages
+$BP_ME_DIR/src/include/v/bp_cce_pkg.v
+$BP_ME_DIR/src/include/v/bp_me_pkg.vh
+// Top packages
+$BP_TOP_DIR/src/include/bp_cfg_link_pkg.vh
+//// bsg_ip_cores files
+$BASEJUMP_STL_DIR/bsg_async/bsg_async_fifo.v
+$BASEJUMP_STL_DIR/bsg_async/bsg_launch_sync_sync.v
+$BASEJUMP_STL_DIR/bsg_async/bsg_async_ptr_gray.v
+$BASEJUMP_STL_DIR/bsg_dataflow/bsg_channel_tunnel.v
+$BASEJUMP_STL_DIR/bsg_dataflow/bsg_channel_tunnel_in.v
+$BASEJUMP_STL_DIR/bsg_dataflow/bsg_channel_tunnel_out.v
+$BASEJUMP_STL_DIR/bsg_dataflow/bsg_1_to_n_tagged_fifo.v
+$BASEJUMP_STL_DIR/bsg_dataflow/bsg_1_to_n_tagged.v
+$BASEJUMP_STL_DIR/bsg_dataflow/bsg_fifo_1r1w_large.v
+$BASEJUMP_STL_DIR/bsg_dataflow/bsg_fifo_1rw_large.v
+$BASEJUMP_STL_DIR/bsg_dataflow/bsg_serial_in_parallel_out.v
+$BASEJUMP_STL_DIR/bsg_dataflow/bsg_one_fifo.v
+$BASEJUMP_STL_DIR/bsg_dataflow/bsg_round_robin_2_to_2.v
+$BASEJUMP_STL_DIR/bsg_dataflow/bsg_fifo_1r1w_pseudo_large.v
+$BASEJUMP_STL_DIR/bsg_dataflow/bsg_fifo_1r1w_small.v
+$BASEJUMP_STL_DIR/bsg_dataflow/bsg_fifo_tracker.v
+$BASEJUMP_STL_DIR/bsg_dataflow/bsg_flow_counter.v
+$BASEJUMP_STL_DIR/bsg_dataflow/bsg_parallel_in_serial_out_dynamic.v
+$BASEJUMP_STL_DIR/bsg_dataflow/bsg_round_robin_n_to_1.v
+$BASEJUMP_STL_DIR/bsg_dataflow/bsg_serial_in_parallel_out_dynamic.v
+$BASEJUMP_STL_DIR/bsg_dataflow/bsg_shift_reg.v
+$BASEJUMP_STL_DIR/bsg_dataflow/bsg_two_fifo.v
+$BASEJUMP_STL_DIR/bsg_mem/bsg_cam_1r1w.v
+$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1r1w.v
+$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1r1w_sync.v
+$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1r1w_sync_synth.v
+$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1r1w_synth.v
+$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1rw_sync.v
+// $BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1rw_sync_mask_write_bit.v
+$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1rw_sync_mask_write_bit_synth.v
+$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1rw_sync_mask_write_byte.v
+$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1rw_sync_mask_write_byte_synth.v
+$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_1rw_sync_synth.v
+$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_2r1w_sync.v
+$BASEJUMP_STL_DIR/bsg_mem/bsg_mem_2r1w_sync_synth.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_adder_ripple_carry.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_arb_fixed.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_array_concentrate_static.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_circular_ptr.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_concentrate_static.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_counter_clear_up.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_counter_set_down.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_counter_up_down.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_counter_up_down_variable.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_crossbar_o_by_i.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_cycle_counter.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_decode.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_decode_with_v.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_dff.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_dff_en_bypass.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_dff_chain.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_dff_en.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_dff_reset.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_dff_reset_en.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_encode_one_hot.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_lfsr.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_lru_pseudo_tree_decode.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_lru_pseudo_tree_encode.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_mux.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_mux_butterfly.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_mux_one_hot.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_mux_segmented.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_priority_encode.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_priority_encode_one_hot_out.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_round_robin_arb.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_scan.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_swap.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_thermometer_count.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_transpose.v
+$BASEJUMP_STL_DIR/bsg_misc/bsg_unconcentrate_static.v
+$BASEJUMP_STL_DIR/bsg_noc/bsg_mesh_router.v
+$BASEJUMP_STL_DIR/bsg_noc/bsg_mesh_router_buffered.v
+$BASEJUMP_STL_DIR/bsg_noc/bsg_noc_repeater_node.v
+$BASEJUMP_STL_DIR/bsg_noc/bsg_wormhole_concentrator.v
+$BASEJUMP_STL_DIR/bsg_noc/bsg_wormhole_concentrator_in.v
+$BASEJUMP_STL_DIR/bsg_noc/bsg_wormhole_concentrator_out.v
+$BASEJUMP_STL_DIR/bsg_noc/bsg_wormhole_router.v
+$BASEJUMP_STL_DIR/bsg_noc/bsg_wormhole_router_adapter.v
+$BASEJUMP_STL_DIR/bsg_noc/bsg_wormhole_router_adapter_in.v
+$BASEJUMP_STL_DIR/bsg_noc/bsg_wormhole_router_adapter_out.v
+$BASEJUMP_STL_DIR/bsg_noc/bsg_wormhole_router_decoder_dor.v
+$BASEJUMP_STL_DIR/bsg_noc/bsg_wormhole_router_input_control.v
+$BASEJUMP_STL_DIR/bsg_noc/bsg_wormhole_router_output_control.v
+// Common files
+$BP_COMMON_DIR/src/v/bsg_fifo_1r1w_fence.v
+$BP_COMMON_DIR/src/v/bsg_fifo_1r1w_rolly.v
+$BP_COMMON_DIR/src/v/bp_tlb.v
+$BP_COMMON_DIR/src/v/bp_tlb_replacement.v
+// BE files
+$BP_BE_DIR/src/v/bp_be_top.v
+// Calculator
+$BP_BE_DIR/src/v/bp_be_calculator/bp_be_bypass.v
+$BP_BE_DIR/src/v/bp_be_calculator/bp_be_calculator_top.v
+$BP_BE_DIR/src/v/bp_be_calculator/bp_be_instr_decoder.v
+$BP_BE_DIR/src/v/bp_be_calculator/bp_be_int_alu.v
+$BP_BE_DIR/src/v/bp_be_calculator/bp_be_pipe_fp.v
+$BP_BE_DIR/src/v/bp_be_calculator/bp_be_pipe_int.v
+$BP_BE_DIR/src/v/bp_be_calculator/bp_be_pipe_mem.v
+$BP_BE_DIR/src/v/bp_be_calculator/bp_be_pipe_mul.v
+$BP_BE_DIR/src/v/bp_be_calculator/bp_be_regfile.v
+// Checker
+$BP_BE_DIR/src/v/bp_be_checker/bp_be_checker_top.v
+$BP_BE_DIR/src/v/bp_be_checker/bp_be_detector.v
+$BP_BE_DIR/src/v/bp_be_checker/bp_be_director.v
+$BP_BE_DIR/src/v/bp_be_checker/bp_be_scheduler.v
+// MMU
+$BP_BE_DIR/src/v/bp_be_mem/bp_be_ptw.v
+$BP_BE_DIR/src/v/bp_be_mem/bp_be_csr.v
+$BP_BE_DIR/src/v/bp_be_mem/bp_be_dcache/bp_be_dcache.v
+$BP_BE_DIR/src/v/bp_be_mem/bp_be_dcache/bp_be_dcache_lce_cmd.v
+$BP_BE_DIR/src/v/bp_be_mem/bp_be_dcache/bp_be_dcache_lce.v
+$BP_BE_DIR/src/v/bp_be_mem/bp_be_dcache/bp_be_dcache_lce_req.v
+$BP_BE_DIR/src/v/bp_be_mem/bp_be_dcache/bp_be_dcache_wbuf.v
+$BP_BE_DIR/src/v/bp_be_mem/bp_be_dcache/bp_be_dcache_wbuf_queue.v
+$BP_BE_DIR/src/v/bp_be_mem/bp_be_mem_top.v
+//// FE files
+$BP_FE_DIR/src/v/bp_fe_bht.v
+$BP_FE_DIR/src/v/bp_fe_btb.v
+$BP_FE_DIR/src/v/bp_fe_lce_cmd.v
+$BP_FE_DIR/src/v/bp_fe_icache.v
+$BP_FE_DIR/src/v/bp_fe_instr_scan.v
+$BP_FE_DIR/src/v/bp_fe_lce.v
+$BP_FE_DIR/src/v/bp_fe_lce_req.v
+$BP_FE_DIR/src/v/bp_fe_mem.v
+$BP_FE_DIR/src/v/bp_fe_pc_gen.v
+$BP_FE_DIR/src/v/bp_fe_top.v
+//// ME files
+// CCE
+$BP_ME_DIR/src/v/cce/bp_cce.v
+$BP_ME_DIR/src/v/cce/bp_cce_alu.v
+$BP_ME_DIR/src/v/cce/bp_cce_dir.v
+$BP_ME_DIR/src/v/cce/bp_cce_dir_tag_checker.v
+$BP_ME_DIR/src/v/cce/bp_cce_dir_lru_extract.v
+$BP_ME_DIR/src/v/cce/bp_cce_gad.v
+$BP_ME_DIR/src/v/cce/bp_cce_inst_decode.v
+$BP_ME_DIR/src/v/cce/bp_cce_msg.v
+$BP_ME_DIR/src/v/cce/bp_cce_msg_cached.v
+$BP_ME_DIR/src/v/cce/bp_cce_msg_uncached.v
+$BP_ME_DIR/src/v/cce/bp_cce_pc.v
+$BP_ME_DIR/src/v/cce/bp_cce_pending.v
+$BP_ME_DIR/src/v/cce/bp_cce_reg.v
+$BP_ME_DIR/src/v/cce/bp_cce_top.v
+// Network
+$BP_ME_DIR/src/v/wormhole/bp_me_cce_id_to_cord.v
+$BP_ME_DIR/src/v/wormhole/bp_me_cce_to_wormhole_link_client.v
+$BP_ME_DIR/src/v/wormhole/bp_me_cce_to_wormhole_link_master.v
+$BP_ME_DIR/src/v/wormhole/bp_me_lce_id_to_cord.v
+$BP_ME_DIR/src/v/wormhole/bp_me_wormhole_packet_encode_lce_cmd.v
+$BP_ME_DIR/src/v/wormhole/bp_me_wormhole_packet_encode_lce_req.v
+$BP_ME_DIR/src/v/wormhole/bp_me_wormhole_packet_encode_lce_resp.v
+$BP_ME_DIR/src/v/wormhole/bp_me_wormhole_packet_encode_mem_cmd.v
+$BP_ME_DIR/src/v/wormhole/bp_me_wormhole_packet_encode_mem_resp.v
+//// TOP
+$BP_TOP_DIR/src/v/bp_chip.v
+$BP_TOP_DIR/src/v/bp_core.v
+$BP_TOP_DIR/src/v/bp_core_complex.v
+$BP_TOP_DIR/src/v/bp_mem_complex.v
+$BP_TOP_DIR/src/v/bp_mmio_enclave.v
+$BP_TOP_DIR/src/v/bp_mmio_node.v
+$BP_TOP_DIR/src/v/bp_tile.v
+$BP_TOP_DIR/src/v/bp_tile_node.v
+//// Common
+$BP_COMMON_DIR/src/v/bp_addr_map.v
+
+// bsg_ip_cores files
+$BASEJUMP_STL_DIR/bsg_fsb/bsg_fsb_node_trace_replay.v
+// be files
+$BP_BE_DIR/test/common/bp_be_nonsynth_tracer.v
+// $BP_BE_DIR/test/common/bp_be_nonsynth_perf.v
+// me files
+// $BP_ME_DIR/test/common/bp_mem.v
+// $BP_ME_DIR/test/common/bp_mem_delay_model.v
+// $BP_ME_DIR/test/common/bp_mem_transducer.v
+// $BP_ME_DIR/test/common/bp_mem_storage_sync.v
+// $BP_ME_DIR/test/common/dramsim2_wrapper.cpp
+$BP_ME_DIR/test/common/bp_cce_mmio_cfg_loader.v
+// $BP_ME_DIR/test/common/bp_mem_nonsynth_tracer.v
+// $BP_ME_DIR/test/common/bp_cce_nonsynth_tracer.v
+// $BP_ME_DIR/test/common/bp_mem_utils.cpp
+// top files
+$BP_TOP_DIR/test/common/bp_nonsynth_host.v
+// $BP_TOP_DIR/test/common/bp_nonsynth_if_verif.v
+$BP_TOP_DIR/test/common/bp_nonsynth_commit_tracer.v
+// /home/scanakci/Research_sado/litex/litex/litex/soc/cores/cpu/blackparrot/pre-alpha-release/bp_top/syn/results/verilator/bp_top_trace_demo.e_bp_single_core_cfg.build/wrapper.v
+// /home/scanakci/Research_sado/litex/litex/litex/soc/cores/cpu/blackparrot/pre-alpha-release/bp_top/syn/results/verilator/bp_top_trace_demo.e_bp_single_core_cfg.build/test_bp.cpp
+$BP_FPGA_DIR/bp2wb_convertor.v
+$BP_FPGA_DIR/ExampleBlackParrotSystem.v
+$BP_FPGA_DIR/bsg_mem_1rw_sync_mask_write_bit.v
+// Recent
+$BASEJUMP_STL_DIR/bsg_noc/bsg_mesh_stitch.v
diff --git a/litex/soc/cores/cpu/blackparrot/pre-alpha-release b/litex/soc/cores/cpu/blackparrot/pre-alpha-release
new file mode 160000 (submodule)
index 0000000..8aa6b62
--- /dev/null
@@ -0,0 +1 @@
+Subproject commit 8aa6b6259308105872e19675c1cd5aee22283913
diff --git a/litex/soc/cores/cpu/blackparrot/setEnvironment.sh b/litex/soc/cores/cpu/blackparrot/setEnvironment.sh
new file mode 100755 (executable)
index 0000000..d818ec5
--- /dev/null
@@ -0,0 +1,97 @@
+#!/bin/bash
+## Set common environment variables
+export LITEX=$(git rev-parse --show-toplevel)
+export BP=$PWD
+cp bp_software/cce_ucode.mem /tmp/.
+cd pre-alpha-release
+TOP=$(git rev-parse --show-toplevel)
+export BP_COMMON_DIR=$TOP/bp_common
+export BP_FE_DIR=$TOP/bp_fe
+export BP_BE_DIR=$TOP/bp_be
+export BP_ME_DIR=$TOP/bp_me
+export BP_TOP_DIR=$TOP/bp_top
+export BP_EXTERNAL_DIR=$TOP/external
+export BASEJUMP_STL_DIR=$BP_EXTERNAL_DIR/basejump_stl
+export BP_FPGA_DIR=$TOP/bp_fpga
+## Setup CAD tools
+
+# If the machine you are working on is bsg_cadenv compliant, then you do not
+# need to setup the cad tools, simply put bsg_cadenv in the same root dir.
+#BSG_CADENV_DIR=$(TOP)/external/bsg_cadenv
+#-include $(BSG_CADENV_DIR)/cadenv.mk
+
+## Specify license path if needed
+#LM_LICENSE_FILE ?=
+
+## Override tool paths if needed
+#GCC       ?= gcc
+#VCS_HOME  ?=
+#VCS       ?= vcs
+#URG       ?= urg
+#VERILATOR ?= verilator
+#DC_SHELL  ?= dc_shell
+#DVE       ?= dve
+#PYTHON    ?= python
+
+## Needed for verilator g++ compilations
+export SYSTEMC_INCLUDE=$BP_EXTERNAL_DIR/include
+export SYSTEMC_LIBDIR=$BP_EXTERNAL_DIR/lib-linux64
+
+## Add external tools and libraries to environment
+export LD_LIBRARY_PATH=$SYSTEMC_LIBDIR:$LD_LIBRARY_PATH
+#export PATH=$(BP_EXTERNAL_DIR)/bin:$(PATH)
+#export SYN_PATH=$(BP_TOP_DIR)/syn
+#export TB_PATH=$(BP_TOP_DIR)/test/tb
+#export MEM_PATH=$(BP_COMMON_DIR)/test/mem
+
+#export LOG_PATH=$(BP_TOP_DIR)/syn/logs
+#export RESULTS_PATH=$(BP_TOP_DIR)/syn/results
+#export REPORT_PATH=$(BP_TOP_DIR)/syn/reports
+
+TB="bp_top_trace_demo"
+CFG="e_bp_single_core_cfg"
+START_PC=0x80000000
+TOLERANCE=2
+
+# Select CCE ROM based on CFG and Coherence Protocol
+# TODO: is there a more scalable way to do this?
+if [ $CFG = "e_bp_half_core_cfg" ]
+then
+    NUM_LCE_P=1
+    N_WG=64
+elif [ $CFG = "e_bp_single_core_cfg" ]
+then
+    NUM_LCE_P=2
+    N_WG=64
+    #echo "Single Core config"
+#elif ($CFG -eq e_bp_dual_core_cfg)
+#    NUM_LCE_P=4
+#    N_WG=32
+#elif ($CFG -eq e_bp_quad_core_cfg)
+#    NUM_LCE_P=8
+#    N_WG=16
+#elif ($CFG -eq e_bp_oct_core_cfg)
+#     NUM_LCE_P=16
+#     N_WG=8
+#elif ($(CFG), e_bp_sexta_core_cfg)
+#    NUM_LCE_P=32
+#    N_WG=4
+#elif ($(CFG), e_bp_quad_core_2d_cfg)
+#    NUM_LCE_P=8
+#    N_WG=16
+#elif ($(CFG), e_bp_oct_core_2d_cfg)
+#    NUM_LCE_P=16
+#    N_WG=8
+fi
+
+COH_PROTO="mesi"
+CCE_MEM_PATH=$BP_ME_DIR/src/asm/roms/$COH_PROTO
+CCE_MEM=bp_cce_inst_rom_${COH_PROTO}_lce${NUM_LCE_P}_wg${N_WG}_assoc8.mem  # braces are required: "_" is a valid name character, so $COH_PROTO_lce etc. would expand as unset variables
+#DRAMSIM_CH_CFG=DDR2_micron_16M_8b_x8_sg3E.ini
+#DRAMSIM_SYS_CFG=system.ini
+#$include $BP_COMMON_DIR/syn/Makefile.verilator
+#iinclude $(BP_COMMON_DIR)/syn/Makefile.common
+#include $(BP_COMMON_DIR)/syn/Makefile.dc
+#include $(BP_COMMON_DIR)/syn/Makefile.regress
+#include $(BP_COMMON_DIR)/syn/Makefile.vcs
+cd ../
diff --git a/litex/soc/cores/cpu/blackparrot/update_BP.sh b/litex/soc/cores/cpu/blackparrot/update_BP.sh
new file mode 100755 (executable)
index 0000000..c6ddde9
--- /dev/null
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+
+##SOFTWARE CHANGES##
+
+#For some reason, the provided udivmoddi4.c is not functionally correct when used with either BP or Rocket under the IA extensions. Another version of udivmoddi4.c is used as a workaround to run the BIOS on these architectures.
+cp bp_software/udivmoddi4.c $LITEX/litex/soc/software/compiler_rt/lib/builtins/.
+cp bp_software/cce_ucode.mem /tmp/.
+
+##HARDWARE CHANGES## 
+#Need to change some files because of memory map differences and for proper synthesis
+cp bp_hardware/bp_common_pkg.vh $BP_COMMON_DIR/src/include/.
+cp bp_hardware/bp_cce_mmio_cfg_loader.v $BP_ME_DIR/test/common/.
+cp bp_hardware/bp_nonsynth_host.v $BP_TOP_DIR/test/common/.
+
+# Necessary files for FPGA implementations; flist_litex.verilator reads them from $BP_FPGA_DIR (exported by setEnvironment.sh)
+mkdir -p "$BP_FPGA_DIR" && cp -r bp_fpga/. "$BP_FPGA_DIR"/
index a454a17ea8521136611cce4144c52b35dd753afc..7c34c9872cd785b0e0e30b5a197c4835ffcb1a6c 100755 (executable)
@@ -1,6 +1,10 @@
 include ../include/generated/variables.mak
 include $(SOC_DIRECTORY)/software/common.mak
 
+ifeq ($(CPU),blackparrot)
+BP_LIBS =  -L$(BP_EXTERNAL_DIR)/lib/gcc/riscv64-unknown-elf/8.3.0 
+BP_FLAGS = -lgcc
+endif
 # Permit TFTP_SERVER_PORT override from shell environment / command line
 ifdef TFTP_SERVER_PORT
 CFLAGS += -DTFTP_SERVER_PORT=$(TFTP_SERVER_PORT)
@@ -23,6 +27,7 @@ endif
 
 bios.elf: $(BIOS_DIRECTORY)/linker.ld $(OBJECTS)
 
+
 %.elf: ../libbase/crt0-$(CPU)-ctr.o ../libnet/libnet.a ../libbase/libbase-nofloat.a ../libcompiler_rt/libcompiler_rt.a
        $(LD) $(LDFLAGS) -T $(BIOS_DIRECTORY)/linker.ld -N -o $@ \
                ../libbase/crt0-$(CPU)-ctr.o \
@@ -30,7 +35,10 @@ bios.elf: $(BIOS_DIRECTORY)/linker.ld $(OBJECTS)
                -L../libnet \
                -L../libbase \
                -L../libcompiler_rt \
-               -lnet -lbase-nofloat -lcompiler_rt
+               $(BP_LIBS) \
+               -lnet -lbase-nofloat -lcompiler_rt \
+               $(BP_FLAGS)
+       
 ifneq ($(OS),Windows_NT)
        chmod -x $@
 endif
diff --git a/litex/soc/software/bios/boot-helper-blackparrot.S b/litex/soc/software/bios/boot-helper-blackparrot.S
new file mode 100644 (file)
index 0000000..6dd74aa
--- /dev/null
@@ -0,0 +1,4 @@
+.section    .text, "ax", @progbits
+.global     boot_helper
+boot_helper: // exported symbol; the whole routine is the single jump below
+       jr x13 // x13 is a3 in the RISC-V ABI; jumps there without linking (presumably the boot address passed as the 4th argument, confirm against the C caller)
index 93c231bb1d8de6f919384d30bc3c1501b7f7b3c7..971bd37b6c0d8c4f387e0f2b1826b9e084ca3fda 100644 (file)
@@ -8,7 +8,20 @@
 #include <uart.h>
 #include <stdio.h>
 
-#ifdef __rocket__
+#if defined(__blackparrot__) /*TODO: Update this function for BP*/ //
+
+void isr(void); /* prototype for the definition that follows */
+void isr(void) /* placeholder trap handler: reports the first entry only and services nothing */
+{
+  static int onetime = 0; /* persists across calls, so the banner below is printed once */
+  if ( onetime == 0){
+    printf("ISR blackparrot\n");
+    printf("TRAP!!\n");
+    onetime++; /* every later call falls straight through and returns */
+  }
+}
+#elif defined(__rocket__) 
 void plic_init(void);
 void plic_init(void)
 {
index 621d377acaa8ccb81d548622690aeeb4fc89cc23..9aa9ff912d98cf329d478ad445d05761023ad81c 100644 (file)
@@ -459,7 +459,6 @@ static void do_command(char *c)
 #endif
        else if(strcmp(token, "memtest") == 0) memtest();
 #endif
-
        else if(strcmp(token, "") != 0)
                printf("Command not found\n");
 }
@@ -589,6 +588,8 @@ int main(int i, char **c)
        printf("Minerva");
 #elif __rocket__
        printf("RocketRV64[imac]");
+#elif __blackparrot__
+        printf("BlackParrotRV64[ia]");
 #else
        printf("Unknown");
 #endif
@@ -603,9 +604,10 @@ int main(int i, char **c)
 #endif
        printf("\n");
 
-       sdr_ok = 1;
+        sdr_ok = 1;
+
 #if defined(CSR_ETHMAC_BASE) || defined(CSR_SDRAM_BASE)
-       printf("--========== \e[1mInitialization\e[0m ============--\n");
+    printf("--========== \e[1mInitialization\e[0m ============--\n");
 #ifdef CSR_ETHMAC_BASE
        eth_init();
 #endif
@@ -628,7 +630,7 @@ int main(int i, char **c)
        }
 
        printf("--============= \e[1mConsole\e[0m ================--\n");
-       while(1) {
+    while(1) {
                putsnonl("\e[92;1mlitex\e[0m> ");
                readstr(buffer, 64);
                do_command(buffer);
index f7d8458c8e0eef89ca868687653a02228891967a..01f66e6b068b764cd23dbd87186e8b21a9e9a8e8 100644 (file)
@@ -46,6 +46,8 @@ __attribute__((unused)) static void cdelay(int i)
                __asm__ volatile("nop");
 #elif defined (__microwatt__)
                __asm__ volatile("nop");
+#elif defined (__blackparrot__)
+               __asm__ volatile("nop");
 #else
 #error Unsupported architecture
 #endif
index babc5424f76ca580aac8dfe5d908452be42f929d..7ff9b40338628d60ee2fd501648635b42b4db1d7 100644 (file)
@@ -40,6 +40,21 @@ extern void _irq_setmask(unsigned int);
 #define PLIC_CLAIM   0x0c200004L // Claim & completion register address
 #endif /* __rocket__ */
 
+
+#ifdef __blackparrot__
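+// These PLIC addresses mirror the RocketChip definitions above: the RocketChip uses a
+// Platform-Level Interrupt Controller (PLIC) which is programmed and queried via a set of MMIO registers.
+// TODO: How about BlackParrot? Confirm whether it has a PLIC at these addresses; should probably be included in the Linux version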
+
+#define PLIC_BASE    0x0c000000L // Base address and per-pin priority array
+#define PLIC_PENDING 0x0c001000L // Bit field matching currently pending pins
+#define PLIC_ENABLED 0x0c002000L // Bit field corresponding to the current mask
+#define PLIC_THRSHLD 0x0c200000L // Per-pin priority must be >= this to trigger
+#define PLIC_CLAIM   0x0c200004L // Claim & completion register address
+#endif /* __blackparrot__ */
+
+
+
 static inline unsigned int irq_getie(void)
 {
 #if defined (__lm32__)
@@ -58,6 +73,8 @@ static inline unsigned int irq_getie(void)
        return (csrr(mstatus) & CSR_MSTATUS_MIE) != 0;
 #elif defined (__microwatt__)
        return 0; // FIXME
+#elif defined (__blackparrot__) 
+       return (csrr(mstatus) & CSR_MSTATUS_MIE) != 0;//TODO
 #else
 #error Unsupported architecture
 #endif
@@ -85,6 +102,8 @@ static inline void irq_setie(unsigned int ie)
        if(ie) csrs(mstatus,CSR_MSTATUS_MIE); else csrc(mstatus,CSR_MSTATUS_MIE);
 #elif defined (__microwatt__)
        // FIXME
+#elif defined (__blackparrot__)
+       if(ie) csrs(mstatus,CSR_MSTATUS_MIE); else csrc(mstatus,CSR_MSTATUS_MIE);//TODO:BP
 #else
 #error Unsupported architecture
 #endif
@@ -114,6 +133,8 @@ static inline unsigned int irq_getmask(void)
        return *((unsigned int *)PLIC_ENABLED) >> 1;
 #elif defined (__microwatt__)
        return 0; // FIXME
+#elif defined (__blackparrot__)
+       return 0; //TODO:BP no mask register is read yet; return a defined value (as the microwatt branch does) instead of falling off a non-void function
 #else
 #error Unsupported architecture
 #endif
@@ -137,6 +158,8 @@ static inline void irq_setmask(unsigned int mask)
        *((unsigned int *)PLIC_ENABLED) = mask << 1;
 #elif defined (__microwatt__)
        // FIXME
+#elif defined (__blackparrot__)
+       //TODO:BP
 #else
 #error Unsupported architecture
 #endif
@@ -164,6 +187,8 @@ static inline unsigned int irq_pending(void)
        return *((unsigned int *)PLIC_PENDING) >> 1;
 #elif defined (__microwatt__)
        return 0; // FIXME
+#elif defined (__blackparrot__)
+       return csr_readl(PLIC_PENDING) >> 1;//TODO:BP
 #else
 #error Unsupported architecture
 #endif
index 9b41a737ecd484febc7142e1658e5d48c5f0a977..18753548e1f084d21a657a27e6b68146f0684f63 100644 (file)
@@ -26,8 +26,7 @@ static inline void mtspr(unsigned long add, unsigned long val)
 }
 #endif
 
-
-#if defined(__vexriscv__) || defined(__minerva__) || defined(__rocket__)
+#if defined(__vexriscv__) || defined(__minerva__) || defined(__rocket__) || defined(__blackparrot__)
 #include <csr-defs.h>
 #define csrr(reg) ({ unsigned long __tmp; \
   asm volatile ("csrr %0, " #reg : "=r"(__tmp)); \
diff --git a/litex/soc/software/libbase/crt0-blackparrot.S b/litex/soc/software/libbase/crt0-blackparrot.S
new file mode 100644 (file)
index 0000000..9badaa4
--- /dev/null
@@ -0,0 +1,77 @@
+.global main
+.global isr
+.global _start
+
+_start: // entry symbol: jumps over the trap handler below to crt_init
+  j crt_init
+  nop // seven nops pad _start to 8 instructions ahead of trap_entry (32 bytes if none are compressed; TODO confirm the alignment intent)
+  nop
+  nop
+  nop
+  nop
+  nop
+  nop
+
+trap_entry: // handler whose address crt_init writes to mtvec: save registers, call isr, restore, mret
+  sd x1,  - 1*8(sp) // x1 (ra) first; all 16 slots are written below the current sp before sp is moved
+  sd x5,  - 2*8(sp) // x5-x7 are t0-t2
+  sd x6,  - 3*8(sp)
+  sd x7,  - 4*8(sp)
+  sd x10, - 5*8(sp) // x10-x17 are a0-a7
+  sd x11, - 6*8(sp)
+  sd x12, - 7*8(sp)
+  sd x13, - 8*8(sp)
+  sd x14, - 9*8(sp)
+  sd x15, -10*8(sp)
+  sd x16, -11*8(sp)
+  sd x17, -12*8(sp)
+  sd x28, -13*8(sp) // x28-x31 are t3-t6
+  sd x29, -14*8(sp)
+  sd x30, -15*8(sp)
+  sd x31, -16*8(sp)
+  addi sp,sp,-16*8 // reserve the 16 slots just written (16 * 8 bytes)
+  call isr // C-level handler; x1 was saved above because call overwrites it
+  ld x1 , 15*8(sp) // offsets mirror the stores: slot -k*8 before the adjust is (16-k)*8 after it
+  ld x5,  14*8(sp)
+  ld x6,  13*8(sp)
+  ld x7,  12*8(sp)
+  ld x10, 11*8(sp)
+  ld x11, 10*8(sp)
+  ld x12,  9*8(sp)
+  ld x13,  8*8(sp)
+  ld x14,  7*8(sp)
+  ld x15,  6*8(sp)
+  ld x16,  5*8(sp)
+  ld x17,  4*8(sp)
+  ld x28,  3*8(sp)
+  ld x29,  2*8(sp)
+  ld x30,  1*8(sp)
+  ld x31,  0*8(sp)
+  addi sp,sp,16*8 // release the save area
+  mret // return from the machine-mode trap
+  .text
+
+
+crt_init: // startup path reached from _start: stack, trap vector, BSS clear, then main
+  la sp, _fstack + 8 // initial stack pointer derived from the _fstack symbol (defined outside this file)
+  la a0, trap_entry
+  csrw mtvec, a0 // traps now vector to trap_entry
+
+bss_init: // zero the region between the _fbss and _ebss symbols
+  la a0, _fbss
+  la a1, _ebss
+bss_loop:
+  beq a0,a1,bss_done // exit test is equality only, so this assumes the span is a multiple of 8 bytes (TODO confirm in the linker script)
+  sd zero,0(a0)
+  add a0,a0,8 // advance one doubleword
+  j bss_loop
+bss_done:
+
+//  call plic_init // initialize external interrupt controller
+#  li a0, 0x800   // external interrupt sources only (using LiteX timer);
+                 // NOTE: must still enable mstatus.MIE!
+  csrw mie,a0 // NOTE(review): the li above is commented out, so a0 still equals _ebss from the BSS loop here; confirm the intended mie value
+
+  call main
+inf_loop: // spin forever if main returns
+  j inf_loop
index 6e7bfafdfdea50fa5714573eea140e1ad82c4c0a..83ecd40866dbf9ecdeb73475831171422f67168c 100644 (file)
@@ -58,6 +58,9 @@ void flush_cpu_icache(void)
        asm volatile("nop");
 #elif defined (__microwatt__)
        /* FIXME: do something useful here! */
+        asm volatile("nop");
+#elif defined (__blackparrot__)
+       /* TODO: BP do something useful here! */
        asm volatile("nop");
 #else
 #error Unsupported architecture
@@ -107,6 +110,10 @@ void flush_cpu_dcache(void)
 #elif defined (__microwatt__)
        /* FIXME: do something useful here! */
        asm volatile("nop");
+/*SC_add: What should BP do here?*/
+#elif defined (__blackparrot__)
+       /* FIXME: do something useful here! */
+       asm volatile("nop");
 #else
 #error Unsupported architecture
 #endif