From: Tobias Platen Date: Sun, 27 Mar 2022 16:41:34 +0000 (+0200) Subject: begin verilator_trace backport X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ab2c64fa1c73c7669295f8296e69383e3c09f50d;p=microwatt.git begin verilator_trace backport --- diff --git a/Makefile b/Makefile index 73057f2..9befa3b 100644 --- a/Makefile +++ b/Makefile @@ -263,9 +263,31 @@ microwatt.json: $(synth_files) $(RAM_INIT_FILE) $(soc_extra_v) microwatt.v: $(synth_files) $(RAM_INIT_FILE) $(YOSYS) $(GHDLSYNTH) -p "ghdl --std=08 --no-formal $(GHDL_IMAGE_GENERICS) $(synth_files) -e toplevel; write_verilog $@" -microwatt-verilator: microwatt.v verilator/microwatt-verilator.cpp verilator/uart-verilator.c +microwatt-verilator-broken: microwatt.v verilator/microwatt-verilator.cpp verilator/uart-verilator.c $(VERILATOR) $(VERILATOR_FLAGS) -CFLAGS "$(VERILATOR_CFLAGS) -DCLK_FREQUENCY=$(CLK_FREQUENCY)" -Iuart16550 --assert --cc --exe --build $^ -o $@ -top-module toplevel @cp -f obj_dir/microwatt-verilator microwatt-verilator + +# Need to investigate why yosys is hitting verilator warnings, and eventually turn on -Wall +# --top-module toplevel +microwatt-verilator: microwatt.v verilator/microwatt-verilator.cpp verilator/uart-verilator.c + verilator -O3 -CFLAGS "-DCLK_FREQUENCY=$(CLK_FREQUENCY) -I../verilator" \ + --assert \ + --cc microwatt.v \ + --exe verilator/microwatt-verilator.cpp verilator/uart-verilator.c \ + -o $@ -Iuart16550 \ + -Wno-fatal -Wno-CASEOVERLAP -Wno-UNOPTFLAT \ + -Wno-BLKANDNBLK \ + -Wno-COMBDLY \ + -Wno-CASEINCOMPLETE \ + -Wno-WIDTH \ + --savable \ + --trace \ + # --unroll-count 256 \ + # --output-split 5000 \ + # --output-split-cfuncs 500 \ + # --output-split-ctrace 500 \ + make -C obj_dir -f Vmicrowatt.mk + @cp -f obj_dir/microwatt-verilator microwatt-verilator microwatt_out.config: microwatt.json $(LPF) $(NEXTPNR) --json $< --lpf $(LPF) --textcfg $@.tmp $(NEXTPNR_FLAGS) --package $(PACKAGE) diff --git a/verilator/microwatt-verilator.cpp 
b/verilator/microwatt-verilator.cpp
index 3c37f0a..a226393 100644
--- a/verilator/microwatt-verilator.cpp
+++ b/verilator/microwatt-verilator.cpp
@@ -1,7 +1,14 @@
 #include
-#include "Vtoplevel.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include "Vmicrowatt.h"
 #include "verilated.h"
 #include "verilated_vcd_c.h"
+#include "uart-verilator.h"
 
 /*
  * Current simulation time
@@ -24,12 +31,12 @@ double sc_time_stamp(void)
 VerilatedVcdC *tfp;
 #endif
 
-void tick(Vtoplevel *top)
+void tick(Vmicrowatt *top, bool dump)
 {
     top->ext_clk = 1;
     top->eval();
 #if VM_TRACE
-    if (tfp)
+    if (tfp && dump)
         tfp->dump((double) main_time);
 #endif
     main_time++;
@@ -37,22 +44,125 @@ void tick(Vtoplevel *top)
     top->ext_clk = 0;
     top->eval();
 #if VM_TRACE
-    if (tfp)
+    if (tfp && dump)
         tfp->dump((double) main_time);
 #endif
     main_time++;
 }
 
-void uart_tx(unsigned char tx);
-unsigned char uart_rx(void);
+// pretty-print dumped data in ASCII (to help identify strings):
+// alphanumeric bytes are written as-is, every other byte as '.',
+// followed by a newline.
+static void ascii_dump(unsigned char *data, int len, FILE *dump)
+{
+    for (int i = 0; i < len; i++) {
+        if (isalnum(data[i]))
+            putc(data[i], dump);
+        else
+            putc('.', dump);
+    }
+    putc('\n', dump);
+}
+
+// save/restore of verilator model to/from file.
+//
+// save_model writes, in this order, main_time, the raw bytes of the UART
+// state returned by uart_get_state(), and the model itself to the file
+// "verilator.save.<time>".  restore_model must read them back in exactly
+// the same order.
+void save_model(vluint64_t time, Vmicrowatt* topp)
+{
+    char fname[128];
+    // time is an unsigned 64-bit count: format it as such, and bound the
+    // write to the size of fname
+    snprintf(fname, sizeof(fname), "verilator.save.%llu",
+             (unsigned long long)time);
+    VerilatedSave os;
+    struct uart_tx_state *uart = uart_get_state();
+
+    os.open(fname);
+    os << main_time; // user code must save the timestamp, etc
+    os.write(uart, sizeof(*uart));
+    os << *topp;
+}
+
+// Counterpart of save_model: reloads main_time, the UART state and the
+// model from "verilator.save.<time>".  The UART state is only handed to
+// uart_restore() after the model has been read.
+void restore_model(vluint64_t time, Vmicrowatt* topp)
+{
+    char fname[128];
+    // must produce the same file name as save_model
+    snprintf(fname, sizeof(fname), "verilator.save.%llu",
+             (unsigned long long)time);
+    VerilatedRestore os;
+    struct uart_tx_state uart;
+    os.open(fname);
+    os >> main_time;
+    os.read(&uart, sizeof(uart));
+    os >> *topp;
+    uart_restore(&uart);
+}
+
+//save bram memory out to a file.
use for snapshots
+// Writes the first sz bytes of mem to the file "bram.snapshot.<time>",
+// creating it (mode 0600) if needed and leaving it exactly sz bytes long.
+// Returns 0 on success, -1 if the file could not be opened, sized or mapped.
+int memdump(vluint64_t time, unsigned char *mem, size_t sz)
+{
+    char fname[128];
+    // time is an unsigned 64-bit count: format it as such, and bound the
+    // write to the size of fname
+    snprintf(fname, sizeof(fname), "bram.snapshot.%llu",
+             (unsigned long long)time);
+    int fd = open(fname, O_RDWR | O_CREAT, (mode_t)0600);
+    if (fd < 0) {
+        perror(fname);
+        return -1;
+    }
+    // size the file to exactly sz bytes before mapping it, so that every
+    // page of the mapping is backed by the file
+    if (ftruncate(fd, sz) < 0) {
+        perror(fname);
+        close(fd);
+        return -1;
+    }
+    void *map = mmap(NULL, sz, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+    if (map == MAP_FAILED) {
+        perror(fname);
+        close(fd);
+        return -1;
+    }
+    memcpy(map, mem, sz);
+    msync(map, sz, MS_SYNC);
+    munmap(map, sz);
+    close(fd);
+    return 0;
+}
+
+// save-trigger offsets
+vluint64_t save_offset = 1000000;
+vluint64_t save_countdown = save_offset-10;
+
+// write (masked by sel) to internal mem offset by bram_addr line
+static void mem_write(unsigned char *mem,
+                      unsigned long bram_addr, unsigned long long bram_di,
+                      int bram_sel)
+{
+    unsigned char *mat = &(mem[bram_addr*8]); // 64-bit (8 byte) wide
+    unsigned char *data_in = (unsigned char*)&bram_di; // treat as bytes
+    for (int i = 0; i < 8; i++) {
+        if (bram_sel & (1<open("microwatt-verilator.vcd");
 #endif
 
-    // Reset
-    top->ext_rst = 0;
-    for (unsigned long i = 0; i < 5; i++)
-        tick(top);
-    top->ext_rst = 1;
+    // allocate a stonking chunk of memory but don't use it yet
+    unsigned char *_mem = NULL; // gets deleted if not required
+    unsigned char *mem = NULL; // the actual memory (only set on file load)
+    size_t sz = 0x10000000;
+    // zero-filled, so bram that no file covers reads back as 0
+    _mem = (unsigned char*)calloc(1, sz);
+    if (_mem == NULL) {
+        fprintf(stderr, "could not allocate 0x%zx bytes of bram\n", sz);
+        exit(1);
+    }
+    vluint64_t restore_time = 0;
+
+    // identify bram files to load (if not starting "+[verilator]")
+    // here we can specify any number of files, but at present only
+    // (realistically) two are supported: the bootloader (at address 0x0)
+    // and the second file (linux kernel) at hard-coded 0x600000.
+    // it would be much better to use elf-reading here but that's a lot
+    // of hassle
+    //
+    // arguments handled by this loop:
+    //   -s <time>  restore the model saved at <time> and load the matching
+    //              "bram.snapshot.<time>" at offset 0
+    //   +...       skipped here (assumed to be a "+verilator" style argument)
+    //   <file>     loaded into bram; the file given as argv[2] goes to the
+    //              hard-coded linux offset, every other file to offset 0
+    for (int i = 1; i < argc; i++) {
+        char *bram_file = NULL;
+        char snapshot_name[128]; // backs bram_file for the "-s" case
+        if (strcmp("-s", argv[i]) == 0) {
+            // "-s" consumes the next argument, so there has to be one
+            if (i + 1 >= argc) {
+                fprintf(stderr, "-s requires a snapshot time argument\n");
+                exit(1);
+            }
+            restore_time = strtoull(argv[i+1], NULL, 10);
+            restore_model(restore_time, top);
+            snprintf(snapshot_name, sizeof(snapshot_name),
+                     "bram.snapshot.%llu", (unsigned long long)restore_time);
+            bram_file = snapshot_name;
+            i++;
+        }
+        else if (argv[i][0] != '+') {
+            // assume rest of argument is "+verilator", assume a filename
+            bram_file = argv[i];
+        }
+
+        // mmap the file in private (copy-on-write) mode so that its original
+        // contents are not overwritten
+        if (bram_file != NULL) {
+            unsigned char *fmem = NULL;
+
+            int fd = open(bram_file, O_RDONLY);
+            if (fd < 0) {
+                printf("\n\"%s \" could not open\n", bram_file);
+                exit(1);
+            }
+
+            struct stat statbuf;
+            int err = fstat(fd, &statbuf);
+            if (err < 0) {
+                printf("\n\"%s \" could not stat\n", bram_file);
+                exit(2);
+            }
+
+            fmem = (unsigned char*)mmap(NULL, statbuf.st_size,
+                                        PROT_READ|PROT_WRITE, MAP_PRIVATE,
+                                        fd, 0);
+            if (fmem == MAP_FAILED) {
+                printf("Mapping Failed\n");
+                exit(2);
+            }
+            close(fd);
+
+            // copy the file over (bit of a hack, here)
+            mem = _mem;
+            size_t offs = 0x0; // normal start
+            if (i == 2 && restore_time == 0) {
+                offs = 0x600000; // hard-coded offset of the linux binary
+            }
+            // the copy below must stay inside the sz-byte bram buffer
+            if ((size_t)statbuf.st_size > sz - offs) {
+                fprintf(stderr, "\"%s\" does not fit in bram at 0x%zx\n",
+                        bram_file, offs);
+                exit(2);
+            }
+            printf("loading %s at 0x%zx size 0x%llx\n", bram_file, offs,
+                   (unsigned long long)statbuf.st_size);
+            memcpy(mem+offs, fmem, statbuf.st_size);
+            munmap(fmem, statbuf.st_size);
+        }
+    }
+    // a file wasn't loaded so the stonking-chunk-o-mem can be free'd
+    if (mem == NULL) {
+        free(_mem);
+    }
+
+    unsigned long long bram_data = 0;
+    unsigned long long bram_data1 = 0; // another clock delay
+    int bram_rd = false;
+
+    // dump file for memory read/write traces [uart takes over stdin/stdout]
+    FILE *dump = fopen("bram.dump", "w");
+    if (dump == NULL) {
+        // dump is written to unconditionally further down
+        perror("bram.dump");
+        exit(1);
+    }
+
+    // read data is one clock cycle delayed
+    bool next_read = false;
+    unsigned long bram_addr = 0;
+
+    if (restore_time == 0) {
+        // Reset
+ top->ext_rst = 0; + for (unsigned long i = 0; i < 5; i++) + tick(top, true); + top->ext_rst = 1; + } else { + printf("restored at %lld\n", main_time); + } + + unsigned long long bram_do = 0; + + // trace conditions + bool traceme = true; + int trigger_occurrences = TRIGGER_OCCURENCES; +#ifdef TRIGGER_COUNTDOWN + int trigger_countdown = TRIGGER_COUNTDOWN; +#ifdef TRIGGER_ENABLE + traceme = false; +#endif +#endif // TRIGGER_COUNTDOWN while(!Verilated::gotFinish()) { - tick(top); + + // check if a snapshot of the model should be actioned, + // otherwise continue counting down + if (save_countdown == 0) { + fprintf(dump, "snapshot saving at %ld\n", main_time); + fflush(dump); + save_model(main_time, top); + if (mem != NULL) { + memdump(main_time, mem, sz); + } + save_countdown = save_offset-1; // loop for next snapshot + } else { + --save_countdown; + } + + tick(top, traceme); + + // read/write the memory to/from the mmap'd file (if given) + if (mem != NULL) { + top->bram_do = bram_do; + if (top->bram_re ) { + bram_do = ((unsigned long long*)mem)[top->bram_addr]; + } + if (top->bram_we) { + mem_write(mem, top->bram_addr, top->bram_di, top->bram_sel); + } + } uart_tx(top->uart0_txd); top->uart0_rxd = uart_rx(); + +#ifdef BRAM_DEBUG + if (top->nia_req) { +#ifdef TRIGGER_ENABLE + if ((top->nia == TRIGGER_NIA) && (top->insn == TRIGGER_INSN)) { + if (trigger_occurrences == 1) { + traceme = true; + fprintf(dump, "trace trigger enabled\n"); + } + if (trigger_occurrences != 0) { + --trigger_occurrences; + } + } +#ifdef TRIGGER_COUNTDOWN + // tracing active for only TRIGGER_COUNTDOWN cycles + if (traceme) { + --trigger_countdown; + if (trigger_countdown == 0) { + traceme = false; +#ifdef TERMINATE_AT_COUNTDOWN + break; +#endif + } + } +#endif // TRIGGER_COUNTDOWN +#endif // TRIGGER_ENABLE + fprintf(dump, "pc %16lx insn %8x msr %16lx", + top->nia, top->insn, top->msr_o); + } +#ifdef LDST_DUMP + if (top->ldst_req) { + if (!top->nia_req) { + fprintf(dump, "pc %-16s insn %-8s msr 
%-16s", "", "", ""); + } + fprintf(dump, " ldst %16lx", top->ldst_addr); + } +#endif + if (top->ldst_req || top->nia_req) { + fprintf(dump, "\n"); + } + if (top->bram_we) { + fprintf(dump, " " \ + "wr @ %08x do %16lx sel %02x ", + top->bram_addr, top->bram_di, top->bram_sel); + ascii_dump((unsigned char*)&top->bram_di, 8, dump); + fflush(dump); + } + // read on one clock delay + if (top->bram_re) { + fprintf(dump, " " \ + "rd @ %08x di %16lx sel %02x ", + top->bram_addr, bram_do, top->bram_sel); + ascii_dump((unsigned char*)&bram_do, 8, dump); + fflush(dump); + } +#endif // BRAM_DEBUG } + if (mem != NULL) { + free(mem); + } + + fclose(dump); + #if VM_TRACE tfp->close(); delete tfp; diff --git a/verilator/uart-verilator.c b/verilator/uart-verilator.c index 8492a11..392bf0c 100644 --- a/verilator/uart-verilator.c +++ b/verilator/uart-verilator.c @@ -5,6 +5,7 @@ #include #include #include +#include "uart-verilator.h" /* Should we exit simulation on ctrl-c or pass it through? */ #define EXIT_ON_CTRL_C @@ -13,23 +14,7 @@ /* Round to nearest */ #define BITWIDTH ((CLK_FREQUENCY+(BAUD/2))/BAUD) -/* - * Our UART uses 16x oversampling, so at 50 MHz and 115200 baud - * each sample is: 50000000/(115200*16) = 27 clock cycles. This - * means each bit is off by 0.47% so for 8 bits plus a start and - * stop bit the errors add to be 4.7%. 
- */ -static double error = 0.05; - -enum state { - IDLE, START_BIT, BITS, STOP_BIT, ERROR -}; - -static enum state tx_state = IDLE; -static unsigned long tx_countbits; -static unsigned char tx_bits; -static unsigned char tx_byte; -static unsigned char tx_prev; +static struct uart_tx_state uart; /* * Return an error if the transition is not close enough to the start or @@ -37,9 +22,9 @@ static unsigned char tx_prev; */ static bool is_error(unsigned long bits) { - double e = 1.0 * tx_countbits / BITWIDTH; + double e = 1.0 * uart.tx_countbits / BITWIDTH; - if ((e <= (1.0-error)) && (e >= error)) + if ((e <= (1.0-uart.error)) && (e >= uart.error)) return true; return false; @@ -47,88 +32,88 @@ static bool is_error(unsigned long bits) void uart_tx(unsigned char tx) { - switch (tx_state) { + switch (uart.tx_state) { case IDLE: if (tx == 0) { - tx_state = START_BIT; - tx_countbits = BITWIDTH; - tx_bits = 0; - tx_byte = 0; + uart.tx_state = START_BIT; + uart.tx_countbits = BITWIDTH; + uart.tx_bits = 0; + uart.tx_byte = 0; } break; case START_BIT: - tx_countbits--; + uart.tx_countbits--; if (tx == 1) { - if (is_error(tx_countbits)) { - printf("START_BIT error %ld %ld\n", BITWIDTH, tx_countbits); - tx_countbits = BITWIDTH*2; - tx_state = ERROR; + if (is_error(uart.tx_countbits)) { + printf("START_BIT error %ld %ld\n", BITWIDTH, uart.tx_countbits); + uart.tx_countbits = BITWIDTH*2; + uart.tx_state = ERROR; break; } } - if (tx_countbits == 0) { - tx_state = BITS; - tx_countbits = BITWIDTH; + if (uart.tx_countbits == 0) { + uart.tx_state = BITS; + uart.tx_countbits = BITWIDTH; } break; case BITS: - tx_countbits--; - if (tx_countbits == BITWIDTH/2) { - tx_byte = tx_byte | (tx << tx_bits); - tx_bits = tx_bits + 1; + uart.tx_countbits--; + if (uart.tx_countbits == BITWIDTH/2) { + uart.tx_byte = uart.tx_byte | (tx << uart.tx_bits); + uart.tx_bits = uart.tx_bits + 1; } - if (tx != tx_prev) { - if (is_error(tx_countbits)) { - printf("BITS error %ld %ld\n", BITWIDTH, tx_countbits); - 
tx_countbits = BITWIDTH*2; - tx_state = ERROR; + if (tx != uart.tx_prev) { + if (is_error(uart.tx_countbits)) { + printf("BITS error %ld %ld\n", BITWIDTH, uart.tx_countbits); + uart.tx_countbits = BITWIDTH*2; + uart.tx_state = ERROR; break; } } - if (tx_countbits == 0) { - if (tx_bits == 8) { - tx_state = STOP_BIT; + if (uart.tx_countbits == 0) { + if (uart.tx_bits == 8) { + uart.tx_state = STOP_BIT; } - tx_countbits = BITWIDTH; + uart.tx_countbits = BITWIDTH; } break; case STOP_BIT: - tx_countbits--; + uart.tx_countbits--; if (tx == 0) { - if (is_error(tx_countbits)) { - printf("STOP_BIT error %ld %ld\n", BITWIDTH, tx_countbits); - tx_countbits = BITWIDTH*2; - tx_state = ERROR; + if (is_error(uart.tx_countbits)) { + printf("STOP_BIT error %ld %ld\n", BITWIDTH, uart.tx_countbits); + uart.tx_countbits = BITWIDTH*2; + uart.tx_state = ERROR; break; } /* Go straight to idle */ - write(STDOUT_FILENO, &tx_byte, 1); - tx_state = IDLE; + write(STDOUT_FILENO, &uart.tx_byte, 1); + uart.tx_state = IDLE; } - if (tx_countbits == 0) { - write(STDOUT_FILENO, &tx_byte, 1); - tx_state = IDLE; + if (uart.tx_countbits == 0) { + write(STDOUT_FILENO, &uart.tx_byte, 1); + uart.tx_state = IDLE; } break; case ERROR: - tx_countbits--; - if (tx_countbits == 0) { - tx_state = IDLE; + uart.tx_countbits--; + if (uart.tx_countbits == 0) { + uart.tx_state = IDLE; } break; } - tx_prev = tx; + uart.tx_prev = tx; } static struct termios oldt; @@ -188,12 +173,6 @@ static int nonblocking_read(unsigned char *c) } } -static enum state rx_state = IDLE; -static unsigned char rx_char; -static unsigned long rx_countbits; -static unsigned char rx_bit; -static unsigned char rx = 1; - /* Avoid calling poll() too much */ #define RX_INTERVAL 10000 static unsigned long rx_sometimes; @@ -202,52 +181,62 @@ unsigned char uart_rx(void) { unsigned char c; - switch (rx_state) { + switch (uart.rx_state) { case IDLE: if (rx_sometimes++ >= RX_INTERVAL) { rx_sometimes = 0; if (nonblocking_read(&c)) { - rx_state = 
START_BIT; - rx_char = c; - rx_countbits = BITWIDTH; - rx_bit = 0; - rx = 0; + uart.rx_state = START_BIT; + uart.rx_char = c; + uart.rx_countbits = BITWIDTH; + uart.rx_bit = 0; + uart.rx = 0; } } break; case START_BIT: - rx_countbits--; - if (rx_countbits == 0) { - rx_state = BITS; - rx_countbits = BITWIDTH; - rx = rx_char & 1; + uart.rx_countbits--; + if (uart.rx_countbits == 0) { + uart.rx_state = BITS; + uart.rx_countbits = BITWIDTH; + uart.rx = uart.rx_char & 1; } break; case BITS: - rx_countbits--; - if (rx_countbits == 0) { - rx_bit = rx_bit + 1; - if (rx_bit == 8) { - rx = 1; - rx_state = STOP_BIT; + uart.rx_countbits--; + if (uart.rx_countbits == 0) { + uart.rx_bit = uart.rx_bit + 1; + if (uart.rx_bit == 8) { + uart.rx = 1; + uart.rx_state = STOP_BIT; } else { - rx = (rx_char >> rx_bit) & 1; + uart.rx = (uart.rx_char >> uart.rx_bit) & 1; } - rx_countbits = BITWIDTH; + uart.rx_countbits = BITWIDTH; } break; case STOP_BIT: - rx_countbits--; - if (rx_countbits == 0) { - rx_state = IDLE; + uart.rx_countbits--; + if (uart.rx_countbits == 0) { + uart.rx_state = IDLE; } break; } - return rx; + return uart.rx; +} + +struct uart_tx_state * uart_get_state(void) +{ + return &uart; +} + +void uart_restore(struct uart_tx_state *new_state) +{ + memcpy(&uart, new_state, sizeof(struct uart_tx_state)); } diff --git a/verilator/uart-verilator.h b/verilator/uart-verilator.h new file mode 100644 index 0000000..10ae8aa --- /dev/null +++ b/verilator/uart-verilator.h @@ -0,0 +1,32 @@ +/* + * Our UART uses 16x oversampling, so at 50 MHz and 115200 baud + * each sample is: 50000000/(115200*16) = 27 clock cycles. This + * means each bit is off by 0.47% so for 8 bits plus a start and + * stop bit the errors add to be 4.7%. 
+ */
+
+enum state { /* states shared by the uart_tx() and uart_rx() state machines */
+    IDLE, START_BIT, BITS, STOP_BIT, ERROR
+};
+
+struct uart_tx_state { /* holds both the tx and the rx side, despite the name; NOTE(review): default member initializers are C++-only - confirm uart-verilator.c is built as C++ */
+    double error = 0.05; /* is_error() reports an error when tx_countbits/BITWIDTH lies within [error, 1-error] */
+
+    enum state tx_state = IDLE; /* current state of the uart_tx() decoder */
+    unsigned long tx_countbits; /* decremented on each uart_tx() call outside IDLE; reloaded with BITWIDTH per bit */
+    unsigned char tx_bits; /* number of data bits sampled so far; 8 moves on to STOP_BIT */
+    unsigned char tx_byte; /* byte being assembled, bit tx_bits first; written to stdout after the stop bit */
+    unsigned char tx_prev; /* tx value seen on the previous uart_tx() call */
+
+    enum state rx_state = IDLE; /* current state of the uart_rx() generator */
+    unsigned char rx_char; /* byte obtained from nonblocking_read() that is being sent */
+    unsigned long rx_countbits; /* calls left in the current bit; reloaded with BITWIDTH per bit */
+    unsigned char rx_bit; /* index of the data bit of rx_char currently driven */
+    unsigned char rx = 1; /* line level returned by uart_rx(); 1 while idle and during the stop bit */
+};
+
+void uart_tx(unsigned char tx); /* feed one sample of the tx line to the decoder */
+unsigned char uart_rx(void); /* advance the rx generator and return the current rx line level */
+struct uart_tx_state * uart_get_state(void); /* returns a pointer to the single static state instance in uart-verilator.c */
+void uart_restore(struct uart_tx_state *); /* overwrites that static instance with a copy of the argument */
+