From 9c7cc711bcb35cd7938bb133382f45bf13d10cbc Mon Sep 17 00:00:00 2001 From: Gabe Black Date: Thu, 24 Dec 2020 07:53:57 -0800 Subject: [PATCH] dev: Let the pixel pump bypass the DMA FIFO in non-caching mode. When in non-caching mode, performance metrics are not meaningful, and we're just interested in functional level behavior. Going through the DMA FIFO in the HDLCD controller is very inefficient, and prevents reading a batch of pixels from memory all in one go. Change-Id: I3fb6d4d06730b5a94b5399f01aa02186baa5c9b3 Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/38721 Reviewed-by: Andreas Sandberg Maintainer: Andreas Sandberg Tested-by: kokoro --- src/dev/arm/hdlcd.cc | 26 +++++++++++++++++++++++++- src/dev/arm/hdlcd.hh | 15 ++++++++++++++- src/dev/pixelpump.cc | 16 +++++++--------- src/dev/pixelpump.hh | 26 +++++++++++++++++++++++++- 4 files changed, 71 insertions(+), 12 deletions(-) diff --git a/src/dev/arm/hdlcd.cc b/src/dev/arm/hdlcd.cc index bb56da63c..c6ff6db29 100644 --- a/src/dev/arm/hdlcd.cc +++ b/src/dev/arm/hdlcd.cc @@ -37,6 +37,7 @@ #include "dev/arm/hdlcd.hh" +#include "base/compiler.hh" #include "base/output.hh" #include "base/trace.hh" #include "base/vnc/vncinput.hh" @@ -505,6 +506,25 @@ HDLcd::pxlNext(Pixel &p) } } +size_t +HDLcd::lineNext(std::vector<Pixel>::iterator pixel_it, size_t line_length) +{ + const size_t byte_count = line_length * conv.length; + + lineBuffer.resize(byte_count); + dmaRead(bypassLineAddress, byte_count, nullptr, lineBuffer.data()); + + bypassLineAddress += fb_line_pitch; + + uint8_t *bufPtr = lineBuffer.data(); + for (size_t i = 0; i < line_length; i++) { + *pixel_it++ = conv.toPixel(bufPtr); + bufPtr += conv.length; + } + + return line_length; +} + void HDLcd::pxlVSyncBegin() { @@ -516,7 +536,11 @@ void HDLcd::pxlVSyncEnd() { DPRINTF(HDLcd, "End of VSYNC, starting DMA engine\n"); - dmaEngine->startFrame(fb_base); + if (sys->bypassCaches()) { + bypassLineAddress = fb_base; + } else { + 
dmaEngine->startFrame(fb_base); + } } void diff --git a/src/dev/arm/hdlcd.hh b/src/dev/arm/hdlcd.hh index 30f14d93b..6deb3c22a 100644 --- a/src/dev/arm/hdlcd.hh +++ b/src/dev/arm/hdlcd.hh @@ -75,6 +75,7 @@ #include #include +#include <vector> #include "base/framebuffer.hh" #include "base/imgwriter.hh" @@ -252,6 +253,8 @@ class HDLcd: public AmbaDmaDevice ColorSelectReg blue_select = 0; /**< Blue color select register */ /** @} */ + std::vector<uint8_t> lineBuffer; + uint32_t readReg(Addr offset); void writeReg(Addr offset, uint32_t value); @@ -267,6 +270,7 @@ class HDLcd: public AmbaDmaDevice public: // Pixel pump callbacks bool pxlNext(Pixel &p); + size_t lineNext(std::vector<Pixel>::iterator pixel_it, size_t line_length); void pxlVSyncBegin(); void pxlVSyncEnd(); void pxlUnderrun(); @@ -326,12 +330,19 @@ class HDLcd: public AmbaDmaDevice { public: PixelPump(HDLcd &p, ClockDomain &pxl_clk, unsigned pixel_chunk) - : BasePixelPump(p, pxl_clk, pixel_chunk), parent(p) {} + : BasePixelPump(p, pxl_clk, pixel_chunk), parent(p) + {} void dumpSettings(); protected: bool nextPixel(Pixel &p) override { return parent.pxlNext(p); } + size_t + nextLine(std::vector<Pixel>::iterator pixel_it, + size_t line_length) override + { + return parent.lineNext(pixel_it, line_length); + } void onVSyncBegin() override { return parent.pxlVSyncBegin(); } void onVSyncEnd() override { return parent.pxlVSyncEnd(); } @@ -348,6 +359,8 @@ class HDLcd: public AmbaDmaDevice HDLcd &parent; }; + Addr bypassLineAddress = 0; + /** Handler for fast frame refresh in KVM-mode */ void virtRefresh(); EventFunctionWrapper virtRefreshEvent; diff --git a/src/dev/pixelpump.cc b/src/dev/pixelpump.cc index 3846e7611..0722f3e8c 100644 --- a/src/dev/pixelpump.cc +++ b/src/dev/pixelpump.cc @@ -37,6 +37,8 @@ #include "dev/pixelpump.hh" +#include "base/logging.hh" + const DisplayTimings DisplayTimings::vga( 640, 480, 48, 96, 16, @@ -281,16 +283,12 @@ BasePixelPump::renderFrame() void BasePixelPump::renderLine() { - const unsigned pos_y(posY()); + 
const unsigned pos_y = posY(); + const size_t _width = fb.width(); - Pixel pixel(0, 0, 0); - for (_posX = 0; _posX < _timings.width; ++_posX) { - if (!nextPixel(pixel)) { - panic("Unexpected underrun in BasePixelPump (%u, %u)\n", - _posX, pos_y); - } - fb.pixel(_posX, pos_y) = pixel; - } + auto pixel_it = fb.pixels.begin() + _width * pos_y; + panic_if(nextLine(pixel_it, _width) != _width, + "Unexpected underrun in BasePixelPump (%u, %u)", _width, pos_y); } diff --git a/src/dev/pixelpump.hh b/src/dev/pixelpump.hh index 86a0ae078..b2987bb05 100644 --- a/src/dev/pixelpump.hh +++ b/src/dev/pixelpump.hh @@ -38,6 +38,8 @@ #ifndef __DEV_PIXELPUMP_HH__ #define __DEV_PIXELPUMP_HH__ +#include <vector> + #include "base/framebuffer.hh" #include "sim/clocked_object.hh" @@ -171,7 +173,7 @@ class BasePixelPump /** Update frame size using display timing */ void updateTimings(const DisplayTimings &timings); - /** Render an entire frame in KVM execution mode */ + /** Render an entire frame in non-caching mode */ void renderFrame(); /** Starting pushing pixels in timing mode */ @@ -219,6 +221,28 @@ class BasePixelPump */ virtual bool nextPixel(Pixel &p) = 0; + /** + * Get the next line of pixels directly from memory. This is for use from + * the renderFrame which is called in non-caching mode. + * + * The default implementation falls back to calling nextPixel over and + * over, but a more efficient implementation could retrieve the entire line + * of pixels all at once using fewer access to memory which bypass any + * intermediate structures like an incoming FIFO. + * + * @param ps A vector iterator to store retrieved pixels into. + * @param line_length The number of pixels being requested. + * @return The number of pixels actually retrieved. + */ + virtual size_t + nextLine(std::vector<Pixel>::iterator ps, size_t line_length) + { + size_t count = 0; + while (count < line_length && nextPixel(*ps++)) + count++; + return count; + } + /** First pixel clock of the first VSync line. 
*/ virtual void onVSyncBegin() {}; -- 2.30.2