gallium/hash_table: turn it into a wrapper around util/hash_table
[mesa.git] / src / gallium / drivers / v3d / v3d_tiling.c
index f9c4a34218477f1bfd1635c2bc26fc1bdc7f1aa1..95cd0dff60fe3eb864b8c8bb3a9e2c6e444c0dcd 100644 (file)
@@ -21,7 +21,7 @@
  * IN THE SOFTWARE.
  */
 
-/** @file vc5_tiling.c
+/** @file v3d_tiling.c
  *
  * Handles information about the VC5 tiling formats, and loading and storing
  * from them.
 #include "v3d_screen.h"
 #include "v3d_context.h"
 #include "v3d_tiling.h"
+#include "broadcom/common/v3d_cpu_tiling.h"
 
 /** Return the width in pixels of a 64-byte microtile. */
 uint32_t
-vc5_utile_width(int cpp)
+v3d_utile_width(int cpp)
 {
         switch (cpp) {
         case 1:
@@ -52,7 +53,7 @@ vc5_utile_width(int cpp)
 
 /** Return the height in pixels of a 64-byte microtile. */
 uint32_t
-vc5_utile_height(int cpp)
+v3d_utile_height(int cpp)
 {
         switch (cpp) {
         case 1:
@@ -75,12 +76,11 @@ vc5_utile_height(int cpp)
  * arrangement.
  */
 static inline uint32_t
-vc5_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y)
+v3d_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y)
 {
-        uint32_t utile_w = vc5_utile_width(cpp);
-        uint32_t utile_h = vc5_utile_height(cpp);
+        uint32_t utile_w = v3d_utile_width(cpp);
 
-        assert(x < utile_w && y < utile_h);
+        assert(x < utile_w && y < v3d_utile_height(cpp));
 
         return x * cpp + y * utile_w * cpp;
 }
@@ -91,17 +91,17 @@ vc5_get_utile_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y)
  * LINEARTILE is a single line of utiles in either the X or Y direction.
  */
 static inline uint32_t
-vc5_get_lt_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y)
+v3d_get_lt_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y)
 {
-        uint32_t utile_w = vc5_utile_width(cpp);
-        uint32_t utile_h = vc5_utile_height(cpp);
+        uint32_t utile_w = v3d_utile_width(cpp);
+        uint32_t utile_h = v3d_utile_height(cpp);
         uint32_t utile_index_x = x / utile_w;
         uint32_t utile_index_y = y / utile_h;
 
         assert(utile_index_x == 0 || utile_index_y == 0);
 
         return (64 * (utile_index_x + utile_index_y) +
-                vc5_get_utile_pixel_offset(cpp,
+                v3d_get_utile_pixel_offset(cpp,
                                            x & (utile_w - 1),
                                            y & (utile_h - 1)));
 }
@@ -113,11 +113,11 @@ vc5_get_lt_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y)
  * utiles), and the UIF blocks are in 1 or 2 columns in raster order.
  */
 static inline uint32_t
-vc5_get_ublinear_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y,
+v3d_get_ublinear_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y,
                               int ublinear_number)
 {
-        uint32_t utile_w = vc5_utile_width(cpp);
-        uint32_t utile_h = vc5_utile_height(cpp);
+        uint32_t utile_w = v3d_utile_width(cpp);
+        uint32_t utile_h = v3d_utile_height(cpp);
         uint32_t ub_w = utile_w * 2;
         uint32_t ub_h = utile_h * 2;
         uint32_t ub_x = x / ub_w;
@@ -127,23 +127,23 @@ vc5_get_ublinear_pixel_offset(uint32_t cpp, uint32_t x, uint32_t y,
                        ub_x) +
                 ((x & utile_w) ? 64 : 0) +
                 ((y & utile_h) ? 128 : 0) +
-                + vc5_get_utile_pixel_offset(cpp,
+                + v3d_get_utile_pixel_offset(cpp,
                                              x & (utile_w - 1),
                                              y & (utile_h - 1)));
 }
 
 static inline uint32_t
-vc5_get_ublinear_2_column_pixel_offset(uint32_t cpp, uint32_t image_h,
+v3d_get_ublinear_2_column_pixel_offset(uint32_t cpp, uint32_t image_h,
                                        uint32_t x, uint32_t y)
 {
-        return vc5_get_ublinear_pixel_offset(cpp, x, y, 2);
+        return v3d_get_ublinear_pixel_offset(cpp, x, y, 2);
 }
 
 static inline uint32_t
-vc5_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h,
+v3d_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h,
                                        uint32_t x, uint32_t y)
 {
-        return vc5_get_ublinear_pixel_offset(cpp, x, y, 1);
+        return v3d_get_ublinear_pixel_offset(cpp, x, y, 1);
 }
 
 /**
@@ -154,11 +154,11 @@ vc5_get_ublinear_1_column_pixel_offset(uint32_t cpp, uint32_t image_h,
  * 4x4 groups, and those 4x4 groups are then stored in raster order.
  */
 static inline uint32_t
-vc5_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y,
+v3d_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y,
                          bool do_xor)
 {
-        uint32_t utile_w = vc5_utile_width(cpp);
-        uint32_t utile_h = vc5_utile_height(cpp);
+        uint32_t utile_w = v3d_utile_width(cpp);
+        uint32_t utile_h = v3d_utile_height(cpp);
         uint32_t mb_width = utile_w * 2;
         uint32_t mb_height = utile_h * 2;
         uint32_t log2_mb_width = ffs(mb_width) - 1;
@@ -190,7 +190,7 @@ vc5_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y,
 
         uint32_t mb_pixel_address = (mb_base_addr +
                                      mb_tile_offset +
-                                     vc5_get_utile_pixel_offset(cpp,
+                                     v3d_get_utile_pixel_offset(cpp,
                                                                 utile_x,
                                                                 utile_y));
 
@@ -198,28 +198,32 @@ vc5_get_uif_pixel_offset(uint32_t cpp, uint32_t image_h, uint32_t x, uint32_t y,
 }
 
 static inline uint32_t
-vc5_get_uif_xor_pixel_offset(uint32_t cpp, uint32_t image_h,
+v3d_get_uif_xor_pixel_offset(uint32_t cpp, uint32_t image_h,
                              uint32_t x, uint32_t y)
 {
-        return vc5_get_uif_pixel_offset(cpp, image_h, x, y, true);
+        return v3d_get_uif_pixel_offset(cpp, image_h, x, y, true);
 }
 
 static inline uint32_t
-vc5_get_uif_no_xor_pixel_offset(uint32_t cpp, uint32_t image_h,
+v3d_get_uif_no_xor_pixel_offset(uint32_t cpp, uint32_t image_h,
                                 uint32_t x, uint32_t y)
 {
-        return vc5_get_uif_pixel_offset(cpp, image_h, x, y, false);
+        return v3d_get_uif_pixel_offset(cpp, image_h, x, y, false);
 }
 
+/* Loads/stores non-utile-aligned boxes by walking over the destination
+ * rectangle, computing the address on the GPU, and storing/loading a pixel at
+ * a time.
+ */
 static inline void
-vc5_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride,
-                               void *cpu, uint32_t cpu_stride,
-                               int cpp, uint32_t image_h,
-                               const struct pipe_box *box,
-                               uint32_t (*get_pixel_offset)(uint32_t cpp,
-                                                            uint32_t image_h,
-                                                            uint32_t x, uint32_t y),
-                               bool is_load)
+v3d_move_pixels_unaligned(void *gpu, uint32_t gpu_stride,
+                          void *cpu, uint32_t cpu_stride,
+                          int cpp, uint32_t image_h,
+                          const struct pipe_box *box,
+                          uint32_t (*get_pixel_offset)(uint32_t cpp,
+                                                       uint32_t image_h,
+                                                       uint32_t x, uint32_t y),
+                          bool is_load)
 {
         for (uint32_t y = 0; y < box->height; y++) {
                 void *cpu_row = cpu + y * cpu_stride;
@@ -248,8 +252,109 @@ vc5_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride,
         }
 }
 
+/* Breaks the image down into utiles and calls either the fast whole-utile
+ * load/store functions, or the unaligned fallback case.
+ *
+ * The box is split into an interior made only of whole utiles plus up to
+ * four edge strips (top, bottom, left, right).  The interior is moved one
+ * utile at a time with v3d_load_utile()/v3d_store_utile(); the strips, or
+ * the whole box when it contains no whole utile, go through
+ * v3d_move_pixels_unaligned().
+ *
+ * gpu:              base of the tiled image; get_pixel_offset() results are
+ *                   added to it.
+ * gpu_stride:       not used here, only forwarded to the unaligned path.
+ * cpu:              linear buffer, addressed relative to the box origin, so
+ *                   it is expected to point at pixel (box->x, box->y).
+ * cpu_stride:       byte distance between rows of the CPU buffer.
+ * cpp:              bytes per pixel.
+ * image_h:          forwarded unchanged to get_pixel_offset().
+ * box:              rectangle to move, in image pixel coordinates.
+ * get_pixel_offset: maps (cpp, image_h, x, y) to a byte offset from gpu.
+ * is_load:          true selects the load helpers, false the store helpers.
+ */
+static inline void
+v3d_move_pixels_general_percpp(void *gpu, uint32_t gpu_stride,
+                               void *cpu, uint32_t cpu_stride,
+                               int cpp, uint32_t image_h,
+                               const struct pipe_box *box,
+                               uint32_t (*get_pixel_offset)(uint32_t cpp,
+                                                            uint32_t image_h,
+                                                            uint32_t x, uint32_t y),
+                               bool is_load)
+{
+        uint32_t utile_w = v3d_utile_width(cpp);
+        uint32_t utile_h = v3d_utile_height(cpp);
+        uint32_t utile_gpu_stride = utile_w * cpp; /* bytes per utile row */
+        uint32_t x1 = box->x;
+        uint32_t y1 = box->y;
+        uint32_t x2 = box->x + box->width; /* exclusive right edge */
+        uint32_t y2 = box->y + box->height; /* exclusive bottom edge */
+        uint32_t align_x1 = align(x1, utile_w);
+        uint32_t align_y1 = align(y1, utile_h);
+        uint32_t align_x2 = x2 & ~(utile_w - 1);
+        uint32_t align_y2 = y2 & ~(utile_h - 1);
+
+        /* Load/store all the whole utiles first.
+         *
+         * [align_x1, align_x2) x [align_y1, align_y2) is the utile-aligned
+         * interior of the box.  NOTE(review): this assumes align() rounds up
+         * to a multiple of its second argument and that utile dimensions are
+         * powers of two (the masks above rely on it) -- confirm.
+         *
+         * GPU addresses are computed from absolute image coordinates, while
+         * CPU addresses are offsets from the box origin.
+         */
+        for (uint32_t y = align_y1; y < align_y2; y += utile_h) {
+                void *cpu_row = cpu + (y - box->y) * cpu_stride;
+
+                for (uint32_t x = align_x1; x < align_x2; x += utile_w) {
+                        void *utile_gpu = (gpu +
+                                           get_pixel_offset(cpp, image_h, x, y));
+                        void *utile_cpu = cpu_row + (x - box->x) * cpp;
+
+                        if (is_load) {
+                                v3d_load_utile(utile_cpu, cpu_stride,
+                                               utile_gpu, utile_gpu_stride);
+                        } else {
+                                v3d_store_utile(utile_gpu, utile_gpu_stride,
+                                                utile_cpu, cpu_stride);
+                        }
+                }
+        }
+
+        /* If there were no aligned utiles in the middle, load/store the whole
+         * thing unaligned.
+         *
+         * In that case the loops above executed no utile copies (one of
+         * their ranges is empty), so nothing has been moved yet and the
+         * original box can be handed over as-is.
+         */
+        if (align_y2 <= align_y1 ||
+            align_x2 <= align_x1) {
+                v3d_move_pixels_unaligned(gpu, gpu_stride,
+                                          cpu, cpu_stride,
+                                          cpp, image_h,
+                                          box,
+                                          get_pixel_offset, is_load);
+                return;
+        }
+
+        /* Load/store the partial utiles.
+         *
+         * The top and bottom strips span the full box width, so they own the
+         * corners; the left and right strips only cover the interior rows.
+         * Together with the interior they tile the box without overlap.  A
+         * strip has zero width or height when the matching aligned bound
+         * coincides with the box edge.
+         */
+        struct pipe_box partial_boxes[4] = {
+                /* Top: rows y1 .. align_y1, full width */
+                {
+                        .x = x1,
+                        .width = x2 - x1,
+                        .y = y1,
+                        .height = align_y1 - y1,
+                },
+                /* Bottom: rows align_y2 .. y2, full width */
+                {
+                        .x = x1,
+                        .width = x2 - x1,
+                        .y = align_y2,
+                        .height = y2 - align_y2,
+                },
+                /* Left: columns x1 .. align_x1, interior rows only */
+                {
+                        .x = x1,
+                        .width = align_x1 - x1,
+                        .y = align_y1,
+                        .height = align_y2 - align_y1,
+                },
+                /* Right: columns align_x2 .. x2, interior rows only */
+                {
+                        .x = align_x2,
+                        .width = x2 - align_x2,
+                        .y = align_y1,
+                        .height = align_y2 - align_y1,
+                },
+        };
+        for (int i = 0; i < ARRAY_SIZE(partial_boxes); i++) {
+                void *partial_cpu = (cpu + /* rebase to the strip's first pixel */
+                                     (partial_boxes[i].y - y1) * cpu_stride +
+                                     (partial_boxes[i].x - x1) * cpp);
+
+                v3d_move_pixels_unaligned(gpu, gpu_stride,
+                                          partial_cpu, cpu_stride,
+                                          cpp, image_h,
+                                          &partial_boxes[i],
+                                          get_pixel_offset, is_load);
+        }
+}
+
 static inline void
-vc5_move_pixels_general(void *gpu, uint32_t gpu_stride,
+v3d_move_pixels_general(void *gpu, uint32_t gpu_stride,
                                void *cpu, uint32_t cpu_stride,
                                int cpp, uint32_t image_h,
                                const struct pipe_box *box,
@@ -260,35 +365,35 @@ vc5_move_pixels_general(void *gpu, uint32_t gpu_stride,
 {
         switch (cpp) {
         case 1:
-                vc5_move_pixels_general_percpp(gpu, gpu_stride,
+                v3d_move_pixels_general_percpp(gpu, gpu_stride,
                                                cpu, cpu_stride,
                                                1, image_h, box,
                                                get_pixel_offset,
                                                is_load);
                 break;
         case 2:
-                vc5_move_pixels_general_percpp(gpu, gpu_stride,
+                v3d_move_pixels_general_percpp(gpu, gpu_stride,
                                                cpu, cpu_stride,
                                                2, image_h, box,
                                                get_pixel_offset,
                                                is_load);
                 break;
         case 4:
-                vc5_move_pixels_general_percpp(gpu, gpu_stride,
+                v3d_move_pixels_general_percpp(gpu, gpu_stride,
                                                cpu, cpu_stride,
                                                4, image_h, box,
                                                get_pixel_offset,
                                                is_load);
                 break;
         case 8:
-                vc5_move_pixels_general_percpp(gpu, gpu_stride,
+                v3d_move_pixels_general_percpp(gpu, gpu_stride,
                                                cpu, cpu_stride,
                                                8, image_h, box,
                                                get_pixel_offset,
                                                is_load);
                 break;
         case 16:
-                vc5_move_pixels_general_percpp(gpu, gpu_stride,
+                v3d_move_pixels_general_percpp(gpu, gpu_stride,
                                                cpu, cpu_stride,
                                                16, image_h, box,
                                                get_pixel_offset,
@@ -298,9 +403,9 @@ vc5_move_pixels_general(void *gpu, uint32_t gpu_stride,
 }
 
 static inline void
-vc5_move_tiled_image(void *gpu, uint32_t gpu_stride,
+v3d_move_tiled_image(void *gpu, uint32_t gpu_stride,
                      void *cpu, uint32_t cpu_stride,
-                     enum vc5_tiling_mode tiling_format,
+                     enum v3d_tiling_mode tiling_format,
                      int cpp,
                      uint32_t image_h,
                      const struct pipe_box *box,
@@ -308,38 +413,38 @@ vc5_move_tiled_image(void *gpu, uint32_t gpu_stride,
 {
         switch (tiling_format) {
         case VC5_TILING_UIF_XOR:
-                vc5_move_pixels_general(gpu, gpu_stride,
+                v3d_move_pixels_general(gpu, gpu_stride,
                                         cpu, cpu_stride,
                                         cpp, image_h, box,
-                                        vc5_get_uif_xor_pixel_offset,
+                                        v3d_get_uif_xor_pixel_offset,
                                         is_load);
                 break;
         case VC5_TILING_UIF_NO_XOR:
-                vc5_move_pixels_general(gpu, gpu_stride,
+                v3d_move_pixels_general(gpu, gpu_stride,
                                         cpu, cpu_stride,
                                         cpp, image_h, box,
-                                        vc5_get_uif_no_xor_pixel_offset,
+                                        v3d_get_uif_no_xor_pixel_offset,
                                         is_load);
                 break;
         case VC5_TILING_UBLINEAR_2_COLUMN:
-                vc5_move_pixels_general(gpu, gpu_stride,
+                v3d_move_pixels_general(gpu, gpu_stride,
                                         cpu, cpu_stride,
                                         cpp, image_h, box,
-                                        vc5_get_ublinear_2_column_pixel_offset,
+                                        v3d_get_ublinear_2_column_pixel_offset,
                                         is_load);
                 break;
         case VC5_TILING_UBLINEAR_1_COLUMN:
-                vc5_move_pixels_general(gpu, gpu_stride,
+                v3d_move_pixels_general(gpu, gpu_stride,
                                         cpu, cpu_stride,
                                         cpp, image_h, box,
-                                        vc5_get_ublinear_1_column_pixel_offset,
+                                        v3d_get_ublinear_1_column_pixel_offset,
                                         is_load);
                 break;
         case VC5_TILING_LINEARTILE:
-                vc5_move_pixels_general(gpu, gpu_stride,
+                v3d_move_pixels_general(gpu, gpu_stride,
                                         cpu, cpu_stride,
                                         cpp, image_h, box,
-                                        vc5_get_lt_pixel_offset,
+                                        v3d_get_lt_pixel_offset,
                                         is_load);
                 break;
         default:
@@ -353,13 +458,13 @@ vc5_move_tiled_image(void *gpu, uint32_t gpu_stride,
  * start of \p dst according to the given tiling format.
  */
 void
-vc5_load_tiled_image(void *dst, uint32_t dst_stride,
+v3d_load_tiled_image(void *dst, uint32_t dst_stride,
                      void *src, uint32_t src_stride,
-                     enum vc5_tiling_mode tiling_format, int cpp,
+                     enum v3d_tiling_mode tiling_format, int cpp,
                      uint32_t image_h,
                      const struct pipe_box *box)
 {
-        vc5_move_tiled_image(src, src_stride,
+        v3d_move_tiled_image(src, src_stride,
                              dst, dst_stride,
                              tiling_format,
                              cpp,
@@ -373,13 +478,13 @@ vc5_load_tiled_image(void *dst, uint32_t dst_stride,
  * \p dst according to the given tiling format.
  */
 void
-vc5_store_tiled_image(void *dst, uint32_t dst_stride,
+v3d_store_tiled_image(void *dst, uint32_t dst_stride,
                       void *src, uint32_t src_stride,
-                      enum vc5_tiling_mode tiling_format, int cpp,
+                      enum v3d_tiling_mode tiling_format, int cpp,
                       uint32_t image_h,
                       const struct pipe_box *box)
 {
-        vc5_move_tiled_image(dst, dst_stride,
+        v3d_move_tiled_image(dst, dst_stride,
                              src, src_stride,
                              tiling_format,
                              cpp,