+ uint32_t xtiles = align(width, 64) / 64;
+ uint32_t ytiles = align(height, 64) / 64;
+
+#if 0
+ fprintf(stderr, "RCL: resolve 0x%x clear 0x%x resolve uncleared 0x%x\n",
+ vc4->resolve,
+ vc4->cleared,
+ resolve_uncleared);
+#endif
+
+ uint32_t reloc_size = 9;
+ uint32_t clear_size = 14;
+ uint32_t config_size = 11 + reloc_size;
+ uint32_t loadstore_size = 7 + reloc_size;
+ uint32_t tilecoords_size = 3;
+ uint32_t branch_size = 5 + reloc_size;
+ uint32_t color_store_size = 1;
+ cl_ensure_space(&vc4->rcl,
+ clear_size +
+ config_size +
+ loadstore_size +
+ xtiles * ytiles * (loadstore_size * 4 +
+ tilecoords_size * 3 +
+ branch_size +
+ color_store_size));
+
+ cl_u8(&vc4->rcl, VC4_PACKET_CLEAR_COLORS);
+ cl_u32(&vc4->rcl, vc4->clear_color[0]);
+ cl_u32(&vc4->rcl, vc4->clear_color[1]);
+ cl_u32(&vc4->rcl, vc4->clear_depth);
+ cl_u8(&vc4->rcl, vc4->clear_stencil);
+
+ /* The rendering mode config determines the pointer that's used for
+ * VC4_PACKET_STORE_MS_TILE_BUFFER address computations. The kernel
+ * could handle a no-relocation rendering mode config and deny those
+ * packets, but instead we just tell the kernel we're doing our color
+ * rendering to the Z buffer, and just don't emit any of those
+ * packets.
+ */
+ struct vc4_surface *render_surf = csurf ? csurf : zsurf;
+ struct vc4_resource *render_tex = vc4_resource(render_surf->base.texture);
+ cl_start_reloc(&vc4->rcl, 1);
+ cl_u8(&vc4->rcl, VC4_PACKET_TILE_RENDERING_MODE_CONFIG);
+ cl_reloc(vc4, &vc4->rcl, render_tex->bo, render_surf->offset);
+ cl_u16(&vc4->rcl, width);
+ cl_u16(&vc4->rcl, height);
+ cl_u16(&vc4->rcl, ((render_surf->tiling <<
+ VC4_RENDER_CONFIG_MEMORY_FORMAT_SHIFT) |
+ (vc4_rt_format_is_565(render_surf->base.format) ?
+ VC4_RENDER_CONFIG_FORMAT_BGR565 :
+ VC4_RENDER_CONFIG_FORMAT_RGBA8888) |
+ VC4_RENDER_CONFIG_EARLY_Z_COVERAGE_DISABLE));
+
+ /* The tile buffer normally gets cleared when the previous tile is
+ * stored. If the clear values changed between frames, then the tile
+ * buffer has stale clear values in it, so we have to do a store in
+ * None mode (no writes) so that we trigger the tile buffer clear.
+ *
+ * Excess clearing is only a performance cost, since per-tile contents
+ * will be loaded/stored in the loop below.
+ */
+ if (vc4->cleared & (PIPE_CLEAR_COLOR0 |
+ PIPE_CLEAR_DEPTH |
+ PIPE_CLEAR_STENCIL)) {
+ cl_u8(&vc4->rcl, VC4_PACKET_TILE_COORDINATES);
+ cl_u8(&vc4->rcl, 0);
+ cl_u8(&vc4->rcl, 0);
+
+ cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
+ cl_u16(&vc4->rcl, VC4_LOADSTORE_TILE_BUFFER_NONE);
+ cl_u32(&vc4->rcl, 0); /* no address, since we're in None mode */
+ }
+
+ uint32_t color_hindex = ctex ? vc4_gem_hindex(vc4, ctex->bo) : 0;
+ uint32_t depth_hindex = ztex ? vc4_gem_hindex(vc4, ztex->bo) : 0;
+ uint32_t tile_alloc_hindex = vc4_gem_hindex(vc4, vc4->tile_alloc);
+
+ for (int y = 0; y < ytiles; y++) {
+ for (int x = 0; x < xtiles; x++) {
+ bool end_of_frame = (x == xtiles - 1 &&
+ y == ytiles - 1);
+ bool coords_emitted = false;
+
+ /* Note that the load doesn't actually occur until the
+ * tile coords packet is processed, and only one load
+ * may be outstanding at a time.
+ */
+ if (resolve_uncleared & PIPE_CLEAR_COLOR) {
+ vc4_store_before_load(vc4, &coords_emitted);
+
+ cl_start_reloc(&vc4->rcl, 1);
+ cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
+ cl_u8(&vc4->rcl,
+ VC4_LOADSTORE_TILE_BUFFER_COLOR |
+ (csurf->tiling <<
+ VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
+ cl_u8(&vc4->rcl,
+ vc4_rt_format_is_565(csurf->base.format) ?
+ VC4_LOADSTORE_TILE_BUFFER_BGR565 :
+ VC4_LOADSTORE_TILE_BUFFER_RGBA8888);
+ cl_reloc_hindex(&vc4->rcl, color_hindex,
+ csurf->offset);
+
+ vc4_tile_coordinates(vc4, x, y, &coords_emitted);
+ }
+
+ if (resolve_uncleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
+ vc4_store_before_load(vc4, &coords_emitted);
+
+ cl_start_reloc(&vc4->rcl, 1);
+ cl_u8(&vc4->rcl, VC4_PACKET_LOAD_TILE_BUFFER_GENERAL);
+ cl_u8(&vc4->rcl,
+ VC4_LOADSTORE_TILE_BUFFER_ZS |
+ (zsurf->tiling <<
+ VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
+ cl_u8(&vc4->rcl, 0);
+ cl_reloc_hindex(&vc4->rcl, depth_hindex,
+ zsurf->offset);
+
+ vc4_tile_coordinates(vc4, x, y, &coords_emitted);
+ }
+
+ /* Clipping depends on tile coordinates having been
+ * emitted, so make sure it's happened even if
+ * everything was cleared to start.
+ */
+ vc4_tile_coordinates(vc4, x, y, &coords_emitted);
+
+ /* Wait for the binner before jumping to the first
+ * tile's lists.
+ */
+ if (x == 0 && y == 0)
+ cl_u8(&vc4->rcl, VC4_PACKET_WAIT_ON_SEMAPHORE);
+
+ cl_start_reloc(&vc4->rcl, 1);
+ cl_u8(&vc4->rcl, VC4_PACKET_BRANCH_TO_SUB_LIST);
+ cl_reloc_hindex(&vc4->rcl, tile_alloc_hindex,
+ (y * xtiles + x) * 32);
+
+ if (vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
+ vc4_tile_coordinates(vc4, x, y, &coords_emitted);
+
+ cl_start_reloc(&vc4->rcl, 1);
+ cl_u8(&vc4->rcl, VC4_PACKET_STORE_TILE_BUFFER_GENERAL);
+ cl_u8(&vc4->rcl,
+ VC4_LOADSTORE_TILE_BUFFER_ZS |
+ (zsurf->tiling <<
+ VC4_LOADSTORE_TILE_BUFFER_FORMAT_SHIFT));
+ cl_u8(&vc4->rcl,
+ VC4_STORE_TILE_BUFFER_DISABLE_COLOR_CLEAR);
+ cl_reloc_hindex(&vc4->rcl, depth_hindex,
+ zsurf->offset |
+ ((end_of_frame &&
+ !(vc4->resolve & PIPE_CLEAR_COLOR0)) ?
+ VC4_LOADSTORE_TILE_BUFFER_EOF : 0));
+
+ coords_emitted = false;
+ }
+
+ if (vc4->resolve & PIPE_CLEAR_COLOR0) {
+ vc4_tile_coordinates(vc4, x, y, &coords_emitted);
+ if (end_of_frame) {
+ cl_u8(&vc4->rcl,
+ VC4_PACKET_STORE_MS_TILE_BUFFER_AND_EOF);
+ } else {
+ cl_u8(&vc4->rcl,
+ VC4_PACKET_STORE_MS_TILE_BUFFER);