From: Kenneth Graunke Date: Fri, 6 Apr 2018 07:05:24 +0000 (-0700) Subject: iris: rewrite to use memzones and not relocs X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=651be7cf3d8887a5bb84ef70fa67719fef2dc548;p=mesa.git iris: rewrite to use memzones and not relocs --- diff --git a/src/gallium/drivers/iris/iris_batch.c b/src/gallium/drivers/iris/iris_batch.c index 7602da27f37..52e44d9bb04 100644 --- a/src/gallium/drivers/iris/iris_batch.c +++ b/src/gallium/drivers/iris/iris_batch.c @@ -82,15 +82,6 @@ uint_key_hash(const void *key) return (uintptr_t) key; } -static void -init_reloc_list(struct iris_reloc_list *rlist, int count) -{ - rlist->reloc_count = 0; - rlist->reloc_array_size = count; - rlist->relocs = malloc(rlist->reloc_array_size * - sizeof(struct drm_i915_gem_relocation_entry)); -} - static void create_batch_buffer(struct iris_bufmgr *bufmgr, struct iris_batch_buffer *buf, @@ -116,9 +107,6 @@ iris_init_batch(struct iris_batch *batch, assert(util_bitcount(ring) == 1); batch->ring = ring; - init_reloc_list(&batch->cmdbuf.relocs, 256); - init_reloc_list(&batch->statebuf.relocs, 256); - batch->exec_count = 0; batch->exec_array_size = 100; batch->exec_bos = @@ -189,12 +177,6 @@ iris_batch_reset(struct iris_batch *batch) batch->last_cmd_bo = batch->cmdbuf.bo; create_batch_buffer(bufmgr, &batch->cmdbuf, "command buffer", BATCH_SZ); - create_batch_buffer(bufmgr, &batch->statebuf, "state buffer", STATE_SZ); - - /* Avoid making 0 a valid state offset - otherwise the decoder will try - * and decode data when we use offset 0 as a null pointer. - */ - batch->statebuf.map_next += 1; add_exec_bo(batch, batch->cmdbuf.bo); assert(batch->cmdbuf.bo->index == 0); @@ -220,10 +202,6 @@ free_batch_buffer(struct iris_batch_buffer *buf) buf->bo = NULL; buf->map = NULL; buf->map_next = NULL; - - free(buf->relocs.relocs); - buf->relocs.relocs = NULL; - buf->relocs.reloc_array_size = 0; } void @@ -235,7 +213,6 @@ iris_batch_free(struct iris_batch *batch) free(batch->exec_bos); free(batch->validation_list); free_batch_buffer(&batch->cmdbuf); - free_batch_buffer(&batch->statebuf); iris_bo_unreference(batch->last_cmd_bo); @@ -406,19 +383,6 @@ iris_require_command_space(struct iris_batch *batch, unsigned size) require_buffer_space(batch, &batch->cmdbuf, size, BATCH_SZ, MAX_BATCH_SIZE); } -/** - * Reserve some space in the statebuffer, or flush. - * - * This is used to estimate when we're near the end of the batch, - * so we can flush early. - */ -void -iris_require_state_space(struct iris_batch *batch, unsigned size) -{ - require_buffer_space(batch, &batch->statebuf, size, STATE_SZ, - MAX_STATE_SIZE); -} - void iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size) { @@ -456,7 +420,6 @@ static int submit_batch(struct iris_batch *batch, int in_fence_fd, int *out_fence_fd) { iris_bo_unmap(batch->cmdbuf.bo); - iris_bo_unmap(batch->statebuf.bo); /* The requirement for using I915_EXEC_NO_RELOC are: * @@ -470,23 +433,6 @@ submit_batch(struct iris_batch *batch, int in_fence_fd, int *out_fence_fd) * To avoid stalling, execobject.offset should match the current * address of that object within the active context. */ - /* Set statebuffer relocations */ - const unsigned state_index = batch->statebuf.bo->index; - if (state_index < batch->exec_count && - batch->exec_bos[state_index] == batch->statebuf.bo) { - struct drm_i915_gem_exec_object2 *entry = - &batch->validation_list[state_index]; - assert(entry->handle == batch->statebuf.bo->gem_handle); - entry->relocation_count = batch->statebuf.relocs.reloc_count; - entry->relocs_ptr = (uintptr_t) batch->statebuf.relocs.relocs; - } - - /* Set batchbuffer relocations */ - struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[0]; - assert(entry->handle == batch->cmdbuf.bo->gem_handle); - entry->relocation_count = batch->cmdbuf.relocs.reloc_count; - entry->relocs_ptr = (uintptr_t) batch->cmdbuf.relocs.relocs; - struct drm_i915_gem_execbuffer2 execbuf = { .buffers_ptr = (uintptr_t) batch->validation_list, .buffer_count = batch->exec_count, @@ -568,16 +514,12 @@ _iris_batch_flush_fence(struct iris_batch *batch, if (unlikely(INTEL_DEBUG & (DEBUG_BATCH | DEBUG_SUBMIT))) { int bytes_for_commands = buffer_bytes_used(&batch->cmdbuf); - int bytes_for_state = buffer_bytes_used(&batch->statebuf); - fprintf(stderr, "%19s:%-3d: Batchbuffer flush with %5db (%0.1f%%) (pkt)," - " %5db (%0.1f%%) (state), %4d BOs (%0.1fMb aperture)," - " %4d batch relocs, %4d state relocs\n", file, line, + fprintf(stderr, "%19s:%-3d: Batchbuffer flush with %5db (%0.1f%%), " + "%4d BOs (%0.1fMb aperture)\n", + file, line, bytes_for_commands, 100.0f * bytes_for_commands / BATCH_SZ, - bytes_for_state, 100.0f * bytes_for_state / STATE_SZ, batch->exec_count, - (float) batch->aperture_space / (1024 * 1024), - batch->cmdbuf.relocs.reloc_count, - batch->statebuf.relocs.reloc_count); + (float) batch->aperture_space / (1024 * 1024)); } int ret = submit_batch(batch, in_fence_fd, out_fence_fd); @@ -603,13 +545,9 @@ _iris_batch_flush_fence(struct iris_batch *batch, iris_bo_unreference(batch->exec_bos[i]); batch->exec_bos[i] = NULL; } - batch->cmdbuf.relocs.reloc_count = 0; - batch->statebuf.relocs.reloc_count = 0; batch->exec_count = 0; batch->aperture_space = 0; - iris_bo_unreference(batch->statebuf.bo); - /* Start a new batch buffer. */ iris_batch_reset_and_clear_render_cache(batch); @@ -630,46 +568,8 @@ iris_batch_references(struct iris_batch *batch, struct iris_bo *bo) return false; } -/* This is the only way buffers get added to the validate list. +/* This is the only way buffers get added to the validate list. */ -static uint64_t -emit_reloc(struct iris_batch *batch, - struct iris_reloc_list *rlist, uint32_t offset, - struct iris_bo *target, uint32_t target_offset, - unsigned int reloc_flags) -{ - assert(target != NULL); - - unsigned int index = add_exec_bo(batch, target); - struct drm_i915_gem_exec_object2 *entry = &batch->validation_list[index]; - - if (target->kflags & EXEC_OBJECT_PINNED) { - assert(entry->offset == target->gtt_offset); - return entry->offset + target_offset; - } - - if (rlist->reloc_count == rlist->reloc_array_size) { - rlist->reloc_array_size *= 2; - rlist->relocs = realloc(rlist->relocs, - rlist->reloc_array_size * - sizeof(struct drm_i915_gem_relocation_entry)); - } - - rlist->relocs[rlist->reloc_count++] = - (struct drm_i915_gem_relocation_entry) { - .offset = offset, - .delta = target_offset, - .target_handle = index, - .presumed_offset = entry->offset, - }; - - /* Using the old buffer offset, write in what the right data would be, in - * case the buffer doesn't move and we can short-circuit the relocation - * processing in the kernel - */ - return entry->offset + target_offset; -} - void iris_use_pinned_bo(struct iris_batch *batch, struct iris_bo *bo) { @@ -677,79 +577,6 @@ iris_use_pinned_bo(struct iris_batch *batch, struct iris_bo *bo) add_exec_bo(batch, bo); } -uint64_t -iris_batch_reloc(struct iris_batch *batch, uint32_t batch_offset, - struct iris_bo *target, uint32_t target_offset, - unsigned int reloc_flags) -{ - assert(batch_offset <= batch->cmdbuf.bo->size - sizeof(uint32_t)); - - return emit_reloc(batch, &batch->cmdbuf.relocs, batch_offset, - target, target_offset, reloc_flags); -} - -uint64_t -iris_state_reloc(struct iris_batch *batch, uint32_t state_offset, - struct iris_bo *target, uint32_t target_offset, - unsigned int reloc_flags) -{ - assert(state_offset <= batch->statebuf.bo->size - sizeof(uint32_t)); - - return emit_reloc(batch, &batch->statebuf.relocs, state_offset, - target, target_offset, reloc_flags); -} - - -static uint32_t -iris_state_entry_size(struct iris_batch *batch, uint32_t offset) -{ - struct hash_entry *entry = - _mesa_hash_table_search(batch->state_sizes, (void *)(uintptr_t) offset); - return entry ? (uintptr_t) entry->data : 0; -} - -/** - * Allocates a block of space in the batchbuffer for indirect state. - */ -void * -iris_alloc_state(struct iris_batch *batch, - int size, int alignment, - uint32_t *out_offset) -{ - assert(size < batch->statebuf.bo->size); - - const unsigned existing_bytes = buffer_bytes_used(&batch->statebuf); - unsigned aligned_size = - ALIGN(existing_bytes, alignment) - existing_bytes + size; - - require_buffer_space(batch, &batch->statebuf, aligned_size, - STATE_SZ, MAX_STATE_SIZE); - - unsigned offset = ALIGN(buffer_bytes_used(&batch->statebuf), alignment); - - if (unlikely(batch->state_sizes)) { - _mesa_hash_table_insert(batch->state_sizes, - (void *) (uintptr_t) offset, - (void *) (uintptr_t) size); - } - - batch->statebuf.map_next += aligned_size; - - *out_offset = offset; - return batch->statebuf.map + offset; -} - -uint32_t -iris_emit_state(struct iris_batch *batch, - const void *data, - int size, int alignment) -{ - uint32_t out_offset; - void *dest = iris_alloc_state(batch, size, alignment, &out_offset); - memcpy(dest, data, size); - return out_offset; -} - static void decode_batch(struct iris_batch *batch) { diff --git a/src/gallium/drivers/iris/iris_batch.h b/src/gallium/drivers/iris/iris_batch.h index bb891c68263..771fb48ced1 100644 --- a/src/gallium/drivers/iris/iris_batch.h +++ b/src/gallium/drivers/iris/iris_batch.h @@ -39,13 +39,7 @@ struct iris_address { struct iris_bo *bo; unsigned reloc_flags; - uint32_t offset; -}; - -struct iris_reloc_list { - struct drm_i915_gem_relocation_entry *relocs; - int reloc_count; - int reloc_array_size; + uint64_t offset; }; struct iris_batch_buffer { @@ -55,8 +49,6 @@ struct iris_batch_buffer { struct iris_bo *partial_bo; unsigned partial_bytes; - - struct iris_reloc_list relocs; }; struct iris_batch { @@ -65,8 +57,6 @@ struct iris_batch { /** Current batchbuffer being queued up. */ struct iris_batch_buffer cmdbuf; - /** Current statebuffer being queued up. */ - struct iris_batch_buffer statebuf; /** Last BO submitted to the hardware. Used for glFinish(). */ struct iris_bo *last_cmd_bo; @@ -99,12 +89,7 @@ void iris_init_batch(struct iris_batch *batch, uint8_t ring); void iris_batch_free(struct iris_batch *batch); void iris_require_command_space(struct iris_batch *batch, unsigned size); -void iris_require_state_space(struct iris_batch *batch, unsigned size); void iris_batch_emit(struct iris_batch *batch, const void *data, unsigned size); -uint32_t iris_emit_state(struct iris_batch *batch, const void *data, int size, - int alignment); -void *iris_alloc_state(struct iris_batch *batch, int size, int alignment, - uint32_t *out_offset); int _iris_batch_flush_fence(struct iris_batch *batch, int in_fence_fd, int *out_fence_fd, @@ -123,15 +108,4 @@ bool iris_batch_references(struct iris_batch *batch, struct iris_bo *bo); void iris_use_pinned_bo(struct iris_batch *batch, struct iris_bo *bo); -uint64_t iris_batch_reloc(struct iris_batch *batch, - uint32_t batch_offset, - struct iris_bo *target, - uint32_t target_offset, - unsigned flags); - -uint64_t iris_state_reloc(struct iris_batch *batch, - uint32_t batch_offset, - struct iris_bo *target, - uint32_t target_offset, - unsigned flags); #endif diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c index 3208fcfb0cc..7042cb0e496 100644 --- a/src/gallium/drivers/iris/iris_state.c +++ b/src/gallium/drivers/iris/iris_state.c @@ -57,11 +57,11 @@ static uint64_t __gen_combine_address(struct iris_batch *batch, void *location, struct iris_address addr, uint32_t delta) { - if (addr.bo == NULL) - return addr.offset + delta; + // XXX: reloc flags? + if (addr.bo) + iris_use_pinned_bo(batch, addr.bo); - return iris_batch_reloc(batch, location - batch->cmdbuf.map, addr.bo, - addr.offset + delta, addr.reloc_flags); + return addr.offset + delta; } #define __genxml_cmd_length(cmd) cmd ## _length @@ -105,26 +105,6 @@ get_command_space(struct iris_batch *batch, unsigned bytes) VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, num_dwords)); \ } while (0) -#define iris_emit_with_addr(batch, dwords, num_dw, addr_field, addr) \ - do { \ - STATIC_ASSERT((GENX(addr_field) % 64) == 0); \ - assert(num_dw <= ARRAY_SIZE(dwords)); \ - int addr_idx = GENX(addr_field) / 32; \ - uint32_t *dw = get_command_space(batch, 4 * num_dw); \ - for (uint32_t i = 0; i < addr_idx; i++) { \ - dw[i] = (dwords)[i]; \ - } \ - uint64_t *qw = (uint64_t *) &dw[addr_idx]; \ - *qw = iris_batch_reloc(batch, (void *)qw - batch->cmdbuf.map, \ - addr.bo, \ - addr.offset + (dwords)[addr_idx + 1], \ - addr.reloc_flags); \ - for (uint32_t i = addr_idx + 1; i < num_dw; i++) { \ - dw[i] = (dwords)[i]; \ - } \ - VG(VALGRIND_CHECK_MEM_IS_DEFINED(dw, num_dw * 4)); \ - } while (0) - #include "genxml/genX_pack.h" #include "genxml/gen_macros.h" #include "genxml/genX_bits.h" @@ -290,11 +270,44 @@ translate_fill_mode(unsigned pipe_polymode) } static struct iris_address -ro_bo(struct iris_bo *bo, uint32_t offset) +ro_bo(struct iris_bo *bo, uint64_t offset) { return (struct iris_address) { .bo = bo, .offset = offset }; } +static uint32_t * +stream_state(struct iris_batch *batch, + struct u_upload_mgr *uploader, + unsigned size, + unsigned alignment, + unsigned *out_offset) +{ + struct pipe_resource *res = NULL; + void *ptr = NULL; + + u_upload_alloc(uploader, 0, size, alignment, out_offset, &res, &ptr); + iris_use_pinned_bo(batch, ((struct iris_resource *) res)->bo); + pipe_resource_reference(&res, NULL); + + return ptr; +} + +static uint32_t +emit_state(struct iris_batch *batch, + struct u_upload_mgr *uploader, + const void *data, + unsigned size, + unsigned alignment) +{ + unsigned offset = 0; + uint32_t *map = stream_state(batch, uploader, size, alignment, &offset); + + if (map) + memcpy(map, data, size); + + return offset; +} + static void iris_emit_state_base_address(struct iris_batch *batch) { @@ -323,13 +336,13 @@ iris_emit_state_base_address(struct iris_batch *batch) sba.IndirectObjectBufferSizeModifyEnable = true; sba.InstructionBuffersizeModifyEnable = true; - sba.SurfaceStateBaseAddress = ro_bo(batch->statebuf.bo, 0); - sba.DynamicStateBaseAddress = ro_bo(batch->statebuf.bo, 0); + sba.SurfaceStateBaseAddress = ro_bo(NULL, 1ull << 32); + sba.DynamicStateBaseAddress = ro_bo(NULL, 2 * (1ull << 32)); sba.GeneralStateBufferSize = 0xfffff; sba.IndirectObjectBufferSize = 0xfffff; sba.InstructionBufferSize = 0xfffff; - sba.DynamicStateBufferSize = ALIGN(MAX_STATE_SIZE, 4096); + sba.DynamicStateBufferSize = 0xfffff; } } @@ -1806,32 +1819,6 @@ static const uint32_t push_constant_opcodes[] = { [MESA_SHADER_COMPUTE] = 0, }; -static uint32_t -emit_patched_surface_state(struct iris_batch *batch, - uint32_t *surface_state, - const struct iris_resource *res, - unsigned reloc_flags) -{ - const int num_dwords = GENX(RENDER_SURFACE_STATE_length); - uint32_t offset; - uint32_t *dw = iris_alloc_state(batch, 4 * num_dwords, 64, &offset); - - STATIC_ASSERT(GENX(RENDER_SURFACE_STATE_SurfaceBaseAddress_start) % 32 == 0); - int addr_idx = GENX(RENDER_SURFACE_STATE_SurfaceBaseAddress_start) / 32; - for (uint32_t i = 0; i < addr_idx; i++) - dw[i] = surface_state[i]; - - uint64_t *qw = (uint64_t *) &dw[addr_idx]; - // XXX: mt->offset, if needed - *qw = iris_state_reloc(batch, (void *)qw - batch->statebuf.map, res->bo, - surface_state[addr_idx + 1], reloc_flags); - - for (uint32_t i = addr_idx + 1; i < num_dwords; i++) - dw[i] = surface_state[i]; - - return offset; -} - static void iris_upload_render_state(struct iris_context *ice, struct iris_batch *batch, @@ -1846,7 +1833,8 @@ iris_upload_render_state(struct iris_context *ice, struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa; iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_CC), ptr) { ptr.CCViewportPointer = - iris_emit_state(batch, cso->cc_vp, sizeof(cso->cc_vp), 32); + emit_state(batch, ice->state.dynamic_uploader, + cso->cc_vp, sizeof(cso->cc_vp), 32); } } @@ -1854,9 +1842,9 @@ iris_upload_render_state(struct iris_context *ice, struct iris_viewport_state *cso = ice->state.cso_vp; iris_emit_cmd(batch, GENX(3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP), ptr) { ptr.SFClipViewportPointer = - iris_emit_state(batch, cso->sf_cl_vp, - 4 * GENX(SF_CLIP_VIEWPORT_length) * - ice->state.num_viewports, 64); + emit_state(batch, ice->state.dynamic_uploader, cso->sf_cl_vp, + 4 * GENX(SF_CLIP_VIEWPORT_length) * + ice->state.num_viewports, 64); } } @@ -1874,7 +1862,8 @@ iris_upload_render_state(struct iris_context *ice, cso_fb->nr_cbufs * GENX(BLEND_STATE_ENTRY_length)); uint32_t blend_offset; uint32_t *blend_map = - iris_alloc_state(batch, num_dwords, 64, &blend_offset); + stream_state(batch, ice->state.dynamic_uploader, 4 * num_dwords, 64, + &blend_offset); uint32_t blend_state_header; iris_pack_state(GENX(BLEND_STATE), &blend_state_header, bs) { @@ -1896,9 +1885,9 @@ iris_upload_render_state(struct iris_context *ice, struct iris_depth_stencil_alpha_state *cso = ice->state.cso_zsa; uint32_t cc_offset; void *cc_map = - iris_alloc_state(batch, - sizeof(uint32_t) * GENX(COLOR_CALC_STATE_length), - 64, &cc_offset); + stream_state(batch, ice->state.dynamic_uploader, + sizeof(uint32_t) * GENX(COLOR_CALC_STATE_length), + 64, &cc_offset); iris_pack_state(GENX(COLOR_CALC_STATE), cc_map, cc) { cc.AlphaTestFormat = ALPHATEST_FLOAT32; cc.AlphaReferenceValueAsFLOAT32 = cso->alpha.ref_value; @@ -1966,8 +1955,9 @@ iris_upload_render_state(struct iris_context *ice, uint32_t *bt_map = NULL; if (prog_data->binding_table.size_bytes != 0) { - bt_map = iris_alloc_state(batch, prog_data->binding_table.size_bytes, - 64, &bt_offset); + bt_map = stream_state(batch, ice->state.surface_uploader, + prog_data->binding_table.size_bytes, + 64, &bt_offset); } iris_emit_cmd(batch, GENX(3DSTATE_BINDING_TABLE_POINTERS_VS), ptr) { @@ -1983,9 +1973,10 @@ iris_upload_render_state(struct iris_context *ice, for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) { struct iris_surface *surf = (void *) cso_fb->cbufs[i]; struct iris_resource *res = (void *) surf->pipe.texture; - - *bt_map++ = emit_patched_surface_state(batch, surf->surface_state, - res, RELOC_WRITE); + *bt_map++ = + emit_state(batch, ice->state.surface_uploader, + surf->surface_state, + 4 * GENX(RENDER_SURFACE_STATE_length), 64); } } @@ -1996,7 +1987,6 @@ iris_upload_render_state(struct iris_context *ice, // XXX: these are per-context??????????? pipe_sampler_view::context *bt_map++ = emit_patched_surface_state(batch, view->surface_state, res, 0); - } // XXX: not implemented yet @@ -2019,9 +2009,9 @@ iris_upload_render_state(struct iris_context *ice, const int count = IRIS_MAX_TEXTURE_SAMPLERS; uint32_t offset; - uint32_t *map = iris_alloc_state(batch, - count * 4 * GENX(SAMPLER_STATE_length), - 32, &offset); + uint32_t *map = stream_state(batch, ice->state.dynamic_uploader, + count * 4 * GENX(SAMPLER_STATE_length), + 32, &offset); for (int i = 0; i < count; i++) { // XXX: when we have a correct count, these better be bound @@ -2169,9 +2159,9 @@ iris_upload_render_state(struct iris_context *ice, if (dirty & IRIS_DIRTY_SCISSOR) { uint32_t scissor_offset = - iris_emit_state(batch, ice->state.scissors, - sizeof(struct pipe_scissor_state) * - ice->state.num_scissors, 32); + emit_state(batch, ice->state.dynamic_uploader, ice->state.scissors, + sizeof(struct pipe_scissor_state) * + ice->state.num_scissors, 32); iris_emit_cmd(batch, GENX(3DSTATE_SCISSOR_STATE_POINTERS), ptr) { ptr.ScissorRectPointer = scissor_offset; @@ -2231,9 +2221,8 @@ iris_upload_render_state(struct iris_context *ice, sizeof(uint32_t) * (1 + 4 * cso->num_buffers)); for (unsigned i = 0; i < cso->num_buffers; i++) { - *addr = iris_batch_reloc(batch, (void *) addr - batch->cmdbuf.map, - cso->bos[i].bo, cso->bos[i].offset + - *delta, cso->bos[i].reloc_flags); + iris_use_pinned_bo(batch, cso->bos[i].bo); + *addr = cso->bos[i].offset + *delta; addr = (void *) addr + 16; delta = (void *) delta + 16; }