From 9fc672428d7247647d864110e907dbef6ac80cc1 Mon Sep 17 00:00:00 2001
From: Kenneth Graunke
Date: Thu, 26 Jul 2018 21:59:20 -0700
Subject: [PATCH] iris: little bits of compute basics

---
 src/gallium/drivers/iris/iris_binder.c  |  23 +++
 src/gallium/drivers/iris/iris_binder.h  |   1 +
 src/gallium/drivers/iris/iris_context.c |   2 +
 src/gallium/drivers/iris/iris_context.h |  15 ++
 src/gallium/drivers/iris/iris_draw.c    |  37 ++++
 src/gallium/drivers/iris/iris_program.c |  63 +++++++
 src/gallium/drivers/iris/iris_screen.c  |   4 +
 src/gallium/drivers/iris/iris_screen.h  |   2 +
 src/gallium/drivers/iris/iris_state.c   | 226 +++++++++++++++++++++++-
 9 files changed, 366 insertions(+), 7 deletions(-)

diff --git a/src/gallium/drivers/iris/iris_binder.c b/src/gallium/drivers/iris/iris_binder.c
index 57c2865a7bc..ca60287df3f 100644
--- a/src/gallium/drivers/iris/iris_binder.c
+++ b/src/gallium/drivers/iris/iris_binder.c
@@ -178,6 +178,29 @@ iris_binder_reserve_3d(struct iris_context *ice)
    }
 }
 
+/**
+ * Reserve binding table space for the compute shader.
+ *
+ * Skipped when IRIS_DIRTY_BINDINGS_CS is clear or the table is empty.
+ */
+void
+iris_binder_reserve_compute(struct iris_context *ice)
+{
+   if (!(ice->state.dirty & IRIS_DIRTY_BINDINGS_CS))
+      return;
+
+   struct iris_binder *binder = &ice->state.binder;
+   struct brw_stage_prog_data *prog_data =
+      ice->shaders.prog[MESA_SHADER_COMPUTE]->prog_data;
+
+   unsigned size = prog_data->binding_table.size_bytes;
+
+   if (size == 0)
+      return;
+
+   binder->bt_offset[MESA_SHADER_COMPUTE] = iris_binder_reserve(ice, size);
+}
+
 void
 iris_init_binder(struct iris_context *ice)
 {
diff --git a/src/gallium/drivers/iris/iris_binder.h b/src/gallium/drivers/iris/iris_binder.h
index e63170e298f..78449286c6d 100644
--- a/src/gallium/drivers/iris/iris_binder.h
+++ b/src/gallium/drivers/iris/iris_binder.h
@@ -53,5 +53,6 @@ void iris_init_binder(struct iris_context *ice);
 void iris_destroy_binder(struct iris_binder *binder);
 uint32_t iris_binder_reserve(struct iris_context *ice, unsigned size);
 void iris_binder_reserve_3d(struct iris_context *ice);
+void iris_binder_reserve_compute(struct iris_context *ice);
 
 #endif
diff --git a/src/gallium/drivers/iris/iris_context.c b/src/gallium/drivers/iris/iris_context.c
index 73d6b0d806d..6a38b3b4a85 100644
--- a/src/gallium/drivers/iris/iris_context.c
+++ b/src/gallium/drivers/iris/iris_context.c
@@ -208,6 +208,8 @@ iris_create_context(struct pipe_screen *pscreen, void *priv, unsigned flags)
    genX_call(devinfo, init_blorp, ice);
    ice->vtbl.init_render_context(screen, &ice->render_batch, &ice->vtbl,
                                  &ice->dbg);
+   ice->vtbl.init_compute_context(screen, &ice->compute_batch, &ice->vtbl,
+                                  &ice->dbg);
 
    return ctx;
 }
diff --git a/src/gallium/drivers/iris/iris_context.h b/src/gallium/drivers/iris/iris_context.h
index 1f553d71fa2..87c7f144ec7 100644
--- a/src/gallium/drivers/iris/iris_context.h
+++ b/src/gallium/drivers/iris/iris_context.h
@@ -277,11 +277,18 @@ struct iris_vtable {
                                struct iris_batch *batch,
                                struct iris_vtable *vtbl,
                                struct pipe_debug_callback *dbg);
+   void (*init_compute_context)(struct iris_screen *screen,
+                                struct iris_batch *batch,
+                                struct iris_vtable *vtbl,
+                                struct pipe_debug_callback *dbg);
    void (*upload_render_state)(struct iris_context *ice,
                                struct iris_batch *batch,
                                const struct pipe_draw_info *draw);
    void (*update_surface_base_address)(struct iris_batch *batch,
                                        struct iris_binder *binder);
+   void (*upload_compute_state)(struct iris_context *ice,
+                                struct iris_batch *batch,
+                                const struct pipe_grid_info *grid);
    void (*load_register_imm32)(struct iris_batch *batch, uint32_t reg,
                                uint32_t val);
    void (*load_register_imm64)(struct iris_batch *batch, uint32_t reg,
@@ -326,6 +333,8 @@ struct iris_vtable {
                            struct brw_gs_prog_key *key);
    void (*populate_fs_key)(const struct iris_context *ice,
                            struct brw_wm_prog_key *key);
+   void (*populate_cs_key)(const struct iris_context *ice,
+                           struct brw_cs_prog_key *key);
 };
 
 /**
@@ -363,6 +372,9 @@ struct iris_context {
    /** The main batch for rendering. */
    struct iris_batch render_batch;
 
+   /** The batch for compute shader dispatch */
+   struct iris_batch compute_batch;
+
    struct {
       struct iris_uncompiled_shader *uncompiled[MESA_SHADER_STAGES];
       struct iris_compiled_shader *prog[MESA_SHADER_STAGES];
@@ -471,6 +483,8 @@ void iris_init_program_functions(struct pipe_context *ctx);
 void iris_init_resource_functions(struct pipe_context *ctx);
 void iris_init_query_functions(struct pipe_context *ctx);
 void iris_update_compiled_shaders(struct iris_context *ice);
+void iris_update_compiled_compute_shader(struct iris_context *ice);
+
 
 /* iris_blit.c */
 void iris_blorp_surf_for_resource(struct blorp_surf *surf,
@@ -481,6 +495,7 @@ void iris_blorp_surf_for_resource(struct blorp_surf *surf,
 /* iris_draw.c */
 
 void iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info);
+void iris_launch_grid(struct pipe_context *, const struct pipe_grid_info *);
 
 /* iris_pipe_control.c */
 
diff --git a/src/gallium/drivers/iris/iris_draw.c b/src/gallium/drivers/iris/iris_draw.c
index 0567bbac72e..196f8d2caf4 100644
--- a/src/gallium/drivers/iris/iris_draw.c
+++ b/src/gallium/drivers/iris/iris_draw.c
@@ -74,7 +74,10 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 
    iris_batch_maybe_flush(batch, 1500);
 
+   // XXX: check if BOs are in use by the other batches (compute), if so flush
+
    iris_update_draw_info(ice, info);
+
    iris_update_compiled_shaders(ice);
 
    iris_predraw_resolve_inputs(ice, batch);
@@ -89,3 +92,37 @@ iris_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *info)
 
    iris_postdraw_update_resolve_tracking(ice, batch);
 }
+
+/**
+ * Dispatch a compute grid on the context's compute batch.
+ *
+ * Presumably installed as the pipe->launch_grid() hook; confirm where the
+ * pipe_context function table is filled in.
+ */
+void
+iris_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *info)
+{
+   struct iris_context *ice = (struct iris_context *) ctx;
+   struct iris_batch *batch = &ice->compute_batch;
+
+   if (unlikely(INTEL_DEBUG & DEBUG_REEMIT))
+      ice->state.dirty |= ~0ull;
+
+   iris_batch_maybe_flush(batch, 1500);
+
+   // XXX: check if BOs are in use by the other batches (render), if so flush
+   //
+   //if (dirty & IRIS_DIRTY_UNCOMPILED_CS)
+      iris_update_compiled_compute_shader(ice);
+
+   // XXX: predraw resolves / cache flushing
+
+   iris_binder_reserve_compute(ice);
+   ice->vtbl.update_surface_base_address(batch, &ice->state.binder);
+   ice->vtbl.upload_compute_state(ice, batch, info);
+
+   // XXX: this clears every dirty bit, not just the compute-related ones
+   ice->state.dirty = 0ull;
+
+   // XXX: postdraw resolve tracking
+}
diff --git a/src/gallium/drivers/iris/iris_program.c b/src/gallium/drivers/iris/iris_program.c
index ba09c087d69..a62b2d0fb42 100644
--- a/src/gallium/drivers/iris/iris_program.c
+++ b/src/gallium/drivers/iris/iris_program.c
@@ -214,6 +214,7 @@ iris_create_uncompiled_shader(struct pipe_context *ctx,
    }
 
    // XXX: precompile!
+   // XXX: disallow more than 64KB of shared variables
 
    return ish;
 }
@@ -1004,6 +1005,68 @@ iris_update_compiled_shaders(struct iris_context *ice)
    }
 }
 
+/**
+ * Compile a compute shader and hand the result to
+ * iris_upload_and_bind_shader().
+ *
+ * Returns false, after printing the compiler's error string, on failure.
+ */
+static bool
+iris_compile_cs(struct iris_context *ice,
+                struct iris_uncompiled_shader *ish,
+                const struct brw_cs_prog_key *key)
+{
+   struct iris_screen *screen = (struct iris_screen *)ice->ctx.screen;
+   const struct brw_compiler *compiler = screen->compiler;
+   const struct gen_device_info *devinfo = &screen->devinfo;
+   void *mem_ctx = ralloc_context(NULL);
+   struct brw_cs_prog_data *cs_prog_data =
+      rzalloc(mem_ctx, struct brw_cs_prog_data);
+   struct brw_stage_prog_data *prog_data = &cs_prog_data->base;
+
+   nir_shader *nir = ish->nir;
+
+   cs_prog_data->binding_table.work_groups_start = 0;
+   assign_common_binding_table_offsets(devinfo, nir, prog_data, 1);
+
+   iris_setup_uniforms(compiler, mem_ctx, nir, prog_data);
+
+   char *error_str = NULL;
+   const unsigned *program =
+      brw_compile_cs(compiler, &ice->dbg, mem_ctx, key, cs_prog_data,
+                     nir, -1, &error_str);
+   if (program == NULL) {
+      dbg_printf("Failed to compile compute shader: %s\n", error_str);
+      ralloc_free(mem_ctx);
+      return false;
+   }
+
+   iris_upload_and_bind_shader(ice, IRIS_CACHE_CS, key, program, prog_data,
+                               NULL);
+
+   ralloc_free(mem_ctx);
+   return true;
+}
+
+/**
+ * Look up the compute shader variant for the current key, compiling it
+ * when iris_bind_cached_shader() returns false.
+ */
+void
+iris_update_compiled_compute_shader(struct iris_context *ice)
+{
+   struct iris_uncompiled_shader *ish =
+      ice->shaders.uncompiled[MESA_SHADER_COMPUTE];
+
+   struct brw_cs_prog_key key = { .program_string_id = ish->program_id };
+   ice->vtbl.populate_cs_key(ice, &key);
+
+   if (iris_bind_cached_shader(ice, IRIS_CACHE_CS, &key))
+      return;
+
+   UNUSED bool success = iris_compile_cs(ice, ish, &key);
+}
+
 void
 iris_init_program_functions(struct pipe_context *ctx)
 {
diff --git a/src/gallium/drivers/iris/iris_screen.c b/src/gallium/drivers/iris/iris_screen.c
index 1f336adc20b..5e27acc54b1 100644
--- a/src/gallium/drivers/iris/iris_screen.c
+++ b/src/gallium/drivers/iris/iris_screen.c
@@ -587,6 +587,10 @@ iris_screen_create(int fd)
    slab_create_parent(&screen->transfer_pool,
                       sizeof(struct iris_transfer), 64);
 
+   screen->subslice_total =
+      iris_getparam_integer(screen, I915_PARAM_SUBSLICE_TOTAL);
+   assert(screen->subslice_total >= 1);
+
    struct pipe_screen *pscreen = &screen->base;
 
    iris_init_screen_resource_functions(pscreen);
diff --git a/src/gallium/drivers/iris/iris_screen.h b/src/gallium/drivers/iris/iris_screen.h
index 463b191d131..aa510efed18 100644
--- a/src/gallium/drivers/iris/iris_screen.h
+++ b/src/gallium/drivers/iris/iris_screen.h
@@ -51,6 +51,8 @@ struct iris_screen {
    /** Global program_string_id counter (see get_program_string_id()) */
    unsigned program_id;
 
+   unsigned subslice_total;
+
    struct gen_device_info devinfo;
    struct isl_device isl_dev;
    struct iris_bufmgr *bufmgr;
diff --git a/src/gallium/drivers/iris/iris_state.c b/src/gallium/drivers/iris/iris_state.c
index b20a608f387..189bd5e2380 100644
--- a/src/gallium/drivers/iris/iris_state.c
+++ b/src/gallium/drivers/iris/iris_state.c
@@ -610,6 +610,58 @@ iris_init_render_context(struct iris_screen *screen,
    }
 }
 
+/**
+ * Initialize the compute batch and emit its initial state: select the
+ * GPGPU pipeline and program STATE_BASE_ADDRESS.
+ */
+static void
+iris_init_compute_context(struct iris_screen *screen,
+                          struct iris_batch *batch,
+                          struct iris_vtable *vtbl,
+                          struct pipe_debug_callback *dbg)
+{
+   iris_init_batch(batch, screen, vtbl, dbg, I915_EXEC_RENDER);
+
+   /* XXX: PIPE_CONTROLs */
+
+   iris_emit_cmd(batch, GENX(PIPELINE_SELECT), sel) {
+      sel.PipelineSelection = GPGPU;
+   }
+
+   iris_emit_cmd(batch, GENX(STATE_BASE_ADDRESS), sba) {
+   #if 0
+   // XXX: MOCS is stupid for this.
+      sba.GeneralStateMemoryObjectControlState = MOCS_WB;
+      sba.StatelessDataPortAccessMemoryObjectControlState = MOCS_WB;
+      sba.SurfaceStateMemoryObjectControlState = MOCS_WB;
+      sba.DynamicStateMemoryObjectControlState = MOCS_WB;
+      sba.IndirectObjectMemoryObjectControlState = MOCS_WB;
+      sba.InstructionMemoryObjectControlState = MOCS_WB;
+      sba.BindlessSurfaceStateMemoryObjectControlState = MOCS_WB;
+   #endif
+
+      sba.GeneralStateBaseAddressModifyEnable = true;
+      sba.SurfaceStateBaseAddressModifyEnable = true;
+      sba.DynamicStateBaseAddressModifyEnable = true;
+      sba.IndirectObjectBaseAddressModifyEnable = true;
+      sba.InstructionBaseAddressModifyEnable = true;
+      sba.GeneralStateBufferSizeModifyEnable = true;
+      sba.DynamicStateBufferSizeModifyEnable = true;
+      sba.BindlessSurfaceStateBaseAddressModifyEnable = true;
+      sba.IndirectObjectBufferSizeModifyEnable = true;
+      sba.InstructionBuffersizeModifyEnable = true;
+
+      sba.InstructionBaseAddress = ro_bo(NULL, IRIS_MEMZONE_SHADER_START);
+      sba.SurfaceStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_SURFACE_START);
+      sba.DynamicStateBaseAddress = ro_bo(NULL, IRIS_MEMZONE_DYNAMIC_START);
+
+      sba.GeneralStateBufferSize = 0xfffff;
+      sba.IndirectObjectBufferSize = 0xfffff;
+      sba.InstructionBufferSize = 0xfffff;
+      sba.DynamicStateBufferSize = 0xfffff;
+   }
+}
+
 struct iris_vertex_buffer_state {
    /** The 3DSTATE_VERTEX_BUFFERS hardware packet. */
    uint32_t vertex_buffers[1 + 33 * GENX(VERTEX_BUFFER_STATE_length)];
@@ -646,12 +698,6 @@ struct iris_genx_state {
    uint32_t streamout[4 * GENX(3DSTATE_STREAMOUT_length)];
 };
 
-// XXX: move this to iris_draw.c
-static void
-iris_launch_grid(struct pipe_context *ctx, const struct pipe_grid_info *info)
-{
-}
-
 /**
  * The pipe->set_blend_color() driver hook.
  *
@@ -2826,6 +2872,16 @@ iris_populate_fs_key(const struct iris_context *ice,
    // XXX: respect hint for high_quality_derivatives:1;
 }
 
+/**
+ * Fill in the sampler portion of the compute shader program key.
+ */
+static void
+iris_populate_cs_key(const struct iris_context *ice,
+                     struct brw_cs_prog_key *key)
+{
+   iris_populate_sampler_key(ice, &key->tex);
+}
+
 #if 0
    // XXX: these need to go in INIT_THREAD_DISPATCH_FIELDS
    pkt.SamplerCount = \
@@ -3074,6 +3130,32 @@ iris_store_fs_state(const struct gen_device_info *devinfo,
  *
  * This must match the data written by the iris_store_xs_state() functions.
  */
+/**
+ * Pack the shader-dependent INTERFACE_DESCRIPTOR_DATA fields into the
+ * compute shader's derived data.
+ *
+ * (The comment just above belongs to iris_derived_program_state_size().)
+ */
+static void
+iris_store_cs_state(const struct gen_device_info *devinfo,
+                    struct iris_compiled_shader *shader)
+{
+   struct brw_stage_prog_data *prog_data = shader->prog_data;
+   struct brw_cs_prog_data *cs_prog_data = (void *) shader->prog_data;
+   void *map = shader->derived_data;
+
+   iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), map, desc) {
+      desc.KernelStartPointer = KSP(shader);
+      desc.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs;
+      desc.NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads;
+      desc.SharedLocalMemorySize =
+         encode_slm_size(GEN_GEN, prog_data->total_shared);
+      desc.BarrierEnable = cs_prog_data->uses_barrier;
+      desc.CrossThreadConstantDataReadLength =
+         cs_prog_data->push.cross_thread.regs;
+   }
+}
+
 static unsigned
 iris_derived_program_state_size(enum iris_program_cache_id cache_id)
 {
@@ -3086,7 +3168,7 @@ iris_derived_program_state_size(enum iris_program_cache_id cache_id)
       [IRIS_CACHE_GS] = GENX(3DSTATE_GS_length),
       [IRIS_CACHE_FS] =
          GENX(3DSTATE_PS_length) + GENX(3DSTATE_PS_EXTRA_length),
-      [IRIS_CACHE_CS] = 0,
+      [IRIS_CACHE_CS] = GENX(INTERFACE_DESCRIPTOR_DATA_length),
       [IRIS_CACHE_BLORP] = 0,
    };
 
@@ -3121,6 +3203,8 @@ iris_store_derived_program_state(const struct gen_device_info *devinfo,
       iris_store_fs_state(devinfo, shader);
       break;
    case IRIS_CACHE_CS:
+      iris_store_cs_state(devinfo, shader);
+      break;
    case IRIS_CACHE_BLORP:
       break;
    default:
@@ -4126,6 +4210,131 @@ iris_upload_render_state(struct iris_context *ice,
    }
 }
 
+/**
+ * Emit the commands for one compute dispatch: MEDIA_VFE_STATE, an optional
+ * MEDIA_CURBE_LOAD, the interface descriptor, and GPGPU_WALKER.
+ */
+static void
+iris_upload_compute_state(struct iris_context *ice,
+                          struct iris_batch *batch,
+                          const struct pipe_grid_info *grid)
+{
+   const uint64_t dirty = ice->state.dirty;
+   struct iris_screen *screen = batch->screen;
+   const struct gen_device_info *devinfo = &screen->devinfo;
+   struct iris_binder *binder = &ice->state.binder;
+   struct iris_shader_state *shs = &ice->state.shaders[MESA_SHADER_COMPUTE];
+   struct iris_compiled_shader *shader =
+      ice->shaders.prog[MESA_SHADER_COMPUTE];
+   struct brw_stage_prog_data *prog_data = shader->prog_data;
+   struct brw_cs_prog_data *cs_prog_data = (void *) prog_data;
+
+   if (dirty & IRIS_DIRTY_BINDINGS_CS)
+      iris_populate_binding_table(ice, batch, MESA_SHADER_COMPUTE, false);
+
+   iris_use_optional_res(batch, shs->sampler_table.res, false);
+   iris_use_pinned_bo(batch, iris_resource_bo(shader->assembly.res), false);
+
+   if (ice->state.need_border_colors)
+      iris_use_pinned_bo(batch, ice->state.border_color_pool.bo, false);
+
+   /* The MEDIA_VFE_STATE documentation for Gen8+ says:
+    *
+    * "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless
+    *  the only bits that are changed are scoreboard related: Scoreboard
+    *  Enable, Scoreboard Type, Scoreboard Mask, Scoreboard * Delta. For
+    *  these scoreboard related states, a MEDIA_STATE_FLUSH is sufficient."
+    */
+   iris_emit_pipe_control_flush(batch, PIPE_CONTROL_CS_STALL);
+
+   iris_emit_cmd(batch, GENX(MEDIA_VFE_STATE), vfe) {
+      if (prog_data->total_scratch) {
+         /* Per Thread Scratch Space is in the range [0, 11] where
+          * 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
+          */
+         // XXX: vfe.ScratchSpaceBasePointer
+         //vfe.PerThreadScratchSpace =
+            //ffs(stage_state->per_thread_scratch) - 11;
+      }
+
+      vfe.MaximumNumberofThreads =
+         devinfo->max_cs_threads * screen->subslice_total - 1;
+#if GEN_GEN < 11
+      vfe.ResetGatewayTimer =
+         Resettingrelativetimerandlatchingtheglobaltimestamp;
+#endif
+
+      vfe.NumberofURBEntries = 2;
+      vfe.URBEntryAllocationSize = 2;
+
+      // XXX: Use Indirect Payload Storage?
+      vfe.CURBEAllocationSize =
+         ALIGN(cs_prog_data->push.per_thread.regs * cs_prog_data->threads +
+               cs_prog_data->push.cross_thread.regs, 2);
+   }
+
+   // XXX: hack iris_set_constant_buffers to upload compute shader constants
+   // XXX: differently...?
+
+   if (cs_prog_data->push.total.size > 0) {
+      iris_emit_cmd(batch, GENX(MEDIA_CURBE_LOAD), curbe) {
+         curbe.CURBETotalDataLength =
+            ALIGN(cs_prog_data->push.total.size, 64);
+         // XXX: curbe.CURBEDataStartAddress = stage_state->push_const_offset;
+      }
+   }
+
+   struct pipe_resource *desc_res = NULL;
+   uint32_t desc[GENX(INTERFACE_DESCRIPTOR_DATA_length)];
+
+   iris_pack_state(GENX(INTERFACE_DESCRIPTOR_DATA), desc, idd) {
+      idd.SamplerStatePointer = shs->sampler_table.offset;
+      idd.BindingTablePointer = binder->bt_offset[MESA_SHADER_COMPUTE];
+   }
+
+   for (int i = 0; i < GENX(INTERFACE_DESCRIPTOR_DATA_length); i++)
+      desc[i] |= ((uint32_t *) shader->derived_data)[i];
+
+   iris_emit_cmd(batch, GENX(MEDIA_INTERFACE_DESCRIPTOR_LOAD), load) {
+      load.InterfaceDescriptorTotalLength =
+         GENX(INTERFACE_DESCRIPTOR_DATA_length) * sizeof(uint32_t);
+      load.InterfaceDescriptorDataStartAddress =
+         emit_state(batch, ice->state.dynamic_uploader,
+                    &desc_res, desc, sizeof(desc), 32);
+   }
+
+   pipe_resource_reference(&desc_res, NULL);
+
+   // XXX: grid->indirect
+
+   uint32_t group_size = grid->block[0] * grid->block[1] * grid->block[2];
+   uint32_t remainder = group_size & (cs_prog_data->simd_size - 1);
+   uint32_t right_mask;
+
+   if (remainder > 0)
+      right_mask = ~0u >> (32 - remainder);
+   else
+      right_mask = ~0u >> (32 - cs_prog_data->simd_size);
+
+   iris_emit_cmd(batch, GENX(GPGPU_WALKER), ggw) {
+      ggw.SIMDSize = cs_prog_data->simd_size / 16;
+      ggw.ThreadDepthCounterMaximum = 0;
+      ggw.ThreadHeightCounterMaximum = 0;
+      ggw.ThreadWidthCounterMaximum = cs_prog_data->threads - 1;
+      /* These count thread groups (the grid), not threads per group. */
+      ggw.ThreadGroupIDXDimension = grid->grid[0];
+      ggw.ThreadGroupIDYDimension = grid->grid[1];
+      ggw.ThreadGroupIDZDimension = grid->grid[2];
+      ggw.RightExecutionMask = right_mask;
+      ggw.BottomExecutionMask = 0xffffffff;
+   }
+
+   if (!batch->contains_draw) {
+      //iris_restore_context_saved_bos(ice, batch, draw);
+      batch->contains_draw = true;
+   }
+}
+
 /**
  * State module teardown.
  */
@@ -4729,8 +4938,10 @@ genX(init_state)(struct iris_context *ice)
 
    ice->vtbl.destroy_state = iris_destroy_state;
    ice->vtbl.init_render_context = iris_init_render_context;
+   ice->vtbl.init_compute_context = iris_init_compute_context;
    ice->vtbl.upload_render_state = iris_upload_render_state;
    ice->vtbl.update_surface_base_address = iris_update_surface_base_address;
+   ice->vtbl.upload_compute_state = iris_upload_compute_state;
    ice->vtbl.emit_raw_pipe_control = iris_emit_raw_pipe_control;
    ice->vtbl.load_register_imm32 = iris_load_register_imm32;
    ice->vtbl.load_register_imm64 = iris_load_register_imm64;
@@ -4749,6 +4960,7 @@ genX(init_state)(struct iris_context *ice)
    ice->vtbl.populate_tes_key = iris_populate_tes_key;
    ice->vtbl.populate_gs_key = iris_populate_gs_key;
    ice->vtbl.populate_fs_key = iris_populate_fs_key;
+   ice->vtbl.populate_cs_key = iris_populate_cs_key;
 
    ice->state.dirty = ~0ull;
 
-- 
2.30.2