From: Kristian Høgsberg Kristensen Date: Thu, 11 Jun 2015 22:31:42 +0000 (-0700) Subject: vk: Implement basic compute shader support X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=765175f5d195df727ed9d171720f4e843809744e;p=mesa.git vk: Implement basic compute shader support --- diff --git a/src/glsl/glsl_parser_extras.cpp b/src/glsl/glsl_parser_extras.cpp index be6713c46a2..982ade6a70a 100644 --- a/src/glsl/glsl_parser_extras.cpp +++ b/src/glsl/glsl_parser_extras.cpp @@ -87,6 +87,8 @@ _mesa_glsl_parse_state::_mesa_glsl_parse_state(struct gl_context *_ctx, this->extensions = &ctx->Extensions; + this->ARB_compute_shader_enable = true; + this->Const.MaxLights = ctx->Const.MaxLights; this->Const.MaxClipPlanes = ctx->Const.MaxClipPlanes; this->Const.MaxTextureUnits = ctx->Const.MaxTextureUnits; diff --git a/src/mesa/drivers/dri/i965/brw_cs.cpp b/src/mesa/drivers/dri/i965/brw_cs.cpp index 70731e4a4ff..2432875d0f4 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.cpp +++ b/src/mesa/drivers/dri/i965/brw_cs.cpp @@ -55,7 +55,7 @@ brw_cs_prog_data_compare(const void *in_a, const void *in_b) } -static const unsigned * +const unsigned * brw_cs_emit(struct brw_context *brw, void *mem_ctx, const struct brw_cs_prog_key *key, diff --git a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h index 8404aa3e824..b83d49a0635 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.h +++ b/src/mesa/drivers/dri/i965/brw_cs.h @@ -41,6 +41,15 @@ bool brw_cs_prog_data_compare(const void *a, const void *b); void brw_upload_cs_prog(struct brw_context *brw); +const unsigned * +brw_cs_emit(struct brw_context *brw, + void *mem_ctx, + const struct brw_cs_prog_key *key, + struct brw_cs_prog_data *prog_data, + struct gl_compute_program *cp, + struct gl_shader_program *prog, + unsigned *final_assembly_size); + #ifdef __cplusplus } #endif diff --git a/src/vulkan/compiler.cpp b/src/vulkan/compiler.cpp index 28c4874a438..0db7935f085 100644 --- a/src/vulkan/compiler.cpp +++ b/src/vulkan/compiler.cpp @@ -32,6 +32,7 @@ #include #include +#include #include #include @@ -603,6 +604,68 @@ really_do_gs_prog(struct brw_context *brw, return true; } +static bool +brw_codegen_cs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_compute_program *cp, + struct brw_cs_prog_key *key, struct anv_pipeline *pipeline) +{ + struct gl_context *ctx = &brw->ctx; + const GLuint *program; + void *mem_ctx = ralloc_context(NULL); + GLuint program_size; + struct brw_cs_prog_data prog_data; + + struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE]; + assert (cs); + + memset(&prog_data, 0, sizeof(prog_data)); + + /* Allocate the references to the uniforms that will end up in the + * prog_data associated with the compiled program, and which will be freed + * by the state cache. + */ + int param_count = cs->num_uniform_components; + + /* The backend also sometimes adds params for texture size. */ + param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits; + prog_data.base.param = + rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data.base.pull_param = + rzalloc_array(NULL, const gl_constant_value *, param_count); + prog_data.base.nr_params = param_count; + + program = brw_cs_emit(brw, mem_ctx, key, &prog_data, + &cp->program, prog, &program_size); + if (program == NULL) { + ralloc_free(mem_ctx); + return false; + } + + if (unlikely(INTEL_DEBUG & DEBUG_CS)) + fprintf(stderr, "\n"); + + struct anv_state cs_state = anv_state_stream_alloc(&pipeline->program_stream, + program_size, 64); + memcpy(cs_state.map, program, program_size); + + pipeline->cs_simd = cs_state.offset; + + ralloc_free(mem_ctx); + + return true; +} + +static void +brw_cs_populate_key(struct brw_context *brw, + struct brw_compute_program *bcp, struct brw_cs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + /* The unique compute program ID */ + key->program_string_id = bcp->id; +} + static void fail_on_compile_error(int status, const char *msg) { @@ -652,6 +715,22 @@ anv_compiler_create(struct anv_device *device) compiler->brw->is_baytrail = devinfo->is_baytrail; compiler->brw->is_haswell = devinfo->is_haswell; compiler->brw->is_cherryview = devinfo->is_cherryview; + + /* We need this at least for CS, which will check brw->max_cs_threads + * against the work group size. */ + compiler->brw->max_vs_threads = devinfo->max_vs_threads; + compiler->brw->max_hs_threads = devinfo->max_hs_threads; + compiler->brw->max_ds_threads = devinfo->max_ds_threads; + compiler->brw->max_gs_threads = devinfo->max_gs_threads; + compiler->brw->max_wm_threads = devinfo->max_wm_threads; + compiler->brw->max_cs_threads = devinfo->max_cs_threads; + compiler->brw->urb.size = devinfo->urb.size; + compiler->brw->urb.min_vs_entries = devinfo->urb.min_vs_entries; + compiler->brw->urb.max_vs_entries = devinfo->urb.max_vs_entries; + compiler->brw->urb.max_hs_entries = devinfo->urb.max_hs_entries; + compiler->brw->urb.max_ds_entries = devinfo->urb.max_ds_entries; + compiler->brw->urb.max_gs_entries = devinfo->urb.max_gs_entries; + compiler->brw->intelScreen = compiler->screen; compiler->screen->devinfo = &device->info; @@ -992,6 +1071,20 @@ anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) pipeline->active_stages |= VK_SHADER_STAGE_FRAGMENT_BIT; } + if (pipeline->shaders[VK_SHADER_STAGE_COMPUTE]) { + struct brw_cs_prog_key cs_key; + struct gl_compute_program *cp = (struct gl_compute_program *) + program->_LinkedShaders[MESA_SHADER_COMPUTE]->Program; + struct brw_compute_program *bcp = brw_compute_program(cp); + + brw_cs_populate_key(brw, bcp, &cs_key); + + success = brw_codegen_cs_prog(brw, program, bcp, &cs_key, pipeline); + fail_if(!success, "brw_codegen_cs_prog failed\n"); + pipeline->prog_data[VK_SHADER_STAGE_COMPUTE] = &pipeline->cs_prog_data.base; + pipeline->active_stages |= VK_SHADER_STAGE_COMPUTE_BIT; + } + brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program); gen7_compute_urb_partition(pipeline); diff --git a/src/vulkan/device.c b/src/vulkan/device.c index e44fb2b1cd9..b27bd6d765b 100644 --- a/src/vulkan/device.c +++ b/src/vulkan/device.c @@ -2650,9 +2650,22 @@ void anv_CmdBindPipeline( struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; struct anv_pipeline *pipeline = (struct anv_pipeline *) _pipeline; - cmd_buffer->pipeline = pipeline; - cmd_buffer->vb_dirty |= pipeline->vb_used; - cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + switch (pipelineBindPoint) { + case VK_PIPELINE_BIND_POINT_COMPUTE: + cmd_buffer->compute_pipeline = pipeline; + cmd_buffer->compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + break; + + case VK_PIPELINE_BIND_POINT_GRAPHICS: + cmd_buffer->pipeline = pipeline; + cmd_buffer->vb_dirty |= pipeline->vb_used; + cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY; + break; + + default: + assert(!"invalid bind point"); + break; + } } void anv_CmdBindDynamicStateObject( @@ -2818,11 +2831,15 @@ void anv_CmdBindVertexBuffers( static VkResult cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, - unsigned stage) + unsigned stage, struct anv_state *bt_state) { - struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout; + struct anv_pipeline_layout *layout; uint32_t color_attachments, bias, size; - struct anv_state bt_state; + + if (stage == VK_SHADER_STAGE_COMPUTE) + layout = cmd_buffer->compute_pipeline->layout; + else + layout = cmd_buffer->pipeline->layout; if (stage == VK_SHADER_STAGE_FRAGMENT) { bias = MAX_RTS; @@ -2841,26 +2858,12 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, return VK_SUCCESS; size = (bias + surface_count) * sizeof(uint32_t); - bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32); - uint32_t *bt_map = bt_state.map; + *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32); + uint32_t *bt_map = bt_state->map; - if (bt_state.map == NULL) + if (bt_state->map == NULL) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - static const uint32_t binding_table_opcodes[] = { - [VK_SHADER_STAGE_VERTEX] = 38, - [VK_SHADER_STAGE_TESS_CONTROL] = 39, - [VK_SHADER_STAGE_TESS_EVALUATION] = 40, - [VK_SHADER_STAGE_GEOMETRY] = 41, - [VK_SHADER_STAGE_FRAGMENT] = 42, - [VK_SHADER_STAGE_COMPUTE] = 0, - }; - - anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS, - ._3DCommandSubOpcode = binding_table_opcodes[stage], - .PointertoVSBindingTable = bt_state.offset); - for (uint32_t ca = 0; ca < color_attachments; ca++) { const struct anv_surface_view *view = cmd_buffer->framebuffer->color_attachments[ca]; @@ -2935,39 +2938,27 @@ cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer, } static VkResult -cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, unsigned stage) +cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, + unsigned stage, struct anv_state *state) { - struct anv_pipeline_layout *layout = cmd_buffer->pipeline->layout; - struct anv_state state; + struct anv_pipeline_layout *layout; + uint32_t sampler_count; - if (!layout) - return VK_SUCCESS; - - uint32_t sampler_count = layout->stage[stage].sampler_count; + if (stage == VK_SHADER_STAGE_COMPUTE) + layout = cmd_buffer->compute_pipeline->layout; + else + layout = cmd_buffer->pipeline->layout; + sampler_count = layout ? layout->stage[stage].sampler_count : 0; if (sampler_count == 0) return VK_SUCCESS; uint32_t size = sampler_count * 16; - state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32); + *state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32); - if (state.map == NULL) + if (state->map == NULL) return VK_ERROR_OUT_OF_DEVICE_MEMORY; - static const uint32_t sampler_state_opcodes[] = { - [VK_SHADER_STAGE_VERTEX] = 43, - [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */ - [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */ - [VK_SHADER_STAGE_GEOMETRY] = 46, - [VK_SHADER_STAGE_FRAGMENT] = 47, - [VK_SHADER_STAGE_COMPUTE] = 0, - }; - - anv_batch_emit(&cmd_buffer->batch, - GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS, - ._3DCommandSubOpcode = sampler_state_opcodes[stage], - .PointertoVSSamplerState = state.offset); - for (uint32_t set = 0; set < layout->num_sets; set++) { struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set]; struct anv_descriptor_set_layout *set_layout = layout->set[set].layout; @@ -2983,7 +2974,7 @@ cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, unsigned stage) if (!sampler) continue; - memcpy(state.map + (start + b) * 16, + memcpy(state->map + (start + b) * 16, sampler->state, sizeof(sampler->state)); } } @@ -2991,6 +2982,54 @@ cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer, unsigned stage) return VK_SUCCESS; } +static VkResult +flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage) +{ + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers); + if (result != VK_SUCCESS) + return result; + result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces); + if (result != VK_SUCCESS) + return result; + + static const uint32_t sampler_state_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 43, + [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */ + [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */ + [VK_SHADER_STAGE_GEOMETRY] = 46, + [VK_SHADER_STAGE_FRAGMENT] = 47, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + static const uint32_t binding_table_opcodes[] = { + [VK_SHADER_STAGE_VERTEX] = 38, + [VK_SHADER_STAGE_TESS_CONTROL] = 39, + [VK_SHADER_STAGE_TESS_EVALUATION] = 40, + [VK_SHADER_STAGE_GEOMETRY] = 41, + [VK_SHADER_STAGE_FRAGMENT] = 42, + [VK_SHADER_STAGE_COMPUTE] = 0, + }; + + if (samplers.alloc_size > 0) { + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS, + ._3DCommandSubOpcode = sampler_state_opcodes[stage], + .PointertoVSSamplerState = samplers.offset); + } + + if (surfaces.alloc_size > 0) { + anv_batch_emit(&cmd_buffer->batch, + GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS, + ._3DCommandSubOpcode = binding_table_opcodes[stage], + .PointertoVSBindingTable = surfaces.offset); + } + + return VK_SUCCESS; +} + static void flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) { @@ -2999,11 +3038,7 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) VkResult result; for_each_bit(s, dirty) { - result = cmd_buffer_emit_binding_table(cmd_buffer, s); - if (result != VK_SUCCESS) - break; - - result = cmd_buffer_emit_samplers(cmd_buffer, s); + result = flush_descriptor_set(cmd_buffer, s); if (result != VK_SUCCESS) break; } @@ -3016,12 +3051,11 @@ flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer) /* Re-emit all active binding tables */ for_each_bit(s, cmd_buffer->pipeline->active_stages) { - result = cmd_buffer_emit_binding_table(cmd_buffer, s); - result = cmd_buffer_emit_samplers(cmd_buffer, s); - } + result = flush_descriptor_set(cmd_buffer, s); - /* It had better succeed this time */ - assert(result == VK_SUCCESS); + /* It had better succeed this time */ + assert(result == VK_SUCCESS); + } } cmd_buffer->descriptors_dirty &= ~cmd_buffer->pipeline->active_stages; @@ -3061,6 +3095,78 @@ anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer, return state; } +static VkResult +flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_device *device = cmd_buffer->device; + struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline; + struct anv_state surfaces = { 0, }, samplers = { 0, }; + VkResult result; + + result = cmd_buffer_emit_samplers(cmd_buffer, + VK_SHADER_STAGE_COMPUTE, &samplers); + if (result != VK_SUCCESS) + return result; + result = cmd_buffer_emit_binding_table(cmd_buffer, + VK_SHADER_STAGE_COMPUTE, &surfaces); + if (result != VK_SUCCESS) + return result; + + struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = { + .KernelStartPointer = pipeline->cs_simd, + .KernelStartPointerHigh = 0, + .BindingTablePointer = surfaces.offset, + .BindingTableEntryCount = 0, + .SamplerStatePointer = samplers.offset, + .SamplerCount = 0, + .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */ + }; + + uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t); + struct anv_state state = + anv_state_pool_alloc(&device->dynamic_state_pool, size, 64); + + GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD, + .InterfaceDescriptorTotalLength = size, + .InterfaceDescriptorDataStartAddress = state.offset); + + return VK_SUCCESS; +} + +static void +anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer) +{ + struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline; + VkResult result; + + assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT); + + if (cmd_buffer->current_pipeline != GPGPU) { + anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT, + .PipelineSelection = GPGPU); + cmd_buffer->current_pipeline = GPGPU; + } + + if (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) + anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch); + + if ((cmd_buffer->descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) || + (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) { + result = flush_compute_descriptor_set(cmd_buffer); + if (result != VK_SUCCESS) { + result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer); + assert(result == VK_SUCCESS); + result = flush_compute_descriptor_set(cmd_buffer); + assert(result == VK_SUCCESS); + } + cmd_buffer->descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE; + } + + cmd_buffer->compute_dirty = 0; +} + static void anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer) { @@ -3278,15 +3384,80 @@ void anv_CmdDispatch( uint32_t y, uint32_t z) { - stub(); + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + uint32_t size = SIMD8; /* FIXME */ + uint32_t right_mask = 0; /* FIXME */ + uint32_t thread_width_max = 0; /* FIXME */ + + anv_cmd_buffer_flush_compute_state(cmd_buffer); + + anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, + + .InterfaceDescriptorOffset = 0, + .IndirectDataLength = 0, + .IndirectDataStartAddress = 0, + + .SIMDSize = size, + + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = thread_width_max, + + .ThreadGroupIDStartingX = 0, + .ThreadGroupIDXDimension = x, + .ThreadGroupIDStartingY = 0, + .ThreadGroupIDYDimension = y, + .ThreadGroupIDStartingResumeZ = 0, + .ThreadGroupIDZDimension = z, + .RightExecutionMask = right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); } +#define GPGPU_DISPATCHDIMX 0x2500 +#define GPGPU_DISPATCHDIMY 0x2504 +#define GPGPU_DISPATCHDIMZ 0x2508 + void anv_CmdDispatchIndirect( VkCmdBuffer cmdBuffer, - VkBuffer buffer, + VkBuffer _buffer, VkDeviceSize offset) { - stub(); + struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer; + struct anv_buffer *buffer = (struct anv_buffer *) _buffer; + struct anv_bo *bo = buffer->bo; + uint32_t bo_offset = buffer->offset + offset; + + anv_cmd_buffer_flush_compute_state(cmd_buffer); + + anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset); + anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4); + anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8); + + uint32_t size = SIMD8; /* FIXME */ + uint32_t right_mask = 0; /* FIXME */ + uint32_t thread_width_max = 0; /* FIXME */ + + /* FIXME: We can't compute thread_width_max for indirect, looks like it + * depends on DIMX. */ + + anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER, + .IndirectParameterEnable = true, + .InterfaceDescriptorOffset = 0, + .IndirectDataLength = 0, + .IndirectDataStartAddress = 0, + + .SIMDSize = size, + + .ThreadDepthCounterMaximum = 0, + .ThreadHeightCounterMaximum = 0, + .ThreadWidthCounterMaximum = thread_width_max, + + .RightExecutionMask = right_mask, + .BottomExecutionMask = 0xffffffff); + + anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH); } void anv_CmdSetEvent( diff --git a/src/vulkan/pipeline.c b/src/vulkan/pipeline.c index 28818152ae0..cf7562ae496 100644 --- a/src/vulkan/pipeline.c +++ b/src/vulkan/pipeline.c @@ -718,11 +718,61 @@ VkResult anv_CreateGraphicsPipelineDerivative( } VkResult anv_CreateComputePipeline( - VkDevice device, + VkDevice _device, const VkComputePipelineCreateInfo* pCreateInfo, VkPipeline* pPipeline) { - stub_return(VK_UNSUPPORTED); + struct anv_device *device = (struct anv_device *) _device; + struct anv_pipeline *pipeline; + VkResult result; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO); + + pipeline = anv_device_alloc(device, sizeof(*pipeline), 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (pipeline == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + pipeline->base.destructor = anv_pipeline_destroy; + pipeline->device = device; + pipeline->layout = (struct anv_pipeline_layout *) pCreateInfo->layout; + + result = anv_reloc_list_init(&pipeline->batch.relocs, device); + if (result != VK_SUCCESS) { + anv_device_free(device, pipeline); + return result; + } + pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + + anv_state_stream_init(&pipeline->program_stream, + &device->instruction_block_pool); + + memset(pipeline->shaders, 0, sizeof(pipeline->shaders)); + + pipeline->shaders[VK_SHADER_STAGE_COMPUTE] = + (struct anv_shader *) pCreateInfo->cs.shader; + + pipeline->use_repclear = false; + + anv_compiler_run(device->compiler, pipeline); + + anv_batch_emit(&pipeline->batch, GEN8_MEDIA_VFE_STATE, + .ScratchSpaceBasePointer = 0, /* FIXME: Scratch bo, this should be a reloc? */ + .StackSize = 0, + .PerThreadScratchSpace = 0, + .ScratchSpaceBasePointerHigh = 0, + + .MaximumNumberofThreads = device->info.max_cs_threads - 1, + .NumberofURBEntries = 2, + .ResetGatewayTimer = true, + .BypassGatewayControl = true, + .URBEntryAllocationSize = 2, + .CURBEAllocationSize = 0); + + *pPipeline = (VkPipeline) pipeline; + + return VK_SUCCESS; } VkResult anv_StorePipeline( diff --git a/src/vulkan/private.h b/src/vulkan/private.h index 8bc5fd10880..cf1cf4c1268 100644 --- a/src/vulkan/private.h +++ b/src/vulkan/private.h @@ -653,8 +653,10 @@ struct anv_cmd_buffer { uint32_t current_pipeline; uint32_t vb_dirty; uint32_t dirty; + uint32_t compute_dirty; uint32_t descriptors_dirty; struct anv_pipeline * pipeline; + struct anv_pipeline * compute_pipeline; struct anv_framebuffer * framebuffer; struct anv_dynamic_rs_state * rs_state; struct anv_dynamic_ds_state * ds_state; @@ -692,6 +694,7 @@ struct anv_pipeline { struct brw_vs_prog_data vs_prog_data; struct brw_wm_prog_data wm_prog_data; struct brw_gs_prog_data gs_prog_data; + struct brw_cs_prog_data cs_prog_data; struct brw_stage_prog_data * prog_data[VK_NUM_SHADER_STAGE]; struct { uint32_t vs_start; @@ -705,6 +708,7 @@ struct anv_pipeline { struct anv_bo vs_scratch_bo; struct anv_bo ps_scratch_bo; struct anv_bo gs_scratch_bo; + struct anv_bo cs_scratch_bo; uint32_t active_stages; struct anv_state_stream program_stream; @@ -714,6 +718,7 @@ struct anv_pipeline { uint32_t ps_simd16; uint32_t gs_vec4; uint32_t gs_vertex_count; + uint32_t cs_simd; uint32_t vb_used; uint32_t binding_stride[MAX_VBS];