From 003b04e266ae0faad563c1228561b53f33a68474 Mon Sep 17 00:00:00 2001 From: Jason Ekstrand Date: Sun, 3 Feb 2019 19:46:16 -0600 Subject: [PATCH] intel/compiler: Allow MESA_SHADER_KERNEL Reviewed-by: Kenneth Graunke Part-of: --- src/intel/compiler/brw_compiler.c | 6 ++++-- src/intel/compiler/brw_compiler.h | 4 ++-- src/intel/compiler/brw_fs.cpp | 6 +++--- src/intel/compiler/brw_fs_nir.cpp | 14 ++++++++------ src/intel/compiler/brw_fs_visitor.cpp | 4 ++-- src/intel/compiler/brw_nir_lower_cs_intrinsics.c | 3 ++- 6 files changed, 21 insertions(+), 16 deletions(-) diff --git a/src/intel/compiler/brw_compiler.c b/src/intel/compiler/brw_compiler.c index e58ed67900b..0dc3d90eb8c 100644 --- a/src/intel/compiler/brw_compiler.c +++ b/src/intel/compiler/brw_compiler.c @@ -112,7 +112,7 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo) if (devinfo->gen >= 10) { /* We don't support vec4 mode on Cannonlake. */ - for (int i = MESA_SHADER_VERTEX; i < MESA_SHADER_STAGES; i++) + for (int i = MESA_SHADER_VERTEX; i < MESA_ALL_SHADER_STAGES; i++) compiler->scalar_stage[i] = true; } else { compiler->scalar_stage[MESA_SHADER_VERTEX] = @@ -158,7 +158,7 @@ brw_compiler_create(void *mem_ctx, const struct gen_device_info *devinfo) int64_options |= nir_lower_imul_2x32_64; /* We want the GLSL compiler to emit code that uses condition codes */ - for (int i = 0; i < MESA_SHADER_STAGES; i++) { + for (int i = 0; i < MESA_ALL_SHADER_STAGES; i++) { compiler->glsl_compiler_options[i].MaxUnrollIterations = 0; compiler->glsl_compiler_options[i].MaxIfDepth = devinfo->gen < 6 ? 16 : UINT_MAX; @@ -247,6 +247,7 @@ brw_prog_data_size(gl_shader_stage stage) [MESA_SHADER_GEOMETRY] = sizeof(struct brw_gs_prog_data), [MESA_SHADER_FRAGMENT] = sizeof(struct brw_wm_prog_data), [MESA_SHADER_COMPUTE] = sizeof(struct brw_cs_prog_data), + [MESA_SHADER_KERNEL] = sizeof(struct brw_cs_prog_data), }; assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_sizes)); return stage_sizes[stage]; @@ -262,6 +263,7 @@ brw_prog_key_size(gl_shader_stage stage) [MESA_SHADER_GEOMETRY] = sizeof(struct brw_gs_prog_key), [MESA_SHADER_FRAGMENT] = sizeof(struct brw_wm_prog_key), [MESA_SHADER_COMPUTE] = sizeof(struct brw_cs_prog_key), + [MESA_SHADER_KERNEL] = sizeof(struct brw_cs_prog_key), }; assert((int)stage >= 0 && stage < ARRAY_SIZE(stage_sizes)); return stage_sizes[stage]; diff --git a/src/intel/compiler/brw_compiler.h b/src/intel/compiler/brw_compiler.h index ccbb5cc6409..8df2ee59207 100644 --- a/src/intel/compiler/brw_compiler.h +++ b/src/intel/compiler/brw_compiler.h @@ -92,9 +92,9 @@ struct brw_compiler { void (*shader_debug_log)(void *, const char *str, ...) PRINTFLIKE(2, 3); void (*shader_perf_log)(void *, const char *str, ...) PRINTFLIKE(2, 3); - bool scalar_stage[MESA_SHADER_STAGES]; + bool scalar_stage[MESA_ALL_SHADER_STAGES]; bool use_tcs_8_patch; - struct gl_shader_compiler_options glsl_compiler_options[MESA_SHADER_STAGES]; + struct gl_shader_compiler_options glsl_compiler_options[MESA_ALL_SHADER_STAGES]; /** * Apply workarounds for SIN and COS output range problems. diff --git a/src/intel/compiler/brw_fs.cpp b/src/intel/compiler/brw_fs.cpp index dbf7391baef..ec08d84554a 100644 --- a/src/intel/compiler/brw_fs.cpp +++ b/src/intel/compiler/brw_fs.cpp @@ -7841,7 +7841,7 @@ fs_visitor::allocate_registers(bool allow_spilling) prog_data->total_scratch = brw_get_scratch_size(last_scratch); - if (stage == MESA_SHADER_COMPUTE) { + if (stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_KERNEL) { if (devinfo->is_haswell) { /* According to the MEDIA_VFE_STATE's "Per Thread Scratch Space" * field documentation, Haswell supports a minimum of 2kB of @@ -8229,7 +8229,7 @@ fs_visitor::run_fs(bool allow_spilling, bool do_rep_send) bool fs_visitor::run_cs(bool allow_spilling) { - assert(stage == MESA_SHADER_COMPUTE); + assert(stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_KERNEL); setup_cs_payload(); @@ -8796,7 +8796,7 @@ brw_compile_fs(const struct brw_compiler *compiler, void *log_data, fs_reg * fs_visitor::emit_cs_work_group_id_setup() { - assert(stage == MESA_SHADER_COMPUTE); + assert(stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_KERNEL); fs_reg *reg = new(this->mem_ctx) fs_reg(vgrf(glsl_type::uvec3_type)); diff --git a/src/intel/compiler/brw_fs_nir.cpp b/src/intel/compiler/brw_fs_nir.cpp index afbb436b95a..f10525741f2 100644 --- a/src/intel/compiler/brw_fs_nir.cpp +++ b/src/intel/compiler/brw_fs_nir.cpp @@ -100,7 +100,7 @@ fs_visitor::nir_setup_uniforms() uniforms = nir->num_uniforms / 4; - if (stage == MESA_SHADER_COMPUTE) { + if (stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_KERNEL) { /* Add uniforms for builtins after regular NIR uniforms. */ assert(uniforms == prog_data->nr_params); @@ -184,7 +184,8 @@ emit_system_values_block(nir_block *block, fs_visitor *v) break; case nir_intrinsic_load_work_group_id: - assert(v->stage == MESA_SHADER_COMPUTE); + assert(v->stage == MESA_SHADER_COMPUTE || + v->stage == MESA_SHADER_KERNEL); reg = &v->nir_system_values[SYSTEM_VALUE_WORK_GROUP_ID]; if (reg->file == BAD_FILE) *reg = *v->emit_cs_work_group_id_setup(); @@ -489,6 +490,7 @@ fs_visitor::nir_emit_instr(nir_instr *instr) nir_emit_fs_intrinsic(abld, nir_instr_as_intrinsic(instr)); break; case MESA_SHADER_COMPUTE: + case MESA_SHADER_KERNEL: nir_emit_cs_intrinsic(abld, nir_instr_as_intrinsic(instr)); break; default: @@ -3714,7 +3716,7 @@ void fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr) { - assert(stage == MESA_SHADER_COMPUTE); + assert(stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_KERNEL); struct brw_cs_prog_data *cs_prog_data = brw_cs_prog_data(prog_data); fs_reg dest; @@ -3792,7 +3794,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld, case nir_intrinsic_load_shared: { assert(devinfo->gen >= 7); - assert(stage == MESA_SHADER_COMPUTE); + assert(stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_KERNEL); const unsigned bit_size = nir_dest_bit_size(instr->dest); fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; @@ -3828,7 +3830,7 @@ fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld, case nir_intrinsic_store_shared: { assert(devinfo->gen >= 7); - assert(stage == MESA_SHADER_COMPUTE); + assert(stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_KERNEL); const unsigned bit_size = nir_src_bit_size(instr->src[0]); fs_reg srcs[SURFACE_LOGICAL_NUM_SRCS]; @@ -4280,7 +4282,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr break; } - if (stage != MESA_SHADER_COMPUTE) + if (stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL) slm_fence = false; /* If the workgroup fits in a single HW thread, the messages for SLM are diff --git a/src/intel/compiler/brw_fs_visitor.cpp b/src/intel/compiler/brw_fs_visitor.cpp index 5e06f10a0f4..d8918858a8d 100644 --- a/src/intel/compiler/brw_fs_visitor.cpp +++ b/src/intel/compiler/brw_fs_visitor.cpp @@ -845,7 +845,7 @@ fs_visitor::emit_cs_terminate() assert(devinfo->gen >= 7); /* We are getting the thread ID from the compute shader header */ - assert(stage == MESA_SHADER_COMPUTE); + assert(stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_KERNEL); /* We can't directly send from g0, since sends with EOT have to use * g112-127. So, copy it to a virtual register, The register allocator will @@ -880,7 +880,7 @@ fs_visitor::emit_barrier() } /* We are getting the barrier ID from the compute shader header */ - assert(stage == MESA_SHADER_COMPUTE); + assert(stage == MESA_SHADER_COMPUTE || stage == MESA_SHADER_KERNEL); fs_reg payload = fs_reg(VGRF, alloc.allocate(1), BRW_REGISTER_TYPE_UD); diff --git a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c index 102bd29595a..401fbdd1a4b 100644 --- a/src/intel/compiler/brw_nir_lower_cs_intrinsics.c +++ b/src/intel/compiler/brw_nir_lower_cs_intrinsics.c @@ -211,7 +211,8 @@ lower_cs_intrinsics_convert_impl(struct lower_intrinsics_state *state) bool brw_nir_lower_cs_intrinsics(nir_shader *nir) { - assert(nir->info.stage == MESA_SHADER_COMPUTE); + assert(nir->info.stage == MESA_SHADER_COMPUTE || + nir->info.stage == MESA_SHADER_KERNEL); struct lower_intrinsics_state state = { .nir = nir, -- 2.30.2