X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fvulkan%2Fanv_pipeline.c;h=f25cf37ea254a5b25738dc66117f99c4e3a0d8ed;hb=773a51e77260bc7766dd6caf93152808f320d78c;hp=2cd06bd78d55944971b065e5886f846e0424b202;hpb=a9eabd539cf75f8e0e0c1c012a7f5e666304dd9e;p=mesa.git diff --git a/src/intel/vulkan/anv_pipeline.c b/src/intel/vulkan/anv_pipeline.c index 2cd06bd78d5..f25cf37ea25 100644 --- a/src/intel/vulkan/anv_pipeline.c +++ b/src/intel/vulkan/anv_pipeline.c @@ -30,7 +30,7 @@ #include "util/mesa-sha1.h" #include "common/gen_l3_config.h" #include "anv_private.h" -#include "brw_nir.h" +#include "compiler/brw_nir.h" #include "anv_nir.h" #include "spirv/nir_spirv.h" @@ -83,19 +83,27 @@ void anv_DestroyShaderModule( #define SPIR_V_MAGIC_NUMBER 0x07230203 +static const uint64_t stage_to_debug[] = { + [MESA_SHADER_VERTEX] = DEBUG_VS, + [MESA_SHADER_TESS_CTRL] = DEBUG_TCS, + [MESA_SHADER_TESS_EVAL] = DEBUG_TES, + [MESA_SHADER_GEOMETRY] = DEBUG_GS, + [MESA_SHADER_FRAGMENT] = DEBUG_WM, + [MESA_SHADER_COMPUTE] = DEBUG_CS, +}; + /* Eventually, this will become part of anv_CreateShader. Unfortunately, * we can't do that yet because we don't have the ability to copy nir. */ static nir_shader * -anv_shader_compile_to_nir(struct anv_device *device, +anv_shader_compile_to_nir(struct anv_pipeline *pipeline, + void *mem_ctx, struct anv_shader_module *module, const char *entrypoint_name, gl_shader_stage stage, const VkSpecializationInfo *spec_info) { - if (strcmp(entrypoint_name, "main") != 0) { - anv_finishme("Multiple shaders per module not really supported"); - } + const struct anv_device *device = pipeline->device; const struct brw_compiler *compiler = device->instance->physicalDevice.compiler; @@ -117,30 +125,52 @@ anv_shader_compile_to_nir(struct anv_device *device, assert(data + entry.size <= spec_info->pData + spec_info->dataSize); spec_entries[i].id = spec_info->pMapEntries[i].constantID; - spec_entries[i].data = *(const uint32_t *)data; + if (spec_info->dataSize == 8) + spec_entries[i].data64 = *(const uint64_t *)data; + else + spec_entries[i].data32 = *(const uint32_t *)data; } } + struct spirv_to_nir_options spirv_options = { + .lower_workgroup_access_to_offsets = true, + .caps = { + .float64 = device->instance->physicalDevice.info.gen >= 8, + .int64 = device->instance->physicalDevice.info.gen >= 8, + .tessellation = true, + .device_group = true, + .draw_parameters = true, + .image_write_without_format = true, + .multiview = true, + .variable_pointers = true, + .storage_16bit = device->instance->physicalDevice.info.gen >= 8, + }, + }; + nir_function *entry_point = spirv_to_nir(spirv, module->size / 4, spec_entries, num_spec_entries, - stage, entrypoint_name, nir_options); + stage, entrypoint_name, &spirv_options, nir_options); nir_shader *nir = entry_point->shader; - assert(nir->stage == stage); + assert(nir->info.stage == stage); nir_validate_shader(nir); + ralloc_steal(mem_ctx, nir); free(spec_entries); - if (stage == MESA_SHADER_FRAGMENT) { - nir_lower_wpos_center(nir); - nir_validate_shader(nir); + if (unlikely(INTEL_DEBUG & stage_to_debug[stage])) { + fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n", + gl_shader_stage_name(stage)); + nir_print_shader(nir, stderr); } - nir_lower_returns(nir); - nir_validate_shader(nir); - - nir_inline_functions(nir); - nir_validate_shader(nir); + /* We have to lower away local constant initializers right before we + * inline functions. That way they get properly initialized at the top + * of the function and not at the top of its caller. 
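+    *
+    * For example (hypothetical sketch, not taken from a real shader): given
+    * roughly
+    *
+    *    void helper(void) { float t[2] = {0.0, 1.0};  ... }
+    *    void main(void)   { for (;;) helper(); }
+    *
+    * lowering first emits the initializing stores for t at the top of
+    * helper(), so after inlining they re-run at every call site.  Lowering
+    * after inlining would instead hoist the stores to the top of main(),
+    * and t would only be reset once.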
+ */ + NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_local); + NIR_PASS_V(nir, nir_lower_returns); + NIR_PASS_V(nir, nir_inline_functions); /* Pick off the single entrypoint that we want */ foreach_list_typed_safe(nir_function, func, node, &nir->functions) { @@ -150,39 +180,32 @@ anv_shader_compile_to_nir(struct anv_device *device, assert(exec_list_length(&nir->functions) == 1); entry_point->name = ralloc_strdup(entry_point, "main"); - nir_remove_dead_variables(nir, nir_var_shader_in); - nir_remove_dead_variables(nir, nir_var_shader_out); - nir_remove_dead_variables(nir, nir_var_system_value); - nir_validate_shader(nir); + /* Make sure we lower constant initializers on output variables so that + * nir_remove_dead_variables below sees the corresponding stores + */ + NIR_PASS_V(nir, nir_lower_constant_initializers, nir_var_shader_out); - nir_propagate_invariant(nir); - nir_validate_shader(nir); + NIR_PASS_V(nir, nir_remove_dead_variables, + nir_var_shader_in | nir_var_shader_out | nir_var_system_value); - nir_lower_io_to_temporaries(entry_point->shader, entry_point->impl, - true, false); + if (stage == MESA_SHADER_FRAGMENT) + NIR_PASS_V(nir, nir_lower_wpos_center, pipeline->sample_shading_enable); - nir_lower_system_values(nir); - nir_validate_shader(nir); + /* Now that we've deleted all but the main function, we can go ahead and + * lower the rest of the constant initializers. + */ + NIR_PASS_V(nir, nir_lower_constant_initializers, ~0); + NIR_PASS_V(nir, nir_propagate_invariant); + NIR_PASS_V(nir, nir_lower_io_to_temporaries, + entry_point->impl, true, false); /* Vulkan uses the separate-shader linking model */ - nir->info->separate_shader = true; + nir->info.separate_shader = true; nir = brw_preprocess_nir(compiler, nir); - nir_lower_clip_cull_distance_arrays(nir); - nir_validate_shader(nir); - - nir_shader_gather_info(nir, entry_point->impl); - - nir_variable_mode indirect_mask = 0; - if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput) - indirect_mask |= nir_var_shader_in; - if (compiler->glsl_compiler_options[stage].EmitNoIndirectOutput) - indirect_mask |= nir_var_shader_out; - if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp) - indirect_mask |= nir_var_local; - - nir_lower_indirect_derefs(nir, indirect_mask); + if (stage == MESA_SHADER_FRAGMENT) + NIR_PASS_V(nir, anv_nir_lower_input_attachments); return nir; } @@ -222,13 +245,31 @@ static const uint32_t vk_to_gen_primitive_type[] = { [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = _3DPRIM_LINESTRIP_ADJ, [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = _3DPRIM_TRILIST_ADJ, [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ, -/* [VK_PRIMITIVE_TOPOLOGY_PATCH_LIST] = _3DPRIM_PATCHLIST_1 */ }; static void populate_sampler_prog_key(const struct gen_device_info *devinfo, struct brw_sampler_prog_key_data *key) { + /* Almost all multisampled textures are compressed. The only time when we + * don't compress a multisampled texture is for 16x MSAA with a surface + * width greater than 8k which is a bit of an edge case. Since the sampler + * just ignores the MCS parameter to ld2ms when MCS is disabled, it's safe + * to tell the compiler to always assume compression. + */ + key->compressed_multisample_layout_mask = ~0; + + /* SkyLake added support for 16x MSAA. With this came a new message for + * reading from a 16x MSAA surface with compression. The new message was + * needed because now the MCS data is 64 bits instead of 32 or lower as is + * the case for 8x, 4x, and 2x. 
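+    * (As an illustration: a 16x texelFetch needs that full 64-bit MCS value,
+    * so the brw backend lowers it to the wide ld2dms_w message,
+    * SHADER_OPCODE_TXF_CMS_W, instead of the older 32-bit ld2ms form; the
+    * exact message selection happens in the compiler, not here.)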
The key->msaa_16 bit-field controls which + * message we use. Fortunately, the 16x message works for 8x, 4x, and 2x + * so we can just use it unconditionally. This may not be quite as + * efficient but it saves us from recompiling. + */ + if (devinfo->gen >= 9) + key->msaa_16 = ~0; + /* XXX: Handle texture swizzle on HSW- */ for (int i = 0; i < MAX_SAMPLERS; i++) { /* Assume color sampler, no swizzling. (Works for BDW+) */ @@ -259,17 +300,21 @@ populate_gs_prog_key(const struct gen_device_info *devinfo, } static void -populate_wm_prog_key(const struct gen_device_info *devinfo, +populate_wm_prog_key(const struct anv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *info, struct brw_wm_prog_key *key) { - ANV_FROM_HANDLE(anv_render_pass, render_pass, info->renderPass); + const struct gen_device_info *devinfo = &pipeline->device->info; memset(key, 0, sizeof(*key)); populate_sampler_prog_key(devinfo, &key->tex); - /* TODO: Fill out key->input_slots_valid */ + /* TODO: we could set this to 0 based on the information in nir_shader, but + * this function is called before spirv_to_nir. */ + const struct brw_vue_map *vue_map = + &anv_pipeline_get_last_vue_prog_data(pipeline)->vue_map; + key->input_slots_valid = vue_map->slots_valid; /* Vulkan doesn't specify a default */ key->high_quality_derivatives = false; @@ -277,21 +322,25 @@ populate_wm_prog_key(const struct gen_device_info *devinfo, /* XXX Vulkan doesn't appear to specify */ key->clamp_fragment_color = false; - key->nr_color_regions = - render_pass->subpasses[info->subpass].color_count; + key->nr_color_regions = pipeline->subpass->color_count; key->replicate_alpha = key->nr_color_regions > 1 && info->pMultisampleState && info->pMultisampleState->alphaToCoverageEnable; - if (info->pMultisampleState && info->pMultisampleState->rasterizationSamples > 1) { + if (info->pMultisampleState) { /* We should probably pull this out of the shader, but it's fairly * harmless to compute it and then let dead-code take care of it. 
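     *
     * Worked example with made-up numbers: minSampleShading = 0.25 with
     * rasterizationSamples = 8 gives 0.25 * 8 = 2 > 1, so per-sample
     * interpolation is requested; the same fraction at 4 samples gives
     * exactly 1, which is not > 1, and leaves it disabled.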
*/ - key->persample_interp = - (info->pMultisampleState->minSampleShading * - info->pMultisampleState->rasterizationSamples) > 1; - key->multisample_fbo = true; + if (info->pMultisampleState->rasterizationSamples > 1) { + key->persample_interp = + (info->pMultisampleState->minSampleShading * + info->pMultisampleState->rasterizationSamples) > 1; + key->multisample_fbo = true; + } + + key->frag_coord_adds_sample_pos = + info->pMultisampleState->sampleShadingEnable; } } @@ -304,8 +353,41 @@ populate_cs_prog_key(const struct gen_device_info *devinfo, populate_sampler_prog_key(devinfo, &key->tex); } +static void +anv_pipeline_hash_shader(struct anv_pipeline *pipeline, + struct anv_pipeline_layout *layout, + struct anv_shader_module *module, + const char *entrypoint, + gl_shader_stage stage, + const VkSpecializationInfo *spec_info, + const void *key, size_t key_size, + unsigned char *sha1_out) +{ + struct mesa_sha1 ctx; + + _mesa_sha1_init(&ctx); + if (stage != MESA_SHADER_COMPUTE) { + _mesa_sha1_update(&ctx, &pipeline->subpass->view_mask, + sizeof(pipeline->subpass->view_mask)); + } + if (layout) + _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1)); + _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1)); + _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint)); + _mesa_sha1_update(&ctx, &stage, sizeof(stage)); + if (spec_info) { + _mesa_sha1_update(&ctx, spec_info->pMapEntries, + spec_info->mapEntryCount * sizeof(*spec_info->pMapEntries)); + _mesa_sha1_update(&ctx, spec_info->pData, spec_info->dataSize); + } + _mesa_sha1_update(&ctx, key, key_size); + _mesa_sha1_final(&ctx, sha1_out); +} + static nir_shader * anv_pipeline_compile(struct anv_pipeline *pipeline, + void *mem_ctx, + struct anv_pipeline_layout *layout, struct anv_shader_module *module, const char *entrypoint, gl_shader_stage stage, @@ -313,44 +395,37 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, struct brw_stage_prog_data *prog_data, struct anv_pipeline_bind_map *map) { - nir_shader *nir = anv_shader_compile_to_nir(pipeline->device, + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + + nir_shader *nir = anv_shader_compile_to_nir(pipeline, mem_ctx, module, entrypoint, stage, spec_info); if (nir == NULL) return NULL; - anv_nir_lower_push_constants(nir); + NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout); - /* Figure out the number of parameters */ - prog_data->nr_params = 0; + NIR_PASS_V(nir, anv_nir_lower_push_constants); + + if (stage != MESA_SHADER_COMPUTE) + NIR_PASS_V(nir, anv_nir_lower_multiview, pipeline->subpass->view_mask); + + if (stage == MESA_SHADER_COMPUTE) + prog_data->total_shared = nir->num_shared; + + nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir)); if (nir->num_uniforms > 0) { + assert(prog_data->nr_params == 0); + /* If the shader uses any push constants at all, we'll just give * them the maximum possible number */ assert(nir->num_uniforms <= MAX_PUSH_CONSTANTS_SIZE); + nir->num_uniforms = MAX_PUSH_CONSTANTS_SIZE; prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float); - } - - if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets) - prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2; - - if (nir->info->num_images > 0) { - prog_data->nr_params += nir->info->num_images * BRW_IMAGE_PARAM_SIZE; - pipeline->needs_data_cache = true; - } - - if (stage == MESA_SHADER_COMPUTE) - ((struct brw_cs_prog_data *)prog_data)->thread_local_id_index = - prog_data->nr_params++; /* The CS Thread ID uniform */ - - if 
(nir->info->num_ssbos > 0) - pipeline->needs_data_cache = true; - - if (prog_data->nr_params > 0) { - /* XXX: I think we're leaking this */ - prog_data->param = (const union gl_constant_value **) - malloc(prog_data->nr_params * sizeof(union gl_constant_value *)); + prog_data->param = ralloc_array(mem_ctx, uint32_t, prog_data->nr_params); /* We now set the param values to be offsets into a * anv_push_constant_data structure. Since the compiler doesn't @@ -358,25 +433,24 @@ anv_pipeline_compile(struct anv_pipeline *pipeline, * params array, it doesn't really matter what we put here. */ struct anv_push_constants *null_data = NULL; - if (nir->num_uniforms > 0) { - /* Fill out the push constants section of the param array */ - for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++) - prog_data->param[i] = (const union gl_constant_value *) - &null_data->client_data[i * sizeof(float)]; + /* Fill out the push constants section of the param array */ + for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++) { + prog_data->param[i] = ANV_PARAM_PUSH( + (uintptr_t)&null_data->client_data[i * sizeof(float)]); } } - /* Set up dynamic offsets */ - anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data); + if (nir->info.num_ssbos > 0 || nir->info.num_images > 0) + pipeline->needs_data_cache = true; /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ - if (pipeline->layout) - anv_nir_apply_pipeline_layout(pipeline, nir, prog_data, map); + if (layout) + anv_nir_apply_pipeline_layout(pipeline, layout, nir, prog_data, map); - /* nir_lower_io will only handle the push constants; we need to set this - * to the full number of possible uniforms. - */ - nir->num_uniforms = prog_data->nr_params * 4; + if (stage != MESA_SHADER_COMPUTE) + brw_nir_analyze_ubo_ranges(compiler, nir, prog_data->ubo_ranges); + + assert(nir->num_uniforms == prog_data->nr_params * 4); return nir; } @@ -434,57 +508,58 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - struct anv_pipeline_bind_map map; struct brw_vs_prog_key key; struct anv_shader_bin *bin = NULL; unsigned char sha1[20]; populate_vs_prog_key(&pipeline->device->info, &key); + ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout); + if (cache) { - anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, - pipeline->layout, spec_info); + anv_pipeline_hash_shader(pipeline, layout, module, entrypoint, + MESA_SHADER_VERTEX, spec_info, + &key, sizeof(key), sha1); bin = anv_pipeline_cache_search(cache, sha1, 20); } if (bin == NULL) { - struct brw_vs_prog_data prog_data = { 0, }; + struct brw_vs_prog_data prog_data = {}; struct anv_pipeline_binding surface_to_descriptor[256]; struct anv_pipeline_binding sampler_to_descriptor[256]; - map = (struct anv_pipeline_bind_map) { + struct anv_pipeline_bind_map map = { .surface_to_descriptor = surface_to_descriptor, .sampler_to_descriptor = sampler_to_descriptor }; - nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + void *mem_ctx = ralloc_context(NULL); + + nir_shader *nir = anv_pipeline_compile(pipeline, mem_ctx, layout, + module, entrypoint, MESA_SHADER_VERTEX, spec_info, &prog_data.base.base, &map); - if (nir == NULL) + if (nir == NULL) { + ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } anv_fill_binding_table(&prog_data.base.base, 0); - void *mem_ctx = ralloc_context(NULL); - - ralloc_steal(mem_ctx, nir); - - prog_data.inputs_read = 
nir->info->inputs_read; - brw_compute_vue_map(&pipeline->device->info, &prog_data.base.vue_map, - nir->info->outputs_written, - nir->info->separate_shader); + nir->info.outputs_written, + nir->info.separate_shader); - unsigned code_size; const unsigned *shader_code = brw_compile_vs(compiler, NULL, mem_ctx, &key, &prog_data, nir, - NULL, false, -1, &code_size, NULL); + -1, NULL); if (shader_code == NULL) { ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } + unsigned code_size = prog_data.base.base.program_size; bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20, shader_code, code_size, &prog_data.base.base, sizeof(prog_data), @@ -502,6 +577,196 @@ anv_pipeline_compile_vs(struct anv_pipeline *pipeline, return VK_SUCCESS; } +static void +merge_tess_info(struct shader_info *tes_info, + const struct shader_info *tcs_info) +{ + /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says: + * + * "PointMode. Controls generation of points rather than triangles + * or lines. This functionality defaults to disabled, and is + * enabled if either shader stage includes the execution mode. + * + * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw, + * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd, + * and OutputVertices, it says: + * + * "One mode must be set in at least one of the tessellation + * shader stages." + * + * So, the fields can be set in either the TCS or TES, but they must + * agree if set in both. Our backend looks at TES, so bitwise-or in + * the values from the TCS. + */ + assert(tcs_info->tess.tcs_vertices_out == 0 || + tes_info->tess.tcs_vertices_out == 0 || + tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out); + tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out; + + assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED || + tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED || + tcs_info->tess.spacing == tes_info->tess.spacing); + tes_info->tess.spacing |= tcs_info->tess.spacing; + + assert(tcs_info->tess.primitive_mode == 0 || + tes_info->tess.primitive_mode == 0 || + tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode); + tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode; + tes_info->tess.ccw |= tcs_info->tess.ccw; + tes_info->tess.point_mode |= tcs_info->tess.point_mode; +} + +static VkResult +anv_pipeline_compile_tcs_tes(struct anv_pipeline *pipeline, + struct anv_pipeline_cache *cache, + const VkGraphicsPipelineCreateInfo *info, + struct anv_shader_module *tcs_module, + const char *tcs_entrypoint, + const VkSpecializationInfo *tcs_spec_info, + struct anv_shader_module *tes_module, + const char *tes_entrypoint, + const VkSpecializationInfo *tes_spec_info) +{ + const struct gen_device_info *devinfo = &pipeline->device->info; + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_tcs_prog_key tcs_key = {}; + struct brw_tes_prog_key tes_key = {}; + struct anv_shader_bin *tcs_bin = NULL; + struct anv_shader_bin *tes_bin = NULL; + unsigned char tcs_sha1[40]; + unsigned char tes_sha1[40]; + + populate_sampler_prog_key(&pipeline->device->info, &tcs_key.tex); + populate_sampler_prog_key(&pipeline->device->info, &tes_key.tex); + tcs_key.input_vertices = info->pTessellationState->patchControlPoints; + + ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout); + + if (cache) { + anv_pipeline_hash_shader(pipeline, layout, tcs_module, tcs_entrypoint, + MESA_SHADER_TESS_CTRL, tcs_spec_info, + &tcs_key, 
sizeof(tcs_key), tcs_sha1); + anv_pipeline_hash_shader(pipeline, layout, tes_module, tes_entrypoint, + MESA_SHADER_TESS_EVAL, tes_spec_info, + &tes_key, sizeof(tes_key), tes_sha1); + memcpy(&tcs_sha1[20], tes_sha1, 20); + memcpy(&tes_sha1[20], tcs_sha1, 20); + tcs_bin = anv_pipeline_cache_search(cache, tcs_sha1, sizeof(tcs_sha1)); + tes_bin = anv_pipeline_cache_search(cache, tes_sha1, sizeof(tes_sha1)); + } + + if (tcs_bin == NULL || tes_bin == NULL) { + struct brw_tcs_prog_data tcs_prog_data = {}; + struct brw_tes_prog_data tes_prog_data = {}; + struct anv_pipeline_binding tcs_surface_to_descriptor[256]; + struct anv_pipeline_binding tcs_sampler_to_descriptor[256]; + struct anv_pipeline_binding tes_surface_to_descriptor[256]; + struct anv_pipeline_binding tes_sampler_to_descriptor[256]; + + struct anv_pipeline_bind_map tcs_map = { + .surface_to_descriptor = tcs_surface_to_descriptor, + .sampler_to_descriptor = tcs_sampler_to_descriptor + }; + struct anv_pipeline_bind_map tes_map = { + .surface_to_descriptor = tes_surface_to_descriptor, + .sampler_to_descriptor = tes_sampler_to_descriptor + }; + + void *mem_ctx = ralloc_context(NULL); + + nir_shader *tcs_nir = + anv_pipeline_compile(pipeline, mem_ctx, layout, + tcs_module, tcs_entrypoint, + MESA_SHADER_TESS_CTRL, tcs_spec_info, + &tcs_prog_data.base.base, &tcs_map); + nir_shader *tes_nir = + anv_pipeline_compile(pipeline, mem_ctx, layout, + tes_module, tes_entrypoint, + MESA_SHADER_TESS_EVAL, tes_spec_info, + &tes_prog_data.base.base, &tes_map); + if (tcs_nir == NULL || tes_nir == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + nir_lower_tes_patch_vertices(tes_nir, + tcs_nir->info.tess.tcs_vertices_out); + + /* Copy TCS info into the TES info */ + merge_tess_info(&tes_nir->info, &tcs_nir->info); + + anv_fill_binding_table(&tcs_prog_data.base.base, 0); + anv_fill_binding_table(&tes_prog_data.base.base, 0); + + /* Whacking the key after cache lookup is a bit sketchy, but all of + * this comes from the SPIR-V, which is part of the hash used for the + * pipeline cache. So it should be safe. 
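+    *
+    * Concretely: tes_primitive_mode and friends are read out of
+    * tes_nir->info, which comes from the TES SPIR-V, and the TES shader's
+    * sha1 was folded into tcs_sha1 above (and vice versa).  Two pipelines
+    * that would end up with different values here therefore already hash
+    * to different cache entries.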
+ */ + tcs_key.tes_primitive_mode = tes_nir->info.tess.primitive_mode; + tcs_key.outputs_written = tcs_nir->info.outputs_written; + tcs_key.patch_outputs_written = tcs_nir->info.patch_outputs_written; + tcs_key.quads_workaround = + devinfo->gen < 9 && + tes_nir->info.tess.primitive_mode == 7 /* GL_QUADS */ && + tes_nir->info.tess.spacing == TESS_SPACING_EQUAL; + + tes_key.inputs_read = tcs_key.outputs_written; + tes_key.patch_inputs_read = tcs_key.patch_outputs_written; + + const int shader_time_index = -1; + const unsigned *shader_code; + + shader_code = + brw_compile_tcs(compiler, NULL, mem_ctx, &tcs_key, &tcs_prog_data, + tcs_nir, shader_time_index, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + unsigned code_size = tcs_prog_data.base.base.program_size; + tcs_bin = anv_pipeline_upload_kernel(pipeline, cache, + tcs_sha1, sizeof(tcs_sha1), + shader_code, code_size, + &tcs_prog_data.base.base, + sizeof(tcs_prog_data), + &tcs_map); + if (!tcs_bin) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + shader_code = + brw_compile_tes(compiler, NULL, mem_ctx, &tes_key, + &tcs_prog_data.base.vue_map, &tes_prog_data, tes_nir, + NULL, shader_time_index, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + code_size = tes_prog_data.base.base.program_size; + tes_bin = anv_pipeline_upload_kernel(pipeline, cache, + tes_sha1, sizeof(tes_sha1), + shader_code, code_size, + &tes_prog_data.base.base, + sizeof(tes_prog_data), + &tes_map); + if (!tes_bin) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + ralloc_free(mem_ctx); + } + + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_TESS_CTRL, tcs_bin); + anv_pipeline_add_compiled_stage(pipeline, MESA_SHADER_TESS_EVAL, tes_bin); + + return VK_SUCCESS; +} + static VkResult anv_pipeline_compile_gs(struct anv_pipeline *pipeline, struct anv_pipeline_cache *cache, @@ -512,56 +777,59 @@ anv_pipeline_compile_gs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - struct anv_pipeline_bind_map map; struct brw_gs_prog_key key; struct anv_shader_bin *bin = NULL; unsigned char sha1[20]; populate_gs_prog_key(&pipeline->device->info, &key); + ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout); + if (cache) { - anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, - pipeline->layout, spec_info); + anv_pipeline_hash_shader(pipeline, layout, module, entrypoint, + MESA_SHADER_GEOMETRY, spec_info, + &key, sizeof(key), sha1); bin = anv_pipeline_cache_search(cache, sha1, 20); } if (bin == NULL) { - struct brw_gs_prog_data prog_data = { 0, }; + struct brw_gs_prog_data prog_data = {}; struct anv_pipeline_binding surface_to_descriptor[256]; struct anv_pipeline_binding sampler_to_descriptor[256]; - map = (struct anv_pipeline_bind_map) { + struct anv_pipeline_bind_map map = { .surface_to_descriptor = surface_to_descriptor, .sampler_to_descriptor = sampler_to_descriptor }; - nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + void *mem_ctx = ralloc_context(NULL); + + nir_shader *nir = anv_pipeline_compile(pipeline, mem_ctx, layout, + module, entrypoint, MESA_SHADER_GEOMETRY, spec_info, &prog_data.base.base, &map); - if (nir == NULL) + if (nir == NULL) { + ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } anv_fill_binding_table(&prog_data.base.base, 0); - void 
*mem_ctx = ralloc_context(NULL); - - ralloc_steal(mem_ctx, nir); - brw_compute_vue_map(&pipeline->device->info, &prog_data.base.vue_map, - nir->info->outputs_written, - nir->info->separate_shader); + nir->info.outputs_written, + nir->info.separate_shader); - unsigned code_size; const unsigned *shader_code = brw_compile_gs(compiler, NULL, mem_ctx, &key, &prog_data, nir, - NULL, -1, &code_size, NULL); + NULL, -1, NULL); if (shader_code == NULL) { ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } /* TODO: SIMD8 GS */ + const unsigned code_size = prog_data.base.base.program_size; bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20, shader_code, code_size, &prog_data.base.base, sizeof(prog_data), @@ -589,43 +857,88 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - struct anv_pipeline_bind_map map; struct brw_wm_prog_key key; struct anv_shader_bin *bin = NULL; unsigned char sha1[20]; - populate_wm_prog_key(&pipeline->device->info, info, &key); + populate_wm_prog_key(pipeline, info, &key); + + ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout); if (cache) { - anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, - pipeline->layout, spec_info); + anv_pipeline_hash_shader(pipeline, layout, module, entrypoint, + MESA_SHADER_FRAGMENT, spec_info, + &key, sizeof(key), sha1); bin = anv_pipeline_cache_search(cache, sha1, 20); } if (bin == NULL) { - struct brw_wm_prog_data prog_data = { 0, }; + struct brw_wm_prog_data prog_data = {}; struct anv_pipeline_binding surface_to_descriptor[256]; struct anv_pipeline_binding sampler_to_descriptor[256]; - map = (struct anv_pipeline_bind_map) { + struct anv_pipeline_bind_map map = { .surface_to_descriptor = surface_to_descriptor + 8, .sampler_to_descriptor = sampler_to_descriptor }; - nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + void *mem_ctx = ralloc_context(NULL); + + nir_shader *nir = anv_pipeline_compile(pipeline, mem_ctx, layout, + module, entrypoint, MESA_SHADER_FRAGMENT, spec_info, &prog_data.base, &map); - if (nir == NULL) + if (nir == NULL) { + ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } unsigned num_rts = 0; - struct anv_pipeline_binding rt_bindings[8]; + const int max_rt = FRAG_RESULT_DATA7 - FRAG_RESULT_DATA0 + 1; + struct anv_pipeline_binding rt_bindings[max_rt]; nir_function_impl *impl = nir_shader_get_entrypoint(nir); + int rt_to_bindings[max_rt]; + memset(rt_to_bindings, -1, sizeof(rt_to_bindings)); + bool rt_used[max_rt]; + memset(rt_used, 0, sizeof(rt_used)); + + /* Flag used render targets */ + nir_foreach_variable_safe(var, &nir->outputs) { + if (var->data.location < FRAG_RESULT_DATA0) + continue; + + const unsigned rt = var->data.location - FRAG_RESULT_DATA0; + /* Out-of-bounds */ + if (rt >= key.nr_color_regions) + continue; + + const unsigned array_len = + glsl_type_is_array(var->type) ? 
glsl_get_length(var->type) : 1; + assert(rt + array_len <= max_rt); + + for (unsigned i = 0; i < array_len; i++) + rt_used[rt + i] = true; + } + + /* Set new, compacted, location */ + for (unsigned i = 0; i < max_rt; i++) { + if (!rt_used[i]) + continue; + + rt_to_bindings[i] = num_rts; + rt_bindings[rt_to_bindings[i]] = (struct anv_pipeline_binding) { + .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS, + .binding = 0, + .index = i, + }; + num_rts++; + } + nir_foreach_variable_safe(var, &nir->outputs) { if (var->data.location < FRAG_RESULT_DATA0) continue; - unsigned rt = var->data.location - FRAG_RESULT_DATA0; + const unsigned rt = var->data.location - FRAG_RESULT_DATA0; if (rt >= key.nr_color_regions) { /* Out-of-bounds, throw it away */ var->data.mode = nir_var_local; @@ -634,22 +947,9 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, continue; } - /* Give it a new, compacted, location */ - var->data.location = FRAG_RESULT_DATA0 + num_rts; - - unsigned array_len = - glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1; - assert(num_rts + array_len <= 8); - - for (unsigned i = 0; i < array_len; i++) { - rt_bindings[num_rts + i] = (struct anv_pipeline_binding) { - .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS, - .binding = 0, - .index = rt + i, - }; - } - - num_rts += array_len; + /* Give it the new location */ + assert(rt_to_bindings[rt] != -1); + var->data.location = rt_to_bindings[rt] + FRAG_RESULT_DATA0; } if (num_rts == 0) { @@ -657,12 +957,12 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, rt_bindings[0] = (struct anv_pipeline_binding) { .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS, .binding = 0, - .index = UINT8_MAX, + .index = UINT32_MAX, }; num_rts = 1; } - assert(num_rts <= 8); + assert(num_rts <= max_rt); map.surface_to_descriptor -= num_rts; map.surface_count += num_rts; assert(map.surface_count <= 256); @@ -671,19 +971,15 @@ anv_pipeline_compile_fs(struct anv_pipeline *pipeline, anv_fill_binding_table(&prog_data.base, num_rts); - void *mem_ctx = ralloc_context(NULL); - - ralloc_steal(mem_ctx, nir); - - unsigned code_size; const unsigned *shader_code = brw_compile_fs(compiler, NULL, mem_ctx, &key, &prog_data, nir, - NULL, -1, -1, true, false, NULL, &code_size, NULL); + NULL, -1, -1, true, false, NULL, NULL); if (shader_code == NULL) { ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } + unsigned code_size = prog_data.base.program_size; bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20, shader_code, code_size, &prog_data.base, sizeof(prog_data), @@ -711,50 +1007,55 @@ anv_pipeline_compile_cs(struct anv_pipeline *pipeline, { const struct brw_compiler *compiler = pipeline->device->instance->physicalDevice.compiler; - struct anv_pipeline_bind_map map; struct brw_cs_prog_key key; struct anv_shader_bin *bin = NULL; unsigned char sha1[20]; populate_cs_prog_key(&pipeline->device->info, &key); + ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout); + if (cache) { - anv_hash_shader(sha1, &key, sizeof(key), module, entrypoint, - pipeline->layout, spec_info); + anv_pipeline_hash_shader(pipeline, layout, module, entrypoint, + MESA_SHADER_COMPUTE, spec_info, + &key, sizeof(key), sha1); bin = anv_pipeline_cache_search(cache, sha1, 20); } if (bin == NULL) { - struct brw_cs_prog_data prog_data = { 0, }; + struct brw_cs_prog_data prog_data = {}; struct anv_pipeline_binding surface_to_descriptor[256]; struct anv_pipeline_binding sampler_to_descriptor[256]; - map = (struct anv_pipeline_bind_map) { + struct anv_pipeline_bind_map map = { 
.surface_to_descriptor = surface_to_descriptor, .sampler_to_descriptor = sampler_to_descriptor }; - nir_shader *nir = anv_pipeline_compile(pipeline, module, entrypoint, + void *mem_ctx = ralloc_context(NULL); + + nir_shader *nir = anv_pipeline_compile(pipeline, mem_ctx, layout, + module, entrypoint, MESA_SHADER_COMPUTE, spec_info, &prog_data.base, &map); - if (nir == NULL) + if (nir == NULL) { + ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } - anv_fill_binding_table(&prog_data.base, 1); + NIR_PASS_V(nir, anv_nir_add_base_work_group_id, &prog_data); - void *mem_ctx = ralloc_context(NULL); - - ralloc_steal(mem_ctx, nir); + anv_fill_binding_table(&prog_data.base, 1); - unsigned code_size; const unsigned *shader_code = brw_compile_cs(compiler, NULL, mem_ctx, &key, &prog_data, nir, - -1, &code_size, NULL); + -1, NULL); if (shader_code == NULL) { ralloc_free(mem_ctx); return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); } + const unsigned code_size = prog_data.base.program_size; bin = anv_pipeline_upload_kernel(pipeline, cache, sha1, 20, shader_code, code_size, &prog_data.base, sizeof(prog_data), @@ -790,8 +1091,7 @@ copy_non_dynamic_state(struct anv_pipeline *pipeline, const VkGraphicsPipelineCreateInfo *pCreateInfo) { anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL; - ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass); - struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; + struct anv_subpass *subpass = pipeline->subpass; pipeline->dynamic_state = default_dynamic_state; @@ -850,7 +1150,7 @@ copy_non_dynamic_state(struct anv_pipeline *pipeline, */ bool uses_color_att = false; for (unsigned i = 0; i < subpass->color_count; ++i) { - if (subpass->color_attachments[i] != VK_ATTACHMENT_UNUSED) { + if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) { uses_color_att = true; break; } @@ -878,7 +1178,7 @@ copy_non_dynamic_state(struct anv_pipeline *pipeline, * against does not use a depth/stencil attachment. 
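     *
     * In practice that means pCreateInfo->pDepthStencilState may legally be
     * NULL for such pipelines, which is why the assert and all the reads
     * below only happen inside this guard.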
*/ if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable && - subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { + subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) { assert(pCreateInfo->pDepthStencilState); if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) { @@ -916,6 +1216,7 @@ copy_non_dynamic_state(struct anv_pipeline *pipeline, static void anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info) { +#ifdef DEBUG struct anv_render_pass *renderpass = NULL; struct anv_subpass *subpass = NULL; @@ -938,7 +1239,7 @@ anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info) assert(info->pViewportState); assert(info->pMultisampleState); - if (subpass && subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) + if (subpass && subpass->depth_stencil_attachment.attachment != VK_ATTACHMENT_UNUSED) assert(info->pDepthStencilState); if (subpass && subpass->color_count > 0) @@ -955,6 +1256,7 @@ anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info) break; } } +#endif } /** @@ -986,15 +1288,16 @@ anv_pipeline_init(struct anv_pipeline *pipeline, { VkResult result; - anv_validate { - anv_pipeline_validate_create_info(pCreateInfo); - } + anv_pipeline_validate_create_info(pCreateInfo); if (alloc == NULL) alloc = &device->alloc; pipeline->device = device; - pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); + + ANV_FROM_HANDLE(anv_render_pass, render_pass, pCreateInfo->renderPass); + assert(pCreateInfo->subpass < render_pass->subpass_count); + pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass]; result = anv_reloc_list_init(&pipeline->batch_relocs, alloc); if (result != VK_SUCCESS) @@ -1004,11 +1307,15 @@ anv_pipeline_init(struct anv_pipeline *pipeline, pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); pipeline->batch.relocs = &pipeline->batch_relocs; + pipeline->batch.status = VK_SUCCESS; copy_non_dynamic_state(pipeline, pCreateInfo); pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState && pCreateInfo->pRasterizationState->depthClampEnable; + pipeline->sample_shading_enable = pCreateInfo->pMultisampleState && + pCreateInfo->pMultisampleState->sampleShadingEnable; + pipeline->needs_data_cache = false; /* When we free the pipeline, we detect stages based on the NULL status @@ -1018,8 +1325,8 @@ anv_pipeline_init(struct anv_pipeline *pipeline, pipeline->active_stages = 0; - const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = { 0, }; - struct anv_shader_module *modules[MESA_SHADER_STAGES] = { 0, }; + const VkPipelineShaderStageCreateInfo *pStages[MESA_SHADER_STAGES] = {}; + struct anv_shader_module *modules[MESA_SHADER_STAGES] = {}; for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { gl_shader_stage stage = ffs(pCreateInfo->pStages[i].stage) - 1; pStages[stage] = &pCreateInfo->pStages[i]; @@ -1035,8 +1342,15 @@ anv_pipeline_init(struct anv_pipeline *pipeline, goto compile_fail; } - if (modules[MESA_SHADER_TESS_CTRL] || modules[MESA_SHADER_TESS_EVAL]) - anv_finishme("no tessellation support"); + if (modules[MESA_SHADER_TESS_EVAL]) { + anv_pipeline_compile_tcs_tes(pipeline, cache, pCreateInfo, + modules[MESA_SHADER_TESS_CTRL], + pStages[MESA_SHADER_TESS_CTRL]->pName, + pStages[MESA_SHADER_TESS_CTRL]->pSpecializationInfo, + modules[MESA_SHADER_TESS_EVAL], + pStages[MESA_SHADER_TESS_EVAL]->pName, + pStages[MESA_SHADER_TESS_EVAL]->pSpecializationInfo); + 
} if (modules[MESA_SHADER_GEOMETRY]) { result = anv_pipeline_compile_gs(pipeline, cache, pCreateInfo, @@ -1070,7 +1384,7 @@ anv_pipeline_init(struct anv_pipeline *pipeline, const VkVertexInputAttributeDescription *desc = &vi_info->pVertexAttributeDescriptions[i]; - if (inputs_read & (1 << (VERT_ATTRIB_GENERIC0 + desc->location))) + if (inputs_read & (1ull << (VERT_ATTRIB_GENERIC0 + desc->location))) pipeline->vb_used |= 1 << desc->binding; } @@ -1096,8 +1410,14 @@ anv_pipeline_init(struct anv_pipeline *pipeline, const VkPipelineInputAssemblyStateCreateInfo *ia_info = pCreateInfo->pInputAssemblyState; + const VkPipelineTessellationStateCreateInfo *tess_info = + pCreateInfo->pTessellationState; pipeline->primitive_restart = ia_info->primitiveRestartEnable; - pipeline->topology = vk_to_gen_primitive_type[ia_info->topology]; + + if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL)) + pipeline->topology = _3DPRIM_PATCHLIST(tess_info->patchControlPoints); + else + pipeline->topology = vk_to_gen_primitive_type[ia_info->topology]; return VK_SUCCESS;
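One of the more involved additions above is the fragment-output compaction in
anv_pipeline_compile_fs.  Below is a minimal, self-contained sketch of that
two-pass scheme, using made-up stand-ins (struct rt_output,
compact_render_targets, MAX_RT) rather than the driver's nir_variable /
FRAG_RESULT_DATA0 machinery, and skipping the step that discards
out-of-bounds outputs:

   #include <stdbool.h>

   #define MAX_RT 8

   /* Simplified stand-in for a fragment-shader output variable. */
   struct rt_output {
      int location;    /* render target index, already relative to DATA0 */
      int array_len;   /* 1 for scalars/vectors, N for arrays */
   };

   static unsigned
   compact_render_targets(struct rt_output *outs, unsigned n_outs,
                          unsigned nr_color_regions,
                          int rt_to_bindings[MAX_RT])
   {
      bool rt_used[MAX_RT] = { false };

      /* Pass 1: flag every target an in-bounds output (or output array)
       * actually touches. */
      for (unsigned i = 0; i < n_outs; i++) {
         int rt = outs[i].location;
         if (rt < 0 || rt >= (int)nr_color_regions)
            continue;
         for (int a = 0; a < outs[i].array_len && rt + a < MAX_RT; a++)
            rt_used[rt + a] = true;
      }

      /* Pass 2: hand out compacted binding-table slots in order. */
      unsigned num_rts = 0;
      for (unsigned rt = 0; rt < MAX_RT; rt++) {
         if (rt_used[rt])
            rt_to_bindings[rt] = num_rts++;
         else
            rt_to_bindings[rt] = -1;
      }

      /* Pass 3: relocate each surviving output to its new slot. */
      for (unsigned i = 0; i < n_outs; i++) {
         int rt = outs[i].location;
         if (rt >= 0 && rt < (int)nr_color_regions && rt_to_bindings[rt] >= 0)
            outs[i].location = rt_to_bindings[rt];
      }

      return num_rts;   /* number of binding-table entries consumed */
   }

In the real code the table additionally records an
ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS binding per slot and falls back to a
single null render target when the shader writes nothing, as the hunks above
show.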