From: Kristian Høgsberg Kristensen Date: Wed, 30 Sep 2015 00:10:50 +0000 (-0700) Subject: Merge branch 'master' of ../mesa into vulkan X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=005c8e01062e8e88a86904b955d5422742bd32e7;p=mesa.git Merge branch 'master' of ../mesa into vulkan --- 005c8e01062e8e88a86904b955d5422742bd32e7 diff --cc src/glsl/Makefile.sources index acd13ef7745,32b6dba2e91..0cd3d284756 --- a/src/glsl/Makefile.sources +++ b/src/glsl/Makefile.sources @@@ -74,10 -79,7 +80,9 @@@ NIR_FILES = nir/nir_vla.h \ nir/nir_worklist.c \ nir/nir_worklist.h \ - nir/nir_types.cpp + nir/nir_types.cpp \ + nir/spirv_to_nir.c \ - nir/spirv_glsl450_to_nir.c \ - $(NIR_GENERATED_FILES) ++ nir/spirv_glsl450_to_nir.c # libglsl diff --cc src/glsl/glsl_types.h index 0a8a0b97dc9,3ec764219de..b83e1ca3d2c --- a/src/glsl/glsl_types.h +++ b/src/glsl/glsl_types.h @@@ -801,7 -829,17 +848,17 @@@ struct glsl_struct_field */ int stream; - + /** + * Image qualifiers, applicable to buffer variables defined in shader + * storage buffer objects (SSBOs) + */ + unsigned image_read_only:1; + unsigned image_write_only:1; + unsigned image_coherent:1; + unsigned image_volatile:1; + unsigned image_restrict:1; + +#ifdef __cplusplus glsl_struct_field(const struct glsl_type *_type, const char *_name) : type(_type), name(_name), location(-1), interpolation(0), centroid(0), sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0), diff --cc src/glsl/nir/nir.h index 7b188fa8ed1,d0c7b04a49f..19a4a361a25 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@@ -1815,12 -1811,12 +1848,14 @@@ bool nir_split_var_copies(nir_shader *s void nir_lower_var_copy_instr(nir_intrinsic_instr *copy, void *mem_ctx); void nir_lower_var_copies(nir_shader *shader); - void nir_lower_global_vars_to_local(nir_shader *shader); + bool nir_lower_global_vars_to_local(nir_shader *shader); - void nir_lower_locals_to_regs(nir_shader *shader); + bool nir_lower_locals_to_regs(nir_shader *shader); + + void nir_lower_outputs_to_temporaries(nir_shader *shader); +void nir_lower_outputs_to_temporaries(nir_shader *shader); + void nir_assign_var_locations(struct exec_list *var_list, unsigned *size, int (*type_size)(const struct glsl_type *)); @@@ -1839,12 -1836,52 +1875,53 @@@ void nir_lower_phis_to_scalar(nir_shade void nir_lower_samplers(nir_shader *shader, const struct gl_shader_program *shader_program); +void nir_lower_samplers_for_vk(nir_shader *shader); - void nir_lower_system_values(nir_shader *shader); - void nir_lower_tex_projector(nir_shader *shader); + bool nir_lower_system_values(nir_shader *shader); + + typedef struct nir_lower_tex_options { + /** + * bitmask of (1 << GLSL_SAMPLER_DIM_x) to control for which + * sampler types a texture projector is lowered. + */ + unsigned lower_txp; + + /** + * If true, lower rect textures to 2D, using txs to fetch the + * texture dimensions and dividing the texture coords by the + * texture dims to normalize. + */ + bool lower_rect; + + /** + * To emulate certain texture wrap modes, this can be used + * to saturate the specified tex coord to [0.0, 1.0]. The + * bits are according to sampler #, ie. if, for example: + * + * (conf->saturate_s & (1 << n)) + * + * is true, then the s coord for sampler n is saturated. + * + * Note that clamping must happen *after* projector lowering + * so any projected texture sample instruction with a clamped + * coordinate gets automatically lowered, regardless of the + * 'lower_txp' setting. 
+ */ + unsigned saturate_s; + unsigned saturate_t; + unsigned saturate_r; + } nir_lower_tex_options; + + void nir_lower_tex(nir_shader *shader, + const nir_lower_tex_options *options); + void nir_lower_idiv(nir_shader *shader); + void nir_lower_clip_vs(nir_shader *shader, unsigned ucp_enables); + void nir_lower_clip_fs(nir_shader *shader, unsigned ucp_enables); + + void nir_lower_two_sided_color(nir_shader *shader); + void nir_lower_atomics(nir_shader *shader); void nir_lower_to_source_mods(nir_shader *shader); diff --cc src/glsl/nir/nir_builder.h index 295a209b4e6,624329d0a8a..bf45d0373c0 --- a/src/glsl/nir/nir_builder.h +++ b/src/glsl/nir/nir_builder.h @@@ -216,23 -249,13 +249,30 @@@ nir_swizzle(nir_builder *build, nir_ssa nir_imov_alu(build, alu_src, num_components); } +/* Selects the right fdot given the number of components in each source. */ +static inline nir_ssa_def * +nir_fdot(nir_builder *build, nir_ssa_def *src0, nir_ssa_def *src1) +{ + assert(src0->num_components == src1->num_components); + switch (src0->num_components) { + case 1: return nir_fmul(build, src0, src1); + case 2: return nir_fdot2(build, src0, src1); + case 3: return nir_fdot3(build, src0, src1); + case 4: return nir_fdot4(build, src0, src1); + default: + unreachable("bad component size"); + } + + return NULL; +} + + static inline nir_ssa_def * + nir_channel(nir_builder *b, nir_ssa_def *def, unsigned c) + { + unsigned swizzle[4] = {c, c, c, c}; + return nir_swizzle(b, def, swizzle, 1, false); + } + /** * Turns a nir_src into a nir_ssa_def * so it can be passed to * nir_build_alu()-based builder calls. diff --cc src/glsl/nir/nir_intrinsics.h index 1f24f9f677d,649312fec51..e02779e157b --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@@ -166,9 -225,9 +226,9 @@@ SYSTEM_VALUE(num_work_groups, 3, 0 true, 0, 0, indices, flags) LOAD(uniform, 0, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) -LOAD(ubo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) +LOAD(ubo, 1, 2, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) LOAD(input, 0, 1, NIR_INTRINSIC_CAN_ELIMINATE | NIR_INTRINSIC_CAN_REORDER) - /* LOAD(ssbo, 1, 0) */ + LOAD(ssbo, 1, 1, NIR_INTRINSIC_CAN_ELIMINATE) /* * Stores work the same way as loads, except now the first register input is diff --cc src/glsl/nir/nir_lower_samplers.c index 00000000000,58ea0db4e0f..33cd9c8b0cf mode 000000,100644..100644 --- a/src/glsl/nir/nir_lower_samplers.c +++ b/src/glsl/nir/nir_lower_samplers.c @@@ -1,0 -1,188 +1,258 @@@ + /* + * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. + * Copyright (C) 2008 VMware, Inc. All Rights Reserved. + * Copyright © 2014 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + + #include "nir.h" + #include "nir_builder.h" + #include "../program.h" + #include "program/hash_table.h" + #include "ir_uniform.h" + + #include "main/compiler.h" + #include "main/mtypes.h" + #include "program/prog_parameter.h" + #include "program/program.h" + ++static void ++add_indirect_to_tex(nir_tex_instr *instr, nir_src indirect) ++{ ++ /* First, we have to resize the array of texture sources */ ++ nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src, ++ instr->num_srcs + 1); ++ ++ for (unsigned i = 0; i < instr->num_srcs; i++) { ++ new_srcs[i].src_type = instr->src[i].src_type; ++ nir_instr_move_src(&instr->instr, &new_srcs[i].src, &instr->src[i].src); ++ } ++ ++ ralloc_free(instr->src); ++ instr->src = new_srcs; ++ ++ /* Now we can go ahead and move the source over to being a ++ * first-class texture source. ++ */ ++ instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; ++ instr->num_srcs++; ++ nir_instr_rewrite_src(&instr->instr, &instr->src[instr->num_srcs - 1].src, ++ indirect); ++} ++ + /* Calculate the sampler index based on array indicies and also + * calculate the base uniform location for struct members. + */ + static void + calc_sampler_offsets(nir_deref *tail, nir_tex_instr *instr, + unsigned *array_elements, nir_ssa_def **indirect, + nir_builder *b, unsigned *location) + { + if (tail->child == NULL) + return; + + switch (tail->child->deref_type) { + case nir_deref_type_array: { + nir_deref_array *deref_array = nir_deref_as_array(tail->child); + + assert(deref_array->deref_array_type != nir_deref_array_type_wildcard); + + calc_sampler_offsets(tail->child, instr, array_elements, + indirect, b, location); + instr->sampler_index += deref_array->base_offset * *array_elements; + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) { + nir_ssa_def *mul = + nir_imul(b, nir_imm_int(b, *array_elements), + nir_ssa_for_src(b, deref_array->indirect, 1)); + + nir_instr_rewrite_src(&instr->instr, &deref_array->indirect, + NIR_SRC_INIT); + + if (*indirect) { + *indirect = nir_iadd(b, *indirect, mul); + } else { + *indirect = mul; + } + } + + *array_elements *= glsl_get_length(tail->type); + break; + } + + case nir_deref_type_struct: { + nir_deref_struct *deref_struct = nir_deref_as_struct(tail->child); + *location += glsl_get_record_location_offset(tail->type, deref_struct->index); + calc_sampler_offsets(tail->child, instr, array_elements, + indirect, b, location); + break; + } + + default: + unreachable("Invalid deref type"); + break; + } + } + + static void + lower_sampler(nir_tex_instr *instr, const struct gl_shader_program *shader_program, + gl_shader_stage stage, nir_builder *builder) + { + if (instr->sampler == NULL) + return; + + instr->sampler_index = 0; + unsigned location = instr->sampler->var->data.location; + unsigned array_elements = 1; + nir_ssa_def *indirect = NULL; + + builder->cursor = nir_before_instr(&instr->instr); + calc_sampler_offsets(&instr->sampler->deref, instr, &array_elements, + &indirect, builder, &location); + + if (indirect) { + /* First, we have to resize the array of texture sources */ + nir_tex_src *new_srcs = rzalloc_array(instr, nir_tex_src, + instr->num_srcs + 1); + + for (unsigned i = 0; i < instr->num_srcs; i++) { + new_srcs[i].src_type = 
instr->src[i].src_type; + nir_instr_move_src(&instr->instr, &new_srcs[i].src, + &instr->src[i].src); + } + + ralloc_free(instr->src); + instr->src = new_srcs; + + /* Now we can go ahead and move the source over to being a + * first-class texture source. + */ + instr->src[instr->num_srcs].src_type = nir_tex_src_sampler_offset; + instr->num_srcs++; + nir_instr_rewrite_src(&instr->instr, + &instr->src[instr->num_srcs - 1].src, + nir_src_for_ssa(indirect)); + + instr->sampler_array_size = array_elements; + } + + if (location > shader_program->NumUniformStorage - 1 || + !shader_program->UniformStorage[location].sampler[stage].active) { + assert(!"cannot return a sampler"); + return; + } + + instr->sampler_index += + shader_program->UniformStorage[location].sampler[stage].index; + + instr->sampler = NULL; + } + + typedef struct { + nir_builder builder; + const struct gl_shader_program *shader_program; + gl_shader_stage stage; + } lower_state; + + static bool + lower_block_cb(nir_block *block, void *_state) + { + lower_state *state = (lower_state *) _state; + + nir_foreach_instr(block, instr) { + if (instr->type == nir_instr_type_tex) { + nir_tex_instr *tex_instr = nir_instr_as_tex(instr); + lower_sampler(tex_instr, state->shader_program, state->stage, + &state->builder); + } + } + + return true; + } + + static void + lower_impl(nir_function_impl *impl, const struct gl_shader_program *shader_program, + gl_shader_stage stage) + { + lower_state state; + + nir_builder_init(&state.builder, impl); + state.shader_program = shader_program; + state.stage = stage; + + nir_foreach_block(impl, lower_block_cb, &state); + } + + void + nir_lower_samplers(nir_shader *shader, + const struct gl_shader_program *shader_program) + { + nir_foreach_overload(shader, overload) { + if (overload->impl) + lower_impl(overload->impl, shader_program, shader->stage); + } + } ++ ++static bool ++lower_samplers_for_vk_block(nir_block *block, void *data) ++{ ++ nir_foreach_instr(block, instr) { ++ if (instr->type != nir_instr_type_tex) ++ continue; ++ ++ nir_tex_instr *tex = nir_instr_as_tex(instr); ++ ++ assert(tex->sampler); ++ ++ tex->sampler_set = tex->sampler->var->data.descriptor_set; ++ tex->sampler_index = tex->sampler->var->data.binding; ++ ++ if (tex->sampler->deref.child) { ++ assert(tex->sampler->deref.child->deref_type == nir_deref_type_array); ++ nir_deref_array *arr = nir_deref_as_array(tex->sampler->deref.child); ++ ++ /* Only one-level arrays are allowed in vulkan */ ++ assert(arr->deref.child == NULL); ++ ++ tex->sampler_index += arr->base_offset; ++ if (arr->deref_array_type == nir_deref_array_type_indirect) { ++ add_indirect_to_tex(tex, arr->indirect); ++ nir_instr_rewrite_src(instr, &arr->indirect, NIR_SRC_INIT); ++ ++ tex->sampler_array_size = glsl_get_length(tex->sampler->deref.type); ++ } ++ } ++ ++ tex->sampler = NULL; ++ } ++ ++ return true; ++} ++ ++void ++nir_lower_samplers_for_vk(nir_shader *shader) ++{ ++ nir_foreach_overload(shader, overload) { ++ if (overload->impl) { ++ nir_foreach_block(overload->impl, lower_samplers_for_vk_block, NULL); ++ } ++ } ++} diff --cc src/glsl/nir/nir_split_var_copies.c index 5c163b59819,d2ea58a8b7c..f583178caf4 --- a/src/glsl/nir/nir_split_var_copies.c +++ b/src/glsl/nir/nir_split_var_copies.c @@@ -64,8 -64,17 +64,9 @@@ struct split_var_copies_state { void *mem_ctx; void *dead_ctx; + bool progress; }; -static nir_deref * -get_deref_tail(nir_deref *deref) -{ - while (deref->child != NULL) - deref = deref->child; - return deref; -} - /* Recursively constructs deref chains 
to split a copy instruction into * multiple (if needed) copy instructions with full-length deref chains. * External callers of this function should pass the tail and head of the diff --cc src/glsl/nir/nir_types.cpp index 69cfac18587,da9807f0e62..01f0e9b5abc --- a/src/glsl/nir/nir_types.cpp +++ b/src/glsl/nir/nir_types.cpp @@@ -124,20 -112,13 +124,27 @@@ glsl_get_struct_elem_name(const struct return type->fields.structure[index].name; } +glsl_sampler_dim +glsl_get_sampler_dim(const struct glsl_type *type) +{ + assert(glsl_type_is_sampler(type)); + return (glsl_sampler_dim)type->sampler_dimensionality; +} + +glsl_base_type +glsl_get_sampler_result_type(const struct glsl_type *type) +{ + assert(glsl_type_is_sampler(type)); + return (glsl_base_type)type->sampler_type; +} + + unsigned + glsl_get_record_location_offset(const struct glsl_type *type, + unsigned length) + { + return type->record_location_offset(length); + } + bool glsl_type_is_void(const glsl_type *type) { diff --cc src/glsl/nir/nir_types.h index a2fa7934e16,49d6a65e7c4..1a0cb1fb774 --- a/src/glsl/nir/nir_types.h +++ b/src/glsl/nir/nir_types.h @@@ -68,9 -62,9 +68,12 @@@ unsigned glsl_get_length(const struct g const char *glsl_get_struct_elem_name(const struct glsl_type *type, unsigned index); +enum glsl_sampler_dim glsl_get_sampler_dim(const struct glsl_type *type); +enum glsl_base_type glsl_get_sampler_result_type(const struct glsl_type *type); + + unsigned glsl_get_record_location_offset(const struct glsl_type *type, + unsigned length); + bool glsl_type_is_void(const struct glsl_type *type); bool glsl_type_is_vector(const struct glsl_type *type); bool glsl_type_is_scalar(const struct glsl_type *type); diff --cc src/mesa/drivers/dri/i965/brw_defines.h index f9dcdc735b3,393f17ac98c..a8cde20e045 --- a/src/mesa/drivers/dri/i965/brw_defines.h +++ b/src/mesa/drivers/dri/i965/brw_defines.h @@@ -78,8 -77,7 +78,8 @@@ #define _3DPRIM_LINESTRIP_CONT 0x12 #define _3DPRIM_LINESTRIP_BF 0x13 #define _3DPRIM_LINESTRIP_CONT_BF 0x14 - #define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 + #define _3DPRIM_TRIFAN_NOSTIPPLE 0x16 +#endif /* We use this offset to be able to pass native primitive types in struct * _mesa_prim::mode. 
Native primitive types are BRW_PRIM_OFFSET + diff --cc src/mesa/drivers/dri/i965/brw_fs.cpp index 76530a476d6,64215ae5a6a..e620301fde7 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@@ -5011,8 -5032,9 +5053,10 @@@ fs_visitor::run_fs(bool do_rep_send assert(stage == MESA_SHADER_FRAGMENT); + sanity_param_count = prog->Parameters->NumParameters; + - assign_fs_binding_table_offsets(); + if (prog_data->map_entries == NULL) - assign_binding_table_offsets(); ++ assign_fs_binding_table_offsets(); if (devinfo->gen >= 6) setup_payload_gen6(); diff --cc src/mesa/drivers/dri/i965/brw_nir.c index 4c8602a1085,1d4f6ab2ccd..35855092dff --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@@ -90,26 -96,10 +92,34 @@@ brw_create_nir(struct brw_context *brw } nir_validate_shader(nir); + brw_process_nir(nir, brw->intelScreen->devinfo, shader_prog, stage, is_scalar); + + static GLuint msg_id = 0; + _mesa_gl_debug(&brw->ctx, &msg_id, + MESA_DEBUG_SOURCE_SHADER_COMPILER, + MESA_DEBUG_TYPE_OTHER, + MESA_DEBUG_SEVERITY_NOTIFICATION, + "%s NIR shader:\n", + _mesa_shader_stage_to_abbrev(stage)); + + return nir; +} + +void +brw_process_nir(nir_shader *nir, + const struct brw_device_info *devinfo, + const struct gl_shader_program *shader_prog, + gl_shader_stage stage, bool is_scalar) +{ + bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage); ++ static const nir_lower_tex_options tex_options = { ++ .lower_txp = ~0, ++ }; ++ + if (stage == MESA_SHADER_GEOMETRY) { + nir_lower_gs_intrinsics(nir); + nir_validate_shader(nir); + } nir_lower_global_vars_to_local(nir); nir_validate_shader(nir); diff --cc src/mesa/drivers/dri/i965/brw_shader.cpp index 4ef2777559f,785cb2722fe..1060d93ae6b --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@@ -96,8 -96,15 +96,17 @@@ brw_compiler_create(void *mem_ctx, cons */ nir_options->lower_ffma = true; nir_options->lower_sub = true; + nir_options->lower_fdiv = true; + + /* In the vec4 backend, our dpN instruction replicates its result to all + * the components of a vec4. We would like NIR to give us replicated fdot + * instructions because it can optimize better for us. + * + * For the FS backend, it should be lowered away by the scalarizing pass so + * we should never see fdot anyway. 
+ */ + nir_options->fdot_replicates = true; + /* We want the GLSL compiler to emit code that uses condition codes */ for (int i = 0; i < MESA_SHADER_STAGES; i++) { compiler->glsl_compiler_options[i].MaxUnrollIterations = 32; diff --cc src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index d5a24d823b5,94906d2e705..9d56f9a1ae0 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@@ -640,10 -889,10 +891,10 @@@ vec4_visitor::nir_emit_intrinsic(nir_in */ brw_mark_surface_used(&prog_data->base, prog_data->base.binding_table.ubo_start + - shader_prog->NumUniformBlocks - 1); + shader_prog->NumBufferInterfaceBlocks - 1); } - unsigned const_offset = instr->const_index[0]; + unsigned const_offset = instr->const_index[1]; src_reg offset; if (!has_indirect) { diff --cc src/vulkan/anv_compiler.cpp index dcd5581f957,00000000000..8acfc068e90 mode 100644,000000..100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@@ -1,1128 -1,0 +1,1120 @@@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#include +#include +#include + +#include "anv_private.h" +#include "anv_nir.h" + +#include +#include /* brw_new_shader_program is here */ +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +/* XXX: We need this to keep symbols in nir.h from conflicting with the + * generated GEN command packing headers. We need to fix *both* to not + * define something as generic as LOAD. + */ +#undef LOAD + +#include + +#define SPIR_V_MAGIC_NUMBER 0x07230203 + +static void +fail_if(int cond, const char *format, ...) +{ + va_list args; + + if (!cond) + return; + + va_start(args, format); + vfprintf(stderr, format, args); + va_end(args); + + exit(1); +} + +static VkResult +set_binding_table_layout(struct brw_stage_prog_data *prog_data, + struct anv_pipeline *pipeline, uint32_t stage) +{ + uint32_t bias, count, k, *map; + struct anv_pipeline_layout *layout = pipeline->layout; + + /* No layout is valid for shaders that don't bind any resources. 
*/ + if (pipeline->layout == NULL) + return VK_SUCCESS; + + if (stage == VK_SHADER_STAGE_FRAGMENT) + bias = MAX_RTS; + else + bias = 0; + + count = layout->stage[stage].surface_count; + prog_data->map_entries = + (uint32_t *) malloc(count * sizeof(prog_data->map_entries[0])); + if (prog_data->map_entries == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + k = bias; + map = prog_data->map_entries; + for (uint32_t i = 0; i < layout->num_sets; i++) { + prog_data->bind_map[i].index = map; + for (uint32_t j = 0; j < layout->set[i].layout->stage[stage].surface_count; j++) + *map++ = k++; + + prog_data->bind_map[i].index_count = + layout->set[i].layout->stage[stage].surface_count; + } + + return VK_SUCCESS; +} + +static uint32_t +upload_kernel(struct anv_pipeline *pipeline, const void *data, size_t size) +{ + struct anv_state state = + anv_state_stream_alloc(&pipeline->program_stream, size, 64); + + assert(size < pipeline->program_stream.block_pool->block_size); + + memcpy(state.map, data, size); + + return state.offset; +} + +static void +create_params_array(struct anv_pipeline *pipeline, + struct gl_shader *shader, + struct brw_stage_prog_data *prog_data) +{ + VkShaderStage stage = anv_vk_shader_stage_for_mesa_stage(shader->Stage); + unsigned num_params = 0; + + if (shader->num_uniform_components) { + /* If the shader uses any push constants at all, we'll just give + * them the maximum possible number + */ + num_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float); + } + + if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets) + num_params += MAX_DYNAMIC_BUFFERS; + + if (num_params == 0) + return; + + prog_data->param = (const gl_constant_value **) + anv_device_alloc(pipeline->device, + num_params * sizeof(gl_constant_value *), + 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL_SHADER); + + /* We now set the param values to be offsets into a + * anv_push_constant_data structure. Since the compiler doesn't + * actually dereference any of the gl_constant_value pointers in the + * params array, it doesn't really matter what we put here. + */ + struct anv_push_constants *null_data = NULL; + for (unsigned i = 0; i < num_params; i++) + prog_data->param[i] = + (const gl_constant_value *)&null_data->client_data[i * sizeof(float)]; +} + +static void +brw_vs_populate_key(struct brw_context *brw, + struct brw_vertex_program *vp, + struct brw_vs_prog_key *key) +{ + struct gl_context *ctx = &brw->ctx; + /* BRW_NEW_VERTEX_PROGRAM */ + struct gl_program *prog = (struct gl_program *) vp; + + memset(key, 0, sizeof(*key)); + + /* Just upload the program verbatim for now. Always send it all + * the inputs it asks for, whether they are varying or not. 
+ */ - key->base.program_string_id = vp->id; - brw_setup_vue_key_clip_info(brw, &key->base, - vp->program.Base.UsesClipDistanceOut); ++ key->program_string_id = vp->id; + + /* _NEW_POLYGON */ + if (brw->gen < 6) { + key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL || + ctx->Polygon.BackMode != GL_FILL); + } + + if (prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 | + VARYING_BIT_BFC0 | VARYING_BIT_BFC1)) { + /* _NEW_LIGHT | _NEW_BUFFERS */ + key->clamp_vertex_color = ctx->Light._ClampVertexColor; + } + + /* _NEW_POINT */ + if (brw->gen < 6 && ctx->Point.PointSprite) { + for (int i = 0; i < 8; i++) { + if (ctx->Point.CoordReplace[i]) + key->point_coord_replace |= (1 << i); + } + } + + /* _NEW_TEXTURE */ + brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count, - &key->base.tex); ++ &key->tex); +} + +static bool +really_do_vs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_vertex_program *vp, + struct brw_vs_prog_key *key, struct anv_pipeline *pipeline) +{ + GLuint program_size; + const GLuint *program; + struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; + void *mem_ctx; + struct gl_shader *vs = NULL; + + if (prog) + vs = prog->_LinkedShaders[MESA_SHADER_VERTEX]; + + memset(prog_data, 0, sizeof(*prog_data)); + + mem_ctx = ralloc_context(NULL); + + create_params_array(pipeline, vs, &prog_data->base.base); + anv_nir_apply_dynamic_offsets(pipeline, vs->Program->nir, + &prog_data->base.base); + + GLbitfield64 outputs_written = vp->program.Base.OutputsWritten; + prog_data->inputs_read = vp->program.Base.InputsRead; + + if (key->copy_edgeflag) { + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE); + prog_data->inputs_read |= VERT_BIT_EDGEFLAG; + } + + if (brw->gen < 6) { + /* Put dummy slots into the VUE for the SF to put the replaced + * point sprite coords in. We shouldn't need these dummy slots, + * which take up precious URB space, but it would mean that the SF + * doesn't get nice aligned pairs of input coords into output + * coords, which would be a pain to handle. + */ + for (int i = 0; i < 8; i++) { + if (key->point_coord_replace & (1 << i)) + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i); + } + + /* if back colors are written, allocate slots for front colors too */ + if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0)) + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0); + if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1)) + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1); + } + + /* In order for legacy clipping to work, we need to populate the clip + * distance varying slots whenever clipping is enabled, even if the vertex + * shader doesn't write to gl_ClipDistance. + */ - if (key->base.userclip_active) { ++ if (key->nr_userclip_plane_consts) { + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0); + outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1); + } + + brw_compute_vue_map(brw->intelScreen->devinfo, - &prog_data->base.vue_map, outputs_written); - \ ++ &prog_data->base.vue_map, outputs_written, ++ prog ? prog->SeparateShader : false); ++ + set_binding_table_layout(&prog_data->base.base, pipeline, + VK_SHADER_STAGE_VERTEX); + + /* Emit GEN4 code. 
+ */ + program = brw_vs_emit(brw, mem_ctx, key, prog_data, &vp->program, + prog, &program_size); + if (program == NULL) { + ralloc_free(mem_ctx); + return false; + } + + const uint32_t offset = upload_kernel(pipeline, program, program_size); + if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { + pipeline->vs_simd8 = offset; + pipeline->vs_vec4 = NO_KERNEL; + } else { + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = offset; + } + + ralloc_free(mem_ctx); + + return true; +} + +void brw_wm_populate_key(struct brw_context *brw, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key) +{ + struct gl_context *ctx = &brw->ctx; + struct gl_program *prog = (struct gl_program *) brw->fragment_program; + GLuint lookup = 0; + GLuint line_aa; + bool program_uses_dfdy = fp->program.UsesDFdy; + struct gl_framebuffer draw_buffer; + bool multisample_fbo; + + memset(key, 0, sizeof(*key)); + + for (int i = 0; i < MAX_SAMPLERS; i++) { + /* Assume color sampler, no swizzling. */ + key->tex.swizzles[i] = SWIZZLE_XYZW; + } + + /* A non-zero framebuffer name indicates that the framebuffer was created by + * the user rather than the window system. */ + draw_buffer.Name = 1; + draw_buffer.Visual.samples = 1; + draw_buffer._NumColorDrawBuffers = 1; + draw_buffer._NumColorDrawBuffers = 1; + draw_buffer.Width = 400; + draw_buffer.Height = 400; + ctx->DrawBuffer = &draw_buffer; + + multisample_fbo = ctx->DrawBuffer->Visual.samples > 1; + + /* Build the index for table lookup + */ + if (brw->gen < 6) { + /* _NEW_COLOR */ + if (fp->program.UsesKill || ctx->Color.AlphaEnabled) + lookup |= IZ_PS_KILL_ALPHATEST_BIT; + + if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) + lookup |= IZ_PS_COMPUTES_DEPTH_BIT; + + /* _NEW_DEPTH */ + if (ctx->Depth.Test) + lookup |= IZ_DEPTH_TEST_ENABLE_BIT; + + if (ctx->Depth.Test && ctx->Depth.Mask) /* ?? 
*/ + lookup |= IZ_DEPTH_WRITE_ENABLE_BIT; + + /* _NEW_STENCIL | _NEW_BUFFERS */ + if (ctx->Stencil._Enabled) { + lookup |= IZ_STENCIL_TEST_ENABLE_BIT; + + if (ctx->Stencil.WriteMask[0] || + ctx->Stencil.WriteMask[ctx->Stencil._BackFace]) + lookup |= IZ_STENCIL_WRITE_ENABLE_BIT; + } + key->iz_lookup = lookup; + } + + line_aa = AA_NEVER; + + /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */ + if (ctx->Line.SmoothFlag) { + if (brw->reduced_primitive == GL_LINES) { + line_aa = AA_ALWAYS; + } + else if (brw->reduced_primitive == GL_TRIANGLES) { + if (ctx->Polygon.FrontMode == GL_LINE) { + line_aa = AA_SOMETIMES; + + if (ctx->Polygon.BackMode == GL_LINE || + (ctx->Polygon.CullFlag && + ctx->Polygon.CullFaceMode == GL_BACK)) + line_aa = AA_ALWAYS; + } + else if (ctx->Polygon.BackMode == GL_LINE) { + line_aa = AA_SOMETIMES; + + if ((ctx->Polygon.CullFlag && + ctx->Polygon.CullFaceMode == GL_FRONT)) + line_aa = AA_ALWAYS; + } + } + } + + key->line_aa = line_aa; + + /* _NEW_HINT */ + key->high_quality_derivatives = + ctx->Hint.FragmentShaderDerivative == GL_NICEST; + + if (brw->gen < 6) + key->stats_wm = brw->stats_wm; + + /* _NEW_LIGHT */ + key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT); + + /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */ + key->clamp_fragment_color = ctx->Color._ClampFragmentColor; + + /* _NEW_TEXTURE */ + brw_populate_sampler_prog_key_data(ctx, prog, brw->wm.base.sampler_count, + &key->tex); + + /* _NEW_BUFFERS */ + /* + * Include the draw buffer origin and height so that we can calculate + * fragment position values relative to the bottom left of the drawable, + * from the incoming screen origin relative position we get as part of our + * payload. + * + * This is only needed for the WM_WPOSXY opcode when the fragment program + * uses the gl_FragCoord input. + * + * We could avoid recompiling by including this as a constant referenced by + * our program, but if we were to do that it would also be nice to handle + * getting that constant updated at batchbuffer submit time (when we + * hold the lock and know where the buffer really is) rather than at emit + * time when we don't hold the lock and are just guessing. We could also + * just avoid using this as key data if the program doesn't use + * fragment.position. + * + * For DRI2 the origin_x/y will always be (0,0) but we still need the + * drawable height in order to invert the Y axis. + */ + if (fp->program.Base.InputsRead & VARYING_BIT_POS) { + key->drawable_height = ctx->DrawBuffer->Height; + } + + if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) { + key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer); + } + + /* _NEW_BUFFERS */ + key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers; + + /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */ + key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 && + (ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled); + + /* _NEW_BUFFERS _NEW_MULTISAMPLE */ + /* Ignore sample qualifier while computing this flag. 
*/ + key->persample_shading = + _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1; + if (key->persample_shading) + key->persample_2x = ctx->DrawBuffer->Visual.samples == 2; + + key->compute_pos_offset = + _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 && + fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS; + + key->compute_sample_id = + multisample_fbo && + ctx->Multisample.Enabled && + (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID); + + /* BRW_NEW_VUE_MAP_GEOM_OUT */ + if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead & + BRW_FS_VARYING_INPUT_MASK) > 16) + key->input_slots_valid = brw->vue_map_geom_out.slots_valid; + + + /* _NEW_COLOR | _NEW_BUFFERS */ + /* Pre-gen6, the hardware alpha test always used each render + * target's alpha to do alpha test, as opposed to render target 0's alpha + * like GL requires. Fix that by building the alpha test into the + * shader, and we'll skip enabling the fixed function alpha test. + */ + if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 && ctx->Color.AlphaEnabled) { + key->alpha_test_func = ctx->Color.AlphaFunc; + key->alpha_test_ref = ctx->Color.AlphaRef; + } + + /* The unique fragment program ID */ + key->program_string_id = fp->id; + + ctx->DrawBuffer = NULL; +} + +static uint8_t +computed_depth_mode(struct gl_fragment_program *fp) +{ + if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { + switch (fp->FragDepthLayout) { + case FRAG_DEPTH_LAYOUT_NONE: + case FRAG_DEPTH_LAYOUT_ANY: + return BRW_PSCDEPTH_ON; + case FRAG_DEPTH_LAYOUT_GREATER: + return BRW_PSCDEPTH_ON_GE; + case FRAG_DEPTH_LAYOUT_LESS: + return BRW_PSCDEPTH_ON_LE; + case FRAG_DEPTH_LAYOUT_UNCHANGED: + return BRW_PSCDEPTH_OFF; + } + } + return BRW_PSCDEPTH_OFF; +} + +static bool +really_do_wm_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_fragment_program *fp, + struct brw_wm_prog_key *key, struct anv_pipeline *pipeline) +{ + void *mem_ctx = ralloc_context(NULL); + struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; + struct gl_shader *fs = NULL; + unsigned int program_size; + const uint32_t *program; + + if (prog) + fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT]; + + memset(prog_data, 0, sizeof(*prog_data)); + + /* key->alpha_test_func means simulating alpha testing via discards, + * so the shader definitely kills pixels. + */ + prog_data->uses_kill = fp->program.UsesKill || key->alpha_test_func; + + prog_data->computed_depth_mode = computed_depth_mode(&fp->program); + + create_params_array(pipeline, fs, &prog_data->base); + anv_nir_apply_dynamic_offsets(pipeline, fs->Program->nir, &prog_data->base); + + prog_data->barycentric_interp_modes = + brw_compute_barycentric_interp_modes(brw, key->flat_shade, + key->persample_shading, + &fp->program); + + set_binding_table_layout(&prog_data->base, pipeline, + VK_SHADER_STAGE_FRAGMENT); + /* This needs to come after shader time and pull constant entries, but we + * don't have those set up now, so just put it after the layout entries. 
+ */ + prog_data->binding_table.render_target_start = 0; + + program = brw_wm_fs_emit(brw, mem_ctx, key, prog_data, + &fp->program, prog, &program_size); + if (program == NULL) { + ralloc_free(mem_ctx); + return false; + } + + uint32_t offset = upload_kernel(pipeline, program, program_size); + + if (prog_data->no_8) + pipeline->ps_simd8 = NO_KERNEL; + else + pipeline->ps_simd8 = offset; + + if (prog_data->no_8 || prog_data->prog_offset_16) { + pipeline->ps_simd16 = offset + prog_data->prog_offset_16; + } else { + pipeline->ps_simd16 = NO_KERNEL; + } + + ralloc_free(mem_ctx); + + return true; +} + +static void +brw_gs_populate_key(struct brw_context *brw, + struct anv_pipeline *pipeline, + struct brw_geometry_program *gp, + struct brw_gs_prog_key *key) +{ + struct gl_context *ctx = &brw->ctx; + struct brw_stage_state *stage_state = &brw->gs.base; + struct gl_program *prog = &gp->program.Base; + + memset(key, 0, sizeof(*key)); + - key->base.program_string_id = gp->id; - brw_setup_vue_key_clip_info(brw, &key->base, - gp->program.Base.UsesClipDistanceOut); ++ key->program_string_id = gp->id; + + /* _NEW_TEXTURE */ + brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count, - &key->base.tex); - - struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; - - /* BRW_NEW_VUE_MAP_VS */ - key->input_varyings = prog_data->base.vue_map.slots_valid; ++ &key->tex); +} + +static bool +really_do_gs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_geometry_program *gp, + struct brw_gs_prog_key *key, struct anv_pipeline *pipeline) +{ + struct brw_gs_compile_output output; + + /* FIXME: We pass the bind map to the compile in the output struct. Need + * something better. */ + set_binding_table_layout(&output.prog_data.base.base, + pipeline, VK_SHADER_STAGE_GEOMETRY); + + brw_compile_gs_prog(brw, prog, gp, key, &output); + + pipeline->gs_vec4 = upload_kernel(pipeline, output.program, output.program_size); + pipeline->gs_vertex_count = gp->program.VerticesIn; + + ralloc_free(output.mem_ctx); + + return true; +} + +static bool +brw_codegen_cs_prog(struct brw_context *brw, + struct gl_shader_program *prog, + struct brw_compute_program *cp, + struct brw_cs_prog_key *key, struct anv_pipeline *pipeline) +{ + const GLuint *program; + void *mem_ctx = ralloc_context(NULL); + GLuint program_size; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + + struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE]; + assert (cs); + + memset(prog_data, 0, sizeof(*prog_data)); + + set_binding_table_layout(&prog_data->base, pipeline, VK_SHADER_STAGE_COMPUTE); + + create_params_array(pipeline, cs, &prog_data->base); + anv_nir_apply_dynamic_offsets(pipeline, cs->Program->nir, &prog_data->base); + + program = brw_cs_emit(brw, mem_ctx, key, prog_data, + &cp->program, prog, &program_size); + if (program == NULL) { + ralloc_free(mem_ctx); + return false; + } + + if (unlikely(INTEL_DEBUG & DEBUG_CS)) + fprintf(stderr, "\n"); + + pipeline->cs_simd = upload_kernel(pipeline, program, program_size); + + ralloc_free(mem_ctx); + + return true; +} + +static void +brw_cs_populate_key(struct brw_context *brw, + struct brw_compute_program *bcp, struct brw_cs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + /* The unique compute program ID */ + key->program_string_id = bcp->id; +} + +struct anv_compiler { + struct anv_device *device; + struct intel_screen *screen; + struct brw_context *brw; + struct gl_pipeline_object pipeline; +}; + +extern "C" { + +struct anv_compiler * 
+anv_compiler_create(struct anv_device *device) +{ + const struct brw_device_info *devinfo = &device->info; + struct anv_compiler *compiler; + struct gl_context *ctx; + + compiler = rzalloc(NULL, struct anv_compiler); + if (compiler == NULL) + return NULL; + + compiler->screen = rzalloc(compiler, struct intel_screen); + if (compiler->screen == NULL) + goto fail; + + compiler->brw = rzalloc(compiler, struct brw_context); + if (compiler->brw == NULL) + goto fail; + + compiler->device = device; + + compiler->brw->gen = devinfo->gen; + compiler->brw->is_g4x = devinfo->is_g4x; + compiler->brw->is_baytrail = devinfo->is_baytrail; + compiler->brw->is_haswell = devinfo->is_haswell; + compiler->brw->is_cherryview = devinfo->is_cherryview; + + /* We need this at least for CS, which will check brw->max_cs_threads + * against the work group size. */ + compiler->brw->max_vs_threads = devinfo->max_vs_threads; + compiler->brw->max_hs_threads = devinfo->max_hs_threads; + compiler->brw->max_ds_threads = devinfo->max_ds_threads; + compiler->brw->max_gs_threads = devinfo->max_gs_threads; + compiler->brw->max_wm_threads = devinfo->max_wm_threads; + compiler->brw->max_cs_threads = devinfo->max_cs_threads; + compiler->brw->urb.size = devinfo->urb.size; + compiler->brw->urb.min_vs_entries = devinfo->urb.min_vs_entries; + compiler->brw->urb.max_vs_entries = devinfo->urb.max_vs_entries; + compiler->brw->urb.max_hs_entries = devinfo->urb.max_hs_entries; + compiler->brw->urb.max_ds_entries = devinfo->urb.max_ds_entries; + compiler->brw->urb.max_gs_entries = devinfo->urb.max_gs_entries; + + compiler->brw->intelScreen = compiler->screen; + compiler->screen->devinfo = &device->info; + + brw_process_intel_debug_variable(compiler->screen); + + compiler->screen->compiler = brw_compiler_create(compiler, &device->info); + + ctx = &compiler->brw->ctx; + _mesa_init_shader_object_functions(&ctx->Driver); + + /* brw_select_clip_planes() needs this for bogus reasons. */ + ctx->_Shader = &compiler->pipeline; + + return compiler; + + fail: + ralloc_free(compiler); + return NULL; +} + +void +anv_compiler_destroy(struct anv_compiler *compiler) +{ + _mesa_free_errors_data(&compiler->brw->ctx); + ralloc_free(compiler); +} + +/* From gen7_urb.c */ + +/* FIXME: Add to struct intel_device_info */ + +static const int gen8_push_size = 32 * 1024; + +static void +gen7_compute_urb_partition(struct anv_pipeline *pipeline) +{ + const struct brw_device_info *devinfo = &pipeline->device->info; + bool vs_present = pipeline->vs_simd8 != NO_KERNEL; + unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1; + unsigned vs_entry_size_bytes = vs_size * 64; + bool gs_present = pipeline->gs_vec4 != NO_KERNEL; + unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1; + unsigned gs_entry_size_bytes = gs_size * 64; + + /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): + * + * VS Number of URB Entries must be divisible by 8 if the VS URB Entry + * Allocation Size is less than 9 512-bit URB entries. + * + * Similar text exists for GS. + */ + unsigned vs_granularity = (vs_size < 9) ? 8 : 1; + unsigned gs_granularity = (gs_size < 9) ? 8 : 1; + + /* URB allocations must be done in 8k chunks. */ + unsigned chunk_size_bytes = 8192; + + /* Determine the size of the URB in chunks. 
*/ + unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes; + + /* Reserve space for push constants */ + unsigned push_constant_bytes = gen8_push_size; + unsigned push_constant_chunks = + push_constant_bytes / chunk_size_bytes; + + /* Initially, assign each stage the minimum amount of URB space it needs, + * and make a note of how much additional space it "wants" (the amount of + * additional space it could actually make use of). + */ + + /* VS has a lower limit on the number of URB entries */ + unsigned vs_chunks = + ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + unsigned vs_wants = + ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - vs_chunks; + + unsigned gs_chunks = 0; + unsigned gs_wants = 0; + if (gs_present) { + /* There are two constraints on the minimum amount of URB space we can + * allocate: + * + * (1) We need room for at least 2 URB entries, since we always operate + * the GS in DUAL_OBJECT mode. + * + * (2) We can't allocate less than nr_gs_entries_granularity. + */ + gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + gs_wants = + ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - gs_chunks; + } + + /* There should always be enough URB space to satisfy the minimum + * requirements of each stage. + */ + unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks; + assert(total_needs <= urb_chunks); + + /* Mete out remaining space (if any) in proportion to "wants". */ + unsigned total_wants = vs_wants + gs_wants; + unsigned remaining_space = urb_chunks - total_needs; + if (remaining_space > total_wants) + remaining_space = total_wants; + if (remaining_space > 0) { + unsigned vs_additional = (unsigned) + round(vs_wants * (((double) remaining_space) / total_wants)); + vs_chunks += vs_additional; + remaining_space -= vs_additional; + gs_chunks += remaining_space; + } + + /* Sanity check that we haven't over-allocated. */ + assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks); + + /* Finally, compute the number of entries that can fit in the space + * allocated to each stage. + */ + unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes; + unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes; + + /* Since we rounded up when computing *_wants, this may be slightly more + * than the maximum allowed amount, so correct for that. + */ + nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries); + nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries); + + /* Ensure that we program a multiple of the granularity. */ + nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity); + nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity); + + /* Finally, sanity check to make sure we have at least the minimum number + * of entries needed for each stage. 
+ */ + assert(nr_vs_entries >= devinfo->urb.min_vs_entries); + if (gs_present) + assert(nr_gs_entries >= 2); + + /* Lay out the URB in the following order: + * - push constants + * - VS + * - GS + */ + pipeline->urb.vs_start = push_constant_chunks; + pipeline->urb.vs_size = vs_size; + pipeline->urb.nr_vs_entries = nr_vs_entries; + + pipeline->urb.gs_start = push_constant_chunks + vs_chunks; + pipeline->urb.gs_size = gs_size; + pipeline->urb.nr_gs_entries = nr_gs_entries; +} + +static const struct { + uint32_t token; + gl_shader_stage stage; + const char *name; +} stage_info[] = { + { GL_VERTEX_SHADER, MESA_SHADER_VERTEX, "vertex" }, + { GL_TESS_CONTROL_SHADER, (gl_shader_stage)-1,"tess control" }, + { GL_TESS_EVALUATION_SHADER, (gl_shader_stage)-1, "tess evaluation" }, + { GL_GEOMETRY_SHADER, MESA_SHADER_GEOMETRY, "geometry" }, + { GL_FRAGMENT_SHADER, MESA_SHADER_FRAGMENT, "fragment" }, + { GL_COMPUTE_SHADER, MESA_SHADER_COMPUTE, "compute" }, +}; + +struct spirv_header{ + uint32_t magic; + uint32_t version; + uint32_t gen_magic; +}; + +static void +setup_nir_io(struct gl_shader *mesa_shader, + nir_shader *shader) +{ + struct gl_program *prog = mesa_shader->Program; + foreach_list_typed(nir_variable, var, node, &shader->inputs) { + prog->InputsRead |= BITFIELD64_BIT(var->data.location); + if (shader->stage == MESA_SHADER_FRAGMENT) { + struct gl_fragment_program *fprog = (struct gl_fragment_program *)prog; + + fprog->InterpQualifier[var->data.location] = + (glsl_interp_qualifier)var->data.interpolation; + if (var->data.centroid) + fprog->IsCentroid |= BITFIELD64_BIT(var->data.location); + if (var->data.sample) + fprog->IsSample |= BITFIELD64_BIT(var->data.location); + } + } + + foreach_list_typed(nir_variable, var, node, &shader->outputs) { + prog->OutputsWritten |= BITFIELD64_BIT(var->data.location); + } + + mesa_shader->num_uniform_components = shader->num_uniforms; +} + +static void +anv_compile_shader_spirv(struct anv_compiler *compiler, + struct gl_shader_program *program, + struct anv_pipeline *pipeline, uint32_t stage) +{ + struct brw_context *brw = compiler->brw; + struct anv_shader *shader = pipeline->shaders[stage]; + struct gl_shader *mesa_shader; + int name = 0; + uint32_t *spirv; + + mesa_shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token); + fail_if(mesa_shader == NULL, + "failed to create %s shader\n", stage_info[stage].name); + +#define CREATE_PROGRAM(stage) \ + _mesa_init_##stage##_program(&brw->ctx, &ralloc(mesa_shader, struct brw_##stage##_program)->program, 0, 0) + + bool is_scalar; + struct gl_program *prog; + switch (stage) { + case VK_SHADER_STAGE_VERTEX: + prog = CREATE_PROGRAM(vertex); + is_scalar = compiler->screen->compiler->scalar_vs; + break; + case VK_SHADER_STAGE_GEOMETRY: + prog = CREATE_PROGRAM(geometry); + is_scalar = false; + break; + case VK_SHADER_STAGE_FRAGMENT: + prog = CREATE_PROGRAM(fragment); + is_scalar = true; + break; + case VK_SHADER_STAGE_COMPUTE: + prog = CREATE_PROGRAM(compute); + is_scalar = true; + break; + default: + unreachable("Unsupported shader stage"); + } + _mesa_reference_program(&brw->ctx, &mesa_shader->Program, prog); + + mesa_shader->Program->Parameters = + rzalloc(mesa_shader, struct gl_program_parameter_list); + + mesa_shader->Type = stage_info[stage].token; + mesa_shader->Stage = stage_info[stage].stage; + + struct gl_shader_compiler_options *glsl_options = + &compiler->screen->compiler->glsl_compiler_options[stage_info[stage].stage]; + + spirv = (uint32_t *) shader->module->data; + assert(spirv[0] == 
SPIR_V_MAGIC_NUMBER); + assert(shader->module->size % 4 == 0); + + mesa_shader->Program->nir = + spirv_to_nir(spirv, shader->module->size / 4, + stage_info[stage].stage, glsl_options->NirOptions); + nir_validate_shader(mesa_shader->Program->nir); + + brw_process_nir(mesa_shader->Program->nir, + compiler->screen->devinfo, + NULL, mesa_shader->Stage, is_scalar); + + setup_nir_io(mesa_shader, mesa_shader->Program->nir); + + fail_if(mesa_shader->Program->nir == NULL, + "failed to translate SPIR-V to NIR\n"); + + _mesa_reference_shader(&brw->ctx, &program->Shaders[program->NumShaders], + mesa_shader); + program->NumShaders++; +} + +static void +add_compiled_stage(struct anv_pipeline *pipeline, uint32_t stage, + struct brw_stage_prog_data *prog_data) +{ + struct brw_device_info *devinfo = &pipeline->device->info; + uint32_t max_threads[] = { + [VK_SHADER_STAGE_VERTEX] = devinfo->max_vs_threads, + [VK_SHADER_STAGE_TESS_CONTROL] = 0, + [VK_SHADER_STAGE_TESS_EVALUATION] = 0, + [VK_SHADER_STAGE_GEOMETRY] = devinfo->max_gs_threads, + [VK_SHADER_STAGE_FRAGMENT] = devinfo->max_wm_threads, + [VK_SHADER_STAGE_COMPUTE] = devinfo->max_cs_threads, + }; + + pipeline->prog_data[stage] = prog_data; + pipeline->active_stages |= 1 << stage; + pipeline->scratch_start[stage] = pipeline->total_scratch; + pipeline->total_scratch = + align_u32(pipeline->total_scratch, 1024) + + prog_data->total_scratch * max_threads[stage]; +} + +int +anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline) +{ + struct gl_shader_program *program; + int name = 0; + struct brw_context *brw = compiler->brw; + + pipeline->writes_point_size = false; + + /* When we free the pipeline, we detect stages based on the NULL status + * of various prog_data pointers. Make them NULL by default. 
+ */ + memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); + memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); + + brw->use_rep_send = pipeline->use_repclear; + brw->no_simd8 = pipeline->use_repclear; + + program = brw->ctx.Driver.NewShaderProgram(name); + program->Shaders = (struct gl_shader **) + calloc(VK_SHADER_STAGE_NUM, sizeof(struct gl_shader *)); + fail_if(program == NULL || program->Shaders == NULL, + "failed to create program\n"); + + for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) { + if (pipeline->shaders[i]) + anv_compile_shader_spirv(compiler, program, pipeline, i); + } + + for (unsigned i = 0; i < program->NumShaders; i++) { + struct gl_shader *shader = program->Shaders[i]; + program->_LinkedShaders[shader->Stage] = shader; + } + + bool success; + pipeline->active_stages = 0; + pipeline->total_scratch = 0; + + if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) { + struct brw_vs_prog_key vs_key; + struct gl_vertex_program *vp = (struct gl_vertex_program *) + program->_LinkedShaders[MESA_SHADER_VERTEX]->Program; + struct brw_vertex_program *bvp = brw_vertex_program(vp); + + brw_vs_populate_key(brw, bvp, &vs_key); + + success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline); + fail_if(!success, "do_wm_prog failed\n"); + add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX, + &pipeline->vs_prog_data.base.base); + + if (vp->Base.OutputsWritten & VARYING_SLOT_PSIZ) + pipeline->writes_point_size = true; + } else { + memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = NO_KERNEL; + } + + + if (pipeline->shaders[VK_SHADER_STAGE_GEOMETRY]) { + struct brw_gs_prog_key gs_key; + struct gl_geometry_program *gp = (struct gl_geometry_program *) + program->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program; + struct brw_geometry_program *bgp = brw_geometry_program(gp); + + brw_gs_populate_key(brw, pipeline, bgp, &gs_key); + + success = really_do_gs_prog(brw, program, bgp, &gs_key, pipeline); + fail_if(!success, "do_gs_prog failed\n"); + add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY, + &pipeline->gs_prog_data.base.base); + + if (gp->Base.OutputsWritten & VARYING_SLOT_PSIZ) + pipeline->writes_point_size = true; + } else { + pipeline->gs_vec4 = NO_KERNEL; + } + + if (pipeline->shaders[VK_SHADER_STAGE_FRAGMENT]) { + struct brw_wm_prog_key wm_key; + struct gl_fragment_program *fp = (struct gl_fragment_program *) + program->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program; + struct brw_fragment_program *bfp = brw_fragment_program(fp); + + brw_wm_populate_key(brw, bfp, &wm_key); + + success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline); + fail_if(!success, "do_wm_prog failed\n"); + add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT, + &pipeline->wm_prog_data.base); + } + + if (pipeline->shaders[VK_SHADER_STAGE_COMPUTE]) { + struct brw_cs_prog_key cs_key; + struct gl_compute_program *cp = (struct gl_compute_program *) + program->_LinkedShaders[MESA_SHADER_COMPUTE]->Program; + struct brw_compute_program *bcp = brw_compute_program(cp); + + brw_cs_populate_key(brw, bcp, &cs_key); + + success = brw_codegen_cs_prog(brw, program, bcp, &cs_key, pipeline); + fail_if(!success, "brw_codegen_cs_prog failed\n"); + add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE, + &pipeline->cs_prog_data.base); + } + + brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program); + + struct anv_device *device = compiler->device; + while (device->scratch_block_pool.bo.size < pipeline->total_scratch) + 
anv_block_pool_alloc(&device->scratch_block_pool); + + gen7_compute_urb_partition(pipeline); + + return 0; +} + +/* This badly named function frees the struct anv_pipeline data that the compiler + * allocates. Currently just the prog_data structs. + */ +void +anv_compiler_free(struct anv_pipeline *pipeline) +{ + for (uint32_t stage = 0; stage < VK_SHADER_STAGE_NUM; stage++) { + if (pipeline->prog_data[stage]) { + free(pipeline->prog_data[stage]->map_entries); + /* We only ever set up the params array because we don't do + * non-UBO pull constants + */ + anv_device_free(pipeline->device, pipeline->prog_data[stage]->param); + } + } +} + +} diff --cc src/vulkan/anv_nir_apply_dynamic_offsets.c index 367c4f82d64,00000000000..1f6c64a9e02 mode 100644,000000..100644 --- a/src/vulkan/anv_nir_apply_dynamic_offsets.c +++ b/src/vulkan/anv_nir_apply_dynamic_offsets.c @@@ -1,150 -1,0 +1,149 @@@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ */ + +#include "anv_nir.h" +#include "glsl/nir/nir_builder.h" + +struct apply_dynamic_offsets_state { + nir_shader *shader; + nir_builder builder; + + VkShaderStage stage; + struct anv_pipeline_layout *layout; + + uint32_t indices_start; +}; + +static bool +apply_dynamic_offsets_block(nir_block *block, void *void_state) +{ + struct apply_dynamic_offsets_state *state = void_state; + struct anv_descriptor_set_layout *set_layout; + const struct anv_descriptor_slot *slot; + + nir_foreach_instr_safe(block, instr) { + if (instr->type != nir_instr_type_intrinsic) + continue; + + nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + + bool has_indirect = false; + uint32_t set, binding; + switch (intrin->intrinsic) { + case nir_intrinsic_load_ubo_indirect: + has_indirect = true; + /* fallthrough */ + case nir_intrinsic_load_ubo: { + set = intrin->const_index[0]; + + nir_const_value *const_binding = nir_src_as_const_value(intrin->src[0]); + if (const_binding) { + binding = const_binding->u[0]; + } else { + assert(0 && "need more info from the ir for this."); + } + break; + } + default: + continue; /* the loop */ + } + + set_layout = state->layout->set[set].layout; + slot = &set_layout->stage[state->stage].surface_start[binding]; + if (slot->dynamic_slot < 0) + continue; + + uint32_t dynamic_index = state->layout->set[set].dynamic_offset_start + + slot->dynamic_slot; + + state->builder.cursor = nir_before_instr(&intrin->instr); + + nir_intrinsic_instr *offset_load = + nir_intrinsic_instr_create(state->shader, nir_intrinsic_load_uniform); + offset_load->num_components = 1; + offset_load->const_index[0] = state->indices_start + dynamic_index; + offset_load->const_index[1] = 0; + nir_ssa_dest_init(&offset_load->instr, &offset_load->dest, 1, NULL); + nir_builder_instr_insert(&state->builder, &offset_load->instr); + + nir_ssa_def *offset = &offset_load->dest.ssa; + if (has_indirect) { + assert(intrin->src[1].is_ssa); + offset = nir_iadd(&state->builder, intrin->src[1].ssa, offset); + } + + assert(intrin->dest.is_ssa); + + nir_intrinsic_instr *new_load = + nir_intrinsic_instr_create(state->shader, + nir_intrinsic_load_ubo_indirect); + new_load->num_components = intrin->num_components; + new_load->const_index[0] = intrin->const_index[0]; + new_load->const_index[1] = intrin->const_index[1]; + nir_src_copy(&new_load->src[0], &intrin->src[0], &new_load->instr); + new_load->src[1] = nir_src_for_ssa(offset); + nir_ssa_dest_init(&new_load->instr, &new_load->dest, + intrin->dest.ssa.num_components, + intrin->dest.ssa.name); + nir_builder_instr_insert(&state->builder, &new_load->instr); + + nir_ssa_def_rewrite_uses(&intrin->dest.ssa, - nir_src_for_ssa(&new_load->dest.ssa), - state->shader); ++ nir_src_for_ssa(&new_load->dest.ssa)); + + nir_instr_remove(&intrin->instr); + } + + return true; +} + +void +anv_nir_apply_dynamic_offsets(struct anv_pipeline *pipeline, + nir_shader *shader, + struct brw_stage_prog_data *prog_data) +{ + struct apply_dynamic_offsets_state state = { + .shader = shader, + .stage = anv_vk_shader_stage_for_mesa_stage(shader->stage), + .layout = pipeline->layout, + .indices_start = shader->num_uniforms, + }; + + if (!state.layout || !state.layout->stage[state.stage].has_dynamic_offsets) + return; + + nir_foreach_overload(shader, overload) { + if (overload->impl) { + nir_builder_init(&state.builder, overload->impl); + nir_foreach_block(overload->impl, apply_dynamic_offsets_block, &state); + nir_metadata_preserve(overload->impl, nir_metadata_block_index | + nir_metadata_dominance); + } + 
} + + struct anv_push_constants *null_data = NULL; + for (unsigned i = 0; i < MAX_DYNAMIC_BUFFERS; i++) + prog_data->param[i + shader->num_uniforms] = + (const gl_constant_value *)&null_data->dynamic_offsets[i]; + + shader->num_uniforms += MAX_DYNAMIC_BUFFERS; +}
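
For reference, a minimal usage sketch (not part of the commit) of the nir_lower_tex pass and nir_lower_tex_options struct added in the nir.h hunk above, modeled on the brw_nir.c hunk that sets .lower_txp = ~0; the sampler number used for saturate_s is purely illustrative.

```c
/* Hypothetical usage sketch of nir_lower_tex(), assuming the options struct
 * declared in the nir.h hunk of this merge.  Lowers texture projectors for
 * every sampler dimension and, as an illustration of the saturate_s bitmask,
 * clamps the s coordinate of sampler #2 to [0.0, 1.0].
 */
#include "nir.h"

static void
lower_textures_example(nir_shader *nir)
{
   const nir_lower_tex_options tex_options = {
      .lower_txp = ~0,          /* bit per GLSL_SAMPLER_DIM_x: lower all txp */
      .saturate_s = (1 << 2),   /* saturate s coord of sampler 2 (example)   */
   };

   nir_lower_tex(nir, &tex_options);
}
```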