From: Jason Ekstrand Date: Fri, 2 Oct 2015 23:45:48 +0000 (-0700) Subject: Merge remote-tracking branch 'mesa-public/master' into vulkan X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=ef56cf7738ecb25e8c668c509097fc714ca71c96;hp=10f97718c353e101c64fa60fcde91e1550e39957;p=mesa.git Merge remote-tracking branch 'mesa-public/master' into vulkan --- diff --git a/src/glsl/ast_array_index.cpp b/src/glsl/ast_array_index.cpp index dfb31073f82..5e8f49d70b0 100644 --- a/src/glsl/ast_array_index.cpp +++ b/src/glsl/ast_array_index.cpp @@ -231,15 +231,17 @@ _mesa_ast_array_index_to_hir(void *mem_ctx, _mesa_glsl_error(&loc, state, "unsized array index must be constant"); } } else if (array->type->fields.array->is_interface() - && array->variable_referenced()->data.mode == ir_var_uniform + && (array->variable_referenced()->data.mode == ir_var_uniform || + array->variable_referenced()->data.mode == ir_var_shader_storage) && !state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) { - /* Page 46 in section 4.3.7 of the OpenGL ES 3.00 spec says: + /* Page 50 in section 4.3.9 of the OpenGL ES 3.10 spec says: * - * "All indexes used to index a uniform block array must be - * constant integral expressions." + * "All indices used to index a uniform or shader storage block + * array must be constant integral expressions." */ - _mesa_glsl_error(&loc, state, - "uniform block array index must be constant"); + _mesa_glsl_error(&loc, state, "%s block array index must be constant", + array->variable_referenced()->data.mode + == ir_var_uniform ? "uniform" : "shader storage"); } else { /* whole_variable_referenced can return NULL if the array is a * member of a structure. In this case it is safe to not update diff --git a/src/glsl/ast_to_hir.cpp b/src/glsl/ast_to_hir.cpp index 351aafc1a72..849a8ea29fd 100644 --- a/src/glsl/ast_to_hir.cpp +++ b/src/glsl/ast_to_hir.cpp @@ -5766,6 +5766,10 @@ ast_process_structure_or_interface_block(exec_list *instructions, const struct ast_type_qualifier *const qual = & decl_list->type->qualifier; + + if (qual->flags.q.explicit_binding) + validate_binding_qualifier(state, &loc, decl_type, qual); + if (qual->flags.q.std140 || qual->flags.q.std430 || qual->flags.q.packed || diff --git a/src/glsl/glsl_types.cpp b/src/glsl/glsl_types.cpp index 15cd45ea708..112e5ffdf7c 100644 --- a/src/glsl/glsl_types.cpp +++ b/src/glsl/glsl_types.cpp @@ -206,7 +206,7 @@ glsl_type::glsl_type(const char *subroutine_name) : base_type(GLSL_TYPE_SUBROUTINE), sampler_dimensionality(0), sampler_shadow(0), sampler_array(0), sampler_type(0), interface_packing(0), - vector_elements(0), matrix_columns(0), + vector_elements(1), matrix_columns(1), length(0) { mtx_lock(&glsl_type::mutex); @@ -214,7 +214,6 @@ glsl_type::glsl_type(const char *subroutine_name) : init_ralloc_type_ctx(); assert(subroutine_name != NULL); this->name = ralloc_strdup(this->mem_ctx, subroutine_name); - this->vector_elements = 1; mtx_unlock(&glsl_type::mutex); } @@ -1615,6 +1614,7 @@ glsl_type::std430_base_alignment(bool row_major) const base_alignment = MAX2(base_alignment, field_type->std430_base_alignment(field_row_major)); } + assert(base_alignment > 0); return base_alignment; } assert(!"not reached"); diff --git a/src/glsl/link_uniforms.cpp b/src/glsl/link_uniforms.cpp index 47d49c84e65..740b0a46aee 100644 --- a/src/glsl/link_uniforms.cpp +++ b/src/glsl/link_uniforms.cpp @@ -1131,15 +1131,15 @@ link_assign_uniform_locations(struct gl_shader_program *prog, const unsigned num_data_slots = uniform_size.num_values; const unsigned 
hidden_uniforms = uniform_size.num_hidden_uniforms; + /* assign hidden uniforms a slot id */ + hiddenUniforms->iterate(assign_hidden_uniform_slot_id, &uniform_size); + delete hiddenUniforms; + /* On the outside chance that there were no uniforms, bail out. */ if (num_uniforms == 0) return; - /* assign hidden uniforms a slot id */ - hiddenUniforms->iterate(assign_hidden_uniform_slot_id, &uniform_size); - delete hiddenUniforms; - struct gl_uniform_storage *uniforms = rzalloc_array(prog, struct gl_uniform_storage, num_uniforms); union gl_constant_value *data = diff --git a/src/glsl/linker.cpp b/src/glsl/linker.cpp index 87c7d4b087b..dbf300ac691 100644 --- a/src/glsl/linker.cpp +++ b/src/glsl/linker.cpp @@ -3133,6 +3133,60 @@ check_explicit_uniform_locations(struct gl_context *ctx, delete uniform_map; } +static bool +should_add_buffer_variable(struct gl_shader_program *shProg, + GLenum type, const char *name) +{ + bool found_interface = false; + const char *block_name = NULL; + + /* These rules only apply to buffer variables. So we return + * true for the rest of types. + */ + if (type != GL_BUFFER_VARIABLE) + return true; + + for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) { + block_name = shProg->UniformBlocks[i].Name; + if (strncmp(block_name, name, strlen(block_name)) == 0) { + found_interface = true; + break; + } + } + + /* We remove the interface name from the buffer variable name, + * including the dot that follows it. + */ + if (found_interface) + name = name + strlen(block_name) + 1; + + /* From: ARB_program_interface_query extension: + * + * "For an active shader storage block member declared as an array, an + * entry will be generated only for the first array element, regardless + * of its type. For arrays of aggregate types, the enumeration rules are + * applied recursively for the single enumerated array element. + */ + const char *first_dot = strchr(name, '.'); + const char *first_square_bracket = strchr(name, '['); + + /* The buffer variable is on top level and it is not an array */ + if (!first_square_bracket) { + return true; + /* The shader storage block member is a struct, then generate the entry */ + } else if (first_dot && first_dot < first_square_bracket) { + return true; + } else { + /* Shader storage block member is an array, only generate an entry for the + * first array element. + */ + if (strncmp(first_square_bracket, "[0]", 3) == 0) + return true; + } + + return false; +} + static bool add_program_resource(struct gl_shader_program *prog, GLenum type, const void *data, uint8_t stages) @@ -3412,6 +3466,10 @@ build_program_resource_list(struct gl_shader_program *shProg) bool is_shader_storage = shProg->UniformStorage[i].is_shader_storage; GLenum type = is_shader_storage ? GL_BUFFER_VARIABLE : GL_UNIFORM; + if (!should_add_buffer_variable(shProg, type, + shProg->UniformStorage[i].name)) + continue; + if (!add_program_resource(shProg, type, &shProg->UniformStorage[i], stageref)) return; diff --git a/src/glsl/lower_ubo_reference.cpp b/src/glsl/lower_ubo_reference.cpp index e581306019b..247620e6148 100644 --- a/src/glsl/lower_ubo_reference.cpp +++ b/src/glsl/lower_ubo_reference.cpp @@ -754,6 +754,12 @@ lower_ubo_reference_visitor::emit_access(bool is_write, add(base_offset, new(mem_ctx) ir_constant(deref_offset + i * matrix_stride)); if (is_write) { + /* If the component is not in the writemask, then don't + * store any value. 
+ */ + if (!((1 << i) & write_mask)) + continue; + base_ir->insert_after(ssbo_store(swizzle(deref, i, 1), chan_offset, 1)); } else { if (!this->is_shader_storage) { diff --git a/src/glsl/nir/glsl_to_nir.cpp b/src/glsl/nir/glsl_to_nir.cpp index 5ee6ff1d854..6cd23340e3f 100644 --- a/src/glsl/nir/glsl_to_nir.cpp +++ b/src/glsl/nir/glsl_to_nir.cpp @@ -131,9 +131,13 @@ private: }; /* end of anonymous namespace */ nir_shader * -glsl_to_nir(struct gl_shader *sh, const nir_shader_compiler_options *options) +glsl_to_nir(const struct gl_shader_program *shader_prog, + gl_shader_stage stage, + const nir_shader_compiler_options *options) { - nir_shader *shader = nir_shader_create(NULL, sh->Stage, options); + struct gl_shader *sh = shader_prog->_LinkedShaders[stage]; + + nir_shader *shader = nir_shader_create(NULL, stage, options); nir_visitor v1(shader, sh); nir_function_visitor v2(&v1); @@ -142,8 +146,26 @@ glsl_to_nir(struct gl_shader *sh, const nir_shader_compiler_options *options) nir_lower_outputs_to_temporaries(shader); - shader->gs.vertices_out = sh->Geom.VerticesOut; - shader->gs.invocations = sh->Geom.Invocations; + /* TODO: Use _mesa_fls instead */ + unsigned num_textures = 0; + for (unsigned i = 0; i < 8 * sizeof(sh->Program->SamplersUsed); i++) + if (sh->Program->SamplersUsed & (1 << i)) + num_textures = i; + + shader->info.name = ralloc_asprintf(shader, "GLSL%d", sh->Name); + shader->info.num_textures = num_textures; + shader->info.num_ubos = sh->NumUniformBlocks; + shader->info.num_abos = shader_prog->NumAtomicBuffers; + shader->info.num_ssbos = shader_prog->NumBufferInterfaceBlocks; + shader->info.num_images = sh->NumImages; + shader->info.inputs_read = sh->Program->InputsRead; + shader->info.outputs_written = sh->Program->OutputsWritten; + shader->info.system_values_read = sh->Program->SystemValuesRead; + shader->info.uses_texture_gather = sh->Program->UsesGather; + shader->info.uses_clip_distance_out = sh->Program->UsesClipDistanceOut; + shader->info.separate_shader = shader_prog->SeparateShader; + shader->info.gs.vertices_out = sh->Geom.VerticesOut; + shader->info.gs.invocations = sh->Geom.Invocations; return shader; } @@ -274,6 +296,11 @@ nir_visitor::visit(ir_variable *ir) /* For whatever reason, GLSL IR makes gl_FrontFacing an input */ var->data.location = SYSTEM_VALUE_FRONT_FACE; var->data.mode = nir_var_system_value; + } else if (shader->stage == MESA_SHADER_GEOMETRY && + ir->data.location == VARYING_SLOT_PRIMITIVE_ID) { + /* For whatever reason, GLSL IR makes gl_PrimitiveIDIn an input */ + var->data.location = SYSTEM_VALUE_PRIMITIVE_ID; + var->data.mode = nir_var_system_value; } else { var->data.mode = nir_var_shader_in; } @@ -799,7 +826,6 @@ nir_visitor::visit(ir_call *ir) instr = nir_intrinsic_instr_create(shader, op); instr->src[2] = evaluate_rvalue(offset); instr->const_index[0] = 0; - dest = &instr->dest; } else { instr->const_index[0] = const_offset->value.u[0]; } diff --git a/src/glsl/nir/glsl_to_nir.h b/src/glsl/nir/glsl_to_nir.h index 3801e8c55c6..29badcda08d 100644 --- a/src/glsl/nir/glsl_to_nir.h +++ b/src/glsl/nir/glsl_to_nir.h @@ -32,7 +32,8 @@ extern "C" { #endif -nir_shader *glsl_to_nir(struct gl_shader *sh, +nir_shader *glsl_to_nir(const struct gl_shader_program *shader_prog, + gl_shader_stage stage, const nir_shader_compiler_options *options); #ifdef __cplusplus diff --git a/src/glsl/nir/nir.c b/src/glsl/nir/nir.c index 57fd959c931..e12da805281 100644 --- a/src/glsl/nir/nir.c +++ b/src/glsl/nir/nir.c @@ -41,6 +41,7 @@ nir_shader_create(void *mem_ctx, 
exec_list_make_empty(&shader->outputs); shader->options = options; + memset(&shader->info, 0, sizeof(shader->info)); exec_list_make_empty(&shader->functions); exec_list_make_empty(&shader->registers); @@ -54,9 +55,6 @@ nir_shader_create(void *mem_ctx, shader->stage = stage; - shader->gs.vertices_out = 0; - shader->gs.invocations = 0; - return shader; } @@ -1489,10 +1487,11 @@ nir_intrinsic_from_system_value(gl_system_value val) return nir_intrinsic_load_work_group_id; case SYSTEM_VALUE_NUM_WORK_GROUPS: return nir_intrinsic_load_num_work_groups; + case SYSTEM_VALUE_PRIMITIVE_ID: + return nir_intrinsic_load_primitive_id; /* FINISHME: Add tessellation intrinsics. case SYSTEM_VALUE_TESS_COORD: case SYSTEM_VALUE_VERTICES_IN: - case SYSTEM_VALUE_PRIMITIVE_ID: case SYSTEM_VALUE_TESS_LEVEL_OUTER: case SYSTEM_VALUE_TESS_LEVEL_INNER: */ @@ -1529,6 +1528,8 @@ nir_system_value_from_intrinsic(nir_intrinsic_op intrin) return SYSTEM_VALUE_NUM_WORK_GROUPS; case nir_intrinsic_load_work_group_id: return SYSTEM_VALUE_WORK_GROUP_ID; + case nir_intrinsic_load_primitive_id: + return SYSTEM_VALUE_PRIMITIVE_ID; /* FINISHME: Add tessellation intrinsics. return SYSTEM_VALUE_TESS_COORD; return SYSTEM_VALUE_VERTICES_IN; diff --git a/src/glsl/nir/nir.h b/src/glsl/nir/nir.h index 19a4a361a25..107df3a2a67 100644 --- a/src/glsl/nir/nir.h +++ b/src/glsl/nir/nir.h @@ -1480,6 +1480,45 @@ typedef struct nir_shader_compiler_options { bool native_integers; } nir_shader_compiler_options; +typedef struct nir_shader_info { + const char *name; + + /* Number of textures used by this shader */ + unsigned num_textures; + /* Number of uniform buffers used by this shader */ + unsigned num_ubos; + /* Number of atomic buffers used by this shader */ + unsigned num_abos; + /* Number of shader storage buffers used by this shader */ + unsigned num_ssbos; + /* Number of images used by this shader */ + unsigned num_images; + + /* Which inputs are actually read */ + uint64_t inputs_read; + /* Which outputs are actually written */ + uint64_t outputs_written; + /* Which system values are actually read */ + uint64_t system_values_read; + + /* Whether or not this shader ever uses textureGather() */ + bool uses_texture_gather; + + /* Whether or not this shader uses the gl_ClipDistance output */ + bool uses_clip_distance_out; + + /* Whether or not separate shader objects were used */ + bool separate_shader; + + struct { + /** The maximum number of vertices the geometry shader might write. */ + unsigned vertices_out; + + /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */ + unsigned invocations; + } gs; +} nir_shader_info; + typedef struct nir_shader { /** list of uniforms (nir_variable) */ struct exec_list uniforms; @@ -1497,6 +1536,9 @@ typedef struct nir_shader { */ const struct nir_shader_compiler_options *options; + /** Various bits of compile-time information about a given shader */ + struct nir_shader_info info; + /** list of global variables in the shader (nir_variable) */ struct exec_list globals; @@ -1519,14 +1561,6 @@ typedef struct nir_shader { /** The shader stage, such as MESA_SHADER_VERTEX. */ gl_shader_stage stage; - - struct { - /** The maximum number of vertices the geometry shader might write. */ - unsigned vertices_out; - - /** 1 .. 
MAX_GEOMETRY_SHADER_INVOCATIONS */ - unsigned invocations; - } gs; } nir_shader; #define nir_foreach_overload(shader, overload) \ @@ -1861,6 +1895,7 @@ void nir_assign_var_locations(struct exec_list *var_list, int (*type_size)(const struct glsl_type *)); void nir_lower_io(nir_shader *shader, + nir_variable_mode mode, int (*type_size)(const struct glsl_type *)); void nir_lower_vars_to_ssa(nir_shader *shader); diff --git a/src/glsl/nir/nir_intrinsics.h b/src/glsl/nir/nir_intrinsics.h index e02779e157b..44eff3b4da5 100644 --- a/src/glsl/nir/nir_intrinsics.h +++ b/src/glsl/nir/nir_intrinsics.h @@ -194,6 +194,7 @@ SYSTEM_VALUE(instance_id, 1, 0) SYSTEM_VALUE(sample_id, 1, 0) SYSTEM_VALUE(sample_pos, 2, 0) SYSTEM_VALUE(sample_mask_in, 1, 0) +SYSTEM_VALUE(primitive_id, 1, 0) SYSTEM_VALUE(invocation_id, 1, 0) SYSTEM_VALUE(local_invocation_id, 3, 0) SYSTEM_VALUE(work_group_id, 3, 0) diff --git a/src/glsl/nir/nir_lower_gs_intrinsics.c b/src/glsl/nir/nir_lower_gs_intrinsics.c index 2ee4e5c45d6..e0d067885d8 100644 --- a/src/glsl/nir/nir_lower_gs_intrinsics.c +++ b/src/glsl/nir/nir_lower_gs_intrinsics.c @@ -76,7 +76,7 @@ rewrite_emit_vertex(nir_intrinsic_instr *intrin, struct state *state) b->cursor = nir_before_instr(&intrin->instr); nir_ssa_def *count = nir_load_var(b, state->vertex_count_var); - nir_ssa_def *max_vertices = nir_imm_int(b, b->shader->gs.vertices_out); + nir_ssa_def *max_vertices = nir_imm_int(b, b->shader->info.gs.vertices_out); /* Create: if (vertex_count < max_vertices) and insert it. * diff --git a/src/glsl/nir/nir_lower_io.c b/src/glsl/nir/nir_lower_io.c index 9f79c5606ca..f32c09d04a2 100644 --- a/src/glsl/nir/nir_lower_io.c +++ b/src/glsl/nir/nir_lower_io.c @@ -38,6 +38,7 @@ struct lower_io_state { nir_builder builder; void *mem_ctx; int (*type_size)(const struct glsl_type *type); + nir_variable_mode mode; }; void @@ -154,9 +155,17 @@ nir_lower_io_block(nir_block *block, void *void_state) nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); + if (intrin->intrinsic != nir_intrinsic_load_var && + intrin->intrinsic != nir_intrinsic_store_var) + continue; + + nir_variable_mode mode = intrin->variables[0]->var->data.mode; + + if (state->mode != -1 && state->mode != mode) + continue; + switch (intrin->intrinsic) { case nir_intrinsic_load_var: { - nir_variable_mode mode = intrin->variables[0]->var->data.mode; if (mode != nir_var_shader_in && mode != nir_var_uniform) continue; @@ -239,12 +248,15 @@ nir_lower_io_block(nir_block *block, void *void_state) } static void -nir_lower_io_impl(nir_function_impl *impl, int(*type_size)(const struct glsl_type *)) +nir_lower_io_impl(nir_function_impl *impl, + nir_variable_mode mode, + int (*type_size)(const struct glsl_type *)) { struct lower_io_state state; nir_builder_init(&state.builder, impl); state.mem_ctx = ralloc_parent(impl); + state.mode = mode; state.type_size = type_size; nir_foreach_block(impl, nir_lower_io_block, &state); @@ -254,10 +266,11 @@ nir_lower_io_impl(nir_function_impl *impl, int(*type_size)(const struct glsl_typ } void -nir_lower_io(nir_shader *shader, int(*type_size)(const struct glsl_type *)) +nir_lower_io(nir_shader *shader, nir_variable_mode mode, + int (*type_size)(const struct glsl_type *)) { nir_foreach_overload(shader, overload) { if (overload->impl) - nir_lower_io_impl(overload->impl, type_size); + nir_lower_io_impl(overload->impl, mode, type_size); } } diff --git a/src/glsl/nir/nir_lower_system_values.c b/src/glsl/nir/nir_lower_system_values.c index d77bb2f8213..21904f81b97 100644 --- 
a/src/glsl/nir/nir_lower_system_values.c +++ b/src/glsl/nir/nir_lower_system_values.c @@ -74,7 +74,7 @@ convert_block(nir_block *block, void *state) static bool convert_impl(nir_function_impl *impl) { - bool progress; + bool progress = false; nir_foreach_block(impl, convert_block, &progress); nir_metadata_preserve(impl, nir_metadata_block_index | diff --git a/src/glsl/nir/nir_opt_remove_phis.c b/src/glsl/nir/nir_opt_remove_phis.c index bf4a67e70ea..5bdf7ef4da7 100644 --- a/src/glsl/nir/nir_opt_remove_phis.c +++ b/src/glsl/nir/nir_opt_remove_phis.c @@ -58,6 +58,20 @@ remove_phis_block(nir_block *block, void *state) nir_foreach_phi_src(phi, src) { assert(src->src.is_ssa); + + /* For phi nodes at the beginning of loops, we may encounter some + * sources from backedges that point back to the destination of the + * same phi, i.e. something like: + * + * a = phi(a, b, ...) + * + * We can safely ignore these sources, since if all of the normal + * sources point to the same definition, then that definition must + * still dominate the phi node, and the phi will still always take + * the value of that definition. + */ + if (src->src.ssa == &phi->dest.ssa) + continue; if (def == NULL) { def = src->src.ssa; @@ -72,6 +86,11 @@ remove_phis_block(nir_block *block, void *state) if (!srcs_same) continue; + /* We must have found at least one definition, since there must be at + * least one forward edge. + */ + assert(def != NULL); + assert(phi->dest.is_ssa); nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(def)); nir_instr_remove(instr); diff --git a/src/glsl/shader_enums.h b/src/glsl/shader_enums.h index 99acc640496..2a5d2c5bfa7 100644 --- a/src/glsl/shader_enums.h +++ b/src/glsl/shader_enums.h @@ -399,7 +399,7 @@ typedef enum /*@{*/ SYSTEM_VALUE_TESS_COORD, SYSTEM_VALUE_VERTICES_IN, /**< Tessellation vertices in input patch */ - SYSTEM_VALUE_PRIMITIVE_ID, /**< (currently not used by GS) */ + SYSTEM_VALUE_PRIMITIVE_ID, SYSTEM_VALUE_TESS_LEVEL_OUTER, /**< TES input */ SYSTEM_VALUE_TESS_LEVEL_INNER, /**< TES input */ /*@}*/ diff --git a/src/glx/Makefile.am b/src/glx/Makefile.am index 6e50e096fa2..e64955e3b3e 100644 --- a/src/glx/Makefile.am +++ b/src/glx/Makefile.am @@ -46,6 +46,7 @@ AM_CFLAGS = \ $(EXTRA_DEFINES_XF86VIDMODE) \ -D_REENTRANT \ -DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" \ + -DGL_LIB_NAME=\"lib@GL_LIB@.so.1\" \ $(DEFINES) \ $(LIBDRM_CFLAGS) \ $(DRI2PROTO_CFLAGS) \ diff --git a/src/glx/dri_common.c b/src/glx/dri_common.c index eedcd46a15a..8a56385c4bd 100644 --- a/src/glx/dri_common.c +++ b/src/glx/dri_common.c @@ -73,6 +73,10 @@ dri_message(int level, const char *f, ...) 
} } +#ifndef GL_LIB_NAME +#define GL_LIB_NAME "libGL.so.1" +#endif + #ifndef DEFAULT_DRIVER_DIR /* this is normally defined in Mesa/configs/default with DRI_DRIVER_SEARCH_PATH */ #define DEFAULT_DRIVER_DIR "/usr/local/lib/dri" @@ -99,7 +103,7 @@ driOpenDriver(const char *driverName) int len; /* Attempt to make sure libGL symbols will be visible to the driver */ - glhandle = dlopen("libGL.so.1", RTLD_NOW | RTLD_GLOBAL); + glhandle = dlopen(GL_LIB_NAME, RTLD_NOW | RTLD_GLOBAL); libPaths = NULL; if (geteuid() == getuid()) { diff --git a/src/glx/glxcmds.c b/src/glx/glxcmds.c index 26ff804297f..93e8db5a367 100644 --- a/src/glx/glxcmds.c +++ b/src/glx/glxcmds.c @@ -2646,7 +2646,11 @@ _X_EXPORT void (*glXGetProcAddressARB(const GLubyte * procName)) (void) */ _X_EXPORT void (*glXGetProcAddress(const GLubyte * procName)) (void) #if defined(__GNUC__) && !defined(GLX_ALIAS_UNSUPPORTED) +# if defined(USE_MGL_NAMESPACE) + __attribute__ ((alias("mglXGetProcAddressARB"))); +# else __attribute__ ((alias("glXGetProcAddressARB"))); +# endif #else { return glXGetProcAddressARB(procName); diff --git a/src/glx/glxextensions.h b/src/glx/glxextensions.h index 90b173fc915..3a9bc823052 100644 --- a/src/glx/glxextensions.h +++ b/src/glx/glxextensions.h @@ -281,11 +281,17 @@ typedef void (*PFNGLXDISABLEEXTENSIONPROC) (const char *name); # define GLX_ALIAS_VOID(real_func, proto_args, args, aliased_func) #else # if defined(__GNUC__) && !defined(GLX_ALIAS_UNSUPPORTED) -# define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \ +/* GLX_ALIAS and GLX_ALIAS_VOID both expand to the macro GLX_ALIAS2. Using the + * extra expansion means that the name mangling macros in glx_mangle.h will + * apply before stringification, so the alias attribute will have a string like + * "mglXFoo" instead of "glXFoo". 
*/ +# define GLX_ALIAS2(return_type, real_func, proto_args, args, aliased_func) \ return_type real_func proto_args \ __attribute__ ((alias( # aliased_func ) )); +# define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \ + GLX_ALIAS2(return_type, real_func, proto_args, args, aliased_func) # define GLX_ALIAS_VOID(real_func, proto_args, args, aliased_func) \ - GLX_ALIAS(void, real_func, proto_args, args, aliased_func) + GLX_ALIAS2(void, real_func, proto_args, args, aliased_func) # else # define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \ return_type real_func proto_args \ diff --git a/src/mapi/mapi_glapi.c b/src/mapi/mapi_glapi.c index 70605f3dfa1..9f02edb7cd3 100644 --- a/src/mapi/mapi_glapi.c +++ b/src/mapi/mapi_glapi.c @@ -175,7 +175,7 @@ _glapi_get_stub(const char *name, int generate) const struct mapi_stub *stub; #ifdef USE_MGL_NAMESPACE - if (name) + if (name && name[0] == 'm') name++; #endif diff --git a/src/mesa/drivers/common/meta_blit.c b/src/mesa/drivers/common/meta_blit.c index a41fe42338f..5972a5af0c9 100644 --- a/src/mesa/drivers/common/meta_blit.c +++ b/src/mesa/drivers/common/meta_blit.c @@ -71,9 +71,7 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx, char *sample_map_str = rzalloc_size(mem_ctx, 1); char *sample_map_expr = rzalloc_size(mem_ctx, 1); char *texel_fetch_macro = rzalloc_size(mem_ctx, 1); - const char *vs_source; const char *sampler_array_suffix = ""; - const char *texcoord_type = "vec2"; float y_scale; enum blit_msaa_shader shader_index; @@ -99,7 +97,6 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx, shader_index += BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE - BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE; sampler_array_suffix = "Array"; - texcoord_type = "vec3"; } if (blit->msaa_shaders[shader_index]) { @@ -150,28 +147,37 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx, " const int sample_map[%d] = int[%d](%s);\n", samples, samples, sample_map_str); - ralloc_asprintf_append(&texel_fetch_macro, - "#define TEXEL_FETCH(coord) texelFetch(texSampler, i%s(coord), %s);\n", - texcoord_type, sample_number); + if (target == GL_TEXTURE_2D_MULTISAMPLE) { + ralloc_asprintf_append(&texel_fetch_macro, + "#define TEXEL_FETCH(coord) texelFetch(texSampler, ivec2(coord), %s);\n", + sample_number); + } else { + ralloc_asprintf_append(&texel_fetch_macro, + "#define TEXEL_FETCH(coord) texelFetch(texSampler, ivec3(coord, layer), %s);\n", + sample_number); + } - vs_source = ralloc_asprintf(mem_ctx, + static const char vs_source[] = "#version 130\n" "in vec2 position;\n" - "in %s textureCoords;\n" - "out %s texCoords;\n" + "in vec3 textureCoords;\n" + "out vec2 texCoords;\n" + "flat out int layer;\n" "void main()\n" "{\n" - " texCoords = textureCoords;\n" + " texCoords = textureCoords.xy;\n" + " layer = int(textureCoords.z);\n" " gl_Position = vec4(position, 0.0, 1.0);\n" - "}\n", - texcoord_type, - texcoord_type); + "}\n" + ; + fs_source = ralloc_asprintf(mem_ctx, "#version 130\n" "#extension GL_ARB_texture_multisample : enable\n" "uniform sampler2DMS%s texSampler;\n" "uniform float src_width, src_height;\n" - "in %s texCoords;\n" + "in vec2 texCoords;\n" + "flat in int layer;\n" "out vec4 out_color;\n" "\n" "void main()\n" @@ -212,7 +218,6 @@ setup_glsl_msaa_blit_scaled_shader(struct gl_context *ctx, " out_color = mix(x_0_color, x_1_color, interp.y);\n" "}\n", sampler_array_suffix, - texcoord_type, sample_map_expr, y_scale, 1.0f / y_scale, diff --git 
a/src/mesa/drivers/dri/i915/i830_context.h b/src/mesa/drivers/dri/i915/i830_context.h index 140f617e007..92952cf61b3 100644 --- a/src/mesa/drivers/dri/i915/i830_context.h +++ b/src/mesa/drivers/dri/i915/i830_context.h @@ -42,10 +42,10 @@ #define I830_UPLOAD_STIPPLE 0x4 #define I830_UPLOAD_INVARIENT 0x8 #define I830_UPLOAD_RASTER_RULES 0x10 -#define I830_UPLOAD_TEX(i) (0x10<<(i)) -#define I830_UPLOAD_TEXBLEND(i) (0x100<<(i)) -#define I830_UPLOAD_TEX_ALL (0x0f0) -#define I830_UPLOAD_TEXBLEND_ALL (0xf00) +#define I830_UPLOAD_TEX(i) (0x0100<<(i)) +#define I830_UPLOAD_TEXBLEND(i) (0x1000<<(i)) +#define I830_UPLOAD_TEX_ALL (0x0f00) +#define I830_UPLOAD_TEXBLEND_ALL (0xf000) /* State structure offsets - these will probably disappear. */ diff --git a/src/mesa/drivers/dri/i915/i915_context.h b/src/mesa/drivers/dri/i915/i915_context.h index fa58ecb8165..d8f592bcb9f 100644 --- a/src/mesa/drivers/dri/i915/i915_context.h +++ b/src/mesa/drivers/dri/i915/i915_context.h @@ -115,6 +115,8 @@ enum { I915_RASTER_RULES_SETUP_SIZE, }; +#define I915_TEX_UNITS 8 + #define I915_MAX_CONSTANT 32 #define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT)) @@ -194,7 +196,8 @@ struct i915_fragment_program /* Helpers for i915_fragprog.c: */ - GLuint wpos_tex; + uint8_t texcoord_mapping[I915_TEX_UNITS]; + uint8_t wpos_tex; bool depth_written; struct @@ -205,15 +208,6 @@ struct i915_fragment_program GLuint nr_params; }; - - - - - - -#define I915_TEX_UNITS 8 - - struct i915_hw_state { GLuint Ctx[I915_CTX_SETUP_SIZE]; diff --git a/src/mesa/drivers/dri/i915/i915_fragprog.c b/src/mesa/drivers/dri/i915/i915_fragprog.c index 03c32e56d82..1a5943c87fb 100644 --- a/src/mesa/drivers/dri/i915/i915_fragprog.c +++ b/src/mesa/drivers/dri/i915/i915_fragprog.c @@ -72,6 +72,22 @@ static const GLfloat cos_constants[4] = { 1.0, -1.0 / (6 * 5 * 4 * 3 * 2 * 1) }; +/* texcoord_mapping[unit] = index | TEXCOORD_{TEX,VAR} */ +#define TEXCOORD_TEX (0<<7) +#define TEXCOORD_VAR (1<<7) + +static unsigned +get_texcoord_mapping(struct i915_fragment_program *p, uint8_t texcoord) +{ + for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { + if (p->texcoord_mapping[i] == texcoord) + return i; + } + + /* blah */ + return p->ctx->Const.MaxTextureCoordUnits - 1; +} + /** * Retrieve a ureg for the given source register. Will emit * constants, apply swizzling and negation as needed. 
@@ -82,6 +98,7 @@ src_vector(struct i915_fragment_program *p, const struct gl_fragment_program *program) { GLuint src; + unsigned unit; switch (source->File) { @@ -119,8 +136,10 @@ src_vector(struct i915_fragment_program *p, case VARYING_SLOT_TEX5: case VARYING_SLOT_TEX6: case VARYING_SLOT_TEX7: + unit = get_texcoord_mapping(p, (source->Index - + VARYING_SLOT_TEX0) | TEXCOORD_TEX); src = i915_emit_decl(p, REG_TYPE_T, - T_TEX0 + (source->Index - VARYING_SLOT_TEX0), + T_TEX0 + unit, D0_CHANNEL_ALL); break; @@ -132,8 +151,10 @@ src_vector(struct i915_fragment_program *p, case VARYING_SLOT_VAR0 + 5: case VARYING_SLOT_VAR0 + 6: case VARYING_SLOT_VAR0 + 7: + unit = get_texcoord_mapping(p, (source->Index - + VARYING_SLOT_VAR0) | TEXCOORD_VAR); src = i915_emit_decl(p, REG_TYPE_T, - T_TEX0 + (source->Index - VARYING_SLOT_VAR0), + T_TEX0 + unit, D0_CHANNEL_ALL); break; @@ -1176,27 +1197,54 @@ fixup_depth_write(struct i915_fragment_program *p) } } +static void +check_texcoord_mapping(struct i915_fragment_program *p) +{ + GLbitfield64 inputs = p->FragProg.Base.InputsRead; + unsigned unit = 0; + + for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { + if (inputs & VARYING_BIT_TEX(i)) { + if (unit >= p->ctx->Const.MaxTextureCoordUnits) { + unit++; + break; + } + p->texcoord_mapping[unit++] = i | TEXCOORD_TEX; + } + if (inputs & VARYING_BIT_VAR(i)) { + if (unit >= p->ctx->Const.MaxTextureCoordUnits) { + unit++; + break; + } + p->texcoord_mapping[unit++] = i | TEXCOORD_VAR; + } + } + + if (unit > p->ctx->Const.MaxTextureCoordUnits) + i915_program_error(p, "Too many texcoord units"); +} static void check_wpos(struct i915_fragment_program *p) { GLbitfield64 inputs = p->FragProg.Base.InputsRead; GLint i; + unsigned unit = 0; p->wpos_tex = -1; + if ((inputs & VARYING_BIT_POS) == 0) + return; + for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { - if (inputs & (VARYING_BIT_TEX(i) | VARYING_BIT_VAR(i))) - continue; - else if (inputs & VARYING_BIT_POS) { - p->wpos_tex = i; - inputs &= ~VARYING_BIT_POS; - } + unit += !!(inputs & VARYING_BIT_TEX(i)); + unit += !!(inputs & VARYING_BIT_VAR(i)); } - if (inputs & VARYING_BIT_POS) { + if (unit < p->ctx->Const.MaxTextureCoordUnits) + p->wpos_tex = unit; + else i915_program_error(p, "No free texcoord for wpos value"); - } } @@ -1212,6 +1260,7 @@ translate_program(struct i915_fragment_program *p) } i915_init_program(i915, p); + check_texcoord_mapping(p); check_wpos(p); upload_program(p); fixup_depth_write(p); @@ -1420,22 +1469,24 @@ i915ValidateFragmentProgram(struct i915_context *i915) for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) { if (inputsRead & VARYING_BIT_TEX(i)) { + int unit = get_texcoord_mapping(p, i | TEXCOORD_TEX); int sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size; - s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK); - s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz)); + s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK); + s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz)); EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4); } - else if (inputsRead & VARYING_BIT_VAR(i)) { + if (inputsRead & VARYING_BIT_VAR(i)) { + int unit = get_texcoord_mapping(p, i | TEXCOORD_VAR); int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size; - s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK); - s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz)); + s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK); + s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz)); EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4); } - else if (i == p->wpos_tex) { + if (i == p->wpos_tex) { int 
wpos_size = 4 * sizeof(float); /* If WPOS is required, duplicate the XYZ position data in an * unused texture coordinate: diff --git a/src/mesa/drivers/dri/i915/intel_fbo.c b/src/mesa/drivers/dri/i915/intel_fbo.c index 67013666377..12cc7e3a71b 100644 --- a/src/mesa/drivers/dri/i915/intel_fbo.c +++ b/src/mesa/drivers/dri/i915/intel_fbo.c @@ -658,6 +658,11 @@ intel_blit_framebuffer_with_blitter(struct gl_context *ctx, { struct intel_context *intel = intel_context(ctx); + /* Sync up the state of window system buffers. We need to do this before + * we go looking for the buffers. + */ + intel_prepare_render(intel); + if (mask & GL_COLOR_BUFFER_BIT) { GLint i; struct gl_renderbuffer *src_rb = readFb->_ColorReadBuffer; diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index cc3ecaf7a8b..eb8196d4845 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -86,6 +86,7 @@ i965_FILES = \ brw_nir.h \ brw_nir.c \ brw_nir_analyze_boolean_resolves.c \ + brw_nir_uniforms.cpp \ brw_object_purgeable.c \ brw_packed_float.c \ brw_performance_monitor.c \ @@ -133,7 +134,6 @@ i965_FILES = \ brw_vec4_surface_builder.cpp \ brw_vec4_surface_builder.h \ brw_vec4_visitor.cpp \ - brw_vec4_vp.cpp \ brw_vec4_vs_visitor.cpp \ brw_vs.c \ brw_vs.h \ diff --git a/src/mesa/drivers/dri/i965/brw_context.c b/src/mesa/drivers/dri/i965/brw_context.c index 9dcdaf5cd4f..4d499295730 100644 --- a/src/mesa/drivers/dri/i965/brw_context.c +++ b/src/mesa/drivers/dri/i965/brw_context.c @@ -1436,7 +1436,6 @@ intel_process_dri2_buffer(struct brw_context *brw, buffer->cpp, buffer->pitch); } - intel_miptree_release(&rb->mt); bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name, buffer->name); if (!bo) { diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h index 2479182e370..d921a9bb7dd 100644 --- a/src/mesa/drivers/dri/i965/brw_context.h +++ b/src/mesa/drivers/dri/i965/brw_context.h @@ -340,9 +340,6 @@ struct brw_shader { bool compiled_once; }; -/* Note: If adding fields that need anything besides a normal memcmp() for - * comparing them, be sure to go fix brw_stage_prog_data_compare(). - */ struct brw_stage_prog_data { struct { /** size of our binding table. */ @@ -384,18 +381,11 @@ struct brw_stage_prog_data { /* Pointers to tracked values (only valid once * _mesa_load_state_parameters has been called at runtime). - * - * These must be the last fields of the struct (see - * brw_stage_prog_data_compare()). */ const gl_constant_value **param; const gl_constant_value **pull_param; - /** - * Image metadata passed to the shader as uniforms. This is deliberately - * ignored by brw_stage_prog_data_compare() because its contents don't have - * any influence on program compilation. - */ + /** Image metadata passed to the shader as uniforms. */ struct brw_image_param *image_param; }; @@ -449,9 +439,6 @@ struct brw_image_param { * there can be many of these, each in a different GL state * corresponding to a different brw_wm_prog_key struct, with different * compiled programs. - * - * Note: brw_wm_prog_data_compare() must be updated when adding fields to this - * struct! */ struct brw_wm_prog_data { struct brw_stage_prog_data base; @@ -495,9 +482,6 @@ struct brw_wm_prog_data { int urb_setup[VARYING_SLOT_MAX]; }; -/* Note: brw_cs_prog_data_compare() must be updated when adding fields to this - * struct! 
- */ struct brw_cs_prog_data { struct brw_stage_prog_data base; @@ -698,9 +682,6 @@ enum shader_dispatch_mode { DISPATCH_MODE_SIMD8 = 3, }; -/* Note: brw_vue_prog_data_compare() must be updated when adding fields to - * this struct! - */ struct brw_vue_prog_data { struct brw_stage_prog_data base; struct brw_vue_map vue_map; @@ -718,9 +699,6 @@ struct brw_vue_prog_data { }; -/* Note: brw_vs_prog_data_compare() must be updated when adding fields to this - * struct! - */ struct brw_vs_prog_data { struct brw_vue_prog_data base; @@ -780,9 +758,6 @@ struct brw_vs_prog_data { #define SURF_INDEX_GEN6_SOL_BINDING(t) (t) -/* Note: brw_gs_prog_data_compare() must be updated when adding fields to - * this struct! - */ struct brw_gs_prog_data { struct brw_vue_prog_data base; @@ -875,7 +850,6 @@ struct brw_cache_item { }; -typedef bool (*cache_aux_compare_func)(const void *a, const void *b); typedef void (*cache_aux_free_func)(const void *aux); struct brw_cache { @@ -888,12 +862,6 @@ struct brw_cache { uint32_t next_offset; bool bo_used_by_gpu; - /** - * Optional functions used in determining whether the prog_data for a new - * cache item matches an existing cache item (in case there's relevant data - * outside of the prog_data). If NULL, a plain memcmp is done. - */ - cache_aux_compare_func aux_compare[BRW_MAX_CACHE]; /** Optional functions for freeing other pointers attached to a prog_data. */ cache_aux_free_func aux_free[BRW_MAX_CACHE]; }; @@ -1554,7 +1522,7 @@ struct brw_context int num_atoms[BRW_NUM_PIPELINES]; const struct brw_tracked_state render_atoms[60]; - const struct brw_tracked_state compute_atoms[7]; + const struct brw_tracked_state compute_atoms[8]; /* If (INTEL_DEBUG & DEBUG_BATCH) */ struct { diff --git a/src/mesa/drivers/dri/i965/brw_cs.c b/src/mesa/drivers/dri/i965/brw_cs.c index cb3fae66ec3..6b64030a868 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.c +++ b/src/mesa/drivers/dri/i965/brw_cs.c @@ -30,26 +30,23 @@ #include "intel_mipmap_tree.h" #include "brw_state.h" #include "intel_batchbuffer.h" +#include "brw_nir.h" -bool -brw_cs_prog_data_compare(const void *in_a, const void *in_b) +static void +assign_cs_binding_table_offsets(const struct brw_device_info *devinfo, + const struct gl_shader_program *shader_prog, + const struct gl_program *prog, + struct brw_cs_prog_data *prog_data) { - const struct brw_cs_prog_data *a = - (const struct brw_cs_prog_data *)in_a; - const struct brw_cs_prog_data *b = - (const struct brw_cs_prog_data *)in_b; - - /* Compare the base structure. */ - if (!brw_stage_prog_data_compare(&a->base, &b->base)) - return false; + uint32_t next_binding_table_offset = 0; - /* Compare the rest of the structure. */ - const unsigned offset = sizeof(struct brw_stage_prog_data); - if (memcmp(((char *) a) + offset, ((char *) b) + offset, - sizeof(struct brw_cs_prog_data) - offset)) - return false; + /* May not be used if the gl_NumWorkGroups variable is not accessed. 
*/ + prog_data->binding_table.work_groups_start = next_binding_table_offset; + next_binding_table_offset++; - return true; + brw_assign_common_binding_table_offsets(MESA_SHADER_COMPUTE, devinfo, + shader_prog, prog, &prog_data->base, + next_binding_table_offset); } static bool @@ -72,12 +69,14 @@ brw_codegen_cs_prog(struct brw_context *brw, memset(&prog_data, 0, sizeof(prog_data)); + assign_cs_binding_table_offsets(brw->intelScreen->devinfo, prog, + &cp->program.Base, &prog_data); + /* Allocate the references to the uniforms that will end up in the * prog_data associated with the compiled program, and which will be freed * by the state cache. */ - int param_count = cs->base.num_uniform_components + - cs->base.NumImages * BRW_IMAGE_PARAM_SIZE; + int param_count = cp->program.Base.nir->num_uniforms; /* The backend also sometimes adds params for texture size. */ param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits; @@ -90,6 +89,9 @@ brw_codegen_cs_prog(struct brw_context *brw, prog_data.base.nr_params = param_count; prog_data.base.nr_image_params = cs->base.NumImages; + brw_nir_setup_glsl_uniforms(cp->program.Base.nir, prog, &cp->program.Base, + &prog_data.base, true); + if (unlikely(brw->perf_debug)) { start_busy = (brw->batch.last_bo && drm_intel_bo_busy(brw->batch.last_bo)); diff --git a/src/mesa/drivers/dri/i965/brw_cs.h b/src/mesa/drivers/dri/i965/brw_cs.h index 746fb05166c..0c0ed2bc909 100644 --- a/src/mesa/drivers/dri/i965/brw_cs.h +++ b/src/mesa/drivers/dri/i965/brw_cs.h @@ -36,8 +36,6 @@ struct brw_cs_prog_key { extern "C" { #endif -bool brw_cs_prog_data_compare(const void *a, const void *b); - void brw_upload_cs_prog(struct brw_context *brw); @@ -51,8 +49,7 @@ brw_cs_emit(struct brw_context *brw, unsigned *final_assembly_size); unsigned -brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog, - unsigned dispatch_width); +brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width); #ifdef __cplusplus } diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index e620301fde7..0a6b23284d9 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -949,20 +949,6 @@ fs_visitor::import_uniforms(fs_visitor *v) this->param_size = v->param_size; } -void -fs_visitor::setup_vec4_uniform_value(unsigned param_offset, - const gl_constant_value *values, - unsigned n) -{ - static const gl_constant_value zero = { 0 }; - - for (unsigned i = 0; i < n; ++i) - stage_prog_data->param[param_offset + i] = &values[i]; - - for (unsigned i = n; i < 4; ++i) - stage_prog_data->param[param_offset + i] = &zero; -} - fs_reg * fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer, bool origin_upper_left) @@ -1416,7 +1402,7 @@ fs_visitor::calculate_urb_setup() int urb_next = 0; /* Figure out where each of the incoming setup attributes lands. */ if (devinfo->gen >= 6) { - if (_mesa_bitcount_64(prog->InputsRead & + if (_mesa_bitcount_64(nir->info.inputs_read & BRW_FS_VARYING_INPUT_MASK) <= 16) { /* The SF/SBE pipeline stage can do arbitrary rearrangement of the * first 16 varying inputs, so we can put them wherever we want. @@ -1428,7 +1414,7 @@ fs_visitor::calculate_urb_setup() * a different vertex (or geometry) shader. 
*/ for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) { - if (prog->InputsRead & BRW_FS_VARYING_INPUT_MASK & + if (nir->info.inputs_read & BRW_FS_VARYING_INPUT_MASK & BITFIELD64_BIT(i)) { prog_data->urb_setup[i] = urb_next++; } @@ -1442,7 +1428,7 @@ fs_visitor::calculate_urb_setup() struct brw_vue_map prev_stage_vue_map; brw_compute_vue_map(devinfo, &prev_stage_vue_map, key->input_slots_valid, - shader_prog->SeparateShader); + nir->info.separate_shader); int first_slot = 2 * BRW_SF_URB_ENTRY_READ_OFFSET; assert(prev_stage_vue_map.num_slots <= first_slot + 32); for (int slot = first_slot; slot < prev_stage_vue_map.num_slots; @@ -1452,7 +1438,7 @@ fs_visitor::calculate_urb_setup() * unused. */ if (varying != BRW_VARYING_SLOT_COUNT && - (prog->InputsRead & BRW_FS_VARYING_INPUT_MASK & + (nir->info.inputs_read & BRW_FS_VARYING_INPUT_MASK & BITFIELD64_BIT(varying))) { prog_data->urb_setup[varying] = slot - first_slot; } @@ -1485,7 +1471,7 @@ fs_visitor::calculate_urb_setup() * * See compile_sf_prog() for more info. */ - if (prog->InputsRead & BITFIELD64_BIT(VARYING_SLOT_PNTC)) + if (nir->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC)) prog_data->urb_setup[VARYING_SLOT_PNTC] = urb_next++; } @@ -4537,7 +4523,7 @@ fs_visitor::dump_instruction(backend_instruction *be_inst, FILE *file) fprintf(file, "***m%d***", inst->src[i].reg); break; case ATTR: - fprintf(file, "attr%d", inst->src[i].reg + inst->src[i].reg_offset); + fprintf(file, "attr%d+%d", inst->src[i].reg, inst->src[i].reg_offset); break; case UNIFORM: fprintf(file, "u%d", inst->src[i].reg + inst->src[i].reg_offset); @@ -4668,7 +4654,7 @@ void fs_visitor::setup_payload_gen6() { bool uses_depth = - (prog->InputsRead & (1 << VARYING_SLOT_POS)) != 0; + (nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0; unsigned barycentric_interp_modes = (stage == MESA_SHADER_FRAGMENT) ? ((brw_wm_prog_data*) this->prog_data)->barycentric_interp_modes : 0; @@ -4727,7 +4713,7 @@ fs_visitor::setup_payload_gen6() } /* R32: MSAA input coverage mask */ - if (prog->SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN) { + if (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN) { assert(devinfo->gen >= 7); payload.sample_mask_in_reg = payload.num_regs; payload.num_regs++; @@ -4740,7 +4726,7 @@ fs_visitor::setup_payload_gen6() /* R34-: bary for 32-pixel. */ /* R58-59: interp W for 32-pixel. */ - if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { + if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { source_depth_to_render_target = true; } } @@ -4759,9 +4745,9 @@ fs_visitor::setup_cs_payload() payload.num_regs = 1; - if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) { + if (nir->info.system_values_read & SYSTEM_BIT_LOCAL_INVOCATION_ID) { const unsigned local_id_dwords = - brw_cs_prog_local_id_payload_dwords(prog, dispatch_width); + brw_cs_prog_local_id_payload_dwords(dispatch_width); assert((local_id_dwords & 0x7) == 0); const unsigned local_id_regs = local_id_dwords / 8; payload.local_invocation_id_reg = payload.num_regs; @@ -4769,37 +4755,6 @@ fs_visitor::setup_cs_payload() } } -void -fs_visitor::assign_fs_binding_table_offsets() -{ - assert(stage == MESA_SHADER_FRAGMENT); - brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data; - brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - uint32_t next_binding_table_offset = 0; - - /* If there are no color regions, we still perform an FB write to a null - * renderbuffer, which we place at surface index 0. 
- */ - prog_data->binding_table.render_target_start = next_binding_table_offset; - next_binding_table_offset += MAX2(key->nr_color_regions, 1); - - assign_common_binding_table_offsets(next_binding_table_offset); -} - -void -fs_visitor::assign_cs_binding_table_offsets() -{ - assert(stage == MESA_SHADER_COMPUTE); - brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data; - uint32_t next_binding_table_offset = 0; - - /* May not be used if the gl_NumWorkGroups variable is not accessed. */ - prog_data->binding_table.work_groups_start = next_binding_table_offset; - next_binding_table_offset++; - - assign_common_binding_table_offsets(next_binding_table_offset); -} - void fs_visitor::calculate_register_pressure() { @@ -4851,8 +4806,8 @@ fs_visitor::optimize() \ if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER) && this_progress) { \ char filename[64]; \ - snprintf(filename, 64, "%s%d-%04d-%02d-%02d-" #pass, \ - stage_abbrev, dispatch_width, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \ + snprintf(filename, 64, "%s%d-%s-%02d-%02d-" #pass, \ + stage_abbrev, dispatch_width, nir->info.name, iteration, pass_num); \ \ backend_shader::dump_instructions(filename); \ } \ @@ -4865,9 +4820,8 @@ fs_visitor::optimize() if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) { char filename[64]; - snprintf(filename, 64, "%s%d-%04d-00-start", - stage_abbrev, dispatch_width, - shader_prog ? shader_prog->Name : 0); + snprintf(filename, 64, "%s%d-%s-00-start", + stage_abbrev, dispatch_width, nir->info.name); backend_shader::dump_instructions(filename); } @@ -5013,8 +4967,6 @@ fs_visitor::run_vs(gl_clip_plane *clip_planes) { assert(stage == MESA_SHADER_VERTEX); - if (prog_data->map_entries == NULL) - assign_common_binding_table_offsets(0); setup_vs_payload(); if (shader_time_index >= 0) @@ -5053,11 +5005,6 @@ fs_visitor::run_fs(bool do_rep_send) assert(stage == MESA_SHADER_FRAGMENT); - sanity_param_count = prog->Parameters->NumParameters; - - if (prog_data->map_entries == NULL) - assign_fs_binding_table_offsets(); - if (devinfo->gen >= 6) setup_payload_gen6(); else @@ -5073,7 +5020,7 @@ fs_visitor::run_fs(bool do_rep_send) emit_shader_time_begin(); calculate_urb_setup(); - if (prog->InputsRead > 0) { + if (nir->info.inputs_read > 0) { if (devinfo->gen < 6) emit_interpolation_setup_gen4(); else @@ -5133,11 +5080,6 @@ bool fs_visitor::run_cs() { assert(stage == MESA_SHADER_COMPUTE); - assert(shader); - - sanity_param_count = prog->Parameters->NumParameters; - - assign_cs_binding_table_offsets(); setup_cs_payload(); @@ -5166,13 +5108,6 @@ fs_visitor::run_cs() if (failed) return false; - /* If any state parameters were appended, then ParameterValues could have - * been realloced, in which case the driver uniform storage set up by - * _mesa_associate_uniform_storage() would point to freed memory. Make - * sure that didn't happen. - */ - assert(sanity_param_count == prog->Parameters->NumParameters); - return !failed; } @@ -5200,9 +5135,8 @@ brw_wm_fs_emit(struct brw_context *brw, /* Now the main event: Visit the shader IR and generate our FS IR for it. 
*/ - fs_visitor v(brw->intelScreen->compiler, brw, - mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base, - prog, &fp->Base, 8, st_index8); + fs_visitor v(brw->intelScreen->compiler, brw, mem_ctx, key, + &prog_data->base, &fp->Base, fp->Base.nir, 8, st_index8); if (!v.run_fs(false /* do_rep_send */)) { if (prog) { prog->LinkStatus = false; @@ -5216,9 +5150,8 @@ brw_wm_fs_emit(struct brw_context *brw, } cfg_t *simd16_cfg = NULL; - fs_visitor v2(brw->intelScreen->compiler, brw, - mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base, - prog, &fp->Base, 16, st_index16); + fs_visitor v2(brw->intelScreen->compiler, brw, mem_ctx, key, + &prog_data->base, &fp->Base, fp->Base.nir, 16, st_index16); if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) { if (!v.simd16_unsupported) { /* Try a SIMD16 compile */ @@ -5332,9 +5265,8 @@ brw_cs_emit(struct brw_context *brw, /* Now the main event: Visit the shader IR and generate our CS IR for it. */ - fs_visitor v8(brw->intelScreen->compiler, brw, - mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog, - &cp->Base, 8, st_index); + fs_visitor v8(brw->intelScreen->compiler, brw, mem_ctx, key, + &prog_data->base, &cp->Base, cp->Base.nir, 8, st_index); if (!v8.run_cs()) { fail_msg = v8.fail_msg; } else if (local_workgroup_size <= 8 * brw->max_cs_threads) { @@ -5342,9 +5274,8 @@ brw_cs_emit(struct brw_context *brw, prog_data->simd_size = 8; } - fs_visitor v16(brw->intelScreen->compiler, brw, - mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog, - &cp->Base, 16, st_index); + fs_visitor v16(brw->intelScreen->compiler, brw, mem_ctx, key, + &prog_data->base, &cp->Base, cp->Base.nir, 16, st_index); if (likely(!(INTEL_DEBUG & DEBUG_NO16)) && !fail_msg && !v8.simd16_unsupported && local_workgroup_size <= 16 * brw->max_cs_threads) { diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index a8b6726b9f0..e8b511f9ce6 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -93,11 +93,10 @@ class fs_visitor : public backend_shader public: fs_visitor(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, - gl_shader_stage stage, const void *key, struct brw_stage_prog_data *prog_data, - struct gl_shader_program *shader_prog, struct gl_program *prog, + nir_shader *shader, unsigned dispatch_width, int shader_time_index); @@ -128,8 +127,6 @@ public: bool run_cs(); void optimize(); void allocate_registers(); - void assign_fs_binding_table_offsets(); - void assign_cs_binding_table_offsets(); void setup_payload_gen4(); void setup_payload_gen6(); void setup_vs_payload(); @@ -207,7 +204,7 @@ public: void emit_interpolation_setup_gen6(); void compute_sample_position(fs_reg dst, fs_reg int_sample_pos); fs_reg rescale_texcoord(fs_reg coordinate, int coord_components, - bool is_rect, uint32_t sampler, int texunit); + bool is_rect, uint32_t sampler); void emit_texture(ir_texture_opcode op, const glsl_type *dest_type, fs_reg coordinate, int components, @@ -220,8 +217,7 @@ public: bool is_cube_array, bool is_rect, uint32_t sampler, - fs_reg sampler_reg, - int texunit); + fs_reg sampler_reg); fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components, const fs_reg &sampler); void emit_gen6_gather_wa(uint8_t wa, fs_reg dst); @@ -238,12 +234,10 @@ public: uint32_t spill_offset, int count); void emit_nir_code(); - void nir_setup_inputs(nir_shader *shader); - void nir_setup_outputs(nir_shader *shader); - void nir_setup_uniforms(nir_shader *shader); - void nir_setup_uniform(nir_variable 
*var); - void nir_setup_builtin_uniform(nir_variable *var); - void nir_emit_system_values(nir_shader *shader); + void nir_setup_inputs(); + void nir_setup_outputs(); + void nir_setup_uniforms(); + void nir_emit_system_values(); void nir_emit_impl(nir_function_impl *impl); void nir_emit_cf_list(exec_list *list); void nir_emit_if(nir_if *if_stmt); @@ -294,10 +288,6 @@ public: struct brw_reg interp_reg(int location, int channel); - virtual void setup_vec4_uniform_value(unsigned param_offset, - const gl_constant_value *values, - unsigned n); - int implied_mrf_writes(fs_inst *inst); virtual void dump_instructions(); @@ -309,7 +299,7 @@ public: const struct brw_sampler_prog_key_data *key_tex; struct brw_stage_prog_data *prog_data; - unsigned int sanity_param_count; + struct gl_program *prog; int *param_size; diff --git a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 61d0c896b8e..8a03597c72b 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@ -37,16 +37,13 @@ using namespace brw::surface_access; void fs_visitor::emit_nir_code() { - nir_shader *nir = prog->nir; - /* emit the arrays used for inputs and outputs - load/store intrinsics will * be converted to reads/writes of these arrays */ - nir_setup_inputs(nir); - nir_setup_outputs(nir); - uniforms = nir->num_uniforms; - //nir_setup_uniforms(nir); - nir_emit_system_values(nir); + nir_setup_inputs(); + nir_setup_outputs(); + nir_setup_uniforms(); + nir_emit_system_values(); /* get the main function and emit it */ nir_foreach_overload(nir, overload) { @@ -57,11 +54,11 @@ fs_visitor::emit_nir_code() } void -fs_visitor::nir_setup_inputs(nir_shader *shader) +fs_visitor::nir_setup_inputs() { - nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, shader->num_inputs); + nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_inputs); - foreach_list_typed(nir_variable, var, node, &shader->inputs) { + foreach_list_typed(nir_variable, var, node, &nir->inputs) { enum brw_reg_type type = brw_type_for_base_type(var->type); fs_reg input = offset(nir_inputs, bld, var->data.driver_location); @@ -119,13 +116,13 @@ fs_visitor::nir_setup_inputs(nir_shader *shader) } void -fs_visitor::nir_setup_outputs(nir_shader *shader) +fs_visitor::nir_setup_outputs() { brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, shader->num_outputs); + nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs); - foreach_list_typed(nir_variable, var, node, &shader->outputs) { + foreach_list_typed(nir_variable, var, node, &nir->outputs) { fs_reg reg = offset(nir_outputs, bld, var->data.driver_location); int vector_elements = @@ -176,108 +173,20 @@ fs_visitor::nir_setup_outputs(nir_shader *shader) } void -fs_visitor::nir_setup_uniforms(nir_shader *shader) +fs_visitor::nir_setup_uniforms() { if (dispatch_width != 8) return; - uniforms = shader->num_uniforms; - - if (shader_prog) { - foreach_list_typed(nir_variable, var, node, &shader->uniforms) { - /* UBO's and atomics don't take up space in the uniform file */ - if (var->interface_type != NULL || var->type->contains_atomic()) - continue; - - if (strncmp(var->name, "gl_", 3) == 0) - nir_setup_builtin_uniform(var); - else - nir_setup_uniform(var); - if(type_size_scalar(var->type) > 0) - param_size[var->data.driver_location] = type_size_scalar(var->type); - } - } else { - /* prog_to_nir only creates a single giant uniform variable so we can - * just set param up directly. 
*/ - for (unsigned p = 0; p < prog->Parameters->NumParameters; p++) { - for (unsigned int i = 0; i < 4; i++) { - stage_prog_data->param[4 * p + i] = - &prog->Parameters->ParameterValues[p][i]; - } - } - if(prog->Parameters->NumParameters > 0) - param_size[0] = prog->Parameters->NumParameters * 4; - } -} + uniforms = nir->num_uniforms; -void -fs_visitor::nir_setup_uniform(nir_variable *var) -{ - int namelen = strlen(var->name); - - /* The data for our (non-builtin) uniforms is stored in a series of - * gl_uniform_driver_storage structs for each subcomponent that - * glGetUniformLocation() could name. We know it's been set up in the - * same order we'd walk the type, so walk the list of storage and find - * anything with our name, or the prefix of a component that starts with - * our name. - */ - unsigned index = var->data.driver_location; - for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) { - struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u]; - - if (storage->builtin) - continue; - - if (strncmp(var->name, storage->name, namelen) != 0 || - (storage->name[namelen] != 0 && - storage->name[namelen] != '.' && - storage->name[namelen] != '[')) { + foreach_list_typed(nir_variable, var, node, &nir->uniforms) { + /* UBO's and atomics don't take up space in the uniform file */ + if (var->interface_type != NULL || var->type->contains_atomic()) continue; - } - - if (storage->type->is_image()) { - setup_image_uniform_values(index, storage); - } else { - unsigned slots = storage->type->component_slots(); - if (storage->array_elements) - slots *= storage->array_elements; - for (unsigned i = 0; i < slots; i++) { - stage_prog_data->param[index++] = &storage->storage[i]; - } - } - } -} - -void -fs_visitor::nir_setup_builtin_uniform(nir_variable *var) -{ - const nir_state_slot *const slots = var->state_slots; - assert(var->state_slots != NULL); - - unsigned uniform_index = var->data.driver_location; - for (unsigned int i = 0; i < var->num_state_slots; i++) { - /* This state reference has already been setup by ir_to_mesa, but we'll - * get the same index back here. - */ - int index = _mesa_add_state_reference(this->prog->Parameters, - (gl_state_index *)slots[i].tokens); - - /* Add each of the unique swizzles of the element as a parameter. - * This'll end up matching the expected layout of the - * array/matrix/structure we're trying to fill in. 
- */ - int last_swiz = -1; - for (unsigned int j = 0; j < 4; j++) { - int swiz = GET_SWZ(slots[i].swizzle, j); - if (swiz == last_swiz) - break; - last_swiz = swiz; - - stage_prog_data->param[uniform_index++] = - &prog->Parameters->ParameterValues[index][swiz]; - } + if (type_size_scalar(var->type) > 0) + param_size[var->data.driver_location] = type_size_scalar(var->type); } } @@ -363,10 +272,10 @@ emit_system_values_block(nir_block *block, void *void_visitor) } void -fs_visitor::nir_emit_system_values(nir_shader *shader) +fs_visitor::nir_emit_system_values() { nir_system_values = ralloc_array(mem_ctx, fs_reg, SYSTEM_VALUE_MAX); - nir_foreach_overload(shader, overload) { + nir_foreach_overload(nir, overload) { assert(strcmp(overload->function->name, "main") == 0); assert(overload->impl); nir_foreach_block(overload->impl, emit_system_values_block, this); @@ -1540,7 +1449,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr */ brw_mark_surface_used(prog_data, stage_prog_data->binding_table.ubo_start + - shader_prog->NumBufferInterfaceBlocks - 1); + nir->info.num_ssbos - 1); } if (has_indirect) { @@ -1603,7 +1512,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr */ brw_mark_surface_used(prog_data, stage_prog_data->binding_table.ubo_start + - shader_prog->NumBufferInterfaceBlocks - 1); + nir->info.num_ssbos - 1); } /* Get the offset to read from */ @@ -1796,7 +1705,7 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr brw_mark_surface_used(prog_data, stage_prog_data->binding_table.ubo_start + - shader_prog->NumBufferInterfaceBlocks - 1); + nir->info.num_ssbos - 1); } /* Offset */ @@ -1913,8 +1822,6 @@ fs_visitor::nir_emit_intrinsic(const fs_builder &bld, nir_intrinsic_instr *instr unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0; int reg_width = dispatch_width / 8; - assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage); - /* Set LOD = 0 */ fs_reg source = fs_reg(0); @@ -1990,7 +1897,7 @@ fs_visitor::nir_emit_ssbo_atomic(const fs_builder &bld, */ brw_mark_surface_used(prog_data, stage_prog_data->binding_table.ubo_start + - shader_prog->NumBufferInterfaceBlocks - 1); + nir->info.num_ssbos - 1); } fs_reg offset = get_nir_src(instr->src[1]); @@ -2023,12 +1930,6 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) unsigned sampler = stage_prog_data->bind_map[set].index[binding]; fs_reg sampler_reg(sampler); - /* FINISHME: We're failing to recompile our programs when the sampler is - * updated. This only matters for the texture rectangle scale parameters - * (pre-gen6, or gen6+ with GL_CLAMP). 
- */ - int texunit = prog->SamplerUnits[sampler]; - int gather_component = instr->component; bool is_rect = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT; @@ -2169,7 +2070,7 @@ fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) emit_texture(op, dest_type, coordinate, instr->coord_components, shadow_comparitor, lod, lod2, lod_components, sample_index, tex_offset, mcs, gather_component, - is_cube_array, is_rect, sampler, sampler_reg, texunit); + is_cube_array, is_rect, sampler, sampler_reg); fs_reg dest = get_nir_dest(instr->dest); dest.type = this->result.type; diff --git a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 47d7ae4f57c..df1a7ed9b59 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@ -79,7 +79,7 @@ fs_visitor::emit_vs_system_value(int location) fs_reg fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, - bool is_rect, uint32_t sampler, int texunit) + bool is_rect, uint32_t sampler) { bool needs_gl_clamp = true; fs_reg scale_x, scale_y; @@ -93,10 +93,16 @@ fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components, (devinfo->gen >= 6 && (key_tex->gl_clamp_mask[0] & (1 << sampler) || key_tex->gl_clamp_mask[1] & (1 << sampler))))) { struct gl_program_parameter_list *params = prog->Parameters; + + + /* FINISHME: We're failing to recompile our programs when the sampler is + * updated. This only matters for the texture rectangle scale + * parameters (pre-gen6, or gen6+ with GL_CLAMP). + */ int tokens[STATE_LENGTH] = { STATE_INTERNAL, STATE_TEXRECT_SCALE, - texunit, + prog->SamplerUnits[sampler], 0, 0 }; @@ -221,7 +227,7 @@ fs_visitor::emit_texture(ir_texture_opcode op, bool is_cube_array, bool is_rect, uint32_t sampler, - fs_reg sampler_reg, int texunit) + fs_reg sampler_reg) { fs_inst *inst = NULL; @@ -256,7 +262,7 @@ fs_visitor::emit_texture(ir_texture_opcode op, * samplers. This should only be a problem with GL_CLAMP on Gen7. 
*/ coordinate = rescale_texcoord(coordinate, coord_components, is_rect, - sampler, texunit); + sampler); } /* Writemasking doesn't eliminate channels on SIMD8 texture @@ -692,7 +698,7 @@ fs_visitor::emit_single_fb_write(const fs_builder &bld, fs_reg src_depth; if (source_depth_to_render_target) { - if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) + if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) src_depth = frag_depth; else src_depth = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0)); @@ -1060,16 +1066,14 @@ fs_visitor::emit_barrier() fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, - gl_shader_stage stage, const void *key, struct brw_stage_prog_data *prog_data, - struct gl_shader_program *shader_prog, struct gl_program *prog, + nir_shader *shader, unsigned dispatch_width, int shader_time_index) - : backend_shader(compiler, log_data, mem_ctx, - shader_prog, prog, prog_data, stage), - key(key), prog_data(prog_data), + : backend_shader(compiler, log_data, mem_ctx, shader, prog_data), + key(key), prog_data(prog_data), prog(prog), dispatch_width(dispatch_width), shader_time_index(shader_time_index), promoted_constants(0), diff --git a/src/mesa/drivers/dri/i965/brw_gs.c b/src/mesa/drivers/dri/i965/brw_gs.c index 0119a906e9e..4d0b125ffe4 100644 --- a/src/mesa/drivers/dri/i965/brw_gs.c +++ b/src/mesa/drivers/dri/i965/brw_gs.c @@ -32,6 +32,24 @@ #include "brw_vec4_gs_visitor.h" #include "brw_state.h" #include "brw_ff_gs.h" +#include "brw_nir.h" + +static void +assign_gs_binding_table_offsets(const struct brw_device_info *devinfo, + const struct gl_shader_program *shader_prog, + const struct gl_program *prog, + struct brw_gs_prog_data *prog_data) +{ + /* In gen6 we reserve the first BRW_MAX_SOL_BINDINGS entries for transform + * feedback surfaces. + */ + uint32_t reserved = devinfo->gen == 6 ? BRW_MAX_SOL_BINDINGS : 0; + + brw_assign_common_binding_table_offsets(MESA_SHADER_GEOMETRY, devinfo, + shader_prog, prog, + &prog_data->base.base, + reserved); +} bool brw_compile_gs_prog(struct brw_context *brw, @@ -55,6 +73,9 @@ brw_compile_gs_prog(struct brw_context *brw, c.prog_data.invocations = gp->program.Invocations; + assign_gs_binding_table_offsets(brw->intelScreen->devinfo, prog, + &gp->program.Base, &c.prog_data); + /* Allocate the references to the uniforms that will end up in the * prog_data associated with the compiled program, and which will be freed * by the state cache. @@ -64,9 +85,7 @@ brw_compile_gs_prog(struct brw_context *brw, * every uniform is a float which gets padded to the size of a vec4. */ struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; - int param_count = gs->num_uniform_components * 4; - - param_count += gs->NumImages * BRW_IMAGE_PARAM_SIZE; + int param_count = gp->program.Base.nir->num_uniforms * 4; c.prog_data.base.base.param = rzalloc_array(NULL, const gl_constant_value *, param_count); @@ -77,6 +96,9 @@ brw_compile_gs_prog(struct brw_context *brw, c.prog_data.base.base.nr_params = param_count; c.prog_data.base.base.nr_image_params = gs->NumImages; + brw_nir_setup_glsl_uniforms(gp->program.Base.nir, prog, &gp->program.Base, + &c.prog_data.base.base, false); + if (brw->gen >= 8) { c.prog_data.static_vertex_count = !gp->program.Base.nir ? 
-1 : nir_gs_count_vertices(gp->program.Base.nir); @@ -418,24 +440,3 @@ brw_gs_precompile(struct gl_context *ctx, return success; } - - -bool -brw_gs_prog_data_compare(const void *in_a, const void *in_b) -{ - const struct brw_gs_prog_data *a = in_a; - const struct brw_gs_prog_data *b = in_b; - - /* Compare the base structure. */ - if (!brw_stage_prog_data_compare(&a->base.base, &b->base.base)) - return false; - - /* Compare the rest of the struct. */ - const unsigned offset = sizeof(struct brw_stage_prog_data); - if (memcmp(((char *) a) + offset, ((char *) b) + offset, - sizeof(struct brw_gs_prog_data) - offset)) { - return false; - } - - return true; -} diff --git a/src/mesa/drivers/dri/i965/brw_nir.c b/src/mesa/drivers/dri/i965/brw_nir.c index 35855092dff..bae44d5aa24 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.c +++ b/src/mesa/drivers/dri/i965/brw_nir.c @@ -27,6 +27,24 @@ #include "glsl/nir/glsl_to_nir.h" #include "program/prog_to_nir.h" +static void +brw_nir_lower_inputs(nir_shader *nir, bool is_scalar) +{ + nir_assign_var_locations(&nir->inputs, &nir->num_inputs, + is_scalar ? type_size_scalar : type_size_vec4); +} + +static void +brw_nir_lower_outputs(nir_shader *nir, bool is_scalar) +{ + if (is_scalar) { + nir_assign_var_locations(&nir->outputs, &nir->num_outputs, type_size_scalar); + } else { + foreach_list_typed(nir_variable, var, node, &nir->outputs) + var->data.driver_location = var->data.location; + } +} + static void nir_optimize(nir_shader *nir, bool is_scalar) { @@ -80,12 +98,11 @@ brw_create_nir(struct brw_context *brw, struct gl_context *ctx = &brw->ctx; const nir_shader_compiler_options *options = ctx->Const.ShaderCompilerOptions[stage].NirOptions; - struct gl_shader *shader = shader_prog ? shader_prog->_LinkedShaders[stage] : NULL; nir_shader *nir; /* First, lower the GLSL IR or Mesa IR to NIR */ if (shader_prog) { - nir = glsl_to_nir(shader, options); + nir = glsl_to_nir(shader_prog, stage, options); } else { nir = prog_to_nir(prog, options); nir_convert_to_ssa(nir); /* turn registers into SSA */ @@ -142,26 +159,12 @@ brw_process_nir(nir_shader *nir, /* Get rid of split copies */ nir_optimize(nir, is_scalar); - if (is_scalar) { - nir_assign_var_locations(&nir->uniforms, - &nir->num_uniforms, - type_size_scalar); - nir_assign_var_locations(&nir->inputs, &nir->num_inputs, type_size_scalar); - nir_assign_var_locations(&nir->outputs, &nir->num_outputs, type_size_scalar); - nir_lower_io(nir, type_size_scalar); - } else { - nir_assign_var_locations(&nir->uniforms, - &nir->num_uniforms, - type_size_vec4); - - nir_assign_var_locations(&nir->inputs, &nir->num_inputs, type_size_vec4); - - foreach_list_typed(nir_variable, var, node, &nir->outputs) - var->data.driver_location = var->data.location; - - nir_lower_io(nir, type_size_vec4); - } - + brw_nir_lower_inputs(nir, is_scalar); + brw_nir_lower_outputs(nir, is_scalar); + nir_assign_var_locations(&nir->uniforms, + &nir->num_uniforms, + is_scalar ? type_size_scalar : type_size_vec4); + nir_lower_io(nir, -1, is_scalar ? 
type_size_scalar : type_size_vec4); nir_validate_shader(nir); nir_remove_dead_variables(nir); diff --git a/src/mesa/drivers/dri/i965/brw_nir.h b/src/mesa/drivers/dri/i965/brw_nir.h index 5a1358890cc..19e55527545 100644 --- a/src/mesa/drivers/dri/i965/brw_nir.h +++ b/src/mesa/drivers/dri/i965/brw_nir.h @@ -91,6 +91,15 @@ brw_process_nir(nir_shader *nir, const struct gl_shader_program *shader_prog, gl_shader_stage stage, bool is_scalar); +void brw_nir_setup_glsl_uniforms(nir_shader *shader, + struct gl_shader_program *shader_prog, + const struct gl_program *prog, + struct brw_stage_prog_data *stage_prog_data, + bool is_scalar); + +void brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog, + struct brw_stage_prog_data *stage_prog_data); + #ifdef __cplusplus } #endif diff --git a/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp new file mode 100644 index 00000000000..f19d74610a1 --- /dev/null +++ b/src/mesa/drivers/dri/i965/brw_nir_uniforms.cpp @@ -0,0 +1,182 @@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include "brw_shader.h" +#include "brw_nir.h" +#include "glsl/ir.h" +#include "glsl/ir_uniform.h" + +static void +brw_nir_setup_glsl_builtin_uniform(nir_variable *var, + const struct gl_program *prog, + struct brw_stage_prog_data *stage_prog_data, + unsigned comps_per_unit) +{ + const nir_state_slot *const slots = var->state_slots; + assert(var->state_slots != NULL); + + unsigned uniform_index = var->data.driver_location * comps_per_unit; + for (unsigned int i = 0; i < var->num_state_slots; i++) { + /* This state reference has already been setup by ir_to_mesa, but we'll + * get the same index back here. + */ + int index = _mesa_add_state_reference(prog->Parameters, + (gl_state_index *)slots[i].tokens); + + /* Add each of the unique swizzles of the element as a parameter. + * This'll end up matching the expected layout of the + * array/matrix/structure we're trying to fill in. + */ + int last_swiz = -1; + for (unsigned j = 0; j < 4; j++) { + int swiz = GET_SWZ(slots[i].swizzle, j); + + /* If we hit a pair of identical swizzles, this means we've hit the + * end of the builtin variable. In scalar mode, we should just quit + * and move on to the next one. In vec4, we need to continue and pad + * it out to 4 components. 
+ */ + if (swiz == last_swiz && comps_per_unit == 1) + break; + + last_swiz = swiz; + + stage_prog_data->param[uniform_index++] = + &prog->Parameters->ParameterValues[index][swiz]; + } + } +} + +static void +brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var, + struct gl_shader_program *shader_prog, + struct brw_stage_prog_data *stage_prog_data, + unsigned comps_per_unit) +{ + int namelen = strlen(var->name); + + /* The data for our (non-builtin) uniforms is stored in a series of + * gl_uniform_driver_storage structs for each subcomponent that + * glGetUniformLocation() could name. We know it's been set up in the same + * order we'd walk the type, so walk the list of storage and find anything + * with our name, or the prefix of a component that starts with our name. + */ + unsigned uniform_index = var->data.driver_location * comps_per_unit; + for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) { + struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u]; + + if (storage->builtin) + continue; + + if (strncmp(var->name, storage->name, namelen) != 0 || + (storage->name[namelen] != 0 && + storage->name[namelen] != '.' && + storage->name[namelen] != '[')) { + continue; + } + + if (storage->type->is_image()) { + brw_setup_image_uniform_values(stage, stage_prog_data, + uniform_index, storage); + } else { + gl_constant_value *components = storage->storage; + unsigned vector_count = (MAX2(storage->array_elements, 1) * + storage->type->matrix_columns); + unsigned vector_size = storage->type->vector_elements; + + for (unsigned s = 0; s < vector_count; s++) { + unsigned i; + for (i = 0; i < vector_size; i++) { + stage_prog_data->param[uniform_index++] = components++; + } + + /* Pad out with zeros if needed (only needed for vec4) */ + for (; i < comps_per_unit; i++) { + static const gl_constant_value zero = { 0.0 }; + stage_prog_data->param[uniform_index++] = &zero; + } + } + } + } +} + +void +brw_nir_setup_glsl_uniforms(nir_shader *shader, + struct gl_shader_program *shader_prog, + const struct gl_program *prog, + struct brw_stage_prog_data *stage_prog_data, + bool is_scalar) +{ + unsigned comps_per_unit = is_scalar ? 1 : 4; + + foreach_list_typed(nir_variable, var, node, &shader->uniforms) { + /* UBO's, atomics and samplers don't take up space in the + uniform file */ + if (var->interface_type != NULL || var->type->contains_atomic()) + continue; + + if (strncmp(var->name, "gl_", 3) == 0) { + brw_nir_setup_glsl_builtin_uniform(var, prog, stage_prog_data, + comps_per_unit); + } else { + brw_nir_setup_glsl_uniform(shader->stage, var, shader_prog, + stage_prog_data, comps_per_unit); + } + } +} + +void +brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog, + struct brw_stage_prog_data *stage_prog_data) +{ + struct gl_program_parameter_list *plist = prog->Parameters; + +#ifndef NDEBUG + if (!shader->uniforms.is_empty()) { + /* For ARB programs, only a single "parameters" variable is generated to + * support uniform data. + */ + assert(shader->uniforms.length() == 1); + nir_variable *var = (nir_variable *) shader->uniforms.get_head(); + assert(strcmp(var->name, "parameters") == 0); + assert(var->type->array_size() == (int)plist->NumParameters); + } +#endif + + for (unsigned p = 0; p < plist->NumParameters; p++) { + /* Parameters should be either vec4 uniforms or single component + * constants; matrices and other larger types should have been broken + * down earlier. 
+ */ + assert(plist->Parameters[p].Size <= 4); + + unsigned i; + for (i = 0; i < plist->Parameters[p].Size; i++) { + stage_prog_data->param[4 * p + i] = &plist->ParameterValues[p][i]; + } + for (; i < 4; i++) { + static const gl_constant_value zero = { 0.0 }; + stage_prog_data->param[4 * p + i] = &zero; + } + } +} diff --git a/src/mesa/drivers/dri/i965/brw_program.c b/src/mesa/drivers/dri/i965/brw_program.c index fa59338950a..0a9a99edf2d 100644 --- a/src/mesa/drivers/dri/i965/brw_program.c +++ b/src/mesa/drivers/dri/i965/brw_program.c @@ -142,9 +142,7 @@ brwProgramStringNotify(struct gl_context *ctx, brw_add_texrect_params(prog); - if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions) { - prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true); - } + prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true); brw_fs_precompile(ctx, NULL, prog); break; @@ -168,10 +166,8 @@ brwProgramStringNotify(struct gl_context *ctx, brw_add_texrect_params(prog); - if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions) { - prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX, - brw->intelScreen->compiler->scalar_vs); - } + prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX, + brw->intelScreen->compiler->scalar_vs); brw_vs_precompile(ctx, NULL, prog); break; @@ -544,23 +540,6 @@ brw_mark_surface_used(struct brw_stage_prog_data *prog_data, MAX2(prog_data->binding_table.size_bytes, (surf_index + 1) * 4); } -bool -brw_stage_prog_data_compare(const struct brw_stage_prog_data *a, - const struct brw_stage_prog_data *b) -{ - /* Compare all the struct up to the pointers. */ - if (memcmp(a, b, offsetof(struct brw_stage_prog_data, param))) - return false; - - if (memcmp(a->param, b->param, a->nr_params * sizeof(void *))) - return false; - - if (memcmp(a->pull_param, b->pull_param, a->nr_pull_params * sizeof(void *))) - return false; - - return true; -} - void brw_stage_prog_data_free(const void *p) { @@ -568,6 +547,7 @@ brw_stage_prog_data_free(const void *p) ralloc_free(prog_data->param); ralloc_free(prog_data->pull_param); + ralloc_free(prog_data->image_param); } void diff --git a/src/mesa/drivers/dri/i965/brw_program.h b/src/mesa/drivers/dri/i965/brw_program.h index 72d68d869fc..cf0522a8b10 100644 --- a/src/mesa/drivers/dri/i965/brw_program.h +++ b/src/mesa/drivers/dri/i965/brw_program.h @@ -169,10 +169,6 @@ void brw_mark_surface_used(struct brw_stage_prog_data *prog_data, unsigned surf_index); -bool -brw_stage_prog_data_compare(const struct brw_stage_prog_data *a, - const struct brw_stage_prog_data *b); - void brw_stage_prog_data_free(const void *prog_data); diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp index 1060d93ae6b..8bc7d9d6aac 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.cpp +++ b/src/mesa/drivers/dri/i965/brw_shader.cpp @@ -72,6 +72,20 @@ shader_perf_log_mesa(void *data, const char *fmt, ...) 
va_end(args); } +static bool +is_scalar_shader_stage(const struct brw_compiler *compiler, int stage) +{ + switch (stage) { + case MESA_SHADER_FRAGMENT: + case MESA_SHADER_COMPUTE: + return true; + case MESA_SHADER_VERTEX: + return compiler->scalar_vs; + default: + return false; + } +} + struct brw_compiler * brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) { @@ -120,19 +134,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false; compiler->glsl_compiler_options[i].LowerClipDistance = true; - bool is_scalar; - switch (i) { - case MESA_SHADER_FRAGMENT: - case MESA_SHADER_COMPUTE: - is_scalar = true; - break; - case MESA_SHADER_VERTEX: - is_scalar = compiler->scalar_vs; - break; - default: - is_scalar = false; - break; - } + bool is_scalar = is_scalar_shader_stage(compiler, i); compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar; compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar; @@ -142,8 +144,7 @@ brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo) if (devinfo->gen < 7) compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true; - if (is_scalar || brw_env_var_as_boolean("INTEL_USE_NIR", true)) - compiler->glsl_compiler_options[i].NirOptions = nir_options; + compiler->glsl_compiler_options[i].NirOptions = nir_options; } return compiler; @@ -194,20 +195,6 @@ brw_shader_precompile(struct gl_context *ctx, return true; } -static inline bool -is_scalar_shader_stage(struct brw_context *brw, int stage) -{ - switch (stage) { - case MESA_SHADER_FRAGMENT: - case MESA_SHADER_COMPUTE: - return true; - case MESA_SHADER_VERTEX: - return brw->intelScreen->compiler->scalar_vs; - default: - return false; - } -} - static void brw_lower_packing_builtins(struct brw_context *brw, gl_shader_stage shader_type, @@ -218,7 +205,7 @@ brw_lower_packing_builtins(struct brw_context *brw, | LOWER_PACK_UNORM_2x16 | LOWER_UNPACK_UNORM_2x16; - if (is_scalar_shader_stage(brw, shader_type)) { + if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) { ops |= LOWER_UNPACK_UNORM_4x8 | LOWER_UNPACK_SNORM_4x8 | LOWER_PACK_UNORM_4x8 @@ -231,7 +218,7 @@ brw_lower_packing_builtins(struct brw_context *brw, * lowering is needed. For SOA code, the Half2x16 ops must be * scalarized. 
*/ - if (is_scalar_shader_stage(brw, shader_type)) { + if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) { ops |= LOWER_PACK_HALF_2x16_TO_SPLIT | LOWER_UNPACK_HALF_2x16_TO_SPLIT; } @@ -285,8 +272,6 @@ process_glsl_ir(gl_shader_stage stage, brw_lower_texture_gradients(brw, shader->ir); do_vec_index_to_cond_assign(shader->ir); lower_vector_insert(shader->ir, true); - if (options->NirOptions == NULL) - brw_do_cubemap_normalize(shader->ir); lower_offset_arrays(shader->ir); brw_do_lower_unnormalized_offset(shader->ir); lower_noise(shader->ir); @@ -312,7 +297,7 @@ process_glsl_ir(gl_shader_stage stage, do { progress = false; - if (is_scalar_shader_stage(brw, shader->Stage)) { + if (is_scalar_shader_stage(brw->intelScreen->compiler, shader->Stage)) { brw_do_channel_expressions(shader->ir); brw_do_vector_splitting(shader->ir); } @@ -350,13 +335,11 @@ GLboolean brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) { struct brw_context *brw = brw_context(ctx); + const struct brw_compiler *compiler = brw->intelScreen->compiler; unsigned int stage; for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) { struct gl_shader *shader = shProg->_LinkedShaders[stage]; - const struct gl_shader_compiler_options *options = - &ctx->Const.ShaderCompilerOptions[stage]; - if (!shader) continue; @@ -404,10 +387,8 @@ brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg) brw_add_texrect_params(prog); - if (options->NirOptions) { - prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage, - is_scalar_shader_stage(brw, stage)); - } + prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage, + is_scalar_shader_stage(compiler, stage)); _mesa_reference_program(ctx, &prog, NULL); } @@ -917,21 +898,16 @@ brw_abs_immediate(enum brw_reg_type type, struct brw_reg *reg) backend_shader::backend_shader(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, - struct gl_shader_program *shader_prog, - struct gl_program *prog, - struct brw_stage_prog_data *stage_prog_data, - gl_shader_stage stage) + nir_shader *shader, + struct brw_stage_prog_data *stage_prog_data) : compiler(compiler), log_data(log_data), devinfo(compiler->devinfo), - shader(shader_prog ? - (struct brw_shader *)shader_prog->_LinkedShaders[stage] : NULL), - shader_prog(shader_prog), - prog(prog), + nir(shader), stage_prog_data(stage_prog_data), mem_ctx(mem_ctx), cfg(NULL), - stage(stage) + stage(shader->stage) { debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage); stage_name = _mesa_shader_stage_to_string(stage); @@ -1374,16 +1350,25 @@ backend_shader::invalidate_cfg() * trigger some of our asserts that surface indices are < BRW_MAX_SURFACES. 
*/ void -backend_shader::assign_common_binding_table_offsets(uint32_t next_binding_table_offset) +brw_assign_common_binding_table_offsets(gl_shader_stage stage, + const struct brw_device_info *devinfo, + const struct gl_shader_program *shader_prog, + const struct gl_program *prog, + struct brw_stage_prog_data *stage_prog_data, + uint32_t next_binding_table_offset) { + const struct gl_shader *shader = NULL; int num_textures = _mesa_fls(prog->SamplersUsed); + if (shader_prog) + shader = shader_prog->_LinkedShaders[stage]; + stage_prog_data->binding_table.texture_start = next_binding_table_offset; next_binding_table_offset += num_textures; if (shader) { stage_prog_data->binding_table.ubo_start = next_binding_table_offset; - next_binding_table_offset += shader->base.NumUniformBlocks; + next_binding_table_offset += shader->NumUniformBlocks; } else { stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0; } @@ -1414,9 +1399,9 @@ backend_shader::assign_common_binding_table_offsets(uint32_t next_binding_table_ stage_prog_data->binding_table.abo_start = 0xd0d0d0d0; } - if (shader && shader->base.NumImages) { + if (shader && shader->NumImages) { stage_prog_data->binding_table.image_start = next_binding_table_offset; - next_binding_table_offset += shader->base.NumImages; + next_binding_table_offset += shader->NumImages; } else { stage_prog_data->binding_table.image_start = 0xd0d0d0d0; } @@ -1430,32 +1415,50 @@ backend_shader::assign_common_binding_table_offsets(uint32_t next_binding_table_ /* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */ } +static void +setup_vec4_uniform_value(const gl_constant_value **params, + const gl_constant_value *values, + unsigned n) +{ + static const gl_constant_value zero = { 0 }; + + for (unsigned i = 0; i < n; ++i) + params[i] = &values[i]; + + for (unsigned i = n; i < 4; ++i) + params[i] = &zero; +} + void -backend_shader::setup_image_uniform_values(unsigned param_offset, - const gl_uniform_storage *storage) +brw_setup_image_uniform_values(gl_shader_stage stage, + struct brw_stage_prog_data *stage_prog_data, + unsigned param_start_index, + const gl_uniform_storage *storage) { - const unsigned stage = _mesa_program_enum_to_shader_stage(prog->Target); + const gl_constant_value **param = + &stage_prog_data->param[param_start_index]; for (unsigned i = 0; i < MAX2(storage->array_elements, 1); i++) { const unsigned image_idx = storage->image[stage].index + i; - const brw_image_param *param = &stage_prog_data->image_param[image_idx]; + const brw_image_param *image_param = + &stage_prog_data->image_param[image_idx]; /* Upload the brw_image_param structure. The order is expected to match * the BRW_IMAGE_PARAM_*_OFFSET defines. 
*/ - setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, - (const gl_constant_value *)¶m->surface_idx, 1); - setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_OFFSET_OFFSET, - (const gl_constant_value *)param->offset, 2); - setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_SIZE_OFFSET, - (const gl_constant_value *)param->size, 3); - setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_STRIDE_OFFSET, - (const gl_constant_value *)param->stride, 4); - setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_TILING_OFFSET, - (const gl_constant_value *)param->tiling, 3); - setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, - (const gl_constant_value *)param->swizzling, 2); - param_offset += BRW_IMAGE_PARAM_SIZE; + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET, + (const gl_constant_value *)&image_param->surface_idx, 1); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET, + (const gl_constant_value *)image_param->offset, 2); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET, + (const gl_constant_value *)image_param->size, 3); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET, + (const gl_constant_value *)image_param->stride, 4); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET, + (const gl_constant_value *)image_param->tiling, 3); + setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET, + (const gl_constant_value *)image_param->swizzling, 2); + param += BRW_IMAGE_PARAM_SIZE; brw_mark_surface_used( stage_prog_data, diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h index ccccf4d6938..fd96740526b 100644 --- a/src/mesa/drivers/dri/i965/brw_shader.h +++ b/src/mesa/drivers/dri/i965/brw_shader.h @@ -24,6 +24,7 @@ #include #include "brw_reg.h" #include "brw_defines.h" +#include "brw_context.h" #include "main/compiler.h" #include "glsl/ir.h" #include "program/prog_parameter.h" @@ -224,10 +225,8 @@ protected: backend_shader(const struct brw_compiler *compiler, void *log_data, void *mem_ctx, - struct gl_shader_program *shader_prog, - struct gl_program *prog, - struct brw_stage_prog_data *stage_prog_data, - gl_shader_stage stage); + nir_shader *shader, + struct brw_stage_prog_data *stage_prog_data); public: @@ -235,9 +234,7 @@ public: void *log_data; /* Passed to compiler->*_log functions */ const struct brw_device_info * const devinfo; - struct brw_shader * const shader; - struct gl_shader_program * const shader_prog; - struct gl_program * const prog; + nir_shader *nir; struct brw_stage_prog_data * const stage_prog_data; /** ralloc context for temporary data used during compile */ @@ -266,19 +263,16 @@ public: void calculate_cfg(); void invalidate_cfg(); - void assign_common_binding_table_offsets(uint32_t next_binding_table_offset); - virtual void invalidate_live_intervals() = 0; - - virtual void setup_vec4_uniform_value(unsigned param_offset, - const gl_constant_value *values, - unsigned n) = 0; - void setup_image_uniform_values(unsigned param_offset, - const gl_uniform_storage *storage); }; uint32_t brw_texture_offset(int *offsets, unsigned num_components); +void brw_setup_image_uniform_values(gl_shader_stage stage, + struct brw_stage_prog_data *stage_prog_data, + unsigned param_start_index, + const gl_uniform_storage *storage); + #endif /* __cplusplus */ enum brw_reg_type brw_type_for_base_type(const struct glsl_type *type); @@ -296,6 +290,14 @@ extern "C" { struct brw_compiler * brw_compiler_create(void 
*mem_ctx, const struct brw_device_info *devinfo); +void +brw_assign_common_binding_table_offsets(gl_shader_stage stage, + const struct brw_device_info *devinfo, + const struct gl_shader_program *shader_prog, + const struct gl_program *prog, + struct brw_stage_prog_data *stage_prog_data, + uint32_t next_binding_table_offset); + bool brw_vs_precompile(struct gl_context *ctx, struct gl_shader_program *shader_prog, struct gl_program *prog); diff --git a/src/mesa/drivers/dri/i965/brw_state.h b/src/mesa/drivers/dri/i965/brw_state.h index 3b7a4330c7a..dc2b9415673 100644 --- a/src/mesa/drivers/dri/i965/brw_state.h +++ b/src/mesa/drivers/dri/i965/brw_state.h @@ -87,6 +87,7 @@ extern const struct brw_tracked_state brw_vs_binding_table; extern const struct brw_tracked_state brw_wm_ubo_surfaces; extern const struct brw_tracked_state brw_wm_abo_surfaces; extern const struct brw_tracked_state brw_wm_image_surfaces; +extern const struct brw_tracked_state brw_cs_ubo_surfaces; extern const struct brw_tracked_state brw_cs_abo_surfaces; extern const struct brw_tracked_state brw_cs_image_surfaces; extern const struct brw_tracked_state brw_wm_unit; diff --git a/src/mesa/drivers/dri/i965/brw_state_cache.c b/src/mesa/drivers/dri/i965/brw_state_cache.c index fbc041920f4..2fbcd146750 100644 --- a/src/mesa/drivers/dri/i965/brw_state_cache.c +++ b/src/mesa/drivers/dri/i965/brw_state_cache.c @@ -350,10 +350,6 @@ brw_init_caches(struct brw_context *brw) if (brw->has_llc) drm_intel_gem_bo_map_unsynchronized(cache->bo); - cache->aux_compare[BRW_CACHE_VS_PROG] = brw_vs_prog_data_compare; - cache->aux_compare[BRW_CACHE_GS_PROG] = brw_gs_prog_data_compare; - cache->aux_compare[BRW_CACHE_FS_PROG] = brw_wm_prog_data_compare; - cache->aux_compare[BRW_CACHE_CS_PROG] = brw_cs_prog_data_compare; cache->aux_free[BRW_CACHE_VS_PROG] = brw_stage_prog_data_free; cache->aux_free[BRW_CACHE_GS_PROG] = brw_stage_prog_data_free; cache->aux_free[BRW_CACHE_FS_PROG] = brw_stage_prog_data_free; diff --git a/src/mesa/drivers/dri/i965/brw_state_upload.c b/src/mesa/drivers/dri/i965/brw_state_upload.c index 46687e342d3..79b8301954e 100644 --- a/src/mesa/drivers/dri/i965/brw_state_upload.c +++ b/src/mesa/drivers/dri/i965/brw_state_upload.c @@ -259,6 +259,7 @@ static const struct brw_tracked_state *gen7_compute_atoms[] = &brw_state_base_address, &brw_cs_image_surfaces, &gen7_cs_push_constants, + &brw_cs_ubo_surfaces, &brw_cs_abo_surfaces, &brw_texture_surfaces, &brw_cs_work_groups_surface, @@ -352,6 +353,7 @@ static const struct brw_tracked_state *gen8_compute_atoms[] = &gen8_state_base_address, &brw_cs_image_surfaces, &gen7_cs_push_constants, + &brw_cs_ubo_surfaces, &brw_cs_abo_surfaces, &brw_texture_surfaces, &brw_cs_work_groups_surface, diff --git a/src/mesa/drivers/dri/i965/brw_tex_layout.c b/src/mesa/drivers/dri/i965/brw_tex_layout.c index 268b995f92e..2955c8dcc2e 100644 --- a/src/mesa/drivers/dri/i965/brw_tex_layout.c +++ b/src/mesa/drivers/dri/i965/brw_tex_layout.c @@ -282,7 +282,7 @@ gen9_miptree_layout_1d(struct intel_mipmap_tree *mt) /* When this layout is used the horizontal alignment is fixed at 64 and the * hardware ignores the value given in the surface state */ - const unsigned int align_w = 64; + const unsigned int halign = 64; mt->total_height = mt->physical_height0; mt->total_width = 0; @@ -292,7 +292,7 @@ gen9_miptree_layout_1d(struct intel_mipmap_tree *mt) intel_miptree_set_level_info(mt, level, x, 0, depth); - img_width = ALIGN(width, align_w); + img_width = ALIGN(width, halign); mt->total_width = MAX2(mt->total_width, x + 
img_width); @@ -328,10 +328,10 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt) unsigned mip1_width; if (mt->compressed) { - mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->align_w) + + mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->halign) + ALIGN_NPOT(minify(mt->physical_width0, 2), bw); } else { - mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->align_w) + + mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->halign) + minify(mt->physical_width0, 2); } @@ -348,7 +348,7 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt) intel_miptree_set_level_info(mt, level, x, y, depth); - img_height = ALIGN_NPOT(height, mt->align_h); + img_height = ALIGN_NPOT(height, mt->valign); if (mt->compressed) img_height /= bh; @@ -365,7 +365,7 @@ brw_miptree_layout_2d(struct intel_mipmap_tree *mt) /* Layout_below: step right after second mipmap. */ if (level == mt->first_level + 1) { - x += ALIGN_NPOT(width, mt->align_w) / bw; + x += ALIGN_NPOT(width, mt->halign) / bw; } else { y += img_height; } @@ -385,7 +385,7 @@ brw_miptree_get_horizontal_slice_pitch(const struct brw_context *brw, { if ((brw->gen < 9 && mt->target == GL_TEXTURE_3D) || (brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP)) { - return ALIGN_NPOT(minify(mt->physical_width0, level), mt->align_w); + return ALIGN_NPOT(minify(mt->physical_width0, level), mt->halign); } else { return 0; } @@ -426,13 +426,13 @@ brw_miptree_get_vertical_slice_pitch(const struct brw_context *brw, } else if (mt->target == GL_TEXTURE_3D || (brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP) || mt->array_layout == ALL_SLICES_AT_EACH_LOD) { - return ALIGN_NPOT(minify(mt->physical_height0, level), mt->align_h); + return ALIGN_NPOT(minify(mt->physical_height0, level), mt->valign); } else { - const unsigned h0 = ALIGN_NPOT(mt->physical_height0, mt->align_h); - const unsigned h1 = ALIGN_NPOT(minify(mt->physical_height0, 1), mt->align_h); + const unsigned h0 = ALIGN_NPOT(mt->physical_height0, mt->valign); + const unsigned h1 = ALIGN_NPOT(minify(mt->physical_height0, 1), mt->valign); - return h0 + h1 + (brw->gen >= 7 ? 12 : 11) * mt->align_h; + return h0 + h1 + (brw->gen >= 7 ? 12 : 11) * mt->valign; } } @@ -502,9 +502,9 @@ brw_miptree_layout_texture_array(struct brw_context *brw, for (unsigned level = mt->first_level; level <= mt->last_level; level++) { unsigned img_height; - img_height = ALIGN_NPOT(height, mt->align_h); + img_height = ALIGN_NPOT(height, mt->valign); if (mt->compressed) - img_height /= mt->align_h; + img_height /= mt->valign; for (unsigned q = 0; q < mt->level[level].depth; q++) { if (mt->array_layout == ALL_SLICES_AT_EACH_LOD) { @@ -537,8 +537,8 @@ brw_miptree_layout_texture_3d(struct brw_context *brw, unsigned WL = MAX2(mt->physical_width0 >> level, 1); unsigned HL = MAX2(mt->physical_height0 >> level, 1); unsigned DL = MAX2(mt->physical_depth0 >> level, 1); - unsigned wL = ALIGN_NPOT(WL, mt->align_w); - unsigned hL = ALIGN_NPOT(HL, mt->align_h); + unsigned wL = ALIGN_NPOT(WL, mt->halign); + unsigned hL = ALIGN_NPOT(HL, mt->valign); if (mt->target == GL_TEXTURE_CUBE_MAP) DL = 6; @@ -656,7 +656,7 @@ brw_miptree_choose_tiling(struct brw_context *brw, * to know that ahead of time. And besides, since we use a vertical * alignment of 4 as often as we can, this shouldn't happen very often. 
*/ - if (brw->gen == 7 && mt->align_h == 2 && + if (brw->gen == 7 && mt->valign == 2 && brw->format_supported_as_render_target[mt->format]) { return I915_TILING_X; } @@ -748,21 +748,21 @@ intel_miptree_set_alignment(struct brw_context *brw, /* Stencil uses W tiling, so we force W tiling alignment for the * ALL_SLICES_AT_EACH_LOD miptree layout. */ - mt->align_w = 64; - mt->align_h = 64; + mt->halign = 64; + mt->valign = 64; assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0); } else { /* Depth uses Y tiling, so we force need Y tiling alignment for the * ALL_SLICES_AT_EACH_LOD miptree layout. */ - mt->align_w = 128 / mt->cpp; - mt->align_h = 32; + mt->halign = 128 / mt->cpp; + mt->valign = 32; } } else if (mt->compressed) { /* The hardware alignment requirements for compressed textures * happen to match the block boundaries. */ - _mesa_get_format_block_size(mt->format, &mt->align_w, &mt->align_h); + _mesa_get_format_block_size(mt->format, &mt->halign, &mt->valign); /* On Gen9+ we can pick our own alignment for compressed textures but it * has to be a multiple of the block size. The minimum alignment we can @@ -770,21 +770,21 @@ intel_miptree_set_alignment(struct brw_context *brw, * size */ if (brw->gen >= 9) { - mt->align_w *= 4; - mt->align_h *= 4; + mt->halign *= 4; + mt->valign *= 4; } } else if (mt->format == MESA_FORMAT_S_UINT8) { - mt->align_w = 8; - mt->align_h = brw->gen >= 7 ? 8 : 4; + mt->halign = 8; + mt->valign = brw->gen >= 7 ? 8 : 4; } else if (brw->gen >= 9 && mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) { /* XY_FAST_COPY_BLT doesn't support horizontal alignment < 32 or * vertical alignment < 64. */ - mt->align_w = MAX2(tr_mode_horizontal_texture_alignment(brw, mt), 32); - mt->align_h = MAX2(tr_mode_vertical_texture_alignment(brw, mt), 64); + mt->halign = MAX2(tr_mode_horizontal_texture_alignment(brw, mt), 32); + mt->valign = MAX2(tr_mode_vertical_texture_alignment(brw, mt), 64); } else { - mt->align_w = + mt->halign = intel_horizontal_texture_alignment_unit(brw, mt, layout_flags); - mt->align_h = intel_vertical_texture_alignment_unit(brw, mt); + mt->valign = intel_vertical_texture_alignment_unit(brw, mt); } } @@ -809,8 +809,8 @@ brw_miptree_layout(struct brw_context *brw, if (brw->gen >= 9) { unsigned int i, j; _mesa_get_format_block_size(mt->format, &i, &j); - mt->align_w /= i; - mt->align_h /= j; + mt->halign /= i; + mt->valign /= j; } if ((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0) diff --git a/src/mesa/drivers/dri/i965/brw_vec4.cpp b/src/mesa/drivers/dri/i965/brw_vec4.cpp index 1d62f2f6a75..689c767d2d7 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4.cpp @@ -518,11 +518,11 @@ vec4_visitor::split_uniform_registers() void vec4_visitor::pack_uniform_registers() { - bool uniform_used[this->uniforms]; + uint8_t chans_used[this->uniforms]; int new_loc[this->uniforms]; int new_chan[this->uniforms]; - memset(uniform_used, 0, sizeof(uniform_used)); + memset(chans_used, 0, sizeof(chans_used)); memset(new_loc, 0, sizeof(new_loc)); memset(new_chan, 0, sizeof(new_chan)); @@ -531,11 +531,36 @@ vec4_visitor::pack_uniform_registers() * to pull constants, and from some GLSL code generators like wine. 
*/ foreach_block_and_inst(block, vec4_instruction, inst, cfg) { + unsigned readmask; + switch (inst->opcode) { + case VEC4_OPCODE_PACK_BYTES: + case BRW_OPCODE_DP4: + case BRW_OPCODE_DPH: + readmask = 0xf; + break; + case BRW_OPCODE_DP3: + readmask = 0x7; + break; + case BRW_OPCODE_DP2: + readmask = 0x3; + break; + default: + readmask = inst->dst.writemask; + break; + } + for (int i = 0 ; i < 3; i++) { - if (inst->src[i].file != UNIFORM) - continue; + if (inst->src[i].file != UNIFORM) + continue; - uniform_used[inst->src[i].reg] = true; + int reg = inst->src[i].reg; + for (int c = 0; c < 4; c++) { + if (!(readmask & (1 << c))) + continue; + + chans_used[reg] = MAX2(chans_used[reg], + BRW_GET_SWZ(inst->src[i].swizzle, c) + 1); + } } } @@ -546,17 +571,15 @@ vec4_visitor::pack_uniform_registers() */ for (int src = 0; src < uniforms; src++) { assert(src < uniform_array_size); - int size = this->uniform_vector_size[src]; + int size = chans_used[src]; - if (!uniform_used[src]) { - this->uniform_vector_size[src] = 0; - continue; - } + if (size == 0) + continue; int dst; /* Find the lowest place we can slot this uniform in. */ for (dst = 0; dst < src; dst++) { - if (this->uniform_vector_size[dst] + size <= 4) + if (chans_used[dst] + size <= 4) break; } @@ -565,7 +588,7 @@ vec4_visitor::pack_uniform_registers() new_chan[src] = 0; } else { new_loc[src] = dst; - new_chan[src] = this->uniform_vector_size[dst]; + new_chan[src] = chans_used[dst]; /* Move the references to the data */ for (int j = 0; j < size; j++) { @@ -573,8 +596,8 @@ vec4_visitor::pack_uniform_registers() stage_prog_data->param[src * 4 + j]; } - this->uniform_vector_size[dst] += size; - this->uniform_vector_size[src] = 0; + chans_used[dst] += size; + chans_used[src] = 0; } new_uniform_count = MAX2(new_uniform_count, dst + 1); @@ -1643,7 +1666,6 @@ vec4_visitor::setup_uniforms(int reg) */ if (devinfo->gen < 6 && this->uniforms == 0) { assert(this->uniforms < this->uniform_array_size); - this->uniform_vector_size[this->uniforms] = 1; stage_prog_data->param = reralloc(NULL, stage_prog_data->param, const gl_constant_value *, 4); @@ -1685,12 +1707,6 @@ vec4_vs_visitor::setup_payload(void) this->first_non_payload_grf = reg; } -void -vec4_visitor::assign_binding_table_offsets() -{ - assign_common_binding_table_offsets(0); -} - src_reg vec4_visitor::get_timestamp() { @@ -1786,31 +1802,14 @@ vec4_visitor::emit_shader_time_write(int shader_time_subindex, src_reg value) bool vec4_visitor::run() { - bool use_vec4_nir = - compiler->glsl_compiler_options[stage].NirOptions != NULL; - - sanity_param_count = prog->Parameters->NumParameters; - if (shader_time_index >= 0) emit_shader_time_begin(); - assign_binding_table_offsets(); - emit_prolog(); - if (use_vec4_nir) { - assert(prog->nir != NULL); - emit_nir_code(); - if (failed) - return false; - } else if (shader) { - /* Generate VS IR for main(). (the visitor only descends into - * functions called "main"). - */ - visit_instructions(shader->base.ir); - } else { - emit_program_code(); - } + emit_nir_code(); + if (failed) + return false; base_ir = NULL; emit_thread_end(); @@ -1823,18 +1822,9 @@ vec4_visitor::run() * that we have reladdr computations available for CSE, since we'll * often do repeated subexpressions for those. 
*/ - if (shader || use_vec4_nir) { - move_grf_array_access_to_scratch(); - move_uniform_array_access_to_pull_constants(); - } else { - /* The ARB_vertex_program frontend emits pull constant loads directly - * rather than using reladdr, so we don't need to walk through all the - * instructions looking for things to move. There isn't anything. - * - * We do still need to split things to vec4 size. - */ - split_uniform_registers(); - } + move_grf_array_access_to_scratch(); + move_uniform_array_access_to_pull_constants(); + pack_uniform_registers(); move_push_constants_to_pull_constants(); split_virtual_grfs(); @@ -1845,8 +1835,8 @@ vec4_visitor::run() \ if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER) && this_progress) { \ char filename[64]; \ - snprintf(filename, 64, "%s-%04d-%02d-%02d-" #pass, \ - stage_abbrev, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \ + snprintf(filename, 64, "%s-%s-%02d-%02d-" #pass, \ + stage_abbrev, nir->info.name, iteration, pass_num); \ \ backend_shader::dump_instructions(filename); \ } \ @@ -1858,8 +1848,8 @@ vec4_visitor::run() if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) { char filename[64]; - snprintf(filename, 64, "%s-%04d-00-start", - stage_abbrev, shader_prog ? shader_prog->Name : 0); + snprintf(filename, 64, "%s-%s-00-start", + stage_abbrev, nir->info.name); backend_shader::dump_instructions(filename); } @@ -1933,13 +1923,6 @@ vec4_visitor::run() brw_get_scratch_size(last_scratch * REG_SIZE); } - /* If any state parameters were appended, then ParameterValues could have - * been realloced, in which case the driver uniform storage set up by - * _mesa_associate_uniform_storage() would point to freed memory. Make - * sure that didn't happen. - */ - assert(sanity_param_count == prog->Parameters->NumParameters); - return !failed; } @@ -1974,27 +1957,13 @@ brw_vs_emit(struct brw_context *brw, if (unlikely(INTEL_DEBUG & DEBUG_VS) && shader->base.ir) brw_dump_ir("vertex", prog, &shader->base, &vp->Base); - if (!vp->Base.nir && - (brw->intelScreen->compiler->scalar_vs || - brw->intelScreen->compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions != NULL)) { - /* Normally we generate NIR in LinkShader() or - * ProgramStringNotify(), but Mesa's fixed-function vertex program - * handling doesn't notify the driver at all. Just do it here, at - * the last minute, even though it's lame. 
- */ - assert(vp->Base.Id == 0 && prog == NULL); - vp->Base.nir = - brw_create_nir(brw, NULL, &vp->Base, MESA_SHADER_VERTEX, - brw->intelScreen->compiler->scalar_vs); - } - if (brw->intelScreen->compiler->scalar_vs) { prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; fs_visitor v(brw->intelScreen->compiler, brw, - mem_ctx, MESA_SHADER_VERTEX, key, - &prog_data->base.base, prog, &vp->Base, - 8, st_index); + mem_ctx, key, &prog_data->base.base, + NULL, /* prog; Only used for TEXTURE_RECTANGLE on gen < 8 */ + vp->Base.nir, 8, st_index); if (!v.run_vs(brw_select_clip_planes(&brw->ctx))) { if (prog) { prog->LinkStatus = false; @@ -2031,7 +2000,7 @@ brw_vs_emit(struct brw_context *brw, prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; vec4_vs_visitor v(brw->intelScreen->compiler, brw, key, prog_data, - vp, prog, brw_select_clip_planes(&brw->ctx), + vp->Base.nir, brw_select_clip_planes(&brw->ctx), mem_ctx, st_index, !_mesa_is_gles3(&brw->ctx)); if (!v.run()) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h index ac9bd4a11fa..51b3161f659 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@ -65,16 +65,14 @@ class vec4_live_variables; * Translates either GLSL IR or Mesa IR (for ARB_vertex_program and * fixed-function) into VS IR. */ -class vec4_visitor : public backend_shader, public ir_visitor +class vec4_visitor : public backend_shader { public: vec4_visitor(const struct brw_compiler *compiler, void *log_data, - struct gl_program *prog, const struct brw_sampler_prog_key_data *key, struct brw_vue_prog_data *prog_data, - struct gl_shader_program *shader_prog, - gl_shader_stage stage, + nir_shader *shader, void *mem_ctx, bool no_spills, int shader_time_index); @@ -97,8 +95,6 @@ public: const struct brw_sampler_prog_key_data * const key_tex; struct brw_vue_prog_data * const prog_data; - unsigned int sanity_param_count; - char *fail_msg; bool failed; @@ -116,66 +112,22 @@ public: brw::vec4_live_variables *live_intervals; dst_reg userplane[MAX_CLIP_PLANES]; - dst_reg *variable_storage(ir_variable *var); - - void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr); - bool need_all_constants_in_pull_buffer; - /** - * \name Visit methods - * - * As typical for the visitor pattern, there must be one \c visit method for - * each concrete subclass of \c ir_instruction. Virtual base classes within - * the hierarchy should not have \c visit methods. - */ - /*@{*/ - virtual void visit(ir_variable *); - virtual void visit(ir_loop *); - virtual void visit(ir_loop_jump *); - virtual void visit(ir_function_signature *); - virtual void visit(ir_function *); - virtual void visit(ir_expression *); - virtual void visit(ir_swizzle *); - virtual void visit(ir_dereference_variable *); - virtual void visit(ir_dereference_array *); - virtual void visit(ir_dereference_record *); - virtual void visit(ir_assignment *); - virtual void visit(ir_constant *); - virtual void visit(ir_call *); - virtual void visit(ir_return *); - virtual void visit(ir_discard *); - virtual void visit(ir_texture *); - virtual void visit(ir_if *); - virtual void visit(ir_emit_vertex *); - virtual void visit(ir_end_primitive *); - virtual void visit(ir_barrier *); - /*@}*/ - - src_reg result; - /* Regs for vertex results. Generated at ir_variable visiting time * for the ir->location's used. 
*/ dst_reg output_reg[BRW_VARYING_SLOT_COUNT]; const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT]; int *uniform_size; - int *uniform_vector_size; - int uniform_array_size; /*< Size of uniform_[vector_]size arrays */ + int uniform_array_size; /*< Size of the uniform_size array */ int uniforms; src_reg shader_start_time; - struct hash_table *variable_ht; - bool run(); void fail(const char *msg, ...); - virtual void setup_vec4_uniform_value(unsigned param_offset, - const gl_constant_value *values, - unsigned n); - void setup_uniform_values(ir_variable *ir); - void setup_builtin_uniform_values(ir_variable *ir); int setup_uniforms(int payload_reg); bool reg_allocate_trivial(); @@ -271,21 +223,9 @@ public: int implied_mrf_writes(vec4_instruction *inst); - bool try_rewrite_rhs_to_dst(ir_assignment *ir, - dst_reg dst, - src_reg src, - vec4_instruction *pre_rhs_inst, - vec4_instruction *last_rhs_inst); - - /** Walks an exec_list of ir_instruction and sends it through this visitor. */ - void visit_instructions(const exec_list *list); - void emit_vp_sop(enum brw_conditional_mod condmod, dst_reg dst, src_reg src0, src_reg src1, src_reg one); - void emit_bool_to_cond_code(ir_rvalue *ir, enum brw_predicate *predicate); - void emit_if_gen6(ir_if *ir); - vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst, src_reg src0, src_reg src1); @@ -298,22 +238,11 @@ public: */ src_reg emit_uniformize(const src_reg &src); - void emit_block_move(dst_reg *dst, src_reg *src, - const struct glsl_type *type, brw_predicate predicate); - - void emit_constant_values(dst_reg *dst, ir_constant *value); - /** * Emit the correct dot-product instruction for the type of arguments */ void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements); - void emit_scalar(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, src_reg src0); - - void emit_scalar(ir_instruction *ir, enum prog_opcode op, - dst_reg dst, src_reg src0, src_reg src1); - src_reg fix_3src_operand(const src_reg &src); src_reg resolve_source_modifiers(const src_reg &src); @@ -389,29 +318,20 @@ public: src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block, vec4_instruction *inst, src_reg src); - bool try_emit_mad(ir_expression *ir); - bool try_emit_b2f_of_compare(ir_expression *ir); void resolve_ud_negate(src_reg *reg); - void resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg); src_reg get_timestamp(); - bool process_move_condition(ir_rvalue *ir); - void dump_instruction(backend_instruction *inst); void dump_instruction(backend_instruction *inst, FILE *file); - void visit_atomic_counter_intrinsic(ir_call *ir); - bool is_high_sampler(src_reg sampler); virtual void emit_nir_code(); - virtual void nir_setup_inputs(nir_shader *shader); - virtual void nir_setup_uniforms(nir_shader *shader); - virtual void nir_setup_uniform(nir_variable *var); - virtual void nir_setup_builtin_uniform(nir_variable *var); + virtual void nir_setup_inputs(); + virtual void nir_setup_uniforms(); virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr); - virtual void nir_setup_system_values(nir_shader *shader); + virtual void nir_setup_system_values(); virtual void nir_emit_impl(nir_function_impl *impl); virtual void nir_emit_cf_list(exec_list *list); virtual void nir_emit_if(nir_if *if_stmt); @@ -450,14 +370,11 @@ protected: bool interleaved); void setup_payload_interference(struct ra_graph *g, int first_payload_node, int reg_node_count); - virtual void assign_binding_table_offsets(); virtual void 
setup_payload() = 0; virtual void emit_prolog() = 0; - virtual void emit_program_code() = 0; virtual void emit_thread_end() = 0; virtual void emit_urb_write_header(int mrf) = 0; virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0; - virtual int compute_array_stride(ir_dereference_array *ir); virtual void gs_emit_vertex(int stream_id); virtual void gs_end_primitive(); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp index 4f4e1e12fab..af4c102c026 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_nir.cpp @@ -27,11 +27,11 @@ namespace brw { void -vec4_gs_visitor::nir_setup_inputs(nir_shader *shader) +vec4_gs_visitor::nir_setup_inputs() { - nir_inputs = ralloc_array(mem_ctx, src_reg, shader->num_inputs); + nir_inputs = ralloc_array(mem_ctx, src_reg, nir->num_inputs); - foreach_list_typed(nir_variable, var, node, &shader->inputs) { + foreach_list_typed(nir_variable, var, node, &nir->inputs) { int offset = var->data.driver_location; if (var->type->base_type == GLSL_TYPE_ARRAY) { /* Geometry shader inputs are arrays, but they use an unusual array @@ -72,6 +72,10 @@ vec4_gs_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr) dst_reg *reg; switch (instr->intrinsic) { + case nir_intrinsic_load_primitive_id: + /* We'll just read g1 directly; don't create a temporary. */ + break; + case nir_intrinsic_load_invocation_id: reg = &this->nir_system_values[SYSTEM_VALUE_INVOCATION_ID]; if (reg->file == BAD_FILE) @@ -111,6 +115,12 @@ vec4_gs_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD); break; + case nir_intrinsic_load_primitive_id: + assert(c->prog_data.include_primitive_id); + dest = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D); + emit(MOV(dest, retype(brw_vec4_grf(1, 0), BRW_REGISTER_TYPE_D))); + break; + case nir_intrinsic_load_invocation_id: { src_reg invocation_id = src_reg(nir_system_values[SYSTEM_VALUE_INVOCATION_ID]); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index d2edc5782fd..c673ccd137c 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -38,13 +38,14 @@ vec4_gs_visitor::vec4_gs_visitor(const struct brw_compiler *compiler, void *log_data, struct brw_gs_compile *c, struct gl_shader_program *prog, + nir_shader *shader, void *mem_ctx, bool no_spills, int shader_time_index) - : vec4_visitor(compiler, log_data, - &c->gp->program.Base, &c->key.tex, - &c->prog_data.base, prog, MESA_SHADER_GEOMETRY, mem_ctx, + : vec4_visitor(compiler, log_data, &c->key.tex, + &c->prog_data.base, shader, mem_ctx, no_spills, shader_time_index), + shader_prog(prog), c(c) { } @@ -207,15 +208,6 @@ vec4_gs_visitor::emit_prolog() this->current_annotation = NULL; } - -void -vec4_gs_visitor::emit_program_code() -{ - /* We don't support NV_geometry_program4. */ - unreachable("Unreached"); -} - - void vec4_gs_visitor::emit_thread_end() { @@ -310,24 +302,6 @@ vec4_gs_visitor::emit_urb_write_opcode(bool complete) } -int -vec4_gs_visitor::compute_array_stride(ir_dereference_array *ir) -{ - /* Geometry shader inputs are arrays, but they use an unusual array layout: - * instead of all array elements for a given geometry shader input being - * stored consecutively, all geometry shader inputs are interleaved into - * one giant array. 
At this stage of compilation, we assume that the - * stride of the array is BRW_VARYING_SLOT_COUNT. Later, - * setup_attributes() will remap our accesses to the actual input array. - */ - ir_dereference_variable *deref_var = ir->array->as_dereference_variable(); - if (deref_var && deref_var->var->data.mode == ir_var_shader_in) - return BRW_VARYING_SLOT_COUNT; - else - return vec4_visitor::compute_array_stride(ir); -} - - /** * Write out a batch of 32 control data bits from the control_data_bits * register to the URB. @@ -575,27 +549,6 @@ vec4_gs_visitor::gs_emit_vertex(int stream_id) this->current_annotation = NULL; } -void -vec4_gs_visitor::visit(ir_emit_vertex *ir) -{ - /* To ensure that we don't output more vertices than the shader specified - * using max_vertices, do the logic inside a conditional of the form "if - * (vertex_count < MAX)" - */ - unsigned num_output_vertices = c->gp->program.VerticesOut; - emit(CMP(dst_null_d(), this->vertex_count, - src_reg(num_output_vertices), BRW_CONDITIONAL_L)); - emit(IF(BRW_PREDICATE_NORMAL)); - - gs_emit_vertex(ir->stream_id()); - - this->current_annotation = "emit vertex: increment vertex count"; - emit(ADD(dst_reg(this->vertex_count), this->vertex_count, - src_reg(1u))); - - emit(BRW_OPCODE_ENDIF); -} - void vec4_gs_visitor::gs_end_primitive() { @@ -647,12 +600,6 @@ vec4_gs_visitor::gs_end_primitive() emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask)); } -void -vec4_gs_visitor::visit(ir_end_primitive *) -{ - gs_end_primitive(); -} - static const unsigned * generate_assembly(struct brw_context *brw, struct gl_shader_program *shader_prog, @@ -675,12 +622,10 @@ brw_gs_emit(struct brw_context *brw, void *mem_ctx, unsigned *final_assembly_size) { - if (unlikely(INTEL_DEBUG & DEBUG_GS)) { - struct brw_shader *shader = - (brw_shader *) prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; + struct gl_shader *shader = prog->_LinkedShaders[MESA_SHADER_GEOMETRY]; - brw_dump_ir("geometry", prog, &shader->base, NULL); - } + if (unlikely(INTEL_DEBUG & DEBUG_GS)) + brw_dump_ir("geometry", prog, shader, NULL); int st_index = -1; if (INTEL_DEBUG & DEBUG_SHADER_TIME) @@ -696,7 +641,8 @@ brw_gs_emit(struct brw_context *brw, c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT; vec4_gs_visitor v(brw->intelScreen->compiler, brw, - c, prog, mem_ctx, true /* no_spills */, st_index); + c, prog, shader->Program->nir, + mem_ctx, true /* no_spills */, st_index); if (v.run()) { return generate_assembly(brw, prog, &c->gp->program.Base, &c->prog_data.base, mem_ctx, v.cfg, @@ -738,11 +684,13 @@ brw_gs_emit(struct brw_context *brw, if (brw->gen >= 7) gs = new vec4_gs_visitor(brw->intelScreen->compiler, brw, - c, prog, mem_ctx, false /* no_spills */, + c, prog, shader->Program->nir, + mem_ctx, false /* no_spills */, st_index); else gs = new gen6_gs_visitor(brw->intelScreen->compiler, brw, - c, prog, mem_ctx, false /* no_spills */, + c, prog, shader->Program->nir, + mem_ctx, false /* no_spills */, st_index); if (!gs->run()) { diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h index 0e8fefabecc..85d80b8fc63 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.h @@ -71,11 +71,12 @@ public: void *log_data, struct brw_gs_compile *c, struct gl_shader_program *prog, + nir_shader *shader, void *mem_ctx, bool no_spills, int shader_time_index); - virtual void nir_setup_inputs(nir_shader *shader); + virtual void nir_setup_inputs(); virtual void 
nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr); protected: @@ -83,13 +84,9 @@ protected: const glsl_type *type); virtual void setup_payload(); virtual void emit_prolog(); - virtual void emit_program_code(); virtual void emit_thread_end(); virtual void emit_urb_write_header(int mrf); virtual vec4_instruction *emit_urb_write_opcode(bool complete); - virtual int compute_array_stride(ir_dereference_array *ir); - virtual void visit(ir_emit_vertex *); - virtual void visit(ir_end_primitive *); virtual void gs_emit_vertex(int stream_id); virtual void gs_end_primitive(); virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr); @@ -100,6 +97,8 @@ protected: void emit_control_data_bits(); void set_stream_control_data_bits(unsigned stream_id); + struct gl_shader_program *shader_prog; + src_reg vertex_count; src_reg control_data_bits; const struct brw_gs_compile * const c; diff --git a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index 9d56f9a1ae0..6536b1bfeb8 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@ -35,15 +35,13 @@ namespace brw { void vec4_visitor::emit_nir_code() { - nir_shader *nir = prog->nir; - if (nir->num_inputs > 0) - nir_setup_inputs(nir); + nir_setup_inputs(); if (nir->num_uniforms > 0) - nir_setup_uniforms(nir); + nir_setup_uniforms(); - nir_setup_system_values(nir); + nir_setup_system_values(); /* get the main function and emit it */ nir_foreach_overload(nir, overload) { @@ -105,11 +103,11 @@ setup_system_values_block(nir_block *block, void *void_visitor) } void -vec4_visitor::nir_setup_system_values(nir_shader *shader) +vec4_visitor::nir_setup_system_values() { nir_system_values = ralloc_array(mem_ctx, dst_reg, SYSTEM_VALUE_MAX); - nir_foreach_overload(shader, overload) { + nir_foreach_overload(nir, overload) { assert(strcmp(overload->function->name, "main") == 0); assert(overload->impl); nir_foreach_block(overload->impl, setup_system_values_block, this); @@ -117,11 +115,11 @@ vec4_visitor::nir_setup_system_values(nir_shader *shader) } void -vec4_visitor::nir_setup_inputs(nir_shader *shader) +vec4_visitor::nir_setup_inputs() { - nir_inputs = ralloc_array(mem_ctx, src_reg, shader->num_inputs); + nir_inputs = ralloc_array(mem_ctx, src_reg, nir->num_inputs); - foreach_list_typed(nir_variable, var, node, &shader->inputs) { + foreach_list_typed(nir_variable, var, node, &nir->inputs) { int offset = var->data.driver_location; unsigned size = type_size_vec4(var->type); for (unsigned i = 0; i < size; i++) { @@ -132,137 +130,17 @@ vec4_visitor::nir_setup_inputs(nir_shader *shader) } void -vec4_visitor::nir_setup_uniforms(nir_shader *shader) -{ - uniforms = 0; - - if (shader_prog) { - foreach_list_typed(nir_variable, var, node, &shader->uniforms) { - /* UBO's, atomics and samplers don't take up space in the - uniform file */ - if (var->interface_type != NULL || var->type->contains_atomic() || - type_size_vec4(var->type) == 0) { - continue; - } - - assert(uniforms < uniform_array_size); - uniform_size[uniforms] = type_size_vec4(var->type); - - if (strncmp(var->name, "gl_", 3) == 0) - nir_setup_builtin_uniform(var); - else - nir_setup_uniform(var); - } - } else { - /* For ARB_vertex_program, only a single "parameters" variable is - * generated to support uniform data. 
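The new nir_setup_inputs() above keys everything off var->data.driver_location and type_size_vec4(): each input variable owns a run of consecutive vec4 slots in nir_inputs. A standalone sketch of that layout rule, using made-up stand-in types rather than the real nir_variable/src_reg, could look like this:

#include <cstdio>
#include <string>
#include <vector>

// Hypothetical stand-ins for nir_variable / src_reg; the real Mesa types differ.
struct FakeVar {
   std::string name;
   int driver_location;   // first vec4 slot assigned by the linker
   unsigned vec4_slots;   // what type_size_vec4() would report
};

int main()
{
   std::vector<FakeVar> inputs = {
      { "position", 0, 1 },   // vec4 -> 1 slot
      { "model",    1, 4 },   // mat4 -> 4 slots
      { "texcoord", 5, 1 },   // vec2 -> still 1 slot
   };

   unsigned num_inputs = 6;
   std::vector<std::string> nir_inputs(num_inputs, "<unused>");

   // Mirror of the loop in nir_setup_inputs(): every variable owns
   // `vec4_slots` consecutive entries starting at driver_location.
   for (const FakeVar &var : inputs)
      for (unsigned i = 0; i < var.vec4_slots; i++)
         nir_inputs[var.driver_location + i] =
            var.name + "[" + std::to_string(i) + "]";

   for (unsigned i = 0; i < num_inputs; i++)
      std::printf("slot %u -> %s\n", i, nir_inputs[i].c_str());
   return 0;
}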
- */ - nir_variable *var = (nir_variable *) shader->uniforms.get_head(); - assert(shader->uniforms.length() == 1 && - strcmp(var->name, "parameters") == 0); - - assert(uniforms < uniform_array_size); - uniform_size[uniforms] = type_size_vec4(var->type); - - struct gl_program_parameter_list *plist = prog->Parameters; - for (unsigned p = 0; p < plist->NumParameters; p++) { - uniform_vector_size[uniforms] = plist->Parameters[p].Size; - - /* Parameters should be either vec4 uniforms or single component - * constants; matrices and other larger types should have been broken - * down earlier. - */ - assert(uniform_vector_size[uniforms] <= 4); - - int i; - for (i = 0; i < uniform_vector_size[uniforms]; i++) { - stage_prog_data->param[uniforms * 4 + i] = &plist->ParameterValues[p][i]; - } - for (; i < 4; i++) { - static const gl_constant_value zero = { 0.0 }; - stage_prog_data->param[uniforms * 4 + i] = &zero; - } - - uniforms++; - } - } -} - -void -vec4_visitor::nir_setup_uniform(nir_variable *var) -{ - int namelen = strlen(var->name); - - /* The data for our (non-builtin) uniforms is stored in a series of - * gl_uniform_driver_storage structs for each subcomponent that - * glGetUniformLocation() could name. We know it's been set up in the same - * order we'd walk the type, so walk the list of storage and find anything - * with our name, or the prefix of a component that starts with our name. - */ - for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) { - struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u]; - - if (storage->builtin) - continue; - - if (strncmp(var->name, storage->name, namelen) != 0 || - (storage->name[namelen] != 0 && - storage->name[namelen] != '.' && - storage->name[namelen] != '[')) { - continue; - } - - gl_constant_value *components = storage->storage; - unsigned vector_count = (MAX2(storage->array_elements, 1) * - storage->type->matrix_columns); - - for (unsigned s = 0; s < vector_count; s++) { - assert(uniforms < uniform_array_size); - uniform_vector_size[uniforms] = storage->type->vector_elements; - - int i; - for (i = 0; i < uniform_vector_size[uniforms]; i++) { - stage_prog_data->param[uniforms * 4 + i] = components; - components++; - } - for (; i < 4; i++) { - static const gl_constant_value zero = { 0.0 }; - stage_prog_data->param[uniforms * 4 + i] = &zero; - } - - uniforms++; - } - } -} - -void -vec4_visitor::nir_setup_builtin_uniform(nir_variable *var) +vec4_visitor::nir_setup_uniforms() { - const nir_state_slot *const slots = var->state_slots; - assert(var->state_slots != NULL); - - for (unsigned int i = 0; i < var->num_state_slots; i++) { - /* This state reference has already been setup by ir_to_mesa, - * but we'll get the same index back here. We can reference - * ParameterValues directly, since unlike brw_fs.cpp, we never - * add new state references during compile. - */ - int index = _mesa_add_state_reference(prog->Parameters, - (gl_state_index *)slots[i].tokens); - gl_constant_value *values = - &prog->Parameters->ParameterValues[index][0]; - - assert(uniforms < uniform_array_size); + uniforms = nir->num_uniforms; - for (unsigned j = 0; j < 4; j++) - stage_prog_data->param[uniforms * 4 + j] = - &values[GET_SWZ(slots[i].swizzle, j)]; - - uniform_vector_size[uniforms] = - (var->type->is_scalar() || var->type->is_vector() || - var->type->is_matrix() ? 
var->type->vector_elements : 4); + foreach_list_typed(nir_variable, var, node, &nir->uniforms) { + /* UBO's and atomics don't take up space in the uniform file */ + if (var->interface_type != NULL || var->type->contains_atomic()) + continue; - uniforms++; + if (type_size_vec4(var->type) > 0) + uniform_size[var->data.driver_location] = type_size_vec4(var->type); } } @@ -547,8 +425,6 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]); unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0; - assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage); - src_reg surf_index = src_reg(prog_data->base.binding_table.ubo_start + ubo_index); dst_reg result_dst = get_nir_dest(instr->dest); @@ -592,7 +468,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) brw_mark_surface_used(&prog_data->base, prog_data->base.binding_table.ubo_start + - shader_prog->NumBufferInterfaceBlocks - 1); + nir->info.num_ssbos - 1); } /* Offset */ @@ -739,7 +615,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) */ brw_mark_surface_used(&prog_data->base, prog_data->base.binding_table.ubo_start + - shader_prog->NumBufferInterfaceBlocks - 1); + nir->info.num_ssbos - 1); } src_reg offset_reg = src_reg(this, glsl_type::uint_type); @@ -891,7 +767,7 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) */ brw_mark_surface_used(&prog_data->base, prog_data->base.binding_table.ubo_start + - shader_prog->NumBufferInterfaceBlocks - 1); + nir->info.num_ssbos - 1); } unsigned const_offset = instr->const_index[1]; @@ -923,6 +799,15 @@ vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr *instr) break; } + case nir_intrinsic_memory_barrier: { + const vec4_builder bld = + vec4_builder(this).at_end().annotate(current_annotation, base_ir); + const dst_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 2); + bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp) + ->regs_written = 2; + break; + } + default: unreachable("Unknown intrinsic"); } @@ -952,7 +837,7 @@ vec4_visitor::nir_emit_ssbo_atomic(int op, nir_intrinsic_instr *instr) */ brw_mark_surface_used(&prog_data->base, prog_data->base.binding_table.ubo_start + - shader_prog->NumBufferInterfaceBlocks - 1); + nir->info.num_ssbos - 1); } src_reg offset = get_nir_src(instr->src[1], 1); diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 6d61112056c..f80425a5b00 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@ -580,15 +580,6 @@ vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0) emit(VEC4_OPCODE_PACK_BYTES, dst, bytes); } -void -vec4_visitor::visit_instructions(const exec_list *list) -{ - foreach_in_list(ir_instruction, ir, list) { - base_ir = ir; - ir->accept(this); - } -} - /** * Returns the minimum number of vec4 elements needed to pack a type. 
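Several of the hunks above lean on the vec4 slot counting that type_size_vec4() performs: the new nir_setup_uniforms() sizes uniform_size[] with it, and the comment above introduces it for register allocation. As a rough sketch only, with a toy type model instead of glsl_type, the counting rule looks like this:

#include <cstdio>
#include <vector>

// Toy type model; Mesa's glsl_type is far richer. This only mirrors the
// counting rule: scalars through vec4 pack into one slot, matrices take one
// slot per column, arrays and structs recurse.
struct ToyType {
   enum Kind { VECTOR, MATRIX, ARRAY, STRUCT } kind;
   unsigned columns = 1;                 // MATRIX: number of columns
   unsigned array_len = 0;               // ARRAY: element count
   const ToyType *element = nullptr;     // ARRAY: element type
   std::vector<const ToyType *> fields;  // STRUCT: member types
};

static unsigned type_size_vec4(const ToyType &t)
{
   switch (t.kind) {
   case ToyType::VECTOR: return 1;
   case ToyType::MATRIX: return t.columns;
   case ToyType::ARRAY:  return t.array_len * type_size_vec4(*t.element);
   case ToyType::STRUCT: {
      unsigned size = 0;
      for (const ToyType *f : t.fields)
         size += type_size_vec4(*f);
      return size;
   }
   }
   return 0;
}

int main()
{
   ToyType vec3 = { ToyType::VECTOR };
   ToyType mat4 = { ToyType::MATRIX, 4 };
   ToyType arr  = { ToyType::ARRAY };
   arr.array_len = 3;
   arr.element   = &mat4;
   ToyType record = { ToyType::STRUCT };
   record.fields = { &vec3, &arr };

   std::printf("vec3    -> %u slot(s)\n", type_size_vec4(vec3));    // 1
   std::printf("mat4    -> %u slot(s)\n", type_size_vec4(mat4));    // 4
   std::printf("mat4[3] -> %u slot(s)\n", type_size_vec4(arr));     // 12
   std::printf("struct  -> %u slot(s)\n", type_size_vec4(record));  // 13
   return 0;
}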
* @@ -680,1808 +671,161 @@ src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type, int size) this->swizzle = BRW_SWIZZLE_NOOP; this->type = brw_type_for_base_type(type); -} - -dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) -{ - init(); - - this->file = GRF; - this->reg = v->alloc.allocate(type_size_vec4(type)); - - if (type->is_array() || type->is_record()) { - this->writemask = WRITEMASK_XYZW; - } else { - this->writemask = (1 << type->vector_elements) - 1; - } - - this->type = brw_type_for_base_type(type); -} - -void -vec4_visitor::setup_vec4_uniform_value(unsigned param_offset, - const gl_constant_value *values, - unsigned n) -{ - static const gl_constant_value zero = { 0 }; - - assert(param_offset % 4 == 0); - - for (unsigned i = 0; i < n; ++i) - stage_prog_data->param[param_offset + i] = &values[i]; - - for (unsigned i = n; i < 4; ++i) - stage_prog_data->param[param_offset + i] = &zero; - - uniform_vector_size[param_offset / 4] = n; -} - -/* Our support for uniforms is piggy-backed on the struct - * gl_fragment_program, because that's where the values actually - * get stored, rather than in some global gl_shader_program uniform - * store. - */ -void -vec4_visitor::setup_uniform_values(ir_variable *ir) -{ - int namelen = strlen(ir->name); - - /* The data for our (non-builtin) uniforms is stored in a series of - * gl_uniform_driver_storage structs for each subcomponent that - * glGetUniformLocation() could name. We know it's been set up in the same - * order we'd walk the type, so walk the list of storage and find anything - * with our name, or the prefix of a component that starts with our name. - */ - for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) { - struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u]; - - if (storage->builtin) - continue; - - if (strncmp(ir->name, storage->name, namelen) != 0 || - (storage->name[namelen] != 0 && - storage->name[namelen] != '.' && - storage->name[namelen] != '[')) { - continue; - } - - const unsigned vector_count = (MAX2(storage->array_elements, 1) * - storage->type->matrix_columns); - const unsigned vector_size = storage->type->vector_elements; - - for (unsigned s = 0; s < vector_count; s++) { - setup_vec4_uniform_value(uniforms * 4, - &storage->storage[s * vector_size], - vector_size); - uniforms++; - } - } -} - -/* Our support for builtin uniforms is even scarier than non-builtin. - * It sits on top of the PROG_STATE_VAR parameters that are - * automatically updated from GL context state. - */ -void -vec4_visitor::setup_builtin_uniform_values(ir_variable *ir) -{ - const ir_state_slot *const slots = ir->get_state_slots(); - assert(slots != NULL); - - for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) { - /* This state reference has already been setup by ir_to_mesa, - * but we'll get the same index back here. We can reference - * ParameterValues directly, since unlike brw_fs.cpp, we never - * add new state references during compile. - */ - int index = _mesa_add_state_reference(this->prog->Parameters, - (gl_state_index *)slots[i].tokens); - gl_constant_value *values = - &this->prog->Parameters->ParameterValues[index][0]; - - assert(this->uniforms < uniform_array_size); - - for (unsigned j = 0; j < 4; j++) - stage_prog_data->param[this->uniforms * 4 + j] = - &values[GET_SWZ(slots[i].swizzle, j)]; - - this->uniform_vector_size[this->uniforms] = - (ir->type->is_scalar() || ir->type->is_vector() || - ir->type->is_matrix() ? 
ir->type->vector_elements : 4); - - this->uniforms++; - } -} - -dst_reg * -vec4_visitor::variable_storage(ir_variable *var) -{ - return (dst_reg *)hash_table_find(this->variable_ht, var); -} - -void -vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir, - enum brw_predicate *predicate) -{ - ir_expression *expr = ir->as_expression(); - - *predicate = BRW_PREDICATE_NORMAL; - - if (expr && expr->operation != ir_binop_ubo_load) { - src_reg op[3]; - vec4_instruction *inst; - - assert(expr->get_num_operands() <= 3); - for (unsigned int i = 0; i < expr->get_num_operands(); i++) { - expr->operands[i]->accept(this); - op[i] = this->result; - - resolve_ud_negate(&op[i]); - } - - switch (expr->operation) { - case ir_unop_logic_not: - inst = emit(AND(dst_null_d(), op[0], src_reg(1))); - inst->conditional_mod = BRW_CONDITIONAL_Z; - break; - - case ir_binop_logic_xor: - if (devinfo->gen <= 5) { - src_reg temp = src_reg(this, ir->type); - emit(XOR(dst_reg(temp), op[0], op[1])); - inst = emit(AND(dst_null_d(), temp, src_reg(1))); - } else { - inst = emit(XOR(dst_null_d(), op[0], op[1])); - } - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - - case ir_binop_logic_or: - if (devinfo->gen <= 5) { - src_reg temp = src_reg(this, ir->type); - emit(OR(dst_reg(temp), op[0], op[1])); - inst = emit(AND(dst_null_d(), temp, src_reg(1))); - } else { - inst = emit(OR(dst_null_d(), op[0], op[1])); - } - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - - case ir_binop_logic_and: - if (devinfo->gen <= 5) { - src_reg temp = src_reg(this, ir->type); - emit(AND(dst_reg(temp), op[0], op[1])); - inst = emit(AND(dst_null_d(), temp, src_reg(1))); - } else { - inst = emit(AND(dst_null_d(), op[0], op[1])); - } - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - - case ir_unop_f2b: - if (devinfo->gen >= 6) { - emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ)); - } else { - inst = emit(MOV(dst_null_f(), op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } - break; - - case ir_unop_i2b: - if (devinfo->gen >= 6) { - emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ)); - } else { - inst = emit(MOV(dst_null_d(), op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - } - break; - - case ir_binop_all_equal: - if (devinfo->gen <= 5) { - resolve_bool_comparison(expr->operands[0], &op[0]); - resolve_bool_comparison(expr->operands[1], &op[1]); - } - inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z)); - *predicate = BRW_PREDICATE_ALIGN16_ALL4H; - break; - - case ir_binop_any_nequal: - if (devinfo->gen <= 5) { - resolve_bool_comparison(expr->operands[0], &op[0]); - resolve_bool_comparison(expr->operands[1], &op[1]); - } - inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ)); - *predicate = BRW_PREDICATE_ALIGN16_ANY4H; - break; - - case ir_unop_any: - if (devinfo->gen <= 5) { - resolve_bool_comparison(expr->operands[0], &op[0]); - } - inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ)); - *predicate = BRW_PREDICATE_ALIGN16_ANY4H; - break; - - case ir_binop_greater: - case ir_binop_gequal: - case ir_binop_less: - case ir_binop_lequal: - case ir_binop_equal: - case ir_binop_nequal: - if (devinfo->gen <= 5) { - resolve_bool_comparison(expr->operands[0], &op[0]); - resolve_bool_comparison(expr->operands[1], &op[1]); - } - emit(CMP(dst_null_d(), op[0], op[1], - brw_conditional_for_comparison(expr->operation))); - break; - - case ir_triop_csel: { - /* Expand the boolean condition into the flag register. 
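The ir_triop_csel case that starts above expands the boolean condition into the flag register and then emits a predicated SEL between the two operands. Assuming the usual representation of a true condition as an all-ones mask and false as zero, a scalar analogue of that mask-based select is:

#include <cstdint>
#include <cstdio>

// Stand-alone illustration only: cond_mask is assumed to be 0 (false) or
// ~0 (true), and the select picks `a` when the mask is all ones.
static int32_t csel(int32_t cond_mask, int32_t a, int32_t b)
{
   return (cond_mask & a) | (~cond_mask & b);
}

int main()
{
   std::printf("%d\n", csel(~0, 10, 20));  // prints 10
   std::printf("%d\n", csel(0, 10, 20));   // prints 20
   return 0;
}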
*/ - inst = emit(MOV(dst_null_d(), op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - - /* Select which boolean to return. */ - dst_reg temp(this, expr->operands[1]->type); - inst = emit(BRW_OPCODE_SEL, temp, op[1], op[2]); - inst->predicate = BRW_PREDICATE_NORMAL; - - /* Expand the result to a condition code. */ - inst = emit(MOV(dst_null_d(), src_reg(temp))); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - break; - } - - default: - unreachable("not reached"); - } - return; - } - - ir->accept(this); - - resolve_ud_negate(&this->result); - - vec4_instruction *inst = emit(AND(dst_null_d(), this->result, src_reg(1))); - inst->conditional_mod = BRW_CONDITIONAL_NZ; -} - -/** - * Emit a gen6 IF statement with the comparison folded into the IF - * instruction. - */ -void -vec4_visitor::emit_if_gen6(ir_if *ir) -{ - ir_expression *expr = ir->condition->as_expression(); - - if (expr && expr->operation != ir_binop_ubo_load) { - src_reg op[3]; - dst_reg temp; - - assert(expr->get_num_operands() <= 3); - for (unsigned int i = 0; i < expr->get_num_operands(); i++) { - expr->operands[i]->accept(this); - op[i] = this->result; - } - - switch (expr->operation) { - case ir_unop_logic_not: - emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z)); - return; - - case ir_binop_logic_xor: - emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ)); - return; - - case ir_binop_logic_or: - temp = dst_reg(this, glsl_type::bool_type); - emit(OR(temp, op[0], op[1])); - emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ)); - return; - - case ir_binop_logic_and: - temp = dst_reg(this, glsl_type::bool_type); - emit(AND(temp, op[0], op[1])); - emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ)); - return; - - case ir_unop_f2b: - emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ)); - return; - - case ir_unop_i2b: - emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ)); - return; - - case ir_binop_greater: - case ir_binop_gequal: - case ir_binop_less: - case ir_binop_lequal: - case ir_binop_equal: - case ir_binop_nequal: - emit(IF(op[0], op[1], - brw_conditional_for_comparison(expr->operation))); - return; - - case ir_binop_all_equal: - emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z)); - emit(IF(BRW_PREDICATE_ALIGN16_ALL4H)); - return; - - case ir_binop_any_nequal: - emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ)); - emit(IF(BRW_PREDICATE_ALIGN16_ANY4H)); - return; - - case ir_unop_any: - emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ)); - emit(IF(BRW_PREDICATE_ALIGN16_ANY4H)); - return; - - case ir_triop_csel: { - /* Expand the boolean condition into the flag register. */ - vec4_instruction *inst = emit(MOV(dst_null_d(), op[0])); - inst->conditional_mod = BRW_CONDITIONAL_NZ; - - /* Select which boolean to return. 
*/ - dst_reg temp(this, expr->operands[1]->type); - inst = emit(BRW_OPCODE_SEL, temp, op[1], op[2]); - inst->predicate = BRW_PREDICATE_NORMAL; - - emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ)); - return; - } - - default: - unreachable("not reached"); - } - return; - } - - ir->condition->accept(this); - - emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ)); -} - -void -vec4_visitor::visit(ir_variable *ir) -{ - dst_reg *reg = NULL; - - if (variable_storage(ir)) - return; - - switch (ir->data.mode) { - case ir_var_shader_in: - assert(ir->data.location != -1); - reg = new(mem_ctx) dst_reg(ATTR, ir->data.location); - break; - - case ir_var_shader_out: - assert(ir->data.location != -1); - reg = new(mem_ctx) dst_reg(this, ir->type); - - for (int i = 0; i < type_size_vec4(ir->type); i++) { - output_reg[ir->data.location + i] = *reg; - output_reg[ir->data.location + i].reg_offset = i; - output_reg_annotation[ir->data.location + i] = ir->name; - } - break; - - case ir_var_auto: - case ir_var_temporary: - reg = new(mem_ctx) dst_reg(this, ir->type); - break; - - case ir_var_uniform: - case ir_var_shader_storage: - reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms); - - /* Thanks to the lower_ubo_reference pass, we will see only - * ir_binop_{ubo,ssbo}_load expressions and not ir_dereference_variable - * for UBO/SSBO variables, so no need for them to be in variable_ht. - * - * Some uniforms, such as samplers and atomic counters, have no actual - * storage, so we should ignore them. - */ - if (ir->is_in_buffer_block() || type_size_vec4(ir->type) == 0) - return; - - /* Track how big the whole uniform variable is, in case we need to put a - * copy of its data into pull constants for array access. - */ - assert(this->uniforms < uniform_array_size); - this->uniform_size[this->uniforms] = type_size_vec4(ir->type); - - if (!strncmp(ir->name, "gl_", 3)) { - setup_builtin_uniform_values(ir); - } else { - setup_uniform_values(ir); - } - break; - - case ir_var_system_value: - reg = make_reg_for_system_value(ir->data.location, ir->type); - break; - - default: - unreachable("not reached"); - } - - reg->type = brw_type_for_base_type(ir->type); - hash_table_insert(this->variable_ht, reg, ir); -} - -void -vec4_visitor::visit(ir_loop *ir) -{ - /* We don't want debugging output to print the whole body of the - * loop as the annotation. - */ - this->base_ir = NULL; - - emit(BRW_OPCODE_DO); - - visit_instructions(&ir->body_instructions); - - emit(BRW_OPCODE_WHILE); -} - -void -vec4_visitor::visit(ir_loop_jump *ir) -{ - switch (ir->mode) { - case ir_loop_jump::jump_break: - emit(BRW_OPCODE_BREAK); - break; - case ir_loop_jump::jump_continue: - emit(BRW_OPCODE_CONTINUE); - break; - } -} - - -void -vec4_visitor::visit(ir_function_signature *) -{ - unreachable("not reached"); -} - -void -vec4_visitor::visit(ir_function *ir) -{ - /* Ignore function bodies other than main() -- we shouldn't see calls to - * them since they should all be inlined. - */ - if (strcmp(ir->name, "main") == 0) { - const ir_function_signature *sig; - exec_list empty; - - sig = ir->matching_signature(NULL, &empty, false); - - assert(sig); - - visit_instructions(&sig->body); - } -} - -bool -vec4_visitor::try_emit_mad(ir_expression *ir) -{ - /* 3-src instructions were introduced in gen6. */ - if (devinfo->gen < 6) - return false; - - /* MAD can only handle floating-point data. 
*/ - if (ir->type->base_type != GLSL_TYPE_FLOAT) - return false; - - ir_rvalue *nonmul; - ir_expression *mul; - bool mul_negate, mul_abs; - - for (int i = 0; i < 2; i++) { - mul_negate = false; - mul_abs = false; - - mul = ir->operands[i]->as_expression(); - nonmul = ir->operands[1 - i]; - - if (mul && mul->operation == ir_unop_abs) { - mul = mul->operands[0]->as_expression(); - mul_abs = true; - } else if (mul && mul->operation == ir_unop_neg) { - mul = mul->operands[0]->as_expression(); - mul_negate = true; - } - - if (mul && mul->operation == ir_binop_mul) - break; - } - - if (!mul || mul->operation != ir_binop_mul) - return false; - - nonmul->accept(this); - src_reg src0 = fix_3src_operand(this->result); - - mul->operands[0]->accept(this); - src_reg src1 = fix_3src_operand(this->result); - src1.negate ^= mul_negate; - src1.abs = mul_abs; - if (mul_abs) - src1.negate = false; - - mul->operands[1]->accept(this); - src_reg src2 = fix_3src_operand(this->result); - src2.abs = mul_abs; - if (mul_abs) - src2.negate = false; - - this->result = src_reg(this, ir->type); - emit(BRW_OPCODE_MAD, dst_reg(this->result), src0, src1, src2); - - return true; -} - -bool -vec4_visitor::try_emit_b2f_of_compare(ir_expression *ir) -{ - /* This optimization relies on CMP setting the destination to 0 when - * false. Early hardware only sets the least significant bit, and - * leaves the other bits undefined. So we can't use it. - */ - if (devinfo->gen < 6) - return false; - - ir_expression *const cmp = ir->operands[0]->as_expression(); - - if (cmp == NULL) - return false; - - switch (cmp->operation) { - case ir_binop_less: - case ir_binop_greater: - case ir_binop_lequal: - case ir_binop_gequal: - case ir_binop_equal: - case ir_binop_nequal: - break; - - default: - return false; - } - - cmp->operands[0]->accept(this); - const src_reg cmp_src0 = this->result; - - cmp->operands[1]->accept(this); - const src_reg cmp_src1 = this->result; - - this->result = src_reg(this, ir->type); - - emit(CMP(dst_reg(this->result), cmp_src0, cmp_src1, - brw_conditional_for_comparison(cmp->operation))); - - /* If the comparison is false, this->result will just happen to be zero. - */ - vec4_instruction *const inst = emit(BRW_OPCODE_SEL, dst_reg(this->result), - this->result, src_reg(1.0f)); - inst->predicate = BRW_PREDICATE_NORMAL; - inst->predicate_inverse = true; - - return true; -} - -vec4_instruction * -vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst, - src_reg src0, src_reg src1) -{ - vec4_instruction *inst; - - if (devinfo->gen >= 6) { - inst = emit(BRW_OPCODE_SEL, dst, src0, src1); - inst->conditional_mod = conditionalmod; - } else { - emit(CMP(dst, src0, src1, conditionalmod)); - - inst = emit(BRW_OPCODE_SEL, dst, src0, src1); - inst->predicate = BRW_PREDICATE_NORMAL; - } - - return inst; -} - -vec4_instruction * -vec4_visitor::emit_lrp(const dst_reg &dst, - const src_reg &x, const src_reg &y, const src_reg &a) -{ - if (devinfo->gen >= 6) { - /* Note that the instruction's argument order is reversed from GLSL - * and the IR. - */ - return emit(LRP(dst, fix_3src_operand(a), fix_3src_operand(y), - fix_3src_operand(x))); - } else { - /* Earlier generations don't support three source operations, so we - * need to emit x*(1-a) + y*a. 
- */ - dst_reg y_times_a = dst_reg(this, glsl_type::vec4_type); - dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type); - dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type); - y_times_a.writemask = dst.writemask; - one_minus_a.writemask = dst.writemask; - x_times_one_minus_a.writemask = dst.writemask; - - emit(MUL(y_times_a, y, a)); - emit(ADD(one_minus_a, negate(a), src_reg(1.0f))); - emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a))); - return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a))); - } -} - -/** - * Emits the instructions needed to perform a pull constant load. before_block - * and before_inst can be NULL in which case the instruction will be appended - * to the end of the instruction list. - */ -void -vec4_visitor::emit_pull_constant_load_reg(dst_reg dst, - src_reg surf_index, - src_reg offset_reg, - bblock_t *before_block, - vec4_instruction *before_inst) -{ - assert((before_inst == NULL && before_block == NULL) || - (before_inst && before_block)); - - vec4_instruction *pull; - - if (devinfo->gen >= 9) { - /* Gen9+ needs a message header in order to use SIMD4x2 mode */ - src_reg header(this, glsl_type::uvec4_type, 2); - - pull = new(mem_ctx) - vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9, - dst_reg(header)); - - if (before_inst) - emit_before(before_block, before_inst, pull); - else - emit(pull); - - dst_reg index_reg = retype(offset(dst_reg(header), 1), - offset_reg.type); - pull = MOV(writemask(index_reg, WRITEMASK_X), offset_reg); - - if (before_inst) - emit_before(before_block, before_inst, pull); - else - emit(pull); - - pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, - dst, - surf_index, - header); - pull->mlen = 2; - pull->header_size = 1; - } else if (devinfo->gen >= 7) { - dst_reg grf_offset = dst_reg(this, glsl_type::int_type); - - grf_offset.type = offset_reg.type; - - pull = MOV(grf_offset, offset_reg); - - if (before_inst) - emit_before(before_block, before_inst, pull); - else - emit(pull); - - pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, - dst, - surf_index, - src_reg(grf_offset)); - pull->mlen = 1; - } else { - pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD, - dst, - surf_index, - offset_reg); - pull->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1; - pull->mlen = 1; - } - - if (before_inst) - emit_before(before_block, before_inst, pull); - else - emit(pull); -} - -src_reg -vec4_visitor::emit_uniformize(const src_reg &src) -{ - const src_reg chan_index(this, glsl_type::uint_type); - const dst_reg dst = retype(dst_reg(this, glsl_type::uint_type), - src.type); - - emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dst_reg(chan_index)) - ->force_writemask_all = true; - emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index) - ->force_writemask_all = true; - - return src_reg(dst); -} - -void -vec4_visitor::visit(ir_expression *ir) -{ - unsigned int operand; - src_reg op[ARRAY_SIZE(ir->operands)]; - vec4_instruction *inst; - - if (ir->operation == ir_binop_add) { - if (try_emit_mad(ir)) - return; - } - - if (ir->operation == ir_unop_b2f) { - if (try_emit_b2f_of_compare(ir)) - return; - } - - /* Storage for our result. Ideally for an assignment we'd be using - * the actual storage for the result here, instead. 
- */ - dst_reg result_dst(this, ir->type); - src_reg result_src(result_dst); - - if (ir->operation == ir_triop_csel) { - ir->operands[1]->accept(this); - op[1] = this->result; - ir->operands[2]->accept(this); - op[2] = this->result; - - enum brw_predicate predicate; - emit_bool_to_cond_code(ir->operands[0], &predicate); - inst = emit(BRW_OPCODE_SEL, result_dst, op[1], op[2]); - inst->predicate = predicate; - this->result = result_src; - return; - } - - for (operand = 0; operand < ir->get_num_operands(); operand++) { - this->result.file = BAD_FILE; - ir->operands[operand]->accept(this); - if (this->result.file == BAD_FILE) { - fprintf(stderr, "Failed to get tree for expression operand:\n"); - ir->operands[operand]->fprint(stderr); - exit(1); - } - op[operand] = this->result; - - /* Matrix expression operands should have been broken down to vector - * operations already. - */ - assert(!ir->operands[operand]->type->is_matrix()); - } - - /* If nothing special happens, this is the result. */ - this->result = result_src; - - switch (ir->operation) { - case ir_unop_logic_not: - emit(NOT(result_dst, op[0])); - break; - case ir_unop_neg: - op[0].negate = !op[0].negate; - emit(MOV(result_dst, op[0])); - break; - case ir_unop_abs: - op[0].abs = true; - op[0].negate = false; - emit(MOV(result_dst, op[0])); - break; - - case ir_unop_sign: - if (ir->type->is_float()) { - /* AND(val, 0x80000000) gives the sign bit. - * - * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not - * zero. - */ - emit(CMP(dst_null_f(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ)); - - op[0].type = BRW_REGISTER_TYPE_UD; - result_dst.type = BRW_REGISTER_TYPE_UD; - emit(AND(result_dst, op[0], src_reg(0x80000000u))); - - inst = emit(OR(result_dst, src_reg(result_dst), src_reg(0x3f800000u))); - inst->predicate = BRW_PREDICATE_NORMAL; - - this->result.type = BRW_REGISTER_TYPE_F; - } else { - /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1). - * -> non-negative val generates 0x00000000. - * Predicated OR sets 1 if val is positive. - */ - emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_G)); - - emit(ASR(result_dst, op[0], src_reg(31))); - - inst = emit(OR(result_dst, src_reg(result_dst), src_reg(1))); - inst->predicate = BRW_PREDICATE_NORMAL; - } - break; - - case ir_unop_rcp: - emit_math(SHADER_OPCODE_RCP, result_dst, op[0]); - break; - - case ir_unop_exp2: - emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]); - break; - case ir_unop_log2: - emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]); - break; - case ir_unop_exp: - case ir_unop_log: - unreachable("not reached: should be handled by ir_explog_to_explog2"); - case ir_unop_sin: - emit_math(SHADER_OPCODE_SIN, result_dst, op[0]); - break; - case ir_unop_cos: - emit_math(SHADER_OPCODE_COS, result_dst, op[0]); - break; - - case ir_unop_dFdx: - case ir_unop_dFdx_coarse: - case ir_unop_dFdx_fine: - case ir_unop_dFdy: - case ir_unop_dFdy_coarse: - case ir_unop_dFdy_fine: - unreachable("derivatives not valid in vertex shader"); - - case ir_unop_bitfield_reverse: - emit(BFREV(result_dst, op[0])); - break; - case ir_unop_bit_count: - emit(CBIT(result_dst, op[0])); - break; - case ir_unop_find_msb: { - src_reg temp = src_reg(this, glsl_type::uint_type); - - inst = emit(FBH(dst_reg(temp), op[0])); - inst->dst.writemask = WRITEMASK_XYZW; - - /* FBH counts from the MSB side, while GLSL's findMSB() wants the count - * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then - * subtract the result from 31 to convert the MSB count into an LSB count. 
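The find_msb fixup described in the comment above (FBH counts from the MSB, while findMSB() wants an LSB-relative index, so subtract from 31 unless FBH reported 0xFFFFFFFF) can be sketched on the CPU like this; fbh() here is a hypothetical stand-in for the hardware instruction:

#include <cstdint>
#include <cstdio>

static int32_t fbh(uint32_t v)            // stand-in for the hardware FBH
{
   if (v == 0)
      return -1;                          // 0xFFFFFFFF: no bit found
   int32_t count = 0;
   while (!(v & 0x80000000u)) {           // count leading zeros
      v <<= 1;
      count++;
   }
   return count;
}

static int32_t find_msb(uint32_t v)
{
   int32_t from_msb = fbh(v);
   // The predicated ADD in the code above: only rewrite valid results.
   return (from_msb == -1) ? -1 : 31 - from_msb;
}

int main()
{
   std::printf("%d\n", find_msb(0));           // -1
   std::printf("%d\n", find_msb(1));           // 0
   std::printf("%d\n", find_msb(0x00008000));  // 15
   return 0;
}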
- */ - - /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */ - temp.swizzle = BRW_SWIZZLE_NOOP; - emit(MOV(result_dst, temp)); - - src_reg src_tmp = src_reg(result_dst); - emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ)); - - src_tmp.negate = true; - inst = emit(ADD(result_dst, src_tmp, src_reg(31))); - inst->predicate = BRW_PREDICATE_NORMAL; - break; - } - case ir_unop_find_lsb: - emit(FBL(result_dst, op[0])); - break; - case ir_unop_saturate: - inst = emit(MOV(result_dst, op[0])); - inst->saturate = true; - break; - - case ir_unop_noise: - unreachable("not reached: should be handled by lower_noise"); - - case ir_unop_subroutine_to_int: - emit(MOV(result_dst, op[0])); - break; - - case ir_unop_ssbo_unsized_array_length: - unreachable("not reached: should be handled by lower_ubo_reference"); - break; - - case ir_binop_add: - emit(ADD(result_dst, op[0], op[1])); - break; - case ir_binop_sub: - unreachable("not reached: should be handled by ir_sub_to_add_neg"); - - case ir_binop_mul: - if (devinfo->gen < 8 && ir->type->is_integer()) { - /* For integer multiplication, the MUL uses the low 16 bits of one of - * the operands (src0 through SNB, src1 on IVB and later). The MACH - * accumulates in the contribution of the upper 16 bits of that - * operand. If we can determine that one of the args is in the low - * 16 bits, though, we can just emit a single MUL. - */ - if (ir->operands[0]->is_uint16_constant()) { - if (devinfo->gen < 7) - emit(MUL(result_dst, op[0], op[1])); - else - emit(MUL(result_dst, op[1], op[0])); - } else if (ir->operands[1]->is_uint16_constant()) { - if (devinfo->gen < 7) - emit(MUL(result_dst, op[1], op[0])); - else - emit(MUL(result_dst, op[0], op[1])); - } else { - struct brw_reg acc = retype(brw_acc_reg(8), result_dst.type); - - emit(MUL(acc, op[0], op[1])); - emit(MACH(dst_null_d(), op[0], op[1])); - emit(MOV(result_dst, src_reg(acc))); - } - } else { - emit(MUL(result_dst, op[0], op[1])); - } - break; - case ir_binop_imul_high: { - struct brw_reg acc = retype(brw_acc_reg(8), result_dst.type); - - emit(MUL(acc, op[0], op[1])); - emit(MACH(result_dst, op[0], op[1])); - break; - } - case ir_binop_div: - /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */ - assert(ir->type->is_integer()); - emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]); - break; - - case ir_binop_carry: - unreachable("Should have been lowered by carry_to_arith()."); - - case ir_binop_borrow: - unreachable("Should have been lowered by borrow_to_arith()."); - - case ir_binop_mod: - /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */ - assert(ir->type->is_integer()); - emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]); - break; - - case ir_binop_less: - case ir_binop_greater: - case ir_binop_lequal: - case ir_binop_gequal: - case ir_binop_equal: - case ir_binop_nequal: { - if (devinfo->gen <= 5) { - resolve_bool_comparison(ir->operands[0], &op[0]); - resolve_bool_comparison(ir->operands[1], &op[1]); - } - emit(CMP(result_dst, op[0], op[1], - brw_conditional_for_comparison(ir->operation))); - break; - } - - case ir_binop_all_equal: - if (devinfo->gen <= 5) { - resolve_bool_comparison(ir->operands[0], &op[0]); - resolve_bool_comparison(ir->operands[1], &op[1]); - } - - /* "==" operator producing a scalar boolean. 
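The ir_binop_all_equal and ir_binop_any_nequal cases around here compare every channel and then collapse the result into a single scalar boolean held as 0 or ~0. A plain C++ sketch of that collapsing, ignoring the ALIGN16 predication details:

#include <cstdint>
#include <cstdio>

static int32_t all_equal(const float a[4], const float b[4])
{
   bool all = true;
   for (int i = 0; i < 4; i++)
      all = all && (a[i] == b[i]);
   return all ? ~0 : 0;          // scalar boolean as 0 / ~0
}

static int32_t any_nequal(const float a[4], const float b[4])
{
   return ~all_equal(a, b);      // logically "not all equal"
}

int main()
{
   float x[4] = { 1, 2, 3, 4 };
   float y[4] = { 1, 2, 3, 5 };
   std::printf("all_equal  = 0x%08x\n", (uint32_t)all_equal(x, x));   // 0xffffffff
   std::printf("any_nequal = 0x%08x\n", (uint32_t)any_nequal(x, y));  // 0xffffffff
   return 0;
}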
*/ - if (ir->operands[0]->type->is_vector() || - ir->operands[1]->type->is_vector()) { - emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z)); - emit(MOV(result_dst, src_reg(0))); - inst = emit(MOV(result_dst, src_reg(~0))); - inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H; - } else { - emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z)); - } - break; - case ir_binop_any_nequal: - if (devinfo->gen <= 5) { - resolve_bool_comparison(ir->operands[0], &op[0]); - resolve_bool_comparison(ir->operands[1], &op[1]); - } - - /* "!=" operator producing a scalar boolean. */ - if (ir->operands[0]->type->is_vector() || - ir->operands[1]->type->is_vector()) { - emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ)); - - emit(MOV(result_dst, src_reg(0))); - inst = emit(MOV(result_dst, src_reg(~0))); - inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; - } else { - emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ)); - } - break; - - case ir_unop_any: - if (devinfo->gen <= 5) { - resolve_bool_comparison(ir->operands[0], &op[0]); - } - emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ)); - emit(MOV(result_dst, src_reg(0))); - - inst = emit(MOV(result_dst, src_reg(~0))); - inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H; - break; - - case ir_binop_logic_xor: - emit(XOR(result_dst, op[0], op[1])); - break; - - case ir_binop_logic_or: - emit(OR(result_dst, op[0], op[1])); - break; - - case ir_binop_logic_and: - emit(AND(result_dst, op[0], op[1])); - break; - - case ir_binop_dot: - assert(ir->operands[0]->type->is_vector()); - assert(ir->operands[0]->type == ir->operands[1]->type); - emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements); - break; - - case ir_unop_sqrt: - emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]); - break; - case ir_unop_rsq: - emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]); - break; - - case ir_unop_bitcast_i2f: - case ir_unop_bitcast_u2f: - this->result = op[0]; - this->result.type = BRW_REGISTER_TYPE_F; - break; - - case ir_unop_bitcast_f2i: - this->result = op[0]; - this->result.type = BRW_REGISTER_TYPE_D; - break; - - case ir_unop_bitcast_f2u: - this->result = op[0]; - this->result.type = BRW_REGISTER_TYPE_UD; - break; - - case ir_unop_i2f: - case ir_unop_i2u: - case ir_unop_u2i: - case ir_unop_u2f: - case ir_unop_f2i: - case ir_unop_f2u: - emit(MOV(result_dst, op[0])); - break; - case ir_unop_b2i: - case ir_unop_b2f: - if (devinfo->gen <= 5) { - resolve_bool_comparison(ir->operands[0], &op[0]); - } - emit(MOV(result_dst, negate(op[0]))); - break; - case ir_unop_f2b: - emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ)); - break; - case ir_unop_i2b: - emit(CMP(result_dst, op[0], src_reg(0), BRW_CONDITIONAL_NZ)); - break; - - case ir_unop_trunc: - emit(RNDZ(result_dst, op[0])); - break; - case ir_unop_ceil: { - src_reg tmp = src_reg(this, ir->type); - op[0].negate = !op[0].negate; - emit(RNDD(dst_reg(tmp), op[0])); - tmp.negate = true; - emit(MOV(result_dst, tmp)); - } - break; - case ir_unop_floor: - inst = emit(RNDD(result_dst, op[0])); - break; - case ir_unop_fract: - inst = emit(FRC(result_dst, op[0])); - break; - case ir_unop_round_even: - emit(RNDE(result_dst, op[0])); - break; - - case ir_unop_get_buffer_size: - unreachable("not reached: not implemented"); - break; - - case ir_binop_min: - emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]); - break; - case ir_binop_max: - emit_minmax(BRW_CONDITIONAL_GE, result_dst, op[0], op[1]); - break; - - case ir_binop_pow: - emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]); - 
break; - - case ir_unop_bit_not: - inst = emit(NOT(result_dst, op[0])); - break; - case ir_binop_bit_and: - inst = emit(AND(result_dst, op[0], op[1])); - break; - case ir_binop_bit_xor: - inst = emit(XOR(result_dst, op[0], op[1])); - break; - case ir_binop_bit_or: - inst = emit(OR(result_dst, op[0], op[1])); - break; - - case ir_binop_lshift: - inst = emit(SHL(result_dst, op[0], op[1])); - break; - - case ir_binop_rshift: - if (ir->type->base_type == GLSL_TYPE_INT) - inst = emit(ASR(result_dst, op[0], op[1])); - else - inst = emit(SHR(result_dst, op[0], op[1])); - break; - - case ir_binop_bfm: - emit(BFI1(result_dst, op[0], op[1])); - break; - - case ir_binop_ubo_load: { - ir_constant *const_uniform_block = ir->operands[0]->as_constant(); - ir_constant *const_offset_ir = ir->operands[1]->as_constant(); - unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0; - src_reg offset; - - /* Now, load the vector from that offset. */ - assert(ir->type->is_vector() || ir->type->is_scalar()); - - src_reg packed_consts = src_reg(this, glsl_type::vec4_type); - packed_consts.type = result.type; - src_reg surf_index; - - if (const_uniform_block) { - /* The block index is a constant, so just emit the binding table entry - * as an immediate. - */ - surf_index = src_reg(prog_data->base.binding_table.ubo_start + - const_uniform_block->value.u[0]); - } else { - /* The block index is not a constant. Evaluate the index expression - * per-channel and add the base UBO index; we have to select a value - * from any live channel. - */ - surf_index = src_reg(this, glsl_type::uint_type); - emit(ADD(dst_reg(surf_index), op[0], - src_reg(prog_data->base.binding_table.ubo_start))); - surf_index = emit_uniformize(surf_index); - - /* Assume this may touch any UBO. It would be nice to provide - * a tighter bound, but the array information is already lowered away. - */ - brw_mark_surface_used(&prog_data->base, - prog_data->base.binding_table.ubo_start + - shader_prog->NumBufferInterfaceBlocks - 1); - } - - if (const_offset_ir) { - if (devinfo->gen >= 8) { - /* Store the offset in a GRF so we can send-from-GRF. */ - offset = src_reg(this, glsl_type::int_type); - emit(MOV(dst_reg(offset), src_reg(const_offset / 16))); - } else { - /* Immediates are fine on older generations since they'll be moved - * to a (potentially fake) MRF at the generator level. - */ - offset = src_reg(const_offset / 16); - } - } else { - offset = src_reg(this, glsl_type::uint_type); - emit(SHR(dst_reg(offset), op[1], src_reg(4u))); - } - - emit_pull_constant_load_reg(dst_reg(packed_consts), - surf_index, - offset, - NULL, NULL /* before_block/inst */); - - packed_consts.swizzle = brw_swizzle_for_size(ir->type->vector_elements); - packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4, - const_offset % 16 / 4, - const_offset % 16 / 4, - const_offset % 16 / 4); - - /* UBO bools are any nonzero int. We need to convert them to 0/~0. */ - if (ir->type->base_type == GLSL_TYPE_BOOL) { - emit(CMP(result_dst, packed_consts, src_reg(0u), - BRW_CONDITIONAL_NZ)); - } else { - emit(MOV(result_dst, packed_consts)); - } - break; - } - - case ir_binop_vector_extract: - unreachable("should have been lowered by vec_index_to_cond_assign"); - - case ir_triop_fma: - op[0] = fix_3src_operand(op[0]); - op[1] = fix_3src_operand(op[1]); - op[2] = fix_3src_operand(op[2]); - /* Note that the instruction's argument order is reversed from GLSL - * and the IR. 
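The ir_binop_ubo_load case above turns a constant byte offset into two pieces: the vec4-aligned slot passed to the pull constant load (offset / 16) and the component the swizzle starts at (offset % 16 / 4). A small sketch of just that arithmetic:

#include <cstdio>

int main()
{
   const unsigned byte_offsets[] = { 0, 4, 16, 28, 100 };

   for (unsigned off : byte_offsets) {
      unsigned vec4_slot = off / 16;      // what the pull-constant load fetches
      unsigned component = off % 16 / 4;  // where the swizzle starts in that vec4
      std::printf("byte %3u -> vec4 slot %2u, component %u\n",
                  off, vec4_slot, component);
   }
   return 0;
}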
- */ - emit(MAD(result_dst, op[2], op[1], op[0])); - break; - - case ir_triop_lrp: - emit_lrp(result_dst, op[0], op[1], op[2]); - break; - - case ir_triop_csel: - unreachable("already handled above"); - break; - - case ir_triop_bfi: - op[0] = fix_3src_operand(op[0]); - op[1] = fix_3src_operand(op[1]); - op[2] = fix_3src_operand(op[2]); - emit(BFI2(result_dst, op[0], op[1], op[2])); - break; - - case ir_triop_bitfield_extract: - op[0] = fix_3src_operand(op[0]); - op[1] = fix_3src_operand(op[1]); - op[2] = fix_3src_operand(op[2]); - /* Note that the instruction's argument order is reversed from GLSL - * and the IR. - */ - emit(BFE(result_dst, op[2], op[1], op[0])); - break; - - case ir_triop_vector_insert: - unreachable("should have been lowered by lower_vector_insert"); - - case ir_quadop_bitfield_insert: - unreachable("not reached: should be handled by " - "bitfield_insert_to_bfm_bfi\n"); - - case ir_quadop_vector: - unreachable("not reached: should be handled by lower_quadop_vector"); - - case ir_unop_pack_half_2x16: - emit_pack_half_2x16(result_dst, op[0]); - break; - case ir_unop_unpack_half_2x16: - emit_unpack_half_2x16(result_dst, op[0]); - break; - case ir_unop_unpack_unorm_4x8: - emit_unpack_unorm_4x8(result_dst, op[0]); - break; - case ir_unop_unpack_snorm_4x8: - emit_unpack_snorm_4x8(result_dst, op[0]); - break; - case ir_unop_pack_unorm_4x8: - emit_pack_unorm_4x8(result_dst, op[0]); - break; - case ir_unop_pack_snorm_4x8: - emit_pack_snorm_4x8(result_dst, op[0]); - break; - case ir_unop_pack_snorm_2x16: - case ir_unop_pack_unorm_2x16: - case ir_unop_unpack_snorm_2x16: - case ir_unop_unpack_unorm_2x16: - unreachable("not reached: should be handled by lower_packing_builtins"); - case ir_unop_unpack_half_2x16_split_x: - case ir_unop_unpack_half_2x16_split_y: - case ir_binop_pack_half_2x16_split: - case ir_unop_interpolate_at_centroid: - case ir_binop_interpolate_at_sample: - case ir_binop_interpolate_at_offset: - unreachable("not reached: should not occur in vertex shader"); - case ir_binop_ldexp: - unreachable("not reached: should be handled by ldexp_to_arith()"); - case ir_unop_d2f: - case ir_unop_f2d: - case ir_unop_d2i: - case ir_unop_i2d: - case ir_unop_d2u: - case ir_unop_u2d: - case ir_unop_d2b: - case ir_unop_pack_double_2x32: - case ir_unop_unpack_double_2x32: - case ir_unop_frexp_sig: - case ir_unop_frexp_exp: - unreachable("fp64 todo"); - } -} - - -void -vec4_visitor::visit(ir_swizzle *ir) -{ - /* Note that this is only swizzles in expressions, not those on the left - * hand side of an assignment, which do write masking. See ir_assignment - * for that. 
- */ - const unsigned swz = brw_compose_swizzle( - brw_swizzle_for_size(ir->type->vector_elements), - BRW_SWIZZLE4(ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w)); - - ir->val->accept(this); - this->result = swizzle(this->result, swz); -} - -void -vec4_visitor::visit(ir_dereference_variable *ir) -{ - const struct glsl_type *type = ir->type; - dst_reg *reg = variable_storage(ir->var); - - if (!reg) { - fail("Failed to find variable storage for %s\n", ir->var->name); - this->result = src_reg(brw_null_reg()); - return; - } - - this->result = src_reg(*reg); - - /* System values get their swizzle from the dst_reg writemask */ - if (ir->var->data.mode == ir_var_system_value) - return; - - if (type->is_scalar() || type->is_vector() || type->is_matrix()) - this->result.swizzle = brw_swizzle_for_size(type->vector_elements); -} - - -int -vec4_visitor::compute_array_stride(ir_dereference_array *ir) -{ - /* Under normal circumstances array elements are stored consecutively, so - * the stride is equal to the size of the array element. - */ - return type_size_vec4(ir->type); -} - - -void -vec4_visitor::visit(ir_dereference_array *ir) -{ - ir_constant *constant_index; - src_reg src; - int array_stride = compute_array_stride(ir); - - constant_index = ir->array_index->constant_expression_value(); - - ir->array->accept(this); - src = this->result; - - if (constant_index) { - src.reg_offset += constant_index->value.i[0] * array_stride; - } else { - /* Variable index array dereference. It eats the "vec4" of the - * base of the array and an index that offsets the Mesa register - * index. - */ - ir->array_index->accept(this); - - src_reg index_reg; - - if (array_stride == 1) { - index_reg = this->result; - } else { - index_reg = src_reg(this, glsl_type::int_type); - - emit(MUL(dst_reg(index_reg), this->result, src_reg(array_stride))); - } - - if (src.reladdr) { - src_reg temp = src_reg(this, glsl_type::int_type); - - emit(ADD(dst_reg(temp), *src.reladdr, index_reg)); - - index_reg = temp; - } - - src.reladdr = ralloc(mem_ctx, src_reg); - memcpy(src.reladdr, &index_reg, sizeof(index_reg)); - } - - /* If the type is smaller than a vec4, replicate the last channel out. */ - if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix()) - src.swizzle = brw_swizzle_for_size(ir->type->vector_elements); - else - src.swizzle = BRW_SWIZZLE_NOOP; - src.type = brw_type_for_base_type(ir->type); - - this->result = src; -} - -void -vec4_visitor::visit(ir_dereference_record *ir) -{ - unsigned int i; - const glsl_type *struct_type = ir->record->type; - int offset = 0; - - ir->record->accept(this); - - for (i = 0; i < struct_type->length; i++) { - if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) - break; - offset += type_size_vec4(struct_type->fields.structure[i].type); - } - - /* If the type is smaller than a vec4, replicate the last channel out. */ - if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix()) - this->result.swizzle = brw_swizzle_for_size(ir->type->vector_elements); - else - this->result.swizzle = BRW_SWIZZLE_NOOP; - this->result.type = brw_type_for_base_type(ir->type); - - this->result.reg_offset += offset; -} - -/** - * We want to be careful in assignment setup to hit the actual storage - * instead of potentially using a temporary like we might with the - * ir_dereference handler. - */ -static dst_reg -get_assignment_lhs(ir_dereference *ir, vec4_visitor *v) -{ - /* The LHS must be a dereference. 
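In the removed ir_dereference_array handler above, a constant index simply advances the register offset by index times compute_array_stride(), while a variable index is multiplied by the stride and accumulated into reladdr. The constant-index half of that bookkeeping, with a simplified stand-in for the dereference state:

#include <cstdio>

// Simplified model only: the register offset of arr[i] is base plus
// i * array_stride, where array_stride is type_size_vec4() of one element
// under normal circumstances.
struct Deref {
   unsigned base_reg_offset;
   unsigned array_stride;   // vec4 slots per element
};

static unsigned constant_index_offset(const Deref &d, unsigned index)
{
   return d.base_reg_offset + index * d.array_stride;
}

int main()
{
   Deref mat4_array = { /*base*/ 2, /*stride*/ 4 };   // e.g. an array of mat4

   for (unsigned i = 0; i < 3; i++)
      std::printf("arr[%u] starts at reg_offset %u\n",
                  i, constant_index_offset(mat4_array, i));
   // A non-constant index would instead be multiplied by the stride at run
   // time and stored in src.reladdr, as the removed code did.
   return 0;
}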
If the LHS is a variable indexed array - * access of a vector, it must be separated into a series conditional moves - * before reaching this point (see ir_vec_index_to_cond_assign). - */ - assert(ir->as_dereference()); - ir_dereference_array *deref_array = ir->as_dereference_array(); - if (deref_array) { - assert(!deref_array->array->type->is_vector()); - } - - /* Use the rvalue deref handler for the most part. We'll ignore - * swizzles in it and write swizzles using writemask, though. - */ - ir->accept(v); - return dst_reg(v->result); -} - -void -vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src, - const struct glsl_type *type, - enum brw_predicate predicate) -{ - if (type->base_type == GLSL_TYPE_STRUCT) { - for (unsigned int i = 0; i < type->length; i++) { - emit_block_move(dst, src, type->fields.structure[i].type, predicate); - } - return; - } - - if (type->is_array()) { - for (unsigned int i = 0; i < type->length; i++) { - emit_block_move(dst, src, type->fields.array, predicate); - } - return; - } - - if (type->is_matrix()) { - const struct glsl_type *vec_type; - - vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT, - type->vector_elements, 1); - - for (int i = 0; i < type->matrix_columns; i++) { - emit_block_move(dst, src, vec_type, predicate); - } - return; - } - - assert(type->is_scalar() || type->is_vector()); - - dst->type = brw_type_for_base_type(type); - src->type = dst->type; - - dst->writemask = (1 << type->vector_elements) - 1; - - src->swizzle = brw_swizzle_for_size(type->vector_elements); - - vec4_instruction *inst = emit(MOV(*dst, *src)); - inst->predicate = predicate; - - dst->reg_offset++; - src->reg_offset++; -} - - -/* If the RHS processing resulted in an instruction generating a - * temporary value, and it would be easy to rewrite the instruction to - * generate its result right into the LHS instead, do so. This ends - * up reliably removing instructions where it can be tricky to do so - * later without real UD chain information. - */ -bool -vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, - dst_reg dst, - src_reg src, - vec4_instruction *pre_rhs_inst, - vec4_instruction *last_rhs_inst) -{ - /* This could be supported, but it would take more smarts. */ - if (ir->condition) - return false; - - if (pre_rhs_inst == last_rhs_inst) - return false; /* No instructions generated to work with. */ - - /* Make sure the last instruction generated our source reg. */ - if (src.file != GRF || - src.file != last_rhs_inst->dst.file || - src.reg != last_rhs_inst->dst.reg || - src.reg_offset != last_rhs_inst->dst.reg_offset || - src.reladdr || - src.abs || - src.negate || - last_rhs_inst->predicate != BRW_PREDICATE_NONE) - return false; - - /* Check that that last instruction fully initialized the channels - * we want to use, in the order we want to use them. We could - * potentially reswizzle the operands of many instructions so that - * we could handle out of order channels, but don't yet. - */ - - for (unsigned i = 0; i < 4; i++) { - if (dst.writemask & (1 << i)) { - if (!(last_rhs_inst->dst.writemask & (1 << i))) - return false; - - if (BRW_GET_SWZ(src.swizzle, i) != i) - return false; - } - } - - /* Success! Rewrite the instruction. 
*/ - last_rhs_inst->dst.file = dst.file; - last_rhs_inst->dst.reg = dst.reg; - last_rhs_inst->dst.reg_offset = dst.reg_offset; - last_rhs_inst->dst.reladdr = dst.reladdr; - last_rhs_inst->dst.writemask &= dst.writemask; - - return true; -} - -void -vec4_visitor::visit(ir_assignment *ir) -{ - dst_reg dst = get_assignment_lhs(ir->lhs, this); - enum brw_predicate predicate = BRW_PREDICATE_NONE; - - if (!ir->lhs->type->is_scalar() && - !ir->lhs->type->is_vector()) { - ir->rhs->accept(this); - src_reg src = this->result; - - if (ir->condition) { - emit_bool_to_cond_code(ir->condition, &predicate); - } - - /* emit_block_move doesn't account for swizzles in the source register. - * This should be ok, since the source register is a structure or an - * array, and those can't be swizzled. But double-check to be sure. - */ - assert(src.swizzle == - (ir->rhs->type->is_matrix() - ? brw_swizzle_for_size(ir->rhs->type->vector_elements) - : BRW_SWIZZLE_NOOP)); - - emit_block_move(&dst, &src, ir->rhs->type, predicate); - return; - } - - /* Now we're down to just a scalar/vector with writemasks. */ - int i; - - vec4_instruction *pre_rhs_inst, *last_rhs_inst; - pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail(); +} - ir->rhs->accept(this); +dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type) +{ + init(); - last_rhs_inst = (vec4_instruction *)this->instructions.get_tail(); + this->file = GRF; + this->reg = v->alloc.allocate(type_size_vec4(type)); - int swizzles[4]; - int src_chan = 0; + if (type->is_array() || type->is_record()) { + this->writemask = WRITEMASK_XYZW; + } else { + this->writemask = (1 << type->vector_elements) - 1; + } - assert(ir->lhs->type->is_vector() || - ir->lhs->type->is_scalar()); - dst.writemask = ir->write_mask; + this->type = brw_type_for_base_type(type); +} - /* Swizzle a small RHS vector into the channels being written. - * - * glsl ir treats write_mask as dictating how many channels are - * present on the RHS while in our instructions we need to make - * those channels appear in the slots of the vec4 they're written to. - */ - for (int i = 0; i < 4; i++) - swizzles[i] = (ir->write_mask & (1 << i) ? src_chan++ : 0); +vec4_instruction * +vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst, + src_reg src0, src_reg src1) +{ + vec4_instruction *inst; - src_reg src = swizzle(this->result, - BRW_SWIZZLE4(swizzles[0], swizzles[1], - swizzles[2], swizzles[3])); + if (devinfo->gen >= 6) { + inst = emit(BRW_OPCODE_SEL, dst, src0, src1); + inst->conditional_mod = conditionalmod; + } else { + emit(CMP(dst, src0, src1, conditionalmod)); - if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) { - return; + inst = emit(BRW_OPCODE_SEL, dst, src0, src1); + inst->predicate = BRW_PREDICATE_NORMAL; } - if (ir->condition) { - emit_bool_to_cond_code(ir->condition, &predicate); - } + return inst; +} - for (i = 0; i < type_size_vec4(ir->lhs->type); i++) { - vec4_instruction *inst = emit(MOV(dst, src)); - inst->predicate = predicate; +vec4_instruction * +vec4_visitor::emit_lrp(const dst_reg &dst, + const src_reg &x, const src_reg &y, const src_reg &a) +{ + if (devinfo->gen >= 6) { + /* Note that the instruction's argument order is reversed from GLSL + * and the IR. + */ + return emit(LRP(dst, fix_3src_operand(a), fix_3src_operand(y), + fix_3src_operand(x))); + } else { + /* Earlier generations don't support three source operations, so we + * need to emit x*(1-a) + y*a. 
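emit_lrp() above keeps a fused LRP on gen6 and later and otherwise expands the interpolation into the x*(1-a) + y*a sequence the comment describes. The algebra of both paths, shown with plain scalar floats rather than registers:

#include <cstdio>

static float lrp_fused(float x, float y, float a)
{
   // What a single LRP computes (the backend passes its operands in the
   // reversed order noted in the comment above).
   return x * (1.0f - a) + y * a;
}

static float lrp_expanded(float x, float y, float a)
{
   float y_times_a           = y * a;            // MUL
   float one_minus_a         = 1.0f - a;         // ADD with negated a
   float x_times_one_minus_a = x * one_minus_a;  // MUL
   return x_times_one_minus_a + y_times_a;       // ADD
}

int main()
{
   std::printf("%f %f\n",
               lrp_fused(2.0f, 10.0f, 0.25f),
               lrp_expanded(2.0f, 10.0f, 0.25f));   // both print 4.0
   return 0;
}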
+ */ + dst_reg y_times_a = dst_reg(this, glsl_type::vec4_type); + dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type); + dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type); + y_times_a.writemask = dst.writemask; + one_minus_a.writemask = dst.writemask; + x_times_one_minus_a.writemask = dst.writemask; - dst.reg_offset++; - src.reg_offset++; + emit(MUL(y_times_a, y, a)); + emit(ADD(one_minus_a, negate(a), src_reg(1.0f))); + emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a))); + return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a))); } } +/** + * Emits the instructions needed to perform a pull constant load. before_block + * and before_inst can be NULL in which case the instruction will be appended + * to the end of the instruction list. + */ void -vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir) +vec4_visitor::emit_pull_constant_load_reg(dst_reg dst, + src_reg surf_index, + src_reg offset_reg, + bblock_t *before_block, + vec4_instruction *before_inst) { - if (ir->type->base_type == GLSL_TYPE_STRUCT) { - foreach_in_list(ir_constant, field_value, &ir->components) { - emit_constant_values(dst, field_value); - } - return; - } - - if (ir->type->is_array()) { - for (unsigned int i = 0; i < ir->type->length; i++) { - emit_constant_values(dst, ir->array_elements[i]); - } - return; - } - - if (ir->type->is_matrix()) { - for (int i = 0; i < ir->type->matrix_columns; i++) { - float *vec = &ir->value.f[i * ir->type->vector_elements]; + assert((before_inst == NULL && before_block == NULL) || + (before_inst && before_block)); - for (int j = 0; j < ir->type->vector_elements; j++) { - dst->writemask = 1 << j; - dst->type = BRW_REGISTER_TYPE_F; + vec4_instruction *pull; - emit(MOV(*dst, src_reg(vec[j]))); - } - dst->reg_offset++; - } - return; - } + if (devinfo->gen >= 9) { + /* Gen9+ needs a message header in order to use SIMD4x2 mode */ + src_reg header(this, glsl_type::uvec4_type, 2); - int remaining_writemask = (1 << ir->type->vector_elements) - 1; + pull = new(mem_ctx) + vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9, + dst_reg(header)); - for (int i = 0; i < ir->type->vector_elements; i++) { - if (!(remaining_writemask & (1 << i))) - continue; + if (before_inst) + emit_before(before_block, before_inst, pull); + else + emit(pull); - dst->writemask = 1 << i; - dst->type = brw_type_for_base_type(ir->type); + dst_reg index_reg = retype(offset(dst_reg(header), 1), + offset_reg.type); + pull = MOV(writemask(index_reg, WRITEMASK_X), offset_reg); - /* Find other components that match the one we're about to - * write. Emits fewer instructions for things like vec4(0.5, - * 1.5, 1.5, 1.5). - */ - for (int j = i + 1; j < ir->type->vector_elements; j++) { - if (ir->type->base_type == GLSL_TYPE_BOOL) { - if (ir->value.b[i] == ir->value.b[j]) - dst->writemask |= (1 << j); - } else { - /* u, i, and f storage all line up, so no need for a - * switch case for comparing each type. - */ - if (ir->value.u[i] == ir->value.u[j]) - dst->writemask |= (1 << j); - } - } + if (before_inst) + emit_before(before_block, before_inst, pull); + else + emit(pull); - switch (ir->type->base_type) { - case GLSL_TYPE_FLOAT: - emit(MOV(*dst, src_reg(ir->value.f[i]))); - break; - case GLSL_TYPE_INT: - emit(MOV(*dst, src_reg(ir->value.i[i]))); - break; - case GLSL_TYPE_UINT: - emit(MOV(*dst, src_reg(ir->value.u[i]))); - break; - case GLSL_TYPE_BOOL: - emit(MOV(*dst, src_reg(ir->value.b[i] != 0 ? 
~0 : 0))); - break; - default: - unreachable("Non-float/uint/int/bool constant"); - } + pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, + dst, + surf_index, + header); + pull->mlen = 2; + pull->header_size = 1; + } else if (devinfo->gen >= 7) { + dst_reg grf_offset = dst_reg(this, glsl_type::int_type); - remaining_writemask &= ~dst->writemask; - } - dst->reg_offset++; -} + grf_offset.type = offset_reg.type; -void -vec4_visitor::visit(ir_constant *ir) -{ - dst_reg dst = dst_reg(this, ir->type); - this->result = src_reg(dst); + pull = MOV(grf_offset, offset_reg); - emit_constant_values(&dst, ir); -} + if (before_inst) + emit_before(before_block, before_inst, pull); + else + emit(pull); -void -vec4_visitor::visit_atomic_counter_intrinsic(ir_call *ir) -{ - ir_dereference *deref = static_cast( - ir->actual_parameters.get_head()); - ir_variable *location = deref->variable_referenced(); - unsigned surf_index = (prog_data->base.binding_table.abo_start + - location->data.binding); - - /* Calculate the surface offset */ - src_reg offset(this, glsl_type::uint_type); - ir_dereference_array *deref_array = deref->as_dereference_array(); - if (deref_array) { - deref_array->array_index->accept(this); - - src_reg tmp(this, glsl_type::uint_type); - emit(MUL(dst_reg(tmp), this->result, ATOMIC_COUNTER_SIZE)); - emit(ADD(dst_reg(offset), tmp, location->data.atomic.offset)); + pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7, + dst, + surf_index, + src_reg(grf_offset)); + pull->mlen = 1; } else { - offset = location->data.atomic.offset; - } - - /* Emit the appropriate machine instruction */ - const char *callee = ir->callee->function_name(); - dst_reg dst = get_assignment_lhs(ir->return_deref, this); - - if (!strcmp("__intrinsic_atomic_read", callee)) { - emit_untyped_surface_read(surf_index, dst, offset); - - } else if (!strcmp("__intrinsic_atomic_increment", callee)) { - emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset, - src_reg(), src_reg()); - - } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) { - emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset, - src_reg(), src_reg()); + pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD, + dst, + surf_index, + offset_reg); + pull->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1; + pull->mlen = 1; } - brw_mark_surface_used(stage_prog_data, surf_index); + if (before_inst) + emit_before(before_block, before_inst, pull); + else + emit(pull); } -void -vec4_visitor::visit(ir_call *ir) +src_reg +vec4_visitor::emit_uniformize(const src_reg &src) { - const char *callee = ir->callee->function_name(); + const src_reg chan_index(this, glsl_type::uint_type); + const dst_reg dst = retype(dst_reg(this, glsl_type::uint_type), + src.type); - if (!strcmp("__intrinsic_atomic_read", callee) || - !strcmp("__intrinsic_atomic_increment", callee) || - !strcmp("__intrinsic_atomic_predecrement", callee)) { - visit_atomic_counter_intrinsic(ir); - } else { - unreachable("Unsupported intrinsic."); - } + emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dst_reg(chan_index)) + ->force_writemask_all = true; + emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index) + ->force_writemask_all = true; + + return src_reg(dst); } src_reg @@ -2718,155 +1062,6 @@ vec4_visitor::emit_texture(ir_texture_opcode op, src_reg(inst->dst), sampler, dest_type); } -void -vec4_visitor::visit(ir_texture *ir) -{ - uint32_t sampler = - _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog); - - ir_rvalue *nonconst_sampler_index = - 
_mesa_get_sampler_array_nonconst_index(ir->sampler); - - /* Handle non-constant sampler array indexing */ - src_reg sampler_reg; - if (nonconst_sampler_index) { - /* The highest sampler which may be used by this operation is - * the last element of the array. Mark it here, because the generator - * doesn't have enough information to determine the bound. - */ - uint32_t array_size = ir->sampler->as_dereference_array() - ->array->type->array_size(); - - uint32_t max_used = sampler + array_size - 1; - if (ir->op == ir_tg4 && devinfo->gen < 8) { - max_used += prog_data->base.binding_table.gather_texture_start; - } else { - max_used += prog_data->base.binding_table.texture_start; - } - - brw_mark_surface_used(&prog_data->base, max_used); - - /* Emit code to evaluate the actual indexing expression */ - nonconst_sampler_index->accept(this); - src_reg temp(this, glsl_type::uint_type); - emit(ADD(dst_reg(temp), this->result, src_reg(sampler))); - sampler_reg = emit_uniformize(temp); - } else { - /* Single sampler, or constant array index; the indexing expression - * is just an immediate. - */ - sampler_reg = src_reg(sampler); - } - - /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother - * emitting anything other than setting up the constant result. - */ - if (ir->op == ir_tg4) { - ir_constant *chan = ir->lod_info.component->as_constant(); - int swiz = GET_SWZ(key_tex->swizzles[sampler], chan->value.i[0]); - if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) { - dst_reg result(this, ir->type); - this->result = src_reg(result); - emit(MOV(result, src_reg(swiz == SWIZZLE_ONE ? 1.0f : 0.0f))); - return; - } - } - - /* Should be lowered by do_lower_texture_projection */ - assert(!ir->projector); - - /* Should be lowered */ - assert(!ir->offset || !ir->offset->type->is_array()); - - /* Generate code to compute all the subexpression trees. This has to be - * done before loading any values into MRFs for the sampler message since - * generating these values may involve SEND messages that need the MRFs. 
- */ - src_reg coordinate; - int coord_components = 0; - if (ir->coordinate) { - coord_components = ir->coordinate->type->vector_elements; - ir->coordinate->accept(this); - coordinate = this->result; - } - - src_reg shadow_comparitor; - if (ir->shadow_comparitor) { - ir->shadow_comparitor->accept(this); - shadow_comparitor = this->result; - } - - bool has_nonconstant_offset = ir->offset && !ir->offset->as_constant(); - src_reg offset_value; - if (has_nonconstant_offset) { - ir->offset->accept(this); - offset_value = src_reg(this->result); - } - - src_reg lod, lod2, sample_index, mcs; - switch (ir->op) { - case ir_tex: - lod = src_reg(0.0f); - break; - case ir_txf: - case ir_txl: - case ir_txs: - ir->lod_info.lod->accept(this); - lod = this->result; - break; - case ir_query_levels: - lod = src_reg(0); - break; - case ir_txf_ms: - ir->lod_info.sample_index->accept(this); - sample_index = this->result; - - if (devinfo->gen >= 7 && key_tex->compressed_multisample_layout_mask & (1 << sampler)) - mcs = emit_mcs_fetch(ir->coordinate->type, coordinate, sampler_reg); - else - mcs = src_reg(0u); - break; - case ir_txd: - ir->lod_info.grad.dPdx->accept(this); - lod = this->result; - - ir->lod_info.grad.dPdy->accept(this); - lod2 = this->result; - break; - case ir_txb: - case ir_lod: - case ir_tg4: - case ir_texture_samples: - break; - } - - uint32_t constant_offset = 0; - if (ir->offset != NULL && !has_nonconstant_offset) { - constant_offset = - brw_texture_offset(ir->offset->as_constant()->value.i, - ir->offset->type->vector_elements); - } - - /* Stuff the channel select bits in the top of the texture offset */ - if (ir->op == ir_tg4) - constant_offset |= - gather_channel( ir->lod_info.component->as_constant()->value.i[0], - sampler) << 16; - - glsl_type const *type = ir->sampler->type; - bool is_cube_array = type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE && - type->sampler_array; - - this->result = src_reg(this, ir->type); - dst_reg dest = dst_reg(this->result); - - emit_texture(ir->op, dest, ir->type, coordinate, coord_components, - shadow_comparitor, - lod, lod2, sample_index, - constant_offset, offset_value, - mcs, is_cube_array, sampler, sampler_reg); -} - /** * Apply workarounds for Gen6 gather with UINT/SINT */ @@ -2975,78 +1170,18 @@ vec4_visitor::swizzle_result(ir_texture_opcode op, dst_reg dest, } } -void -vec4_visitor::visit(ir_return *) -{ - unreachable("not reached"); -} - -void -vec4_visitor::visit(ir_discard *) -{ - unreachable("not reached"); -} - -void -vec4_visitor::visit(ir_if *ir) -{ - /* Don't point the annotation at the if statement, because then it plus - * the then and else blocks get printed. 
- */ - this->base_ir = ir->condition; - - if (devinfo->gen == 6) { - emit_if_gen6(ir); - } else { - enum brw_predicate predicate; - emit_bool_to_cond_code(ir->condition, &predicate); - emit(IF(predicate)); - } - - visit_instructions(&ir->then_instructions); - - if (!ir->else_instructions.is_empty()) { - this->base_ir = ir->condition; - emit(BRW_OPCODE_ELSE); - - visit_instructions(&ir->else_instructions); - } - - this->base_ir = ir->condition; - emit(BRW_OPCODE_ENDIF); -} - void vec4_visitor::gs_emit_vertex(int stream_id) { unreachable("not reached"); } -void -vec4_visitor::visit(ir_emit_vertex *) -{ - unreachable("not reached"); -} - void vec4_visitor::gs_end_primitive() { unreachable("not reached"); } - -void -vec4_visitor::visit(ir_end_primitive *) -{ - unreachable("not reached"); -} - -void -vec4_visitor::visit(ir_barrier *) -{ - unreachable("not reached"); -} - void vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index, dst_reg dst, src_reg offset, @@ -3688,42 +1823,17 @@ vec4_visitor::resolve_ud_negate(src_reg *reg) *reg = temp; } -/** - * Resolve the result of a Gen4-5 CMP instruction to a proper boolean. - * - * CMP on Gen4-5 only sets the LSB of the result; the rest are undefined. - * If we need a proper boolean value, we have to fix it up to be 0 or ~0. - */ -void -vec4_visitor::resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg) -{ - assert(devinfo->gen <= 5); - - if (!rvalue->type->is_boolean()) - return; - - src_reg and_result = src_reg(this, rvalue->type); - src_reg neg_result = src_reg(this, rvalue->type); - emit(AND(dst_reg(and_result), *reg, src_reg(1))); - emit(MOV(dst_reg(neg_result), negate(and_result))); - *reg = neg_result; -} - vec4_visitor::vec4_visitor(const struct brw_compiler *compiler, void *log_data, - struct gl_program *prog, const struct brw_sampler_prog_key_data *key_tex, struct brw_vue_prog_data *prog_data, - struct gl_shader_program *shader_prog, - gl_shader_stage stage, + nir_shader *shader, void *mem_ctx, bool no_spills, int shader_time_index) - : backend_shader(compiler, log_data, mem_ctx, - shader_prog, prog, &prog_data->base, stage), + : backend_shader(compiler, log_data, mem_ctx, shader, &prog_data->base), key_tex(key_tex), prog_data(prog_data), - sanity_param_count(0), fail_msg(NULL), first_non_payload_grf(0), need_all_constants_in_pull_buffer(false), @@ -3737,10 +1847,6 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler, this->current_annotation = NULL; memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation)); - this->variable_ht = hash_table_ctor(0, - hash_table_pointer_hash, - hash_table_pointer_compare); - this->virtual_grf_start = NULL; this->virtual_grf_end = NULL; this->live_intervals = NULL; @@ -3759,12 +1865,10 @@ vec4_visitor::vec4_visitor(const struct brw_compiler *compiler, } this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size); - this->uniform_vector_size = rzalloc_array(mem_ctx, int, this->uniform_array_size); } vec4_visitor::~vec4_visitor() { - hash_table_dtor(this->variable_ht); } diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp deleted file mode 100644 index d1a72d787e7..00000000000 --- a/src/mesa/drivers/dri/i965/brw_vec4_vp.cpp +++ /dev/null @@ -1,649 +0,0 @@ -/* - * Copyright © 2012 Intel Corporation - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without 
restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING - * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS - * IN THE SOFTWARE. - */ - -/** @file brw_vec4_vp.cpp - * - * A translator from Mesa IR to the i965 driver's Vec4 IR, used to implement - * ARB_vertex_program and fixed-function vertex processing. - */ - -#include "brw_context.h" -#include "brw_vec4.h" -#include "brw_vs.h" -extern "C" { -#include "program/prog_parameter.h" -#include "program/prog_print.h" -} -using namespace brw; - -void -vec4_visitor::emit_vp_sop(enum brw_conditional_mod conditional_mod, - dst_reg dst, src_reg src0, src_reg src1, - src_reg one) -{ - vec4_instruction *inst; - - inst = emit(CMP(dst_null_f(), src0, src1, conditional_mod)); - - inst = emit(BRW_OPCODE_SEL, dst, one, src_reg(0.0f)); - inst->predicate = BRW_PREDICATE_NORMAL; -} - -void -vec4_vs_visitor::emit_program_code() -{ - this->need_all_constants_in_pull_buffer = false; - - setup_vp_regs(); - - /* Keep a reg with 1.0 around, for reuse by emit_vs_sop so that it can just - * be: - * - * sel.f0 dst 1.0 0.0 - * - * instead of - * - * mov dst 0.0 - * mov.f0 dst 1.0 - */ - src_reg one = src_reg(this, glsl_type::float_type); - emit(MOV(dst_reg(one), src_reg(1.0f))); - - for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) { - const struct prog_instruction *vpi = &prog->Instructions[insn]; - base_ir = vpi; - - dst_reg dst; - src_reg src[3]; - - /* We always emit into a temporary destination register to avoid - * aliasing issues. 
- */ - dst = dst_reg(this, glsl_type::vec4_type); - - for (int i = 0; i < 3; i++) - src[i] = get_vp_src_reg(vpi->SrcReg[i]); - - switch (vpi->Opcode) { - case OPCODE_ABS: - src[0].abs = true; - src[0].negate = false; - emit(MOV(dst, src[0])); - break; - - case OPCODE_ADD: - emit(ADD(dst, src[0], src[1])); - break; - - case OPCODE_ARL: - if (devinfo->gen >= 6) { - dst.writemask = WRITEMASK_X; - dst_reg dst_f = dst; - dst_f.type = BRW_REGISTER_TYPE_F; - - emit(RNDD(dst_f, src[0])); - emit(MOV(dst, src_reg(dst_f))); - } else { - emit(RNDD(dst, src[0])); - } - break; - - case OPCODE_DP3: - emit(DP3(dst, src[0], src[1])); - break; - case OPCODE_DP4: - emit(DP4(dst, src[0], src[1])); - break; - case OPCODE_DPH: - emit(DPH(dst, src[0], src[1])); - break; - - case OPCODE_DST: { - dst_reg t = dst; - if (vpi->DstReg.WriteMask & WRITEMASK_X) { - t.writemask = WRITEMASK_X; - emit(MOV(t, src_reg(1.0f))); - } - if (vpi->DstReg.WriteMask & WRITEMASK_Y) { - t.writemask = WRITEMASK_Y; - emit(MUL(t, src[0], src[1])); - } - if (vpi->DstReg.WriteMask & WRITEMASK_Z) { - t.writemask = WRITEMASK_Z; - emit(MOV(t, src[0])); - } - if (vpi->DstReg.WriteMask & WRITEMASK_W) { - t.writemask = WRITEMASK_W; - emit(MOV(t, src[1])); - } - break; - } - - case OPCODE_EXP: { - dst_reg result = dst; - if (vpi->DstReg.WriteMask & WRITEMASK_X) { - /* tmp_d = floor(src[0].x) */ - src_reg tmp_d = src_reg(this, glsl_type::ivec4_type); - assert(tmp_d.type == BRW_REGISTER_TYPE_D); - emit(RNDD(dst_reg(tmp_d), swizzle(src[0], BRW_SWIZZLE_XXXX))); - - /* result[0] = 2.0 ^ tmp */ - /* Adjust exponent for floating point: exp += 127 */ - dst_reg tmp_d_x(GRF, tmp_d.reg, glsl_type::int_type, WRITEMASK_X); - emit(ADD(tmp_d_x, tmp_d, src_reg(127))); - - /* Install exponent and sign. Excess drops off the edge: */ - dst_reg res_d_x(GRF, result.reg, glsl_type::int_type, WRITEMASK_X); - emit(BRW_OPCODE_SHL, res_d_x, tmp_d, src_reg(23)); - } - if (vpi->DstReg.WriteMask & WRITEMASK_Y) { - result.writemask = WRITEMASK_Y; - emit(FRC(result, src[0])); - } - if (vpi->DstReg.WriteMask & WRITEMASK_Z) { - result.writemask = WRITEMASK_Z; - emit_math(SHADER_OPCODE_EXP2, result, src[0]); - } - if (vpi->DstReg.WriteMask & WRITEMASK_W) { - result.writemask = WRITEMASK_W; - emit(MOV(result, src_reg(1.0f))); - } - break; - } - - case OPCODE_EX2: - emit_math(SHADER_OPCODE_EXP2, dst, src[0]); - break; - - case OPCODE_FLR: - emit(RNDD(dst, src[0])); - break; - - case OPCODE_FRC: - emit(FRC(dst, src[0])); - break; - - case OPCODE_LG2: - emit_math(SHADER_OPCODE_LOG2, dst, src[0]); - break; - - case OPCODE_LIT: { - dst_reg result = dst; - /* From the ARB_vertex_program spec: - * - * tmp = VectorLoad(op0); - * if (tmp.x < 0) tmp.x = 0; - * if (tmp.y < 0) tmp.y = 0; - * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); - * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; - * result.x = 1.0; - * result.y = tmp.x; - * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; - * result.w = 1.0; - * - * Note that we don't do the clamping to +/- 128. We didn't in - * brw_vs_emit.c either. 
- */ - if (vpi->DstReg.WriteMask & WRITEMASK_XW) { - result.writemask = WRITEMASK_XW; - emit(MOV(result, src_reg(1.0f))); - } - if (vpi->DstReg.WriteMask & WRITEMASK_YZ) { - result.writemask = WRITEMASK_YZ; - emit(MOV(result, src_reg(0.0f))); - - src_reg tmp_x = swizzle(src[0], BRW_SWIZZLE_XXXX); - - emit(CMP(dst_null_d(), tmp_x, src_reg(0.0f), BRW_CONDITIONAL_G)); - emit(IF(BRW_PREDICATE_NORMAL)); - - if (vpi->DstReg.WriteMask & WRITEMASK_Y) { - result.writemask = WRITEMASK_Y; - emit(MOV(result, tmp_x)); - } - - if (vpi->DstReg.WriteMask & WRITEMASK_Z) { - /* if (tmp.y < 0) tmp.y = 0; */ - src_reg tmp_y = swizzle(src[0], BRW_SWIZZLE_YYYY); - result.writemask = WRITEMASK_Z; - emit_minmax(BRW_CONDITIONAL_GE, result, tmp_y, src_reg(0.0f)); - - src_reg clamped_y(result); - clamped_y.swizzle = BRW_SWIZZLE_ZZZZ; - - src_reg tmp_w = swizzle(src[0], BRW_SWIZZLE_WWWW); - - emit_math(SHADER_OPCODE_POW, result, clamped_y, tmp_w); - } - emit(BRW_OPCODE_ENDIF); - } - break; - } - - case OPCODE_LOG: { - dst_reg result = dst; - result.type = BRW_REGISTER_TYPE_UD; - src_reg result_src = src_reg(result); - - src_reg arg0_ud = swizzle(src[0], BRW_SWIZZLE_XXXX); - arg0_ud.type = BRW_REGISTER_TYPE_UD; - - /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt - * according to spec: - * - * These almost look likey they could be joined up, but not really - * practical: - * - * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127 - * result[1].i = (x.i & ((1<<23)-1) + (127<<23) - */ - if (vpi->DstReg.WriteMask & WRITEMASK_XZ) { - result.writemask = WRITEMASK_X; - emit(AND(result, arg0_ud, src_reg((1u << 31) - 1))); - emit(BRW_OPCODE_SHR, result, result_src, src_reg(23u)); - src_reg result_d(result_src); - result_d.type = BRW_REGISTER_TYPE_D; /* does it matter? */ - result.type = BRW_REGISTER_TYPE_F; - emit(ADD(result, result_d, src_reg(-127))); - } - - if (vpi->DstReg.WriteMask & WRITEMASK_YZ) { - result.writemask = WRITEMASK_Y; - result.type = BRW_REGISTER_TYPE_UD; - emit(AND(result, arg0_ud, src_reg((1u << 23) - 1))); - emit(OR(result, result_src, src_reg(127u << 23))); - } - - if (vpi->DstReg.WriteMask & WRITEMASK_Z) { - /* result[2] = result[0] + LOG2(result[1]); */ - - /* Why bother? The above is just a hint how to do this with a - * taylor series. Maybe we *should* use a taylor series as by - * the time all the above has been done it's almost certainly - * quicker than calling the mathbox, even with low precision. 
- * - * Options are: - * - result[0] + mathbox.LOG2(result[1]) - * - mathbox.LOG2(arg0.x) - * - result[0] + inline_taylor_approx(result[1]) - */ - result.type = BRW_REGISTER_TYPE_F; - result.writemask = WRITEMASK_Z; - src_reg result_x(result), result_y(result), result_z(result); - result_x.swizzle = BRW_SWIZZLE_XXXX; - result_y.swizzle = BRW_SWIZZLE_YYYY; - result_z.swizzle = BRW_SWIZZLE_ZZZZ; - emit_math(SHADER_OPCODE_LOG2, result, result_y); - emit(ADD(result, result_z, result_x)); - } - - if (vpi->DstReg.WriteMask & WRITEMASK_W) { - result.type = BRW_REGISTER_TYPE_F; - result.writemask = WRITEMASK_W; - emit(MOV(result, src_reg(1.0f))); - } - break; - } - - case OPCODE_MAD: { - src_reg temp = src_reg(this, glsl_type::vec4_type); - emit(MUL(dst_reg(temp), src[0], src[1])); - emit(ADD(dst, temp, src[2])); - break; - } - - case OPCODE_MAX: - emit_minmax(BRW_CONDITIONAL_GE, dst, src[0], src[1]); - break; - - case OPCODE_MIN: - emit_minmax(BRW_CONDITIONAL_L, dst, src[0], src[1]); - break; - - case OPCODE_MOV: - emit(MOV(dst, src[0])); - break; - - case OPCODE_MUL: - emit(MUL(dst, src[0], src[1])); - break; - - case OPCODE_POW: - emit_math(SHADER_OPCODE_POW, dst, src[0], src[1]); - break; - - case OPCODE_RCP: - emit_math(SHADER_OPCODE_RCP, dst, src[0]); - break; - - case OPCODE_RSQ: - emit_math(SHADER_OPCODE_RSQ, dst, src[0]); - break; - - case OPCODE_SGE: - emit_vp_sop(BRW_CONDITIONAL_GE, dst, src[0], src[1], one); - break; - - case OPCODE_SLT: - emit_vp_sop(BRW_CONDITIONAL_L, dst, src[0], src[1], one); - break; - - case OPCODE_SUB: { - src_reg neg_src1 = src[1]; - neg_src1.negate = !src[1].negate; - emit(ADD(dst, src[0], neg_src1)); - break; - } - - case OPCODE_SWZ: - /* Note that SWZ's extended swizzles are handled in the general - * get_src_reg() code. - */ - emit(MOV(dst, src[0])); - break; - - case OPCODE_XPD: { - src_reg t1 = src_reg(this, glsl_type::vec4_type); - src_reg t2 = src_reg(this, glsl_type::vec4_type); - - emit(MUL(dst_reg(t1), - swizzle(src[0], BRW_SWIZZLE_YZXW), - swizzle(src[1], BRW_SWIZZLE_ZXYW))); - emit(MUL(dst_reg(t2), - swizzle(src[0], BRW_SWIZZLE_ZXYW), - swizzle(src[1], BRW_SWIZZLE_YZXW))); - t2.negate = true; - emit(ADD(dst, t1, t2)); - break; - } - - case OPCODE_END: - break; - - default: - assert(!"Unsupported opcode in vertex program"); - } - - /* Copy the temporary back into the actual destination register. */ - if (_mesa_num_inst_dst_regs(vpi->Opcode) != 0) { - emit(MOV(get_vp_dst_reg(vpi->DstReg), src_reg(dst))); - } - } - - /* If we used relative addressing, we need to upload all constants as - * pull constants. Do that now. - */ - if (this->need_all_constants_in_pull_buffer) { - const struct gl_program_parameter_list *params = vp->Base.Parameters; - unsigned i; - for (i = 0; i < params->NumParameters * 4; i++) { - stage_prog_data->pull_param[i] = - ¶ms->ParameterValues[i / 4][i % 4]; - } - stage_prog_data->nr_pull_params = i; - } -} - -void -vec4_vs_visitor::setup_vp_regs() -{ - /* PROGRAM_TEMPORARY */ - int num_temp = prog->NumTemporaries; - vp_temp_regs = rzalloc_array(mem_ctx, src_reg, num_temp); - for (int i = 0; i < num_temp; i++) - vp_temp_regs[i] = src_reg(this, glsl_type::vec4_type); - - /* PROGRAM_STATE_VAR etc. */ - struct gl_program_parameter_list *plist = vp->Base.Parameters; - for (unsigned p = 0; p < plist->NumParameters; p++) { - unsigned components = plist->Parameters[p].Size; - - /* Parameters should be either vec4 uniforms or single component - * constants; matrices and other larger types should have been broken - * down earlier. 
- */ - assert(components <= 4); - - this->uniform_size[this->uniforms] = 1; /* 1 vec4 */ - this->uniform_vector_size[this->uniforms] = components; - for (unsigned i = 0; i < 4; i++) { - stage_prog_data->param[this->uniforms * 4 + i] = i >= components - ? 0 : &plist->ParameterValues[p][i]; - } - this->uniforms++; /* counted in vec4 units */ - } - - /* PROGRAM_OUTPUT */ - for (int slot = 0; slot < prog_data->vue_map.num_slots; slot++) { - int varying = prog_data->vue_map.slot_to_varying[slot]; - if (varying == VARYING_SLOT_PSIZ) - output_reg[varying] = dst_reg(this, glsl_type::float_type); - else - output_reg[varying] = dst_reg(this, glsl_type::vec4_type); - assert(output_reg[varying].type == BRW_REGISTER_TYPE_F); - } - - /* PROGRAM_ADDRESS */ - this->vp_addr_reg = src_reg(this, glsl_type::int_type); - assert(this->vp_addr_reg.type == BRW_REGISTER_TYPE_D); -} - -dst_reg -vec4_vs_visitor::get_vp_dst_reg(const prog_dst_register &dst) -{ - dst_reg result; - - assert(!dst.RelAddr); - - switch (dst.File) { - case PROGRAM_TEMPORARY: - result = dst_reg(vp_temp_regs[dst.Index]); - break; - - case PROGRAM_OUTPUT: - result = output_reg[dst.Index]; - break; - - case PROGRAM_ADDRESS: { - assert(dst.Index == 0); - result = dst_reg(this->vp_addr_reg); - break; - } - - case PROGRAM_UNDEFINED: - return dst_null_f(); - - default: - unreachable("vec4_vp: bad destination register file"); - } - - result.writemask = dst.WriteMask; - return result; -} - -src_reg -vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src) -{ - struct gl_program_parameter_list *plist = vp->Base.Parameters; - - src_reg result; - - assert(!src.Abs); - - switch (src.File) { - case PROGRAM_UNDEFINED: - return src_reg(brw_null_reg()); - - case PROGRAM_TEMPORARY: - result = vp_temp_regs[src.Index]; - break; - - case PROGRAM_INPUT: - result = src_reg(ATTR, src.Index, glsl_type::vec4_type); - result.type = BRW_REGISTER_TYPE_F; - break; - - case PROGRAM_ADDRESS: { - assert(src.Index == 0); - result = this->vp_addr_reg; - break; - } - - case PROGRAM_STATE_VAR: - case PROGRAM_CONSTANT: - /* From the ARB_vertex_program specification: - * "Relative addressing can only be used for accessing program - * parameter arrays." - */ - if (src.RelAddr) { - /* Since we have no idea what the base of the array is, we need to - * upload ALL constants as push constants. - */ - this->need_all_constants_in_pull_buffer = true; - - /* Add the small constant index to the address register */ - src_reg reladdr = src_reg(this, glsl_type::int_type); - - dst_reg dst_reladdr = dst_reg(reladdr); - dst_reladdr.writemask = WRITEMASK_X; - emit(ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index))); - - if (devinfo->gen < 6) - emit(MUL(dst_reladdr, reladdr, src_reg(16))); - - #if 0 - assert(src.Index < this->uniforms); - result = src_reg(dst_reg(UNIFORM, 0)); - result.type = BRW_REGISTER_TYPE_F; - result.reladdr = new(mem_ctx) src_reg(); - memcpy(result.reladdr, &reladdr, sizeof(src_reg)); - #endif - - result = src_reg(this, glsl_type::vec4_type); - src_reg surf_index = src_reg(unsigned(prog_data->base.binding_table.pull_constants_start)); - - emit_pull_constant_load_reg(dst_reg(result), - surf_index, - reladdr, - NULL, NULL /* before_block/inst */); - break; - } - - /* We actually want to look at the type in the Parameters list for this, - * because this lets us upload constant builtin uniforms as actual - * constants. 
- */ - switch (plist->Parameters[src.Index].Type) { - case PROGRAM_CONSTANT: - result = src_reg(this, glsl_type::vec4_type); - for (int i = 0; i < 4; i++) { - dst_reg t = dst_reg(result); - t.writemask = 1 << i; - emit(MOV(t, src_reg(plist->ParameterValues[src.Index][i].f))); - } - break; - - case PROGRAM_STATE_VAR: - assert(src.Index < this->uniforms); - result = src_reg(dst_reg(UNIFORM, src.Index)); - result.type = BRW_REGISTER_TYPE_F; - break; - - default: - assert(!"Bad uniform in src register file"); - return src_reg(this, glsl_type::vec4_type); - } - break; - - default: - assert(!"Bad src register file"); - return src_reg(this, glsl_type::vec4_type); - } - - if (src.Swizzle != SWIZZLE_NOOP || src.Negate) { - unsigned short zeros_mask = 0; - unsigned short ones_mask = 0; - unsigned short src_mask = 0; - unsigned short src_swiz[4]; - - for (int i = 0; i < 4; i++) { - src_swiz[i] = 0; /* initialize for safety */ - - /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ, - * but it's simplest to handle it here. - */ - int s = GET_SWZ(src.Swizzle, i); - switch (s) { - case SWIZZLE_X: - case SWIZZLE_Y: - case SWIZZLE_Z: - case SWIZZLE_W: - src_mask |= 1 << i; - src_swiz[i] = s; - break; - case SWIZZLE_ZERO: - zeros_mask |= 1 << i; - break; - case SWIZZLE_ONE: - ones_mask |= 1 << i; - break; - } - } - - result.swizzle = - BRW_SWIZZLE4(src_swiz[0], src_swiz[1], src_swiz[2], src_swiz[3]); - - /* The hardware doesn't natively handle the SWZ instruction's zero/one - * swizzles or per-component negation, so we need to use a temporary. - */ - if (zeros_mask || ones_mask || src.Negate) { - src_reg temp_src(this, glsl_type::vec4_type); - dst_reg temp(temp_src); - - if (src_mask) { - temp.writemask = src_mask; - emit(MOV(temp, result)); - } - - if (zeros_mask) { - temp.writemask = zeros_mask; - emit(MOV(temp, src_reg(0.0f))); - } - - if (ones_mask) { - temp.writemask = ones_mask; - emit(MOV(temp, src_reg(1.0f))); - } - - if (src.Negate) { - temp.writemask = src.Negate; - src_reg neg(temp_src); - neg.negate = true; - emit(MOV(temp, neg)); - } - result = temp_src; - } - } - - return result; -} diff --git a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp index f4b50ba9d9f..b6e1971c2ee 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_vs_visitor.cpp @@ -262,7 +262,6 @@ vec4_vs_visitor::setup_uniform_clipplane_values() { for (int i = 0; i < key->nr_userclip_plane_consts; ++i) { assert(this->uniforms < uniform_array_size); - this->uniform_vector_size[this->uniforms] = 4; this->userplane[i] = dst_reg(UNIFORM, this->uniforms); this->userplane[i].type = BRW_REGISTER_TYPE_F; for (int j = 0; j < 4; ++j) { @@ -302,20 +301,15 @@ vec4_vs_visitor::vec4_vs_visitor(const struct brw_compiler *compiler, void *log_data, const struct brw_vs_prog_key *key, struct brw_vs_prog_data *vs_prog_data, - struct gl_vertex_program *vp, - struct gl_shader_program *prog, + nir_shader *shader, gl_clip_plane *clip_planes, void *mem_ctx, int shader_time_index, bool use_legacy_snorm_formula) - : vec4_visitor(compiler, log_data, - &vp->Base, &key->tex, &vs_prog_data->base, prog, - MESA_SHADER_VERTEX, - mem_ctx, false /* no_spills */, - shader_time_index), + : vec4_visitor(compiler, log_data, &key->tex, &vs_prog_data->base, shader, + mem_ctx, false /* no_spills */, shader_time_index), key(key), vs_prog_data(vs_prog_data), - vp(vp), clip_planes(clip_planes), use_legacy_snorm_formula(use_legacy_snorm_formula) { diff --git 
a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c index 0c60bde511e..3c6ee0a7a03 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.c +++ b/src/mesa/drivers/dri/i965/brw_vs.c @@ -37,6 +37,7 @@ #include "brw_state.h" #include "program/prog_print.h" #include "program/prog_parameter.h" +#include "brw_nir.h" #include "util/ralloc.h" @@ -65,27 +66,6 @@ gl_clip_plane *brw_select_clip_planes(struct gl_context *ctx) } } - -bool -brw_vs_prog_data_compare(const void *in_a, const void *in_b) -{ - const struct brw_vs_prog_data *a = in_a; - const struct brw_vs_prog_data *b = in_b; - - /* Compare the base structure. */ - if (!brw_stage_prog_data_compare(&a->base.base, &b->base.base)) - return false; - - /* Compare the rest of the struct. */ - const unsigned offset = sizeof(struct brw_stage_prog_data); - if (memcmp(((char *) a) + offset, ((char *) b) + offset, - sizeof(struct brw_vs_prog_data) - offset)) { - return false; - } - - return true; -} - bool brw_codegen_vs_prog(struct brw_context *brw, struct gl_shader_program *prog, @@ -102,6 +82,18 @@ brw_codegen_vs_prog(struct brw_context *brw, bool start_busy = false; double start_time = 0; + if (!vp->program.Base.nir) { + /* Normally we generate NIR in LinkShader() or + * ProgramStringNotify(), but Mesa's fixed-function vertex program + * handling doesn't notify the driver at all. Just do it here, at + * the last minute, even though it's lame. + */ + assert(vp->program.Base.Id == 0 && prog == NULL); + vp->program.Base.nir = + brw_create_nir(brw, NULL, &vp->program.Base, MESA_SHADER_VERTEX, + brw->intelScreen->compiler->scalar_vs); + } + if (prog) vs = (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX]; @@ -113,22 +105,22 @@ brw_codegen_vs_prog(struct brw_context *brw, mem_ctx = ralloc_context(NULL); + brw_assign_common_binding_table_offsets(MESA_SHADER_VERTEX, + brw->intelScreen->devinfo, + prog, &vp->program.Base, + &prog_data.base.base, 0); + /* Allocate the references to the uniforms that will end up in the * prog_data associated with the compiled program, and which will be freed * by the state cache. */ - int param_count; - if (vs) { - /* We add padding around uniform values below vec4 size, with the worst - * case being a float value that gets blown up to a vec4, so be - * conservative here. - */ - param_count = vs->base.num_uniform_components * 4 + - vs->base.NumImages * BRW_IMAGE_PARAM_SIZE; - stage_prog_data->nr_image_params = vs->base.NumImages; - } else { - param_count = vp->program.Base.Parameters->NumParameters * 4; - } + int param_count = vp->program.Base.nir->num_uniforms; + if (!brw->intelScreen->compiler->scalar_vs) + param_count *= 4; + + if (vs) + prog_data.base.base.nr_image_params = vs->base.NumImages; + /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip * planes as uniforms. 
*/ @@ -143,6 +135,15 @@ brw_codegen_vs_prog(struct brw_context *brw, stage_prog_data->nr_image_params); stage_prog_data->nr_params = param_count; + if (prog) { + brw_nir_setup_glsl_uniforms(vp->program.Base.nir, prog, &vp->program.Base, + &prog_data.base.base, + brw->intelScreen->compiler->scalar_vs); + } else { + brw_nir_setup_arb_uniforms(vp->program.Base.nir, &vp->program.Base, + &prog_data.base.base); + } + GLbitfield64 outputs_written = vp->program.Base.OutputsWritten; prog_data.inputs_read = vp->program.Base.InputsRead; diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h index 3a847fcd28a..96d2435a515 100644 --- a/src/mesa/drivers/dri/i965/brw_vs.h +++ b/src/mesa/drivers/dri/i965/brw_vs.h @@ -64,7 +64,6 @@ const unsigned *brw_vs_emit(struct brw_context *brw, void brw_vs_debug_recompile(struct brw_context *brw, struct gl_shader_program *prog, const struct brw_vs_prog_key *key); -bool brw_vs_prog_data_compare(const void *a, const void *b); void brw_upload_vs_prog(struct brw_context *brw); @@ -88,8 +87,7 @@ public: void *log_data, const struct brw_vs_prog_key *key, struct brw_vs_prog_data *vs_prog_data, - struct gl_vertex_program *vp, - struct gl_shader_program *prog, + nir_shader *shader, gl_clip_plane *clip_planes, void *mem_ctx, int shader_time_index, @@ -100,7 +98,6 @@ protected: const glsl_type *type); virtual void setup_payload(); virtual void emit_prolog(); - virtual void emit_program_code(); virtual void emit_thread_end(); virtual void emit_urb_write_header(int mrf); virtual void emit_urb_slot(dst_reg reg, int varying); @@ -116,7 +113,6 @@ private: const struct brw_vs_prog_key *const key; struct brw_vs_prog_data * const vs_prog_data; - struct gl_vertex_program *const vp; src_reg *vp_temp_regs; src_reg vp_addr_reg; diff --git a/src/mesa/drivers/dri/i965/brw_wm.c b/src/mesa/drivers/dri/i965/brw_wm.c index 1faf2eaa346..087bf5a53e1 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.c +++ b/src/mesa/drivers/dri/i965/brw_wm.c @@ -35,6 +35,7 @@ #include "program/prog_parameter.h" #include "program/program.h" #include "intel_mipmap_tree.h" +#include "brw_nir.h" #include "util/ralloc.h" @@ -131,23 +132,24 @@ computed_depth_mode(struct gl_fragment_program *fp) return BRW_PSCDEPTH_OFF; } -bool -brw_wm_prog_data_compare(const void *in_a, const void *in_b) +static void +assign_fs_binding_table_offsets(const struct brw_device_info *devinfo, + const struct gl_shader_program *shader_prog, + const struct gl_program *prog, + const struct brw_wm_prog_key *key, + struct brw_wm_prog_data *prog_data) { - const struct brw_wm_prog_data *a = in_a; - const struct brw_wm_prog_data *b = in_b; - - /* Compare the base structure. */ - if (!brw_stage_prog_data_compare(&a->base, &b->base)) - return false; + uint32_t next_binding_table_offset = 0; - /* Compare the rest of the structure. */ - const unsigned offset = sizeof(struct brw_stage_prog_data); - if (memcmp(((char *) a) + offset, ((char *) b) + offset, - sizeof(struct brw_wm_prog_data) - offset)) - return false; + /* If there are no color regions, we still perform an FB write to a null + * renderbuffer, which we place at surface index 0. 
+ */ + prog_data->binding_table.render_target_start = next_binding_table_offset; + next_binding_table_offset += MAX2(key->nr_color_regions, 1); - return true; + brw_assign_common_binding_table_offsets(MESA_SHADER_FRAGMENT, devinfo, + shader_prog, prog, &prog_data->base, + next_binding_table_offset); } /** @@ -188,18 +190,16 @@ brw_codegen_wm_prog(struct brw_context *brw, if (!prog) prog_data.base.use_alt_mode = true; + assign_fs_binding_table_offsets(brw->intelScreen->devinfo, prog, + &fp->program.Base, key, &prog_data); + /* Allocate the references to the uniforms that will end up in the * prog_data associated with the compiled program, and which will be freed * by the state cache. */ - int param_count; - if (fs) { - param_count = fs->base.num_uniform_components + - fs->base.NumImages * BRW_IMAGE_PARAM_SIZE; + int param_count = fp->program.Base.nir->num_uniforms; + if (fs) prog_data.base.nr_image_params = fs->base.NumImages; - } else { - param_count = fp->program.Base.Parameters->NumParameters * 4; - } /* The backend also sometimes adds params for texture size. */ param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; prog_data.base.param = @@ -211,6 +211,14 @@ brw_codegen_wm_prog(struct brw_context *brw, prog_data.base.nr_image_params); prog_data.base.nr_params = param_count; + if (prog) { + brw_nir_setup_glsl_uniforms(fp->program.Base.nir, prog, &fp->program.Base, + &prog_data.base, true); + } else { + brw_nir_setup_arb_uniforms(fp->program.Base.nir, &fp->program.Base, + &prog_data.base); + } + prog_data.barycentric_interp_modes = brw_compute_barycentric_interp_modes(brw, key->flat_shade, key->persample_shading, diff --git a/src/mesa/drivers/dri/i965/brw_wm.h b/src/mesa/drivers/dri/i965/brw_wm.h index 73a741f89e4..667edf2eddf 100644 --- a/src/mesa/drivers/dri/i965/brw_wm.h +++ b/src/mesa/drivers/dri/i965/brw_wm.h @@ -85,7 +85,6 @@ bool brw_codegen_wm_prog(struct brw_context *brw, void brw_wm_debug_recompile(struct brw_context *brw, struct gl_shader_program *prog, const struct brw_wm_prog_key *key); -bool brw_wm_prog_data_compare(const void *a, const void *b); void brw_upload_wm_prog(struct brw_context *brw); diff --git a/src/mesa/drivers/dri/i965/brw_wm_iz.cpp b/src/mesa/drivers/dri/i965/brw_wm_iz.cpp index 14930eb0184..6f22f294476 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_iz.cpp +++ b/src/mesa/drivers/dri/i965/brw_wm_iz.cpp @@ -124,12 +124,11 @@ void fs_visitor::setup_payload_gen4() { assert(stage == MESA_SHADER_FRAGMENT); brw_wm_prog_key *key = (brw_wm_prog_key*) this->key; - gl_fragment_program *fp = (gl_fragment_program*) prog; GLuint reg = 2; bool kill_stats_promoted_workaround = false; int lookup = key->iz_lookup; bool uses_depth = - (fp->Base.InputsRead & (1 << VARYING_SLOT_POS)) != 0; + (nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0; assert(lookup < IZ_BIT_MAX); diff --git a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c index c9316963840..c671e23827e 100644 --- a/src/mesa/drivers/dri/i965/brw_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/brw_wm_surface_state.c @@ -380,7 +380,7 @@ brw_update_texture_surface(struct gl_context *ctx, surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) | SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD)); - surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0; + surf[5] = mt->valign == 4 ? 
BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0; /* Emit relocation to surface contents */ drm_intel_bo_emit_reloc(brw->batch.bo, @@ -718,7 +718,7 @@ brw_update_renderbuffer_surface(struct brw_context *brw, assert(tile_y % 2 == 0); surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT | (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT | - (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0)); + (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0)); if (brw->gen < 6) { /* _NEW_COLOR */ @@ -1001,6 +1001,32 @@ const struct brw_tracked_state brw_wm_ubo_surfaces = { .emit = brw_upload_wm_ubo_surfaces, }; +static void +brw_upload_cs_ubo_surfaces(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->ctx; + /* _NEW_PROGRAM */ + struct gl_shader_program *prog = + ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE]; + + if (!prog) + return; + + /* BRW_NEW_CS_PROG_DATA */ + brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE], + &brw->cs.base, &brw->cs.prog_data->base, true); +} + +const struct brw_tracked_state brw_cs_ubo_surfaces = { + .dirty = { + .mesa = _NEW_PROGRAM, + .brw = BRW_NEW_BATCH | + BRW_NEW_CS_PROG_DATA | + BRW_NEW_UNIFORM_BUFFER, + }, + .emit = brw_upload_cs_ubo_surfaces, +}; + void brw_upload_abo_surfaces(struct brw_context *brw, struct gl_shader_program *prog, diff --git a/src/mesa/drivers/dri/i965/gen6_blorp.cpp b/src/mesa/drivers/dri/i965/gen6_blorp.cpp index cba5c2f456a..74f3a70a64e 100644 --- a/src/mesa/drivers/dri/i965/gen6_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen6_blorp.cpp @@ -413,7 +413,7 @@ gen6_blorp_emit_surface_state(struct brw_context *brw, assert(tile_y % 2 == 0); surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT | (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT | - (surface->mt->align_h == 4 ? + (surface->mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0)); /* Emit relocation to surface contents */ diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp index 4c9c96028f8..def21d80b24 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.cpp @@ -35,15 +35,6 @@ const unsigned MAX_GS_INPUT_VERTICES = 6; namespace brw { -void -gen6_gs_visitor::assign_binding_table_offsets() -{ - /* In gen6 we reserve the first BRW_MAX_SOL_BINDINGS entries for transform - * feedback surfaces. 
- */ - assign_common_binding_table_offsets(BRW_MAX_SOL_BINDINGS); -} - void gen6_gs_visitor::emit_prolog() { @@ -146,27 +137,6 @@ gen6_gs_visitor::emit_prolog() } } -void -gen6_gs_visitor::visit(ir_emit_vertex *ir) -{ - /* To ensure that we don't output more vertices than the shader specified - * using max_vertices, do the logic inside a conditional of the form "if - * (vertex_count < MAX)" - */ - unsigned num_output_vertices = c->gp->program.VerticesOut; - emit(CMP(dst_null_d(), this->vertex_count, - src_reg(num_output_vertices), BRW_CONDITIONAL_L)); - emit(IF(BRW_PREDICATE_NORMAL)); - - gs_emit_vertex(ir->stream_id()); - - this->current_annotation = "emit vertex: increment vertex count"; - emit(ADD(dst_reg(this->vertex_count), this->vertex_count, - src_reg(1u))); - - emit(BRW_OPCODE_ENDIF); -} - void gen6_gs_visitor::gs_emit_vertex(int stream_id) { @@ -230,12 +200,6 @@ gen6_gs_visitor::gs_emit_vertex(int stream_id) this->vertex_output_offset, 1u)); } -void -gen6_gs_visitor::visit(ir_end_primitive *) -{ - gs_end_primitive(); -} - void gen6_gs_visitor::gs_end_primitive() { @@ -356,9 +320,7 @@ gen6_gs_visitor::emit_thread_end() if (c->gp->program.OutputType != GL_POINTS) { emit(CMP(dst_null_d(), this->first_vertex, 0u, BRW_CONDITIONAL_Z)); emit(IF(BRW_PREDICATE_NORMAL)); - { - visit((ir_end_primitive *) NULL); - } + gs_end_primitive(); emit(BRW_OPCODE_ENDIF); } diff --git a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h index 4cf94893261..41c6d183acd 100644 --- a/src/mesa/drivers/dri/i965/gen6_gs_visitor.h +++ b/src/mesa/drivers/dri/i965/gen6_gs_visitor.h @@ -39,18 +39,16 @@ public: void *log_data, struct brw_gs_compile *c, struct gl_shader_program *prog, + nir_shader *shader, void *mem_ctx, bool no_spills, int shader_time_index) : - vec4_gs_visitor(comp, log_data, c, prog, mem_ctx, no_spills, + vec4_gs_visitor(comp, log_data, c, prog, shader, mem_ctx, no_spills, shader_time_index) {} protected: - virtual void assign_binding_table_offsets(); virtual void emit_prolog(); virtual void emit_thread_end(); - virtual void visit(ir_emit_vertex *); - virtual void visit(ir_end_primitive *); virtual void gs_emit_vertex(int stream_id); virtual void gs_end_primitive(); virtual void emit_urb_write_header(int mrf); diff --git a/src/mesa/drivers/dri/i965/gen6_surface_state.c b/src/mesa/drivers/dri/i965/gen6_surface_state.c index 39de62f2304..d892c932af4 100644 --- a/src/mesa/drivers/dri/i965/gen6_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen6_surface_state.c @@ -126,7 +126,7 @@ gen6_update_renderbuffer_surface(struct brw_context *brw, SET_FIELD(min_array_element, BRW_SURFACE_MIN_ARRAY_ELEMENT) | SET_FIELD(depth - 1, BRW_SURFACE_RENDER_TARGET_VIEW_EXTENT); - surf[5] = (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0); + surf[5] = (mt->valign == 4 ? 
BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0); drm_intel_bo_emit_reloc(brw->batch.bo, offset + 4, diff --git a/src/mesa/drivers/dri/i965/gen7_blorp.cpp b/src/mesa/drivers/dri/i965/gen7_blorp.cpp index 9822dc1fe79..f90e78e43c7 100644 --- a/src/mesa/drivers/dri/i965/gen7_blorp.cpp +++ b/src/mesa/drivers/dri/i965/gen7_blorp.cpp @@ -158,9 +158,9 @@ gen7_blorp_emit_surface_state(struct brw_context *brw, surface->brw_surfaceformat << BRW_SURFACE_FORMAT_SHIFT | gen7_surface_tiling_mode(tiling); - if (surface->mt->align_h == 4) + if (surface->mt->valign == 4) surf[0] |= GEN7_SURFACE_VALIGN_4; - if (surface->mt->align_w == 8) + if (surface->mt->halign == 8) surf[0] |= GEN7_SURFACE_HALIGN_8; if (surface->array_layout == ALL_SLICES_AT_EACH_LOD) diff --git a/src/mesa/drivers/dri/i965/gen7_cs_state.c b/src/mesa/drivers/dri/i965/gen7_cs_state.c index 0b88b2c0e71..5edc4fc9842 100644 --- a/src/mesa/drivers/dri/i965/gen7_cs_state.c +++ b/src/mesa/drivers/dri/i965/gen7_cs_state.c @@ -72,7 +72,7 @@ brw_upload_cs_state(struct brw_context *brw) if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) { local_id_dwords = - brw_cs_prog_local_id_payload_dwords(prog, cs_prog_data->simd_size); + brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size); } unsigned push_constant_data_size = @@ -216,8 +216,7 @@ const struct brw_tracked_state brw_cs_state = { * */ unsigned -brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog, - unsigned dispatch_width) +brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width) { return 3 * dispatch_width; } @@ -272,7 +271,7 @@ brw_upload_cs_push_constants(struct brw_context *brw, if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) { local_id_dwords = - brw_cs_prog_local_id_payload_dwords(prog, cs_prog_data->simd_size); + brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size); } /* Updates the ParamaterValues[i] pointers for all parameters of the diff --git a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c index 15ab2b0eae3..5080f1c3fe4 100644 --- a/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen7_wm_surface_state.c @@ -288,9 +288,9 @@ gen7_emit_texture_surface_state(struct brw_context *brw, if (target == GL_TEXTURE_CUBE_MAP || target == GL_TEXTURE_CUBE_MAP_ARRAY) surf[0] |= BRW_SURFACE_CUBEFACE_ENABLES; - if (mt->align_h == 4) + if (mt->valign == 4) surf[0] |= GEN7_SURFACE_VALIGN_4; - if (mt->align_w == 8) + if (mt->halign == 8) surf[0] |= GEN7_SURFACE_HALIGN_8; if (_mesa_is_array_texture(target) || target == GL_TEXTURE_CUBE_MAP) @@ -509,9 +509,9 @@ gen7_update_renderbuffer_surface(struct brw_context *brw, GEN7_SURFACE_ARYSPC_LOD0 : GEN7_SURFACE_ARYSPC_FULL) | gen7_surface_tiling_mode(mt->tiling); - if (irb->mt->align_h == 4) + if (irb->mt->valign == 4) surf[0] |= GEN7_SURFACE_VALIGN_4; - if (irb->mt->align_w == 8) + if (irb->mt->halign == 8) surf[0] |= GEN7_SURFACE_HALIGN_8; if (is_array) { diff --git a/src/mesa/drivers/dri/i965/gen8_surface_state.c b/src/mesa/drivers/dri/i965/gen8_surface_state.c index c5f1bae8ecb..e1e7704655d 100644 --- a/src/mesa/drivers/dri/i965/gen8_surface_state.c +++ b/src/mesa/drivers/dri/i965/gen8_surface_state.c @@ -95,7 +95,7 @@ vertical_alignment(const struct brw_context *brw, surf_type == BRW_SURFACE_1D)) return GEN8_SURFACE_VALIGN_4; - switch (mt->align_h) { + switch (mt->valign) { case 4: return GEN8_SURFACE_VALIGN_4; case 8: @@ -120,7 +120,7 @@ horizontal_alignment(const struct brw_context *brw, gen9_use_linear_1d_layout(brw, 
mt))) return GEN8_SURFACE_HALIGN_4; - switch (mt->align_w) { + switch (mt->halign) { case 4: return GEN8_SURFACE_HALIGN_4; case 8: @@ -221,8 +221,8 @@ gen8_emit_texture_surface_state(struct brw_context *brw, * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN * 16 must be used." */ - assert(brw->gen < 9 || mt->align_w == 16); - assert(brw->gen < 8 || mt->num_samples > 1 || mt->align_w == 16); + assert(brw->gen < 9 || mt->halign == 16); + assert(brw->gen < 8 || mt->num_samples > 1 || mt->halign == 16); } const uint32_t surf_type = translate_tex_target(target); @@ -470,8 +470,8 @@ gen8_update_renderbuffer_surface(struct brw_context *brw, * "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN * 16 must be used." */ - assert(brw->gen < 9 || mt->align_w == 16); - assert(brw->gen < 8 || mt->num_samples > 1 || mt->align_w == 16); + assert(brw->gen < 9 || mt->halign == 16); + assert(brw->gen < 8 || mt->num_samples > 1 || mt->halign == 16); } uint32_t *surf = allocate_surface_state(brw, &offset, surf_index); diff --git a/src/mesa/drivers/dri/i965/intel_copy_image.c b/src/mesa/drivers/dri/i965/intel_copy_image.c index d57651cef5f..0a3337ee285 100644 --- a/src/mesa/drivers/dri/i965/intel_copy_image.c +++ b/src/mesa/drivers/dri/i965/intel_copy_image.c @@ -235,7 +235,7 @@ intel_copy_image_sub_data(struct gl_context *ctx, } else { assert(dst_renderbuffer); dst_mt = intel_renderbuffer(dst_renderbuffer)->mt; - src_image = src_renderbuffer->TexImage; + dst_image = dst_renderbuffer->TexImage; } if (src_mt->num_samples > 0 || dst_mt->num_samples > 0) { diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index 9c0304c7684..ffc356c9240 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -694,7 +694,7 @@ intel_miptree_create(struct brw_context *brw, if (intel_tiling_supports_non_msrt_mcs(brw, mt->tiling) && intel_miptree_is_fast_clear_capable(brw, mt)) { mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED; - assert(brw->gen < 8 || mt->align_w == 16 || num_samples <= 1); + assert(brw->gen < 8 || mt->halign == 16 || num_samples <= 1); } return mt; @@ -2168,16 +2168,18 @@ intel_miptree_map_blit(struct brw_context *brw, struct intel_miptree_map *map, unsigned int level, unsigned int slice) { - map->mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format, - 0, 0, - map->w, map->h, 1, - 0, MIPTREE_LAYOUT_TILING_NONE); + map->linear_mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format, + /* first_level */ 0, + /* last_level */ 0, + map->w, map->h, 1, + /* samples */ 0, + MIPTREE_LAYOUT_TILING_NONE); - if (!map->mt) { + if (!map->linear_mt) { fprintf(stderr, "Failed to allocate blit temporary\n"); goto fail; } - map->stride = map->mt->pitch; + map->stride = map->linear_mt->pitch; /* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no * INVALIDATE_RANGE_BIT. 
WRITE_BIT needs the original values read in unless @@ -2188,7 +2190,7 @@ intel_miptree_map_blit(struct brw_context *brw, if (!intel_miptree_blit(brw, mt, level, slice, map->x, map->y, false, - map->mt, 0, 0, + map->linear_mt, 0, 0, 0, 0, false, map->w, map->h, GL_COPY)) { fprintf(stderr, "Failed to blit\n"); @@ -2196,7 +2198,7 @@ intel_miptree_map_blit(struct brw_context *brw, } } - map->ptr = intel_miptree_map_raw(brw, map->mt); + map->ptr = intel_miptree_map_raw(brw, map->linear_mt); DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__, map->x, map->y, map->w, map->h, @@ -2206,7 +2208,7 @@ intel_miptree_map_blit(struct brw_context *brw, return; fail: - intel_miptree_release(&map->mt); + intel_miptree_release(&map->linear_mt); map->ptr = NULL; map->stride = 0; } @@ -2220,11 +2222,11 @@ intel_miptree_unmap_blit(struct brw_context *brw, { struct gl_context *ctx = &brw->ctx; - intel_miptree_unmap_raw(map->mt); + intel_miptree_unmap_raw(map->linear_mt); if (map->mode & GL_MAP_WRITE_BIT) { bool ok = intel_miptree_blit(brw, - map->mt, 0, 0, + map->linear_mt, 0, 0, 0, 0, false, mt, level, slice, map->x, map->y, false, @@ -2232,7 +2234,7 @@ intel_miptree_unmap_blit(struct brw_context *brw, WARN_ONCE(!ok, "Failed to blit from linear temporary mapping"); } - intel_miptree_release(&map->mt); + intel_miptree_release(&map->linear_mt); } /** @@ -2756,7 +2758,7 @@ intel_miptree_unmap(struct brw_context *brw, intel_miptree_unmap_etc(brw, mt, map, level, slice); } else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) { intel_miptree_unmap_depthstencil(brw, mt, map, level, slice); - } else if (map->mt) { + } else if (map->linear_mt) { intel_miptree_unmap_blit(brw, mt, map, level, slice); #if defined(USE_SSE41) } else if (map->buffer && cpu_has_sse4_1) { diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h index 7610d754451..486e5c6f43b 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.h +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.h @@ -62,9 +62,11 @@ struct intel_resolve_map; struct intel_texture_image; /** + * This bit extends the set of GL_MAP_*_BIT enums. + * * When calling intel_miptree_map() on an ETC-transcoded-to-RGB miptree or a * depthstencil-split-to-separate-stencil miptree, we'll normally make a - * tmeporary and recreate the kind of data requested by Mesa core, since we're + * temporary and recreate the kind of data requested by Mesa core, since we're * satisfying some glGetTexImage() request or something. * * However, occasionally you want to actually map the miptree's current data @@ -73,14 +75,14 @@ struct intel_texture_image; #define BRW_MAP_DIRECT_BIT 0x80000000 struct intel_miptree_map { - /** Bitfield of GL_MAP_READ_BIT, GL_MAP_WRITE_BIT, GL_MAP_INVALIDATE_BIT */ + /** Bitfield of GL_MAP_*_BIT and BRW_MAP_*_BIT. */ GLbitfield mode; /** Region of interest for the map. */ int x, y, w, h; /** Possibly malloced temporary buffer for the mapping. */ void *buffer; /** Possible pointer to a temporary linear miptree for the mapping. */ - struct intel_mipmap_tree *mt; + struct intel_mipmap_tree *linear_mt; /** Pointer to the start of (map_x, map_y) returned by the mapping. */ void *ptr; /** Stride of the mapping. */ @@ -140,6 +142,9 @@ struct intel_mipmap_level * \code * x = mt->level[l].slice[s].x_offset * y = mt->level[l].slice[s].y_offset + * + * On some hardware generations, we program these offsets into + * RENDER_SURFACE_STATE.XOffset and RENDER_SURFACE_STATE.YOffset. 
*/ GLuint x_offset; GLuint y_offset; @@ -168,12 +173,16 @@ enum intel_msaa_layout * accommodated by scaling up the width and the height of the surface so * that all the samples corresponding to a pixel are located at nearby * memory locations. + * + * @see PRM section "Interleaved Multisampled Surfaces" */ INTEL_MSAA_LAYOUT_IMS, /** * Uncompressed Multisample Surface. The surface is stored as a 2D array, * with array slice n containing all pixel data for sample n. + * + * @see PRM section "Uncompressed Multisampled Surfaces" */ INTEL_MSAA_LAYOUT_UMS, @@ -185,6 +194,8 @@ enum intel_msaa_layout * the common case (where all samples constituting a pixel have the same * color value) to be stored efficiently by just using a single array * slice. + * + * @see PRM section "Compressed Multisampled Surfaces" */ INTEL_MSAA_LAYOUT_CMS, }; @@ -318,14 +329,34 @@ enum miptree_array_layout { */ struct intel_miptree_aux_buffer { - /** Buffer object containing the pixel data. */ + /** + * Buffer object containing the pixel data. + * + * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress + * @see 3DSTATE_HIER_DEPTH_BUFFER.AuxiliarySurfaceBaseAddress + */ drm_intel_bo *bo; - uint32_t pitch; /**< pitch in bytes. */ + /** + * Pitch in bytes. + * + * @see RENDER_SURFACE_STATE.AuxiliarySurfacePitch + * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfacePitch + */ + uint32_t pitch; - uint32_t qpitch; /**< The distance in rows between array slices. */ + /** + * The distance in rows between array slices. + * + * @see RENDER_SURFACE_STATE.AuxiliarySurfaceQPitch + * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceQPitch + */ + uint32_t qpitch; - struct intel_mipmap_tree *mt; /**< hiz miptree used with Gen6 */ + /** + * Hiz miptree. Used only by Gen6. + */ + struct intel_mipmap_tree *mt; }; /* Tile resource modes */ @@ -337,15 +368,49 @@ enum intel_miptree_tr_mode { struct intel_mipmap_tree { - /** Buffer object containing the pixel data. */ + /** + * Buffer object containing the surface. + * + * @see intel_mipmap_tree::offset + * @see RENDER_SURFACE_STATE.SurfaceBaseAddress + * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress + * @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress + * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress + * @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress + */ drm_intel_bo *bo; - uint32_t pitch; /**< pitch in bytes. */ + /** + * Pitch in bytes. + * + * @see RENDER_SURFACE_STATE.SurfacePitch + * @see RENDER_SURFACE_STATE.AuxiliarySurfacePitch + * @see 3DSTATE_DEPTH_BUFFER.SurfacePitch + * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfacePitch + * @see 3DSTATE_STENCIL_BUFFER.SurfacePitch + */ + uint32_t pitch; + + /** + * One of the I915_TILING_* flags. + * + * @see RENDER_SURFACE_STATE.TileMode + * @see 3DSTATE_DEPTH_BUFFER.TileMode + */ + uint32_t tiling; - uint32_t tiling; /**< One of the I915_TILING_* flags */ + /** + * @see RENDER_SURFACE_STATE.TiledResourceMode + * @see 3DSTATE_DEPTH_BUFFER.TiledResourceMode + */ enum intel_miptree_tr_mode tr_mode; - /* Effectively the key: + /** + * @brief One of GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY, etc. + * + * @see RENDER_SURFACE_STATE.SurfaceType + * @see RENDER_SURFACE_STATE.SurfaceArray + * @see 3DSTATE_DEPTH_BUFFER.SurfaceType */ GLenum target; @@ -362,18 +427,43 @@ struct intel_mipmap_tree * * For ETC1/ETC2 textures, this is one of the uncompressed mesa texture * formats if the hardware lacks support for ETC1/ETC2. See @ref etc_format. 
+ * + * @see RENDER_SURFACE_STATE.SurfaceFormat + * @see 3DSTATE_DEPTH_BUFFER.SurfaceFormat */ mesa_format format; - /** This variable stores the value of ETC compressed texture format */ + /** + * This variable stores the value of ETC compressed texture format + * + * @see RENDER_SURFACE_STATE.SurfaceFormat + */ mesa_format etc_format; /** - * The X offset of each image in the miptree must be aligned to this. - * See the comments in brw_tex_layout.c. + * @name Surface Alignment + * @{ + * + * This defines the alignment of the upperleft pixel of each "slice" in the + * surface. The alignment is in pixel coordinates relative to the surface's + * most upperleft pixel, which is the pixel at (x=0, y=0, layer=0, + * level=0). + * + * The hardware docs do not use the term "slice". We use "slice" to mean + * the pixels at a given miplevel and layer. For 2D surfaces, the layer is + * the array slice; for 3D surfaces, the layer is the z offset. + * + * In the surface layout equations found in the hardware docs, the + * horizontal and vertical surface alignments often appear as variables 'i' + * and 'j'. */ - unsigned int align_w; - unsigned int align_h; /**< \see align_w */ + + /** @see RENDER_SURFACE_STATE.SurfaceHorizontalAlignment */ + uint32_t halign; + + /** @see RENDER_SURFACE_STATE.SurfaceVerticalAlignment */ + uint32_t valign; + /** @} */ GLuint first_level; GLuint last_level; @@ -388,19 +478,47 @@ struct intel_mipmap_tree */ GLuint physical_width0, physical_height0, physical_depth0; - GLuint cpp; /**< bytes per pixel (or bytes per block if compressed) */ + /** Bytes per pixel (or bytes per block if compressed) */ + GLuint cpp; + + /** + * @see RENDER_SURFACE_STATE.NumberOfMultisamples + * @see 3DSTATE_MULTISAMPLE.NumberOfMultisamples + */ GLuint num_samples; + bool compressed; /** - * Level zero image dimensions. These dimensions correspond to the + * @name Level zero image dimensions + * @{ + * + * These dimensions correspond to the * logical width, height, and depth of the texture as seen by client code. * Accordingly, they do not account for the extra width, height, and/or * depth that must be allocated in order to accommodate multisample * formats, nor do they account for the extra factor of 6 in depth that * must be allocated in order to accommodate cubemap textures. */ - uint32_t logical_width0, logical_height0, logical_depth0; + + /** + * @see RENDER_SURFACE_STATE.Width + * @see 3DSTATE_DEPTH_BUFFER.Width + */ + uint32_t logical_width0; + + /** + * @see RENDER_SURFACE_STATE.Height + * @see 3DSTATE_DEPTH_BUFFER.Height + */ + uint32_t logical_height0; + + /** + * @see RENDER_SURFACE_STATE.Depth + * @see 3DSTATE_DEPTH_BUFFER.Depth + */ + uint32_t logical_depth0; + /** @} */ /** * Indicates if we use the standard miptree layout (ALL_LOD_IN_EACH_SLICE), @@ -417,11 +535,18 @@ struct intel_mipmap_tree * surfaces it is the number of blocks. For 1D array surfaces that have the * mipmap tree stored horizontally it is the number of pixels between each * slice. + * + * @see RENDER_SURFACE_STATE.SurfaceQPitch + * @see 3DSTATE_DEPTH_BUFFER.SurfaceQPitch + * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceQPitch + * @see 3DSTATE_STENCIL_BUFFER.SurfaceQPitch */ uint32_t qpitch; /** * MSAA layout used by this buffer. 
+ * + * @see RENDER_SURFACE_STATE.MultisampledSurfaceStorageFormat */ enum intel_msaa_layout msaa_layout; @@ -430,24 +555,34 @@ struct intel_mipmap_tree GLuint total_width; GLuint total_height; - /* The 3DSTATE_CLEAR_PARAMS value associated with the last depth clear to - * this depth mipmap tree, if any. + /** + * The depth value used during the most recent fast depth clear performed + * on the surface. This field is invalid only if surface has never + * underwent a fast depth clear. + * + * @see 3DSTATE_CLEAR_PARAMS.DepthClearValue */ uint32_t depth_clear_value; - /* Includes image offset tables: - */ + /* Includes image offset tables: */ struct intel_mipmap_level level[MAX_TEXTURE_LEVELS]; - /* Offset into bo where miptree starts: + /** + * Offset into bo where the surface starts. + * + * @see intel_mipmap_tree::bo + * + * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress + * @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress + * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress + * @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress */ uint32_t offset; /** * \brief HiZ aux buffer * - * The hiz miptree contains the miptree's hiz buffer. To allocate the hiz - * buffer, use intel_miptree_alloc_hiz(). + * To allocate the hiz buffer, use intel_miptree_alloc_hiz(). * * To determine if hiz is enabled, do not check this pointer. Instead, use * intel_miptree_slice_has_hiz(). @@ -472,6 +607,7 @@ struct intel_mipmap_tree * require separate stencil. It always has the true copy of the stencil * bits, regardless of mt->format. * + * \see 3DSTATE_STENCIL_BUFFER * \see intel_miptree_map_depthstencil() * \see intel_miptree_unmap_depthstencil() */ @@ -499,6 +635,11 @@ struct intel_mipmap_tree * * This value will only ever contain ones in bits 28-31, so it is safe to * OR into dword 7 of SURFACE_STATE. 
+ * + * @see RENDER_SURFACE_STATE.RedClearColor + * @see RENDER_SURFACE_STATE.GreenClearColor + * @see RENDER_SURFACE_STATE.BlueClearColor + * @see RENDER_SURFACE_STATE.AlphaClearColor */ uint32_t fast_clear_color_value; diff --git a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp index ba67bc59e19..8adb626d420 100644 --- a/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_cmod_propagation.cpp @@ -46,10 +46,10 @@ class cmod_propagation_fs_visitor : public fs_visitor public: cmod_propagation_fs_visitor(struct brw_compiler *compiler, struct brw_wm_prog_data *prog_data, - struct gl_shader_program *shader_prog) - : fs_visitor(compiler, NULL, NULL, MESA_SHADER_FRAGMENT, NULL, - &prog_data->base, shader_prog, - (struct gl_program *) NULL, 8, -1) {} + nir_shader *shader) + : fs_visitor(compiler, NULL, NULL, NULL, + &prog_data->base, (struct gl_program *) NULL, + shader, 8, -1) {} }; @@ -62,9 +62,9 @@ void cmod_propagation_test::SetUp() fp = ralloc(NULL, struct brw_fragment_program); prog_data = ralloc(NULL, struct brw_wm_prog_data); - shader_prog = ralloc(NULL, struct gl_shader_program); + nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_FRAGMENT, NULL); - v = new cmod_propagation_fs_visitor(compiler, prog_data, shader_prog); + v = new cmod_propagation_fs_visitor(compiler, prog_data, shader); _mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0); diff --git a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp index 1caa0b50ec6..f77b18e7db8 100644 --- a/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_fs_saturate_propagation.cpp @@ -46,10 +46,10 @@ class saturate_propagation_fs_visitor : public fs_visitor public: saturate_propagation_fs_visitor(struct brw_compiler *compiler, struct brw_wm_prog_data *prog_data, - struct gl_shader_program *shader_prog) - : fs_visitor(compiler, NULL, NULL, MESA_SHADER_FRAGMENT, NULL, - &prog_data->base, shader_prog, - (struct gl_program *) NULL, 8, -1) {} + nir_shader *shader) + : fs_visitor(compiler, NULL, NULL, NULL, + &prog_data->base, (struct gl_program *) NULL, + shader, 8, -1) {} }; @@ -62,9 +62,9 @@ void saturate_propagation_test::SetUp() fp = ralloc(NULL, struct brw_fragment_program); prog_data = ralloc(NULL, struct brw_wm_prog_data); - shader_prog = ralloc(NULL, struct gl_shader_program); + nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_FRAGMENT, NULL); - v = new saturate_propagation_fs_visitor(compiler, prog_data, shader_prog); + v = new saturate_propagation_fs_visitor(compiler, prog_data, shader); _mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0); diff --git a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp index fbd9fa8f19b..40253961a65 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_copy_propagation.cpp @@ -45,9 +45,8 @@ class copy_propagation_vec4_visitor : public vec4_visitor { public: copy_propagation_vec4_visitor(struct brw_compiler *compiler, - struct gl_shader_program *shader_prog) - : vec4_visitor(compiler, NULL, NULL, NULL, NULL, shader_prog, - MESA_SHADER_VERTEX, NULL, + nir_shader *shader) + : vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL, false /* no_spills */, -1) { } @@ -69,11 +68,6 @@ protected: unreachable("Not reached"); } - 
virtual void emit_program_code() - { - unreachable("Not reached"); - } - virtual void emit_thread_end() { unreachable("Not reached"); @@ -100,9 +94,9 @@ void copy_propagation_test::SetUp() vp = ralloc(NULL, struct brw_vertex_program); - shader_prog = ralloc(NULL, struct gl_shader_program); + nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL); - v = new copy_propagation_vec4_visitor(compiler, shader_prog); + v = new copy_propagation_vec4_visitor(compiler, shader); _mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0); diff --git a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp index a3055fcc851..76028d36311 100644 --- a/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp +++ b/src/mesa/drivers/dri/i965/test_vec4_register_coalesce.cpp @@ -48,9 +48,8 @@ class register_coalesce_vec4_visitor : public vec4_visitor { public: register_coalesce_vec4_visitor(struct brw_compiler *compiler, - struct gl_shader_program *shader_prog) - : vec4_visitor(compiler, NULL, NULL, NULL, NULL, shader_prog, - MESA_SHADER_VERTEX, NULL, + nir_shader *shader) + : vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL, false /* no_spills */, -1) { } @@ -72,11 +71,6 @@ protected: unreachable("Not reached"); } - virtual void emit_program_code() - { - unreachable("Not reached"); - } - virtual void emit_thread_end() { unreachable("Not reached"); @@ -103,9 +97,9 @@ void register_coalesce_test::SetUp() vp = ralloc(NULL, struct brw_vertex_program); - shader_prog = ralloc(NULL, struct gl_shader_program); + nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL); - v = new register_coalesce_vec4_visitor(compiler, shader_prog); + v = new register_coalesce_vec4_visitor(compiler, shader); _mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0); diff --git a/src/mesa/main/compute.c b/src/mesa/main/compute.c index 8bc3bcd25a0..53e7a500f61 100644 --- a/src/mesa/main/compute.c +++ b/src/mesa/main/compute.c @@ -50,7 +50,7 @@ _mesa_DispatchComputeIndirect(GLintptr indirect) GET_CURRENT_CONTEXT(ctx); if (MESA_VERBOSE & VERBOSE_API) - _mesa_debug(ctx, "glDispatchComputeIndirect(%d)\n", indirect); + _mesa_debug(ctx, "glDispatchComputeIndirect(%ld)\n", (long) indirect); if (!_mesa_validate_DispatchComputeIndirect(ctx, indirect)) return; diff --git a/src/mesa/main/ff_fragment_shader.cpp b/src/mesa/main/ff_fragment_shader.cpp index c6828925f5e..c0030bc5687 100644 --- a/src/mesa/main/ff_fragment_shader.cpp +++ b/src/mesa/main/ff_fragment_shader.cpp @@ -27,29 +27,27 @@ * **************************************************************************/ -#include "glheader.h" -#include "imports.h" -#include "mtypes.h" +#include "main/glheader.h" #include "main/context.h" +#include "main/imports.h" #include "main/macros.h" #include "main/samplerobj.h" #include "main/texenvprogram.h" #include "main/texobj.h" #include "main/uniforms.h" +#include "glsl/ir_builder.h" +#include "glsl/ir_optimization.h" +#include "glsl/glsl_parser_extras.h" +#include "glsl/glsl_symbol_table.h" +#include "glsl/glsl_types.h" +#include "program/ir_to_mesa.h" #include "program/program.h" -#include "program/prog_parameter.h" +#include "program/programopt.h" #include "program/prog_cache.h" #include "program/prog_instruction.h" +#include "program/prog_parameter.h" #include "program/prog_print.h" #include "program/prog_statevars.h" -#include "program/programopt.h" -#include "../glsl/glsl_types.h" -#include "../glsl/ir.h" -#include "../glsl/ir_builder.h" 
-#include "../glsl/glsl_symbol_table.h" -#include "../glsl/glsl_parser_extras.h" -#include "../glsl/ir_optimization.h" -#include "../program/ir_to_mesa.h" using namespace ir_builder; diff --git a/src/mesa/main/ffvertex_prog.c b/src/mesa/main/ffvertex_prog.c index 95b428dca3e..a6183b47e2e 100644 --- a/src/mesa/main/ffvertex_prog.c +++ b/src/mesa/main/ffvertex_prog.c @@ -293,9 +293,10 @@ struct ureg { GLuint file:4; GLint idx:9; /* relative addressing may be negative */ /* sizeof(idx) should == sizeof(prog_src_reg::Index) */ + GLuint abs:1; GLuint negate:1; GLuint swz:12; - GLuint pad:6; + GLuint pad:5; }; @@ -324,6 +325,7 @@ static const struct ureg undef = { 0, 0, 0, + 0, 0 }; @@ -342,6 +344,7 @@ static struct ureg make_ureg(GLuint file, GLint idx) struct ureg reg; reg.file = file; reg.idx = idx; + reg.abs = 0; reg.negate = 0; reg.swz = SWIZZLE_NOOP; reg.pad = 0; @@ -350,6 +353,14 @@ static struct ureg make_ureg(GLuint file, GLint idx) +static struct ureg absolute( struct ureg reg ) +{ + reg.abs = 1; + reg.negate = 0; + return reg; +} + + static struct ureg negate( struct ureg reg ) { reg.negate ^= 1; @@ -526,8 +537,8 @@ static void emit_arg( struct prog_src_register *src, src->File = reg.file; src->Index = reg.idx; src->Swizzle = reg.swz; + src->Abs = reg.abs; src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE; - src->Abs = 0; src->RelAddr = 0; /* Check that bitfield sizes aren't exceeded */ assert(src->Index == reg.idx); @@ -953,7 +964,7 @@ static struct ureg calculate_light_attenuation( struct tnl_program *p, emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm); emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot); - emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W)); + emit_op2(p, OPCODE_POW, spot, 0, absolute(spot), swizzle1(attenuation, W)); emit_op2(p, OPCODE_MUL, att, 0, slt, spot); release_temp(p, spot); diff --git a/src/mesa/main/imports.h b/src/mesa/main/imports.h index d61279ac4e5..90247587be3 100644 --- a/src/mesa/main/imports.h +++ b/src/mesa/main/imports.h @@ -423,6 +423,9 @@ _mesa_vsnprintf(char *str, size_t size, const char *fmt, va_list arg); #define snprintf _snprintf #endif +#if defined(_WIN32) && !defined(strtok_r) +#define strtok_r strtok_s +#endif #ifdef __cplusplus } diff --git a/src/mesa/main/pipelineobj.c b/src/mesa/main/pipelineobj.c index c2e1d29ad80..51ee10ff858 100644 --- a/src/mesa/main/pipelineobj.c +++ b/src/mesa/main/pipelineobj.c @@ -31,6 +31,7 @@ * GL_ARB_separate_shader_objects extension. */ +#include #include "main/glheader.h" #include "main/context.h" #include "main/dispatch.h" @@ -42,12 +43,11 @@ #include "main/shaderobj.h" #include "main/transformfeedback.h" #include "main/uniforms.h" +#include "glsl/glsl_parser_extras.h" +#include "glsl/ir_uniform.h" #include "program/program.h" #include "program/prog_parameter.h" #include "util/ralloc.h" -#include -#include "../glsl/glsl_parser_extras.h" -#include "../glsl/ir_uniform.h" /** * Delete a pipeline object. 
diff --git a/src/mesa/main/shader_query.cpp b/src/mesa/main/shader_query.cpp index 73dee85cc4d..718967605b5 100644 --- a/src/mesa/main/shader_query.cpp +++ b/src/mesa/main/shader_query.cpp @@ -611,11 +611,10 @@ _mesa_program_resource_index(struct gl_shader_program *shProg, return GL_INVALID_INDEX; switch (res->Type) { - case GL_UNIFORM_BLOCK: - case GL_SHADER_STORAGE_BLOCK: - return RESOURCE_UBO(res)- shProg->UniformBlocks; case GL_ATOMIC_COUNTER_BUFFER: return RESOURCE_ATC(res) - shProg->AtomicBuffers; + case GL_UNIFORM_BLOCK: + case GL_SHADER_STORAGE_BLOCK: case GL_TRANSFORM_FEEDBACK_VARYING: default: return calc_resource_index(shProg, res); diff --git a/src/mesa/main/texobj.c b/src/mesa/main/texobj.c index a1be1e33042..173e43c817c 100644 --- a/src/mesa/main/texobj.c +++ b/src/mesa/main/texobj.c @@ -1192,18 +1192,18 @@ invalidate_tex_image_error_check(struct gl_context *ctx, GLuint texture, struct gl_texture_object * _mesa_create_nameless_texture(struct gl_context *ctx, GLenum target) { - struct gl_texture_object *texObj = NULL; - GLint targetIndex; + struct gl_texture_object *texObj = NULL; + GLint targetIndex; - if (target == 0) - return texObj; + if (target == 0) + return texObj; - texObj = ctx->Driver.NewTextureObject(ctx, 0, target); - targetIndex = _mesa_tex_target_to_index(ctx, texObj->Target); - assert(targetIndex < NUM_TEXTURE_TARGETS); - texObj->TargetIndex = targetIndex; + texObj = ctx->Driver.NewTextureObject(ctx, 0, target); + targetIndex = _mesa_tex_target_to_index(ctx, texObj->Target); + assert(targetIndex < NUM_TEXTURE_TARGETS); + texObj->TargetIndex = targetIndex; - return texObj; + return texObj; } /** @@ -1622,25 +1622,79 @@ _mesa_tex_target_to_index(const struct gl_context *ctx, GLenum target) /** - * Bind a named texture to a texturing target. + * Do actual texture binding. All error checking should have been done prior + * to calling this function. Note that the texture target (1D, 2D, etc) is + * always specified by the texObj->TargetIndex. + * + * \param unit index of texture unit to update + * \param texObj the new texture object (cannot be NULL) + */ +static void +bind_texture(struct gl_context *ctx, + unsigned unit, + struct gl_texture_object *texObj) +{ + struct gl_texture_unit *texUnit; + int targetIndex; + + assert(unit < ARRAY_SIZE(ctx->Texture.Unit)); + texUnit = &ctx->Texture.Unit[unit]; + + assert(texObj); + assert(valid_texture_object(texObj)); + + targetIndex = texObj->TargetIndex; + assert(targetIndex >= 0); + assert(targetIndex < NUM_TEXTURE_TARGETS); + + /* Check if this texture is only used by this context and is already bound. + * If so, just return. + */ + { + bool early_out; + mtx_lock(&ctx->Shared->Mutex); + early_out = ((ctx->Shared->RefCount == 1) + && (texObj == texUnit->CurrentTex[targetIndex])); + mtx_unlock(&ctx->Shared->Mutex); + if (early_out) { + return; + } + } + + /* flush before changing binding */ + FLUSH_VERTICES(ctx, _NEW_TEXTURE); + + /* If the refcount on the previously bound texture is decremented to + * zero, it'll be deleted here. + */ + _mesa_reference_texobj(&texUnit->CurrentTex[targetIndex], texObj); + + ctx->Texture.NumCurrentTexUsed = MAX2(ctx->Texture.NumCurrentTexUsed, + unit + 1); + + if (texObj->Name != 0) + texUnit->_BoundTextures |= (1 << targetIndex); + else + texUnit->_BoundTextures &= ~(1 << targetIndex); + + /* Pass BindTexture call to device driver */ + if (ctx->Driver.BindTexture) { + ctx->Driver.BindTexture(ctx, unit, texObj->Target, texObj); + } +} + + +/** + * Implement glBindTexture(). 
Do error checking, look-up or create a new + * texture object, then bind it in the current texture unit. * * \param target texture target. * \param texName texture name. - * - * \sa glBindTexture(). - * - * Determines the old texture object bound and returns immediately if rebinding - * the same texture. Get the current texture which is either a default texture - * if name is null, a named texture from the hash, or a new texture if the - * given texture name is new. Increments its reference count, binds it, and - * calls dd_function_table::BindTexture. Decrements the old texture reference - * count and deletes it if it reaches zero. */ void GLAPIENTRY _mesa_BindTexture( GLenum target, GLuint texName ) { GET_CURRENT_CONTEXT(ctx); - struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx); struct gl_texture_object *newTexObj = NULL; GLint targetIndex; @@ -1702,95 +1756,12 @@ _mesa_BindTexture( GLenum target, GLuint texName ) newTexObj->TargetIndex = targetIndex; } - assert(valid_texture_object(newTexObj)); - - /* Check if this texture is only used by this context and is already bound. - * If so, just return. - */ - { - GLboolean early_out; - mtx_lock(&ctx->Shared->Mutex); - early_out = ((ctx->Shared->RefCount == 1) - && (newTexObj == texUnit->CurrentTex[targetIndex])); - mtx_unlock(&ctx->Shared->Mutex); - if (early_out) { - return; - } - } - - /* flush before changing binding */ - FLUSH_VERTICES(ctx, _NEW_TEXTURE); - - /* Do the actual binding. The refcount on the previously bound - * texture object will be decremented. It'll be deleted if the - * count hits zero. - */ - _mesa_reference_texobj(&texUnit->CurrentTex[targetIndex], newTexObj); - ctx->Texture.NumCurrentTexUsed = MAX2(ctx->Texture.NumCurrentTexUsed, - ctx->Texture.CurrentUnit + 1); - assert(texUnit->CurrentTex[targetIndex]); - - if (texName != 0) - texUnit->_BoundTextures |= (1 << targetIndex); - else - texUnit->_BoundTextures &= ~(1 << targetIndex); - - /* Pass BindTexture call to device driver */ - if (ctx->Driver.BindTexture) - ctx->Driver.BindTexture(ctx, ctx->Texture.CurrentUnit, target, newTexObj); + bind_texture(ctx, ctx->Texture.CurrentUnit, newTexObj); } -/** - * Do the actual binding to a numbered texture unit. - * The refcount on the previously bound - * texture object will be decremented. It'll be deleted if the - * count hits zero. - */ -static void -bind_texture_unit(struct gl_context *ctx, - GLuint unit, - struct gl_texture_object *texObj) -{ - struct gl_texture_unit *texUnit; - - /* Get the texture unit (this is an array look-up) */ - texUnit = _mesa_get_tex_unit_err(ctx, unit, "glBindTextureUnit"); - if (!texUnit) - return; - - /* Check if this texture is only used by this context and is already bound. - * If so, just return. 
- */ - { - bool early_out; - mtx_lock(&ctx->Shared->Mutex); - early_out = ((ctx->Shared->RefCount == 1) - && (texObj == texUnit->CurrentTex[texObj->TargetIndex])); - mtx_unlock(&ctx->Shared->Mutex); - if (early_out) { - return; - } - } - - /* flush before changing binding */ - FLUSH_VERTICES(ctx, _NEW_TEXTURE); - - _mesa_reference_texobj(&texUnit->CurrentTex[texObj->TargetIndex], - texObj); - assert(texUnit->CurrentTex[texObj->TargetIndex]); - ctx->Texture.NumCurrentTexUsed = MAX2(ctx->Texture.NumCurrentTexUsed, - unit + 1); - texUnit->_BoundTextures |= (1 << texObj->TargetIndex); - - - /* Pass BindTexture call to device driver */ - if (ctx->Driver.BindTexture) { - ctx->Driver.BindTexture(ctx, unit, texObj->Target, texObj); - } -} /** - * Bind a named texture to the specified texture unit. + * OpenGL 4.5 / GL_ARB_direct_state_access glBindTextureUnit(). * * \param unit texture unit. * \param texture texture name. @@ -1807,6 +1778,18 @@ _mesa_BindTextureUnit(GLuint unit, GLuint texture) { GET_CURRENT_CONTEXT(ctx); struct gl_texture_object *texObj; + struct gl_texture_unit *texUnit; + + if (unit >= _mesa_max_tex_unit(ctx)) { + _mesa_error(ctx, GL_INVALID_VALUE, "glBindTextureUnit(unit=%u)", unit); + return; + } + + texUnit = _mesa_get_tex_unit(ctx, unit); + assert(texUnit); + if (!texUnit) { + return; + } if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE)) _mesa_debug(ctx, "glBindTextureUnit %s %d\n", @@ -1833,15 +1816,19 @@ _mesa_BindTextureUnit(GLuint unit, GLuint texture) return; } if (texObj->Target == 0) { - _mesa_error(ctx, GL_INVALID_ENUM, "glBindTextureUnit(target)"); + /* Texture object was gen'd but never bound so the target is not set */ + _mesa_error(ctx, GL_INVALID_OPERATION, "glBindTextureUnit(target)"); return; } assert(valid_texture_object(texObj)); - bind_texture_unit(ctx, unit, texObj); + bind_texture(ctx, unit, texObj); } +/** + * OpenGL 4.4 / GL_ARB_multi_bind glBindTextures(). + */ void GLAPIENTRY _mesa_BindTextures(GLuint first, GLsizei count, const GLuint *textures) { @@ -1862,12 +1849,6 @@ _mesa_BindTextures(GLuint first, GLsizei count, const GLuint *textures) return; } - /* Flush before changing bindings */ - FLUSH_VERTICES(ctx, 0); - - ctx->Texture.NumCurrentTexUsed = MAX2(ctx->Texture.NumCurrentTexUsed, - first + count); - if (textures) { /* Note that the error semantics for multi-bind commands differ from * those of other GL commands. @@ -1902,24 +1883,7 @@ _mesa_BindTextures(GLuint first, GLsizei count, const GLuint *textures) texObj = _mesa_lookup_texture_locked(ctx, textures[i]); if (texObj && texObj->Target != 0) { - const gl_texture_index targetIndex = texObj->TargetIndex; - - if (texUnit->CurrentTex[targetIndex] != texObj) { - /* Do the actual binding. The refcount on the previously - * bound texture object will be decremented. It will be - * deleted if the count hits zero. 
- */ - _mesa_reference_texobj(&texUnit->CurrentTex[targetIndex], - texObj); - - texUnit->_BoundTextures |= (1 << targetIndex); - ctx->NewState |= _NEW_TEXTURE; - - /* Pass the BindTexture call to the device driver */ - if (ctx->Driver.BindTexture) - ctx->Driver.BindTexture(ctx, first + i, - texObj->Target, texObj); - } + bind_texture(ctx, first + i, texObj); } else { /* The ARB_multi_bind spec says: * diff --git a/src/mesa/main/texstate.h b/src/mesa/main/texstate.h index bee8c9c3316..52fe60275c2 100644 --- a/src/mesa/main/texstate.h +++ b/src/mesa/main/texstate.h @@ -63,24 +63,6 @@ _mesa_max_tex_unit(struct gl_context *ctx) ctx->Const.MaxTextureCoordUnits); } -static inline struct gl_texture_unit * -_mesa_get_tex_unit_err(struct gl_context *ctx, GLuint unit, const char *func) -{ - if (unit < _mesa_max_tex_unit(ctx)) - return _mesa_get_tex_unit(ctx, unit); - - /* Note: This error is a precedent set by glBindTextures. From the GL 4.5 - * specification (30.10.2014) Section 8.1 ("Texture Objects"): - * - * "An INVALID_OPERATION error is generated if first + count is greater - * than the number of texture image units supported by the - * implementation." - */ - _mesa_error(ctx, GL_INVALID_OPERATION, "%s(unit=%s)", func, - _mesa_enum_to_string(GL_TEXTURE0+unit)); - return NULL; -} - extern void _mesa_copy_texture_state( const struct gl_context *src, struct gl_context *dst ); diff --git a/src/mesa/main/uniform_query.cpp b/src/mesa/main/uniform_query.cpp index 0bee59455a3..33c959dc1a5 100644 --- a/src/mesa/main/uniform_query.cpp +++ b/src/mesa/main/uniform_query.cpp @@ -28,15 +28,14 @@ #include "main/core.h" #include "main/context.h" -#include "ir.h" -#include "ir_uniform.h" -#include "program/hash_table.h" -#include "../glsl/program.h" -#include "../glsl/ir_uniform.h" -#include "../glsl/glsl_parser_extras.h" #include "main/shaderapi.h" #include "main/shaderobj.h" -#include "uniforms.h" +#include "main/uniforms.h" +#include "glsl/ir.h" +#include "glsl/ir_uniform.h" +#include "glsl/glsl_parser_extras.h" +#include "glsl/program.h" +#include "program/hash_table.h" extern "C" void GLAPIENTRY diff --git a/src/mesa/main/uniforms.h b/src/mesa/main/uniforms.h index 96172b72aa1..bec035cdc97 100644 --- a/src/mesa/main/uniforms.h +++ b/src/mesa/main/uniforms.h @@ -26,10 +26,10 @@ #ifndef UNIFORMS_H #define UNIFORMS_H -#include "glheader.h" +#include "main/glheader.h" +#include "glsl/glsl_types.h" +#include "glsl/ir_uniform.h" #include "program/prog_parameter.h" -#include "../glsl/glsl_types.h" -#include "../glsl/ir_uniform.h" #ifdef __cplusplus extern "C" { diff --git a/src/mesa/program/hash_table.h b/src/mesa/program/hash_table.h index e85a836a00b..d0a2abffa34 100644 --- a/src/mesa/program/hash_table.h +++ b/src/mesa/program/hash_table.h @@ -249,6 +249,7 @@ public: wrapper->closure = closure; hash_table_call_foreach(this->ht, subtract_one_wrapper, wrapper); + free(wrapper); } /** diff --git a/src/mesa/program/ir_to_mesa.cpp b/src/mesa/program/ir_to_mesa.cpp index 1cfcf9119f9..98032456662 100644 --- a/src/mesa/program/ir_to_mesa.cpp +++ b/src/mesa/program/ir_to_mesa.cpp @@ -31,22 +31,20 @@ #include #include "main/compiler.h" -#include "ir.h" -#include "ir_visitor.h" -#include "ir_expression_flattening.h" -#include "ir_uniform.h" -#include "glsl_types.h" -#include "glsl_parser_extras.h" -#include "../glsl/program.h" -#include "ir_optimization.h" -#include "ast.h" -#include "linker.h" - #include "main/mtypes.h" #include "main/shaderapi.h" #include "main/shaderobj.h" #include "main/uniforms.h" - +#include 
"glsl/ast.h" +#include "glsl/ir.h" +#include "glsl/ir_expression_flattening.h" +#include "glsl/ir_visitor.h" +#include "glsl/ir_optimization.h" +#include "glsl/ir_uniform.h" +#include "glsl/glsl_parser_extras.h" +#include "glsl/glsl_types.h" +#include "glsl/linker.h" +#include "glsl/program.h" #include "program/hash_table.h" #include "program/prog_instruction.h" #include "program/prog_optimize.h" diff --git a/src/mesa/program/prog_to_nir.c b/src/mesa/program/prog_to_nir.c index 1bd735a47bb..fc00534028f 100644 --- a/src/mesa/program/prog_to_nir.c +++ b/src/mesa/program/prog_to_nir.c @@ -1122,6 +1122,19 @@ prog_to_nir(const struct gl_program *prog, ptn_add_output_stores(c); + s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id); + s->info.num_textures = _mesa_fls(prog->SamplersUsed); + s->info.num_ubos = 0; + s->info.num_abos = 0; + s->info.num_ssbos = 0; + s->info.num_images = 0; + s->info.inputs_read = prog->InputsRead; + s->info.outputs_written = prog->OutputsWritten; + s->info.system_values_read = prog->SystemValuesRead; + s->info.uses_texture_gather = false; + s->info.uses_clip_distance_out = false; + s->info.separate_shader = false; + fail: if (c->error) { ralloc_free(s); diff --git a/src/mesa/program/sampler.cpp b/src/mesa/program/sampler.cpp index ea3024d512a..b1168fdade8 100644 --- a/src/mesa/program/sampler.cpp +++ b/src/mesa/program/sampler.cpp @@ -23,13 +23,12 @@ * DEALINGS IN THE SOFTWARE. */ -#include "ir.h" -#include "glsl_types.h" -#include "ir_visitor.h" -#include "../glsl/program.h" -#include "ir_uniform.h" - #include "main/mtypes.h" +#include "glsl/glsl_types.h" +#include "glsl/ir.h" +#include "glsl/ir_uniform.h" +#include "glsl/ir_visitor.h" +#include "glsl/program.h" #include "program/hash_table.h" #include "program/prog_parameter.h" #include "program/program.h" diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 2ad679b1cb8..f4b273bf93f 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -42,6 +42,8 @@ #include "main/macros.h" #include "main/varray.h" +#include "glsl/ir_uniform.h" + #include "vbo/vbo.h" #include "st_context.h" @@ -62,8 +64,6 @@ #include "draw/draw_context.h" #include "cso_cache/cso_context.h" -#include "../glsl/ir_uniform.h" - /** * This is very similar to vbo_all_varyings_in_vbos() but we are diff --git a/src/util/ralloc.c b/src/util/ralloc.c index 01719c888b1..e07fce74f23 100644 --- a/src/util/ralloc.c +++ b/src/util/ralloc.c @@ -359,10 +359,7 @@ ralloc_strndup(const void *ctx, const char *str, size_t max) if (unlikely(str == NULL)) return NULL; - n = strlen(str); - if (n > max) - n = max; - + n = strnlen(str, max); ptr = ralloc_array(ctx, char, n + 1); memcpy(ptr, str, n); ptr[n] = '\0'; diff --git a/src/util/strndup.c b/src/util/strndup.c index ca1c6f53b57..5ceb32fe474 100644 --- a/src/util/strndup.c +++ b/src/util/strndup.c @@ -35,10 +35,7 @@ strndup(const char *str, size_t max) if (!str) return NULL; - n = strlen(str); - if (n > max) - n = max; - + n = strnlen(str, max); ptr = (char *) calloc(n + 1, sizeof(char)); if (!ptr) return NULL; diff --git a/src/vulkan/anv_compiler.cpp b/src/vulkan/anv_compiler.cpp index b06775d05ca..4a00863b718 100644 --- a/src/vulkan/anv_compiler.cpp +++ b/src/vulkan/anv_compiler.cpp @@ -883,6 +883,9 @@ setup_nir_io(struct gl_shader *mesa_shader, prog->OutputsWritten |= BITFIELD64_BIT(var->data.location); } + shader->info.inputs_read = prog->InputsRead; + shader->info.outputs_written = prog->OutputsWritten; + mesa_shader->num_uniform_components = 
shader->num_uniforms; }
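
For reference, here is a minimal standalone sketch (not Mesa code; the helper name bounded_dup and the main() driver are illustrative only, and a POSIX strnlen() is assumed) of the simplification the src/util/strndup.c and src/util/ralloc.c hunks above make: strnlen() already bounds the scan at max, so the separate strlen()-then-clamp step can be dropped.

    /* Illustrative standalone program, assuming POSIX strnlen(). */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static char *
    bounded_dup(const char *str, size_t max)
    {
       if (!str)
          return NULL;

       /* strnlen() reads at most `max` characters, which replaces the
        * older "n = strlen(str); if (n > max) n = max;" sequence.
        */
       size_t n = strnlen(str, max);

       /* calloc() zero-fills, so the byte at index n is already '\0'. */
       char *ptr = calloc(n + 1, sizeof(char));
       if (!ptr)
          return NULL;

       memcpy(ptr, str, n);
       return ptr;
    }

    int
    main(void)
    {
       char *s = bounded_dup("hello world", 5);
       printf("%s\n", s);   /* prints "hello" */
       free(s);
       return 0;
    }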