X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;ds=inline;f=src%2Fintel%2Fcompiler%2Fbrw_shader.cpp;h=d1672d3a1cac0fbd84847b9a27ddd7e2c957bb88;hb=2df2e081fde5ff84ce87fe4763e8e6a3372694e8;hp=798c799a8286862ecc007813ca98cd767e949c0a;hpb=3cbc02e4693030d18a24602cf72e693b92e1a7a3;p=mesa.git diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index 798c799a828..d1672d3a1ca 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -26,7 +26,7 @@ #include "brw_fs.h" #include "brw_nir.h" #include "brw_vec4_tes.h" -#include "common/gen_debug.h" +#include "dev/gen_debug.h" #include "main/uniforms.h" #include "util/macros.h" @@ -55,6 +55,7 @@ brw_type_for_base_type(const struct glsl_type *type) case GLSL_TYPE_ARRAY: return brw_type_for_base_type(type->fields.array); case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: case GLSL_TYPE_SAMPLER: case GLSL_TYPE_ATOMIC_UINT: /* These should be overridden with the type of the member when @@ -72,7 +73,6 @@ brw_type_for_base_type(const struct glsl_type *type) return BRW_REGISTER_TYPE_Q; case GLSL_TYPE_VOID: case GLSL_TYPE_ERROR: - case GLSL_TYPE_INTERFACE: case GLSL_TYPE_FUNCTION: unreachable("not reached"); } @@ -129,14 +129,13 @@ brw_math_function(enum opcode op) } bool -brw_texture_offset(int *offsets, unsigned num_components, uint32_t *offset_bits) +brw_texture_offset(const nir_tex_instr *tex, unsigned src, + uint32_t *offset_bits_out) { - if (!offsets) return false; /* nonconstant offset; caller will handle it. */ + if (!nir_src_is_const(tex->src[src].src)) + return false; - /* offset out of bounds; caller will handle it. */ - for (unsigned i = 0; i < num_components; i++) - if (offsets[i] > 7 || offsets[i] < -8) - return false; + const unsigned num_components = nir_tex_instr_src_size(tex, src); /* Combine all three offsets into a single unsigned dword: * @@ -144,11 +143,20 @@ brw_texture_offset(int *offsets, unsigned num_components, uint32_t *offset_bits) * bits 7:4 - V Offset (Y component) * bits 3:0 - R Offset (Z component) */ - *offset_bits = 0; + uint32_t offset_bits = 0; for (unsigned i = 0; i < num_components; i++) { + int offset = nir_src_comp_as_int(tex->src[src].src, i); + + /* offset out of bounds; caller will handle it. */ + if (offset > 7 || offset < -8) + return false; + const unsigned shift = 4 * (2 - i); - *offset_bits |= (offsets[i] << shift) & (0xF << shift); + offset_bits |= (offset << shift) & (0xF << shift); } + + *offset_bits_out = offset_bits; + return true; } @@ -206,6 +214,12 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) case SHADER_OPCODE_COS: return "cos"; + case SHADER_OPCODE_SEND: + return "send"; + + case SHADER_OPCODE_UNDEF: + return "undef"; + case SHADER_OPCODE_TEX: return "tex"; case SHADER_OPCODE_TEX_LOGICAL: @@ -267,38 +281,44 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) case SHADER_OPCODE_SAMPLEINFO_LOGICAL: return "sampleinfo_logical"; - case SHADER_OPCODE_IMAGE_SIZE: - return "image_size"; + case SHADER_OPCODE_IMAGE_SIZE_LOGICAL: + return "image_size_logical"; case SHADER_OPCODE_SHADER_TIME_ADD: return "shader_time_add"; - case SHADER_OPCODE_UNTYPED_ATOMIC: + case VEC4_OPCODE_UNTYPED_ATOMIC: return "untyped_atomic"; case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: return "untyped_atomic_logical"; - case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: - return "untyped_atomic_float"; case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: return "untyped_atomic_float_logical"; - case SHADER_OPCODE_UNTYPED_SURFACE_READ: + case VEC4_OPCODE_UNTYPED_SURFACE_READ: return "untyped_surface_read"; case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: return "untyped_surface_read_logical"; - case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: + case VEC4_OPCODE_UNTYPED_SURFACE_WRITE: return "untyped_surface_write"; case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: return "untyped_surface_write_logical"; - case SHADER_OPCODE_TYPED_ATOMIC: - return "typed_atomic"; + case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL: + return "a64_untyped_read_logical"; + case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: + return "a64_untyped_write_logical"; + case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: + return "a64_byte_scattered_read_logical"; + case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: + return "a64_byte_scattered_write_logical"; + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: + return "a64_untyped_atomic_logical"; + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: + return "a64_untyped_atomic_int64_logical"; + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: + return "a64_untyped_atomic_float_logical"; case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: return "typed_atomic_logical"; - case SHADER_OPCODE_TYPED_SURFACE_READ: - return "typed_surface_read"; case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: return "typed_surface_read_logical"; - case SHADER_OPCODE_TYPED_SURFACE_WRITE: - return "typed_surface_write"; case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: return "typed_surface_write_logical"; case SHADER_OPCODE_MEMORY_FENCE: @@ -307,12 +327,8 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) /* For an interlock we actually issue a memory fence via sendc. */ return "interlock"; - case SHADER_OPCODE_BYTE_SCATTERED_READ: - return "byte_scattered_read"; case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: return "byte_scattered_read_logical"; - case SHADER_OPCODE_BYTE_SCATTERED_WRITE: - return "byte_scattered_write"; case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: return "byte_scattered_write_logical"; @@ -402,8 +418,6 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) return "uniform_pull_const_gen7"; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4: return "varying_pull_const_gen4"; - case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: - return "varying_pull_const_gen7"; case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL: return "varying_pull_const_logical"; @@ -415,10 +429,6 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) case FS_OPCODE_PACK_HALF_2x16_SPLIT: return "pack_half_2x16_split"; - case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X: - return "unpack_half_2x16_split_x"; - case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y: - return "unpack_half_2x16_split_y"; case FS_OPCODE_PLACEHOLDER_HALT: return "placeholder_halt"; @@ -509,6 +519,8 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) case SHADER_OPCODE_RND_MODE: return "rnd_mode"; + case SHADER_OPCODE_FLOAT_CONTROL_MODE: + return "float_control_mode"; } unreachable("not reached"); @@ -708,11 +720,20 @@ backend_reg::is_zero() const if (file != IMM) return false; + assert(type_sz(type) > 1); + switch (type) { + case BRW_REGISTER_TYPE_HF: + assert((d & 0xffff) == ((d >> 16) & 0xffff)); + return (d & 0xffff) == 0 || (d & 0xffff) == 0x8000; case BRW_REGISTER_TYPE_F: return f == 0; case BRW_REGISTER_TYPE_DF: return df == 0; + case BRW_REGISTER_TYPE_W: + case BRW_REGISTER_TYPE_UW: + assert((d & 0xffff) == ((d >> 16) & 0xffff)); + return (d & 0xffff) == 0; case BRW_REGISTER_TYPE_D: case BRW_REGISTER_TYPE_UD: return d == 0; @@ -730,11 +751,20 @@ backend_reg::is_one() const if (file != IMM) return false; + assert(type_sz(type) > 1); + switch (type) { + case BRW_REGISTER_TYPE_HF: + assert((d & 0xffff) == ((d >> 16) & 0xffff)); + return (d & 0xffff) == 0x3c00; case BRW_REGISTER_TYPE_F: return f == 1.0f; case BRW_REGISTER_TYPE_DF: return df == 1.0; + case BRW_REGISTER_TYPE_W: + case BRW_REGISTER_TYPE_UW: + assert((d & 0xffff) == ((d >> 16) & 0xffff)); + return (d & 0xffff) == 1; case BRW_REGISTER_TYPE_D: case BRW_REGISTER_TYPE_UD: return d == 1; @@ -752,11 +782,19 @@ backend_reg::is_negative_one() const if (file != IMM) return false; + assert(type_sz(type) > 1); + switch (type) { + case BRW_REGISTER_TYPE_HF: + assert((d & 0xffff) == ((d >> 16) & 0xffff)); + return (d & 0xffff) == 0xbc00; case BRW_REGISTER_TYPE_F: return f == -1.0; case BRW_REGISTER_TYPE_DF: return df == -1.0; + case BRW_REGISTER_TYPE_W: + assert((d & 0xffff) == ((d >> 16) & 0xffff)); + return (d & 0xffff) == 0xffff; case BRW_REGISTER_TYPE_D: return d == -1; case BRW_REGISTER_TYPE_Q: @@ -1001,18 +1039,22 @@ bool backend_instruction::has_side_effects() const { switch (opcode) { - case SHADER_OPCODE_UNTYPED_ATOMIC: + case SHADER_OPCODE_SEND: + return send_has_side_effects; + + case VEC4_OPCODE_UNTYPED_ATOMIC: case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: - case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: case SHADER_OPCODE_GEN4_SCRATCH_WRITE: - case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: + case VEC4_OPCODE_UNTYPED_SURFACE_WRITE: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: - case SHADER_OPCODE_BYTE_SCATTERED_WRITE: + case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: + case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: - case SHADER_OPCODE_TYPED_ATOMIC: case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: - case SHADER_OPCODE_TYPED_SURFACE_WRITE: case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: case SHADER_OPCODE_MEMORY_FENCE: case SHADER_OPCODE_INTERLOCK: @@ -1027,6 +1069,7 @@ backend_instruction::has_side_effects() const case TCS_OPCODE_URB_WRITE: case TCS_OPCODE_RELEASE_INPUT: case SHADER_OPCODE_RND_MODE: + case SHADER_OPCODE_FLOAT_CONTROL_MODE: return true; default: return eot; @@ -1037,12 +1080,15 @@ bool backend_instruction::is_volatile() const { switch (opcode) { - case SHADER_OPCODE_UNTYPED_SURFACE_READ: + case SHADER_OPCODE_SEND: + return send_is_volatile; + + case VEC4_OPCODE_UNTYPED_SURFACE_READ: case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: - case SHADER_OPCODE_TYPED_SURFACE_READ: case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: - case SHADER_OPCODE_BYTE_SCATTERED_READ: case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL: + case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: case SHADER_OPCODE_URB_READ_SIMD8: case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT: case VEC4_OPCODE_URB_READ: @@ -1189,23 +1235,22 @@ brw_compile_tes(const struct brw_compiler *compiler, const struct brw_tes_prog_key *key, const struct brw_vue_map *input_vue_map, struct brw_tes_prog_data *prog_data, - const nir_shader *src_shader, - struct gl_program *prog, + nir_shader *nir, int shader_time_index, + struct brw_compile_stats *stats, char **error_str) { const struct gen_device_info *devinfo = compiler->devinfo; const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL]; const unsigned *assembly; - nir_shader *nir = nir_shader_clone(mem_ctx, src_shader); nir->info.inputs_read = key->inputs_read; nir->info.patch_inputs_read = key->patch_inputs_read; - nir = brw_nir_apply_sampler_key(nir, compiler, &key->tex, is_scalar); + brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar); brw_nir_lower_tes_inputs(nir, input_vue_map); brw_nir_lower_vue_outputs(nir); - nir = brw_postprocess_nir(nir, compiler, is_scalar); + brw_postprocess_nir(nir, compiler, is_scalar); brw_compute_vue_map(devinfo, &prog_data->base.vue_map, nir->info.outputs_written, @@ -1280,8 +1325,8 @@ brw_compile_tes(const struct brw_compiler *compiler, } if (is_scalar) { - fs_visitor v(compiler, log_data, mem_ctx, (void *) key, - &prog_data->base.base, NULL, nir, 8, + fs_visitor v(compiler, log_data, mem_ctx, &key->base, + &prog_data->base.base, nir, 8, shader_time_index, input_vue_map); if (!v.run_tes()) { if (error_str) @@ -1293,7 +1338,7 @@ brw_compile_tes(const struct brw_compiler *compiler, prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; fs_generator g(compiler, log_data, mem_ctx, - &prog_data->base.base, v.promoted_constants, false, + &prog_data->base.base, v.shader_stats, false, MESA_SHADER_TESS_EVAL); if (unlikely(INTEL_DEBUG & DEBUG_TES)) { g.enable_debug(ralloc_asprintf(mem_ctx, @@ -1303,7 +1348,7 @@ brw_compile_tes(const struct brw_compiler *compiler, nir->info.name)); } - g.generate_code(v.cfg, 8); + g.generate_code(v.cfg, 8, stats); assembly = g.get_assembly(); } else { @@ -1319,7 +1364,7 @@ brw_compile_tes(const struct brw_compiler *compiler, v.dump_instructions(); assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir, - &prog_data->base, v.cfg); + &prog_data->base, v.cfg, stats); } return assembly;