X-Git-Url: https://git.libre-soc.org/?a=blobdiff_plain;f=src%2Fintel%2Fcompiler%2Fbrw_shader.cpp;h=72478e3c39e6539b830226879ede817b28aff962;hb=9458b017a946778ef5d065bfd61c47dafdfe3e94;hp=c2751557af80573bac1081340c868b58af9f329a;hpb=1c25bf4373d68777c3561fdd1a30766698437109;p=mesa.git diff --git a/src/intel/compiler/brw_shader.cpp b/src/intel/compiler/brw_shader.cpp index c2751557af8..72478e3c39e 100644 --- a/src/intel/compiler/brw_shader.cpp +++ b/src/intel/compiler/brw_shader.cpp @@ -26,7 +26,7 @@ #include "brw_fs.h" #include "brw_nir.h" #include "brw_vec4_tes.h" -#include "common/gen_debug.h" +#include "dev/gen_debug.h" #include "main/uniforms.h" #include "util/macros.h" @@ -55,6 +55,7 @@ brw_type_for_base_type(const struct glsl_type *type) case GLSL_TYPE_ARRAY: return brw_type_for_base_type(type->fields.array); case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: case GLSL_TYPE_SAMPLER: case GLSL_TYPE_ATOMIC_UINT: /* These should be overridden with the type of the member when @@ -72,7 +73,6 @@ brw_type_for_base_type(const struct glsl_type *type) return BRW_REGISTER_TYPE_Q; case GLSL_TYPE_VOID: case GLSL_TYPE_ERROR: - case GLSL_TYPE_INTERFACE: case GLSL_TYPE_FUNCTION: unreachable("not reached"); } @@ -129,14 +129,13 @@ brw_math_function(enum opcode op) } bool -brw_texture_offset(int *offsets, unsigned num_components, uint32_t *offset_bits) +brw_texture_offset(const nir_tex_instr *tex, unsigned src, + uint32_t *offset_bits_out) { - if (!offsets) return false; /* nonconstant offset; caller will handle it. */ + if (!nir_src_is_const(tex->src[src].src)) + return false; - /* offset out of bounds; caller will handle it. */ - for (unsigned i = 0; i < num_components; i++) - if (offsets[i] > 7 || offsets[i] < -8) - return false; + const unsigned num_components = nir_tex_instr_src_size(tex, src); /* Combine all three offsets into a single unsigned dword: * @@ -144,11 +143,20 @@ brw_texture_offset(int *offsets, unsigned num_components, uint32_t *offset_bits) * bits 7:4 - V Offset (Y component) * bits 3:0 - R Offset (Z component) */ - *offset_bits = 0; + uint32_t offset_bits = 0; for (unsigned i = 0; i < num_components; i++) { + int offset = nir_src_comp_as_int(tex->src[src].src, i); + + /* offset out of bounds; caller will handle it. */ + if (offset > 7 || offset < -8) + return false; + const unsigned shift = 4 * (2 - i); - *offset_bits |= (offsets[i] << shift) & (0xF << shift); + offset_bits |= (offset << shift) & (0xF << shift); } + + *offset_bits_out = offset_bits; + return true; } @@ -156,7 +164,7 @@ const char * brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) { switch (op) { - case BRW_OPCODE_ILLEGAL ... BRW_OPCODE_NOP: + case 0 ... NUM_BRW_OPCODES - 1: /* The DO instruction doesn't exist on Gen6+, but we use it to mark the * start of a loop in the IR. */ @@ -209,6 +217,9 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) case SHADER_OPCODE_SEND: return "send"; + case SHADER_OPCODE_UNDEF: + return "undef"; + case SHADER_OPCODE_TEX: return "tex"; case SHADER_OPCODE_TEX_LOGICAL: @@ -270,27 +281,23 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) case SHADER_OPCODE_SAMPLEINFO_LOGICAL: return "sampleinfo_logical"; - case SHADER_OPCODE_IMAGE_SIZE: - return "image_size"; case SHADER_OPCODE_IMAGE_SIZE_LOGICAL: return "image_size_logical"; case SHADER_OPCODE_SHADER_TIME_ADD: return "shader_time_add"; - case SHADER_OPCODE_UNTYPED_ATOMIC: + case VEC4_OPCODE_UNTYPED_ATOMIC: return "untyped_atomic"; case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: return "untyped_atomic_logical"; - case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: - return "untyped_atomic_float"; case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: return "untyped_atomic_float_logical"; - case SHADER_OPCODE_UNTYPED_SURFACE_READ: + case VEC4_OPCODE_UNTYPED_SURFACE_READ: return "untyped_surface_read"; case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: return "untyped_surface_read_logical"; - case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: + case VEC4_OPCODE_UNTYPED_SURFACE_WRITE: return "untyped_surface_write"; case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: return "untyped_surface_write_logical"; @@ -302,32 +309,34 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) return "a64_byte_scattered_read_logical"; case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: return "a64_byte_scattered_write_logical"; - case SHADER_OPCODE_TYPED_ATOMIC: - return "typed_atomic"; + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: + return "a64_untyped_atomic_logical"; + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: + return "a64_untyped_atomic_int64_logical"; + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: + return "a64_untyped_atomic_float_logical"; case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: return "typed_atomic_logical"; - case SHADER_OPCODE_TYPED_SURFACE_READ: - return "typed_surface_read"; case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: return "typed_surface_read_logical"; - case SHADER_OPCODE_TYPED_SURFACE_WRITE: - return "typed_surface_write"; case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: return "typed_surface_write_logical"; case SHADER_OPCODE_MEMORY_FENCE: return "memory_fence"; + case FS_OPCODE_SCHEDULING_FENCE: + return "scheduling_fence"; case SHADER_OPCODE_INTERLOCK: /* For an interlock we actually issue a memory fence via sendc. */ return "interlock"; - case SHADER_OPCODE_BYTE_SCATTERED_READ: - return "byte_scattered_read"; case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: return "byte_scattered_read_logical"; - case SHADER_OPCODE_BYTE_SCATTERED_WRITE: - return "byte_scattered_write"; case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: return "byte_scattered_write_logical"; + case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL: + return "dword_scattered_read_logical"; + case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL: + return "dword_scattered_write_logical"; case SHADER_OPCODE_LOAD_PAYLOAD: return "load_payload"; @@ -355,6 +364,9 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) case SHADER_OPCODE_FIND_LIVE_CHANNEL: return "find_live_channel"; + case FS_OPCODE_LOAD_LIVE_CHANNELS: + return "load_live_channels"; + case SHADER_OPCODE_BROADCAST: return "broadcast"; case SHADER_OPCODE_SHUFFLE: @@ -484,6 +496,10 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) return "barrier"; case SHADER_OPCODE_MULH: return "mulh"; + case SHADER_OPCODE_ISUB_SAT: + return "isub_sat"; + case SHADER_OPCODE_USUB_SAT: + return "usub_sat"; case SHADER_OPCODE_MOV_INDIRECT: return "mov_indirect"; @@ -516,6 +532,8 @@ brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op) case SHADER_OPCODE_RND_MODE: return "rnd_mode"; + case SHADER_OPCODE_FLOAT_CONTROL_MODE: + return "float_control_mode"; } unreachable("not reached"); @@ -685,7 +703,7 @@ backend_shader::backend_shader(const struct brw_compiler *compiler, nir(shader), stage_prog_data(stage_prog_data), mem_ctx(mem_ctx), - cfg(NULL), + cfg(NULL), idom_analysis(this), stage(shader->info.stage) { debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage); @@ -715,11 +733,20 @@ backend_reg::is_zero() const if (file != IMM) return false; + assert(type_sz(type) > 1); + switch (type) { + case BRW_REGISTER_TYPE_HF: + assert((d & 0xffff) == ((d >> 16) & 0xffff)); + return (d & 0xffff) == 0 || (d & 0xffff) == 0x8000; case BRW_REGISTER_TYPE_F: return f == 0; case BRW_REGISTER_TYPE_DF: return df == 0; + case BRW_REGISTER_TYPE_W: + case BRW_REGISTER_TYPE_UW: + assert((d & 0xffff) == ((d >> 16) & 0xffff)); + return (d & 0xffff) == 0; case BRW_REGISTER_TYPE_D: case BRW_REGISTER_TYPE_UD: return d == 0; @@ -737,11 +764,20 @@ backend_reg::is_one() const if (file != IMM) return false; + assert(type_sz(type) > 1); + switch (type) { + case BRW_REGISTER_TYPE_HF: + assert((d & 0xffff) == ((d >> 16) & 0xffff)); + return (d & 0xffff) == 0x3c00; case BRW_REGISTER_TYPE_F: return f == 1.0f; case BRW_REGISTER_TYPE_DF: return df == 1.0; + case BRW_REGISTER_TYPE_W: + case BRW_REGISTER_TYPE_UW: + assert((d & 0xffff) == ((d >> 16) & 0xffff)); + return (d & 0xffff) == 1; case BRW_REGISTER_TYPE_D: case BRW_REGISTER_TYPE_UD: return d == 1; @@ -759,11 +795,19 @@ backend_reg::is_negative_one() const if (file != IMM) return false; + assert(type_sz(type) > 1); + switch (type) { + case BRW_REGISTER_TYPE_HF: + assert((d & 0xffff) == ((d >> 16) & 0xffff)); + return (d & 0xffff) == 0xbc00; case BRW_REGISTER_TYPE_F: return f == -1.0; case BRW_REGISTER_TYPE_DF: return df == -1.0; + case BRW_REGISTER_TYPE_W: + assert((d & 0xffff) == ((d >> 16) & 0xffff)); + return (d & 0xffff) == 0xffff; case BRW_REGISTER_TYPE_D: return d == -1; case BRW_REGISTER_TYPE_Q: @@ -1011,20 +1055,21 @@ backend_instruction::has_side_effects() const case SHADER_OPCODE_SEND: return send_has_side_effects; - case SHADER_OPCODE_UNTYPED_ATOMIC: + case BRW_OPCODE_SYNC: + case VEC4_OPCODE_UNTYPED_ATOMIC: case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL: - case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT: case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL: case SHADER_OPCODE_GEN4_SCRATCH_WRITE: - case SHADER_OPCODE_UNTYPED_SURFACE_WRITE: + case VEC4_OPCODE_UNTYPED_SURFACE_WRITE: case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL: case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL: case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL: - case SHADER_OPCODE_BYTE_SCATTERED_WRITE: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL: + case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL: case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL: - case SHADER_OPCODE_TYPED_ATOMIC: + case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL: case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL: - case SHADER_OPCODE_TYPED_SURFACE_WRITE: case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL: case SHADER_OPCODE_MEMORY_FENCE: case SHADER_OPCODE_INTERLOCK: @@ -1039,6 +1084,8 @@ backend_instruction::has_side_effects() const case TCS_OPCODE_URB_WRITE: case TCS_OPCODE_RELEASE_INPUT: case SHADER_OPCODE_RND_MODE: + case SHADER_OPCODE_FLOAT_CONTROL_MODE: + case FS_OPCODE_SCHEDULING_FENCE: return true; default: return eot; @@ -1052,12 +1099,11 @@ backend_instruction::is_volatile() const case SHADER_OPCODE_SEND: return send_is_volatile; - case SHADER_OPCODE_UNTYPED_SURFACE_READ: + case VEC4_OPCODE_UNTYPED_SURFACE_READ: case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL: - case SHADER_OPCODE_TYPED_SURFACE_READ: case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL: - case SHADER_OPCODE_BYTE_SCATTERED_READ: case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL: + case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL: case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL: case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL: case SHADER_OPCODE_URB_READ_SIMD8: @@ -1155,13 +1201,13 @@ backend_instruction::remove(bblock_t *block) } void -backend_shader::dump_instructions() +backend_shader::dump_instructions() const { dump_instructions(NULL); } void -backend_shader::dump_instructions(const char *name) +backend_shader::dump_instructions(const char *name) const { FILE *file = stderr; if (name && geteuid() != 0) { @@ -1196,7 +1242,13 @@ backend_shader::calculate_cfg() { if (this->cfg) return; - cfg = new(mem_ctx) cfg_t(&this->instructions); + cfg = new(mem_ctx) cfg_t(this, &this->instructions); +} + +void +backend_shader::invalidate_analysis(brw::analysis_dependency_class c) +{ + idom_analysis.invalidate(c); } extern "C" const unsigned * @@ -1207,8 +1259,8 @@ brw_compile_tes(const struct brw_compiler *compiler, const struct brw_vue_map *input_vue_map, struct brw_tes_prog_data *prog_data, nir_shader *nir, - struct gl_program *prog, int shader_time_index, + struct brw_compile_stats *stats, char **error_str) { const struct gen_device_info *devinfo = compiler->devinfo; @@ -1218,10 +1270,10 @@ brw_compile_tes(const struct brw_compiler *compiler, nir->info.inputs_read = key->inputs_read; nir->info.patch_inputs_read = key->patch_inputs_read; - nir = brw_nir_apply_sampler_key(nir, compiler, &key->tex, is_scalar); + brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar); brw_nir_lower_tes_inputs(nir, input_vue_map); brw_nir_lower_vue_outputs(nir); - nir = brw_postprocess_nir(nir, compiler, is_scalar); + brw_postprocess_nir(nir, compiler, is_scalar); brw_compute_vue_map(devinfo, &prog_data->base.vue_map, nir->info.outputs_written, @@ -1296,8 +1348,8 @@ brw_compile_tes(const struct brw_compiler *compiler, } if (is_scalar) { - fs_visitor v(compiler, log_data, mem_ctx, (void *) key, - &prog_data->base.base, NULL, nir, 8, + fs_visitor v(compiler, log_data, mem_ctx, &key->base, + &prog_data->base.base, nir, 8, shader_time_index, input_vue_map); if (!v.run_tes()) { if (error_str) @@ -1309,8 +1361,7 @@ brw_compile_tes(const struct brw_compiler *compiler, prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8; fs_generator g(compiler, log_data, mem_ctx, - &prog_data->base.base, v.promoted_constants, false, - MESA_SHADER_TESS_EVAL); + &prog_data->base.base, false, MESA_SHADER_TESS_EVAL); if (unlikely(INTEL_DEBUG & DEBUG_TES)) { g.enable_debug(ralloc_asprintf(mem_ctx, "%s tessellation evaluation shader %s", @@ -1319,7 +1370,7 @@ brw_compile_tes(const struct brw_compiler *compiler, nir->info.name)); } - g.generate_code(v.cfg, 8); + g.generate_code(v.cfg, 8, v.shader_stats, stats); assembly = g.get_assembly(); } else { @@ -1335,7 +1386,7 @@ brw_compile_tes(const struct brw_compiler *compiler, v.dump_instructions(); assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir, - &prog_data->base, v.cfg); + &prog_data->base, v.cfg, stats); } return assembly;