#include "brw_fs.h"
#include "brw_nir.h"
#include "brw_vec4_tes.h"
-#include "common/gen_debug.h"
+#include "dev/gen_debug.h"
#include "main/uniforms.h"
#include "util/macros.h"
return BRW_REGISTER_TYPE_D;
case GLSL_TYPE_INT16:
return BRW_REGISTER_TYPE_W;
+ case GLSL_TYPE_INT8:
+ return BRW_REGISTER_TYPE_B;
case GLSL_TYPE_UINT:
return BRW_REGISTER_TYPE_UD;
case GLSL_TYPE_UINT16:
return BRW_REGISTER_TYPE_UW;
+ case GLSL_TYPE_UINT8:
+ return BRW_REGISTER_TYPE_UB;
case GLSL_TYPE_ARRAY:
return brw_type_for_base_type(type->fields.array);
case GLSL_TYPE_STRUCT:
+ case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_ATOMIC_UINT:
/* These should be overridden with the type of the member when
return BRW_REGISTER_TYPE_Q;
case GLSL_TYPE_VOID:
case GLSL_TYPE_ERROR:
- case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_FUNCTION:
unreachable("not reached");
}
}
bool
-brw_texture_offset(int *offsets, unsigned num_components, uint32_t *offset_bits)
+brw_texture_offset(const nir_tex_instr *tex, unsigned src,
+ uint32_t *offset_bits_out)
{
- if (!offsets) return false; /* nonconstant offset; caller will handle it. */
+ if (!nir_src_is_const(tex->src[src].src))
+ return false;
- /* offset out of bounds; caller will handle it. */
- for (unsigned i = 0; i < num_components; i++)
- if (offsets[i] > 7 || offsets[i] < -8)
- return false;
+ const unsigned num_components = nir_tex_instr_src_size(tex, src);
/* Combine all three offsets into a single unsigned dword:
*
* bits 7:4 - V Offset (Y component)
* bits 3:0 - R Offset (Z component)
*/
- *offset_bits = 0;
+ uint32_t offset_bits = 0;
for (unsigned i = 0; i < num_components; i++) {
+ int offset = nir_src_comp_as_int(tex->src[src].src, i);
+
+ /* offset out of bounds; caller will handle it. */
+ if (offset > 7 || offset < -8)
+ return false;
+
const unsigned shift = 4 * (2 - i);
- *offset_bits |= (offsets[i] << shift) & (0xF << shift);
+ offset_bits |= (offset << shift) & (0xF << shift);
}
+
+ *offset_bits_out = offset_bits;
+
return true;
}
brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
{
switch (op) {
- case BRW_OPCODE_ILLEGAL ... BRW_OPCODE_NOP:
+ case 0 ... NUM_BRW_OPCODES - 1:
/* The DO instruction doesn't exist on Gen6+, but we use it to mark the
* start of a loop in the IR.
*/
case SHADER_OPCODE_COS:
return "cos";
+ case SHADER_OPCODE_SEND:
+ return "send";
+
+ case SHADER_OPCODE_UNDEF:
+ return "undef";
+
case SHADER_OPCODE_TEX:
return "tex";
case SHADER_OPCODE_TEX_LOGICAL:
case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
return "sampleinfo_logical";
+ case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
+ return "image_size_logical";
+
case SHADER_OPCODE_SHADER_TIME_ADD:
return "shader_time_add";
- case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case VEC4_OPCODE_UNTYPED_ATOMIC:
return "untyped_atomic";
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
return "untyped_atomic_logical";
- case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+ return "untyped_atomic_float_logical";
+ case VEC4_OPCODE_UNTYPED_SURFACE_READ:
return "untyped_surface_read";
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
return "untyped_surface_read_logical";
- case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
+ case VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
return "untyped_surface_write";
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
return "untyped_surface_write_logical";
- case SHADER_OPCODE_TYPED_ATOMIC:
- return "typed_atomic";
+ case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
+ return "a64_untyped_read_logical";
+ case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
+ return "a64_untyped_write_logical";
+ case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
+ return "a64_byte_scattered_read_logical";
+ case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
+ return "a64_byte_scattered_write_logical";
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
+ return "a64_untyped_atomic_logical";
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
+ return "a64_untyped_atomic_int64_logical";
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+ return "a64_untyped_atomic_float_logical";
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
return "typed_atomic_logical";
- case SHADER_OPCODE_TYPED_SURFACE_READ:
- return "typed_surface_read";
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
return "typed_surface_read_logical";
- case SHADER_OPCODE_TYPED_SURFACE_WRITE:
- return "typed_surface_write";
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
return "typed_surface_write_logical";
case SHADER_OPCODE_MEMORY_FENCE:
return "memory_fence";
+ case FS_OPCODE_SCHEDULING_FENCE:
+ return "scheduling_fence";
+ case SHADER_OPCODE_INTERLOCK:
+ /* For an interlock we actually issue a memory fence via sendc. */
+ return "interlock";
- case SHADER_OPCODE_BYTE_SCATTERED_READ:
- return "byte_scattered_read";
case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
return "byte_scattered_read_logical";
- case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
- return "byte_scattered_write";
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
return "byte_scattered_write_logical";
+ case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
+ return "dword_scattered_read_logical";
+ case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
+ return "dword_scattered_write_logical";
case SHADER_OPCODE_LOAD_PAYLOAD:
return "load_payload";
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
return "find_live_channel";
+ case FS_OPCODE_LOAD_LIVE_CHANNELS:
+ return "load_live_channels";
+
case SHADER_OPCODE_BROADCAST:
return "broadcast";
case SHADER_OPCODE_SHUFFLE:
return "shuffle";
case SHADER_OPCODE_SEL_EXEC:
return "sel_exec";
+ case SHADER_OPCODE_QUAD_SWIZZLE:
+ return "quad_swizzle";
case SHADER_OPCODE_CLUSTER_BROADCAST:
return "cluster_broadcast";
case FS_OPCODE_DDY_FINE:
return "ddy_fine";
- case FS_OPCODE_CINTERP:
- return "cinterp";
case FS_OPCODE_LINTERP:
return "linterp";
return "uniform_pull_const_gen7";
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4:
return "varying_pull_const_gen4";
- case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
- return "varying_pull_const_gen7";
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
return "varying_pull_const_logical";
- case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
- return "mov_dispatch_to_flags";
case FS_OPCODE_DISCARD_JUMP:
return "discard_jump";
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
return "pack_half_2x16_split";
- case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
- return "unpack_half_2x16_split_x";
- case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
- return "unpack_half_2x16_split_y";
case FS_OPCODE_PLACEHOLDER_HALT:
return "placeholder_halt";
return "barrier";
case SHADER_OPCODE_MULH:
return "mulh";
+ case SHADER_OPCODE_ISUB_SAT:
+ return "isub_sat";
+ case SHADER_OPCODE_USUB_SAT:
+ return "usub_sat";
case SHADER_OPCODE_MOV_INDIRECT:
return "mov_indirect";
case SHADER_OPCODE_RND_MODE:
return "rnd_mode";
+ case SHADER_OPCODE_FLOAT_CONTROL_MODE:
+ return "float_control_mode";
}
unreachable("not reached");
reg->d = -reg->d;
return true;
case BRW_REGISTER_TYPE_W:
- case BRW_REGISTER_TYPE_UW:
- reg->d = -(int16_t)reg->ud;
+ case BRW_REGISTER_TYPE_UW: {
+ uint16_t value = -(int16_t)reg->ud;
+ reg->ud = value | (uint32_t)value << 16;
return true;
+ }
case BRW_REGISTER_TYPE_F:
reg->f = -reg->f;
return true;
case BRW_REGISTER_TYPE_V:
assert(!"unimplemented: negate UV/V immediate");
case BRW_REGISTER_TYPE_HF:
- assert(!"unimplemented: negate HF immediate");
+ reg->ud ^= 0x80008000;
+ return true;
case BRW_REGISTER_TYPE_NF:
unreachable("no NF immediates");
}
case BRW_REGISTER_TYPE_D:
reg->d = abs(reg->d);
return true;
- case BRW_REGISTER_TYPE_W:
- reg->d = abs((int16_t)reg->ud);
+ case BRW_REGISTER_TYPE_W: {
+ uint16_t value = abs((int16_t)reg->ud);
+ reg->ud = value | (uint32_t)value << 16;
return true;
+ }
case BRW_REGISTER_TYPE_F:
reg->f = fabsf(reg->f);
return true;
case BRW_REGISTER_TYPE_V:
assert(!"unimplemented: abs V immediate");
case BRW_REGISTER_TYPE_HF:
- assert(!"unimplemented: abs HF immediate");
+ reg->ud &= ~0x80008000;
+ return true;
case BRW_REGISTER_TYPE_NF:
unreachable("no NF immediates");
}
nir(shader),
stage_prog_data(stage_prog_data),
mem_ctx(mem_ctx),
- cfg(NULL),
+ cfg(NULL), idom_analysis(this),
stage(shader->info.stage)
{
debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
return brw_regs_equal(this, &r) && offset == r.offset;
}
+bool
+backend_reg::negative_equals(const backend_reg &r) const
+{
+ return brw_regs_negative_equal(this, &r) && offset == r.offset;
+}
+
bool
backend_reg::is_zero() const
{
if (file != IMM)
return false;
+ assert(type_sz(type) > 1);
+
switch (type) {
+ case BRW_REGISTER_TYPE_HF:
+ assert((d & 0xffff) == ((d >> 16) & 0xffff));
+ return (d & 0xffff) == 0 || (d & 0xffff) == 0x8000;
case BRW_REGISTER_TYPE_F:
return f == 0;
case BRW_REGISTER_TYPE_DF:
return df == 0;
+ case BRW_REGISTER_TYPE_W:
+ case BRW_REGISTER_TYPE_UW:
+ assert((d & 0xffff) == ((d >> 16) & 0xffff));
+ return (d & 0xffff) == 0;
case BRW_REGISTER_TYPE_D:
case BRW_REGISTER_TYPE_UD:
return d == 0;
if (file != IMM)
return false;
+ assert(type_sz(type) > 1);
+
switch (type) {
+ case BRW_REGISTER_TYPE_HF:
+ assert((d & 0xffff) == ((d >> 16) & 0xffff));
+ return (d & 0xffff) == 0x3c00;
case BRW_REGISTER_TYPE_F:
return f == 1.0f;
case BRW_REGISTER_TYPE_DF:
return df == 1.0;
+ case BRW_REGISTER_TYPE_W:
+ case BRW_REGISTER_TYPE_UW:
+ assert((d & 0xffff) == ((d >> 16) & 0xffff));
+ return (d & 0xffff) == 1;
case BRW_REGISTER_TYPE_D:
case BRW_REGISTER_TYPE_UD:
return d == 1;
if (file != IMM)
return false;
+ assert(type_sz(type) > 1);
+
switch (type) {
+ case BRW_REGISTER_TYPE_HF:
+ assert((d & 0xffff) == ((d >> 16) & 0xffff));
+ return (d & 0xffff) == 0xbc00;
case BRW_REGISTER_TYPE_F:
return f == -1.0;
case BRW_REGISTER_TYPE_DF:
return df == -1.0;
+ case BRW_REGISTER_TYPE_W:
+ assert((d & 0xffff) == ((d >> 16) & 0xffff));
+ return (d & 0xffff) == 0xffff;
case BRW_REGISTER_TYPE_D:
return d == -1;
case BRW_REGISTER_TYPE_Q:
case BRW_OPCODE_SHR:
case BRW_OPCODE_SUBB:
case BRW_OPCODE_XOR:
- case FS_OPCODE_CINTERP:
case FS_OPCODE_LINTERP:
return true;
default:
return writes_accumulator ||
(devinfo->gen < 6 &&
((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) ||
- (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP &&
- opcode != FS_OPCODE_CINTERP)));
+ (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP))) ||
+ (opcode == FS_OPCODE_LINTERP &&
+ (!devinfo->has_pln || devinfo->gen <= 6));
}
bool
backend_instruction::has_side_effects() const
{
switch (opcode) {
- case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case SHADER_OPCODE_SEND:
+ return send_has_side_effects;
+
+ case BRW_OPCODE_SYNC:
+ case VEC4_OPCODE_UNTYPED_ATOMIC:
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
- case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
+ case VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
- case SHADER_OPCODE_BYTE_SCATTERED_WRITE:
+ case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
+ case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
- case SHADER_OPCODE_TYPED_ATOMIC:
+ case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
- case SHADER_OPCODE_TYPED_SURFACE_WRITE:
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
case SHADER_OPCODE_MEMORY_FENCE:
+ case SHADER_OPCODE_INTERLOCK:
case SHADER_OPCODE_URB_WRITE_SIMD8:
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
case FS_OPCODE_FB_WRITE:
case FS_OPCODE_FB_WRITE_LOGICAL:
+ case FS_OPCODE_REP_FB_WRITE:
case SHADER_OPCODE_BARRIER:
case TCS_OPCODE_URB_WRITE:
case TCS_OPCODE_RELEASE_INPUT:
case SHADER_OPCODE_RND_MODE:
+ case SHADER_OPCODE_FLOAT_CONTROL_MODE:
+ case FS_OPCODE_SCHEDULING_FENCE:
return true;
default:
return eot;
backend_instruction::is_volatile() const
{
switch (opcode) {
- case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ case SHADER_OPCODE_SEND:
+ return send_is_volatile;
+
+ case VEC4_OPCODE_UNTYPED_SURFACE_READ:
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
- case SHADER_OPCODE_TYPED_SURFACE_READ:
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
- case SHADER_OPCODE_BYTE_SCATTERED_READ:
case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
+ case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
+ case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
+ case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
case SHADER_OPCODE_URB_READ_SIMD8:
case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
case VEC4_OPCODE_URB_READ:
}
void
-backend_shader::dump_instructions()
+backend_shader::dump_instructions() const
{
dump_instructions(NULL);
}
void
-backend_shader::dump_instructions(const char *name)
+backend_shader::dump_instructions(const char *name) const
{
FILE *file = stderr;
if (name && geteuid() != 0) {
{
if (this->cfg)
return;
- cfg = new(mem_ctx) cfg_t(&this->instructions);
+ cfg = new(mem_ctx) cfg_t(this, &this->instructions);
+}
+
+void
+backend_shader::invalidate_analysis(brw::analysis_dependency_class c)
+{
+ idom_analysis.invalidate(c);
}
extern "C" const unsigned *
const struct brw_tes_prog_key *key,
const struct brw_vue_map *input_vue_map,
struct brw_tes_prog_data *prog_data,
- const nir_shader *src_shader,
- struct gl_program *prog,
+ nir_shader *nir,
int shader_time_index,
+ struct brw_compile_stats *stats,
char **error_str)
{
const struct gen_device_info *devinfo = compiler->devinfo;
const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL];
const unsigned *assembly;
- nir_shader *nir = nir_shader_clone(mem_ctx, src_shader);
nir->info.inputs_read = key->inputs_read;
nir->info.patch_inputs_read = key->patch_inputs_read;
- nir = brw_nir_apply_sampler_key(nir, compiler, &key->tex, is_scalar);
+ brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar);
brw_nir_lower_tes_inputs(nir, input_vue_map);
- brw_nir_lower_vue_outputs(nir, is_scalar);
- nir = brw_postprocess_nir(nir, compiler, is_scalar);
+ brw_nir_lower_vue_outputs(nir);
+ brw_postprocess_nir(nir, compiler, is_scalar);
brw_compute_vue_map(devinfo, &prog_data->base.vue_map,
nir->info.outputs_written,
}
if (is_scalar) {
- fs_visitor v(compiler, log_data, mem_ctx, (void *) key,
- &prog_data->base.base, NULL, nir, 8,
+ fs_visitor v(compiler, log_data, mem_ctx, &key->base,
+ &prog_data->base.base, nir, 8,
shader_time_index, input_vue_map);
if (!v.run_tes()) {
if (error_str)
prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
- fs_generator g(compiler, log_data, mem_ctx, (void *) key,
- &prog_data->base.base, v.promoted_constants, false,
- MESA_SHADER_TESS_EVAL);
+ fs_generator g(compiler, log_data, mem_ctx,
+ &prog_data->base.base, false, MESA_SHADER_TESS_EVAL);
if (unlikely(INTEL_DEBUG & DEBUG_TES)) {
g.enable_debug(ralloc_asprintf(mem_ctx,
"%s tessellation evaluation shader %s",
nir->info.name));
}
- g.generate_code(v.cfg, 8);
+ g.generate_code(v.cfg, 8, v.shader_stats, stats);
assembly = g.get_assembly();
} else {
v.dump_instructions();
assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
- &prog_data->base, v.cfg);
+ &prog_data->base, v.cfg, stats);
}
return assembly;