#include "brw_fs.h"
#include "brw_nir.h"
#include "brw_vec4_tes.h"
-#include "common/gen_debug.h"
+#include "dev/gen_debug.h"
#include "main/uniforms.h"
#include "util/macros.h"
brw_type_for_base_type(const struct glsl_type *type)
{
switch (type->base_type) {
+ case GLSL_TYPE_FLOAT16:
+ return BRW_REGISTER_TYPE_HF;
case GLSL_TYPE_FLOAT:
return BRW_REGISTER_TYPE_F;
case GLSL_TYPE_INT:
case GLSL_TYPE_BOOL:
case GLSL_TYPE_SUBROUTINE:
return BRW_REGISTER_TYPE_D;
+ case GLSL_TYPE_INT16:
+ return BRW_REGISTER_TYPE_W;
+ case GLSL_TYPE_INT8:
+ return BRW_REGISTER_TYPE_B;
case GLSL_TYPE_UINT:
return BRW_REGISTER_TYPE_UD;
+ case GLSL_TYPE_UINT16:
+ return BRW_REGISTER_TYPE_UW;
+ case GLSL_TYPE_UINT8:
+ return BRW_REGISTER_TYPE_UB;
case GLSL_TYPE_ARRAY:
return brw_type_for_base_type(type->fields.array);
case GLSL_TYPE_STRUCT:
+ case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_ATOMIC_UINT:
/* These should be overridden with the type of the member when
return BRW_REGISTER_TYPE_Q;
case GLSL_TYPE_VOID:
case GLSL_TYPE_ERROR:
- case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_FUNCTION:
unreachable("not reached");
}
switch (op) {
case ir_binop_less:
return BRW_CONDITIONAL_L;
- case ir_binop_greater:
- return BRW_CONDITIONAL_G;
- case ir_binop_lequal:
- return BRW_CONDITIONAL_LE;
case ir_binop_gequal:
return BRW_CONDITIONAL_GE;
case ir_binop_equal:
}
bool
-brw_texture_offset(int *offsets, unsigned num_components, uint32_t *offset_bits)
+brw_texture_offset(const nir_tex_instr *tex, unsigned src,
+ uint32_t *offset_bits_out)
{
- if (!offsets) return false; /* nonconstant offset; caller will handle it. */
+ if (!nir_src_is_const(tex->src[src].src))
+ return false;
- /* offset out of bounds; caller will handle it. */
- for (unsigned i = 0; i < num_components; i++)
- if (offsets[i] > 7 || offsets[i] < -8)
- return false;
+ const unsigned num_components = nir_tex_instr_src_size(tex, src);
/* Combine all three offsets into a single unsigned dword:
*
* bits 7:4 - V Offset (Y component)
* bits 3:0 - R Offset (Z component)
*/
- *offset_bits = 0;
+ uint32_t offset_bits = 0;
for (unsigned i = 0; i < num_components; i++) {
+ int offset = nir_src_comp_as_int(tex->src[src].src, i);
+
+ /* offset out of bounds; caller will handle it. */
+ if (offset > 7 || offset < -8)
+ return false;
+
const unsigned shift = 4 * (2 - i);
- *offset_bits |= (offsets[i] << shift) & (0xF << shift);
+ offset_bits |= (offset << shift) & (0xF << shift);
}
+
+ *offset_bits_out = offset_bits;
+
return true;
}
brw_instruction_name(const struct gen_device_info *devinfo, enum opcode op)
{
switch (op) {
- case BRW_OPCODE_ILLEGAL ... BRW_OPCODE_NOP:
+ case 0 ... NUM_BRW_OPCODES - 1:
/* The DO instruction doesn't exist on Gen6+, but we use it to mark the
* start of a loop in the IR.
*/
case SHADER_OPCODE_COS:
return "cos";
+ case SHADER_OPCODE_SEND:
+ return "send";
+
+ case SHADER_OPCODE_UNDEF:
+ return "undef";
+
case SHADER_OPCODE_TEX:
return "tex";
case SHADER_OPCODE_TEX_LOGICAL:
case SHADER_OPCODE_SAMPLEINFO_LOGICAL:
return "sampleinfo_logical";
+ case SHADER_OPCODE_IMAGE_SIZE_LOGICAL:
+ return "image_size_logical";
+
case SHADER_OPCODE_SHADER_TIME_ADD:
return "shader_time_add";
- case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case VEC4_OPCODE_UNTYPED_ATOMIC:
return "untyped_atomic";
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
return "untyped_atomic_logical";
- case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+ return "untyped_atomic_float_logical";
+ case VEC4_OPCODE_UNTYPED_SURFACE_READ:
return "untyped_surface_read";
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
return "untyped_surface_read_logical";
- case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
+ case VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
return "untyped_surface_write";
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
return "untyped_surface_write_logical";
- case SHADER_OPCODE_TYPED_ATOMIC:
- return "typed_atomic";
+ case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
+ return "a64_untyped_read_logical";
+ case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
+ return "a64_untyped_write_logical";
+ case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
+ return "a64_byte_scattered_read_logical";
+ case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
+ return "a64_byte_scattered_write_logical";
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
+ return "a64_untyped_atomic_logical";
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
+ return "a64_untyped_atomic_int64_logical";
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+ return "a64_untyped_atomic_float_logical";
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
return "typed_atomic_logical";
- case SHADER_OPCODE_TYPED_SURFACE_READ:
- return "typed_surface_read";
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
return "typed_surface_read_logical";
- case SHADER_OPCODE_TYPED_SURFACE_WRITE:
- return "typed_surface_write";
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
return "typed_surface_write_logical";
case SHADER_OPCODE_MEMORY_FENCE:
return "memory_fence";
+ case FS_OPCODE_SCHEDULING_FENCE:
+ return "scheduling_fence";
+ case SHADER_OPCODE_INTERLOCK:
+ /* For an interlock we actually issue a memory fence via sendc. */
+ return "interlock";
+
+ case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
+ return "byte_scattered_read_logical";
+ case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
+ return "byte_scattered_write_logical";
+ case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
+ return "dword_scattered_read_logical";
+ case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
+ return "dword_scattered_write_logical";
case SHADER_OPCODE_LOAD_PAYLOAD:
return "load_payload";
case SHADER_OPCODE_FIND_LIVE_CHANNEL:
return "find_live_channel";
+ case FS_OPCODE_LOAD_LIVE_CHANNELS:
+ return "load_live_channels";
+
case SHADER_OPCODE_BROADCAST:
return "broadcast";
+ case SHADER_OPCODE_SHUFFLE:
+ return "shuffle";
+ case SHADER_OPCODE_SEL_EXEC:
+ return "sel_exec";
+ case SHADER_OPCODE_QUAD_SWIZZLE:
+ return "quad_swizzle";
+ case SHADER_OPCODE_CLUSTER_BROADCAST:
+ return "cluster_broadcast";
+
+ case SHADER_OPCODE_GET_BUFFER_SIZE:
+ return "get_buffer_size";
case VEC4_OPCODE_MOV_BYTES:
return "mov_bytes";
return "pack_bytes";
case VEC4_OPCODE_UNPACK_UNIFORM:
return "unpack_uniform";
- case VEC4_OPCODE_FROM_DOUBLE:
- return "double_to_single";
+ case VEC4_OPCODE_DOUBLE_TO_F32:
+ return "double_to_f32";
+ case VEC4_OPCODE_DOUBLE_TO_D32:
+ return "double_to_d32";
+ case VEC4_OPCODE_DOUBLE_TO_U32:
+ return "double_to_u32";
case VEC4_OPCODE_TO_DOUBLE:
return "single_to_double";
case VEC4_OPCODE_PICK_LOW_32BIT:
case FS_OPCODE_DDY_FINE:
return "ddy_fine";
- case FS_OPCODE_CINTERP:
- return "cinterp";
case FS_OPCODE_LINTERP:
return "linterp";
case FS_OPCODE_PIXEL_Y:
return "pixel_y";
- case FS_OPCODE_GET_BUFFER_SIZE:
- return "fs_get_buffer_size";
-
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD:
return "uniform_pull_const";
case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7:
return "uniform_pull_const_gen7";
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN4:
return "varying_pull_const_gen4";
- case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7:
- return "varying_pull_const_gen7";
case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_LOGICAL:
return "varying_pull_const_logical";
- case FS_OPCODE_MOV_DISPATCH_TO_FLAGS:
- return "mov_dispatch_to_flags";
case FS_OPCODE_DISCARD_JUMP:
return "discard_jump";
case FS_OPCODE_PACK_HALF_2x16_SPLIT:
return "pack_half_2x16_split";
- case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X:
- return "unpack_half_2x16_split_x";
- case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y:
- return "unpack_half_2x16_split_y";
case FS_OPCODE_PLACEHOLDER_HALT:
return "placeholder_halt";
case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
return "set_simd4x2_header_gen9";
- case VS_OPCODE_GET_BUFFER_SIZE:
- return "vs_get_buffer_size";
-
case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
return "unpack_flags_simd4x2";
return "barrier";
case SHADER_OPCODE_MULH:
return "mulh";
+ case SHADER_OPCODE_ISUB_SAT:
+ return "isub_sat";
+ case SHADER_OPCODE_USUB_SAT:
+ return "usub_sat";
case SHADER_OPCODE_MOV_INDIRECT:
return "mov_indirect";
return "tes_add_indirect_urb_offset";
case TES_OPCODE_GET_PRIMITIVE_ID:
return "tes_get_primitive_id";
+
+ case SHADER_OPCODE_RND_MODE:
+ return "rnd_mode";
+ case SHADER_OPCODE_FLOAT_CONTROL_MODE:
+ return "float_control_mode";
}
unreachable("not reached");
unreachable("unimplemented: saturate vector immediate");
case BRW_REGISTER_TYPE_HF:
unreachable("unimplemented: saturate HF immediate");
+ case BRW_REGISTER_TYPE_NF:
+ unreachable("no NF immediates");
}
if (size < 8) {
reg->d = -reg->d;
return true;
case BRW_REGISTER_TYPE_W:
- case BRW_REGISTER_TYPE_UW:
- reg->d = -(int16_t)reg->ud;
+ case BRW_REGISTER_TYPE_UW: {
+ uint16_t value = -(int16_t)reg->ud;
+ reg->ud = value | (uint32_t)value << 16;
return true;
+ }
case BRW_REGISTER_TYPE_F:
reg->f = -reg->f;
return true;
case BRW_REGISTER_TYPE_V:
assert(!"unimplemented: negate UV/V immediate");
case BRW_REGISTER_TYPE_HF:
- assert(!"unimplemented: negate HF immediate");
+ reg->ud ^= 0x80008000;
+ return true;
+ case BRW_REGISTER_TYPE_NF:
+ unreachable("no NF immediates");
}
return false;
case BRW_REGISTER_TYPE_D:
reg->d = abs(reg->d);
return true;
- case BRW_REGISTER_TYPE_W:
- reg->d = abs((int16_t)reg->ud);
+ case BRW_REGISTER_TYPE_W: {
+ uint16_t value = abs((int16_t)reg->ud);
+ reg->ud = value | (uint32_t)value << 16;
return true;
+ }
case BRW_REGISTER_TYPE_F:
reg->f = fabsf(reg->f);
return true;
case BRW_REGISTER_TYPE_V:
assert(!"unimplemented: abs V immediate");
case BRW_REGISTER_TYPE_HF:
- assert(!"unimplemented: abs HF immediate");
+ reg->ud &= ~0x80008000;
+ return true;
+ case BRW_REGISTER_TYPE_NF:
+ unreachable("no NF immediates");
}
return false;
}
-/**
- * Get the appropriate atomic op for an image atomic intrinsic.
- */
-unsigned
-get_atomic_counter_op(nir_intrinsic_op op)
-{
- switch (op) {
- case nir_intrinsic_atomic_counter_inc:
- return BRW_AOP_INC;
- case nir_intrinsic_atomic_counter_dec:
- return BRW_AOP_PREDEC;
- case nir_intrinsic_atomic_counter_add:
- return BRW_AOP_ADD;
- case nir_intrinsic_atomic_counter_min:
- return BRW_AOP_UMIN;
- case nir_intrinsic_atomic_counter_max:
- return BRW_AOP_UMAX;
- case nir_intrinsic_atomic_counter_and:
- return BRW_AOP_AND;
- case nir_intrinsic_atomic_counter_or:
- return BRW_AOP_OR;
- case nir_intrinsic_atomic_counter_xor:
- return BRW_AOP_XOR;
- case nir_intrinsic_atomic_counter_exchange:
- return BRW_AOP_MOV;
- case nir_intrinsic_atomic_counter_comp_swap:
- return BRW_AOP_CMPWR;
- default:
- unreachable("Not reachable.");
- }
-}
-
backend_shader::backend_shader(const struct brw_compiler *compiler,
void *log_data,
void *mem_ctx,
nir(shader),
stage_prog_data(stage_prog_data),
mem_ctx(mem_ctx),
- cfg(NULL),
- stage(shader->stage)
+ cfg(NULL), idom_analysis(this),
+ stage(shader->info.stage)
{
debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
stage_name = _mesa_shader_stage_to_string(stage);
stage_abbrev = _mesa_shader_stage_to_abbrev(stage);
}
+backend_shader::~backend_shader()
+{
+}
+
bool
backend_reg::equals(const backend_reg &r) const
{
return brw_regs_equal(this, &r) && offset == r.offset;
}
+bool
+backend_reg::negative_equals(const backend_reg &r) const
+{
+ return brw_regs_negative_equal(this, &r) && offset == r.offset;
+}
+
bool
backend_reg::is_zero() const
{
if (file != IMM)
return false;
+ assert(type_sz(type) > 1);
+
switch (type) {
+ case BRW_REGISTER_TYPE_HF:
+ assert((d & 0xffff) == ((d >> 16) & 0xffff));
+ return (d & 0xffff) == 0 || (d & 0xffff) == 0x8000;
case BRW_REGISTER_TYPE_F:
return f == 0;
case BRW_REGISTER_TYPE_DF:
return df == 0;
+ case BRW_REGISTER_TYPE_W:
+ case BRW_REGISTER_TYPE_UW:
+ assert((d & 0xffff) == ((d >> 16) & 0xffff));
+ return (d & 0xffff) == 0;
case BRW_REGISTER_TYPE_D:
case BRW_REGISTER_TYPE_UD:
return d == 0;
if (file != IMM)
return false;
+ assert(type_sz(type) > 1);
+
switch (type) {
+ case BRW_REGISTER_TYPE_HF:
+ assert((d & 0xffff) == ((d >> 16) & 0xffff));
+ return (d & 0xffff) == 0x3c00;
case BRW_REGISTER_TYPE_F:
return f == 1.0f;
case BRW_REGISTER_TYPE_DF:
return df == 1.0;
+ case BRW_REGISTER_TYPE_W:
+ case BRW_REGISTER_TYPE_UW:
+ assert((d & 0xffff) == ((d >> 16) & 0xffff));
+ return (d & 0xffff) == 1;
case BRW_REGISTER_TYPE_D:
case BRW_REGISTER_TYPE_UD:
return d == 1;
if (file != IMM)
return false;
+ assert(type_sz(type) > 1);
+
switch (type) {
+ case BRW_REGISTER_TYPE_HF:
+ assert((d & 0xffff) == ((d >> 16) & 0xffff));
+ return (d & 0xffff) == 0xbc00;
case BRW_REGISTER_TYPE_F:
return f == -1.0;
case BRW_REGISTER_TYPE_DF:
return df == -1.0;
+ case BRW_REGISTER_TYPE_W:
+ assert((d & 0xffff) == ((d >> 16) & 0xffff));
+ return (d & 0xffff) == 0xffff;
case BRW_REGISTER_TYPE_D:
return d == -1;
case BRW_REGISTER_TYPE_Q:
case BRW_OPCODE_FBH:
case BRW_OPCODE_FBL:
case BRW_OPCODE_SUBB:
+ case SHADER_OPCODE_BROADCAST:
+ case SHADER_OPCODE_CLUSTER_BROADCAST:
+ case SHADER_OPCODE_MOV_INDIRECT:
return false;
default:
return true;
case BRW_OPCODE_SHR:
case BRW_OPCODE_SUBB:
case BRW_OPCODE_XOR:
- case FS_OPCODE_CINTERP:
case FS_OPCODE_LINTERP:
return true;
default:
return writes_accumulator ||
(devinfo->gen < 6 &&
((opcode >= BRW_OPCODE_ADD && opcode < BRW_OPCODE_NOP) ||
- (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP &&
- opcode != FS_OPCODE_CINTERP)));
+ (opcode >= FS_OPCODE_DDX_COARSE && opcode <= FS_OPCODE_LINTERP))) ||
+ (opcode == FS_OPCODE_LINTERP &&
+ (!devinfo->has_pln || devinfo->gen <= 6));
}
bool
backend_instruction::has_side_effects() const
{
switch (opcode) {
- case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case SHADER_OPCODE_SEND:
+ return send_has_side_effects;
+
+ case BRW_OPCODE_SYNC:
+ case VEC4_OPCODE_UNTYPED_ATOMIC:
case SHADER_OPCODE_UNTYPED_ATOMIC_LOGICAL:
+ case SHADER_OPCODE_UNTYPED_ATOMIC_FLOAT_LOGICAL:
case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
- case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
+ case VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
case SHADER_OPCODE_UNTYPED_SURFACE_WRITE_LOGICAL:
- case SHADER_OPCODE_TYPED_ATOMIC:
+ case SHADER_OPCODE_A64_UNTYPED_WRITE_LOGICAL:
+ case SHADER_OPCODE_A64_BYTE_SCATTERED_WRITE_LOGICAL:
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_LOGICAL:
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_INT64_LOGICAL:
+ case SHADER_OPCODE_A64_UNTYPED_ATOMIC_FLOAT_LOGICAL:
+ case SHADER_OPCODE_BYTE_SCATTERED_WRITE_LOGICAL:
+ case SHADER_OPCODE_DWORD_SCATTERED_WRITE_LOGICAL:
case SHADER_OPCODE_TYPED_ATOMIC_LOGICAL:
- case SHADER_OPCODE_TYPED_SURFACE_WRITE:
case SHADER_OPCODE_TYPED_SURFACE_WRITE_LOGICAL:
case SHADER_OPCODE_MEMORY_FENCE:
+ case SHADER_OPCODE_INTERLOCK:
case SHADER_OPCODE_URB_WRITE_SIMD8:
case SHADER_OPCODE_URB_WRITE_SIMD8_PER_SLOT:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED:
case SHADER_OPCODE_URB_WRITE_SIMD8_MASKED_PER_SLOT:
case FS_OPCODE_FB_WRITE:
case FS_OPCODE_FB_WRITE_LOGICAL:
+ case FS_OPCODE_REP_FB_WRITE:
case SHADER_OPCODE_BARRIER:
case TCS_OPCODE_URB_WRITE:
case TCS_OPCODE_RELEASE_INPUT:
+ case SHADER_OPCODE_RND_MODE:
+ case SHADER_OPCODE_FLOAT_CONTROL_MODE:
+ case FS_OPCODE_SCHEDULING_FENCE:
return true;
default:
- return false;
+ return eot;
}
}
backend_instruction::is_volatile() const
{
switch (opcode) {
- case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ case SHADER_OPCODE_SEND:
+ return send_is_volatile;
+
+ case VEC4_OPCODE_UNTYPED_SURFACE_READ:
case SHADER_OPCODE_UNTYPED_SURFACE_READ_LOGICAL:
- case SHADER_OPCODE_TYPED_SURFACE_READ:
case SHADER_OPCODE_TYPED_SURFACE_READ_LOGICAL:
+ case SHADER_OPCODE_BYTE_SCATTERED_READ_LOGICAL:
+ case SHADER_OPCODE_DWORD_SCATTERED_READ_LOGICAL:
+ case SHADER_OPCODE_A64_UNTYPED_READ_LOGICAL:
+ case SHADER_OPCODE_A64_BYTE_SCATTERED_READ_LOGICAL:
case SHADER_OPCODE_URB_READ_SIMD8:
case SHADER_OPCODE_URB_READ_SIMD8_PER_SLOT:
case VEC4_OPCODE_URB_READ:
}
void
-backend_shader::dump_instructions()
+backend_shader::dump_instructions() const
{
dump_instructions(NULL);
}
void
-backend_shader::dump_instructions(const char *name)
+backend_shader::dump_instructions(const char *name) const
{
FILE *file = stderr;
if (name && geteuid() != 0) {
{
if (this->cfg)
return;
- cfg = new(mem_ctx) cfg_t(&this->instructions);
+ cfg = new(mem_ctx) cfg_t(this, &this->instructions);
+}
+
+void
+backend_shader::invalidate_analysis(brw::analysis_dependency_class c)
+{
+ idom_analysis.invalidate(c);
}
extern "C" const unsigned *
const struct brw_tes_prog_key *key,
const struct brw_vue_map *input_vue_map,
struct brw_tes_prog_data *prog_data,
- const nir_shader *src_shader,
- struct gl_program *prog,
+ nir_shader *nir,
int shader_time_index,
- unsigned *final_assembly_size,
+ struct brw_compile_stats *stats,
char **error_str)
{
const struct gen_device_info *devinfo = compiler->devinfo;
const bool is_scalar = compiler->scalar_stage[MESA_SHADER_TESS_EVAL];
+ const unsigned *assembly;
- nir_shader *nir = nir_shader_clone(mem_ctx, src_shader);
- nir->info->inputs_read = key->inputs_read;
- nir->info->patch_inputs_read = key->patch_inputs_read;
+ nir->info.inputs_read = key->inputs_read;
+ nir->info.patch_inputs_read = key->patch_inputs_read;
- nir = brw_nir_apply_sampler_key(nir, compiler, &key->tex, is_scalar);
+ brw_nir_apply_key(nir, compiler, &key->base, 8, is_scalar);
brw_nir_lower_tes_inputs(nir, input_vue_map);
- brw_nir_lower_vue_outputs(nir, is_scalar);
- nir = brw_postprocess_nir(nir, compiler, is_scalar);
+ brw_nir_lower_vue_outputs(nir);
+ brw_postprocess_nir(nir, compiler, is_scalar);
brw_compute_vue_map(devinfo, &prog_data->base.vue_map,
- nir->info->outputs_written,
- nir->info->separate_shader);
+ nir->info.outputs_written,
+ nir->info.separate_shader);
unsigned output_size_bytes = prog_data->base.vue_map.num_slots * 4 * 4;
}
prog_data->base.clip_distance_mask =
- ((1 << nir->info->clip_distance_array_size) - 1);
+ ((1 << nir->info.clip_distance_array_size) - 1);
prog_data->base.cull_distance_mask =
- ((1 << nir->info->cull_distance_array_size) - 1) <<
- nir->info->clip_distance_array_size;
+ ((1 << nir->info.cull_distance_array_size) - 1) <<
+ nir->info.clip_distance_array_size;
/* URB entry sizes are stored as a multiple of 64 bytes. */
prog_data->base.urb_entry_size = ALIGN(output_size_bytes, 64) / 64;
+
+ /* On Cannonlake software shall not program an allocation size that
+ * specifies a size that is a multiple of 3 64B (512-bit) cachelines.
+ */
+ if (devinfo->gen == 10 &&
+ prog_data->base.urb_entry_size % 3 == 0)
+ prog_data->base.urb_entry_size++;
+
prog_data->base.urb_read_length = 0;
STATIC_ASSERT(BRW_TESS_PARTITIONING_INTEGER == TESS_SPACING_EQUAL - 1);
TESS_SPACING_FRACTIONAL_EVEN - 1);
prog_data->partitioning =
- (enum brw_tess_partitioning) (nir->info->tess.spacing - 1);
+ (enum brw_tess_partitioning) (nir->info.tess.spacing - 1);
- switch (nir->info->tess.primitive_mode) {
+ switch (nir->info.tess.primitive_mode) {
case GL_QUADS:
prog_data->domain = BRW_TESS_DOMAIN_QUAD;
break;
unreachable("invalid domain shader primitive mode");
}
- if (nir->info->tess.point_mode) {
+ if (nir->info.tess.point_mode) {
prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_POINT;
- } else if (nir->info->tess.primitive_mode == GL_ISOLINES) {
+ } else if (nir->info.tess.primitive_mode == GL_ISOLINES) {
prog_data->output_topology = BRW_TESS_OUTPUT_TOPOLOGY_LINE;
} else {
/* Hardware winding order is backwards from OpenGL */
prog_data->output_topology =
- nir->info->tess.ccw ? BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW
+ nir->info.tess.ccw ? BRW_TESS_OUTPUT_TOPOLOGY_TRI_CW
: BRW_TESS_OUTPUT_TOPOLOGY_TRI_CCW;
}
}
if (is_scalar) {
- fs_visitor v(compiler, log_data, mem_ctx, (void *) key,
- &prog_data->base.base, NULL, nir, 8,
+ fs_visitor v(compiler, log_data, mem_ctx, &key->base,
+ &prog_data->base.base, nir, 8,
shader_time_index, input_vue_map);
if (!v.run_tes()) {
if (error_str)
prog_data->base.base.dispatch_grf_start_reg = v.payload.num_regs;
prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
- fs_generator g(compiler, log_data, mem_ctx, (void *) key,
- &prog_data->base.base, v.promoted_constants, false,
- MESA_SHADER_TESS_EVAL);
+ fs_generator g(compiler, log_data, mem_ctx,
+ &prog_data->base.base, false, MESA_SHADER_TESS_EVAL);
if (unlikely(INTEL_DEBUG & DEBUG_TES)) {
g.enable_debug(ralloc_asprintf(mem_ctx,
"%s tessellation evaluation shader %s",
- nir->info->label ? nir->info->label
+ nir->info.label ? nir->info.label
: "unnamed",
- nir->info->name));
+ nir->info.name));
}
- g.generate_code(v.cfg, 8);
+ g.generate_code(v.cfg, 8, v.shader_stats, stats);
- return g.get_assembly(final_assembly_size);
+ assembly = g.get_assembly();
} else {
brw::vec4_tes_visitor v(compiler, log_data, key, prog_data,
nir, mem_ctx, shader_time_index);
if (unlikely(INTEL_DEBUG & DEBUG_TES))
v.dump_instructions();
- return brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
- &prog_data->base, v.cfg,
- final_assembly_size);
+ assembly = brw_vec4_generate_assembly(compiler, log_data, mem_ctx, nir,
+ &prog_data->base, v.cfg, stats);
}
+
+ return assembly;
}