this->predicate = BRW_PREDICATE_NONE;
this->predicate_inverse = false;
this->target = 0;
- this->regs_written = (dst.file == BAD_FILE ? 0 : 1);
this->shadow_compare = false;
this->ir = NULL;
this->urb_write_flags = BRW_URB_WRITE_NO_FLAGS;
this->mlen = 0;
this->base_mrf = 0;
this->offset = 0;
+ this->exec_size = 8;
+ this->group = 0;
+ this->size_written = (dst.file == BAD_FILE ?
+ 0 : this->exec_size * type_sz(dst.type));
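+ /* e.g. with the default exec_size of 8, a float destination gives
+ * size_written = 8 * 4 = 32 bytes (one 32-byte register) and a double
+ * destination 8 * 8 = 64 bytes (two registers).
+ */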
this->annotation = NULL;
}
ALU2_ACC(ADDC)
ALU2_ACC(SUBB)
ALU2(MAC)
+ALU1(DIM)
/** Gen4 predicated IF. */
vec4_instruction *
emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
}
-/**
- * Returns the minimum number of vec4 elements needed to pack a type.
- *
- * For simple types, it will return 1 (a single vec4); for matrices, the
- * number of columns; for array and struct, the sum of the vec4_size of
- * each of its elements; and for sampler and atomic, zero.
- *
- * This method is useful to calculate how much register space is needed to
- * store a particular type.
+/*
+ * Returns the minimum number of vec4 (as_vec4 == true) or dvec4 (as_vec4 ==
+ * false) elements needed to pack a type.
*/
-extern "C" int
-type_size_vec4(const struct glsl_type *type)
+static int
+type_size_xvec4(const struct glsl_type *type, bool as_vec4)
{
unsigned int i;
int size;
case GLSL_TYPE_INT:
case GLSL_TYPE_FLOAT:
case GLSL_TYPE_BOOL:
+ case GLSL_TYPE_DOUBLE:
+ case GLSL_TYPE_UINT64:
+ case GLSL_TYPE_INT64:
if (type->is_matrix()) {
- return type->matrix_columns;
+ const glsl_type *col_type = type->column_type();
+ unsigned col_slots =
+ (as_vec4 && col_type->is_dual_slot()) ? 2 : 1;
+ return type->matrix_columns * col_slots;
} else {
- /* Regardless of size of vector, it gets a vec4. This is bad
- * packing for things like floats, but otherwise arrays become a
- * mess. Hopefully a later pass over the code can pack scalars
- * down if appropriate.
- */
- return 1;
+ /* Regardless of size of vector, it gets a vec4. This is bad
+ * packing for things like floats, but otherwise arrays become a
+ * mess. Hopefully a later pass over the code can pack scalars
+ * down if appropriate.
+ */
+ return (as_vec4 && type->is_dual_slot()) ? 2 : 1;
}
case GLSL_TYPE_ARRAY:
assert(type->length > 0);
- return type_size_vec4(type->fields.array) * type->length;
+ return type_size_xvec4(type->fields.array, as_vec4) * type->length;
case GLSL_TYPE_STRUCT:
size = 0;
for (i = 0; i < type->length; i++) {
- size += type_size_vec4(type->fields.structure[i].type);
+ size += type_size_xvec4(type->fields.structure[i].type, as_vec4);
}
return size;
case GLSL_TYPE_SUBROUTINE:
case GLSL_TYPE_IMAGE:
return DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4);
case GLSL_TYPE_VOID:
- case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_FUNCTION:
return 0;
}
+/**
+ * Returns the minimum number of vec4 elements needed to pack a type.
+ *
+ * For simple types, it will return 1 (a single vec4); for matrices, the
+ * number of columns; for array and struct, the sum of the vec4_size of
+ * each of its elements; and for sampler and atomic, zero.
+ *
+ * This method is useful to calculate how much register space is needed to
+ * store a particular type.
+ */
+extern "C" int
+type_size_vec4(const struct glsl_type *type)
+{
+ return type_size_xvec4(type, true);
+}
+
+/**
+ * Returns the minimum number of dvec4 elements needed to pack a type.
+ *
+ * For simple types, it will return 1 (a single dvec4); for matrices, the
+ * number of columns; for array and struct, the sum of the dvec4_size of
+ * each of its elements; and for sampler and atomic, zero.
+ *
+ * This method is useful to calculate how much register space is needed to
+ * store a particular type.
+ *
+ * Measuring double-precision vertex inputs as dvec4 is required because
+ * ARB_vertex_attrib_64bit states that these use the same number of locations
+ * as the single-precision version. That is, two consecutive dvec4s would be
+ * located at location "x" and location "x+1", not "x+2".
+ *
+ * In order to map vec4/dvec4 vertex inputs to the proper ATTRs,
+ * remap_vs_attrs() will take into account both the location and whether the
+ * type fits in one or two vec4 slots.
+ */
+extern "C" int
+type_size_dvec4(const struct glsl_type *type)
+{
+ return type_size_xvec4(type, false);
+}
+
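+/* Worked example of the two counts (following the dual-slot rules above):
+ * a dvec4 is dual-slot, so type_size_vec4() returns 2 while
+ * type_size_dvec4() returns 1; a dmat3 (three dvec3 columns) takes
+ * 3 * 2 = 6 vec4 slots but only 3 dvec4 slots.
+ */
+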
src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
{
init();
vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
src_reg src0, src_reg src1)
{
- vec4_instruction *inst;
-
- if (devinfo->gen >= 6) {
- inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
- inst->conditional_mod = conditionalmod;
- } else {
- emit(CMP(dst, src0, src1, conditionalmod));
-
- inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
-
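+ /* SEL with a conditional mod computes the min/max directly on all
+ * hardware generations (the scalar backend already relies on this), so
+ * the separate Gen4/5 CMP + predicated-SEL path is unnecessary.
+ */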
+ vec4_instruction *inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
+ inst->conditional_mod = conditionalmod;
return inst;
}
else
emit(pull);
- dst_reg index_reg = retype(offset(dst_reg(header), 1),
+ dst_reg index_reg = retype(byte_offset(dst_reg(header), REG_SIZE),
offset_reg.type);
pull = MOV(writemask(index_reg, WRITEMASK_X), offset_reg);
pull->mlen = 2;
pull->header_size = 1;
} else if (devinfo->gen >= 7) {
- dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
+ dst_reg grf_offset = dst_reg(this, glsl_type::uint_type);
grf_offset.type = offset_reg.type;
src_reg
vec4_visitor::emit_mcs_fetch(const glsl_type *coordinate_type,
- src_reg coordinate, src_reg sampler)
+ src_reg coordinate, src_reg surface)
{
vec4_instruction *inst =
new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS,
dst_reg(this, glsl_type::uvec4_type));
inst->base_mrf = 2;
- inst->src[1] = sampler;
+ inst->src[1] = surface;
+ inst->src[2] = surface;
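+ /* ld_mcs doesn't use sampler state, so the surface index is also placed
+ * in the sampler source slot; this appears to be just to keep src[2]
+ * valid for the generator.
+ */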
int param_base;
const glsl_type *dest_type,
src_reg coordinate,
int coord_components,
- src_reg shadow_comparitor,
+ src_reg shadow_comparator,
src_reg lod, src_reg lod2,
src_reg sample_index,
uint32_t constant_offset,
src_reg offset_value,
src_reg mcs,
- bool is_cube_array,
uint32_t surface,
src_reg surface_reg,
- uint32_t sampler,
src_reg sampler_reg)
{
/* The sampler can only meaningfully compute LOD for fragment shader
inst->base_mrf = 2;
inst->mlen = inst->header_size;
inst->dst.writemask = WRITEMASK_XYZW;
- inst->shadow_compare = shadow_comparitor.file != BAD_FILE;
+ inst->shadow_compare = shadow_comparator.file != BAD_FILE;
inst->src[1] = surface_reg;
inst->src[2] = sampler_reg;
emit(MOV(dst_reg(MRF, param_base, coordinate.type, zero_mask),
brw_imm_d(0)));
}
- /* Load the shadow comparitor */
- if (shadow_comparitor.file != BAD_FILE && op != ir_txd && (op != ir_tg4 || offset_value.file == BAD_FILE)) {
- emit(MOV(dst_reg(MRF, param_base + 1, shadow_comparitor.type,
+ /* Load the shadow comparator */
+ if (shadow_comparator.file != BAD_FILE && op != ir_txd && (op != ir_tg4 || offset_value.file == BAD_FILE)) {
+ emit(MOV(dst_reg(MRF, param_base + 1, shadow_comparator.type,
WRITEMASK_X),
- shadow_comparitor));
+ shadow_comparator));
inst->mlen++;
}
int mrf, writemask;
if (devinfo->gen >= 5) {
mrf = param_base + 1;
- if (shadow_comparitor.file != BAD_FILE) {
+ if (shadow_comparator.file != BAD_FILE) {
writemask = WRITEMASK_Y;
/* mlen already incremented */
} else {
emit(MOV(dst_reg(MRF, param_base + 1, type, WRITEMASK_YW), lod2));
inst->mlen++;
- if (dest_type->vector_elements == 3 || shadow_comparitor.file != BAD_FILE) {
+ if (dest_type->vector_elements == 3 || shadow_comparator.file != BAD_FILE) {
lod.swizzle = BRW_SWIZZLE_ZZZZ;
lod2.swizzle = BRW_SWIZZLE_ZZZZ;
emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_X), lod));
emit(MOV(dst_reg(MRF, param_base + 2, type, WRITEMASK_Y), lod2));
inst->mlen++;
- if (shadow_comparitor.file != BAD_FILE) {
+ if (shadow_comparator.file != BAD_FILE) {
emit(MOV(dst_reg(MRF, param_base + 2,
- shadow_comparitor.type, WRITEMASK_Z),
- shadow_comparitor));
+ shadow_comparator.type, WRITEMASK_Z),
+ shadow_comparator));
}
}
} else /* devinfo->gen == 4 */ {
inst->mlen += 2;
}
} else if (op == ir_tg4 && offset_value.file != BAD_FILE) {
- if (shadow_comparitor.file != BAD_FILE) {
- emit(MOV(dst_reg(MRF, param_base, shadow_comparitor.type, WRITEMASK_W),
- shadow_comparitor));
+ if (shadow_comparator.file != BAD_FILE) {
+ emit(MOV(dst_reg(MRF, param_base, shadow_comparator.type, WRITEMASK_W),
+ shadow_comparator));
}
emit(MOV(dst_reg(MRF, param_base + 1, glsl_type::ivec2_type, WRITEMASK_XY),
/* fixup num layers (z) for cube arrays: hardware returns faces * layers;
* spec requires layers.
*/
- if (op == ir_txs && is_cube_array) {
- emit_math(SHADER_OPCODE_INT_QUOTIENT,
- writemask(inst->dst, WRITEMASK_Z),
- src_reg(inst->dst), brw_imm_d(6));
+ if (op == ir_txs && devinfo->gen < 7) {
+ /* Gen4-6 return 0 instead of 1 for single layer surfaces. */
+ emit_minmax(BRW_CONDITIONAL_GE, writemask(inst->dst, WRITEMASK_Z),
+ src_reg(inst->dst), brw_imm_d(1));
}
if (devinfo->gen == 6 && op == ir_tg4) {
}
void
-vec4_visitor::gs_emit_vertex(int stream_id)
+vec4_visitor::gs_emit_vertex(int /* stream_id */)
{
unreachable("not reached");
}
unreachable("not reached");
}
-void
-vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
- dst_reg dst, src_reg surf_offset,
- src_reg src0, src_reg src1)
-{
- unsigned mlen = 1 + (src0.file != BAD_FILE) + (src1.file != BAD_FILE);
- src_reg src_payload(this, glsl_type::uint_type, mlen);
- dst_reg payload(src_payload);
- payload.writemask = WRITEMASK_X;
-
- /* Set the atomic operation offset. */
- emit(MOV(offset(payload, 0), surf_offset));
- unsigned i = 1;
-
- /* Set the atomic operation arguments. */
- if (src0.file != BAD_FILE) {
- emit(MOV(offset(payload, i), src0));
- i++;
- }
-
- if (src1.file != BAD_FILE) {
- emit(MOV(offset(payload, i), src1));
- i++;
- }
-
- /* Emit the instruction. Note that this maps to the normal SIMD8
- * untyped atomic message on Ivy Bridge, but that's OK because
- * unused channels will be masked out.
- */
- vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_ATOMIC, dst,
- src_payload,
- brw_imm_ud(surf_index), brw_imm_ud(atomic_op));
- inst->mlen = mlen;
-}
-
-void
-vec4_visitor::emit_untyped_surface_read(unsigned surf_index, dst_reg dst,
- src_reg surf_offset)
-{
- dst_reg offset(this, glsl_type::uint_type);
- offset.writemask = WRITEMASK_X;
-
- /* Set the surface read offset. */
- emit(MOV(offset, surf_offset));
-
- /* Emit the instruction. Note that this maps to the normal SIMD8
- * untyped surface read message, but that's OK because unused
- * channels will be masked out.
- */
- vec4_instruction *inst = emit(SHADER_OPCODE_UNTYPED_SURFACE_READ, dst,
- src_reg(offset),
- brw_imm_ud(surf_index), brw_imm_d(1));
- inst->mlen = 1;
-}
-
void
vec4_visitor::emit_ndc_computation()
{
- if (output_reg[VARYING_SLOT_POS].file == BAD_FILE)
+ if (output_reg[VARYING_SLOT_POS][0].file == BAD_FILE)
return;
/* Get the position */
- src_reg pos = src_reg(output_reg[VARYING_SLOT_POS]);
+ src_reg pos = src_reg(output_reg[VARYING_SLOT_POS][0]);
/* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
- output_reg[BRW_VARYING_SLOT_NDC] = ndc;
+ output_reg[BRW_VARYING_SLOT_NDC][0] = ndc;
+ output_num_components[BRW_VARYING_SLOT_NDC][0] = 4;
current_annotation = "NDC";
dst_reg ndc_w = ndc;
{
if (devinfo->gen < 6 &&
((prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) ||
- output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE ||
+ output_reg[VARYING_SLOT_CLIP_DIST0][0].file != BAD_FILE ||
devinfo->has_negative_rhw_bug)) {
dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
dst_reg header1_w = header1;
emit(MOV(header1, brw_imm_ud(0u)));
if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
- src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ]);
+ src_reg psiz = src_reg(output_reg[VARYING_SLOT_PSIZ][0]);
current_annotation = "Point size";
emit(MUL(header1_w, psiz, brw_imm_f((float)(1 << 11))));
emit(AND(header1_w, src_reg(header1_w), brw_imm_d(0x7ff << 8)));
}
- if (output_reg[VARYING_SLOT_CLIP_DIST0].file != BAD_FILE) {
+ if (output_reg[VARYING_SLOT_CLIP_DIST0][0].file != BAD_FILE) {
current_annotation = "Clipping flags";
dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
- emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L));
+ emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0][0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L));
emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, brw_imm_d(0));
emit(OR(header1_w, src_reg(header1_w), src_reg(flags0)));
- emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1]), brw_imm_f(0.0f), BRW_CONDITIONAL_L));
+ emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1][0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L));
emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, brw_imm_d(0));
emit(SHL(flags1, src_reg(flags1), brw_imm_d(4)));
emit(OR(header1_w, src_reg(header1_w), src_reg(flags1)));
* clipped against all fixed planes.
*/
if (devinfo->has_negative_rhw_bug &&
- output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE) {
- src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC]);
+ output_reg[BRW_VARYING_SLOT_NDC][0].file != BAD_FILE) {
+ src_reg ndc_w = src_reg(output_reg[BRW_VARYING_SLOT_NDC][0]);
ndc_w.swizzle = BRW_SWIZZLE_WWWW;
emit(CMP(dst_null_f(), ndc_w, brw_imm_f(0.0f), BRW_CONDITIONAL_L));
vec4_instruction *inst;
inst = emit(OR(header1_w, src_reg(header1_w), brw_imm_ud(1u << 6)));
inst->predicate = BRW_PREDICATE_NORMAL;
- output_reg[BRW_VARYING_SLOT_NDC].type = BRW_REGISTER_TYPE_F;
- inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC], brw_imm_f(0.0f)));
+ output_reg[BRW_VARYING_SLOT_NDC][0].type = BRW_REGISTER_TYPE_F;
+ inst = emit(MOV(output_reg[BRW_VARYING_SLOT_NDC][0], brw_imm_f(0.0f)));
inst->predicate = BRW_PREDICATE_NORMAL;
}
if (prog_data->vue_map.slots_valid & VARYING_BIT_PSIZ) {
dst_reg reg_w = reg;
reg_w.writemask = WRITEMASK_W;
- src_reg reg_as_src = src_reg(output_reg[VARYING_SLOT_PSIZ]);
+ src_reg reg_as_src = src_reg(output_reg[VARYING_SLOT_PSIZ][0]);
reg_as_src.type = reg_w.type;
reg_as_src.swizzle = brw_swizzle_for_size(1);
emit(MOV(reg_w, reg_as_src));
dst_reg reg_y = reg;
reg_y.writemask = WRITEMASK_Y;
reg_y.type = BRW_REGISTER_TYPE_D;
- output_reg[VARYING_SLOT_LAYER].type = reg_y.type;
- emit(MOV(reg_y, src_reg(output_reg[VARYING_SLOT_LAYER])));
+ output_reg[VARYING_SLOT_LAYER][0].type = reg_y.type;
+ emit(MOV(reg_y, src_reg(output_reg[VARYING_SLOT_LAYER][0])));
}
if (prog_data->vue_map.slots_valid & VARYING_BIT_VIEWPORT) {
dst_reg reg_z = reg;
reg_z.writemask = WRITEMASK_Z;
reg_z.type = BRW_REGISTER_TYPE_D;
- output_reg[VARYING_SLOT_VIEWPORT].type = reg_z.type;
- emit(MOV(reg_z, src_reg(output_reg[VARYING_SLOT_VIEWPORT])));
+ output_reg[VARYING_SLOT_VIEWPORT][0].type = reg_z.type;
+ emit(MOV(reg_z, src_reg(output_reg[VARYING_SLOT_VIEWPORT][0])));
}
}
}
vec4_instruction *
-vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying)
+vec4_visitor::emit_generic_urb_slot(dst_reg reg, int varying, int component)
{
assert(varying < VARYING_SLOT_MAX);
- assert(output_reg[varying].type == reg.type);
- current_annotation = output_reg_annotation[varying];
- if (output_reg[varying].file != BAD_FILE)
- return emit(MOV(reg, src_reg(output_reg[varying])));
- else
+
+ unsigned num_comps = output_num_components[varying][component];
+ if (num_comps == 0)
return NULL;
+
+ assert(output_reg[varying][component].type == reg.type);
+ current_annotation = output_reg_annotation[varying];
+ if (output_reg[varying][component].file != BAD_FILE) {
+ src_reg src = src_reg(output_reg[varying][component]);
+ src.swizzle = BRW_SWZ_COMP_OUTPUT(component);
+ reg.writemask =
+ brw_writemask_for_component_packing(num_comps, component);
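+ /* e.g. a 2-component output packed at component 2 is read with a swizzle
+ * that shifts its .xy up to .zw and written with writemask .zw, leaving
+ * the other packed components of the slot untouched.
+ */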
+ return emit(MOV(reg, src));
+ }
+ return NULL;
}
void
vec4_visitor::emit_urb_slot(dst_reg reg, int varying)
{
reg.type = BRW_REGISTER_TYPE_F;
- output_reg[varying].type = reg.type;
+ output_reg[varying][0].type = reg.type;
switch (varying) {
case VARYING_SLOT_PSIZ:
}
case BRW_VARYING_SLOT_NDC:
current_annotation = "NDC";
- if (output_reg[BRW_VARYING_SLOT_NDC].file != BAD_FILE)
- emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC])));
+ if (output_reg[BRW_VARYING_SLOT_NDC][0].file != BAD_FILE)
+ emit(MOV(reg, src_reg(output_reg[BRW_VARYING_SLOT_NDC][0])));
break;
case VARYING_SLOT_POS:
current_annotation = "gl_Position";
- if (output_reg[VARYING_SLOT_POS].file != BAD_FILE)
- emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS])));
+ if (output_reg[VARYING_SLOT_POS][0].file != BAD_FILE)
+ emit(MOV(reg, src_reg(output_reg[VARYING_SLOT_POS][0])));
break;
case VARYING_SLOT_EDGE:
/* This is present when doing unfilled polygons. We're supposed to copy
/* No need to write to this slot */
break;
default:
- emit_generic_urb_slot(reg, varying);
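+ /* A slot may contain several packed output variables; try each
+ * component and let emit_generic_urb_slot() skip the ones with no data.
+ */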
+ for (int i = 0; i < 4; i++) {
+ emit_generic_urb_slot(reg, varying, i);
+ }
break;
}
}
static int
-align_interleaved_urb_mlen(const struct brw_device_info *devinfo, int mlen)
+align_interleaved_urb_mlen(const struct gen_device_info *devinfo, int mlen)
{
if (devinfo->gen >= 6) {
/* URB data written (does not include the message header reg) must
message_header_scale *= 16;
if (reladdr) {
+ /* A vec4 is 16 bytes and a dvec4 is 32 bytes so for doubles we have
+ * to multiply the reladdr by 2. Notice that the reg_offset part
+ * is in units of 16 bytes and is used to select the low/high 16-byte
+ * chunk of a full dvec4, so we don't want to multiply that part.
+ */
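+ /* For example, assuming message_header_scale == 2 (the interleaved-vec4
+ * scale used when the header takes vec4 offsets), element i of a dvec4
+ * array with reg_offset selecting the high half yields
+ * index = i * 4 + reg_offset * 2.
+ */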
src_reg index = src_reg(this, glsl_type::int_type);
-
- emit_before(block, inst, ADD(dst_reg(index), *reladdr,
- brw_imm_d(reg_offset)));
- emit_before(block, inst, MUL(dst_reg(index), index,
- brw_imm_d(message_header_scale)));
-
+ if (type_sz(inst->dst.type) < 8) {
+ emit_before(block, inst, ADD(dst_reg(index), *reladdr,
+ brw_imm_d(reg_offset)));
+ emit_before(block, inst, MUL(dst_reg(index), index,
+ brw_imm_d(message_header_scale)));
+ } else {
+ emit_before(block, inst, MUL(dst_reg(index), *reladdr,
+ brw_imm_d(message_header_scale * 2)));
+ emit_before(block, inst, ADD(dst_reg(index), index,
+ brw_imm_d(reg_offset * message_header_scale)));
+ }
return index;
} else {
return brw_imm_d(reg_offset * message_header_scale);
}
}
-src_reg
-vec4_visitor::get_pull_constant_offset(bblock_t * block, vec4_instruction *inst,
- src_reg *reladdr, int reg_offset)
-{
- if (reladdr) {
- src_reg index = src_reg(this, glsl_type::int_type);
-
- emit_before(block, inst, ADD(dst_reg(index), *reladdr,
- brw_imm_d(reg_offset * 16)));
-
- return index;
- } else if (devinfo->gen >= 8) {
- /* Store the offset in a GRF so we can send-from-GRF. */
- src_reg offset = src_reg(this, glsl_type::int_type);
- emit_before(block, inst, MOV(dst_reg(offset), brw_imm_d(reg_offset * 16)));
- return offset;
- } else {
- return brw_imm_d(reg_offset * 16);
- }
-}
-
/**
* Emits an instruction before @inst to load the value named by @orig_src
* from scratch space at @base_offset to @temp.
dst_reg temp, src_reg orig_src,
int base_offset)
{
- int reg_offset = base_offset + orig_src.reg_offset;
+ assert(orig_src.offset % REG_SIZE == 0);
+ int reg_offset = base_offset + orig_src.offset / REG_SIZE;
src_reg index = get_scratch_offset(block, inst, orig_src.reladdr,
reg_offset);
- emit_before(block, inst, SCRATCH_READ(temp, index));
+ if (type_sz(orig_src.type) < 8) {
+ emit_before(block, inst, SCRATCH_READ(temp, index));
+ } else {
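+ /* 64-bit data spans two registers in scratch: read both 16-byte halves
+ * as raw 32-bit data, then shuffle them into the 64-bit channel layout
+ * the rest of the backend expects.
+ */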
+ dst_reg shuffled = dst_reg(this, glsl_type::dvec4_type);
+ dst_reg shuffled_float = retype(shuffled, BRW_REGISTER_TYPE_F);
+ emit_before(block, inst, SCRATCH_READ(shuffled_float, index));
+ index = get_scratch_offset(block, inst, orig_src.reladdr, reg_offset + 1);
+ vec4_instruction *last_read =
+ SCRATCH_READ(byte_offset(shuffled_float, REG_SIZE), index);
+ emit_before(block, inst, last_read);
+ shuffle_64bit_data(temp, src_reg(shuffled), false, block, last_read);
+ }
}
/**
vec4_visitor::emit_scratch_write(bblock_t *block, vec4_instruction *inst,
int base_offset)
{
- int reg_offset = base_offset + inst->dst.reg_offset;
+ assert(inst->dst.offset % REG_SIZE == 0);
+ int reg_offset = base_offset + inst->dst.offset / REG_SIZE;
src_reg index = get_scratch_offset(block, inst, inst->dst.reladdr,
reg_offset);
* weren't initialized, it will confuse live interval analysis, which will
* make spilling fail to make progress.
*/
- const src_reg temp = swizzle(retype(src_reg(this, glsl_type::vec4_type),
+ bool is_64bit = type_sz(inst->dst.type) == 8;
+ const glsl_type *alloc_type =
+ is_64bit ? glsl_type::dvec4_type : glsl_type::vec4_type;
+ const src_reg temp = swizzle(retype(src_reg(this, alloc_type),
inst->dst.type),
brw_swizzle_for_mask(inst->dst.writemask));
- dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
- inst->dst.writemask));
- vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
- if (inst->opcode != BRW_OPCODE_SEL)
- write->predicate = inst->predicate;
- write->ir = inst->ir;
- write->annotation = inst->annotation;
- inst->insert_after(block, write);
+
+ if (!is_64bit) {
+ dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0),
+ inst->dst.writemask));
+ vec4_instruction *write = SCRATCH_WRITE(dst, temp, index);
+ if (inst->opcode != BRW_OPCODE_SEL)
+ write->predicate = inst->predicate;
+ write->ir = inst->ir;
+ write->annotation = inst->annotation;
+ inst->insert_after(block, write);
+ } else {
+ dst_reg shuffled = dst_reg(this, alloc_type);
+ vec4_instruction *last =
+ shuffle_64bit_data(shuffled, temp, true, block, inst);
+ src_reg shuffled_float = src_reg(retype(shuffled, BRW_REGISTER_TYPE_F));
+
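+ /* After shuffling, each 64-bit channel occupies two 32-bit channels:
+ * dst.x maps to float channels .xy and dst.y to .zw of the first
+ * register, with dst.z and dst.w landing in the second register. Build
+ * the float writemasks for the two scratch writes accordingly.
+ */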
+ uint8_t mask = 0;
+ if (inst->dst.writemask & WRITEMASK_X)
+ mask |= WRITEMASK_XY;
+ if (inst->dst.writemask & WRITEMASK_Y)
+ mask |= WRITEMASK_ZW;
+ if (mask) {
+ dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0), mask));
+
+ vec4_instruction *write = SCRATCH_WRITE(dst, shuffled_float, index);
+ if (inst->opcode != BRW_OPCODE_SEL)
+ write->predicate = inst->predicate;
+ write->ir = inst->ir;
+ write->annotation = inst->annotation;
+ last->insert_after(block, write);
+ }
+
+ mask = 0;
+ if (inst->dst.writemask & WRITEMASK_Z)
+ mask |= WRITEMASK_XY;
+ if (inst->dst.writemask & WRITEMASK_W)
+ mask |= WRITEMASK_ZW;
+ if (mask) {
+ dst_reg dst = dst_reg(brw_writemask(brw_vec8_grf(0, 0), mask));
+
+ src_reg index = get_scratch_offset(block, inst, inst->dst.reladdr,
+ reg_offset + 1);
+ vec4_instruction *write =
+ SCRATCH_WRITE(dst, byte_offset(shuffled_float, REG_SIZE), index);
+ if (inst->opcode != BRW_OPCODE_SEL)
+ write->predicate = inst->predicate;
+ write->ir = inst->ir;
+ write->annotation = inst->annotation;
+ last->insert_after(block, write);
+ }
+ }
inst->dst.file = temp.file;
inst->dst.nr = temp.nr;
- inst->dst.reg_offset = temp.reg_offset;
+ inst->dst.offset %= REG_SIZE;
inst->dst.reladdr = NULL;
}
/* Now handle scratch access on src */
if (src.file == VGRF && scratch_loc[src.nr] != -1) {
- dst_reg temp = dst_reg(this, glsl_type::vec4_type);
+ dst_reg temp = dst_reg(this, type_sz(src.type) == 8 ?
+ glsl_type::dvec4_type : glsl_type::vec4_type);
emit_scratch_read(block, inst, temp, src, scratch_loc[src.nr]);
src.nr = temp.nr;
- src.reg_offset = temp.reg_offset;
+ src.offset %= REG_SIZE;
src.reladdr = NULL;
}
*/
void
vec4_visitor::emit_pull_constant_load(bblock_t *block, vec4_instruction *inst,
- dst_reg temp, src_reg orig_src,
- int base_offset)
+ dst_reg temp, src_reg orig_src,
+ int base_offset, src_reg indirect)
{
- int reg_offset = base_offset + orig_src.reg_offset;
+ assert(orig_src.offset % 16 == 0);
const unsigned index = prog_data->base.binding_table.pull_constants_start;
- src_reg offset = get_pull_constant_offset(block, inst, orig_src.reladdr,
- reg_offset);
- emit_pull_constant_load_reg(temp,
- brw_imm_ud(index),
- offset,
- block, inst);
+ /* For 64-bit loads we need to emit two 32-bit load messages, and we also
+ * need to shuffle the 32-bit data result into proper 64-bit data. To do
+ * that we emit the 32-bit loads into a temporary and shuffle the result
+ * into the original destination.
+ */
+ dst_reg orig_temp = temp;
+ bool is_64bit = type_sz(orig_src.type) == 8;
+ if (is_64bit) {
+ assert(type_sz(temp.type) == 8);
+ dst_reg temp_df = dst_reg(this, glsl_type::dvec4_type);
+ temp = retype(temp_df, BRW_REGISTER_TYPE_F);
+ }
+
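+ /* One load message per 16-byte chunk: a single iteration for 32-bit
+ * types, two for 64-bit types at offsets src.offset and src.offset + 16.
+ */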
+ src_reg src = orig_src;
+ for (int i = 0; i < (is_64bit ? 2 : 1); i++) {
+ int reg_offset = base_offset + src.offset / 16;
+
+ src_reg offset;
+ if (indirect.file != BAD_FILE) {
+ offset = src_reg(this, glsl_type::uint_type);
+ emit_before(block, inst, ADD(dst_reg(offset), indirect,
+ brw_imm_ud(reg_offset * 16)));
+ } else if (devinfo->gen >= 8) {
+ /* Store the offset in a GRF so we can send-from-GRF. */
+ offset = src_reg(this, glsl_type::uint_type);
+ emit_before(block, inst, MOV(dst_reg(offset),
+ brw_imm_ud(reg_offset * 16)));
+ } else {
+ offset = brw_imm_d(reg_offset * 16);
+ }
+
+ emit_pull_constant_load_reg(byte_offset(temp, i * REG_SIZE),
+ brw_imm_ud(index),
+ offset,
+ block, inst);
+
+ src = byte_offset(src, 16);
+ }
brw_mark_surface_used(&prog_data->base, index);
+
+ if (is_64bit) {
+ temp = retype(temp, BRW_REGISTER_TYPE_DF);
+ shuffle_64bit_data(orig_temp, src_reg(temp), false, block, inst);
+ }
}
/**
void
vec4_visitor::move_uniform_array_access_to_pull_constants()
{
+ /* The Vulkan driver doesn't support pull constants other than UBOs, so
+ * everything has to be pushed regardless.
+ */
+ if (stage_prog_data->pull_param == NULL) {
+ split_uniform_registers();
+ return;
+ }
+
int pull_constant_loc[this->uniforms];
memset(pull_constant_loc, -1, sizeof(pull_constant_loc));
- bool nested_reladdr;
- /* Walk through and find array access of uniforms. Put a copy of that
- * uniform in the pull constant buffer.
- *
- * Note that we don't move constant-indexed accesses to arrays. No
- * testing has been done of the performance impact of this choice.
+ /* First, walk through the instructions and determine which things need to
+ * be pulled. We mark something as needing to be pulled by setting
+ * pull_constant_loc to 0.
*/
- do {
- nested_reladdr = false;
-
- foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
- for (int i = 0 ; i < 3; i++) {
- if (inst->src[i].file != UNIFORM || !inst->src[i].reladdr)
- continue;
+ foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
+ /* We only care about MOV_INDIRECT of a uniform */
+ if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT ||
+ inst->src[0].file != UNIFORM)
+ continue;
- int uniform = inst->src[i].nr;
+ int uniform_nr = inst->src[0].nr + inst->src[0].offset / 16;
- if (inst->src[i].reladdr->reladdr)
- nested_reladdr = true; /* will need another pass */
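+ /* src[2] of MOV_INDIRECT is the read size in bytes, so mark one 16-byte
+ * uniform slot per vec4 covered by the read.
+ */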
+ for (unsigned j = 0; j < DIV_ROUND_UP(inst->src[2].ud, 16); j++)
+ pull_constant_loc[uniform_nr + j] = 0;
+ }
- /* If this array isn't already present in the pull constant buffer,
- * add it.
- */
- if (pull_constant_loc[uniform] == -1) {
- const gl_constant_value **values =
- &stage_prog_data->param[uniform * 4];
+ /* Next, we walk the list of uniforms and assign real pull constant
+ * locations and set their corresponding entries in pull_param.
+ */
+ for (int j = 0; j < this->uniforms; j++) {
+ if (pull_constant_loc[j] < 0)
+ continue;
- pull_constant_loc[uniform] = stage_prog_data->nr_pull_params / 4;
+ pull_constant_loc[j] = stage_prog_data->nr_pull_params / 4;
- assert(uniform < uniform_array_size);
- for (int j = 0; j < uniform_size[uniform] * 4; j++) {
- stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
- = values[j];
- }
- }
+ for (int i = 0; i < 4; i++) {
+ stage_prog_data->pull_param[stage_prog_data->nr_pull_params++]
+ = stage_prog_data->param[j * 4 + i];
+ }
+ }
- /* Set up the annotation tracking for new generated instructions. */
- base_ir = inst->ir;
- current_annotation = inst->annotation;
+ /* Finally, we can walk through the instructions and lower MOV_INDIRECT
+ * instructions to actual uniform pulls.
+ */
+ foreach_block_and_inst_safe(block, vec4_instruction, inst, cfg) {
+ /* We only care about MOV_INDIRECT of a uniform */
+ if (inst->opcode != SHADER_OPCODE_MOV_INDIRECT ||
+ inst->src[0].file != UNIFORM)
+ continue;
- dst_reg temp = dst_reg(this, glsl_type::vec4_type);
+ int uniform_nr = inst->src[0].nr + inst->src[0].offset / 16;
- emit_pull_constant_load(block, inst, temp, inst->src[i],
- pull_constant_loc[uniform]);
+ assert(inst->src[0].swizzle == BRW_SWIZZLE_NOOP);
- inst->src[i].file = temp.file;
- inst->src[i].nr = temp.nr;
- inst->src[i].reg_offset = temp.reg_offset;
- inst->src[i].reladdr = NULL;
- }
- }
- } while (nested_reladdr);
+ emit_pull_constant_load(block, inst, inst->dst, inst->src[0],
+ pull_constant_loc[uniform_nr], inst->src[1]);
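+ /* The pull load writes inst->dst directly, so the MOV_INDIRECT itself
+ * can simply be removed.
+ */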
+ inst->remove(block);
+ }
/* Now there are no accesses of the UNIFORM file with a reladdr, so
* no need to track them as larger-than-vec4 objects. This will be
this->current_annotation = NULL;
memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
+ memset(this->output_num_components, 0, sizeof(this->output_num_components));
+
this->virtual_grf_start = NULL;
this->virtual_grf_end = NULL;
this->live_intervals = NULL;
this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
this->uniforms = 0;
-
- /* Initialize uniform_array_size to at least 1 because pre-gen6 VS requires
- * at least one. See setup_uniforms() in brw_vec4.cpp.
- */
- this->uniform_array_size = 1;
- if (prog_data) {
- this->uniform_array_size =
- MAX2(DIV_ROUND_UP(stage_prog_data->nr_params, 4), 1);
- }
-
- this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
}
vec4_visitor::~vec4_visitor()