* IN THE SOFTWARE.
*/
+#include <ctype.h>
#include "brw_vec4.h"
#include "brw_cfg.h"
}
struct brw_reg
-vec4_instruction::get_src(const struct brw_vec4_prog_data *prog_data, int i)
+vec4_instruction::get_src(const struct brw_vue_prog_data *prog_data, int i)
{
struct brw_reg brw_reg;
case BRW_REGISTER_TYPE_UD:
brw_reg = brw_imm_ud(src[i].fixed_hw_reg.dw1.ud);
break;
+ case BRW_REGISTER_TYPE_VF:
+ brw_reg = brw_imm_vf(src[i].fixed_hw_reg.dw1.ud);
+ break;
default:
unreachable("not reached");
}
return brw_reg;
}
-vec4_generator::vec4_generator(struct brw_context *brw,
+vec4_generator::vec4_generator(const struct brw_compiler *compiler,
+ void *log_data,
struct gl_shader_program *shader_prog,
struct gl_program *prog,
- struct brw_vec4_prog_data *prog_data,
+ struct brw_vue_prog_data *prog_data,
void *mem_ctx,
- bool debug_flag)
- : brw(brw), shader_prog(shader_prog), prog(prog), prog_data(prog_data),
- mem_ctx(mem_ctx), debug_flag(debug_flag)
+ bool debug_flag,
+ const char *stage_name,
+ const char *stage_abbrev)
+ : compiler(compiler), log_data(log_data), devinfo(compiler->devinfo),
+ shader_prog(shader_prog), prog(prog), prog_data(prog_data),
+ mem_ctx(mem_ctx), stage_name(stage_name), stage_abbrev(stage_abbrev),
+ debug_flag(debug_flag)
{
- p = rzalloc(mem_ctx, struct brw_compile);
- brw_init_compile(brw, p, mem_ctx);
+ p = rzalloc(mem_ctx, struct brw_codegen);
+ brw_init_codegen(devinfo, p, mem_ctx);
}
vec4_generator::~vec4_generator()
brw_math_function(inst->opcode),
inst->base_mrf,
src,
- BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
}
brw_math_function(inst->opcode),
inst->base_mrf,
op0,
- BRW_MATH_DATA_VECTOR,
BRW_MATH_PRECISION_FULL);
}
{
int msg_type = -1;
- if (brw->gen >= 5) {
+ if (devinfo->gen >= 5) {
switch (inst->opcode) {
case SHADER_OPCODE_TEX:
case SHADER_OPCODE_TXL:
case SHADER_OPCODE_TXD:
if (inst->shadow_compare) {
/* Gen7.5+. Otherwise, lowered by brw_lower_texture_gradients(). */
- assert(brw->gen >= 8 || brw->is_haswell);
+ assert(devinfo->gen >= 8 || devinfo->is_haswell);
msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
} else {
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
break;
case SHADER_OPCODE_TXF_CMS:
- if (brw->gen >= 7)
+ if (devinfo->gen >= 7)
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
else
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
break;
case SHADER_OPCODE_TXF_MCS:
- assert(brw->gen >= 7);
+ assert(devinfo->gen >= 7);
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
break;
case SHADER_OPCODE_TXS:
* to set it up explicitly and load the offset bitfield. Otherwise, we can
* use an implied move from g0 to the first message register.
*/
- if (inst->header_present) {
- if (brw->gen < 6 && !inst->texture_offset) {
+ if (inst->header_size != 0) {
+ if (devinfo->gen < 6 && !inst->offset) {
/* Set up an implied move from g0 to the MRF. */
src = brw_vec8_grf(0, 0);
} else {
struct brw_reg header =
retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD);
+ uint32_t dw2 = 0;
/* Explicitly set up the message header by copying g0 to the MRF. */
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
- if (inst->texture_offset) {
+ if (inst->offset)
/* Set the texel offset bits in DWord 2. */
- brw_MOV(p, get_element_ud(header, 2),
- brw_imm_ud(inst->texture_offset));
- }
+ dw2 = inst->offset;
+
+ if (devinfo->gen >= 9)
+ /* SKL+ overloads BRW_SAMPLER_SIMD_MODE_SIMD4X2 to also do SIMD8D,
+ * based on bit 22 in the header.
+ */
+ dw2 |= GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2;
+
+ if (dw2)
+ brw_MOV(p, get_element_ud(header, 2), brw_imm_ud(dw2));
- brw_adjust_sampler_state_pointer(p, header, sampler_index, dst);
+ brw_adjust_sampler_state_pointer(p, header, sampler_index);
brw_pop_insn_state(p);
}
}
msg_type,
1, /* response length */
inst->mlen,
- inst->header_present,
+ inst->header_size != 0,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
return_format);
brw_mark_surface_used(&prog_data->base, sampler + base_binding_table_index);
} else {
/* Non-constant sampler index. */
- /* Note: this clobbers `dst` as a temporary before emitting the send */
struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
- struct brw_reg temp = vec1(retype(dst, BRW_REGISTER_TYPE_UD));
-
struct brw_reg sampler_reg = vec1(retype(sampler_index, BRW_REGISTER_TYPE_UD));
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_access_mode(p, BRW_ALIGN_1);
- /* Some care required: `sampler` and `temp` may alias:
- * addr = sampler & 0xff
- * temp = (sampler << 8) & 0xf00
- * addr = addr | temp
- */
- brw_ADD(p, addr, sampler_reg, brw_imm_ud(base_binding_table_index));
- brw_SHL(p, temp, sampler_reg, brw_imm_ud(8u));
- brw_AND(p, temp, temp, brw_imm_ud(0x0f00));
- brw_AND(p, addr, addr, brw_imm_ud(0x0ff));
- brw_OR(p, addr, addr, temp);
-
- /* a0.0 |= <descriptor> */
- brw_inst *insn_or = brw_next_insn(p, BRW_OPCODE_OR);
- brw_set_sampler_message(p, insn_or,
+ /* addr = ((sampler * 0x101) + base_binding_table_index) & 0xfff */
+ brw_MUL(p, addr, sampler_reg, brw_imm_uw(0x101));
+ if (base_binding_table_index)
+ brw_ADD(p, addr, addr, brw_imm_ud(base_binding_table_index));
+ brw_AND(p, addr, addr, brw_imm_ud(0xfff));
+
+ brw_pop_insn_state(p);
+
+ if (inst->base_mrf != -1)
+ gen6_resolve_implied_move(p, &src, inst->base_mrf);
+
+ /* dst = send(offset, a0.0 | <descriptor>) */
+ brw_inst *insn = brw_send_indirect_message(
+ p, BRW_SFID_SAMPLER, dst, src, addr);
+ brw_set_sampler_message(p, insn,
0 /* surface */,
0 /* sampler */,
msg_type,
1 /* rlen */,
inst->mlen /* mlen */,
- inst->header_present /* header */,
+ inst->header_size != 0 /* header */,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
return_format);
- brw_inst_set_exec_size(p->brw, insn_or, BRW_EXECUTE_1);
- brw_inst_set_src1_reg_type(p->brw, insn_or, BRW_REGISTER_TYPE_UD);
- brw_set_src0(p, insn_or, addr);
- brw_set_dest(p, insn_or, addr);
-
-
- /* dst = send(offset, a0.0) */
- brw_inst *insn_send = brw_next_insn(p, BRW_OPCODE_SEND);
- brw_set_dest(p, insn_send, dst);
- brw_set_src0(p, insn_send, src);
- brw_set_indirect_send_descriptor(p, insn_send, BRW_SFID_SAMPLER, addr);
-
- brw_pop_insn_state(p);
/* visitor knows more than we do about the surface limit required,
* so has already done marking.
inst->base_mrf, /* starting mrf reg nr */
src,
BRW_URB_WRITE_EOT | inst->urb_write_flags,
- brw->gen >= 8 ? 2 : 1,/* message len */
+ devinfo->gen >= 8 ? 2 : 1,/* message len */
0, /* response len */
0, /* urb destination offset */
BRW_URB_SWIZZLE_INTERLEAVE);
*
* We can do this with the following EU instruction:
*
- * mul(2) dst.3<1>UD src0<8;2,4>UD src1 { Align1 WE_all }
+ * mul(2) dst.3<1>UD src0<8;2,4>UD src1<...>UW { Align1 WE_all }
*/
brw_push_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ assert(devinfo->gen >= 7 &&
+ src1.file == BRW_IMMEDIATE_VALUE &&
+ src1.type == BRW_REGISTER_TYPE_UD &&
+ src1.dw1.ud <= USHRT_MAX);
brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
- src1);
+ retype(src1, BRW_REGISTER_TYPE_UW));
brw_set_default_access_mode(p, BRW_ALIGN_16);
brw_pop_insn_state(p);
}
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- if (brw->gen >= 8) {
+ if (devinfo->gen >= 8) {
/* Move the vertex count into the second MRF for the EOT write. */
brw_MOV(p, retype(brw_message_reg(dst.nr + 1), BRW_REGISTER_TYPE_UD),
src);
}
void
-vec4_generator::generate_gs_set_dword_2_immed(struct brw_reg dst,
-                                              struct brw_reg src)
+vec4_generator::generate_gs_svb_write(vec4_instruction *inst,
+                                      struct brw_reg dst,
+                                      struct brw_reg src0,
+                                      struct brw_reg src1)
{
-   assert(src.file == BRW_IMMEDIATE_VALUE);
+   /* Emit a gen6 streamed-vertex-buffer (transform feedback) write of one
+    * vertex: src0 holds the vertex data, dst is the message payload
+    * register, and src1 serves as the write-commit destination on the
+    * final write.
+    */
+   int binding = inst->sol_binding;
+   bool final_write = inst->sol_final_write;
+   brw_push_insn_state(p);
+   /* Copy Vertex data into M0.x */
+   brw_MOV(p, stride(dst, 4, 4, 1),
+           stride(retype(src0, BRW_REGISTER_TYPE_UD), 4, 4, 1));
+
+   /* Send SVB Write */
+   brw_svb_write(p,
+                 final_write ? src1 : brw_null_reg(), /* dest == src1 */
+                 1, /* msg_reg_nr */
+                 dst, /* src0 == previous dst */
+                 SURF_INDEX_GEN6_SOL_BINDING(binding), /* binding_table_index */
+                 final_write); /* send_commit_msg */
+
+   /* Finally, wait for the write commit to occur so that we can proceed to
+    * other things safely.
+    *
+    * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3:
+    *
+    *   The write commit does not modify the destination register, but
+    *   merely clears the dependency associated with the destination
+    *   register. Thus, a simple “mov” instruction using the register as a
+    *   source is sufficient to wait for the write commit to occur.
+    */
+   if (final_write) {
+      brw_MOV(p, src1, src1);
+   }
+   brw_pop_insn_state(p);
+}
+
+void
+vec4_generator::generate_gs_svb_set_destination_index(vec4_instruction *inst,
+                                                      struct brw_reg dst,
+                                                      struct brw_reg src)
+{
+
+   /* Select this vertex's SVB destination index from src (channel chosen by
+    * the instruction's sol_vertex) and store it in DWord 5 of the SVB write
+    * message header in dst.  Align1 + WE_all so exactly one dword moves.
+    */
+   int vertex = inst->sol_vertex;
   brw_push_insn_state(p);
   brw_set_default_access_mode(p, BRW_ALIGN_1);
   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
-   brw_MOV(p, suboffset(vec1(dst), 2), src);
-   brw_set_default_access_mode(p, BRW_ALIGN_16);
+   brw_MOV(p, get_element_ud(dst, 5), get_element_ud(src, vertex));
+   brw_pop_insn_state(p);
+}
+
+void
+vec4_generator::generate_gs_set_dword_2(struct brw_reg dst, struct brw_reg src)
+{
+   /* Copy the low dword of src into DWord 2 of dst.  Done in Align1 mode
+    * with the execution mask disabled so a single channel is written
+    * unconditionally.
+    */
+   brw_push_insn_state(p);
+   brw_set_default_access_mode(p, BRW_ALIGN_1);
+   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+   brw_MOV(p, suboffset(vec1(dst), 2), suboffset(vec1(src), 0));
   brw_pop_insn_state(p);
}
brw_pop_insn_state(p);
}
+void
+vec4_generator::generate_gs_ff_sync_set_primitives(struct brw_reg dst,
+                                                   struct brw_reg src0,
+                                                   struct brw_reg src1,
+                                                   struct brw_reg src2)
+{
+   /* Pack two 16-bit values into dst.0 for the FF_SYNC message: src0 into
+    * bits 16:31 and src1 into bits 0:15.  NOTE: src2.0 is clobbered as a
+    * temporary to hold the masked src1 value.
+    */
+   brw_push_insn_state(p);
+   brw_set_default_access_mode(p, BRW_ALIGN_1);
+   /* Save src0 data in 16:31 bits of dst.0 */
+   brw_AND(p, suboffset(vec1(dst), 0), suboffset(vec1(src0), 0),
+           brw_imm_ud(0xffffu));
+   brw_SHL(p, suboffset(vec1(dst), 0), suboffset(vec1(dst), 0), brw_imm_ud(16));
+   /* Save src1 data in 0:15 bits of dst.0 */
+   brw_AND(p, suboffset(vec1(src2), 0), suboffset(vec1(src1), 0),
+           brw_imm_ud(0xffffu));
+   brw_OR(p, suboffset(vec1(dst), 0),
+          suboffset(vec1(dst), 0),
+          suboffset(vec1(src2), 0));
+   brw_pop_insn_state(p);
+}
+
void
vec4_generator::generate_gs_ff_sync(vec4_instruction *inst,
struct brw_reg dst,
- struct brw_reg src0)
+ struct brw_reg src0,
+ struct brw_reg src1)
{
/* This opcode uses an implied MRF register for:
* - the header of the ff_sync message. And as such it is expected to be
struct brw_reg header =
retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD);
- /* Overwrite dword 0 of the header (cleared for now since we are not doing
- * transform feedback) and dword 1 (to hold the number of primitives
- * written).
+ /* Overwrite dword 0 of the header (SO vertices to write) and
+ * dword 1 (number of primitives written).
*/
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_access_mode(p, BRW_ALIGN_1);
- brw_MOV(p, get_element_ud(header, 0), brw_imm_ud(0));
+ brw_MOV(p, get_element_ud(header, 0), get_element_ud(src1, 0));
brw_MOV(p, get_element_ud(header, 1), get_element_ud(src0, 0));
brw_pop_insn_state(p);
brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_MOV(p, get_element_ud(header, 0), get_element_ud(dst, 0));
+
+ /* src1 is not an immediate when we use transform feedback */
+ if (src1.file != BRW_IMMEDIATE_VALUE)
+ brw_MOV(p, brw_vec4_grf(src1.nr, 0), brw_vec4_grf(dst.nr, 1));
+
+ brw_pop_insn_state(p);
+}
+
+void
+vec4_generator::generate_gs_set_primitive_id(struct brw_reg dst)
+{
+   /* In gen6, PrimitiveID is delivered in R0.1 of the payload */
+   struct brw_reg src = brw_vec8_grf(0, 0);
+   brw_push_insn_state(p);
+   brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+   brw_set_default_access_mode(p, BRW_ALIGN_1);
+   /* Copy g0 DWord 1 into DWord 0 of dst; single-channel Align1 WE_all. */
+   brw_MOV(p, get_element_ud(dst, 0), get_element_ud(src, 1));
   brw_pop_insn_state(p);
}
{
int second_vertex_offset;
- if (brw->gen >= 6)
+ if (devinfo->gen >= 6)
second_vertex_offset = 1;
else
second_vertex_offset = 16;
}
void
-vec4_generator::generate_unpack_flags(vec4_instruction *inst,
- struct brw_reg dst)
+vec4_generator::generate_unpack_flags(struct brw_reg dst)
{
brw_push_insn_state(p);
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
uint32_t msg_type;
- if (brw->gen >= 6)
+ if (devinfo->gen >= 6)
msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
- else if (brw->gen == 5 || brw->is_g4x)
+ else if (devinfo->gen == 5 || devinfo->is_g4x)
msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
else
msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, send, dst);
brw_set_src0(p, send, header);
- if (brw->gen < 6)
- brw_inst_set_cond_modifier(brw, send, inst->base_mrf);
+ if (devinfo->gen < 6)
+ brw_inst_set_cond_modifier(p->devinfo, send, inst->base_mrf);
brw_set_dp_read_message(p, send,
255, /* binding table index: stateless access */
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
uint32_t msg_type;
- if (brw->gen >= 7)
- msg_type = GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
- else if (brw->gen == 6)
+ if (devinfo->gen >= 7)
+ msg_type = GEN7_DATAPORT_DC_OWORD_DUAL_BLOCK_WRITE;
+ else if (devinfo->gen == 6)
msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
else
msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
* guaranteed and write commits only matter for inter-thread
* synchronization.
*/
- if (brw->gen >= 6) {
+ if (devinfo->gen >= 6) {
write_commit = false;
} else {
/* The visitor set up our destination register to be g0. This
brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, send, dst);
brw_set_src0(p, send, header);
- if (brw->gen < 6)
- brw_inst_set_cond_modifier(brw, send, inst->base_mrf);
+ if (devinfo->gen < 6)
+ brw_inst_set_cond_modifier(p->devinfo, send, inst->base_mrf);
brw_set_dp_write_message(p, send,
255, /* binding table index: stateless access */
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
uint32_t msg_type;
- if (brw->gen >= 6)
+ if (devinfo->gen >= 6)
msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
- else if (brw->gen == 5 || brw->is_g4x)
+ else if (devinfo->gen == 5 || devinfo->is_g4x)
msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
else
msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
brw_set_dest(p, send, dst);
brw_set_src0(p, send, header);
- if (brw->gen < 6)
- brw_inst_set_cond_modifier(brw, send, inst->base_mrf);
+ if (devinfo->gen < 6)
+ brw_inst_set_cond_modifier(p->devinfo, send, inst->base_mrf);
brw_set_dp_read_message(p, send,
surf_index,
BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
0, /* LD message ignores sampler unit */
GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
1, /* rlen */
- 1, /* mlen */
- false, /* no header */
+ inst->mlen,
+ inst->header_size != 0,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
0);
/* a0.0 = surf_index & 0xff */
brw_inst *insn_and = brw_next_insn(p, BRW_OPCODE_AND);
- brw_inst_set_exec_size(p->brw, insn_and, BRW_EXECUTE_1);
+ brw_inst_set_exec_size(p->devinfo, insn_and, BRW_EXECUTE_1);
brw_set_dest(p, insn_and, addr);
brw_set_src0(p, insn_and, vec1(retype(surf_index, BRW_REGISTER_TYPE_UD)));
brw_set_src1(p, insn_and, brw_imm_ud(0x0ff));
+ brw_pop_insn_state(p);
- /* a0.0 |= <descriptor> */
- brw_inst *insn_or = brw_next_insn(p, BRW_OPCODE_OR);
- brw_set_sampler_message(p, insn_or,
+ /* dst = send(offset, a0.0 | <descriptor>) */
+ brw_inst *insn = brw_send_indirect_message(
+ p, BRW_SFID_SAMPLER, dst, offset, addr);
+ brw_set_sampler_message(p, insn,
0 /* surface */,
0 /* sampler */,
GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
1 /* rlen */,
- 1 /* mlen */,
- false /* header */,
+ inst->mlen,
+ inst->header_size != 0,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
0);
- brw_inst_set_exec_size(p->brw, insn_or, BRW_EXECUTE_1);
- brw_inst_set_src1_reg_type(p->brw, insn_or, BRW_REGISTER_TYPE_UD);
- brw_set_src0(p, insn_or, addr);
- brw_set_dest(p, insn_or, addr);
-
-
- /* dst = send(offset, a0.0) */
- brw_inst *insn_send = brw_next_insn(p, BRW_OPCODE_SEND);
- brw_set_dest(p, insn_send, dst);
- brw_set_src0(p, insn_send, offset);
- brw_set_indirect_send_descriptor(p, insn_send, BRW_SFID_SAMPLER, addr);
-
- brw_pop_insn_state(p);
/* visitor knows more than we do about the surface limit required,
* so has already done marking.
}
void
-vec4_generator::generate_untyped_atomic(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg atomic_op,
- struct brw_reg surf_index)
+vec4_generator::generate_set_simd4x2_header_gen9(vec4_instruction *inst,
+ struct brw_reg dst)
{
- assert(atomic_op.file == BRW_IMMEDIATE_VALUE &&
- atomic_op.type == BRW_REGISTER_TYPE_UD &&
- surf_index.file == BRW_IMMEDIATE_VALUE &&
- surf_index.type == BRW_REGISTER_TYPE_UD);
-
- brw_untyped_atomic(p, dst, brw_message_reg(inst->base_mrf),
- atomic_op.dw1.ud, surf_index.dw1.ud,
- inst->mlen, 1);
+ brw_push_insn_state(p);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
- brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
-}
+ brw_set_default_exec_size(p, BRW_EXECUTE_8);
+ brw_MOV(p, vec8(dst), retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
-void
-vec4_generator::generate_untyped_surface_read(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg surf_index)
-{
- assert(surf_index.file == BRW_IMMEDIATE_VALUE &&
- surf_index.type == BRW_REGISTER_TYPE_UD);
-
- brw_untyped_surface_read(p, dst, brw_message_reg(inst->base_mrf),
- surf_index.dw1.ud,
- inst->mlen, 1);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_MOV(p, get_element_ud(dst, 2),
+ brw_imm_ud(GEN9_SAMPLER_SIMD_MODE_EXTENSION_SIMD4X2));
- brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
+ brw_pop_insn_state(p);
}
void
struct brw_reg src[3], dst;
if (unlikely(debug_flag))
- annotate(brw, &annotation, cfg, inst, p->next_insn_offset);
+ annotate(p->devinfo, &annotation, cfg, inst, p->next_insn_offset);
for (unsigned int i = 0; i < 3; i++) {
src[i] = inst->get_src(this->prog_data, i);
brw_set_default_predicate_control(p, inst->predicate);
brw_set_default_predicate_inverse(p, inst->predicate_inverse);
+ brw_set_default_flag_reg(p, 0, inst->flag_subreg);
brw_set_default_saturate(p, inst->saturate);
brw_set_default_mask_control(p, inst->force_writemask_all);
brw_set_default_acc_write_control(p, inst->writes_accumulator);
}
switch (inst->opcode) {
+ case VEC4_OPCODE_UNPACK_UNIFORM:
case BRW_OPCODE_MOV:
brw_MOV(p, dst, src[0]);
break;
break;
case BRW_OPCODE_MAD:
- assert(brw->gen >= 6);
+ assert(devinfo->gen >= 6);
brw_MAD(p, dst, src[0], src[1], src[2]);
break;
break;
case BRW_OPCODE_F32TO16:
- assert(brw->gen >= 7);
+ assert(devinfo->gen >= 7);
brw_F32TO16(p, dst, src[0]);
break;
case BRW_OPCODE_F16TO32:
- assert(brw->gen >= 7);
+ assert(devinfo->gen >= 7);
brw_F16TO32(p, dst, src[0]);
break;
case BRW_OPCODE_LRP:
- assert(brw->gen >= 6);
+ assert(devinfo->gen >= 6);
brw_LRP(p, dst, src[0], src[1], src[2]);
break;
case BRW_OPCODE_BFREV:
- assert(brw->gen >= 7);
+ assert(devinfo->gen >= 7);
/* BFREV only supports UD type for src and dst. */
brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD),
retype(src[0], BRW_REGISTER_TYPE_UD));
break;
case BRW_OPCODE_FBH:
- assert(brw->gen >= 7);
+ assert(devinfo->gen >= 7);
/* FBH only supports UD type for dst. */
brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
break;
case BRW_OPCODE_FBL:
- assert(brw->gen >= 7);
+ assert(devinfo->gen >= 7);
/* FBL only supports UD type for dst. */
brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
break;
case BRW_OPCODE_CBIT:
- assert(brw->gen >= 7);
+ assert(devinfo->gen >= 7);
/* CBIT only supports UD type for dst. */
brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
break;
case BRW_OPCODE_ADDC:
- assert(brw->gen >= 7);
+ assert(devinfo->gen >= 7);
brw_ADDC(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_SUBB:
- assert(brw->gen >= 7);
+ assert(devinfo->gen >= 7);
brw_SUBB(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_MAC:
break;
case BRW_OPCODE_BFE:
- assert(brw->gen >= 7);
+ assert(devinfo->gen >= 7);
brw_BFE(p, dst, src[0], src[1], src[2]);
break;
case BRW_OPCODE_BFI1:
- assert(brw->gen >= 7);
+ assert(devinfo->gen >= 7);
brw_BFI1(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_BFI2:
- assert(brw->gen >= 7);
+ assert(devinfo->gen >= 7);
brw_BFI2(p, dst, src[0], src[1], src[2]);
break;
case BRW_OPCODE_IF:
if (inst->src[0].file != BAD_FILE) {
/* The instruction has an embedded compare (only allowed on gen6) */
- assert(brw->gen == 6);
+ assert(devinfo->gen == 6);
gen6_IF(p, inst->conditional_mod, src[0], src[1]);
} else {
brw_inst *if_inst = brw_IF(p, BRW_EXECUTE_8);
- brw_inst_set_pred_control(brw, if_inst, inst->predicate);
+ brw_inst_set_pred_control(p->devinfo, if_inst, inst->predicate);
}
break;
case SHADER_OPCODE_LOG2:
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
- if (brw->gen >= 7) {
+ assert(inst->conditional_mod == BRW_CONDITIONAL_NONE);
+ if (devinfo->gen >= 7) {
gen6_math(p, dst, brw_math_function(inst->opcode), src[0],
brw_null_reg());
- } else if (brw->gen == 6) {
+ } else if (devinfo->gen == 6) {
generate_math_gen6(inst, dst, src[0], brw_null_reg());
} else {
generate_math1_gen4(inst, dst, src[0]);
case SHADER_OPCODE_POW:
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
- if (brw->gen >= 7) {
+ assert(inst->conditional_mod == BRW_CONDITIONAL_NONE);
+ if (devinfo->gen >= 7) {
gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]);
- } else if (brw->gen == 6) {
+ } else if (devinfo->gen == 6) {
generate_math_gen6(inst, dst, src[0], src[1]);
} else {
generate_math2_gen4(inst, dst, src[0], src[1]);
generate_pull_constant_load_gen7(inst, dst, src[0], src[1]);
break;
+ case VS_OPCODE_SET_SIMD4X2_HEADER_GEN9:
+ generate_set_simd4x2_header_gen9(inst, dst);
+ break;
+
case GS_OPCODE_URB_WRITE:
generate_gs_urb_write(inst);
break;
generate_gs_urb_write_allocate(inst);
break;
+ case GS_OPCODE_SVB_WRITE:
+ generate_gs_svb_write(inst, dst, src[0], src[1]);
+ break;
+
+ case GS_OPCODE_SVB_SET_DST_INDEX:
+ generate_gs_svb_set_destination_index(inst, dst, src[0]);
+ break;
+
case GS_OPCODE_THREAD_END:
generate_gs_thread_end(inst);
break;
break;
case GS_OPCODE_FF_SYNC:
- generate_gs_ff_sync(inst, dst, src[0]);
+ generate_gs_ff_sync(inst, dst, src[0], src[1]);
+ break;
+
+ case GS_OPCODE_FF_SYNC_SET_PRIMITIVES:
+ generate_gs_ff_sync_set_primitives(dst, src[0], src[1], src[2]);
+ break;
+
+ case GS_OPCODE_SET_PRIMITIVE_ID:
+ generate_gs_set_primitive_id(dst);
break;
- case GS_OPCODE_SET_DWORD_2_IMMED:
- generate_gs_set_dword_2_immed(dst, src[0]);
+ case GS_OPCODE_SET_DWORD_2:
+ generate_gs_set_dword_2(dst, src[0]);
break;
case GS_OPCODE_PREPARE_CHANNEL_MASKS:
break;
case SHADER_OPCODE_UNTYPED_ATOMIC:
- generate_untyped_atomic(inst, dst, src[0], src[1]);
+ assert(src[2].file == BRW_IMMEDIATE_VALUE);
+ brw_untyped_atomic(p, dst, src[0], src[1], src[2].dw1.ud, inst->mlen,
+ !inst->dst.is_null());
break;
case SHADER_OPCODE_UNTYPED_SURFACE_READ:
- generate_untyped_surface_read(inst, dst, src[0]);
+ assert(src[2].file == BRW_IMMEDIATE_VALUE);
+ brw_untyped_surface_read(p, dst, src[0], src[1], inst->mlen,
+ src[2].dw1.ud);
+ break;
+
+ case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
+ assert(src[2].file == BRW_IMMEDIATE_VALUE);
+ brw_untyped_surface_write(p, src[0], src[1], inst->mlen,
+ src[2].dw1.ud);
+ break;
+
+ case SHADER_OPCODE_TYPED_ATOMIC:
+ assert(src[2].file == BRW_IMMEDIATE_VALUE);
+ brw_typed_atomic(p, dst, src[0], src[1], src[2].dw1.ud, inst->mlen,
+ !inst->dst.is_null());
+ break;
+
+ case SHADER_OPCODE_TYPED_SURFACE_READ:
+ assert(src[2].file == BRW_IMMEDIATE_VALUE);
+ brw_typed_surface_read(p, dst, src[0], src[1], inst->mlen,
+ src[2].dw1.ud);
+ break;
+
+ case SHADER_OPCODE_TYPED_SURFACE_WRITE:
+ assert(src[2].file == BRW_IMMEDIATE_VALUE);
+ brw_typed_surface_write(p, src[0], src[1], inst->mlen,
+ src[2].dw1.ud);
+ break;
+
+ case SHADER_OPCODE_MEMORY_FENCE:
+ brw_memory_fence(p, dst);
+ break;
+
+ case SHADER_OPCODE_FIND_LIVE_CHANNEL:
+ brw_find_live_channel(p, dst);
+ break;
+
+ case SHADER_OPCODE_BROADCAST:
+ brw_broadcast(p, dst, src[0], src[1]);
break;
case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
- generate_unpack_flags(inst, dst);
+ generate_unpack_flags(dst);
break;
+ case VEC4_OPCODE_MOV_BYTES: {
+ /* Moves the low byte from each channel, using an Align1 access mode
+ * and a <4,1,0> source region.
+ */
+ assert(src[0].type == BRW_REGISTER_TYPE_UB ||
+ src[0].type == BRW_REGISTER_TYPE_B);
+
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ src[0].vstride = BRW_VERTICAL_STRIDE_4;
+ src[0].width = BRW_WIDTH_1;
+ src[0].hstride = BRW_HORIZONTAL_STRIDE_0;
+ brw_MOV(p, dst, src[0]);
+ brw_set_default_access_mode(p, BRW_ALIGN_16);
+ break;
+ }
+
+ case VEC4_OPCODE_PACK_BYTES: {
+ /* Is effectively:
+ *
+ * mov(8) dst<16,4,1>:UB src<4,1,0>:UB
+ *
+ * but destinations' only regioning is horizontal stride, so instead we
+ * have to use two instructions:
+ *
+ * mov(4) dst<1>:UB src<4,1,0>:UB
+ * mov(4) dst.16<1>:UB src.16<4,1,0>:UB
+ *
+ * where they pack the four bytes from the low and high four DW.
+ */
+ assert(_mesa_is_pow_two(dst.dw1.bits.writemask) &&
+ dst.dw1.bits.writemask != 0);
+ unsigned offset = __builtin_ctz(dst.dw1.bits.writemask);
+
+ dst.type = BRW_REGISTER_TYPE_UB;
+
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+
+ src[0].type = BRW_REGISTER_TYPE_UB;
+ src[0].vstride = BRW_VERTICAL_STRIDE_4;
+ src[0].width = BRW_WIDTH_1;
+ src[0].hstride = BRW_HORIZONTAL_STRIDE_0;
+ dst.subnr = offset * 4;
+ struct brw_inst *insn = brw_MOV(p, dst, src[0]);
+ brw_inst_set_exec_size(p->devinfo, insn, BRW_EXECUTE_4);
+ brw_inst_set_no_dd_clear(p->devinfo, insn, true);
+ brw_inst_set_no_dd_check(p->devinfo, insn, inst->no_dd_check);
+
+ src[0].subnr = 16;
+ dst.subnr = 16 + offset * 4;
+ insn = brw_MOV(p, dst, src[0]);
+ brw_inst_set_exec_size(p->devinfo, insn, BRW_EXECUTE_4);
+ brw_inst_set_no_dd_clear(p->devinfo, insn, inst->no_dd_clear);
+ brw_inst_set_no_dd_check(p->devinfo, insn, true);
+
+ brw_set_default_access_mode(p, BRW_ALIGN_16);
+ break;
+ }
+
default:
- if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
- _mesa_problem(&brw->ctx, "Unsupported opcode in `%s' in vec4\n",
- opcode_descs[inst->opcode].name);
- } else {
- _mesa_problem(&brw->ctx, "Unsupported opcode %d in vec4", inst->opcode);
- }
- abort();
+ unreachable("Unsupported opcode");
}
- if (inst->no_dd_clear || inst->no_dd_check || inst->conditional_mod) {
+ if (inst->opcode == VEC4_OPCODE_PACK_BYTES) {
+ /* Handled dependency hints in the generator. */
+
+ assert(!inst->conditional_mod);
+ } else if (inst->no_dd_clear || inst->no_dd_check || inst->conditional_mod) {
assert(p->nr_insn == pre_emit_nr_insn + 1 ||
!"conditional_mod, no_dd_check, or no_dd_clear set for IR "
"emitting more than 1 instruction");
brw_inst *last = &p->store[pre_emit_nr_insn];
- brw_inst_set_cond_modifier(brw, last, inst->conditional_mod);
- brw_inst_set_no_dd_clear(brw, last, inst->no_dd_clear);
- brw_inst_set_no_dd_check(brw, last, inst->no_dd_check);
+ if (inst->conditional_mod)
+ brw_inst_set_cond_modifier(p->devinfo, last, inst->conditional_mod);
+ brw_inst_set_no_dd_clear(p->devinfo, last, inst->no_dd_clear);
+ brw_inst_set_no_dd_check(p->devinfo, last, inst->no_dd_check);
}
}
if (unlikely(debug_flag)) {
if (shader_prog) {
- fprintf(stderr, "Native code for %s vertex shader %d:\n",
+ fprintf(stderr, "Native code for %s %s shader %d:\n",
shader_prog->Label ? shader_prog->Label : "unnamed",
- shader_prog->Name);
+ stage_name, shader_prog->Name);
} else {
- fprintf(stderr, "Native code for vertex program %d:\n", prog->Id);
+ fprintf(stderr, "Native code for %s program %d:\n", stage_name,
+ prog->Id);
}
- fprintf(stderr, "vec4 shader: %d instructions. %d loops. Compacted %d to %d"
+ fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. Compacted %d to %d"
" bytes (%.0f%%)\n",
+ stage_abbrev,
before_size / 16, loop_count, before_size, after_size,
100.0f * (before_size - after_size) / before_size);
- dump_assembly(p->store, annotation.ann_count, annotation.ann, brw, prog);
+ dump_assembly(p->store, annotation.ann_count, annotation.ann,
+ p->devinfo, prog);
ralloc_free(annotation.ann);
}
+
+ compiler->shader_debug_log(log_data,
+ "%s vec4 shader: %d inst, %d loops, "
+ "compacted %d to %d bytes.\n",
+ stage_abbrev, before_size / 16, loop_count,
+ before_size, after_size);
}
const unsigned *