*/
#include "brw_vec4.h"
+#include "brw_cfg.h"
extern "C" {
#include "brw_eu.h"
struct brw_reg dst,
struct brw_reg src)
{
- brw_math(p,
- dst,
- brw_math_function(inst->opcode),
- inst->base_mrf,
- src,
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
+ gen4_math(p,
+ dst,
+ brw_math_function(inst->opcode),
+ inst->base_mrf,
+ src,
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
}
static void
}
void
-vec4_generator::generate_math1_gen6(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src)
-{
- /* Can't do writemask because math can't be align16. */
- assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
- check_gen6_math_src_arg(src);
-
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_math(p,
- dst,
- brw_math_function(inst->opcode),
- inst->base_mrf,
- src,
- BRW_MATH_DATA_SCALAR,
- BRW_MATH_PRECISION_FULL);
- brw_set_access_mode(p, BRW_ALIGN_16);
-}
-
-void
-vec4_generator::generate_math2_gen7(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1)
-{
- brw_math2(p,
- dst,
- brw_math_function(inst->opcode),
- src0, src1);
-}
-
-void
-vec4_generator::generate_math2_gen6(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1)
+vec4_generator::generate_math_gen6(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
/* Can't do writemask because math can't be align16. */
assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
/* Source swizzles are ignored. */
check_gen6_math_src_arg(src0);
- check_gen6_math_src_arg(src1);
+ if (src1.file == BRW_GENERAL_REGISTER_FILE)
+ check_gen6_math_src_arg(src1);
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_math2(p,
- dst,
- brw_math_function(inst->opcode),
- src0, src1);
- brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ gen6_math(p, dst, brw_math_function(inst->opcode), src0, src1);
+ brw_set_default_access_mode(p, BRW_ALIGN_16);
}
void
struct brw_reg &op1 = is_int_div ? src0 : src1;
brw_push_insn_state(p);
- brw_set_saturate(p, false);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_set_default_saturate(p, false);
+ brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), op1.type), op1);
brw_pop_insn_state(p);
- brw_math(p,
- dst,
- brw_math_function(inst->opcode),
- inst->base_mrf,
- op0,
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
+ gen4_math(p,
+ dst,
+ brw_math_function(inst->opcode),
+ inst->base_mrf,
+ op0,
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
}
void
/* Explicitly set up the message header by copying g0 to the MRF. */
brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_MOV(p, header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
- brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
if (inst->texture_offset) {
/* Set the texel offset bits in DWord 2. */
* mul(2) dst.3<1>UD src0<8;2,4>UD src1 { Align1 WE_all }
*/
brw_push_insn_state(p);
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
src1);
- brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_set_default_access_mode(p, BRW_ALIGN_16);
brw_pop_insn_state(p);
}
struct brw_reg src)
{
brw_push_insn_state(p);
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
/* If we think of the src and dst registers as composed of 8 DWORDs each,
* we want to pick up the contents of DWORDs 0 and 4 from src, truncate
*/
brw_MOV(p, suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4),
stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0));
- brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_set_default_access_mode(p, BRW_ALIGN_16);
brw_pop_insn_state(p);
}
assert(src.file == BRW_IMMEDIATE_VALUE);
brw_push_insn_state(p);
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_MOV(p, suboffset(vec1(dst), 2), src);
- brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_set_default_access_mode(p, BRW_ALIGN_16);
brw_pop_insn_state(p);
}
*/
dst = suboffset(vec1(dst), 4);
brw_push_insn_state(p);
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_SHL(p, dst, dst, brw_imm_ud(4));
brw_pop_insn_state(p);
}
dst = retype(dst, BRW_REGISTER_TYPE_UB);
src = retype(src, BRW_REGISTER_TYPE_UB);
brw_push_insn_state(p);
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_OR(p, suboffset(vec1(dst), 21), vec1(src), suboffset(vec1(src), 16));
brw_pop_insn_state(p);
}
* shr(8) dst<1> R0<1,4,0> GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT { align1 WE_normal 1Q }
*/
brw_push_insn_state(p);
- brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
dst = retype(dst, BRW_REGISTER_TYPE_UD);
struct brw_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
brw_SHR(p, dst, stride(r0, 1, 4, 0),
struct brw_reg index_4 = suboffset(vec1(index), 4);
brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_MOV(p, m1_0, index_0);
struct brw_reg dst)
{
brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
struct brw_reg flags = brw_flag_reg(0, 0);
struct brw_reg dst_0 = suboffset(vec1(dst), 0);
/* If the instruction is predicated, we'll predicate the send, not
* the header setup.
*/
- brw_set_predicate_control(p, false);
+ brw_set_default_predicate_control(p, false);
gen6_resolve_implied_move(p, &header, inst->base_mrf);
else
msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
- brw_set_predicate_control(p, inst->predicate);
+ brw_set_default_predicate_control(p, inst->predicate);
/* Pre-gen6, we have to specify write commits to ensure ordering
* between reads and writes within a thread. Afterwards, that's
case BRW_OPCODE_BREAK:
brw_BREAK(p);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
break;
case BRW_OPCODE_CONTINUE:
/* FINISHME: We need to write the loop instruction support still. */
gen6_CONT(p);
else
brw_CONT(p);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
break;
case BRW_OPCODE_WHILE:
case SHADER_OPCODE_LOG2:
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
- if (brw->gen == 6) {
- generate_math1_gen6(inst, dst, src[0]);
+ if (brw->gen >= 7) {
+ gen6_math(p, dst, brw_math_function(inst->opcode), src[0],
+ brw_null_reg());
+ } else if (brw->gen == 6) {
+ generate_math_gen6(inst, dst, src[0], brw_null_reg());
} else {
- /* Also works for Gen7. */
generate_math1_gen4(inst, dst, src[0]);
}
break;
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
if (brw->gen >= 7) {
- generate_math2_gen7(inst, dst, src[0], src[1]);
+ gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]);
} else if (brw->gen == 6) {
- generate_math2_gen6(inst, dst, src[0], src[1]);
+ generate_math_gen6(inst, dst, src[0], src[1]);
} else {
generate_math2_gen4(inst, dst, src[0], src[1]);
}
void
vec4_generator::generate_code(exec_list *instructions)
{
- int last_native_insn_offset = 0;
- const char *last_annotation_string = NULL;
- const void *last_annotation_ir = NULL;
+ struct annotation_info annotation;
+ memset(&annotation, 0, sizeof(annotation));
- if (unlikely(debug_flag)) {
- if (shader_prog) {
- fprintf(stderr, "Native code for %s vertex shader %d:\n",
- shader_prog->Label ? shader_prog->Label : "unnamed",
- shader_prog->Name);
- } else {
- fprintf(stderr, "Native code for vertex program %d:\n", prog->Id);
- }
- }
+ cfg_t *cfg = NULL;
+ if (unlikely(debug_flag))
+ cfg = new(mem_ctx) cfg_t(instructions);
foreach_list(node, instructions) {
vec4_instruction *inst = (vec4_instruction *)node;
struct brw_reg src[3], dst;
- if (unlikely(debug_flag)) {
- if (last_annotation_ir != inst->ir) {
- last_annotation_ir = inst->ir;
- if (last_annotation_ir) {
- fprintf(stderr, " ");
- if (shader_prog) {
- ((ir_instruction *) last_annotation_ir)->fprint(stderr);
- } else {
- const prog_instruction *vpi;
- vpi = (const prog_instruction *) inst->ir;
- fprintf(stderr, "%d: ", (int)(vpi - prog->Instructions));
- _mesa_fprint_instruction_opt(stderr, vpi, 0,
- PROG_PRINT_DEBUG, NULL);
- }
- fprintf(stderr, "\n");
- }
- }
- if (last_annotation_string != inst->annotation) {
- last_annotation_string = inst->annotation;
- if (last_annotation_string)
- fprintf(stderr, " %s\n", last_annotation_string);
- }
- }
+ if (unlikely(debug_flag))
+ annotate(brw, &annotation, cfg, inst, p->next_insn_offset);
for (unsigned int i = 0; i < 3; i++) {
src[i] = inst->get_src(this->prog_data, i);
}
dst = inst->get_dst();
- brw_set_conditionalmod(p, inst->conditional_mod);
- brw_set_predicate_control(p, inst->predicate);
- brw_set_predicate_inverse(p, inst->predicate_inverse);
- brw_set_saturate(p, inst->saturate);
- brw_set_mask_control(p, inst->force_writemask_all);
- brw_set_acc_write_control(p, inst->writes_accumulator);
+ brw_set_default_predicate_control(p, inst->predicate);
+ brw_set_default_predicate_inverse(p, inst->predicate_inverse);
+ brw_set_default_saturate(p, inst->saturate);
+ brw_set_default_mask_control(p, inst->force_writemask_all);
+ brw_set_default_acc_write_control(p, inst->writes_accumulator);
unsigned pre_emit_nr_insn = p->nr_insn;
generate_vec4_instruction(inst, dst, src);
- if (inst->no_dd_clear || inst->no_dd_check) {
+ if (inst->no_dd_clear || inst->no_dd_check || inst->conditional_mod) {
assert(p->nr_insn == pre_emit_nr_insn + 1 ||
- !"no_dd_check or no_dd_clear set for IR emitting more "
- "than 1 instruction");
+ !"conditional_mod, no_dd_check, or no_dd_clear set for IR "
+ "emitting more than 1 instruction");
struct brw_instruction *last = &p->store[pre_emit_nr_insn];
+ if (inst->conditional_mod)
+ last->header.destreg__conditionalmod = inst->conditional_mod;
if (inst->no_dd_clear)
last->header.dependency_control |= BRW_DEPENDENCY_NOTCLEARED;
if (inst->no_dd_check)
last->header.dependency_control |= BRW_DEPENDENCY_NOTCHECKED;
}
-
- if (unlikely(debug_flag)) {
- brw_dump_compile(p, stderr,
- last_native_insn_offset, p->next_insn_offset);
- }
-
- last_native_insn_offset = p->next_insn_offset;
- }
-
- if (unlikely(debug_flag)) {
- fprintf(stderr, "\n");
}
brw_set_uip_jip(p);
+ annotation_finalize(&annotation, p->next_insn_offset);
- /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
- * emit issues, it doesn't get the jump distances into the output,
- * which is often something we want to debug. So this is here in
- * case you're doing that.
- */
- if (0 && unlikely(debug_flag)) {
- brw_dump_compile(p, stderr, 0, p->next_insn_offset);
+ int before_size = p->next_insn_offset;
+ brw_compact_instructions(p, 0, annotation.ann_count, annotation.ann);
+ int after_size = p->next_insn_offset;
+
+ if (unlikely(debug_flag)) {
+ if (shader_prog) {
+ fprintf(stderr, "Native code for %s vertex shader %d:\n",
+ shader_prog->Label ? shader_prog->Label : "unnamed",
+ shader_prog->Name);
+ } else {
+ fprintf(stderr, "Native code for vertex program %d:\n", prog->Id);
+ }
+ fprintf(stderr, "vec4 shader: %d instructions. Compacted %d to %d"
+ " bytes (%.0f%%)\n",
+ before_size / 16, before_size, after_size,
+ 100.0f * (before_size - after_size) / before_size);
+
+ dump_assembly(p->store, annotation.ann_count, annotation.ann,
+ brw, prog, brw_disassemble);
+ ralloc_free(annotation.ann);
}
}
vec4_generator::generate_assembly(exec_list *instructions,
unsigned *assembly_size)
{
- brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_set_default_access_mode(p, BRW_ALIGN_16);
generate_code(instructions);
+
return brw_get_program(p, assembly_size);
}