*/
#include "brw_vec4.h"
+#include "brw_cfg.h"
extern "C" {
#include "brw_eu.h"
break;
case HW_REG:
+ assert(dst.type == dst.fixed_hw_reg.type);
brw_reg = dst.fixed_hw_reg;
break;
break;
case HW_REG:
+ assert(src[i].type == src[i].fixed_hw_reg.type);
brw_reg = src[i].fixed_hw_reg;
break;
: brw(brw), shader_prog(shader_prog), prog(prog), prog_data(prog_data),
mem_ctx(mem_ctx), debug_flag(debug_flag)
{
- shader = shader_prog ? shader_prog->_LinkedShaders[MESA_SHADER_VERTEX] : NULL;
-
p = rzalloc(mem_ctx, struct brw_compile);
brw_init_compile(brw, p, mem_ctx);
}
{
}
-void
-vec4_generator::mark_surface_used(unsigned surf_index)
-{
- assert(surf_index < BRW_MAX_SURFACES);
-
- prog_data->base.binding_table.size_bytes =
- MAX2(prog_data->base.binding_table.size_bytes, (surf_index + 1) * 4);
-}
-
void
vec4_generator::generate_math1_gen4(vec4_instruction *inst,
struct brw_reg dst,
struct brw_reg src)
{
+ /* Pre-Gen6 math is emitted through the mathbox helper, which needs the
+ * message register base (inst->base_mrf) in addition to the operand.
+ * brw_math() was renamed gen4_math() to make the generation it targets
+ * explicit; arguments are unchanged.
+ */
- brw_math(p,
- dst,
- brw_math_function(inst->opcode),
- inst->base_mrf,
- src,
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
+ gen4_math(p,
+ dst,
+ brw_math_function(inst->opcode),
+ inst->base_mrf,
+ src,
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
}
static void
}
void
-vec4_generator::generate_math1_gen6(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src)
-{
- /* Can't do writemask because math can't be align16. */
- assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
- check_gen6_math_src_arg(src);
-
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_math(p,
- dst,
- brw_math_function(inst->opcode),
- inst->base_mrf,
- src,
- BRW_MATH_DATA_SCALAR,
- BRW_MATH_PRECISION_FULL);
- brw_set_access_mode(p, BRW_ALIGN_16);
-}
-
-void
-vec4_generator::generate_math2_gen7(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1)
-{
- brw_math2(p,
- dst,
- brw_math_function(inst->opcode),
- src0, src1);
-}
-
-void
-vec4_generator::generate_math2_gen6(vec4_instruction *inst,
- struct brw_reg dst,
- struct brw_reg src0,
- struct brw_reg src1)
+vec4_generator::generate_math_gen6(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src0,
+ struct brw_reg src1)
{
/* Can't do writemask because math can't be align16. */
assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
/* Source swizzles are ignored. */
check_gen6_math_src_arg(src0);
- check_gen6_math_src_arg(src1);
+ if (src1.file == BRW_GENERAL_REGISTER_FILE)
+ check_gen6_math_src_arg(src1);
+ /* src1 may be a non-GRF operand — the one-source callers below pass
+ * brw_null_reg() — and the gen6 source-argument restriction check only
+ * applies to GRF sources, so it is skipped for other register files.
+ */
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_math2(p,
- dst,
- brw_math_function(inst->opcode),
- src0, src1);
- brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ gen6_math(p, dst, brw_math_function(inst->opcode), src0, src1);
+ brw_set_default_access_mode(p, BRW_ALIGN_16);
}
void
struct brw_reg &op1 = is_int_div ? src0 : src1;
brw_push_insn_state(p);
- brw_set_saturate(p, false);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_set_default_saturate(p, false);
+ brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), op1.type), op1);
brw_pop_insn_state(p);
- brw_math(p,
- dst,
- brw_math_function(inst->opcode),
- inst->base_mrf,
- op0,
- BRW_MATH_DATA_VECTOR,
- BRW_MATH_PRECISION_FULL);
+ gen4_math(p,
+ dst,
+ brw_math_function(inst->opcode),
+ inst->base_mrf,
+ op0,
+ BRW_MATH_DATA_VECTOR,
+ BRW_MATH_PRECISION_FULL);
}
void
case SHADER_OPCODE_TXF:
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
break;
- case SHADER_OPCODE_TXF_MS:
+ case SHADER_OPCODE_TXF_CMS:
if (brw->gen >= 7)
msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
else
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
break;
+ case SHADER_OPCODE_TXF_MCS:
+ assert(brw->gen >= 7);
+ msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD_MCS;
+ break;
case SHADER_OPCODE_TXS:
msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
break;
}
break;
default:
- assert(!"should not get here: invalid VS texture opcode");
+ assert(!"should not get here: invalid vec4 texture opcode");
break;
}
} else {
assert(inst->mlen == 2);
break;
default:
- assert(!"should not get here: invalid VS texture opcode");
+ assert(!"should not get here: invalid vec4 texture opcode");
break;
}
}
* to set it up explicitly and load the offset bitfield. Otherwise, we can
* use an implied move from g0 to the first message register.
*/
- if (inst->texture_offset) {
- /* Explicitly set up the message header by copying g0 to the MRF. */
- brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_MOV(p, retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
- retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
-
- /* Then set the offset bits in DWord 2. */
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_MOV(p,
- retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, inst->base_mrf, 2),
- BRW_REGISTER_TYPE_UD),
- brw_imm_ud(inst->texture_offset));
- brw_pop_insn_state(p);
- } else if (inst->header_present) {
- /* Set up an implied move from g0 to the MRF. */
- src = brw_vec8_grf(0, 0);
+ if (inst->header_present) {
+ if (brw->gen < 6 && !inst->texture_offset) {
+ /* Set up an implied move from g0 to the MRF. */
+ src = brw_vec8_grf(0, 0);
+ } else {
+ struct brw_reg header =
+ retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD);
+
+ /* Explicitly set up the message header by copying g0 to the MRF. */
+ brw_push_insn_state(p);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_MOV(p, header, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+
+ if (inst->texture_offset) {
+ /* Set the texel offset bits in DWord 2. */
+ brw_MOV(p, get_element_ud(header, 2),
+ brw_imm_ud(inst->texture_offset));
+ }
+
+ if (inst->sampler >= 16) {
+ /* The "Sampler Index" field can only store values between 0 and 15.
+ * However, we can add an offset to the "Sampler State Pointer"
+ * field, effectively selecting a different set of 16 samplers.
+ *
+ * The "Sampler State Pointer" needs to be aligned to a 32-byte
+ * offset, and each sampler state is only 16-bytes, so we can't
+ * exclusively use the offset - we have to use both.
+ */
+ assert(brw->is_haswell); /* field only exists on Haswell */
+ brw_ADD(p,
+ get_element_ud(header, 3),
+ get_element_ud(brw_vec8_grf(0, 0), 3),
+ brw_imm_ud(16 * (inst->sampler / 16) *
+ sizeof(gen7_sampler_state)));
+ }
+ brw_pop_insn_state(p);
+ }
}
uint32_t return_format;
inst->base_mrf,
src,
surface_index,
- inst->sampler,
+ inst->sampler % 16,
msg_type,
1, /* response length */
inst->mlen,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
return_format);
- mark_surface_used(surface_index);
+ brw_mark_surface_used(&prog_data->base, surface_index);
}
void
* mul(2) dst.3<1>UD src0<8;2,4>UD src1 { Align1 WE_all }
*/
brw_push_insn_state(p);
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
src1);
- brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_set_default_access_mode(p, BRW_ALIGN_16);
brw_pop_insn_state(p);
}
struct brw_reg src)
{
brw_push_insn_state(p);
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
/* If we think of the src and dst registers as composed of 8 DWORDs each,
* we want to pick up the contents of DWORDs 0 and 4 from src, truncate
*/
brw_MOV(p, suboffset(stride(retype(dst, BRW_REGISTER_TYPE_UW), 2, 2, 1), 4),
stride(retype(src, BRW_REGISTER_TYPE_UW), 8, 1, 0));
- brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_set_default_access_mode(p, BRW_ALIGN_16);
brw_pop_insn_state(p);
}
assert(src.file == BRW_IMMEDIATE_VALUE);
brw_push_insn_state(p);
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_MOV(p, suboffset(vec1(dst), 2), src);
- brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_set_default_access_mode(p, BRW_ALIGN_16);
brw_pop_insn_state(p);
}
*/
dst = suboffset(vec1(dst), 4);
brw_push_insn_state(p);
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_SHL(p, dst, dst, brw_imm_ud(4));
brw_pop_insn_state(p);
}
dst = retype(dst, BRW_REGISTER_TYPE_UB);
src = retype(src, BRW_REGISTER_TYPE_UB);
brw_push_insn_state(p);
- brw_set_access_mode(p, BRW_ALIGN_1);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_OR(p, suboffset(vec1(dst), 21), vec1(src), suboffset(vec1(src), 16));
brw_pop_insn_state(p);
}
+void
+vec4_generator::generate_gs_get_instance_id(struct brw_reg dst)
+{
+ /* We want to right shift R0.0 & R0.1 by GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT
+ * and store into dst.0 & dst.4. So generate the instruction:
+ *
+ * shr(8) dst<1> R0<1,4,0> GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT { align1 WE_normal 1Q }
+ */
+ brw_push_insn_state(p);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
+ dst = retype(dst, BRW_REGISTER_TYPE_UD);
+ struct brw_reg r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
+ /* The <1;4,0> region replicates r0.0 across the first four channels and
+ * r0.1 across the last four, matching the dst.0 / dst.4 layout described
+ * above.
+ */
+ brw_SHR(p, dst, stride(r0, 1, 4, 0),
+ brw_imm_ud(GEN7_GS_PAYLOAD_INSTANCE_ID_SHIFT));
+ brw_pop_insn_state(p);
+}
+
void
vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
struct brw_reg index)
struct brw_reg index_4 = suboffset(vec1(index), 4);
brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
brw_MOV(p, m1_0, index_0);
struct brw_reg dst)
{
brw_push_insn_state(p);
- brw_set_mask_control(p, BRW_MASK_DISABLE);
- brw_set_access_mode(p, BRW_ALIGN_1);
+ brw_set_default_mask_control(p, BRW_MASK_DISABLE);
+ brw_set_default_access_mode(p, BRW_ALIGN_1);
struct brw_reg flags = brw_flag_reg(0, 0);
struct brw_reg dst_0 = suboffset(vec1(dst), 0);
/* If the instruction is predicated, we'll predicate the send, not
* the header setup.
*/
- brw_set_predicate_control(p, false);
+ brw_set_default_predicate_control(p, false);
gen6_resolve_implied_move(p, &header, inst->base_mrf);
else
msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
- brw_set_predicate_control(p, inst->predicate);
+ brw_set_default_predicate_control(p, inst->predicate);
/* Pre-gen6, we have to specify write commits to ensure ordering
* between reads and writes within a thread. Afterwards, that's
true, /* header_present */
1 /* rlen */);
- mark_surface_used(surf_index);
+ brw_mark_surface_used(&prog_data->base, surf_index);
}
void
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
0);
- mark_surface_used(surf_index.dw1.ud);
+ brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
+}
+
+/**
+ * Emit an untyped-atomic data-port operation on the given surface.
+ *
+ * Both the atomic opcode and the surface index must be UD immediates (the
+ * asserts enforce this); the message payload has already been staged in the
+ * MRFs starting at inst->base_mrf (inst->mlen registers).  The trailing 1 is
+ * presumably the response length — confirm against brw_untyped_atomic().
+ * The surface is recorded in prog_data so the binding table is sized to
+ * cover it.
+ */
+void
+vec4_generator::generate_untyped_atomic(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg atomic_op,
+ struct brw_reg surf_index)
+{
+ assert(atomic_op.file == BRW_IMMEDIATE_VALUE &&
+ atomic_op.type == BRW_REGISTER_TYPE_UD &&
+ surf_index.file == BRW_IMMEDIATE_VALUE &&
+ surf_index.type == BRW_REGISTER_TYPE_UD);
+
+ brw_untyped_atomic(p, dst, brw_message_reg(inst->base_mrf),
+ atomic_op.dw1.ud, surf_index.dw1.ud,
+ inst->mlen, 1);
+
+ brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
+}
+
+/**
+ * Emit an untyped surface-read data-port message.
+ *
+ * The surface index must be a UD immediate (asserted); the address payload
+ * lives in the MRFs starting at inst->base_mrf (inst->mlen registers), and
+ * the trailing 1 is presumably the response length — confirm against
+ * brw_untyped_surface_read().  The surface is recorded in prog_data so the
+ * binding table is sized to cover it.
+ */
+void
+vec4_generator::generate_untyped_surface_read(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg surf_index)
+{
+ assert(surf_index.file == BRW_IMMEDIATE_VALUE &&
+ surf_index.type == BRW_REGISTER_TYPE_UD);
+
+ brw_untyped_surface_read(p, dst, brw_message_reg(inst->base_mrf),
+ surf_index.dw1.ud,
+ inst->mlen, 1);
+
+ brw_mark_surface_used(&prog_data->base, surf_index.dw1.ud);
+}
/**
brw_MUL(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_MACH:
- brw_set_acc_write_control(p, 1);
brw_MACH(p, dst, src[0], src[1]);
- brw_set_acc_write_control(p, 0);
break;
case BRW_OPCODE_MAD:
break;
case BRW_OPCODE_ADDC:
assert(brw->gen >= 7);
- brw_set_acc_write_control(p, 1);
brw_ADDC(p, dst, src[0], src[1]);
- brw_set_acc_write_control(p, 0);
break;
case BRW_OPCODE_SUBB:
assert(brw->gen >= 7);
- brw_set_acc_write_control(p, 1);
brw_SUBB(p, dst, src[0], src[1]);
- brw_set_acc_write_control(p, 0);
+ break;
+ case BRW_OPCODE_MAC:
+ brw_MAC(p, dst, src[0], src[1]);
break;
case BRW_OPCODE_BFE:
case BRW_OPCODE_BREAK:
brw_BREAK(p);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
break;
case BRW_OPCODE_CONTINUE:
/* FINISHME: We need to write the loop instruction support still. */
gen6_CONT(p);
else
brw_CONT(p);
- brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
break;
case BRW_OPCODE_WHILE:
case SHADER_OPCODE_LOG2:
case SHADER_OPCODE_SIN:
case SHADER_OPCODE_COS:
- if (brw->gen == 6) {
- generate_math1_gen6(inst, dst, src[0]);
+ if (brw->gen >= 7) {
+ gen6_math(p, dst, brw_math_function(inst->opcode), src[0],
+ brw_null_reg());
+ } else if (brw->gen == 6) {
+ generate_math_gen6(inst, dst, src[0], brw_null_reg());
} else {
- /* Also works for Gen7. */
generate_math1_gen4(inst, dst, src[0]);
}
break;
case SHADER_OPCODE_INT_QUOTIENT:
case SHADER_OPCODE_INT_REMAINDER:
if (brw->gen >= 7) {
- generate_math2_gen7(inst, dst, src[0], src[1]);
+ gen6_math(p, dst, brw_math_function(inst->opcode), src[0], src[1]);
} else if (brw->gen == 6) {
- generate_math2_gen6(inst, dst, src[0], src[1]);
+ generate_math_gen6(inst, dst, src[0], src[1]);
} else {
generate_math2_gen4(inst, dst, src[0], src[1]);
}
case SHADER_OPCODE_TEX:
case SHADER_OPCODE_TXD:
case SHADER_OPCODE_TXF:
- case SHADER_OPCODE_TXF_MS:
+ case SHADER_OPCODE_TXF_CMS:
+ case SHADER_OPCODE_TXF_MCS:
case SHADER_OPCODE_TXL:
case SHADER_OPCODE_TXS:
case SHADER_OPCODE_TG4:
generate_vs_urb_write(inst);
break;
- case VS_OPCODE_SCRATCH_READ:
+ case SHADER_OPCODE_GEN4_SCRATCH_READ:
generate_scratch_read(inst, dst, src[0]);
break;
- case VS_OPCODE_SCRATCH_WRITE:
+ case SHADER_OPCODE_GEN4_SCRATCH_WRITE:
generate_scratch_write(inst, dst, src[0], src[1]);
break;
generate_gs_set_channel_masks(dst, src[0]);
break;
+ case GS_OPCODE_GET_INSTANCE_ID:
+ generate_gs_get_instance_id(dst);
+ break;
+
case SHADER_OPCODE_SHADER_TIME_ADD:
brw_shader_time_add(p, src[0],
prog_data->base.binding_table.shader_time_start);
- mark_surface_used(prog_data->base.binding_table.shader_time_start);
+ brw_mark_surface_used(&prog_data->base,
+ prog_data->base.binding_table.shader_time_start);
+ break;
+
+ case SHADER_OPCODE_UNTYPED_ATOMIC:
+ generate_untyped_atomic(inst, dst, src[0], src[1]);
+ break;
+
+ case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ generate_untyped_surface_read(inst, dst, src[0]);
break;
case VS_OPCODE_UNPACK_FLAGS_SIMD4X2:
default:
if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
- _mesa_problem(&brw->ctx, "Unsupported opcode in `%s' in VS\n",
+ _mesa_problem(&brw->ctx, "Unsupported opcode in `%s' in vec4\n",
opcode_descs[inst->opcode].name);
} else {
- _mesa_problem(&brw->ctx, "Unsupported opcode %d in VS", inst->opcode);
+ _mesa_problem(&brw->ctx, "Unsupported opcode %d in vec4", inst->opcode);
}
abort();
}
void
vec4_generator::generate_code(exec_list *instructions)
{
- int last_native_insn_offset = 0;
- const char *last_annotation_string = NULL;
- const void *last_annotation_ir = NULL;
+ struct annotation_info annotation;
+ memset(&annotation, 0, sizeof(annotation));
+ /* annotation collects per-instruction offsets plus IR/annotation strings
+ * via annotate() below, so dump_assembly() can interleave them with the
+ * disassembly instead of the old hand-rolled printf tracking.
+ */
- if (unlikely(debug_flag)) {
- if (shader) {
- printf("Native code for vertex shader %d:\n", shader_prog->Name);
- } else {
- printf("Native code for vertex program %d:\n", prog->Id);
- }
- }
+ cfg_t *cfg = NULL;
+ if (unlikely(debug_flag))
+ cfg = new(mem_ctx) cfg_t(instructions);
foreach_list(node, instructions) {
vec4_instruction *inst = (vec4_instruction *)node;
struct brw_reg src[3], dst;
- if (unlikely(debug_flag)) {
- if (last_annotation_ir != inst->ir) {
- last_annotation_ir = inst->ir;
- if (last_annotation_ir) {
- printf(" ");
- if (shader) {
- ((ir_instruction *) last_annotation_ir)->print();
- } else {
- const prog_instruction *vpi;
- vpi = (const prog_instruction *) inst->ir;
- printf("%d: ", (int)(vpi - prog->Instructions));
- _mesa_fprint_instruction_opt(stdout, vpi, 0,
- PROG_PRINT_DEBUG, NULL);
- }
- printf("\n");
- }
- }
- if (last_annotation_string != inst->annotation) {
- last_annotation_string = inst->annotation;
- if (last_annotation_string)
- printf(" %s\n", last_annotation_string);
- }
- }
+ if (unlikely(debug_flag))
+ annotate(brw, &annotation, cfg, inst, p->next_insn_offset);
for (unsigned int i = 0; i < 3; i++) {
src[i] = inst->get_src(this->prog_data, i);
}
dst = inst->get_dst();
- brw_set_conditionalmod(p, inst->conditional_mod);
- brw_set_predicate_control(p, inst->predicate);
- brw_set_predicate_inverse(p, inst->predicate_inverse);
- brw_set_saturate(p, inst->saturate);
- brw_set_mask_control(p, inst->force_writemask_all);
+ brw_set_default_predicate_control(p, inst->predicate);
+ brw_set_default_predicate_inverse(p, inst->predicate_inverse);
+ brw_set_default_saturate(p, inst->saturate);
+ brw_set_default_mask_control(p, inst->force_writemask_all);
+ brw_set_default_acc_write_control(p, inst->writes_accumulator);
unsigned pre_emit_nr_insn = p->nr_insn;
generate_vec4_instruction(inst, dst, src);
+ /* conditional_mod is no longer part of the default instruction state
+ * (brw_set_conditionalmod was dropped above); like the dependency-
+ * control bits, it is patched directly onto the single instruction
+ * just generated.
+ */
- if (inst->no_dd_clear || inst->no_dd_check) {
+ if (inst->no_dd_clear || inst->no_dd_check || inst->conditional_mod) {
assert(p->nr_insn == pre_emit_nr_insn + 1 ||
- !"no_dd_check or no_dd_clear set for IR emitting more "
- "than 1 instruction");
+ !"conditional_mod, no_dd_check, or no_dd_clear set for IR "
+ "emitting more than 1 instruction");
struct brw_instruction *last = &p->store[pre_emit_nr_insn];
+ if (inst->conditional_mod)
+ last->header.destreg__conditionalmod = inst->conditional_mod;
if (inst->no_dd_clear)
last->header.dependency_control |= BRW_DEPENDENCY_NOTCLEARED;
if (inst->no_dd_check)
last->header.dependency_control |= BRW_DEPENDENCY_NOTCHECKED;
}
-
- if (unlikely(debug_flag)) {
- brw_dump_compile(p, stdout,
- last_native_insn_offset, p->next_insn_offset);
- }
-
- last_native_insn_offset = p->next_insn_offset;
- }
-
- if (unlikely(debug_flag)) {
- printf("\n");
}
brw_set_uip_jip(p);
+ annotation_finalize(&annotation, p->next_insn_offset);
- /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
- * emit issues, it doesn't get the jump distances into the output,
- * which is often something we want to debug. So this is here in
- * case you're doing that.
- */
- if (0 && unlikely(debug_flag)) {
- brw_dump_compile(p, stdout, 0, p->next_insn_offset);
+ int before_size = p->next_insn_offset;
+ /* Compaction can change instruction offsets; the annotation list is
+ * handed in, presumably so its recorded offsets stay in sync — see
+ * brw_compact_instructions().
+ */
+ brw_compact_instructions(p, 0, annotation.ann_count, annotation.ann);
+ int after_size = p->next_insn_offset;
+
+ if (unlikely(debug_flag)) {
+ if (shader_prog) {
+ fprintf(stderr, "Native code for %s vertex shader %d:\n",
+ shader_prog->Label ? shader_prog->Label : "unnamed",
+ shader_prog->Name);
+ } else {
+ fprintf(stderr, "Native code for vertex program %d:\n", prog->Id);
+ }
+ fprintf(stderr, "vec4 shader: %d instructions. Compacted %d to %d"
+ " bytes (%.0f%%)\n",
+ before_size / 16, before_size, after_size,
+ 100.0f * (before_size - after_size) / before_size);
+
+ dump_assembly(p->store, annotation.ann_count, annotation.ann,
+ brw, prog, brw_disassemble);
+ ralloc_free(annotation.ann);
+ }
}
vec4_generator::generate_assembly(exec_list *instructions,
unsigned *assembly_size)
{
- brw_set_access_mode(p, BRW_ALIGN_16);
+ brw_set_default_access_mode(p, BRW_ALIGN_16);
generate_code(instructions);
+
return brw_get_program(p, assembly_size);
}