#include "brw_vec4.h"
#include "brw_cfg.h"
#include "brw_eu.h"
-#include "common/gen_debug.h"
+#include "dev/gen_debug.h"
+#include "util/mesa-sha1.h"
using namespace brw;
inst->header_size != 0,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
return_format);
-
- brw_mark_surface_used(&prog_data->base, sampler + base_binding_table_index);
} else {
/* Non-constant sampler index. */
0 /* sampler */,
msg_type,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
- return_format));
+ return_format),
+ false /* EOT */);
/* visitor knows more than we do about the surface limit required,
* so has already done marking.
brw_MOV(p, suboffset(stride(dst, 2, 2, 1), 3),
brw_imm_ud(src0.ud * src1.ud));
} else {
+ if (src1.file == BRW_IMMEDIATE_VALUE) {
+ src1 = brw_imm_uw(src1.ud);
+ }
brw_MUL(p, suboffset(stride(dst, 2, 2, 1), 3), stride(src0, 8, 2, 4),
retype(src1, BRW_REGISTER_TYPE_UW));
}
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_MOV(p, dst, header);
+
+ /* Uniforms will have a stride <0;4,1>, and we need to convert to <0;1,0>.
+ * Other values get <4;1,0>.
+ */
+ struct brw_reg restrided_offset;
+ if (offset.vstride == BRW_VERTICAL_STRIDE_0 &&
+ offset.width == BRW_WIDTH_4 &&
+ offset.hstride == BRW_HORIZONTAL_STRIDE_1) {
+ restrided_offset = stride(offset, 0, 1, 0);
+ } else {
+ restrided_offset = stride(offset, 4, 1, 0);
+ }
+
/* m0.3-0.4: 128-bit-granular offsets into the URB from the handles */
- brw_MOV(p, vec2(get_element_ud(dst, 3)), stride(offset, 4, 1, 0));
+ brw_MOV(p, vec2(get_element_ud(dst, 3)), restrided_offset);
brw_pop_insn_state(p);
}
const unsigned target_cache =
(devinfo->gen >= 7 ? GEN7_SFID_DATAPORT_DATA_CACHE :
devinfo->gen >= 6 ? GEN6_SFID_DATAPORT_RENDER_CACHE :
- BRW_DATAPORT_READ_TARGET_RENDER_CACHE);
+ BRW_SFID_DATAPORT_WRITE);
struct brw_reg header = brw_vec8_grf(0, 0);
bool write_commit;
/* If the instruction is predicated, we'll predicate the send, not
* the header setup.
*/
- brw_set_default_predicate_control(p, false);
+ brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
gen6_resolve_implied_move(p, &header, inst->base_mrf);
* dword is written.
*/
brw_inst *send = brw_next_insn(p, BRW_OPCODE_SEND);
+ brw_inst_set_sfid(p->devinfo, send, target_cache);
brw_set_dest(p, send, dst);
brw_set_src0(p, send, header);
if (devinfo->gen < 6)
brw_inst_set_cond_modifier(p->devinfo, send, inst->base_mrf);
- brw_set_dp_write_message(p, send,
- brw_scratch_surface_idx(p),
- BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
- msg_type,
- target_cache,
- 3, /* mlen */
- true, /* header present */
- false, /* not a render target write */
- write_commit, /* rlen */
- false, /* eot */
- write_commit);
+ brw_set_desc(p, send,
+ brw_message_desc(devinfo, 3, write_commit, true) |
+ brw_dp_write_desc(devinfo,
+ brw_scratch_surface_idx(p),
+ BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
+ msg_type,
+ false, /* not a render target write */
+ write_commit));
}
static void
inst->header_size > 0,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
BRW_SAMPLER_RETURN_FORMAT_SINT32);
-
- brw_mark_surface_used(&prog_data->base, surf_index.ud);
}
static void
0, /* LD message ignores sampler unit */
GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
BRW_SAMPLER_SIMD_MODE_SIMD4X2, 0));
-
- brw_mark_surface_used(&prog_data->base, surf_index.ud);
-
} else {
struct brw_reg addr = vec1(retype(brw_address_reg(0), BRW_REGISTER_TYPE_UD));
0 /* sampler */,
GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
BRW_SAMPLER_SIMD_MODE_SIMD4X2,
- 0));
+ 0),
+ false /* EOT */);
}
}
void *log_data,
const nir_shader *nir,
struct brw_vue_prog_data *prog_data,
- const struct cfg_t *cfg)
+ const struct cfg_t *cfg,
+ struct brw_compile_stats *stats)
{
const struct gen_device_info *devinfo = p->devinfo;
const char *stage_abbrev = _mesa_shader_stage_to_abbrev(nir->info.stage);
case SHADER_OPCODE_SHADER_TIME_ADD:
brw_shader_time_add(p, src[0],
prog_data->base.binding_table.shader_time_start);
- brw_mark_surface_used(&prog_data->base,
- prog_data->base.binding_table.shader_time_start);
break;
- case SHADER_OPCODE_UNTYPED_ATOMIC:
+ case VEC4_OPCODE_UNTYPED_ATOMIC:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen,
!inst->dst.is_null(), inst->header_size);
break;
- case SHADER_OPCODE_UNTYPED_SURFACE_READ:
+ case VEC4_OPCODE_UNTYPED_SURFACE_READ:
assert(!inst->header_size);
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_surface_read(p, dst, src[0], src[1], inst->mlen,
src[2].ud);
break;
- case SHADER_OPCODE_UNTYPED_SURFACE_WRITE:
+ case VEC4_OPCODE_UNTYPED_SURFACE_WRITE:
assert(src[2].file == BRW_IMMEDIATE_VALUE);
brw_untyped_surface_write(p, src[0], src[1], inst->mlen,
src[2].ud, inst->header_size);
break;
- case SHADER_OPCODE_TYPED_ATOMIC:
- assert(src[2].file == BRW_IMMEDIATE_VALUE);
- brw_typed_atomic(p, dst, src[0], src[1], src[2].ud, inst->mlen,
- !inst->dst.is_null(), inst->header_size);
- break;
-
- case SHADER_OPCODE_TYPED_SURFACE_READ:
- assert(src[2].file == BRW_IMMEDIATE_VALUE);
- brw_typed_surface_read(p, dst, src[0], src[1], inst->mlen,
- src[2].ud, inst->header_size);
- break;
-
- case SHADER_OPCODE_TYPED_SURFACE_WRITE:
- assert(src[2].file == BRW_IMMEDIATE_VALUE);
- brw_typed_surface_write(p, src[0], src[1], inst->mlen,
- src[2].ud, inst->header_size);
- break;
-
case SHADER_OPCODE_MEMORY_FENCE:
- brw_memory_fence(p, dst, BRW_OPCODE_SEND);
+ brw_memory_fence(p, dst, src[0], BRW_OPCODE_SEND, false, /* bti */ 0);
break;
case SHADER_OPCODE_FIND_LIVE_CHANNEL: {
int after_size = p->next_insn_offset;
if (unlikely(debug_flag)) {
- fprintf(stderr, "Native code for %s %s shader %s:\n",
- nir->info.label ? nir->info.label : "unnamed",
- _mesa_shader_stage_to_string(nir->info.stage), nir->info.name);
+ unsigned char sha1[21];
+ char sha1buf[41];
- fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles. %d:%d "
- "spills:fills. Compacted %d to %d bytes (%.0f%%)\n",
- stage_abbrev, before_size / 16, loop_count, cfg->cycle_count,
- spill_count, fill_count, before_size, after_size,
- 100.0f * (before_size - after_size) / before_size);
+ _mesa_sha1_compute(p->store, p->next_insn_offset, sha1);
+ _mesa_sha1_format(sha1buf, sha1);
+
+ fprintf(stderr, "Native code for %s %s shader %s (sha1 %s):\n",
+ nir->info.label ? nir->info.label : "unnamed",
+ _mesa_shader_stage_to_string(nir->info.stage), nir->info.name,
+ sha1buf);
- dump_assembly(p->store, disasm_info);
+ fprintf(stderr, "%s vec4 shader: %d instructions. %d loops. %u cycles. %d:%d "
+ "spills:fills. Compacted %d to %d bytes (%.0f%%)\n",
+ stage_abbrev, before_size / 16, loop_count, cfg->cycle_count,
+ spill_count, fill_count, before_size, after_size,
+ 100.0f * (before_size - after_size) / before_size);
+
+ /* Overriding the shader invalidates disasm_info, so the disassembly is
+  * only dumped when no override was applied.
+  */
+ if (!brw_try_override_assembly(p, 0, sha1buf)) {
+ dump_assembly(p->store, disasm_info);
+ } else {
+ fprintf(stderr, "Successfully overrode shader with sha1 %s\n\n", sha1buf);
+ }
}
ralloc_free(disasm_info);
assert(validated);
stage_abbrev, before_size / 16,
loop_count, cfg->cycle_count, spill_count,
fill_count, before_size, after_size);
-
+ if (stats) {
+ stats->dispatch_width = 0;
+ stats->instructions = before_size / 16;
+ stats->loops = loop_count;
+ stats->cycles = cfg->cycle_count;
+ stats->spills = spill_count;
+ stats->fills = fill_count;
+ }
}
extern "C" const unsigned *
void *mem_ctx,
const nir_shader *nir,
struct brw_vue_prog_data *prog_data,
- const struct cfg_t *cfg)
+ const struct cfg_t *cfg,
+ struct brw_compile_stats *stats)
{
struct brw_codegen *p = rzalloc(mem_ctx, struct brw_codegen);
brw_init_codegen(compiler->devinfo, p, mem_ctx);
brw_set_default_access_mode(p, BRW_ALIGN_16);
- generate_code(p, compiler, log_data, nir, prog_data, cfg);
+ generate_code(p, compiler, log_data, nir, prog_data, cfg, stats);
return brw_get_program(p, &prog_data->base.program_size);
}