struct brw_wm_prog_data *prog_data,
struct gl_shader_program *prog,
struct gl_fragment_program *fp,
- bool dual_source_output,
bool runtime_check_aads_emit,
bool debug_flag)
: brw(brw), key(key), prog_data(prog_data), prog(prog), fp(fp),
- dual_source_output(dual_source_output),
runtime_check_aads_emit(runtime_check_aads_emit), debug_flag(debug_flag),
mem_ctx(mem_ctx)
{
if (brw->gen < 6 || this->discard_halt_patches.is_empty())
return false;
+ int scale = brw_jump_scale(brw);
+
/* There is a somewhat strange undocumented requirement of using
* HALT, according to the simulator. If some channel has HALTed to
* a particular UIP, then by the end of the program, every channel
* tests.
*/
brw_inst *last_halt = gen6_HALT(p);
- brw_inst_set_uip(brw, last_halt, 2);
- brw_inst_set_jip(brw, last_halt, 2);
+ brw_inst_set_uip(brw, last_halt, 1 * scale);
+ brw_inst_set_jip(brw, last_halt, 1 * scale);
int ip = p->nr_insn;
assert(brw_inst_opcode(brw, patch) == BRW_OPCODE_HALT);
/* HALT takes a half-instruction distance from the pre-incremented IP. */
- brw_inst_set_uip(brw, patch, (ip - patch_ip->ip) * 2);
+ brw_inst_set_uip(brw, patch, (ip - patch_ip->ip) * scale);
}
this->discard_halt_patches.make_empty();
brw_pop_insn_state(p);
}
- if (this->dual_source_output)
+ if (prog_data->dual_src_blend)
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01;
else if (dispatch_width == 16)
msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE;
brw_set_default_mask_control(p, BRW_MASK_DISABLE);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
brw_set_default_compression_control(p, BRW_COMPRESSION_NONE);
+ brw_set_default_flag_reg(p, 0, 0);
/* On HSW, the GPU will use the predicate on SENDC, unless the header is
* present.
}
void
-fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src)
+fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src,
+ struct brw_reg sampler_index)
{
int msg_type = -1;
int rlen = 4;
src.nr++;
}
+ assert(sampler_index.file == BRW_IMMEDIATE_VALUE);
+ assert(sampler_index.type == BRW_REGISTER_TYPE_UD);
+
+ uint32_t sampler = sampler_index.dw1.ud;
+
/* Load the message header if present. If there's a texture offset,
* we need to set it up explicitly and load the offset bitfield.
* Otherwise, we can use an implied move from g0 to the first message reg.
brw_imm_ud(inst->texture_offset));
}
- if (inst->sampler >= 16) {
+ if (sampler >= 16) {
/* The "Sampler Index" field can only store values between 0 and 15.
* However, we can add an offset to the "Sampler State Pointer"
* field, effectively selecting a different set of 16 samplers.
* exclusively use the offset - we have to use both.
*/
assert(brw->gen >= 8 || brw->is_haswell);
+ const int sampler_state_size = 16; /* 16 bytes */
brw_ADD(p,
get_element_ud(header_reg, 3),
get_element_ud(brw_vec8_grf(0, 0), 3),
- brw_imm_ud(16 * (inst->sampler / 16) *
- sizeof(gen7_sampler_state)));
+ brw_imm_ud(16 * (sampler / 16) * sampler_state_size));
}
brw_pop_insn_state(p);
}
uint32_t surface_index = ((inst->opcode == SHADER_OPCODE_TG4 ||
inst->opcode == SHADER_OPCODE_TG4_OFFSET)
? prog_data->base.binding_table.gather_texture_start
- : prog_data->base.binding_table.texture_start) + inst->sampler;
+ : prog_data->base.binding_table.texture_start) + sampler;
brw_SAMPLE(p,
retype(dst, BRW_REGISTER_TYPE_UW),
inst->base_mrf,
src,
surface_index,
- inst->sampler % 16,
+ sampler % 16,
msg_type,
rlen,
inst->mlen,
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
break;
case BRW_OPCODE_CONTINUE:
- /* FINISHME: We need to write the loop instruction support still. */
- if (brw->gen >= 6)
- gen6_CONT(p);
- else
- brw_CONT(p);
+ brw_CONT(p);
brw_set_default_predicate_control(p, BRW_PREDICATE_NONE);
break;
case SHADER_OPCODE_LOD:
case SHADER_OPCODE_TG4:
case SHADER_OPCODE_TG4_OFFSET:
- generate_tex(inst, dst, src[0]);
+ generate_tex(inst, dst, src[0], src[1]);
break;
case FS_OPCODE_DDX:
generate_ddx(inst, dst, src[0]);