}
#include "brw_fs.h"
#include "glsl/glsl_types.h"
-#include "glsl/ir_print_visitor.h"
void
fs_inst::init()
ALU2(SHR)
ALU2(ASR)
ALU3(LRP)
+ALU1(BFREV)
+ALU3(BFE)
+ALU2(BFI1)
+ALU3(BFI2)
+ALU1(FBH)
+ALU1(FBL)
+ALU1(CBIT)
/** Gen4 predicated IF. */
fs_inst *
*/
if (intel->gen == 4) {
dst.type = src0.type;
- if (dst.file == FIXED_HW_REG)
+ if (dst.file == HW_REG)
dst.fixed_hw_reg.type = dst.type;
}
reg.reg_offset < dst.reg_offset + regs_written);
}
-bool
-fs_inst::is_tex()
-{
- return (opcode == SHADER_OPCODE_TEX ||
- opcode == FS_OPCODE_TXB ||
- opcode == SHADER_OPCODE_TXD ||
- opcode == SHADER_OPCODE_TXF ||
- opcode == SHADER_OPCODE_TXF_MS ||
- opcode == SHADER_OPCODE_TXL ||
- opcode == SHADER_OPCODE_TXS ||
- opcode == SHADER_OPCODE_LOD);
-}
-
-bool
-fs_inst::is_math()
-{
- return (opcode == SHADER_OPCODE_RCP ||
- opcode == SHADER_OPCODE_RSQ ||
- opcode == SHADER_OPCODE_SQRT ||
- opcode == SHADER_OPCODE_EXP2 ||
- opcode == SHADER_OPCODE_LOG2 ||
- opcode == SHADER_OPCODE_SIN ||
- opcode == SHADER_OPCODE_COS ||
- opcode == SHADER_OPCODE_INT_QUOTIENT ||
- opcode == SHADER_OPCODE_INT_REMAINDER ||
- opcode == SHADER_OPCODE_POW);
-}
-
-bool
-fs_inst::is_control_flow()
-{
- switch (opcode) {
- case BRW_OPCODE_DO:
- case BRW_OPCODE_WHILE:
- case BRW_OPCODE_IF:
- case BRW_OPCODE_ELSE:
- case BRW_OPCODE_ENDIF:
- case BRW_OPCODE_BREAK:
- case BRW_OPCODE_CONTINUE:
- return true;
- default:
- return false;
- }
-}
-
bool
fs_inst::is_send_from_grf()
{
fs_reg::fs_reg(struct brw_reg fixed_hw_reg)
{
init();
- this->file = FIXED_HW_REG;
+ this->file = HW_REG;
this->fixed_hw_reg = fixed_hw_reg;
this->type = fixed_hw_reg.type;
}
return type == BRW_REGISTER_TYPE_F ? imm.f == 1.0 : imm.i == 1;
}
+bool
+fs_reg::is_valid_3src() const
+{
+ return file == GRF || file == UNIFORM;
+}
+
int
fs_visitor::type_size(const struct glsl_type *type)
{
import_uniforms_callback,
variable_ht);
this->params_remap = v->params_remap;
+ this->nr_params_remap = v->nr_params_remap;
}
/* Our support for uniforms is piggy-backed on the struct
/* Make sure we actually initialized the right amount of stuff here. */
assert(params_before + ir->type->component_slots() ==
c->prog_data.nr_params);
+ (void)params_before;
}
bool is_centroid)
{
brw_wm_barycentric_interp_mode barycoord_mode;
- if (is_centroid) {
- if (interpolation_mode == INTERP_QUALIFIER_SMOOTH)
- barycoord_mode = BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
- else
- barycoord_mode = BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
+ if (intel->gen >= 6) {
+ if (is_centroid) {
+ if (interpolation_mode == INTERP_QUALIFIER_SMOOTH)
+ barycoord_mode = BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
+ else
+ barycoord_mode = BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
+ } else {
+ if (interpolation_mode == INTERP_QUALIFIER_SMOOTH)
+ barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
+ else
+ barycoord_mode = BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
+ }
} else {
- if (interpolation_mode == INTERP_QUALIFIER_SMOOTH)
- barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
- else
- barycoord_mode = BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
+ /* On Ironlake and below, there is only one interpolation mode.
+ * Centroid interpolation doesn't mean anything on this hardware --
+ * there is no multisampling.
+ */
+ barycoord_mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
}
return emit(FS_OPCODE_LINTERP, attr,
this->delta_x[barycoord_mode],
constant_nr / 8,
constant_nr % 8);
- inst->src[i].file = FIXED_HW_REG;
+ inst->src[i].file = HW_REG;
inst->src[i].fixed_hw_reg = retype(brw_reg, inst->src[i].type);
}
}
fs_inst *inst = (fs_inst *)node;
if (inst->opcode == FS_OPCODE_LINTERP) {
- assert(inst->src[2].file == FIXED_HW_REG);
+ assert(inst->src[2].file == HW_REG);
inst->src[2].fixed_hw_reg.nr += urb_start;
}
if (inst->opcode == FS_OPCODE_CINTERP) {
- assert(inst->src[0].file == FIXED_HW_REG);
+ assert(inst->src[0].file == HW_REG);
inst->src[0].fixed_hw_reg.nr += urb_start;
}
}
remap_table[i] = new_index;
virtual_grf_sizes[new_index] = virtual_grf_sizes[i];
if (live_intervals_valid) {
- virtual_grf_use[new_index] = virtual_grf_use[i];
- virtual_grf_def[new_index] = virtual_grf_def[i];
+ virtual_grf_start[new_index] = virtual_grf_start[i];
+ virtual_grf_end[new_index] = virtual_grf_end[i];
}
++new_index;
}
{
if (dispatch_width == 8) {
this->params_remap = ralloc_array(mem_ctx, int, c->prog_data.nr_params);
+ this->nr_params_remap = c->prog_data.nr_params;
for (unsigned int i = 0; i < c->prog_data.nr_params; i++)
this->params_remap[i] = -1;
if (inst->src[i].file != UNIFORM)
continue;
- assert(constant_nr < (int)c->prog_data.nr_params);
+ /* Section 5.11 of the OpenGL 4.3 spec says:
+ *
+ * "Out-of-bounds reads return undefined values, which include
+ * values from other variables of the active program or zero."
+ */
+ if (constant_nr < 0 || constant_nr >= (int)c->prog_data.nr_params) {
+ constant_nr = 0;
+ }
/* For now, set this to non-negative. We'll give it the
* actual new number in a moment, in order to keep the
if (inst->src[i].file != UNIFORM)
continue;
+ /* As above: alias an out-of-range constant_nr to 0. */
+ if (constant_nr < 0 || constant_nr >= (int)this->nr_params_remap) {
+ constant_nr = 0;
+ }
assert(this->params_remap[constant_nr] != -1);
inst->src[i].reg = this->params_remap[constant_nr];
inst->src[i].reg_offset = 0;
}
/**
- * Must be called after calculate_live_intervales() to remove unused
- * writes to registers -- register allocation will fail otherwise
- * because something deffed but not used won't be considered to
- * interfere with other regs.
+ * Removes any instructions writing a VGRF where that VGRF is not used by any
+ * later instruction.
*/
bool
fs_visitor::dead_code_eliminate()
foreach_list_safe(node, &this->instructions) {
fs_inst *inst = (fs_inst *)node;
- if (inst->dst.file == GRF && this->virtual_grf_use[inst->dst.reg] <= pc) {
- inst->remove();
- progress = true;
+ if (inst->dst.file == GRF) {
+ assert(this->virtual_grf_end[inst->dst.reg] >= pc);
+ if (this->virtual_grf_end[inst->dst.reg] == pc) {
+ inst->remove();
+ progress = true;
+ }
}
pc++;
/* Can't compute-to-MRF this GRF if someone else was going to
* read it later.
*/
- if (this->virtual_grf_use[inst->src[0].reg] > ip)
+ if (this->virtual_grf_end[inst->src[0].reg] > ip)
continue;
/* Found a move of a GRF to a MRF. Let's see if we can go
int grf;
if (inst->src[i].file == GRF) {
grf = inst->src[i].reg;
- } else if (inst->src[i].file == FIXED_HW_REG &&
+ } else if (inst->src[i].file == HW_REG &&
inst->src[i].fixed_hw_reg.file == BRW_GENERAL_REGISTER_FILE) {
grf = inst->src[i].fixed_hw_reg.nr;
} else {
}
void
-fs_visitor::dump_instruction(fs_inst *inst)
+fs_visitor::dump_instruction(backend_instruction *be_inst)
{
+ fs_inst *inst = (fs_inst *)be_inst;
+
if (inst->predicate) {
printf("(%cf0.%d) ",
inst->predicate_inverse ? '-' : '+',
printf("\n");
}
-void
-fs_visitor::dump_instructions()
-{
- int ip = 0;
- foreach_list(node, &this->instructions) {
- fs_inst *inst = (fs_inst *)node;
- printf("%d: ", ip++);
- dump_instruction(inst);
- }
-}
-
/**
* Possibly returns an instruction that set up @param reg.
*
void
fs_visitor::setup_payload_gen6()
{
- struct intel_context *intel = &brw->intel;
bool uses_depth =
(fp->Base.InputsRead & (1 << VARYING_SLOT_POS)) != 0;
unsigned barycentric_interp_modes = c->prog_data.barycentric_interp_modes;
bool start_busy = false;
float start_time = 0;
- if (unlikely(intel->perf_debug)) {
- start_busy = (intel->batch.last_bo &&
- drm_intel_bo_busy(intel->batch.last_bo));
+ if (unlikely(brw->perf_debug)) {
+ start_busy = (brw->batch.last_bo &&
+ drm_intel_bo_busy(brw->batch.last_bo));
start_time = get_time();
}
simd16_instructions,
final_assembly_size);
- if (unlikely(intel->perf_debug) && shader) {
+ if (unlikely(brw->perf_debug) && shader) {
if (shader->compiled_once)
brw_wm_debug_recompile(brw, prog, &c->key);
shader->compiled_once = true;
- if (start_busy && !drm_intel_bo_busy(intel->batch.last_bo)) {
+ if (start_busy && !drm_intel_bo_busy(brw->batch.last_bo)) {
perf_debug("FS compile took %.03f ms and stalled the GPU\n",
(get_time() - start_time) * 1000);
}
}
}
- key.clamp_fragment_color = true;
+ key.clamp_fragment_color = ctx->API == API_OPENGL_COMPAT;
for (int i = 0; i < MAX_SAMPLERS; i++) {
if (fp->Base.ShadowSamplers & (1 << i)) {