* IN THE SOFTWARE.
*/
-#include "glsl/ir.h"
+#include "compiler/glsl/ir.h"
#include "main/shaderimage.h"
#include "brw_fs.h"
#include "brw_fs_surface_builder.h"
break;
}
case MESA_SHADER_FRAGMENT:
- if (var->data.index > 0) {
+ if (key->force_dual_color_blend &&
+ var->data.location == FRAG_RESULT_DATA1) {
+ this->dual_src_output = reg;
+ this->do_dual_src = true;
+ } else if (var->data.index > 0) {
assert(var->data.location == FRAG_RESULT_DATA0);
assert(var->data.index == 1);
this->dual_src_output = reg;
}
}
+/**
+ * Recognizes a parent instruction of nir_op_extract_* and changes the type to
+ * match instr.
+ */
+bool
+fs_visitor::optimize_extract_to_float(nir_alu_instr *instr,
+ const fs_reg &result)
+{
+ if (!instr->src[0].src.is_ssa ||
+ !instr->src[0].src.ssa->parent_instr)
+ return false;
+
+ if (instr->src[0].src.ssa->parent_instr->type != nir_instr_type_alu)
+ return false;
+
+ nir_alu_instr *src0 =
+ nir_instr_as_alu(instr->src[0].src.ssa->parent_instr);
+
+ if (src0->op != nir_op_extract_u8 && src0->op != nir_op_extract_u16 &&
+ src0->op != nir_op_extract_i8 && src0->op != nir_op_extract_i16)
+ return false;
+
+ nir_const_value *element = nir_src_as_const_value(src0->src[1].src);
+ assert(element != NULL);
+
+ enum opcode extract_op;
+ if (src0->op == nir_op_extract_u16 || src0->op == nir_op_extract_i16) {
+ assert(element->u32[0] <= 1);
+ extract_op = SHADER_OPCODE_EXTRACT_WORD;
+ } else {
+ assert(element->u32[0] <= 3);
+ extract_op = SHADER_OPCODE_EXTRACT_BYTE;
+ }
+
+ fs_reg op0 = get_nir_src(src0->src[0].src);
+ op0.type = brw_type_for_nir_type(nir_op_infos[src0->op].input_types[0]);
+ op0 = offset(op0, bld, src0->src[0].swizzle[0]);
+
+ set_saturate(instr->dest.saturate,
+ bld.emit(extract_op, result, op0, brw_imm_ud(element->u32[0])));
+ return true;
+}
+
bool
fs_visitor::optimize_frontfacing_ternary(nir_alu_instr *instr,
const fs_reg &result)
return false;
nir_const_value *value1 = nir_src_as_const_value(instr->src[1].src);
- if (!value1 || fabsf(value1->f[0]) != 1.0f)
+ if (!value1 || fabsf(value1->f32[0]) != 1.0f)
return false;
nir_const_value *value2 = nir_src_as_const_value(instr->src[2].src);
- if (!value2 || fabsf(value2->f[0]) != 1.0f)
+ if (!value2 || fabsf(value2->f32[0]) != 1.0f)
return false;
fs_reg tmp = vgrf(glsl_type::int_type);
* surely be TRIANGLES
*/
- if (value1->f[0] == -1.0f) {
+ if (value1->f32[0] == -1.0f) {
g0.negate = true;
}
* surely be TRIANGLES
*/
- if (value1->f[0] == -1.0f) {
+ if (value1->f32[0] == -1.0f) {
g1_6.negate = true;
}
switch (instr->op) {
case nir_op_i2f:
case nir_op_u2f:
+ if (optimize_extract_to_float(instr, result))
+ return;
+
inst = bld.MOV(result, op[0]);
inst->saturate = instr->dest.saturate;
break;
* When we XOR the sources, the top bit is 0 if they are the same and 1
* if they are different. We can then use a conditional modifier to
* turn that into a predicate. This leads us to an XOR.l instruction.
+ *
+ * Technically, according to the PRM, you're not allowed to use .l on a
+ * XOR instruction. However, emperical experiments and Curro's reading
+ * of the simulator source both indicate that it's safe.
*/
fs_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_D);
inst = bld.XOR(tmp, op[0], op[1]);
case nir_op_fmin:
case nir_op_imin:
case nir_op_umin:
- if (devinfo->gen >= 6) {
- inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]);
- inst->conditional_mod = BRW_CONDITIONAL_L;
- } else {
- bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_L);
- inst = bld.SEL(result, op[0], op[1]);
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
+ inst = bld.emit_minmax(result, op[0], op[1], BRW_CONDITIONAL_L);
inst->saturate = instr->dest.saturate;
break;
case nir_op_fmax:
case nir_op_imax:
case nir_op_umax:
- if (devinfo->gen >= 6) {
- inst = bld.emit(BRW_OPCODE_SEL, result, op[0], op[1]);
- inst->conditional_mod = BRW_CONDITIONAL_GE;
- } else {
- bld.CMP(bld.null_reg_d(), op[0], op[1], BRW_CONDITIONAL_GE);
- inst = bld.SEL(result, op[0], op[1]);
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
+ inst = bld.emit_minmax(result, op[0], op[1], BRW_CONDITIONAL_GE);
inst->saturate = instr->dest.saturate;
break;
inst->predicate = BRW_PREDICATE_NORMAL;
break;
+ case nir_op_extract_u8:
+ case nir_op_extract_i8: {
+ nir_const_value *byte = nir_src_as_const_value(instr->src[1].src);
+ bld.emit(SHADER_OPCODE_EXTRACT_BYTE,
+ result, op[0], brw_imm_ud(byte->u32[0]));
+ break;
+ }
+
+ case nir_op_extract_u16:
+ case nir_op_extract_i16: {
+ nir_const_value *word = nir_src_as_const_value(instr->src[1].src);
+ bld.emit(SHADER_OPCODE_EXTRACT_WORD,
+ result, op[0], brw_imm_ud(word->u32[0]));
+ break;
+ }
+
default:
unreachable("unhandled instruction");
}
fs_reg reg = bld.vgrf(BRW_REGISTER_TYPE_D, instr->def.num_components);
for (unsigned i = 0; i < instr->def.num_components; i++)
- bld.MOV(offset(reg, bld, i), brw_imm_d(instr->value.i[i]));
+ bld.MOV(offset(reg, bld, i), brw_imm_d(instr->value.i32[i]));
nir_ssa_values[instr->def.index] = reg;
}
static brw_reg_type
get_image_base_type(const glsl_type *type)
{
- switch ((glsl_base_type)type->sampler_type) {
+ switch ((glsl_base_type)type->sampled_type) {
case GLSL_TYPE_UINT:
return BRW_REGISTER_TYPE_UD;
case GLSL_TYPE_INT:
const bool is_point_size = (base_offset == 0);
if (offset_const != NULL && vertex_const != NULL &&
- 4 * (base_offset + offset_const->u[0]) < push_reg_count) {
- int imm_offset = (base_offset + offset_const->u[0]) * 4 +
- vertex_const->u[0] * push_reg_count;
+ 4 * (base_offset + offset_const->u32[0]) < push_reg_count) {
+ int imm_offset = (base_offset + offset_const->u32[0]) * 4 +
+ vertex_const->u32[0] * push_reg_count;
/* This input was pushed into registers. */
if (is_point_size) {
/* gl_PointSize comes in .w */
if (vertex_const) {
/* The vertex index is constant; just select the proper URB handle. */
icp_handle =
- retype(brw_vec8_grf(first_icp_handle + vertex_const->i[0], 0),
+ retype(brw_vec8_grf(first_icp_handle + vertex_const->i32[0], 0),
BRW_REGISTER_TYPE_UD);
} else {
/* The vertex index is non-constant. We need to use indirect
if (offset_const) {
/* Constant indexing - use global offset. */
inst = bld.emit(SHADER_OPCODE_URB_READ_SIMD8, dst, icp_handle);
- inst->offset = base_offset + offset_const->u[0];
+ inst->offset = base_offset + offset_const->u32[0];
inst->base_mrf = -1;
inst->mlen = 1;
inst->regs_written = num_components;
* add_const_offset_to_base() will fold other constant offsets
* into instr->const_index[0].
*/
- assert(const_value->u[0] == 0);
+ assert(const_value->u32[0] == 0);
return fs_reg();
}
nir_const_value *const_sample = nir_src_as_const_value(instr->src[0]);
if (const_sample) {
- unsigned msg_data = const_sample->i[0] << 4;
+ unsigned msg_data = const_sample->i32[0] << 4;
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SAMPLE,
nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
if (const_offset) {
- unsigned off_x = MIN2((int)(const_offset->f[0] * 16), 7) & 0xf;
- unsigned off_y = MIN2((int)(const_offset->f[1] * 16), 7) & 0xf;
+ unsigned off_x = MIN2((int)(const_offset->f32[0] * 16), 7) & 0xf;
+ unsigned off_y = MIN2((int)(const_offset->f32[1] * 16), 7) & 0xf;
emit_pixel_interpolater_send(bld,
FS_OPCODE_INTERPOLATE_AT_SHARED_OFFSET,
fs_reg offset_reg;
nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
if (const_offset) {
- offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0]);
+ offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0]);
} else {
offset_reg = vgrf(glsl_type::uint_type);
bld.ADD(offset_reg,
nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
if (const_offset) {
- offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u[0] +
+ offset_reg = brw_imm_ud(instr->const_index[0] + const_offset->u32[0] +
4 * first_component);
} else {
offset_reg = vgrf(glsl_type::uint_type);
case nir_intrinsic_atomic_counter_inc:
case nir_intrinsic_atomic_counter_dec:
case nir_intrinsic_atomic_counter_read: {
- using namespace surface_access;
-
/* Get the arguments of the atomic intrinsic. */
const fs_reg offset = get_nir_src(instr->src[0]);
const unsigned surface = (stage_prog_data->binding_table.abo_start +
nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
if (const_offset) {
/* Offsets are in bytes but they should always be multiples of 4 */
- assert(const_offset->u[0] % 4 == 0);
- src.reg_offset = const_offset->u[0] / 4;
+ assert(const_offset->u32[0] % 4 == 0);
+ src.reg_offset = const_offset->u32[0] / 4;
for (unsigned j = 0; j < instr->num_components; j++) {
bld.MOV(offset(dest, bld, j), offset(src, bld, j));
if (const_index) {
const unsigned index = stage_prog_data->binding_table.ubo_start +
- const_index->u[0];
+ const_index->u32[0];
surf_index = brw_imm_ud(index);
brw_mark_surface_used(prog_data, index);
} else {
fs_reg packed_consts = vgrf(glsl_type::float_type);
packed_consts.type = dest.type;
- struct brw_reg const_offset_reg = brw_imm_ud(const_offset->u[0] & ~15);
+ struct brw_reg const_offset_reg = brw_imm_ud(const_offset->u32[0] & ~15);
bld.emit(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD, packed_consts,
surf_index, const_offset_reg);
for (unsigned i = 0; i < instr->num_components; i++) {
- packed_consts.set_smear(const_offset->u[0] % 16 / 4 + i);
+ packed_consts.set_smear(const_offset->u32[0] % 16 / 4 + i);
/* The std140 packing rules don't allow vectors to cross 16-byte
* boundaries, and a reg is 32 bytes.
fs_reg surf_index;
if (const_uniform_block) {
unsigned index = stage_prog_data->binding_table.ssbo_start +
- const_uniform_block->u[0];
+ const_uniform_block->u32[0];
surf_index = brw_imm_ud(index);
brw_mark_surface_used(prog_data, index);
} else {
fs_reg offset_reg;
nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
if (const_offset) {
- offset_reg = brw_imm_ud(const_offset->u[0]);
+ offset_reg = brw_imm_ud(const_offset->u32[0]);
} else {
offset_reg = get_nir_src(instr->src[1]);
}
nir_const_value *const_offset = nir_src_as_const_value(instr->src[0]);
assert(const_offset && "Indirect input loads not allowed");
- src = offset(src, bld, const_offset->u[0]);
+ src = offset(src, bld, const_offset->u32[0]);
for (unsigned j = 0; j < instr->num_components; j++) {
bld.MOV(offset(dest, bld, j), offset(src, bld, j));
nir_src_as_const_value(instr->src[1]);
if (const_uniform_block) {
unsigned index = stage_prog_data->binding_table.ssbo_start +
- const_uniform_block->u[0];
+ const_uniform_block->u32[0];
surf_index = brw_imm_ud(index);
brw_mark_surface_used(prog_data, index);
} else {
fs_reg offset_reg;
nir_const_value *const_offset = nir_src_as_const_value(instr->src[2]);
if (const_offset) {
- offset_reg = brw_imm_ud(const_offset->u[0] + 4 * first_component);
+ offset_reg = brw_imm_ud(const_offset->u32[0] + 4 * first_component);
} else {
offset_reg = vgrf(glsl_type::uint_type);
bld.ADD(offset_reg,
nir_const_value *const_offset = nir_src_as_const_value(instr->src[1]);
assert(const_offset && "Indirect output stores not allowed");
- new_dest = offset(new_dest, bld, const_offset->u[0]);
+ new_dest = offset(new_dest, bld, const_offset->u32[0]);
for (unsigned j = 0; j < instr->num_components; j++) {
bld.MOV(offset(new_dest, bld, j), offset(src, bld, j));
case nir_intrinsic_get_buffer_size: {
nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
- unsigned ssbo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
+ unsigned ssbo_index = const_uniform_block ? const_uniform_block->u32[0] : 0;
int reg_width = dispatch_width / 8;
/* Set LOD = 0 */
nir_const_value *const_surface = nir_src_as_const_value(instr->src[0]);
if (const_surface) {
unsigned surf_index = stage_prog_data->binding_table.ssbo_start +
- const_surface->u[0];
+ const_surface->u32[0];
surface = brw_imm_ud(surf_index);
brw_mark_surface_used(prog_data, surf_index);
} else {
/* Emit the actual atomic operation operation */
- fs_reg atomic_result =
- surface_access::emit_untyped_atomic(bld, surface, offset,
- data1, data2,
- 1 /* dims */, 1 /* rsize */,
- op,
- BRW_PREDICATE_NONE);
+ fs_reg atomic_result = emit_untyped_atomic(bld, surface, offset,
+ data1, data2,
+ 1 /* dims */, 1 /* rsize */,
+ op,
+ BRW_PREDICATE_NONE);
dest.type = atomic_result.type;
bld.MOV(dest, atomic_result);
}
/* Emit the actual atomic operation operation */
- fs_reg atomic_result =
- surface_access::emit_untyped_atomic(bld, surface, offset,
- data1, data2,
- 1 /* dims */, 1 /* rsize */,
- op,
- BRW_PREDICATE_NONE);
+ fs_reg atomic_result = emit_untyped_atomic(bld, surface, offset,
+ data1, data2,
+ 1 /* dims */, 1 /* rsize */,
+ op,
+ BRW_PREDICATE_NONE);
dest.type = atomic_result.type;
bld.MOV(dest, atomic_result);
}
instr->is_array;
int lod_components = 0;
- int UNUSED offset_components = 0;
fs_reg coordinate, shadow_comparitor, lod, lod2, sample_index, mcs, tex_offset;
- /* Our hardware requires a LOD for buffer textures */
+ /* The hardware requires a LOD for buffer textures */
if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
lod = brw_imm_d(0);
case nir_tex_src_ms_index:
sample_index = retype(src, BRW_REGISTER_TYPE_UD);
break;
- case nir_tex_src_offset:
- tex_offset = retype(src, BRW_REGISTER_TYPE_D);
- if (instr->is_array)
- offset_components = instr->coord_components - 1;
- else
- offset_components = instr->coord_components;
+
+ case nir_tex_src_offset: {
+ nir_const_value *const_offset =
+ nir_src_as_const_value(instr->src[i].src);
+ if (const_offset) {
+ tex_offset = brw_imm_ud(brw_texture_offset(const_offset->i32, 3));
+ } else {
+ tex_offset = retype(src, BRW_REGISTER_TYPE_D);
+ }
break;
+ }
+
case nir_tex_src_projector:
unreachable("should be lowered");
}
}
- for (unsigned i = 0; i < 3; i++) {
- if (instr->const_offset[i] != 0) {
- assert(offset_components == 0);
- tex_offset = brw_imm_ud(brw_texture_offset(instr->const_offset, 3));
- break;
- }
- }
-
enum glsl_base_type dest_base_type =
brw_glsl_base_type_for_nir_type (instr->dest_type);
emit_texture(op, dest_type, coordinate, instr->coord_components,
shadow_comparitor, lod, lod2, lod_components, sample_index,
- tex_offset, mcs, gather_component,
- is_cube_array, texture, texture_reg, sampler, sampler_reg);
+ tex_offset, mcs, gather_component, is_cube_array,
+ texture, texture_reg, sampler, sampler_reg);
fs_reg dest = get_nir_dest(instr->dest);
dest.type = this->result.type;