vec4_visitor::nir_setup_uniforms()
{
uniforms = nir->num_uniforms / 16;
-
- nir_foreach_variable(var, &nir->uniforms) {
- /* UBO's and atomics don't take up space in the uniform file */
- if (var->interface_type != NULL || var->type->contains_atomic())
- continue;
-
- if (type_size_vec4(var->type) > 0)
- uniform_size[var->data.driver_location / 16] = type_size_vec4(var->type);
- }
}
void
/* Offsets are in bytes but they should always be multiples of 16 */
assert(const_offset->u[0] % 16 == 0);
src.reg_offset = const_offset->u[0] / 16;
+
+ emit(MOV(dest, src));
} else {
- src_reg tmp = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_D, 1);
- src.reladdr = new(mem_ctx) src_reg(tmp);
- }
+ src_reg indirect = get_nir_src(instr->src[0], BRW_REGISTER_TYPE_UD, 1);
- emit(MOV(dest, src));
+ emit(SHADER_OPCODE_MOV_INDIRECT, dest, src,
+ indirect, brw_imm_ud(instr->const_index[1]));
+ }
break;
}
inst->saturate = instr->dest.saturate;
break;
- case nir_op_fsin:
- inst = emit_math(SHADER_OPCODE_SIN, dst, op[0]);
- inst->saturate = instr->dest.saturate;
+ case nir_op_fsin: {
+ src_reg tmp = src_reg(this, glsl_type::vec4_type);
+ inst = emit_math(SHADER_OPCODE_SIN, dst_reg(tmp), op[0]);
+ if (instr->dest.saturate) {
+ inst->dst = dst;
+ inst->saturate = true;
+ } else {
+ emit(MUL(dst, tmp, brw_imm_f(0.99997)));
+ }
break;
+ }
- case nir_op_fcos:
- inst = emit_math(SHADER_OPCODE_COS, dst, op[0]);
- inst->saturate = instr->dest.saturate;
+ case nir_op_fcos: {
+ src_reg tmp = src_reg(this, glsl_type::vec4_type);
+ inst = emit_math(SHADER_OPCODE_COS, dst_reg(tmp), op[0]);
+ if (instr->dest.saturate) {
+ inst->dst = dst;
+ inst->saturate = true;
+ } else {
+ emit(MUL(dst, tmp, brw_imm_f(0.99997)));
+ }
break;
+ }
case nir_op_idiv:
case nir_op_udiv:
break;
case nir_op_umod:
+ case nir_op_irem:
+ /* According to the sign table for INT DIV in the Ivy Bridge PRM, it
+ * appears that our hardware just does the right thing for signed
+ * remainder.
+ */
emit_math(SHADER_OPCODE_INT_REMAINDER, dst, op[0], op[1]);
break;
+ case nir_op_imod: {
+ /* Get a regular C-style remainder. If a % b == 0, set the predicate. */
+ inst = emit_math(SHADER_OPCODE_INT_REMAINDER, dst, op[0], op[1]);
+
+ /* Math instructions don't support conditional mod */
+ inst = emit(MOV(dst_null_d(), src_reg(dst)));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+ /* Now, we need to determine if signs of the sources are different.
+ * When we XOR the sources, the top bit is 0 if they are the same and 1
+ * if they are different. We can then use a conditional modifier to
+ * turn that into a predicate. This leads us to an XOR.l instruction.
+ */
+ src_reg tmp = src_reg(this, glsl_type::ivec4_type);
+ inst = emit(XOR(dst_reg(tmp), op[0], op[1]));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->conditional_mod = BRW_CONDITIONAL_L;
+
+ /* If the result of the initial remainder operation is non-zero and the
+ * two sources have different signs, add in a copy of op[1] to get the
+ * final integer modulus value.
+ */
+ inst = emit(ADD(dst, src_reg(dst), op[1]));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ break;
+ }
+
case nir_op_ldexp:
unreachable("not reached: should be handled by ldexp_to_arith()");
inst->saturate = instr->dest.saturate;
break;
+ case nir_op_fquantize2f16: {
+ /* See also vec4_visitor::emit_pack_half_2x16() */
+ src_reg tmp16 = src_reg(this, glsl_type::uvec4_type);
+ src_reg tmp32 = src_reg(this, glsl_type::vec4_type);
+ src_reg zero = src_reg(this, glsl_type::vec4_type);
+
+ /* Check for denormal */
+ src_reg abs_src0 = op[0];
+ abs_src0.abs = true;
+ emit(CMP(dst_null_f(), abs_src0, brw_imm_f(ldexpf(1.0, -14)),
+ BRW_CONDITIONAL_L));
+ /* Get the appropriately signed zero */
+ emit(AND(retype(dst_reg(zero), BRW_REGISTER_TYPE_UD),
+ retype(op[0], BRW_REGISTER_TYPE_UD),
+ brw_imm_ud(0x80000000)));
+ /* Do the actual F32 -> F16 -> F32 conversion */
+ emit(F32TO16(dst_reg(tmp16), op[0]));
+ emit(F16TO32(dst_reg(tmp32), tmp16));
+ /* Select that or zero based on normal status */
+ inst = emit(BRW_OPCODE_SEL, dst, zero, tmp32);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ inst->predicate_inverse = true;
+ inst->saturate = instr->dest.saturate;
+ break;
+ }
+
case nir_op_fmin:
case nir_op_imin:
case nir_op_umin:
break;
case nir_jump_return:
- /* fall through */
default:
unreachable("unknown jump");
}
nir_tex_instr_dest_size(instr));
dst_reg dest = get_nir_dest(instr->dest, instr->dest_type);
+ /* Our hardware requires a LOD for buffer textures */
+ if (instr->sampler_dim == GLSL_SAMPLER_DIM_BUF)
+ lod = brw_imm_d(0);
+
/* Load the texture operation sources */
uint32_t constant_offset = 0;
for (unsigned i = 0; i < instr->num_srcs; i++) {