namespace brw {
-vec4_instruction::vec4_instruction(vec4_visitor *v,
- enum opcode opcode, const dst_reg &dst,
+vec4_instruction::vec4_instruction(enum opcode opcode, const dst_reg &dst,
const src_reg &src0, const src_reg &src1,
const src_reg &src2)
{
this->no_dd_check = false;
this->writes_accumulator = false;
this->conditional_mod = BRW_CONDITIONAL_NONE;
+ this->predicate = BRW_PREDICATE_NONE;
+ this->predicate_inverse = false;
this->target = 0;
+ this->regs_written = (dst.file == BAD_FILE ? 0 : 1);
this->shadow_compare = false;
- this->ir = v->base_ir;
+ this->ir = NULL;
this->urb_write_flags = BRW_URB_WRITE_NO_FLAGS;
this->header_present = false;
+ this->flag_subreg = 0;
this->mlen = 0;
this->base_mrf = 0;
this->offset = 0;
- this->annotation = v->current_annotation;
+ this->annotation = NULL;
}
vec4_instruction *
vec4_visitor::emit(vec4_instruction *inst)
{
+ inst->ir = this->base_ir;
+ inst->annotation = this->current_annotation;
+
this->instructions.push_tail(inst);
return inst;
vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
const src_reg &src1, const src_reg &src2)
{
- return emit(new(mem_ctx) vec4_instruction(this, opcode, dst,
- src0, src1, src2));
+ return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0, src1, src2));
}
vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0,
const src_reg &src1)
{
- return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0, src1));
+ return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0, src1));
}
vec4_instruction *
vec4_visitor::emit(enum opcode opcode, const dst_reg &dst, const src_reg &src0)
{
- return emit(new(mem_ctx) vec4_instruction(this, opcode, dst, src0));
+ return emit(new(mem_ctx) vec4_instruction(opcode, dst, src0));
}
vec4_instruction *
vec4_visitor::emit(enum opcode opcode, const dst_reg &dst)
{
- return emit(new(mem_ctx) vec4_instruction(this, opcode, dst));
+ return emit(new(mem_ctx) vec4_instruction(opcode, dst));
}
vec4_instruction *
vec4_visitor::emit(enum opcode opcode)
{
- return emit(new(mem_ctx) vec4_instruction(this, opcode, dst_reg()));
+ return emit(new(mem_ctx) vec4_instruction(opcode, dst_reg()));
}
#define ALU1(op) \
vec4_instruction * \
vec4_visitor::op(const dst_reg &dst, const src_reg &src0) \
{ \
- return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
- src0); \
+ return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, src0); \
}
#define ALU2(op) \
vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
const src_reg &src1) \
{ \
- return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
- src0, src1); \
+ return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, \
+ src0, src1); \
}
#define ALU2_ACC(op) \
vec4_visitor::op(const dst_reg &dst, const src_reg &src0, \
const src_reg &src1) \
{ \
- vec4_instruction *inst = new(mem_ctx) vec4_instruction(this, \
+ vec4_instruction *inst = new(mem_ctx) vec4_instruction( \
BRW_OPCODE_##op, dst, src0, src1); \
- inst->writes_accumulator = true; \
- return inst; \
+ inst->writes_accumulator = true; \
+ return inst; \
}
#define ALU3(op) \
const src_reg &src1, const src_reg &src2) \
{ \
assert(brw->gen >= 6); \
- return new(mem_ctx) vec4_instruction(this, BRW_OPCODE_##op, dst, \
+ return new(mem_ctx) vec4_instruction(BRW_OPCODE_##op, dst, \
src0, src1, src2); \
}
{
vec4_instruction *inst;
- inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF);
+ inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_IF);
inst->predicate = predicate;
return inst;
resolve_ud_negate(&src0);
resolve_ud_negate(&src1);
- inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_IF, dst_null_d(),
+ inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_IF, dst_null_d(),
src0, src1);
inst->conditional_mod = condition;
resolve_ud_negate(&src0);
resolve_ud_negate(&src1);
- inst = new(mem_ctx) vec4_instruction(this, BRW_OPCODE_CMP, dst, src0, src1);
+ inst = new(mem_ctx) vec4_instruction(BRW_OPCODE_CMP, dst, src0, src1);
inst->conditional_mod = condition;
return inst;
{
vec4_instruction *inst;
- inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_GEN4_SCRATCH_READ,
+ inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_READ,
dst, index);
inst->base_mrf = 14;
inst->mlen = 2;
{
vec4_instruction *inst;
- inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_GEN4_SCRATCH_WRITE,
+ inst = new(mem_ctx) vec4_instruction(SHADER_OPCODE_GEN4_SCRATCH_WRITE,
dst, src, index);
inst->base_mrf = 13;
inst->mlen = 3;
shifted.type = BRW_REGISTER_TYPE_UB;
dst_reg f(this, glsl_type::vec4_type);
- emit(MOV(f, src_reg(shifted)));
+ emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
emit(MUL(dst, src_reg(f), src_reg(1.0f / 255.0f)));
}
shifted.type = BRW_REGISTER_TYPE_B;
dst_reg f(this, glsl_type::vec4_type);
- emit(MOV(f, src_reg(shifted)));
+ emit(VEC4_OPCODE_MOV_BYTES, f, src_reg(shifted));
dst_reg scaled(this, glsl_type::vec4_type);
emit(MUL(scaled, src_reg(f), src_reg(1.0f / 127.0f)));
dst_reg max(this, glsl_type::vec4_type);
- emit_minmax(BRW_CONDITIONAL_G, max, src_reg(scaled), src_reg(-1.0f));
+ emit_minmax(BRW_CONDITIONAL_GE, max, src_reg(scaled), src_reg(-1.0f));
emit_minmax(BRW_CONDITIONAL_L, dst, src_reg(max), src_reg(1.0f));
}
vec4_visitor::emit_pack_snorm_4x8(const dst_reg &dst, const src_reg &src0)
{
dst_reg max(this, glsl_type::vec4_type);
- emit_minmax(BRW_CONDITIONAL_G, max, src0, src_reg(-1.0f));
+ emit_minmax(BRW_CONDITIONAL_GE, max, src0, src_reg(-1.0f));
dst_reg min(this, glsl_type::vec4_type);
emit_minmax(BRW_CONDITIONAL_L, min, src_reg(max), src_reg(1.0f));
return 0;
case GLSL_TYPE_IMAGE:
case GLSL_TYPE_VOID:
+ case GLSL_TYPE_DOUBLE:
case GLSL_TYPE_ERROR:
case GLSL_TYPE_INTERFACE:
unreachable("not reached");
if (ir->type->base_type != GLSL_TYPE_FLOAT)
return false;
- ir_rvalue *nonmul = ir->operands[1];
- ir_expression *mul = ir->operands[0]->as_expression();
+ ir_rvalue *nonmul;
+ ir_expression *mul;
+ bool mul_negate, mul_abs;
+
+ for (int i = 0; i < 2; i++) {
+ mul_negate = false;
+ mul_abs = false;
- if (!mul || mul->operation != ir_binop_mul) {
- nonmul = ir->operands[0];
- mul = ir->operands[1]->as_expression();
+ mul = ir->operands[i]->as_expression();
+ nonmul = ir->operands[1 - i];
- if (!mul || mul->operation != ir_binop_mul)
- return false;
+ if (mul && mul->operation == ir_unop_abs) {
+ mul = mul->operands[0]->as_expression();
+ mul_abs = true;
+ } else if (mul && mul->operation == ir_unop_neg) {
+ mul = mul->operands[0]->as_expression();
+ mul_negate = true;
+ }
+
+ if (mul && mul->operation == ir_binop_mul)
+ break;
}
+ if (!mul || mul->operation != ir_binop_mul)
+ return false;
+
nonmul->accept(this);
src_reg src0 = fix_3src_operand(this->result);
mul->operands[0]->accept(this);
src_reg src1 = fix_3src_operand(this->result);
+ src1.negate ^= mul_negate;
+ src1.abs = mul_abs;
+ if (mul_abs)
+ src1.negate = false;
mul->operands[1]->accept(this);
src_reg src2 = fix_3src_operand(this->result);
+ src2.abs = mul_abs;
+ if (mul_abs)
+ src2.negate = false;
this->result = src_reg(this, ir->type);
emit(BRW_OPCODE_MAD, dst_reg(this->result), src0, src1, src2);
vec4_visitor::visit(ir_expression *ir)
{
unsigned int operand;
- src_reg op[Elements(ir->operands)];
+ src_reg op[ARRAY_SIZE(ir->operands)];
vec4_instruction *inst;
if (ir->operation == ir_binop_add) {
}
case ir_binop_all_equal:
+ if (brw->gen <= 5) {
+ resolve_bool_comparison(ir->operands[0], &op[0]);
+ resolve_bool_comparison(ir->operands[1], &op[1]);
+ }
+
/* "==" operator producing a scalar boolean. */
if (ir->operands[0]->type->is_vector() ||
ir->operands[1]->type->is_vector()) {
}
break;
case ir_binop_any_nequal:
+ if (brw->gen <= 5) {
+ resolve_bool_comparison(ir->operands[0], &op[0]);
+ resolve_bool_comparison(ir->operands[1], &op[1]);
+ }
+
/* "!=" operator producing a scalar boolean. */
if (ir->operands[0]->type->is_vector() ||
ir->operands[1]->type->is_vector()) {
break;
case ir_unop_any:
+ if (brw->gen <= 5) {
+ resolve_bool_comparison(ir->operands[0], &op[0]);
+ }
emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
emit(MOV(result_dst, src_reg(0)));
emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
break;
case ir_unop_i2b:
- emit(AND(result_dst, op[0], src_reg(1)));
+ emit(CMP(result_dst, op[0], src_reg(0), BRW_CONDITIONAL_NZ));
break;
case ir_unop_trunc:
emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
break;
case ir_binop_max:
- emit_minmax(BRW_CONDITIONAL_G, result_dst, op[0], op[1]);
+ emit_minmax(BRW_CONDITIONAL_GE, result_dst, op[0], op[1]);
break;
case ir_binop_pow:
if (brw->gen >= 7) {
dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
+
+ /* We have to use a message header on Skylake to get SIMD4x2 mode.
+ * Reserve space for the register.
+ */
+ if (brw->gen >= 9) {
+ grf_offset.reg_offset++;
+ alloc.sizes[grf_offset.reg] = 2;
+ }
+
grf_offset.type = offset.type;
emit(MOV(grf_offset, offset));
- emit(new(mem_ctx) vec4_instruction(this,
- VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
- dst_reg(packed_consts),
- surf_index,
- src_reg(grf_offset)));
+ vec4_instruction *pull =
+ emit(new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
+ dst_reg(packed_consts),
+ surf_index,
+ src_reg(grf_offset)));
+ pull->mlen = 1;
} else {
vec4_instruction *pull =
- emit(new(mem_ctx) vec4_instruction(this,
- VS_OPCODE_PULL_CONSTANT_LOAD,
+ emit(new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD,
dst_reg(packed_consts),
surf_index,
offset));
unreachable("not reached: should not occur in vertex shader");
case ir_binop_ldexp:
unreachable("not reached: should be handled by ldexp_to_arith()");
+ case ir_unop_d2f:
+ case ir_unop_f2d:
+ case ir_unop_d2i:
+ case ir_unop_i2d:
+ case ir_unop_d2u:
+ case ir_unop_u2d:
+ case ir_unop_d2b:
+ case ir_unop_pack_double_2x32:
+ case ir_unop_unpack_double_2x32:
+ case ir_unop_frexp_sig:
+ case ir_unop_frexp_exp:
+ unreachable("fp64 todo");
}
}
src_reg
vec4_visitor::emit_mcs_fetch(ir_texture *ir, src_reg coordinate, src_reg sampler)
{
- vec4_instruction *inst = new(mem_ctx) vec4_instruction(this, SHADER_OPCODE_TXF_MCS);
+ vec4_instruction *inst =
+ new(mem_ctx) vec4_instruction(SHADER_OPCODE_TXF_MCS,
+ dst_reg(this, glsl_type::uvec4_type));
inst->base_mrf = 2;
inst->mlen = 1;
- inst->dst = dst_reg(this, glsl_type::uvec4_type);
- inst->dst.writemask = WRITEMASK_XYZW;
-
inst->src[1] = sampler;
/* parameters are: u, v, r, lod; lod will always be zero due to api restrictions */
unreachable("Unrecognized tex op");
}
- vec4_instruction *inst = new(mem_ctx) vec4_instruction(this, opcode);
+ vec4_instruction *inst = new(mem_ctx) vec4_instruction(
+ opcode, dst_reg(this, ir->type));
if (ir->offset != NULL && !has_nonconstant_offset) {
inst->offset =
is_high_sampler(brw, sampler_reg);
inst->base_mrf = 2;
inst->mlen = inst->header_present + 1; /* always at least one */
- inst->dst = dst_reg(this, ir->type);
inst->dst.writemask = WRITEMASK_XYZW;
inst->shadow_compare = ir->shadow_comparitor != NULL;
if (brw->gen >= 7) {
dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
+
+ /* We have to use a message header on Skylake to get SIMD4x2 mode.
+ * Reserve space for the register.
+ */
+ if (brw->gen >= 9) {
+ grf_offset.reg_offset++;
+ alloc.sizes[grf_offset.reg] = 2;
+ }
+
grf_offset.type = offset.type;
emit_before(block, inst, MOV(grf_offset, offset));
- load = new(mem_ctx) vec4_instruction(this,
- VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
+ load = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
temp, index, src_reg(grf_offset));
+ load->mlen = 1;
} else {
- load = new(mem_ctx) vec4_instruction(this, VS_OPCODE_PULL_CONSTANT_LOAD,
+ load = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD,
temp, index, offset);
load->base_mrf = 14;
load->mlen = 1;
struct gl_shader_program *shader_prog,
gl_shader_stage stage,
void *mem_ctx,
- bool debug_flag,
bool no_spills,
shader_time_shader_type st_base,
shader_time_shader_type st_written,
fail_msg(NULL),
first_non_payload_grf(0),
need_all_constants_in_pull_buffer(false),
- debug_flag(debug_flag),
no_spills(no_spills),
st_base(st_base),
st_written(st_written),
*/
this->uniform_array_size = 1;
if (prog_data) {
- this->uniform_array_size = MAX2(stage_prog_data->nr_params, 1);
+ this->uniform_array_size =
+ MAX2(DIV_ROUND_UP(stage_prog_data->nr_params, 4), 1);
}
this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
va_start(va, format);
msg = ralloc_vasprintf(mem_ctx, format, va);
va_end(va);
- msg = ralloc_asprintf(mem_ctx, "vec4 compile failed: %s\n", msg);
+ msg = ralloc_asprintf(mem_ctx, "%s compile failed: %s\n", stage_abbrev, msg);
this->fail_msg = msg;
- if (debug_flag) {
+ if (debug_enabled) {
fprintf(stderr, "%s", msg);
}
}