#include "brw_vec4.h"
#include "brw_cfg.h"
#include "brw_eu.h"
+#include "util/u_math.h"
namespace brw {
this->predicate_inverse = false;
this->target = 0;
this->shadow_compare = false;
+ this->eot = false;
this->ir = NULL;
this->urb_write_flags = BRW_URB_WRITE_NO_FLAGS;
this->header_size = 0;
* false) elements needed to pack a type.
*/
static int
-type_size_xvec4(const struct glsl_type *type, bool as_vec4)
+type_size_xvec4(const struct glsl_type *type, bool as_vec4, bool bindless)
{
unsigned int i;
int size;
case GLSL_TYPE_UINT:
case GLSL_TYPE_INT:
case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_FLOAT16:
case GLSL_TYPE_BOOL:
case GLSL_TYPE_DOUBLE:
+ case GLSL_TYPE_UINT16:
+ case GLSL_TYPE_INT16:
+ case GLSL_TYPE_UINT8:
+ case GLSL_TYPE_INT8:
case GLSL_TYPE_UINT64:
case GLSL_TYPE_INT64:
if (type->is_matrix()) {
}
case GLSL_TYPE_ARRAY:
assert(type->length > 0);
- return type_size_xvec4(type->fields.array, as_vec4) * type->length;
+ return type_size_xvec4(type->fields.array, as_vec4, bindless) *
+ type->length;
case GLSL_TYPE_STRUCT:
+ case GLSL_TYPE_INTERFACE:
size = 0;
for (i = 0; i < type->length; i++) {
- size += type_size_xvec4(type->fields.structure[i].type, as_vec4);
+ size += type_size_xvec4(type->fields.structure[i].type, as_vec4,
+ bindless);
}
return size;
case GLSL_TYPE_SUBROUTINE:
/* Non-bindless samplers take up no register space, since they're baked
* in at link time.  With bindless set, one element is counted instead.
*/
- return 0;
+ return bindless ? 1 : 0;
case GLSL_TYPE_ATOMIC_UINT:
return 0;
case GLSL_TYPE_IMAGE:
- return DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4);
+ return bindless ? 1 : DIV_ROUND_UP(BRW_IMAGE_PARAM_SIZE, 4);
case GLSL_TYPE_VOID:
case GLSL_TYPE_ERROR:
- case GLSL_TYPE_INTERFACE:
case GLSL_TYPE_FUNCTION:
unreachable("not reached");
}
* store a particular type.
*/
extern "C" int
-type_size_vec4(const struct glsl_type *type)
+type_size_vec4(const struct glsl_type *type, bool bindless)
{
/* Thin wrapper: forwards to type_size_xvec4() with as_vec4 = true.  The
 * 'bindless' flag added by this change is passed through untouched; inside
 * type_size_xvec4() it turns the "return 0" and image-size results into 1.
 * The register-allocating constructors visible in this excerpt all pass
 * false for it.
 */
- return type_size_xvec4(type, true);
+ return type_size_xvec4(type, true, bindless);
}
/**
* type fits in one or two vec4 slots.
*/
extern "C" int
-type_size_dvec4(const struct glsl_type *type)
+type_size_dvec4(const struct glsl_type *type, bool bindless)
{
/* Same wrapper shape as type_size_vec4(), but calls type_size_xvec4() with
 * as_vec4 = false and forwards the new 'bindless' flag unchanged.
 * NOTE(review): presumably as_vec4 = false selects dvec4-sized elements;
 * the full header comment of type_size_xvec4() is not visible here, so
 * confirm against it.
 */
- return type_size_xvec4(type, false);
+ return type_size_xvec4(type, false, bindless);
}
src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
init();
this->file = VGRF;
- this->nr = v->alloc.allocate(type_size_vec4(type));
+ this->nr = v->alloc.allocate(type_size_vec4(type, false));
- if (type->is_array() || type->is_record()) {
+ if (type->is_array() || type->is_struct()) {
this->swizzle = BRW_SWIZZLE_NOOP;
} else {
this->swizzle = brw_swizzle_for_size(type->vector_elements);
init();
this->file = VGRF;
- this->nr = v->alloc.allocate(type_size_vec4(type) * size);
+ this->nr = v->alloc.allocate(type_size_vec4(type, false) * size);
this->swizzle = BRW_SWIZZLE_NOOP;
init();
this->file = VGRF;
- this->nr = v->alloc.allocate(type_size_vec4(type));
+ this->nr = v->alloc.allocate(type_size_vec4(type, false));
- if (type->is_array() || type->is_record()) {
+ if (type->is_array() || type->is_struct()) {
this->writemask = WRITEMASK_XYZW;
} else {
this->writemask = (1 << type->vector_elements) - 1;
return inst;
}
/* NOTE(review): every line of vec4_visitor::emit_lrp() below carries a '-'
 * marker, i.e. this hunk deletes the helper outright.  The deleted code
 * emitted a linear interpolation: a single LRP instruction (operands passed
 * in reverse order) when devinfo->gen >= 6, otherwise the expansion
 * x*(1-a) + y*a built from two MULs and two ADDs through three temporaries
 * that inherit dst.writemask.  No call sites are visible in this excerpt;
 * confirm none remain before relying on this removal.
 */
-vec4_instruction *
-vec4_visitor::emit_lrp(const dst_reg &dst,
- const src_reg &x, const src_reg &y, const src_reg &a)
-{
- if (devinfo->gen >= 6) {
- /* Note that the instruction's argument order is reversed from GLSL
- * and the IR.
- */
- return emit(LRP(dst, fix_3src_operand(a), fix_3src_operand(y),
- fix_3src_operand(x)));
- } else {
- /* Earlier generations don't support three source operations, so we
- * need to emit x*(1-a) + y*a.
- */
- dst_reg y_times_a = dst_reg(this, glsl_type::vec4_type);
- dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type);
- dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type);
- y_times_a.writemask = dst.writemask;
- one_minus_a.writemask = dst.writemask;
- x_times_one_minus_a.writemask = dst.writemask;
-
- emit(MUL(y_times_a, y, a));
- emit(ADD(one_minus_a, negate(a), brw_imm_f(1.0f)));
- emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));
- return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
- }
-}
-
/**
* Emits the instructions needed to perform a pull constant load. before_block
* and before_inst can be NULL in which case the instruction will be appended
dst_reg(this, glsl_type::uvec4_type));
inst->base_mrf = 2;
inst->src[1] = surface;
- inst->src[2] = surface;
+ inst->src[2] = brw_imm_ud(0); /* sampler */
int param_base;
src_reg surface_reg,
src_reg sampler_reg)
{
- /* The sampler can only meaningfully compute LOD for fragment shader
- * messages. For all other stages, we change the opcode to TXL and hardcode
- * the LOD to 0.
- *
- * textureQueryLevels() is implemented in terms of TXS so we need to pass a
- * valid LOD argument.
- */
- if (op == ir_tex || op == ir_query_levels) {
- assert(lod.file == BAD_FILE);
- lod = brw_imm_f(0.0f);
- }
-
enum opcode opcode;
switch (op) {
case ir_tex: opcode = SHADER_OPCODE_TXL; break;
if (output_reg[VARYING_SLOT_CLIP_DIST0][0].file != BAD_FILE) {
current_annotation = "Clipping flags";
dst_reg flags0 = dst_reg(this, glsl_type::uint_type);
- dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST0][0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L));
emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags0, brw_imm_d(0));
emit(OR(header1_w, src_reg(header1_w), src_reg(flags0)));
+ }
+ if (output_reg[VARYING_SLOT_CLIP_DIST1][0].file != BAD_FILE) {
+ dst_reg flags1 = dst_reg(this, glsl_type::uint_type);
emit(CMP(dst_null_f(), src_reg(output_reg[VARYING_SLOT_CLIP_DIST1][0]), brw_imm_f(0.0f), BRW_CONDITIONAL_L));
emit(VS_OPCODE_UNPACK_FLAGS_SIMD4X2, flags1, brw_imm_d(0));
emit(SHL(flags1, src_reg(flags1), brw_imm_d(4)));
* determine which edges should be drawn as wireframe.
*/
current_annotation = "edge flag";
- int edge_attr = _mesa_bitcount_64(nir->info.inputs_read &
+ int edge_attr = util_bitcount64(nir->info.inputs_read &
BITFIELD64_MASK(VERT_ATTRIB_EDGEFLAG));
emit(MOV(reg, src_reg(dst_reg(ATTR, edge_attr,
glsl_type::float_type, WRITEMASK_XYZW))));
}
}
-static int
-align_interleaved_urb_mlen(const struct gen_device_info *devinfo, int mlen)
+static unsigned
+align_interleaved_urb_mlen(const struct gen_device_info *devinfo, unsigned mlen)
{
if (devinfo->gen >= 6) {
/* URB data written (does not include the message header reg) must
src = byte_offset(src, 16);
}
- brw_mark_surface_used(&prog_data->base, index);
-
if (is_64bit) {
temp = retype(temp, BRW_REGISTER_TYPE_DF);
shuffle_64bit_data(orig_temp, src_reg(temp), false, block, inst);
prog_data(prog_data),
fail_msg(NULL),
first_non_payload_grf(0),
+ live_analysis(this), performance_analysis(this),
need_all_constants_in_pull_buffer(false),
no_spills(no_spills),
shader_time_index(shader_time_index),
memset(this->output_num_components, 0, sizeof(this->output_num_components));
- this->virtual_grf_start = NULL;
- this->virtual_grf_end = NULL;
- this->live_intervals = NULL;
-
this->max_grf = devinfo->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;
this->uniforms = 0;
-}
-vec4_visitor::~vec4_visitor()
-{
+ this->nir_locals = NULL;
+ this->nir_ssa_values = NULL;
}