_mesa_glsl_error(&loc, state, "unsized array index must be constant");
}
} else if (array->type->fields.array->is_interface()
- && array->variable_referenced()->data.mode == ir_var_uniform
+ && (array->variable_referenced()->data.mode == ir_var_uniform ||
+ array->variable_referenced()->data.mode == ir_var_shader_storage)
&& !state->is_version(400, 0) && !state->ARB_gpu_shader5_enable) {
- /* Page 46 in section 4.3.7 of the OpenGL ES 3.00 spec says:
+ /* Page 50 in section 4.3.9 of the OpenGL ES 3.10 spec says:
*
- * "All indexes used to index a uniform block array must be
- * constant integral expressions."
+ * "All indices used to index a uniform or shader storage block
+ * array must be constant integral expressions."
*/
- _mesa_glsl_error(&loc, state,
- "uniform block array index must be constant");
+ _mesa_glsl_error(&loc, state, "%s block array index must be constant",
+ array->variable_referenced()->data.mode
+ == ir_var_uniform ? "uniform" : "shader storage");
} else {
/* whole_variable_referenced can return NULL if the array is a
* member of a structure. In this case it is safe to not update
const struct ast_type_qualifier *const qual =
& decl_list->type->qualifier;
+
+ if (qual->flags.q.explicit_binding)
+ validate_binding_qualifier(state, &loc, decl_type, qual);
+
if (qual->flags.q.std140 ||
qual->flags.q.std430 ||
qual->flags.q.packed ||
base_type(GLSL_TYPE_SUBROUTINE),
sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
sampler_type(0), interface_packing(0),
- vector_elements(0), matrix_columns(0),
+ vector_elements(1), matrix_columns(1),
length(0)
{
mtx_lock(&glsl_type::mutex);
init_ralloc_type_ctx();
assert(subroutine_name != NULL);
this->name = ralloc_strdup(this->mem_ctx, subroutine_name);
- this->vector_elements = 1;
mtx_unlock(&glsl_type::mutex);
}
base_alignment = MAX2(base_alignment,
field_type->std430_base_alignment(field_row_major));
}
+ assert(base_alignment > 0);
return base_alignment;
}
assert(!"not reached");
const unsigned num_data_slots = uniform_size.num_values;
const unsigned hidden_uniforms = uniform_size.num_hidden_uniforms;
+ /* assign hidden uniforms a slot id */
+ hiddenUniforms->iterate(assign_hidden_uniform_slot_id, &uniform_size);
+ delete hiddenUniforms;
+
/* On the outside chance that there were no uniforms, bail out.
*/
if (num_uniforms == 0)
return;
- /* assign hidden uniforms a slot id */
- hiddenUniforms->iterate(assign_hidden_uniform_slot_id, &uniform_size);
- delete hiddenUniforms;
-
struct gl_uniform_storage *uniforms =
rzalloc_array(prog, struct gl_uniform_storage, num_uniforms);
union gl_constant_value *data =
delete uniform_map;
}
+static bool
+should_add_buffer_variable(struct gl_shader_program *shProg,
+ GLenum type, const char *name)
+{
+ bool found_interface = false;
+ const char *block_name = NULL;
+
+ /* These rules only apply to buffer variables. So we return
+ * true for the rest of types.
+ */
+ if (type != GL_BUFFER_VARIABLE)
+ return true;
+
+ for (unsigned i = 0; i < shProg->NumBufferInterfaceBlocks; i++) {
+ block_name = shProg->UniformBlocks[i].Name;
+ if (strncmp(block_name, name, strlen(block_name)) == 0) {
+ found_interface = true;
+ break;
+ }
+ }
+
+ /* We remove the interface name from the buffer variable name,
+ * including the dot that follows it.
+ */
+ if (found_interface)
+ name = name + strlen(block_name) + 1;
+
+ /* From: ARB_program_interface_query extension:
+ *
+ * "For an active shader storage block member declared as an array, an
+ * entry will be generated only for the first array element, regardless
+ * of its type. For arrays of aggregate types, the enumeration rules are
+ * applied recursively for the single enumerated array element.
+ */
+ const char *first_dot = strchr(name, '.');
+ const char *first_square_bracket = strchr(name, '[');
+
+ /* The buffer variable is on top level and it is not an array */
+ if (!first_square_bracket) {
+ return true;
+ /* The shader storage block member is a struct, then generate the entry */
+ } else if (first_dot && first_dot < first_square_bracket) {
+ return true;
+ } else {
+ /* Shader storage block member is an array, only generate an entry for the
+ * first array element.
+ */
+ if (strncmp(first_square_bracket, "[0]", 3) == 0)
+ return true;
+ }
+
+ return false;
+}
+
static bool
add_program_resource(struct gl_shader_program *prog, GLenum type,
const void *data, uint8_t stages)
bool is_shader_storage = shProg->UniformStorage[i].is_shader_storage;
GLenum type = is_shader_storage ? GL_BUFFER_VARIABLE : GL_UNIFORM;
+ if (!should_add_buffer_variable(shProg, type,
+ shProg->UniformStorage[i].name))
+ continue;
+
if (!add_program_resource(shProg, type,
&shProg->UniformStorage[i], stageref))
return;
add(base_offset,
new(mem_ctx) ir_constant(deref_offset + i * matrix_stride));
if (is_write) {
+ /* If the component is not in the writemask, then don't
+ * store any value.
+ */
+ if (!((1 << i) & write_mask))
+ continue;
+
base_ir->insert_after(ssbo_store(swizzle(deref, i, 1), chan_offset, 1));
} else {
if (!this->is_shader_storage) {
}; /* end of anonymous namespace */
nir_shader *
-glsl_to_nir(struct gl_shader *sh, const nir_shader_compiler_options *options)
+glsl_to_nir(const struct gl_shader_program *shader_prog,
+ gl_shader_stage stage,
+ const nir_shader_compiler_options *options)
{
- nir_shader *shader = nir_shader_create(NULL, sh->Stage, options);
+ struct gl_shader *sh = shader_prog->_LinkedShaders[stage];
+
+ nir_shader *shader = nir_shader_create(NULL, stage, options);
nir_visitor v1(shader, sh);
nir_function_visitor v2(&v1);
nir_lower_outputs_to_temporaries(shader);
- shader->gs.vertices_out = sh->Geom.VerticesOut;
- shader->gs.invocations = sh->Geom.Invocations;
+ /* TODO: Use _mesa_fls instead */
+ unsigned num_textures = 0;
+ for (unsigned i = 0; i < 8 * sizeof(sh->Program->SamplersUsed); i++)
+ if (sh->Program->SamplersUsed & (1 << i))
+ num_textures = i;
+
+ shader->info.name = ralloc_asprintf(shader, "GLSL%d", sh->Name);
+ shader->info.num_textures = num_textures;
+ shader->info.num_ubos = sh->NumUniformBlocks;
+ shader->info.num_abos = shader_prog->NumAtomicBuffers;
+ shader->info.num_ssbos = shader_prog->NumBufferInterfaceBlocks;
+ shader->info.num_images = sh->NumImages;
+ shader->info.inputs_read = sh->Program->InputsRead;
+ shader->info.outputs_written = sh->Program->OutputsWritten;
+ shader->info.system_values_read = sh->Program->SystemValuesRead;
+ shader->info.uses_texture_gather = sh->Program->UsesGather;
+ shader->info.uses_clip_distance_out = sh->Program->UsesClipDistanceOut;
+ shader->info.separate_shader = shader_prog->SeparateShader;
+ shader->info.gs.vertices_out = sh->Geom.VerticesOut;
+ shader->info.gs.invocations = sh->Geom.Invocations;
return shader;
}
/* For whatever reason, GLSL IR makes gl_FrontFacing an input */
var->data.location = SYSTEM_VALUE_FRONT_FACE;
var->data.mode = nir_var_system_value;
+ } else if (shader->stage == MESA_SHADER_GEOMETRY &&
+ ir->data.location == VARYING_SLOT_PRIMITIVE_ID) {
+ /* For whatever reason, GLSL IR makes gl_PrimitiveIDIn an input */
+ var->data.location = SYSTEM_VALUE_PRIMITIVE_ID;
+ var->data.mode = nir_var_system_value;
} else {
var->data.mode = nir_var_shader_in;
}
instr = nir_intrinsic_instr_create(shader, op);
instr->src[2] = evaluate_rvalue(offset);
instr->const_index[0] = 0;
- dest = &instr->dest;
} else {
instr->const_index[0] = const_offset->value.u[0];
}
extern "C" {
#endif
-nir_shader *glsl_to_nir(struct gl_shader *sh,
+nir_shader *glsl_to_nir(const struct gl_shader_program *shader_prog,
+ gl_shader_stage stage,
const nir_shader_compiler_options *options);
#ifdef __cplusplus
exec_list_make_empty(&shader->outputs);
shader->options = options;
+ memset(&shader->info, 0, sizeof(shader->info));
exec_list_make_empty(&shader->functions);
exec_list_make_empty(&shader->registers);
shader->stage = stage;
- shader->gs.vertices_out = 0;
- shader->gs.invocations = 0;
-
return shader;
}
return nir_intrinsic_load_work_group_id;
case SYSTEM_VALUE_NUM_WORK_GROUPS:
return nir_intrinsic_load_num_work_groups;
+ case SYSTEM_VALUE_PRIMITIVE_ID:
+ return nir_intrinsic_load_primitive_id;
/* FINISHME: Add tessellation intrinsics.
case SYSTEM_VALUE_TESS_COORD:
case SYSTEM_VALUE_VERTICES_IN:
- case SYSTEM_VALUE_PRIMITIVE_ID:
case SYSTEM_VALUE_TESS_LEVEL_OUTER:
case SYSTEM_VALUE_TESS_LEVEL_INNER:
*/
return SYSTEM_VALUE_NUM_WORK_GROUPS;
case nir_intrinsic_load_work_group_id:
return SYSTEM_VALUE_WORK_GROUP_ID;
+ case nir_intrinsic_load_primitive_id:
+ return SYSTEM_VALUE_PRIMITIVE_ID;
/* FINISHME: Add tessellation intrinsics.
return SYSTEM_VALUE_TESS_COORD;
return SYSTEM_VALUE_VERTICES_IN;
bool native_integers;
} nir_shader_compiler_options;
+typedef struct nir_shader_info {
+ const char *name;
+
+ /* Number of textures used by this shader */
+ unsigned num_textures;
+ /* Number of uniform buffers used by this shader */
+ unsigned num_ubos;
+ /* Number of atomic buffers used by this shader */
+ unsigned num_abos;
+ /* Number of shader storage buffers used by this shader */
+ unsigned num_ssbos;
+ /* Number of images used by this shader */
+ unsigned num_images;
+
+ /* Which inputs are actually read */
+ uint64_t inputs_read;
+ /* Which outputs are actually written */
+ uint64_t outputs_written;
+ /* Which system values are actually read */
+ uint64_t system_values_read;
+
+ /* Whether or not this shader ever uses textureGather() */
+ bool uses_texture_gather;
+
+ /* Whether or not this shader uses the gl_ClipDistance output */
+ bool uses_clip_distance_out;
+
+ /* Whether or not separate shader objects were used */
+ bool separate_shader;
+
+ struct {
+ /** The maximum number of vertices the geometry shader might write. */
+ unsigned vertices_out;
+
+ /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
+ unsigned invocations;
+ } gs;
+} nir_shader_info;
+
typedef struct nir_shader {
/** list of uniforms (nir_variable) */
struct exec_list uniforms;
*/
const struct nir_shader_compiler_options *options;
+ /** Various bits of compile-time information about a given shader */
+ struct nir_shader_info info;
+
/** list of global variables in the shader (nir_variable) */
struct exec_list globals;
/** The shader stage, such as MESA_SHADER_VERTEX. */
gl_shader_stage stage;
-
- struct {
- /** The maximum number of vertices the geometry shader might write. */
- unsigned vertices_out;
-
- /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
- unsigned invocations;
- } gs;
} nir_shader;
#define nir_foreach_overload(shader, overload) \
int (*type_size)(const struct glsl_type *));
void nir_lower_io(nir_shader *shader,
+ nir_variable_mode mode,
int (*type_size)(const struct glsl_type *));
void nir_lower_vars_to_ssa(nir_shader *shader);
SYSTEM_VALUE(sample_id, 1, 0)
SYSTEM_VALUE(sample_pos, 2, 0)
SYSTEM_VALUE(sample_mask_in, 1, 0)
+SYSTEM_VALUE(primitive_id, 1, 0)
SYSTEM_VALUE(invocation_id, 1, 0)
SYSTEM_VALUE(local_invocation_id, 3, 0)
SYSTEM_VALUE(work_group_id, 3, 0)
b->cursor = nir_before_instr(&intrin->instr);
nir_ssa_def *count = nir_load_var(b, state->vertex_count_var);
- nir_ssa_def *max_vertices = nir_imm_int(b, b->shader->gs.vertices_out);
+ nir_ssa_def *max_vertices = nir_imm_int(b, b->shader->info.gs.vertices_out);
/* Create: if (vertex_count < max_vertices) and insert it.
*
nir_builder builder;
void *mem_ctx;
int (*type_size)(const struct glsl_type *type);
+ nir_variable_mode mode;
};
void
nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
+ if (intrin->intrinsic != nir_intrinsic_load_var &&
+ intrin->intrinsic != nir_intrinsic_store_var)
+ continue;
+
+ nir_variable_mode mode = intrin->variables[0]->var->data.mode;
+
+ if (state->mode != -1 && state->mode != mode)
+ continue;
+
switch (intrin->intrinsic) {
case nir_intrinsic_load_var: {
- nir_variable_mode mode = intrin->variables[0]->var->data.mode;
if (mode != nir_var_shader_in && mode != nir_var_uniform)
continue;
}
static void
-nir_lower_io_impl(nir_function_impl *impl, int(*type_size)(const struct glsl_type *))
+nir_lower_io_impl(nir_function_impl *impl,
+ nir_variable_mode mode,
+ int (*type_size)(const struct glsl_type *))
{
struct lower_io_state state;
nir_builder_init(&state.builder, impl);
state.mem_ctx = ralloc_parent(impl);
+ state.mode = mode;
state.type_size = type_size;
nir_foreach_block(impl, nir_lower_io_block, &state);
}
void
-nir_lower_io(nir_shader *shader, int(*type_size)(const struct glsl_type *))
+nir_lower_io(nir_shader *shader, nir_variable_mode mode,
+ int (*type_size)(const struct glsl_type *))
{
nir_foreach_overload(shader, overload) {
if (overload->impl)
- nir_lower_io_impl(overload->impl, type_size);
+ nir_lower_io_impl(overload->impl, mode, type_size);
}
}
static bool
convert_impl(nir_function_impl *impl)
{
- bool progress;
+ bool progress = false;
nir_foreach_block(impl, convert_block, &progress);
nir_metadata_preserve(impl, nir_metadata_block_index |
nir_foreach_phi_src(phi, src) {
assert(src->src.is_ssa);
+
+ /* For phi nodes at the beginning of loops, we may encounter some
+ * sources from backedges that point back to the destination of the
+ * same phi, i.e. something like:
+ *
+ * a = phi(a, b, ...)
+ *
+ * We can safely ignore these sources, since if all of the normal
+ * sources point to the same definition, then that definition must
+ * still dominate the phi node, and the phi will still always take
+ * the value of that definition.
+ */
+ if (src->src.ssa == &phi->dest.ssa)
+ continue;
if (def == NULL) {
def = src->src.ssa;
if (!srcs_same)
continue;
+ /* We must have found at least one definition, since there must be at
+ * least one forward edge.
+ */
+ assert(def != NULL);
+
assert(phi->dest.is_ssa);
nir_ssa_def_rewrite_uses(&phi->dest.ssa, nir_src_for_ssa(def));
nir_instr_remove(instr);
/*@{*/
SYSTEM_VALUE_TESS_COORD,
SYSTEM_VALUE_VERTICES_IN, /**< Tessellation vertices in input patch */
- SYSTEM_VALUE_PRIMITIVE_ID, /**< (currently not used by GS) */
+ SYSTEM_VALUE_PRIMITIVE_ID,
SYSTEM_VALUE_TESS_LEVEL_OUTER, /**< TES input */
SYSTEM_VALUE_TESS_LEVEL_INNER, /**< TES input */
/*@}*/
$(EXTRA_DEFINES_XF86VIDMODE) \
-D_REENTRANT \
-DDEFAULT_DRIVER_DIR=\"$(DRI_DRIVER_SEARCH_DIR)\" \
+ -DGL_LIB_NAME=\"lib@GL_LIB@.so.1\" \
$(DEFINES) \
$(LIBDRM_CFLAGS) \
$(DRI2PROTO_CFLAGS) \
}
}
+#ifndef GL_LIB_NAME
+#define GL_LIB_NAME "libGL.so.1"
+#endif
+
#ifndef DEFAULT_DRIVER_DIR
/* this is normally defined in Mesa/configs/default with DRI_DRIVER_SEARCH_PATH */
#define DEFAULT_DRIVER_DIR "/usr/local/lib/dri"
int len;
/* Attempt to make sure libGL symbols will be visible to the driver */
- glhandle = dlopen("libGL.so.1", RTLD_NOW | RTLD_GLOBAL);
+ glhandle = dlopen(GL_LIB_NAME, RTLD_NOW | RTLD_GLOBAL);
libPaths = NULL;
if (geteuid() == getuid()) {
*/
_X_EXPORT void (*glXGetProcAddress(const GLubyte * procName)) (void)
#if defined(__GNUC__) && !defined(GLX_ALIAS_UNSUPPORTED)
+# if defined(USE_MGL_NAMESPACE)
+ __attribute__ ((alias("mglXGetProcAddressARB")));
+# else
__attribute__ ((alias("glXGetProcAddressARB")));
+# endif
#else
{
return glXGetProcAddressARB(procName);
# define GLX_ALIAS_VOID(real_func, proto_args, args, aliased_func)
#else
# if defined(__GNUC__) && !defined(GLX_ALIAS_UNSUPPORTED)
-# define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
+/* GLX_ALIAS and GLX_ALIAS_VOID both expand to the macro GLX_ALIAS2. Using the
+ * extra expansion means that the name mangling macros in glx_mangle.h will
+ * apply before stringification, so the alias attribute will have a string like
+ * "mglXFoo" instead of "glXFoo". */
+# define GLX_ALIAS2(return_type, real_func, proto_args, args, aliased_func) \
return_type real_func proto_args \
__attribute__ ((alias( # aliased_func ) ));
+# define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
+ GLX_ALIAS2(return_type, real_func, proto_args, args, aliased_func)
# define GLX_ALIAS_VOID(real_func, proto_args, args, aliased_func) \
- GLX_ALIAS(void, real_func, proto_args, args, aliased_func)
+ GLX_ALIAS2(void, real_func, proto_args, args, aliased_func)
# else
# define GLX_ALIAS(return_type, real_func, proto_args, args, aliased_func) \
return_type real_func proto_args \
const struct mapi_stub *stub;
#ifdef USE_MGL_NAMESPACE
- if (name)
+ if (name && name[0] == 'm')
name++;
#endif
char *sample_map_str = rzalloc_size(mem_ctx, 1);
char *sample_map_expr = rzalloc_size(mem_ctx, 1);
char *texel_fetch_macro = rzalloc_size(mem_ctx, 1);
- const char *vs_source;
const char *sampler_array_suffix = "";
- const char *texcoord_type = "vec2";
float y_scale;
enum blit_msaa_shader shader_index;
shader_index += BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_ARRAY_SCALED_RESOLVE -
BLIT_2X_MSAA_SHADER_2D_MULTISAMPLE_SCALED_RESOLVE;
sampler_array_suffix = "Array";
- texcoord_type = "vec3";
}
if (blit->msaa_shaders[shader_index]) {
" const int sample_map[%d] = int[%d](%s);\n",
samples, samples, sample_map_str);
- ralloc_asprintf_append(&texel_fetch_macro,
- "#define TEXEL_FETCH(coord) texelFetch(texSampler, i%s(coord), %s);\n",
- texcoord_type, sample_number);
+ if (target == GL_TEXTURE_2D_MULTISAMPLE) {
+ ralloc_asprintf_append(&texel_fetch_macro,
+ "#define TEXEL_FETCH(coord) texelFetch(texSampler, ivec2(coord), %s);\n",
+ sample_number);
+ } else {
+ ralloc_asprintf_append(&texel_fetch_macro,
+ "#define TEXEL_FETCH(coord) texelFetch(texSampler, ivec3(coord, layer), %s);\n",
+ sample_number);
+ }
- vs_source = ralloc_asprintf(mem_ctx,
+ static const char vs_source[] =
"#version 130\n"
"in vec2 position;\n"
- "in %s textureCoords;\n"
- "out %s texCoords;\n"
+ "in vec3 textureCoords;\n"
+ "out vec2 texCoords;\n"
+ "flat out int layer;\n"
"void main()\n"
"{\n"
- " texCoords = textureCoords;\n"
+ " texCoords = textureCoords.xy;\n"
+ " layer = int(textureCoords.z);\n"
" gl_Position = vec4(position, 0.0, 1.0);\n"
- "}\n",
- texcoord_type,
- texcoord_type);
+ "}\n"
+ ;
+
fs_source = ralloc_asprintf(mem_ctx,
"#version 130\n"
"#extension GL_ARB_texture_multisample : enable\n"
"uniform sampler2DMS%s texSampler;\n"
"uniform float src_width, src_height;\n"
- "in %s texCoords;\n"
+ "in vec2 texCoords;\n"
+ "flat in int layer;\n"
"out vec4 out_color;\n"
"\n"
"void main()\n"
" out_color = mix(x_0_color, x_1_color, interp.y);\n"
"}\n",
sampler_array_suffix,
- texcoord_type,
sample_map_expr,
y_scale,
1.0f / y_scale,
#define I830_UPLOAD_STIPPLE 0x4
#define I830_UPLOAD_INVARIENT 0x8
#define I830_UPLOAD_RASTER_RULES 0x10
-#define I830_UPLOAD_TEX(i) (0x10<<(i))
-#define I830_UPLOAD_TEXBLEND(i) (0x100<<(i))
-#define I830_UPLOAD_TEX_ALL (0x0f0)
-#define I830_UPLOAD_TEXBLEND_ALL (0xf00)
+#define I830_UPLOAD_TEX(i) (0x0100<<(i))
+#define I830_UPLOAD_TEXBLEND(i) (0x1000<<(i))
+#define I830_UPLOAD_TEX_ALL (0x0f00)
+#define I830_UPLOAD_TEXBLEND_ALL (0xf000)
/* State structure offsets - these will probably disappear.
*/
I915_RASTER_RULES_SETUP_SIZE,
};
+#define I915_TEX_UNITS 8
+
#define I915_MAX_CONSTANT 32
#define I915_CONSTANT_SIZE (2+(4*I915_MAX_CONSTANT))
/* Helpers for i915_fragprog.c:
*/
- GLuint wpos_tex;
+ uint8_t texcoord_mapping[I915_TEX_UNITS];
+ uint8_t wpos_tex;
bool depth_written;
struct
GLuint nr_params;
};
-
-
-
-
-
-
-#define I915_TEX_UNITS 8
-
-
struct i915_hw_state
{
GLuint Ctx[I915_CTX_SETUP_SIZE];
-1.0 / (6 * 5 * 4 * 3 * 2 * 1)
};
+/* texcoord_mapping[unit] = index | TEXCOORD_{TEX,VAR} */
+#define TEXCOORD_TEX (0<<7)
+#define TEXCOORD_VAR (1<<7)
+
+static unsigned
+get_texcoord_mapping(struct i915_fragment_program *p, uint8_t texcoord)
+{
+ for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
+ if (p->texcoord_mapping[i] == texcoord)
+ return i;
+ }
+
+ /* blah */
+ return p->ctx->Const.MaxTextureCoordUnits - 1;
+}
+
/**
* Retrieve a ureg for the given source register. Will emit
* constants, apply swizzling and negation as needed.
const struct gl_fragment_program *program)
{
GLuint src;
+ unsigned unit;
switch (source->File) {
case VARYING_SLOT_TEX5:
case VARYING_SLOT_TEX6:
case VARYING_SLOT_TEX7:
+ unit = get_texcoord_mapping(p, (source->Index -
+ VARYING_SLOT_TEX0) | TEXCOORD_TEX);
src = i915_emit_decl(p, REG_TYPE_T,
- T_TEX0 + (source->Index - VARYING_SLOT_TEX0),
+ T_TEX0 + unit,
D0_CHANNEL_ALL);
break;
case VARYING_SLOT_VAR0 + 5:
case VARYING_SLOT_VAR0 + 6:
case VARYING_SLOT_VAR0 + 7:
+ unit = get_texcoord_mapping(p, (source->Index -
+ VARYING_SLOT_VAR0) | TEXCOORD_VAR);
src = i915_emit_decl(p, REG_TYPE_T,
- T_TEX0 + (source->Index - VARYING_SLOT_VAR0),
+ T_TEX0 + unit,
D0_CHANNEL_ALL);
break;
}
}
+static void
+check_texcoord_mapping(struct i915_fragment_program *p)
+{
+ GLbitfield64 inputs = p->FragProg.Base.InputsRead;
+ unsigned unit = 0;
+
+ for (unsigned i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
+ if (inputs & VARYING_BIT_TEX(i)) {
+ if (unit >= p->ctx->Const.MaxTextureCoordUnits) {
+ unit++;
+ break;
+ }
+ p->texcoord_mapping[unit++] = i | TEXCOORD_TEX;
+ }
+ if (inputs & VARYING_BIT_VAR(i)) {
+ if (unit >= p->ctx->Const.MaxTextureCoordUnits) {
+ unit++;
+ break;
+ }
+ p->texcoord_mapping[unit++] = i | TEXCOORD_VAR;
+ }
+ }
+
+ if (unit > p->ctx->Const.MaxTextureCoordUnits)
+ i915_program_error(p, "Too many texcoord units");
+}
static void
check_wpos(struct i915_fragment_program *p)
{
GLbitfield64 inputs = p->FragProg.Base.InputsRead;
GLint i;
+ unsigned unit = 0;
p->wpos_tex = -1;
+ if ((inputs & VARYING_BIT_POS) == 0)
+ return;
+
for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
- if (inputs & (VARYING_BIT_TEX(i) | VARYING_BIT_VAR(i)))
- continue;
- else if (inputs & VARYING_BIT_POS) {
- p->wpos_tex = i;
- inputs &= ~VARYING_BIT_POS;
- }
+ unit += !!(inputs & VARYING_BIT_TEX(i));
+ unit += !!(inputs & VARYING_BIT_VAR(i));
}
- if (inputs & VARYING_BIT_POS) {
+ if (unit < p->ctx->Const.MaxTextureCoordUnits)
+ p->wpos_tex = unit;
+ else
i915_program_error(p, "No free texcoord for wpos value");
- }
}
}
i915_init_program(i915, p);
+ check_texcoord_mapping(p);
check_wpos(p);
upload_program(p);
fixup_depth_write(p);
for (i = 0; i < p->ctx->Const.MaxTextureCoordUnits; i++) {
if (inputsRead & VARYING_BIT_TEX(i)) {
+ int unit = get_texcoord_mapping(p, i | TEXCOORD_TEX);
int sz = VB->AttribPtr[_TNL_ATTRIB_TEX0 + i]->size;
- s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
- s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
+ s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK);
+ s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz));
EMIT_ATTR(_TNL_ATTRIB_TEX0 + i, EMIT_SZ(sz), 0, sz * 4);
}
- else if (inputsRead & VARYING_BIT_VAR(i)) {
+ if (inputsRead & VARYING_BIT_VAR(i)) {
+ int unit = get_texcoord_mapping(p, i | TEXCOORD_VAR);
int sz = VB->AttribPtr[_TNL_ATTRIB_GENERIC0 + i]->size;
- s2 &= ~S2_TEXCOORD_FMT(i, S2_TEXCOORD_FMT0_MASK);
- s2 |= S2_TEXCOORD_FMT(i, SZ_TO_HW(sz));
+ s2 &= ~S2_TEXCOORD_FMT(unit, S2_TEXCOORD_FMT0_MASK);
+ s2 |= S2_TEXCOORD_FMT(unit, SZ_TO_HW(sz));
EMIT_ATTR(_TNL_ATTRIB_GENERIC0 + i, EMIT_SZ(sz), 0, sz * 4);
}
- else if (i == p->wpos_tex) {
+ if (i == p->wpos_tex) {
int wpos_size = 4 * sizeof(float);
/* If WPOS is required, duplicate the XYZ position data in an
* unused texture coordinate:
{
struct intel_context *intel = intel_context(ctx);
+ /* Sync up the state of window system buffers. We need to do this before
+ * we go looking for the buffers.
+ */
+ intel_prepare_render(intel);
+
if (mask & GL_COLOR_BUFFER_BIT) {
GLint i;
struct gl_renderbuffer *src_rb = readFb->_ColorReadBuffer;
brw_nir.h \
brw_nir.c \
brw_nir_analyze_boolean_resolves.c \
+ brw_nir_uniforms.cpp \
brw_object_purgeable.c \
brw_packed_float.c \
brw_performance_monitor.c \
brw_vec4_surface_builder.cpp \
brw_vec4_surface_builder.h \
brw_vec4_visitor.cpp \
- brw_vec4_vp.cpp \
brw_vec4_vs_visitor.cpp \
brw_vs.c \
brw_vs.h \
buffer->cpp, buffer->pitch);
}
- intel_miptree_release(&rb->mt);
bo = drm_intel_bo_gem_create_from_name(brw->bufmgr, buffer_name,
buffer->name);
if (!bo) {
bool compiled_once;
};
-/* Note: If adding fields that need anything besides a normal memcmp() for
- * comparing them, be sure to go fix brw_stage_prog_data_compare().
- */
struct brw_stage_prog_data {
struct {
/** size of our binding table. */
/* Pointers to tracked values (only valid once
* _mesa_load_state_parameters has been called at runtime).
- *
- * These must be the last fields of the struct (see
- * brw_stage_prog_data_compare()).
*/
const gl_constant_value **param;
const gl_constant_value **pull_param;
- /**
- * Image metadata passed to the shader as uniforms. This is deliberately
- * ignored by brw_stage_prog_data_compare() because its contents don't have
- * any influence on program compilation.
- */
+ /** Image metadata passed to the shader as uniforms. */
struct brw_image_param *image_param;
};
* there can be many of these, each in a different GL state
* corresponding to a different brw_wm_prog_key struct, with different
* compiled programs.
- *
- * Note: brw_wm_prog_data_compare() must be updated when adding fields to this
- * struct!
*/
struct brw_wm_prog_data {
struct brw_stage_prog_data base;
int urb_setup[VARYING_SLOT_MAX];
};
-/* Note: brw_cs_prog_data_compare() must be updated when adding fields to this
- * struct!
- */
struct brw_cs_prog_data {
struct brw_stage_prog_data base;
DISPATCH_MODE_SIMD8 = 3,
};
-/* Note: brw_vue_prog_data_compare() must be updated when adding fields to
- * this struct!
- */
struct brw_vue_prog_data {
struct brw_stage_prog_data base;
struct brw_vue_map vue_map;
};
-/* Note: brw_vs_prog_data_compare() must be updated when adding fields to this
- * struct!
- */
struct brw_vs_prog_data {
struct brw_vue_prog_data base;
#define SURF_INDEX_GEN6_SOL_BINDING(t) (t)
-/* Note: brw_gs_prog_data_compare() must be updated when adding fields to
- * this struct!
- */
struct brw_gs_prog_data
{
struct brw_vue_prog_data base;
};
-typedef bool (*cache_aux_compare_func)(const void *a, const void *b);
typedef void (*cache_aux_free_func)(const void *aux);
struct brw_cache {
uint32_t next_offset;
bool bo_used_by_gpu;
- /**
- * Optional functions used in determining whether the prog_data for a new
- * cache item matches an existing cache item (in case there's relevant data
- * outside of the prog_data). If NULL, a plain memcmp is done.
- */
- cache_aux_compare_func aux_compare[BRW_MAX_CACHE];
/** Optional functions for freeing other pointers attached to a prog_data. */
cache_aux_free_func aux_free[BRW_MAX_CACHE];
};
int num_atoms[BRW_NUM_PIPELINES];
const struct brw_tracked_state render_atoms[60];
- const struct brw_tracked_state compute_atoms[7];
+ const struct brw_tracked_state compute_atoms[8];
/* If (INTEL_DEBUG & DEBUG_BATCH) */
struct {
#include "intel_mipmap_tree.h"
#include "brw_state.h"
#include "intel_batchbuffer.h"
+#include "brw_nir.h"
-bool
-brw_cs_prog_data_compare(const void *in_a, const void *in_b)
+static void
+assign_cs_binding_table_offsets(const struct brw_device_info *devinfo,
+ const struct gl_shader_program *shader_prog,
+ const struct gl_program *prog,
+ struct brw_cs_prog_data *prog_data)
{
- const struct brw_cs_prog_data *a =
- (const struct brw_cs_prog_data *)in_a;
- const struct brw_cs_prog_data *b =
- (const struct brw_cs_prog_data *)in_b;
-
- /* Compare the base structure. */
- if (!brw_stage_prog_data_compare(&a->base, &b->base))
- return false;
+ uint32_t next_binding_table_offset = 0;
- /* Compare the rest of the structure. */
- const unsigned offset = sizeof(struct brw_stage_prog_data);
- if (memcmp(((char *) a) + offset, ((char *) b) + offset,
- sizeof(struct brw_cs_prog_data) - offset))
- return false;
+ /* May not be used if the gl_NumWorkGroups variable is not accessed. */
+ prog_data->binding_table.work_groups_start = next_binding_table_offset;
+ next_binding_table_offset++;
- return true;
+ brw_assign_common_binding_table_offsets(MESA_SHADER_COMPUTE, devinfo,
+ shader_prog, prog, &prog_data->base,
+ next_binding_table_offset);
}
static bool
memset(&prog_data, 0, sizeof(prog_data));
+ assign_cs_binding_table_offsets(brw->intelScreen->devinfo, prog,
+ &cp->program.Base, &prog_data);
+
/* Allocate the references to the uniforms that will end up in the
* prog_data associated with the compiled program, and which will be freed
* by the state cache.
*/
- int param_count = cs->base.num_uniform_components +
- cs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
+ int param_count = cp->program.Base.nir->num_uniforms;
/* The backend also sometimes adds params for texture size. */
param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
prog_data.base.nr_params = param_count;
prog_data.base.nr_image_params = cs->base.NumImages;
+ brw_nir_setup_glsl_uniforms(cp->program.Base.nir, prog, &cp->program.Base,
+ &prog_data.base, true);
+
if (unlikely(brw->perf_debug)) {
start_busy = (brw->batch.last_bo &&
drm_intel_bo_busy(brw->batch.last_bo));
extern "C" {
#endif
-bool brw_cs_prog_data_compare(const void *a, const void *b);
-
void
brw_upload_cs_prog(struct brw_context *brw);
unsigned *final_assembly_size);
unsigned
-brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog,
- unsigned dispatch_width);
+brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width);
#ifdef __cplusplus
}
this->param_size = v->param_size;
}
-void
-fs_visitor::setup_vec4_uniform_value(unsigned param_offset,
- const gl_constant_value *values,
- unsigned n)
-{
- static const gl_constant_value zero = { 0 };
-
- for (unsigned i = 0; i < n; ++i)
- stage_prog_data->param[param_offset + i] = &values[i];
-
- for (unsigned i = n; i < 4; ++i)
- stage_prog_data->param[param_offset + i] = &zero;
-}
-
fs_reg *
fs_visitor::emit_fragcoord_interpolation(bool pixel_center_integer,
bool origin_upper_left)
int urb_next = 0;
/* Figure out where each of the incoming setup attributes lands. */
if (devinfo->gen >= 6) {
- if (_mesa_bitcount_64(prog->InputsRead &
+ if (_mesa_bitcount_64(nir->info.inputs_read &
BRW_FS_VARYING_INPUT_MASK) <= 16) {
/* The SF/SBE pipeline stage can do arbitrary rearrangement of the
* first 16 varying inputs, so we can put them wherever we want.
* a different vertex (or geometry) shader.
*/
for (unsigned int i = 0; i < VARYING_SLOT_MAX; i++) {
- if (prog->InputsRead & BRW_FS_VARYING_INPUT_MASK &
+ if (nir->info.inputs_read & BRW_FS_VARYING_INPUT_MASK &
BITFIELD64_BIT(i)) {
prog_data->urb_setup[i] = urb_next++;
}
struct brw_vue_map prev_stage_vue_map;
brw_compute_vue_map(devinfo, &prev_stage_vue_map,
key->input_slots_valid,
- shader_prog->SeparateShader);
+ nir->info.separate_shader);
int first_slot = 2 * BRW_SF_URB_ENTRY_READ_OFFSET;
assert(prev_stage_vue_map.num_slots <= first_slot + 32);
for (int slot = first_slot; slot < prev_stage_vue_map.num_slots;
* unused.
*/
if (varying != BRW_VARYING_SLOT_COUNT &&
- (prog->InputsRead & BRW_FS_VARYING_INPUT_MASK &
+ (nir->info.inputs_read & BRW_FS_VARYING_INPUT_MASK &
BITFIELD64_BIT(varying))) {
prog_data->urb_setup[varying] = slot - first_slot;
}
*
* See compile_sf_prog() for more info.
*/
- if (prog->InputsRead & BITFIELD64_BIT(VARYING_SLOT_PNTC))
+ if (nir->info.inputs_read & BITFIELD64_BIT(VARYING_SLOT_PNTC))
prog_data->urb_setup[VARYING_SLOT_PNTC] = urb_next++;
}
fprintf(file, "***m%d***", inst->src[i].reg);
break;
case ATTR:
- fprintf(file, "attr%d", inst->src[i].reg + inst->src[i].reg_offset);
+ fprintf(file, "attr%d+%d", inst->src[i].reg, inst->src[i].reg_offset);
break;
case UNIFORM:
fprintf(file, "u%d", inst->src[i].reg + inst->src[i].reg_offset);
fs_visitor::setup_payload_gen6()
{
bool uses_depth =
- (prog->InputsRead & (1 << VARYING_SLOT_POS)) != 0;
+ (nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
unsigned barycentric_interp_modes =
(stage == MESA_SHADER_FRAGMENT) ?
((brw_wm_prog_data*) this->prog_data)->barycentric_interp_modes : 0;
}
/* R32: MSAA input coverage mask */
- if (prog->SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN) {
+ if (nir->info.system_values_read & SYSTEM_BIT_SAMPLE_MASK_IN) {
assert(devinfo->gen >= 7);
payload.sample_mask_in_reg = payload.num_regs;
payload.num_regs++;
/* R34-: bary for 32-pixel. */
/* R58-59: interp W for 32-pixel. */
- if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
+ if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
source_depth_to_render_target = true;
}
}
payload.num_regs = 1;
- if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
+ if (nir->info.system_values_read & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
const unsigned local_id_dwords =
- brw_cs_prog_local_id_payload_dwords(prog, dispatch_width);
+ brw_cs_prog_local_id_payload_dwords(dispatch_width);
assert((local_id_dwords & 0x7) == 0);
const unsigned local_id_regs = local_id_dwords / 8;
payload.local_invocation_id_reg = payload.num_regs;
}
}
-void
-fs_visitor::assign_fs_binding_table_offsets()
-{
- assert(stage == MESA_SHADER_FRAGMENT);
- brw_wm_prog_data *prog_data = (brw_wm_prog_data*) this->prog_data;
- brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
- uint32_t next_binding_table_offset = 0;
-
- /* If there are no color regions, we still perform an FB write to a null
- * renderbuffer, which we place at surface index 0.
- */
- prog_data->binding_table.render_target_start = next_binding_table_offset;
- next_binding_table_offset += MAX2(key->nr_color_regions, 1);
-
- assign_common_binding_table_offsets(next_binding_table_offset);
-}
-
-void
-fs_visitor::assign_cs_binding_table_offsets()
-{
- assert(stage == MESA_SHADER_COMPUTE);
- brw_cs_prog_data *prog_data = (brw_cs_prog_data*) this->prog_data;
- uint32_t next_binding_table_offset = 0;
-
- /* May not be used if the gl_NumWorkGroups variable is not accessed. */
- prog_data->binding_table.work_groups_start = next_binding_table_offset;
- next_binding_table_offset++;
-
- assign_common_binding_table_offsets(next_binding_table_offset);
-}
-
void
fs_visitor::calculate_register_pressure()
{
\
if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER) && this_progress) { \
char filename[64]; \
- snprintf(filename, 64, "%s%d-%04d-%02d-%02d-" #pass, \
- stage_abbrev, dispatch_width, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \
+ snprintf(filename, 64, "%s%d-%s-%02d-%02d-" #pass, \
+ stage_abbrev, dispatch_width, nir->info.name, iteration, pass_num); \
\
backend_shader::dump_instructions(filename); \
} \
if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) {
char filename[64];
- snprintf(filename, 64, "%s%d-%04d-00-start",
- stage_abbrev, dispatch_width,
- shader_prog ? shader_prog->Name : 0);
+ snprintf(filename, 64, "%s%d-%s-00-start",
+ stage_abbrev, dispatch_width, nir->info.name);
backend_shader::dump_instructions(filename);
}
{
assert(stage == MESA_SHADER_VERTEX);
- if (prog_data->map_entries == NULL)
- assign_common_binding_table_offsets(0);
setup_vs_payload();
if (shader_time_index >= 0)
assert(stage == MESA_SHADER_FRAGMENT);
- sanity_param_count = prog->Parameters->NumParameters;
-
- if (prog_data->map_entries == NULL)
- assign_fs_binding_table_offsets();
-
if (devinfo->gen >= 6)
setup_payload_gen6();
else
emit_shader_time_begin();
calculate_urb_setup();
- if (prog->InputsRead > 0) {
+ if (nir->info.inputs_read > 0) {
if (devinfo->gen < 6)
emit_interpolation_setup_gen4();
else
fs_visitor::run_cs()
{
assert(stage == MESA_SHADER_COMPUTE);
- assert(shader);
-
- sanity_param_count = prog->Parameters->NumParameters;
-
- assign_cs_binding_table_offsets();
setup_cs_payload();
if (failed)
return false;
- /* If any state parameters were appended, then ParameterValues could have
- * been realloced, in which case the driver uniform storage set up by
- * _mesa_associate_uniform_storage() would point to freed memory. Make
- * sure that didn't happen.
- */
- assert(sanity_param_count == prog->Parameters->NumParameters);
-
return !failed;
}
/* Now the main event: Visit the shader IR and generate our FS IR for it.
*/
- fs_visitor v(brw->intelScreen->compiler, brw,
- mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
- prog, &fp->Base, 8, st_index8);
+ fs_visitor v(brw->intelScreen->compiler, brw, mem_ctx, key,
+ &prog_data->base, &fp->Base, fp->Base.nir, 8, st_index8);
if (!v.run_fs(false /* do_rep_send */)) {
if (prog) {
prog->LinkStatus = false;
}
cfg_t *simd16_cfg = NULL;
- fs_visitor v2(brw->intelScreen->compiler, brw,
- mem_ctx, MESA_SHADER_FRAGMENT, key, &prog_data->base,
- prog, &fp->Base, 16, st_index16);
+ fs_visitor v2(brw->intelScreen->compiler, brw, mem_ctx, key,
+ &prog_data->base, &fp->Base, fp->Base.nir, 16, st_index16);
if (likely(!(INTEL_DEBUG & DEBUG_NO16) || brw->use_rep_send)) {
if (!v.simd16_unsupported) {
/* Try a SIMD16 compile */
/* Now the main event: Visit the shader IR and generate our CS IR for it.
*/
- fs_visitor v8(brw->intelScreen->compiler, brw,
- mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
- &cp->Base, 8, st_index);
+ fs_visitor v8(brw->intelScreen->compiler, brw, mem_ctx, key,
+ &prog_data->base, &cp->Base, cp->Base.nir, 8, st_index);
if (!v8.run_cs()) {
fail_msg = v8.fail_msg;
} else if (local_workgroup_size <= 8 * brw->max_cs_threads) {
prog_data->simd_size = 8;
}
- fs_visitor v16(brw->intelScreen->compiler, brw,
- mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
- &cp->Base, 16, st_index);
+ fs_visitor v16(brw->intelScreen->compiler, brw, mem_ctx, key,
+ &prog_data->base, &cp->Base, cp->Base.nir, 16, st_index);
if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
!fail_msg && !v8.simd16_unsupported &&
local_workgroup_size <= 16 * brw->max_cs_threads) {
public:
fs_visitor(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
- gl_shader_stage stage,
const void *key,
struct brw_stage_prog_data *prog_data,
- struct gl_shader_program *shader_prog,
struct gl_program *prog,
+ nir_shader *shader,
unsigned dispatch_width,
int shader_time_index);
bool run_cs();
void optimize();
void allocate_registers();
- void assign_fs_binding_table_offsets();
- void assign_cs_binding_table_offsets();
void setup_payload_gen4();
void setup_payload_gen6();
void setup_vs_payload();
void emit_interpolation_setup_gen6();
void compute_sample_position(fs_reg dst, fs_reg int_sample_pos);
fs_reg rescale_texcoord(fs_reg coordinate, int coord_components,
- bool is_rect, uint32_t sampler, int texunit);
+ bool is_rect, uint32_t sampler);
void emit_texture(ir_texture_opcode op,
const glsl_type *dest_type,
fs_reg coordinate, int components,
bool is_cube_array,
bool is_rect,
uint32_t sampler,
- fs_reg sampler_reg,
- int texunit);
+ fs_reg sampler_reg);
fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components,
const fs_reg &sampler);
void emit_gen6_gather_wa(uint8_t wa, fs_reg dst);
uint32_t spill_offset, int count);
void emit_nir_code();
- void nir_setup_inputs(nir_shader *shader);
- void nir_setup_outputs(nir_shader *shader);
- void nir_setup_uniforms(nir_shader *shader);
- void nir_setup_uniform(nir_variable *var);
- void nir_setup_builtin_uniform(nir_variable *var);
- void nir_emit_system_values(nir_shader *shader);
+ void nir_setup_inputs();
+ void nir_setup_outputs();
+ void nir_setup_uniforms();
+ void nir_emit_system_values();
void nir_emit_impl(nir_function_impl *impl);
void nir_emit_cf_list(exec_list *list);
void nir_emit_if(nir_if *if_stmt);
struct brw_reg interp_reg(int location, int channel);
- virtual void setup_vec4_uniform_value(unsigned param_offset,
- const gl_constant_value *values,
- unsigned n);
-
int implied_mrf_writes(fs_inst *inst);
virtual void dump_instructions();
const struct brw_sampler_prog_key_data *key_tex;
struct brw_stage_prog_data *prog_data;
- unsigned int sanity_param_count;
+ struct gl_program *prog;
int *param_size;
void
fs_visitor::emit_nir_code()
{
- nir_shader *nir = prog->nir;
-
/* emit the arrays used for inputs and outputs - load/store intrinsics will
* be converted to reads/writes of these arrays
*/
- nir_setup_inputs(nir);
- nir_setup_outputs(nir);
- uniforms = nir->num_uniforms;
- //nir_setup_uniforms(nir);
- nir_emit_system_values(nir);
+ nir_setup_inputs();
+ nir_setup_outputs();
+ nir_setup_uniforms();
+ nir_emit_system_values();
/* get the main function and emit it */
nir_foreach_overload(nir, overload) {
}
void
-fs_visitor::nir_setup_inputs(nir_shader *shader)
+fs_visitor::nir_setup_inputs()
{
- nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, shader->num_inputs);
+ nir_inputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_inputs);
- foreach_list_typed(nir_variable, var, node, &shader->inputs) {
+ foreach_list_typed(nir_variable, var, node, &nir->inputs) {
enum brw_reg_type type = brw_type_for_base_type(var->type);
fs_reg input = offset(nir_inputs, bld, var->data.driver_location);
}
void
-fs_visitor::nir_setup_outputs(nir_shader *shader)
+fs_visitor::nir_setup_outputs()
{
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
- nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, shader->num_outputs);
+ nir_outputs = bld.vgrf(BRW_REGISTER_TYPE_F, nir->num_outputs);
- foreach_list_typed(nir_variable, var, node, &shader->outputs) {
+ foreach_list_typed(nir_variable, var, node, &nir->outputs) {
fs_reg reg = offset(nir_outputs, bld, var->data.driver_location);
int vector_elements =
}
void
-fs_visitor::nir_setup_uniforms(nir_shader *shader)
+fs_visitor::nir_setup_uniforms()
{
if (dispatch_width != 8)
return;
- uniforms = shader->num_uniforms;
-
- if (shader_prog) {
- foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
- /* UBO's and atomics don't take up space in the uniform file */
- if (var->interface_type != NULL || var->type->contains_atomic())
- continue;
-
- if (strncmp(var->name, "gl_", 3) == 0)
- nir_setup_builtin_uniform(var);
- else
- nir_setup_uniform(var);
- if(type_size_scalar(var->type) > 0)
- param_size[var->data.driver_location] = type_size_scalar(var->type);
- }
- } else {
- /* prog_to_nir only creates a single giant uniform variable so we can
- * just set param up directly. */
- for (unsigned p = 0; p < prog->Parameters->NumParameters; p++) {
- for (unsigned int i = 0; i < 4; i++) {
- stage_prog_data->param[4 * p + i] =
- &prog->Parameters->ParameterValues[p][i];
- }
- }
- if(prog->Parameters->NumParameters > 0)
- param_size[0] = prog->Parameters->NumParameters * 4;
- }
-}
+ uniforms = nir->num_uniforms;
-void
-fs_visitor::nir_setup_uniform(nir_variable *var)
-{
- int namelen = strlen(var->name);
-
- /* The data for our (non-builtin) uniforms is stored in a series of
- * gl_uniform_driver_storage structs for each subcomponent that
- * glGetUniformLocation() could name. We know it's been set up in the
- * same order we'd walk the type, so walk the list of storage and find
- * anything with our name, or the prefix of a component that starts with
- * our name.
- */
- unsigned index = var->data.driver_location;
- for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
- struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
-
- if (storage->builtin)
- continue;
-
- if (strncmp(var->name, storage->name, namelen) != 0 ||
- (storage->name[namelen] != 0 &&
- storage->name[namelen] != '.' &&
- storage->name[namelen] != '[')) {
+ foreach_list_typed(nir_variable, var, node, &nir->uniforms) {
+ /* UBO's and atomics don't take up space in the uniform file */
+ if (var->interface_type != NULL || var->type->contains_atomic())
continue;
- }
-
- if (storage->type->is_image()) {
- setup_image_uniform_values(index, storage);
- } else {
- unsigned slots = storage->type->component_slots();
- if (storage->array_elements)
- slots *= storage->array_elements;
- for (unsigned i = 0; i < slots; i++) {
- stage_prog_data->param[index++] = &storage->storage[i];
- }
- }
- }
-}
-
-void
-fs_visitor::nir_setup_builtin_uniform(nir_variable *var)
-{
- const nir_state_slot *const slots = var->state_slots;
- assert(var->state_slots != NULL);
-
- unsigned uniform_index = var->data.driver_location;
- for (unsigned int i = 0; i < var->num_state_slots; i++) {
- /* This state reference has already been setup by ir_to_mesa, but we'll
- * get the same index back here.
- */
- int index = _mesa_add_state_reference(this->prog->Parameters,
- (gl_state_index *)slots[i].tokens);
-
- /* Add each of the unique swizzles of the element as a parameter.
- * This'll end up matching the expected layout of the
- * array/matrix/structure we're trying to fill in.
- */
- int last_swiz = -1;
- for (unsigned int j = 0; j < 4; j++) {
- int swiz = GET_SWZ(slots[i].swizzle, j);
- if (swiz == last_swiz)
- break;
- last_swiz = swiz;
-
- stage_prog_data->param[uniform_index++] =
- &prog->Parameters->ParameterValues[index][swiz];
- }
+ if (type_size_scalar(var->type) > 0)
+ param_size[var->data.driver_location] = type_size_scalar(var->type);
}
}
}
void
-fs_visitor::nir_emit_system_values(nir_shader *shader)
+fs_visitor::nir_emit_system_values()
{
nir_system_values = ralloc_array(mem_ctx, fs_reg, SYSTEM_VALUE_MAX);
- nir_foreach_overload(shader, overload) {
+ nir_foreach_overload(nir, overload) {
assert(strcmp(overload->function->name, "main") == 0);
assert(overload->impl);
nir_foreach_block(overload->impl, emit_system_values_block, this);
*/
brw_mark_surface_used(prog_data,
stage_prog_data->binding_table.ubo_start +
- shader_prog->NumBufferInterfaceBlocks - 1);
+ nir->info.num_ssbos - 1);
}
if (has_indirect) {
*/
brw_mark_surface_used(prog_data,
stage_prog_data->binding_table.ubo_start +
- shader_prog->NumBufferInterfaceBlocks - 1);
+ nir->info.num_ssbos - 1);
}
/* Get the offset to read from */
brw_mark_surface_used(prog_data,
stage_prog_data->binding_table.ubo_start +
- shader_prog->NumBufferInterfaceBlocks - 1);
+ nir->info.num_ssbos - 1);
}
/* Offset */
unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
int reg_width = dispatch_width / 8;
- assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
-
/* Set LOD = 0 */
fs_reg source = fs_reg(0);
*/
brw_mark_surface_used(prog_data,
stage_prog_data->binding_table.ubo_start +
- shader_prog->NumBufferInterfaceBlocks - 1);
+ nir->info.num_ssbos - 1);
}
fs_reg offset = get_nir_src(instr->src[1]);
unsigned sampler = stage_prog_data->bind_map[set].index[binding];
fs_reg sampler_reg(sampler);
- /* FINISHME: We're failing to recompile our programs when the sampler is
- * updated. This only matters for the texture rectangle scale parameters
- * (pre-gen6, or gen6+ with GL_CLAMP).
- */
- int texunit = prog->SamplerUnits[sampler];
-
int gather_component = instr->component;
bool is_rect = instr->sampler_dim == GLSL_SAMPLER_DIM_RECT;
emit_texture(op, dest_type, coordinate, instr->coord_components,
shadow_comparitor, lod, lod2, lod_components, sample_index,
tex_offset, mcs, gather_component,
- is_cube_array, is_rect, sampler, sampler_reg, texunit);
+ is_cube_array, is_rect, sampler, sampler_reg);
fs_reg dest = get_nir_dest(instr->dest);
dest.type = this->result.type;
fs_reg
fs_visitor::rescale_texcoord(fs_reg coordinate, int coord_components,
- bool is_rect, uint32_t sampler, int texunit)
+ bool is_rect, uint32_t sampler)
{
bool needs_gl_clamp = true;
fs_reg scale_x, scale_y;
(devinfo->gen >= 6 && (key_tex->gl_clamp_mask[0] & (1 << sampler) ||
key_tex->gl_clamp_mask[1] & (1 << sampler))))) {
struct gl_program_parameter_list *params = prog->Parameters;
+
+
+ /* FINISHME: We're failing to recompile our programs when the sampler is
+ * updated. This only matters for the texture rectangle scale
+ * parameters (pre-gen6, or gen6+ with GL_CLAMP).
+ */
int tokens[STATE_LENGTH] = {
STATE_INTERNAL,
STATE_TEXRECT_SCALE,
- texunit,
+ prog->SamplerUnits[sampler],
0,
0
};
bool is_cube_array,
bool is_rect,
uint32_t sampler,
- fs_reg sampler_reg, int texunit)
+ fs_reg sampler_reg)
{
fs_inst *inst = NULL;
* samplers. This should only be a problem with GL_CLAMP on Gen7.
*/
coordinate = rescale_texcoord(coordinate, coord_components, is_rect,
- sampler, texunit);
+ sampler);
}
/* Writemasking doesn't eliminate channels on SIMD8 texture
fs_reg src_depth;
if (source_depth_to_render_target) {
- if (prog->OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+ if (nir->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
src_depth = frag_depth;
else
src_depth = fs_reg(brw_vec8_grf(payload.source_depth_reg, 0));
fs_visitor::fs_visitor(const struct brw_compiler *compiler, void *log_data,
void *mem_ctx,
- gl_shader_stage stage,
const void *key,
struct brw_stage_prog_data *prog_data,
- struct gl_shader_program *shader_prog,
struct gl_program *prog,
+ nir_shader *shader,
unsigned dispatch_width,
int shader_time_index)
- : backend_shader(compiler, log_data, mem_ctx,
- shader_prog, prog, prog_data, stage),
- key(key), prog_data(prog_data),
+ : backend_shader(compiler, log_data, mem_ctx, shader, prog_data),
+ key(key), prog_data(prog_data), prog(prog),
dispatch_width(dispatch_width),
shader_time_index(shader_time_index),
promoted_constants(0),
#include "brw_vec4_gs_visitor.h"
#include "brw_state.h"
#include "brw_ff_gs.h"
+#include "brw_nir.h"
+
+static void
+assign_gs_binding_table_offsets(const struct brw_device_info *devinfo,
+ const struct gl_shader_program *shader_prog,
+ const struct gl_program *prog,
+ struct brw_gs_prog_data *prog_data)
+{
+ /* In gen6 we reserve the first BRW_MAX_SOL_BINDINGS entries for transform
+ * feedback surfaces.
+ */
+ uint32_t reserved = devinfo->gen == 6 ? BRW_MAX_SOL_BINDINGS : 0;
+
+ brw_assign_common_binding_table_offsets(MESA_SHADER_GEOMETRY, devinfo,
+ shader_prog, prog,
+ &prog_data->base.base,
+ reserved);
+}
bool
brw_compile_gs_prog(struct brw_context *brw,
c.prog_data.invocations = gp->program.Invocations;
+ assign_gs_binding_table_offsets(brw->intelScreen->devinfo, prog,
+ &gp->program.Base, &c.prog_data);
+
/* Allocate the references to the uniforms that will end up in the
* prog_data associated with the compiled program, and which will be freed
* by the state cache.
* every uniform is a float which gets padded to the size of a vec4.
*/
struct gl_shader *gs = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
- int param_count = gs->num_uniform_components * 4;
-
- param_count += gs->NumImages * BRW_IMAGE_PARAM_SIZE;
+ int param_count = gp->program.Base.nir->num_uniforms * 4;
c.prog_data.base.base.param =
rzalloc_array(NULL, const gl_constant_value *, param_count);
c.prog_data.base.base.nr_params = param_count;
c.prog_data.base.base.nr_image_params = gs->NumImages;
+ brw_nir_setup_glsl_uniforms(gp->program.Base.nir, prog, &gp->program.Base,
+ &c.prog_data.base.base, false);
+
if (brw->gen >= 8) {
c.prog_data.static_vertex_count = !gp->program.Base.nir ? -1 :
nir_gs_count_vertices(gp->program.Base.nir);
return success;
}
-
-
-bool
-brw_gs_prog_data_compare(const void *in_a, const void *in_b)
-{
- const struct brw_gs_prog_data *a = in_a;
- const struct brw_gs_prog_data *b = in_b;
-
- /* Compare the base structure. */
- if (!brw_stage_prog_data_compare(&a->base.base, &b->base.base))
- return false;
-
- /* Compare the rest of the struct. */
- const unsigned offset = sizeof(struct brw_stage_prog_data);
- if (memcmp(((char *) a) + offset, ((char *) b) + offset,
- sizeof(struct brw_gs_prog_data) - offset)) {
- return false;
- }
-
- return true;
-}
#include "glsl/nir/glsl_to_nir.h"
#include "program/prog_to_nir.h"
+static void
+brw_nir_lower_inputs(nir_shader *nir, bool is_scalar)
+{
+ nir_assign_var_locations(&nir->inputs, &nir->num_inputs,
+ is_scalar ? type_size_scalar : type_size_vec4);
+}
+
+static void
+brw_nir_lower_outputs(nir_shader *nir, bool is_scalar)
+{
+ if (is_scalar) {
+ nir_assign_var_locations(&nir->outputs, &nir->num_outputs, type_size_scalar);
+ } else {
+ foreach_list_typed(nir_variable, var, node, &nir->outputs)
+ var->data.driver_location = var->data.location;
+ }
+}
+
static void
nir_optimize(nir_shader *nir, bool is_scalar)
{
struct gl_context *ctx = &brw->ctx;
const nir_shader_compiler_options *options =
ctx->Const.ShaderCompilerOptions[stage].NirOptions;
- struct gl_shader *shader = shader_prog ? shader_prog->_LinkedShaders[stage] : NULL;
nir_shader *nir;
/* First, lower the GLSL IR or Mesa IR to NIR */
if (shader_prog) {
- nir = glsl_to_nir(shader, options);
+ nir = glsl_to_nir(shader_prog, stage, options);
} else {
nir = prog_to_nir(prog, options);
nir_convert_to_ssa(nir); /* turn registers into SSA */
/* Get rid of split copies */
nir_optimize(nir, is_scalar);
- if (is_scalar) {
- nir_assign_var_locations(&nir->uniforms,
- &nir->num_uniforms,
- type_size_scalar);
- nir_assign_var_locations(&nir->inputs, &nir->num_inputs, type_size_scalar);
- nir_assign_var_locations(&nir->outputs, &nir->num_outputs, type_size_scalar);
- nir_lower_io(nir, type_size_scalar);
- } else {
- nir_assign_var_locations(&nir->uniforms,
- &nir->num_uniforms,
- type_size_vec4);
-
- nir_assign_var_locations(&nir->inputs, &nir->num_inputs, type_size_vec4);
-
- foreach_list_typed(nir_variable, var, node, &nir->outputs)
- var->data.driver_location = var->data.location;
-
- nir_lower_io(nir, type_size_vec4);
- }
-
+ brw_nir_lower_inputs(nir, is_scalar);
+ brw_nir_lower_outputs(nir, is_scalar);
+ nir_assign_var_locations(&nir->uniforms,
+ &nir->num_uniforms,
+ is_scalar ? type_size_scalar : type_size_vec4);
+ nir_lower_io(nir, -1, is_scalar ? type_size_scalar : type_size_vec4);
nir_validate_shader(nir);
nir_remove_dead_variables(nir);
const struct gl_shader_program *shader_prog,
gl_shader_stage stage, bool is_scalar);
+void brw_nir_setup_glsl_uniforms(nir_shader *shader,
+ struct gl_shader_program *shader_prog,
+ const struct gl_program *prog,
+ struct brw_stage_prog_data *stage_prog_data,
+ bool is_scalar);
+
+void brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog,
+ struct brw_stage_prog_data *stage_prog_data);
+
#ifdef __cplusplus
}
#endif
--- /dev/null
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_shader.h"
+#include "brw_nir.h"
+#include "glsl/ir.h"
+#include "glsl/ir_uniform.h"
+
+static void
+brw_nir_setup_glsl_builtin_uniform(nir_variable *var,
+ const struct gl_program *prog,
+ struct brw_stage_prog_data *stage_prog_data,
+ unsigned comps_per_unit)
+{
+ const nir_state_slot *const slots = var->state_slots;
+ assert(var->state_slots != NULL);
+
+ unsigned uniform_index = var->data.driver_location * comps_per_unit;
+ for (unsigned int i = 0; i < var->num_state_slots; i++) {
+ /* This state reference has already been setup by ir_to_mesa, but we'll
+ * get the same index back here.
+ */
+ int index = _mesa_add_state_reference(prog->Parameters,
+ (gl_state_index *)slots[i].tokens);
+
+ /* Add each of the unique swizzles of the element as a parameter.
+ * This'll end up matching the expected layout of the
+ * array/matrix/structure we're trying to fill in.
+ */
+ int last_swiz = -1;
+ for (unsigned j = 0; j < 4; j++) {
+ int swiz = GET_SWZ(slots[i].swizzle, j);
+
+ /* If we hit a pair of identical swizzles, this means we've hit the
+ * end of the builtin variable. In scalar mode, we should just quit
+ * and move on to the next one. In vec4, we need to continue and pad
+ * it out to 4 components.
+ */
+ if (swiz == last_swiz && comps_per_unit == 1)
+ break;
+
+ last_swiz = swiz;
+
+ stage_prog_data->param[uniform_index++] =
+ &prog->Parameters->ParameterValues[index][swiz];
+ }
+ }
+}
+
+static void
+brw_nir_setup_glsl_uniform(gl_shader_stage stage, nir_variable *var,
+ struct gl_shader_program *shader_prog,
+ struct brw_stage_prog_data *stage_prog_data,
+ unsigned comps_per_unit)
+{
+ int namelen = strlen(var->name);
+
+ /* The data for our (non-builtin) uniforms is stored in a series of
+ * gl_uniform_driver_storage structs for each subcomponent that
+ * glGetUniformLocation() could name. We know it's been set up in the same
+ * order we'd walk the type, so walk the list of storage and find anything
+ * with our name, or the prefix of a component that starts with our name.
+ */
+ unsigned uniform_index = var->data.driver_location * comps_per_unit;
+ for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
+ struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
+
+ if (storage->builtin)
+ continue;
+
+ if (strncmp(var->name, storage->name, namelen) != 0 ||
+ (storage->name[namelen] != 0 &&
+ storage->name[namelen] != '.' &&
+ storage->name[namelen] != '[')) {
+ continue;
+ }
+
+ if (storage->type->is_image()) {
+ brw_setup_image_uniform_values(stage, stage_prog_data,
+ uniform_index, storage);
+ } else {
+ gl_constant_value *components = storage->storage;
+ unsigned vector_count = (MAX2(storage->array_elements, 1) *
+ storage->type->matrix_columns);
+ unsigned vector_size = storage->type->vector_elements;
+
+ for (unsigned s = 0; s < vector_count; s++) {
+ unsigned i;
+ for (i = 0; i < vector_size; i++) {
+ stage_prog_data->param[uniform_index++] = components++;
+ }
+
+ /* Pad out with zeros if needed (only needed for vec4) */
+ for (; i < comps_per_unit; i++) {
+ static const gl_constant_value zero = { 0.0 };
+ stage_prog_data->param[uniform_index++] = &zero;
+ }
+ }
+ }
+ }
+}
+
+void
+brw_nir_setup_glsl_uniforms(nir_shader *shader,
+ struct gl_shader_program *shader_prog,
+ const struct gl_program *prog,
+ struct brw_stage_prog_data *stage_prog_data,
+ bool is_scalar)
+{
+ unsigned comps_per_unit = is_scalar ? 1 : 4;
+
+ foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
+ /* UBO's, atomics and samplers don't take up space in the
+ uniform file */
+ if (var->interface_type != NULL || var->type->contains_atomic())
+ continue;
+
+ if (strncmp(var->name, "gl_", 3) == 0) {
+ brw_nir_setup_glsl_builtin_uniform(var, prog, stage_prog_data,
+ comps_per_unit);
+ } else {
+ brw_nir_setup_glsl_uniform(shader->stage, var, shader_prog,
+ stage_prog_data, comps_per_unit);
+ }
+ }
+}
+
+void
+brw_nir_setup_arb_uniforms(nir_shader *shader, struct gl_program *prog,
+ struct brw_stage_prog_data *stage_prog_data)
+{
+ struct gl_program_parameter_list *plist = prog->Parameters;
+
+#ifndef NDEBUG
+ if (!shader->uniforms.is_empty()) {
+ /* For ARB programs, only a single "parameters" variable is generated to
+ * support uniform data.
+ */
+ assert(shader->uniforms.length() == 1);
+ nir_variable *var = (nir_variable *) shader->uniforms.get_head();
+ assert(strcmp(var->name, "parameters") == 0);
+ assert(var->type->array_size() == (int)plist->NumParameters);
+ }
+#endif
+
+ for (unsigned p = 0; p < plist->NumParameters; p++) {
+ /* Parameters should be either vec4 uniforms or single component
+ * constants; matrices and other larger types should have been broken
+ * down earlier.
+ */
+ assert(plist->Parameters[p].Size <= 4);
+
+ unsigned i;
+ for (i = 0; i < plist->Parameters[p].Size; i++) {
+ stage_prog_data->param[4 * p + i] = &plist->ParameterValues[p][i];
+ }
+ for (; i < 4; i++) {
+ static const gl_constant_value zero = { 0.0 };
+ stage_prog_data->param[4 * p + i] = &zero;
+ }
+ }
+}
brw_add_texrect_params(prog);
- if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_FRAGMENT].NirOptions) {
- prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
- }
+ prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_FRAGMENT, true);
brw_fs_precompile(ctx, NULL, prog);
break;
brw_add_texrect_params(prog);
- if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].NirOptions) {
- prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
- brw->intelScreen->compiler->scalar_vs);
- }
+ prog->nir = brw_create_nir(brw, NULL, prog, MESA_SHADER_VERTEX,
+ brw->intelScreen->compiler->scalar_vs);
brw_vs_precompile(ctx, NULL, prog);
break;
MAX2(prog_data->binding_table.size_bytes, (surf_index + 1) * 4);
}
-bool
-brw_stage_prog_data_compare(const struct brw_stage_prog_data *a,
- const struct brw_stage_prog_data *b)
-{
- /* Compare all the struct up to the pointers. */
- if (memcmp(a, b, offsetof(struct brw_stage_prog_data, param)))
- return false;
-
- if (memcmp(a->param, b->param, a->nr_params * sizeof(void *)))
- return false;
-
- if (memcmp(a->pull_param, b->pull_param, a->nr_pull_params * sizeof(void *)))
- return false;
-
- return true;
-}
-
void
brw_stage_prog_data_free(const void *p)
{
ralloc_free(prog_data->param);
ralloc_free(prog_data->pull_param);
+ ralloc_free(prog_data->image_param);
}
void
brw_mark_surface_used(struct brw_stage_prog_data *prog_data,
unsigned surf_index);
-bool
-brw_stage_prog_data_compare(const struct brw_stage_prog_data *a,
- const struct brw_stage_prog_data *b);
-
void
brw_stage_prog_data_free(const void *prog_data);
va_end(args);
}
+static bool
+is_scalar_shader_stage(const struct brw_compiler *compiler, int stage)
+{
+ switch (stage) {
+ case MESA_SHADER_FRAGMENT:
+ case MESA_SHADER_COMPUTE:
+ return true;
+ case MESA_SHADER_VERTEX:
+ return compiler->scalar_vs;
+ default:
+ return false;
+ }
+}
+
struct brw_compiler *
brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo)
{
compiler->glsl_compiler_options[i].EmitNoIndirectUniform = false;
compiler->glsl_compiler_options[i].LowerClipDistance = true;
- bool is_scalar;
- switch (i) {
- case MESA_SHADER_FRAGMENT:
- case MESA_SHADER_COMPUTE:
- is_scalar = true;
- break;
- case MESA_SHADER_VERTEX:
- is_scalar = compiler->scalar_vs;
- break;
- default:
- is_scalar = false;
- break;
- }
+ bool is_scalar = is_scalar_shader_stage(compiler, i);
compiler->glsl_compiler_options[i].EmitNoIndirectOutput = is_scalar;
compiler->glsl_compiler_options[i].EmitNoIndirectTemp = is_scalar;
if (devinfo->gen < 7)
compiler->glsl_compiler_options[i].EmitNoIndirectSampler = true;
- if (is_scalar || brw_env_var_as_boolean("INTEL_USE_NIR", true))
- compiler->glsl_compiler_options[i].NirOptions = nir_options;
+ compiler->glsl_compiler_options[i].NirOptions = nir_options;
}
return compiler;
return true;
}
-static inline bool
-is_scalar_shader_stage(struct brw_context *brw, int stage)
-{
- switch (stage) {
- case MESA_SHADER_FRAGMENT:
- case MESA_SHADER_COMPUTE:
- return true;
- case MESA_SHADER_VERTEX:
- return brw->intelScreen->compiler->scalar_vs;
- default:
- return false;
- }
-}
-
static void
brw_lower_packing_builtins(struct brw_context *brw,
gl_shader_stage shader_type,
| LOWER_PACK_UNORM_2x16
| LOWER_UNPACK_UNORM_2x16;
- if (is_scalar_shader_stage(brw, shader_type)) {
+ if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) {
ops |= LOWER_UNPACK_UNORM_4x8
| LOWER_UNPACK_SNORM_4x8
| LOWER_PACK_UNORM_4x8
* lowering is needed. For SOA code, the Half2x16 ops must be
* scalarized.
*/
- if (is_scalar_shader_stage(brw, shader_type)) {
+ if (is_scalar_shader_stage(brw->intelScreen->compiler, shader_type)) {
ops |= LOWER_PACK_HALF_2x16_TO_SPLIT
| LOWER_UNPACK_HALF_2x16_TO_SPLIT;
}
brw_lower_texture_gradients(brw, shader->ir);
do_vec_index_to_cond_assign(shader->ir);
lower_vector_insert(shader->ir, true);
- if (options->NirOptions == NULL)
- brw_do_cubemap_normalize(shader->ir);
lower_offset_arrays(shader->ir);
brw_do_lower_unnormalized_offset(shader->ir);
lower_noise(shader->ir);
do {
progress = false;
- if (is_scalar_shader_stage(brw, shader->Stage)) {
+ if (is_scalar_shader_stage(brw->intelScreen->compiler, shader->Stage)) {
brw_do_channel_expressions(shader->ir);
brw_do_vector_splitting(shader->ir);
}
brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
{
struct brw_context *brw = brw_context(ctx);
+ const struct brw_compiler *compiler = brw->intelScreen->compiler;
unsigned int stage;
for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
struct gl_shader *shader = shProg->_LinkedShaders[stage];
- const struct gl_shader_compiler_options *options =
- &ctx->Const.ShaderCompilerOptions[stage];
-
if (!shader)
continue;
brw_add_texrect_params(prog);
- if (options->NirOptions) {
- prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
- is_scalar_shader_stage(brw, stage));
- }
+ prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
+ is_scalar_shader_stage(compiler, stage));
_mesa_reference_program(ctx, &prog, NULL);
}
backend_shader::backend_shader(const struct brw_compiler *compiler,
void *log_data,
void *mem_ctx,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog,
- struct brw_stage_prog_data *stage_prog_data,
- gl_shader_stage stage)
+ nir_shader *shader,
+ struct brw_stage_prog_data *stage_prog_data)
: compiler(compiler),
log_data(log_data),
devinfo(compiler->devinfo),
- shader(shader_prog ?
- (struct brw_shader *)shader_prog->_LinkedShaders[stage] : NULL),
- shader_prog(shader_prog),
- prog(prog),
+ nir(shader),
stage_prog_data(stage_prog_data),
mem_ctx(mem_ctx),
cfg(NULL),
- stage(stage)
+ stage(shader->stage)
{
debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
stage_name = _mesa_shader_stage_to_string(stage);
* trigger some of our asserts that surface indices are < BRW_MAX_SURFACES.
*/
void
-backend_shader::assign_common_binding_table_offsets(uint32_t next_binding_table_offset)
+brw_assign_common_binding_table_offsets(gl_shader_stage stage,
+ const struct brw_device_info *devinfo,
+ const struct gl_shader_program *shader_prog,
+ const struct gl_program *prog,
+ struct brw_stage_prog_data *stage_prog_data,
+ uint32_t next_binding_table_offset)
{
+ const struct gl_shader *shader = NULL;
int num_textures = _mesa_fls(prog->SamplersUsed);
+ if (shader_prog)
+ shader = shader_prog->_LinkedShaders[stage];
+
stage_prog_data->binding_table.texture_start = next_binding_table_offset;
next_binding_table_offset += num_textures;
if (shader) {
stage_prog_data->binding_table.ubo_start = next_binding_table_offset;
- next_binding_table_offset += shader->base.NumUniformBlocks;
+ next_binding_table_offset += shader->NumUniformBlocks;
} else {
stage_prog_data->binding_table.ubo_start = 0xd0d0d0d0;
}
stage_prog_data->binding_table.abo_start = 0xd0d0d0d0;
}
- if (shader && shader->base.NumImages) {
+ if (shader && shader->NumImages) {
stage_prog_data->binding_table.image_start = next_binding_table_offset;
- next_binding_table_offset += shader->base.NumImages;
+ next_binding_table_offset += shader->NumImages;
} else {
stage_prog_data->binding_table.image_start = 0xd0d0d0d0;
}
/* prog_data->base.binding_table.size will be set by brw_mark_surface_used. */
}
+static void
+setup_vec4_uniform_value(const gl_constant_value **params,
+ const gl_constant_value *values,
+ unsigned n)
+{
+ static const gl_constant_value zero = { 0 };
+
+ for (unsigned i = 0; i < n; ++i)
+ params[i] = &values[i];
+
+ for (unsigned i = n; i < 4; ++i)
+ params[i] = &zero;
+}
+
void
-backend_shader::setup_image_uniform_values(unsigned param_offset,
- const gl_uniform_storage *storage)
+brw_setup_image_uniform_values(gl_shader_stage stage,
+ struct brw_stage_prog_data *stage_prog_data,
+ unsigned param_start_index,
+ const gl_uniform_storage *storage)
{
- const unsigned stage = _mesa_program_enum_to_shader_stage(prog->Target);
+ const gl_constant_value **param =
+ &stage_prog_data->param[param_start_index];
for (unsigned i = 0; i < MAX2(storage->array_elements, 1); i++) {
const unsigned image_idx = storage->image[stage].index + i;
- const brw_image_param *param = &stage_prog_data->image_param[image_idx];
+ const brw_image_param *image_param =
+ &stage_prog_data->image_param[image_idx];
/* Upload the brw_image_param structure. The order is expected to match
* the BRW_IMAGE_PARAM_*_OFFSET defines.
*/
- setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
- (const gl_constant_value *)¶m->surface_idx, 1);
- setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_OFFSET_OFFSET,
- (const gl_constant_value *)param->offset, 2);
- setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_SIZE_OFFSET,
- (const gl_constant_value *)param->size, 3);
- setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_STRIDE_OFFSET,
- (const gl_constant_value *)param->stride, 4);
- setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_TILING_OFFSET,
- (const gl_constant_value *)param->tiling, 3);
- setup_vec4_uniform_value(param_offset + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
- (const gl_constant_value *)param->swizzling, 2);
- param_offset += BRW_IMAGE_PARAM_SIZE;
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SURFACE_IDX_OFFSET,
+ (const gl_constant_value *)&image_param->surface_idx, 1);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_OFFSET_OFFSET,
+ (const gl_constant_value *)image_param->offset, 2);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SIZE_OFFSET,
+ (const gl_constant_value *)image_param->size, 3);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_STRIDE_OFFSET,
+ (const gl_constant_value *)image_param->stride, 4);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_TILING_OFFSET,
+ (const gl_constant_value *)image_param->tiling, 3);
+ setup_vec4_uniform_value(param + BRW_IMAGE_PARAM_SWIZZLING_OFFSET,
+ (const gl_constant_value *)image_param->swizzling, 2);
+ param += BRW_IMAGE_PARAM_SIZE;
brw_mark_surface_used(
stage_prog_data,
#include <stdint.h>
#include "brw_reg.h"
#include "brw_defines.h"
+#include "brw_context.h"
#include "main/compiler.h"
#include "glsl/ir.h"
#include "program/prog_parameter.h"
backend_shader(const struct brw_compiler *compiler,
void *log_data,
void *mem_ctx,
- struct gl_shader_program *shader_prog,
- struct gl_program *prog,
- struct brw_stage_prog_data *stage_prog_data,
- gl_shader_stage stage);
+ nir_shader *shader,
+ struct brw_stage_prog_data *stage_prog_data);
public:
void *log_data; /* Passed to compiler->*_log functions */
const struct brw_device_info * const devinfo;
- struct brw_shader * const shader;
- struct gl_shader_program * const shader_prog;
- struct gl_program * const prog;
+ nir_shader *nir;
struct brw_stage_prog_data * const stage_prog_data;
/** ralloc context for temporary data used during compile */
void calculate_cfg();
void invalidate_cfg();
- void assign_common_binding_table_offsets(uint32_t next_binding_table_offset);
-
virtual void invalidate_live_intervals() = 0;
-
- virtual void setup_vec4_uniform_value(unsigned param_offset,
- const gl_constant_value *values,
- unsigned n) = 0;
- void setup_image_uniform_values(unsigned param_offset,
- const gl_uniform_storage *storage);
};
uint32_t brw_texture_offset(int *offsets, unsigned num_components);
+void brw_setup_image_uniform_values(gl_shader_stage stage,
+ struct brw_stage_prog_data *stage_prog_data,
+ unsigned param_start_index,
+ const gl_uniform_storage *storage);
+
#endif /* __cplusplus */
enum brw_reg_type brw_type_for_base_type(const struct glsl_type *type);
struct brw_compiler *
brw_compiler_create(void *mem_ctx, const struct brw_device_info *devinfo);
+void
+brw_assign_common_binding_table_offsets(gl_shader_stage stage,
+ const struct brw_device_info *devinfo,
+ const struct gl_shader_program *shader_prog,
+ const struct gl_program *prog,
+ struct brw_stage_prog_data *stage_prog_data,
+ uint32_t next_binding_table_offset);
+
bool brw_vs_precompile(struct gl_context *ctx,
struct gl_shader_program *shader_prog,
struct gl_program *prog);
extern const struct brw_tracked_state brw_wm_ubo_surfaces;
extern const struct brw_tracked_state brw_wm_abo_surfaces;
extern const struct brw_tracked_state brw_wm_image_surfaces;
+extern const struct brw_tracked_state brw_cs_ubo_surfaces;
extern const struct brw_tracked_state brw_cs_abo_surfaces;
extern const struct brw_tracked_state brw_cs_image_surfaces;
extern const struct brw_tracked_state brw_wm_unit;
if (brw->has_llc)
drm_intel_gem_bo_map_unsynchronized(cache->bo);
- cache->aux_compare[BRW_CACHE_VS_PROG] = brw_vs_prog_data_compare;
- cache->aux_compare[BRW_CACHE_GS_PROG] = brw_gs_prog_data_compare;
- cache->aux_compare[BRW_CACHE_FS_PROG] = brw_wm_prog_data_compare;
- cache->aux_compare[BRW_CACHE_CS_PROG] = brw_cs_prog_data_compare;
cache->aux_free[BRW_CACHE_VS_PROG] = brw_stage_prog_data_free;
cache->aux_free[BRW_CACHE_GS_PROG] = brw_stage_prog_data_free;
cache->aux_free[BRW_CACHE_FS_PROG] = brw_stage_prog_data_free;
&brw_state_base_address,
&brw_cs_image_surfaces,
&gen7_cs_push_constants,
+ &brw_cs_ubo_surfaces,
&brw_cs_abo_surfaces,
&brw_texture_surfaces,
&brw_cs_work_groups_surface,
&gen8_state_base_address,
&brw_cs_image_surfaces,
&gen7_cs_push_constants,
+ &brw_cs_ubo_surfaces,
&brw_cs_abo_surfaces,
&brw_texture_surfaces,
&brw_cs_work_groups_surface,
/* When this layout is used the horizontal alignment is fixed at 64 and the
* hardware ignores the value given in the surface state
*/
- const unsigned int align_w = 64;
+ const unsigned int halign = 64;
mt->total_height = mt->physical_height0;
mt->total_width = 0;
intel_miptree_set_level_info(mt, level, x, 0, depth);
- img_width = ALIGN(width, align_w);
+ img_width = ALIGN(width, halign);
mt->total_width = MAX2(mt->total_width, x + img_width);
unsigned mip1_width;
if (mt->compressed) {
- mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->align_w) +
+ mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->halign) +
ALIGN_NPOT(minify(mt->physical_width0, 2), bw);
} else {
- mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->align_w) +
+ mip1_width = ALIGN_NPOT(minify(mt->physical_width0, 1), mt->halign) +
minify(mt->physical_width0, 2);
}
intel_miptree_set_level_info(mt, level, x, y, depth);
- img_height = ALIGN_NPOT(height, mt->align_h);
+ img_height = ALIGN_NPOT(height, mt->valign);
if (mt->compressed)
img_height /= bh;
/* Layout_below: step right after second mipmap.
*/
if (level == mt->first_level + 1) {
- x += ALIGN_NPOT(width, mt->align_w) / bw;
+ x += ALIGN_NPOT(width, mt->halign) / bw;
} else {
y += img_height;
}
{
if ((brw->gen < 9 && mt->target == GL_TEXTURE_3D) ||
(brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP)) {
- return ALIGN_NPOT(minify(mt->physical_width0, level), mt->align_w);
+ return ALIGN_NPOT(minify(mt->physical_width0, level), mt->halign);
} else {
return 0;
}
} else if (mt->target == GL_TEXTURE_3D ||
(brw->gen == 4 && mt->target == GL_TEXTURE_CUBE_MAP) ||
mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
- return ALIGN_NPOT(minify(mt->physical_height0, level), mt->align_h);
+ return ALIGN_NPOT(minify(mt->physical_height0, level), mt->valign);
} else {
- const unsigned h0 = ALIGN_NPOT(mt->physical_height0, mt->align_h);
- const unsigned h1 = ALIGN_NPOT(minify(mt->physical_height0, 1), mt->align_h);
+ const unsigned h0 = ALIGN_NPOT(mt->physical_height0, mt->valign);
+ const unsigned h1 = ALIGN_NPOT(minify(mt->physical_height0, 1), mt->valign);
- return h0 + h1 + (brw->gen >= 7 ? 12 : 11) * mt->align_h;
+ return h0 + h1 + (brw->gen >= 7 ? 12 : 11) * mt->valign;
}
}
for (unsigned level = mt->first_level; level <= mt->last_level; level++) {
unsigned img_height;
- img_height = ALIGN_NPOT(height, mt->align_h);
+ img_height = ALIGN_NPOT(height, mt->valign);
if (mt->compressed)
- img_height /= mt->align_h;
+ img_height /= mt->valign;
for (unsigned q = 0; q < mt->level[level].depth; q++) {
if (mt->array_layout == ALL_SLICES_AT_EACH_LOD) {
unsigned WL = MAX2(mt->physical_width0 >> level, 1);
unsigned HL = MAX2(mt->physical_height0 >> level, 1);
unsigned DL = MAX2(mt->physical_depth0 >> level, 1);
- unsigned wL = ALIGN_NPOT(WL, mt->align_w);
- unsigned hL = ALIGN_NPOT(HL, mt->align_h);
+ unsigned wL = ALIGN_NPOT(WL, mt->halign);
+ unsigned hL = ALIGN_NPOT(HL, mt->valign);
if (mt->target == GL_TEXTURE_CUBE_MAP)
DL = 6;
* to know that ahead of time. And besides, since we use a vertical
* alignment of 4 as often as we can, this shouldn't happen very often.
*/
- if (brw->gen == 7 && mt->align_h == 2 &&
+ if (brw->gen == 7 && mt->valign == 2 &&
brw->format_supported_as_render_target[mt->format]) {
return I915_TILING_X;
}
/* Stencil uses W tiling, so we force W tiling alignment for the
* ALL_SLICES_AT_EACH_LOD miptree layout.
*/
- mt->align_w = 64;
- mt->align_h = 64;
+ mt->halign = 64;
+ mt->valign = 64;
assert((layout_flags & MIPTREE_LAYOUT_FORCE_HALIGN16) == 0);
} else {
/* Depth uses Y tiling, so we force need Y tiling alignment for the
* ALL_SLICES_AT_EACH_LOD miptree layout.
*/
- mt->align_w = 128 / mt->cpp;
- mt->align_h = 32;
+ mt->halign = 128 / mt->cpp;
+ mt->valign = 32;
}
} else if (mt->compressed) {
/* The hardware alignment requirements for compressed textures
* happen to match the block boundaries.
*/
- _mesa_get_format_block_size(mt->format, &mt->align_w, &mt->align_h);
+ _mesa_get_format_block_size(mt->format, &mt->halign, &mt->valign);
/* On Gen9+ we can pick our own alignment for compressed textures but it
* has to be a multiple of the block size. The minimum alignment we can
* size
*/
if (brw->gen >= 9) {
- mt->align_w *= 4;
- mt->align_h *= 4;
+ mt->halign *= 4;
+ mt->valign *= 4;
}
} else if (mt->format == MESA_FORMAT_S_UINT8) {
- mt->align_w = 8;
- mt->align_h = brw->gen >= 7 ? 8 : 4;
+ mt->halign = 8;
+ mt->valign = brw->gen >= 7 ? 8 : 4;
} else if (brw->gen >= 9 && mt->tr_mode != INTEL_MIPTREE_TRMODE_NONE) {
/* XY_FAST_COPY_BLT doesn't support horizontal alignment < 32 or
* vertical alignment < 64. */
- mt->align_w = MAX2(tr_mode_horizontal_texture_alignment(brw, mt), 32);
- mt->align_h = MAX2(tr_mode_vertical_texture_alignment(brw, mt), 64);
+ mt->halign = MAX2(tr_mode_horizontal_texture_alignment(brw, mt), 32);
+ mt->valign = MAX2(tr_mode_vertical_texture_alignment(brw, mt), 64);
} else {
- mt->align_w =
+ mt->halign =
intel_horizontal_texture_alignment_unit(brw, mt, layout_flags);
- mt->align_h = intel_vertical_texture_alignment_unit(brw, mt);
+ mt->valign = intel_vertical_texture_alignment_unit(brw, mt);
}
}
if (brw->gen >= 9) {
unsigned int i, j;
_mesa_get_format_block_size(mt->format, &i, &j);
- mt->align_w /= i;
- mt->align_h /= j;
+ mt->halign /= i;
+ mt->valign /= j;
}
if ((layout_flags & MIPTREE_LAYOUT_FOR_BO) == 0)
void
vec4_visitor::pack_uniform_registers()
{
- bool uniform_used[this->uniforms];
+ uint8_t chans_used[this->uniforms];
int new_loc[this->uniforms];
int new_chan[this->uniforms];
- memset(uniform_used, 0, sizeof(uniform_used));
+ memset(chans_used, 0, sizeof(chans_used));
memset(new_loc, 0, sizeof(new_loc));
memset(new_chan, 0, sizeof(new_chan));
* to pull constants, and from some GLSL code generators like wine.
*/
foreach_block_and_inst(block, vec4_instruction, inst, cfg) {
+ unsigned readmask;
+ switch (inst->opcode) {
+ case VEC4_OPCODE_PACK_BYTES:
+ case BRW_OPCODE_DP4:
+ case BRW_OPCODE_DPH:
+ readmask = 0xf;
+ break;
+ case BRW_OPCODE_DP3:
+ readmask = 0x7;
+ break;
+ case BRW_OPCODE_DP2:
+ readmask = 0x3;
+ break;
+ default:
+ readmask = inst->dst.writemask;
+ break;
+ }
+
for (int i = 0 ; i < 3; i++) {
- if (inst->src[i].file != UNIFORM)
- continue;
+ if (inst->src[i].file != UNIFORM)
+ continue;
- uniform_used[inst->src[i].reg] = true;
+ int reg = inst->src[i].reg;
+ for (int c = 0; c < 4; c++) {
+ if (!(readmask & (1 << c)))
+ continue;
+
+ chans_used[reg] = MAX2(chans_used[reg],
+ BRW_GET_SWZ(inst->src[i].swizzle, c) + 1);
+ }
}
}
*/
for (int src = 0; src < uniforms; src++) {
assert(src < uniform_array_size);
- int size = this->uniform_vector_size[src];
+ int size = chans_used[src];
- if (!uniform_used[src]) {
- this->uniform_vector_size[src] = 0;
- continue;
- }
+ if (size == 0)
+ continue;
int dst;
/* Find the lowest place we can slot this uniform in. */
for (dst = 0; dst < src; dst++) {
- if (this->uniform_vector_size[dst] + size <= 4)
+ if (chans_used[dst] + size <= 4)
break;
}
new_chan[src] = 0;
} else {
new_loc[src] = dst;
- new_chan[src] = this->uniform_vector_size[dst];
+ new_chan[src] = chans_used[dst];
/* Move the references to the data */
for (int j = 0; j < size; j++) {
stage_prog_data->param[src * 4 + j];
}
- this->uniform_vector_size[dst] += size;
- this->uniform_vector_size[src] = 0;
+ chans_used[dst] += size;
+ chans_used[src] = 0;
}
new_uniform_count = MAX2(new_uniform_count, dst + 1);
*/
if (devinfo->gen < 6 && this->uniforms == 0) {
assert(this->uniforms < this->uniform_array_size);
- this->uniform_vector_size[this->uniforms] = 1;
stage_prog_data->param =
reralloc(NULL, stage_prog_data->param, const gl_constant_value *, 4);
this->first_non_payload_grf = reg;
}
-void
-vec4_visitor::assign_binding_table_offsets()
-{
- assign_common_binding_table_offsets(0);
-}
-
src_reg
vec4_visitor::get_timestamp()
{
bool
vec4_visitor::run()
{
- bool use_vec4_nir =
- compiler->glsl_compiler_options[stage].NirOptions != NULL;
-
- sanity_param_count = prog->Parameters->NumParameters;
-
if (shader_time_index >= 0)
emit_shader_time_begin();
- assign_binding_table_offsets();
-
emit_prolog();
- if (use_vec4_nir) {
- assert(prog->nir != NULL);
- emit_nir_code();
- if (failed)
- return false;
- } else if (shader) {
- /* Generate VS IR for main(). (the visitor only descends into
- * functions called "main").
- */
- visit_instructions(shader->base.ir);
- } else {
- emit_program_code();
- }
+ emit_nir_code();
+ if (failed)
+ return false;
base_ir = NULL;
emit_thread_end();
* that we have reladdr computations available for CSE, since we'll
* often do repeated subexpressions for those.
*/
- if (shader || use_vec4_nir) {
- move_grf_array_access_to_scratch();
- move_uniform_array_access_to_pull_constants();
- } else {
- /* The ARB_vertex_program frontend emits pull constant loads directly
- * rather than using reladdr, so we don't need to walk through all the
- * instructions looking for things to move. There isn't anything.
- *
- * We do still need to split things to vec4 size.
- */
- split_uniform_registers();
- }
+ move_grf_array_access_to_scratch();
+ move_uniform_array_access_to_pull_constants();
+
pack_uniform_registers();
move_push_constants_to_pull_constants();
split_virtual_grfs();
\
if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER) && this_progress) { \
char filename[64]; \
- snprintf(filename, 64, "%s-%04d-%02d-%02d-" #pass, \
- stage_abbrev, shader_prog ? shader_prog->Name : 0, iteration, pass_num); \
+ snprintf(filename, 64, "%s-%s-%02d-%02d-" #pass, \
+ stage_abbrev, nir->info.name, iteration, pass_num); \
\
backend_shader::dump_instructions(filename); \
} \
if (unlikely(INTEL_DEBUG & DEBUG_OPTIMIZER)) {
char filename[64];
- snprintf(filename, 64, "%s-%04d-00-start",
- stage_abbrev, shader_prog ? shader_prog->Name : 0);
+ snprintf(filename, 64, "%s-%s-00-start",
+ stage_abbrev, nir->info.name);
backend_shader::dump_instructions(filename);
}
brw_get_scratch_size(last_scratch * REG_SIZE);
}
- /* If any state parameters were appended, then ParameterValues could have
- * been realloced, in which case the driver uniform storage set up by
- * _mesa_associate_uniform_storage() would point to freed memory. Make
- * sure that didn't happen.
- */
- assert(sanity_param_count == prog->Parameters->NumParameters);
-
return !failed;
}
if (unlikely(INTEL_DEBUG & DEBUG_VS) && shader->base.ir)
brw_dump_ir("vertex", prog, &shader->base, &vp->Base);
- if (!vp->Base.nir &&
- (brw->intelScreen->compiler->scalar_vs ||
- brw->intelScreen->compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions != NULL)) {
- /* Normally we generate NIR in LinkShader() or
- * ProgramStringNotify(), but Mesa's fixed-function vertex program
- * handling doesn't notify the driver at all. Just do it here, at
- * the last minute, even though it's lame.
- */
- assert(vp->Base.Id == 0 && prog == NULL);
- vp->Base.nir =
- brw_create_nir(brw, NULL, &vp->Base, MESA_SHADER_VERTEX,
- brw->intelScreen->compiler->scalar_vs);
- }
-
if (brw->intelScreen->compiler->scalar_vs) {
prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
fs_visitor v(brw->intelScreen->compiler, brw,
- mem_ctx, MESA_SHADER_VERTEX, key,
- &prog_data->base.base, prog, &vp->Base,
- 8, st_index);
+ mem_ctx, key, &prog_data->base.base,
+ NULL, /* prog; Only used for TEXTURE_RECTANGLE on gen < 8 */
+ vp->Base.nir, 8, st_index);
if (!v.run_vs(brw_select_clip_planes(&brw->ctx))) {
if (prog) {
prog->LinkStatus = false;
prog_data->base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
vec4_vs_visitor v(brw->intelScreen->compiler, brw, key, prog_data,
- vp, prog, brw_select_clip_planes(&brw->ctx),
+ vp->Base.nir, brw_select_clip_planes(&brw->ctx),
mem_ctx, st_index,
!_mesa_is_gles3(&brw->ctx));
if (!v.run()) {
* Translates either GLSL IR or Mesa IR (for ARB_vertex_program and
* fixed-function) into VS IR.
*/
-class vec4_visitor : public backend_shader, public ir_visitor
+class vec4_visitor : public backend_shader
{
public:
vec4_visitor(const struct brw_compiler *compiler,
void *log_data,
- struct gl_program *prog,
const struct brw_sampler_prog_key_data *key,
struct brw_vue_prog_data *prog_data,
- struct gl_shader_program *shader_prog,
- gl_shader_stage stage,
+ nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index);
const struct brw_sampler_prog_key_data * const key_tex;
struct brw_vue_prog_data * const prog_data;
- unsigned int sanity_param_count;
-
char *fail_msg;
bool failed;
brw::vec4_live_variables *live_intervals;
dst_reg userplane[MAX_CLIP_PLANES];
- dst_reg *variable_storage(ir_variable *var);
-
- void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
-
bool need_all_constants_in_pull_buffer;
- /**
- * \name Visit methods
- *
- * As typical for the visitor pattern, there must be one \c visit method for
- * each concrete subclass of \c ir_instruction. Virtual base classes within
- * the hierarchy should not have \c visit methods.
- */
- /*@{*/
- virtual void visit(ir_variable *);
- virtual void visit(ir_loop *);
- virtual void visit(ir_loop_jump *);
- virtual void visit(ir_function_signature *);
- virtual void visit(ir_function *);
- virtual void visit(ir_expression *);
- virtual void visit(ir_swizzle *);
- virtual void visit(ir_dereference_variable *);
- virtual void visit(ir_dereference_array *);
- virtual void visit(ir_dereference_record *);
- virtual void visit(ir_assignment *);
- virtual void visit(ir_constant *);
- virtual void visit(ir_call *);
- virtual void visit(ir_return *);
- virtual void visit(ir_discard *);
- virtual void visit(ir_texture *);
- virtual void visit(ir_if *);
- virtual void visit(ir_emit_vertex *);
- virtual void visit(ir_end_primitive *);
- virtual void visit(ir_barrier *);
- /*@}*/
-
- src_reg result;
-
/* Regs for vertex results. Generated at ir_variable visiting time
* for the ir->location's used.
*/
dst_reg output_reg[BRW_VARYING_SLOT_COUNT];
const char *output_reg_annotation[BRW_VARYING_SLOT_COUNT];
int *uniform_size;
- int *uniform_vector_size;
- int uniform_array_size; /*< Size of uniform_[vector_]size arrays */
+ int uniform_array_size; /*< Size of the uniform_size array */
int uniforms;
src_reg shader_start_time;
- struct hash_table *variable_ht;
-
bool run();
void fail(const char *msg, ...);
- virtual void setup_vec4_uniform_value(unsigned param_offset,
- const gl_constant_value *values,
- unsigned n);
- void setup_uniform_values(ir_variable *ir);
- void setup_builtin_uniform_values(ir_variable *ir);
int setup_uniforms(int payload_reg);
bool reg_allocate_trivial();
int implied_mrf_writes(vec4_instruction *inst);
- bool try_rewrite_rhs_to_dst(ir_assignment *ir,
- dst_reg dst,
- src_reg src,
- vec4_instruction *pre_rhs_inst,
- vec4_instruction *last_rhs_inst);
-
- /** Walks an exec_list of ir_instruction and sends it through this visitor. */
- void visit_instructions(const exec_list *list);
-
void emit_vp_sop(enum brw_conditional_mod condmod, dst_reg dst,
src_reg src0, src_reg src1, src_reg one);
- void emit_bool_to_cond_code(ir_rvalue *ir, enum brw_predicate *predicate);
- void emit_if_gen6(ir_if *ir);
-
vec4_instruction *emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
src_reg src0, src_reg src1);
*/
src_reg emit_uniformize(const src_reg &src);
- void emit_block_move(dst_reg *dst, src_reg *src,
- const struct glsl_type *type, brw_predicate predicate);
-
- void emit_constant_values(dst_reg *dst, ir_constant *value);
-
/**
* Emit the correct dot-product instruction for the type of arguments
*/
void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
- void emit_scalar(ir_instruction *ir, enum prog_opcode op,
- dst_reg dst, src_reg src0);
-
- void emit_scalar(ir_instruction *ir, enum prog_opcode op,
- dst_reg dst, src_reg src0, src_reg src1);
-
src_reg fix_3src_operand(const src_reg &src);
src_reg resolve_source_modifiers(const src_reg &src);
src_reg emit_resolve_reladdr(int scratch_loc[], bblock_t *block,
vec4_instruction *inst, src_reg src);
- bool try_emit_mad(ir_expression *ir);
- bool try_emit_b2f_of_compare(ir_expression *ir);
void resolve_ud_negate(src_reg *reg);
- void resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg);
src_reg get_timestamp();
- bool process_move_condition(ir_rvalue *ir);
-
void dump_instruction(backend_instruction *inst);
void dump_instruction(backend_instruction *inst, FILE *file);
- void visit_atomic_counter_intrinsic(ir_call *ir);
-
bool is_high_sampler(src_reg sampler);
virtual void emit_nir_code();
- virtual void nir_setup_inputs(nir_shader *shader);
- virtual void nir_setup_uniforms(nir_shader *shader);
- virtual void nir_setup_uniform(nir_variable *var);
- virtual void nir_setup_builtin_uniform(nir_variable *var);
+ virtual void nir_setup_inputs();
+ virtual void nir_setup_uniforms();
virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
- virtual void nir_setup_system_values(nir_shader *shader);
+ virtual void nir_setup_system_values();
virtual void nir_emit_impl(nir_function_impl *impl);
virtual void nir_emit_cf_list(exec_list *list);
virtual void nir_emit_if(nir_if *if_stmt);
bool interleaved);
void setup_payload_interference(struct ra_graph *g, int first_payload_node,
int reg_node_count);
- virtual void assign_binding_table_offsets();
virtual void setup_payload() = 0;
virtual void emit_prolog() = 0;
- virtual void emit_program_code() = 0;
virtual void emit_thread_end() = 0;
virtual void emit_urb_write_header(int mrf) = 0;
virtual vec4_instruction *emit_urb_write_opcode(bool complete) = 0;
- virtual int compute_array_stride(ir_dereference_array *ir);
virtual void gs_emit_vertex(int stream_id);
virtual void gs_end_primitive();
namespace brw {
void
-vec4_gs_visitor::nir_setup_inputs(nir_shader *shader)
+vec4_gs_visitor::nir_setup_inputs()
{
- nir_inputs = ralloc_array(mem_ctx, src_reg, shader->num_inputs);
+ nir_inputs = ralloc_array(mem_ctx, src_reg, nir->num_inputs);
- foreach_list_typed(nir_variable, var, node, &shader->inputs) {
+ foreach_list_typed(nir_variable, var, node, &nir->inputs) {
int offset = var->data.driver_location;
if (var->type->base_type == GLSL_TYPE_ARRAY) {
/* Geometry shader inputs are arrays, but they use an unusual array
dst_reg *reg;
switch (instr->intrinsic) {
+ case nir_intrinsic_load_primitive_id:
+ /* We'll just read g1 directly; don't create a temporary. */
+ break;
+
case nir_intrinsic_load_invocation_id:
reg = &this->nir_system_values[SYSTEM_VALUE_INVOCATION_ID];
if (reg->file == BAD_FILE)
retype(get_nir_src(instr->src[0], 1), BRW_REGISTER_TYPE_UD);
break;
+ case nir_intrinsic_load_primitive_id:
+ assert(c->prog_data.include_primitive_id);
+ dest = get_nir_dest(instr->dest, BRW_REGISTER_TYPE_D);
+ emit(MOV(dest, retype(brw_vec4_grf(1, 0), BRW_REGISTER_TYPE_D)));
+ break;
+
case nir_intrinsic_load_invocation_id: {
src_reg invocation_id =
src_reg(nir_system_values[SYSTEM_VALUE_INVOCATION_ID]);
void *log_data,
struct brw_gs_compile *c,
struct gl_shader_program *prog,
+ nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index)
- : vec4_visitor(compiler, log_data,
- &c->gp->program.Base, &c->key.tex,
- &c->prog_data.base, prog, MESA_SHADER_GEOMETRY, mem_ctx,
+ : vec4_visitor(compiler, log_data, &c->key.tex,
+ &c->prog_data.base, shader, mem_ctx,
no_spills, shader_time_index),
+ shader_prog(prog),
c(c)
{
}
this->current_annotation = NULL;
}
-
-void
-vec4_gs_visitor::emit_program_code()
-{
- /* We don't support NV_geometry_program4. */
- unreachable("Unreached");
-}
-
-
void
vec4_gs_visitor::emit_thread_end()
{
}
-int
-vec4_gs_visitor::compute_array_stride(ir_dereference_array *ir)
-{
- /* Geometry shader inputs are arrays, but they use an unusual array layout:
- * instead of all array elements for a given geometry shader input being
- * stored consecutively, all geometry shader inputs are interleaved into
- * one giant array. At this stage of compilation, we assume that the
- * stride of the array is BRW_VARYING_SLOT_COUNT. Later,
- * setup_attributes() will remap our accesses to the actual input array.
- */
- ir_dereference_variable *deref_var = ir->array->as_dereference_variable();
- if (deref_var && deref_var->var->data.mode == ir_var_shader_in)
- return BRW_VARYING_SLOT_COUNT;
- else
- return vec4_visitor::compute_array_stride(ir);
-}
-
-
/**
* Write out a batch of 32 control data bits from the control_data_bits
* register to the URB.
this->current_annotation = NULL;
}
-void
-vec4_gs_visitor::visit(ir_emit_vertex *ir)
-{
- /* To ensure that we don't output more vertices than the shader specified
- * using max_vertices, do the logic inside a conditional of the form "if
- * (vertex_count < MAX)"
- */
- unsigned num_output_vertices = c->gp->program.VerticesOut;
- emit(CMP(dst_null_d(), this->vertex_count,
- src_reg(num_output_vertices), BRW_CONDITIONAL_L));
- emit(IF(BRW_PREDICATE_NORMAL));
-
- gs_emit_vertex(ir->stream_id());
-
- this->current_annotation = "emit vertex: increment vertex count";
- emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
- src_reg(1u)));
-
- emit(BRW_OPCODE_ENDIF);
-}
-
void
vec4_gs_visitor::gs_end_primitive()
{
emit(OR(dst_reg(this->control_data_bits), this->control_data_bits, mask));
}
-void
-vec4_gs_visitor::visit(ir_end_primitive *)
-{
- gs_end_primitive();
-}
-
static const unsigned *
generate_assembly(struct brw_context *brw,
struct gl_shader_program *shader_prog,
void *mem_ctx,
unsigned *final_assembly_size)
{
- if (unlikely(INTEL_DEBUG & DEBUG_GS)) {
- struct brw_shader *shader =
- (brw_shader *) prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
+ struct gl_shader *shader = prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
- brw_dump_ir("geometry", prog, &shader->base, NULL);
- }
+ if (unlikely(INTEL_DEBUG & DEBUG_GS))
+ brw_dump_ir("geometry", prog, shader, NULL);
int st_index = -1;
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
c->prog_data.base.dispatch_mode = DISPATCH_MODE_4X2_DUAL_OBJECT;
vec4_gs_visitor v(brw->intelScreen->compiler, brw,
- c, prog, mem_ctx, true /* no_spills */, st_index);
+ c, prog, shader->Program->nir,
+ mem_ctx, true /* no_spills */, st_index);
if (v.run()) {
return generate_assembly(brw, prog, &c->gp->program.Base,
&c->prog_data.base, mem_ctx, v.cfg,
if (brw->gen >= 7)
gs = new vec4_gs_visitor(brw->intelScreen->compiler, brw,
- c, prog, mem_ctx, false /* no_spills */,
+ c, prog, shader->Program->nir,
+ mem_ctx, false /* no_spills */,
st_index);
else
gs = new gen6_gs_visitor(brw->intelScreen->compiler, brw,
- c, prog, mem_ctx, false /* no_spills */,
+ c, prog, shader->Program->nir,
+ mem_ctx, false /* no_spills */,
st_index);
if (!gs->run()) {
void *log_data,
struct brw_gs_compile *c,
struct gl_shader_program *prog,
+ nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index);
- virtual void nir_setup_inputs(nir_shader *shader);
+ virtual void nir_setup_inputs();
virtual void nir_setup_system_value_intrinsic(nir_intrinsic_instr *instr);
protected:
const glsl_type *type);
virtual void setup_payload();
virtual void emit_prolog();
- virtual void emit_program_code();
virtual void emit_thread_end();
virtual void emit_urb_write_header(int mrf);
virtual vec4_instruction *emit_urb_write_opcode(bool complete);
- virtual int compute_array_stride(ir_dereference_array *ir);
- virtual void visit(ir_emit_vertex *);
- virtual void visit(ir_end_primitive *);
virtual void gs_emit_vertex(int stream_id);
virtual void gs_end_primitive();
virtual void nir_emit_intrinsic(nir_intrinsic_instr *instr);
void emit_control_data_bits();
void set_stream_control_data_bits(unsigned stream_id);
+ struct gl_shader_program *shader_prog;
+
src_reg vertex_count;
src_reg control_data_bits;
const struct brw_gs_compile * const c;
void
vec4_visitor::emit_nir_code()
{
- nir_shader *nir = prog->nir;
-
if (nir->num_inputs > 0)
- nir_setup_inputs(nir);
+ nir_setup_inputs();
if (nir->num_uniforms > 0)
- nir_setup_uniforms(nir);
+ nir_setup_uniforms();
- nir_setup_system_values(nir);
+ nir_setup_system_values();
/* get the main function and emit it */
nir_foreach_overload(nir, overload) {
}
void
-vec4_visitor::nir_setup_system_values(nir_shader *shader)
+vec4_visitor::nir_setup_system_values()
{
nir_system_values = ralloc_array(mem_ctx, dst_reg, SYSTEM_VALUE_MAX);
- nir_foreach_overload(shader, overload) {
+ nir_foreach_overload(nir, overload) {
assert(strcmp(overload->function->name, "main") == 0);
assert(overload->impl);
nir_foreach_block(overload->impl, setup_system_values_block, this);
}
void
-vec4_visitor::nir_setup_inputs(nir_shader *shader)
+vec4_visitor::nir_setup_inputs()
{
- nir_inputs = ralloc_array(mem_ctx, src_reg, shader->num_inputs);
+ nir_inputs = ralloc_array(mem_ctx, src_reg, nir->num_inputs);
- foreach_list_typed(nir_variable, var, node, &shader->inputs) {
+ foreach_list_typed(nir_variable, var, node, &nir->inputs) {
int offset = var->data.driver_location;
unsigned size = type_size_vec4(var->type);
for (unsigned i = 0; i < size; i++) {
}
void
-vec4_visitor::nir_setup_uniforms(nir_shader *shader)
-{
- uniforms = 0;
-
- if (shader_prog) {
- foreach_list_typed(nir_variable, var, node, &shader->uniforms) {
- /* UBO's, atomics and samplers don't take up space in the
- uniform file */
- if (var->interface_type != NULL || var->type->contains_atomic() ||
- type_size_vec4(var->type) == 0) {
- continue;
- }
-
- assert(uniforms < uniform_array_size);
- uniform_size[uniforms] = type_size_vec4(var->type);
-
- if (strncmp(var->name, "gl_", 3) == 0)
- nir_setup_builtin_uniform(var);
- else
- nir_setup_uniform(var);
- }
- } else {
- /* For ARB_vertex_program, only a single "parameters" variable is
- * generated to support uniform data.
- */
- nir_variable *var = (nir_variable *) shader->uniforms.get_head();
- assert(shader->uniforms.length() == 1 &&
- strcmp(var->name, "parameters") == 0);
-
- assert(uniforms < uniform_array_size);
- uniform_size[uniforms] = type_size_vec4(var->type);
-
- struct gl_program_parameter_list *plist = prog->Parameters;
- for (unsigned p = 0; p < plist->NumParameters; p++) {
- uniform_vector_size[uniforms] = plist->Parameters[p].Size;
-
- /* Parameters should be either vec4 uniforms or single component
- * constants; matrices and other larger types should have been broken
- * down earlier.
- */
- assert(uniform_vector_size[uniforms] <= 4);
-
- int i;
- for (i = 0; i < uniform_vector_size[uniforms]; i++) {
- stage_prog_data->param[uniforms * 4 + i] = &plist->ParameterValues[p][i];
- }
- for (; i < 4; i++) {
- static const gl_constant_value zero = { 0.0 };
- stage_prog_data->param[uniforms * 4 + i] = &zero;
- }
-
- uniforms++;
- }
- }
-}
-
-void
-vec4_visitor::nir_setup_uniform(nir_variable *var)
-{
- int namelen = strlen(var->name);
-
- /* The data for our (non-builtin) uniforms is stored in a series of
- * gl_uniform_driver_storage structs for each subcomponent that
- * glGetUniformLocation() could name. We know it's been set up in the same
- * order we'd walk the type, so walk the list of storage and find anything
- * with our name, or the prefix of a component that starts with our name.
- */
- for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
- struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
-
- if (storage->builtin)
- continue;
-
- if (strncmp(var->name, storage->name, namelen) != 0 ||
- (storage->name[namelen] != 0 &&
- storage->name[namelen] != '.' &&
- storage->name[namelen] != '[')) {
- continue;
- }
-
- gl_constant_value *components = storage->storage;
- unsigned vector_count = (MAX2(storage->array_elements, 1) *
- storage->type->matrix_columns);
-
- for (unsigned s = 0; s < vector_count; s++) {
- assert(uniforms < uniform_array_size);
- uniform_vector_size[uniforms] = storage->type->vector_elements;
-
- int i;
- for (i = 0; i < uniform_vector_size[uniforms]; i++) {
- stage_prog_data->param[uniforms * 4 + i] = components;
- components++;
- }
- for (; i < 4; i++) {
- static const gl_constant_value zero = { 0.0 };
- stage_prog_data->param[uniforms * 4 + i] = &zero;
- }
-
- uniforms++;
- }
- }
-}
-
-void
-vec4_visitor::nir_setup_builtin_uniform(nir_variable *var)
+vec4_visitor::nir_setup_uniforms()
{
- const nir_state_slot *const slots = var->state_slots;
- assert(var->state_slots != NULL);
-
- for (unsigned int i = 0; i < var->num_state_slots; i++) {
- /* This state reference has already been setup by ir_to_mesa,
- * but we'll get the same index back here. We can reference
- * ParameterValues directly, since unlike brw_fs.cpp, we never
- * add new state references during compile.
- */
- int index = _mesa_add_state_reference(prog->Parameters,
- (gl_state_index *)slots[i].tokens);
- gl_constant_value *values =
- &prog->Parameters->ParameterValues[index][0];
-
- assert(uniforms < uniform_array_size);
+ uniforms = nir->num_uniforms;
- for (unsigned j = 0; j < 4; j++)
- stage_prog_data->param[uniforms * 4 + j] =
- &values[GET_SWZ(slots[i].swizzle, j)];
-
- uniform_vector_size[uniforms] =
- (var->type->is_scalar() || var->type->is_vector() ||
- var->type->is_matrix() ? var->type->vector_elements : 4);
+ foreach_list_typed(nir_variable, var, node, &nir->uniforms) {
+ /* UBO's and atomics don't take up space in the uniform file */
+ if (var->interface_type != NULL || var->type->contains_atomic())
+ continue;
- uniforms++;
+ if (type_size_vec4(var->type) > 0)
+ uniform_size[var->data.driver_location] = type_size_vec4(var->type);
}
}
nir_const_value *const_uniform_block = nir_src_as_const_value(instr->src[0]);
unsigned ubo_index = const_uniform_block ? const_uniform_block->u[0] : 0;
- assert(shader->base.UniformBlocks[ubo_index].IsShaderStorage);
-
src_reg surf_index = src_reg(prog_data->base.binding_table.ubo_start +
ubo_index);
dst_reg result_dst = get_nir_dest(instr->dest);
brw_mark_surface_used(&prog_data->base,
prog_data->base.binding_table.ubo_start +
- shader_prog->NumBufferInterfaceBlocks - 1);
+ nir->info.num_ssbos - 1);
}
/* Offset */
*/
brw_mark_surface_used(&prog_data->base,
prog_data->base.binding_table.ubo_start +
- shader_prog->NumBufferInterfaceBlocks - 1);
+ nir->info.num_ssbos - 1);
}
src_reg offset_reg = src_reg(this, glsl_type::uint_type);
*/
brw_mark_surface_used(&prog_data->base,
prog_data->base.binding_table.ubo_start +
- shader_prog->NumBufferInterfaceBlocks - 1);
+ nir->info.num_ssbos - 1);
}
unsigned const_offset = instr->const_index[1];
break;
}
+ case nir_intrinsic_memory_barrier: {
+ const vec4_builder bld =
+ vec4_builder(this).at_end().annotate(current_annotation, base_ir);
+ const dst_reg tmp = bld.vgrf(BRW_REGISTER_TYPE_UD, 2);
+ bld.emit(SHADER_OPCODE_MEMORY_FENCE, tmp)
+ ->regs_written = 2;
+ break;
+ }
+
default:
unreachable("Unknown intrinsic");
}
*/
brw_mark_surface_used(&prog_data->base,
prog_data->base.binding_table.ubo_start +
- shader_prog->NumBufferInterfaceBlocks - 1);
+ nir->info.num_ssbos - 1);
}
src_reg offset = get_nir_src(instr->src[1], 1);
emit(VEC4_OPCODE_PACK_BYTES, dst, bytes);
}
-void
-vec4_visitor::visit_instructions(const exec_list *list)
-{
- foreach_in_list(ir_instruction, ir, list) {
- base_ir = ir;
- ir->accept(this);
- }
-}
-
/**
* Returns the minimum number of vec4 elements needed to pack a type.
*
this->swizzle = BRW_SWIZZLE_NOOP;
this->type = brw_type_for_base_type(type);
-}
-
-dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
-{
- init();
-
- this->file = GRF;
- this->reg = v->alloc.allocate(type_size_vec4(type));
-
- if (type->is_array() || type->is_record()) {
- this->writemask = WRITEMASK_XYZW;
- } else {
- this->writemask = (1 << type->vector_elements) - 1;
- }
-
- this->type = brw_type_for_base_type(type);
-}
-
-void
-vec4_visitor::setup_vec4_uniform_value(unsigned param_offset,
- const gl_constant_value *values,
- unsigned n)
-{
- static const gl_constant_value zero = { 0 };
-
- assert(param_offset % 4 == 0);
-
- for (unsigned i = 0; i < n; ++i)
- stage_prog_data->param[param_offset + i] = &values[i];
-
- for (unsigned i = n; i < 4; ++i)
- stage_prog_data->param[param_offset + i] = &zero;
-
- uniform_vector_size[param_offset / 4] = n;
-}
-
-/* Our support for uniforms is piggy-backed on the struct
- * gl_fragment_program, because that's where the values actually
- * get stored, rather than in some global gl_shader_program uniform
- * store.
- */
-void
-vec4_visitor::setup_uniform_values(ir_variable *ir)
-{
- int namelen = strlen(ir->name);
-
- /* The data for our (non-builtin) uniforms is stored in a series of
- * gl_uniform_driver_storage structs for each subcomponent that
- * glGetUniformLocation() could name. We know it's been set up in the same
- * order we'd walk the type, so walk the list of storage and find anything
- * with our name, or the prefix of a component that starts with our name.
- */
- for (unsigned u = 0; u < shader_prog->NumUniformStorage; u++) {
- struct gl_uniform_storage *storage = &shader_prog->UniformStorage[u];
-
- if (storage->builtin)
- continue;
-
- if (strncmp(ir->name, storage->name, namelen) != 0 ||
- (storage->name[namelen] != 0 &&
- storage->name[namelen] != '.' &&
- storage->name[namelen] != '[')) {
- continue;
- }
-
- const unsigned vector_count = (MAX2(storage->array_elements, 1) *
- storage->type->matrix_columns);
- const unsigned vector_size = storage->type->vector_elements;
-
- for (unsigned s = 0; s < vector_count; s++) {
- setup_vec4_uniform_value(uniforms * 4,
- &storage->storage[s * vector_size],
- vector_size);
- uniforms++;
- }
- }
-}
-
-/* Our support for builtin uniforms is even scarier than non-builtin.
- * It sits on top of the PROG_STATE_VAR parameters that are
- * automatically updated from GL context state.
- */
-void
-vec4_visitor::setup_builtin_uniform_values(ir_variable *ir)
-{
- const ir_state_slot *const slots = ir->get_state_slots();
- assert(slots != NULL);
-
- for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
- /* This state reference has already been setup by ir_to_mesa,
- * but we'll get the same index back here. We can reference
- * ParameterValues directly, since unlike brw_fs.cpp, we never
- * add new state references during compile.
- */
- int index = _mesa_add_state_reference(this->prog->Parameters,
- (gl_state_index *)slots[i].tokens);
- gl_constant_value *values =
- &this->prog->Parameters->ParameterValues[index][0];
-
- assert(this->uniforms < uniform_array_size);
-
- for (unsigned j = 0; j < 4; j++)
- stage_prog_data->param[this->uniforms * 4 + j] =
- &values[GET_SWZ(slots[i].swizzle, j)];
-
- this->uniform_vector_size[this->uniforms] =
- (ir->type->is_scalar() || ir->type->is_vector() ||
- ir->type->is_matrix() ? ir->type->vector_elements : 4);
-
- this->uniforms++;
- }
-}
-
-dst_reg *
-vec4_visitor::variable_storage(ir_variable *var)
-{
- return (dst_reg *)hash_table_find(this->variable_ht, var);
-}
-
-void
-vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir,
- enum brw_predicate *predicate)
-{
- ir_expression *expr = ir->as_expression();
-
- *predicate = BRW_PREDICATE_NORMAL;
-
- if (expr && expr->operation != ir_binop_ubo_load) {
- src_reg op[3];
- vec4_instruction *inst;
-
- assert(expr->get_num_operands() <= 3);
- for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
- expr->operands[i]->accept(this);
- op[i] = this->result;
-
- resolve_ud_negate(&op[i]);
- }
-
- switch (expr->operation) {
- case ir_unop_logic_not:
- inst = emit(AND(dst_null_d(), op[0], src_reg(1)));
- inst->conditional_mod = BRW_CONDITIONAL_Z;
- break;
-
- case ir_binop_logic_xor:
- if (devinfo->gen <= 5) {
- src_reg temp = src_reg(this, ir->type);
- emit(XOR(dst_reg(temp), op[0], op[1]));
- inst = emit(AND(dst_null_d(), temp, src_reg(1)));
- } else {
- inst = emit(XOR(dst_null_d(), op[0], op[1]));
- }
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- break;
-
- case ir_binop_logic_or:
- if (devinfo->gen <= 5) {
- src_reg temp = src_reg(this, ir->type);
- emit(OR(dst_reg(temp), op[0], op[1]));
- inst = emit(AND(dst_null_d(), temp, src_reg(1)));
- } else {
- inst = emit(OR(dst_null_d(), op[0], op[1]));
- }
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- break;
-
- case ir_binop_logic_and:
- if (devinfo->gen <= 5) {
- src_reg temp = src_reg(this, ir->type);
- emit(AND(dst_reg(temp), op[0], op[1]));
- inst = emit(AND(dst_null_d(), temp, src_reg(1)));
- } else {
- inst = emit(AND(dst_null_d(), op[0], op[1]));
- }
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- break;
-
- case ir_unop_f2b:
- if (devinfo->gen >= 6) {
- emit(CMP(dst_null_d(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
- } else {
- inst = emit(MOV(dst_null_f(), op[0]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- }
- break;
-
- case ir_unop_i2b:
- if (devinfo->gen >= 6) {
- emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
- } else {
- inst = emit(MOV(dst_null_d(), op[0]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- }
- break;
-
- case ir_binop_all_equal:
- if (devinfo->gen <= 5) {
- resolve_bool_comparison(expr->operands[0], &op[0]);
- resolve_bool_comparison(expr->operands[1], &op[1]);
- }
- inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
- *predicate = BRW_PREDICATE_ALIGN16_ALL4H;
- break;
-
- case ir_binop_any_nequal:
- if (devinfo->gen <= 5) {
- resolve_bool_comparison(expr->operands[0], &op[0]);
- resolve_bool_comparison(expr->operands[1], &op[1]);
- }
- inst = emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
- *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
- break;
-
- case ir_unop_any:
- if (devinfo->gen <= 5) {
- resolve_bool_comparison(expr->operands[0], &op[0]);
- }
- inst = emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
- *predicate = BRW_PREDICATE_ALIGN16_ANY4H;
- break;
-
- case ir_binop_greater:
- case ir_binop_gequal:
- case ir_binop_less:
- case ir_binop_lequal:
- case ir_binop_equal:
- case ir_binop_nequal:
- if (devinfo->gen <= 5) {
- resolve_bool_comparison(expr->operands[0], &op[0]);
- resolve_bool_comparison(expr->operands[1], &op[1]);
- }
- emit(CMP(dst_null_d(), op[0], op[1],
- brw_conditional_for_comparison(expr->operation)));
- break;
-
- case ir_triop_csel: {
- /* Expand the boolean condition into the flag register. */
- inst = emit(MOV(dst_null_d(), op[0]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
-
- /* Select which boolean to return. */
- dst_reg temp(this, expr->operands[1]->type);
- inst = emit(BRW_OPCODE_SEL, temp, op[1], op[2]);
- inst->predicate = BRW_PREDICATE_NORMAL;
-
- /* Expand the result to a condition code. */
- inst = emit(MOV(dst_null_d(), src_reg(temp)));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
- break;
- }
-
- default:
- unreachable("not reached");
- }
- return;
- }
-
- ir->accept(this);
-
- resolve_ud_negate(&this->result);
-
- vec4_instruction *inst = emit(AND(dst_null_d(), this->result, src_reg(1)));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
-}
-
-/**
- * Emit a gen6 IF statement with the comparison folded into the IF
- * instruction.
- */
-void
-vec4_visitor::emit_if_gen6(ir_if *ir)
-{
- ir_expression *expr = ir->condition->as_expression();
-
- if (expr && expr->operation != ir_binop_ubo_load) {
- src_reg op[3];
- dst_reg temp;
-
- assert(expr->get_num_operands() <= 3);
- for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
- expr->operands[i]->accept(this);
- op[i] = this->result;
- }
-
- switch (expr->operation) {
- case ir_unop_logic_not:
- emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_Z));
- return;
-
- case ir_binop_logic_xor:
- emit(IF(op[0], op[1], BRW_CONDITIONAL_NZ));
- return;
-
- case ir_binop_logic_or:
- temp = dst_reg(this, glsl_type::bool_type);
- emit(OR(temp, op[0], op[1]));
- emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
- return;
-
- case ir_binop_logic_and:
- temp = dst_reg(this, glsl_type::bool_type);
- emit(AND(temp, op[0], op[1]));
- emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
- return;
-
- case ir_unop_f2b:
- emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
- return;
-
- case ir_unop_i2b:
- emit(IF(op[0], src_reg(0), BRW_CONDITIONAL_NZ));
- return;
-
- case ir_binop_greater:
- case ir_binop_gequal:
- case ir_binop_less:
- case ir_binop_lequal:
- case ir_binop_equal:
- case ir_binop_nequal:
- emit(IF(op[0], op[1],
- brw_conditional_for_comparison(expr->operation)));
- return;
-
- case ir_binop_all_equal:
- emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
- emit(IF(BRW_PREDICATE_ALIGN16_ALL4H));
- return;
-
- case ir_binop_any_nequal:
- emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
- emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
- return;
-
- case ir_unop_any:
- emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
- emit(IF(BRW_PREDICATE_ALIGN16_ANY4H));
- return;
-
- case ir_triop_csel: {
- /* Expand the boolean condition into the flag register. */
- vec4_instruction *inst = emit(MOV(dst_null_d(), op[0]));
- inst->conditional_mod = BRW_CONDITIONAL_NZ;
-
- /* Select which boolean to return. */
- dst_reg temp(this, expr->operands[1]->type);
- inst = emit(BRW_OPCODE_SEL, temp, op[1], op[2]);
- inst->predicate = BRW_PREDICATE_NORMAL;
-
- emit(IF(src_reg(temp), src_reg(0), BRW_CONDITIONAL_NZ));
- return;
- }
-
- default:
- unreachable("not reached");
- }
- return;
- }
-
- ir->condition->accept(this);
-
- emit(IF(this->result, src_reg(0), BRW_CONDITIONAL_NZ));
-}
-
-void
-vec4_visitor::visit(ir_variable *ir)
-{
- dst_reg *reg = NULL;
-
- if (variable_storage(ir))
- return;
-
- switch (ir->data.mode) {
- case ir_var_shader_in:
- assert(ir->data.location != -1);
- reg = new(mem_ctx) dst_reg(ATTR, ir->data.location);
- break;
-
- case ir_var_shader_out:
- assert(ir->data.location != -1);
- reg = new(mem_ctx) dst_reg(this, ir->type);
-
- for (int i = 0; i < type_size_vec4(ir->type); i++) {
- output_reg[ir->data.location + i] = *reg;
- output_reg[ir->data.location + i].reg_offset = i;
- output_reg_annotation[ir->data.location + i] = ir->name;
- }
- break;
-
- case ir_var_auto:
- case ir_var_temporary:
- reg = new(mem_ctx) dst_reg(this, ir->type);
- break;
-
- case ir_var_uniform:
- case ir_var_shader_storage:
- reg = new(this->mem_ctx) dst_reg(UNIFORM, this->uniforms);
-
- /* Thanks to the lower_ubo_reference pass, we will see only
- * ir_binop_{ubo,ssbo}_load expressions and not ir_dereference_variable
- * for UBO/SSBO variables, so no need for them to be in variable_ht.
- *
- * Some uniforms, such as samplers and atomic counters, have no actual
- * storage, so we should ignore them.
- */
- if (ir->is_in_buffer_block() || type_size_vec4(ir->type) == 0)
- return;
-
- /* Track how big the whole uniform variable is, in case we need to put a
- * copy of its data into pull constants for array access.
- */
- assert(this->uniforms < uniform_array_size);
- this->uniform_size[this->uniforms] = type_size_vec4(ir->type);
-
- if (!strncmp(ir->name, "gl_", 3)) {
- setup_builtin_uniform_values(ir);
- } else {
- setup_uniform_values(ir);
- }
- break;
-
- case ir_var_system_value:
- reg = make_reg_for_system_value(ir->data.location, ir->type);
- break;
-
- default:
- unreachable("not reached");
- }
-
- reg->type = brw_type_for_base_type(ir->type);
- hash_table_insert(this->variable_ht, reg, ir);
-}
-
-void
-vec4_visitor::visit(ir_loop *ir)
-{
- /* We don't want debugging output to print the whole body of the
- * loop as the annotation.
- */
- this->base_ir = NULL;
-
- emit(BRW_OPCODE_DO);
-
- visit_instructions(&ir->body_instructions);
-
- emit(BRW_OPCODE_WHILE);
-}
-
-void
-vec4_visitor::visit(ir_loop_jump *ir)
-{
- switch (ir->mode) {
- case ir_loop_jump::jump_break:
- emit(BRW_OPCODE_BREAK);
- break;
- case ir_loop_jump::jump_continue:
- emit(BRW_OPCODE_CONTINUE);
- break;
- }
-}
-
-
-void
-vec4_visitor::visit(ir_function_signature *)
-{
- unreachable("not reached");
-}
-
-void
-vec4_visitor::visit(ir_function *ir)
-{
- /* Ignore function bodies other than main() -- we shouldn't see calls to
- * them since they should all be inlined.
- */
- if (strcmp(ir->name, "main") == 0) {
- const ir_function_signature *sig;
- exec_list empty;
-
- sig = ir->matching_signature(NULL, &empty, false);
-
- assert(sig);
-
- visit_instructions(&sig->body);
- }
-}
-
-bool
-vec4_visitor::try_emit_mad(ir_expression *ir)
-{
- /* 3-src instructions were introduced in gen6. */
- if (devinfo->gen < 6)
- return false;
-
- /* MAD can only handle floating-point data. */
- if (ir->type->base_type != GLSL_TYPE_FLOAT)
- return false;
-
- ir_rvalue *nonmul;
- ir_expression *mul;
- bool mul_negate, mul_abs;
-
- for (int i = 0; i < 2; i++) {
- mul_negate = false;
- mul_abs = false;
-
- mul = ir->operands[i]->as_expression();
- nonmul = ir->operands[1 - i];
-
- if (mul && mul->operation == ir_unop_abs) {
- mul = mul->operands[0]->as_expression();
- mul_abs = true;
- } else if (mul && mul->operation == ir_unop_neg) {
- mul = mul->operands[0]->as_expression();
- mul_negate = true;
- }
-
- if (mul && mul->operation == ir_binop_mul)
- break;
- }
-
- if (!mul || mul->operation != ir_binop_mul)
- return false;
-
- nonmul->accept(this);
- src_reg src0 = fix_3src_operand(this->result);
-
- mul->operands[0]->accept(this);
- src_reg src1 = fix_3src_operand(this->result);
- src1.negate ^= mul_negate;
- src1.abs = mul_abs;
- if (mul_abs)
- src1.negate = false;
-
- mul->operands[1]->accept(this);
- src_reg src2 = fix_3src_operand(this->result);
- src2.abs = mul_abs;
- if (mul_abs)
- src2.negate = false;
-
- this->result = src_reg(this, ir->type);
- emit(BRW_OPCODE_MAD, dst_reg(this->result), src0, src1, src2);
-
- return true;
-}
-
-bool
-vec4_visitor::try_emit_b2f_of_compare(ir_expression *ir)
-{
- /* This optimization relies on CMP setting the destination to 0 when
- * false. Early hardware only sets the least significant bit, and
- * leaves the other bits undefined. So we can't use it.
- */
- if (devinfo->gen < 6)
- return false;
-
- ir_expression *const cmp = ir->operands[0]->as_expression();
-
- if (cmp == NULL)
- return false;
-
- switch (cmp->operation) {
- case ir_binop_less:
- case ir_binop_greater:
- case ir_binop_lequal:
- case ir_binop_gequal:
- case ir_binop_equal:
- case ir_binop_nequal:
- break;
-
- default:
- return false;
- }
-
- cmp->operands[0]->accept(this);
- const src_reg cmp_src0 = this->result;
-
- cmp->operands[1]->accept(this);
- const src_reg cmp_src1 = this->result;
-
- this->result = src_reg(this, ir->type);
-
- emit(CMP(dst_reg(this->result), cmp_src0, cmp_src1,
- brw_conditional_for_comparison(cmp->operation)));
-
- /* If the comparison is false, this->result will just happen to be zero.
- */
- vec4_instruction *const inst = emit(BRW_OPCODE_SEL, dst_reg(this->result),
- this->result, src_reg(1.0f));
- inst->predicate = BRW_PREDICATE_NORMAL;
- inst->predicate_inverse = true;
-
- return true;
-}
-
-vec4_instruction *
-vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
- src_reg src0, src_reg src1)
-{
- vec4_instruction *inst;
-
- if (devinfo->gen >= 6) {
- inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
- inst->conditional_mod = conditionalmod;
- } else {
- emit(CMP(dst, src0, src1, conditionalmod));
-
- inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
-
- return inst;
-}
-
-vec4_instruction *
-vec4_visitor::emit_lrp(const dst_reg &dst,
- const src_reg &x, const src_reg &y, const src_reg &a)
-{
- if (devinfo->gen >= 6) {
- /* Note that the instruction's argument order is reversed from GLSL
- * and the IR.
- */
- return emit(LRP(dst, fix_3src_operand(a), fix_3src_operand(y),
- fix_3src_operand(x)));
- } else {
- /* Earlier generations don't support three source operations, so we
- * need to emit x*(1-a) + y*a.
- */
- dst_reg y_times_a = dst_reg(this, glsl_type::vec4_type);
- dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type);
- dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type);
- y_times_a.writemask = dst.writemask;
- one_minus_a.writemask = dst.writemask;
- x_times_one_minus_a.writemask = dst.writemask;
-
- emit(MUL(y_times_a, y, a));
- emit(ADD(one_minus_a, negate(a), src_reg(1.0f)));
- emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));
- return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
- }
-}
-
-/**
- * Emits the instructions needed to perform a pull constant load. before_block
- * and before_inst can be NULL in which case the instruction will be appended
- * to the end of the instruction list.
- */
-void
-vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
- src_reg surf_index,
- src_reg offset_reg,
- bblock_t *before_block,
- vec4_instruction *before_inst)
-{
- assert((before_inst == NULL && before_block == NULL) ||
- (before_inst && before_block));
-
- vec4_instruction *pull;
-
- if (devinfo->gen >= 9) {
- /* Gen9+ needs a message header in order to use SIMD4x2 mode */
- src_reg header(this, glsl_type::uvec4_type, 2);
-
- pull = new(mem_ctx)
- vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
- dst_reg(header));
-
- if (before_inst)
- emit_before(before_block, before_inst, pull);
- else
- emit(pull);
-
- dst_reg index_reg = retype(offset(dst_reg(header), 1),
- offset_reg.type);
- pull = MOV(writemask(index_reg, WRITEMASK_X), offset_reg);
-
- if (before_inst)
- emit_before(before_block, before_inst, pull);
- else
- emit(pull);
-
- pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
- dst,
- surf_index,
- header);
- pull->mlen = 2;
- pull->header_size = 1;
- } else if (devinfo->gen >= 7) {
- dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
-
- grf_offset.type = offset_reg.type;
-
- pull = MOV(grf_offset, offset_reg);
-
- if (before_inst)
- emit_before(before_block, before_inst, pull);
- else
- emit(pull);
-
- pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
- dst,
- surf_index,
- src_reg(grf_offset));
- pull->mlen = 1;
- } else {
- pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD,
- dst,
- surf_index,
- offset_reg);
- pull->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
- pull->mlen = 1;
- }
-
- if (before_inst)
- emit_before(before_block, before_inst, pull);
- else
- emit(pull);
-}
-
-src_reg
-vec4_visitor::emit_uniformize(const src_reg &src)
-{
- const src_reg chan_index(this, glsl_type::uint_type);
- const dst_reg dst = retype(dst_reg(this, glsl_type::uint_type),
- src.type);
-
- emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dst_reg(chan_index))
- ->force_writemask_all = true;
- emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index)
- ->force_writemask_all = true;
-
- return src_reg(dst);
-}
-
-void
-vec4_visitor::visit(ir_expression *ir)
-{
- unsigned int operand;
- src_reg op[ARRAY_SIZE(ir->operands)];
- vec4_instruction *inst;
-
- if (ir->operation == ir_binop_add) {
- if (try_emit_mad(ir))
- return;
- }
-
- if (ir->operation == ir_unop_b2f) {
- if (try_emit_b2f_of_compare(ir))
- return;
- }
-
- /* Storage for our result. Ideally for an assignment we'd be using
- * the actual storage for the result here, instead.
- */
- dst_reg result_dst(this, ir->type);
- src_reg result_src(result_dst);
-
- if (ir->operation == ir_triop_csel) {
- ir->operands[1]->accept(this);
- op[1] = this->result;
- ir->operands[2]->accept(this);
- op[2] = this->result;
-
- enum brw_predicate predicate;
- emit_bool_to_cond_code(ir->operands[0], &predicate);
- inst = emit(BRW_OPCODE_SEL, result_dst, op[1], op[2]);
- inst->predicate = predicate;
- this->result = result_src;
- return;
- }
-
- for (operand = 0; operand < ir->get_num_operands(); operand++) {
- this->result.file = BAD_FILE;
- ir->operands[operand]->accept(this);
- if (this->result.file == BAD_FILE) {
- fprintf(stderr, "Failed to get tree for expression operand:\n");
- ir->operands[operand]->fprint(stderr);
- exit(1);
- }
- op[operand] = this->result;
-
- /* Matrix expression operands should have been broken down to vector
- * operations already.
- */
- assert(!ir->operands[operand]->type->is_matrix());
- }
-
- /* If nothing special happens, this is the result. */
- this->result = result_src;
-
- switch (ir->operation) {
- case ir_unop_logic_not:
- emit(NOT(result_dst, op[0]));
- break;
- case ir_unop_neg:
- op[0].negate = !op[0].negate;
- emit(MOV(result_dst, op[0]));
- break;
- case ir_unop_abs:
- op[0].abs = true;
- op[0].negate = false;
- emit(MOV(result_dst, op[0]));
- break;
-
- case ir_unop_sign:
- if (ir->type->is_float()) {
- /* AND(val, 0x80000000) gives the sign bit.
- *
- * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
- * zero.
- */
- emit(CMP(dst_null_f(), op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
-
- op[0].type = BRW_REGISTER_TYPE_UD;
- result_dst.type = BRW_REGISTER_TYPE_UD;
- emit(AND(result_dst, op[0], src_reg(0x80000000u)));
-
- inst = emit(OR(result_dst, src_reg(result_dst), src_reg(0x3f800000u)));
- inst->predicate = BRW_PREDICATE_NORMAL;
-
- this->result.type = BRW_REGISTER_TYPE_F;
- } else {
- /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
- * -> non-negative val generates 0x00000000.
- * Predicated OR sets 1 if val is positive.
- */
- emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_G));
-
- emit(ASR(result_dst, op[0], src_reg(31)));
-
- inst = emit(OR(result_dst, src_reg(result_dst), src_reg(1)));
- inst->predicate = BRW_PREDICATE_NORMAL;
- }
- break;
-
- case ir_unop_rcp:
- emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
- break;
-
- case ir_unop_exp2:
- emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
- break;
- case ir_unop_log2:
- emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
- break;
- case ir_unop_exp:
- case ir_unop_log:
- unreachable("not reached: should be handled by ir_explog_to_explog2");
- case ir_unop_sin:
- emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
- break;
- case ir_unop_cos:
- emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
- break;
-
- case ir_unop_dFdx:
- case ir_unop_dFdx_coarse:
- case ir_unop_dFdx_fine:
- case ir_unop_dFdy:
- case ir_unop_dFdy_coarse:
- case ir_unop_dFdy_fine:
- unreachable("derivatives not valid in vertex shader");
-
- case ir_unop_bitfield_reverse:
- emit(BFREV(result_dst, op[0]));
- break;
- case ir_unop_bit_count:
- emit(CBIT(result_dst, op[0]));
- break;
- case ir_unop_find_msb: {
- src_reg temp = src_reg(this, glsl_type::uint_type);
-
- inst = emit(FBH(dst_reg(temp), op[0]));
- inst->dst.writemask = WRITEMASK_XYZW;
-
- /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
- * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
- * subtract the result from 31 to convert the MSB count into an LSB count.
- */
-
- /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
- temp.swizzle = BRW_SWIZZLE_NOOP;
- emit(MOV(result_dst, temp));
-
- src_reg src_tmp = src_reg(result_dst);
- emit(CMP(dst_null_d(), src_tmp, src_reg(-1), BRW_CONDITIONAL_NZ));
-
- src_tmp.negate = true;
- inst = emit(ADD(result_dst, src_tmp, src_reg(31)));
- inst->predicate = BRW_PREDICATE_NORMAL;
- break;
- }
- case ir_unop_find_lsb:
- emit(FBL(result_dst, op[0]));
- break;
- case ir_unop_saturate:
- inst = emit(MOV(result_dst, op[0]));
- inst->saturate = true;
- break;
-
- case ir_unop_noise:
- unreachable("not reached: should be handled by lower_noise");
-
- case ir_unop_subroutine_to_int:
- emit(MOV(result_dst, op[0]));
- break;
-
- case ir_unop_ssbo_unsized_array_length:
- unreachable("not reached: should be handled by lower_ubo_reference");
- break;
-
- case ir_binop_add:
- emit(ADD(result_dst, op[0], op[1]));
- break;
- case ir_binop_sub:
- unreachable("not reached: should be handled by ir_sub_to_add_neg");
-
- case ir_binop_mul:
- if (devinfo->gen < 8 && ir->type->is_integer()) {
- /* For integer multiplication, the MUL uses the low 16 bits of one of
- * the operands (src0 through SNB, src1 on IVB and later). The MACH
- * accumulates in the contribution of the upper 16 bits of that
- * operand. If we can determine that one of the args is in the low
- * 16 bits, though, we can just emit a single MUL.
- */
- if (ir->operands[0]->is_uint16_constant()) {
- if (devinfo->gen < 7)
- emit(MUL(result_dst, op[0], op[1]));
- else
- emit(MUL(result_dst, op[1], op[0]));
- } else if (ir->operands[1]->is_uint16_constant()) {
- if (devinfo->gen < 7)
- emit(MUL(result_dst, op[1], op[0]));
- else
- emit(MUL(result_dst, op[0], op[1]));
- } else {
- struct brw_reg acc = retype(brw_acc_reg(8), result_dst.type);
-
- emit(MUL(acc, op[0], op[1]));
- emit(MACH(dst_null_d(), op[0], op[1]));
- emit(MOV(result_dst, src_reg(acc)));
- }
- } else {
- emit(MUL(result_dst, op[0], op[1]));
- }
- break;
- case ir_binop_imul_high: {
- struct brw_reg acc = retype(brw_acc_reg(8), result_dst.type);
-
- emit(MUL(acc, op[0], op[1]));
- emit(MACH(result_dst, op[0], op[1]));
- break;
- }
- case ir_binop_div:
- /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
- assert(ir->type->is_integer());
- emit_math(SHADER_OPCODE_INT_QUOTIENT, result_dst, op[0], op[1]);
- break;
-
- case ir_binop_carry:
- unreachable("Should have been lowered by carry_to_arith().");
-
- case ir_binop_borrow:
- unreachable("Should have been lowered by borrow_to_arith().");
-
- case ir_binop_mod:
- /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
- assert(ir->type->is_integer());
- emit_math(SHADER_OPCODE_INT_REMAINDER, result_dst, op[0], op[1]);
- break;
-
- case ir_binop_less:
- case ir_binop_greater:
- case ir_binop_lequal:
- case ir_binop_gequal:
- case ir_binop_equal:
- case ir_binop_nequal: {
- if (devinfo->gen <= 5) {
- resolve_bool_comparison(ir->operands[0], &op[0]);
- resolve_bool_comparison(ir->operands[1], &op[1]);
- }
- emit(CMP(result_dst, op[0], op[1],
- brw_conditional_for_comparison(ir->operation)));
- break;
- }
-
- case ir_binop_all_equal:
- if (devinfo->gen <= 5) {
- resolve_bool_comparison(ir->operands[0], &op[0]);
- resolve_bool_comparison(ir->operands[1], &op[1]);
- }
-
- /* "==" operator producing a scalar boolean. */
- if (ir->operands[0]->type->is_vector() ||
- ir->operands[1]->type->is_vector()) {
- emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_Z));
- emit(MOV(result_dst, src_reg(0)));
- inst = emit(MOV(result_dst, src_reg(~0)));
- inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
- } else {
- emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_Z));
- }
- break;
- case ir_binop_any_nequal:
- if (devinfo->gen <= 5) {
- resolve_bool_comparison(ir->operands[0], &op[0]);
- resolve_bool_comparison(ir->operands[1], &op[1]);
- }
-
- /* "!=" operator producing a scalar boolean. */
- if (ir->operands[0]->type->is_vector() ||
- ir->operands[1]->type->is_vector()) {
- emit(CMP(dst_null_d(), op[0], op[1], BRW_CONDITIONAL_NZ));
-
- emit(MOV(result_dst, src_reg(0)));
- inst = emit(MOV(result_dst, src_reg(~0)));
- inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
- } else {
- emit(CMP(result_dst, op[0], op[1], BRW_CONDITIONAL_NZ));
- }
- break;
-
- case ir_unop_any:
- if (devinfo->gen <= 5) {
- resolve_bool_comparison(ir->operands[0], &op[0]);
- }
- emit(CMP(dst_null_d(), op[0], src_reg(0), BRW_CONDITIONAL_NZ));
- emit(MOV(result_dst, src_reg(0)));
-
- inst = emit(MOV(result_dst, src_reg(~0)));
- inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
- break;
-
- case ir_binop_logic_xor:
- emit(XOR(result_dst, op[0], op[1]));
- break;
-
- case ir_binop_logic_or:
- emit(OR(result_dst, op[0], op[1]));
- break;
-
- case ir_binop_logic_and:
- emit(AND(result_dst, op[0], op[1]));
- break;
-
- case ir_binop_dot:
- assert(ir->operands[0]->type->is_vector());
- assert(ir->operands[0]->type == ir->operands[1]->type);
- emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
- break;
-
- case ir_unop_sqrt:
- emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
- break;
- case ir_unop_rsq:
- emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
- break;
-
- case ir_unop_bitcast_i2f:
- case ir_unop_bitcast_u2f:
- this->result = op[0];
- this->result.type = BRW_REGISTER_TYPE_F;
- break;
-
- case ir_unop_bitcast_f2i:
- this->result = op[0];
- this->result.type = BRW_REGISTER_TYPE_D;
- break;
-
- case ir_unop_bitcast_f2u:
- this->result = op[0];
- this->result.type = BRW_REGISTER_TYPE_UD;
- break;
-
- case ir_unop_i2f:
- case ir_unop_i2u:
- case ir_unop_u2i:
- case ir_unop_u2f:
- case ir_unop_f2i:
- case ir_unop_f2u:
- emit(MOV(result_dst, op[0]));
- break;
- case ir_unop_b2i:
- case ir_unop_b2f:
- if (devinfo->gen <= 5) {
- resolve_bool_comparison(ir->operands[0], &op[0]);
- }
- emit(MOV(result_dst, negate(op[0])));
- break;
- case ir_unop_f2b:
- emit(CMP(result_dst, op[0], src_reg(0.0f), BRW_CONDITIONAL_NZ));
- break;
- case ir_unop_i2b:
- emit(CMP(result_dst, op[0], src_reg(0), BRW_CONDITIONAL_NZ));
- break;
-
- case ir_unop_trunc:
- emit(RNDZ(result_dst, op[0]));
- break;
- case ir_unop_ceil: {
- src_reg tmp = src_reg(this, ir->type);
- op[0].negate = !op[0].negate;
- emit(RNDD(dst_reg(tmp), op[0]));
- tmp.negate = true;
- emit(MOV(result_dst, tmp));
- }
- break;
- case ir_unop_floor:
- inst = emit(RNDD(result_dst, op[0]));
- break;
- case ir_unop_fract:
- inst = emit(FRC(result_dst, op[0]));
- break;
- case ir_unop_round_even:
- emit(RNDE(result_dst, op[0]));
- break;
-
- case ir_unop_get_buffer_size:
- unreachable("not reached: not implemented");
- break;
-
- case ir_binop_min:
- emit_minmax(BRW_CONDITIONAL_L, result_dst, op[0], op[1]);
- break;
- case ir_binop_max:
- emit_minmax(BRW_CONDITIONAL_GE, result_dst, op[0], op[1]);
- break;
-
- case ir_binop_pow:
- emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
- break;
-
- case ir_unop_bit_not:
- inst = emit(NOT(result_dst, op[0]));
- break;
- case ir_binop_bit_and:
- inst = emit(AND(result_dst, op[0], op[1]));
- break;
- case ir_binop_bit_xor:
- inst = emit(XOR(result_dst, op[0], op[1]));
- break;
- case ir_binop_bit_or:
- inst = emit(OR(result_dst, op[0], op[1]));
- break;
-
- case ir_binop_lshift:
- inst = emit(SHL(result_dst, op[0], op[1]));
- break;
-
- case ir_binop_rshift:
- if (ir->type->base_type == GLSL_TYPE_INT)
- inst = emit(ASR(result_dst, op[0], op[1]));
- else
- inst = emit(SHR(result_dst, op[0], op[1]));
- break;
-
- case ir_binop_bfm:
- emit(BFI1(result_dst, op[0], op[1]));
- break;
-
- case ir_binop_ubo_load: {
- ir_constant *const_uniform_block = ir->operands[0]->as_constant();
- ir_constant *const_offset_ir = ir->operands[1]->as_constant();
- unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
- src_reg offset;
-
- /* Now, load the vector from that offset. */
- assert(ir->type->is_vector() || ir->type->is_scalar());
-
- src_reg packed_consts = src_reg(this, glsl_type::vec4_type);
- packed_consts.type = result.type;
- src_reg surf_index;
-
- if (const_uniform_block) {
- /* The block index is a constant, so just emit the binding table entry
- * as an immediate.
- */
- surf_index = src_reg(prog_data->base.binding_table.ubo_start +
- const_uniform_block->value.u[0]);
- } else {
- /* The block index is not a constant. Evaluate the index expression
- * per-channel and add the base UBO index; we have to select a value
- * from any live channel.
- */
- surf_index = src_reg(this, glsl_type::uint_type);
- emit(ADD(dst_reg(surf_index), op[0],
- src_reg(prog_data->base.binding_table.ubo_start)));
- surf_index = emit_uniformize(surf_index);
-
- /* Assume this may touch any UBO. It would be nice to provide
- * a tighter bound, but the array information is already lowered away.
- */
- brw_mark_surface_used(&prog_data->base,
- prog_data->base.binding_table.ubo_start +
- shader_prog->NumBufferInterfaceBlocks - 1);
- }
-
- if (const_offset_ir) {
- if (devinfo->gen >= 8) {
- /* Store the offset in a GRF so we can send-from-GRF. */
- offset = src_reg(this, glsl_type::int_type);
- emit(MOV(dst_reg(offset), src_reg(const_offset / 16)));
- } else {
- /* Immediates are fine on older generations since they'll be moved
- * to a (potentially fake) MRF at the generator level.
- */
- offset = src_reg(const_offset / 16);
- }
- } else {
- offset = src_reg(this, glsl_type::uint_type);
- emit(SHR(dst_reg(offset), op[1], src_reg(4u)));
- }
-
- emit_pull_constant_load_reg(dst_reg(packed_consts),
- surf_index,
- offset,
- NULL, NULL /* before_block/inst */);
-
- packed_consts.swizzle = brw_swizzle_for_size(ir->type->vector_elements);
- packed_consts.swizzle += BRW_SWIZZLE4(const_offset % 16 / 4,
- const_offset % 16 / 4,
- const_offset % 16 / 4,
- const_offset % 16 / 4);
-
- /* UBO bools are any nonzero int. We need to convert them to 0/~0. */
- if (ir->type->base_type == GLSL_TYPE_BOOL) {
- emit(CMP(result_dst, packed_consts, src_reg(0u),
- BRW_CONDITIONAL_NZ));
- } else {
- emit(MOV(result_dst, packed_consts));
- }
- break;
- }
-
- case ir_binop_vector_extract:
- unreachable("should have been lowered by vec_index_to_cond_assign");
-
- case ir_triop_fma:
- op[0] = fix_3src_operand(op[0]);
- op[1] = fix_3src_operand(op[1]);
- op[2] = fix_3src_operand(op[2]);
- /* Note that the instruction's argument order is reversed from GLSL
- * and the IR.
- */
- emit(MAD(result_dst, op[2], op[1], op[0]));
- break;
-
- case ir_triop_lrp:
- emit_lrp(result_dst, op[0], op[1], op[2]);
- break;
-
- case ir_triop_csel:
- unreachable("already handled above");
- break;
-
- case ir_triop_bfi:
- op[0] = fix_3src_operand(op[0]);
- op[1] = fix_3src_operand(op[1]);
- op[2] = fix_3src_operand(op[2]);
- emit(BFI2(result_dst, op[0], op[1], op[2]));
- break;
-
- case ir_triop_bitfield_extract:
- op[0] = fix_3src_operand(op[0]);
- op[1] = fix_3src_operand(op[1]);
- op[2] = fix_3src_operand(op[2]);
- /* Note that the instruction's argument order is reversed from GLSL
- * and the IR.
- */
- emit(BFE(result_dst, op[2], op[1], op[0]));
- break;
-
- case ir_triop_vector_insert:
- unreachable("should have been lowered by lower_vector_insert");
-
- case ir_quadop_bitfield_insert:
- unreachable("not reached: should be handled by "
- "bitfield_insert_to_bfm_bfi\n");
-
- case ir_quadop_vector:
- unreachable("not reached: should be handled by lower_quadop_vector");
-
- case ir_unop_pack_half_2x16:
- emit_pack_half_2x16(result_dst, op[0]);
- break;
- case ir_unop_unpack_half_2x16:
- emit_unpack_half_2x16(result_dst, op[0]);
- break;
- case ir_unop_unpack_unorm_4x8:
- emit_unpack_unorm_4x8(result_dst, op[0]);
- break;
- case ir_unop_unpack_snorm_4x8:
- emit_unpack_snorm_4x8(result_dst, op[0]);
- break;
- case ir_unop_pack_unorm_4x8:
- emit_pack_unorm_4x8(result_dst, op[0]);
- break;
- case ir_unop_pack_snorm_4x8:
- emit_pack_snorm_4x8(result_dst, op[0]);
- break;
- case ir_unop_pack_snorm_2x16:
- case ir_unop_pack_unorm_2x16:
- case ir_unop_unpack_snorm_2x16:
- case ir_unop_unpack_unorm_2x16:
- unreachable("not reached: should be handled by lower_packing_builtins");
- case ir_unop_unpack_half_2x16_split_x:
- case ir_unop_unpack_half_2x16_split_y:
- case ir_binop_pack_half_2x16_split:
- case ir_unop_interpolate_at_centroid:
- case ir_binop_interpolate_at_sample:
- case ir_binop_interpolate_at_offset:
- unreachable("not reached: should not occur in vertex shader");
- case ir_binop_ldexp:
- unreachable("not reached: should be handled by ldexp_to_arith()");
- case ir_unop_d2f:
- case ir_unop_f2d:
- case ir_unop_d2i:
- case ir_unop_i2d:
- case ir_unop_d2u:
- case ir_unop_u2d:
- case ir_unop_d2b:
- case ir_unop_pack_double_2x32:
- case ir_unop_unpack_double_2x32:
- case ir_unop_frexp_sig:
- case ir_unop_frexp_exp:
- unreachable("fp64 todo");
- }
-}
-
-
-void
-vec4_visitor::visit(ir_swizzle *ir)
-{
- /* Note that this is only swizzles in expressions, not those on the left
- * hand side of an assignment, which do write masking. See ir_assignment
- * for that.
- */
- const unsigned swz = brw_compose_swizzle(
- brw_swizzle_for_size(ir->type->vector_elements),
- BRW_SWIZZLE4(ir->mask.x, ir->mask.y, ir->mask.z, ir->mask.w));
-
- ir->val->accept(this);
- this->result = swizzle(this->result, swz);
-}
-
-void
-vec4_visitor::visit(ir_dereference_variable *ir)
-{
- const struct glsl_type *type = ir->type;
- dst_reg *reg = variable_storage(ir->var);
-
- if (!reg) {
- fail("Failed to find variable storage for %s\n", ir->var->name);
- this->result = src_reg(brw_null_reg());
- return;
- }
-
- this->result = src_reg(*reg);
-
- /* System values get their swizzle from the dst_reg writemask */
- if (ir->var->data.mode == ir_var_system_value)
- return;
-
- if (type->is_scalar() || type->is_vector() || type->is_matrix())
- this->result.swizzle = brw_swizzle_for_size(type->vector_elements);
-}
-
-
-int
-vec4_visitor::compute_array_stride(ir_dereference_array *ir)
-{
- /* Under normal circumstances array elements are stored consecutively, so
- * the stride is equal to the size of the array element.
- */
- return type_size_vec4(ir->type);
-}
-
-
-void
-vec4_visitor::visit(ir_dereference_array *ir)
-{
- ir_constant *constant_index;
- src_reg src;
- int array_stride = compute_array_stride(ir);
-
- constant_index = ir->array_index->constant_expression_value();
-
- ir->array->accept(this);
- src = this->result;
-
- if (constant_index) {
- src.reg_offset += constant_index->value.i[0] * array_stride;
- } else {
- /* Variable index array dereference. It eats the "vec4" of the
- * base of the array and an index that offsets the Mesa register
- * index.
- */
- ir->array_index->accept(this);
-
- src_reg index_reg;
-
- if (array_stride == 1) {
- index_reg = this->result;
- } else {
- index_reg = src_reg(this, glsl_type::int_type);
-
- emit(MUL(dst_reg(index_reg), this->result, src_reg(array_stride)));
- }
-
- if (src.reladdr) {
- src_reg temp = src_reg(this, glsl_type::int_type);
-
- emit(ADD(dst_reg(temp), *src.reladdr, index_reg));
-
- index_reg = temp;
- }
-
- src.reladdr = ralloc(mem_ctx, src_reg);
- memcpy(src.reladdr, &index_reg, sizeof(index_reg));
- }
-
- /* If the type is smaller than a vec4, replicate the last channel out. */
- if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
- src.swizzle = brw_swizzle_for_size(ir->type->vector_elements);
- else
- src.swizzle = BRW_SWIZZLE_NOOP;
- src.type = brw_type_for_base_type(ir->type);
-
- this->result = src;
-}
-
-void
-vec4_visitor::visit(ir_dereference_record *ir)
-{
- unsigned int i;
- const glsl_type *struct_type = ir->record->type;
- int offset = 0;
-
- ir->record->accept(this);
-
- for (i = 0; i < struct_type->length; i++) {
- if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
- break;
- offset += type_size_vec4(struct_type->fields.structure[i].type);
- }
-
- /* If the type is smaller than a vec4, replicate the last channel out. */
- if (ir->type->is_scalar() || ir->type->is_vector() || ir->type->is_matrix())
- this->result.swizzle = brw_swizzle_for_size(ir->type->vector_elements);
- else
- this->result.swizzle = BRW_SWIZZLE_NOOP;
- this->result.type = brw_type_for_base_type(ir->type);
-
- this->result.reg_offset += offset;
-}
-
-/**
- * We want to be careful in assignment setup to hit the actual storage
- * instead of potentially using a temporary like we might with the
- * ir_dereference handler.
- */
-static dst_reg
-get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
-{
- /* The LHS must be a dereference. If the LHS is a variable indexed array
- * access of a vector, it must be separated into a series conditional moves
- * before reaching this point (see ir_vec_index_to_cond_assign).
- */
- assert(ir->as_dereference());
- ir_dereference_array *deref_array = ir->as_dereference_array();
- if (deref_array) {
- assert(!deref_array->array->type->is_vector());
- }
-
- /* Use the rvalue deref handler for the most part. We'll ignore
- * swizzles in it and write swizzles using writemask, though.
- */
- ir->accept(v);
- return dst_reg(v->result);
-}
-
-void
-vec4_visitor::emit_block_move(dst_reg *dst, src_reg *src,
- const struct glsl_type *type,
- enum brw_predicate predicate)
-{
- if (type->base_type == GLSL_TYPE_STRUCT) {
- for (unsigned int i = 0; i < type->length; i++) {
- emit_block_move(dst, src, type->fields.structure[i].type, predicate);
- }
- return;
- }
-
- if (type->is_array()) {
- for (unsigned int i = 0; i < type->length; i++) {
- emit_block_move(dst, src, type->fields.array, predicate);
- }
- return;
- }
-
- if (type->is_matrix()) {
- const struct glsl_type *vec_type;
-
- vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
- type->vector_elements, 1);
-
- for (int i = 0; i < type->matrix_columns; i++) {
- emit_block_move(dst, src, vec_type, predicate);
- }
- return;
- }
-
- assert(type->is_scalar() || type->is_vector());
-
- dst->type = brw_type_for_base_type(type);
- src->type = dst->type;
-
- dst->writemask = (1 << type->vector_elements) - 1;
-
- src->swizzle = brw_swizzle_for_size(type->vector_elements);
-
- vec4_instruction *inst = emit(MOV(*dst, *src));
- inst->predicate = predicate;
-
- dst->reg_offset++;
- src->reg_offset++;
-}
-
-
-/* If the RHS processing resulted in an instruction generating a
- * temporary value, and it would be easy to rewrite the instruction to
- * generate its result right into the LHS instead, do so. This ends
- * up reliably removing instructions where it can be tricky to do so
- * later without real UD chain information.
- */
-bool
-vec4_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir,
- dst_reg dst,
- src_reg src,
- vec4_instruction *pre_rhs_inst,
- vec4_instruction *last_rhs_inst)
-{
- /* This could be supported, but it would take more smarts. */
- if (ir->condition)
- return false;
-
- if (pre_rhs_inst == last_rhs_inst)
- return false; /* No instructions generated to work with. */
-
- /* Make sure the last instruction generated our source reg. */
- if (src.file != GRF ||
- src.file != last_rhs_inst->dst.file ||
- src.reg != last_rhs_inst->dst.reg ||
- src.reg_offset != last_rhs_inst->dst.reg_offset ||
- src.reladdr ||
- src.abs ||
- src.negate ||
- last_rhs_inst->predicate != BRW_PREDICATE_NONE)
- return false;
-
- /* Check that that last instruction fully initialized the channels
- * we want to use, in the order we want to use them. We could
- * potentially reswizzle the operands of many instructions so that
- * we could handle out of order channels, but don't yet.
- */
-
- for (unsigned i = 0; i < 4; i++) {
- if (dst.writemask & (1 << i)) {
- if (!(last_rhs_inst->dst.writemask & (1 << i)))
- return false;
-
- if (BRW_GET_SWZ(src.swizzle, i) != i)
- return false;
- }
- }
-
- /* Success! Rewrite the instruction. */
- last_rhs_inst->dst.file = dst.file;
- last_rhs_inst->dst.reg = dst.reg;
- last_rhs_inst->dst.reg_offset = dst.reg_offset;
- last_rhs_inst->dst.reladdr = dst.reladdr;
- last_rhs_inst->dst.writemask &= dst.writemask;
-
- return true;
-}
-
-void
-vec4_visitor::visit(ir_assignment *ir)
-{
- dst_reg dst = get_assignment_lhs(ir->lhs, this);
- enum brw_predicate predicate = BRW_PREDICATE_NONE;
-
- if (!ir->lhs->type->is_scalar() &&
- !ir->lhs->type->is_vector()) {
- ir->rhs->accept(this);
- src_reg src = this->result;
-
- if (ir->condition) {
- emit_bool_to_cond_code(ir->condition, &predicate);
- }
-
- /* emit_block_move doesn't account for swizzles in the source register.
- * This should be ok, since the source register is a structure or an
- * array, and those can't be swizzled. But double-check to be sure.
- */
- assert(src.swizzle ==
- (ir->rhs->type->is_matrix()
- ? brw_swizzle_for_size(ir->rhs->type->vector_elements)
- : BRW_SWIZZLE_NOOP));
-
- emit_block_move(&dst, &src, ir->rhs->type, predicate);
- return;
- }
-
- /* Now we're down to just a scalar/vector with writemasks. */
- int i;
-
- vec4_instruction *pre_rhs_inst, *last_rhs_inst;
- pre_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
+}
- ir->rhs->accept(this);
+dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
+{
+ init();
- last_rhs_inst = (vec4_instruction *)this->instructions.get_tail();
+ this->file = GRF;
+ this->reg = v->alloc.allocate(type_size_vec4(type));
- int swizzles[4];
- int src_chan = 0;
+ if (type->is_array() || type->is_record()) {
+ this->writemask = WRITEMASK_XYZW;
+ } else {
+ this->writemask = (1 << type->vector_elements) - 1;
+ }
- assert(ir->lhs->type->is_vector() ||
- ir->lhs->type->is_scalar());
- dst.writemask = ir->write_mask;
+ this->type = brw_type_for_base_type(type);
+}
- /* Swizzle a small RHS vector into the channels being written.
- *
- * glsl ir treats write_mask as dictating how many channels are
- * present on the RHS while in our instructions we need to make
- * those channels appear in the slots of the vec4 they're written to.
- */
- for (int i = 0; i < 4; i++)
- swizzles[i] = (ir->write_mask & (1 << i) ? src_chan++ : 0);
+vec4_instruction *
+vec4_visitor::emit_minmax(enum brw_conditional_mod conditionalmod, dst_reg dst,
+ src_reg src0, src_reg src1)
+{
+ vec4_instruction *inst;
- src_reg src = swizzle(this->result,
- BRW_SWIZZLE4(swizzles[0], swizzles[1],
- swizzles[2], swizzles[3]));
+ if (devinfo->gen >= 6) {
+ inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
+ inst->conditional_mod = conditionalmod;
+ } else {
+ emit(CMP(dst, src0, src1, conditionalmod));
- if (try_rewrite_rhs_to_dst(ir, dst, src, pre_rhs_inst, last_rhs_inst)) {
- return;
+ inst = emit(BRW_OPCODE_SEL, dst, src0, src1);
+ inst->predicate = BRW_PREDICATE_NORMAL;
}
- if (ir->condition) {
- emit_bool_to_cond_code(ir->condition, &predicate);
- }
+ return inst;
+}
- for (i = 0; i < type_size_vec4(ir->lhs->type); i++) {
- vec4_instruction *inst = emit(MOV(dst, src));
- inst->predicate = predicate;
+vec4_instruction *
+vec4_visitor::emit_lrp(const dst_reg &dst,
+ const src_reg &x, const src_reg &y, const src_reg &a)
+{
+ if (devinfo->gen >= 6) {
+ /* Note that the instruction's argument order is reversed from GLSL
+ * and the IR.
+ */
+ return emit(LRP(dst, fix_3src_operand(a), fix_3src_operand(y),
+ fix_3src_operand(x)));
+ } else {
+ /* Earlier generations don't support three source operations, so we
+ * need to emit x*(1-a) + y*a.
+ */
+ dst_reg y_times_a = dst_reg(this, glsl_type::vec4_type);
+ dst_reg one_minus_a = dst_reg(this, glsl_type::vec4_type);
+ dst_reg x_times_one_minus_a = dst_reg(this, glsl_type::vec4_type);
+ y_times_a.writemask = dst.writemask;
+ one_minus_a.writemask = dst.writemask;
+ x_times_one_minus_a.writemask = dst.writemask;
- dst.reg_offset++;
- src.reg_offset++;
+ emit(MUL(y_times_a, y, a));
+ emit(ADD(one_minus_a, negate(a), src_reg(1.0f)));
+ emit(MUL(x_times_one_minus_a, x, src_reg(one_minus_a)));
+ return emit(ADD(dst, src_reg(x_times_one_minus_a), src_reg(y_times_a)));
}
}
+/**
+ * Emits the instructions needed to perform a pull constant load. before_block
+ * and before_inst can be NULL in which case the instruction will be appended
+ * to the end of the instruction list.
+ */
void
-vec4_visitor::emit_constant_values(dst_reg *dst, ir_constant *ir)
+vec4_visitor::emit_pull_constant_load_reg(dst_reg dst,
+ src_reg surf_index,
+ src_reg offset_reg,
+ bblock_t *before_block,
+ vec4_instruction *before_inst)
{
- if (ir->type->base_type == GLSL_TYPE_STRUCT) {
- foreach_in_list(ir_constant, field_value, &ir->components) {
- emit_constant_values(dst, field_value);
- }
- return;
- }
-
- if (ir->type->is_array()) {
- for (unsigned int i = 0; i < ir->type->length; i++) {
- emit_constant_values(dst, ir->array_elements[i]);
- }
- return;
- }
-
- if (ir->type->is_matrix()) {
- for (int i = 0; i < ir->type->matrix_columns; i++) {
- float *vec = &ir->value.f[i * ir->type->vector_elements];
+ assert((before_inst == NULL && before_block == NULL) ||
+ (before_inst && before_block));
- for (int j = 0; j < ir->type->vector_elements; j++) {
- dst->writemask = 1 << j;
- dst->type = BRW_REGISTER_TYPE_F;
+ vec4_instruction *pull;
- emit(MOV(*dst, src_reg(vec[j])));
- }
- dst->reg_offset++;
- }
- return;
- }
+ if (devinfo->gen >= 9) {
+ /* Gen9+ needs a message header in order to use SIMD4x2 mode */
+ src_reg header(this, glsl_type::uvec4_type, 2);
- int remaining_writemask = (1 << ir->type->vector_elements) - 1;
+ pull = new(mem_ctx)
+ vec4_instruction(VS_OPCODE_SET_SIMD4X2_HEADER_GEN9,
+ dst_reg(header));
- for (int i = 0; i < ir->type->vector_elements; i++) {
- if (!(remaining_writemask & (1 << i)))
- continue;
+ if (before_inst)
+ emit_before(before_block, before_inst, pull);
+ else
+ emit(pull);
- dst->writemask = 1 << i;
- dst->type = brw_type_for_base_type(ir->type);
+ dst_reg index_reg = retype(offset(dst_reg(header), 1),
+ offset_reg.type);
+ pull = MOV(writemask(index_reg, WRITEMASK_X), offset_reg);
- /* Find other components that match the one we're about to
- * write. Emits fewer instructions for things like vec4(0.5,
- * 1.5, 1.5, 1.5).
- */
- for (int j = i + 1; j < ir->type->vector_elements; j++) {
- if (ir->type->base_type == GLSL_TYPE_BOOL) {
- if (ir->value.b[i] == ir->value.b[j])
- dst->writemask |= (1 << j);
- } else {
- /* u, i, and f storage all line up, so no need for a
- * switch case for comparing each type.
- */
- if (ir->value.u[i] == ir->value.u[j])
- dst->writemask |= (1 << j);
- }
- }
+ if (before_inst)
+ emit_before(before_block, before_inst, pull);
+ else
+ emit(pull);
- switch (ir->type->base_type) {
- case GLSL_TYPE_FLOAT:
- emit(MOV(*dst, src_reg(ir->value.f[i])));
- break;
- case GLSL_TYPE_INT:
- emit(MOV(*dst, src_reg(ir->value.i[i])));
- break;
- case GLSL_TYPE_UINT:
- emit(MOV(*dst, src_reg(ir->value.u[i])));
- break;
- case GLSL_TYPE_BOOL:
- emit(MOV(*dst, src_reg(ir->value.b[i] != 0 ? ~0 : 0)));
- break;
- default:
- unreachable("Non-float/uint/int/bool constant");
- }
+ pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
+ dst,
+ surf_index,
+ header);
+ pull->mlen = 2;
+ pull->header_size = 1;
+ } else if (devinfo->gen >= 7) {
+ dst_reg grf_offset = dst_reg(this, glsl_type::int_type);
- remaining_writemask &= ~dst->writemask;
- }
- dst->reg_offset++;
-}
+ grf_offset.type = offset_reg.type;
-void
-vec4_visitor::visit(ir_constant *ir)
-{
- dst_reg dst = dst_reg(this, ir->type);
- this->result = src_reg(dst);
+ pull = MOV(grf_offset, offset_reg);
- emit_constant_values(&dst, ir);
-}
+ if (before_inst)
+ emit_before(before_block, before_inst, pull);
+ else
+ emit(pull);
-void
-vec4_visitor::visit_atomic_counter_intrinsic(ir_call *ir)
-{
- ir_dereference *deref = static_cast<ir_dereference *>(
- ir->actual_parameters.get_head());
- ir_variable *location = deref->variable_referenced();
- unsigned surf_index = (prog_data->base.binding_table.abo_start +
- location->data.binding);
-
- /* Calculate the surface offset */
- src_reg offset(this, glsl_type::uint_type);
- ir_dereference_array *deref_array = deref->as_dereference_array();
- if (deref_array) {
- deref_array->array_index->accept(this);
-
- src_reg tmp(this, glsl_type::uint_type);
- emit(MUL(dst_reg(tmp), this->result, ATOMIC_COUNTER_SIZE));
- emit(ADD(dst_reg(offset), tmp, location->data.atomic.offset));
+ pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD_GEN7,
+ dst,
+ surf_index,
+ src_reg(grf_offset));
+ pull->mlen = 1;
} else {
- offset = location->data.atomic.offset;
- }
-
- /* Emit the appropriate machine instruction */
- const char *callee = ir->callee->function_name();
- dst_reg dst = get_assignment_lhs(ir->return_deref, this);
-
- if (!strcmp("__intrinsic_atomic_read", callee)) {
- emit_untyped_surface_read(surf_index, dst, offset);
-
- } else if (!strcmp("__intrinsic_atomic_increment", callee)) {
- emit_untyped_atomic(BRW_AOP_INC, surf_index, dst, offset,
- src_reg(), src_reg());
-
- } else if (!strcmp("__intrinsic_atomic_predecrement", callee)) {
- emit_untyped_atomic(BRW_AOP_PREDEC, surf_index, dst, offset,
- src_reg(), src_reg());
+ pull = new(mem_ctx) vec4_instruction(VS_OPCODE_PULL_CONSTANT_LOAD,
+ dst,
+ surf_index,
+ offset_reg);
+ pull->base_mrf = FIRST_SPILL_MRF(devinfo->gen) + 1;
+ pull->mlen = 1;
}
- brw_mark_surface_used(stage_prog_data, surf_index);
+ if (before_inst)
+ emit_before(before_block, before_inst, pull);
+ else
+ emit(pull);
}
-void
-vec4_visitor::visit(ir_call *ir)
+src_reg
+vec4_visitor::emit_uniformize(const src_reg &src)
{
- const char *callee = ir->callee->function_name();
+ const src_reg chan_index(this, glsl_type::uint_type);
+ const dst_reg dst = retype(dst_reg(this, glsl_type::uint_type),
+ src.type);
- if (!strcmp("__intrinsic_atomic_read", callee) ||
- !strcmp("__intrinsic_atomic_increment", callee) ||
- !strcmp("__intrinsic_atomic_predecrement", callee)) {
- visit_atomic_counter_intrinsic(ir);
- } else {
- unreachable("Unsupported intrinsic.");
- }
+ emit(SHADER_OPCODE_FIND_LIVE_CHANNEL, dst_reg(chan_index))
+ ->force_writemask_all = true;
+ emit(SHADER_OPCODE_BROADCAST, dst, src, chan_index)
+ ->force_writemask_all = true;
+
+ return src_reg(dst);
}
src_reg
src_reg(inst->dst), sampler, dest_type);
}
-void
-vec4_visitor::visit(ir_texture *ir)
-{
- uint32_t sampler =
- _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, prog);
-
- ir_rvalue *nonconst_sampler_index =
- _mesa_get_sampler_array_nonconst_index(ir->sampler);
-
- /* Handle non-constant sampler array indexing */
- src_reg sampler_reg;
- if (nonconst_sampler_index) {
- /* The highest sampler which may be used by this operation is
- * the last element of the array. Mark it here, because the generator
- * doesn't have enough information to determine the bound.
- */
- uint32_t array_size = ir->sampler->as_dereference_array()
- ->array->type->array_size();
-
- uint32_t max_used = sampler + array_size - 1;
- if (ir->op == ir_tg4 && devinfo->gen < 8) {
- max_used += prog_data->base.binding_table.gather_texture_start;
- } else {
- max_used += prog_data->base.binding_table.texture_start;
- }
-
- brw_mark_surface_used(&prog_data->base, max_used);
-
- /* Emit code to evaluate the actual indexing expression */
- nonconst_sampler_index->accept(this);
- src_reg temp(this, glsl_type::uint_type);
- emit(ADD(dst_reg(temp), this->result, src_reg(sampler)));
- sampler_reg = emit_uniformize(temp);
- } else {
- /* Single sampler, or constant array index; the indexing expression
- * is just an immediate.
- */
- sampler_reg = src_reg(sampler);
- }
-
- /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother
- * emitting anything other than setting up the constant result.
- */
- if (ir->op == ir_tg4) {
- ir_constant *chan = ir->lod_info.component->as_constant();
- int swiz = GET_SWZ(key_tex->swizzles[sampler], chan->value.i[0]);
- if (swiz == SWIZZLE_ZERO || swiz == SWIZZLE_ONE) {
- dst_reg result(this, ir->type);
- this->result = src_reg(result);
- emit(MOV(result, src_reg(swiz == SWIZZLE_ONE ? 1.0f : 0.0f)));
- return;
- }
- }
-
- /* Should be lowered by do_lower_texture_projection */
- assert(!ir->projector);
-
- /* Should be lowered */
- assert(!ir->offset || !ir->offset->type->is_array());
-
- /* Generate code to compute all the subexpression trees. This has to be
- * done before loading any values into MRFs for the sampler message since
- * generating these values may involve SEND messages that need the MRFs.
- */
- src_reg coordinate;
- int coord_components = 0;
- if (ir->coordinate) {
- coord_components = ir->coordinate->type->vector_elements;
- ir->coordinate->accept(this);
- coordinate = this->result;
- }
-
- src_reg shadow_comparitor;
- if (ir->shadow_comparitor) {
- ir->shadow_comparitor->accept(this);
- shadow_comparitor = this->result;
- }
-
- bool has_nonconstant_offset = ir->offset && !ir->offset->as_constant();
- src_reg offset_value;
- if (has_nonconstant_offset) {
- ir->offset->accept(this);
- offset_value = src_reg(this->result);
- }
-
- src_reg lod, lod2, sample_index, mcs;
- switch (ir->op) {
- case ir_tex:
- lod = src_reg(0.0f);
- break;
- case ir_txf:
- case ir_txl:
- case ir_txs:
- ir->lod_info.lod->accept(this);
- lod = this->result;
- break;
- case ir_query_levels:
- lod = src_reg(0);
- break;
- case ir_txf_ms:
- ir->lod_info.sample_index->accept(this);
- sample_index = this->result;
-
- if (devinfo->gen >= 7 && key_tex->compressed_multisample_layout_mask & (1 << sampler))
- mcs = emit_mcs_fetch(ir->coordinate->type, coordinate, sampler_reg);
- else
- mcs = src_reg(0u);
- break;
- case ir_txd:
- ir->lod_info.grad.dPdx->accept(this);
- lod = this->result;
-
- ir->lod_info.grad.dPdy->accept(this);
- lod2 = this->result;
- break;
- case ir_txb:
- case ir_lod:
- case ir_tg4:
- case ir_texture_samples:
- break;
- }
-
- uint32_t constant_offset = 0;
- if (ir->offset != NULL && !has_nonconstant_offset) {
- constant_offset =
- brw_texture_offset(ir->offset->as_constant()->value.i,
- ir->offset->type->vector_elements);
- }
-
- /* Stuff the channel select bits in the top of the texture offset */
- if (ir->op == ir_tg4)
- constant_offset |=
- gather_channel( ir->lod_info.component->as_constant()->value.i[0],
- sampler) << 16;
-
- glsl_type const *type = ir->sampler->type;
- bool is_cube_array = type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
- type->sampler_array;
-
- this->result = src_reg(this, ir->type);
- dst_reg dest = dst_reg(this->result);
-
- emit_texture(ir->op, dest, ir->type, coordinate, coord_components,
- shadow_comparitor,
- lod, lod2, sample_index,
- constant_offset, offset_value,
- mcs, is_cube_array, sampler, sampler_reg);
-}
-
/**
* Apply workarounds for Gen6 gather with UINT/SINT
*/
}
}
-void
-vec4_visitor::visit(ir_return *)
-{
- unreachable("not reached");
-}
-
-void
-vec4_visitor::visit(ir_discard *)
-{
- unreachable("not reached");
-}
-
-void
-vec4_visitor::visit(ir_if *ir)
-{
- /* Don't point the annotation at the if statement, because then it plus
- * the then and else blocks get printed.
- */
- this->base_ir = ir->condition;
-
- if (devinfo->gen == 6) {
- emit_if_gen6(ir);
- } else {
- enum brw_predicate predicate;
- emit_bool_to_cond_code(ir->condition, &predicate);
- emit(IF(predicate));
- }
-
- visit_instructions(&ir->then_instructions);
-
- if (!ir->else_instructions.is_empty()) {
- this->base_ir = ir->condition;
- emit(BRW_OPCODE_ELSE);
-
- visit_instructions(&ir->else_instructions);
- }
-
- this->base_ir = ir->condition;
- emit(BRW_OPCODE_ENDIF);
-}
-
void
vec4_visitor::gs_emit_vertex(int stream_id)
{
unreachable("not reached");
}
-void
-vec4_visitor::visit(ir_emit_vertex *)
-{
- unreachable("not reached");
-}
-
void
vec4_visitor::gs_end_primitive()
{
unreachable("not reached");
}
-
-void
-vec4_visitor::visit(ir_end_primitive *)
-{
- unreachable("not reached");
-}
-
-void
-vec4_visitor::visit(ir_barrier *)
-{
- unreachable("not reached");
-}
-
void
vec4_visitor::emit_untyped_atomic(unsigned atomic_op, unsigned surf_index,
dst_reg dst, src_reg offset,
*reg = temp;
}
-/**
- * Resolve the result of a Gen4-5 CMP instruction to a proper boolean.
- *
- * CMP on Gen4-5 only sets the LSB of the result; the rest are undefined.
- * If we need a proper boolean value, we have to fix it up to be 0 or ~0.
- */
-void
-vec4_visitor::resolve_bool_comparison(ir_rvalue *rvalue, src_reg *reg)
-{
- assert(devinfo->gen <= 5);
-
- if (!rvalue->type->is_boolean())
- return;
-
- src_reg and_result = src_reg(this, rvalue->type);
- src_reg neg_result = src_reg(this, rvalue->type);
- emit(AND(dst_reg(and_result), *reg, src_reg(1)));
- emit(MOV(dst_reg(neg_result), negate(and_result)));
- *reg = neg_result;
-}
-
vec4_visitor::vec4_visitor(const struct brw_compiler *compiler,
void *log_data,
- struct gl_program *prog,
const struct brw_sampler_prog_key_data *key_tex,
struct brw_vue_prog_data *prog_data,
- struct gl_shader_program *shader_prog,
- gl_shader_stage stage,
+ nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index)
- : backend_shader(compiler, log_data, mem_ctx,
- shader_prog, prog, &prog_data->base, stage),
+ : backend_shader(compiler, log_data, mem_ctx, shader, &prog_data->base),
key_tex(key_tex),
prog_data(prog_data),
- sanity_param_count(0),
fail_msg(NULL),
first_non_payload_grf(0),
need_all_constants_in_pull_buffer(false),
this->current_annotation = NULL;
memset(this->output_reg_annotation, 0, sizeof(this->output_reg_annotation));
- this->variable_ht = hash_table_ctor(0,
- hash_table_pointer_hash,
- hash_table_pointer_compare);
-
this->virtual_grf_start = NULL;
this->virtual_grf_end = NULL;
this->live_intervals = NULL;
}
this->uniform_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
- this->uniform_vector_size = rzalloc_array(mem_ctx, int, this->uniform_array_size);
}
vec4_visitor::~vec4_visitor()
{
- hash_table_dtor(this->variable_ht);
}
+++ /dev/null
-/*
- * Copyright © 2012 Intel Corporation
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * the rights to use, copy, modify, merge, publish, distribute, sublicense,
- * and/or sell copies of the Software, and to permit persons to whom the
- * Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
- * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
- * IN THE SOFTWARE.
- */
-
-/** @file brw_vec4_vp.cpp
- *
- * A translator from Mesa IR to the i965 driver's Vec4 IR, used to implement
- * ARB_vertex_program and fixed-function vertex processing.
- */
-
-#include "brw_context.h"
-#include "brw_vec4.h"
-#include "brw_vs.h"
-extern "C" {
-#include "program/prog_parameter.h"
-#include "program/prog_print.h"
-}
-using namespace brw;
-
-void
-vec4_visitor::emit_vp_sop(enum brw_conditional_mod conditional_mod,
- dst_reg dst, src_reg src0, src_reg src1,
- src_reg one)
-{
- vec4_instruction *inst;
-
- inst = emit(CMP(dst_null_f(), src0, src1, conditional_mod));
-
- inst = emit(BRW_OPCODE_SEL, dst, one, src_reg(0.0f));
- inst->predicate = BRW_PREDICATE_NORMAL;
-}
-
-void
-vec4_vs_visitor::emit_program_code()
-{
- this->need_all_constants_in_pull_buffer = false;
-
- setup_vp_regs();
-
- /* Keep a reg with 1.0 around, for reuse by emit_vs_sop so that it can just
- * be:
- *
- * sel.f0 dst 1.0 0.0
- *
- * instead of
- *
- * mov dst 0.0
- * mov.f0 dst 1.0
- */
- src_reg one = src_reg(this, glsl_type::float_type);
- emit(MOV(dst_reg(one), src_reg(1.0f)));
-
- for (unsigned int insn = 0; insn < prog->NumInstructions; insn++) {
- const struct prog_instruction *vpi = &prog->Instructions[insn];
- base_ir = vpi;
-
- dst_reg dst;
- src_reg src[3];
-
- /* We always emit into a temporary destination register to avoid
- * aliasing issues.
- */
- dst = dst_reg(this, glsl_type::vec4_type);
-
- for (int i = 0; i < 3; i++)
- src[i] = get_vp_src_reg(vpi->SrcReg[i]);
-
- switch (vpi->Opcode) {
- case OPCODE_ABS:
- src[0].abs = true;
- src[0].negate = false;
- emit(MOV(dst, src[0]));
- break;
-
- case OPCODE_ADD:
- emit(ADD(dst, src[0], src[1]));
- break;
-
- case OPCODE_ARL:
- if (devinfo->gen >= 6) {
- dst.writemask = WRITEMASK_X;
- dst_reg dst_f = dst;
- dst_f.type = BRW_REGISTER_TYPE_F;
-
- emit(RNDD(dst_f, src[0]));
- emit(MOV(dst, src_reg(dst_f)));
- } else {
- emit(RNDD(dst, src[0]));
- }
- break;
-
- case OPCODE_DP3:
- emit(DP3(dst, src[0], src[1]));
- break;
- case OPCODE_DP4:
- emit(DP4(dst, src[0], src[1]));
- break;
- case OPCODE_DPH:
- emit(DPH(dst, src[0], src[1]));
- break;
-
- case OPCODE_DST: {
- dst_reg t = dst;
- if (vpi->DstReg.WriteMask & WRITEMASK_X) {
- t.writemask = WRITEMASK_X;
- emit(MOV(t, src_reg(1.0f)));
- }
- if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
- t.writemask = WRITEMASK_Y;
- emit(MUL(t, src[0], src[1]));
- }
- if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
- t.writemask = WRITEMASK_Z;
- emit(MOV(t, src[0]));
- }
- if (vpi->DstReg.WriteMask & WRITEMASK_W) {
- t.writemask = WRITEMASK_W;
- emit(MOV(t, src[1]));
- }
- break;
- }
-
- case OPCODE_EXP: {
- dst_reg result = dst;
- if (vpi->DstReg.WriteMask & WRITEMASK_X) {
- /* tmp_d = floor(src[0].x) */
- src_reg tmp_d = src_reg(this, glsl_type::ivec4_type);
- assert(tmp_d.type == BRW_REGISTER_TYPE_D);
- emit(RNDD(dst_reg(tmp_d), swizzle(src[0], BRW_SWIZZLE_XXXX)));
-
- /* result[0] = 2.0 ^ tmp */
- /* Adjust exponent for floating point: exp += 127 */
- dst_reg tmp_d_x(GRF, tmp_d.reg, glsl_type::int_type, WRITEMASK_X);
- emit(ADD(tmp_d_x, tmp_d, src_reg(127)));
-
- /* Install exponent and sign. Excess drops off the edge: */
- dst_reg res_d_x(GRF, result.reg, glsl_type::int_type, WRITEMASK_X);
- emit(BRW_OPCODE_SHL, res_d_x, tmp_d, src_reg(23));
- }
- if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
- result.writemask = WRITEMASK_Y;
- emit(FRC(result, src[0]));
- }
- if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
- result.writemask = WRITEMASK_Z;
- emit_math(SHADER_OPCODE_EXP2, result, src[0]);
- }
- if (vpi->DstReg.WriteMask & WRITEMASK_W) {
- result.writemask = WRITEMASK_W;
- emit(MOV(result, src_reg(1.0f)));
- }
- break;
- }
-
- case OPCODE_EX2:
- emit_math(SHADER_OPCODE_EXP2, dst, src[0]);
- break;
-
- case OPCODE_FLR:
- emit(RNDD(dst, src[0]));
- break;
-
- case OPCODE_FRC:
- emit(FRC(dst, src[0]));
- break;
-
- case OPCODE_LG2:
- emit_math(SHADER_OPCODE_LOG2, dst, src[0]);
- break;
-
- case OPCODE_LIT: {
- dst_reg result = dst;
- /* From the ARB_vertex_program spec:
- *
- * tmp = VectorLoad(op0);
- * if (tmp.x < 0) tmp.x = 0;
- * if (tmp.y < 0) tmp.y = 0;
- * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
- * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
- * result.x = 1.0;
- * result.y = tmp.x;
- * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
- * result.w = 1.0;
- *
- * Note that we don't do the clamping to +/- 128. We didn't in
- * brw_vs_emit.c either.
- */
- if (vpi->DstReg.WriteMask & WRITEMASK_XW) {
- result.writemask = WRITEMASK_XW;
- emit(MOV(result, src_reg(1.0f)));
- }
- if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
- result.writemask = WRITEMASK_YZ;
- emit(MOV(result, src_reg(0.0f)));
-
- src_reg tmp_x = swizzle(src[0], BRW_SWIZZLE_XXXX);
-
- emit(CMP(dst_null_d(), tmp_x, src_reg(0.0f), BRW_CONDITIONAL_G));
- emit(IF(BRW_PREDICATE_NORMAL));
-
- if (vpi->DstReg.WriteMask & WRITEMASK_Y) {
- result.writemask = WRITEMASK_Y;
- emit(MOV(result, tmp_x));
- }
-
- if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
- /* if (tmp.y < 0) tmp.y = 0; */
- src_reg tmp_y = swizzle(src[0], BRW_SWIZZLE_YYYY);
- result.writemask = WRITEMASK_Z;
- emit_minmax(BRW_CONDITIONAL_GE, result, tmp_y, src_reg(0.0f));
-
- src_reg clamped_y(result);
- clamped_y.swizzle = BRW_SWIZZLE_ZZZZ;
-
- src_reg tmp_w = swizzle(src[0], BRW_SWIZZLE_WWWW);
-
- emit_math(SHADER_OPCODE_POW, result, clamped_y, tmp_w);
- }
- emit(BRW_OPCODE_ENDIF);
- }
- break;
- }
-
- case OPCODE_LOG: {
- dst_reg result = dst;
- result.type = BRW_REGISTER_TYPE_UD;
- src_reg result_src = src_reg(result);
-
- src_reg arg0_ud = swizzle(src[0], BRW_SWIZZLE_XXXX);
- arg0_ud.type = BRW_REGISTER_TYPE_UD;
-
- /* Perform mant = frexpf(fabsf(x), &exp), adjust exp and mnt
- * according to spec:
- *
- * These almost look likey they could be joined up, but not really
- * practical:
- *
- * result[0].f = (x.i & ((1<<31)-1) >> 23) - 127
- * result[1].i = (x.i & ((1<<23)-1) + (127<<23)
- */
- if (vpi->DstReg.WriteMask & WRITEMASK_XZ) {
- result.writemask = WRITEMASK_X;
- emit(AND(result, arg0_ud, src_reg((1u << 31) - 1)));
- emit(BRW_OPCODE_SHR, result, result_src, src_reg(23u));
- src_reg result_d(result_src);
- result_d.type = BRW_REGISTER_TYPE_D; /* does it matter? */
- result.type = BRW_REGISTER_TYPE_F;
- emit(ADD(result, result_d, src_reg(-127)));
- }
-
- if (vpi->DstReg.WriteMask & WRITEMASK_YZ) {
- result.writemask = WRITEMASK_Y;
- result.type = BRW_REGISTER_TYPE_UD;
- emit(AND(result, arg0_ud, src_reg((1u << 23) - 1)));
- emit(OR(result, result_src, src_reg(127u << 23)));
- }
-
- if (vpi->DstReg.WriteMask & WRITEMASK_Z) {
- /* result[2] = result[0] + LOG2(result[1]); */
-
- /* Why bother? The above is just a hint how to do this with a
- * taylor series. Maybe we *should* use a taylor series as by
- * the time all the above has been done it's almost certainly
- * quicker than calling the mathbox, even with low precision.
- *
- * Options are:
- * - result[0] + mathbox.LOG2(result[1])
- * - mathbox.LOG2(arg0.x)
- * - result[0] + inline_taylor_approx(result[1])
- */
- result.type = BRW_REGISTER_TYPE_F;
- result.writemask = WRITEMASK_Z;
- src_reg result_x(result), result_y(result), result_z(result);
- result_x.swizzle = BRW_SWIZZLE_XXXX;
- result_y.swizzle = BRW_SWIZZLE_YYYY;
- result_z.swizzle = BRW_SWIZZLE_ZZZZ;
- emit_math(SHADER_OPCODE_LOG2, result, result_y);
- emit(ADD(result, result_z, result_x));
- }
-
- if (vpi->DstReg.WriteMask & WRITEMASK_W) {
- result.type = BRW_REGISTER_TYPE_F;
- result.writemask = WRITEMASK_W;
- emit(MOV(result, src_reg(1.0f)));
- }
- break;
- }
-
- case OPCODE_MAD: {
- src_reg temp = src_reg(this, glsl_type::vec4_type);
- emit(MUL(dst_reg(temp), src[0], src[1]));
- emit(ADD(dst, temp, src[2]));
- break;
- }
-
- case OPCODE_MAX:
- emit_minmax(BRW_CONDITIONAL_GE, dst, src[0], src[1]);
- break;
-
- case OPCODE_MIN:
- emit_minmax(BRW_CONDITIONAL_L, dst, src[0], src[1]);
- break;
-
- case OPCODE_MOV:
- emit(MOV(dst, src[0]));
- break;
-
- case OPCODE_MUL:
- emit(MUL(dst, src[0], src[1]));
- break;
-
- case OPCODE_POW:
- emit_math(SHADER_OPCODE_POW, dst, src[0], src[1]);
- break;
-
- case OPCODE_RCP:
- emit_math(SHADER_OPCODE_RCP, dst, src[0]);
- break;
-
- case OPCODE_RSQ:
- emit_math(SHADER_OPCODE_RSQ, dst, src[0]);
- break;
-
- case OPCODE_SGE:
- emit_vp_sop(BRW_CONDITIONAL_GE, dst, src[0], src[1], one);
- break;
-
- case OPCODE_SLT:
- emit_vp_sop(BRW_CONDITIONAL_L, dst, src[0], src[1], one);
- break;
-
- case OPCODE_SUB: {
- src_reg neg_src1 = src[1];
- neg_src1.negate = !src[1].negate;
- emit(ADD(dst, src[0], neg_src1));
- break;
- }
-
- case OPCODE_SWZ:
- /* Note that SWZ's extended swizzles are handled in the general
- * get_src_reg() code.
- */
- emit(MOV(dst, src[0]));
- break;
-
- case OPCODE_XPD: {
- src_reg t1 = src_reg(this, glsl_type::vec4_type);
- src_reg t2 = src_reg(this, glsl_type::vec4_type);
-
- emit(MUL(dst_reg(t1),
- swizzle(src[0], BRW_SWIZZLE_YZXW),
- swizzle(src[1], BRW_SWIZZLE_ZXYW)));
- emit(MUL(dst_reg(t2),
- swizzle(src[0], BRW_SWIZZLE_ZXYW),
- swizzle(src[1], BRW_SWIZZLE_YZXW)));
- t2.negate = true;
- emit(ADD(dst, t1, t2));
- break;
- }
-
- case OPCODE_END:
- break;
-
- default:
- assert(!"Unsupported opcode in vertex program");
- }
-
- /* Copy the temporary back into the actual destination register. */
- if (_mesa_num_inst_dst_regs(vpi->Opcode) != 0) {
- emit(MOV(get_vp_dst_reg(vpi->DstReg), src_reg(dst)));
- }
- }
-
- /* If we used relative addressing, we need to upload all constants as
- * pull constants. Do that now.
- */
- if (this->need_all_constants_in_pull_buffer) {
- const struct gl_program_parameter_list *params = vp->Base.Parameters;
- unsigned i;
- for (i = 0; i < params->NumParameters * 4; i++) {
- stage_prog_data->pull_param[i] =
- ¶ms->ParameterValues[i / 4][i % 4];
- }
- stage_prog_data->nr_pull_params = i;
- }
-}
-
-void
-vec4_vs_visitor::setup_vp_regs()
-{
- /* PROGRAM_TEMPORARY */
- int num_temp = prog->NumTemporaries;
- vp_temp_regs = rzalloc_array(mem_ctx, src_reg, num_temp);
- for (int i = 0; i < num_temp; i++)
- vp_temp_regs[i] = src_reg(this, glsl_type::vec4_type);
-
- /* PROGRAM_STATE_VAR etc. */
- struct gl_program_parameter_list *plist = vp->Base.Parameters;
- for (unsigned p = 0; p < plist->NumParameters; p++) {
- unsigned components = plist->Parameters[p].Size;
-
- /* Parameters should be either vec4 uniforms or single component
- * constants; matrices and other larger types should have been broken
- * down earlier.
- */
- assert(components <= 4);
-
- this->uniform_size[this->uniforms] = 1; /* 1 vec4 */
- this->uniform_vector_size[this->uniforms] = components;
- for (unsigned i = 0; i < 4; i++) {
- stage_prog_data->param[this->uniforms * 4 + i] = i >= components
- ? 0 : &plist->ParameterValues[p][i];
- }
- this->uniforms++; /* counted in vec4 units */
- }
-
- /* PROGRAM_OUTPUT */
- for (int slot = 0; slot < prog_data->vue_map.num_slots; slot++) {
- int varying = prog_data->vue_map.slot_to_varying[slot];
- if (varying == VARYING_SLOT_PSIZ)
- output_reg[varying] = dst_reg(this, glsl_type::float_type);
- else
- output_reg[varying] = dst_reg(this, glsl_type::vec4_type);
- assert(output_reg[varying].type == BRW_REGISTER_TYPE_F);
- }
-
- /* PROGRAM_ADDRESS */
- this->vp_addr_reg = src_reg(this, glsl_type::int_type);
- assert(this->vp_addr_reg.type == BRW_REGISTER_TYPE_D);
-}
-
-dst_reg
-vec4_vs_visitor::get_vp_dst_reg(const prog_dst_register &dst)
-{
- dst_reg result;
-
- assert(!dst.RelAddr);
-
- switch (dst.File) {
- case PROGRAM_TEMPORARY:
- result = dst_reg(vp_temp_regs[dst.Index]);
- break;
-
- case PROGRAM_OUTPUT:
- result = output_reg[dst.Index];
- break;
-
- case PROGRAM_ADDRESS: {
- assert(dst.Index == 0);
- result = dst_reg(this->vp_addr_reg);
- break;
- }
-
- case PROGRAM_UNDEFINED:
- return dst_null_f();
-
- default:
- unreachable("vec4_vp: bad destination register file");
- }
-
- result.writemask = dst.WriteMask;
- return result;
-}
-
-src_reg
-vec4_vs_visitor::get_vp_src_reg(const prog_src_register &src)
-{
- struct gl_program_parameter_list *plist = vp->Base.Parameters;
-
- src_reg result;
-
- assert(!src.Abs);
-
- switch (src.File) {
- case PROGRAM_UNDEFINED:
- return src_reg(brw_null_reg());
-
- case PROGRAM_TEMPORARY:
- result = vp_temp_regs[src.Index];
- break;
-
- case PROGRAM_INPUT:
- result = src_reg(ATTR, src.Index, glsl_type::vec4_type);
- result.type = BRW_REGISTER_TYPE_F;
- break;
-
- case PROGRAM_ADDRESS: {
- assert(src.Index == 0);
- result = this->vp_addr_reg;
- break;
- }
-
- case PROGRAM_STATE_VAR:
- case PROGRAM_CONSTANT:
- /* From the ARB_vertex_program specification:
- * "Relative addressing can only be used for accessing program
- * parameter arrays."
- */
- if (src.RelAddr) {
- /* Since we have no idea what the base of the array is, we need to
- * upload ALL constants as push constants.
- */
- this->need_all_constants_in_pull_buffer = true;
-
- /* Add the small constant index to the address register */
- src_reg reladdr = src_reg(this, glsl_type::int_type);
-
- dst_reg dst_reladdr = dst_reg(reladdr);
- dst_reladdr.writemask = WRITEMASK_X;
- emit(ADD(dst_reladdr, this->vp_addr_reg, src_reg(src.Index)));
-
- if (devinfo->gen < 6)
- emit(MUL(dst_reladdr, reladdr, src_reg(16)));
-
- #if 0
- assert(src.Index < this->uniforms);
- result = src_reg(dst_reg(UNIFORM, 0));
- result.type = BRW_REGISTER_TYPE_F;
- result.reladdr = new(mem_ctx) src_reg();
- memcpy(result.reladdr, &reladdr, sizeof(src_reg));
- #endif
-
- result = src_reg(this, glsl_type::vec4_type);
- src_reg surf_index = src_reg(unsigned(prog_data->base.binding_table.pull_constants_start));
-
- emit_pull_constant_load_reg(dst_reg(result),
- surf_index,
- reladdr,
- NULL, NULL /* before_block/inst */);
- break;
- }
-
- /* We actually want to look at the type in the Parameters list for this,
- * because this lets us upload constant builtin uniforms as actual
- * constants.
- */
- switch (plist->Parameters[src.Index].Type) {
- case PROGRAM_CONSTANT:
- result = src_reg(this, glsl_type::vec4_type);
- for (int i = 0; i < 4; i++) {
- dst_reg t = dst_reg(result);
- t.writemask = 1 << i;
- emit(MOV(t, src_reg(plist->ParameterValues[src.Index][i].f)));
- }
- break;
-
- case PROGRAM_STATE_VAR:
- assert(src.Index < this->uniforms);
- result = src_reg(dst_reg(UNIFORM, src.Index));
- result.type = BRW_REGISTER_TYPE_F;
- break;
-
- default:
- assert(!"Bad uniform in src register file");
- return src_reg(this, glsl_type::vec4_type);
- }
- break;
-
- default:
- assert(!"Bad src register file");
- return src_reg(this, glsl_type::vec4_type);
- }
-
- if (src.Swizzle != SWIZZLE_NOOP || src.Negate) {
- unsigned short zeros_mask = 0;
- unsigned short ones_mask = 0;
- unsigned short src_mask = 0;
- unsigned short src_swiz[4];
-
- for (int i = 0; i < 4; i++) {
- src_swiz[i] = 0; /* initialize for safety */
-
- /* The ZERO, ONE, and Negate options are only used for OPCODE_SWZ,
- * but it's simplest to handle it here.
- */
- int s = GET_SWZ(src.Swizzle, i);
- switch (s) {
- case SWIZZLE_X:
- case SWIZZLE_Y:
- case SWIZZLE_Z:
- case SWIZZLE_W:
- src_mask |= 1 << i;
- src_swiz[i] = s;
- break;
- case SWIZZLE_ZERO:
- zeros_mask |= 1 << i;
- break;
- case SWIZZLE_ONE:
- ones_mask |= 1 << i;
- break;
- }
- }
-
- result.swizzle =
- BRW_SWIZZLE4(src_swiz[0], src_swiz[1], src_swiz[2], src_swiz[3]);
-
- /* The hardware doesn't natively handle the SWZ instruction's zero/one
- * swizzles or per-component negation, so we need to use a temporary.
- */
- if (zeros_mask || ones_mask || src.Negate) {
- src_reg temp_src(this, glsl_type::vec4_type);
- dst_reg temp(temp_src);
-
- if (src_mask) {
- temp.writemask = src_mask;
- emit(MOV(temp, result));
- }
-
- if (zeros_mask) {
- temp.writemask = zeros_mask;
- emit(MOV(temp, src_reg(0.0f)));
- }
-
- if (ones_mask) {
- temp.writemask = ones_mask;
- emit(MOV(temp, src_reg(1.0f)));
- }
-
- if (src.Negate) {
- temp.writemask = src.Negate;
- src_reg neg(temp_src);
- neg.negate = true;
- emit(MOV(temp, neg));
- }
- result = temp_src;
- }
- }
-
- return result;
-}
{
for (int i = 0; i < key->nr_userclip_plane_consts; ++i) {
assert(this->uniforms < uniform_array_size);
- this->uniform_vector_size[this->uniforms] = 4;
this->userplane[i] = dst_reg(UNIFORM, this->uniforms);
this->userplane[i].type = BRW_REGISTER_TYPE_F;
for (int j = 0; j < 4; ++j) {
void *log_data,
const struct brw_vs_prog_key *key,
struct brw_vs_prog_data *vs_prog_data,
- struct gl_vertex_program *vp,
- struct gl_shader_program *prog,
+ nir_shader *shader,
gl_clip_plane *clip_planes,
void *mem_ctx,
int shader_time_index,
bool use_legacy_snorm_formula)
- : vec4_visitor(compiler, log_data,
- &vp->Base, &key->tex, &vs_prog_data->base, prog,
- MESA_SHADER_VERTEX,
- mem_ctx, false /* no_spills */,
- shader_time_index),
+ : vec4_visitor(compiler, log_data, &key->tex, &vs_prog_data->base, shader,
+ mem_ctx, false /* no_spills */, shader_time_index),
key(key),
vs_prog_data(vs_prog_data),
- vp(vp),
clip_planes(clip_planes),
use_legacy_snorm_formula(use_legacy_snorm_formula)
{
#include "brw_state.h"
#include "program/prog_print.h"
#include "program/prog_parameter.h"
+#include "brw_nir.h"
#include "util/ralloc.h"
}
}
-
-bool
-brw_vs_prog_data_compare(const void *in_a, const void *in_b)
-{
- const struct brw_vs_prog_data *a = in_a;
- const struct brw_vs_prog_data *b = in_b;
-
- /* Compare the base structure. */
- if (!brw_stage_prog_data_compare(&a->base.base, &b->base.base))
- return false;
-
- /* Compare the rest of the struct. */
- const unsigned offset = sizeof(struct brw_stage_prog_data);
- if (memcmp(((char *) a) + offset, ((char *) b) + offset,
- sizeof(struct brw_vs_prog_data) - offset)) {
- return false;
- }
-
- return true;
-}
-
bool
brw_codegen_vs_prog(struct brw_context *brw,
struct gl_shader_program *prog,
bool start_busy = false;
double start_time = 0;
+ if (!vp->program.Base.nir) {
+ /* Normally we generate NIR in LinkShader() or
+ * ProgramStringNotify(), but Mesa's fixed-function vertex program
+ * handling doesn't notify the driver at all. Just do it here, at
+ * the last minute, even though it's lame.
+ */
+ assert(vp->program.Base.Id == 0 && prog == NULL);
+ vp->program.Base.nir =
+ brw_create_nir(brw, NULL, &vp->program.Base, MESA_SHADER_VERTEX,
+ brw->intelScreen->compiler->scalar_vs);
+ }
+
if (prog)
vs = (struct brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
mem_ctx = ralloc_context(NULL);
+ brw_assign_common_binding_table_offsets(MESA_SHADER_VERTEX,
+ brw->intelScreen->devinfo,
+ prog, &vp->program.Base,
+ &prog_data.base.base, 0);
+
/* Allocate the references to the uniforms that will end up in the
* prog_data associated with the compiled program, and which will be freed
* by the state cache.
*/
- int param_count;
- if (vs) {
- /* We add padding around uniform values below vec4 size, with the worst
- * case being a float value that gets blown up to a vec4, so be
- * conservative here.
- */
- param_count = vs->base.num_uniform_components * 4 +
- vs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
- stage_prog_data->nr_image_params = vs->base.NumImages;
- } else {
- param_count = vp->program.Base.Parameters->NumParameters * 4;
- }
+ int param_count = vp->program.Base.nir->num_uniforms;
+ if (!brw->intelScreen->compiler->scalar_vs)
+ param_count *= 4;
+
+ if (vs)
+ prog_data.base.base.nr_image_params = vs->base.NumImages;
+
/* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
* planes as uniforms.
*/
stage_prog_data->nr_image_params);
stage_prog_data->nr_params = param_count;
+ if (prog) {
+ brw_nir_setup_glsl_uniforms(vp->program.Base.nir, prog, &vp->program.Base,
+ &prog_data.base.base,
+ brw->intelScreen->compiler->scalar_vs);
+ } else {
+ brw_nir_setup_arb_uniforms(vp->program.Base.nir, &vp->program.Base,
+ &prog_data.base.base);
+ }
+
GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
prog_data.inputs_read = vp->program.Base.InputsRead;
void brw_vs_debug_recompile(struct brw_context *brw,
struct gl_shader_program *prog,
const struct brw_vs_prog_key *key);
-bool brw_vs_prog_data_compare(const void *a, const void *b);
void
brw_upload_vs_prog(struct brw_context *brw);
void *log_data,
const struct brw_vs_prog_key *key,
struct brw_vs_prog_data *vs_prog_data,
- struct gl_vertex_program *vp,
- struct gl_shader_program *prog,
+ nir_shader *shader,
gl_clip_plane *clip_planes,
void *mem_ctx,
int shader_time_index,
const glsl_type *type);
virtual void setup_payload();
virtual void emit_prolog();
- virtual void emit_program_code();
virtual void emit_thread_end();
virtual void emit_urb_write_header(int mrf);
virtual void emit_urb_slot(dst_reg reg, int varying);
const struct brw_vs_prog_key *const key;
struct brw_vs_prog_data * const vs_prog_data;
- struct gl_vertex_program *const vp;
src_reg *vp_temp_regs;
src_reg vp_addr_reg;
#include "program/prog_parameter.h"
#include "program/program.h"
#include "intel_mipmap_tree.h"
+#include "brw_nir.h"
#include "util/ralloc.h"
return BRW_PSCDEPTH_OFF;
}
-bool
-brw_wm_prog_data_compare(const void *in_a, const void *in_b)
+static void
+assign_fs_binding_table_offsets(const struct brw_device_info *devinfo,
+ const struct gl_shader_program *shader_prog,
+ const struct gl_program *prog,
+ const struct brw_wm_prog_key *key,
+ struct brw_wm_prog_data *prog_data)
{
- const struct brw_wm_prog_data *a = in_a;
- const struct brw_wm_prog_data *b = in_b;
-
- /* Compare the base structure. */
- if (!brw_stage_prog_data_compare(&a->base, &b->base))
- return false;
+ uint32_t next_binding_table_offset = 0;
- /* Compare the rest of the structure. */
- const unsigned offset = sizeof(struct brw_stage_prog_data);
- if (memcmp(((char *) a) + offset, ((char *) b) + offset,
- sizeof(struct brw_wm_prog_data) - offset))
- return false;
+ /* If there are no color regions, we still perform an FB write to a null
+ * renderbuffer, which we place at surface index 0.
+ */
+ prog_data->binding_table.render_target_start = next_binding_table_offset;
+ next_binding_table_offset += MAX2(key->nr_color_regions, 1);
- return true;
+ brw_assign_common_binding_table_offsets(MESA_SHADER_FRAGMENT, devinfo,
+ shader_prog, prog, &prog_data->base,
+ next_binding_table_offset);
}
/**
if (!prog)
prog_data.base.use_alt_mode = true;
+ assign_fs_binding_table_offsets(brw->intelScreen->devinfo, prog,
+ &fp->program.Base, key, &prog_data);
+
/* Allocate the references to the uniforms that will end up in the
* prog_data associated with the compiled program, and which will be freed
* by the state cache.
*/
- int param_count;
- if (fs) {
- param_count = fs->base.num_uniform_components +
- fs->base.NumImages * BRW_IMAGE_PARAM_SIZE;
+ int param_count = fp->program.Base.nir->num_uniforms;
+ if (fs)
prog_data.base.nr_image_params = fs->base.NumImages;
- } else {
- param_count = fp->program.Base.Parameters->NumParameters * 4;
- }
/* The backend also sometimes adds params for texture size. */
param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits;
prog_data.base.param =
prog_data.base.nr_image_params);
prog_data.base.nr_params = param_count;
+ if (prog) {
+ brw_nir_setup_glsl_uniforms(fp->program.Base.nir, prog, &fp->program.Base,
+ &prog_data.base, true);
+ } else {
+ brw_nir_setup_arb_uniforms(fp->program.Base.nir, &fp->program.Base,
+ &prog_data.base);
+ }
+
prog_data.barycentric_interp_modes =
brw_compute_barycentric_interp_modes(brw, key->flat_shade,
key->persample_shading,
void brw_wm_debug_recompile(struct brw_context *brw,
struct gl_shader_program *prog,
const struct brw_wm_prog_key *key);
-bool brw_wm_prog_data_compare(const void *a, const void *b);
void
brw_upload_wm_prog(struct brw_context *brw);
{
assert(stage == MESA_SHADER_FRAGMENT);
brw_wm_prog_key *key = (brw_wm_prog_key*) this->key;
- gl_fragment_program *fp = (gl_fragment_program*) prog;
GLuint reg = 2;
bool kill_stats_promoted_workaround = false;
int lookup = key->iz_lookup;
bool uses_depth =
- (fp->Base.InputsRead & (1 << VARYING_SLOT_POS)) != 0;
+ (nir->info.inputs_read & (1 << VARYING_SLOT_POS)) != 0;
assert(lookup < IZ_BIT_MAX);
surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) |
SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
- surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
+ surf[5] = mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
/* Emit relocation to surface contents */
drm_intel_bo_emit_reloc(brw->batch.bo,
assert(tile_y % 2 == 0);
surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
(tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
- (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
+ (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
if (brw->gen < 6) {
/* _NEW_COLOR */
.emit = brw_upload_wm_ubo_surfaces,
};
+static void
+brw_upload_cs_ubo_surfaces(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->ctx;
+ /* _NEW_PROGRAM */
+ struct gl_shader_program *prog =
+ ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
+
+ if (!prog)
+ return;
+
+ /* BRW_NEW_CS_PROG_DATA */
+ brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_COMPUTE],
+ &brw->cs.base, &brw->cs.prog_data->base, true);
+}
+
+const struct brw_tracked_state brw_cs_ubo_surfaces = {
+ .dirty = {
+ .mesa = _NEW_PROGRAM,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_CS_PROG_DATA |
+ BRW_NEW_UNIFORM_BUFFER,
+ },
+ .emit = brw_upload_cs_ubo_surfaces,
+};
+
void
brw_upload_abo_surfaces(struct brw_context *brw,
struct gl_shader_program *prog,
assert(tile_y % 2 == 0);
surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
(tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
- (surface->mt->align_h == 4 ?
+ (surface->mt->valign == 4 ?
BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
/* Emit relocation to surface contents */
namespace brw {
-void
-gen6_gs_visitor::assign_binding_table_offsets()
-{
- /* In gen6 we reserve the first BRW_MAX_SOL_BINDINGS entries for transform
- * feedback surfaces.
- */
- assign_common_binding_table_offsets(BRW_MAX_SOL_BINDINGS);
-}
-
void
gen6_gs_visitor::emit_prolog()
{
}
}
-void
-gen6_gs_visitor::visit(ir_emit_vertex *ir)
-{
- /* To ensure that we don't output more vertices than the shader specified
- * using max_vertices, do the logic inside a conditional of the form "if
- * (vertex_count < MAX)"
- */
- unsigned num_output_vertices = c->gp->program.VerticesOut;
- emit(CMP(dst_null_d(), this->vertex_count,
- src_reg(num_output_vertices), BRW_CONDITIONAL_L));
- emit(IF(BRW_PREDICATE_NORMAL));
-
- gs_emit_vertex(ir->stream_id());
-
- this->current_annotation = "emit vertex: increment vertex count";
- emit(ADD(dst_reg(this->vertex_count), this->vertex_count,
- src_reg(1u)));
-
- emit(BRW_OPCODE_ENDIF);
-}
-
void
gen6_gs_visitor::gs_emit_vertex(int stream_id)
{
this->vertex_output_offset, 1u));
}
-void
-gen6_gs_visitor::visit(ir_end_primitive *)
-{
- gs_end_primitive();
-}
-
void
gen6_gs_visitor::gs_end_primitive()
{
if (c->gp->program.OutputType != GL_POINTS) {
emit(CMP(dst_null_d(), this->first_vertex, 0u, BRW_CONDITIONAL_Z));
emit(IF(BRW_PREDICATE_NORMAL));
- {
- visit((ir_end_primitive *) NULL);
- }
+ gs_end_primitive();
emit(BRW_OPCODE_ENDIF);
}
void *log_data,
struct brw_gs_compile *c,
struct gl_shader_program *prog,
+ nir_shader *shader,
void *mem_ctx,
bool no_spills,
int shader_time_index) :
- vec4_gs_visitor(comp, log_data, c, prog, mem_ctx, no_spills,
+ vec4_gs_visitor(comp, log_data, c, prog, shader, mem_ctx, no_spills,
shader_time_index) {}
protected:
- virtual void assign_binding_table_offsets();
virtual void emit_prolog();
virtual void emit_thread_end();
- virtual void visit(ir_emit_vertex *);
- virtual void visit(ir_end_primitive *);
virtual void gs_emit_vertex(int stream_id);
virtual void gs_end_primitive();
virtual void emit_urb_write_header(int mrf);
SET_FIELD(min_array_element, BRW_SURFACE_MIN_ARRAY_ELEMENT) |
SET_FIELD(depth - 1, BRW_SURFACE_RENDER_TARGET_VIEW_EXTENT);
- surf[5] = (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0);
+ surf[5] = (mt->valign == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0);
drm_intel_bo_emit_reloc(brw->batch.bo,
offset + 4,
surface->brw_surfaceformat << BRW_SURFACE_FORMAT_SHIFT |
gen7_surface_tiling_mode(tiling);
- if (surface->mt->align_h == 4)
+ if (surface->mt->valign == 4)
surf[0] |= GEN7_SURFACE_VALIGN_4;
- if (surface->mt->align_w == 8)
+ if (surface->mt->halign == 8)
surf[0] |= GEN7_SURFACE_HALIGN_8;
if (surface->array_layout == ALL_SLICES_AT_EACH_LOD)
if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
local_id_dwords =
- brw_cs_prog_local_id_payload_dwords(prog, cs_prog_data->simd_size);
+ brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size);
}
unsigned push_constant_data_size =
*
*/
unsigned
-brw_cs_prog_local_id_payload_dwords(const struct gl_program *prog,
- unsigned dispatch_width)
+brw_cs_prog_local_id_payload_dwords(unsigned dispatch_width)
{
return 3 * dispatch_width;
}
if (prog->SystemValuesRead & SYSTEM_BIT_LOCAL_INVOCATION_ID) {
local_id_dwords =
- brw_cs_prog_local_id_payload_dwords(prog, cs_prog_data->simd_size);
+ brw_cs_prog_local_id_payload_dwords(cs_prog_data->simd_size);
}
/* Updates the ParamaterValues[i] pointers for all parameters of the
if (target == GL_TEXTURE_CUBE_MAP || target == GL_TEXTURE_CUBE_MAP_ARRAY)
surf[0] |= BRW_SURFACE_CUBEFACE_ENABLES;
- if (mt->align_h == 4)
+ if (mt->valign == 4)
surf[0] |= GEN7_SURFACE_VALIGN_4;
- if (mt->align_w == 8)
+ if (mt->halign == 8)
surf[0] |= GEN7_SURFACE_HALIGN_8;
if (_mesa_is_array_texture(target) || target == GL_TEXTURE_CUBE_MAP)
GEN7_SURFACE_ARYSPC_LOD0 : GEN7_SURFACE_ARYSPC_FULL) |
gen7_surface_tiling_mode(mt->tiling);
- if (irb->mt->align_h == 4)
+ if (irb->mt->valign == 4)
surf[0] |= GEN7_SURFACE_VALIGN_4;
- if (irb->mt->align_w == 8)
+ if (irb->mt->halign == 8)
surf[0] |= GEN7_SURFACE_HALIGN_8;
if (is_array) {
surf_type == BRW_SURFACE_1D))
return GEN8_SURFACE_VALIGN_4;
- switch (mt->align_h) {
+ switch (mt->valign) {
case 4:
return GEN8_SURFACE_VALIGN_4;
case 8:
gen9_use_linear_1d_layout(brw, mt)))
return GEN8_SURFACE_HALIGN_4;
- switch (mt->align_w) {
+ switch (mt->halign) {
case 4:
return GEN8_SURFACE_HALIGN_4;
case 8:
* "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
* 16 must be used."
*/
- assert(brw->gen < 9 || mt->align_w == 16);
- assert(brw->gen < 8 || mt->num_samples > 1 || mt->align_w == 16);
+ assert(brw->gen < 9 || mt->halign == 16);
+ assert(brw->gen < 8 || mt->num_samples > 1 || mt->halign == 16);
}
const uint32_t surf_type = translate_tex_target(target);
* "When Auxiliary Surface Mode is set to AUX_CCS_D or AUX_CCS_E, HALIGN
* 16 must be used."
*/
- assert(brw->gen < 9 || mt->align_w == 16);
- assert(brw->gen < 8 || mt->num_samples > 1 || mt->align_w == 16);
+ assert(brw->gen < 9 || mt->halign == 16);
+ assert(brw->gen < 8 || mt->num_samples > 1 || mt->halign == 16);
}
uint32_t *surf = allocate_surface_state(brw, &offset, surf_index);
} else {
assert(dst_renderbuffer);
dst_mt = intel_renderbuffer(dst_renderbuffer)->mt;
- src_image = src_renderbuffer->TexImage;
+ dst_image = dst_renderbuffer->TexImage;
}
if (src_mt->num_samples > 0 || dst_mt->num_samples > 0) {
if (intel_tiling_supports_non_msrt_mcs(brw, mt->tiling) &&
intel_miptree_is_fast_clear_capable(brw, mt)) {
mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_RESOLVED;
- assert(brw->gen < 8 || mt->align_w == 16 || num_samples <= 1);
+ assert(brw->gen < 8 || mt->halign == 16 || num_samples <= 1);
}
return mt;
struct intel_miptree_map *map,
unsigned int level, unsigned int slice)
{
- map->mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
- 0, 0,
- map->w, map->h, 1,
- 0, MIPTREE_LAYOUT_TILING_NONE);
+ map->linear_mt = intel_miptree_create(brw, GL_TEXTURE_2D, mt->format,
+ /* first_level */ 0,
+ /* last_level */ 0,
+ map->w, map->h, 1,
+ /* samples */ 0,
+ MIPTREE_LAYOUT_TILING_NONE);
- if (!map->mt) {
+ if (!map->linear_mt) {
fprintf(stderr, "Failed to allocate blit temporary\n");
goto fail;
}
- map->stride = map->mt->pitch;
+ map->stride = map->linear_mt->pitch;
/* One of either READ_BIT or WRITE_BIT or both is set. READ_BIT implies no
* INVALIDATE_RANGE_BIT. WRITE_BIT needs the original values read in unless
if (!intel_miptree_blit(brw,
mt, level, slice,
map->x, map->y, false,
- map->mt, 0, 0,
+ map->linear_mt, 0, 0,
0, 0, false,
map->w, map->h, GL_COPY)) {
fprintf(stderr, "Failed to blit\n");
}
}
- map->ptr = intel_miptree_map_raw(brw, map->mt);
+ map->ptr = intel_miptree_map_raw(brw, map->linear_mt);
DBG("%s: %d,%d %dx%d from mt %p (%s) %d,%d = %p/%d\n", __func__,
map->x, map->y, map->w, map->h,
return;
fail:
- intel_miptree_release(&map->mt);
+ intel_miptree_release(&map->linear_mt);
map->ptr = NULL;
map->stride = 0;
}
{
struct gl_context *ctx = &brw->ctx;
- intel_miptree_unmap_raw(map->mt);
+ intel_miptree_unmap_raw(map->linear_mt);
if (map->mode & GL_MAP_WRITE_BIT) {
bool ok = intel_miptree_blit(brw,
- map->mt, 0, 0,
+ map->linear_mt, 0, 0,
0, 0, false,
mt, level, slice,
map->x, map->y, false,
WARN_ONCE(!ok, "Failed to blit from linear temporary mapping");
}
- intel_miptree_release(&map->mt);
+ intel_miptree_release(&map->linear_mt);
}
/**
intel_miptree_unmap_etc(brw, mt, map, level, slice);
} else if (mt->stencil_mt && !(map->mode & BRW_MAP_DIRECT_BIT)) {
intel_miptree_unmap_depthstencil(brw, mt, map, level, slice);
- } else if (map->mt) {
+ } else if (map->linear_mt) {
intel_miptree_unmap_blit(brw, mt, map, level, slice);
#if defined(USE_SSE41)
} else if (map->buffer && cpu_has_sse4_1) {
struct intel_texture_image;
/**
+ * This bit extends the set of GL_MAP_*_BIT enums.
+ *
* When calling intel_miptree_map() on an ETC-transcoded-to-RGB miptree or a
* depthstencil-split-to-separate-stencil miptree, we'll normally make a
- * tmeporary and recreate the kind of data requested by Mesa core, since we're
+ * temporary and recreate the kind of data requested by Mesa core, since we're
* satisfying some glGetTexImage() request or something.
*
* However, occasionally you want to actually map the miptree's current data
#define BRW_MAP_DIRECT_BIT 0x80000000
struct intel_miptree_map {
- /** Bitfield of GL_MAP_READ_BIT, GL_MAP_WRITE_BIT, GL_MAP_INVALIDATE_BIT */
+ /** Bitfield of GL_MAP_*_BIT and BRW_MAP_*_BIT. */
GLbitfield mode;
/** Region of interest for the map. */
int x, y, w, h;
/** Possibly malloced temporary buffer for the mapping. */
void *buffer;
/** Possible pointer to a temporary linear miptree for the mapping. */
- struct intel_mipmap_tree *mt;
+ struct intel_mipmap_tree *linear_mt;
/** Pointer to the start of (map_x, map_y) returned by the mapping. */
void *ptr;
/** Stride of the mapping. */
* \code
* x = mt->level[l].slice[s].x_offset
* y = mt->level[l].slice[s].y_offset
+ *
+ * On some hardware generations, we program these offsets into
+ * RENDER_SURFACE_STATE.XOffset and RENDER_SURFACE_STATE.YOffset.
*/
GLuint x_offset;
GLuint y_offset;
* accommodated by scaling up the width and the height of the surface so
* that all the samples corresponding to a pixel are located at nearby
* memory locations.
+ *
+ * @see PRM section "Interleaved Multisampled Surfaces"
*/
INTEL_MSAA_LAYOUT_IMS,
/**
* Uncompressed Multisample Surface. The surface is stored as a 2D array,
* with array slice n containing all pixel data for sample n.
+ *
+ * @see PRM section "Uncompressed Multisampled Surfaces"
*/
INTEL_MSAA_LAYOUT_UMS,
* the common case (where all samples constituting a pixel have the same
* color value) to be stored efficiently by just using a single array
* slice.
+ *
+ * @see PRM section "Compressed Multisampled Surfaces"
*/
INTEL_MSAA_LAYOUT_CMS,
};
*/
struct intel_miptree_aux_buffer
{
- /** Buffer object containing the pixel data. */
+ /**
+ * Buffer object containing the pixel data.
+ *
+ * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress
+ * @see 3DSTATE_HIER_DEPTH_BUFFER.AuxiliarySurfaceBaseAddress
+ */
drm_intel_bo *bo;
- uint32_t pitch; /**< pitch in bytes. */
+ /**
+ * Pitch in bytes.
+ *
+ * @see RENDER_SURFACE_STATE.AuxiliarySurfacePitch
+ * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfacePitch
+ */
+ uint32_t pitch;
- uint32_t qpitch; /**< The distance in rows between array slices. */
+ /**
+ * The distance in rows between array slices.
+ *
+ * @see RENDER_SURFACE_STATE.AuxiliarySurfaceQPitch
+ * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceQPitch
+ */
+ uint32_t qpitch;
- struct intel_mipmap_tree *mt; /**< hiz miptree used with Gen6 */
+ /**
+ * Hiz miptree. Used only by Gen6.
+ */
+ struct intel_mipmap_tree *mt;
};
/* Tile resource modes */
struct intel_mipmap_tree
{
- /** Buffer object containing the pixel data. */
+ /**
+ * Buffer object containing the surface.
+ *
+ * @see intel_mipmap_tree::offset
+ * @see RENDER_SURFACE_STATE.SurfaceBaseAddress
+ * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress
+ * @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress
+ * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress
+ * @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress
+ */
drm_intel_bo *bo;
- uint32_t pitch; /**< pitch in bytes. */
+ /**
+ * Pitch in bytes.
+ *
+ * @see RENDER_SURFACE_STATE.SurfacePitch
+ * @see RENDER_SURFACE_STATE.AuxiliarySurfacePitch
+ * @see 3DSTATE_DEPTH_BUFFER.SurfacePitch
+ * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfacePitch
+ * @see 3DSTATE_STENCIL_BUFFER.SurfacePitch
+ */
+ uint32_t pitch;
+
+ /**
+ * One of the I915_TILING_* flags.
+ *
+ * @see RENDER_SURFACE_STATE.TileMode
+ * @see 3DSTATE_DEPTH_BUFFER.TileMode
+ */
+ uint32_t tiling;
- uint32_t tiling; /**< One of the I915_TILING_* flags */
+ /**
+ * @see RENDER_SURFACE_STATE.TiledResourceMode
+ * @see 3DSTATE_DEPTH_BUFFER.TiledResourceMode
+ */
enum intel_miptree_tr_mode tr_mode;
- /* Effectively the key:
+ /**
+ * @brief One of GL_TEXTURE_2D, GL_TEXTURE_2D_ARRAY, etc.
+ *
+ * @see RENDER_SURFACE_STATE.SurfaceType
+ * @see RENDER_SURFACE_STATE.SurfaceArray
+ * @see 3DSTATE_DEPTH_BUFFER.SurfaceType
*/
GLenum target;
*
* For ETC1/ETC2 textures, this is one of the uncompressed mesa texture
* formats if the hardware lacks support for ETC1/ETC2. See @ref etc_format.
+ *
+ * @see RENDER_SURFACE_STATE.SurfaceFormat
+ * @see 3DSTATE_DEPTH_BUFFER.SurfaceFormat
*/
mesa_format format;
- /** This variable stores the value of ETC compressed texture format */
+ /**
+ * This variable stores the value of ETC compressed texture format
+ *
+ * @see RENDER_SURFACE_STATE.SurfaceFormat
+ */
mesa_format etc_format;
/**
- * The X offset of each image in the miptree must be aligned to this.
- * See the comments in brw_tex_layout.c.
+ * @name Surface Alignment
+ * @{
+ *
+ * This defines the alignment of the upperleft pixel of each "slice" in the
+ * surface. The alignment is in pixel coordinates relative to the surface's
+ * most upperleft pixel, which is the pixel at (x=0, y=0, layer=0,
+ * level=0).
+ *
+ * The hardware docs do not use the term "slice". We use "slice" to mean
+ * the pixels at a given miplevel and layer. For 2D surfaces, the layer is
+ * the array slice; for 3D surfaces, the layer is the z offset.
+ *
+ * In the surface layout equations found in the hardware docs, the
+ * horizontal and vertical surface alignments often appear as variables 'i'
+ * and 'j'.
*/
- unsigned int align_w;
- unsigned int align_h; /**< \see align_w */
+
+ /** @see RENDER_SURFACE_STATE.SurfaceHorizontalAlignment */
+ uint32_t halign;
+
+ /** @see RENDER_SURFACE_STATE.SurfaceVerticalAlignment */
+ uint32_t valign;
+ /** @} */
GLuint first_level;
GLuint last_level;
*/
GLuint physical_width0, physical_height0, physical_depth0;
- GLuint cpp; /**< bytes per pixel (or bytes per block if compressed) */
+ /** Bytes per pixel (or bytes per block if compressed) */
+ GLuint cpp;
+
+ /**
+ * @see RENDER_SURFACE_STATE.NumberOfMultisamples
+ * @see 3DSTATE_MULTISAMPLE.NumberOfMultisamples
+ */
GLuint num_samples;
+
bool compressed;
/**
- * Level zero image dimensions. These dimensions correspond to the
+ * @name Level zero image dimensions
+ * @{
+ *
+ * These dimensions correspond to the
* logical width, height, and depth of the texture as seen by client code.
* Accordingly, they do not account for the extra width, height, and/or
* depth that must be allocated in order to accommodate multisample
* formats, nor do they account for the extra factor of 6 in depth that
* must be allocated in order to accommodate cubemap textures.
*/
- uint32_t logical_width0, logical_height0, logical_depth0;
+
+ /**
+ * @see RENDER_SURFACE_STATE.Width
+ * @see 3DSTATE_DEPTH_BUFFER.Width
+ */
+ uint32_t logical_width0;
+
+ /**
+ * @see RENDER_SURFACE_STATE.Height
+ * @see 3DSTATE_DEPTH_BUFFER.Height
+ */
+ uint32_t logical_height0;
+
+ /**
+ * @see RENDER_SURFACE_STATE.Depth
+ * @see 3DSTATE_DEPTH_BUFFER.Depth
+ */
+ uint32_t logical_depth0;
+ /** @} */
/**
* Indicates if we use the standard miptree layout (ALL_LOD_IN_EACH_SLICE),
* surfaces it is the number of blocks. For 1D array surfaces that have the
* mipmap tree stored horizontally it is the number of pixels between each
* slice.
+ *
+ * @see RENDER_SURFACE_STATE.SurfaceQPitch
+ * @see 3DSTATE_DEPTH_BUFFER.SurfaceQPitch
+ * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceQPitch
+ * @see 3DSTATE_STENCIL_BUFFER.SurfaceQPitch
*/
uint32_t qpitch;
/**
* MSAA layout used by this buffer.
+ *
+ * @see RENDER_SURFACE_STATE.MultisampledSurfaceStorageFormat
*/
enum intel_msaa_layout msaa_layout;
GLuint total_width;
GLuint total_height;
- /* The 3DSTATE_CLEAR_PARAMS value associated with the last depth clear to
- * this depth mipmap tree, if any.
+ /**
+ * The depth value used during the most recent fast depth clear performed
+ * on the surface. This field is invalid only if surface has never
+ * underwent a fast depth clear.
+ *
+ * @see 3DSTATE_CLEAR_PARAMS.DepthClearValue
*/
uint32_t depth_clear_value;
- /* Includes image offset tables:
- */
+ /* Includes image offset tables: */
struct intel_mipmap_level level[MAX_TEXTURE_LEVELS];
- /* Offset into bo where miptree starts:
+ /**
+ * Offset into bo where the surface starts.
+ *
+ * @see intel_mipmap_tree::bo
+ *
+ * @see RENDER_SURFACE_STATE.AuxiliarySurfaceBaseAddress
+ * @see 3DSTATE_DEPTH_BUFFER.SurfaceBaseAddress
+ * @see 3DSTATE_HIER_DEPTH_BUFFER.SurfaceBaseAddress
+ * @see 3DSTATE_STENCIL_BUFFER.SurfaceBaseAddress
*/
uint32_t offset;
/**
* \brief HiZ aux buffer
*
- * The hiz miptree contains the miptree's hiz buffer. To allocate the hiz
- * buffer, use intel_miptree_alloc_hiz().
+ * To allocate the hiz buffer, use intel_miptree_alloc_hiz().
*
* To determine if hiz is enabled, do not check this pointer. Instead, use
* intel_miptree_slice_has_hiz().
* require separate stencil. It always has the true copy of the stencil
* bits, regardless of mt->format.
*
+ * \see 3DSTATE_STENCIL_BUFFER
* \see intel_miptree_map_depthstencil()
* \see intel_miptree_unmap_depthstencil()
*/
*
* This value will only ever contain ones in bits 28-31, so it is safe to
* OR into dword 7 of SURFACE_STATE.
+ *
+ * @see RENDER_SURFACE_STATE.RedClearColor
+ * @see RENDER_SURFACE_STATE.GreenClearColor
+ * @see RENDER_SURFACE_STATE.BlueClearColor
+ * @see RENDER_SURFACE_STATE.AlphaClearColor
*/
uint32_t fast_clear_color_value;
public:
cmod_propagation_fs_visitor(struct brw_compiler *compiler,
struct brw_wm_prog_data *prog_data,
- struct gl_shader_program *shader_prog)
- : fs_visitor(compiler, NULL, NULL, MESA_SHADER_FRAGMENT, NULL,
- &prog_data->base, shader_prog,
- (struct gl_program *) NULL, 8, -1) {}
+ nir_shader *shader)
+ : fs_visitor(compiler, NULL, NULL, NULL,
+ &prog_data->base, (struct gl_program *) NULL,
+ shader, 8, -1) {}
};
fp = ralloc(NULL, struct brw_fragment_program);
prog_data = ralloc(NULL, struct brw_wm_prog_data);
- shader_prog = ralloc(NULL, struct gl_shader_program);
+ nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_FRAGMENT, NULL);
- v = new cmod_propagation_fs_visitor(compiler, prog_data, shader_prog);
+ v = new cmod_propagation_fs_visitor(compiler, prog_data, shader);
_mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0);
public:
saturate_propagation_fs_visitor(struct brw_compiler *compiler,
struct brw_wm_prog_data *prog_data,
- struct gl_shader_program *shader_prog)
- : fs_visitor(compiler, NULL, NULL, MESA_SHADER_FRAGMENT, NULL,
- &prog_data->base, shader_prog,
- (struct gl_program *) NULL, 8, -1) {}
+ nir_shader *shader)
+ : fs_visitor(compiler, NULL, NULL, NULL,
+ &prog_data->base, (struct gl_program *) NULL,
+ shader, 8, -1) {}
};
fp = ralloc(NULL, struct brw_fragment_program);
prog_data = ralloc(NULL, struct brw_wm_prog_data);
- shader_prog = ralloc(NULL, struct gl_shader_program);
+ nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_FRAGMENT, NULL);
- v = new saturate_propagation_fs_visitor(compiler, prog_data, shader_prog);
+ v = new saturate_propagation_fs_visitor(compiler, prog_data, shader);
_mesa_init_fragment_program(ctx, &fp->program, GL_FRAGMENT_SHADER, 0);
{
public:
copy_propagation_vec4_visitor(struct brw_compiler *compiler,
- struct gl_shader_program *shader_prog)
- : vec4_visitor(compiler, NULL, NULL, NULL, NULL, shader_prog,
- MESA_SHADER_VERTEX, NULL,
+ nir_shader *shader)
+ : vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL,
false /* no_spills */, -1)
{
}
unreachable("Not reached");
}
- virtual void emit_program_code()
- {
- unreachable("Not reached");
- }
-
virtual void emit_thread_end()
{
unreachable("Not reached");
vp = ralloc(NULL, struct brw_vertex_program);
- shader_prog = ralloc(NULL, struct gl_shader_program);
+ nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL);
- v = new copy_propagation_vec4_visitor(compiler, shader_prog);
+ v = new copy_propagation_vec4_visitor(compiler, shader);
_mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0);
{
public:
register_coalesce_vec4_visitor(struct brw_compiler *compiler,
- struct gl_shader_program *shader_prog)
- : vec4_visitor(compiler, NULL, NULL, NULL, NULL, shader_prog,
- MESA_SHADER_VERTEX, NULL,
+ nir_shader *shader)
+ : vec4_visitor(compiler, NULL, NULL, NULL, shader, NULL,
false /* no_spills */, -1)
{
}
unreachable("Not reached");
}
- virtual void emit_program_code()
- {
- unreachable("Not reached");
- }
-
virtual void emit_thread_end()
{
unreachable("Not reached");
vp = ralloc(NULL, struct brw_vertex_program);
- shader_prog = ralloc(NULL, struct gl_shader_program);
+ nir_shader *shader = nir_shader_create(NULL, MESA_SHADER_VERTEX, NULL);
- v = new register_coalesce_vec4_visitor(compiler, shader_prog);
+ v = new register_coalesce_vec4_visitor(compiler, shader);
_mesa_init_vertex_program(ctx, &vp->program, GL_VERTEX_SHADER, 0);
GET_CURRENT_CONTEXT(ctx);
if (MESA_VERBOSE & VERBOSE_API)
- _mesa_debug(ctx, "glDispatchComputeIndirect(%d)\n", indirect);
+ _mesa_debug(ctx, "glDispatchComputeIndirect(%ld)\n", (long) indirect);
if (!_mesa_validate_DispatchComputeIndirect(ctx, indirect))
return;
*
**************************************************************************/
-#include "glheader.h"
-#include "imports.h"
-#include "mtypes.h"
+#include "main/glheader.h"
#include "main/context.h"
+#include "main/imports.h"
#include "main/macros.h"
#include "main/samplerobj.h"
#include "main/texenvprogram.h"
#include "main/texobj.h"
#include "main/uniforms.h"
+#include "glsl/ir_builder.h"
+#include "glsl/ir_optimization.h"
+#include "glsl/glsl_parser_extras.h"
+#include "glsl/glsl_symbol_table.h"
+#include "glsl/glsl_types.h"
+#include "program/ir_to_mesa.h"
#include "program/program.h"
-#include "program/prog_parameter.h"
+#include "program/programopt.h"
#include "program/prog_cache.h"
#include "program/prog_instruction.h"
+#include "program/prog_parameter.h"
#include "program/prog_print.h"
#include "program/prog_statevars.h"
-#include "program/programopt.h"
-#include "../glsl/glsl_types.h"
-#include "../glsl/ir.h"
-#include "../glsl/ir_builder.h"
-#include "../glsl/glsl_symbol_table.h"
-#include "../glsl/glsl_parser_extras.h"
-#include "../glsl/ir_optimization.h"
-#include "../program/ir_to_mesa.h"
using namespace ir_builder;
GLuint file:4;
GLint idx:9; /* relative addressing may be negative */
/* sizeof(idx) should == sizeof(prog_src_reg::Index) */
+ GLuint abs:1;
GLuint negate:1;
GLuint swz:12;
- GLuint pad:6;
+ GLuint pad:5;
};
0,
0,
0,
+ 0,
0
};
struct ureg reg;
reg.file = file;
reg.idx = idx;
+ reg.abs = 0;
reg.negate = 0;
reg.swz = SWIZZLE_NOOP;
reg.pad = 0;
+static struct ureg absolute( struct ureg reg )
+{
+ reg.abs = 1;
+ reg.negate = 0;
+ return reg;
+}
+
+
static struct ureg negate( struct ureg reg )
{
reg.negate ^= 1;
src->File = reg.file;
src->Index = reg.idx;
src->Swizzle = reg.swz;
+ src->Abs = reg.abs;
src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE;
- src->Abs = 0;
src->RelAddr = 0;
/* Check that bitfield sizes aren't exceeded */
assert(src->Index == reg.idx);
emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm);
emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
- emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
+ emit_op2(p, OPCODE_POW, spot, 0, absolute(spot), swizzle1(attenuation, W));
emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
release_temp(p, spot);
#define snprintf _snprintf
#endif
+#if defined(_WIN32) && !defined(strtok_r)
+#define strtok_r strtok_s
+#endif
#ifdef __cplusplus
}
* GL_ARB_separate_shader_objects extension.
*/
+#include <stdbool.h>
#include "main/glheader.h"
#include "main/context.h"
#include "main/dispatch.h"
#include "main/shaderobj.h"
#include "main/transformfeedback.h"
#include "main/uniforms.h"
+#include "glsl/glsl_parser_extras.h"
+#include "glsl/ir_uniform.h"
#include "program/program.h"
#include "program/prog_parameter.h"
#include "util/ralloc.h"
-#include <stdbool.h>
-#include "../glsl/glsl_parser_extras.h"
-#include "../glsl/ir_uniform.h"
/**
* Delete a pipeline object.
return GL_INVALID_INDEX;
switch (res->Type) {
- case GL_UNIFORM_BLOCK:
- case GL_SHADER_STORAGE_BLOCK:
- return RESOURCE_UBO(res)- shProg->UniformBlocks;
case GL_ATOMIC_COUNTER_BUFFER:
return RESOURCE_ATC(res) - shProg->AtomicBuffers;
+ case GL_UNIFORM_BLOCK:
+ case GL_SHADER_STORAGE_BLOCK:
case GL_TRANSFORM_FEEDBACK_VARYING:
default:
return calc_resource_index(shProg, res);
struct gl_texture_object *
_mesa_create_nameless_texture(struct gl_context *ctx, GLenum target)
{
- struct gl_texture_object *texObj = NULL;
- GLint targetIndex;
+ struct gl_texture_object *texObj = NULL;
+ GLint targetIndex;
- if (target == 0)
- return texObj;
+ if (target == 0)
+ return texObj;
- texObj = ctx->Driver.NewTextureObject(ctx, 0, target);
- targetIndex = _mesa_tex_target_to_index(ctx, texObj->Target);
- assert(targetIndex < NUM_TEXTURE_TARGETS);
- texObj->TargetIndex = targetIndex;
+ texObj = ctx->Driver.NewTextureObject(ctx, 0, target);
+ targetIndex = _mesa_tex_target_to_index(ctx, texObj->Target);
+ assert(targetIndex < NUM_TEXTURE_TARGETS);
+ texObj->TargetIndex = targetIndex;
- return texObj;
+ return texObj;
}
/**
/**
- * Bind a named texture to a texturing target.
+ * Do actual texture binding. All error checking should have been done prior
+ * to calling this function. Note that the texture target (1D, 2D, etc) is
+ * always specified by the texObj->TargetIndex.
+ *
+ * \param unit index of texture unit to update
+ * \param texObj the new texture object (cannot be NULL)
+ */
+static void
+bind_texture(struct gl_context *ctx,
+ unsigned unit,
+ struct gl_texture_object *texObj)
+{
+ struct gl_texture_unit *texUnit;
+ int targetIndex;
+
+ assert(unit < ARRAY_SIZE(ctx->Texture.Unit));
+ texUnit = &ctx->Texture.Unit[unit];
+
+ assert(texObj);
+ assert(valid_texture_object(texObj));
+
+ targetIndex = texObj->TargetIndex;
+ assert(targetIndex >= 0);
+ assert(targetIndex < NUM_TEXTURE_TARGETS);
+
+ /* Check if this texture is only used by this context and is already bound.
+ * If so, just return.
+ */
+ {
+ bool early_out;
+ mtx_lock(&ctx->Shared->Mutex);
+ early_out = ((ctx->Shared->RefCount == 1)
+ && (texObj == texUnit->CurrentTex[targetIndex]));
+ mtx_unlock(&ctx->Shared->Mutex);
+ if (early_out) {
+ return;
+ }
+ }
+
+ /* flush before changing binding */
+ FLUSH_VERTICES(ctx, _NEW_TEXTURE);
+
+ /* If the refcount on the previously bound texture is decremented to
+ * zero, it'll be deleted here.
+ */
+ _mesa_reference_texobj(&texUnit->CurrentTex[targetIndex], texObj);
+
+ ctx->Texture.NumCurrentTexUsed = MAX2(ctx->Texture.NumCurrentTexUsed,
+ unit + 1);
+
+ if (texObj->Name != 0)
+ texUnit->_BoundTextures |= (1 << targetIndex);
+ else
+ texUnit->_BoundTextures &= ~(1 << targetIndex);
+
+ /* Pass BindTexture call to device driver */
+ if (ctx->Driver.BindTexture) {
+ ctx->Driver.BindTexture(ctx, unit, texObj->Target, texObj);
+ }
+}
+
+
+/**
+ * Implement glBindTexture(). Do error checking, look-up or create a new
+ * texture object, then bind it in the current texture unit.
*
* \param target texture target.
* \param texName texture name.
- *
- * \sa glBindTexture().
- *
- * Determines the old texture object bound and returns immediately if rebinding
- * the same texture. Get the current texture which is either a default texture
- * if name is null, a named texture from the hash, or a new texture if the
- * given texture name is new. Increments its reference count, binds it, and
- * calls dd_function_table::BindTexture. Decrements the old texture reference
- * count and deletes it if it reaches zero.
*/
void GLAPIENTRY
_mesa_BindTexture( GLenum target, GLuint texName )
{
GET_CURRENT_CONTEXT(ctx);
- struct gl_texture_unit *texUnit = _mesa_get_current_tex_unit(ctx);
struct gl_texture_object *newTexObj = NULL;
GLint targetIndex;
newTexObj->TargetIndex = targetIndex;
}
- assert(valid_texture_object(newTexObj));
-
- /* Check if this texture is only used by this context and is already bound.
- * If so, just return.
- */
- {
- GLboolean early_out;
- mtx_lock(&ctx->Shared->Mutex);
- early_out = ((ctx->Shared->RefCount == 1)
- && (newTexObj == texUnit->CurrentTex[targetIndex]));
- mtx_unlock(&ctx->Shared->Mutex);
- if (early_out) {
- return;
- }
- }
-
- /* flush before changing binding */
- FLUSH_VERTICES(ctx, _NEW_TEXTURE);
-
- /* Do the actual binding. The refcount on the previously bound
- * texture object will be decremented. It'll be deleted if the
- * count hits zero.
- */
- _mesa_reference_texobj(&texUnit->CurrentTex[targetIndex], newTexObj);
- ctx->Texture.NumCurrentTexUsed = MAX2(ctx->Texture.NumCurrentTexUsed,
- ctx->Texture.CurrentUnit + 1);
- assert(texUnit->CurrentTex[targetIndex]);
-
- if (texName != 0)
- texUnit->_BoundTextures |= (1 << targetIndex);
- else
- texUnit->_BoundTextures &= ~(1 << targetIndex);
-
- /* Pass BindTexture call to device driver */
- if (ctx->Driver.BindTexture)
- ctx->Driver.BindTexture(ctx, ctx->Texture.CurrentUnit, target, newTexObj);
+ bind_texture(ctx, ctx->Texture.CurrentUnit, newTexObj);
}
-/**
- * Do the actual binding to a numbered texture unit.
- * The refcount on the previously bound
- * texture object will be decremented. It'll be deleted if the
- * count hits zero.
- */
-static void
-bind_texture_unit(struct gl_context *ctx,
- GLuint unit,
- struct gl_texture_object *texObj)
-{
- struct gl_texture_unit *texUnit;
-
- /* Get the texture unit (this is an array look-up) */
- texUnit = _mesa_get_tex_unit_err(ctx, unit, "glBindTextureUnit");
- if (!texUnit)
- return;
-
- /* Check if this texture is only used by this context and is already bound.
- * If so, just return.
- */
- {
- bool early_out;
- mtx_lock(&ctx->Shared->Mutex);
- early_out = ((ctx->Shared->RefCount == 1)
- && (texObj == texUnit->CurrentTex[texObj->TargetIndex]));
- mtx_unlock(&ctx->Shared->Mutex);
- if (early_out) {
- return;
- }
- }
-
- /* flush before changing binding */
- FLUSH_VERTICES(ctx, _NEW_TEXTURE);
-
- _mesa_reference_texobj(&texUnit->CurrentTex[texObj->TargetIndex],
- texObj);
- assert(texUnit->CurrentTex[texObj->TargetIndex]);
- ctx->Texture.NumCurrentTexUsed = MAX2(ctx->Texture.NumCurrentTexUsed,
- unit + 1);
- texUnit->_BoundTextures |= (1 << texObj->TargetIndex);
-
-
- /* Pass BindTexture call to device driver */
- if (ctx->Driver.BindTexture) {
- ctx->Driver.BindTexture(ctx, unit, texObj->Target, texObj);
- }
-}
/**
- * Bind a named texture to the specified texture unit.
+ * OpenGL 4.5 / GL_ARB_direct_state_access glBindTextureUnit().
*
* \param unit texture unit.
* \param texture texture name.
{
GET_CURRENT_CONTEXT(ctx);
struct gl_texture_object *texObj;
+ struct gl_texture_unit *texUnit;
+
+ if (unit >= _mesa_max_tex_unit(ctx)) {
+ _mesa_error(ctx, GL_INVALID_VALUE, "glBindTextureUnit(unit=%u)", unit);
+ return;
+ }
+
+ texUnit = _mesa_get_tex_unit(ctx, unit);
+ assert(texUnit);
+ if (!texUnit) {
+ return;
+ }
if (MESA_VERBOSE & (VERBOSE_API|VERBOSE_TEXTURE))
_mesa_debug(ctx, "glBindTextureUnit %s %d\n",
return;
}
if (texObj->Target == 0) {
- _mesa_error(ctx, GL_INVALID_ENUM, "glBindTextureUnit(target)");
+ /* Texture object was gen'd but never bound so the target is not set */
+ _mesa_error(ctx, GL_INVALID_OPERATION, "glBindTextureUnit(target)");
return;
}
assert(valid_texture_object(texObj));
- bind_texture_unit(ctx, unit, texObj);
+ bind_texture(ctx, unit, texObj);
}
+/**
+ * OpenGL 4.4 / GL_ARB_multi_bind glBindTextures().
+ */
void GLAPIENTRY
_mesa_BindTextures(GLuint first, GLsizei count, const GLuint *textures)
{
return;
}
- /* Flush before changing bindings */
- FLUSH_VERTICES(ctx, 0);
-
- ctx->Texture.NumCurrentTexUsed = MAX2(ctx->Texture.NumCurrentTexUsed,
- first + count);
-
if (textures) {
/* Note that the error semantics for multi-bind commands differ from
* those of other GL commands.
texObj = _mesa_lookup_texture_locked(ctx, textures[i]);
if (texObj && texObj->Target != 0) {
- const gl_texture_index targetIndex = texObj->TargetIndex;
-
- if (texUnit->CurrentTex[targetIndex] != texObj) {
- /* Do the actual binding. The refcount on the previously
- * bound texture object will be decremented. It will be
- * deleted if the count hits zero.
- */
- _mesa_reference_texobj(&texUnit->CurrentTex[targetIndex],
- texObj);
-
- texUnit->_BoundTextures |= (1 << targetIndex);
- ctx->NewState |= _NEW_TEXTURE;
-
- /* Pass the BindTexture call to the device driver */
- if (ctx->Driver.BindTexture)
- ctx->Driver.BindTexture(ctx, first + i,
- texObj->Target, texObj);
- }
+ bind_texture(ctx, first + i, texObj);
} else {
/* The ARB_multi_bind spec says:
*
ctx->Const.MaxTextureCoordUnits);
}
-static inline struct gl_texture_unit *
-_mesa_get_tex_unit_err(struct gl_context *ctx, GLuint unit, const char *func)
-{
- if (unit < _mesa_max_tex_unit(ctx))
- return _mesa_get_tex_unit(ctx, unit);
-
- /* Note: This error is a precedent set by glBindTextures. From the GL 4.5
- * specification (30.10.2014) Section 8.1 ("Texture Objects"):
- *
- * "An INVALID_OPERATION error is generated if first + count is greater
- * than the number of texture image units supported by the
- * implementation."
- */
- _mesa_error(ctx, GL_INVALID_OPERATION, "%s(unit=%s)", func,
- _mesa_enum_to_string(GL_TEXTURE0+unit));
- return NULL;
-}
-
extern void
_mesa_copy_texture_state( const struct gl_context *src, struct gl_context *dst );
#include "main/core.h"
#include "main/context.h"
-#include "ir.h"
-#include "ir_uniform.h"
-#include "program/hash_table.h"
-#include "../glsl/program.h"
-#include "../glsl/ir_uniform.h"
-#include "../glsl/glsl_parser_extras.h"
#include "main/shaderapi.h"
#include "main/shaderobj.h"
-#include "uniforms.h"
+#include "main/uniforms.h"
+#include "glsl/ir.h"
+#include "glsl/ir_uniform.h"
+#include "glsl/glsl_parser_extras.h"
+#include "glsl/program.h"
+#include "program/hash_table.h"
extern "C" void GLAPIENTRY
#ifndef UNIFORMS_H
#define UNIFORMS_H
-#include "glheader.h"
+#include "main/glheader.h"
+#include "glsl/glsl_types.h"
+#include "glsl/ir_uniform.h"
#include "program/prog_parameter.h"
-#include "../glsl/glsl_types.h"
-#include "../glsl/ir_uniform.h"
#ifdef __cplusplus
extern "C" {
wrapper->closure = closure;
hash_table_call_foreach(this->ht, subtract_one_wrapper, wrapper);
+ free(wrapper);
}
/**
#include <stdio.h>
#include "main/compiler.h"
-#include "ir.h"
-#include "ir_visitor.h"
-#include "ir_expression_flattening.h"
-#include "ir_uniform.h"
-#include "glsl_types.h"
-#include "glsl_parser_extras.h"
-#include "../glsl/program.h"
-#include "ir_optimization.h"
-#include "ast.h"
-#include "linker.h"
-
#include "main/mtypes.h"
#include "main/shaderapi.h"
#include "main/shaderobj.h"
#include "main/uniforms.h"
-
+#include "glsl/ast.h"
+#include "glsl/ir.h"
+#include "glsl/ir_expression_flattening.h"
+#include "glsl/ir_visitor.h"
+#include "glsl/ir_optimization.h"
+#include "glsl/ir_uniform.h"
+#include "glsl/glsl_parser_extras.h"
+#include "glsl/glsl_types.h"
+#include "glsl/linker.h"
+#include "glsl/program.h"
#include "program/hash_table.h"
#include "program/prog_instruction.h"
#include "program/prog_optimize.h"
ptn_add_output_stores(c);
+ s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
+ s->info.num_textures = _mesa_fls(prog->SamplersUsed);
+ s->info.num_ubos = 0;
+ s->info.num_abos = 0;
+ s->info.num_ssbos = 0;
+ s->info.num_images = 0;
+ s->info.inputs_read = prog->InputsRead;
+ s->info.outputs_written = prog->OutputsWritten;
+ s->info.system_values_read = prog->SystemValuesRead;
+ s->info.uses_texture_gather = false;
+ s->info.uses_clip_distance_out = false;
+ s->info.separate_shader = false;
+
fail:
if (c->error) {
ralloc_free(s);
* DEALINGS IN THE SOFTWARE.
*/
-#include "ir.h"
-#include "glsl_types.h"
-#include "ir_visitor.h"
-#include "../glsl/program.h"
-#include "ir_uniform.h"
-
#include "main/mtypes.h"
+#include "glsl/glsl_types.h"
+#include "glsl/ir.h"
+#include "glsl/ir_uniform.h"
+#include "glsl/ir_visitor.h"
+#include "glsl/program.h"
#include "program/hash_table.h"
#include "program/prog_parameter.h"
#include "program/program.h"
#include "main/macros.h"
#include "main/varray.h"
+#include "glsl/ir_uniform.h"
+
#include "vbo/vbo.h"
#include "st_context.h"
#include "draw/draw_context.h"
#include "cso_cache/cso_context.h"
-#include "../glsl/ir_uniform.h"
-
/**
* This is very similar to vbo_all_varyings_in_vbos() but we are
if (unlikely(str == NULL))
return NULL;
- n = strlen(str);
- if (n > max)
- n = max;
-
+ n = strnlen(str, max);
ptr = ralloc_array(ctx, char, n + 1);
memcpy(ptr, str, n);
ptr[n] = '\0';
if (!str)
return NULL;
- n = strlen(str);
- if (n > max)
- n = max;
-
+ n = strnlen(str, max);
ptr = (char *) calloc(n + 1, sizeof(char));
if (!ptr)
return NULL;
prog->OutputsWritten |= BITFIELD64_BIT(var->data.location);
}
+ shader->info.inputs_read = prog->InputsRead;
+ shader->info.outputs_written = prog->OutputsWritten;
+
mesa_shader->num_uniform_components = shader->num_uniforms;
}