unsigned explicit_stream:1; /**< stream value assigned explicitly by shader code */
/** \} */
+ /** \name Vulkan qualifiers */
+ /** \{ */
+ unsigned vk_set:1;
+ /** \} */
+
+ /** \name Layout qualifiers for GL_ARB_tessellation_shader */
+ /** \{ */
+ /* tess eval input layout */
+ /* gs prim_type reused for primitive mode */
+ unsigned vertex_spacing:1;
+ unsigned ordering:1;
+ unsigned point_mode:1;
+ /* tess control output layout */
+ unsigned vertices:1;
+ /** \} */
+
+ /** \name Qualifiers for GL_ARB_shader_subroutine */
+ /** \{ */
+ unsigned subroutine:1; /**< Is this marked 'subroutine' */
+ unsigned subroutine_def:1; /**< Is this marked 'subroutine' with a list of types */
+ /** \} */
}
/** \brief Set of flags, accessed by name. */
q;
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_IMAGE:
case GLSL_TYPE_INTERFACE:
+ case GLSL_TYPE_FUNCTION:
case GLSL_TYPE_ATOMIC_UINT:
+ case GLSL_TYPE_SUBROUTINE:
/* I assume a comparison of a struct containing a sampler just
* ignores the sampler present in the type.
*/
hash_table *glsl_type::array_types = NULL;
hash_table *glsl_type::record_types = NULL;
hash_table *glsl_type::interface_types = NULL;
+hash_table *glsl_type::function_types = NULL;
+ hash_table *glsl_type::subroutine_types = NULL;
void *glsl_type::mem_ctx = NULL;
void
mtx_unlock(&glsl_type::mutex);
}
+glsl_type::glsl_type(const glsl_type *return_type,
+ const glsl_function_param *params, unsigned num_params) :
+ gl_type(0),
+ base_type(GLSL_TYPE_FUNCTION),
+ sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
+ sampler_type(0), interface_packing(0),
+ vector_elements(0), matrix_columns(0),
+ length(num_params)
+{
+ unsigned int i;
+
+ mtx_lock(&glsl_type::mutex);
+
+ init_ralloc_type_ctx();
+
+ this->fields.parameters = rzalloc_array(this->mem_ctx,
+ glsl_function_param, num_params + 1);
+
+ /* We store the return type as the first parameter */
+ this->fields.parameters[0].type = return_type;
+ this->fields.parameters[0].in = false;
+ this->fields.parameters[0].out = true;
+
+ /* We store the i'th parameter in slot i+1 */
+ for (i = 0; i < length; i++) {
+ this->fields.parameters[i + 1].type = params[i].type;
+ this->fields.parameters[i + 1].in = params[i].in;
+ this->fields.parameters[i + 1].out = params[i].out;
+ }
+
+ mtx_unlock(&glsl_type::mutex);
+}
+
+ glsl_type::glsl_type(const char *subroutine_name) :
+ gl_type(0),
+ base_type(GLSL_TYPE_SUBROUTINE),
+ sampler_dimensionality(0), sampler_shadow(0), sampler_array(0),
+ sampler_type(0), interface_packing(0),
+ vector_elements(0), matrix_columns(0),
+ length(0)
+ {
+ mtx_lock(&glsl_type::mutex);
+
+ init_ralloc_type_ctx();
+ assert(subroutine_name != NULL);
+ this->name = ralloc_strdup(this->mem_ctx, subroutine_name);
+ this->vector_elements = 1;
+ mtx_unlock(&glsl_type::mutex);
+ }
bool
glsl_type::contains_sampler() const
}
- static int
++static bool
+function_key_compare(const void *a, const void *b)
+{
+ const glsl_type *const key1 = (glsl_type *) a;
+ const glsl_type *const key2 = (glsl_type *) b;
+
+ if (key1->length != key2->length)
+ return false;
+
+ return memcmp(key1->fields.parameters, key2->fields.parameters,
+ (key1->length + 1) * sizeof(*key1->fields.parameters)) == 0;
+}
+
+
- static unsigned
++static uint32_t
+function_key_hash(const void *a)
+{
+ const glsl_type *const key = (glsl_type *) a;
+ char hash_key[128];
+ unsigned size = 0;
+
+ size = snprintf(hash_key, sizeof(hash_key), "%08x", key->length);
+
+ for (unsigned i = 0; i < key->length; i++) {
+ if (size >= sizeof(hash_key))
+ break;
+
+ size += snprintf(& hash_key[size], sizeof(hash_key) - size,
+ "%p", (void *) key->fields.structure[i].type);
+ }
+
- return hash_table_string_hash(& hash_key);
++ return _mesa_hash_string(hash_key);
+}
+
+const glsl_type *
+glsl_type::get_function_instance(const glsl_type *return_type,
+ const glsl_function_param *params,
+ unsigned num_params)
+{
+ const glsl_type key(return_type, params, num_params);
+
+ mtx_lock(&glsl_type::mutex);
+
+ if (function_types == NULL) {
- function_types = hash_table_ctor(64, function_key_hash,
- function_key_compare);
++ function_types = _mesa_hash_table_create(NULL, function_key_hash,
++ function_key_compare);
+ }
+
- const glsl_type *t = (glsl_type *) hash_table_find(function_types, &key);
- if (t == NULL) {
++ struct hash_entry *entry = _mesa_hash_table_search(function_types, &key);
++ if (entry == NULL) {
+ mtx_unlock(&glsl_type::mutex);
- t = new glsl_type(return_type, params, num_params);
++ const glsl_type *t = new glsl_type(return_type, params, num_params);
+ mtx_lock(&glsl_type::mutex);
+
- hash_table_insert(function_types, (void *) t, t);
++ entry = _mesa_hash_table_insert(function_types, t, (void *) t);
+ }
+
++ const glsl_type *t = (const glsl_type *)entry->data;
++
+ assert(t->base_type == GLSL_TYPE_FUNCTION);
+ assert(t->length == num_params);
+
+ mtx_unlock(&glsl_type::mutex);
+
+ return t;
+}
+
+
const glsl_type *
glsl_type::get_mul_type(const glsl_type *type_a, const glsl_type *type_b)
{
case GLSL_TYPE_IMAGE:
return 1;
-
+ case GLSL_TYPE_SUBROUTINE:
+ return 1;
+ case GLSL_TYPE_FUNCTION:
case GLSL_TYPE_SAMPLER:
case GLSL_TYPE_ATOMIC_UINT:
case GLSL_TYPE_VOID:
enum glsl_interface_packing packing,
const char *block_name);
+ /**
+ * Get the instance of a function type
+ */
+ static const glsl_type *get_function_instance(const struct glsl_type *return_type,
+ const glsl_function_param *parameters,
+ unsigned num_params);
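+ /* Illustrative use (not part of the original patch): building the type of
+  * a function "float f(int a, int b)" might look like
+  *
+  *    glsl_function_param params[2];
+  *    params[0].type = glsl_type::int_type;
+  *    params[0].in = true;  params[0].out = false;
+  *    params[1] = params[0];
+  *    const glsl_type *fn_type =
+  *       glsl_type::get_function_instance(glsl_type::float_type, params, 2);
+  */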
+ /**
+ * Get the instance of a subroutine type
+ */
+ static const glsl_type *get_subroutine_instance(const char *subroutine_name);
/**
* Get the type resulting from a multiplication of \p type_a * \p type_b
/** Hash table containing the known interface types. */
static struct hash_table *interface_types;
- static int record_key_compare(const void *a, const void *b);
+ /** Hash table containing the known function types. */
+ static struct hash_table *function_types;
+
+ /** Hash table containing the known subroutine types. */
+ static struct hash_table *subroutine_types;
+
+ static bool record_key_compare(const void *a, const void *b);
static unsigned record_key_hash(const void *key);
/**
* streams (as in ir_variable::stream). -1 otherwise.
*/
int stream;
+
++#ifdef __cplusplus
+ glsl_struct_field(const struct glsl_type *_type, const char *_name)
+ : type(_type), name(_name), location(-1), interpolation(0), centroid(0),
+ sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0),
+ stream(-1)
+ {
+ /* empty */
+ }
+
+ glsl_struct_field()
+ {
+ /* empty */
+ }
++#endif
+};
+
+struct glsl_function_param {
+ const struct glsl_type *type;
+
+ bool in;
+ bool out;
};
static inline unsigned int
blocks[i].NumUniforms =
(unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms);
+ blocks[i].Set = b->set;
+ blocks[i].Binding = b->binding;
+ blocks[i].IsShaderStorage = b->is_shader_storage;
i++;
}
blocks[i].NumUniforms =
(unsigned)(ptrdiff_t)(&variables[parcel.index] - blocks[i].Uniforms);
+ blocks[i].Set = b->set;
+ blocks[i].Binding = b->binding;
+ blocks[i].IsShaderStorage = b->is_shader_storage;
i++;
}
case GLSL_TYPE_IMAGE:
case GLSL_TYPE_ATOMIC_UINT:
case GLSL_TYPE_INTERFACE:
+ case GLSL_TYPE_FUNCTION:
case GLSL_TYPE_VOID:
+ case GLSL_TYPE_SUBROUTINE:
case GLSL_TYPE_ERROR:
/* All other types should have already been filtered by other
* paths in the caller.
--- /dev/null
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ *
+ * Authors:
+ * Jason Ekstrand (jason@jlekstrand.net)
+ *
+ */
+
++#include "nir.h"
+#include "nir_spirv.h"
+#include "nir_builder.h"
+#include "spirv.h"
+
+struct vtn_builder;
+struct vtn_decoration;
+
+enum vtn_value_type {
+ vtn_value_type_invalid = 0,
+ vtn_value_type_undef,
+ vtn_value_type_string,
+ vtn_value_type_decoration_group,
+ vtn_value_type_type,
+ vtn_value_type_constant,
+ vtn_value_type_deref,
+ vtn_value_type_function,
+ vtn_value_type_block,
+ vtn_value_type_ssa,
+ vtn_value_type_extension,
+};
+
+struct vtn_block {
+ /* Merge opcode if this block contains a merge; SpvOpNop otherwise. */
+ SpvOp merge_op;
+ uint32_t merge_block_id;
+ const uint32_t *label;
+ const uint32_t *branch;
+ nir_block *block;
+};
+
+struct vtn_function {
+ struct exec_node node;
+
+ nir_function_overload *overload;
+ struct vtn_block *start_block;
+
+ const uint32_t *end;
+};
+
+typedef bool (*vtn_instruction_handler)(struct vtn_builder *, uint32_t,
+ const uint32_t *, unsigned);
+
+struct vtn_ssa_value {
+ union {
+ nir_ssa_def *def;
+ struct vtn_ssa_value **elems;
+ };
+
+ /* For matrices, a transposed version of the value, or NULL if it hasn't
+ * been computed
+ */
+ struct vtn_ssa_value *transposed;
+
+ const struct glsl_type *type;
+};
+
+struct vtn_type {
+ const struct glsl_type *type;
+
+ /* for matrices, whether the matrix is stored row-major */
+ bool row_major;
+
+ /* for structs, the offset of each member */
+ unsigned *offsets;
+
+ /* for structs, whether it was decorated as a "non-SSBO-like" block */
+ bool block;
+
+ /* for structs, whether it was decorated as an "SSBO-like" block */
+ bool buffer_block;
+
+ /* for structs with block == true, whether this is a builtin block (i.e. a
+ * block that contains only builtins).
+ */
+ bool builtin_block;
+
+ /* for arrays and matrices, the array stride */
+ unsigned stride;
+
+ /* for arrays, the vtn_type for the elements of the array */
+ struct vtn_type *array_element;
+
+ /* for structures, the vtn_type for each member */
+ struct vtn_type **members;
+
+ /* Whether this type, or a parent type, has been decorated as a builtin */
+ bool is_builtin;
+
+ SpvBuiltIn builtin;
+};
+
+struct vtn_value {
+ enum vtn_value_type value_type;
+ const char *name;
+ struct vtn_decoration *decoration;
+ union {
+ void *ptr;
+ char *str;
+ struct vtn_type *type;
+ struct {
+ nir_constant *constant;
+ const struct glsl_type *const_type;
+ };
+ struct {
+ nir_deref_var *deref;
+ struct vtn_type *deref_type;
+ };
+ struct vtn_function *func;
+ struct vtn_block *block;
+ struct vtn_ssa_value *ssa;
+ vtn_instruction_handler ext_handler;
+ };
+};
+
+struct vtn_decoration {
+ struct vtn_decoration *next;
+ int member; /* -1 if not a member decoration */
+ const uint32_t *literals;
+ struct vtn_value *group;
+ SpvDecoration decoration;
+};
+
+struct vtn_builder {
+ nir_builder nb;
+
+ nir_shader *shader;
+ nir_function_impl *impl;
+ struct vtn_block *block;
+
+ /*
+ * In SPIR-V, constants are global, whereas in NIR, the load_const
+ * instruction we use is per-function. So while we parse each function, we
+ * keep a hash table of constants we've resolved to nir_ssa_value's so
+ * far, and we lazily resolve them when we see them used in a function.
+ */
+ struct hash_table *const_table;
+
+ /*
+ * Map from nir_block to the vtn_block which ends with it -- used for
+ * handling phi nodes.
+ */
+ struct hash_table *block_table;
+
+ /*
+ * NIR variable for each SPIR-V builtin.
+ */
+ nir_variable *builtins[42]; /* XXX need symbolic constant from SPIR-V header */
+
+ unsigned value_id_bound;
+ struct vtn_value *values;
+
+ SpvExecutionModel execution_model;
+ struct vtn_value *entry_point;
+
+ struct vtn_function *func;
+ struct exec_list functions;
+};
+
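+/* Claim the slot for the given SPIR-V result id in the builder's value
+ * table: assert it has not been defined yet, tag it with value_type and
+ * return it so the caller can fill in the payload.
+ */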
+static inline struct vtn_value *
+vtn_push_value(struct vtn_builder *b, uint32_t value_id,
+ enum vtn_value_type value_type)
+{
+ assert(value_id < b->value_id_bound);
+ assert(b->values[value_id].value_type == vtn_value_type_invalid);
+
+ b->values[value_id].value_type = value_type;
+
+ return &b->values[value_id];
+}
+
+static inline struct vtn_value *
+vtn_untyped_value(struct vtn_builder *b, uint32_t value_id)
+{
+ assert(value_id < b->value_id_bound);
+ return &b->values[value_id];
+}
+
+static inline struct vtn_value *
+vtn_value(struct vtn_builder *b, uint32_t value_id,
+ enum vtn_value_type value_type)
+{
+ struct vtn_value *val = vtn_untyped_value(b, value_id);
+ assert(val->value_type == value_type);
+ return val;
+}
+
+struct vtn_ssa_value *vtn_ssa_value(struct vtn_builder *b, uint32_t value_id);
+
+typedef void (*vtn_decoration_foreach_cb)(struct vtn_builder *,
+ struct vtn_value *,
+ int member,
+ const struct vtn_decoration *,
+ void *);
+
+void vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value,
+ vtn_decoration_foreach_cb cb, void *data);
+
+bool vtn_handle_glsl450_instruction(struct vtn_builder *b, uint32_t ext_opcode,
+ const uint32_t *words, unsigned count);
};
- VARYING_SLOT_CLIP_VERTEX, /* Does not appear in FS */
- VARYING_SLOT_CLIP_DIST0,
+/**
+ * Indexes for vertex program attributes.
+ * GL_NV_vertex_program aliases generic attributes over the conventional
+ * attributes. In GL_ARB_vertex_program shader the aliasing is optional.
+ * In GL_ARB_vertex_shader / OpenGL 2.0 the aliasing is disallowed (the
+ * generic attributes are distinct/separate).
+ */
+typedef enum
+{
+ VERT_ATTRIB_POS = 0,
+ VERT_ATTRIB_WEIGHT = 1,
+ VERT_ATTRIB_NORMAL = 2,
+ VERT_ATTRIB_COLOR0 = 3,
+ VERT_ATTRIB_COLOR1 = 4,
+ VERT_ATTRIB_FOG = 5,
+ VERT_ATTRIB_COLOR_INDEX = 6,
+ VERT_ATTRIB_EDGEFLAG = 7,
+ VERT_ATTRIB_TEX0 = 8,
+ VERT_ATTRIB_TEX1 = 9,
+ VERT_ATTRIB_TEX2 = 10,
+ VERT_ATTRIB_TEX3 = 11,
+ VERT_ATTRIB_TEX4 = 12,
+ VERT_ATTRIB_TEX5 = 13,
+ VERT_ATTRIB_TEX6 = 14,
+ VERT_ATTRIB_TEX7 = 15,
+ VERT_ATTRIB_POINT_SIZE = 16,
+ VERT_ATTRIB_GENERIC0 = 17,
+ VERT_ATTRIB_GENERIC1 = 18,
+ VERT_ATTRIB_GENERIC2 = 19,
+ VERT_ATTRIB_GENERIC3 = 20,
+ VERT_ATTRIB_GENERIC4 = 21,
+ VERT_ATTRIB_GENERIC5 = 22,
+ VERT_ATTRIB_GENERIC6 = 23,
+ VERT_ATTRIB_GENERIC7 = 24,
+ VERT_ATTRIB_GENERIC8 = 25,
+ VERT_ATTRIB_GENERIC9 = 26,
+ VERT_ATTRIB_GENERIC10 = 27,
+ VERT_ATTRIB_GENERIC11 = 28,
+ VERT_ATTRIB_GENERIC12 = 29,
+ VERT_ATTRIB_GENERIC13 = 30,
+ VERT_ATTRIB_GENERIC14 = 31,
+ VERT_ATTRIB_GENERIC15 = 32,
+ VERT_ATTRIB_MAX = 33
+} gl_vert_attrib;
+
+/**
+ * Symbolic constants to help iterate over
+ * specific blocks of vertex attributes.
+ *
+ * VERT_ATTRIB_FF
+ * includes all fixed function attributes as well as
+ * the aliased GL_NV_vertex_program shader attributes.
+ * VERT_ATTRIB_TEX
+ * includes the classic texture coordinate attributes.
+ * They are a subset of VERT_ATTRIB_FF.
+ * VERT_ATTRIB_GENERIC
+ * includes the OpenGL 2.0+ GLSL generic shader attributes.
+ * These alias the generic GL_ARB_vertex_shader attributes.
+ */
+#define VERT_ATTRIB_FF(i) (VERT_ATTRIB_POS + (i))
+#define VERT_ATTRIB_FF_MAX VERT_ATTRIB_GENERIC0
+
+#define VERT_ATTRIB_TEX(i) (VERT_ATTRIB_TEX0 + (i))
+#define VERT_ATTRIB_TEX_MAX MAX_TEXTURE_COORD_UNITS
+
+#define VERT_ATTRIB_GENERIC(i) (VERT_ATTRIB_GENERIC0 + (i))
+#define VERT_ATTRIB_GENERIC_MAX MAX_VERTEX_GENERIC_ATTRIBS
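+
+/* Illustrative use of the block macros above (handle_attrib() is a
+ * placeholder, not a real Mesa function):
+ *
+ *    for (unsigned i = 0; i < VERT_ATTRIB_TEX_MAX; i++)
+ *       handle_attrib(VERT_ATTRIB_TEX(i));
+ */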
+
+/**
+ * Bitflags for vertex attributes.
+ * These are used in bitfields in many places.
+ */
+/*@{*/
+#define VERT_BIT_POS BITFIELD64_BIT(VERT_ATTRIB_POS)
+#define VERT_BIT_WEIGHT BITFIELD64_BIT(VERT_ATTRIB_WEIGHT)
+#define VERT_BIT_NORMAL BITFIELD64_BIT(VERT_ATTRIB_NORMAL)
+#define VERT_BIT_COLOR0 BITFIELD64_BIT(VERT_ATTRIB_COLOR0)
+#define VERT_BIT_COLOR1 BITFIELD64_BIT(VERT_ATTRIB_COLOR1)
+#define VERT_BIT_FOG BITFIELD64_BIT(VERT_ATTRIB_FOG)
+#define VERT_BIT_COLOR_INDEX BITFIELD64_BIT(VERT_ATTRIB_COLOR_INDEX)
+#define VERT_BIT_EDGEFLAG BITFIELD64_BIT(VERT_ATTRIB_EDGEFLAG)
+#define VERT_BIT_TEX0 BITFIELD64_BIT(VERT_ATTRIB_TEX0)
+#define VERT_BIT_TEX1 BITFIELD64_BIT(VERT_ATTRIB_TEX1)
+#define VERT_BIT_TEX2 BITFIELD64_BIT(VERT_ATTRIB_TEX2)
+#define VERT_BIT_TEX3 BITFIELD64_BIT(VERT_ATTRIB_TEX3)
+#define VERT_BIT_TEX4 BITFIELD64_BIT(VERT_ATTRIB_TEX4)
+#define VERT_BIT_TEX5 BITFIELD64_BIT(VERT_ATTRIB_TEX5)
+#define VERT_BIT_TEX6 BITFIELD64_BIT(VERT_ATTRIB_TEX6)
+#define VERT_BIT_TEX7 BITFIELD64_BIT(VERT_ATTRIB_TEX7)
+#define VERT_BIT_POINT_SIZE BITFIELD64_BIT(VERT_ATTRIB_POINT_SIZE)
+#define VERT_BIT_GENERIC0 BITFIELD64_BIT(VERT_ATTRIB_GENERIC0)
+
+#define VERT_BIT(i) BITFIELD64_BIT(i)
+#define VERT_BIT_ALL BITFIELD64_RANGE(0, VERT_ATTRIB_MAX)
+
+#define VERT_BIT_FF(i) VERT_BIT(i)
+#define VERT_BIT_FF_ALL BITFIELD64_RANGE(0, VERT_ATTRIB_FF_MAX)
+#define VERT_BIT_TEX(i) VERT_BIT(VERT_ATTRIB_TEX(i))
+#define VERT_BIT_TEX_ALL \
+ BITFIELD64_RANGE(VERT_ATTRIB_TEX(0), VERT_ATTRIB_TEX_MAX)
+
+#define VERT_BIT_GENERIC(i) VERT_BIT(VERT_ATTRIB_GENERIC(i))
+#define VERT_BIT_GENERIC_ALL \
+ BITFIELD64_RANGE(VERT_ATTRIB_GENERIC(0), VERT_ATTRIB_GENERIC_MAX)
+/*@}*/
+
+
+/**
+ * Indexes for vertex shader outputs, geometry shader inputs/outputs, and
+ * fragment shader inputs.
+ *
+ * Note that some of these values are not available to all pipeline stages.
+ *
+ * When this enum is updated, the following code must be updated too:
+ * - vertResults (in prog_print.c's arb_output_attrib_string())
+ * - fragAttribs (in prog_print.c's arb_input_attrib_string())
+ * - _mesa_varying_slot_in_fs()
+ */
+typedef enum
+{
+ VARYING_SLOT_POS,
+ VARYING_SLOT_COL0, /* COL0 and COL1 must be contiguous */
+ VARYING_SLOT_COL1,
+ VARYING_SLOT_FOGC,
+ VARYING_SLOT_TEX0, /* TEX0-TEX7 must be contiguous */
+ VARYING_SLOT_TEX1,
+ VARYING_SLOT_TEX2,
+ VARYING_SLOT_TEX3,
+ VARYING_SLOT_TEX4,
+ VARYING_SLOT_TEX5,
+ VARYING_SLOT_TEX6,
+ VARYING_SLOT_TEX7,
+ VARYING_SLOT_PSIZ, /* Does not appear in FS */
+ VARYING_SLOT_BFC0, /* Does not appear in FS */
+ VARYING_SLOT_BFC1, /* Does not appear in FS */
+ VARYING_SLOT_EDGE, /* Does not appear in FS */
- VARYING_SLOT_MAX = VARYING_SLOT_VAR0 + MAX_VARYING
++ VARYING_SLOT_CLIP_VERTEX, /* Does not appear in FS */
++ VARYING_SLOT_CLIP_DIST0,
+ VARYING_SLOT_CLIP_DIST1,
+ VARYING_SLOT_PRIMITIVE_ID, /* Does not appear in VS */
+ VARYING_SLOT_LAYER, /* Appears as VS or GS output */
+ VARYING_SLOT_VIEWPORT, /* Appears as VS or GS output */
+ VARYING_SLOT_FACE, /* FS only */
+ VARYING_SLOT_PNTC, /* FS only */
++ VARYING_SLOT_TESS_LEVEL_OUTER, /* Only appears as TCS output. */
++ VARYING_SLOT_TESS_LEVEL_INNER, /* Only appears as TCS output. */
+ VARYING_SLOT_VAR0, /* First generic varying slot */
++ VARYING_SLOT_MAX = VARYING_SLOT_VAR0 + MAX_VARYING,
++ VARYING_SLOT_PATCH0 = VARYING_SLOT_MAX,
++ VARYING_SLOT_TESS_MAX = VARYING_SLOT_PATCH0 + MAX_VARYING
+} gl_varying_slot;
+
+
+/**
+ * Bitflags for varying slots.
+ */
+/*@{*/
+#define VARYING_BIT_POS BITFIELD64_BIT(VARYING_SLOT_POS)
+#define VARYING_BIT_COL0 BITFIELD64_BIT(VARYING_SLOT_COL0)
+#define VARYING_BIT_COL1 BITFIELD64_BIT(VARYING_SLOT_COL1)
+#define VARYING_BIT_FOGC BITFIELD64_BIT(VARYING_SLOT_FOGC)
+#define VARYING_BIT_TEX0 BITFIELD64_BIT(VARYING_SLOT_TEX0)
+#define VARYING_BIT_TEX1 BITFIELD64_BIT(VARYING_SLOT_TEX1)
+#define VARYING_BIT_TEX2 BITFIELD64_BIT(VARYING_SLOT_TEX2)
+#define VARYING_BIT_TEX3 BITFIELD64_BIT(VARYING_SLOT_TEX3)
+#define VARYING_BIT_TEX4 BITFIELD64_BIT(VARYING_SLOT_TEX4)
+#define VARYING_BIT_TEX5 BITFIELD64_BIT(VARYING_SLOT_TEX5)
+#define VARYING_BIT_TEX6 BITFIELD64_BIT(VARYING_SLOT_TEX6)
+#define VARYING_BIT_TEX7 BITFIELD64_BIT(VARYING_SLOT_TEX7)
+#define VARYING_BIT_TEX(U) BITFIELD64_BIT(VARYING_SLOT_TEX0 + (U))
+#define VARYING_BITS_TEX_ANY BITFIELD64_RANGE(VARYING_SLOT_TEX0, \
+ MAX_TEXTURE_COORD_UNITS)
+#define VARYING_BIT_PSIZ BITFIELD64_BIT(VARYING_SLOT_PSIZ)
+#define VARYING_BIT_BFC0 BITFIELD64_BIT(VARYING_SLOT_BFC0)
+#define VARYING_BIT_BFC1 BITFIELD64_BIT(VARYING_SLOT_BFC1)
+#define VARYING_BIT_EDGE BITFIELD64_BIT(VARYING_SLOT_EDGE)
+#define VARYING_BIT_CLIP_VERTEX BITFIELD64_BIT(VARYING_SLOT_CLIP_VERTEX)
+#define VARYING_BIT_CLIP_DIST0 BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0)
+#define VARYING_BIT_CLIP_DIST1 BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1)
+#define VARYING_BIT_PRIMITIVE_ID BITFIELD64_BIT(VARYING_SLOT_PRIMITIVE_ID)
+#define VARYING_BIT_LAYER BITFIELD64_BIT(VARYING_SLOT_LAYER)
+#define VARYING_BIT_VIEWPORT BITFIELD64_BIT(VARYING_SLOT_VIEWPORT)
+#define VARYING_BIT_FACE BITFIELD64_BIT(VARYING_SLOT_FACE)
+#define VARYING_BIT_PNTC BITFIELD64_BIT(VARYING_SLOT_PNTC)
+#define VARYING_BIT_VAR(V) BITFIELD64_BIT(VARYING_SLOT_VAR0 + (V))
+/*@}*/
+
+
+/**
+ * Fragment program results
+ */
+typedef enum
+{
+ FRAG_RESULT_DEPTH = 0,
+ FRAG_RESULT_STENCIL = 1,
+ /* If a single color should be written to all render targets, this
+ * register is written. No FRAG_RESULT_DATAn will be written.
+ */
+ FRAG_RESULT_COLOR = 2,
+ FRAG_RESULT_SAMPLE_MASK = 3,
+
+ /* FRAG_RESULT_DATAn are the per-render-target (GLSL gl_FragData[n]
+ * or ARB_fragment_program fragment.color[n]) color results. If
+ * any are written, FRAG_RESULT_COLOR will not be written.
+ */
+ FRAG_RESULT_DATA0 = 4,
+ FRAG_RESULT_MAX = (FRAG_RESULT_DATA0 + MAX_DRAW_BUFFERS)
+} gl_frag_result;
+
+
#endif /* SHADER_ENUMS_H */
#include <stdio.h>
#include <string.h>
#include "util/ralloc.h"
+ #include "util/strtod.h"
+extern "C" void
+_mesa_error_no_memory(const char *caller)
+{
+ fprintf(stderr, "Mesa error: out of memory in %s", caller);
+}
+
void
_mesa_warning(struct gl_context *ctx, const char *fmt, ...)
{
}
}
+ if (brw_init_pipe_control(brw, devinfo)) {
+ *dri_ctx_error = __DRI_CTX_ERROR_NO_MEMORY;
+ intelDestroyContext(driContextPriv);
+ return false;
+ }
+
brw_init_state(brw);
+#endif
intelInitExtensions(ctx);
/** @} */
} binding_table;
+ uint32_t *map_entries;
+ struct {
+ uint32_t index_count;
+ uint32_t *index;
+ } bind_map[8]; /* MAX_SETS from vulkan/private.h */
+
GLuint nr_params; /**< number of float params/constants */
GLuint nr_pull_params;
+ unsigned nr_image_params;
unsigned curb_read_length;
unsigned total_scratch;
return NULL;
}
- if (devinfo->gen == 9 && (revision == 2 || revision == 3 || revision == -1))
- return &brw_device_info_skl_early;
-
return devinfo;
}
+
+const char *
+brw_get_device_name(int devid)
+{
+ switch (devid) {
+#undef CHIPSET
+#define CHIPSET(id, family, name) case id: return name;
+#include "pci_ids/i965_pci_ids.h"
+ default:
+ return NULL;
+ }
+}
BRW_REGISTER_TYPE_D),
fs_reg(2));
- unsigned vec4_offset = instr->const_index[0] / 4;
+ unsigned vec4_offset = instr->const_index[1] / 4;
for (int i = 0; i < instr->num_components; i++)
- VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, i), surf_index,
+ VARYING_PULL_CONSTANT_LOAD(bld, offset(dest, bld, i), surf_index,
base_offset, vec4_offset + i);
} else {
fs_reg packed_consts = vgrf(glsl_type::float_type);
return false;
}
-- /* Scratch space is used for register spilling */
- if (c.base.last_scratch) {
- perf_debug("Geometry shader triggered register spilling. "
- "Try reducing the number of live vec4 values to "
- "improve performance.\n");
-
- c.prog_data.base.base.total_scratch
- = brw_get_scratch_size(c.base.last_scratch*REG_SIZE);
- }
-
- if (c.prog_data.base.base.total_scratch) {
+ output->mem_ctx = mem_ctx;
+ output->program = program;
+ output->program_size = program_size;
+ memcpy(&output->prog_data, &c.prog_data,
+ sizeof(output->prog_data));
+
+ return true;
+}
+
+bool
+brw_codegen_gs_prog(struct brw_context *brw,
+ struct gl_shader_program *prog,
+ struct brw_geometry_program *gp,
+ struct brw_gs_prog_key *key)
+{
+ struct brw_gs_compile_output output;
+ struct brw_stage_state *stage_state = &brw->gs.base;
+
+ if (brw_compile_gs_prog(brw, prog, gp, key, &output))
+ return false;
+
+ if (output.prog_data.base.base.total_scratch) {
brw_get_scratch_bo(brw, &stage_state->scratch_bo,
- c.prog_data.base.base.total_scratch *
+ output.prog_data.base.base.total_scratch *
brw->max_gs_threads);
}
}
nir_validate_shader(nir);
- brw_process_nir(nir, brw->intelScreen->devinfo, shader_prog, stage);
++ brw_process_nir(nir, brw->intelScreen->devinfo, shader_prog, stage, is_scalar);
+
+ static GLuint msg_id = 0;
+ _mesa_gl_debug(&brw->ctx, &msg_id,
+ MESA_DEBUG_SOURCE_SHADER_COMPILER,
+ MESA_DEBUG_TYPE_OTHER,
+ MESA_DEBUG_SEVERITY_NOTIFICATION,
- "%s NIR shader: %d inst\n",
- _mesa_shader_stage_to_abbrev(stage),
- count_nir_instrs(nir));
++ "%s NIR shader:\n",
++ _mesa_shader_stage_to_abbrev(stage));
+
+ return nir;
+}
+
+void
+brw_process_nir(nir_shader *nir,
+ const struct brw_device_info *devinfo,
+ const struct gl_shader_program *shader_prog,
- gl_shader_stage stage)
++ gl_shader_stage stage, bool is_scalar)
+{
+ bool debug_enabled = INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage);
+
nir_lower_global_vars_to_local(nir);
nir_validate_shader(nir);
nir_lower_atomics(nir);
nir_validate_shader(nir);
- nir_optimize(nir);
+ nir_optimize(nir, is_scalar);
- if (brw->gen >= 6) {
+ if (devinfo->gen >= 6) {
/* Try and fuse multiply-adds */
nir_opt_peephole_ffma(nir);
nir_validate_shader(nir);
_mesa_shader_stage_to_string(stage));
nir_print_shader(nir, stderr);
}
-
- return nir;
}
+
+ enum brw_reg_type
+ brw_type_for_nir_type(nir_alu_type type)
+ {
+ switch (type) {
+ case nir_type_unsigned:
+ return BRW_REGISTER_TYPE_UD;
+ case nir_type_bool:
+ case nir_type_int:
+ return BRW_REGISTER_TYPE_D;
+ case nir_type_float:
+ return BRW_REGISTER_TYPE_F;
+ default:
+ unreachable("unknown type");
+ }
+
+ return BRW_REGISTER_TYPE_F;
+ }
+
+ /* Returns the glsl_base_type corresponding to a nir_alu_type.
+ * This is used by both brw_vec4_nir and brw_fs_nir.
+ */
+ enum glsl_base_type
+ brw_glsl_base_type_for_nir_type(nir_alu_type type)
+ {
+ switch (type) {
+ case nir_type_float:
+ return GLSL_TYPE_FLOAT;
+
+ case nir_type_int:
+ return GLSL_TYPE_INT;
+
+ case nir_type_unsigned:
+ return GLSL_TYPE_UINT;
+
+ default:
+ unreachable("bad type");
+ }
+ }
nir_shader *brw_create_nir(struct brw_context *brw,
const struct gl_shader_program *shader_prog,
const struct gl_program *prog,
- gl_shader_stage stage);
+ gl_shader_stage stage,
+ bool is_scalar);
+
+ enum brw_reg_type brw_type_for_nir_type(nir_alu_type type);
+
+ enum glsl_base_type brw_glsl_base_type_for_nir_type(nir_alu_type type);
- gl_shader_stage stage);
+void
+brw_process_nir(nir_shader *nir,
+ const struct brw_device_info *devinfo,
+ const struct gl_shader_program *shader_prog,
++ gl_shader_stage stage, bool is_scalar);
+
#ifdef __cplusplus
}
#endif
int st_index = -1;
if (INTEL_DEBUG & DEBUG_SHADER_TIME)
- st_index = brw_get_shader_time_index(brw, prog, &c->vp->program.Base,
- ST_VS);
+ st_index = brw_get_shader_time_index(brw, prog, &vp->Base, ST_VS);
- if (unlikely(INTEL_DEBUG & DEBUG_VS))
+ if (unlikely(INTEL_DEBUG & DEBUG_VS) && shader->base.ir)
- brw_dump_ir("vertex", prog, &shader->base, &c->vp->program.Base);
+ brw_dump_ir("vertex", prog, &shader->base, &vp->Base);
+
+ if (!vp->Base.nir &&
+ (brw->intelScreen->compiler->scalar_vs ||
+ brw->intelScreen->compiler->glsl_compiler_options[MESA_SHADER_VERTEX].NirOptions != NULL)) {
+ /* Normally we generate NIR in LinkShader() or
+ * ProgramStringNotify(), but Mesa's fixed-function vertex program
+ * handling doesn't notify the driver at all. Just do it here, at
+ * the last minute, even though it's lame.
+ */
+ assert(vp->Base.Id == 0 && prog == NULL);
+ vp->Base.nir =
+ brw_create_nir(brw, NULL, &vp->Base, MESA_SHADER_VERTEX,
+ brw->intelScreen->compiler->scalar_vs);
+ }
if (brw->intelScreen->compiler->scalar_vs) {
- if (!c->vp->program.Base.nir) {
- /* Normally we generate NIR in LinkShader() or
- * ProgramStringNotify(), but Mesa's fixed-function vertex program
- * handling doesn't notify the driver at all. Just do it here, at
- * the last minute, even though it's lame.
- */
- assert(c->vp->program.Base.Id == 0 && prog == NULL);
- c->vp->program.Base.nir =
- brw_create_nir(brw, NULL, &c->vp->program.Base, MESA_SHADER_VERTEX);
- }
-
prog_data->base.dispatch_mode = DISPATCH_MODE_SIMD8;
fs_visitor v(brw->intelScreen->compiler, brw,
}
if (brw->gen >= 6) {
- uint64_t dummy;
-
ctx->Extensions.ARB_blend_func_extended =
+ brw->optionCache.info == NULL ||
!driQueryOptionb(&brw->optionCache, "disable_blend_func_extended");
ctx->Extensions.ARB_conditional_render_inverted = true;
ctx->Extensions.ARB_draw_buffers_blend = true;
ctx->Extensions.EXT_transform_feedback = true;
ctx->Extensions.OES_depth_texture_cube_map = true;
- ctx->Extensions.ARB_timer_query = brw->intelScreen->hw_has_timestamp;
+ /* Test if the kernel has the ioctl. */
- if (brw->bufmgr && drm_intel_reg_read(brw->bufmgr, TIMESTAMP, &dummy) == 0)
++ if (brw->intelScreen->hw_has_timestamp)
+ ctx->Extensions.ARB_timer_query = true;
/* Only enable this in core profile because other parts of Mesa behave
* slightly differently when the extension is enabled.
--- /dev/null
- assert(is_power_of_two(block_size));
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#define _DEFAULT_SOURCE
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <values.h>
+#include <assert.h>
+#include <linux/futex.h>
+#include <linux/memfd.h>
+#include <sys/time.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+
+#include "anv_private.h"
+
+#ifdef HAVE_VALGRIND
+#define VG_NOACCESS_READ(__ptr) ({ \
+ VALGRIND_MAKE_MEM_DEFINED((__ptr), sizeof(*(__ptr))); \
+ __typeof(*(__ptr)) __val = *(__ptr); \
+ VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr)));\
+ __val; \
+})
+#define VG_NOACCESS_WRITE(__ptr, __val) ({ \
+ VALGRIND_MAKE_MEM_UNDEFINED((__ptr), sizeof(*(__ptr))); \
+ *(__ptr) = (__val); \
+ VALGRIND_MAKE_MEM_NOACCESS((__ptr), sizeof(*(__ptr))); \
+})
+#else
+#define VG_NOACCESS_READ(__ptr) (*(__ptr))
+#define VG_NOACCESS_WRITE(__ptr, __val) (*(__ptr) = (__val))
+#endif
+
+/* Design goals:
+ *
+ * - Lock free (except when resizing underlying bos)
+ *
+ * - Constant time allocation with typically only one atomic
+ *
+ * - Multiple allocation sizes without fragmentation
+ *
+ * - Can grow while keeping addresses and offset of contents stable
+ *
+ * - All allocations within one bo so we can point one of the
+ * STATE_BASE_ADDRESS pointers at it.
+ *
+ * The overall design is a two-level allocator: top level is a fixed size, big
+ * block (8k) allocator, which operates out of a bo. Allocation is done by
+ * either pulling a block from the free list or growing the used range of the
+ * bo. Growing the range may run out of space in the bo which we then need to
+ * grow. Growing the bo is tricky in a multi-threaded, lockless environment:
+ * we need to keep all pointers and contents in the old map valid. GEM bos in
+ * general can't grow, but we use a trick: we create a memfd and use ftruncate
+ * to grow it as necessary. We mmap the new size and then create a gem bo for
+ * it using the new gem userptr ioctl. Without heavy-handed locking around
+ * our allocation fast-path, there isn't really a way to munmap the old mmap,
+ * so we just keep it around until garbage collection time. While the block
+ * allocator is lockless for normal operations, we block other threads trying
+ * to allocate while we're growing the map. It shouldn't happen often, and
+ * growing is fast anyway.
+ *
+ * At the next level we can use various sub-allocators. The state pool is a
+ * pool of smaller, fixed size objects, which operates much like the block
+ * pool. It uses a free list for freeing objects, but when it runs out of
+ * space it just allocates a new block from the block pool. This allocator is
+ * intended for longer lived state objects such as SURFACE_STATE and most
+ * other persistent state objects in the API. We may need to track more info
+ * with these objects and a pointer back to the CPU object (e.g. VkImage). In
+ * those cases we just allocate a slightly bigger object and put the extra
+ * state after the GPU state object.
+ *
+ * The state stream allocator works similarly to how the i965 DRI driver streams
+ * all its state. Even with Vulkan, we need to emit transient state (whether
+ * surface state base or dynamic state base), and for that we can just get a
+ * block and fill it up. These cases are local to a command buffer and the
+ * sub-allocator need not be thread safe. The streaming allocator gets a new
+ * block when it runs out of space and chains them together so they can be
+ * easily freed.
+ */
+
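+/* A minimal usage sketch (illustrative only, assuming an already-initialized
+ * struct anv_device *device); offsets handed out by the pool index into
+ * pool.map:
+ *
+ *    struct anv_block_pool pool;
+ *    anv_block_pool_init(&pool, device, 8192);
+ *    uint32_t offset = anv_block_pool_alloc(&pool);
+ *    void *ptr = pool.map + offset;
+ *    anv_block_pool_free(&pool, offset);
+ *    anv_block_pool_finish(&pool);
+ */
+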
+/* Allocations are always at least 64 byte aligned, so 1 is an invalid value.
+ * We use it to indicate the free list is empty. */
+#define EMPTY 1
+
+struct anv_mmap_cleanup {
+ void *map;
+ size_t size;
+ uint32_t gem_handle;
+};
+
+#define ANV_MMAP_CLEANUP_INIT ((struct anv_mmap_cleanup){0})
+
+static inline long
+sys_futex(void *addr1, int op, int val1,
+ struct timespec *timeout, void *addr2, int val3)
+{
+ return syscall(SYS_futex, addr1, op, val1, timeout, addr2, val3);
+}
+
+static inline int
+futex_wake(uint32_t *addr, int count)
+{
+ return sys_futex(addr, FUTEX_WAKE, count, NULL, NULL, 0);
+}
+
+static inline int
+futex_wait(uint32_t *addr, int32_t value)
+{
+ return sys_futex(addr, FUTEX_WAIT, value, NULL, NULL, 0);
+}
+
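+/* Thin wrapper around the memfd_create syscall, invoked via syscall(2)
+ * because the C library in use at the time did not provide a wrapper.
+ */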
+static inline int
+memfd_create(const char *name, unsigned int flags)
+{
+ return syscall(SYS_memfd_create, name, flags);
+}
+
+static inline uint32_t
+ilog2_round_up(uint32_t value)
+{
+ assert(value != 0);
+ return 32 - __builtin_clz(value - 1);
+}
+
+static inline uint32_t
+round_to_power_of_two(uint32_t value)
+{
+ return 1 << ilog2_round_up(value);
+}
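+/* For example, ilog2_round_up(17) == 5 and round_to_power_of_two(17) == 32;
+ * exact powers of two map to themselves.
+ */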
+
+static bool
+anv_free_list_pop(union anv_free_list *list, void **map, uint32_t *offset)
+{
+ union anv_free_list current, new, old;
+
+ current.u64 = list->u64;
+ while (current.offset != EMPTY) {
+ /* We have to add a memory barrier here so that the list head (and
+ * offset) gets read before we read the map pointer. This way we
+ * know that the map pointer is valid for the given offset at the
+ * point where we read it.
+ */
+ __sync_synchronize();
+
+ uint32_t *next_ptr = *map + current.offset;
+ new.offset = VG_NOACCESS_READ(next_ptr);
+ new.count = current.count + 1;
+ old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64);
+ if (old.u64 == current.u64) {
+ *offset = current.offset;
+ return true;
+ }
+ current = old;
+ }
+
+ return false;
+}
+
+static void
+anv_free_list_push(union anv_free_list *list, void *map, uint32_t offset)
+{
+ union anv_free_list current, old, new;
+ uint32_t *next_ptr = map + offset;
+
+ old = *list;
+ do {
+ current = old;
+ VG_NOACCESS_WRITE(next_ptr, current.offset);
+ new.offset = offset;
+ new.count = current.count + 1;
+ old.u64 = __sync_val_compare_and_swap(&list->u64, current.u64, new.u64);
+ } while (old.u64 != current.u64);
+}
+
+/* All pointers in the ptr_free_list are assumed to be page-aligned. This
+ * means that the bottom 12 bits should all be zero.
+ */
+#define PFL_COUNT(x) ((uintptr_t)(x) & 0xfff)
+#define PFL_PTR(x) ((void *)((uintptr_t)(x) & ~0xfff))
+#define PFL_PACK(ptr, count) ({ \
+ assert(((uintptr_t)(ptr) & 0xfff) == 0); \
+ (void *)((uintptr_t)(ptr) | (uintptr_t)((count) & 0xfff)); \
+})
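+/* For example (illustrative values): PFL_PACK((void *)0x7f1000, 5) yields
+ * (void *)0x7f1005, from which PFL_PTR() recovers 0x7f1000 and PFL_COUNT()
+ * recovers 5. The count serves as a generation tag so the compare-and-swap
+ * loops below can detect ABA reuse.
+ */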
+
+static bool
+anv_ptr_free_list_pop(void **list, void **elem)
+{
+ void *current = *list;
+ while (PFL_PTR(current) != NULL) {
+ void **next_ptr = PFL_PTR(current);
+ void *new_ptr = VG_NOACCESS_READ(next_ptr);
+ unsigned new_count = PFL_COUNT(current) + 1;
+ void *new = PFL_PACK(new_ptr, new_count);
+ void *old = __sync_val_compare_and_swap(list, current, new);
+ if (old == current) {
+ *elem = PFL_PTR(current);
+ return true;
+ }
+ current = old;
+ }
+
+ return false;
+}
+
+static void
+anv_ptr_free_list_push(void **list, void *elem)
+{
+ void *old, *current;
+ void **next_ptr = elem;
+
+ old = *list;
+ do {
+ current = old;
+ VG_NOACCESS_WRITE(next_ptr, PFL_PTR(current));
+ unsigned new_count = PFL_COUNT(current) + 1;
+ void *new = PFL_PACK(elem, new_count);
+ old = __sync_val_compare_and_swap(list, current, new);
+ } while (old != current);
+}
+
+static uint32_t
+anv_block_pool_grow(struct anv_block_pool *pool, uint32_t old_size);
+
+void
+anv_block_pool_init(struct anv_block_pool *pool,
+ struct anv_device *device, uint32_t block_size)
+{
- assert(state_size >= 64 && is_power_of_two(state_size));
++ assert(util_is_power_of_two(block_size));
+
+ pool->device = device;
+ pool->bo.gem_handle = 0;
+ pool->bo.offset = 0;
+ pool->block_size = block_size;
+ pool->free_list = ANV_FREE_LIST_EMPTY;
+ anv_vector_init(&pool->mmap_cleanups,
+ round_to_power_of_two(sizeof(struct anv_mmap_cleanup)), 128);
+
+ /* Immediately grow the pool so we'll have a backing bo. */
+ pool->state.next = 0;
+ pool->state.end = anv_block_pool_grow(pool, 0);
+}
+
+void
+anv_block_pool_finish(struct anv_block_pool *pool)
+{
+ struct anv_mmap_cleanup *cleanup;
+
+ anv_vector_foreach(cleanup, &pool->mmap_cleanups) {
+ if (cleanup->map)
+ munmap(cleanup->map, cleanup->size);
+ if (cleanup->gem_handle)
+ anv_gem_close(pool->device, cleanup->gem_handle);
+ }
+
+ anv_vector_finish(&pool->mmap_cleanups);
+
+ close(pool->fd);
+}
+
+static uint32_t
+anv_block_pool_grow(struct anv_block_pool *pool, uint32_t old_size)
+{
+ size_t size;
+ void *map;
+ int gem_handle;
+ struct anv_mmap_cleanup *cleanup;
+
+ if (old_size == 0) {
+ size = 32 * pool->block_size;
+ } else {
+ size = old_size * 2;
+ }
+
+ cleanup = anv_vector_add(&pool->mmap_cleanups);
+ if (!cleanup)
+ return 0;
+ *cleanup = ANV_MMAP_CLEANUP_INIT;
+
+ if (old_size == 0)
+ pool->fd = memfd_create("block pool", MFD_CLOEXEC);
+
+ if (pool->fd == -1)
+ return 0;
+
+ if (ftruncate(pool->fd, size) == -1)
+ return 0;
+
+ /* First try to see if mremap can grow the map in place. */
+ map = MAP_FAILED;
+ if (old_size > 0)
+ map = mremap(pool->map, old_size, size, 0);
+ if (map == MAP_FAILED) {
+ /* Just leak the old map until we destroy the pool. We can't munmap it
+ * without races or imposing locking on the block allocate fast path. On
+ * the whole, the leaked maps add up to less than the size of the
+ * current map. MAP_POPULATE seems like the right thing to do, but we
+ * should try to get some numbers.
+ */
+ map = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, pool->fd, 0);
+ cleanup->map = map;
+ cleanup->size = size;
+ }
+ if (map == MAP_FAILED)
+ return 0;
+
+ gem_handle = anv_gem_userptr(pool->device, map, size);
+ if (gem_handle == 0)
+ return 0;
+ cleanup->gem_handle = gem_handle;
+
+ /* Now that we've successfully allocated everything, we can write the new
+ * values back into the pool. */
+ pool->map = map;
+ pool->bo.gem_handle = gem_handle;
+ pool->bo.size = size;
+ pool->bo.map = map;
+ pool->bo.index = 0;
+
+ return size;
+}
+
+uint32_t
+anv_block_pool_alloc(struct anv_block_pool *pool)
+{
+ uint32_t offset;
+ struct anv_block_state state, old, new;
+
+ /* Try free list first. */
+ if (anv_free_list_pop(&pool->free_list, &pool->map, &offset)) {
+ assert(pool->map);
+ return offset;
+ }
+
+ restart:
+ state.u64 = __sync_fetch_and_add(&pool->state.u64, pool->block_size);
+ if (state.next < state.end) {
+ assert(pool->map);
+ return state.next;
+ } else if (state.next == state.end) {
+ /* We allocated the first block outside the pool, we have to grow it.
+ * pool->next_block acts as a mutex: threads that try to allocate now will
+ * get block indexes above the current limit and hit futex_wait
+ * below. */
+ new.next = state.next + pool->block_size;
+ new.end = anv_block_pool_grow(pool, state.end);
+ assert(new.end > 0);
+ old.u64 = __sync_lock_test_and_set(&pool->state.u64, new.u64);
+ if (old.next != state.next)
+ futex_wake(&pool->state.end, INT_MAX);
+ return state.next;
+ } else {
+ futex_wait(&pool->state.end, state.end);
+ goto restart;
+ }
+}
+
+void
+anv_block_pool_free(struct anv_block_pool *pool, uint32_t offset)
+{
+ anv_free_list_push(&pool->free_list, pool->map, offset);
+}
+
+static void
+anv_fixed_size_state_pool_init(struct anv_fixed_size_state_pool *pool,
+ size_t state_size)
+{
+ /* At least a cache line and must divide the block size. */
- assert(is_power_of_two(state.alloc_size));
++ assert(state_size >= 64 && util_is_power_of_two(state_size));
+
+ pool->state_size = state_size;
+ pool->free_list = ANV_FREE_LIST_EMPTY;
+ pool->block.next = 0;
+ pool->block.end = 0;
+}
+
+static uint32_t
+anv_fixed_size_state_pool_alloc(struct anv_fixed_size_state_pool *pool,
+ struct anv_block_pool *block_pool)
+{
+ uint32_t offset;
+ struct anv_block_state block, old, new;
+
+ /* Try free list first. */
+ if (anv_free_list_pop(&pool->free_list, &block_pool->map, &offset))
+ return offset;
+
+ /* If the free list was empty (or somebody raced us and took the items), we
+ * allocate a new item from the end of the block */
+ restart:
+ block.u64 = __sync_fetch_and_add(&pool->block.u64, pool->state_size);
+
+ if (block.next < block.end) {
+ return block.next;
+ } else if (block.next == block.end) {
+ offset = anv_block_pool_alloc(block_pool);
+ new.next = offset + pool->state_size;
+ new.end = offset + block_pool->block_size;
+ old.u64 = __sync_lock_test_and_set(&pool->block.u64, new.u64);
+ if (old.next != block.next)
+ futex_wake(&pool->block.end, INT_MAX);
+ return offset;
+ } else {
+ futex_wait(&pool->block.end, block.end);
+ goto restart;
+ }
+}
+
+static void
+anv_fixed_size_state_pool_free(struct anv_fixed_size_state_pool *pool,
+ struct anv_block_pool *block_pool,
+ uint32_t offset)
+{
+ anv_free_list_push(&pool->free_list, block_pool->map, offset);
+}
+
+void
+anv_state_pool_init(struct anv_state_pool *pool,
+ struct anv_block_pool *block_pool)
+{
+ pool->block_pool = block_pool;
+ for (unsigned i = 0; i < ANV_STATE_BUCKETS; i++) {
+ size_t size = 1 << (ANV_MIN_STATE_SIZE_LOG2 + i);
+ anv_fixed_size_state_pool_init(&pool->buckets[i], size);
+ }
+ VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false));
+}
+
+void
+anv_state_pool_finish(struct anv_state_pool *pool)
+{
+ VG(VALGRIND_DESTROY_MEMPOOL(pool));
+}
+
+struct anv_state
+anv_state_pool_alloc(struct anv_state_pool *pool, size_t size, size_t align)
+{
+ unsigned size_log2 = ilog2_round_up(size < align ? align : size);
+ assert(size_log2 <= ANV_MAX_STATE_SIZE_LOG2);
+ if (size_log2 < ANV_MIN_STATE_SIZE_LOG2)
+ size_log2 = ANV_MIN_STATE_SIZE_LOG2;
+ unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2;
+
+ struct anv_state state;
+ state.alloc_size = 1 << size_log2;
+ state.offset = anv_fixed_size_state_pool_alloc(&pool->buckets[bucket],
+ pool->block_pool);
+ state.map = pool->block_pool->map + state.offset;
+ VG(VALGRIND_MEMPOOL_ALLOC(pool, state.map, size));
+ return state;
+}
+
+void
+anv_state_pool_free(struct anv_state_pool *pool, struct anv_state state)
+{
++ assert(util_is_power_of_two(state.alloc_size));
+ unsigned size_log2 = ilog2_round_up(state.alloc_size);
+ assert(size_log2 >= ANV_MIN_STATE_SIZE_LOG2 &&
+ size_log2 <= ANV_MAX_STATE_SIZE_LOG2);
+ unsigned bucket = size_log2 - ANV_MIN_STATE_SIZE_LOG2;
+
+ VG(VALGRIND_MEMPOOL_FREE(pool, state.map));
+ anv_fixed_size_state_pool_free(&pool->buckets[bucket],
+ pool->block_pool, state.offset);
+}
+
+#define NULL_BLOCK 1
+struct stream_block {
+ uint32_t next;
+
+ /* The map for the BO at the time the block was given to us */
+ void *current_map;
+
+#ifdef HAVE_VALGRIND
+ void *_vg_ptr;
+#endif
+};
+
+/* The state stream allocator is a one-shot, single threaded allocator for
+ * variable sized blocks. We use it for allocating dynamic state.
+ */
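+/* Sketch of typical use: anv_state_stream_init(&stream, block_pool), then
+ * repeated anv_state_stream_alloc(&stream, size, align) calls hand out
+ * states whose .map and .offset remain usable until
+ * anv_state_stream_finish() returns all underlying blocks to the block pool.
+ */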
+void
+anv_state_stream_init(struct anv_state_stream *stream,
+ struct anv_block_pool *block_pool)
+{
+ stream->block_pool = block_pool;
+ stream->next = 0;
+ stream->end = 0;
+ stream->current_block = NULL_BLOCK;
+
+ VG(VALGRIND_CREATE_MEMPOOL(stream, 0, false));
+}
+
+void
+anv_state_stream_finish(struct anv_state_stream *stream)
+{
+ struct stream_block *sb;
+ uint32_t block, next_block;
+
+ block = stream->current_block;
+ while (block != NULL_BLOCK) {
+ sb = stream->block_pool->map + block;
+ next_block = VG_NOACCESS_READ(&sb->next);
+ VG(VALGRIND_MEMPOOL_FREE(stream, VG_NOACCESS_READ(&sb->_vg_ptr)));
+ anv_block_pool_free(stream->block_pool, block);
+ block = next_block;
+ }
+
+ VG(VALGRIND_DESTROY_MEMPOOL(stream));
+}
+
+struct anv_state
+anv_state_stream_alloc(struct anv_state_stream *stream,
+ uint32_t size, uint32_t alignment)
+{
+ struct stream_block *sb;
+ struct anv_state state;
+ uint32_t block;
+
+ state.offset = align_u32(stream->next, alignment);
+ if (state.offset + size > stream->end) {
+ block = anv_block_pool_alloc(stream->block_pool);
+ void *current_map = stream->block_pool->map;
+ sb = current_map + block;
+ VG_NOACCESS_WRITE(&sb->current_map, current_map);
+ VG_NOACCESS_WRITE(&sb->next, stream->current_block);
+ VG(VG_NOACCESS_WRITE(&sb->_vg_ptr, 0));
+ stream->current_block = block;
+ stream->next = block + sizeof(*sb);
+ stream->end = block + stream->block_pool->block_size;
+ state.offset = align_u32(stream->next, alignment);
+ assert(state.offset + size <= stream->end);
+ }
+
+ sb = stream->block_pool->map + stream->current_block;
+ void *current_map = VG_NOACCESS_READ(&sb->current_map);
+
+ state.map = current_map + state.offset;
+ state.alloc_size = size;
+
+#ifdef HAVE_VALGRIND
+ void *vg_ptr = VG_NOACCESS_READ(&sb->_vg_ptr);
+ if (vg_ptr == NULL) {
+ vg_ptr = state.map;
+ VG_NOACCESS_WRITE(&sb->_vg_ptr, vg_ptr);
+ VALGRIND_MEMPOOL_ALLOC(stream, vg_ptr, size);
+ } else {
+ ptrdiff_t vg_offset = vg_ptr - current_map;
+ assert(vg_offset >= stream->current_block &&
+ vg_offset < stream->end);
+ VALGRIND_MEMPOOL_CHANGE(stream, vg_ptr, vg_ptr,
+ (state.offset + size) - vg_offset);
+ }
+#endif
+
+ stream->next = state.offset + size;
+
+ return state;
+}
+
+struct bo_pool_bo_link {
+ struct bo_pool_bo_link *next;
+ struct anv_bo bo;
+};
+
+void
+anv_bo_pool_init(struct anv_bo_pool *pool,
+ struct anv_device *device, uint32_t bo_size)
+{
+ pool->device = device;
+ pool->bo_size = bo_size;
+ pool->free_list = NULL;
+
+ VG(VALGRIND_CREATE_MEMPOOL(pool, 0, false));
+}
+
+void
+anv_bo_pool_finish(struct anv_bo_pool *pool)
+{
+ struct bo_pool_bo_link *link = PFL_PTR(pool->free_list);
+ while (link != NULL) {
+ struct bo_pool_bo_link link_copy = VG_NOACCESS_READ(link);
+
+ anv_gem_munmap(link_copy.bo.map, pool->bo_size);
+ anv_gem_close(pool->device, link_copy.bo.gem_handle);
+ link = link_copy.next;
+ }
+
+ VG(VALGRIND_DESTROY_MEMPOOL(pool));
+}
+
+VkResult
+anv_bo_pool_alloc(struct anv_bo_pool *pool, struct anv_bo *bo)
+{
+ VkResult result;
+
+ void *next_free_void;
+ if (anv_ptr_free_list_pop(&pool->free_list, &next_free_void)) {
+ struct bo_pool_bo_link *next_free = next_free_void;
+ *bo = VG_NOACCESS_READ(&next_free->bo);
+ assert(bo->map == next_free);
+ assert(bo->size == pool->bo_size);
+
+ VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, pool->bo_size));
+
+ return VK_SUCCESS;
+ }
+
+ struct anv_bo new_bo;
+
+ result = anv_bo_init_new(&new_bo, pool->device, pool->bo_size);
+ if (result != VK_SUCCESS)
+ return result;
+
+ assert(new_bo.size == pool->bo_size);
+
+ new_bo.map = anv_gem_mmap(pool->device, new_bo.gem_handle, 0, pool->bo_size);
+ if (new_bo.map == NULL) {
+ anv_gem_close(pool->device, new_bo.gem_handle);
+ return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
+ }
+
+ *bo = new_bo;
+
+ VG(VALGRIND_MEMPOOL_ALLOC(pool, bo->map, pool->bo_size));
+
+ return VK_SUCCESS;
+}
+
+void
+anv_bo_pool_free(struct anv_bo_pool *pool, const struct anv_bo *bo)
+{
+ struct bo_pool_bo_link *link = bo->map;
+ link->bo = *bo;
+
+ VG(VALGRIND_MEMPOOL_FREE(pool, bo->map));
+ anv_ptr_free_list_push(&pool->free_list, link);
+}
--- /dev/null
- struct brw_vs_compile c;
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#include <sys/stat.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "anv_private.h"
+
+#include <brw_context.h>
+#include <brw_wm.h> /* brw_new_shader_program is here */
+#include <brw_nir.h>
+
+#include <brw_vs.h>
+#include <brw_gs.h>
+#include <brw_cs.h>
+
+#include <mesa/main/shaderobj.h>
+#include <mesa/main/fbobject.h>
+#include <mesa/main/context.h>
+#include <mesa/program/program.h>
+#include <glsl/program.h>
+
+/* XXX: We need this to keep symbols in nir.h from conflicting with the
+ * generated GEN command packing headers. We need to fix *both* to not
+ * define something as generic as LOAD.
+ */
+#undef LOAD
+
+#include <glsl/nir/nir_spirv.h>
+
+#define SPIR_V_MAGIC_NUMBER 0x07230203
+
+static void
+fail_if(int cond, const char *format, ...)
+{
+ va_list args;
+
+ if (!cond)
+ return;
+
+ va_start(args, format);
+ vfprintf(stderr, format, args);
+ va_end(args);
+
+ exit(1);
+}
+
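+/* Build the flat binding table layout for one shader stage: each descriptor
+ * set gets a contiguous run of surface indices (biased by MAX_RTS for the
+ * fragment stage so render targets come first), recorded in
+ * prog_data->map_entries and prog_data->bind_map for later lookup.
+ */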
+static VkResult
+set_binding_table_layout(struct brw_stage_prog_data *prog_data,
+ struct anv_pipeline *pipeline, uint32_t stage)
+{
+ uint32_t bias, count, k, *map;
+ struct anv_pipeline_layout *layout = pipeline->layout;
+
+ /* No layout is valid for shaders that don't bind any resources. */
+ if (pipeline->layout == NULL)
+ return VK_SUCCESS;
+
+ if (stage == VK_SHADER_STAGE_FRAGMENT)
+ bias = MAX_RTS;
+ else
+ bias = 0;
+
+ count = layout->stage[stage].surface_count;
+ prog_data->map_entries =
+ (uint32_t *) malloc(count * sizeof(prog_data->map_entries[0]));
+ if (prog_data->map_entries == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ k = bias;
+ map = prog_data->map_entries;
+ for (uint32_t i = 0; i < layout->num_sets; i++) {
+ prog_data->bind_map[i].index = map;
+ for (uint32_t j = 0; j < layout->set[i].layout->stage[stage].surface_count; j++)
+ *map++ = k++;
+
+ prog_data->bind_map[i].index_count =
+ layout->set[i].layout->stage[stage].surface_count;
+ }
+
+ return VK_SUCCESS;
+}
+
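+/* Copy a compiled shader binary into the pipeline's program stream and
+ * return the stream offset at which it was placed.
+ */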
+static uint32_t
+upload_kernel(struct anv_pipeline *pipeline, const void *data, size_t size)
+{
+ struct anv_state state =
+ anv_state_stream_alloc(&pipeline->program_stream, size, 64);
+
+ assert(size < pipeline->program_stream.block_pool->block_size);
+
+ memcpy(state.map, data, size);
+
+ return state.offset;
+}
+
+static void
+brw_vs_populate_key(struct brw_context *brw,
+ struct brw_vertex_program *vp,
+ struct brw_vs_prog_key *key)
+{
+ struct gl_context *ctx = &brw->ctx;
+ /* BRW_NEW_VERTEX_PROGRAM */
+ struct gl_program *prog = (struct gl_program *) vp;
+
+ memset(key, 0, sizeof(*key));
+
+ /* Just upload the program verbatim for now. Always send it all
+ * the inputs it asks for, whether they are varying or not.
+ */
+ key->base.program_string_id = vp->id;
+ brw_setup_vue_key_clip_info(brw, &key->base,
+ vp->program.Base.UsesClipDistanceOut);
+
+ /* _NEW_POLYGON */
+ if (brw->gen < 6) {
+ key->copy_edgeflag = (ctx->Polygon.FrontMode != GL_FILL ||
+ ctx->Polygon.BackMode != GL_FILL);
+ }
+
+ if (prog->OutputsWritten & (VARYING_BIT_COL0 | VARYING_BIT_COL1 |
+ VARYING_BIT_BFC0 | VARYING_BIT_BFC1)) {
+ /* _NEW_LIGHT | _NEW_BUFFERS */
+ key->clamp_vertex_color = ctx->Light._ClampVertexColor;
+ }
+
+ /* _NEW_POINT */
+ if (brw->gen < 6 && ctx->Point.PointSprite) {
+ for (int i = 0; i < 8; i++) {
+ if (ctx->Point.CoordReplace[i])
+ key->point_coord_replace |= (1 << i);
+ }
+ }
+
+ /* _NEW_TEXTURE */
+ brw_populate_sampler_prog_key_data(ctx, prog, brw->vs.base.sampler_count,
+ &key->base.tex);
+}
+
+static bool
+really_do_vs_prog(struct brw_context *brw,
+ struct gl_shader_program *prog,
+ struct brw_vertex_program *vp,
+ struct brw_vs_prog_key *key, struct anv_pipeline *pipeline)
+{
+ GLuint program_size;
+ const GLuint *program;
- memset(&c, 0, sizeof(c));
- memcpy(&c.key, key, sizeof(*key));
+ struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
+ struct brw_stage_prog_data *stage_prog_data = &prog_data->base.base;
+ void *mem_ctx;
+ struct gl_shader *vs = NULL;
+
+ if (prog)
+ vs = prog->_LinkedShaders[MESA_SHADER_VERTEX];
+
- c.vp = vp;
-
+ memset(prog_data, 0, sizeof(*prog_data));
+
+ mem_ctx = ralloc_context(NULL);
+
- param_count += c.key.base.nr_userclip_plane_consts * 4;
+ /* Allocate the references to the uniforms that will end up in the
+ * prog_data associated with the compiled program, and which will be freed
+ * by the state cache.
+ */
+ int param_count;
+ if (vs) {
+ /* We add padding around uniform values below vec4 size, with the worst
+ * case being a float value that gets blown up to a vec4, so be
+ * conservative here.
+ */
+ param_count = vs->num_uniform_components * 4;
+
+ } else {
+ param_count = vp->program.Base.Parameters->NumParameters * 4;
+ }
+ /* vec4_visitor::setup_uniform_clipplane_values() also uploads user clip
+ * planes as uniforms.
+ */
- if (c.key.copy_edgeflag) {
++ param_count += key->base.nr_userclip_plane_consts * 4;
+
+ /* Setting nr_params here NOT to the size of the param and pull_param
+ * arrays, but to the number of uniform components vec4_visitor
+ * needs. vec4_visitor::setup_uniforms() will set it back to a proper value.
+ */
+ stage_prog_data->nr_params = ALIGN(param_count, 4) / 4;
+ if (vs) {
+ stage_prog_data->nr_params += vs->num_samplers;
+ }
+
+ GLbitfield64 outputs_written = vp->program.Base.OutputsWritten;
+ prog_data->inputs_read = vp->program.Base.InputsRead;
+
- if (c.key.point_coord_replace & (1 << i))
++ if (key->copy_edgeflag) {
+ outputs_written |= BITFIELD64_BIT(VARYING_SLOT_EDGE);
+ prog_data->inputs_read |= VERT_BIT_EDGEFLAG;
+ }
+
+ if (brw->gen < 6) {
+ /* Put dummy slots into the VUE for the SF to put the replaced
+ * point sprite coords in. We shouldn't need these dummy slots,
+ * which take up precious URB space, but it would mean that the SF
+ * doesn't get nice aligned pairs of input coords into output
+ * coords, which would be a pain to handle.
+ */
+ for (int i = 0; i < 8; i++) {
- if (c.key.base.userclip_active) {
++ if (key->point_coord_replace & (1 << i))
+ outputs_written |= BITFIELD64_BIT(VARYING_SLOT_TEX0 + i);
+ }
+
+ /* if back colors are written, allocate slots for front colors too */
+ if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC0))
+ outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL0);
+ if (outputs_written & BITFIELD64_BIT(VARYING_SLOT_BFC1))
+ outputs_written |= BITFIELD64_BIT(VARYING_SLOT_COL1);
+ }
+
+ /* In order for legacy clipping to work, we need to populate the clip
+ * distance varying slots whenever clipping is enabled, even if the vertex
+ * shader doesn't write to gl_ClipDistance.
+ */
- program = brw_vs_emit(brw, prog, &c, prog_data, mem_ctx, &program_size);
++ if (key->base.userclip_active) {
+ outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST0);
+ outputs_written |= BITFIELD64_BIT(VARYING_SLOT_CLIP_DIST1);
+ }
+
+ brw_compute_vue_map(brw->intelScreen->devinfo,
+ &prog_data->base.vue_map, outputs_written);
+
+ set_binding_table_layout(&prog_data->base.base, pipeline,
+ VK_SHADER_STAGE_VERTEX);
+
+ /* Emit GEN4 code.
+ */
- program = brw_vs_emit(brw, prog, &c, prog_data, mem_ctx, &program_size);
++ program = brw_vs_emit(brw, mem_ctx, key, prog_data, &vp->program,
++ prog, &program_size);
+ if (program == NULL) {
+ ralloc_free(mem_ctx);
+ return false;
+ }
+
+ pipeline->vs_simd8 = upload_kernel(pipeline, program, program_size);
+
+ ralloc_free(mem_ctx);
+
+ return true;
+}
+
+void brw_wm_populate_key(struct brw_context *brw,
+ struct brw_fragment_program *fp,
+ struct brw_wm_prog_key *key)
+{
+ struct gl_context *ctx = &brw->ctx;
+ struct gl_program *prog = (struct gl_program *) brw->fragment_program;
+ GLuint lookup = 0;
+ GLuint line_aa;
+ bool program_uses_dfdy = fp->program.UsesDFdy;
+ struct gl_framebuffer draw_buffer;
+ bool multisample_fbo;
+
+ memset(key, 0, sizeof(*key));
+
+ for (int i = 0; i < MAX_SAMPLERS; i++) {
+ /* Assume color sampler, no swizzling. */
+ key->tex.swizzles[i] = SWIZZLE_XYZW;
+ }
+
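+ /* The standalone compiler context never has a real drawbuffer, so point
+ * ctx->DrawBuffer at a stack-allocated one with harmless defaults while
+ * the key is populated; it is detached again at the end of this function.
+ */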
+ /* A non-zero framebuffer name indicates that the framebuffer was created by
+ * the user rather than the window system. */
+ draw_buffer.Name = 1;
+ draw_buffer.Visual.samples = 1;
+ draw_buffer._NumColorDrawBuffers = 1;
+ draw_buffer.Width = 400;
+ draw_buffer.Height = 400;
+ ctx->DrawBuffer = &draw_buffer;
+
+ multisample_fbo = ctx->DrawBuffer->Visual.samples > 1;
+
+ /* Build the index for table lookup
+ */
+ if (brw->gen < 6) {
+ /* _NEW_COLOR */
+ if (fp->program.UsesKill || ctx->Color.AlphaEnabled)
+ lookup |= IZ_PS_KILL_ALPHATEST_BIT;
+
+ if (fp->program.Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
+ lookup |= IZ_PS_COMPUTES_DEPTH_BIT;
+
+ /* _NEW_DEPTH */
+ if (ctx->Depth.Test)
+ lookup |= IZ_DEPTH_TEST_ENABLE_BIT;
+
+ if (ctx->Depth.Test && ctx->Depth.Mask) /* ?? */
+ lookup |= IZ_DEPTH_WRITE_ENABLE_BIT;
+
+ /* _NEW_STENCIL | _NEW_BUFFERS */
+ if (ctx->Stencil._Enabled) {
+ lookup |= IZ_STENCIL_TEST_ENABLE_BIT;
+
+ if (ctx->Stencil.WriteMask[0] ||
+ ctx->Stencil.WriteMask[ctx->Stencil._BackFace])
+ lookup |= IZ_STENCIL_WRITE_ENABLE_BIT;
+ }
+ key->iz_lookup = lookup;
+ }
+
+ line_aa = AA_NEVER;
+
+ /* _NEW_LINE, _NEW_POLYGON, BRW_NEW_REDUCED_PRIMITIVE */
+ if (ctx->Line.SmoothFlag) {
+ if (brw->reduced_primitive == GL_LINES) {
+ line_aa = AA_ALWAYS;
+ }
+ else if (brw->reduced_primitive == GL_TRIANGLES) {
+ if (ctx->Polygon.FrontMode == GL_LINE) {
+ line_aa = AA_SOMETIMES;
+
+ if (ctx->Polygon.BackMode == GL_LINE ||
+ (ctx->Polygon.CullFlag &&
+ ctx->Polygon.CullFaceMode == GL_BACK))
+ line_aa = AA_ALWAYS;
+ }
+ else if (ctx->Polygon.BackMode == GL_LINE) {
+ line_aa = AA_SOMETIMES;
+
+ if ((ctx->Polygon.CullFlag &&
+ ctx->Polygon.CullFaceMode == GL_FRONT))
+ line_aa = AA_ALWAYS;
+ }
+ }
+ }
+
+ key->line_aa = line_aa;
+
+ /* _NEW_HINT */
+ key->high_quality_derivatives =
+ ctx->Hint.FragmentShaderDerivative == GL_NICEST;
+
+ if (brw->gen < 6)
+ key->stats_wm = brw->stats_wm;
+
+ /* _NEW_LIGHT */
+ key->flat_shade = (ctx->Light.ShadeModel == GL_FLAT);
+
+ /* _NEW_FRAG_CLAMP | _NEW_BUFFERS */
+ key->clamp_fragment_color = ctx->Color._ClampFragmentColor;
+
+ /* _NEW_TEXTURE */
+ brw_populate_sampler_prog_key_data(ctx, prog, brw->wm.base.sampler_count,
+ &key->tex);
+
+ /* _NEW_BUFFERS */
+ /*
+ * Include the draw buffer origin and height so that we can calculate
+ * fragment position values relative to the bottom left of the drawable,
+ * from the incoming screen origin relative position we get as part of our
+ * payload.
+ *
+ * This is only needed for the WM_WPOSXY opcode when the fragment program
+ * uses the gl_FragCoord input.
+ *
+ * We could avoid recompiling by including this as a constant referenced by
+ * our program, but if we were to do that it would also be nice to handle
+ * getting that constant updated at batchbuffer submit time (when we
+ * hold the lock and know where the buffer really is) rather than at emit
+ * time when we don't hold the lock and are just guessing. We could also
+ * just avoid using this as key data if the program doesn't use
+ * fragment.position.
+ *
+ * For DRI2 the origin_x/y will always be (0,0) but we still need the
+ * drawable height in order to invert the Y axis.
+ */
+ if (fp->program.Base.InputsRead & VARYING_BIT_POS) {
+ key->drawable_height = ctx->DrawBuffer->Height;
+ }
+
+ if ((fp->program.Base.InputsRead & VARYING_BIT_POS) || program_uses_dfdy) {
+ key->render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
+ }
+
+ /* _NEW_BUFFERS */
+ key->nr_color_regions = ctx->DrawBuffer->_NumColorDrawBuffers;
+
+ /* _NEW_MULTISAMPLE, _NEW_COLOR, _NEW_BUFFERS */
+ key->replicate_alpha = ctx->DrawBuffer->_NumColorDrawBuffers > 1 &&
+ (ctx->Multisample.SampleAlphaToCoverage || ctx->Color.AlphaEnabled);
+
+ /* _NEW_BUFFERS _NEW_MULTISAMPLE */
+ /* Ignore sample qualifier while computing this flag. */
+ key->persample_shading =
+ _mesa_get_min_invocations_per_fragment(ctx, &fp->program, true) > 1;
+ if (key->persample_shading)
+ key->persample_2x = ctx->DrawBuffer->Visual.samples == 2;
+
+ key->compute_pos_offset =
+ _mesa_get_min_invocations_per_fragment(ctx, &fp->program, false) > 1 &&
+ fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_POS;
+
+ key->compute_sample_id =
+ multisample_fbo &&
+ ctx->Multisample.Enabled &&
+ (fp->program.Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_ID);
+
+ /* BRW_NEW_VUE_MAP_GEOM_OUT */
+ if (brw->gen < 6 || _mesa_bitcount_64(fp->program.Base.InputsRead &
+ BRW_FS_VARYING_INPUT_MASK) > 16)
+ key->input_slots_valid = brw->vue_map_geom_out.slots_valid;
+
+
+ /* _NEW_COLOR | _NEW_BUFFERS */
+ /* Pre-gen6, the hardware alpha test always used each render
+ * target's alpha to do alpha test, as opposed to render target 0's alpha
+ * like GL requires. Fix that by building the alpha test into the
+ * shader, and we'll skip enabling the fixed function alpha test.
+ */
+ if (brw->gen < 6 && ctx->DrawBuffer->_NumColorDrawBuffers > 1 && ctx->Color.AlphaEnabled) {
+ key->alpha_test_func = ctx->Color.AlphaFunc;
+ key->alpha_test_ref = ctx->Color.AlphaRef;
+ }
+
+ /* The unique fragment program ID */
+ key->program_string_id = fp->id;
+
+ ctx->DrawBuffer = NULL;
+}
+
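+/* Translate the fragment shader's gl_FragDepth layout qualifier into the
+ * pixel-shader computed-depth mode: GREATER/LESS layouts map to the
+ * conditional modes, an 'unchanged' layout means the hardware can ignore the
+ * write, and any other depth write enables the full computed-depth mode.
+ */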
+static uint8_t
+computed_depth_mode(struct gl_fragment_program *fp)
+{
+ if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) {
+ switch (fp->FragDepthLayout) {
+ case FRAG_DEPTH_LAYOUT_NONE:
+ case FRAG_DEPTH_LAYOUT_ANY:
+ return BRW_PSCDEPTH_ON;
+ case FRAG_DEPTH_LAYOUT_GREATER:
+ return BRW_PSCDEPTH_ON_GE;
+ case FRAG_DEPTH_LAYOUT_LESS:
+ return BRW_PSCDEPTH_ON_LE;
+ case FRAG_DEPTH_LAYOUT_UNCHANGED:
+ return BRW_PSCDEPTH_OFF;
+ }
+ }
+ return BRW_PSCDEPTH_OFF;
+}
+
+static bool
+really_do_wm_prog(struct brw_context *brw,
+ struct gl_shader_program *prog,
+ struct brw_fragment_program *fp,
+ struct brw_wm_prog_key *key, struct anv_pipeline *pipeline)
+{
+ struct gl_context *ctx = &brw->ctx;
+ void *mem_ctx = ralloc_context(NULL);
+ struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data;
+ struct gl_shader *fs = NULL;
+ unsigned int program_size;
+ const uint32_t *program;
+
+ if (prog)
+ fs = prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
+
+ memset(prog_data, 0, sizeof(*prog_data));
+
+ /* key->alpha_test_func means simulating alpha testing via discards,
+ * so the shader definitely kills pixels.
+ */
+ prog_data->uses_kill = fp->program.UsesKill || key->alpha_test_func;
+
+ prog_data->computed_depth_mode = computed_depth_mode(&fp->program);
+
+ /* Allocate the references to the uniforms that will end up in the
+ * prog_data associated with the compiled program, and which will be freed
+ * by the state cache.
+ */
+ int param_count;
+ if (fs) {
+ param_count = fs->num_uniform_components;
+ } else {
+ param_count = fp->program.Base.Parameters->NumParameters * 4;
+ }
+ /* The backend also sometimes adds params for texture size. */
+ param_count += 2 * ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits;
+ prog_data->base.param =
+ rzalloc_array(NULL, const gl_constant_value *, param_count);
+ prog_data->base.pull_param =
+ rzalloc_array(NULL, const gl_constant_value *, param_count);
+ prog_data->base.nr_params = param_count;
+
+ prog_data->barycentric_interp_modes =
+ brw_compute_barycentric_interp_modes(brw, key->flat_shade,
+ key->persample_shading,
+ &fp->program);
+
+ set_binding_table_layout(&prog_data->base, pipeline,
+ VK_SHADER_STAGE_FRAGMENT);
+ /* This needs to come after shader time and pull constant entries, but we
+ * don't have those set up now, so just put it after the layout entries.
+ */
+ prog_data->binding_table.render_target_start = 0;
+
+ program = brw_wm_fs_emit(brw, mem_ctx, key, prog_data,
+ &fp->program, prog, &program_size);
+ if (program == NULL) {
+ ralloc_free(mem_ctx);
+ return false;
+ }
+
+ uint32_t offset = upload_kernel(pipeline, program, program_size);
+
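+ /* brw_wm_fs_emit returns a single kernel blob: no_8 means no SIMD8 variant
+ * was generated, and prog_offset_16 is the offset of the SIMD16 variant
+ * within the blob, so pick the kernel start offsets accordingly.
+ */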
+ if (prog_data->no_8)
+ pipeline->ps_simd8 = NO_KERNEL;
+ else
+ pipeline->ps_simd8 = offset;
+
+ if (prog_data->no_8 || prog_data->prog_offset_16) {
+ pipeline->ps_simd16 = offset + prog_data->prog_offset_16;
+ } else {
+ pipeline->ps_simd16 = NO_KERNEL;
+ }
+
+ ralloc_free(mem_ctx);
+
+ return true;
+}
+
+static void
+brw_gs_populate_key(struct brw_context *brw,
+ struct anv_pipeline *pipeline,
+ struct brw_geometry_program *gp,
+ struct brw_gs_prog_key *key)
+{
+ struct gl_context *ctx = &brw->ctx;
+ struct brw_stage_state *stage_state = &brw->gs.base;
+ struct gl_program *prog = &gp->program.Base;
+
+ memset(key, 0, sizeof(*key));
+
+ key->base.program_string_id = gp->id;
+ brw_setup_vue_key_clip_info(brw, &key->base,
+ gp->program.Base.UsesClipDistanceOut);
+
+ /* _NEW_TEXTURE */
+ brw_populate_sampler_prog_key_data(ctx, prog, stage_state->sampler_count,
+ &key->base.tex);
+
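+ /* The GS consumes the VS output layout, so its input slots come from the
+ * VS prog_data compiled just before this stage.
+ */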
+ struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data;
+
+ /* BRW_NEW_VUE_MAP_VS */
+ key->input_varyings = prog_data->base.vue_map.slots_valid;
+}
+
+static bool
+really_do_gs_prog(struct brw_context *brw,
+ struct gl_shader_program *prog,
+ struct brw_geometry_program *gp,
+ struct brw_gs_prog_key *key, struct anv_pipeline *pipeline)
+{
+ struct brw_gs_compile_output output;
+
+ /* FIXME: We pass the bind map to the compile in the output struct. Need
+ * something better. */
+ set_binding_table_layout(&output.prog_data.base.base,
+ pipeline, VK_SHADER_STAGE_GEOMETRY);
+
+ brw_compile_gs_prog(brw, prog, gp, key, &output);
+
+ pipeline->gs_vec4 = upload_kernel(pipeline, output.program, output.program_size);
+ pipeline->gs_vertex_count = gp->program.VerticesIn;
+
+ ralloc_free(output.mem_ctx);
+
+ return true;
+}
+
+static bool
+brw_codegen_cs_prog(struct brw_context *brw,
+ struct gl_shader_program *prog,
+ struct brw_compute_program *cp,
+ struct brw_cs_prog_key *key, struct anv_pipeline *pipeline)
+{
+ struct gl_context *ctx = &brw->ctx;
+ const GLuint *program;
+ void *mem_ctx = ralloc_context(NULL);
+ GLuint program_size;
+ struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
+
+ struct gl_shader *cs = prog->_LinkedShaders[MESA_SHADER_COMPUTE];
+ assert(cs);
+
+ memset(prog_data, 0, sizeof(*prog_data));
+
+ set_binding_table_layout(&prog_data->base, pipeline, VK_SHADER_STAGE_COMPUTE);
+
+ /* Allocate the references to the uniforms that will end up in the
+ * prog_data associated with the compiled program, and which will be freed
+ * by the state cache.
+ */
+ int param_count = cs->num_uniform_components;
+
+ /* The backend also sometimes adds params for texture size. */
+ param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
+ prog_data->base.param =
+ rzalloc_array(NULL, const gl_constant_value *, param_count);
+ prog_data->base.pull_param =
+ rzalloc_array(NULL, const gl_constant_value *, param_count);
+ prog_data->base.nr_params = param_count;
+
+ program = brw_cs_emit(brw, mem_ctx, key, prog_data,
+ &cp->program, prog, &program_size);
+ if (program == NULL) {
+ ralloc_free(mem_ctx);
+ return false;
+ }
+
+ if (unlikely(INTEL_DEBUG & DEBUG_CS))
+ fprintf(stderr, "\n");
+
+ pipeline->cs_simd = upload_kernel(pipeline, program, program_size);
+
+ ralloc_free(mem_ctx);
+
+ return true;
+}
+
+static void
+brw_cs_populate_key(struct brw_context *brw,
+ struct brw_compute_program *bcp, struct brw_cs_prog_key *key)
+{
+ memset(key, 0, sizeof(*key));
+
+ /* The unique compute program ID */
+ key->program_string_id = bcp->id;
+}
+
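+/* The GLSL compiler formats its errors as "<source>:<line>(<column>): error:
+ * <message>". Pull out just the line number and message for a shorter
+ * failure report, and fall back to printing the whole log if the message
+ * doesn't match that shape.
+ */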
+static void
+fail_on_compile_error(int status, const char *msg)
+{
+ int source, line, column;
+ char error[256];
+
+ if (status)
+ return;
+
+ if (sscanf(msg, "%d:%d(%d): error: %255[^\n]", &source, &line, &column, error) == 4)
+ fail_if(!status, "%d:%s\n", line, error);
+ else
+ fail_if(!status, "%s\n", msg);
+}
+
+struct anv_compiler {
+ struct anv_device *device;
+ struct intel_screen *screen;
+ struct brw_context *brw;
+ struct gl_pipeline_object pipeline;
+};
+
+extern "C" {
+
+struct anv_compiler *
+anv_compiler_create(struct anv_device *device)
+{
+ const struct brw_device_info *devinfo = &device->info;
+ struct anv_compiler *compiler;
+ struct gl_context *ctx;
+
+ compiler = rzalloc(NULL, struct anv_compiler);
+ if (compiler == NULL)
+ return NULL;
+
+ compiler->screen = rzalloc(compiler, struct intel_screen);
+ if (compiler->screen == NULL)
+ goto fail;
+
+ compiler->brw = rzalloc(compiler, struct brw_context);
+ if (compiler->brw == NULL)
+ goto fail;
+
+ compiler->device = device;
+
+ compiler->brw->optionCache.info = NULL;
+ compiler->brw->bufmgr = NULL;
+ compiler->brw->gen = devinfo->gen;
+ compiler->brw->is_g4x = devinfo->is_g4x;
+ compiler->brw->is_baytrail = devinfo->is_baytrail;
+ compiler->brw->is_haswell = devinfo->is_haswell;
+ compiler->brw->is_cherryview = devinfo->is_cherryview;
+
+ /* We need this at least for CS, which will check brw->max_cs_threads
+ * against the work group size. */
+ compiler->brw->max_vs_threads = devinfo->max_vs_threads;
+ compiler->brw->max_hs_threads = devinfo->max_hs_threads;
+ compiler->brw->max_ds_threads = devinfo->max_ds_threads;
+ compiler->brw->max_gs_threads = devinfo->max_gs_threads;
+ compiler->brw->max_wm_threads = devinfo->max_wm_threads;
+ compiler->brw->max_cs_threads = devinfo->max_cs_threads;
+ compiler->brw->urb.size = devinfo->urb.size;
+ compiler->brw->urb.min_vs_entries = devinfo->urb.min_vs_entries;
+ compiler->brw->urb.max_vs_entries = devinfo->urb.max_vs_entries;
+ compiler->brw->urb.max_hs_entries = devinfo->urb.max_hs_entries;
+ compiler->brw->urb.max_ds_entries = devinfo->urb.max_ds_entries;
+ compiler->brw->urb.max_gs_entries = devinfo->urb.max_gs_entries;
+
+ compiler->brw->intelScreen = compiler->screen;
+ compiler->screen->devinfo = &device->info;
+
+ brw_process_intel_debug_variable(compiler->screen);
+
+ compiler->screen->compiler = brw_compiler_create(compiler, &device->info);
+
+ ctx = &compiler->brw->ctx;
+ _mesa_init_shader_object_functions(&ctx->Driver);
+
+ _mesa_init_constants(&ctx->Const, API_OPENGL_CORE);
+
+ brw_initialize_context_constants(compiler->brw);
+
+ intelInitExtensions(ctx);
+
+ /* Set dd::NewShader */
+ brwInitFragProgFuncs(&ctx->Driver);
+
+ ctx->_Shader = &compiler->pipeline;
+
+ compiler->brw->precompile = false;
+
+ return compiler;
+
+ fail:
+ ralloc_free(compiler);
+ return NULL;
+}
+
+void
+anv_compiler_destroy(struct anv_compiler *compiler)
+{
+ _mesa_free_errors_data(&compiler->brw->ctx);
+ ralloc_free(compiler);
+}
+
+/* From gen7_urb.c */
+
+/* FIXME: Add to struct intel_device_info */
+
+static const int gen8_push_size = 32 * 1024;
+
+static void
+gen7_compute_urb_partition(struct anv_pipeline *pipeline)
+{
+ const struct brw_device_info *devinfo = &pipeline->device->info;
+ bool vs_present = pipeline->vs_simd8 != NO_KERNEL;
+ unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1;
+ unsigned vs_entry_size_bytes = vs_size * 64;
+ bool gs_present = pipeline->gs_vec4 != NO_KERNEL;
+ unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1;
+ unsigned gs_entry_size_bytes = gs_size * 64;
+
+ /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS):
+ *
+ * VS Number of URB Entries must be divisible by 8 if the VS URB Entry
+ * Allocation Size is less than 9 512-bit URB entries.
+ *
+ * Similar text exists for GS.
+ */
+ unsigned vs_granularity = (vs_size < 9) ? 8 : 1;
+ unsigned gs_granularity = (gs_size < 9) ? 8 : 1;
+
+ /* URB allocations must be done in 8k chunks. */
+ unsigned chunk_size_bytes = 8192;
+
+ /* Determine the size of the URB in chunks. */
+ unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes;
+
+ /* Reserve space for push constants */
+ unsigned push_constant_bytes = gen8_push_size;
+ unsigned push_constant_chunks =
+ push_constant_bytes / chunk_size_bytes;
+
+ /* Initially, assign each stage the minimum amount of URB space it needs,
+ * and make a note of how much additional space it "wants" (the amount of
+ * additional space it could actually make use of).
+ */
+
+ /* VS has a lower limit on the number of URB entries */
+ unsigned vs_chunks =
+ ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes,
+ chunk_size_bytes) / chunk_size_bytes;
+ unsigned vs_wants =
+ ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes,
+ chunk_size_bytes) / chunk_size_bytes - vs_chunks;
+
+ unsigned gs_chunks = 0;
+ unsigned gs_wants = 0;
+ if (gs_present) {
+ /* There are two constraints on the minimum amount of URB space we can
+ * allocate:
+ *
+ * (1) We need room for at least 2 URB entries, since we always operate
+ * the GS in DUAL_OBJECT mode.
+ *
+ * (2) We can't allocate less than nr_gs_entries_granularity.
+ */
+ gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes,
+ chunk_size_bytes) / chunk_size_bytes;
+ gs_wants =
+ ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes,
+ chunk_size_bytes) / chunk_size_bytes - gs_chunks;
+ }
+
+ /* There should always be enough URB space to satisfy the minimum
+ * requirements of each stage.
+ */
+ unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks;
+ assert(total_needs <= urb_chunks);
+
+ /* Mete out remaining space (if any) in proportion to "wants". */
+ unsigned total_wants = vs_wants + gs_wants;
+ unsigned remaining_space = urb_chunks - total_needs;
+ if (remaining_space > total_wants)
+ remaining_space = total_wants;
+ if (remaining_space > 0) {
+ unsigned vs_additional = (unsigned)
+ round(vs_wants * (((double) remaining_space) / total_wants));
+ vs_chunks += vs_additional;
+ remaining_space -= vs_additional;
+ gs_chunks += remaining_space;
+ }
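+
+ /* Worked example with made-up sizes: a 192kB URB is 24 8kB chunks and 32kB
+ * of push constants takes 4 of them. If the VS needs 2 chunks and wants 10
+ * more while the GS needs 1 chunk and wants 5 more, the 17 leftover chunks
+ * cover both wants, so the VS ends up with 12 chunks and the GS with 6.
+ */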
+
+ /* Sanity check that we haven't over-allocated. */
+ assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks);
+
+ /* Finally, compute the number of entries that can fit in the space
+ * allocated to each stage.
+ */
+ unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes;
+ unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes;
+
+ /* Since we rounded up when computing *_wants, this may be slightly more
+ * than the maximum allowed amount, so correct for that.
+ */
+ nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries);
+ nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries);
+
+ /* Ensure that we program a multiple of the granularity. */
+ nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity);
+ nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity);
+
+ /* Finally, sanity check to make sure we have at least the minimum number
+ * of entries needed for each stage.
+ */
+ assert(nr_vs_entries >= devinfo->urb.min_vs_entries);
+ if (gs_present)
+ assert(nr_gs_entries >= 2);
+
+ /* Lay out the URB in the following order:
+ * - push constants
+ * - VS
+ * - GS
+ */
+ pipeline->urb.vs_start = push_constant_chunks;
+ pipeline->urb.vs_size = vs_size;
+ pipeline->urb.nr_vs_entries = nr_vs_entries;
+
+ pipeline->urb.gs_start = push_constant_chunks + vs_chunks;
+ pipeline->urb.gs_size = gs_size;
+ pipeline->urb.nr_gs_entries = nr_gs_entries;
+}
+
+static const struct {
+ uint32_t token;
+ gl_shader_stage stage;
+ const char *name;
+} stage_info[] = {
+ { GL_VERTEX_SHADER, MESA_SHADER_VERTEX, "vertex" },
+ { GL_TESS_CONTROL_SHADER, (gl_shader_stage)-1, "tess control" },
+ { GL_TESS_EVALUATION_SHADER, (gl_shader_stage)-1, "tess evaluation" },
+ { GL_GEOMETRY_SHADER, MESA_SHADER_GEOMETRY, "geometry" },
+ { GL_FRAGMENT_SHADER, MESA_SHADER_FRAGMENT, "fragment" },
+ { GL_COMPUTE_SHADER, MESA_SHADER_COMPUTE, "compute" },
+};
+
+struct spirv_header {
+ uint32_t magic;
+ uint32_t version;
+ uint32_t gen_magic;
+};
+
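+/* A LunarG-style module starts with a three-word header (magic, version,
+ * generator). Version 0 is the back-door for passing GLSL source through the
+ * SPIR-V entry point, in which case the GLSL text starts right after the
+ * 12-byte header. Any other module with the SPIR-V magic is real SPIR-V
+ * (return NULL), and unaligned or non-magic data is treated as plain GLSL.
+ */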
+static const char *
+src_as_glsl(const char *data)
+{
+ const struct spirv_header *as_spirv = (const struct spirv_header *)data;
+
+ /* Check alignment */
+ if ((intptr_t)data & 0x3) {
+ return data;
+ }
+
+ if (as_spirv->magic == SPIR_V_MAGIC_NUMBER) {
+ /* LunarG back-door */
+ if (as_spirv->version == 0)
+ return data + 12;
+ else
+ return NULL;
+ } else {
+ return data;
+ }
+}
+
+static void
+anv_compile_shader_glsl(struct anv_compiler *compiler,
+ struct gl_shader_program *program,
+ struct anv_pipeline *pipeline, uint32_t stage)
+{
+ struct brw_context *brw = compiler->brw;
+ struct gl_shader *shader;
+ int name = 0;
+
+ shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token);
+ fail_if(shader == NULL, "failed to create %s shader\n", stage_info[stage].name);
+
+ shader->Source = strdup(src_as_glsl(pipeline->shaders[stage]->module->data));
+ _mesa_glsl_compile_shader(&brw->ctx, shader, false, false);
+ fail_on_compile_error(shader->CompileStatus, shader->InfoLog);
+
+ program->Shaders[program->NumShaders] = shader;
+ program->NumShaders++;
+}
+
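+/* The brw backends look at the gl_program bitfields to decide the shader's
+ * input/output layout, so mirror the NIR variables' location bits into
+ * InputsRead and OutputsWritten.
+ */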
+static void
+setup_nir_io(struct gl_program *prog,
+ nir_shader *shader)
+{
+ foreach_list_typed(nir_variable, var, node, &shader->inputs) {
+ prog->InputsRead |= BITFIELD64_BIT(var->data.location);
+ }
+
+ foreach_list_typed(nir_variable, var, node, &shader->outputs) {
+ prog->OutputsWritten |= BITFIELD64_BIT(var->data.location);
+ }
+}
+
+static void
+anv_compile_shader_spirv(struct anv_compiler *compiler,
+ struct gl_shader_program *program,
+ struct anv_pipeline *pipeline, uint32_t stage)
+{
+ struct brw_context *brw = compiler->brw;
+ struct anv_shader *shader = pipeline->shaders[stage];
+ struct gl_shader *mesa_shader;
+ int name = 0;
+
+ mesa_shader = brw_new_shader(&brw->ctx, name, stage_info[stage].token);
+ fail_if(mesa_shader == NULL,
+ "failed to create %s shader\n", stage_info[stage].name);
+
+ switch (stage) {
+ case VK_SHADER_STAGE_VERTEX:
+ mesa_shader->Program = &rzalloc(mesa_shader, struct brw_vertex_program)->program.Base;
+ break;
+ case VK_SHADER_STAGE_GEOMETRY:
+ mesa_shader->Program = &rzalloc(mesa_shader, struct brw_geometry_program)->program.Base;
+ break;
+ case VK_SHADER_STAGE_FRAGMENT:
+ mesa_shader->Program = &rzalloc(mesa_shader, struct brw_fragment_program)->program.Base;
+ break;
+ case VK_SHADER_STAGE_COMPUTE:
+ mesa_shader->Program = &rzalloc(mesa_shader, struct brw_compute_program)->program.Base;
+ break;
+ }
+
+ mesa_shader->Program->Parameters =
+ rzalloc(mesa_shader, struct gl_program_parameter_list);
+
+ mesa_shader->Type = stage_info[stage].token;
+ mesa_shader->Stage = stage_info[stage].stage;
+
+ assert(shader->module->size % 4 == 0);
+
+ struct gl_shader_compiler_options *glsl_options =
+ &compiler->screen->compiler->glsl_compiler_options[stage_info[stage].stage];
+
+ mesa_shader->Program->nir =
+ spirv_to_nir((uint32_t *)shader->module->data, shader->module->size / 4,
+ glsl_options->NirOptions);
+ fail_if(mesa_shader->Program->nir == NULL,
+ "failed to translate SPIR-V to NIR\n");
+ nir_validate_shader(mesa_shader->Program->nir);
+
+ brw_process_nir(mesa_shader->Program->nir,
+ compiler->screen->devinfo,
- NULL, mesa_shader->Stage);
++ NULL, mesa_shader->Stage, false);
+
+ setup_nir_io(mesa_shader->Program, mesa_shader->Program->nir);
+
+ program->Shaders[program->NumShaders] = mesa_shader;
+ program->NumShaders++;
+}
+
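+/* Record the compiled stage's prog_data, mark the stage active, and grow the
+ * pipeline's scratch allocation by total_scratch bytes per thread times the
+ * maximum thread count the hardware supports for that stage.
+ */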
+static void
+add_compiled_stage(struct anv_pipeline *pipeline, uint32_t stage,
+ struct brw_stage_prog_data *prog_data)
+{
+ struct brw_device_info *devinfo = &pipeline->device->info;
+ uint32_t max_threads[] = {
+ [VK_SHADER_STAGE_VERTEX] = devinfo->max_vs_threads,
+ [VK_SHADER_STAGE_TESS_CONTROL] = 0,
+ [VK_SHADER_STAGE_TESS_EVALUATION] = 0,
+ [VK_SHADER_STAGE_GEOMETRY] = devinfo->max_gs_threads,
+ [VK_SHADER_STAGE_FRAGMENT] = devinfo->max_wm_threads,
+ [VK_SHADER_STAGE_COMPUTE] = devinfo->max_cs_threads,
+ };
+
+ pipeline->prog_data[stage] = prog_data;
+ pipeline->active_stages |= 1 << stage;
+ pipeline->scratch_start[stage] = pipeline->total_scratch;
+ pipeline->total_scratch =
+ align_u32(pipeline->total_scratch, 1024) +
+ prog_data->total_scratch * max_threads[stage];
+}
+
+int
+anv_compiler_run(struct anv_compiler *compiler, struct anv_pipeline *pipeline)
+{
+ struct gl_shader_program *program;
+ int name = 0;
+ struct brw_context *brw = compiler->brw;
+
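+ /* Overall flow: wrap each anv_shader in a gl_shader (either by translating
+ * SPIR-V to NIR or by running GLSL source through the regular compiler),
+ * link them into a gl_shader_program, and then run the per-stage brw
+ * backends to produce the kernels and prog_data the pipeline needs.
+ */
+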
+ pipeline->writes_point_size = false;
+
+ /* When we free the pipeline, we detect stages based on the NULL status
+ * of various prog_data pointers. Make them NULL by default.
+ */
+ memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data));
+ memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start));
+
+ brw->use_rep_send = pipeline->use_repclear;
+ brw->no_simd8 = pipeline->use_repclear;
+
+ program = brw->ctx.Driver.NewShaderProgram(name);
+ program->Shaders = (struct gl_shader **)
+ calloc(VK_SHADER_STAGE_NUM, sizeof(struct gl_shader *));
+ fail_if(program == NULL || program->Shaders == NULL,
+ "failed to create program\n");
+
+ bool all_spirv = true;
+ for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) {
+ if (pipeline->shaders[i] == NULL)
+ continue;
+
+ /* You need at least this much for "void main() { }" anyway */
+ assert(pipeline->shaders[i]->module->size >= 12);
+
+ if (src_as_glsl(pipeline->shaders[i]->module->data)) {
+ all_spirv = false;
+ break;
+ }
+
+ assert(pipeline->shaders[i]->module->size % 4 == 0);
+ }
+
+ if (all_spirv) {
+ for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) {
+ if (pipeline->shaders[i])
+ anv_compile_shader_spirv(compiler, program, pipeline, i);
+ }
+
+ for (unsigned i = 0; i < program->NumShaders; i++) {
+ struct gl_shader *shader = program->Shaders[i];
+ program->_LinkedShaders[shader->Stage] = shader;
+ }
+ } else {
+ for (unsigned i = 0; i < VK_SHADER_STAGE_NUM; i++) {
+ if (pipeline->shaders[i])
+ anv_compile_shader_glsl(compiler, program, pipeline, i);
+ }
+
+ _mesa_glsl_link_shader(&brw->ctx, program);
+ fail_on_compile_error(program->LinkStatus,
+ program->InfoLog);
+ }
+
+ bool success;
+ pipeline->active_stages = 0;
+ pipeline->total_scratch = 0;
+
+ if (pipeline->shaders[VK_SHADER_STAGE_VERTEX]) {
+ struct brw_vs_prog_key vs_key;
+ struct gl_vertex_program *vp = (struct gl_vertex_program *)
+ program->_LinkedShaders[MESA_SHADER_VERTEX]->Program;
+ struct brw_vertex_program *bvp = brw_vertex_program(vp);
+
+ brw_vs_populate_key(brw, bvp, &vs_key);
+
+ success = really_do_vs_prog(brw, program, bvp, &vs_key, pipeline);
+ fail_if(!success, "do_vs_prog failed\n");
+ add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX,
+ &pipeline->vs_prog_data.base.base);
+
+ if (vp->Base.OutputsWritten & BITFIELD64_BIT(VARYING_SLOT_PSIZ))
+ pipeline->writes_point_size = true;
+ } else {
+ memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data));
+ pipeline->vs_simd8 = NO_KERNEL;
+ }
+
+
+ if (pipeline->shaders[VK_SHADER_STAGE_GEOMETRY]) {
+ struct brw_gs_prog_key gs_key;
+ struct gl_geometry_program *gp = (struct gl_geometry_program *)
+ program->_LinkedShaders[MESA_SHADER_GEOMETRY]->Program;
+ struct brw_geometry_program *bgp = brw_geometry_program(gp);
+
+ brw_gs_populate_key(brw, pipeline, bgp, &gs_key);
+
+ success = really_do_gs_prog(brw, program, bgp, &gs_key, pipeline);
+ fail_if(!success, "do_gs_prog failed\n");
+ add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY,
+ &pipeline->gs_prog_data.base.base);
+
+ if (gp->Base.OutputsWritten & BITFIELD64_BIT(VARYING_SLOT_PSIZ))
+ pipeline->writes_point_size = true;
+ } else {
+ pipeline->gs_vec4 = NO_KERNEL;
+ }
+
+ if (pipeline->shaders[VK_SHADER_STAGE_FRAGMENT]) {
+ struct brw_wm_prog_key wm_key;
+ struct gl_fragment_program *fp = (struct gl_fragment_program *)
+ program->_LinkedShaders[MESA_SHADER_FRAGMENT]->Program;
+ struct brw_fragment_program *bfp = brw_fragment_program(fp);
+
+ brw_wm_populate_key(brw, bfp, &wm_key);
+
+ success = really_do_wm_prog(brw, program, bfp, &wm_key, pipeline);
+ fail_if(!success, "do_wm_prog failed\n");
+ add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT,
+ &pipeline->wm_prog_data.base);
+ }
+
+ if (pipeline->shaders[VK_SHADER_STAGE_COMPUTE]) {
+ struct brw_cs_prog_key cs_key;
+ struct gl_compute_program *cp = (struct gl_compute_program *)
+ program->_LinkedShaders[MESA_SHADER_COMPUTE]->Program;
+ struct brw_compute_program *bcp = brw_compute_program(cp);
+
+ brw_cs_populate_key(brw, bcp, &cs_key);
+
+ success = brw_codegen_cs_prog(brw, program, bcp, &cs_key, pipeline);
+ fail_if(!success, "brw_codegen_cs_prog failed\n");
+ add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE,
+ &pipeline->cs_prog_data.base);
+ }
+
+ /* XXX: Deleting the shader is broken with our current SPIR-V hacks. We
+ * need to fix this ASAP.
+ */
+ if (!all_spirv)
+ brw->ctx.Driver.DeleteShaderProgram(&brw->ctx, program);
+
+ struct anv_device *device = compiler->device;
+ while (device->scratch_block_pool.bo.size < pipeline->total_scratch)
+ anv_block_pool_alloc(&device->scratch_block_pool);
+
+ gen7_compute_urb_partition(pipeline);
+
+ return 0;
+}
+
+/* This badly named function frees the struct anv_pipeline data that the
+ * compiler allocates: currently the map_entries and param/pull_param arrays
+ * hanging off each stage's prog_data.
+ */
+void
+anv_compiler_free(struct anv_pipeline *pipeline)
+{
+ for (uint32_t stage = 0; stage < VK_SHADER_STAGE_NUM; stage++) {
+ if (pipeline->prog_data[stage]) {
+ free(pipeline->prog_data[stage]->map_entries);
+ ralloc_free(pipeline->prog_data[stage]->param);
+ ralloc_free(pipeline->prog_data[stage]->pull_param);
+ }
+ }
+}
+
+}
--- /dev/null
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <assert.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "anv_private.h"
+#include "mesa/main/git_sha1.h"
++#include "util/strtod.h"
+
+static int
+anv_env_get_int(const char *name)
+{
+ const char *val = getenv(name);
+
+ if (!val)
+ return 0;
+
+ return strtol(val, NULL, 0);
+}
+
+static VkResult
+anv_physical_device_init(struct anv_physical_device *device,
+ struct anv_instance *instance,
+ const char *path)
+{
+ int fd;
+
+ fd = open(path, O_RDWR | O_CLOEXEC);
+ if (fd < 0)
+ return vk_error(VK_ERROR_UNAVAILABLE);
+
+ device->instance = instance;
+ device->path = path;
+
+ device->chipset_id = anv_env_get_int("INTEL_DEVID_OVERRIDE");
+ device->no_hw = false;
+ if (device->chipset_id) {
+ /* INTEL_DEVID_OVERRIDE implies INTEL_NO_HW. */
+ device->no_hw = true;
+ } else {
+ device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID);
+ }
+ if (!device->chipset_id)
+ goto fail;
+
+ device->name = brw_get_device_name(device->chipset_id);
+ device->info = brw_get_device_info(device->chipset_id, -1);
+ if (!device->info)
+ goto fail;
+
+ if (anv_gem_get_aperture(fd, &device->aperture_size) == -1)
+ goto fail;
+
+ if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT))
+ goto fail;
+
+ if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2))
+ goto fail;
+
+ if (!anv_gem_get_param(fd, I915_PARAM_HAS_LLC))
+ goto fail;
+
+ if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_CONSTANTS))
+ goto fail;
+
+ close(fd);
+
+ return VK_SUCCESS;
+
+fail:
+ close(fd);
+ return vk_error(VK_ERROR_UNAVAILABLE);
+}
+
+static void *default_alloc(
+ void* pUserData,
+ size_t size,
+ size_t alignment,
+ VkSystemAllocType allocType)
+{
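+ /* The requested alignment is ignored; malloc's natural alignment
+ * (typically 16 bytes on 64-bit platforms) is assumed to cover what the
+ * driver asks for.
+ */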
+ return malloc(size);
+}
+
+static void default_free(
+ void* pUserData,
+ void* pMem)
+{
+ free(pMem);
+}
+
+static const VkAllocCallbacks default_alloc_callbacks = {
+ .pUserData = NULL,
+ .pfnAlloc = default_alloc,
+ .pfnFree = default_free
+};
+
+VkResult anv_CreateInstance(
+ const VkInstanceCreateInfo* pCreateInfo,
+ VkInstance* pInstance)
+{
+ struct anv_instance *instance;
+ const VkAllocCallbacks *alloc_callbacks = &default_alloc_callbacks;
+ void *user_data = NULL;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
+
+ if (pCreateInfo->pAllocCb) {
+ alloc_callbacks = pCreateInfo->pAllocCb;
+ user_data = pCreateInfo->pAllocCb->pUserData;
+ }
+ instance = alloc_callbacks->pfnAlloc(user_data, sizeof(*instance), 8,
+ VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+ if (!instance)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ instance->pAllocUserData = alloc_callbacks->pUserData;
+ instance->pfnAlloc = alloc_callbacks->pfnAlloc;
+ instance->pfnFree = alloc_callbacks->pfnFree;
+ instance->apiVersion = pCreateInfo->pAppInfo->apiVersion;
+ instance->physicalDeviceCount = 0;
+
++ _mesa_locale_init();
++
+ VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
+
+ *pInstance = anv_instance_to_handle(instance);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_DestroyInstance(
+ VkInstance _instance)
+{
+ ANV_FROM_HANDLE(anv_instance, instance, _instance);
+
+ VG(VALGRIND_DESTROY_MEMPOOL(instance));
+
++ _mesa_locale_fini();
++
+ instance->pfnFree(instance->pAllocUserData, instance);
+
+ return VK_SUCCESS;
+}
+
+static void *
+anv_instance_alloc(struct anv_instance *instance, size_t size,
+ size_t alignment, VkSystemAllocType allocType)
+{
+ void *mem = instance->pfnAlloc(instance->pAllocUserData,
+ size, alignment, allocType);
+ if (mem) {
+ VALGRIND_MEMPOOL_ALLOC(instance, mem, size);
+ VALGRIND_MAKE_MEM_UNDEFINED(mem, size);
+ }
+ return mem;
+}
+
+static void
+anv_instance_free(struct anv_instance *instance, void *mem)
+{
+ if (mem == NULL)
+ return;
+
+ VALGRIND_MEMPOOL_FREE(instance, mem);
+
+ instance->pfnFree(instance->pAllocUserData, mem);
+}
+
+VkResult anv_EnumeratePhysicalDevices(
+ VkInstance _instance,
+ uint32_t* pPhysicalDeviceCount,
+ VkPhysicalDevice* pPhysicalDevices)
+{
+ ANV_FROM_HANDLE(anv_instance, instance, _instance);
+ VkResult result;
+
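+ /* The physical device is probed lazily on the first call, using the
+ * hard-coded render node path below.
+ */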
+ if (instance->physicalDeviceCount == 0) {
+ result = anv_physical_device_init(&instance->physicalDevice,
+ instance, "/dev/dri/renderD128");
+ if (result != VK_SUCCESS)
+ return result;
+
+ instance->physicalDeviceCount = 1;
+ }
+
+ /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL;
+ * otherwise it's an inout parameter.
+ *
+ * The Vulkan spec (git aaed022) says:
+ *
+ * pPhysicalDeviceCount is a pointer to an unsigned integer variable
+ * that is initialized with the number of devices the application is
+ * prepared to receive handles to. pname:pPhysicalDevices is pointer to
+ * an array of at least this many VkPhysicalDevice handles [...].
+ *
+ * Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices
+ * overwrites the contents of the variable pointed to by
+ * pPhysicalDeviceCount with the number of physical devices in the
+ * instance; otherwise, vkEnumeratePhysicalDevices overwrites
+ * pPhysicalDeviceCount with the number of physical handles written to
+ * pPhysicalDevices.
+ */
+ if (!pPhysicalDevices) {
+ *pPhysicalDeviceCount = instance->physicalDeviceCount;
+ } else if (*pPhysicalDeviceCount >= 1) {
+ pPhysicalDevices[0] = anv_physical_device_to_handle(&instance->physicalDevice);
+ *pPhysicalDeviceCount = 1;
+ } else {
+ *pPhysicalDeviceCount = 0;
+ }
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_GetPhysicalDeviceFeatures(
+ VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceFeatures* pFeatures)
+{
+ anv_finishme("Get correct values for PhysicalDeviceFeatures");
+
+ *pFeatures = (VkPhysicalDeviceFeatures) {
+ .robustBufferAccess = false,
+ .fullDrawIndexUint32 = false,
+ .imageCubeArray = false,
+ .independentBlend = false,
+ .geometryShader = true,
+ .tessellationShader = false,
+ .sampleRateShading = false,
+ .dualSourceBlend = true,
+ .logicOp = true,
+ .instancedDrawIndirect = true,
+ .depthClip = false,
+ .depthBiasClamp = false,
+ .fillModeNonSolid = true,
+ .depthBounds = false,
+ .wideLines = true,
+ .largePoints = true,
+ .textureCompressionETC2 = true,
+ .textureCompressionASTC_LDR = true,
+ .textureCompressionBC = true,
+ .pipelineStatisticsQuery = true,
+ .vertexSideEffects = false,
+ .tessellationSideEffects = false,
+ .geometrySideEffects = false,
+ .fragmentSideEffects = false,
+ .shaderTessellationPointSize = false,
+ .shaderGeometryPointSize = true,
+ .shaderTextureGatherExtended = true,
+ .shaderStorageImageExtendedFormats = false,
+ .shaderStorageImageMultisample = false,
+ .shaderStorageBufferArrayConstantIndexing = false,
+ .shaderStorageImageArrayConstantIndexing = false,
+ .shaderUniformBufferArrayDynamicIndexing = true,
+ .shaderSampledImageArrayDynamicIndexing = false,
+ .shaderStorageBufferArrayDynamicIndexing = false,
+ .shaderStorageImageArrayDynamicIndexing = false,
+ .shaderClipDistance = false,
+ .shaderCullDistance = false,
+ .shaderFloat64 = false,
+ .shaderInt64 = false,
+ .shaderFloat16 = false,
+ .shaderInt16 = false,
+ };
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_GetPhysicalDeviceLimits(
+ VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceLimits* pLimits)
+{
+ ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
+ const struct brw_device_info *devinfo = physical_device->info;
+
+ anv_finishme("Get correct values for PhysicalDeviceLimits");
+
+ *pLimits = (VkPhysicalDeviceLimits) {
+ .maxImageDimension1D = (1 << 14),
+ .maxImageDimension2D = (1 << 14),
+ .maxImageDimension3D = (1 << 10),
+ .maxImageDimensionCube = (1 << 14),
+ .maxImageArrayLayers = (1 << 10),
+ .maxTexelBufferSize = (1 << 14),
+ .maxUniformBufferSize = UINT32_MAX,
+ .maxStorageBufferSize = UINT32_MAX,
+ .maxPushConstantsSize = 128,
+ .maxMemoryAllocationCount = UINT32_MAX,
+ .bufferImageGranularity = 64, /* A cache line */
+ .maxBoundDescriptorSets = MAX_SETS,
+ .maxDescriptorSets = UINT32_MAX,
+ .maxPerStageDescriptorSamplers = 64,
+ .maxPerStageDescriptorUniformBuffers = 64,
+ .maxPerStageDescriptorStorageBuffers = 64,
+ .maxPerStageDescriptorSampledImages = 64,
+ .maxPerStageDescriptorStorageImages = 64,
+ .maxDescriptorSetSamplers = 256,
+ .maxDescriptorSetUniformBuffers = 256,
+ .maxDescriptorSetStorageBuffers = 256,
+ .maxDescriptorSetSampledImages = 256,
+ .maxDescriptorSetStorageImages = 256,
+ .maxVertexInputAttributes = 32,
+ .maxVertexInputAttributeOffset = 256,
+ .maxVertexInputBindingStride = 256,
+ .maxVertexOutputComponents = 32,
+ .maxTessGenLevel = 0,
+ .maxTessPatchSize = 0,
+ .maxTessControlPerVertexInputComponents = 0,
+ .maxTessControlPerVertexOutputComponents = 0,
+ .maxTessControlPerPatchOutputComponents = 0,
+ .maxTessControlTotalOutputComponents = 0,
+ .maxTessEvaluationInputComponents = 0,
+ .maxTessEvaluationOutputComponents = 0,
+ .maxGeometryShaderInvocations = 6,
+ .maxGeometryInputComponents = 16,
+ .maxGeometryOutputComponents = 16,
+ .maxGeometryOutputVertices = 16,
+ .maxGeometryTotalOutputComponents = 16,
+ .maxFragmentInputComponents = 16,
+ .maxFragmentOutputBuffers = 8,
+ .maxFragmentDualSourceBuffers = 2,
+ .maxFragmentCombinedOutputResources = 8,
+ .maxComputeSharedMemorySize = 1024,
+ .maxComputeWorkGroupCount = {
+ 16 * devinfo->max_cs_threads,
+ 16 * devinfo->max_cs_threads,
+ 16 * devinfo->max_cs_threads,
+ },
+ .maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads,
+ .maxComputeWorkGroupSize = {
+ 16 * devinfo->max_cs_threads,
+ 16 * devinfo->max_cs_threads,
+ 16 * devinfo->max_cs_threads,
+ },
+ .subPixelPrecisionBits = 4 /* FIXME */,
+ .subTexelPrecisionBits = 4 /* FIXME */,
+ .mipmapPrecisionBits = 4 /* FIXME */,
+ .maxDrawIndexedIndexValue = UINT32_MAX,
+ .maxDrawIndirectInstanceCount = UINT32_MAX,
+ .primitiveRestartForPatches = UINT32_MAX,
+ .maxSamplerLodBias = 16,
+ .maxSamplerAnisotropy = 16,
+ .maxViewports = 16,
+ .maxDynamicViewportStates = UINT32_MAX,
+ .maxViewportDimensions = { (1 << 14), (1 << 14) },
+ .viewportBoundsRange = { -1.0, 1.0 }, /* FIXME */
+ .viewportSubPixelBits = 13, /* We take a float? */
+ .minMemoryMapAlignment = 64, /* A cache line */
+ .minTexelBufferOffsetAlignment = 1,
+ .minUniformBufferOffsetAlignment = 1,
+ .minStorageBufferOffsetAlignment = 1,
+ .minTexelOffset = 0, /* FIXME */
+ .maxTexelOffset = 0, /* FIXME */
+ .minTexelGatherOffset = 0, /* FIXME */
+ .maxTexelGatherOffset = 0, /* FIXME */
+ .minInterpolationOffset = 0, /* FIXME */
+ .maxInterpolationOffset = 0, /* FIXME */
+ .subPixelInterpolationOffsetBits = 0, /* FIXME */
+ .maxFramebufferWidth = (1 << 14),
+ .maxFramebufferHeight = (1 << 14),
+ .maxFramebufferLayers = (1 << 10),
+ .maxFramebufferColorSamples = 8,
+ .maxFramebufferDepthSamples = 8,
+ .maxFramebufferStencilSamples = 8,
+ .maxColorAttachments = MAX_RTS,
+ .maxSampledImageColorSamples = 8,
+ .maxSampledImageDepthSamples = 8,
+ .maxSampledImageIntegerSamples = 1,
+ .maxStorageImageSamples = 1,
+ .maxSampleMaskWords = 1,
+ .timestampFrequency = 1000 * 1000 * 1000 / 80,
+ .maxClipDistances = 0 /* FIXME */,
+ .maxCullDistances = 0 /* FIXME */,
+ .maxCombinedClipAndCullDistances = 0 /* FIXME */,
+ .pointSizeRange = { 0.125, 255.875 },
+ .lineWidthRange = { 0.0, 7.9921875 },
+ .pointSizeGranularity = (1.0 / 8.0),
+ .lineWidthGranularity = (1.0 / 128.0),
+ };
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_GetPhysicalDeviceProperties(
+ VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceProperties* pProperties)
+{
+ ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
+
+ *pProperties = (VkPhysicalDeviceProperties) {
+ .apiVersion = VK_MAKE_VERSION(0, 138, 1),
+ .driverVersion = 1,
+ .vendorId = 0x8086,
+ .deviceId = pdevice->chipset_id,
+ .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
+ };
+
+ strcpy(pProperties->deviceName, pdevice->name);
+ snprintf((char *)pProperties->pipelineCacheUUID, VK_UUID_LENGTH,
+ "anv-%s", MESA_GIT_SHA1 + 4);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_GetPhysicalDeviceQueueCount(
+ VkPhysicalDevice physicalDevice,
+ uint32_t* pCount)
+{
+ *pCount = 1;
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_GetPhysicalDeviceQueueProperties(
+ VkPhysicalDevice physicalDevice,
+ uint32_t count,
+ VkPhysicalDeviceQueueProperties* pQueueProperties)
+{
+ assert(count == 1);
+
+ *pQueueProperties = (VkPhysicalDeviceQueueProperties) {
+ .queueFlags = VK_QUEUE_GRAPHICS_BIT |
+ VK_QUEUE_COMPUTE_BIT |
+ VK_QUEUE_DMA_BIT,
+ .queueCount = 1,
+ .supportsTimestamps = true,
+ };
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_GetPhysicalDeviceMemoryProperties(
+ VkPhysicalDevice physicalDevice,
+ VkPhysicalDeviceMemoryProperties* pMemoryProperties)
+{
+ ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
+ VkDeviceSize heap_size;
+
+ /* Reserve some wiggle room for the driver by exposing only 75% of the
+ * aperture to the heap.
+ */
+ heap_size = 3 * physical_device->aperture_size / 4;
+
+ /* The property flags below are valid only for llc platforms. */
+ pMemoryProperties->memoryTypeCount = 1;
+ pMemoryProperties->memoryTypes[0] = (VkMemoryType) {
+ .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT,
+ .heapIndex = 0,
+ };
+
+ pMemoryProperties->memoryHeapCount = 1;
+ pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) {
+ .size = heap_size,
+ .flags = VK_MEMORY_HEAP_HOST_LOCAL,
+ };
+
+ return VK_SUCCESS;
+}
+
+PFN_vkVoidFunction anv_GetInstanceProcAddr(
+ VkInstance instance,
+ const char* pName)
+{
+ return anv_lookup_entrypoint(pName);
+}
+
+PFN_vkVoidFunction anv_GetDeviceProcAddr(
+ VkDevice device,
+ const char* pName)
+{
+ return anv_lookup_entrypoint(pName);
+}
+
+static void
+parse_debug_flags(struct anv_device *device)
+{
+ const char *debug, *p, *end;
+
+ debug = getenv("INTEL_DEBUG");
+ device->dump_aub = false;
+ if (debug) {
+ for (p = debug; *p; p = end + 1) {
+ end = strchrnul(p, ',');
+ if (end - p == 3 && memcmp(p, "aub", 3) == 0)
+ device->dump_aub = true;
+ if (end - p == 5 && memcmp(p, "no_hw", 5) == 0)
+ device->no_hw = true;
+ if (*end == '\0')
+ break;
+ }
+ }
+}
+
+static VkResult
+anv_queue_init(struct anv_device *device, struct anv_queue *queue)
+{
+ queue->device = device;
+ queue->pool = &device->surface_state_pool;
+
+ queue->completed_serial = anv_state_pool_alloc(queue->pool, 4, 4);
+ if (queue->completed_serial.map == NULL)
+ return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+ *(uint32_t *)queue->completed_serial.map = 0;
+ queue->next_serial = 1;
+
+ return VK_SUCCESS;
+}
+
+static void
+anv_queue_finish(struct anv_queue *queue)
+{
+#ifdef HAVE_VALGRIND
+ /* This gets torn down with the device so we only need to do this if
+ * valgrind is present.
+ */
+ anv_state_pool_free(queue->pool, queue->completed_serial);
+#endif
+}
+
+static void
+anv_device_init_border_colors(struct anv_device *device)
+{
+ static const VkClearColorValue border_colors[] = {
+ [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .f32 = { 0.0, 0.0, 0.0, 0.0 } },
+ [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .f32 = { 0.0, 0.0, 0.0, 1.0 } },
+ [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .f32 = { 1.0, 1.0, 1.0, 1.0 } },
+ [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .u32 = { 0, 0, 0, 0 } },
+ [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .u32 = { 0, 0, 0, 1 } },
+ [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .u32 = { 1, 1, 1, 1 } },
+ };
+
+ device->border_colors =
+ anv_state_pool_alloc(&device->dynamic_state_pool,
+ sizeof(border_colors), 32);
+ memcpy(device->border_colors.map, border_colors, sizeof(border_colors));
+}
+
+VkResult anv_CreateDevice(
+ VkPhysicalDevice physicalDevice,
+ const VkDeviceCreateInfo* pCreateInfo,
+ VkDevice* pDevice)
+{
+ ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
+ struct anv_instance *instance = physical_device->instance;
+ struct anv_device *device;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
+
+ device = anv_instance_alloc(instance, sizeof(*device), 8,
+ VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+ if (!device)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ device->no_hw = physical_device->no_hw;
+ parse_debug_flags(device);
+
+ device->instance = physical_device->instance;
+
+ /* XXX(chadv): Can we dup() physicalDevice->fd here? */
+ device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
+ if (device->fd == -1)
+ goto fail_device;
+
+ device->context_id = anv_gem_create_context(device);
+ if (device->context_id == -1)
+ goto fail_fd;
+
+ anv_bo_pool_init(&device->batch_bo_pool, device, ANV_CMD_BUFFER_BATCH_SIZE);
+
+ anv_block_pool_init(&device->dynamic_state_block_pool, device, 2048);
+
+ anv_state_pool_init(&device->dynamic_state_pool,
+ &device->dynamic_state_block_pool);
+
+ anv_block_pool_init(&device->instruction_block_pool, device, 2048);
+ anv_block_pool_init(&device->surface_state_block_pool, device, 2048);
+
+ anv_state_pool_init(&device->surface_state_pool,
+ &device->surface_state_block_pool);
+
+ anv_block_pool_init(&device->scratch_block_pool, device, 0x10000);
+
+ device->info = *physical_device->info;
+
+ device->compiler = anv_compiler_create(device);
+ device->aub_writer = NULL;
+
+ pthread_mutex_init(&device->mutex, NULL);
+
+ anv_queue_init(device, &device->queue);
+
+ anv_device_init_meta(device);
+
+ anv_device_init_border_colors(device);
+
+ *pDevice = anv_device_to_handle(device);
+
+ return VK_SUCCESS;
+
+ fail_fd:
+ close(device->fd);
+ fail_device:
+ anv_device_free(device, device);
+
+ return vk_error(VK_ERROR_UNAVAILABLE);
+}
+
+VkResult anv_DestroyDevice(
+ VkDevice _device)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+
+ anv_compiler_destroy(device->compiler);
+
+ anv_queue_finish(&device->queue);
+
+ anv_device_finish_meta(device);
+
+#ifdef HAVE_VALGRIND
+ /* We only need to free these to prevent valgrind errors. The backing
+ * BO will go away in a couple of lines so we don't actually leak.
+ */
+ anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
+#endif
+
+ anv_bo_pool_finish(&device->batch_bo_pool);
+ anv_state_pool_finish(&device->dynamic_state_pool);
+ anv_block_pool_finish(&device->dynamic_state_block_pool);
+ anv_block_pool_finish(&device->instruction_block_pool);
+ anv_state_pool_finish(&device->surface_state_pool);
+ anv_block_pool_finish(&device->surface_state_block_pool);
+ anv_block_pool_finish(&device->scratch_block_pool);
+
+ close(device->fd);
+
+ if (device->aub_writer)
+ anv_aub_writer_destroy(device->aub_writer);
+
+ anv_instance_free(device->instance, device);
+
+ return VK_SUCCESS;
+}
+
+static const VkExtensionProperties global_extensions[] = {
+ {
+ .extName = "VK_WSI_LunarG",
+ .specVersion = 3
+ }
+};
+
+VkResult anv_GetGlobalExtensionProperties(
+ const char* pLayerName,
+ uint32_t* pCount,
+ VkExtensionProperties* pProperties)
+{
+ if (pProperties == NULL) {
+ *pCount = ARRAY_SIZE(global_extensions);
+ return VK_SUCCESS;
+ }
+
+ assert(*pCount >= ARRAY_SIZE(global_extensions));
+
+ *pCount = ARRAY_SIZE(global_extensions);
+ memcpy(pProperties, global_extensions, sizeof(global_extensions));
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_GetPhysicalDeviceExtensionProperties(
+ VkPhysicalDevice physicalDevice,
+ const char* pLayerName,
+ uint32_t* pCount,
+ VkExtensionProperties* pProperties)
+{
+ if (pProperties == NULL) {
+ *pCount = 0;
+ return VK_SUCCESS;
+ }
+
+ /* None supported at this time */
+ return vk_error(VK_ERROR_INVALID_EXTENSION);
+}
+
+VkResult anv_GetGlobalLayerProperties(
+ uint32_t* pCount,
+ VkLayerProperties* pProperties)
+{
+ if (pProperties == NULL) {
+ *pCount = 0;
+ return VK_SUCCESS;
+ }
+
+ /* None supported at this time */
+ return vk_error(VK_ERROR_INVALID_LAYER);
+}
+
+VkResult anv_GetPhysicalDeviceLayerProperties(
+ VkPhysicalDevice physicalDevice,
+ uint32_t* pCount,
+ VkLayerProperties* pProperties)
+{
+ if (pProperties == NULL) {
+ *pCount = 0;
+ return VK_SUCCESS;
+ }
+
+ /* None supported at this time */
+ return vk_error(VK_ERROR_INVALID_LAYER);
+}
+
+VkResult anv_GetDeviceQueue(
+ VkDevice _device,
+ uint32_t queueNodeIndex,
+ uint32_t queueIndex,
+ VkQueue* pQueue)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+
+ assert(queueIndex == 0);
+
+ *pQueue = anv_queue_to_handle(&device->queue);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_QueueSubmit(
+ VkQueue _queue,
+ uint32_t cmdBufferCount,
+ const VkCmdBuffer* pCmdBuffers,
+ VkFence _fence)
+{
+ ANV_FROM_HANDLE(anv_queue, queue, _queue);
+ ANV_FROM_HANDLE(anv_fence, fence, _fence);
+ struct anv_device *device = queue->device;
+ int ret;
+
+ for (uint32_t i = 0; i < cmdBufferCount; i++) {
+ ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, pCmdBuffers[i]);
+
+ assert(cmd_buffer->level == VK_CMD_BUFFER_LEVEL_PRIMARY);
+
+ if (device->dump_aub)
+ anv_cmd_buffer_dump(cmd_buffer);
+
+ if (!device->no_hw) {
+ ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf2.execbuf);
+ if (ret != 0)
+ return vk_error(VK_ERROR_UNKNOWN);
+
+ if (fence) {
+ ret = anv_gem_execbuffer(device, &fence->execbuf);
+ if (ret != 0)
+ return vk_error(VK_ERROR_UNKNOWN);
+ }
+
+ for (uint32_t i = 0; i < cmd_buffer->execbuf2.bo_count; i++)
+ cmd_buffer->execbuf2.bos[i]->offset = cmd_buffer->execbuf2.objects[i].offset;
+ } else {
+ *(uint32_t *)queue->completed_serial.map = cmd_buffer->serial;
+ }
+ }
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_QueueWaitIdle(
+ VkQueue _queue)
+{
+ ANV_FROM_HANDLE(anv_queue, queue, _queue);
+
+ return vkDeviceWaitIdle(anv_device_to_handle(queue->device));
+}
+
+VkResult anv_DeviceWaitIdle(
+ VkDevice _device)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ struct anv_state state;
+ struct anv_batch batch;
+ struct drm_i915_gem_execbuffer2 execbuf;
+ struct drm_i915_gem_exec_object2 exec2_objects[1];
+ struct anv_bo *bo = NULL;
+ VkResult result;
+ int64_t timeout;
+ int ret;
+
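+ /* There is no dedicated wait-for-idle interface here, so build a trivial
+ * batch containing just MI_BATCH_BUFFER_END, submit it on this context,
+ * and block on its BO; once it completes, work submitted before it on the
+ * same context has finished as well.
+ */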
+ state = anv_state_pool_alloc(&device->dynamic_state_pool, 32, 32);
+ bo = &device->dynamic_state_pool.block_pool->bo;
+ batch.start = batch.next = state.map;
+ batch.end = state.map + 32;
+ anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END);
+ anv_batch_emit(&batch, GEN8_MI_NOOP);
+
+ exec2_objects[0].handle = bo->gem_handle;
+ exec2_objects[0].relocation_count = 0;
+ exec2_objects[0].relocs_ptr = 0;
+ exec2_objects[0].alignment = 0;
+ exec2_objects[0].offset = bo->offset;
+ exec2_objects[0].flags = 0;
+ exec2_objects[0].rsvd1 = 0;
+ exec2_objects[0].rsvd2 = 0;
+
+ execbuf.buffers_ptr = (uintptr_t) exec2_objects;
+ execbuf.buffer_count = 1;
+ execbuf.batch_start_offset = state.offset;
+ execbuf.batch_len = batch.next - state.map;
+ execbuf.cliprects_ptr = 0;
+ execbuf.num_cliprects = 0;
+ execbuf.DR1 = 0;
+ execbuf.DR4 = 0;
+
+ execbuf.flags =
+ I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
+ execbuf.rsvd1 = device->context_id;
+ execbuf.rsvd2 = 0;
+
+ if (!device->no_hw) {
+ ret = anv_gem_execbuffer(device, &execbuf);
+ if (ret != 0) {
+ result = vk_error(VK_ERROR_UNKNOWN);
+ goto fail;
+ }
+
+ timeout = INT64_MAX;
+ ret = anv_gem_wait(device, bo->gem_handle, &timeout);
+ if (ret != 0) {
+ result = vk_error(VK_ERROR_UNKNOWN);
+ goto fail;
+ }
+ }
+
+ anv_state_pool_free(&device->dynamic_state_pool, state);
+
+ return VK_SUCCESS;
+
+ fail:
+ anv_state_pool_free(&device->dynamic_state_pool, state);
+
+ return result;
+}
+
+void *
+anv_device_alloc(struct anv_device * device,
+ size_t size,
+ size_t alignment,
+ VkSystemAllocType allocType)
+{
+ return anv_instance_alloc(device->instance, size, alignment, allocType);
+}
+
+void
+anv_device_free(struct anv_device * device,
+ void * mem)
+{
+ anv_instance_free(device->instance, mem);
+}
+
+VkResult
+anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
+{
+ bo->gem_handle = anv_gem_create(device, size);
+ if (!bo->gem_handle)
+ return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
+
+ bo->map = NULL;
+ bo->index = 0;
+ bo->offset = 0;
+ bo->size = size;
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_AllocMemory(
+ VkDevice _device,
+ const VkMemoryAllocInfo* pAllocInfo,
+ VkDeviceMemory* pMem)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ struct anv_device_memory *mem;
+ VkResult result;
+
+ assert(pAllocInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO);
+
+ if (pAllocInfo->memoryTypeIndex != 0) {
+ /* We support exactly one memory heap. */
+ return vk_error(VK_ERROR_INVALID_VALUE);
+ }
+
+ /* FINISHME: Fail if allocation request exceeds heap size. */
+
+ mem = anv_device_alloc(device, sizeof(*mem), 8,
+ VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+ if (mem == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ result = anv_bo_init_new(&mem->bo, device, pAllocInfo->allocationSize);
+ if (result != VK_SUCCESS)
+ goto fail;
+
+ *pMem = anv_device_memory_to_handle(mem);
+
+ return VK_SUCCESS;
+
+ fail:
+ anv_device_free(device, mem);
+
+ return result;
+}
+
+VkResult anv_FreeMemory(
+ VkDevice _device,
+ VkDeviceMemory _mem)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
+
+ if (mem->bo.map)
+ anv_gem_munmap(mem->bo.map, mem->bo.size);
+
+ if (mem->bo.gem_handle != 0)
+ anv_gem_close(device, mem->bo.gem_handle);
+
+ anv_device_free(device, mem);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_MapMemory(
+ VkDevice _device,
+ VkDeviceMemory _mem,
+ VkDeviceSize offset,
+ VkDeviceSize size,
+ VkMemoryMapFlags flags,
+ void** ppData)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
+
+ /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only
+ * takes a VkDeviceMemory pointer, it seems like only one map of the memory
+ * at a time is valid. We could just mmap up front and return an offset
+ * pointer here, but that may exhaust virtual memory on 32 bit
+ * userspace. */
+
+ mem->map = anv_gem_mmap(device, mem->bo.gem_handle, offset, size);
+ mem->map_size = size;
+
+ *ppData = mem->map;
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_UnmapMemory(
+ VkDevice _device,
+ VkDeviceMemory _mem)
+{
+ ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
+
+ anv_gem_munmap(mem->map, mem->map_size);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_FlushMappedMemoryRanges(
+ VkDevice device,
+ uint32_t memRangeCount,
+ const VkMappedMemoryRange* pMemRanges)
+{
+ /* clflush here for !llc platforms */
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_InvalidateMappedMemoryRanges(
+ VkDevice device,
+ uint32_t memRangeCount,
+ const VkMappedMemoryRange* pMemRanges)
+{
+ return anv_FlushMappedMemoryRanges(device, memRangeCount, pMemRanges);
+}
+
+VkResult anv_GetBufferMemoryRequirements(
+ VkDevice device,
+ VkBuffer _buffer,
+ VkMemoryRequirements* pMemoryRequirements)
+{
+ ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
+
+ /* The Vulkan spec (git aaed022) says:
+ *
+ * memoryTypeBits is a bitfield and contains one bit set for every
+ * supported memory type for the resource. The bit `1<<i` is set if and
+ * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
+ * structure for the physical device is supported.
+ *
+ * We support exactly one memory type.
+ */
+ pMemoryRequirements->memoryTypeBits = 1;
+
+ pMemoryRequirements->size = buffer->size;
+ pMemoryRequirements->alignment = 16;
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_GetImageMemoryRequirements(
+ VkDevice device,
+ VkImage _image,
+ VkMemoryRequirements* pMemoryRequirements)
+{
+ ANV_FROM_HANDLE(anv_image, image, _image);
+
+ /* The Vulkan spec (git aaed022) says:
+ *
+ * memoryTypeBits is a bitfield and contains one bit set for every
+ * supported memory type for the resource. The bit `1<<i` is set if and
+ * only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
+ * structure for the physical device is supported.
+ *
+ * We support exactly one memory type.
+ */
+ pMemoryRequirements->memoryTypeBits = 1;
+
+ pMemoryRequirements->size = image->size;
+ pMemoryRequirements->alignment = image->alignment;
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_GetImageSparseMemoryRequirements(
+ VkDevice device,
+ VkImage image,
+ uint32_t* pNumRequirements,
+ VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
+{
+ return vk_error(VK_UNSUPPORTED);
+}
+
+VkResult anv_GetDeviceMemoryCommitment(
+ VkDevice device,
+ VkDeviceMemory memory,
+ VkDeviceSize* pCommittedMemoryInBytes)
+{
+ *pCommittedMemoryInBytes = 0;
+ stub_return(VK_SUCCESS);
+}
+
+VkResult anv_BindBufferMemory(
+ VkDevice device,
+ VkBuffer _buffer,
+ VkDeviceMemory _mem,
+ VkDeviceSize memOffset)
+{
+ ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
+ ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
+
+ buffer->bo = &mem->bo;
+ buffer->offset = memOffset;
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_BindImageMemory(
+ VkDevice device,
+ VkImage _image,
+ VkDeviceMemory _mem,
+ VkDeviceSize memOffset)
+{
+ ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
+ ANV_FROM_HANDLE(anv_image, image, _image);
+
+ image->bo = &mem->bo;
+ image->offset = memOffset;
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_QueueBindSparseBufferMemory(
+ VkQueue queue,
+ VkBuffer buffer,
+ uint32_t numBindings,
+ const VkSparseMemoryBindInfo* pBindInfo)
+{
+ stub_return(VK_UNSUPPORTED);
+}
+
+VkResult anv_QueueBindSparseImageOpaqueMemory(
+ VkQueue queue,
+ VkImage image,
+ uint32_t numBindings,
+ const VkSparseMemoryBindInfo* pBindInfo)
+{
+ stub_return(VK_UNSUPPORTED);
+}
+
+VkResult anv_QueueBindSparseImageMemory(
+ VkQueue queue,
+ VkImage image,
+ uint32_t numBindings,
+ const VkSparseImageMemoryBindInfo* pBindInfo)
+{
+ stub_return(VK_UNSUPPORTED);
+}
+
+VkResult anv_CreateFence(
+ VkDevice _device,
+ const VkFenceCreateInfo* pCreateInfo,
+ VkFence* pFence)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ struct anv_fence *fence;
+ struct anv_batch batch;
+ VkResult result;
+
+ const uint32_t fence_size = 128;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
+
+ fence = anv_device_alloc(device, sizeof(*fence), 8,
+ VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+ if (fence == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ result = anv_bo_init_new(&fence->bo, device, fence_size);
+ if (result != VK_SUCCESS)
+ goto fail;
+
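+   /* The fence is backed by a tiny batch buffer that contains nothing but
+    * MI_BATCH_BUFFER_END.  The execbuf set up below is presumably submitted
+    * by the queue submission path when the fence is signaled; waiting on
+    * the fence is then just a GEM wait on this BO (see anv_WaitForFences).
+    */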
+ fence->bo.map =
+ anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size);
+ batch.next = batch.start = fence->bo.map;
+ batch.end = fence->bo.map + fence->bo.size;
+ anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END);
+ anv_batch_emit(&batch, GEN8_MI_NOOP);
+
+ fence->exec2_objects[0].handle = fence->bo.gem_handle;
+ fence->exec2_objects[0].relocation_count = 0;
+ fence->exec2_objects[0].relocs_ptr = 0;
+ fence->exec2_objects[0].alignment = 0;
+ fence->exec2_objects[0].offset = fence->bo.offset;
+ fence->exec2_objects[0].flags = 0;
+ fence->exec2_objects[0].rsvd1 = 0;
+ fence->exec2_objects[0].rsvd2 = 0;
+
+ fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects;
+ fence->execbuf.buffer_count = 1;
+ fence->execbuf.batch_start_offset = 0;
+ fence->execbuf.batch_len = batch.next - fence->bo.map;
+ fence->execbuf.cliprects_ptr = 0;
+ fence->execbuf.num_cliprects = 0;
+ fence->execbuf.DR1 = 0;
+ fence->execbuf.DR4 = 0;
+
+ fence->execbuf.flags =
+ I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
+ fence->execbuf.rsvd1 = device->context_id;
+ fence->execbuf.rsvd2 = 0;
+
+ *pFence = anv_fence_to_handle(fence);
+
+ return VK_SUCCESS;
+
+ fail:
+ anv_device_free(device, fence);
+
+ return result;
+}
+
+VkResult anv_DestroyFence(
+ VkDevice _device,
+ VkFence _fence)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_fence, fence, _fence);
+
+ anv_gem_munmap(fence->bo.map, fence->bo.size);
+ anv_gem_close(device, fence->bo.gem_handle);
+ anv_device_free(device, fence);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_ResetFences(
+ VkDevice _device,
+ uint32_t fenceCount,
+ const VkFence* pFences)
+{
+ for (uint32_t i = 0; i < fenceCount; i++) {
+ ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
+ fence->ready = false;
+ }
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_GetFenceStatus(
+ VkDevice _device,
+ VkFence _fence)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_fence, fence, _fence);
+ int64_t t = 0;
+ int ret;
+
+ if (fence->ready)
+ return VK_SUCCESS;
+
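+   /* A GEM wait with a zero timeout is effectively a poll: a return of 0
+    * means the BO is idle and the fence has signaled; otherwise it is
+    * still busy.
+    */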
+ ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
+ if (ret == 0) {
+ fence->ready = true;
+ return VK_SUCCESS;
+ }
+
+ return VK_NOT_READY;
+}
+
+VkResult anv_WaitForFences(
+ VkDevice _device,
+ uint32_t fenceCount,
+ const VkFence* pFences,
+ VkBool32 waitAll,
+ uint64_t timeout)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ int64_t t = timeout;
+ int ret;
+
+ /* FIXME: handle !waitAll */
+
+ for (uint32_t i = 0; i < fenceCount; i++) {
+ ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
+ ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
+ if (ret == -1 && errno == ETIME)
+ return VK_TIMEOUT;
+ else if (ret == -1)
+ return vk_error(VK_ERROR_UNKNOWN);
+ }
+
+ return VK_SUCCESS;
+}
+
+// Queue semaphore functions
+
+VkResult anv_CreateSemaphore(
+ VkDevice device,
+ const VkSemaphoreCreateInfo* pCreateInfo,
+ VkSemaphore* pSemaphore)
+{
+ stub_return(VK_UNSUPPORTED);
+}
+
+VkResult anv_DestroySemaphore(
+ VkDevice device,
+ VkSemaphore semaphore)
+{
+ stub_return(VK_UNSUPPORTED);
+}
+
+VkResult anv_QueueSignalSemaphore(
+ VkQueue queue,
+ VkSemaphore semaphore)
+{
+ stub_return(VK_UNSUPPORTED);
+}
+
+VkResult anv_QueueWaitSemaphore(
+ VkQueue queue,
+ VkSemaphore semaphore)
+{
+ stub_return(VK_UNSUPPORTED);
+}
+
+// Event functions
+
+VkResult anv_CreateEvent(
+ VkDevice device,
+ const VkEventCreateInfo* pCreateInfo,
+ VkEvent* pEvent)
+{
+ stub_return(VK_UNSUPPORTED);
+}
+
+VkResult anv_DestroyEvent(
+ VkDevice device,
+ VkEvent event)
+{
+ stub_return(VK_UNSUPPORTED);
+}
+
+VkResult anv_GetEventStatus(
+ VkDevice device,
+ VkEvent event)
+{
+ stub_return(VK_UNSUPPORTED);
+}
+
+VkResult anv_SetEvent(
+ VkDevice device,
+ VkEvent event)
+{
+ stub_return(VK_UNSUPPORTED);
+}
+
+VkResult anv_ResetEvent(
+ VkDevice device,
+ VkEvent event)
+{
+ stub_return(VK_UNSUPPORTED);
+}
+
+// Buffer functions
+
+VkResult anv_CreateBuffer(
+ VkDevice _device,
+ const VkBufferCreateInfo* pCreateInfo,
+ VkBuffer* pBuffer)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ struct anv_buffer *buffer;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
+
+ buffer = anv_device_alloc(device, sizeof(*buffer), 8,
+ VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+ if (buffer == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ buffer->size = pCreateInfo->size;
+ buffer->bo = NULL;
+ buffer->offset = 0;
+
+ *pBuffer = anv_buffer_to_handle(buffer);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_DestroyBuffer(
+ VkDevice _device,
+ VkBuffer _buffer)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
+
+ anv_device_free(device, buffer);
+
+ return VK_SUCCESS;
+}
+
+// Buffer view functions
+
+void
+anv_fill_buffer_surface_state(void *state, VkFormat format,
+ uint32_t offset, uint32_t range)
+{
+ const struct anv_format *info;
+
+ info = anv_format_for_vk_format(format);
+ /* This assumes RGBA float format. */
+ uint32_t stride = 4;
+ uint32_t num_elements = range / stride;
+
+ struct GEN8_RENDER_SURFACE_STATE surface_state = {
+ .SurfaceType = SURFTYPE_BUFFER,
+ .SurfaceArray = false,
+ .SurfaceFormat = info->surface_format,
+ .SurfaceVerticalAlignment = VALIGN4,
+ .SurfaceHorizontalAlignment = HALIGN4,
+ .TileMode = LINEAR,
+ .VerticalLineStride = 0,
+ .VerticalLineStrideOffset = 0,
+ .SamplerL2BypassModeDisable = true,
+ .RenderCacheReadWriteMode = WriteOnlyCache,
+ .MemoryObjectControlState = GEN8_MOCS,
+ .BaseMipLevel = 0.0,
+ .SurfaceQPitch = 0,
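+      /* For a buffer surface the element count computed above is packed
+       * into the Width (bits 6:0), Height (bits 20:7) and Depth
+       * (bits 26:21) fields below.
+       */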
+ .Height = (num_elements >> 7) & 0x3fff,
+ .Width = num_elements & 0x7f,
+ .Depth = (num_elements >> 21) & 0x3f,
+ .SurfacePitch = stride - 1,
+ .MinimumArrayElement = 0,
+ .NumberofMultisamples = MULTISAMPLECOUNT_1,
+ .XOffset = 0,
+ .YOffset = 0,
+ .SurfaceMinLOD = 0,
+ .MIPCountLOD = 0,
+ .AuxiliarySurfaceMode = AUX_NONE,
+ .RedClearColor = 0,
+ .GreenClearColor = 0,
+ .BlueClearColor = 0,
+ .AlphaClearColor = 0,
+ .ShaderChannelSelectRed = SCS_RED,
+ .ShaderChannelSelectGreen = SCS_GREEN,
+ .ShaderChannelSelectBlue = SCS_BLUE,
+ .ShaderChannelSelectAlpha = SCS_ALPHA,
+ .ResourceMinLOD = 0.0,
+ /* FIXME: We assume that the image must be bound at this time. */
+ .SurfaceBaseAddress = { NULL, offset },
+ };
+
+ GEN8_RENDER_SURFACE_STATE_pack(NULL, state, &surface_state);
+}
+
+VkResult anv_CreateBufferView(
+ VkDevice _device,
+ const VkBufferViewCreateInfo* pCreateInfo,
+ VkBufferView* pView)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_buffer, buffer, pCreateInfo->buffer);
+ struct anv_buffer_view *bview;
+ struct anv_surface_view *view;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO);
+
+   bview = anv_device_alloc(device, sizeof(*bview), 8,
+ VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+ if (bview == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ view = &bview->view;
+ view->bo = buffer->bo;
+ view->offset = buffer->offset + pCreateInfo->offset;
+ view->surface_state =
+ anv_state_pool_alloc(&device->surface_state_pool, 64, 64);
+ view->format = pCreateInfo->format;
+ view->range = pCreateInfo->range;
+
+ anv_fill_buffer_surface_state(view->surface_state.map,
+ pCreateInfo->format,
+ view->offset, pCreateInfo->range);
+
+ *pView = anv_buffer_view_to_handle(bview);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_DestroyBufferView(
+ VkDevice _device,
+ VkBufferView _bview)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_buffer_view, bview, _bview);
+
+ anv_surface_view_fini(device, &bview->view);
+ anv_device_free(device, bview);
+
+ return VK_SUCCESS;
+}
+
+// Sampler functions
+
+VkResult anv_CreateSampler(
+ VkDevice _device,
+ const VkSamplerCreateInfo* pCreateInfo,
+ VkSampler* pSampler)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ struct anv_sampler *sampler;
+ uint32_t mag_filter, min_filter, max_anisotropy;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
+
+ sampler = anv_device_alloc(device, sizeof(*sampler), 8,
+ VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+ if (!sampler)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ static const uint32_t vk_to_gen_tex_filter[] = {
+ [VK_TEX_FILTER_NEAREST] = MAPFILTER_NEAREST,
+ [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR
+ };
+
+ static const uint32_t vk_to_gen_mipmap_mode[] = {
+ [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE,
+ [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST,
+ [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR
+ };
+
+ static const uint32_t vk_to_gen_tex_address[] = {
+ [VK_TEX_ADDRESS_WRAP] = TCM_WRAP,
+ [VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR,
+ [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP,
+ [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE,
+ [VK_TEX_ADDRESS_CLAMP_BORDER] = TCM_CLAMP_BORDER,
+ };
+
+ static const uint32_t vk_to_gen_compare_op[] = {
+ [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER,
+ [VK_COMPARE_OP_LESS] = PREFILTEROPLESS,
+ [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL,
+ [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL,
+ [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER,
+ [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL,
+ [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL,
+ [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS,
+ };
+
+ if (pCreateInfo->maxAnisotropy > 1) {
+ mag_filter = MAPFILTER_ANISOTROPIC;
+ min_filter = MAPFILTER_ANISOTROPIC;
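+      /* The hardware MaximumAnisotropy field is a ratio enum (0 = 2:1,
+       * 1 = 4:1, ...), hence the (maxAnisotropy - 2) / 2 mapping below.
+       */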
+ max_anisotropy = (pCreateInfo->maxAnisotropy - 2) / 2;
+ } else {
+ mag_filter = vk_to_gen_tex_filter[pCreateInfo->magFilter];
+ min_filter = vk_to_gen_tex_filter[pCreateInfo->minFilter];
+ max_anisotropy = RATIO21;
+ }
+
+ struct GEN8_SAMPLER_STATE sampler_state = {
+ .SamplerDisable = false,
+ .TextureBorderColorMode = DX10OGL,
+ .LODPreClampMode = 0,
+ .BaseMipLevel = 0.0,
+ .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode],
+ .MagModeFilter = mag_filter,
+ .MinModeFilter = min_filter,
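+      /* The hardware LOD bias field uses 8 fractional bits of fixed point,
+       * hence the multiply by 256.
+       */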
+ .TextureLODBias = pCreateInfo->mipLodBias * 256,
+ .AnisotropicAlgorithm = EWAApproximation,
+ .MinLOD = pCreateInfo->minLod,
+ .MaxLOD = pCreateInfo->maxLod,
+ .ChromaKeyEnable = 0,
+ .ChromaKeyIndex = 0,
+ .ChromaKeyMode = 0,
+ .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp],
+ .CubeSurfaceControlMode = 0,
+
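+      /* This assumes device->border_colors holds one four-float RGBA entry
+       * per VkBorderColor value, indexed directly by the API enum.
+       */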
+ .IndirectStatePointer =
+ device->border_colors.offset +
+ pCreateInfo->borderColor * sizeof(float) * 4,
+
+ .LODClampMagnificationMode = MIPNONE,
+ .MaximumAnisotropy = max_anisotropy,
+ .RAddressMinFilterRoundingEnable = 0,
+ .RAddressMagFilterRoundingEnable = 0,
+ .VAddressMinFilterRoundingEnable = 0,
+ .VAddressMagFilterRoundingEnable = 0,
+ .UAddressMinFilterRoundingEnable = 0,
+ .UAddressMagFilterRoundingEnable = 0,
+ .TrilinearFilterQuality = 0,
+ .NonnormalizedCoordinateEnable = 0,
+ .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressU],
+ .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressV],
+ .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressW],
+ };
+
+ GEN8_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state);
+
+ *pSampler = anv_sampler_to_handle(sampler);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_DestroySampler(
+ VkDevice _device,
+ VkSampler _sampler)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);
+
+ anv_device_free(device, sampler);
+
+ return VK_SUCCESS;
+}
+
+// Descriptor set functions
+
+VkResult anv_CreateDescriptorSetLayout(
+ VkDevice _device,
+ const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
+ VkDescriptorSetLayout* pSetLayout)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ struct anv_descriptor_set_layout *set_layout;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO);
+
+ uint32_t sampler_count[VK_SHADER_STAGE_NUM] = { 0, };
+ uint32_t surface_count[VK_SHADER_STAGE_NUM] = { 0, };
+ uint32_t num_dynamic_buffers = 0;
+ uint32_t count = 0;
+ uint32_t stages = 0;
+ uint32_t s;
+
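+   /* First pass over the bindings: count the sampler and surface slots
+    * each shader stage will need so the entries array can be sized and
+    * partitioned below.
+    */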
+ for (uint32_t i = 0; i < pCreateInfo->count; i++) {
+ switch (pCreateInfo->pBinding[i].descriptorType) {
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
+ sampler_count[s] += pCreateInfo->pBinding[i].arraySize;
+ break;
+ default:
+ break;
+ }
+
+ switch (pCreateInfo->pBinding[i].descriptorType) {
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
+ surface_count[s] += pCreateInfo->pBinding[i].arraySize;
+ break;
+ default:
+ break;
+ }
+
+ switch (pCreateInfo->pBinding[i].descriptorType) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ num_dynamic_buffers += pCreateInfo->pBinding[i].arraySize;
+ break;
+ default:
+ break;
+ }
+
+ stages |= pCreateInfo->pBinding[i].stageFlags;
+ count += pCreateInfo->pBinding[i].arraySize;
+ }
+
+ uint32_t sampler_total = 0;
+ uint32_t surface_total = 0;
+ for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) {
+ sampler_total += sampler_count[s];
+ surface_total += surface_count[s];
+ }
+
+ size_t size = sizeof(*set_layout) +
+ (sampler_total + surface_total) * sizeof(set_layout->entries[0]);
+ set_layout = anv_device_alloc(device, size, 8,
+ VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+ if (!set_layout)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ set_layout->num_dynamic_buffers = num_dynamic_buffers;
+ set_layout->count = count;
+ set_layout->shader_stages = stages;
+
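+   /* Partition the trailing entries array into a surface slot range
+    * followed by a sampler slot range for each shader stage.
+    */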
+ struct anv_descriptor_slot *p = set_layout->entries;
+ struct anv_descriptor_slot *sampler[VK_SHADER_STAGE_NUM];
+ struct anv_descriptor_slot *surface[VK_SHADER_STAGE_NUM];
+ for (uint32_t s = 0; s < VK_SHADER_STAGE_NUM; s++) {
+ set_layout->stage[s].surface_count = surface_count[s];
+ set_layout->stage[s].surface_start = surface[s] = p;
+ p += surface_count[s];
+ set_layout->stage[s].sampler_count = sampler_count[s];
+ set_layout->stage[s].sampler_start = sampler[s] = p;
+ p += sampler_count[s];
+ }
+
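+   /* Second pass: walk the bindings again and fill in the per-stage slots,
+    * recording each descriptor's index and, for dynamic buffers, the
+    * dynamic slot it consumes.
+    */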
+ uint32_t descriptor = 0;
+ int8_t dynamic_slot = 0;
+ bool is_dynamic;
+ for (uint32_t i = 0; i < pCreateInfo->count; i++) {
+ switch (pCreateInfo->pBinding[i].descriptorType) {
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
+ for (uint32_t j = 0; j < pCreateInfo->pBinding[i].arraySize; j++) {
+ sampler[s]->index = descriptor + j;
+ sampler[s]->dynamic_slot = -1;
+ sampler[s]++;
+ }
+ break;
+ default:
+ break;
+ }
+
+ switch (pCreateInfo->pBinding[i].descriptorType) {
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ is_dynamic = true;
+ break;
+ default:
+ is_dynamic = false;
+ break;
+ }
+
+ switch (pCreateInfo->pBinding[i].descriptorType) {
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
+ for (uint32_t j = 0; j < pCreateInfo->pBinding[i].arraySize; j++) {
+ surface[s]->index = descriptor + j;
+ if (is_dynamic)
+ surface[s]->dynamic_slot = dynamic_slot + j;
+ else
+ surface[s]->dynamic_slot = -1;
+ surface[s]++;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (is_dynamic)
+ dynamic_slot += pCreateInfo->pBinding[i].arraySize;
+
+ descriptor += pCreateInfo->pBinding[i].arraySize;
+ }
+
+ *pSetLayout = anv_descriptor_set_layout_to_handle(set_layout);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_DestroyDescriptorSetLayout(
+ VkDevice _device,
+ VkDescriptorSetLayout _set_layout)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_descriptor_set_layout, set_layout, _set_layout);
+
+ anv_device_free(device, set_layout);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_CreateDescriptorPool(
+ VkDevice device,
+ VkDescriptorPoolUsage poolUsage,
+ uint32_t maxSets,
+ const VkDescriptorPoolCreateInfo* pCreateInfo,
+ VkDescriptorPool* pDescriptorPool)
+{
+ anv_finishme("VkDescriptorPool is a stub");
+ pDescriptorPool->handle = 1;
+ return VK_SUCCESS;
+}
+
+VkResult anv_DestroyDescriptorPool(
+ VkDevice _device,
+ VkDescriptorPool _pool)
+{
+ anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets");
+ return VK_SUCCESS;
+}
+
+VkResult anv_ResetDescriptorPool(
+ VkDevice device,
+ VkDescriptorPool descriptorPool)
+{
+ anv_finishme("VkDescriptorPool is a stub: free the pool's descriptor sets");
+ return VK_SUCCESS;
+}
+
+VkResult
+anv_descriptor_set_create(struct anv_device *device,
+ const struct anv_descriptor_set_layout *layout,
+ struct anv_descriptor_set **out_set)
+{
+ struct anv_descriptor_set *set;
+ size_t size = sizeof(*set) + layout->count * sizeof(set->descriptors[0]);
+
+ set = anv_device_alloc(device, size, 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+ if (!set)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* A descriptor set may not be 100% filled. Clear the set so we can
+ * later detect holes in it.
+ */
+ memset(set, 0, size);
+
+ *out_set = set;
+
+ return VK_SUCCESS;
+}
+
+void
+anv_descriptor_set_destroy(struct anv_device *device,
+ struct anv_descriptor_set *set)
+{
+ anv_device_free(device, set);
+}
+
+VkResult anv_AllocDescriptorSets(
+ VkDevice _device,
+ VkDescriptorPool descriptorPool,
+ VkDescriptorSetUsage setUsage,
+ uint32_t count,
+ const VkDescriptorSetLayout* pSetLayouts,
+ VkDescriptorSet* pDescriptorSets,
+ uint32_t* pCount)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+
+ VkResult result;
+ struct anv_descriptor_set *set;
+
+ for (uint32_t i = 0; i < count; i++) {
+ ANV_FROM_HANDLE(anv_descriptor_set_layout, layout, pSetLayouts[i]);
+
+ result = anv_descriptor_set_create(device, layout, &set);
+ if (result != VK_SUCCESS) {
+ *pCount = i;
+ return result;
+ }
+
+ pDescriptorSets[i] = anv_descriptor_set_to_handle(set);
+ }
+
+ *pCount = count;
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_FreeDescriptorSets(
+ VkDevice _device,
+ VkDescriptorPool descriptorPool,
+ uint32_t count,
+ const VkDescriptorSet* pDescriptorSets)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+
+ for (uint32_t i = 0; i < count; i++) {
+ ANV_FROM_HANDLE(anv_descriptor_set, set, pDescriptorSets[i]);
+
+ anv_descriptor_set_destroy(device, set);
+ }
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_UpdateDescriptorSets(
+ VkDevice device,
+ uint32_t writeCount,
+ const VkWriteDescriptorSet* pDescriptorWrites,
+ uint32_t copyCount,
+ const VkCopyDescriptorSet* pDescriptorCopies)
+{
+ for (uint32_t i = 0; i < writeCount; i++) {
+ const VkWriteDescriptorSet *write = &pDescriptorWrites[i];
+ ANV_FROM_HANDLE(anv_descriptor_set, set, write->destSet);
+
+ switch (write->descriptorType) {
+ case VK_DESCRIPTOR_TYPE_SAMPLER:
+ case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
+ for (uint32_t j = 0; j < write->count; j++) {
+ set->descriptors[write->destBinding + j].sampler =
+ anv_sampler_from_handle(write->pDescriptors[j].sampler);
+ }
+
+ if (write->descriptorType == VK_DESCRIPTOR_TYPE_SAMPLER)
+ break;
+
+ /* fallthrough */
+
+ case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
+ case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
+ for (uint32_t j = 0; j < write->count; j++) {
+ ANV_FROM_HANDLE(anv_image_view, iview,
+ write->pDescriptors[j].imageView);
+ set->descriptors[write->destBinding + j].view = &iview->view;
+ }
+ break;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
+ anv_finishme("texel buffers not implemented");
+ break;
+
+ case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:
+ anv_finishme("input attachments not implemented");
+ break;
+
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
+ case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
+ case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
+ for (uint32_t j = 0; j < write->count; j++) {
+ ANV_FROM_HANDLE(anv_buffer_view, bview,
+ write->pDescriptors[j].bufferView);
+ set->descriptors[write->destBinding + j].view = &bview->view;
+         }
+         break;
+
+ default:
+ break;
+ }
+ }
+
+ for (uint32_t i = 0; i < copyCount; i++) {
+ const VkCopyDescriptorSet *copy = &pDescriptorCopies[i];
+      ANV_FROM_HANDLE(anv_descriptor_set, src, copy->srcSet);
+ ANV_FROM_HANDLE(anv_descriptor_set, dest, copy->destSet);
+ for (uint32_t j = 0; j < copy->count; j++) {
+ dest->descriptors[copy->destBinding + j] =
+ src->descriptors[copy->srcBinding + j];
+ }
+ }
+
+ return VK_SUCCESS;
+}
+
+// State object functions
+
+static inline int64_t
+clamp_int64(int64_t x, int64_t min, int64_t max)
+{
+ if (x < min)
+ return min;
+ else if (x < max)
+ return x;
+ else
+ return max;
+}
+
+VkResult anv_CreateDynamicViewportState(
+ VkDevice _device,
+ const VkDynamicViewportStateCreateInfo* pCreateInfo,
+ VkDynamicViewportState* pState)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ struct anv_dynamic_vp_state *state;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_VIEWPORT_STATE_CREATE_INFO);
+
+ state = anv_device_alloc(device, sizeof(*state), 8,
+ VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+ if (state == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ unsigned count = pCreateInfo->viewportAndScissorCount;
+ state->sf_clip_vp = anv_state_pool_alloc(&device->dynamic_state_pool,
+ count * 64, 64);
+ state->cc_vp = anv_state_pool_alloc(&device->dynamic_state_pool,
+ count * 8, 32);
+ state->scissor = anv_state_pool_alloc(&device->dynamic_state_pool,
+ count * 32, 32);
+
+ for (uint32_t i = 0; i < pCreateInfo->viewportAndScissorCount; i++) {
+ const VkViewport *vp = &pCreateInfo->pViewports[i];
+ const VkRect2D *s = &pCreateInfo->pScissors[i];
+
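+      /* The SF_CLIP viewport encodes the NDC-to-window transform: m00/m11
+       * scale x/y by half the viewport size, m30/m31 translate to the
+       * viewport center, and m22/m32 map z from [-1, 1] onto
+       * [minDepth, maxDepth].  A [-1, 1] guardband means no extra
+       * guardband beyond the viewport itself.
+       */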
+ struct GEN8_SF_CLIP_VIEWPORT sf_clip_viewport = {
+ .ViewportMatrixElementm00 = vp->width / 2,
+ .ViewportMatrixElementm11 = vp->height / 2,
+ .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) / 2,
+ .ViewportMatrixElementm30 = vp->originX + vp->width / 2,
+ .ViewportMatrixElementm31 = vp->originY + vp->height / 2,
+ .ViewportMatrixElementm32 = (vp->maxDepth + vp->minDepth) / 2,
+ .XMinClipGuardband = -1.0f,
+ .XMaxClipGuardband = 1.0f,
+ .YMinClipGuardband = -1.0f,
+ .YMaxClipGuardband = 1.0f,
+ .XMinViewPort = vp->originX,
+ .XMaxViewPort = vp->originX + vp->width - 1,
+ .YMinViewPort = vp->originY,
+ .YMaxViewPort = vp->originY + vp->height - 1,
+ };
+
+ struct GEN8_CC_VIEWPORT cc_viewport = {
+ .MinimumDepth = vp->minDepth,
+ .MaximumDepth = vp->maxDepth
+ };
+
+      /* Since xmax and ymax are inclusive, we have to have xmax < xmin or
+       * ymax < ymin for empty clips. In case the clip x, y, width and
+       * height are all 0, the clamps below produce 0 for xmin, ymin, xmax,
+       * ymax, which isn't what we want. Just special case empty clips and
+       * produce a canonical empty clip. */
+ static const struct GEN8_SCISSOR_RECT empty_scissor = {
+ .ScissorRectangleYMin = 1,
+ .ScissorRectangleXMin = 1,
+ .ScissorRectangleYMax = 0,
+ .ScissorRectangleXMax = 0
+ };
+
+ const int max = 0xffff;
+ struct GEN8_SCISSOR_RECT scissor = {
+ /* Do this math using int64_t so overflow gets clamped correctly. */
+ .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max),
+ .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max),
+ .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max),
+ .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max)
+ };
+
+ GEN8_SF_CLIP_VIEWPORT_pack(NULL, state->sf_clip_vp.map + i * 64, &sf_clip_viewport);
+      GEN8_CC_VIEWPORT_pack(NULL, state->cc_vp.map + i * 8, &cc_viewport);
+
+ if (s->extent.width <= 0 || s->extent.height <= 0) {
+ GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &empty_scissor);
+ } else {
+ GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &scissor);
+ }
+ }
+
+ *pState = anv_dynamic_vp_state_to_handle(state);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_DestroyDynamicViewportState(
+ VkDevice _device,
+ VkDynamicViewportState _vp_state)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_dynamic_vp_state, vp_state, _vp_state);
+
+ anv_state_pool_free(&device->dynamic_state_pool, vp_state->sf_clip_vp);
+ anv_state_pool_free(&device->dynamic_state_pool, vp_state->cc_vp);
+ anv_state_pool_free(&device->dynamic_state_pool, vp_state->scissor);
+
+ anv_device_free(device, vp_state);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_CreateDynamicRasterState(
+ VkDevice _device,
+ const VkDynamicRasterStateCreateInfo* pCreateInfo,
+ VkDynamicRasterState* pState)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ struct anv_dynamic_rs_state *state;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_RASTER_STATE_CREATE_INFO);
+
+ state = anv_device_alloc(device, sizeof(*state), 8,
+ VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+ if (state == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ struct GEN8_3DSTATE_SF sf = {
+ GEN8_3DSTATE_SF_header,
+ .LineWidth = pCreateInfo->lineWidth,
+ };
+
+ GEN8_3DSTATE_SF_pack(NULL, state->state_sf, &sf);
+
+ bool enable_bias = pCreateInfo->depthBias != 0.0f ||
+ pCreateInfo->slopeScaledDepthBias != 0.0f;
+ struct GEN8_3DSTATE_RASTER raster = {
+ .GlobalDepthOffsetEnableSolid = enable_bias,
+ .GlobalDepthOffsetEnableWireframe = enable_bias,
+ .GlobalDepthOffsetEnablePoint = enable_bias,
+ .GlobalDepthOffsetConstant = pCreateInfo->depthBias,
+ .GlobalDepthOffsetScale = pCreateInfo->slopeScaledDepthBias,
+ .GlobalDepthOffsetClamp = pCreateInfo->depthBiasClamp
+ };
+
+ GEN8_3DSTATE_RASTER_pack(NULL, state->state_raster, &raster);
+
+ *pState = anv_dynamic_rs_state_to_handle(state);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_DestroyDynamicRasterState(
+ VkDevice _device,
+ VkDynamicRasterState _rs_state)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_dynamic_rs_state, rs_state, _rs_state);
+
+ anv_device_free(device, rs_state);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_CreateDynamicColorBlendState(
+ VkDevice _device,
+ const VkDynamicColorBlendStateCreateInfo* pCreateInfo,
+ VkDynamicColorBlendState* pState)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ struct anv_dynamic_cb_state *state;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_COLOR_BLEND_STATE_CREATE_INFO);
+
+ state = anv_device_alloc(device, sizeof(*state), 8,
+ VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+ if (state == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ struct GEN8_COLOR_CALC_STATE color_calc_state = {
+ .BlendConstantColorRed = pCreateInfo->blendConst[0],
+ .BlendConstantColorGreen = pCreateInfo->blendConst[1],
+ .BlendConstantColorBlue = pCreateInfo->blendConst[2],
+ .BlendConstantColorAlpha = pCreateInfo->blendConst[3]
+ };
+
+ GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state);
+
+ *pState = anv_dynamic_cb_state_to_handle(state);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_DestroyDynamicColorBlendState(
+ VkDevice _device,
+ VkDynamicColorBlendState _cb_state)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_dynamic_cb_state, cb_state, _cb_state);
+
+ anv_device_free(device, cb_state);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_CreateDynamicDepthStencilState(
+ VkDevice _device,
+ const VkDynamicDepthStencilStateCreateInfo* pCreateInfo,
+ VkDynamicDepthStencilState* pState)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ struct anv_dynamic_ds_state *state;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_DEPTH_STENCIL_STATE_CREATE_INFO);
+
+ state = anv_device_alloc(device, sizeof(*state), 8,
+ VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+ if (state == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = {
+ GEN8_3DSTATE_WM_DEPTH_STENCIL_header,
+
+ /* Is this what we need to do? */
+ .StencilBufferWriteEnable = pCreateInfo->stencilWriteMask != 0,
+
+ .StencilTestMask = pCreateInfo->stencilReadMask & 0xff,
+ .StencilWriteMask = pCreateInfo->stencilWriteMask & 0xff,
+
+ .BackfaceStencilTestMask = pCreateInfo->stencilReadMask & 0xff,
+ .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask & 0xff,
+ };
+
+ GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, state->state_wm_depth_stencil,
+ &wm_depth_stencil);
+
+ struct GEN8_COLOR_CALC_STATE color_calc_state = {
+ .StencilReferenceValue = pCreateInfo->stencilFrontRef,
+ .BackFaceStencilReferenceValue = pCreateInfo->stencilBackRef
+ };
+
+ GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state);
+
+ *pState = anv_dynamic_ds_state_to_handle(state);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_DestroyDynamicDepthStencilState(
+ VkDevice _device,
+ VkDynamicDepthStencilState _ds_state)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_dynamic_ds_state, ds_state, _ds_state);
+
+ anv_device_free(device, ds_state);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_CreateFramebuffer(
+ VkDevice _device,
+ const VkFramebufferCreateInfo* pCreateInfo,
+ VkFramebuffer* pFramebuffer)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ struct anv_framebuffer *framebuffer;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
+
+ size_t size = sizeof(*framebuffer) +
+ sizeof(struct anv_attachment_view *) * pCreateInfo->attachmentCount;
+ framebuffer = anv_device_alloc(device, size, 8,
+ VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+ if (framebuffer == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+ framebuffer->attachment_count = pCreateInfo->attachmentCount;
+ for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
+ ANV_FROM_HANDLE(anv_attachment_view, view,
+ pCreateInfo->pAttachments[i].view);
+
+ framebuffer->attachments[i] = view;
+ }
+
+ framebuffer->width = pCreateInfo->width;
+ framebuffer->height = pCreateInfo->height;
+ framebuffer->layers = pCreateInfo->layers;
+
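+   /* Every framebuffer carries a default dynamic viewport/scissor state
+    * covering its full extent; it is destroyed again in
+    * anv_DestroyFramebuffer().
+    */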
+ anv_CreateDynamicViewportState(anv_device_to_handle(device),
+ &(VkDynamicViewportStateCreateInfo) {
+ .sType = VK_STRUCTURE_TYPE_DYNAMIC_VIEWPORT_STATE_CREATE_INFO,
+ .viewportAndScissorCount = 1,
+ .pViewports = (VkViewport[]) {
+ {
+ .originX = 0,
+ .originY = 0,
+ .width = pCreateInfo->width,
+ .height = pCreateInfo->height,
+ .minDepth = 0,
+ .maxDepth = 1
+ },
+ },
+ .pScissors = (VkRect2D[]) {
+ { { 0, 0 },
+ { pCreateInfo->width, pCreateInfo->height } },
+ }
+ },
+ &framebuffer->vp_state);
+
+ *pFramebuffer = anv_framebuffer_to_handle(framebuffer);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_DestroyFramebuffer(
+ VkDevice _device,
+ VkFramebuffer _fb)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_framebuffer, fb, _fb);
+
+ anv_DestroyDynamicViewportState(anv_device_to_handle(device),
+ fb->vp_state);
+ anv_device_free(device, fb);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_CreateRenderPass(
+ VkDevice _device,
+ const VkRenderPassCreateInfo* pCreateInfo,
+ VkRenderPass* pRenderPass)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ struct anv_render_pass *pass;
+ size_t size;
+
+ assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);
+
+ size = sizeof(*pass) +
+ pCreateInfo->subpassCount * sizeof(struct anv_subpass);
+ pass = anv_device_alloc(device, size, 8,
+ VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+ if (pass == NULL)
+ return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
+
+   /* Clear the subpasses along with the parent pass. This is required because
+    * each array member of anv_subpass must be a valid pointer or NULL.
+ */
+ memset(pass, 0, size);
+
+ pass->attachment_count = pCreateInfo->attachmentCount;
+ pass->subpass_count = pCreateInfo->subpassCount;
+
+ size = pCreateInfo->attachmentCount * sizeof(*pass->attachments);
+ pass->attachments = anv_device_alloc(device, size, 8,
+ VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+ for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
+ pass->attachments[i].format = pCreateInfo->pAttachments[i].format;
+ pass->attachments[i].samples = pCreateInfo->pAttachments[i].samples;
+ pass->attachments[i].load_op = pCreateInfo->pAttachments[i].loadOp;
+ pass->attachments[i].stencil_load_op = pCreateInfo->pAttachments[i].stencilLoadOp;
+ // pass->attachments[i].store_op = pCreateInfo->pAttachments[i].storeOp;
+ // pass->attachments[i].stencil_store_op = pCreateInfo->pAttachments[i].stencilStoreOp;
+ }
+
+ for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
+ const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];
+ struct anv_subpass *subpass = &pass->subpasses[i];
+
+ subpass->input_count = desc->inputCount;
+ subpass->color_count = desc->colorCount;
+
+ if (desc->inputCount > 0) {
+ subpass->input_attachments =
+ anv_device_alloc(device, desc->inputCount * sizeof(uint32_t),
+ 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+
+ for (uint32_t j = 0; j < desc->inputCount; j++) {
+ subpass->input_attachments[j]
+ = desc->inputAttachments[j].attachment;
+ }
+ }
+
+ if (desc->colorCount > 0) {
+ subpass->color_attachments =
+ anv_device_alloc(device, desc->colorCount * sizeof(uint32_t),
+ 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+
+ for (uint32_t j = 0; j < desc->colorCount; j++) {
+ subpass->color_attachments[j]
+ = desc->colorAttachments[j].attachment;
+ }
+ }
+
+ if (desc->resolveAttachments) {
+ subpass->resolve_attachments =
+ anv_device_alloc(device, desc->colorCount * sizeof(uint32_t),
+ 8, VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
+
+ for (uint32_t j = 0; j < desc->colorCount; j++) {
+ subpass->resolve_attachments[j]
+ = desc->resolveAttachments[j].attachment;
+ }
+ }
+
+ subpass->depth_stencil_attachment = desc->depthStencilAttachment.attachment;
+ }
+
+ *pRenderPass = anv_render_pass_to_handle(pass);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_DestroyRenderPass(
+ VkDevice _device,
+ VkRenderPass _pass)
+{
+ ANV_FROM_HANDLE(anv_device, device, _device);
+ ANV_FROM_HANDLE(anv_render_pass, pass, _pass);
+
+ anv_device_free(device, pass->attachments);
+
+ for (uint32_t i = 0; i < pass->subpass_count; i++) {
+      /* In VkSubpassDescription, each of the attachment arrays may be null.
+ * Don't free the null arrays.
+ */
+ struct anv_subpass *subpass = &pass->subpasses[i];
+
+ anv_device_free(device, subpass->input_attachments);
+ anv_device_free(device, subpass->color_attachments);
+ anv_device_free(device, subpass->resolve_attachments);
+ }
+
+ anv_device_free(device, pass);
+
+ return VK_SUCCESS;
+}
+
+VkResult anv_GetRenderAreaGranularity(
+ VkDevice device,
+ VkRenderPass renderPass,
+ VkExtent2D* pGranularity)
+{
+ *pGranularity = (VkExtent2D) { 1, 1 };
+
+ return VK_SUCCESS;
+}
+
+void vkCmdDbgMarkerBegin(
+ VkCmdBuffer cmdBuffer,
+ const char* pMarker)
+ __attribute__ ((visibility ("default")));
+
+void vkCmdDbgMarkerEnd(
+ VkCmdBuffer cmdBuffer)
+ __attribute__ ((visibility ("default")));
+
+void vkCmdDbgMarkerBegin(
+ VkCmdBuffer cmdBuffer,
+ const char* pMarker)
+{
+}
+
+void vkCmdDbgMarkerEnd(
+ VkCmdBuffer cmdBuffer)
+{
+}
--- /dev/null
+/*
+ * Copyright © 2015 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include <stdarg.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <assert.h>
+
+#include "anv_private.h"
+
+/** Log an error message. */
+void anv_printflike(1, 2)
+anv_loge(const char *format, ...)
+{
+ va_list va;
+
+ va_start(va, format);
+ anv_loge_v(format, va);
+ va_end(va);
+}
+
+/** \see anv_loge() */
+void
+anv_loge_v(const char *format, va_list va)
+{
+ fprintf(stderr, "vk: error: ");
+ vfprintf(stderr, format, va);
+ fprintf(stderr, "\n");
+}
+
+void anv_printflike(3, 4)
+__anv_finishme(const char *file, int line, const char *format, ...)
+{
+ va_list ap;
+ char buffer[256];
+
+ va_start(ap, format);
+ vsnprintf(buffer, sizeof(buffer), format, ap);
+ va_end(ap);
+
+ fprintf(stderr, "%s:%d: FINISHME: %s\n", file, line, buffer);
+}
+
+void anv_noreturn anv_printflike(1, 2)
+anv_abortf(const char *format, ...)
+{
+ va_list va;
+
+ va_start(va, format);
+ anv_abortfv(format, va);
+ va_end(va);
+}
+
+void anv_noreturn
+anv_abortfv(const char *format, va_list va)
+{
+ fprintf(stderr, "vk: error: ");
+ vfprintf(stderr, format, va);
+ fprintf(stderr, "\n");
+ abort();
+}
+
+int
+anv_vector_init(struct anv_vector *vector, uint32_t element_size, uint32_t size)
+{
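+   /* anv_vector is a ring buffer of fixed-size elements.  Both sizes must
+    * be powers of two; head and tail grow monotonically and are masked
+    * with (size - 1) whenever the data array is indexed.
+    */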
+   assert(util_is_power_of_two(size));
+   assert(element_size < size && util_is_power_of_two(element_size));
+
+ vector->head = 0;
+ vector->tail = 0;
+ vector->element_size = element_size;
+ vector->size = size;
+ vector->data = malloc(size);
+
+ return vector->data != NULL;
+}
+
+void *
+anv_vector_add(struct anv_vector *vector)
+{
+ uint32_t offset, size, split, tail;
+ void *data;
+
+ if (vector->head - vector->tail == vector->size) {
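+      /* The ring is full: double the allocation and copy the live bytes
+       * across.  When the live region wraps around the end of the old
+       * buffer it has to be copied in two pieces.
+       */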
+ size = vector->size * 2;
+ data = malloc(size);
+ if (data == NULL)
+ return NULL;
+ split = align_u32(vector->tail, vector->size);
+ tail = vector->tail & (vector->size - 1);
+ if (vector->head - split < vector->size) {
+ memcpy(data + tail,
+ vector->data + tail,
+ split - vector->tail);
+ memcpy(data + vector->size,
+ vector->data, vector->head - split);
+ } else {
+ memcpy(data + tail,
+ vector->data + tail,
+ vector->head - vector->tail);
+ }
+ free(vector->data);
+ vector->data = data;
+ vector->size = size;
+ }
+
+ assert(vector->head - vector->tail < vector->size);
+
+ offset = vector->head & (vector->size - 1);
+ vector->head += vector->element_size;
+
+ return vector->data + offset;
+}
+
+void *
+anv_vector_remove(struct anv_vector *vector)
+{
+ uint32_t offset;
+
+ if (vector->head == vector->tail)
+ return NULL;
+
+ assert(vector->head - vector->tail <= vector->size);
+
+ offset = vector->tail & (vector->size - 1);
+ vector->tail += vector->element_size;
+
+ return vector->data + offset;
+}
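+
+/* A minimal usage sketch (illustrative only, not part of the driver):
+ * using an anv_vector as a FIFO of 16-byte records backed by a 1 KB ring.
+ * Both sizes must be powers of two, with element_size < size.
+ *
+ *    struct anv_vector fifo;
+ *    if (!anv_vector_init(&fifo, 16, 1024))
+ *       abort();                                   // allocation failed
+ *
+ *    uint32_t *slot = anv_vector_add(&fifo);       // reserve the next slot
+ *    if (slot != NULL)
+ *       slot[0] = 42;
+ *
+ *    uint32_t *oldest = anv_vector_remove(&fifo);  // NULL when empty
+ */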