From: Jason Ekstrand Date: Mon, 23 Nov 2015 22:03:47 +0000 (-0800) Subject: Merge remote-tracking branch 'mesa-public/master' into vulkan X-Git-Url: https://git.libre-soc.org/?a=commitdiff_plain;h=179fc4aae8f782453f0488e8dd508f9a01117376;p=mesa.git Merge remote-tracking branch 'mesa-public/master' into vulkan This pulls in nir cloning and some much-needed upstream refactors. --- 179fc4aae8f782453f0488e8dd508f9a01117376 diff --cc src/glsl/nir/glsl_types.h index 14c2aa49f85,d8a999ad44e..1aafa5cd547 --- a/src/glsl/nir/glsl_types.h +++ b/src/glsl/nir/glsl_types.h @@@ -870,10 -849,10 +870,11 @@@ struct glsl_struct_field unsigned image_volatile:1; unsigned image_restrict:1; +#ifdef __cplusplus glsl_struct_field(const struct glsl_type *_type, const char *_name) : type(_type), name(_name), location(-1), interpolation(0), centroid(0), - sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0) + sample(0), matrix_layout(GLSL_MATRIX_LAYOUT_INHERITED), patch(0), + precision(GLSL_PRECISION_NONE) { /* empty */ } diff --cc src/glsl/nir/nir_clone.c index 00000000000,33ff5261b21..68b72ef5381 mode 000000,100644..100644 --- a/src/glsl/nir/nir_clone.c +++ b/src/glsl/nir/nir_clone.c @@@ -1,0 -1,671 +1,674 @@@ + /* + * Copyright © 2015 Red Hat + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + + #include "nir.h" + #include "nir_control_flow_private.h" + + /* Secret Decoder Ring: + * clone_foo(): + * Allocate and clone a foo. + * __clone_foo(): + * Clone body of foo (ie. parent class, embedded struct, etc) + */ + + typedef struct { + /* maps orig ptr -> cloned ptr: */ + struct hash_table *ptr_table; + + /* List of phi sources. */ + struct list_head phi_srcs; + + /* new shader object, used as memctx for just about everything else: */ + nir_shader *ns; + } clone_state; + + static void + init_clone_state(clone_state *state) + { + state->ptr_table = _mesa_hash_table_create(NULL, _mesa_hash_pointer, + _mesa_key_pointer_equal); + list_inithead(&state->phi_srcs); + } + + static void + free_clone_state(clone_state *state) + { + _mesa_hash_table_destroy(state->ptr_table, NULL); + } + + static void * + lookup_ptr(clone_state *state, const void *ptr) + { + struct hash_entry *entry; + + if (!ptr) + return NULL; + + entry = _mesa_hash_table_search(state->ptr_table, ptr); + assert(entry && "Failed to find pointer!"); + if (!entry) + return NULL; + + return entry->data; + } + + static void + store_ptr(clone_state *state, void *nptr, const void *ptr) + { + _mesa_hash_table_insert(state->ptr_table, ptr, nptr); + } + + static nir_constant * + clone_constant(clone_state *state, const nir_constant *c, nir_variable *nvar) + { + nir_constant *nc = ralloc(nvar, nir_constant); + + nc->value = c->value; + nc->num_elements = c->num_elements; + nc->elements = ralloc_array(nvar, nir_constant 
*, c->num_elements); + for (unsigned i = 0; i < c->num_elements; i++) { + nc->elements[i] = clone_constant(state, c->elements[i], nvar); + } + + return nc; + } + + /* NOTE: for cloning nir_variable's, bypass nir_variable_create to avoid + * having to deal with locals and globals separately: + */ + static nir_variable * + clone_variable(clone_state *state, const nir_variable *var) + { + nir_variable *nvar = rzalloc(state->ns, nir_variable); + store_ptr(state, nvar, var); + + nvar->type = var->type; + nvar->name = ralloc_strdup(nvar, var->name); + nvar->data = var->data; + nvar->num_state_slots = var->num_state_slots; + nvar->state_slots = ralloc_array(nvar, nir_state_slot, var->num_state_slots); + memcpy(nvar->state_slots, var->state_slots, + var->num_state_slots * sizeof(nir_state_slot)); + if (var->constant_initializer) { + nvar->constant_initializer = + clone_constant(state, var->constant_initializer, nvar); + } + nvar->interface_type = var->interface_type; + + return nvar; + } + + /* clone list of nir_variable: */ + static void + clone_var_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list) + { + exec_list_make_empty(dst); + foreach_list_typed(nir_variable, var, node, list) { + nir_variable *nvar = clone_variable(state, var); + exec_list_push_tail(dst, &nvar->node); + } + } + + /* NOTE: for cloning nir_register's, bypass nir_global/local_reg_create() + * to avoid having to deal with locals and globals separately: + */ + static nir_register * + clone_register(clone_state *state, const nir_register *reg) + { + nir_register *nreg = rzalloc(state->ns, nir_register); + store_ptr(state, nreg, reg); + + nreg->num_components = reg->num_components; + nreg->num_array_elems = reg->num_array_elems; + nreg->index = reg->index; + nreg->name = ralloc_strdup(nreg, reg->name); + nreg->is_global = reg->is_global; + nreg->is_packed = reg->is_packed; + + /* reconstructing uses/defs/if_uses handled by nir_instr_insert() */ + list_inithead(&nreg->uses); + 
list_inithead(&nreg->defs); + list_inithead(&nreg->if_uses); + + return nreg; + } + + /* clone list of nir_register: */ + static void + clone_reg_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list) + { + exec_list_make_empty(dst); + foreach_list_typed(nir_register, reg, node, list) { + nir_register *nreg = clone_register(state, reg); + exec_list_push_tail(dst, &nreg->node); + } + } + + static void + __clone_src(clone_state *state, void *ninstr_or_if, + nir_src *nsrc, const nir_src *src) + { + nsrc->is_ssa = src->is_ssa; + if (src->is_ssa) { + nsrc->ssa = lookup_ptr(state, src->ssa); + } else { + nsrc->reg.reg = lookup_ptr(state, src->reg.reg); + if (src->reg.indirect) { + nsrc->reg.indirect = ralloc(ninstr_or_if, nir_src); + __clone_src(state, ninstr_or_if, nsrc->reg.indirect, src->reg.indirect); + } + nsrc->reg.base_offset = src->reg.base_offset; + } + } + + static void + __clone_dst(clone_state *state, nir_instr *ninstr, + nir_dest *ndst, const nir_dest *dst) + { + ndst->is_ssa = dst->is_ssa; + if (dst->is_ssa) { + nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components, dst->ssa.name); + store_ptr(state, &ndst->ssa, &dst->ssa); + } else { + ndst->reg.reg = lookup_ptr(state, dst->reg.reg); + if (dst->reg.indirect) { + ndst->reg.indirect = ralloc(ninstr, nir_src); + __clone_src(state, ninstr, ndst->reg.indirect, dst->reg.indirect); + } + ndst->reg.base_offset = dst->reg.base_offset; + } + } + + static nir_deref *clone_deref(clone_state *state, const nir_deref *deref, + nir_instr *ninstr, nir_deref *parent); + + static nir_deref_var * + clone_deref_var(clone_state *state, const nir_deref_var *dvar, + nir_instr *ninstr) + { + nir_variable *nvar = lookup_ptr(state, dvar->var); + nir_deref_var *ndvar = nir_deref_var_create(ninstr, nvar); + + if (dvar->deref.child) + ndvar->deref.child = clone_deref(state, dvar->deref.child, + ninstr, &ndvar->deref); + + return ndvar; + } + + static nir_deref_array * + clone_deref_array(clone_state *state, 
const nir_deref_array *darr, + nir_instr *ninstr, nir_deref *parent) + { + nir_deref_array *ndarr = nir_deref_array_create(parent); + + ndarr->deref.type = darr->deref.type; + if (darr->deref.child) + ndarr->deref.child = clone_deref(state, darr->deref.child, + ninstr, &ndarr->deref); + + ndarr->deref_array_type = darr->deref_array_type; + ndarr->base_offset = darr->base_offset; + if (ndarr->deref_array_type == nir_deref_array_type_indirect) + __clone_src(state, ninstr, &ndarr->indirect, &darr->indirect); + + return ndarr; + } + + static nir_deref_struct * + clone_deref_struct(clone_state *state, const nir_deref_struct *dstr, + nir_instr *ninstr, nir_deref *parent) + { + nir_deref_struct *ndstr = nir_deref_struct_create(parent, dstr->index); + + ndstr->deref.type = dstr->deref.type; + if (dstr->deref.child) + ndstr->deref.child = clone_deref(state, dstr->deref.child, + ninstr, &ndstr->deref); + + return ndstr; + } + + static nir_deref * + clone_deref(clone_state *state, const nir_deref *dref, + nir_instr *ninstr, nir_deref *parent) + { + switch (dref->deref_type) { + case nir_deref_type_array: + return &clone_deref_array(state, nir_deref_as_array(dref), + ninstr, parent)->deref; + case nir_deref_type_struct: + return &clone_deref_struct(state, nir_deref_as_struct(dref), + ninstr, parent)->deref; + default: + unreachable("bad deref type"); + return NULL; + } + } + + static nir_alu_instr * + clone_alu(clone_state *state, const nir_alu_instr *alu) + { + nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op); + + __clone_dst(state, &nalu->instr, &nalu->dest.dest, &alu->dest.dest); + nalu->dest.saturate = alu->dest.saturate; + nalu->dest.write_mask = alu->dest.write_mask; + + for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { + __clone_src(state, &nalu->instr, &nalu->src[i].src, &alu->src[i].src); + nalu->src[i].negate = alu->src[i].negate; + nalu->src[i].abs = alu->src[i].abs; + memcpy(nalu->src[i].swizzle, alu->src[i].swizzle, + 
sizeof(nalu->src[i].swizzle)); + } + + return nalu; + } + + static nir_intrinsic_instr * + clone_intrinsic(clone_state *state, const nir_intrinsic_instr *itr) + { + nir_intrinsic_instr *nitr = + nir_intrinsic_instr_create(state->ns, itr->intrinsic); + + unsigned num_variables = nir_intrinsic_infos[itr->intrinsic].num_variables; + unsigned num_srcs = nir_intrinsic_infos[itr->intrinsic].num_srcs; + + if (nir_intrinsic_infos[itr->intrinsic].has_dest) + __clone_dst(state, &nitr->instr, &nitr->dest, &itr->dest); + + nitr->num_components = itr->num_components; + memcpy(nitr->const_index, itr->const_index, sizeof(nitr->const_index)); + + for (unsigned i = 0; i < num_variables; i++) { + nitr->variables[i] = clone_deref_var(state, itr->variables[i], + &nitr->instr); + } + + for (unsigned i = 0; i < num_srcs; i++) + __clone_src(state, &nitr->instr, &nitr->src[i], &itr->src[i]); + + return nitr; + } + + static nir_load_const_instr * + clone_load_const(clone_state *state, const nir_load_const_instr *lc) + { + nir_load_const_instr *nlc = + nir_load_const_instr_create(state->ns, lc->def.num_components); + + memcpy(&nlc->value, &lc->value, sizeof(nlc->value)); + + store_ptr(state, &nlc->def, &lc->def); + + return nlc; + } + + static nir_ssa_undef_instr * + clone_ssa_undef(clone_state *state, const nir_ssa_undef_instr *sa) + { + nir_ssa_undef_instr *nsa = + nir_ssa_undef_instr_create(state->ns, sa->def.num_components); + + store_ptr(state, &nsa->def, &sa->def); + + return nsa; + } + + static nir_tex_instr * + clone_tex(clone_state *state, const nir_tex_instr *tex) + { + nir_tex_instr *ntex = nir_tex_instr_create(state->ns, tex->num_srcs); + + ntex->sampler_dim = tex->sampler_dim; + ntex->dest_type = tex->dest_type; + ntex->op = tex->op; + __clone_dst(state, &ntex->instr, &ntex->dest, &tex->dest); + for (unsigned i = 0; i < ntex->num_srcs; i++) { + ntex->src[i].src_type = tex->src[i].src_type; + __clone_src(state, &ntex->instr, &ntex->src[i].src, &tex->src[i].src); + } + 
ntex->coord_components = tex->coord_components; + ntex->is_array = tex->is_array; + ntex->is_shadow = tex->is_shadow; + ntex->is_new_style_shadow = tex->is_new_style_shadow; + memcpy(ntex->const_offset, tex->const_offset, sizeof(ntex->const_offset)); + ntex->component = tex->component; ++ ntex->texture_index = tex->texture_index; ++ ntex->texture_array_size = tex->texture_array_size; ++ if (tex->texture) ++ ntex->texture = clone_deref_var(state, tex->texture, &ntex->instr); + ntex->sampler_index = tex->sampler_index; - ntex->sampler_array_size = tex->sampler_array_size; + if (tex->sampler) + ntex->sampler = clone_deref_var(state, tex->sampler, &ntex->instr); + + return ntex; + } + + static nir_phi_instr * + clone_phi(clone_state *state, const nir_phi_instr *phi, nir_block *nblk) + { + nir_phi_instr *nphi = nir_phi_instr_create(state->ns); + + __clone_dst(state, &nphi->instr, &nphi->dest, &phi->dest); + + /* Cloning a phi node is a bit different from other instructions. The + * sources of phi instructions are the only time where we can use an SSA + * def before it is defined. In order to handle this, we just copy over + * the sources from the old phi instruction directly and then fix them up + * in a second pass once all the instructions in the function have been + * properly cloned. + * + * In order to ensure that the copied sources (which are the same as the + * old phi instruction's sources for now) don't get inserted into the old + * shader's use-def lists, we have to add the phi instruction *before* we + * set up its sources. + */ + nir_instr_insert_after_block(nblk, &nphi->instr); + + foreach_list_typed(nir_phi_src, src, node, &phi->srcs) { + nir_phi_src *nsrc = ralloc(nphi, nir_phi_src); + + /* Just copy the old source for now. */ + memcpy(nsrc, src, sizeof(*src)); + + /* Since we're not letting nir_instr_insert handle use/def stuff for us, + * we have to set the parent_instr manually. It doesn't really matter + * when we do it, so we might as well do it here. 
+ */ + nsrc->src.parent_instr = &nphi->instr; + + /* Stash it in the list of phi sources. We'll walk this list and fix up + * sources at the very end of clone_function_impl. + */ + list_add(&nsrc->src.use_link, &state->phi_srcs); + + exec_list_push_tail(&nphi->srcs, &nsrc->node); + } + + return nphi; + } + + static nir_jump_instr * + clone_jump(clone_state *state, const nir_jump_instr *jmp) + { + nir_jump_instr *njmp = nir_jump_instr_create(state->ns, jmp->type); + + return njmp; + } + + static nir_call_instr * + clone_call(clone_state *state, const nir_call_instr *call) + { + nir_function_overload *ncallee = lookup_ptr(state, call->callee); + nir_call_instr *ncall = nir_call_instr_create(state->ns, ncallee); + + for (unsigned i = 0; i < ncall->num_params; i++) + ncall->params[i] = clone_deref_var(state, call->params[i], &ncall->instr); + + ncall->return_deref = clone_deref_var(state, call->return_deref, + &ncall->instr); + + return ncall; + } + + static nir_instr * + clone_instr(clone_state *state, const nir_instr *instr) + { + switch (instr->type) { + case nir_instr_type_alu: + return &clone_alu(state, nir_instr_as_alu(instr))->instr; + case nir_instr_type_intrinsic: + return &clone_intrinsic(state, nir_instr_as_intrinsic(instr))->instr; + case nir_instr_type_load_const: + return &clone_load_const(state, nir_instr_as_load_const(instr))->instr; + case nir_instr_type_ssa_undef: + return &clone_ssa_undef(state, nir_instr_as_ssa_undef(instr))->instr; + case nir_instr_type_tex: + return &clone_tex(state, nir_instr_as_tex(instr))->instr; + case nir_instr_type_phi: + unreachable("Cannot clone phis with clone_instr"); + case nir_instr_type_jump: + return &clone_jump(state, nir_instr_as_jump(instr))->instr; + case nir_instr_type_call: + return &clone_call(state, nir_instr_as_call(instr))->instr; + case nir_instr_type_parallel_copy: + unreachable("Cannot clone parallel copies"); + default: + unreachable("bad instr type"); + return NULL; + } + } + + static nir_block * + 
clone_block(clone_state *state, struct exec_list *cf_list, const nir_block *blk) + { + /* Don't actually create a new block. Just use the one from the tail of + * the list. NIR guarantees that the tail of the list is a block and that + * no two blocks are side-by-side in the IR; It should be empty. + */ + nir_block *nblk = + exec_node_data(nir_block, exec_list_get_tail(cf_list), cf_node.node); + assert(nblk->cf_node.type == nir_cf_node_block); + assert(exec_list_is_empty(&nblk->instr_list)); + + /* We need this for phi sources */ + store_ptr(state, nblk, blk); + + nir_foreach_instr(blk, instr) { + if (instr->type == nir_instr_type_phi) { + /* Phi instructions are a bit of a special case when cloning because + * we don't want inserting the instruction to automatically handle + * use/defs for us. Instead, we need to wait until all the + * blocks/instructions are in so that we can set their sources up. + */ + clone_phi(state, nir_instr_as_phi(instr), nblk); + } else { + nir_instr *ninstr = clone_instr(state, instr); + nir_instr_insert_after_block(nblk, ninstr); + } + } + + return nblk; + } + + static void + clone_cf_list(clone_state *state, struct exec_list *dst, + const struct exec_list *list); + + static nir_if * + clone_if(clone_state *state, struct exec_list *cf_list, const nir_if *i) + { + nir_if *ni = nir_if_create(state->ns); + + __clone_src(state, ni, &ni->condition, &i->condition); + + nir_cf_node_insert_end(cf_list, &ni->cf_node); + + clone_cf_list(state, &ni->then_list, &i->then_list); + clone_cf_list(state, &ni->else_list, &i->else_list); + + return ni; + } + + static nir_loop * + clone_loop(clone_state *state, struct exec_list *cf_list, const nir_loop *loop) + { + nir_loop *nloop = nir_loop_create(state->ns); + + nir_cf_node_insert_end(cf_list, &nloop->cf_node); + + clone_cf_list(state, &nloop->body, &loop->body); + + return nloop; + } + + /* clone list of nir_cf_node: */ + static void + clone_cf_list(clone_state *state, struct exec_list *dst, + const 
struct exec_list *list) + { + foreach_list_typed(nir_cf_node, cf, node, list) { + switch (cf->type) { + case nir_cf_node_block: + clone_block(state, dst, nir_cf_node_as_block(cf)); + break; + case nir_cf_node_if: + clone_if(state, dst, nir_cf_node_as_if(cf)); + break; + case nir_cf_node_loop: + clone_loop(state, dst, nir_cf_node_as_loop(cf)); + break; + default: + unreachable("bad cf type"); + } + } + } + + static nir_function_impl * + clone_function_impl(clone_state *state, const nir_function_impl *fi, + nir_function_overload *nfo) + { + nir_function_impl *nfi = nir_function_impl_create(nfo); + + clone_var_list(state, &nfi->locals, &fi->locals); + clone_reg_list(state, &nfi->registers, &fi->registers); + nfi->reg_alloc = fi->reg_alloc; + + nfi->num_params = fi->num_params; + nfi->params = ralloc_array(state->ns, nir_variable *, fi->num_params); + for (unsigned i = 0; i < fi->num_params; i++) { + nfi->params[i] = lookup_ptr(state, fi->params[i]); + } + nfi->return_var = lookup_ptr(state, fi->return_var); + + assert(list_empty(&state->phi_srcs)); + + clone_cf_list(state, &nfi->body, &fi->body); + + /* After we've cloned almost everything, we have to walk the list of phi + * sources and fix them up. Thanks to loops, the block and SSA value for a + * phi source may not be defined when we first encounter it. Instead, we + * add it to the phi_srcs list and we fix it up here. 
+ */ + list_for_each_entry_safe(nir_phi_src, src, &state->phi_srcs, src.use_link) { + src->pred = lookup_ptr(state, src->pred); + assert(src->src.is_ssa); + src->src.ssa = lookup_ptr(state, src->src.ssa); + + /* Remove from this list and place in the uses of the SSA def */ + list_del(&src->src.use_link); + list_addtail(&src->src.use_link, &src->src.ssa->uses); + } + assert(list_empty(&state->phi_srcs)); + + /* All metadata is invalidated in the cloning process */ + nfi->valid_metadata = 0; + + return nfi; + } + + static nir_function_overload * + clone_function_overload(clone_state *state, const nir_function_overload *fo, + nir_function *nfxn) + { + nir_function_overload *nfo = nir_function_overload_create(nfxn); + + /* Needed for call instructions */ + store_ptr(state, nfo, fo); + + nfo->num_params = fo->num_params; + nfo->params = ralloc_array(state->ns, nir_parameter, fo->num_params); + memcpy(nfo->params, fo->params, sizeof(nir_parameter) * fo->num_params); + + nfo->return_type = fo->return_type; + + /* At first glance, it looks like we should clone the function_impl here. + * However, call instructions need to be able to reference at least the + * overload and those will get processed as we clone the function_impl's. + * We stop here and do function_impls as a second pass. 
+ */ + + return nfo; + } + + static nir_function * + clone_function(clone_state *state, const nir_function *fxn, nir_shader *ns) + { + assert(ns == state->ns); + nir_function *nfxn = nir_function_create(ns, fxn->name); + + foreach_list_typed(nir_function_overload, fo, node, &fxn->overload_list) + clone_function_overload(state, fo, nfxn); + + return nfxn; + } + + nir_shader * + nir_shader_clone(void *mem_ctx, const nir_shader *s) + { + clone_state state; + init_clone_state(&state); + + nir_shader *ns = nir_shader_create(mem_ctx, s->stage, s->options); + state.ns = ns; + + clone_var_list(&state, &ns->uniforms, &s->uniforms); + clone_var_list(&state, &ns->inputs, &s->inputs); + clone_var_list(&state, &ns->outputs, &s->outputs); + clone_var_list(&state, &ns->globals, &s->globals); + clone_var_list(&state, &ns->system_values, &s->system_values); + + /* Go through and clone functions and overloads */ + foreach_list_typed(nir_function, fxn, node, &s->functions) + clone_function(&state, fxn, ns); + + /* Only after all overloads are cloned can we clone the actual function + * implementations. This is because nir_call_instr's need to reference the + * overloads of other functions and we don't know what order the functions + * will have in the list. 
+ */ + nir_foreach_overload(s, fo) { + nir_function_overload *nfo = lookup_ptr(&state, fo); + clone_function_impl(&state, fo->impl, nfo); + } + + clone_reg_list(&state, &ns->registers, &s->registers); + ns->reg_alloc = s->reg_alloc; + + ns->info = s->info; + ns->info.name = ralloc_strdup(ns, ns->info.name); + if (ns->info.label) + ns->info.label = ralloc_strdup(ns, ns->info.label); + + ns->num_inputs = s->num_inputs; + ns->num_uniforms = s->num_uniforms; + ns->num_outputs = s->num_outputs; + + free_clone_state(&state); + + return ns; + } diff --cc src/glsl/nir/spirv_to_nir.c index 70610ca0f66,00000000000..86282d25e0a mode 100644,000000..100644 --- a/src/glsl/nir/spirv_to_nir.c +++ b/src/glsl/nir/spirv_to_nir.c @@@ -1,3800 -1,0 +1,3800 @@@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + * Jason Ekstrand (jason@jlekstrand.net) + * + */ + +#include "spirv_to_nir_private.h" +#include "nir_vla.h" +#include "nir_control_flow.h" + +static struct vtn_ssa_value * +vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant, + const struct glsl_type *type) +{ + struct hash_entry *entry = _mesa_hash_table_search(b->const_table, constant); + + if (entry) + return entry->data; + + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + if (glsl_type_is_vector_or_scalar(type)) { + unsigned num_components = glsl_get_vector_elements(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, num_components); + + for (unsigned i = 0; i < num_components; i++) + load->value.u[i] = constant->value.u[i]; + + nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); + val->def = &load->def; + } else { + assert(glsl_type_is_matrix(type)); + unsigned rows = glsl_get_vector_elements(val->type); + unsigned columns = glsl_get_matrix_columns(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, columns); + + for (unsigned i = 0; i < columns; i++) { + struct vtn_ssa_value *col_val = rzalloc(b, struct vtn_ssa_value); + col_val->type = glsl_get_column_type(val->type); + nir_load_const_instr *load = + nir_load_const_instr_create(b->shader, rows); + + for (unsigned j = 0; j < rows; j++) + load->value.u[j] = constant->value.u[rows * i + j]; + + nir_instr_insert_before_cf_list(&b->impl->body, &load->instr); + col_val->def = &load->def; + + val->elems[i] = col_val; + } + } + break; + + case GLSL_TYPE_ARRAY: { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + const struct glsl_type *elem_type = glsl_get_array_element(val->type); + for (unsigned i = 0; i < elems; i++) + 
val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], + elem_type); + break; + } + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *elem_type = + glsl_get_struct_field(val->type, i); + val->elems[i] = vtn_const_ssa_value(b, constant->elements[i], + elem_type); + } + break; + } + + default: + unreachable("bad constant type"); + } + + return val; +} + +struct vtn_ssa_value * +vtn_ssa_value(struct vtn_builder *b, uint32_t value_id) +{ + struct vtn_value *val = vtn_untyped_value(b, value_id); + switch (val->value_type) { + case vtn_value_type_constant: + return vtn_const_ssa_value(b, val->constant, val->const_type); + + case vtn_value_type_ssa: + return val->ssa; + default: + unreachable("Invalid type for an SSA value"); + } +} + +static char * +vtn_string_literal(struct vtn_builder *b, const uint32_t *words, + unsigned word_count) +{ + return ralloc_strndup(b, (char *)words, word_count * sizeof(*words)); +} + +static const uint32_t * +vtn_foreach_instruction(struct vtn_builder *b, const uint32_t *start, + const uint32_t *end, vtn_instruction_handler handler) +{ + const uint32_t *w = start; + while (w < end) { + SpvOp opcode = w[0] & SpvOpCodeMask; + unsigned count = w[0] >> SpvWordCountShift; + assert(count >= 1 && w + count <= end); + + if (opcode == SpvOpNop) { + w++; + continue; + } + + if (!handler(b, opcode, w, count)) + return w; + + w += count; + } + assert(w == end); + return w; +} + +static void +vtn_handle_extension(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpExtInstImport: { + struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_extension); + if (strcmp((const char *)&w[2], "GLSL.std.450") == 0) { + val->ext_handler = vtn_handle_glsl450_instruction; + } else { + assert(!"Unsupported extension"); + } + break; + } + + case 
SpvOpExtInst: { + struct vtn_value *val = vtn_value(b, w[3], vtn_value_type_extension); + bool handled = val->ext_handler(b, w[4], w, count); + (void)handled; + assert(handled); + break; + } + + default: + unreachable("Unhandled opcode"); + } +} + +static void +_foreach_decoration_helper(struct vtn_builder *b, + struct vtn_value *base_value, + int parent_member, + struct vtn_value *value, + vtn_decoration_foreach_cb cb, void *data) +{ + for (struct vtn_decoration *dec = value->decoration; dec; dec = dec->next) { + int member; + if (dec->member < 0) { + member = parent_member; + } else { + assert(parent_member == -1); + member = dec->member; + } + + if (dec->group) { + assert(dec->group->value_type == vtn_value_type_decoration_group); + _foreach_decoration_helper(b, base_value, member, dec->group, + cb, data); + } else { + cb(b, base_value, member, dec, data); + } + } +} + +/** Iterates (recursively if needed) over all of the decorations on a value + * + * This function iterates over all of the decorations applied to a given + * value. If it encounters a decoration group, it recurses into the group + * and iterates over all of those decorations as well. 
+ */ +void +vtn_foreach_decoration(struct vtn_builder *b, struct vtn_value *value, + vtn_decoration_foreach_cb cb, void *data) +{ + _foreach_decoration_helper(b, value, -1, value, cb, data); +} + +/* Records SPIR-V decoration instructions on the values they target. + * + * w[1] is the target id. OpDecorate/OpMemberDecorate prepend a new + * vtn_decoration to the target's list; OpGroupDecorate/OpGroupMemberDecorate + * prepend a decoration that points at the group to every listed value. + */ +static void +vtn_handle_decoration(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + const uint32_t *w_end = w + count; + const uint32_t target = w[1]; + w += 2; + + int member = -1; + switch (opcode) { + case SpvOpDecorationGroup: + /* Tag the value as a decoration group: the SpvOpGroupDecorate case + * below and _foreach_decoration_helper() both assert on this type. + */ + vtn_push_value(b, target, vtn_value_type_decoration_group); + break; + + case SpvOpMemberDecorate: + member = *(w++); + /* fallthrough */ + case SpvOpDecorate: { + struct vtn_value *val = &b->values[target]; + + struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); + dec->member = member; + dec->decoration = *(w++); + dec->literals = w; + + /* Link into the list */ + dec->next = val->decoration; + val->decoration = dec; + break; + } + + case SpvOpGroupMemberDecorate: + member = *(w++); + /* fallthrough */ + case SpvOpGroupDecorate: { + struct vtn_value *group = &b->values[target]; + assert(group->value_type == vtn_value_type_decoration_group); + + for (; w < w_end; w++) { + struct vtn_value *val = &b->values[*w]; + struct vtn_decoration *dec = rzalloc(b, struct vtn_decoration); + dec->member = member; + dec->group = group; + + /* Link into the list */ + dec->next = val->decoration; + val->decoration = dec; + } + break; + } + + default: + unreachable("Unhandled opcode"); + } +} + +struct member_decoration_ctx { + struct glsl_struct_field *fields; + struct vtn_type *type; +}; + +/* does a shallow copy of a vtn_type */ + +static struct vtn_type * +vtn_type_copy(struct vtn_builder *b, struct vtn_type *src) +{ + struct vtn_type *dest = ralloc(b, struct vtn_type); + dest->type = src->type; + dest->is_builtin = src->is_builtin; + if (src->is_builtin) + dest->builtin = src->builtin; + + if (!glsl_type_is_vector_or_scalar(src->type)) { + switch (glsl_get_base_type(src->type)) { + case GLSL_TYPE_ARRAY: + dest->array_element 
= src->array_element; + dest->stride = src->stride; + break; + + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + /* matrices */ + dest->row_major = src->row_major; + dest->stride = src->stride; + break; + + case GLSL_TYPE_STRUCT: { + unsigned elems = glsl_get_length(src->type); + + dest->members = ralloc_array(b, struct vtn_type *, elems); + memcpy(dest->members, src->members, elems * sizeof(struct vtn_type *)); + + dest->offsets = ralloc_array(b, unsigned, elems); + memcpy(dest->offsets, src->offsets, elems * sizeof(unsigned)); + break; + } + + default: + unreachable("unhandled type"); + } + } + + return dest; +} + +static void +struct_member_decoration_cb(struct vtn_builder *b, + struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *void_ctx) +{ + struct member_decoration_ctx *ctx = void_ctx; + + if (member < 0) + return; + + switch (dec->decoration) { + case SpvDecorationRelaxedPrecision: + break; /* FIXME: Do nothing with this for now. 
*/ + case SpvDecorationSmooth: + ctx->fields[member].interpolation = INTERP_QUALIFIER_SMOOTH; + break; + case SpvDecorationNoPerspective: + ctx->fields[member].interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + break; + case SpvDecorationFlat: + ctx->fields[member].interpolation = INTERP_QUALIFIER_FLAT; + break; + case SpvDecorationCentroid: + ctx->fields[member].centroid = true; + break; + case SpvDecorationSample: + ctx->fields[member].sample = true; + break; + case SpvDecorationLocation: + ctx->fields[member].location = dec->literals[0]; + break; + case SpvDecorationBuiltIn: + ctx->type->members[member] = vtn_type_copy(b, + ctx->type->members[member]); + ctx->type->members[member]->is_builtin = true; + ctx->type->members[member]->builtin = dec->literals[0]; + ctx->type->builtin_block = true; + break; + case SpvDecorationOffset: + ctx->type->offsets[member] = dec->literals[0]; + break; + case SpvDecorationMatrixStride: + ctx->type->members[member]->stride = dec->literals[0]; + break; + case SpvDecorationColMajor: + break; /* Nothing to do here. Column-major is the default. 
*/ + default: + unreachable("Unhandled member decoration"); + } +} + +static void +type_decoration_cb(struct vtn_builder *b, + struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *ctx) +{ + struct vtn_type *type = val->type; + + if (member != -1) + return; + + switch (dec->decoration) { + case SpvDecorationArrayStride: + type->stride = dec->literals[0]; + break; + case SpvDecorationBlock: + type->block = true; + break; + case SpvDecorationBufferBlock: + type->buffer_block = true; + break; + case SpvDecorationGLSLShared: + case SpvDecorationGLSLPacked: + /* Ignore these, since we get explicit offsets anyways */ + break; + + case SpvDecorationStream: + assert(dec->literals[0] == 0); + break; + + default: + unreachable("Unhandled type decoration"); + } +} + +static unsigned +translate_image_format(SpvImageFormat format) +{ + switch (format) { + case SpvImageFormatUnknown: return 0; /* GL_NONE */ + case SpvImageFormatRgba32f: return 0x8814; /* GL_RGBA32F */ + case SpvImageFormatRgba16f: return 0x881A; /* GL_RGBA16F */ + case SpvImageFormatR32f: return 0x822E; /* GL_R32F */ + case SpvImageFormatRgba8: return 0x8058; /* GL_RGBA8 */ + case SpvImageFormatRgba8Snorm: return 0x8F97; /* GL_RGBA8_SNORM */ + case SpvImageFormatRg32f: return 0x8230; /* GL_RG32F */ + case SpvImageFormatRg16f: return 0x822F; /* GL_RG16F */ + case SpvImageFormatR11fG11fB10f: return 0x8C3A; /* GL_R11F_G11F_B10F */ + case SpvImageFormatR16f: return 0x822D; /* GL_R16F */ + case SpvImageFormatRgba16: return 0x805B; /* GL_RGBA16 */ + case SpvImageFormatRgb10A2: return 0x8059; /* GL_RGB10_A2 */ + case SpvImageFormatRg16: return 0x822C; /* GL_RG16 */ + case SpvImageFormatRg8: return 0x822B; /* GL_RG8 */ + case SpvImageFormatR16: return 0x822A; /* GL_R16 */ + case SpvImageFormatR8: return 0x8229; /* GL_R8 */ + case SpvImageFormatRgba16Snorm: return 0x8F9B; /* GL_RGBA16_SNORM */ + case SpvImageFormatRg16Snorm: return 0x8F99; /* GL_RG16_SNORM */ + case SpvImageFormatRg8Snorm: return 
0x8F95; /* GL_RG8_SNORM */ + case SpvImageFormatR16Snorm: return 0x8F98; /* GL_R16_SNORM */ + case SpvImageFormatR8Snorm: return 0x8F94; /* GL_R8_SNORM */ + case SpvImageFormatRgba32i: return 0x8D82; /* GL_RGBA32I */ + case SpvImageFormatRgba16i: return 0x8D88; /* GL_RGBA16I */ + case SpvImageFormatRgba8i: return 0x8D8E; /* GL_RGBA8I */ + case SpvImageFormatR32i: return 0x8235; /* GL_R32I */ + case SpvImageFormatRg32i: return 0x823B; /* GL_RG32I */ + case SpvImageFormatRg16i: return 0x8239; /* GL_RG16I */ + case SpvImageFormatRg8i: return 0x8237; /* GL_RG8I */ + case SpvImageFormatR16i: return 0x8233; /* GL_R16I */ + case SpvImageFormatR8i: return 0x8231; /* GL_R8I */ + case SpvImageFormatRgba32ui: return 0x8D70; /* GL_RGBA32UI */ + case SpvImageFormatRgba16ui: return 0x8D76; /* GL_RGBA16UI */ + case SpvImageFormatRgba8ui: return 0x8D7C; /* GL_RGBA8UI */ + case SpvImageFormatR32ui: return 0x8236; /* GL_R32UI */ + case SpvImageFormatRgb10a2ui: return 0x906F; /* GL_RGB10_A2UI */ + case SpvImageFormatRg32ui: return 0x823C; /* GL_RG32UI */ + case SpvImageFormatRg16ui: return 0x823A; /* GL_RG16UI */ + case SpvImageFormatRg8ui: return 0x8238; /* GL_RG8UI */ + case SpvImageFormatR16ui: return 0x823A; /* GL_RG16UI */ + case SpvImageFormatR8ui: return 0x8232; /* GL_R8UI */ + default: + assert(!"Invalid image format"); + return 0; + } +} + +static void +vtn_handle_type(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[1], vtn_value_type_type); + + val->type = rzalloc(b, struct vtn_type); + val->type->is_builtin = false; + + switch (opcode) { + case SpvOpTypeVoid: + val->type->type = glsl_void_type(); + break; + case SpvOpTypeBool: + val->type->type = glsl_bool_type(); + break; + case SpvOpTypeInt: + val->type->type = glsl_int_type(); + break; + case SpvOpTypeFloat: + val->type->type = glsl_float_type(); + break; + + case SpvOpTypeVector: { + const struct glsl_type *base = + vtn_value(b, w[2], 
vtn_value_type_type)->type->type; + unsigned elems = w[3]; + + assert(glsl_type_is_scalar(base)); + val->type->type = glsl_vector_type(glsl_get_base_type(base), elems); + break; + } + + case SpvOpTypeMatrix: { + struct vtn_type *base = + vtn_value(b, w[2], vtn_value_type_type)->type; + unsigned columns = w[3]; + + assert(glsl_type_is_vector(base->type)); + val->type->type = glsl_matrix_type(glsl_get_base_type(base->type), + glsl_get_vector_elements(base->type), + columns); + val->type->array_element = base; + val->type->row_major = false; + val->type->stride = 0; + break; + } + + case SpvOpTypeRuntimeArray: + case SpvOpTypeArray: { + struct vtn_type *array_element = + vtn_value(b, w[2], vtn_value_type_type)->type; + + /* A length of 0 is used to denote unsized arrays */ + unsigned length = (opcode == SpvOpTypeArray) ? w[3] : 0; + + val->type->type = glsl_array_type(array_element->type, length); + val->type->array_element = array_element; + val->type->stride = 0; + break; + } + + case SpvOpTypeStruct: { + unsigned num_fields = count - 2; + val->type->members = ralloc_array(b, struct vtn_type *, num_fields); + val->type->offsets = ralloc_array(b, unsigned, num_fields); + + NIR_VLA(struct glsl_struct_field, fields, count); + for (unsigned i = 0; i < num_fields; i++) { + /* TODO: Handle decorators */ + val->type->members[i] = + vtn_value(b, w[i + 2], vtn_value_type_type)->type; + fields[i].type = val->type->members[i]->type; + fields[i].name = ralloc_asprintf(b, "field%d", i); + fields[i].location = -1; + fields[i].interpolation = 0; + fields[i].centroid = 0; + fields[i].sample = 0; + fields[i].matrix_layout = 2; + } + + struct member_decoration_ctx ctx = { + .fields = fields, + .type = val->type + }; + + vtn_foreach_decoration(b, val, struct_member_decoration_cb, &ctx); + + const char *name = val->name ? 
val->name : "struct"; + + val->type->type = glsl_struct_type(fields, num_fields, name); + break; + } + + case SpvOpTypeFunction: { + const struct glsl_type *return_type = + vtn_value(b, w[2], vtn_value_type_type)->type->type; + NIR_VLA(struct glsl_function_param, params, count - 3); + for (unsigned i = 0; i < count - 3; i++) { + params[i].type = vtn_value(b, w[i + 3], vtn_value_type_type)->type->type; + + /* FIXME: */ + params[i].in = true; + params[i].out = true; + } + val->type->type = glsl_function_type(return_type, params, count - 3); + break; + } + + case SpvOpTypePointer: + /* FIXME: For now, we'll just do the really lame thing and return + * the same type. The validator should ensure that the proper number + * of dereferences happen + */ + val->type = vtn_value(b, w[3], vtn_value_type_type)->type; + break; + + case SpvOpTypeImage: { + const struct glsl_type *sampled_type = + vtn_value(b, w[2], vtn_value_type_type)->type->type; + + assert(glsl_type_is_vector_or_scalar(sampled_type)); + + enum glsl_sampler_dim dim; + switch ((SpvDim)w[3]) { + case SpvDim1D: dim = GLSL_SAMPLER_DIM_1D; break; + case SpvDim2D: dim = GLSL_SAMPLER_DIM_2D; break; + case SpvDim3D: dim = GLSL_SAMPLER_DIM_3D; break; + case SpvDimCube: dim = GLSL_SAMPLER_DIM_CUBE; break; + case SpvDimRect: dim = GLSL_SAMPLER_DIM_RECT; break; + case SpvDimBuffer: dim = GLSL_SAMPLER_DIM_BUF; break; + default: + unreachable("Invalid SPIR-V Sampler dimension"); + } + + bool is_shadow = w[4]; + bool is_array = w[5]; + bool multisampled = w[6]; + unsigned sampled = w[7]; + SpvImageFormat format = w[8]; + + assert(!multisampled && "FIXME: Handl multi-sampled textures"); + + val->type->image_format = translate_image_format(format); + + if (sampled == 1) { + val->type->type = glsl_sampler_type(dim, is_shadow, is_array, + glsl_get_base_type(sampled_type)); + } else if (sampled == 2) { + assert(format); + assert(!is_shadow); + val->type->type = glsl_image_type(dim, is_array, + glsl_get_base_type(sampled_type)); + 
} else { + assert(!"We need to know if the image will be sampled"); + } + break; + } + + case SpvOpTypeSampledImage: + val->type = vtn_value(b, w[2], vtn_value_type_type)->type; + break; + + case SpvOpTypeSampler: + /* The actual sampler type here doesn't really matter. It gets + * thrown away the moment you combine it with an image. What really + * matters is that it's a sampler type as opposed to an integer type + * so the backend knows what to do. + * + * TODO: Eventually we should consider adding a "bare sampler" type + * to glsl_types. + */ + val->type->type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D, false, false, + GLSL_TYPE_FLOAT); + break; + + case SpvOpTypeOpaque: + case SpvOpTypeEvent: + case SpvOpTypeDeviceEvent: + case SpvOpTypeReserveId: + case SpvOpTypeQueue: + case SpvOpTypePipe: + default: + unreachable("Unhandled opcode"); + } + + vtn_foreach_decoration(b, val, type_decoration_cb, NULL); +} + +static void +vtn_handle_constant(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_constant); + val->const_type = vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->constant = ralloc(b, nir_constant); + switch (opcode) { + case SpvOpConstantTrue: + assert(val->const_type == glsl_bool_type()); + val->constant->value.u[0] = NIR_TRUE; + break; + case SpvOpConstantFalse: + assert(val->const_type == glsl_bool_type()); + val->constant->value.u[0] = NIR_FALSE; + break; + case SpvOpConstant: + assert(glsl_type_is_scalar(val->const_type)); + val->constant->value.u[0] = w[3]; + break; + case SpvOpConstantComposite: { + unsigned elem_count = count - 3; + nir_constant **elems = ralloc_array(b, nir_constant *, elem_count); + for (unsigned i = 0; i < elem_count; i++) + elems[i] = vtn_value(b, w[i + 3], vtn_value_type_constant)->constant; + + switch (glsl_get_base_type(val->const_type)) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case 
GLSL_TYPE_BOOL: + if (glsl_type_is_matrix(val->const_type)) { + unsigned rows = glsl_get_vector_elements(val->const_type); + assert(glsl_get_matrix_columns(val->const_type) == elem_count); + for (unsigned i = 0; i < elem_count; i++) + for (unsigned j = 0; j < rows; j++) + val->constant->value.u[rows * i + j] = elems[i]->value.u[j]; + } else { + assert(glsl_type_is_vector(val->const_type)); + assert(glsl_get_vector_elements(val->const_type) == elem_count); + for (unsigned i = 0; i < elem_count; i++) + val->constant->value.u[i] = elems[i]->value.u[0]; + } + ralloc_free(elems); + break; + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_ARRAY: + ralloc_steal(val->constant, elems); + val->constant->elements = elems; + break; + + default: + unreachable("Unsupported type for constants"); + } + break; + } + + default: + unreachable("Unhandled opcode"); + } +} + +static void +set_mode_system_value(nir_variable_mode *mode) +{ + assert(*mode == nir_var_system_value || *mode == nir_var_shader_in); + *mode = nir_var_system_value; +} + +static void +validate_per_vertex_mode(struct vtn_builder *b, nir_variable_mode mode) +{ + switch (b->shader->stage) { + case MESA_SHADER_VERTEX: + assert(mode == nir_var_shader_out); + break; + case MESA_SHADER_GEOMETRY: + assert(mode == nir_var_shader_out || mode == nir_var_shader_in); + break; + default: + assert(!"Invalid shader stage"); + } +} + +static void +vtn_get_builtin_location(struct vtn_builder *b, + SpvBuiltIn builtin, int *location, + nir_variable_mode *mode) +{ + switch (builtin) { + case SpvBuiltInPosition: + *location = VARYING_SLOT_POS; + validate_per_vertex_mode(b, *mode); + break; + case SpvBuiltInPointSize: + *location = VARYING_SLOT_PSIZ; + validate_per_vertex_mode(b, *mode); + break; + case SpvBuiltInClipDistance: + *location = VARYING_SLOT_CLIP_DIST0; /* XXX CLIP_DIST1? 
*/ + validate_per_vertex_mode(b, *mode); + break; + case SpvBuiltInCullDistance: + /* XXX figure this out */ + unreachable("unhandled builtin"); + case SpvBuiltInVertexId: + /* Vulkan defines VertexID to be zero-based and reserves the new + * builtin keyword VertexIndex to indicate the non-zero-based value. + */ + *location = SYSTEM_VALUE_VERTEX_ID_ZERO_BASE; + set_mode_system_value(mode); + break; + case SpvBuiltInInstanceId: + *location = SYSTEM_VALUE_INSTANCE_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInPrimitiveId: + *location = VARYING_SLOT_PRIMITIVE_ID; + *mode = nir_var_shader_out; + break; + case SpvBuiltInInvocationId: + *location = SYSTEM_VALUE_INVOCATION_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInLayer: + *location = VARYING_SLOT_LAYER; + *mode = nir_var_shader_out; + break; + case SpvBuiltInTessLevelOuter: + case SpvBuiltInTessLevelInner: + case SpvBuiltInTessCoord: + case SpvBuiltInPatchVertices: + unreachable("no tessellation support"); + case SpvBuiltInFragCoord: + *location = VARYING_SLOT_POS; + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + assert(*mode == nir_var_shader_in); + break; + case SpvBuiltInPointCoord: + *location = VARYING_SLOT_PNTC; + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + assert(*mode == nir_var_shader_in); + break; + case SpvBuiltInFrontFacing: + *location = VARYING_SLOT_FACE; + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + assert(*mode == nir_var_shader_in); + break; + case SpvBuiltInSampleId: + *location = SYSTEM_VALUE_SAMPLE_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInSamplePosition: + *location = SYSTEM_VALUE_SAMPLE_POS; + set_mode_system_value(mode); + break; + case SpvBuiltInSampleMask: + *location = SYSTEM_VALUE_SAMPLE_MASK_IN; /* XXX out? 
*/ + set_mode_system_value(mode); + break; + case SpvBuiltInFragColor: + *location = FRAG_RESULT_COLOR; + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + assert(*mode == nir_var_shader_out); + break; + case SpvBuiltInFragDepth: + *location = FRAG_RESULT_DEPTH; + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + assert(*mode == nir_var_shader_out); + break; + case SpvBuiltInNumWorkgroups: + case SpvBuiltInWorkgroupSize: + /* these are constants, need to be handled specially */ + unreachable("unsupported builtin"); + break; + case SpvBuiltInGlobalInvocationId: + case SpvBuiltInLocalInvocationIndex: + /* these are computed values, need to be handled specially */ + unreachable("unsupported builtin"); + case SpvBuiltInWorkgroupId: + *location = SYSTEM_VALUE_WORK_GROUP_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInLocalInvocationId: + *location = SYSTEM_VALUE_LOCAL_INVOCATION_ID; + set_mode_system_value(mode); + break; + case SpvBuiltInHelperInvocation: + default: + unreachable("unsupported builtin"); + } +} + +static void +var_decoration_cb(struct vtn_builder *b, struct vtn_value *val, int member, + const struct vtn_decoration *dec, void *void_var) +{ + assert(val->value_type == vtn_value_type_deref); + assert(val->deref->deref.child == NULL); + assert(val->deref->var == void_var); + + nir_variable *var = void_var; + switch (dec->decoration) { + case SpvDecorationRelaxedPrecision: + break; /* FIXME: Do nothing with this for now. 
*/ + case SpvDecorationSmooth: + var->data.interpolation = INTERP_QUALIFIER_SMOOTH; + break; + case SpvDecorationNoPerspective: + var->data.interpolation = INTERP_QUALIFIER_NOPERSPECTIVE; + break; + case SpvDecorationFlat: + var->data.interpolation = INTERP_QUALIFIER_FLAT; + break; + case SpvDecorationCentroid: + var->data.centroid = true; + break; + case SpvDecorationSample: + var->data.sample = true; + break; + case SpvDecorationInvariant: + var->data.invariant = true; + break; + case SpvDecorationConstant: + assert(var->constant_initializer != NULL); + var->data.read_only = true; + break; + case SpvDecorationNonWritable: + var->data.read_only = true; + break; + case SpvDecorationLocation: + var->data.location = dec->literals[0]; + break; + case SpvDecorationComponent: + var->data.location_frac = dec->literals[0]; + break; + case SpvDecorationIndex: + var->data.explicit_index = true; + var->data.index = dec->literals[0]; + break; + case SpvDecorationBinding: + var->data.explicit_binding = true; + var->data.binding = dec->literals[0]; + break; + case SpvDecorationDescriptorSet: + var->data.descriptor_set = dec->literals[0]; + break; + case SpvDecorationBuiltIn: { + SpvBuiltIn builtin = dec->literals[0]; + + nir_variable_mode mode = var->data.mode; + vtn_get_builtin_location(b, builtin, &var->data.location, &mode); + var->data.explicit_location = true; + var->data.mode = mode; + if (mode == nir_var_shader_in || mode == nir_var_system_value) + var->data.read_only = true; + + if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) + var->data.origin_upper_left = b->origin_upper_left; + + if (mode == nir_var_shader_out) + b->builtins[dec->literals[0]].out = var; + else + b->builtins[dec->literals[0]].in = var; + break; + } + case SpvDecorationRowMajor: + case SpvDecorationColMajor: + case SpvDecorationGLSLShared: + case SpvDecorationPatch: + case SpvDecorationRestrict: + case SpvDecorationAliased: + case SpvDecorationVolatile: + case 
SpvDecorationCoherent: + case SpvDecorationNonReadable: + case SpvDecorationUniform: + /* This is really nice but we have no use for it right now. */ + case SpvDecorationCPacked: + case SpvDecorationSaturatedConversion: + case SpvDecorationStream: + case SpvDecorationOffset: + case SpvDecorationXfbBuffer: + case SpvDecorationFuncParamAttr: + case SpvDecorationFPRoundingMode: + case SpvDecorationFPFastMathMode: + case SpvDecorationLinkageAttributes: + case SpvDecorationSpecId: + break; + default: + unreachable("Unhandled variable decoration"); + } +} + +static nir_variable * +get_builtin_variable(struct vtn_builder *b, + nir_variable_mode mode, + const struct glsl_type *type, + SpvBuiltIn builtin) +{ + nir_variable *var; + if (mode == nir_var_shader_out) + var = b->builtins[builtin].out; + else + var = b->builtins[builtin].in; + + if (!var) { + int location; + vtn_get_builtin_location(b, builtin, &location, &mode); + + var = nir_variable_create(b->shader, mode, type, "builtin"); + + var->data.location = location; + var->data.explicit_location = true; + + if (builtin == SpvBuiltInFragCoord || builtin == SpvBuiltInSamplePosition) + var->data.origin_upper_left = b->origin_upper_left; + + if (mode == nir_var_shader_out) + b->builtins[builtin].out = var; + else + b->builtins[builtin].in = var; + } + + return var; +} + +static struct vtn_ssa_value * +_vtn_variable_load(struct vtn_builder *b, + nir_deref_var *src_deref, nir_deref *src_deref_tail) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = src_deref_tail->type; + + /* The deref tail may contain a deref to select a component of a vector (in + * other words, it might not be an actual tail) so we have to save it away + * here since we overwrite it later. + */ + nir_deref *old_child = src_deref_tail->child; + + if (glsl_type_is_vector_or_scalar(val->type)) { + /* Terminate the deref chain in case there is one more link to pick + * off a component of the vector. 
+ */ + src_deref_tail->child = NULL; + + nir_intrinsic_instr *load = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_load_var); + load->variables[0] = + nir_deref_as_var(nir_copy_deref(load, &src_deref->deref)); + load->num_components = glsl_get_vector_elements(val->type); + nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); + + nir_builder_instr_insert(&b->nb, &load->instr); + + if (src_deref->var->data.mode == nir_var_uniform && + glsl_get_base_type(val->type) == GLSL_TYPE_BOOL) { + /* Uniform boolean loads need to be fixed up since they're defined + * to be zero/nonzero rather than NIR_FALSE/NIR_TRUE. + */ + val->def = nir_ine(&b->nb, &load->dest.ssa, nir_imm_int(&b->nb, 0)); + } else { + val->def = &load->dest.ssa; + } + } else if (glsl_get_base_type(val->type) == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(val->type)) { + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + + nir_deref_array *deref = nir_deref_array_create(b); + deref->deref_array_type = nir_deref_array_type_direct; + deref->deref.type = glsl_get_array_element(val->type); + src_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->base_offset = i; + val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); + } + } else { + assert(glsl_get_base_type(val->type) == GLSL_TYPE_STRUCT); + unsigned elems = glsl_get_length(val->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + + nir_deref_struct *deref = nir_deref_struct_create(b, 0); + src_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->index = i; + deref->deref.type = glsl_get_struct_field(val->type, i); + val->elems[i] = _vtn_variable_load(b, src_deref, &deref->deref); + } + } + + src_deref_tail->child = old_child; + + return val; +} + +static void +_vtn_variable_store(struct vtn_builder *b, + nir_deref_var *dest_deref, nir_deref *dest_deref_tail, + struct vtn_ssa_value 
*src) +{ + nir_deref *old_child = dest_deref_tail->child; + + if (glsl_type_is_vector_or_scalar(src->type)) { + /* Terminate the deref chain in case there is one more link to pick + * off a component of the vector. + */ + dest_deref_tail->child = NULL; + + nir_intrinsic_instr *store = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_store_var); + store->variables[0] = + nir_deref_as_var(nir_copy_deref(store, &dest_deref->deref)); + store->num_components = glsl_get_vector_elements(src->type); + store->src[0] = nir_src_for_ssa(src->def); + + nir_builder_instr_insert(&b->nb, &store->instr); + } else if (glsl_get_base_type(src->type) == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(src->type)) { + unsigned elems = glsl_get_length(src->type); + + nir_deref_array *deref = nir_deref_array_create(b); + deref->deref_array_type = nir_deref_array_type_direct; + deref->deref.type = glsl_get_array_element(src->type); + dest_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->base_offset = i; + _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); + } + } else { + assert(glsl_get_base_type(src->type) == GLSL_TYPE_STRUCT); + unsigned elems = glsl_get_length(src->type); + + nir_deref_struct *deref = nir_deref_struct_create(b, 0); + dest_deref_tail->child = &deref->deref; + for (unsigned i = 0; i < elems; i++) { + deref->index = i; + deref->deref.type = glsl_get_struct_field(src->type, i); + _vtn_variable_store(b, dest_deref, &deref->deref, src->elems[i]); + } + } + + dest_deref_tail->child = old_child; +} + +static nir_ssa_def * +nir_vulkan_resource_index(nir_builder *b, unsigned set, unsigned binding, + nir_variable_mode mode, nir_ssa_def *array_index) +{ + if (array_index == NULL) + array_index = nir_imm_int(b, 0); + + nir_intrinsic_instr *instr = + nir_intrinsic_instr_create(b->shader, + nir_intrinsic_vulkan_resource_index); + instr->src[0] = nir_src_for_ssa(array_index); + instr->const_index[0] = set; + instr->const_index[1] = 
binding; + instr->const_index[2] = mode; + + nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL); + nir_builder_instr_insert(b, &instr->instr); + + return &instr->dest.ssa; +} + +static struct vtn_ssa_value * +_vtn_block_load(struct vtn_builder *b, nir_intrinsic_op op, + unsigned set, unsigned binding, nir_variable_mode mode, + nir_ssa_def *index, unsigned offset, nir_ssa_def *indirect, + struct vtn_type *type) +{ + struct vtn_ssa_value *val = ralloc(b, struct vtn_ssa_value); + val->type = type->type; + val->transposed = NULL; + if (glsl_type_is_vector_or_scalar(type->type)) { + nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op); + load->num_components = glsl_get_vector_elements(type->type); + load->const_index[0] = offset; + + switch (op) { + case nir_intrinsic_load_ubo_indirect: + case nir_intrinsic_load_ssbo_indirect: + load->src[1] = nir_src_for_ssa(indirect); + /* fall through */ + case nir_intrinsic_load_ubo: + case nir_intrinsic_load_ssbo: { + nir_ssa_def *res_index = nir_vulkan_resource_index(&b->nb, + set, binding, + mode, index); + load->src[0] = nir_src_for_ssa(res_index); + break; + } + + case nir_intrinsic_load_push_constant: + break; /* Nothing to do */ + case nir_intrinsic_load_push_constant_indirect: + load->src[0] = nir_src_for_ssa(indirect); + break; + + default: + unreachable("Invalid block load intrinsic"); + } + + nir_ssa_dest_init(&load->instr, &load->dest, load->num_components, NULL); + nir_builder_instr_insert(&b->nb, &load->instr); + val->def = &load->dest.ssa; + } else { + unsigned elems = glsl_get_length(type->type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + if (glsl_type_is_struct(type->type)) { + for (unsigned i = 0; i < elems; i++) { + val->elems[i] = _vtn_block_load(b, op, set, binding, mode, index, + offset + type->offsets[i], + indirect, type->members[i]); + } + } else { + for (unsigned i = 0; i < elems; i++) { + val->elems[i] = _vtn_block_load(b, op, set, binding, mode, index, + offset 
+ i * type->stride, + indirect, type->array_element); + } + } + } + + return val; +} + +static void +vtn_block_get_offset(struct vtn_builder *b, nir_deref_var *src, + struct vtn_type **type, nir_deref *src_tail, + nir_ssa_def **index, + unsigned *offset, nir_ssa_def **indirect) +{ + nir_deref *deref = &src->deref; + + if (deref->child->deref_type == nir_deref_type_array) { + deref = deref->child; + *type = (*type)->array_element; + nir_deref_array *deref_array = nir_deref_as_array(deref); + *index = nir_imm_int(&b->nb, deref_array->base_offset); + + if (deref_array->deref_array_type == nir_deref_array_type_indirect) + *index = nir_iadd(&b->nb, *index, deref_array->indirect.ssa); + } else { + *index = nir_imm_int(&b->nb, 0); + } + + *offset = 0; + *indirect = NULL; + while (deref != src_tail) { + deref = deref->child; + switch (deref->deref_type) { + case nir_deref_type_array: { + nir_deref_array *deref_array = nir_deref_as_array(deref); + if (deref_array->deref_array_type == nir_deref_array_type_direct) { + *offset += (*type)->stride * deref_array->base_offset; + } else { + nir_ssa_def *off = nir_imul(&b->nb, deref_array->indirect.ssa, + nir_imm_int(&b->nb, (*type)->stride)); + *indirect = *indirect ? 
nir_iadd(&b->nb, *indirect, off) : off; + } + *type = (*type)->array_element; + break; + } + + case nir_deref_type_struct: { + nir_deref_struct *deref_struct = nir_deref_as_struct(deref); + *offset += (*type)->offsets[deref_struct->index]; + *type = (*type)->members[deref_struct->index]; + break; + } + + default: + unreachable("unknown deref type"); + } + } +} + +static struct vtn_ssa_value * +vtn_block_load(struct vtn_builder *b, nir_deref_var *src, + struct vtn_type *type, nir_deref *src_tail) +{ + nir_ssa_def *index; + unsigned offset; + nir_ssa_def *indirect; + vtn_block_get_offset(b, src, &type, src_tail, &index, &offset, &indirect); + + nir_intrinsic_op op; + if (src->var->data.mode == nir_var_uniform) { + if (src->var->data.descriptor_set >= 0) { + /* UBO load */ + assert(src->var->data.binding >= 0); + + op = indirect ? nir_intrinsic_load_ubo_indirect + : nir_intrinsic_load_ubo; + } else { + /* Push constant load */ + assert(src->var->data.descriptor_set == -1 && + src->var->data.binding == -1); + + op = indirect ? nir_intrinsic_load_push_constant_indirect + : nir_intrinsic_load_push_constant; + } + } else { + assert(src->var->data.mode == nir_var_shader_storage); + op = indirect ? nir_intrinsic_load_ssbo_indirect + : nir_intrinsic_load_ssbo; + } + + return _vtn_block_load(b, op, src->var->data.descriptor_set, + src->var->data.binding, src->var->data.mode, + index, offset, indirect, type); +} + +/* + * Gets the NIR-level deref tail, which may have as a child an array deref + * selecting which component due to OpAccessChain supporting per-component + * indexing in SPIR-V. 
+ */ + +static nir_deref * +get_deref_tail(nir_deref_var *deref) +{ + nir_deref *cur = &deref->deref; + while (!glsl_type_is_vector_or_scalar(cur->type) && cur->child) + cur = cur->child; + + return cur; +} + +static nir_ssa_def *vtn_vector_extract(struct vtn_builder *b, + nir_ssa_def *src, unsigned index); + +static nir_ssa_def *vtn_vector_extract_dynamic(struct vtn_builder *b, + nir_ssa_def *src, + nir_ssa_def *index); + +static bool +variable_is_external_block(nir_variable *var) +{ + return var->interface_type && + glsl_type_is_struct(var->interface_type) && + (var->data.mode == nir_var_uniform || + var->data.mode == nir_var_shader_storage); +} + +static struct vtn_ssa_value * +vtn_variable_load(struct vtn_builder *b, nir_deref_var *src, + struct vtn_type *src_type) +{ + nir_deref *src_tail = get_deref_tail(src); + + struct vtn_ssa_value *val; + if (variable_is_external_block(src->var)) + val = vtn_block_load(b, src, src_type, src_tail); + else + val = _vtn_variable_load(b, src, src_tail); + + if (src_tail->child) { + nir_deref_array *vec_deref = nir_deref_as_array(src_tail->child); + assert(vec_deref->deref.child == NULL); + val->type = vec_deref->deref.type; + if (vec_deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_extract(b, val->def, vec_deref->base_offset); + else + val->def = vtn_vector_extract_dynamic(b, val->def, + vec_deref->indirect.ssa); + } + + return val; +} + +static void +_vtn_block_store(struct vtn_builder *b, nir_intrinsic_op op, + struct vtn_ssa_value *src, unsigned set, unsigned binding, + nir_variable_mode mode, nir_ssa_def *index, unsigned offset, + nir_ssa_def *indirect, struct vtn_type *type) +{ + assert(src->type == type->type); + if (glsl_type_is_vector_or_scalar(type->type)) { + nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op); + store->num_components = glsl_get_vector_elements(type->type); + store->const_index[0] = offset; + store->const_index[1] = (1 << store->num_components) - 1; 
+ store->src[0] = nir_src_for_ssa(src->def); + + nir_ssa_def *res_index = nir_vulkan_resource_index(&b->nb, + set, binding, + mode, index); + store->src[1] = nir_src_for_ssa(res_index); + + if (op == nir_intrinsic_store_ssbo_indirect) + store->src[2] = nir_src_for_ssa(indirect); + + nir_builder_instr_insert(&b->nb, &store->instr); + } else { + unsigned elems = glsl_get_length(type->type); + if (glsl_type_is_struct(type->type)) { + for (unsigned i = 0; i < elems; i++) { + _vtn_block_store(b, op, src->elems[i], set, binding, mode, + index, offset + type->offsets[i], indirect, + type->members[i]); + } + } else { + for (unsigned i = 0; i < elems; i++) { + _vtn_block_store(b, op, src->elems[i], set, binding, mode, + index, offset + i * type->stride, indirect, + type->array_element); + } + } + } +} + +static void +vtn_block_store(struct vtn_builder *b, struct vtn_ssa_value *src, + nir_deref_var *dest, struct vtn_type *type, + nir_deref *dest_tail) +{ + nir_ssa_def *index; + unsigned offset; + nir_ssa_def *indirect; + vtn_block_get_offset(b, dest, &type, dest_tail, &index, &offset, &indirect); + + nir_intrinsic_op op = indirect ? 
nir_intrinsic_store_ssbo_indirect + : nir_intrinsic_store_ssbo; + + return _vtn_block_store(b, op, src, dest->var->data.descriptor_set, + dest->var->data.binding, dest->var->data.mode, + index, offset, indirect, type); +} + +static nir_ssa_def * vtn_vector_insert(struct vtn_builder *b, + nir_ssa_def *src, nir_ssa_def *insert, + unsigned index); + +static nir_ssa_def * vtn_vector_insert_dynamic(struct vtn_builder *b, + nir_ssa_def *src, + nir_ssa_def *insert, + nir_ssa_def *index); +static void +vtn_variable_store(struct vtn_builder *b, struct vtn_ssa_value *src, + nir_deref_var *dest, struct vtn_type *dest_type) +{ + nir_deref *dest_tail = get_deref_tail(dest); + if (variable_is_external_block(dest->var)) { + assert(dest->var->data.mode == nir_var_shader_storage); + vtn_block_store(b, src, dest, dest_type, dest_tail); + } else { + if (dest_tail->child) { + struct vtn_ssa_value *val = _vtn_variable_load(b, dest, dest_tail); + nir_deref_array *deref = nir_deref_as_array(dest_tail->child); + assert(deref->deref.child == NULL); + if (deref->deref_array_type == nir_deref_array_type_direct) + val->def = vtn_vector_insert(b, val->def, src->def, + deref->base_offset); + else + val->def = vtn_vector_insert_dynamic(b, val->def, src->def, + deref->indirect.ssa); + _vtn_variable_store(b, dest, dest_tail, val); + } else { + _vtn_variable_store(b, dest, dest_tail, src); + } + } +} + +static void +vtn_variable_copy(struct vtn_builder *b, nir_deref_var *src, + nir_deref_var *dest, struct vtn_type *type) +{ + nir_deref *src_tail = get_deref_tail(src); + + if (src_tail->child || src->var->interface_type) { + assert(get_deref_tail(dest)->child); + struct vtn_ssa_value *val = vtn_variable_load(b, src, type); + vtn_variable_store(b, val, dest, type); + } else { + nir_intrinsic_instr *copy = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_copy_var); + copy->variables[0] = nir_deref_as_var(nir_copy_deref(copy, &dest->deref)); + copy->variables[1] = 
nir_deref_as_var(nir_copy_deref(copy, &src->deref)); + + nir_builder_instr_insert(&b->nb, &copy->instr); + } +} + +/* Tries to compute the size of an interface block based on the strides and + * offsets that are provided to us in the SPIR-V source. + */ +static unsigned +vtn_type_block_size(struct vtn_type *type) +{ + enum glsl_base_type base_type = glsl_get_base_type(type->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_DOUBLE: { + unsigned cols = type->row_major ? glsl_get_vector_elements(type->type) : + glsl_get_matrix_columns(type->type); + if (cols > 1) { + assert(type->stride > 0); + return type->stride * cols; + } else if (base_type == GLSL_TYPE_DOUBLE) { + return glsl_get_vector_elements(type->type) * 8; + } else { + return glsl_get_vector_elements(type->type) * 4; + } + } + + case GLSL_TYPE_STRUCT: + case GLSL_TYPE_INTERFACE: { + unsigned size = 0; + unsigned num_fields = glsl_get_length(type->type); + for (unsigned f = 0; f < num_fields; f++) { + unsigned field_end = type->offsets[f] + + vtn_type_block_size(type->members[f]); + size = MAX2(size, field_end); + } + return size; + } + + case GLSL_TYPE_ARRAY: + assert(type->stride > 0); + assert(glsl_get_length(type->type) > 0); + return type->stride * glsl_get_length(type->type); + + default: + assert(!"Invalid block type"); + return 0; + } +} + +static bool +is_interface_type(struct vtn_type *type) +{ + return type->block || type->buffer_block || + glsl_type_is_sampler(type->type) || + glsl_type_is_image(type->type); +} + +static void +vtn_handle_variables(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpVariable: { + struct vtn_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); + + nir_variable *var = rzalloc(b->shader, nir_variable); + + var->type = type->type; + var->name = 
ralloc_strdup(var, val->name); + + struct vtn_type *interface_type; + if (is_interface_type(type)) { + interface_type = type; + } else if (glsl_type_is_array(type->type) && + is_interface_type(type->array_element)) { + interface_type = type->array_element; + } else { + interface_type = NULL; + } + + if (interface_type) + var->interface_type = interface_type->type; + + switch ((SpvStorageClass)w[3]) { + case SpvStorageClassUniform: + case SpvStorageClassUniformConstant: + if (interface_type && interface_type->buffer_block) { + var->data.mode = nir_var_shader_storage; + b->shader->info.num_ssbos++; + } else { + /* UBO's and samplers */ + var->data.mode = nir_var_uniform; + var->data.read_only = true; + if (interface_type) { + if (glsl_type_is_image(interface_type->type)) { + b->shader->info.num_images++; + var->data.image.format = interface_type->image_format; + } else if (glsl_type_is_sampler(interface_type->type)) { + b->shader->info.num_textures++; + } else { + assert(glsl_type_is_struct(interface_type->type)); + b->shader->info.num_ubos++; + } + } + } + break; + case SpvStorageClassPushConstant: + assert(interface_type && interface_type->block); + var->data.mode = nir_var_uniform; + var->data.read_only = true; + var->data.descriptor_set = -1; + var->data.binding = -1; + + /* We have exactly one push constant block */ + assert(b->shader->num_uniforms == 0); + b->shader->num_uniforms = vtn_type_block_size(type); + break; + case SpvStorageClassInput: + var->data.mode = nir_var_shader_in; + var->data.read_only = true; + break; + case SpvStorageClassOutput: + var->data.mode = nir_var_shader_out; + break; + case SpvStorageClassPrivateGlobal: + var->data.mode = nir_var_global; + break; + case SpvStorageClassFunction: + var->data.mode = nir_var_local; + break; + case SpvStorageClassWorkgroupLocal: + case SpvStorageClassWorkgroupGlobal: + case SpvStorageClassGeneric: + case SpvStorageClassAtomicCounter: + default: + unreachable("Unhandled variable storage class"); + } + + 
if (count > 4) { + assert(count == 5); + var->constant_initializer = + vtn_value(b, w[4], vtn_value_type_constant)->constant; + } + + val->deref = nir_deref_var_create(b, var); + val->deref_type = type; + + /* We handle decorations first because decorations might give us + * location information. We use the data.explicit_location field to + * note that the location provided is the "final" location. If + * data.explicit_location == false, this means that it's relative to + * whatever the base location is. + */ + vtn_foreach_decoration(b, val, var_decoration_cb, var); + + if (!var->data.explicit_location) { + if (b->execution_model == SpvExecutionModelFragment && + var->data.mode == nir_var_shader_out) { + var->data.location += FRAG_RESULT_DATA0; + } else if (b->execution_model == SpvExecutionModelVertex && + var->data.mode == nir_var_shader_in) { + var->data.location += VERT_ATTRIB_GENERIC0; + } else if (var->data.mode == nir_var_shader_in || + var->data.mode == nir_var_shader_out) { + var->data.location += VARYING_SLOT_VAR0; + } + } + + /* Interface block variables aren't actually going to be referenced + * by the generated NIR, so we don't put them in the list + */ + if (interface_type && glsl_type_is_struct(interface_type->type)) + break; + + if (var->data.mode == nir_var_local) { + nir_function_impl_add_variable(b->impl, var); + } else { + nir_shader_add_variable(b->shader, var); + } + + break; + } + + case SpvOpAccessChain: + case SpvOpInBoundsAccessChain: { + nir_deref_var *base; + struct vtn_value *base_val = vtn_untyped_value(b, w[3]); + if (base_val->value_type == vtn_value_type_sampled_image) { + /* This is rather insane. SPIR-V allows you to use OpSampledImage + * to combine an array of images with a single sampler to get an + * array of sampled images that all share the same sampler. 
+ * Fortunately, this means that we can more-or-less ignore the + * sampler when crawling the access chain, but it does leave us + * with this rather awkward little special-case. + */ + base = base_val->sampled_image->image; + } else { + assert(base_val->value_type == vtn_value_type_deref); + base = base_val->deref; + } + + nir_deref_var *deref = nir_deref_as_var(nir_copy_deref(b, &base->deref)); + struct vtn_type *deref_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; + + nir_deref *tail = &deref->deref; + while (tail->child) + tail = tail->child; + + for (unsigned i = 0; i < count - 4; i++) { + assert(w[i + 4] < b->value_id_bound); + struct vtn_value *idx_val = &b->values[w[i + 4]]; + + enum glsl_base_type base_type = glsl_get_base_type(tail->type); + switch (base_type) { + case GLSL_TYPE_UINT: + case GLSL_TYPE_INT: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_ARRAY: { + nir_deref_array *deref_arr = nir_deref_array_create(b); + if (base_type == GLSL_TYPE_ARRAY || + glsl_type_is_matrix(tail->type)) { + deref_type = deref_type->array_element; + } else { + assert(glsl_type_is_vector(tail->type)); + deref_type = ralloc(b, struct vtn_type); + deref_type->type = glsl_scalar_type(base_type); + } + + deref_arr->deref.type = deref_type->type; + + if (idx_val->value_type == vtn_value_type_constant) { + unsigned idx = idx_val->constant->value.u[0]; + deref_arr->deref_array_type = nir_deref_array_type_direct; + deref_arr->base_offset = idx; + } else { + assert(idx_val->value_type == vtn_value_type_ssa); + assert(glsl_type_is_scalar(idx_val->ssa->type)); + deref_arr->deref_array_type = nir_deref_array_type_indirect; + deref_arr->base_offset = 0; + deref_arr->indirect = nir_src_for_ssa(idx_val->ssa->def); + } + tail->child = &deref_arr->deref; + break; + } + + case GLSL_TYPE_STRUCT: { + assert(idx_val->value_type == vtn_value_type_constant); + unsigned idx = idx_val->constant->value.u[0]; + deref_type = 
deref_type->members[idx]; + nir_deref_struct *deref_struct = nir_deref_struct_create(b, idx); + deref_struct->deref.type = deref_type->type; + tail->child = &deref_struct->deref; + break; + } + default: + unreachable("Invalid type for deref"); + } + + if (deref_type->is_builtin) { + /* If we encounter a builtin, we throw away the rest of the + * access chain, jump to the builtin, and keep building. + */ + const struct glsl_type *builtin_type = deref_type->type; + + nir_deref_array *per_vertex_deref = NULL; + if (glsl_type_is_array(base->var->type)) { + /* This builtin is a per-vertex builtin */ + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + assert(base->var->data.mode == nir_var_shader_in); + builtin_type = glsl_array_type(builtin_type, + b->shader->info.gs.vertices_in); + + /* The first non-var deref should be an array deref. */ + assert(deref->deref.child->deref_type == + nir_deref_type_array); + per_vertex_deref = nir_deref_as_array(deref->deref.child); + } + + nir_variable *builtin = get_builtin_variable(b, + base->var->data.mode, + builtin_type, + deref_type->builtin); + deref = nir_deref_var_create(b, builtin); + + if (per_vertex_deref) { + /* Since deref chains start at the variable, we can just + * steal that link and use it. + */ + deref->deref.child = &per_vertex_deref->deref; + per_vertex_deref->deref.child = NULL; + per_vertex_deref->deref.type = + glsl_get_array_element(builtin_type); + + tail = &per_vertex_deref->deref; + } else { + tail = &deref->deref; + } + } else { + tail = tail->child; + } + } + + /* For uniform blocks, we don't resolve the access chain until we + * actually access the variable, so we need to keep around the original + * type of the variable. 
+ */ + if (variable_is_external_block(base->var)) + deref_type = vtn_value(b, w[3], vtn_value_type_deref)->deref_type; + + if (base_val->value_type == vtn_value_type_sampled_image) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_sampled_image); + val->sampled_image = ralloc(b, struct vtn_sampled_image); + val->sampled_image->image = deref; + val->sampled_image->sampler = base_val->sampled_image->sampler; + } else { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_deref); + val->deref = deref; + val->deref_type = deref_type; + } + + break; + } + + case SpvOpCopyMemory: { + nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; + nir_deref_var *src = vtn_value(b, w[2], vtn_value_type_deref)->deref; + struct vtn_type *type = + vtn_value(b, w[1], vtn_value_type_deref)->deref_type; + + vtn_variable_copy(b, src, dest, type); + break; + } + + case SpvOpLoad: { + nir_deref_var *src = vtn_value(b, w[3], vtn_value_type_deref)->deref; + struct vtn_type *src_type = + vtn_value(b, w[3], vtn_value_type_deref)->deref_type; + + if (src->var->interface_type && + (glsl_type_is_sampler(src->var->interface_type) || + glsl_type_is_image(src->var->interface_type))) { + vtn_push_value(b, w[2], vtn_value_type_deref)->deref = src; + return; + } + + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + val->ssa = vtn_variable_load(b, src, src_type); + break; + } + + case SpvOpStore: { + nir_deref_var *dest = vtn_value(b, w[1], vtn_value_type_deref)->deref; + struct vtn_type *dest_type = + vtn_value(b, w[1], vtn_value_type_deref)->deref_type; + struct vtn_ssa_value *src = vtn_ssa_value(b, w[2]); + vtn_variable_store(b, src, dest, dest_type); + break; + } + + case SpvOpCopyMemorySized: + case SpvOpArrayLength: + default: + unreachable("Unhandled opcode"); + } +} + +static void +vtn_handle_function_call(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + unreachable("Unhandled opcode"); +} + 
+static struct vtn_ssa_value * +vtn_create_ssa_value(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = rzalloc(b, struct vtn_ssa_value); + val->type = type; + + if (!glsl_type_is_vector_or_scalar(type)) { + unsigned elems = glsl_get_length(type); + val->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) { + const struct glsl_type *child_type; + + switch (glsl_get_base_type(type)) { + case GLSL_TYPE_INT: + case GLSL_TYPE_UINT: + case GLSL_TYPE_BOOL: + case GLSL_TYPE_FLOAT: + case GLSL_TYPE_DOUBLE: + child_type = glsl_get_column_type(type); + break; + case GLSL_TYPE_ARRAY: + child_type = glsl_get_array_element(type); + break; + case GLSL_TYPE_STRUCT: + child_type = glsl_get_struct_field(type, i); + break; + default: + unreachable("unkown base type"); + } + + val->elems[i] = vtn_create_ssa_value(b, child_type); + } + } + + return val; +} + +static nir_tex_src +vtn_tex_src(struct vtn_builder *b, unsigned index, nir_tex_src_type type) +{ + nir_tex_src src; + src.src = nir_src_for_ssa(vtn_ssa_value(b, index)->def); + src.src_type = type; + return src; +} + +static void +vtn_handle_texture(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + if (opcode == SpvOpSampledImage) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_sampled_image); + val->sampled_image = ralloc(b, struct vtn_sampled_image); + val->sampled_image->image = + vtn_value(b, w[3], vtn_value_type_deref)->deref; + val->sampled_image->sampler = + vtn_value(b, w[4], vtn_value_type_deref)->deref; + return; + } + + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + + struct vtn_sampled_image sampled; + struct vtn_value *sampled_val = vtn_untyped_value(b, w[3]); + if (sampled_val->value_type == vtn_value_type_sampled_image) { + sampled = *sampled_val->sampled_image; + } else { + assert(sampled_val->value_type == vtn_value_type_deref); + sampled.image = NULL; + 
sampled.sampler = sampled_val->deref; + } + + nir_tex_src srcs[8]; /* 8 should be enough */ + nir_tex_src *p = srcs; + + unsigned idx = 4; + + unsigned coord_components = 0; + switch (opcode) { + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + case SpvOpImageFetch: + case SpvOpImageGather: + case SpvOpImageDrefGather: + case SpvOpImageQueryLod: { + /* All these types have the coordinate as their first real argument */ + struct vtn_ssa_value *coord = vtn_ssa_value(b, w[idx++]); + coord_components = glsl_get_vector_elements(coord->type); + p->src = nir_src_for_ssa(coord->def); + p->src_type = nir_tex_src_coord; + p++; + break; + } + + default: + break; + } + + /* These all have an explicit depth value as their next source */ + switch (opcode) { + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_comparitor); + break; + default: + break; + } + + /* Figure out the base texture operation */ + nir_texop texop; + switch (opcode) { + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + texop = nir_texop_tex; + break; + + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + texop = nir_texop_txl; + break; + + case SpvOpImageFetch: + texop = nir_texop_txf; + break; + + case SpvOpImageGather: + case SpvOpImageDrefGather: + texop = nir_texop_tg4; + break; + + case SpvOpImageQuerySizeLod: + case SpvOpImageQuerySize: + texop = nir_texop_txs; 
+ break; + + case SpvOpImageQueryLod: + texop = nir_texop_lod; + break; + + case SpvOpImageQueryLevels: + texop = nir_texop_query_levels; + break; + + case SpvOpImageQuerySamples: + default: + unreachable("Unhandled opcode"); + } + + /* Now we need to handle some number of optional arguments */ + if (idx < count) { + uint32_t operands = w[idx++]; + + if (operands & SpvImageOperandsBiasMask) { + assert(texop == nir_texop_tex); + texop = nir_texop_txb; + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_bias); + } + + if (operands & SpvImageOperandsLodMask) { + assert(texop == nir_texop_txl || texop == nir_texop_txf || + texop == nir_texop_txs); + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_lod); + } + + if (operands & SpvImageOperandsGradMask) { + assert(texop == nir_texop_tex); + texop = nir_texop_txd; + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddx); + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ddy); + } + + if (operands & SpvImageOperandsOffsetMask || + operands & SpvImageOperandsConstOffsetMask) + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_offset); + + if (operands & SpvImageOperandsConstOffsetsMask) + assert(!"Constant offsets to texture gather not yet implemented"); + + if (operands & SpvImageOperandsSampleMask) { + assert(texop == nir_texop_txf); + texop = nir_texop_txf_ms; + (*p++) = vtn_tex_src(b, w[idx++], nir_tex_src_ms_index); + } + } + /* We should have now consumed exactly all of the arguments */ + assert(idx == count); + + nir_tex_instr *instr = nir_tex_instr_create(b->shader, p - srcs); + + const struct glsl_type *sampler_type = + nir_deref_tail(&sampled.sampler->deref)->type; + instr->sampler_dim = glsl_get_sampler_dim(sampler_type); + + switch (glsl_get_sampler_result_type(sampler_type)) { + case GLSL_TYPE_FLOAT: instr->dest_type = nir_type_float; break; + case GLSL_TYPE_INT: instr->dest_type = nir_type_int; break; - case GLSL_TYPE_UINT: instr->dest_type = nir_type_unsigned; break; ++ case GLSL_TYPE_UINT: instr->dest_type = nir_type_uint; 
break; + case GLSL_TYPE_BOOL: instr->dest_type = nir_type_bool; break; + default: + unreachable("Invalid base type for sampler result"); + } + + instr->op = texop; + memcpy(instr->src, srcs, instr->num_srcs * sizeof(*instr->src)); + instr->coord_components = coord_components; + instr->is_array = glsl_sampler_type_is_array(sampler_type); + instr->is_shadow = glsl_sampler_type_is_shadow(sampler_type); + + instr->sampler = + nir_deref_as_var(nir_copy_deref(instr, &sampled.sampler->deref)); + if (sampled.image) { + instr->texture = + nir_deref_as_var(nir_copy_deref(instr, &sampled.image->deref)); + } else { + instr->texture = NULL; + } + + nir_ssa_dest_init(&instr->instr, &instr->dest, 4, NULL); + val->ssa = vtn_create_ssa_value(b, glsl_vector_type(GLSL_TYPE_FLOAT, 4)); + val->ssa->def = &instr->dest.ssa; + + nir_builder_instr_insert(&b->nb, &instr->instr); +} + +static nir_ssa_def * +get_image_coord(struct vtn_builder *b, uint32_t value) +{ + struct vtn_ssa_value *coord = vtn_ssa_value(b, value); + + /* The image_load_store intrinsics assume a 4-dim coordinate */ + unsigned dim = glsl_get_vector_elements(coord->type); + unsigned swizzle[4]; + for (unsigned i = 0; i < 4; i++) + swizzle[i] = MIN2(i, dim - 1); + + return nir_swizzle(&b->nb, coord->def, swizzle, 4, false); +} + +static void +vtn_handle_image(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + /* Just get this one out of the way */ + if (opcode == SpvOpImageTexelPointer) { + struct vtn_value *val = + vtn_push_value(b, w[2], vtn_value_type_image_pointer); + val->image = ralloc(b, struct vtn_image_pointer); + + val->image->deref = vtn_value(b, w[3], vtn_value_type_deref)->deref; + val->image->coord = get_image_coord(b, w[4]); + val->image->sample = vtn_ssa_value(b, w[5])->def; + return; + } + + struct vtn_image_pointer image; + + switch (opcode) { + case SpvOpAtomicExchange: + case SpvOpAtomicCompareExchange: + case SpvOpAtomicCompareExchangeWeak: + case SpvOpAtomicIIncrement: + 
case SpvOpAtomicIDecrement: + case SpvOpAtomicIAdd: + case SpvOpAtomicISub: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: + image = *vtn_value(b, w[3], vtn_value_type_image_pointer)->image; + break; + + case SpvOpImageRead: + image.deref = vtn_value(b, w[3], vtn_value_type_deref)->deref; + image.coord = get_image_coord(b, w[4]); + + if (count > 5 && (w[5] & SpvImageOperandsSampleMask)) { + assert(w[5] == SpvImageOperandsSampleMask); + image.sample = vtn_ssa_value(b, w[6])->def; + } else { + image.sample = nir_ssa_undef(&b->nb, 1); + } + break; + + case SpvOpImageWrite: + image.deref = vtn_value(b, w[1], vtn_value_type_deref)->deref; + image.coord = get_image_coord(b, w[2]); + + /* texel = w[3] */ + + if (count > 4 && (w[4] & SpvImageOperandsSampleMask)) { + assert(w[4] == SpvImageOperandsSampleMask); + image.sample = vtn_ssa_value(b, w[5])->def; + } else { + image.sample = nir_ssa_undef(&b->nb, 1); + } + /* Do not fall through into the unreachable default case. */ + break; + + default: + unreachable("Invalid image opcode"); + } + + nir_intrinsic_op op; + switch (opcode) { +#define OP(S, N) case SpvOp##S: op = nir_intrinsic_image_##N; break; + OP(ImageRead, load) + OP(ImageWrite, store) + OP(AtomicExchange, atomic_exchange) + OP(AtomicCompareExchange, atomic_comp_swap) + OP(AtomicIIncrement, atomic_add) + OP(AtomicIDecrement, atomic_add) + OP(AtomicIAdd, atomic_add) + OP(AtomicISub, atomic_add) + OP(AtomicSMin, atomic_min) + OP(AtomicUMin, atomic_min) + OP(AtomicSMax, atomic_max) + OP(AtomicUMax, atomic_max) + OP(AtomicAnd, atomic_and) + OP(AtomicOr, atomic_or) + OP(AtomicXor, atomic_xor) +#undef OP + default: + unreachable("Invalid image opcode"); + } + + nir_intrinsic_instr *intrin = nir_intrinsic_instr_create(b->shader, op); + intrin->variables[0] = + nir_deref_as_var(nir_copy_deref(&intrin->instr, &image.deref->deref)); + intrin->src[0] = nir_src_for_ssa(image.coord); + intrin->src[1] = nir_src_for_ssa(image.sample); 
+ + switch (opcode) { + case SpvOpImageRead: + break; + case SpvOpImageWrite: + intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[3])->def); + break; + case SpvOpAtomicIIncrement: + intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, 1)); + break; + case SpvOpAtomicIDecrement: + intrin->src[2] = nir_src_for_ssa(nir_imm_int(&b->nb, -1)); + break; + + case SpvOpAtomicExchange: + case SpvOpAtomicIAdd: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: + intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); + break; + + case SpvOpAtomicCompareExchange: + intrin->src[2] = nir_src_for_ssa(vtn_ssa_value(b, w[7])->def); + intrin->src[3] = nir_src_for_ssa(vtn_ssa_value(b, w[6])->def); + break; + + case SpvOpAtomicISub: + intrin->src[2] = nir_src_for_ssa(nir_ineg(&b->nb, vtn_ssa_value(b, w[6])->def)); + break; + + default: + unreachable("Invalid image opcode"); + } + + if (opcode != SpvOpImageWrite) { + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type; + nir_ssa_dest_init(&intrin->instr, &intrin->dest, + glsl_get_vector_elements(type->type), NULL); + val->ssa = vtn_create_ssa_value(b, type->type); + val->ssa->def = &intrin->dest.ssa; + } + + nir_builder_instr_insert(&b->nb, &intrin->instr); +} + +static nir_alu_instr * +create_vec(void *mem_ctx, unsigned num_components) +{ + nir_op op; + switch (num_components) { + case 1: op = nir_op_fmov; break; + case 2: op = nir_op_vec2; break; + case 3: op = nir_op_vec3; break; + case 4: op = nir_op_vec4; break; + default: unreachable("bad vector size"); + } + + nir_alu_instr *vec = nir_alu_instr_create(mem_ctx, op); + nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL); + vec->dest.write_mask = (1 << num_components) - 1; + + return vec; +} + +static struct vtn_ssa_value * +vtn_transpose(struct vtn_builder 
*b, struct vtn_ssa_value *src) +{ + if (src->transposed) + return src->transposed; + + struct vtn_ssa_value *dest = + vtn_create_ssa_value(b, glsl_transposed_type(src->type)); + + for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) { + nir_alu_instr *vec = create_vec(b, glsl_get_matrix_columns(src->type)); + if (glsl_type_is_vector_or_scalar(src->type)) { + vec->src[0].src = nir_src_for_ssa(src->def); + vec->src[0].swizzle[0] = i; + } else { + for (unsigned j = 0; j < glsl_get_matrix_columns(src->type); j++) { + vec->src[j].src = nir_src_for_ssa(src->elems[j]->def); + vec->src[j].swizzle[0] = i; + } + } + nir_builder_instr_insert(&b->nb, &vec->instr); + dest->elems[i]->def = &vec->dest.dest.ssa; + } + + dest->transposed = src; + + return dest; +} + +/* + * Normally, column vectors in SPIR-V correspond to a single NIR SSA + * definition. But for matrix multiplies, we want to do one routine for + * multiplying a matrix by a matrix and then pretend that vectors are matrices + * with one column. So we "wrap" these things, and unwrap the result before we + * send it off. 
+ */ + +static struct vtn_ssa_value * +vtn_wrap_matrix(struct vtn_builder *b, struct vtn_ssa_value *val) +{ + if (val == NULL) + return NULL; + + if (glsl_type_is_matrix(val->type)) + return val; + + struct vtn_ssa_value *dest = rzalloc(b, struct vtn_ssa_value); + dest->type = val->type; + dest->elems = ralloc_array(b, struct vtn_ssa_value *, 1); + dest->elems[0] = val; + + return dest; +} + +static struct vtn_ssa_value * +vtn_unwrap_matrix(struct vtn_ssa_value *val) +{ + if (glsl_type_is_matrix(val->type)) + return val; + + return val->elems[0]; +} + +static struct vtn_ssa_value * +vtn_matrix_multiply(struct vtn_builder *b, + struct vtn_ssa_value *_src0, struct vtn_ssa_value *_src1) +{ + + struct vtn_ssa_value *src0 = vtn_wrap_matrix(b, _src0); + struct vtn_ssa_value *src1 = vtn_wrap_matrix(b, _src1); + struct vtn_ssa_value *src0_transpose = vtn_wrap_matrix(b, _src0->transposed); + struct vtn_ssa_value *src1_transpose = vtn_wrap_matrix(b, _src1->transposed); + + unsigned src0_rows = glsl_get_vector_elements(src0->type); + unsigned src0_columns = glsl_get_matrix_columns(src0->type); + unsigned src1_columns = glsl_get_matrix_columns(src1->type); + + struct vtn_ssa_value *dest = + vtn_create_ssa_value(b, glsl_matrix_type(glsl_get_base_type(src0->type), + src0_rows, src1_columns)); + + dest = vtn_wrap_matrix(b, dest); + + bool transpose_result = false; + if (src0_transpose && src1_transpose) { + /* transpose(A) * transpose(B) = transpose(B * A) */ + src1 = src0_transpose; + src0 = src1_transpose; + src0_transpose = NULL; + src1_transpose = NULL; + transpose_result = true; + } + + if (src0_transpose && !src1_transpose && + glsl_get_base_type(src0->type) == GLSL_TYPE_FLOAT) { + /* We already have the rows of src0 and the columns of src1 available, + * so we can just take the dot product of each row with each column to + * get the result. 
+ */ + + for (unsigned i = 0; i < src1_columns; i++) { + nir_alu_instr *vec = create_vec(b, src0_rows); + for (unsigned j = 0; j < src0_rows; j++) { + vec->src[j].src = + nir_src_for_ssa(nir_fdot(&b->nb, src0_transpose->elems[j]->def, + src1->elems[i]->def)); + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + dest->elems[i]->def = &vec->dest.dest.ssa; + } + } else { + /* We don't handle the case where src1 is transposed but not src0, since + * the general case only uses individual components of src1 so the + * optimizer should chew through the transpose we emitted for src1. + */ + + for (unsigned i = 0; i < src1_columns; i++) { + /* dest[i] = sum(src0[j] * src1[i][j] for all j) */ + dest->elems[i]->def = + nir_fmul(&b->nb, src0->elems[0]->def, + vtn_vector_extract(b, src1->elems[i]->def, 0)); + for (unsigned j = 1; j < src0_columns; j++) { + dest->elems[i]->def = + nir_fadd(&b->nb, dest->elems[i]->def, + nir_fmul(&b->nb, src0->elems[j]->def, + vtn_vector_extract(b, + src1->elems[i]->def, j))); + } + } + } + + dest = vtn_unwrap_matrix(dest); + + if (transpose_result) + dest = vtn_transpose(b, dest); + + return dest; +} + +static struct vtn_ssa_value * +vtn_mat_times_scalar(struct vtn_builder *b, + struct vtn_ssa_value *mat, + nir_ssa_def *scalar) +{ + struct vtn_ssa_value *dest = vtn_create_ssa_value(b, mat->type); + for (unsigned i = 0; i < glsl_get_matrix_columns(mat->type); i++) { + if (glsl_get_base_type(mat->type) == GLSL_TYPE_FLOAT) + dest->elems[i]->def = nir_fmul(&b->nb, mat->elems[i]->def, scalar); + else + dest->elems[i]->def = nir_imul(&b->nb, mat->elems[i]->def, scalar); + } + + return dest; +} + +static void +vtn_handle_matrix_alu(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + + switch (opcode) { + case SpvOpTranspose: { + struct vtn_ssa_value *src = vtn_ssa_value(b, w[3]); + val->ssa = vtn_transpose(b, src); + break; + } + + case 
SpvOpOuterProduct: { + struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]); + + val->ssa = vtn_matrix_multiply(b, src0, vtn_transpose(b, src1)); + break; + } + + case SpvOpMatrixTimesScalar: { + struct vtn_ssa_value *mat = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *scalar = vtn_ssa_value(b, w[4]); + + if (mat->transposed) { + val->ssa = vtn_transpose(b, vtn_mat_times_scalar(b, mat->transposed, + scalar->def)); + } else { + val->ssa = vtn_mat_times_scalar(b, mat, scalar->def); + } + break; + } + + case SpvOpVectorTimesMatrix: + case SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: { + struct vtn_ssa_value *src0 = vtn_ssa_value(b, w[3]); + struct vtn_ssa_value *src1 = vtn_ssa_value(b, w[4]); + + val->ssa = vtn_matrix_multiply(b, src0, src1); + break; + } + + default: unreachable("unknown matrix opcode"); + } +} + +static void +vtn_handle_alu(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa = vtn_create_ssa_value(b, type); + + /* Collect the various SSA sources */ + unsigned num_inputs = count - 3; + nir_ssa_def *src[4]; + for (unsigned i = 0; i < num_inputs; i++) + src[i] = vtn_ssa_value(b, w[i + 3])->def; + + /* Indicates that the first two arguments should be swapped. This is + * used for implementing greater-than and less-than-or-equal. 
+ */ + bool swap = false; + + nir_op op; + switch (opcode) { + /* Basic ALU operations */ + case SpvOpSNegate: op = nir_op_ineg; break; + case SpvOpFNegate: op = nir_op_fneg; break; + case SpvOpNot: op = nir_op_inot; break; + + case SpvOpAny: + switch (src[0]->num_components) { + case 1: op = nir_op_imov; break; + case 2: op = nir_op_bany2; break; + case 3: op = nir_op_bany3; break; + case 4: op = nir_op_bany4; break; + } + break; + + case SpvOpAll: + switch (src[0]->num_components) { + case 1: op = nir_op_imov; break; + case 2: op = nir_op_ball2; break; + case 3: op = nir_op_ball3; break; + case 4: op = nir_op_ball4; break; + } + break; + + case SpvOpIAdd: op = nir_op_iadd; break; + case SpvOpFAdd: op = nir_op_fadd; break; + case SpvOpISub: op = nir_op_isub; break; + case SpvOpFSub: op = nir_op_fsub; break; + case SpvOpIMul: op = nir_op_imul; break; + case SpvOpFMul: op = nir_op_fmul; break; + case SpvOpUDiv: op = nir_op_udiv; break; + case SpvOpSDiv: op = nir_op_idiv; break; + case SpvOpFDiv: op = nir_op_fdiv; break; + case SpvOpUMod: op = nir_op_umod; break; + case SpvOpSMod: op = nir_op_umod; break; /* FIXME? 
*/ + case SpvOpFMod: op = nir_op_fmod; break; + + case SpvOpDot: + assert(src[0]->num_components == src[1]->num_components); + switch (src[0]->num_components) { + case 1: op = nir_op_fmul; break; + case 2: op = nir_op_fdot2; break; + case 3: op = nir_op_fdot3; break; + case 4: op = nir_op_fdot4; break; + } + break; + + case SpvOpShiftRightLogical: op = nir_op_ushr; break; + case SpvOpShiftRightArithmetic: op = nir_op_ishr; break; + case SpvOpShiftLeftLogical: op = nir_op_ishl; break; + case SpvOpLogicalOr: op = nir_op_ior; break; + case SpvOpLogicalEqual: op = nir_op_ieq; break; + case SpvOpLogicalNotEqual: op = nir_op_ine; break; + case SpvOpLogicalAnd: op = nir_op_iand; break; + case SpvOpBitwiseOr: op = nir_op_ior; break; + case SpvOpBitwiseXor: op = nir_op_ixor; break; + case SpvOpBitwiseAnd: op = nir_op_iand; break; + case SpvOpSelect: op = nir_op_bcsel; break; + case SpvOpIEqual: op = nir_op_ieq; break; + + /* Comparisons: (TODO: How do we want to handle ordered/unordered?) */ + case SpvOpFOrdEqual: op = nir_op_feq; break; + case SpvOpFUnordEqual: op = nir_op_feq; break; + case SpvOpINotEqual: op = nir_op_ine; break; + case SpvOpFOrdNotEqual: op = nir_op_fne; break; + case SpvOpFUnordNotEqual: op = nir_op_fne; break; + case SpvOpULessThan: op = nir_op_ult; break; + case SpvOpSLessThan: op = nir_op_ilt; break; + case SpvOpFOrdLessThan: op = nir_op_flt; break; + case SpvOpFUnordLessThan: op = nir_op_flt; break; + case SpvOpUGreaterThan: op = nir_op_ult; swap = true; break; + case SpvOpSGreaterThan: op = nir_op_ilt; swap = true; break; + case SpvOpFOrdGreaterThan: op = nir_op_flt; swap = true; break; + case SpvOpFUnordGreaterThan: op = nir_op_flt; swap = true; break; + case SpvOpULessThanEqual: op = nir_op_uge; swap = true; break; + case SpvOpSLessThanEqual: op = nir_op_ige; swap = true; break; + case SpvOpFOrdLessThanEqual: op = nir_op_fge; swap = true; break; + case SpvOpFUnordLessThanEqual: op = nir_op_fge; swap = true; break; + case SpvOpUGreaterThanEqual: 
op = nir_op_uge; break; + case SpvOpSGreaterThanEqual: op = nir_op_ige; break; + case SpvOpFOrdGreaterThanEqual: op = nir_op_fge; break; + case SpvOpFUnordGreaterThanEqual:op = nir_op_fge; break; + + /* Conversions: */ + case SpvOpConvertFToU: op = nir_op_f2u; break; + case SpvOpConvertFToS: op = nir_op_f2i; break; + case SpvOpConvertSToF: op = nir_op_i2f; break; + case SpvOpConvertUToF: op = nir_op_u2f; break; + case SpvOpBitcast: op = nir_op_imov; break; + case SpvOpUConvert: + case SpvOpSConvert: + op = nir_op_imov; /* TODO: NIR is 32-bit only; these are no-ops. */ + break; + case SpvOpFConvert: + op = nir_op_fmov; + break; + + /* Derivatives: */ + case SpvOpDPdx: op = nir_op_fddx; break; + case SpvOpDPdy: op = nir_op_fddy; break; + case SpvOpDPdxFine: op = nir_op_fddx_fine; break; + case SpvOpDPdyFine: op = nir_op_fddy_fine; break; + case SpvOpDPdxCoarse: op = nir_op_fddx_coarse; break; + case SpvOpDPdyCoarse: op = nir_op_fddy_coarse; break; + /* fwidth(p) = |dFdx(p)| + |dFdy(p)|. OpFwidth* take a single operand, so both derivatives are taken of src[0]; these arms build the result directly and return instead of going through the generic ALU emission. */ + case SpvOpFwidth: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddy(&b->nb, src[0]))); + return; + case SpvOpFwidthFine: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_fine(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddy_fine(&b->nb, src[0]))); + return; + case SpvOpFwidthCoarse: + val->ssa->def = nir_fadd(&b->nb, + nir_fabs(&b->nb, nir_fddx_coarse(&b->nb, src[0])), + nir_fabs(&b->nb, nir_fddy_coarse(&b->nb, src[0]))); + return; + + case SpvOpVectorTimesScalar: + /* The builder will take care of splatting for us.
*/ + val->ssa->def = nir_fmul(&b->nb, src[0], src[1]); + return; + + case SpvOpSRem: + case SpvOpFRem: + unreachable("No NIR equivalent"); + + case SpvOpIsNan: + case SpvOpIsInf: + case SpvOpIsFinite: + case SpvOpIsNormal: + case SpvOpSignBitSet: + case SpvOpLessOrGreater: + case SpvOpOrdered: + case SpvOpUnordered: + default: + unreachable("Unhandled opcode"); + } + + if (swap) { + nir_ssa_def *tmp = src[0]; + src[0] = src[1]; + src[1] = tmp; + } + + nir_alu_instr *instr = nir_alu_instr_create(b->shader, op); + nir_ssa_dest_init(&instr->instr, &instr->dest.dest, + glsl_get_vector_elements(type), val->name); + instr->dest.write_mask = (1 << glsl_get_vector_elements(type)) - 1; + val->ssa->def = &instr->dest.dest.ssa; + + for (unsigned i = 0; i < nir_op_infos[op].num_inputs; i++) + instr->src[i].src = nir_src_for_ssa(src[i]); + + nir_builder_instr_insert(&b->nb, &instr->instr); +} + +static nir_ssa_def * +vtn_vector_extract(struct vtn_builder *b, nir_ssa_def *src, unsigned index) +{ + unsigned swiz[4] = { index }; + return nir_swizzle(&b->nb, src, swiz, 1, true); +} + + +static nir_ssa_def * +vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert, + unsigned index) +{ + nir_alu_instr *vec = create_vec(b->shader, src->num_components); + + for (unsigned i = 0; i < src->num_components; i++) { + if (i == index) { + vec->src[i].src = nir_src_for_ssa(insert); + } else { + vec->src[i].src = nir_src_for_ssa(src); + vec->src[i].swizzle[0] = i; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +static nir_ssa_def * +vtn_vector_extract_dynamic(struct vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *index) +{ + nir_ssa_def *dest = vtn_vector_extract(b, src, 0); + for (unsigned i = 1; i < src->num_components; i++) + dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + vtn_vector_extract(b, src, i), dest); + + return dest; +} + +static nir_ssa_def * +vtn_vector_insert_dynamic(struct 
vtn_builder *b, nir_ssa_def *src, + nir_ssa_def *insert, nir_ssa_def *index) +{ + nir_ssa_def *dest = vtn_vector_insert(b, src, insert, 0); + for (unsigned i = 1; i < src->num_components; i++) + dest = nir_bcsel(&b->nb, nir_ieq(&b->nb, index, nir_imm_int(&b->nb, i)), + vtn_vector_insert(b, src, insert, i), dest); + + return dest; +} + +/* Builds a num_components-wide vec whose i-th channel is chosen by indices[i]: values below src0->num_components read that channel of src0, larger values read src1 (rebased by src0->num_components), and 0xffffffff reads a 1-component undef. */ +static nir_ssa_def * +vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components, + nir_ssa_def *src0, nir_ssa_def *src1, + const uint32_t *indices) +{ + nir_alu_instr *vec = create_vec(b->shader, num_components); + + nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(b->shader, 1); + nir_builder_instr_insert(&b->nb, &undef->instr); + + for (unsigned i = 0; i < num_components; i++) { + uint32_t index = indices[i]; + if (index == 0xffffffff) { + vec->src[i].src = nir_src_for_ssa(&undef->def); + } else if (index < src0->num_components) { + vec->src[i].src = nir_src_for_ssa(src0); + vec->src[i].swizzle[0] = index; + } else { + vec->src[i].src = nir_src_for_ssa(src1); + vec->src[i].swizzle[0] = index - src0->num_components; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +/* + * Concatenates a number of vectors/scalars together to produce a vector: + * every component of every source, in source order, becomes one channel of + * the result. + */ +static nir_ssa_def * +vtn_vector_construct(struct vtn_builder *b, unsigned num_components, + unsigned num_srcs, nir_ssa_def **srcs) +{ + nir_alu_instr *vec = create_vec(b->shader, num_components); + + unsigned dest_idx = 0; + for (unsigned i = 0; i < num_srcs; i++) { + nir_ssa_def *src = srcs[i]; + for (unsigned j = 0; j < src->num_components; j++) { + vec->src[dest_idx].src = nir_src_for_ssa(src); + vec->src[dest_idx].swizzle[0] = j; + dest_idx++; + } + } + + nir_builder_instr_insert(&b->nb, &vec->instr); + + return &vec->dest.dest.ssa; +} + +/* Recursively duplicates a vtn_ssa_value tree, allocating the new nodes from mem_ctx. */ +static struct vtn_ssa_value * +vtn_composite_copy(void *mem_ctx, struct vtn_ssa_value *src) +{ + struct vtn_ssa_value *dest = rzalloc(mem_ctx, struct vtn_ssa_value); + dest->type =
src->type; + + if (glsl_type_is_vector_or_scalar(src->type)) { + dest->def = src->def; + } else { + unsigned elems = glsl_get_length(src->type); + + dest->elems = ralloc_array(mem_ctx, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) + dest->elems[i] = vtn_composite_copy(mem_ctx, src->elems[i]); + } + + return dest; +} + +static struct vtn_ssa_value * +vtn_composite_insert(struct vtn_builder *b, struct vtn_ssa_value *src, + struct vtn_ssa_value *insert, const uint32_t *indices, + unsigned num_indices) +{ + struct vtn_ssa_value *dest = vtn_composite_copy(b, src); + + struct vtn_ssa_value *cur = dest; + unsigned i; + for (i = 0; i < num_indices - 1; i++) { + cur = cur->elems[indices[i]]; + } + + if (glsl_type_is_vector_or_scalar(cur->type)) { + /* According to the SPIR-V spec, OpCompositeInsert may work down to + * the component granularity. In that case, the last index will be + * the index to insert the scalar into the vector. + */ + + cur->def = vtn_vector_insert(b, cur->def, insert->def, indices[i]); + } else { + cur->elems[indices[i]] = insert; + } + + return dest; +} + +static struct vtn_ssa_value * +vtn_composite_extract(struct vtn_builder *b, struct vtn_ssa_value *src, + const uint32_t *indices, unsigned num_indices) +{ + struct vtn_ssa_value *cur = src; + for (unsigned i = 0; i < num_indices; i++) { + if (glsl_type_is_vector_or_scalar(cur->type)) { + assert(i == num_indices - 1); + /* According to the SPIR-V spec, OpCompositeExtract may work down to + * the component granularity. The last index will be the index of the + * vector to extract. 
+ */ + + struct vtn_ssa_value *ret = rzalloc(b, struct vtn_ssa_value); + ret->type = glsl_scalar_type(glsl_get_base_type(cur->type)); + ret->def = vtn_vector_extract(b, cur->def, indices[i]); + return ret; + } + + /* Aggregate (struct/array/matrix): step down into the selected member so the next index applies to it. Without this the loop returned the original composite unchanged. */ + cur = cur->elems[indices[i]]; + } + + return cur; +} + +/* Dispatches the SPIR-V vector/composite opcodes. The result is stored in the SSA value pushed for id w[2], whose type is taken from the type id in w[1]. */ +static void +vtn_handle_composite(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa = vtn_create_ssa_value(b, type); + + switch (opcode) { + case SpvOpVectorExtractDynamic: + val->ssa->def = vtn_vector_extract_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def); + break; + + case SpvOpVectorInsertDynamic: + val->ssa->def = vtn_vector_insert_dynamic(b, vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + vtn_ssa_value(b, w[5])->def); + break; + + case SpvOpVectorShuffle: + val->ssa->def = vtn_vector_shuffle(b, glsl_get_vector_elements(type), + vtn_ssa_value(b, w[3])->def, + vtn_ssa_value(b, w[4])->def, + w + 5); + break; + + case SpvOpCompositeConstruct: { + unsigned elems = count - 3; + if (glsl_type_is_vector_or_scalar(type)) { + nir_ssa_def *srcs[4]; + for (unsigned i = 0; i < elems; i++) + srcs[i] = vtn_ssa_value(b, w[3 + i])->def; + val->ssa->def = + vtn_vector_construct(b, glsl_get_vector_elements(type), + elems, srcs); + } else { + val->ssa->elems = ralloc_array(b, struct vtn_ssa_value *, elems); + for (unsigned i = 0; i < elems; i++) + val->ssa->elems[i] = vtn_ssa_value(b, w[3 + i]); + } + break; + } + case SpvOpCompositeExtract: + val->ssa = vtn_composite_extract(b, vtn_ssa_value(b, w[3]), + w + 4, count - 4); + break; + + case SpvOpCompositeInsert: + val->ssa = vtn_composite_insert(b, vtn_ssa_value(b, w[4]), + vtn_ssa_value(b, w[3]), + w + 5, count - 5); + break; + + case SpvOpCopyObject: + val->ssa = vtn_composite_copy(b, vtn_ssa_value(b, w[3])); + break; + + default: + unreachable("unknown composite
operation"); + } +} + +static void +vtn_handle_barrier(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + nir_intrinsic_op intrinsic_op; + switch (opcode) { + case SpvOpEmitVertex: + case SpvOpEmitStreamVertex: + intrinsic_op = nir_intrinsic_emit_vertex; + break; + case SpvOpEndPrimitive: + case SpvOpEndStreamPrimitive: + intrinsic_op = nir_intrinsic_end_primitive; + break; + case SpvOpMemoryBarrier: + intrinsic_op = nir_intrinsic_memory_barrier; + break; + case SpvOpControlBarrier: + default: + unreachable("unknown barrier instruction"); + } + + nir_intrinsic_instr *intrin = + nir_intrinsic_instr_create(b->shader, intrinsic_op); + + if (opcode == SpvOpEmitStreamVertex || opcode == SpvOpEndStreamPrimitive) + intrin->const_index[0] = w[1]; + + nir_builder_instr_insert(&b->nb, &intrin->instr); +} + +static void +vtn_phi_node_init(struct vtn_builder *b, struct vtn_ssa_value *val) +{ + if (glsl_type_is_vector_or_scalar(val->type)) { + nir_phi_instr *phi = nir_phi_instr_create(b->shader); + nir_ssa_dest_init(&phi->instr, &phi->dest, + glsl_get_vector_elements(val->type), NULL); + exec_list_make_empty(&phi->srcs); + nir_builder_instr_insert(&b->nb, &phi->instr); + val->def = &phi->dest.ssa; + } else { + unsigned elems = glsl_get_length(val->type); + for (unsigned i = 0; i < elems; i++) + vtn_phi_node_init(b, val->elems[i]); + } +} + +static struct vtn_ssa_value * +vtn_phi_node_create(struct vtn_builder *b, const struct glsl_type *type) +{ + struct vtn_ssa_value *val = vtn_create_ssa_value(b, type); + vtn_phi_node_init(b, val); + return val; +} + +static void +vtn_handle_phi_first_pass(struct vtn_builder *b, const uint32_t *w) +{ + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa); + const struct glsl_type *type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + val->ssa = vtn_phi_node_create(b, type); +} + +static void +vtn_phi_node_add_src(struct vtn_ssa_value *phi, const nir_block *pred, + struct vtn_ssa_value *val) 
+{ + assert(phi->type == val->type); + if (glsl_type_is_vector_or_scalar(phi->type)) { + nir_phi_instr *phi_instr = nir_instr_as_phi(phi->def->parent_instr); + nir_phi_src *src = ralloc(phi_instr, nir_phi_src); + src->pred = (nir_block *) pred; + src->src = nir_src_for_ssa(val->def); + exec_list_push_tail(&phi_instr->srcs, &src->node); + } else { + unsigned elems = glsl_get_length(phi->type); + for (unsigned i = 0; i < elems; i++) + vtn_phi_node_add_src(phi->elems[i], pred, val->elems[i]); + } +} + +static struct vtn_ssa_value * +vtn_get_phi_node_src(struct vtn_builder *b, nir_block *block, + const struct glsl_type *type, const uint32_t *w, + unsigned count) +{ + struct hash_entry *entry = _mesa_hash_table_search(b->block_table, block); + if (entry) { + struct vtn_block *spv_block = entry->data; + for (unsigned off = 4; off < count; off += 2) { + if (spv_block == vtn_value(b, w[off], vtn_value_type_block)->block) { + return vtn_ssa_value(b, w[off - 1]); + } + } + } + + b->nb.cursor = nir_before_block(block); + struct vtn_ssa_value *phi = vtn_phi_node_create(b, type); + + struct set_entry *entry2; + set_foreach(block->predecessors, entry2) { + nir_block *pred = (nir_block *) entry2->key; + struct vtn_ssa_value *val = vtn_get_phi_node_src(b, pred, type, w, + count); + vtn_phi_node_add_src(phi, pred, val); + } + + return phi; +} + +static bool +vtn_handle_phi_second_pass(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + if (opcode == SpvOpLabel) { + b->block = vtn_value(b, w[1], vtn_value_type_block)->block; + return true; + } + + if (opcode != SpvOpPhi) + return true; + + struct vtn_ssa_value *phi = vtn_value(b, w[2], vtn_value_type_ssa)->ssa; + + struct set_entry *entry; + set_foreach(b->block->block->predecessors, entry) { + nir_block *pred = (nir_block *) entry->key; + + struct vtn_ssa_value *val = vtn_get_phi_node_src(b, pred, phi->type, w, + count); + vtn_phi_node_add_src(phi, pred, val); + } + + return true; +} + +static unsigned 
+gl_primitive_from_spv_execution_mode(SpvExecutionMode mode) +{ + switch (mode) { + case SpvExecutionModeInputPoints: + case SpvExecutionModeOutputPoints: + return 0; /* GL_POINTS */ + case SpvExecutionModeInputLines: + return 1; /* GL_LINES */ + case SpvExecutionModeInputLinesAdjacency: + return 0x000A; /* GL_LINES_ADJACENCY_ARB */ + case SpvExecutionModeInputTriangles: + return 4; /* GL_TRIANGLES */ + case SpvExecutionModeInputTrianglesAdjacency: + return 0x000C; /* GL_TRIANGLES_ADJACENCY_ARB */ + case SpvExecutionModeInputQuads: + return 7; /* GL_QUADS */ + case SpvExecutionModeInputIsolines: + return 0x8E7A; /* GL_ISOLINES */ + case SpvExecutionModeOutputLineStrip: + return 3; /* GL_LINE_STRIP */ + case SpvExecutionModeOutputTriangleStrip: + return 5; /* GL_TRIANGLE_STRIP */ + default: + assert(!"Invalid primitive type"); + return 4; /* same value as GL_TRIANGLES above */ + } +} + +/* Maps a geometry-shader input execution mode to the number of vertices in one input primitive; any other mode asserts and yields 0. */ +static unsigned +vertices_in_from_spv_execution_mode(SpvExecutionMode mode) +{ + switch (mode) { + case SpvExecutionModeInputPoints: + return 1; + case SpvExecutionModeInputLines: + return 2; + case SpvExecutionModeInputLinesAdjacency: + return 4; + case SpvExecutionModeInputTriangles: + return 3; + case SpvExecutionModeInputTrianglesAdjacency: + return 6; + default: + assert(!"Invalid GS input mode"); + return 0; + } +} + +static bool +vtn_handle_preamble_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpSource: + case SpvOpSourceExtension: + case SpvOpExtension: + /* Unhandled, but these are for debug so that's ok.
*/ + break; + + case SpvOpCapability: + switch ((SpvCapability)w[1]) { + case SpvCapabilityMatrix: + case SpvCapabilityShader: + /* All shaders support these */ + break; + case SpvCapabilityGeometry: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + break; + default: + assert(!"Unsupported capability"); + } + break; + + case SpvOpExtInstImport: + vtn_handle_extension(b, opcode, w, count); + break; + + case SpvOpMemoryModel: + assert(w[1] == SpvAddressingModelLogical); + assert(w[2] == SpvMemoryModelGLSL450); + break; + + case SpvOpEntryPoint: + assert(b->entry_point == NULL); + b->entry_point = &b->values[w[2]]; + b->execution_model = w[1]; + break; + + case SpvOpExecutionMode: + assert(b->entry_point == &b->values[w[1]]); + + SpvExecutionMode mode = w[2]; + switch(mode) { + case SpvExecutionModeOriginUpperLeft: + case SpvExecutionModeOriginLowerLeft: + b->origin_upper_left = (mode == SpvExecutionModeOriginUpperLeft); + break; + + case SpvExecutionModeEarlyFragmentTests: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.early_fragment_tests = true; + break; + + case SpvExecutionModeInvocations: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + b->shader->info.gs.invocations = w[3]; + break; + + case SpvExecutionModeDepthReplacing: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_ANY; + break; + case SpvExecutionModeDepthGreater: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_GREATER; + break; + case SpvExecutionModeDepthLess: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_LESS; + break; + case SpvExecutionModeDepthUnchanged: + assert(b->shader->stage == MESA_SHADER_FRAGMENT); + b->shader->info.fs.depth_layout = FRAG_DEPTH_LAYOUT_UNCHANGED; + break; + + case SpvExecutionModeLocalSize: + assert(b->shader->stage == MESA_SHADER_COMPUTE); + 
b->shader->info.cs.local_size[0] = w[3]; + b->shader->info.cs.local_size[1] = w[4]; + b->shader->info.cs.local_size[2] = w[5]; + break; + case SpvExecutionModeLocalSizeHint: + break; /* Nothing to do with this */ + + case SpvExecutionModeOutputVertices: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + b->shader->info.gs.vertices_out = w[3]; + break; + + /* Input primitive modes: only the geometry stage is handled here. */ + case SpvExecutionModeInputPoints: + case SpvExecutionModeInputLines: + case SpvExecutionModeInputLinesAdjacency: + case SpvExecutionModeInputTriangles: + case SpvExecutionModeInputTrianglesAdjacency: + case SpvExecutionModeInputQuads: + case SpvExecutionModeInputIsolines: + if (b->shader->stage == MESA_SHADER_GEOMETRY) { + b->shader->info.gs.vertices_in = + vertices_in_from_spv_execution_mode(mode); + } else { + assert(!"Tesselation shaders not yet supported"); + } + break; + + case SpvExecutionModeOutputPoints: + case SpvExecutionModeOutputLineStrip: + case SpvExecutionModeOutputTriangleStrip: + assert(b->shader->stage == MESA_SHADER_GEOMETRY); + b->shader->info.gs.output_primitive = + gl_primitive_from_spv_execution_mode(mode); + break; + + case SpvExecutionModeSpacingEqual: + case SpvExecutionModeSpacingFractionalEven: + case SpvExecutionModeSpacingFractionalOdd: + case SpvExecutionModeVertexOrderCw: + case SpvExecutionModeVertexOrderCcw: + case SpvExecutionModePointMode: + assert(!"TODO: Add tessellation metadata"); + break; + + case SpvExecutionModePixelCenterInteger: + case SpvExecutionModeXfb: + assert(!"Unhandled execution mode"); + break; + + case SpvExecutionModeVecTypeHint: + case SpvExecutionModeContractionOff: + case SpvExecutionModeIndependentForwardProgress: + break; /* OpenCL */ + } + break; + + case SpvOpString: + vtn_push_value(b, w[1], vtn_value_type_string)->str = + vtn_string_literal(b, &w[2], count - 2); + break; + + case SpvOpName: + b->values[w[1]].name = vtn_string_literal(b, &w[2], count - 2); + break; + + case SpvOpMemberName: + /* TODO */ + break; + + case SpvOpLine: + break; /*
Ignored for now */ + + case SpvOpDecorationGroup: + case SpvOpDecorate: + case SpvOpMemberDecorate: + case SpvOpGroupDecorate: + case SpvOpGroupMemberDecorate: + vtn_handle_decoration(b, opcode, w, count); + break; + + case SpvOpTypeVoid: + case SpvOpTypeBool: + case SpvOpTypeInt: + case SpvOpTypeFloat: + case SpvOpTypeVector: + case SpvOpTypeMatrix: + case SpvOpTypeImage: + case SpvOpTypeSampler: + case SpvOpTypeSampledImage: + case SpvOpTypeArray: + case SpvOpTypeRuntimeArray: + case SpvOpTypeStruct: + case SpvOpTypeOpaque: + case SpvOpTypePointer: + case SpvOpTypeFunction: + case SpvOpTypeEvent: + case SpvOpTypeDeviceEvent: + case SpvOpTypeReserveId: + case SpvOpTypeQueue: + case SpvOpTypePipe: + vtn_handle_type(b, opcode, w, count); + break; + + case SpvOpConstantTrue: + case SpvOpConstantFalse: + case SpvOpConstant: + case SpvOpConstantComposite: + case SpvOpConstantSampler: + case SpvOpSpecConstantTrue: + case SpvOpSpecConstantFalse: + case SpvOpSpecConstant: + case SpvOpSpecConstantComposite: + vtn_handle_constant(b, opcode, w, count); + break; + + case SpvOpVariable: + vtn_handle_variables(b, opcode, w, count); + break; + + default: + return false; /* End of preamble */ + } + + return true; +} + +static bool +vtn_handle_first_cfg_pass_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpFunction: { + assert(b->func == NULL); + b->func = rzalloc(b, struct vtn_function); + + const struct glsl_type *result_type = + vtn_value(b, w[1], vtn_value_type_type)->type->type; + struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_function); + const struct glsl_type *func_type = + vtn_value(b, w[4], vtn_value_type_type)->type->type; + + assert(glsl_get_function_return_type(func_type) == result_type); + + nir_function *func = + nir_function_create(b->shader, ralloc_strdup(b->shader, val->name)); + + nir_function_overload *overload = nir_function_overload_create(func); + overload->num_params = 
glsl_get_length(func_type); + overload->params = ralloc_array(overload, nir_parameter, + overload->num_params); + for (unsigned i = 0; i < overload->num_params; i++) { + const struct glsl_function_param *param = + glsl_get_function_param(func_type, i); + overload->params[i].type = param->type; + if (param->in) { + if (param->out) { + overload->params[i].param_type = nir_parameter_inout; + } else { + overload->params[i].param_type = nir_parameter_in; + } + } else { + if (param->out) { + overload->params[i].param_type = nir_parameter_out; + } else { + assert(!"Parameter is neither in nor out"); + } + } + } + b->func->overload = overload; + break; + } + + case SpvOpFunctionEnd: + b->func->end = w; + b->func = NULL; + break; + + case SpvOpFunctionParameter: + break; /* Does nothing */ + + case SpvOpLabel: { + assert(b->block == NULL); + b->block = rzalloc(b, struct vtn_block); + b->block->label = w; + vtn_push_value(b, w[1], vtn_value_type_block)->block = b->block; + + if (b->func->start_block == NULL) { + /* This is the first block encountered for this function. In this + * case, we set the start block and add it to the list of + * implemented functions that we'll walk later. 
+ */ + b->func->start_block = b->block; + exec_list_push_tail(&b->functions, &b->func->node); + } + break; + } + + case SpvOpBranch: + case SpvOpBranchConditional: + case SpvOpSwitch: + case SpvOpKill: + case SpvOpReturn: + case SpvOpReturnValue: + case SpvOpUnreachable: + assert(b->block); + b->block->branch = w; + b->block = NULL; + break; + + case SpvOpSelectionMerge: + case SpvOpLoopMerge: + assert(b->block && b->block->merge_op == SpvOpNop); + b->block->merge_op = opcode; + b->block->merge_block_id = w[1]; + break; + + default: + /* Continue on as per normal */ + return true; + } + + return true; +} + +static bool +vtn_handle_body_instruction(struct vtn_builder *b, SpvOp opcode, + const uint32_t *w, unsigned count) +{ + switch (opcode) { + case SpvOpLabel: { + struct vtn_block *block = vtn_value(b, w[1], vtn_value_type_block)->block; + assert(block->block == NULL); + + block->block = nir_cursor_current_block(b->nb.cursor); + break; + } + + case SpvOpLoopMerge: + case SpvOpSelectionMerge: + /* This is handled by cfg pre-pass and walk_blocks */ + break; + + case SpvOpUndef: + vtn_push_value(b, w[2], vtn_value_type_undef); + break; + + case SpvOpExtInst: + vtn_handle_extension(b, opcode, w, count); + break; + + case SpvOpVariable: + case SpvOpLoad: + case SpvOpStore: + case SpvOpCopyMemory: + case SpvOpCopyMemorySized: + case SpvOpAccessChain: + case SpvOpInBoundsAccessChain: + case SpvOpArrayLength: + vtn_handle_variables(b, opcode, w, count); + break; + + case SpvOpFunctionCall: + vtn_handle_function_call(b, opcode, w, count); + break; + + case SpvOpSampledImage: + case SpvOpImageSampleImplicitLod: + case SpvOpImageSampleExplicitLod: + case SpvOpImageSampleDrefImplicitLod: + case SpvOpImageSampleDrefExplicitLod: + case SpvOpImageSampleProjImplicitLod: + case SpvOpImageSampleProjExplicitLod: + case SpvOpImageSampleProjDrefImplicitLod: + case SpvOpImageSampleProjDrefExplicitLod: + case SpvOpImageFetch: + case SpvOpImageGather: + case SpvOpImageDrefGather: + case 
SpvOpImageQuerySizeLod: + case SpvOpImageQuerySize: + case SpvOpImageQueryLod: + case SpvOpImageQueryLevels: + case SpvOpImageQuerySamples: + vtn_handle_texture(b, opcode, w, count); + break; + + case SpvOpImageRead: + case SpvOpImageWrite: + case SpvOpImageTexelPointer: + vtn_handle_image(b, opcode, w, count); + break; + + /* Atomics: only image-pointer operands are routed anywhere (to vtn_handle_image); any other pointer kind asserts. */ + case SpvOpAtomicExchange: + case SpvOpAtomicCompareExchange: + case SpvOpAtomicCompareExchangeWeak: + case SpvOpAtomicIIncrement: + case SpvOpAtomicIDecrement: + case SpvOpAtomicIAdd: + case SpvOpAtomicISub: + case SpvOpAtomicSMin: + case SpvOpAtomicUMin: + case SpvOpAtomicSMax: + case SpvOpAtomicUMax: + case SpvOpAtomicAnd: + case SpvOpAtomicOr: + case SpvOpAtomicXor: { + struct vtn_value *pointer = vtn_untyped_value(b, w[3]); + if (pointer->value_type == vtn_value_type_image_pointer) { + vtn_handle_image(b, opcode, w, count); + } else { + assert(!"Atomic buffers not yet implemented"); + } + /* Stop here: without this break the atomic opcodes fell through into the ALU group below and were handed to vtn_handle_alu as well. */ + break; + } + + case SpvOpSNegate: + case SpvOpFNegate: + case SpvOpNot: + case SpvOpAny: + case SpvOpAll: + case SpvOpConvertFToU: + case SpvOpConvertFToS: + case SpvOpConvertSToF: + case SpvOpConvertUToF: + case SpvOpUConvert: + case SpvOpSConvert: + case SpvOpFConvert: + case SpvOpConvertPtrToU: + case SpvOpConvertUToPtr: + case SpvOpPtrCastToGeneric: + case SpvOpGenericCastToPtr: + case SpvOpBitcast: + case SpvOpIsNan: + case SpvOpIsInf: + case SpvOpIsFinite: + case SpvOpIsNormal: + case SpvOpSignBitSet: + case SpvOpLessOrGreater: + case SpvOpOrdered: + case SpvOpUnordered: + case SpvOpIAdd: + case SpvOpFAdd: + case SpvOpISub: + case SpvOpFSub: + case SpvOpIMul: + case SpvOpFMul: + case SpvOpUDiv: + case SpvOpSDiv: + case SpvOpFDiv: + case SpvOpUMod: + case SpvOpSRem: + case SpvOpSMod: + case SpvOpFRem: + case SpvOpFMod: + case SpvOpVectorTimesScalar: + case SpvOpDot: + case SpvOpShiftRightLogical: + case SpvOpShiftRightArithmetic: + case SpvOpShiftLeftLogical: + case SpvOpLogicalOr: + case SpvOpLogicalEqual: + case SpvOpLogicalNotEqual: + case SpvOpLogicalAnd: +
case SpvOpBitwiseOr: + case SpvOpBitwiseXor: + case SpvOpBitwiseAnd: + case SpvOpSelect: + case SpvOpIEqual: + case SpvOpFOrdEqual: + case SpvOpFUnordEqual: + case SpvOpINotEqual: + case SpvOpFOrdNotEqual: + case SpvOpFUnordNotEqual: + case SpvOpULessThan: + case SpvOpSLessThan: + case SpvOpFOrdLessThan: + case SpvOpFUnordLessThan: + case SpvOpUGreaterThan: + case SpvOpSGreaterThan: + case SpvOpFOrdGreaterThan: + case SpvOpFUnordGreaterThan: + case SpvOpULessThanEqual: + case SpvOpSLessThanEqual: + case SpvOpFOrdLessThanEqual: + case SpvOpFUnordLessThanEqual: + case SpvOpUGreaterThanEqual: + case SpvOpSGreaterThanEqual: + case SpvOpFOrdGreaterThanEqual: + case SpvOpFUnordGreaterThanEqual: + case SpvOpDPdx: + case SpvOpDPdy: + case SpvOpFwidth: + case SpvOpDPdxFine: + case SpvOpDPdyFine: + case SpvOpFwidthFine: + case SpvOpDPdxCoarse: + case SpvOpDPdyCoarse: + case SpvOpFwidthCoarse: + vtn_handle_alu(b, opcode, w, count); + break; + + case SpvOpTranspose: + case SpvOpOuterProduct: + case SpvOpMatrixTimesScalar: + case SpvOpVectorTimesMatrix: + case SpvOpMatrixTimesVector: + case SpvOpMatrixTimesMatrix: + vtn_handle_matrix_alu(b, opcode, w, count); + break; + + case SpvOpVectorExtractDynamic: + case SpvOpVectorInsertDynamic: + case SpvOpVectorShuffle: + case SpvOpCompositeConstruct: + case SpvOpCompositeExtract: + case SpvOpCompositeInsert: + case SpvOpCopyObject: + vtn_handle_composite(b, opcode, w, count); + break; + + case SpvOpPhi: + vtn_handle_phi_first_pass(b, w); + break; + + case SpvOpEmitVertex: + case SpvOpEndPrimitive: + case SpvOpEmitStreamVertex: + case SpvOpEndStreamPrimitive: + case SpvOpControlBarrier: + case SpvOpMemoryBarrier: + vtn_handle_barrier(b, opcode, w, count); + break; + + default: + unreachable("Unhandled opcode"); + } + + return true; +} + +static void +vtn_walk_blocks(struct vtn_builder *b, struct vtn_block *start, + struct vtn_block *break_block, struct vtn_block *cont_block, + struct vtn_block *end_block) +{ + struct vtn_block *block = 
start; + while (block != end_block) { + if (block->merge_op == SpvOpLoopMerge) { + /* This is the jump into a loop. The block carrying the OpLoopMerge is used as the continue target and its merge block as the break target. */ + struct vtn_block *new_cont_block = block; + struct vtn_block *new_break_block = + vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; + + nir_loop *loop = nir_loop_create(b->shader); + nir_cf_node_insert(b->nb.cursor, &loop->cf_node); + + /* Reset the merge_op to prevent infinite recursion */ + block->merge_op = SpvOpNop; + + b->nb.cursor = nir_after_cf_list(&loop->body); + vtn_walk_blocks(b, block, new_break_block, new_cont_block, NULL); + + b->nb.cursor = nir_after_cf_node(&loop->cf_node); + block = new_break_block; + continue; + } + + const uint32_t *w = block->branch; + SpvOp branch_op = w[0] & SpvOpCodeMask; + + b->block = block; + vtn_foreach_instruction(b, block->label, block->branch, + vtn_handle_body_instruction); + + nir_block *cur_block = nir_cursor_current_block(b->nb.cursor); + assert(cur_block == block->block); + _mesa_hash_table_insert(b->block_table, cur_block, block); + + switch (branch_op) { + case SpvOpBranch: { + struct vtn_block *branch_block = + vtn_value(b, w[1], vtn_value_type_block)->block; + + if (branch_block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } else if (branch_block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } else if (branch_block == end_block) { + /* We're branching to the merge block of an if, since for loops + * and functions end_block == NULL, so we're done here. + */ + return; + } else { + /* We're branching to another block, and according to the rules, + * we can only branch to another block with one predecessor (so + * we're the only one jumping to it) so we can just process it + * next.
+ */ + block = branch_block; + continue; + } + } + + case SpvOpBranchConditional: { + /* Gather up the branch blocks */ + struct vtn_block *then_block = + vtn_value(b, w[2], vtn_value_type_block)->block; + struct vtn_block *else_block = + vtn_value(b, w[3], vtn_value_type_block)->block; + + nir_if *if_stmt = nir_if_create(b->shader); + if_stmt->condition = nir_src_for_ssa(vtn_ssa_value(b, w[1])->def); + nir_cf_node_insert(b->nb.cursor, &if_stmt->cf_node); + + if (then_block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_instr_insert_after_cf_list(&if_stmt->then_list, + &jump->instr); + block = else_block; + } else if (else_block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_instr_insert_after_cf_list(&if_stmt->else_list, + &jump->instr); + block = then_block; + } else if (then_block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_instr_insert_after_cf_list(&if_stmt->then_list, + &jump->instr); + block = else_block; + } else if (else_block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_instr_insert_after_cf_list(&if_stmt->else_list, + &jump->instr); + block = then_block; + } else { + /* According to the rules we're branching to two blocks that don't + * have any other predecessors, so we can handle this as a + * conventional if. 
+ */ + assert(block->merge_op == SpvOpSelectionMerge); + struct vtn_block *merge_block = + vtn_value(b, block->merge_block_id, vtn_value_type_block)->block; + + b->nb.cursor = nir_after_cf_list(&if_stmt->then_list); + vtn_walk_blocks(b, then_block, break_block, cont_block, merge_block); + + b->nb.cursor = nir_after_cf_list(&if_stmt->else_list); + vtn_walk_blocks(b, else_block, break_block, cont_block, merge_block); + + b->nb.cursor = nir_after_cf_node(&if_stmt->cf_node); + block = merge_block; + continue; + } + + /* If we got here then we inserted a predicated break or continue + * above and we need to handle the other case. We already set + * `block` above to indicate what block to visit after the + * predicated break. + */ + + /* It's possible that the other branch is also a break/continue. + * If it is, we handle that here. + */ + if (block == break_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_break); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } else if (block == cont_block) { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_continue); + nir_builder_instr_insert(&b->nb, &jump->instr); + + return; + } + + /* If we got here then there was a predicated break/continue but + * the other half of the if has stuff in it. `block` was already + * set above so there is nothing left for us to do. 
+ */ + continue; + } + + case SpvOpReturn: { + nir_jump_instr *jump = nir_jump_instr_create(b->shader, + nir_jump_return); + nir_builder_instr_insert(&b->nb, &jump->instr); + return; + } + + case SpvOpKill: { + nir_intrinsic_instr *discard = + nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard); + nir_builder_instr_insert(&b->nb, &discard->instr); + return; + } + + case SpvOpSwitch: + case SpvOpReturnValue: + case SpvOpUnreachable: + default: + unreachable("Unhandled opcode"); + } + } +} + +nir_shader * +spirv_to_nir(const uint32_t *words, size_t word_count, + gl_shader_stage stage, + const nir_shader_compiler_options *options) +{ + const uint32_t *word_end = words + word_count; + + /* Handle the SPIR-V header (first 5 dwords: words[0..4]). NOTE(review): it is only validated with assert(), so nothing is checked when asserts are compiled out. */ + assert(word_count > 5); + + assert(words[0] == SpvMagicNumber); + assert(words[1] == 99); + /* words[2] == generator magic */ + unsigned value_id_bound = words[3]; + assert(words[4] == 0); + + words+= 5; + + nir_shader *shader = nir_shader_create(NULL, stage, options); + + /* Initialize the vtn_builder object */ + struct vtn_builder *b = rzalloc(NULL, struct vtn_builder); + b->shader = shader; + b->value_id_bound = value_id_bound; + b->values = rzalloc_array(b, struct vtn_value, value_id_bound); + exec_list_make_empty(&b->functions); + + /* XXX: We shouldn't need these defaults */ + if (b->shader->stage == MESA_SHADER_GEOMETRY) { + b->shader->info.gs.vertices_in = 3; + b->shader->info.gs.output_primitive = 4; /* GL_TRIANGLES */ + } + + /* Handle all the preamble instructions */ + words = vtn_foreach_instruction(b, words, word_end, + vtn_handle_preamble_instruction); + + /* Do a very quick CFG analysis pass */ + vtn_foreach_instruction(b, words, word_end, + vtn_handle_first_cfg_pass_instruction); + + foreach_list_typed(struct vtn_function, func, node, &b->functions) { + b->impl = nir_function_impl_create(func->overload); + b->const_table = _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); + b->block_table 
= _mesa_hash_table_create(b, _mesa_hash_pointer, + _mesa_key_pointer_equal); + nir_builder_init(&b->nb, b->impl); + b->nb.cursor = nir_after_cf_list(&b->impl->body); + vtn_walk_blocks(b, func->start_block, NULL, NULL, NULL); + vtn_foreach_instruction(b, func->start_block->label, func->end, + vtn_handle_phi_second_pass); + } + + /* Because we can still have output reads in NIR, we need to lower + * outputs to temporaries before we are truly finished. + */ + nir_lower_outputs_to_temporaries(shader); + + ralloc_free(b); + + return shader; +} diff --cc src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp index 7fa4ce87f18,e684bdbb72c..b8990cef89e --- a/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp +++ b/src/mesa/drivers/dri/i965/brw_blorp_blit_eu.cpp @@@ -85,7 -85,7 +85,7 @@@ brw_blorp_eu_emitter::emit_texture_look unsigned msg_length) { fs_inst *inst = new (mem_ctx) fs_inst(op, 16, dst, brw_message_reg(base_mrf), - fs_reg(0u), fs_reg(0u)); - brw_imm_ud(0u)); ++ brw_imm_ud(0u), brw_imm_ud(0u)); inst->base_mrf = base_mrf; inst->mlen = msg_length; diff --cc src/mesa/drivers/dri/i965/brw_fs.h index 9b56afd292f,2d408b2f363..658608f9951 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@@ -230,9 -224,6 +224,8 @@@ public fs_reg mcs, int gather_component, bool is_cube_array, - bool is_rect, + uint32_t surface, + fs_reg surface_reg, uint32_t sampler, fs_reg sampler_reg); fs_reg emit_mcs_fetch(const fs_reg &coordinate, unsigned components, diff --cc src/mesa/drivers/dri/i965/brw_fs_nir.cpp index 3a666b8debc,c439da2ec50..6b0c4a5b36e --- a/src/mesa/drivers/dri/i965/brw_fs_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_nir.cpp @@@ -2522,10 -2649,8 +2649,10 @@@ fs_visitor::nir_emit_ssbo_atomic(const void fs_visitor::nir_emit_texture(const fs_builder &bld, nir_tex_instr *instr) { + unsigned texture = instr->texture_index; unsigned sampler = instr->sampler_index; - fs_reg texture_reg(texture); - fs_reg sampler_reg(sampler); ++ fs_reg 
texture_reg(brw_imm_ud(texture)); + fs_reg sampler_reg(brw_imm_ud(sampler)); int gather_component = instr->component; @@@ -2602,17 -2726,9 +2728,17 @@@ } brw_mark_surface_used(prog_data, max_used); + /* Emit code to evaluate the actual indexing expression */ + texture_reg = vgrf(glsl_type::uint_type); - bld.ADD(texture_reg, src, fs_reg(texture)); ++ bld.ADD(texture_reg, src, brw_imm_ud(texture)); + texture_reg = bld.emit_uniformize(texture_reg); + break; + } + + case nir_tex_src_sampler_offset: { /* Emit code to evaluate the actual indexing expression */ sampler_reg = vgrf(glsl_type::uint_type); - bld.ADD(sampler_reg, src, fs_reg(sampler)); + bld.ADD(sampler_reg, src, brw_imm_ud(sampler)); sampler_reg = bld.emit_uniformize(sampler_reg); break; } @@@ -2622,12 -2738,13 +2748,13 @@@ } } - if (instr->op == nir_texop_txf_ms) { + if (instr->op == nir_texop_txf_ms || + instr->op == nir_texop_samples_identical) { if (devinfo->gen >= 7 && - key_tex->compressed_multisample_layout_mask & (1 << sampler)) { - mcs = emit_mcs_fetch(coordinate, instr->coord_components, sampler_reg); + key_tex->compressed_multisample_layout_mask & (1 << texture)) { + mcs = emit_mcs_fetch(coordinate, instr->coord_components, texture_reg); } else { - mcs = fs_reg(0u); + mcs = brw_imm_ud(0u); } } @@@ -2675,8 -2793,7 +2803,7 @@@ emit_texture(op, dest_type, coordinate, instr->coord_components, shadow_comparitor, lod, lod2, lod_components, sample_index, tex_offset, mcs, gather_component, - is_cube_array, is_rect, - texture, texture_reg, sampler, sampler_reg); - is_cube_array, sampler, sampler_reg); ++ is_cube_array, texture, texture_reg, sampler, sampler_reg); fs_reg dest = get_nir_dest(instr->dest); dest.type = this->result.type; diff --cc src/mesa/drivers/dri/i965/brw_fs_visitor.cpp index 2647a40c730,1e202165cb6..e82acd141f3 --- a/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_visitor.cpp @@@ -203,7 -87,7 +87,7 @@@ fs_visitor::emit_mcs_fetch(const fs_re const fs_reg 
dest = vgrf(glsl_type::uvec4_type); const fs_reg srcs[] = { coordinate, fs_reg(), fs_reg(), fs_reg(), fs_reg(), fs_reg(), - texture, texture, fs_reg(), fs_reg(components), fs_reg(0) - sampler, fs_reg(), brw_imm_ud(components), brw_imm_d(0) ++ texture, texture, fs_reg(), brw_imm_ud(components), brw_imm_d(0) }; fs_inst *inst = bld.emit(SHADER_OPCODE_TXF_MCS_LOGICAL, dest, srcs, ARRAY_SIZE(srcs)); @@@ -227,9 -111,6 +111,8 @@@ fs_visitor::emit_texture(ir_texture_opc fs_reg mcs, int gather_component, bool is_cube_array, - bool is_rect, + uint32_t surface, + fs_reg surface_reg, uint32_t sampler, fs_reg sampler_reg) { @@@ -275,8 -150,8 +152,8 @@@ fs_reg dst = vgrf(glsl_type::get_instance(dest_type->base_type, 4, 1)); const fs_reg srcs[] = { coordinate, shadow_c, lod, lod2, - sample_index, mcs, sampler_reg, offset_value, + sample_index, mcs, surface_reg, sampler_reg, offset_value, - fs_reg(coord_components), fs_reg(grad_components) + brw_imm_d(coord_components), brw_imm_d(grad_components) }; enum opcode opcode; @@@ -327,11 -202,18 +204,18 @@@ inst->offset = offset_value.ud; if (op == ir_tg4) { - inst->offset |= - gather_channel(gather_component, surface, sampler) << 16; /* M0.2:16-17 */ + if (gather_component == 1 && - key_tex->gather_channel_quirk_mask & (1 << sampler)) { ++ key_tex->gather_channel_quirk_mask & (1 << surface)) { + /* gather4 sampler is broken for green channel on RG32F -- + * we must ask for blue instead. 
+ */ + inst->offset |= 2 << 16; + } else { + inst->offset |= gather_component << 16; + } if (devinfo->gen == 6) - emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], dst); + emit_gen6_gather_wa(key_tex->gen6_gather_wa[surface], dst); } /* fixup #layers for cube map arrays */ diff --cc src/mesa/drivers/dri/i965/brw_surface_formats.c index 0d49ab7b431,55e7e649620..69eed4bc629 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.c +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.c @@@ -71,257 -86,258 +72,258 @@@ * - VOL2_Part1 section 2.5.11 Format Conversion (vertex fetch). * - VOL4_Part1 section 2.12.2.1.2 Sampler Output Channel Mapping. * - VOL4_Part1 section 3.9.11 Render Target Write. + * - Render Target Surface Types [SKL+] */ -const struct surface_format_info surface_formats[] = { +const struct brw_surface_format_info surface_formats[] = { - /* smpl filt shad CK RT AB VB SO color */ - SF( Y, 50, x, x, Y, Y, Y, Y, x, R32G32B32A32_FLOAT) - SF( Y, x, x, x, Y, x, Y, Y, x, R32G32B32A32_SINT) - SF( Y, x, x, x, Y, x, Y, Y, x, R32G32B32A32_UINT) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_SNORM) - SF( x, x, x, x, x, x, Y, x, x, R64G64_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, R32G32B32X32_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32A32_USCALED) - SF( x, x, x, x, x, x, x, x, x, R32G32B32A32_SFIXED) - SF( x, x, x, x, x, x, x, x, x, R64G64_PASSTHRU) - SF( Y, 50, x, x, x, x, Y, Y, x, R32G32B32_FLOAT) - SF( Y, x, x, x, x, x, Y, Y, x, R32G32B32_SINT) - SF( Y, x, x, x, x, x, Y, Y, x, R32G32B32_UINT) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32_SNORM) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R32G32B32_USCALED) - SF( x, x, x, x, x, x, x, x, x, R32G32B32_SFIXED) - SF( Y, Y, x, x, Y, 45, Y, x, 60, R16G16B16A16_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, R16G16B16A16_SNORM) - 
SF( Y, x, x, x, Y, x, Y, x, x, R16G16B16A16_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, R16G16B16A16_UINT) - SF( Y, Y, x, x, Y, Y, Y, x, x, R16G16B16A16_FLOAT) - SF( Y, 50, x, x, Y, Y, Y, Y, x, R32G32_FLOAT) - SF( Y, 70, x, x, Y, Y, Y, Y, x, R32G32_FLOAT_LD) - SF( Y, x, x, x, Y, x, Y, Y, x, R32G32_SINT) - SF( Y, x, x, x, Y, x, Y, Y, x, R32G32_UINT) - SF( Y, 50, Y, x, x, x, x, x, x, R32_FLOAT_X8X24_TYPELESS) - SF( Y, x, x, x, x, x, x, x, x, X32_TYPELESS_G8X24_UINT) - SF( Y, 50, x, x, x, x, x, x, x, L32A32_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, R32G32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, R32G32_SNORM) - SF( x, x, x, x, x, x, Y, x, x, R64_FLOAT) - SF( Y, Y, x, x, x, x, x, x, x, R16G16B16X16_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, R16G16B16X16_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, A32X32_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, L32X32_FLOAT) - SF( Y, 50, x, x, x, x, x, x, x, I32X32_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, R16G16B16A16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R16G16B16A16_USCALED) - SF( x, x, x, x, x, x, Y, x, x, R32G32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R32G32_USCALED) - SF( x, x, x, x, x, x, x, x, x, R32G32_SFIXED) - SF( x, x, x, x, x, x, x, x, x, R64_PASSTHRU) - SF( Y, Y, x, Y, Y, Y, Y, x, 60, B8G8R8A8_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, B8G8R8A8_UNORM_SRGB) - /* smpl filt shad CK RT AB VB SO color */ - SF( Y, Y, x, x, Y, Y, Y, x, 60, R10G10B10A2_UNORM) - SF( Y, Y, x, x, x, x, x, x, 60, R10G10B10A2_UNORM_SRGB) - SF( Y, x, x, x, Y, x, Y, x, x, R10G10B10A2_UINT) - SF( Y, Y, x, x, x, Y, Y, x, x, R10G10B10_SNORM_A2_UNORM) - SF( Y, Y, x, x, Y, Y, Y, x, 60, R8G8B8A8_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, 60, R8G8B8A8_UNORM_SRGB) - SF( Y, Y, x, x, Y, 60, Y, x, x, R8G8B8A8_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, R8G8B8A8_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, R8G8B8A8_UINT) - SF( Y, Y, x, x, Y, 45, Y, x, x, R16G16_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, R16G16_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, R16G16_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, 
R16G16_UINT) - SF( Y, Y, x, x, Y, Y, Y, x, x, R16G16_FLOAT) - SF( Y, Y, x, x, Y, Y, x, x, 60, B10G10R10A2_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, 60, B10G10R10A2_UNORM_SRGB) - SF( Y, Y, x, x, Y, Y, Y, x, x, R11G11B10_FLOAT) - SF( Y, x, x, x, Y, x, Y, Y, x, R32_SINT) - SF( Y, x, x, x, Y, x, Y, Y, x, R32_UINT) - SF( Y, 50, Y, x, Y, Y, Y, Y, x, R32_FLOAT) - SF( Y, 50, Y, x, x, x, x, x, x, R24_UNORM_X8_TYPELESS) - SF( Y, x, x, x, x, x, x, x, x, X24_TYPELESS_G8_UINT) - SF( Y, Y, x, x, x, x, x, x, x, L16A16_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, I24X8_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, L24X8_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, A24X8_UNORM) - SF( Y, 50, Y, x, x, x, x, x, x, I32_FLOAT) - SF( Y, 50, Y, x, x, x, x, x, x, L32_FLOAT) - SF( Y, 50, Y, x, x, x, x, x, x, A32_FLOAT) - SF( Y, Y, x, Y, x, x, x, x, 60, B8G8R8X8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, B8G8R8X8_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, R8G8B8X8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, R8G8B8X8_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, R9G9B9E5_SHAREDEXP) - SF( Y, Y, x, x, x, x, x, x, x, B10G10R10X2_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, L16A16_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, R32_UNORM) - SF( x, x, x, x, x, x, Y, x, x, R32_SNORM) - /* smpl filt shad CK RT AB VB SO color */ - SF( x, x, x, x, x, x, Y, x, x, R10G10B10X2_USCALED) - SF( x, x, x, x, x, x, Y, x, x, R8G8B8A8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R8G8B8A8_USCALED) - SF( x, x, x, x, x, x, Y, x, x, R16G16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R16G16_USCALED) - SF( x, x, x, x, x, x, Y, x, x, R32_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R32_USCALED) - SF( Y, Y, x, Y, Y, Y, x, x, x, B5G6R5_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, B5G6R5_UNORM_SRGB) - SF( Y, Y, x, Y, Y, Y, x, x, x, B5G5R5A1_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, B5G5R5A1_UNORM_SRGB) - SF( Y, Y, x, Y, Y, Y, x, x, x, B4G4R4A4_UNORM) - SF( Y, Y, x, x, Y, Y, x, x, x, B4G4R4A4_UNORM_SRGB) - SF( Y, Y, x, x, Y, Y, Y, x, x, R8G8_UNORM) - SF( Y, Y, x, Y, Y, 60, 
Y, x, x, R8G8_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, R8G8_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, R8G8_UINT) - SF( Y, Y, Y, x, Y, 45, Y, x, 70, R16_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, R16_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, R16_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, R16_UINT) - SF( Y, Y, x, x, Y, Y, Y, x, x, R16_FLOAT) - SF(50, 50, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE0) - SF(50, 50, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE1) - SF( Y, Y, Y, x, x, x, x, x, x, I16_UNORM) - SF( Y, Y, Y, x, x, x, x, x, x, L16_UNORM) - SF( Y, Y, Y, x, x, x, x, x, x, A16_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, L8A8_UNORM) - SF( Y, Y, Y, x, x, x, x, x, x, I16_FLOAT) - SF( Y, Y, Y, x, x, x, x, x, x, L16_FLOAT) - SF( Y, Y, Y, x, x, x, x, x, x, A16_FLOAT) - SF(45, 45, x, x, x, x, x, x, x, L8A8_UNORM_SRGB) - SF( Y, Y, x, Y, x, x, x, x, x, R5G5_SNORM_B6_UNORM) - SF( x, x, x, x, Y, Y, x, x, x, B5G5R5X1_UNORM) - SF( x, x, x, x, Y, Y, x, x, x, B5G5R5X1_UNORM_SRGB) - SF( x, x, x, x, x, x, Y, x, x, R8G8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R8G8_USCALED) - /* smpl filt shad CK RT AB VB SO color */ - SF( x, x, x, x, x, x, Y, x, x, R16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R16_USCALED) - SF(50, 50, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE0) - SF(50, 50, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE1) - SF( x, x, x, x, x, x, x, x, x, A1B5G5R5_UNORM) - SF( x, x, x, x, x, x, x, x, x, A4B4G4R4_UNORM) - SF( x, x, x, x, x, x, x, x, x, L8A8_UINT) - SF( x, x, x, x, x, x, x, x, x, L8A8_SINT) - SF( Y, Y, x, 45, Y, Y, Y, x, x, R8_UNORM) - SF( Y, Y, x, x, Y, 60, Y, x, x, R8_SNORM) - SF( Y, x, x, x, Y, x, Y, x, x, R8_SINT) - SF( Y, x, x, x, Y, x, Y, x, x, R8_UINT) - SF( Y, Y, x, Y, Y, Y, x, x, x, A8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, I8_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, L8_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, P4A4_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, A4P4_UNORM) - SF( x, x, x, x, x, x, Y, x, x, R8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R8_USCALED) - SF(45, 45, x, x, x, x, x, x, x, 
P8_UNORM_PALETTE0) - SF(45, 45, x, x, x, x, x, x, x, L8_UNORM_SRGB) - SF(45, 45, x, x, x, x, x, x, x, P8_UNORM_PALETTE1) - SF(45, 45, x, x, x, x, x, x, x, P4A4_UNORM_PALETTE1) - SF(45, 45, x, x, x, x, x, x, x, A4P4_UNORM_PALETTE1) - SF( x, x, x, x, x, x, x, x, x, Y8_SNORM) - SF( x, x, x, x, x, x, x, x, x, L8_UINT) - SF( x, x, x, x, x, x, x, x, x, L8_SINT) - SF( x, x, x, x, x, x, x, x, x, I8_UINT) - SF( x, x, x, x, x, x, x, x, x, I8_SINT) - SF(45, 45, x, x, x, x, x, x, x, DXT1_RGB_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, R1_UINT) - SF( Y, Y, x, Y, Y, x, x, x, 60, YCRCB_NORMAL) - SF( Y, Y, x, Y, Y, x, x, x, 60, YCRCB_SWAPUVY) - SF(45, 45, x, x, x, x, x, x, x, P2_UNORM_PALETTE0) - SF(45, 45, x, x, x, x, x, x, x, P2_UNORM_PALETTE1) - SF( Y, Y, x, Y, x, x, x, x, x, BC1_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, BC2_UNORM) - SF( Y, Y, x, Y, x, x, x, x, x, BC3_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BC4_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BC5_UNORM) - SF( Y, Y, x, x, x, x, x, x, x, BC1_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, BC2_UNORM_SRGB) - SF( Y, Y, x, x, x, x, x, x, x, BC3_UNORM_SRGB) - SF( Y, x, x, x, x, x, x, x, x, MONO8) - SF( Y, Y, x, x, Y, x, x, x, 60, YCRCB_SWAPUV) - SF( Y, Y, x, x, Y, x, x, x, 60, YCRCB_SWAPY) - SF( Y, Y, x, x, x, x, x, x, x, DXT1_RGB) - /* smpl filt shad CK RT AB VB SO color */ - SF( Y, Y, x, x, x, x, x, x, x, FXT1) - SF( x, x, x, x, x, x, Y, x, x, R8G8B8_UNORM) - SF( x, x, x, x, x, x, Y, x, x, R8G8B8_SNORM) - SF( x, x, x, x, x, x, Y, x, x, R8G8B8_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, R8G8B8_USCALED) - SF( x, x, x, x, x, x, Y, x, x, R64G64B64A64_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, R64G64B64_FLOAT) - SF( Y, Y, x, x, x, x, x, x, x, BC4_SNORM) - SF( Y, Y, x, x, x, x, x, x, x, BC5_SNORM) - SF(50, 50, x, x, x, x, 60, x, x, R16G16B16_FLOAT) - SF( x, x, x, x, x, x, Y, x, x, R16G16B16_UNORM) - SF( x, x, x, x, x, x, Y, x, x, R16G16B16_SNORM) - SF( x, x, x, x, x, x, Y, x, x, R16G16B16_SSCALED) - SF( x, x, x, x, x, x, Y, x, x, 
R16G16B16_USCALED) - SF(70, 70, x, x, x, x, x, x, x, BC6H_SF16) - SF(70, 70, x, x, x, x, x, x, x, BC7_UNORM) - SF(70, 70, x, x, x, x, x, x, x, BC7_UNORM_SRGB) - SF(70, 70, x, x, x, x, x, x, x, BC6H_UF16) - SF( x, x, x, x, x, x, x, x, x, PLANAR_420_8) - SF( x, x, x, x, x, x, x, x, x, R8G8B8_UNORM_SRGB) - SF( x, x, x, x, x, x, x, x, x, ETC1_RGB8) - SF( x, x, x, x, x, x, x, x, x, ETC2_RGB8) - SF( x, x, x, x, x, x, x, x, x, EAC_R11) - SF( x, x, x, x, x, x, x, x, x, EAC_RG11) - SF( x, x, x, x, x, x, x, x, x, EAC_SIGNED_R11) - SF( x, x, x, x, x, x, x, x, x, EAC_SIGNED_RG11) - SF( x, x, x, x, x, x, x, x, x, ETC2_SRGB8) - SF( x, x, x, x, x, x, x, x, x, R16G16B16_UINT) - SF( x, x, x, x, x, x, x, x, x, R16G16B16_SINT) - SF( x, x, x, x, x, x, x, x, x, R32_SFIXED) - SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_SNORM) - SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_USCALED) - SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_SSCALED) - SF( x, x, x, x, x, x, x, x, x, R10G10B10A2_SINT) - SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_SNORM) - SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_USCALED) - SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_SSCALED) - SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_UINT) - SF( x, x, x, x, x, x, x, x, x, B10G10R10A2_SINT) - SF( x, x, x, x, x, x, x, x, x, R64G64B64A64_PASSTHRU) - SF( x, x, x, x, x, x, x, x, x, R64G64B64_PASSTHRU) - SF( x, x, x, x, x, x, x, x, x, ETC2_RGB8_PTA) - SF( x, x, x, x, x, x, x, x, x, ETC2_SRGB8_PTA) - SF( x, x, x, x, x, x, x, x, x, ETC2_EAC_RGBA8) - SF( x, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8) - SF( x, x, x, x, x, x, x, x, x, R8G8B8_UINT) - SF( x, x, x, x, x, x, x, x, x, R8G8B8_SINT) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x4_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x5_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x5_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x6_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x5_FLT16) - SF(80, 80, x, x, x, 
x, x, x, x, ASTC_LDR_2D_8x6_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x8_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x5_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x6_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x8_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x10_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x10_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x12_FLT16) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x4_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_5x5_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x5_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_6x6_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x5_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x6_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_8x8_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x5_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x6_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x8_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_10x10_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x10_U8sRGB) - SF(80, 80, x, x, x, x, x, x, x, ASTC_LDR_2D_12x12_U8sRGB) + /* smpl filt shad CK RT AB VB SO color ccs_e */ + SF( Y, 50, x, x, Y, Y, Y, Y, x, 90, R32G32B32A32_FLOAT) + SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32B32A32_SINT) + SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32B32A32_UINT) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_SNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R64G64_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, x, R32G32B32X32_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32A32_USCALED) + SF( x, x, x, x, x, x, x, x, x, x, R32G32B32A32_SFIXED) + SF( x, x, x, x, x, x, x, x, x, x, R64G64_PASSTHRU) + SF( Y, 50, x, x, x, x, Y, Y, x, x, R32G32B32_FLOAT) + SF( Y, x, x, 
x, x, x, Y, Y, x, x, R32G32B32_SINT) + SF( Y, x, x, x, x, x, Y, Y, x, x, R32G32B32_UINT) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_SNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32B32_USCALED) + SF( x, x, x, x, x, x, x, x, x, x, R32G32B32_SFIXED) + SF( Y, Y, x, x, Y, 45, Y, x, 60, 90, R16G16B16A16_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, 90, R16G16B16A16_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16B16A16_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16B16A16_UINT) + SF( Y, Y, x, x, Y, Y, Y, x, x, 90, R16G16B16A16_FLOAT) + SF( Y, 50, x, x, Y, Y, Y, Y, x, 90, R32G32_FLOAT) + SF( Y, 70, x, x, Y, Y, Y, Y, x, x, R32G32_FLOAT_LD) + SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32_SINT) + SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32G32_UINT) + SF( Y, 50, Y, x, x, x, x, x, x, x, R32_FLOAT_X8X24_TYPELESS) + SF( Y, x, x, x, x, x, x, x, x, x, X32_TYPELESS_G8X24_UINT) + SF( Y, 50, x, x, x, x, x, x, x, x, L32A32_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32_SNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R64_FLOAT) + SF( Y, Y, x, x, x, x, x, x, x, x, R16G16B16X16_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, 90, R16G16B16X16_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, x, A32X32_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, x, L32X32_FLOAT) + SF( Y, 50, x, x, x, x, x, x, x, x, I32X32_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16A16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16A16_USCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R32G32_USCALED) + SF( x, x, x, x, x, x, x, x, x, x, R32G32_SFIXED) + SF( x, x, x, x, x, x, x, x, x, x, R64_PASSTHRU) + SF( Y, Y, x, Y, Y, Y, Y, x, 60, 90, B8G8R8A8_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, x, B8G8R8A8_UNORM_SRGB) + /* smpl filt shad CK RT AB VB SO color ccs_e */ + SF( Y, Y, x, x, Y, Y, Y, x, 60, x, R10G10B10A2_UNORM) + SF( Y, Y, x, x, x, 
x, x, x, 60, x, R10G10B10A2_UNORM_SRGB) + SF( Y, x, x, x, Y, x, Y, x, x, x, R10G10B10A2_UINT) + SF( Y, Y, x, x, x, Y, Y, x, x, x, R10G10B10_SNORM_A2_UNORM) + SF( Y, Y, x, x, Y, Y, Y, x, 60, 90, R8G8B8A8_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, 60, x, R8G8B8A8_UNORM_SRGB) + SF( Y, Y, x, x, Y, 60, Y, x, x, 90, R8G8B8A8_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, 90, R8G8B8A8_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, 90, R8G8B8A8_UINT) + SF( Y, Y, x, x, Y, 45, Y, x, x, 90, R16G16_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, 90, R16G16_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, 90, R16G16_UINT) + SF( Y, Y, x, x, Y, Y, Y, x, x, 90, R16G16_FLOAT) + SF( Y, Y, x, x, Y, Y, x, x, 60, x, B10G10R10A2_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, 60, x, B10G10R10A2_UNORM_SRGB) + SF( Y, Y, x, x, Y, Y, Y, x, x, x, R11G11B10_FLOAT) + SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32_SINT) + SF( Y, x, x, x, Y, x, Y, Y, x, 90, R32_UINT) + SF( Y, 50, Y, x, Y, Y, Y, Y, x, 90, R32_FLOAT) + SF( Y, 50, Y, x, x, x, x, x, x, x, R24_UNORM_X8_TYPELESS) + SF( Y, x, x, x, x, x, x, x, x, x, X24_TYPELESS_G8_UINT) + SF( Y, Y, x, x, x, x, x, x, x, x, L16A16_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, x, I24X8_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, x, L24X8_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, x, A24X8_UNORM) + SF( Y, 50, Y, x, x, x, x, x, x, x, I32_FLOAT) + SF( Y, 50, Y, x, x, x, x, x, x, x, L32_FLOAT) + SF( Y, 50, Y, x, x, x, x, x, x, x, A32_FLOAT) + SF( Y, Y, x, Y, x, x, x, x, 60, 90, B8G8R8X8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, B8G8R8X8_UNORM_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, x, R8G8B8X8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, R8G8B8X8_UNORM_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, x, R9G9B9E5_SHAREDEXP) + SF( Y, Y, x, x, x, x, x, x, x, x, B10G10R10X2_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, L16A16_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, x, R32_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R32_SNORM) + /* smpl filt shad CK RT AB VB SO color ccs_e */ + SF( x, x, x, x, 
x, x, Y, x, x, x, R10G10B10X2_USCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8A8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8A8_USCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16_USCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R32_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R32_USCALED) + SF( Y, Y, x, Y, Y, Y, x, x, x, x, B5G6R5_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, x, B5G6R5_UNORM_SRGB) + SF( Y, Y, x, Y, Y, Y, x, x, x, x, B5G5R5A1_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, x, B5G5R5A1_UNORM_SRGB) + SF( Y, Y, x, Y, Y, Y, x, x, x, x, B4G4R4A4_UNORM) + SF( Y, Y, x, x, Y, Y, x, x, x, x, B4G4R4A4_UNORM_SRGB) + SF( Y, Y, x, x, Y, Y, Y, x, x, x, R8G8_UNORM) + SF( Y, Y, x, Y, Y, 60, Y, x, x, x, R8G8_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, x, R8G8_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, x, R8G8_UINT) + SF( Y, Y, Y, x, Y, 45, Y, x, 70, x, R16_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, x, R16_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, x, R16_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, x, R16_UINT) + SF( Y, Y, x, x, Y, Y, Y, x, x, x, R16_FLOAT) + SF(50, 50, x, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE0) + SF(50, 50, x, x, x, x, x, x, x, x, A8P8_UNORM_PALETTE1) + SF( Y, Y, Y, x, x, x, x, x, x, x, I16_UNORM) + SF( Y, Y, Y, x, x, x, x, x, x, x, L16_UNORM) + SF( Y, Y, Y, x, x, x, x, x, x, x, A16_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, x, L8A8_UNORM) + SF( Y, Y, Y, x, x, x, x, x, x, x, I16_FLOAT) + SF( Y, Y, Y, x, x, x, x, x, x, x, L16_FLOAT) + SF( Y, Y, Y, x, x, x, x, x, x, x, A16_FLOAT) + SF(45, 45, x, x, x, x, x, x, x, x, L8A8_UNORM_SRGB) + SF( Y, Y, x, Y, x, x, x, x, x, x, R5G5_SNORM_B6_UNORM) + SF( x, x, x, x, Y, Y, x, x, x, x, B5G5R5X1_UNORM) + SF( x, x, x, x, Y, Y, x, x, x, x, B5G5R5X1_UNORM_SRGB) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8_USCALED) + /* smpl filt shad CK RT AB VB SO color ccs_e */ + SF( x, x, x, x, x, x, Y, x, x, x, R16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, 
R16_USCALED) + SF(50, 50, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE0) + SF(50, 50, x, x, x, x, x, x, x, x, P8A8_UNORM_PALETTE1) + SF( x, x, x, x, x, x, x, x, x, x, A1B5G5R5_UNORM) + SF( x, x, x, x, x, x, x, x, x, x, A4B4G4R4_UNORM) + SF( x, x, x, x, x, x, x, x, x, x, L8A8_UINT) + SF( x, x, x, x, x, x, x, x, x, x, L8A8_SINT) + SF( Y, Y, x, 45, Y, Y, Y, x, x, x, R8_UNORM) + SF( Y, Y, x, x, Y, 60, Y, x, x, x, R8_SNORM) + SF( Y, x, x, x, Y, x, Y, x, x, x, R8_SINT) + SF( Y, x, x, x, Y, x, Y, x, x, x, R8_UINT) + SF( Y, Y, x, Y, Y, Y, x, x, x, x, A8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, I8_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, x, L8_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, P4A4_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, A4P4_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R8_USCALED) + SF(45, 45, x, x, x, x, x, x, x, x, P8_UNORM_PALETTE0) + SF(45, 45, x, x, x, x, x, x, x, x, L8_UNORM_SRGB) + SF(45, 45, x, x, x, x, x, x, x, x, P8_UNORM_PALETTE1) + SF(45, 45, x, x, x, x, x, x, x, x, P4A4_UNORM_PALETTE1) + SF(45, 45, x, x, x, x, x, x, x, x, A4P4_UNORM_PALETTE1) + SF( x, x, x, x, x, x, x, x, x, x, Y8_SNORM) + SF( x, x, x, x, x, x, x, x, x, x, L8_UINT) + SF( x, x, x, x, x, x, x, x, x, x, L8_SINT) + SF( x, x, x, x, x, x, x, x, x, x, I8_UINT) + SF( x, x, x, x, x, x, x, x, x, x, I8_SINT) + SF(45, 45, x, x, x, x, x, x, x, x, DXT1_RGB_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, x, R1_UINT) + SF( Y, Y, x, Y, Y, x, x, x, 60, x, YCRCB_NORMAL) + SF( Y, Y, x, Y, Y, x, x, x, 60, x, YCRCB_SWAPUVY) + SF(45, 45, x, x, x, x, x, x, x, x, P2_UNORM_PALETTE0) + SF(45, 45, x, x, x, x, x, x, x, x, P2_UNORM_PALETTE1) + SF( Y, Y, x, Y, x, x, x, x, x, x, BC1_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, x, BC2_UNORM) + SF( Y, Y, x, Y, x, x, x, x, x, x, BC3_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, BC4_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, BC5_UNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, BC1_UNORM_SRGB) + SF( Y, Y, x, x, x, x, x, x, x, x, BC2_UNORM_SRGB) + 
SF( Y, Y, x, x, x, x, x, x, x, x, BC3_UNORM_SRGB) + SF( Y, x, x, x, x, x, x, x, x, x, MONO8) + SF( Y, Y, x, x, Y, x, x, x, 60, x, YCRCB_SWAPUV) + SF( Y, Y, x, x, Y, x, x, x, 60, x, YCRCB_SWAPY) + SF( Y, Y, x, x, x, x, x, x, x, x, DXT1_RGB) + /* smpl filt shad CK RT AB VB SO color ccs_e */ + SF( Y, Y, x, x, x, x, x, x, x, x, FXT1) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8_SNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R8G8B8_USCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R64G64B64A64_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, x, R64G64B64_FLOAT) + SF( Y, Y, x, x, x, x, x, x, x, x, BC4_SNORM) + SF( Y, Y, x, x, x, x, x, x, x, x, BC5_SNORM) + SF(50, 50, x, x, x, x, 60, x, x, x, R16G16B16_FLOAT) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16_UNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16_SNORM) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16_SSCALED) + SF( x, x, x, x, x, x, Y, x, x, x, R16G16B16_USCALED) + SF(70, 70, x, x, x, x, x, x, x, x, BC6H_SF16) + SF(70, 70, x, x, x, x, x, x, x, x, BC7_UNORM) + SF(70, 70, x, x, x, x, x, x, x, x, BC7_UNORM_SRGB) + SF(70, 70, x, x, x, x, x, x, x, x, BC6H_UF16) + SF( x, x, x, x, x, x, x, x, x, x, PLANAR_420_8) + SF( x, x, x, x, x, x, x, x, x, x, R8G8B8_UNORM_SRGB) + SF( x, x, x, x, x, x, x, x, x, x, ETC1_RGB8) + SF( x, x, x, x, x, x, x, x, x, x, ETC2_RGB8) + SF( x, x, x, x, x, x, x, x, x, x, EAC_R11) + SF( x, x, x, x, x, x, x, x, x, x, EAC_RG11) + SF( x, x, x, x, x, x, x, x, x, x, EAC_SIGNED_R11) + SF( x, x, x, x, x, x, x, x, x, x, EAC_SIGNED_RG11) + SF( x, x, x, x, x, x, x, x, x, x, ETC2_SRGB8) + SF( x, x, x, x, x, x, x, x, x, x, R16G16B16_UINT) + SF( x, x, x, x, x, x, x, x, x, x, R16G16B16_SINT) + SF( x, x, x, x, x, x, x, x, x, x, R32_SFIXED) + SF( x, x, x, x, x, x, x, x, x, x, R10G10B10A2_SNORM) + SF( x, x, x, x, x, x, x, x, x, x, R10G10B10A2_USCALED) + SF( x, x, x, x, x, x, x, x, x, x, R10G10B10A2_SSCALED) + SF( x, x, x, x, x, x, x, 
x, x, x, R10G10B10A2_SINT) + SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_SNORM) + SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_USCALED) + SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_SSCALED) + SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_UINT) + SF( x, x, x, x, x, x, x, x, x, x, B10G10R10A2_SINT) + SF( x, x, x, x, x, x, x, x, x, x, R64G64B64A64_PASSTHRU) + SF( x, x, x, x, x, x, x, x, x, x, R64G64B64_PASSTHRU) + SF( x, x, x, x, x, x, x, x, x, x, ETC2_RGB8_PTA) + SF( x, x, x, x, x, x, x, x, x, x, ETC2_SRGB8_PTA) + SF( x, x, x, x, x, x, x, x, x, x, ETC2_EAC_RGBA8) + SF( x, x, x, x, x, x, x, x, x, x, ETC2_EAC_SRGB8_A8) + SF( x, x, x, x, x, x, x, x, x, x, R8G8B8_UINT) + SF( x, x, x, x, x, x, x, x, x, x, R8G8B8_SINT) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5x4_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5x5_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6x5_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6x6_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x5_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x6_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x8_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x5_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x6_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x8_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x10_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12x10_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12x12_FLT16) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_4x4_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5x4_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_5x5_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6x5_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_6x6_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x5_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, 
ASTC_LDR_2D_8x6_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_8x8_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x5_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x6_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x8_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_10x10_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12x10_U8sRGB) + SF(80, 80, x, x, x, x, x, x, x, x, ASTC_LDR_2D_12x12_U8sRGB) }; #undef x #undef Y @@@ -771,6 -787,26 +773,26 @@@ brw_render_target_supported(struct brw_ return brw->format_supported_as_render_target[format]; } + /* + * True if the underlying hardware format can support lossless color + * compression. + */ + bool + brw_losslessly_compressible_format(struct brw_context *brw, + uint32_t brw_format) + { - const struct surface_format_info * const sinfo = ++ const struct brw_surface_format_info * const sinfo = + &surface_formats[brw_format]; + const int gen = brw->gen * 10; + + assert(brw->gen >= 9); + + if (gen >= sinfo->lossless_compression) + return true; + + return false; + } + GLuint translate_tex_format(struct brw_context *brw, mesa_format mesa_format, diff --cc src/mesa/drivers/dri/i965/brw_surface_formats.h index 5c7b60e680b,00000000000..a5cd49f5260 mode 100644,000000..100644 --- a/src/mesa/drivers/dri/i965/brw_surface_formats.h +++ b/src/mesa/drivers/dri/i965/brw_surface_formats.h @@@ -1,40 -1,0 +1,41 @@@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including 
the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#pragma once + +struct brw_surface_format_info { + bool exists; + int sampling; + int filtering; + int shadow_compare; + int chroma_key; + int render_target; + int alpha_blend; + int input_vb; + int streamed_output_vb; + int color_processing; ++ int lossless_compression; + const char *name; +}; + +extern const struct brw_surface_format_info surface_formats[]; diff --cc src/mesa/drivers/dri/i965/brw_vec4.h index 52d68c5a33d,3f674326284..f94f7128a07 --- a/src/mesa/drivers/dri/i965/brw_vec4.h +++ b/src/mesa/drivers/dri/i965/brw_vec4.h @@@ -273,11 -273,8 +273,9 @@@ public src_reg offset_value, src_reg mcs, bool is_cube_array, + uint32_t surface, src_reg surface_reg, uint32_t sampler, src_reg sampler_reg); - uint32_t gather_channel(unsigned gather_component, - uint32_t surface, uint32_t sampler); src_reg emit_mcs_fetch(const glsl_type *coordinate_type, src_reg coordinate, src_reg sampler); void emit_gen6_gather_wa(uint8_t wa, dst_reg dst); diff --cc src/mesa/drivers/dri/i965/brw_vec4_nir.cpp index bf098b41590,c777acf70a7..260b515ad42 --- a/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_nir.cpp @@@ -1580,10 -1575,8 +1581,10 @@@ glsl_type_for_nir_alu_type(nir_alu_typ void vec4_visitor::nir_emit_texture(nir_tex_instr *instr) { + unsigned texture = instr->texture_index; unsigned sampler = instr->sampler_index; - src_reg texture_reg = 
src_reg(texture); - src_reg sampler_reg = src_reg(sampler); ++ src_reg texture_reg = brw_imm_ud(texture); + src_reg sampler_reg = brw_imm_ud(sampler); src_reg coordinate; const glsl_type *coord_type = NULL; src_reg shadow_comparitor; @@@ -1690,15 -1666,6 +1673,15 @@@ brw_mark_surface_used(&prog_data->base, max_used); + /* Emit code to evaluate the actual indexing expression */ + src_reg src = get_nir_src(instr->src[i].src, 1); + src_reg temp(this, glsl_type::uint_type); - emit(ADD(dst_reg(temp), src, src_reg(texture))); ++ emit(ADD(dst_reg(temp), src, brw_imm_ud(texture))); + texture_reg = emit_uniformize(temp); + break; + } + + case nir_tex_src_sampler_offset: { /* Emit code to evaluate the actual indexing expression */ src_reg src = get_nir_src(instr->src[i].src, 1); src_reg temp(this, glsl_type::uint_type); @@@ -1727,8 -1705,17 +1721,17 @@@ } /* Stuff the channel select bits in the top of the texture offset */ - if (instr->op == nir_texop_tg4) - constant_offset |= gather_channel(instr->component, texture, sampler) << 16; + if (instr->op == nir_texop_tg4) { + if (instr->component == 1 && - (key_tex->gather_channel_quirk_mask & (1 << sampler))) { ++ (key_tex->gather_channel_quirk_mask & (1 << texture))) { + /* gather4 sampler is broken for green channel on RG32F -- + * we must ask for blue instead. 
+ */ + constant_offset |= 2 << 16; + } else { + constant_offset |= instr->component << 16; + } + } ir_texture_opcode op = ir_texture_opcode_for_nir_texop(instr->op); diff --cc src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp index 6b8798da71c,04ea1775ceb..caf1ee02bf0 --- a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp @@@ -1066,11 -1068,16 +1072,16 @@@ vec4_visitor::emit_texture(ir_texture_o } if (devinfo->gen == 6 && op == ir_tg4) { - emit_gen6_gather_wa(key_tex->gen6_gather_wa[sampler], inst->dst); + emit_gen6_gather_wa(key_tex->gen6_gather_wa[surface], inst->dst); } - swizzle_result(op, dest, - src_reg(inst->dst), sampler, dest_type); + if (op == ir_query_levels) { + /* # levels is in .w */ + src_reg swizzled(dest); + swizzled.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, + SWIZZLE_W, SWIZZLE_W); + emit(MOV(dest, swizzled)); + } } /** diff --cc src/vulkan/anv_pipeline.c index 1193d1e7a5d,00000000000..3d9e0705626 mode 100644,000000..100644 --- a/src/vulkan/anv_pipeline.c +++ b/src/vulkan/anv_pipeline.c @@@ -1,1169 -1,0 +1,1158 @@@ +/* + * Copyright © 2015 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include +#include +#include +#include +#include + +#include "anv_private.h" +#include "brw_nir.h" +#include "anv_nir.h" +#include "glsl/nir/nir_spirv.h" + +/* Needed for SWIZZLE macros */ +#include "program/prog_instruction.h" + +// Shader functions + +VkResult anv_CreateShaderModule( + VkDevice _device, + const VkShaderModuleCreateInfo* pCreateInfo, + VkShaderModule* pShaderModule) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + struct anv_shader_module *module; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + module = anv_device_alloc(device, sizeof(*module) + pCreateInfo->codeSize, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (module == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + module->nir = NULL; + module->size = pCreateInfo->codeSize; + memcpy(module->data, pCreateInfo->pCode, module->size); + + *pShaderModule = anv_shader_module_to_handle(module); + + return VK_SUCCESS; +} + +void anv_DestroyShaderModule( + VkDevice _device, + VkShaderModule _module) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_shader_module, module, _module); + + anv_device_free(device, module); +} + +VkResult anv_CreateShader( + VkDevice _device, + const VkShaderCreateInfo* pCreateInfo, + VkShader* pShader) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_shader_module, module, pCreateInfo->module); + struct anv_shader *shader; + + assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SHADER_CREATE_INFO); + assert(pCreateInfo->flags == 0); + + const char *name = pCreateInfo->pName ? 
pCreateInfo->pName : "main"; + size_t name_len = strlen(name); + + shader = anv_device_alloc(device, sizeof(*shader) + name_len + 1, 8, + VK_SYSTEM_ALLOC_TYPE_API_OBJECT); + if (shader == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + /* The entrypoint name (with its NUL terminator) is stored in the extra + * bytes allocated after the struct. + */ + shader->module = module; + memcpy(shader->entrypoint, name, name_len + 1); + + *pShader = anv_shader_to_handle(shader); + + return VK_SUCCESS; +} + +void anv_DestroyShader( + VkDevice _device, + VkShader _shader) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_shader, shader, _shader); + + anv_device_free(device, shader); +} + +#define SPIR_V_MAGIC_NUMBER 0x07230203 + +static const gl_shader_stage vk_shader_stage_to_mesa_stage[] = { + [VK_SHADER_STAGE_VERTEX] = MESA_SHADER_VERTEX, + [VK_SHADER_STAGE_TESS_CONTROL] = -1, + [VK_SHADER_STAGE_TESS_EVALUATION] = -1, + [VK_SHADER_STAGE_GEOMETRY] = MESA_SHADER_GEOMETRY, + [VK_SHADER_STAGE_FRAGMENT] = MESA_SHADER_FRAGMENT, + [VK_SHADER_STAGE_COMPUTE] = MESA_SHADER_COMPUTE, +}; + +bool +anv_is_scalar_shader_stage(const struct brw_compiler *compiler, + VkShaderStage stage) +{ - switch (stage) { - case VK_SHADER_STAGE_VERTEX: - return compiler->scalar_vs; - case VK_SHADER_STAGE_GEOMETRY: - return false; - case VK_SHADER_STAGE_FRAGMENT: - case VK_SHADER_STAGE_COMPUTE: - return true; - default: - unreachable("Unsupported shader stage"); - } ++ return compiler->scalar_stage[vk_shader_stage_to_mesa_stage[stage]]; +} + +/* Eventually, this will become part of anv_CreateShader. Unfortunately, + * we can't do that yet because we don't have the ability to copy nir. 
+ */ +static nir_shader * +anv_shader_compile_to_nir(struct anv_device *device, + struct anv_shader *shader, VkShaderStage vk_stage) +{ + if (strcmp(shader->entrypoint, "main") != 0) { + anv_finishme("Multiple shaders per module not really supported"); + } + + gl_shader_stage stage = vk_shader_stage_to_mesa_stage[vk_stage]; + const struct brw_compiler *compiler = + device->instance->physicalDevice.compiler; + const nir_shader_compiler_options *nir_options = + compiler->glsl_compiler_options[stage].NirOptions; + + nir_shader *nir; + if (shader->module->nir) { + /* Some things such as our meta clear/blit code will give us a NIR + * shader directly. In that case, we just ignore the SPIR-V entirely + * and just use the NIR shader */ + nir = shader->module->nir; + nir->options = nir_options; + } else { + uint32_t *spirv = (uint32_t *) shader->module->data; + assert(spirv[0] == SPIR_V_MAGIC_NUMBER); + assert(shader->module->size % 4 == 0); + + nir = spirv_to_nir(spirv, shader->module->size / 4, stage, nir_options); + } + nir_validate_shader(nir); + + /* Vulkan uses the separate-shader linking model */ + nir->info.separate_shader = true; + + /* Make sure the provided shader has exactly one entrypoint and that the + * name matches the name that came in from the VkShader. 
+ */ + nir_function_impl *entrypoint = NULL; + nir_foreach_overload(nir, overload) { + if (strcmp(shader->entrypoint, overload->function->name) == 0 && + overload->impl) { + assert(entrypoint == NULL); + entrypoint = overload->impl; + } + } + assert(entrypoint != NULL); + - brw_preprocess_nir(nir, &device->info, - anv_is_scalar_shader_stage(compiler, vk_stage)); ++ nir = brw_preprocess_nir(nir, compiler->scalar_stage[stage]); + + nir_shader_gather_info(nir, entrypoint); + + return nir; +} + +VkResult anv_CreatePipelineCache( + VkDevice device, + const VkPipelineCacheCreateInfo* pCreateInfo, + VkPipelineCache* pPipelineCache) +{ + pPipelineCache->handle = 1; + + stub_return(VK_SUCCESS); +} + +void anv_DestroyPipelineCache( + VkDevice _device, + VkPipelineCache _cache) +{ +} + +size_t anv_GetPipelineCacheSize( + VkDevice device, + VkPipelineCache pipelineCache) +{ + stub_return(0); +} + +VkResult anv_GetPipelineCacheData( + VkDevice device, + VkPipelineCache pipelineCache, + void* pData) +{ + stub_return(VK_UNSUPPORTED); +} + +VkResult anv_MergePipelineCaches( + VkDevice device, + VkPipelineCache destCache, + uint32_t srcCacheCount, + const VkPipelineCache* pSrcCaches) +{ + stub_return(VK_UNSUPPORTED); +} + +void anv_DestroyPipeline( + VkDevice _device, + VkPipeline _pipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline); + + anv_reloc_list_finish(&pipeline->batch_relocs, pipeline->device); + anv_state_stream_finish(&pipeline->program_stream); + if (pipeline->blend_state.map) + anv_state_pool_free(&device->dynamic_state_pool, pipeline->blend_state); + anv_device_free(pipeline->device, pipeline); +} + +static const uint32_t vk_to_gen_primitive_type[] = { + [VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = _3DPRIM_POINTLIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = _3DPRIM_LINELIST, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = _3DPRIM_LINESTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = _3DPRIM_TRILIST, + 
[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = _3DPRIM_TRISTRIP, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = _3DPRIM_TRIFAN, + [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_ADJ] = _3DPRIM_LINELIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_ADJ] = _3DPRIM_LINESTRIP_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_ADJ] = _3DPRIM_TRILIST_ADJ, + [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_ADJ] = _3DPRIM_TRISTRIP_ADJ, +/* [VK_PRIMITIVE_TOPOLOGY_PATCH] = _3DPRIM_PATCHLIST_1 */ +}; + +static void +populate_sampler_prog_key(const struct brw_device_info *devinfo, + struct brw_sampler_prog_key_data *key) +{ + /* XXX: Handle texture swizzle on HSW- */ + for (int i = 0; i < MAX_SAMPLERS; i++) { + /* Assume color sampler, no swizzling. (Works for BDW+) */ + key->swizzles[i] = SWIZZLE_XYZW; + } +} + +static void +populate_vs_prog_key(const struct brw_device_info *devinfo, + struct brw_vs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); + + /* XXX: Handle vertex input work-arounds */ + + /* XXX: Handle sampler_prog_key */ +} + +static void +populate_gs_prog_key(const struct brw_device_info *devinfo, + struct brw_gs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); +} + +static void +populate_wm_prog_key(const struct brw_device_info *devinfo, + const VkGraphicsPipelineCreateInfo *info, + struct brw_wm_prog_key *key) +{ + ANV_FROM_HANDLE(anv_render_pass, render_pass, info->renderPass); + + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); + + /* TODO: Fill out key->input_slots_valid */ + + /* Vulkan doesn't specify a default */ + key->high_quality_derivatives = false; + + /* XXX Vulkan doesn't appear to specify */ + key->clamp_fragment_color = false; + + /* Vulkan always specifies upper-left coordinates */ + key->drawable_height = 0; + key->render_to_fbo = false; + + key->nr_color_regions = render_pass->subpasses[info->subpass].color_count; + + key->replicate_alpha = 
key->nr_color_regions > 1 && + info->pColorBlendState->alphaToCoverageEnable; + + if (info->pMultisampleState && info->pMultisampleState->rasterSamples > 1) { + /* We should probably pull this out of the shader, but it's fairly + * harmless to compute it and then let dead-code take care of it. + */ + key->persample_shading = info->pMultisampleState->sampleShadingEnable; + if (key->persample_shading) + key->persample_2x = info->pMultisampleState->rasterSamples == 2; + + key->compute_pos_offset = info->pMultisampleState->sampleShadingEnable; + key->compute_sample_id = info->pMultisampleState->sampleShadingEnable; + } +} + +static void +populate_cs_prog_key(const struct brw_device_info *devinfo, + struct brw_cs_prog_key *key) +{ + memset(key, 0, sizeof(*key)); + + populate_sampler_prog_key(devinfo, &key->tex); +} + +static nir_shader * +anv_pipeline_compile(struct anv_pipeline *pipeline, + struct anv_shader *shader, + VkShaderStage stage, + struct brw_stage_prog_data *prog_data) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + + nir_shader *nir = anv_shader_compile_to_nir(pipeline->device, shader, stage); + if (nir == NULL) + return NULL; + + anv_nir_lower_push_constants(nir, anv_is_scalar_shader_stage(compiler, stage)); + + /* Figure out the number of parameters */ + prog_data->nr_params = 0; + + if (nir->num_uniforms > 0) { + /* If the shader uses any push constants at all, we'll just give + * them the maximum possible number + */ + prog_data->nr_params += MAX_PUSH_CONSTANTS_SIZE / sizeof(float); + } + + if (pipeline->layout && pipeline->layout->stage[stage].has_dynamic_offsets) + prog_data->nr_params += MAX_DYNAMIC_BUFFERS * 2; + + if (prog_data->nr_params > 0) { + prog_data->param = (const gl_constant_value **) + anv_device_alloc(pipeline->device, + prog_data->nr_params * sizeof(gl_constant_value *), + 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL_SHADER); + if (prog_data->param == NULL) { + /* Callers treat a NULL return as out-of-host-memory. They only take + * ownership of the NIR shader when the module did not supply one, + * so only free it in that case. + */ + if (shader->module->nir == NULL) + ralloc_free(nir); + return NULL; + } + + /* We now set the param values to be offsets into a + * 
anv_push_constants structure. Since the compiler doesn't + * actually dereference any of the gl_constant_value pointers in the + * params array, it doesn't really matter what we put here. + */ + struct anv_push_constants *null_data = NULL; + if (nir->num_uniforms > 0) { + /* Fill out the push constants section of the param array */ + for (unsigned i = 0; i < MAX_PUSH_CONSTANTS_SIZE / sizeof(float); i++) + prog_data->param[i] = (const gl_constant_value *) + &null_data->client_data[i * sizeof(float)]; + } + } + + /* Set up dynamic offsets */ + anv_nir_apply_dynamic_offsets(pipeline, nir, prog_data); + + /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */ + anv_nir_apply_pipeline_layout(nir, pipeline->layout); + + /* All binding table offsets provided by apply_pipeline_layout() are + * relative to the start of the binding table (plus MAX_RTS for FS). + */ + unsigned bias = stage == VK_SHADER_STAGE_FRAGMENT ? MAX_RTS : 0; + prog_data->binding_table.size_bytes = 0; + prog_data->binding_table.texture_start = bias; + prog_data->binding_table.ubo_start = bias; + prog_data->binding_table.ssbo_start = bias; + prog_data->binding_table.image_start = bias; + + /* Finish the optimization and compilation process */ - brw_postprocess_nir(nir, &pipeline->device->info, ++ nir = brw_lower_nir(nir, &pipeline->device->info, NULL, + anv_is_scalar_shader_stage(compiler, stage)); + + /* nir_lower_io will only handle the push constants; we need to set this + * to the full number of possible uniforms. 
+ */ + nir->num_uniforms = prog_data->nr_params; + + return nir; +} + +static uint32_t +anv_pipeline_upload_kernel(struct anv_pipeline *pipeline, + const void *data, size_t size) +{ + struct anv_state state = + anv_state_stream_alloc(&pipeline->program_stream, size, 64); + + assert(size < pipeline->program_stream.block_pool->block_size); + + memcpy(state.map, data, size); + + return state.offset; +} +static void +anv_pipeline_add_compiled_stage(struct anv_pipeline *pipeline, + VkShaderStage stage, + struct brw_stage_prog_data *prog_data) +{ + struct brw_device_info *devinfo = &pipeline->device->info; + uint32_t max_threads[] = { + [VK_SHADER_STAGE_VERTEX] = devinfo->max_vs_threads, + [VK_SHADER_STAGE_TESS_CONTROL] = 0, + [VK_SHADER_STAGE_TESS_EVALUATION] = 0, + [VK_SHADER_STAGE_GEOMETRY] = devinfo->max_gs_threads, + [VK_SHADER_STAGE_FRAGMENT] = devinfo->max_wm_threads, + [VK_SHADER_STAGE_COMPUTE] = devinfo->max_cs_threads, + }; + + pipeline->prog_data[stage] = prog_data; + pipeline->active_stages |= 1 << stage; + pipeline->scratch_start[stage] = pipeline->total_scratch; + pipeline->total_scratch = + align_u32(pipeline->total_scratch, 1024) + + prog_data->total_scratch * max_threads[stage]; +} + +static VkResult +anv_pipeline_compile_vs(struct anv_pipeline *pipeline, + const VkGraphicsPipelineCreateInfo *info, + struct anv_shader *shader) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_vs_prog_data *prog_data = &pipeline->vs_prog_data; + struct brw_vs_prog_key key; + + populate_vs_prog_key(&pipeline->device->info, &key); + + /* TODO: Look up shader in cache */ + + memset(prog_data, 0, sizeof(*prog_data)); + + nir_shader *nir = anv_pipeline_compile(pipeline, shader, + VK_SHADER_STAGE_VERTEX, + &prog_data->base.base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if (shader->module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + 
prog_data->inputs_read = nir->info.inputs_read; + /* outputs_written is a bitmask with one bit per varying slot, so test the + * PSIZ bit rather than ANDing with the slot index itself. + */ + pipeline->writes_point_size = + (nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ)) != 0; + + brw_compute_vue_map(&pipeline->device->info, + &prog_data->base.vue_map, + nir->info.outputs_written, + nir->info.separate_shader); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_vs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, false, -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + const uint32_t offset = + anv_pipeline_upload_kernel(pipeline, shader_code, code_size); + if (prog_data->base.dispatch_mode == DISPATCH_MODE_SIMD8) { + pipeline->vs_simd8 = offset; + pipeline->vs_vec4 = NO_KERNEL; + } else { + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = offset; + } + + ralloc_free(mem_ctx); + + anv_pipeline_add_compiled_stage(pipeline, VK_SHADER_STAGE_VERTEX, + &prog_data->base.base); + + return VK_SUCCESS; +} + +static VkResult +anv_pipeline_compile_gs(struct anv_pipeline *pipeline, + const VkGraphicsPipelineCreateInfo *info, + struct anv_shader *shader) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_gs_prog_data *prog_data = &pipeline->gs_prog_data; + struct brw_gs_prog_key key; + + populate_gs_prog_key(&pipeline->device->info, &key); + + /* TODO: Look up shader in cache */ + + memset(prog_data, 0, sizeof(*prog_data)); + + nir_shader *nir = anv_pipeline_compile(pipeline, shader, + VK_SHADER_STAGE_GEOMETRY, + &prog_data->base.base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if (shader->module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + brw_compute_vue_map(&pipeline->device->info, + &prog_data->base.vue_map, + nir->info.outputs_written, + nir->info.separate_shader); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_gs(compiler, NULL, mem_ctx, &key, prog_data, 
nir, + NULL, -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + /* TODO: SIMD8 GS */ + pipeline->gs_vec4 = + anv_pipeline_upload_kernel(pipeline, shader_code, code_size); + pipeline->gs_vertex_count = nir->info.gs.vertices_in; + + ralloc_free(mem_ctx); + + anv_pipeline_add_compiled_stage(pipeline, VK_SHADER_STAGE_GEOMETRY, + &prog_data->base.base); + + return VK_SUCCESS; +} + +static VkResult +anv_pipeline_compile_fs(struct anv_pipeline *pipeline, + const VkGraphicsPipelineCreateInfo *info, + struct anv_shader *shader) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_wm_prog_data *prog_data = &pipeline->wm_prog_data; + struct brw_wm_prog_key key; + + populate_wm_prog_key(&pipeline->device->info, info, &key); + + if (pipeline->use_repclear) + key.nr_color_regions = 1; + + /* TODO: Look up shader in cache */ + + memset(prog_data, 0, sizeof(*prog_data)); + + prog_data->binding_table.render_target_start = 0; + + nir_shader *nir = anv_pipeline_compile(pipeline, shader, + VK_SHADER_STAGE_FRAGMENT, + &prog_data->base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if (shader->module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_fs(compiler, NULL, mem_ctx, &key, prog_data, nir, + NULL, -1, -1, pipeline->use_repclear, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + uint32_t offset = anv_pipeline_upload_kernel(pipeline, + shader_code, code_size); + if (prog_data->no_8) + pipeline->ps_simd8 = NO_KERNEL; + else + pipeline->ps_simd8 = offset; + + if (prog_data->no_8 || prog_data->prog_offset_16) { + pipeline->ps_simd16 = offset + prog_data->prog_offset_16; + } else { + pipeline->ps_simd16 = NO_KERNEL; + } + + pipeline->ps_ksp2 
= 0; + pipeline->ps_grf_start2 = 0; + if (pipeline->ps_simd8 != NO_KERNEL) { + pipeline->ps_ksp0 = pipeline->ps_simd8; + pipeline->ps_grf_start0 = prog_data->base.dispatch_grf_start_reg; + if (pipeline->ps_simd16 != NO_KERNEL) { + pipeline->ps_ksp2 = pipeline->ps_simd16; + pipeline->ps_grf_start2 = prog_data->dispatch_grf_start_reg_16; + } + } else if (pipeline->ps_simd16 != NO_KERNEL) { + pipeline->ps_ksp0 = pipeline->ps_simd16; + pipeline->ps_grf_start0 = prog_data->dispatch_grf_start_reg_16; + } + + ralloc_free(mem_ctx); + + anv_pipeline_add_compiled_stage(pipeline, VK_SHADER_STAGE_FRAGMENT, + &prog_data->base); + + return VK_SUCCESS; +} + +VkResult +anv_pipeline_compile_cs(struct anv_pipeline *pipeline, + const VkComputePipelineCreateInfo *info, + struct anv_shader *shader) +{ + const struct brw_compiler *compiler = + pipeline->device->instance->physicalDevice.compiler; + struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data; + struct brw_cs_prog_key key; + + populate_cs_prog_key(&pipeline->device->info, &key); + + /* TODO: Look up shader in cache */ + + memset(prog_data, 0, sizeof(*prog_data)); + + nir_shader *nir = anv_pipeline_compile(pipeline, shader, + VK_SHADER_STAGE_COMPUTE, + &prog_data->base); + if (nir == NULL) + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + + void *mem_ctx = ralloc_context(NULL); + + if (shader->module->nir == NULL) + ralloc_steal(mem_ctx, nir); + + unsigned code_size; + const unsigned *shader_code = + brw_compile_cs(compiler, NULL, mem_ctx, &key, prog_data, nir, + -1, &code_size, NULL); + if (shader_code == NULL) { + ralloc_free(mem_ctx); + return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); + } + + pipeline->cs_simd = anv_pipeline_upload_kernel(pipeline, + shader_code, code_size); + ralloc_free(mem_ctx); + + anv_pipeline_add_compiled_stage(pipeline, VK_SHADER_STAGE_COMPUTE, + &prog_data->base); + + return VK_SUCCESS; +} + +static const int gen8_push_size = 32 * 1024; + +static void +gen7_compute_urb_partition(struct anv_pipeline 
*pipeline) +{ + const struct brw_device_info *devinfo = &pipeline->device->info; + bool vs_present = pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT; + unsigned vs_size = vs_present ? pipeline->vs_prog_data.base.urb_entry_size : 1; + unsigned vs_entry_size_bytes = vs_size * 64; + bool gs_present = pipeline->active_stages & VK_SHADER_STAGE_GEOMETRY_BIT; + unsigned gs_size = gs_present ? pipeline->gs_prog_data.base.urb_entry_size : 1; + unsigned gs_entry_size_bytes = gs_size * 64; + + /* From p35 of the Ivy Bridge PRM (section 1.7.1: 3DSTATE_URB_GS): + * + * VS Number of URB Entries must be divisible by 8 if the VS URB Entry + * Allocation Size is less than 9 512-bit URB entries. + * + * Similar text exists for GS. + */ + unsigned vs_granularity = (vs_size < 9) ? 8 : 1; + unsigned gs_granularity = (gs_size < 9) ? 8 : 1; + + /* URB allocations must be done in 8k chunks. */ + unsigned chunk_size_bytes = 8192; + + /* Determine the size of the URB in chunks. */ + unsigned urb_chunks = devinfo->urb.size * 1024 / chunk_size_bytes; + + /* Reserve space for push constants */ + unsigned push_constant_bytes = gen8_push_size; + unsigned push_constant_chunks = + push_constant_bytes / chunk_size_bytes; + + /* Initially, assign each stage the minimum amount of URB space it needs, + * and make a note of how much additional space it "wants" (the amount of + * additional space it could actually make use of). 
+ */ + + /* VS has a lower limit on the number of URB entries */ + unsigned vs_chunks = + ALIGN(devinfo->urb.min_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + unsigned vs_wants = + ALIGN(devinfo->urb.max_vs_entries * vs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - vs_chunks; + + unsigned gs_chunks = 0; + unsigned gs_wants = 0; + if (gs_present) { + /* There are two constraints on the minimum amount of URB space we can + * allocate: + * + * (1) We need room for at least 2 URB entries, since we always operate + * the GS in DUAL_OBJECT mode. + * + * (2) We can't allocate less than nr_gs_entries_granularity. + */ + gs_chunks = ALIGN(MAX2(gs_granularity, 2) * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes; + gs_wants = + ALIGN(devinfo->urb.max_gs_entries * gs_entry_size_bytes, + chunk_size_bytes) / chunk_size_bytes - gs_chunks; + } + + /* There should always be enough URB space to satisfy the minimum + * requirements of each stage. + */ + unsigned total_needs = push_constant_chunks + vs_chunks + gs_chunks; + assert(total_needs <= urb_chunks); + + /* Mete out remaining space (if any) in proportion to "wants". */ + unsigned total_wants = vs_wants + gs_wants; + unsigned remaining_space = urb_chunks - total_needs; + if (remaining_space > total_wants) + remaining_space = total_wants; + if (remaining_space > 0) { + unsigned vs_additional = (unsigned) + round(vs_wants * (((double) remaining_space) / total_wants)); + vs_chunks += vs_additional; + remaining_space -= vs_additional; + gs_chunks += remaining_space; + } + + /* Sanity check that we haven't over-allocated. */ + assert(push_constant_chunks + vs_chunks + gs_chunks <= urb_chunks); + + /* Finally, compute the number of entries that can fit in the space + * allocated to each stage. 
+ */ + unsigned nr_vs_entries = vs_chunks * chunk_size_bytes / vs_entry_size_bytes; + unsigned nr_gs_entries = gs_chunks * chunk_size_bytes / gs_entry_size_bytes; + + /* Since we rounded up when computing *_wants, this may be slightly more + * than the maximum allowed amount, so correct for that. + */ + nr_vs_entries = MIN2(nr_vs_entries, devinfo->urb.max_vs_entries); + nr_gs_entries = MIN2(nr_gs_entries, devinfo->urb.max_gs_entries); + + /* Ensure that we program a multiple of the granularity. */ + nr_vs_entries = ROUND_DOWN_TO(nr_vs_entries, vs_granularity); + nr_gs_entries = ROUND_DOWN_TO(nr_gs_entries, gs_granularity); + + /* Finally, sanity check to make sure we have at least the minimum number + * of entries needed for each stage. + */ + assert(nr_vs_entries >= devinfo->urb.min_vs_entries); + if (gs_present) + assert(nr_gs_entries >= 2); + + /* Lay out the URB in the following order: + * - push constants + * - VS + * - GS + */ + pipeline->urb.vs_start = push_constant_chunks; + pipeline->urb.vs_size = vs_size; + pipeline->urb.nr_vs_entries = nr_vs_entries; + + pipeline->urb.gs_start = push_constant_chunks + vs_chunks; + pipeline->urb.gs_size = gs_size; + pipeline->urb.nr_gs_entries = nr_gs_entries; +} + +static void +anv_pipeline_init_dynamic_state(struct anv_pipeline *pipeline, + const VkGraphicsPipelineCreateInfo *pCreateInfo) +{ + anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL; + ANV_FROM_HANDLE(anv_render_pass, pass, pCreateInfo->renderPass); + struct anv_subpass *subpass = &pass->subpasses[pCreateInfo->subpass]; + + pipeline->dynamic_state = default_dynamic_state; + + if (pCreateInfo->pDynamicState) { + /* Remove all of the states that are marked as dynamic */ + uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount; + for (uint32_t s = 0; s < count; s++) + states &= ~(1 << pCreateInfo->pDynamicState->pDynamicStates[s]); + } + + struct anv_dynamic_state *dynamic = &pipeline->dynamic_state; + + dynamic->viewport.count = 
pCreateInfo->pViewportState->viewportCount; + if (states & (1 << VK_DYNAMIC_STATE_VIEWPORT)) { + typed_memcpy(dynamic->viewport.viewports, + pCreateInfo->pViewportState->pViewports, + pCreateInfo->pViewportState->viewportCount); + } + + dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount; + if (states & (1 << VK_DYNAMIC_STATE_SCISSOR)) { + typed_memcpy(dynamic->scissor.scissors, + pCreateInfo->pViewportState->pScissors, + pCreateInfo->pViewportState->scissorCount); + } + + if (states & (1 << VK_DYNAMIC_STATE_LINE_WIDTH)) { + assert(pCreateInfo->pRasterState); + dynamic->line_width = pCreateInfo->pRasterState->lineWidth; + } + + if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BIAS)) { + assert(pCreateInfo->pRasterState); + dynamic->depth_bias.bias = pCreateInfo->pRasterState->depthBias; + dynamic->depth_bias.clamp = pCreateInfo->pRasterState->depthBiasClamp; + dynamic->depth_bias.slope_scaled = + pCreateInfo->pRasterState->slopeScaledDepthBias; + } + + if (states & (1 << VK_DYNAMIC_STATE_BLEND_CONSTANTS)) { + assert(pCreateInfo->pColorBlendState); + typed_memcpy(dynamic->blend_constants, + pCreateInfo->pColorBlendState->blendConst, 4); + } + + /* If there is no depthstencil attachment, then don't read + * pDepthStencilState. The Vulkan spec states that pDepthStencilState may + * be NULL in this case. Even if pDepthStencilState is non-NULL, there is + * no need to override the depthstencil defaults in + * anv_pipeline::dynamic_state when there is no depthstencil attachment. + * + * From the Vulkan spec (20 Oct 2015, git-aa308cb): + * + * pDepthStencilState [...] may only be NULL if renderPass and subpass + * specify a subpass that has no depth/stencil attachment. 
+ */ + if (subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) { + if (states & (1 << VK_DYNAMIC_STATE_DEPTH_BOUNDS)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->depth_bounds.min = + pCreateInfo->pDepthStencilState->minDepthBounds; + dynamic->depth_bounds.max = + pCreateInfo->pDepthStencilState->maxDepthBounds; + } + + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_compare_mask.front = + pCreateInfo->pDepthStencilState->front.stencilCompareMask; + dynamic->stencil_compare_mask.back = + pCreateInfo->pDepthStencilState->back.stencilCompareMask; + } + + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_WRITE_MASK)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_write_mask.front = + pCreateInfo->pDepthStencilState->front.stencilWriteMask; + dynamic->stencil_write_mask.back = + pCreateInfo->pDepthStencilState->back.stencilWriteMask; + } + + if (states & (1 << VK_DYNAMIC_STATE_STENCIL_REFERENCE)) { + assert(pCreateInfo->pDepthStencilState); + dynamic->stencil_reference.front = + pCreateInfo->pDepthStencilState->front.stencilReference; + dynamic->stencil_reference.back = + pCreateInfo->pDepthStencilState->back.stencilReference; + } + } + + pipeline->dynamic_state_mask = states; +} + +static void +anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info) +{ + struct anv_render_pass *renderpass = NULL; + struct anv_subpass *subpass = NULL; + + /* Assert that all required members of VkGraphicsPipelineCreateInfo are + * present, as explained by the Vulkan (20 Oct 2015, git-aa308cb), Section + * 4.2 Graphics Pipeline. 
+ */ + assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO); + + renderpass = anv_render_pass_from_handle(info->renderPass); + assert(renderpass); + + if (renderpass != &anv_meta_dummy_renderpass) { + assert(info->subpass < renderpass->subpass_count); + subpass = &renderpass->subpasses[info->subpass]; + } + + assert(info->stageCount >= 1); + assert(info->pVertexInputState); + assert(info->pInputAssemblyState); + assert(info->pViewportState); + assert(info->pRasterState); + assert(info->pMultisampleState); + + if (subpass && subpass->depth_stencil_attachment != VK_ATTACHMENT_UNUSED) + assert(info->pDepthStencilState); + + if (subpass && subpass->color_count > 0) + assert(info->pColorBlendState); + + for (uint32_t i = 0; i < info->stageCount; ++i) { + switch (info->pStages[i].stage) { + case VK_SHADER_STAGE_TESS_CONTROL: + case VK_SHADER_STAGE_TESS_EVALUATION: + assert(info->pTessellationState); + break; + default: + break; + } + } +} + +VkResult +anv_pipeline_init(struct anv_pipeline *pipeline, struct anv_device *device, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra) +{ + VkResult result; + + anv_validate { + anv_pipeline_validate_create_info(pCreateInfo); + } + + pipeline->device = device; + pipeline->layout = anv_pipeline_layout_from_handle(pCreateInfo->layout); + + result = anv_reloc_list_init(&pipeline->batch_relocs, device); + if (result != VK_SUCCESS) { + anv_device_free(device, pipeline); + return result; + } + pipeline->batch.next = pipeline->batch.start = pipeline->batch_data; + pipeline->batch.end = pipeline->batch.start + sizeof(pipeline->batch_data); + pipeline->batch.relocs = &pipeline->batch_relocs; + + anv_state_stream_init(&pipeline->program_stream, + &device->instruction_block_pool); + + anv_pipeline_init_dynamic_state(pipeline, pCreateInfo); + + if (pCreateInfo->pTessellationState) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO"); + if 
(pCreateInfo->pMultisampleState && + pCreateInfo->pMultisampleState->rasterSamples > 1) + anv_finishme("VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO"); + + pipeline->use_repclear = extra && extra->use_repclear; + pipeline->writes_point_size = false; + + /* When we free the pipeline, we detect stages based on the NULL status + * of various prog_data pointers. Make them NULL by default. + */ + memset(pipeline->prog_data, 0, sizeof(pipeline->prog_data)); + memset(pipeline->scratch_start, 0, sizeof(pipeline->scratch_start)); + + pipeline->vs_simd8 = NO_KERNEL; + pipeline->vs_vec4 = NO_KERNEL; + pipeline->gs_vec4 = NO_KERNEL; + + pipeline->active_stages = 0; + pipeline->total_scratch = 0; + + for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) { + ANV_FROM_HANDLE(anv_shader, shader, pCreateInfo->pStages[i].shader); + + switch (pCreateInfo->pStages[i].stage) { + case VK_SHADER_STAGE_VERTEX: + anv_pipeline_compile_vs(pipeline, pCreateInfo, shader); + break; + case VK_SHADER_STAGE_GEOMETRY: + anv_pipeline_compile_gs(pipeline, pCreateInfo, shader); + break; + case VK_SHADER_STAGE_FRAGMENT: + anv_pipeline_compile_fs(pipeline, pCreateInfo, shader); + break; + default: + anv_finishme("Unsupported shader stage"); + } + } + + if (!(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT)) { + /* Vertex is only optional if disable_vs is set */ + assert(extra->disable_vs); + memset(&pipeline->vs_prog_data, 0, sizeof(pipeline->vs_prog_data)); + } + + gen7_compute_urb_partition(pipeline); + + const VkPipelineVertexInputStateCreateInfo *vi_info = + pCreateInfo->pVertexInputState; + pipeline->vb_used = 0; + for (uint32_t i = 0; i < vi_info->bindingCount; i++) { + const VkVertexInputBindingDescription *desc = + &vi_info->pVertexBindingDescriptions[i]; + + pipeline->vb_used |= 1 << desc->binding; + pipeline->binding_stride[desc->binding] = desc->strideInBytes; + + /* Step rate is programmed per vertex element (attribute), not + * binding. 
Set up a map of which bindings step per instance, for + * reference by vertex element setup. */ + switch (desc->stepRate) { + default: + case VK_VERTEX_INPUT_STEP_RATE_VERTEX: + pipeline->instancing_enable[desc->binding] = false; + break; + case VK_VERTEX_INPUT_STEP_RATE_INSTANCE: + pipeline->instancing_enable[desc->binding] = true; + break; + } + } + + const VkPipelineInputAssemblyStateCreateInfo *ia_info = + pCreateInfo->pInputAssemblyState; + pipeline->primitive_restart = ia_info->primitiveRestartEnable; + pipeline->topology = vk_to_gen_primitive_type[ia_info->topology]; + + if (extra && extra->use_rectlist) + pipeline->topology = _3DPRIM_RECTLIST; + + return VK_SUCCESS; +} + +VkResult +anv_graphics_pipeline_create( + VkDevice _device, + const VkGraphicsPipelineCreateInfo *pCreateInfo, + const struct anv_graphics_pipeline_create_info *extra, + VkPipeline *pPipeline) +{ + ANV_FROM_HANDLE(anv_device, device, _device); + + switch (device->info.gen) { + case 7: + if (device->info.is_haswell) + return gen75_graphics_pipeline_create(_device, pCreateInfo, extra, pPipeline); + else + return gen7_graphics_pipeline_create(_device, pCreateInfo, extra, pPipeline); + case 8: + return gen8_graphics_pipeline_create(_device, pCreateInfo, extra, pPipeline); + default: + unreachable("unsupported gen\n"); + } +} + +VkResult anv_CreateGraphicsPipelines( + VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkGraphicsPipelineCreateInfo* pCreateInfos, + VkPipeline* pPipelines) +{ + VkResult result = VK_SUCCESS; + + unsigned i = 0; + for (; i < count; i++) { + result = anv_graphics_pipeline_create(_device, &pCreateInfos[i], + NULL, &pPipelines[i]); + if (result != VK_SUCCESS) { + for (unsigned j = 0; j < i; j++) { + anv_DestroyPipeline(_device, pPipelines[j]); + } + + return result; + } + } + + return VK_SUCCESS; +} + +static VkResult anv_compute_pipeline_create( + VkDevice _device, + const VkComputePipelineCreateInfo* pCreateInfo, + VkPipeline* pPipeline) +{ 
+ ANV_FROM_HANDLE(anv_device, device, _device); + + switch (device->info.gen) { + case 7: + if (device->info.is_haswell) + return gen75_compute_pipeline_create(_device, pCreateInfo, pPipeline); + else + return gen7_compute_pipeline_create(_device, pCreateInfo, pPipeline); + case 8: + return gen8_compute_pipeline_create(_device, pCreateInfo, pPipeline); + default: + unreachable("unsupported gen\n"); + } +} + +VkResult anv_CreateComputePipelines( + VkDevice _device, + VkPipelineCache pipelineCache, + uint32_t count, + const VkComputePipelineCreateInfo* pCreateInfos, + VkPipeline* pPipelines) +{ + VkResult result = VK_SUCCESS; + + unsigned i = 0; + for (; i < count; i++) { + result = anv_compute_pipeline_create(_device, &pCreateInfos[i], + &pPipelines[i]); + if (result != VK_SUCCESS) { + for (unsigned j = 0; j < i; j++) { + anv_DestroyPipeline(_device, pPipelines[j]); + } + + return result; + } + } + + return VK_SUCCESS; +}