/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

28 #include "spirv_to_nir_private.h"
31 static struct vtn_ssa_value
*
32 vtn_const_ssa_value(struct vtn_builder
*b
, nir_constant
*constant
,
33 const struct glsl_type
*type
)
35 struct hash_entry
*entry
= _mesa_hash_table_search(b
->const_table
, constant
);
40 struct vtn_ssa_value
*val
= rzalloc(b
, struct vtn_ssa_value
);
43 switch (glsl_get_base_type(type
)) {
48 case GLSL_TYPE_DOUBLE
:
49 if (glsl_type_is_vector_or_scalar(type
)) {
50 unsigned num_components
= glsl_get_vector_elements(val
->type
);
51 nir_load_const_instr
*load
=
52 nir_load_const_instr_create(b
->shader
, num_components
);
54 for (unsigned i
= 0; i
< num_components
; i
++)
55 load
->value
.u
[i
] = constant
->value
.u
[i
];
57 nir_instr_insert_before_cf_list(&b
->impl
->body
, &load
->instr
);
58 val
->def
= &load
->def
;
60 assert(glsl_type_is_matrix(type
));
61 unsigned rows
= glsl_get_vector_elements(val
->type
);
62 unsigned columns
= glsl_get_matrix_columns(val
->type
);
63 val
->elems
= ralloc_array(b
, struct vtn_ssa_value
*, columns
);
65 for (unsigned i
= 0; i
< columns
; i
++) {
66 struct vtn_ssa_value
*col_val
= rzalloc(b
, struct vtn_ssa_value
);
67 col_val
->type
= glsl_get_column_type(val
->type
);
68 nir_load_const_instr
*load
=
69 nir_load_const_instr_create(b
->shader
, rows
);
71 for (unsigned j
= 0; j
< rows
; j
++)
72 load
->value
.u
[j
] = constant
->value
.u
[rows
* i
+ j
];
74 nir_instr_insert_before_cf_list(&b
->impl
->body
, &load
->instr
);
75 col_val
->def
= &load
->def
;
77 val
->elems
[i
] = col_val
;
82 case GLSL_TYPE_ARRAY
: {
83 unsigned elems
= glsl_get_length(val
->type
);
84 val
->elems
= ralloc_array(b
, struct vtn_ssa_value
*, elems
);
85 const struct glsl_type
*elem_type
= glsl_get_array_element(val
->type
);
86 for (unsigned i
= 0; i
< elems
; i
++)
87 val
->elems
[i
] = vtn_const_ssa_value(b
, constant
->elements
[i
],
92 case GLSL_TYPE_STRUCT
: {
93 unsigned elems
= glsl_get_length(val
->type
);
94 val
->elems
= ralloc_array(b
, struct vtn_ssa_value
*, elems
);
95 for (unsigned i
= 0; i
< elems
; i
++) {
96 const struct glsl_type
*elem_type
=
97 glsl_get_struct_field(val
->type
, i
);
98 val
->elems
[i
] = vtn_const_ssa_value(b
, constant
->elements
[i
],
105 unreachable("bad constant type");
111 struct vtn_ssa_value
*
112 vtn_ssa_value(struct vtn_builder
*b
, uint32_t value_id
)
114 struct vtn_value
*val
= vtn_untyped_value(b
, value_id
);
115 switch (val
->value_type
) {
116 case vtn_value_type_constant
:
117 return vtn_const_ssa_value(b
, val
->constant
, val
->type
);
119 case vtn_value_type_ssa
:
122 unreachable("Invalid type for an SSA value");
/**
 * Copies a SPIR-V string literal out of the word stream.
 *
 * At most word_count * sizeof(*words) bytes are copied; the result is
 * allocated with ralloc on \p b.
 */
static char *
vtn_string_literal(struct vtn_builder *b, const uint32_t *words,
                   unsigned word_count)
{
   return ralloc_strndup(b, (char *)words, word_count * sizeof(*words));
}

133 static const uint32_t *
134 vtn_foreach_instruction(struct vtn_builder
*b
, const uint32_t *start
,
135 const uint32_t *end
, vtn_instruction_handler handler
)
137 const uint32_t *w
= start
;
139 SpvOp opcode
= w
[0] & SpvOpCodeMask
;
140 unsigned count
= w
[0] >> SpvWordCountShift
;
141 assert(count
>= 1 && w
+ count
<= end
);
143 if (!handler(b
, opcode
, w
, count
))
153 vtn_handle_extension(struct vtn_builder
*b
, SpvOp opcode
,
154 const uint32_t *w
, unsigned count
)
157 case SpvOpExtInstImport
: {
158 struct vtn_value
*val
= vtn_push_value(b
, w
[1], vtn_value_type_extension
);
159 if (strcmp((const char *)&w
[2], "GLSL.std.450") == 0) {
160 val
->ext_handler
= vtn_handle_glsl450_instruction
;
162 assert(!"Unsupported extension");
168 struct vtn_value
*val
= vtn_value(b
, w
[3], vtn_value_type_extension
);
169 bool handled
= val
->ext_handler(b
, w
[4], w
, count
);
176 unreachable("Unhandled opcode");
181 _foreach_decoration_helper(struct vtn_builder
*b
,
182 struct vtn_value
*base_value
,
184 struct vtn_value
*value
,
185 vtn_decoration_foreach_cb cb
, void *data
)
187 for (struct vtn_decoration
*dec
= value
->decoration
; dec
; dec
= dec
->next
) {
188 if (dec
->member
>= 0) {
189 assert(member
== -1);
190 member
= dec
->member
;
194 assert(dec
->group
->value_type
== vtn_value_type_decoration_group
);
195 _foreach_decoration_helper(b
, base_value
, member
, dec
->group
, cb
, data
);
197 cb(b
, base_value
, member
, dec
, data
);
202 /** Iterates (recursively if needed) over all of the decorations on a value
204 * This function iterates over all of the decorations applied to a given
205 * value. If it encounters a decoration group, it recurses into the group
206 * and iterates over all of those decorations as well.
209 vtn_foreach_decoration(struct vtn_builder
*b
, struct vtn_value
*value
,
210 vtn_decoration_foreach_cb cb
, void *data
)
212 _foreach_decoration_helper(b
, value
, -1, value
, cb
, data
);
216 vtn_handle_decoration(struct vtn_builder
*b
, SpvOp opcode
,
217 const uint32_t *w
, unsigned count
)
219 const uint32_t *w_end
= w
+ count
;
220 const uint32_t target
= w
[1];
225 case SpvOpDecorationGroup
:
226 vtn_push_value(b
, target
, vtn_value_type_undef
);
229 case SpvOpMemberDecorate
:
232 case SpvOpDecorate
: {
233 struct vtn_value
*val
= &b
->values
[target
];
235 struct vtn_decoration
*dec
= rzalloc(b
, struct vtn_decoration
);
236 dec
->member
= member
;
237 dec
->decoration
= *(w
++);
240 /* Link into the list */
241 dec
->next
= val
->decoration
;
242 val
->decoration
= dec
;
246 case SpvOpGroupMemberDecorate
:
249 case SpvOpGroupDecorate
: {
250 struct vtn_value
*group
= &b
->values
[target
];
251 assert(group
->value_type
== vtn_value_type_decoration_group
);
253 for (; w
< w_end
; w
++) {
254 struct vtn_value
*val
= &b
->values
[*w
];
255 struct vtn_decoration
*dec
= rzalloc(b
, struct vtn_decoration
);
256 dec
->member
= member
;
259 /* Link into the list */
260 dec
->next
= val
->decoration
;
261 val
->decoration
= dec
;
267 unreachable("Unhandled opcode");
272 struct_member_decoration_cb(struct vtn_builder
*b
,
273 struct vtn_value
*val
, int member
,
274 const struct vtn_decoration
*dec
, void *void_fields
)
276 struct glsl_struct_field
*fields
= void_fields
;
281 switch (dec
->decoration
) {
282 case SpvDecorationPrecisionLow
:
283 case SpvDecorationPrecisionMedium
:
284 case SpvDecorationPrecisionHigh
:
285 break; /* FIXME: Do nothing with these for now. */
286 case SpvDecorationSmooth
:
287 fields
[member
].interpolation
= INTERP_QUALIFIER_SMOOTH
;
289 case SpvDecorationNoperspective
:
290 fields
[member
].interpolation
= INTERP_QUALIFIER_NOPERSPECTIVE
;
292 case SpvDecorationFlat
:
293 fields
[member
].interpolation
= INTERP_QUALIFIER_FLAT
;
295 case SpvDecorationCentroid
:
296 fields
[member
].centroid
= true;
298 case SpvDecorationSample
:
299 fields
[member
].sample
= true;
301 case SpvDecorationLocation
:
302 fields
[member
].location
= dec
->literals
[0];
305 unreachable("Unhandled member decoration");
310 vtn_handle_type(struct vtn_builder
*b
, SpvOp opcode
,
311 const uint32_t *w
, unsigned count
)
313 struct vtn_value
*val
= vtn_push_value(b
, w
[1], vtn_value_type_type
);
317 val
->type
= glsl_void_type();
320 val
->type
= glsl_bool_type();
323 val
->type
= glsl_int_type();
326 val
->type
= glsl_float_type();
329 case SpvOpTypeVector
: {
330 const struct glsl_type
*base
=
331 vtn_value(b
, w
[2], vtn_value_type_type
)->type
;
332 unsigned elems
= w
[3];
334 assert(glsl_type_is_scalar(base
));
335 val
->type
= glsl_vector_type(glsl_get_base_type(base
), elems
);
339 case SpvOpTypeMatrix
: {
340 const struct glsl_type
*base
=
341 vtn_value(b
, w
[2], vtn_value_type_type
)->type
;
342 unsigned columns
= w
[3];
344 assert(glsl_type_is_vector(base
));
345 val
->type
= glsl_matrix_type(glsl_get_base_type(base
),
346 glsl_get_vector_elements(base
),
352 val
->type
= glsl_array_type(b
->values
[w
[2]].type
, w
[3]);
355 case SpvOpTypeStruct
: {
356 NIR_VLA(struct glsl_struct_field
, fields
, count
);
357 for (unsigned i
= 0; i
< count
- 2; i
++) {
358 /* TODO: Handle decorators */
359 fields
[i
].type
= vtn_value(b
, w
[i
+ 2], vtn_value_type_type
)->type
;
360 fields
[i
].name
= ralloc_asprintf(b
, "field%d", i
);
361 fields
[i
].location
= -1;
362 fields
[i
].interpolation
= 0;
363 fields
[i
].centroid
= 0;
364 fields
[i
].sample
= 0;
365 fields
[i
].matrix_layout
= 2;
366 fields
[i
].stream
= -1;
369 vtn_foreach_decoration(b
, val
, struct_member_decoration_cb
, fields
);
371 const char *name
= val
->name
? val
->name
: "struct";
373 val
->type
= glsl_struct_type(fields
, count
, name
);
377 case SpvOpTypeFunction
: {
378 const struct glsl_type
*return_type
= b
->values
[w
[2]].type
;
379 NIR_VLA(struct glsl_function_param
, params
, count
- 3);
380 for (unsigned i
= 0; i
< count
- 3; i
++) {
381 params
[i
].type
= vtn_value(b
, w
[i
+ 3], vtn_value_type_type
)->type
;
385 params
[i
].out
= true;
387 val
->type
= glsl_function_type(return_type
, params
, count
- 3);
391 case SpvOpTypePointer
:
392 /* FIXME: For now, we'll just do the really lame thing and return
393 * the same type. The validator should ensure that the proper number
394 * of dereferences happen
396 val
->type
= vtn_value(b
, w
[3], vtn_value_type_type
)->type
;
399 case SpvOpTypeSampler
: {
400 const struct glsl_type
*sampled_type
=
401 vtn_value(b
, w
[2], vtn_value_type_type
)->type
;
403 assert(glsl_type_is_vector_or_scalar(sampled_type
));
405 enum glsl_sampler_dim dim
;
406 switch ((SpvDim
)w
[3]) {
407 case SpvDim1D
: dim
= GLSL_SAMPLER_DIM_1D
; break;
408 case SpvDim2D
: dim
= GLSL_SAMPLER_DIM_2D
; break;
409 case SpvDim3D
: dim
= GLSL_SAMPLER_DIM_3D
; break;
410 case SpvDimCube
: dim
= GLSL_SAMPLER_DIM_CUBE
; break;
411 case SpvDimRect
: dim
= GLSL_SAMPLER_DIM_RECT
; break;
412 case SpvDimBuffer
: dim
= GLSL_SAMPLER_DIM_BUF
; break;
414 unreachable("Invalid SPIR-V Sampler dimension");
417 /* TODO: Handle the various texture image/filter options */
420 bool is_array
= w
[5];
421 bool is_shadow
= w
[6];
423 assert(w
[7] == 0 && "FIXME: Handl multi-sampled textures");
425 val
->type
= glsl_sampler_type(dim
, is_shadow
, is_array
,
426 glsl_get_base_type(sampled_type
));
430 case SpvOpTypeRuntimeArray
:
431 case SpvOpTypeOpaque
:
433 case SpvOpTypeDeviceEvent
:
434 case SpvOpTypeReserveId
:
438 unreachable("Unhandled opcode");
443 vtn_handle_constant(struct vtn_builder
*b
, SpvOp opcode
,
444 const uint32_t *w
, unsigned count
)
446 struct vtn_value
*val
= vtn_push_value(b
, w
[2], vtn_value_type_constant
);
447 val
->type
= vtn_value(b
, w
[1], vtn_value_type_type
)->type
;
448 val
->constant
= ralloc(b
, nir_constant
);
450 case SpvOpConstantTrue
:
451 assert(val
->type
== glsl_bool_type());
452 val
->constant
->value
.u
[0] = NIR_TRUE
;
454 case SpvOpConstantFalse
:
455 assert(val
->type
== glsl_bool_type());
456 val
->constant
->value
.u
[0] = NIR_FALSE
;
459 assert(glsl_type_is_scalar(val
->type
));
460 val
->constant
->value
.u
[0] = w
[3];
462 case SpvOpConstantComposite
: {
463 unsigned elem_count
= count
- 3;
464 nir_constant
**elems
= ralloc_array(b
, nir_constant
*, elem_count
);
465 for (unsigned i
= 0; i
< elem_count
; i
++)
466 elems
[i
] = vtn_value(b
, w
[i
+ 3], vtn_value_type_constant
)->constant
;
468 switch (glsl_get_base_type(val
->type
)) {
471 case GLSL_TYPE_FLOAT
:
473 if (glsl_type_is_matrix(val
->type
)) {
474 unsigned rows
= glsl_get_vector_elements(val
->type
);
475 assert(glsl_get_matrix_columns(val
->type
) == elem_count
);
476 for (unsigned i
= 0; i
< elem_count
; i
++)
477 for (unsigned j
= 0; j
< rows
; j
++)
478 val
->constant
->value
.u
[rows
* i
+ j
] = elems
[i
]->value
.u
[j
];
480 assert(glsl_type_is_vector(val
->type
));
481 assert(glsl_get_vector_elements(val
->type
) == elem_count
);
482 for (unsigned i
= 0; i
< elem_count
; i
++)
483 val
->constant
->value
.u
[i
] = elems
[i
]->value
.u
[0];
488 case GLSL_TYPE_STRUCT
:
489 case GLSL_TYPE_ARRAY
:
490 ralloc_steal(val
->constant
, elems
);
491 val
->constant
->elements
= elems
;
495 unreachable("Unsupported type for constants");
501 unreachable("Unhandled opcode");
506 var_decoration_cb(struct vtn_builder
*b
, struct vtn_value
*val
, int member
,
507 const struct vtn_decoration
*dec
, void *void_var
)
509 assert(val
->value_type
== vtn_value_type_deref
);
510 assert(val
->deref
->deref
.child
== NULL
);
511 assert(val
->deref
->var
== void_var
);
513 nir_variable
*var
= void_var
;
514 switch (dec
->decoration
) {
515 case SpvDecorationPrecisionLow
:
516 case SpvDecorationPrecisionMedium
:
517 case SpvDecorationPrecisionHigh
:
518 break; /* FIXME: Do nothing with these for now. */
519 case SpvDecorationSmooth
:
520 var
->data
.interpolation
= INTERP_QUALIFIER_SMOOTH
;
522 case SpvDecorationNoperspective
:
523 var
->data
.interpolation
= INTERP_QUALIFIER_NOPERSPECTIVE
;
525 case SpvDecorationFlat
:
526 var
->data
.interpolation
= INTERP_QUALIFIER_FLAT
;
528 case SpvDecorationCentroid
:
529 var
->data
.centroid
= true;
531 case SpvDecorationSample
:
532 var
->data
.sample
= true;
534 case SpvDecorationInvariant
:
535 var
->data
.invariant
= true;
537 case SpvDecorationConstant
:
538 assert(var
->constant_initializer
!= NULL
);
539 var
->data
.read_only
= true;
541 case SpvDecorationNonwritable
:
542 var
->data
.read_only
= true;
544 case SpvDecorationLocation
:
545 var
->data
.explicit_location
= true;
546 var
->data
.location
= dec
->literals
[0];
548 case SpvDecorationComponent
:
549 var
->data
.location_frac
= dec
->literals
[0];
551 case SpvDecorationIndex
:
552 var
->data
.explicit_index
= true;
553 var
->data
.index
= dec
->literals
[0];
555 case SpvDecorationBinding
:
556 var
->data
.explicit_binding
= true;
557 var
->data
.binding
= dec
->literals
[0];
559 case SpvDecorationDescriptorSet
:
560 var
->data
.descriptor_set
= dec
->literals
[0];
562 case SpvDecorationBuiltIn
:
563 var
->data
.mode
= nir_var_system_value
;
564 var
->data
.read_only
= true;
565 switch ((SpvBuiltIn
)dec
->literals
[0]) {
566 case SpvBuiltInFrontFacing
:
567 var
->data
.location
= SYSTEM_VALUE_FRONT_FACE
;
569 case SpvBuiltInVertexId
:
570 var
->data
.location
= SYSTEM_VALUE_VERTEX_ID
;
572 case SpvBuiltInInstanceId
:
573 var
->data
.location
= SYSTEM_VALUE_INSTANCE_ID
;
575 case SpvBuiltInSampleId
:
576 var
->data
.location
= SYSTEM_VALUE_SAMPLE_ID
;
578 case SpvBuiltInSamplePosition
:
579 var
->data
.location
= SYSTEM_VALUE_SAMPLE_POS
;
581 case SpvBuiltInSampleMask
:
582 var
->data
.location
= SYSTEM_VALUE_SAMPLE_MASK_IN
;
584 case SpvBuiltInInvocationId
:
585 var
->data
.location
= SYSTEM_VALUE_INVOCATION_ID
;
587 case SpvBuiltInPrimitiveId
:
588 case SpvBuiltInPosition
:
589 case SpvBuiltInPointSize
:
590 case SpvBuiltInClipVertex
:
591 case SpvBuiltInClipDistance
:
592 case SpvBuiltInCullDistance
:
593 case SpvBuiltInLayer
:
594 case SpvBuiltInViewportIndex
:
595 case SpvBuiltInTessLevelOuter
:
596 case SpvBuiltInTessLevelInner
:
597 case SpvBuiltInTessCoord
:
598 case SpvBuiltInPatchVertices
:
599 case SpvBuiltInFragCoord
:
600 case SpvBuiltInPointCoord
:
601 case SpvBuiltInFragColor
:
602 case SpvBuiltInFragDepth
:
603 case SpvBuiltInHelperInvocation
:
604 case SpvBuiltInNumWorkgroups
:
605 case SpvBuiltInWorkgroupSize
:
606 case SpvBuiltInWorkgroupId
:
607 case SpvBuiltInLocalInvocationId
:
608 case SpvBuiltInGlobalInvocationId
:
609 case SpvBuiltInLocalInvocationIndex
:
610 case SpvBuiltInWorkDim
:
611 case SpvBuiltInGlobalSize
:
612 case SpvBuiltInEnqueuedWorkgroupSize
:
613 case SpvBuiltInGlobalOffset
:
614 case SpvBuiltInGlobalLinearId
:
615 case SpvBuiltInWorkgroupLinearId
:
616 case SpvBuiltInSubgroupSize
:
617 case SpvBuiltInSubgroupMaxSize
:
618 case SpvBuiltInNumSubgroups
:
619 case SpvBuiltInNumEnqueuedSubgroups
:
620 case SpvBuiltInSubgroupId
:
621 case SpvBuiltInSubgroupLocalInvocationId
:
622 unreachable("Unhandled builtin enum");
625 case SpvDecorationNoStaticUse
:
626 /* This can safely be ignored */
628 case SpvDecorationBlock
:
629 case SpvDecorationBufferBlock
:
630 case SpvDecorationRowMajor
:
631 case SpvDecorationColMajor
:
632 case SpvDecorationGLSLShared
:
633 case SpvDecorationGLSLStd140
:
634 case SpvDecorationGLSLStd430
:
635 case SpvDecorationGLSLPacked
:
636 case SpvDecorationPatch
:
637 case SpvDecorationRestrict
:
638 case SpvDecorationAliased
:
639 case SpvDecorationVolatile
:
640 case SpvDecorationCoherent
:
641 case SpvDecorationNonreadable
:
642 case SpvDecorationUniform
:
643 /* This is really nice but we have no use for it right now. */
644 case SpvDecorationCPacked
:
645 case SpvDecorationSaturatedConversion
:
646 case SpvDecorationStream
:
647 case SpvDecorationOffset
:
648 case SpvDecorationAlignment
:
649 case SpvDecorationXfbBuffer
:
650 case SpvDecorationStride
:
651 case SpvDecorationFuncParamAttr
:
652 case SpvDecorationFPRoundingMode
:
653 case SpvDecorationFPFastMathMode
:
654 case SpvDecorationLinkageAttributes
:
655 case SpvDecorationSpecId
:
658 unreachable("Unhandled variable decoration");
662 static struct vtn_ssa_value
*
663 _vtn_variable_load(struct vtn_builder
*b
,
664 nir_deref_var
*src_deref
, nir_deref
*src_deref_tail
)
666 struct vtn_ssa_value
*val
= rzalloc(b
, struct vtn_ssa_value
);
667 val
->type
= src_deref_tail
->type
;
669 /* The deref tail may contain a deref to select a component of a vector (in
670 * other words, it might not be an actual tail) so we have to save it away
671 * here since we overwrite it later.
673 nir_deref
*old_child
= src_deref_tail
->child
;
675 if (glsl_type_is_vector_or_scalar(val
->type
)) {
676 nir_intrinsic_instr
*load
=
677 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_load_var
);
679 nir_deref_as_var(nir_copy_deref(load
, &src_deref
->deref
));
680 load
->num_components
= glsl_get_vector_elements(val
->type
);
681 nir_ssa_dest_init(&load
->instr
, &load
->dest
, load
->num_components
, NULL
);
683 nir_builder_instr_insert(&b
->nb
, &load
->instr
);
685 if (src_deref
->var
->data
.mode
== nir_var_uniform
&&
686 glsl_get_base_type(val
->type
) == GLSL_TYPE_BOOL
) {
687 /* Uniform boolean loads need to be fixed up since they're defined
688 * to be zero/nonzero rather than NIR_FALSE/NIR_TRUE.
690 val
->def
= nir_ine(&b
->nb
, &load
->dest
.ssa
, nir_imm_int(&b
->nb
, 0));
692 val
->def
= &load
->dest
.ssa
;
694 } else if (glsl_get_base_type(val
->type
) == GLSL_TYPE_ARRAY
||
695 glsl_type_is_matrix(val
->type
)) {
696 unsigned elems
= glsl_get_length(val
->type
);
697 val
->elems
= ralloc_array(b
, struct vtn_ssa_value
*, elems
);
699 nir_deref_array
*deref
= nir_deref_array_create(b
);
700 deref
->deref_array_type
= nir_deref_array_type_direct
;
701 deref
->deref
.type
= glsl_get_array_element(val
->type
);
702 src_deref_tail
->child
= &deref
->deref
;
703 for (unsigned i
= 0; i
< elems
; i
++) {
704 deref
->base_offset
= i
;
705 val
->elems
[i
] = _vtn_variable_load(b
, src_deref
, &deref
->deref
);
708 assert(glsl_get_base_type(val
->type
) == GLSL_TYPE_STRUCT
);
709 unsigned elems
= glsl_get_length(val
->type
);
710 val
->elems
= ralloc_array(b
, struct vtn_ssa_value
*, elems
);
712 nir_deref_struct
*deref
= nir_deref_struct_create(b
, 0);
713 src_deref_tail
->child
= &deref
->deref
;
714 for (unsigned i
= 0; i
< elems
; i
++) {
716 deref
->deref
.type
= glsl_get_struct_field(val
->type
, i
);
717 val
->elems
[i
] = _vtn_variable_load(b
, src_deref
, &deref
->deref
);
721 src_deref_tail
->child
= old_child
;
727 _vtn_variable_store(struct vtn_builder
*b
, nir_deref_var
*dest_deref
,
728 nir_deref
*dest_deref_tail
, struct vtn_ssa_value
*src
)
730 nir_deref
*old_child
= dest_deref_tail
->child
;
732 if (glsl_type_is_vector_or_scalar(src
->type
)) {
733 nir_intrinsic_instr
*store
=
734 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_store_var
);
735 store
->variables
[0] =
736 nir_deref_as_var(nir_copy_deref(store
, &dest_deref
->deref
));
737 store
->src
[0] = nir_src_for_ssa(src
->def
);
739 nir_builder_instr_insert(&b
->nb
, &store
->instr
);
740 } else if (glsl_get_base_type(src
->type
) == GLSL_TYPE_ARRAY
||
741 glsl_type_is_matrix(src
->type
)) {
742 unsigned elems
= glsl_get_length(src
->type
);
744 nir_deref_array
*deref
= nir_deref_array_create(b
);
745 deref
->deref_array_type
= nir_deref_array_type_direct
;
746 deref
->deref
.type
= glsl_get_array_element(src
->type
);
747 dest_deref_tail
->child
= &deref
->deref
;
748 for (unsigned i
= 0; i
< elems
; i
++) {
749 deref
->base_offset
= i
;
750 _vtn_variable_store(b
, dest_deref
, &deref
->deref
, src
->elems
[i
]);
753 assert(glsl_get_base_type(src
->type
) == GLSL_TYPE_STRUCT
);
754 unsigned elems
= glsl_get_length(src
->type
);
756 nir_deref_struct
*deref
= nir_deref_struct_create(b
, 0);
757 dest_deref_tail
->child
= &deref
->deref
;
758 for (unsigned i
= 0; i
< elems
; i
++) {
760 deref
->deref
.type
= glsl_get_struct_field(src
->type
, i
);
761 _vtn_variable_store(b
, dest_deref
, &deref
->deref
, src
->elems
[i
]);
765 dest_deref_tail
->child
= old_child
;
769 * Gets the NIR-level deref tail, which may have as a child an array deref
770 * selecting which component due to OpAccessChain supporting per-component
771 * indexing in SPIR-V.
775 get_deref_tail(nir_deref_var
*deref
)
777 nir_deref
*cur
= &deref
->deref
;
778 while (!glsl_type_is_vector_or_scalar(cur
->type
) && cur
->child
)
784 static nir_ssa_def
*vtn_vector_extract(struct vtn_builder
*b
,
785 nir_ssa_def
*src
, unsigned index
);
787 static nir_ssa_def
*vtn_vector_extract_dynamic(struct vtn_builder
*b
,
791 static struct vtn_ssa_value
*
792 vtn_variable_load(struct vtn_builder
*b
, nir_deref_var
*src
)
794 nir_deref
*src_tail
= get_deref_tail(src
);
795 struct vtn_ssa_value
*val
= _vtn_variable_load(b
, src
, src_tail
);
797 if (src_tail
->child
) {
798 nir_deref_array
*vec_deref
= nir_deref_as_array(src_tail
->child
);
799 assert(vec_deref
->deref
.child
== NULL
);
800 val
->type
= vec_deref
->deref
.type
;
801 if (vec_deref
->deref_array_type
== nir_deref_array_type_direct
)
802 val
->def
= vtn_vector_extract(b
, val
->def
, vec_deref
->base_offset
);
804 val
->def
= vtn_vector_extract_dynamic(b
, val
->def
,
805 vec_deref
->indirect
.ssa
);
811 static nir_ssa_def
* vtn_vector_insert(struct vtn_builder
*b
,
812 nir_ssa_def
*src
, nir_ssa_def
*insert
,
815 static nir_ssa_def
* vtn_vector_insert_dynamic(struct vtn_builder
*b
,
820 vtn_variable_store(struct vtn_builder
*b
, struct vtn_ssa_value
*src
,
823 nir_deref
*dest_tail
= get_deref_tail(dest
);
824 if (dest_tail
->child
) {
825 struct vtn_ssa_value
*val
= _vtn_variable_load(b
, dest
, dest_tail
);
826 nir_deref_array
*deref
= nir_deref_as_array(dest_tail
->child
);
827 assert(deref
->deref
.child
== NULL
);
828 if (deref
->deref_array_type
== nir_deref_array_type_direct
)
829 val
->def
= vtn_vector_insert(b
, val
->def
, src
->def
,
832 val
->def
= vtn_vector_insert_dynamic(b
, val
->def
, src
->def
,
833 deref
->indirect
.ssa
);
834 _vtn_variable_store(b
, dest
, dest_tail
, val
);
836 _vtn_variable_store(b
, dest
, dest_tail
, src
);
841 vtn_variable_copy(struct vtn_builder
*b
, nir_deref_var
*src
,
844 nir_deref
*src_tail
= get_deref_tail(src
);
846 if (src_tail
->child
) {
847 assert(get_deref_tail(dest
)->child
);
848 struct vtn_ssa_value
*val
= vtn_variable_load(b
, src
);
849 vtn_variable_store(b
, val
, dest
);
851 nir_intrinsic_instr
*copy
=
852 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_copy_var
);
853 copy
->variables
[0] = nir_deref_as_var(nir_copy_deref(copy
, &dest
->deref
));
854 copy
->variables
[1] = nir_deref_as_var(nir_copy_deref(copy
, &src
->deref
));
856 nir_builder_instr_insert(&b
->nb
, ©
->instr
);
861 vtn_handle_variables(struct vtn_builder
*b
, SpvOp opcode
,
862 const uint32_t *w
, unsigned count
)
865 case SpvOpVariable
: {
866 const struct glsl_type
*type
=
867 vtn_value(b
, w
[1], vtn_value_type_type
)->type
;
868 struct vtn_value
*val
= vtn_push_value(b
, w
[2], vtn_value_type_deref
);
870 nir_variable
*var
= ralloc(b
->shader
, nir_variable
);
873 var
->name
= ralloc_strdup(var
, val
->name
);
875 switch ((SpvStorageClass
)w
[3]) {
876 case SpvStorageClassUniform
:
877 case SpvStorageClassUniformConstant
:
878 var
->data
.mode
= nir_var_uniform
;
879 var
->data
.read_only
= true;
880 var
->interface_type
= type
;
882 case SpvStorageClassInput
:
883 var
->data
.mode
= nir_var_shader_in
;
884 var
->data
.read_only
= true;
886 case SpvStorageClassOutput
:
887 var
->data
.mode
= nir_var_shader_out
;
889 case SpvStorageClassPrivateGlobal
:
890 var
->data
.mode
= nir_var_global
;
892 case SpvStorageClassFunction
:
893 var
->data
.mode
= nir_var_local
;
895 case SpvStorageClassWorkgroupLocal
:
896 case SpvStorageClassWorkgroupGlobal
:
897 case SpvStorageClassGeneric
:
898 case SpvStorageClassPrivate
:
899 case SpvStorageClassAtomicCounter
:
901 unreachable("Unhandled variable storage class");
906 var
->constant_initializer
=
907 vtn_value(b
, w
[4], vtn_value_type_constant
)->constant
;
910 val
->deref
= nir_deref_var_create(b
, var
);
912 vtn_foreach_decoration(b
, val
, var_decoration_cb
, var
);
914 if (b
->execution_model
== SpvExecutionModelFragment
&&
915 var
->data
.mode
== nir_var_shader_out
) {
916 var
->data
.location
+= FRAG_RESULT_DATA0
;
917 } else if (b
->execution_model
== SpvExecutionModelVertex
&&
918 var
->data
.mode
== nir_var_shader_in
) {
919 var
->data
.location
+= VERT_ATTRIB_GENERIC0
;
920 } else if (var
->data
.mode
== nir_var_shader_in
||
921 var
->data
.mode
== nir_var_shader_out
) {
922 var
->data
.location
+= VARYING_SLOT_VAR0
;
925 switch (var
->data
.mode
) {
926 case nir_var_shader_in
:
927 exec_list_push_tail(&b
->shader
->inputs
, &var
->node
);
929 case nir_var_shader_out
:
930 exec_list_push_tail(&b
->shader
->outputs
, &var
->node
);
933 exec_list_push_tail(&b
->shader
->globals
, &var
->node
);
936 exec_list_push_tail(&b
->impl
->locals
, &var
->node
);
938 case nir_var_uniform
:
939 exec_list_push_tail(&b
->shader
->uniforms
, &var
->node
);
941 case nir_var_system_value
:
942 exec_list_push_tail(&b
->shader
->system_values
, &var
->node
);
948 case SpvOpAccessChain
:
949 case SpvOpInBoundsAccessChain
: {
950 struct vtn_value
*val
= vtn_push_value(b
, w
[2], vtn_value_type_deref
);
951 nir_deref_var
*base
= vtn_value(b
, w
[3], vtn_value_type_deref
)->deref
;
952 val
->deref
= nir_deref_as_var(nir_copy_deref(b
, &base
->deref
));
954 nir_deref
*tail
= &val
->deref
->deref
;
958 for (unsigned i
= 0; i
< count
- 4; i
++) {
959 assert(w
[i
+ 4] < b
->value_id_bound
);
960 struct vtn_value
*idx_val
= &b
->values
[w
[i
+ 4]];
962 enum glsl_base_type base_type
= glsl_get_base_type(tail
->type
);
966 case GLSL_TYPE_FLOAT
:
967 case GLSL_TYPE_DOUBLE
:
969 case GLSL_TYPE_ARRAY
: {
970 nir_deref_array
*deref_arr
= nir_deref_array_create(b
);
971 if (base_type
== GLSL_TYPE_ARRAY
) {
972 deref_arr
->deref
.type
= glsl_get_array_element(tail
->type
);
973 } else if (glsl_type_is_matrix(tail
->type
)) {
974 deref_arr
->deref
.type
= glsl_get_column_type(tail
->type
);
976 assert(glsl_type_is_vector(tail
->type
));
977 deref_arr
->deref
.type
= glsl_scalar_type(base_type
);
980 if (idx_val
->value_type
== vtn_value_type_constant
) {
981 unsigned idx
= idx_val
->constant
->value
.u
[0];
982 deref_arr
->deref_array_type
= nir_deref_array_type_direct
;
983 deref_arr
->base_offset
= idx
;
985 assert(idx_val
->value_type
== vtn_value_type_ssa
);
986 deref_arr
->deref_array_type
= nir_deref_array_type_indirect
;
987 deref_arr
->base_offset
= 0;
988 deref_arr
->indirect
=
989 nir_src_for_ssa(vtn_ssa_value(b
, w
[1])->def
);
991 tail
->child
= &deref_arr
->deref
;
995 case GLSL_TYPE_STRUCT
: {
996 assert(idx_val
->value_type
== vtn_value_type_constant
);
997 unsigned idx
= idx_val
->constant
->value
.u
[0];
998 nir_deref_struct
*deref_struct
= nir_deref_struct_create(b
, idx
);
999 deref_struct
->deref
.type
= glsl_get_struct_field(tail
->type
, idx
);
1000 tail
->child
= &deref_struct
->deref
;
1004 unreachable("Invalid type for deref");
1011 case SpvOpCopyMemory
: {
1012 nir_deref_var
*dest
= vtn_value(b
, w
[1], vtn_value_type_deref
)->deref
;
1013 nir_deref_var
*src
= vtn_value(b
, w
[2], vtn_value_type_deref
)->deref
;
1015 vtn_variable_copy(b
, src
, dest
);
1020 nir_deref_var
*src
= vtn_value(b
, w
[3], vtn_value_type_deref
)->deref
;
1021 const struct glsl_type
*src_type
= nir_deref_tail(&src
->deref
)->type
;
1023 if (glsl_get_base_type(src_type
) == GLSL_TYPE_SAMPLER
) {
1024 vtn_push_value(b
, w
[2], vtn_value_type_deref
)->deref
= src
;
1028 struct vtn_value
*val
= vtn_push_value(b
, w
[2], vtn_value_type_ssa
);
1029 val
->ssa
= vtn_variable_load(b
, src
);
1034 nir_deref_var
*dest
= vtn_value(b
, w
[1], vtn_value_type_deref
)->deref
;
1035 struct vtn_ssa_value
*src
= vtn_ssa_value(b
, w
[2]);
1036 vtn_variable_store(b
, src
, dest
);
1040 case SpvOpVariableArray
:
1041 case SpvOpCopyMemorySized
:
1042 case SpvOpArrayLength
:
1043 case SpvOpImagePointer
:
1045 unreachable("Unhandled opcode");
1050 vtn_handle_function_call(struct vtn_builder
*b
, SpvOp opcode
,
1051 const uint32_t *w
, unsigned count
)
1053 unreachable("Unhandled opcode");
1057 vtn_tex_src(struct vtn_builder
*b
, unsigned index
, nir_tex_src_type type
)
1060 src
.src
= nir_src_for_ssa(vtn_value(b
, index
, vtn_value_type_ssa
)->ssa
->def
);
1061 src
.src_type
= type
;
1066 vtn_handle_texture(struct vtn_builder
*b
, SpvOp opcode
,
1067 const uint32_t *w
, unsigned count
)
1069 struct vtn_value
*val
= vtn_push_value(b
, w
[2], vtn_value_type_ssa
);
1070 nir_deref_var
*sampler
= vtn_value(b
, w
[3], vtn_value_type_deref
)->deref
;
1072 nir_tex_src srcs
[8]; /* 8 should be enough */
1073 nir_tex_src
*p
= srcs
;
1075 unsigned coord_components
= 0;
1077 case SpvOpTextureSample
:
1078 case SpvOpTextureSampleDref
:
1079 case SpvOpTextureSampleLod
:
1080 case SpvOpTextureSampleProj
:
1081 case SpvOpTextureSampleGrad
:
1082 case SpvOpTextureSampleOffset
:
1083 case SpvOpTextureSampleProjLod
:
1084 case SpvOpTextureSampleProjGrad
:
1085 case SpvOpTextureSampleLodOffset
:
1086 case SpvOpTextureSampleProjOffset
:
1087 case SpvOpTextureSampleGradOffset
:
1088 case SpvOpTextureSampleProjLodOffset
:
1089 case SpvOpTextureSampleProjGradOffset
:
1090 case SpvOpTextureFetchTexelLod
:
1091 case SpvOpTextureFetchTexelOffset
:
1092 case SpvOpTextureFetchSample
:
1093 case SpvOpTextureFetchTexel
:
1094 case SpvOpTextureGather
:
1095 case SpvOpTextureGatherOffset
:
1096 case SpvOpTextureGatherOffsets
:
1097 case SpvOpTextureQueryLod
: {
1098 /* All these types have the coordinate as their first real argument */
1099 struct vtn_ssa_value
*coord
= vtn_ssa_value(b
, w
[4]);
1100 coord_components
= glsl_get_vector_elements(coord
->type
);
1101 p
->src
= nir_src_for_ssa(coord
->def
);
1102 p
->src_type
= nir_tex_src_coord
;
1113 case SpvOpTextureSample
:
1114 texop
= nir_texop_tex
;
1117 texop
= nir_texop_txb
;
1118 *p
++ = vtn_tex_src(b
, w
[5], nir_tex_src_bias
);
1122 case SpvOpTextureSampleDref
:
1123 case SpvOpTextureSampleLod
:
1124 case SpvOpTextureSampleProj
:
1125 case SpvOpTextureSampleGrad
:
1126 case SpvOpTextureSampleOffset
:
1127 case SpvOpTextureSampleProjLod
:
1128 case SpvOpTextureSampleProjGrad
:
1129 case SpvOpTextureSampleLodOffset
:
1130 case SpvOpTextureSampleProjOffset
:
1131 case SpvOpTextureSampleGradOffset
:
1132 case SpvOpTextureSampleProjLodOffset
:
1133 case SpvOpTextureSampleProjGradOffset
:
1134 case SpvOpTextureFetchTexelLod
:
1135 case SpvOpTextureFetchTexelOffset
:
1136 case SpvOpTextureFetchSample
:
1137 case SpvOpTextureFetchTexel
:
1138 case SpvOpTextureGather
:
1139 case SpvOpTextureGatherOffset
:
1140 case SpvOpTextureGatherOffsets
:
1141 case SpvOpTextureQuerySizeLod
:
1142 case SpvOpTextureQuerySize
:
1143 case SpvOpTextureQueryLod
:
1144 case SpvOpTextureQueryLevels
:
1145 case SpvOpTextureQuerySamples
:
1147 unreachable("Unhandled opcode");
1150 nir_tex_instr
*instr
= nir_tex_instr_create(b
->shader
, p
- srcs
);
1152 const struct glsl_type
*sampler_type
= nir_deref_tail(&sampler
->deref
)->type
;
1153 instr
->sampler_dim
= glsl_get_sampler_dim(sampler_type
);
1155 switch (glsl_get_sampler_result_type(sampler_type
)) {
1156 case GLSL_TYPE_FLOAT
: instr
->dest_type
= nir_type_float
; break;
1157 case GLSL_TYPE_INT
: instr
->dest_type
= nir_type_int
; break;
1158 case GLSL_TYPE_UINT
: instr
->dest_type
= nir_type_unsigned
; break;
1159 case GLSL_TYPE_BOOL
: instr
->dest_type
= nir_type_bool
; break;
1161 unreachable("Invalid base type for sampler result");
1165 memcpy(instr
->src
, srcs
, instr
->num_srcs
* sizeof(*instr
->src
));
1166 instr
->coord_components
= coord_components
;
1167 instr
->is_array
= glsl_sampler_type_is_array(sampler_type
);
1168 instr
->is_shadow
= glsl_sampler_type_is_shadow(sampler_type
);
1170 instr
->sampler
= nir_deref_as_var(nir_copy_deref(instr
, &sampler
->deref
));
1172 nir_ssa_dest_init(&instr
->instr
, &instr
->dest
, 4, NULL
);
1173 val
->ssa
->def
= &instr
->dest
.ssa
;
1174 val
->ssa
->type
= val
->type
;
1176 nir_builder_instr_insert(&b
->nb
, &instr
->instr
);
1179 static struct vtn_ssa_value
*
1180 vtn_create_ssa_value(struct vtn_builder
*b
, const struct glsl_type
*type
)
1182 struct vtn_ssa_value
*val
= rzalloc(b
, struct vtn_ssa_value
);
1185 if (!glsl_type_is_vector_or_scalar(type
)) {
1186 unsigned elems
= glsl_get_length(type
);
1187 val
->elems
= ralloc_array(b
, struct vtn_ssa_value
*, elems
);
1188 for (unsigned i
= 0; i
< elems
; i
++) {
1189 const struct glsl_type
*child_type
;
1191 switch (glsl_get_base_type(type
)) {
1193 case GLSL_TYPE_UINT
:
1194 case GLSL_TYPE_BOOL
:
1195 case GLSL_TYPE_FLOAT
:
1196 case GLSL_TYPE_DOUBLE
:
1197 child_type
= glsl_get_column_type(type
);
1199 case GLSL_TYPE_ARRAY
:
1200 child_type
= glsl_get_array_element(type
);
1202 case GLSL_TYPE_STRUCT
:
1203 child_type
= glsl_get_struct_field(type
, i
);
1206 unreachable("unkown base type");
1209 val
->elems
[i
] = vtn_create_ssa_value(b
, child_type
);
1216 static nir_alu_instr
*
1217 create_vec(void *mem_ctx
, unsigned num_components
)
1220 switch (num_components
) {
1221 case 1: op
= nir_op_fmov
; break;
1222 case 2: op
= nir_op_vec2
; break;
1223 case 3: op
= nir_op_vec3
; break;
1224 case 4: op
= nir_op_vec4
; break;
1225 default: unreachable("bad vector size");
1228 nir_alu_instr
*vec
= nir_alu_instr_create(mem_ctx
, op
);
1229 nir_ssa_dest_init(&vec
->instr
, &vec
->dest
.dest
, num_components
, NULL
);
1234 static struct vtn_ssa_value
*
1235 vtn_transpose(struct vtn_builder
*b
, struct vtn_ssa_value
*src
)
1237 if (src
->transposed
)
1238 return src
->transposed
;
1240 struct vtn_ssa_value
*dest
=
1241 vtn_create_ssa_value(b
, glsl_transposed_type(src
->type
));
1243 for (unsigned i
= 0; i
< glsl_get_matrix_columns(dest
->type
); i
++) {
1244 nir_alu_instr
*vec
= create_vec(b
, glsl_get_matrix_columns(src
->type
));
1245 if (glsl_type_is_vector_or_scalar(src
->type
)) {
1246 vec
->src
[0].src
= nir_src_for_ssa(src
->def
);
1247 vec
->src
[0].swizzle
[0] = i
;
1249 for (unsigned j
= 0; j
< glsl_get_matrix_columns(src
->type
); j
++) {
1250 vec
->src
[j
].src
= nir_src_for_ssa(src
->elems
[j
]->def
);
1251 vec
->src
[j
].swizzle
[0] = i
;
1254 nir_builder_instr_insert(&b
->nb
, &vec
->instr
);
1255 dest
->elems
[i
]->def
= &vec
->dest
.dest
.ssa
;
1258 dest
->transposed
= src
;
1264 * Normally, column vectors in SPIR-V correspond to a single NIR SSA
1265 * definition. But for matrix multiplies, we want to do one routine for
1266 * multiplying a matrix by a matrix and then pretend that vectors are matrices
1267 * with one column. So we "wrap" these things, and unwrap the result before we
1271 static struct vtn_ssa_value
*
1272 vtn_wrap_matrix(struct vtn_builder
*b
, struct vtn_ssa_value
*val
)
1277 if (glsl_type_is_matrix(val
->type
))
1280 struct vtn_ssa_value
*dest
= rzalloc(b
, struct vtn_ssa_value
);
1281 dest
->type
= val
->type
;
1282 dest
->elems
= ralloc_array(b
, struct vtn_ssa_value
*, 1);
1283 dest
->elems
[0] = val
;
1288 static struct vtn_ssa_value
*
1289 vtn_unwrap_matrix(struct vtn_ssa_value
*val
)
1291 if (glsl_type_is_matrix(val
->type
))
1294 return val
->elems
[0];
1297 static struct vtn_ssa_value
*
1298 vtn_matrix_multiply(struct vtn_builder
*b
,
1299 struct vtn_ssa_value
*_src0
, struct vtn_ssa_value
*_src1
)
1302 struct vtn_ssa_value
*src0
= vtn_wrap_matrix(b
, _src0
);
1303 struct vtn_ssa_value
*src1
= vtn_wrap_matrix(b
, _src1
);
1304 struct vtn_ssa_value
*src0_transpose
= vtn_wrap_matrix(b
, _src0
->transposed
);
1305 struct vtn_ssa_value
*src1_transpose
= vtn_wrap_matrix(b
, _src1
->transposed
);
1307 unsigned src0_rows
= glsl_get_vector_elements(src0
->type
);
1308 unsigned src0_columns
= glsl_get_matrix_columns(src0
->type
);
1309 unsigned src1_columns
= glsl_get_matrix_columns(src1
->type
);
1311 struct vtn_ssa_value
*dest
=
1312 vtn_create_ssa_value(b
, glsl_matrix_type(glsl_get_base_type(src0
->type
),
1313 src0_rows
, src1_columns
));
1315 dest
= vtn_wrap_matrix(b
, dest
);
1317 bool transpose_result
= false;
1318 if (src0_transpose
&& src1_transpose
) {
1319 /* transpose(A) * transpose(B) = transpose(B * A) */
1320 src1
= src0_transpose
;
1321 src0
= src1_transpose
;
1322 src0_transpose
= NULL
;
1323 src1_transpose
= NULL
;
1324 transpose_result
= true;
1327 if (src0_transpose
&& !src1_transpose
&&
1328 glsl_get_base_type(src0
->type
) == GLSL_TYPE_FLOAT
) {
1329 /* We already have the rows of src0 and the columns of src1 available,
1330 * so we can just take the dot product of each row with each column to
1334 for (unsigned i
= 0; i
< src1_columns
; i
++) {
1335 nir_alu_instr
*vec
= create_vec(b
, src0_rows
);
1336 for (unsigned j
= 0; j
< src0_rows
; j
++) {
1338 nir_src_for_ssa(nir_fdot(&b
->nb
, src0_transpose
->elems
[j
]->def
,
1339 src1
->elems
[i
]->def
));
1342 nir_builder_instr_insert(&b
->nb
, &vec
->instr
);
1343 dest
->elems
[i
]->def
= &vec
->dest
.dest
.ssa
;
1346 /* We don't handle the case where src1 is transposed but not src0, since
1347 * the general case only uses individual components of src1 so the
1348 * optimizer should chew through the transpose we emitted for src1.
1351 for (unsigned i
= 0; i
< src1_columns
; i
++) {
1352 /* dest[i] = sum(src0[j] * src1[i][j] for all j) */
1353 dest
->elems
[i
]->def
=
1354 nir_fmul(&b
->nb
, src0
->elems
[0]->def
,
1355 vtn_vector_extract(b
, src1
->elems
[i
]->def
, 0));
1356 for (unsigned j
= 1; j
< src0_columns
; j
++) {
1357 dest
->elems
[i
]->def
=
1358 nir_fadd(&b
->nb
, dest
->elems
[i
]->def
,
1359 nir_fmul(&b
->nb
, src0
->elems
[j
]->def
,
1360 vtn_vector_extract(b
,
1361 src1
->elems
[i
]->def
, j
)));
1366 dest
= vtn_unwrap_matrix(dest
);
1368 if (transpose_result
)
1369 dest
= vtn_transpose(b
, dest
);
1374 static struct vtn_ssa_value
*
1375 vtn_mat_times_scalar(struct vtn_builder
*b
,
1376 struct vtn_ssa_value
*mat
,
1377 nir_ssa_def
*scalar
)
1379 struct vtn_ssa_value
*dest
= vtn_create_ssa_value(b
, mat
->type
);
1380 for (unsigned i
= 0; i
< glsl_get_matrix_columns(mat
->type
); i
++) {
1381 if (glsl_get_base_type(mat
->type
) == GLSL_TYPE_FLOAT
)
1382 dest
->elems
[i
]->def
= nir_fmul(&b
->nb
, mat
->elems
[i
]->def
, scalar
);
1384 dest
->elems
[i
]->def
= nir_imul(&b
->nb
, mat
->elems
[i
]->def
, scalar
);
1391 vtn_handle_matrix_alu(struct vtn_builder
*b
, SpvOp opcode
,
1392 const uint32_t *w
, unsigned count
)
1394 struct vtn_value
*val
= vtn_push_value(b
, w
[2], vtn_value_type_ssa
);
1395 val
->type
= vtn_value(b
, w
[1], vtn_value_type_type
)->type
;
1398 case SpvOpTranspose
: {
1399 struct vtn_ssa_value
*src
= vtn_ssa_value(b
, w
[3]);
1400 val
->ssa
= vtn_transpose(b
, src
);
1404 case SpvOpOuterProduct
: {
1405 struct vtn_ssa_value
*src0
= vtn_ssa_value(b
, w
[3]);
1406 struct vtn_ssa_value
*src1
= vtn_ssa_value(b
, w
[4]);
1408 val
->ssa
= vtn_matrix_multiply(b
, src0
, vtn_transpose(b
, src1
));
1412 case SpvOpMatrixTimesScalar
: {
1413 struct vtn_ssa_value
*mat
= vtn_ssa_value(b
, w
[3]);
1414 struct vtn_ssa_value
*scalar
= vtn_ssa_value(b
, w
[4]);
1416 if (mat
->transposed
) {
1417 val
->ssa
= vtn_transpose(b
, vtn_mat_times_scalar(b
, mat
->transposed
,
1420 val
->ssa
= vtn_mat_times_scalar(b
, mat
, scalar
->def
);
1425 case SpvOpVectorTimesMatrix
:
1426 case SpvOpMatrixTimesVector
:
1427 case SpvOpMatrixTimesMatrix
: {
1428 struct vtn_ssa_value
*src0
= vtn_ssa_value(b
, w
[3]);
1429 struct vtn_ssa_value
*src1
= vtn_ssa_value(b
, w
[4]);
1431 val
->ssa
= vtn_matrix_multiply(b
, src0
, src1
);
1435 default: unreachable("unknown matrix opcode");
1440 vtn_handle_alu(struct vtn_builder
*b
, SpvOp opcode
,
1441 const uint32_t *w
, unsigned count
)
1443 struct vtn_value
*val
= vtn_push_value(b
, w
[2], vtn_value_type_ssa
);
1444 val
->type
= vtn_value(b
, w
[1], vtn_value_type_type
)->type
;
1445 val
->ssa
= vtn_create_ssa_value(b
, val
->type
);
1447 /* Collect the various SSA sources */
1448 unsigned num_inputs
= count
- 3;
1449 nir_ssa_def
*src
[4];
1450 for (unsigned i
= 0; i
< num_inputs
; i
++)
1451 src
[i
] = vtn_ssa_value(b
, w
[i
+ 3])->def
;
1453 /* Indicates that the first two arguments should be swapped. This is
1454 * used for implementing greater-than and less-than-or-equal.
1460 /* Basic ALU operations */
1461 case SpvOpSNegate
: op
= nir_op_ineg
; break;
1462 case SpvOpFNegate
: op
= nir_op_fneg
; break;
1463 case SpvOpNot
: op
= nir_op_inot
; break;
1466 switch (src
[0]->num_components
) {
1467 case 1: op
= nir_op_imov
; break;
1468 case 2: op
= nir_op_bany2
; break;
1469 case 3: op
= nir_op_bany3
; break;
1470 case 4: op
= nir_op_bany4
; break;
1475 switch (src
[0]->num_components
) {
1476 case 1: op
= nir_op_imov
; break;
1477 case 2: op
= nir_op_ball2
; break;
1478 case 3: op
= nir_op_ball3
; break;
1479 case 4: op
= nir_op_ball4
; break;
1483 case SpvOpIAdd
: op
= nir_op_iadd
; break;
1484 case SpvOpFAdd
: op
= nir_op_fadd
; break;
1485 case SpvOpISub
: op
= nir_op_isub
; break;
1486 case SpvOpFSub
: op
= nir_op_fsub
; break;
1487 case SpvOpIMul
: op
= nir_op_imul
; break;
1488 case SpvOpFMul
: op
= nir_op_fmul
; break;
1489 case SpvOpUDiv
: op
= nir_op_udiv
; break;
1490 case SpvOpSDiv
: op
= nir_op_idiv
; break;
1491 case SpvOpFDiv
: op
= nir_op_fdiv
; break;
1492 case SpvOpUMod
: op
= nir_op_umod
; break;
1493 case SpvOpSMod
: op
= nir_op_umod
; break; /* FIXME? */
1494 case SpvOpFMod
: op
= nir_op_fmod
; break;
1497 assert(src
[0]->num_components
== src
[1]->num_components
);
1498 switch (src
[0]->num_components
) {
1499 case 1: op
= nir_op_fmul
; break;
1500 case 2: op
= nir_op_fdot2
; break;
1501 case 3: op
= nir_op_fdot3
; break;
1502 case 4: op
= nir_op_fdot4
; break;
1506 case SpvOpShiftRightLogical
: op
= nir_op_ushr
; break;
1507 case SpvOpShiftRightArithmetic
: op
= nir_op_ishr
; break;
1508 case SpvOpShiftLeftLogical
: op
= nir_op_ishl
; break;
1509 case SpvOpLogicalOr
: op
= nir_op_ior
; break;
1510 case SpvOpLogicalXor
: op
= nir_op_ixor
; break;
1511 case SpvOpLogicalAnd
: op
= nir_op_iand
; break;
1512 case SpvOpBitwiseOr
: op
= nir_op_ior
; break;
1513 case SpvOpBitwiseXor
: op
= nir_op_ixor
; break;
1514 case SpvOpBitwiseAnd
: op
= nir_op_iand
; break;
1515 case SpvOpSelect
: op
= nir_op_bcsel
; break;
1516 case SpvOpIEqual
: op
= nir_op_ieq
; break;
1518 /* Comparisons: (TODO: How do we want to handled ordered/unordered?) */
1519 case SpvOpFOrdEqual
: op
= nir_op_feq
; break;
1520 case SpvOpFUnordEqual
: op
= nir_op_feq
; break;
1521 case SpvOpINotEqual
: op
= nir_op_ine
; break;
1522 case SpvOpFOrdNotEqual
: op
= nir_op_fne
; break;
1523 case SpvOpFUnordNotEqual
: op
= nir_op_fne
; break;
1524 case SpvOpULessThan
: op
= nir_op_ult
; break;
1525 case SpvOpSLessThan
: op
= nir_op_ilt
; break;
1526 case SpvOpFOrdLessThan
: op
= nir_op_flt
; break;
1527 case SpvOpFUnordLessThan
: op
= nir_op_flt
; break;
1528 case SpvOpUGreaterThan
: op
= nir_op_ult
; swap
= true; break;
1529 case SpvOpSGreaterThan
: op
= nir_op_ilt
; swap
= true; break;
1530 case SpvOpFOrdGreaterThan
: op
= nir_op_flt
; swap
= true; break;
1531 case SpvOpFUnordGreaterThan
: op
= nir_op_flt
; swap
= true; break;
1532 case SpvOpULessThanEqual
: op
= nir_op_uge
; swap
= true; break;
1533 case SpvOpSLessThanEqual
: op
= nir_op_ige
; swap
= true; break;
1534 case SpvOpFOrdLessThanEqual
: op
= nir_op_fge
; swap
= true; break;
1535 case SpvOpFUnordLessThanEqual
: op
= nir_op_fge
; swap
= true; break;
1536 case SpvOpUGreaterThanEqual
: op
= nir_op_uge
; break;
1537 case SpvOpSGreaterThanEqual
: op
= nir_op_ige
; break;
1538 case SpvOpFOrdGreaterThanEqual
: op
= nir_op_fge
; break;
1539 case SpvOpFUnordGreaterThanEqual
:op
= nir_op_fge
; break;
1542 case SpvOpConvertFToU
: op
= nir_op_f2u
; break;
1543 case SpvOpConvertFToS
: op
= nir_op_f2i
; break;
1544 case SpvOpConvertSToF
: op
= nir_op_i2f
; break;
1545 case SpvOpConvertUToF
: op
= nir_op_u2f
; break;
1546 case SpvOpBitcast
: op
= nir_op_imov
; break;
1549 op
= nir_op_imov
; /* TODO: NIR is 32-bit only; these are no-ops. */
1556 case SpvOpDPdx
: op
= nir_op_fddx
; break;
1557 case SpvOpDPdy
: op
= nir_op_fddy
; break;
1558 case SpvOpDPdxFine
: op
= nir_op_fddx_fine
; break;
1559 case SpvOpDPdyFine
: op
= nir_op_fddy_fine
; break;
1560 case SpvOpDPdxCoarse
: op
= nir_op_fddx_coarse
; break;
1561 case SpvOpDPdyCoarse
: op
= nir_op_fddy_coarse
; break;
1563 val
->ssa
->def
= nir_fadd(&b
->nb
,
1564 nir_fabs(&b
->nb
, nir_fddx(&b
->nb
, src
[0])),
1565 nir_fabs(&b
->nb
, nir_fddx(&b
->nb
, src
[1])));
1567 case SpvOpFwidthFine
:
1568 val
->ssa
->def
= nir_fadd(&b
->nb
,
1569 nir_fabs(&b
->nb
, nir_fddx_fine(&b
->nb
, src
[0])),
1570 nir_fabs(&b
->nb
, nir_fddx_fine(&b
->nb
, src
[1])));
1572 case SpvOpFwidthCoarse
:
1573 val
->ssa
->def
= nir_fadd(&b
->nb
,
1574 nir_fabs(&b
->nb
, nir_fddx_coarse(&b
->nb
, src
[0])),
1575 nir_fabs(&b
->nb
, nir_fddx_coarse(&b
->nb
, src
[1])));
1578 case SpvOpVectorTimesScalar
:
1579 /* The builder will take care of splatting for us. */
1580 val
->ssa
->def
= nir_fmul(&b
->nb
, src
[0], src
[1]);
1585 unreachable("No NIR equivalent");
1591 case SpvOpSignBitSet
:
1592 case SpvOpLessOrGreater
:
1594 case SpvOpUnordered
:
1596 unreachable("Unhandled opcode");
1600 nir_ssa_def
*tmp
= src
[0];
1605 nir_alu_instr
*instr
= nir_alu_instr_create(b
->shader
, op
);
1606 nir_ssa_dest_init(&instr
->instr
, &instr
->dest
.dest
,
1607 glsl_get_vector_elements(val
->type
), val
->name
);
1608 val
->ssa
->def
= &instr
->dest
.dest
.ssa
;
1610 for (unsigned i
= 0; i
< nir_op_infos
[op
].num_inputs
; i
++)
1611 instr
->src
[i
].src
= nir_src_for_ssa(src
[i
]);
1613 nir_builder_instr_insert(&b
->nb
, &instr
->instr
);
1616 static nir_ssa_def
*
1617 vtn_vector_extract(struct vtn_builder
*b
, nir_ssa_def
*src
, unsigned index
)
1619 unsigned swiz
[4] = { index
};
1620 return nir_swizzle(&b
->nb
, src
, swiz
, 1, true);
1624 static nir_ssa_def
*
1625 vtn_vector_insert(struct vtn_builder
*b
, nir_ssa_def
*src
, nir_ssa_def
*insert
,
1628 nir_alu_instr
*vec
= create_vec(b
->shader
, src
->num_components
);
1630 for (unsigned i
= 0; i
< src
->num_components
; i
++) {
1632 vec
->src
[i
].src
= nir_src_for_ssa(insert
);
1634 vec
->src
[i
].src
= nir_src_for_ssa(src
);
1635 vec
->src
[i
].swizzle
[0] = i
;
1639 nir_builder_instr_insert(&b
->nb
, &vec
->instr
);
1641 return &vec
->dest
.dest
.ssa
;
1644 static nir_ssa_def
*
1645 vtn_vector_extract_dynamic(struct vtn_builder
*b
, nir_ssa_def
*src
,
1648 nir_ssa_def
*dest
= vtn_vector_extract(b
, src
, 0);
1649 for (unsigned i
= 1; i
< src
->num_components
; i
++)
1650 dest
= nir_bcsel(&b
->nb
, nir_ieq(&b
->nb
, index
, nir_imm_int(&b
->nb
, i
)),
1651 vtn_vector_extract(b
, src
, i
), dest
);
1656 static nir_ssa_def
*
1657 vtn_vector_insert_dynamic(struct vtn_builder
*b
, nir_ssa_def
*src
,
1658 nir_ssa_def
*insert
, nir_ssa_def
*index
)
1660 nir_ssa_def
*dest
= vtn_vector_insert(b
, src
, insert
, 0);
1661 for (unsigned i
= 1; i
< src
->num_components
; i
++)
1662 dest
= nir_bcsel(&b
->nb
, nir_ieq(&b
->nb
, index
, nir_imm_int(&b
->nb
, i
)),
1663 vtn_vector_insert(b
, src
, insert
, i
), dest
);
1668 static nir_ssa_def
*
1669 vtn_vector_shuffle(struct vtn_builder
*b
, unsigned num_components
,
1670 nir_ssa_def
*src0
, nir_ssa_def
*src1
,
1671 const uint32_t *indices
)
1673 nir_alu_instr
*vec
= create_vec(b
->shader
, num_components
);
1675 nir_ssa_undef_instr
*undef
= nir_ssa_undef_instr_create(b
->shader
, 1);
1676 nir_builder_instr_insert(&b
->nb
, &undef
->instr
);
1678 for (unsigned i
= 0; i
< num_components
; i
++) {
1679 uint32_t index
= indices
[i
];
1680 if (index
== 0xffffffff) {
1681 vec
->src
[i
].src
= nir_src_for_ssa(&undef
->def
);
1682 } else if (index
< src0
->num_components
) {
1683 vec
->src
[i
].src
= nir_src_for_ssa(src0
);
1684 vec
->src
[i
].swizzle
[0] = index
;
1686 vec
->src
[i
].src
= nir_src_for_ssa(src1
);
1687 vec
->src
[i
].swizzle
[0] = index
- src0
->num_components
;
1691 nir_builder_instr_insert(&b
->nb
, &vec
->instr
);
1693 return &vec
->dest
.dest
.ssa
;
1697 * Concatenates a number of vectors/scalars together to produce a vector
1699 static nir_ssa_def
*
1700 vtn_vector_construct(struct vtn_builder
*b
, unsigned num_components
,
1701 unsigned num_srcs
, nir_ssa_def
**srcs
)
1703 nir_alu_instr
*vec
= create_vec(b
->shader
, num_components
);
1705 unsigned dest_idx
= 0;
1706 for (unsigned i
= 0; i
< num_srcs
; i
++) {
1707 nir_ssa_def
*src
= srcs
[i
];
1708 for (unsigned j
= 0; j
< src
->num_components
; j
++) {
1709 vec
->src
[dest_idx
].src
= nir_src_for_ssa(src
);
1710 vec
->src
[dest_idx
].swizzle
[0] = j
;
1715 nir_builder_instr_insert(&b
->nb
, &vec
->instr
);
1717 return &vec
->dest
.dest
.ssa
;
1720 static struct vtn_ssa_value
*
1721 vtn_composite_copy(void *mem_ctx
, struct vtn_ssa_value
*src
)
1723 struct vtn_ssa_value
*dest
= rzalloc(mem_ctx
, struct vtn_ssa_value
);
1724 dest
->type
= src
->type
;
1726 if (glsl_type_is_vector_or_scalar(src
->type
)) {
1727 dest
->def
= src
->def
;
1729 unsigned elems
= glsl_get_length(src
->type
);
1731 dest
->elems
= ralloc_array(mem_ctx
, struct vtn_ssa_value
*, elems
);
1732 for (unsigned i
= 0; i
< elems
; i
++)
1733 dest
->elems
[i
] = vtn_composite_copy(mem_ctx
, src
->elems
[i
]);
1739 static struct vtn_ssa_value
*
1740 vtn_composite_insert(struct vtn_builder
*b
, struct vtn_ssa_value
*src
,
1741 struct vtn_ssa_value
*insert
, const uint32_t *indices
,
1742 unsigned num_indices
)
1744 struct vtn_ssa_value
*dest
= vtn_composite_copy(b
, src
);
1746 struct vtn_ssa_value
*cur
= dest
;
1748 for (i
= 0; i
< num_indices
- 1; i
++) {
1749 cur
= cur
->elems
[indices
[i
]];
1752 if (glsl_type_is_vector_or_scalar(cur
->type
)) {
1753 /* According to the SPIR-V spec, OpCompositeInsert may work down to
1754 * the component granularity. In that case, the last index will be
1755 * the index to insert the scalar into the vector.
1758 cur
->def
= vtn_vector_insert(b
, cur
->def
, insert
->def
, indices
[i
]);
1760 cur
->elems
[indices
[i
]] = insert
;
1766 static struct vtn_ssa_value
*
1767 vtn_composite_extract(struct vtn_builder
*b
, struct vtn_ssa_value
*src
,
1768 const uint32_t *indices
, unsigned num_indices
)
1770 struct vtn_ssa_value
*cur
= src
;
1771 for (unsigned i
= 0; i
< num_indices
; i
++) {
1772 if (glsl_type_is_vector_or_scalar(cur
->type
)) {
1773 assert(i
== num_indices
- 1);
1774 /* According to the SPIR-V spec, OpCompositeExtract may work down to
1775 * the component granularity. The last index will be the index of the
1776 * vector to extract.
1779 struct vtn_ssa_value
*ret
= rzalloc(b
, struct vtn_ssa_value
);
1780 ret
->type
= glsl_scalar_type(glsl_get_base_type(cur
->type
));
1781 ret
->def
= vtn_vector_extract(b
, cur
->def
, indices
[i
]);
1790 vtn_handle_composite(struct vtn_builder
*b
, SpvOp opcode
,
1791 const uint32_t *w
, unsigned count
)
1793 struct vtn_value
*val
= vtn_push_value(b
, w
[2], vtn_value_type_ssa
);
1794 val
->type
= vtn_value(b
, w
[1], vtn_value_type_type
)->type
;
1797 case SpvOpVectorExtractDynamic
:
1798 val
->ssa
->def
= vtn_vector_extract_dynamic(b
, vtn_ssa_value(b
, w
[3])->def
,
1799 vtn_ssa_value(b
, w
[4])->def
);
1802 case SpvOpVectorInsertDynamic
:
1803 val
->ssa
->def
= vtn_vector_insert_dynamic(b
, vtn_ssa_value(b
, w
[3])->def
,
1804 vtn_ssa_value(b
, w
[4])->def
,
1805 vtn_ssa_value(b
, w
[5])->def
);
1808 case SpvOpVectorShuffle
:
1809 val
->ssa
->def
= vtn_vector_shuffle(b
, glsl_get_vector_elements(val
->type
),
1810 vtn_ssa_value(b
, w
[3])->def
,
1811 vtn_ssa_value(b
, w
[4])->def
,
1815 case SpvOpCompositeConstruct
: {
1816 val
->ssa
= rzalloc(b
, struct vtn_ssa_value
);
1817 unsigned elems
= count
- 3;
1818 if (glsl_type_is_vector_or_scalar(val
->type
)) {
1819 nir_ssa_def
*srcs
[4];
1820 for (unsigned i
= 0; i
< elems
; i
++)
1821 srcs
[i
] = vtn_ssa_value(b
, w
[3 + i
])->def
;
1823 vtn_vector_construct(b
, glsl_get_vector_elements(val
->type
),
1826 val
->ssa
->elems
= ralloc_array(b
, struct vtn_ssa_value
*, elems
);
1827 for (unsigned i
= 0; i
< elems
; i
++)
1828 val
->ssa
->elems
[i
] = vtn_ssa_value(b
, w
[3 + i
]);
1832 case SpvOpCompositeExtract
:
1833 val
->ssa
= vtn_composite_extract(b
, vtn_ssa_value(b
, w
[3]),
1837 case SpvOpCompositeInsert
:
1838 val
->ssa
= vtn_composite_insert(b
, vtn_ssa_value(b
, w
[4]),
1839 vtn_ssa_value(b
, w
[3]),
1843 case SpvOpCopyObject
:
1844 val
->ssa
= vtn_composite_copy(b
, vtn_ssa_value(b
, w
[3]));
1848 unreachable("unknown composite operation");
1851 val
->ssa
->type
= val
->type
;
1855 vtn_phi_node_init(struct vtn_builder
*b
, struct vtn_ssa_value
*val
)
1857 if (glsl_type_is_vector_or_scalar(val
->type
)) {
1858 nir_phi_instr
*phi
= nir_phi_instr_create(b
->shader
);
1859 nir_ssa_dest_init(&phi
->instr
, &phi
->dest
,
1860 glsl_get_vector_elements(val
->type
), NULL
);
1861 exec_list_make_empty(&phi
->srcs
);
1862 nir_builder_instr_insert(&b
->nb
, &phi
->instr
);
1863 val
->def
= &phi
->dest
.ssa
;
1865 unsigned elems
= glsl_get_length(val
->type
);
1866 for (unsigned i
= 0; i
< elems
; i
++)
1867 vtn_phi_node_init(b
, val
->elems
[i
]);
/* Creates a value of the given type whose leaves are all freshly emitted,
 * source-less phi instructions.
 */
static struct vtn_ssa_value *
vtn_phi_node_create(struct vtn_builder *b, const struct glsl_type *type)
{
   struct vtn_ssa_value *phi = vtn_create_ssa_value(b, type);

   vtn_phi_node_init(b, phi);

   return phi;
}
1880 vtn_handle_phi_first_pass(struct vtn_builder
*b
, const uint32_t *w
)
1882 struct vtn_value
*val
= vtn_push_value(b
, w
[2], vtn_value_type_ssa
);
1883 val
->type
= vtn_value(b
, w
[1], vtn_value_type_type
)->type
;
1884 val
->ssa
= vtn_phi_node_create(b
, val
->type
);
1888 vtn_phi_node_add_src(struct vtn_ssa_value
*phi
, const nir_block
*pred
,
1889 struct vtn_ssa_value
*val
)
1891 assert(phi
->type
== val
->type
);
1892 if (glsl_type_is_vector_or_scalar(phi
->type
)) {
1893 nir_phi_instr
*phi_instr
= nir_instr_as_phi(phi
->def
->parent_instr
);
1894 nir_phi_src
*src
= ralloc(phi_instr
, nir_phi_src
);
1895 src
->pred
= (nir_block
*) pred
;
1896 src
->src
= nir_src_for_ssa(val
->def
);
1897 exec_list_push_tail(&phi_instr
->srcs
, &src
->node
);
1899 unsigned elems
= glsl_get_length(phi
->type
);
1900 for (unsigned i
= 0; i
< elems
; i
++)
1901 vtn_phi_node_add_src(phi
->elems
[i
], pred
, val
->elems
[i
]);
1905 static struct vtn_ssa_value
*
1906 vtn_get_phi_node_src(struct vtn_builder
*b
, nir_block
*block
,
1907 const struct glsl_type
*type
, const uint32_t *w
,
1910 struct hash_entry
*entry
= _mesa_hash_table_search(b
->block_table
, block
);
1912 struct vtn_block
*spv_block
= entry
->data
;
1913 for (unsigned off
= 4; off
< count
; off
+= 2) {
1914 if (spv_block
== vtn_value(b
, w
[off
], vtn_value_type_block
)->block
) {
1915 return vtn_ssa_value(b
, w
[off
- 1]);
1920 nir_builder_insert_before_block(&b
->nb
, block
);
1921 struct vtn_ssa_value
*phi
= vtn_phi_node_create(b
, type
);
1923 struct set_entry
*entry2
;
1924 set_foreach(block
->predecessors
, entry2
) {
1925 nir_block
*pred
= (nir_block
*) entry2
->key
;
1926 struct vtn_ssa_value
*val
= vtn_get_phi_node_src(b
, pred
, type
, w
,
1928 vtn_phi_node_add_src(phi
, pred
, val
);
1935 vtn_handle_phi_second_pass(struct vtn_builder
*b
, SpvOp opcode
,
1936 const uint32_t *w
, unsigned count
)
1938 if (opcode
== SpvOpLabel
) {
1939 b
->block
= vtn_value(b
, w
[1], vtn_value_type_block
)->block
;
1943 if (opcode
!= SpvOpPhi
)
1946 struct vtn_ssa_value
*phi
= vtn_value(b
, w
[2], vtn_value_type_ssa
)->ssa
;
1948 struct set_entry
*entry
;
1949 set_foreach(b
->block
->block
->predecessors
, entry
) {
1950 nir_block
*pred
= (nir_block
*) entry
->key
;
1952 struct vtn_ssa_value
*val
= vtn_get_phi_node_src(b
, pred
, phi
->type
, w
,
1954 vtn_phi_node_add_src(phi
, pred
, val
);
1961 vtn_handle_preamble_instruction(struct vtn_builder
*b
, SpvOp opcode
,
1962 const uint32_t *w
, unsigned count
)
1966 case SpvOpSourceExtension
:
1967 case SpvOpCompileFlag
:
1968 case SpvOpExtension
:
1969 /* Unhandled, but these are for debug so that's ok. */
1972 case SpvOpExtInstImport
:
1973 vtn_handle_extension(b
, opcode
, w
, count
);
1976 case SpvOpMemoryModel
:
1977 assert(w
[1] == SpvAddressingModelLogical
);
1978 assert(w
[2] == SpvMemoryModelGLSL450
);
1981 case SpvOpEntryPoint
:
1982 assert(b
->entry_point
== NULL
);
1983 b
->entry_point
= &b
->values
[w
[2]];
1984 b
->execution_model
= w
[1];
1987 case SpvOpExecutionMode
:
1988 unreachable("Execution modes not yet implemented");
1992 vtn_push_value(b
, w
[1], vtn_value_type_string
)->str
=
1993 vtn_string_literal(b
, &w
[2], count
- 2);
1997 b
->values
[w
[1]].name
= vtn_string_literal(b
, &w
[2], count
- 2);
2000 case SpvOpMemberName
:
2005 break; /* Ignored for now */
2007 case SpvOpDecorationGroup
:
2009 case SpvOpMemberDecorate
:
2010 case SpvOpGroupDecorate
:
2011 case SpvOpGroupMemberDecorate
:
2012 vtn_handle_decoration(b
, opcode
, w
, count
);
2018 case SpvOpTypeFloat
:
2019 case SpvOpTypeVector
:
2020 case SpvOpTypeMatrix
:
2021 case SpvOpTypeSampler
:
2022 case SpvOpTypeArray
:
2023 case SpvOpTypeRuntimeArray
:
2024 case SpvOpTypeStruct
:
2025 case SpvOpTypeOpaque
:
2026 case SpvOpTypePointer
:
2027 case SpvOpTypeFunction
:
2028 case SpvOpTypeEvent
:
2029 case SpvOpTypeDeviceEvent
:
2030 case SpvOpTypeReserveId
:
2031 case SpvOpTypeQueue
:
2033 vtn_handle_type(b
, opcode
, w
, count
);
2036 case SpvOpConstantTrue
:
2037 case SpvOpConstantFalse
:
2039 case SpvOpConstantComposite
:
2040 case SpvOpConstantSampler
:
2041 case SpvOpConstantNullPointer
:
2042 case SpvOpConstantNullObject
:
2043 case SpvOpSpecConstantTrue
:
2044 case SpvOpSpecConstantFalse
:
2045 case SpvOpSpecConstant
:
2046 case SpvOpSpecConstantComposite
:
2047 vtn_handle_constant(b
, opcode
, w
, count
);
2051 vtn_handle_variables(b
, opcode
, w
, count
);
2055 return false; /* End of preamble */
2062 vtn_handle_first_cfg_pass_instruction(struct vtn_builder
*b
, SpvOp opcode
,
2063 const uint32_t *w
, unsigned count
)
2066 case SpvOpFunction
: {
2067 assert(b
->func
== NULL
);
2068 b
->func
= rzalloc(b
, struct vtn_function
);
2070 const struct glsl_type
*result_type
=
2071 vtn_value(b
, w
[1], vtn_value_type_type
)->type
;
2072 struct vtn_value
*val
= vtn_push_value(b
, w
[2], vtn_value_type_function
);
2073 const struct glsl_type
*func_type
=
2074 vtn_value(b
, w
[4], vtn_value_type_type
)->type
;
2076 assert(glsl_get_function_return_type(func_type
) == result_type
);
2078 nir_function
*func
=
2079 nir_function_create(b
->shader
, ralloc_strdup(b
->shader
, val
->name
));
2081 nir_function_overload
*overload
= nir_function_overload_create(func
);
2082 overload
->num_params
= glsl_get_length(func_type
);
2083 overload
->params
= ralloc_array(overload
, nir_parameter
,
2084 overload
->num_params
);
2085 for (unsigned i
= 0; i
< overload
->num_params
; i
++) {
2086 const struct glsl_function_param
*param
=
2087 glsl_get_function_param(func_type
, i
);
2088 overload
->params
[i
].type
= param
->type
;
2091 overload
->params
[i
].param_type
= nir_parameter_inout
;
2093 overload
->params
[i
].param_type
= nir_parameter_in
;
2097 overload
->params
[i
].param_type
= nir_parameter_out
;
2099 assert(!"Parameter is neither in nor out");
2103 b
->func
->overload
= overload
;
2107 case SpvOpFunctionEnd
:
2112 case SpvOpFunctionParameter
:
2113 break; /* Does nothing */
2116 assert(b
->block
== NULL
);
2117 b
->block
= rzalloc(b
, struct vtn_block
);
2118 b
->block
->label
= w
;
2119 vtn_push_value(b
, w
[1], vtn_value_type_block
)->block
= b
->block
;
2121 if (b
->func
->start_block
== NULL
) {
2122 /* This is the first block encountered for this function. In this
2123 * case, we set the start block and add it to the list of
2124 * implemented functions that we'll walk later.
2126 b
->func
->start_block
= b
->block
;
2127 exec_list_push_tail(&b
->functions
, &b
->func
->node
);
2133 case SpvOpBranchConditional
:
2137 case SpvOpReturnValue
:
2138 case SpvOpUnreachable
:
2140 b
->block
->branch
= w
;
2144 case SpvOpSelectionMerge
:
2145 case SpvOpLoopMerge
:
2146 assert(b
->block
&& b
->block
->merge_op
== SpvOpNop
);
2147 b
->block
->merge_op
= opcode
;
2148 b
->block
->merge_block_id
= w
[1];
2152 /* Continue on as per normal */
2160 vtn_handle_body_instruction(struct vtn_builder
*b
, SpvOp opcode
,
2161 const uint32_t *w
, unsigned count
)
2165 struct vtn_block
*block
= vtn_value(b
, w
[1], vtn_value_type_block
)->block
;
2166 assert(block
->block
== NULL
);
2168 struct exec_node
*list_tail
= exec_list_get_tail(b
->nb
.cf_node_list
);
2169 nir_cf_node
*tail_node
= exec_node_data(nir_cf_node
, list_tail
, node
);
2170 assert(tail_node
->type
== nir_cf_node_block
);
2171 block
->block
= nir_cf_node_as_block(tail_node
);
2175 case SpvOpLoopMerge
:
2176 case SpvOpSelectionMerge
:
2177 /* This is handled by cfg pre-pass and walk_blocks */
2181 vtn_push_value(b
, w
[2], vtn_value_type_undef
);
2185 vtn_handle_extension(b
, opcode
, w
, count
);
2189 case SpvOpVariableArray
:
2192 case SpvOpCopyMemory
:
2193 case SpvOpCopyMemorySized
:
2194 case SpvOpAccessChain
:
2195 case SpvOpInBoundsAccessChain
:
2196 case SpvOpArrayLength
:
2197 case SpvOpImagePointer
:
2198 vtn_handle_variables(b
, opcode
, w
, count
);
2201 case SpvOpFunctionCall
:
2202 vtn_handle_function_call(b
, opcode
, w
, count
);
2205 case SpvOpTextureSample
:
2206 case SpvOpTextureSampleDref
:
2207 case SpvOpTextureSampleLod
:
2208 case SpvOpTextureSampleProj
:
2209 case SpvOpTextureSampleGrad
:
2210 case SpvOpTextureSampleOffset
:
2211 case SpvOpTextureSampleProjLod
:
2212 case SpvOpTextureSampleProjGrad
:
2213 case SpvOpTextureSampleLodOffset
:
2214 case SpvOpTextureSampleProjOffset
:
2215 case SpvOpTextureSampleGradOffset
:
2216 case SpvOpTextureSampleProjLodOffset
:
2217 case SpvOpTextureSampleProjGradOffset
:
2218 case SpvOpTextureFetchTexelLod
:
2219 case SpvOpTextureFetchTexelOffset
:
2220 case SpvOpTextureFetchSample
:
2221 case SpvOpTextureFetchTexel
:
2222 case SpvOpTextureGather
:
2223 case SpvOpTextureGatherOffset
:
2224 case SpvOpTextureGatherOffsets
:
2225 case SpvOpTextureQuerySizeLod
:
2226 case SpvOpTextureQuerySize
:
2227 case SpvOpTextureQueryLod
:
2228 case SpvOpTextureQueryLevels
:
2229 case SpvOpTextureQuerySamples
:
2230 vtn_handle_texture(b
, opcode
, w
, count
);
2238 case SpvOpConvertFToU
:
2239 case SpvOpConvertFToS
:
2240 case SpvOpConvertSToF
:
2241 case SpvOpConvertUToF
:
2245 case SpvOpConvertPtrToU
:
2246 case SpvOpConvertUToPtr
:
2247 case SpvOpPtrCastToGeneric
:
2248 case SpvOpGenericCastToPtr
:
2254 case SpvOpSignBitSet
:
2255 case SpvOpLessOrGreater
:
2257 case SpvOpUnordered
:
2272 case SpvOpVectorTimesScalar
:
2274 case SpvOpShiftRightLogical
:
2275 case SpvOpShiftRightArithmetic
:
2276 case SpvOpShiftLeftLogical
:
2277 case SpvOpLogicalOr
:
2278 case SpvOpLogicalXor
:
2279 case SpvOpLogicalAnd
:
2280 case SpvOpBitwiseOr
:
2281 case SpvOpBitwiseXor
:
2282 case SpvOpBitwiseAnd
:
2285 case SpvOpFOrdEqual
:
2286 case SpvOpFUnordEqual
:
2287 case SpvOpINotEqual
:
2288 case SpvOpFOrdNotEqual
:
2289 case SpvOpFUnordNotEqual
:
2290 case SpvOpULessThan
:
2291 case SpvOpSLessThan
:
2292 case SpvOpFOrdLessThan
:
2293 case SpvOpFUnordLessThan
:
2294 case SpvOpUGreaterThan
:
2295 case SpvOpSGreaterThan
:
2296 case SpvOpFOrdGreaterThan
:
2297 case SpvOpFUnordGreaterThan
:
2298 case SpvOpULessThanEqual
:
2299 case SpvOpSLessThanEqual
:
2300 case SpvOpFOrdLessThanEqual
:
2301 case SpvOpFUnordLessThanEqual
:
2302 case SpvOpUGreaterThanEqual
:
2303 case SpvOpSGreaterThanEqual
:
2304 case SpvOpFOrdGreaterThanEqual
:
2305 case SpvOpFUnordGreaterThanEqual
:
2311 case SpvOpFwidthFine
:
2312 case SpvOpDPdxCoarse
:
2313 case SpvOpDPdyCoarse
:
2314 case SpvOpFwidthCoarse
:
2315 vtn_handle_alu(b
, opcode
, w
, count
);
2318 case SpvOpTranspose
:
2319 case SpvOpOuterProduct
:
2320 case SpvOpMatrixTimesScalar
:
2321 case SpvOpVectorTimesMatrix
:
2322 case SpvOpMatrixTimesVector
:
2323 case SpvOpMatrixTimesMatrix
:
2324 vtn_handle_matrix_alu(b
, opcode
, w
, count
);
2327 case SpvOpVectorExtractDynamic
:
2328 case SpvOpVectorInsertDynamic
:
2329 case SpvOpVectorShuffle
:
2330 case SpvOpCompositeConstruct
:
2331 case SpvOpCompositeExtract
:
2332 case SpvOpCompositeInsert
:
2333 case SpvOpCopyObject
:
2334 vtn_handle_composite(b
, opcode
, w
, count
);
2338 vtn_handle_phi_first_pass(b
, w
);
2342 unreachable("Unhandled opcode");
/*
 * Walk the SPIR-V blocks starting at `start` and emit the matching NIR
 * control flow through the builder in `b->nb`, stopping once the current
 * block equals `end_block`.
 *
 * b           - translation state; this function uses b->shader, b->nb and
 *               b->block_table.
 * start       - first block to visit.
 * break_block - a branch to this block is turned into a jump instruction
 *               (the loop's merge block when recursing into a loop body).
 * cont_block  - likewise (the loop's header block when recursing into a
 *               loop body).
 * end_block   - block at which the walk stops; NULL is passed for loop
 *               bodies and for whole functions.
 *
 * A block whose merge_op is SpvOpLoopMerge gets a new nir_loop whose body is
 * produced by a recursive call.  A two-way branch gets a nir_if whose arms
 * either receive a jump or are produced by recursive calls that stop at the
 * selection's merge block.
 *
 * NOTE(review): the jump kind passed to each nir_jump_instr_create() call is
 * not visible in this view -- presumably break / continue / return as
 * appropriate; confirm.
 */
2349 vtn_walk_blocks(struct vtn_builder
*b
, struct vtn_block
*start
,
2350 struct vtn_block
*break_block
, struct vtn_block
*cont_block
,
2351 struct vtn_block
*end_block
)
2353 struct vtn_block
*block
= start
;
/* Visit blocks until the caller-supplied end_block is reached. */
2354 while (block
!= end_block
) {
2355 if (block
->merge_op
== SpvOpLoopMerge
) {
2356 /* This is the jump into a loop. */
/* The loop header itself is the continue target and the loop's merge
 * block is the break target for everything inside the loop.
 */
2357 struct vtn_block
*new_cont_block
= block
;
2358 struct vtn_block
*new_break_block
=
2359 vtn_value(b
, block
->merge_block_id
, vtn_value_type_block
)->block
;
/* Create the NIR loop and add it at the end of the current CF list. */
2361 nir_loop
*loop
= nir_loop_create(b
->shader
);
2362 nir_cf_node_insert_end(b
->nb
.cf_node_list
, &loop
->cf_node
);
/* Remember the current insertion list so it can be restored once the
 * loop body has been emitted.
 */
2364 struct exec_list
*old_list
= b
->nb
.cf_node_list
;
2366 /* Reset the merge_op to prevent infinite recursion: the recursive call
 * below starts at this same block and would otherwise take this branch
 * again.
 */
2367 block
->merge_op
= SpvOpNop
;
/* Point the builder at the loop body and walk it, starting at the header
 * with no end block.
 */
2369 nir_builder_insert_after_cf_list(&b
->nb
, &loop
->body
);
2370 vtn_walk_blocks(b
, block
, new_break_block
, new_cont_block
, NULL
);
/* Restore the outer insertion list and resume at the loop's merge block. */
2372 nir_builder_insert_after_cf_list(&b
->nb
, old_list
);
2373 block
= new_break_block
;
/* block->branch points at the block's branch instruction; its opcode is
 * the masked part of the first word.
 */
2377 const uint32_t *w
= block
->branch
;
2378 SpvOp branch_op
= w
[0] & SpvOpCodeMask
;
/* Run vtn_handle_body_instruction over the words from block->label up to
 * block->branch.
 */
2381 vtn_foreach_instruction(b
, block
->label
, block
->branch
,
2382 vtn_handle_body_instruction
);
/* Record which vtn_block produced the NIR block currently at the tail of
 * the insertion list.
 */
2384 nir_cf_node
*cur_cf_node
=
2385 exec_node_data(nir_cf_node
, exec_list_get_tail(b
->nb
.cf_node_list
),
2387 nir_block
*cur_block
= nir_cf_node_as_block(cur_cf_node
);
2388 _mesa_hash_table_insert(b
->block_table
, cur_block
, block
);
/* Dispatch on the opcode that ends this block. */
2390 switch (branch_op
) {
/* Resolve the branch target block from operand w[1]. */
2392 struct vtn_block
*branch_block
=
2393 vtn_value(b
, w
[1], vtn_value_type_block
)->block
;
/* A branch to the break or continue block becomes a jump instruction
 * inserted at the builder's current position.
 */
2395 if (branch_block
== break_block
) {
2396 nir_jump_instr
*jump
= nir_jump_instr_create(b
->shader
,
2398 nir_builder_instr_insert(&b
->nb
, &jump
->instr
);
2401 } else if (branch_block
== cont_block
) {
2402 nir_jump_instr
*jump
= nir_jump_instr_create(b
->shader
,
2404 nir_builder_instr_insert(&b
->nb
, &jump
->instr
);
2407 } else if (branch_block
== end_block
) {
2408 /* We're branching to the merge block of an if, since for loops
2409 * and functions end_block == NULL, so we're done here.
2413 /* We're branching to another block, and according to the rules,
2414 * we can only branch to another block with one predecessor (so
2415 * we're the only one jumping to it) so we can just process it
2418 block
= branch_block
;
2423 case SpvOpBranchConditional
: {
2424 /* Gather up the branch blocks */
2425 struct vtn_block
*then_block
=
2426 vtn_value(b
, w
[2], vtn_value_type_block
)->block
;
2427 struct vtn_block
*else_block
=
2428 vtn_value(b
, w
[3], vtn_value_type_block
)->block
;
/* Create the nir_if, using the SSA value of w[1] as its condition, and
 * add it at the end of the current CF list.
 */
2430 nir_if
*if_stmt
= nir_if_create(b
->shader
);
2431 if_stmt
->condition
= nir_src_for_ssa(vtn_ssa_value(b
, w
[1])->def
);
2432 nir_cf_node_insert_end(b
->nb
.cf_node_list
, &if_stmt
->cf_node
);
/* When one of the two targets is the break or continue block, a jump
 * instruction is created for the corresponding arm of the if.
 */
2434 if (then_block
== break_block
) {
2435 nir_jump_instr
*jump
= nir_jump_instr_create(b
->shader
,
2437 nir_instr_insert_after_cf_list(&if_stmt
->then_list
,
2440 } else if (else_block
== break_block
) {
2441 nir_jump_instr
*jump
= nir_jump_instr_create(b
->shader
,
2443 nir_instr_insert_after_cf_list(&if_stmt
->else_list
,
2446 } else if (then_block
== cont_block
) {
2447 nir_jump_instr
*jump
= nir_jump_instr_create(b
->shader
,
2449 nir_instr_insert_after_cf_list(&if_stmt
->then_list
,
2452 } else if (else_block
== cont_block
) {
2453 nir_jump_instr
*jump
= nir_jump_instr_create(b
->shader
,
2455 nir_instr_insert_after_cf_list(&if_stmt
->else_list
,
2459 /* According to the rules we're branching to two blocks that don't
2460 * have any other predecessors, so we can handle this as a
2463 assert(block
->merge_op
== SpvOpSelectionMerge
);
2464 struct vtn_block
*merge_block
=
2465 vtn_value(b
, block
->merge_block_id
, vtn_value_type_block
)->block
;
2467 struct exec_list
*old_list
= b
->nb
.cf_node_list
;
2469 nir_builder_insert_after_cf_list(&b
->nb
, &if_stmt
->then_list
);
2470 vtn_walk_blocks(b
, then_block
, break_block
, cont_block
, merge_block
);
2472 nir_builder_insert_after_cf_list(&b
->nb
, &if_stmt
->else_list
);
2473 vtn_walk_blocks(b
, else_block
, break_block
, cont_block
, merge_block
);
2475 nir_builder_insert_after_cf_list(&b
->nb
, old_list
);
2476 block
= merge_block
;
2480 /* If we got here then we inserted a predicated break or continue
2481 * above and we need to handle the other case. We already set
2482 * `block` above to indicate what block to visit after the
2486 /* It's possible that the other branch is also a break/continue.
2487 * If it is, we handle that here.
2489 if (block
== break_block
) {
2490 nir_jump_instr
*jump
= nir_jump_instr_create(b
->shader
,
2492 nir_builder_instr_insert(&b
->nb
, &jump
->instr
);
2495 } else if (block
== cont_block
) {
2496 nir_jump_instr
*jump
= nir_jump_instr_create(b
->shader
,
2498 nir_builder_instr_insert(&b
->nb
, &jump
->instr
);
2503 /* If we got here then there was a predicated break/continue but
2504 * the other half of the if has stuff in it. `block` was already
2505 * set above so there is nothing left for us to do.
2511 nir_jump_instr
*jump
= nir_jump_instr_create(b
->shader
,
2513 nir_builder_instr_insert(&b
->nb
, &jump
->instr
);
2518 nir_intrinsic_instr
*discard
=
2519 nir_intrinsic_instr_create(b
->shader
, nir_intrinsic_discard
);
2520 nir_builder_instr_insert(&b
->nb
, &discard
->instr
);
2525 case SpvOpReturnValue
:
2526 case SpvOpUnreachable
:
2528 unreachable("Unhandled opcode");
2534 spirv_to_nir(const uint32_t *words
, size_t word_count
,
2535 const nir_shader_compiler_options
*options
)
2537 const uint32_t *word_end
= words
+ word_count
;
2539 /* Handle the SPIR-V header (first 5 dwords) */
2540 assert(word_count
> 5);
2542 assert(words
[0] == SpvMagicNumber
);
2543 assert(words
[1] == 99);
2544 /* words[2] == generator magic */
2545 unsigned value_id_bound
= words
[3];
2546 assert(words
[4] == 0);
2550 nir_shader
*shader
= nir_shader_create(NULL
, options
);
2552 /* Initialize the vtn_builder object */
2553 struct vtn_builder
*b
= rzalloc(NULL
, struct vtn_builder
);
2555 b
->value_id_bound
= value_id_bound
;
2556 b
->values
= ralloc_array(b
, struct vtn_value
, value_id_bound
);
2557 exec_list_make_empty(&b
->functions
);
2559 /* Handle all the preamble instructions */
2560 words
= vtn_foreach_instruction(b
, words
, word_end
,
2561 vtn_handle_preamble_instruction
);
2563 /* Do a very quick CFG analysis pass */
2564 vtn_foreach_instruction(b
, words
, word_end
,
2565 vtn_handle_first_cfg_pass_instruction
);
2567 foreach_list_typed(struct vtn_function
, func
, node
, &b
->functions
) {
2568 b
->impl
= nir_function_impl_create(func
->overload
);
2569 b
->const_table
= _mesa_hash_table_create(b
, _mesa_hash_pointer
,
2570 _mesa_key_pointer_equal
);
2571 b
->block_table
= _mesa_hash_table_create(b
, _mesa_hash_pointer
,
2572 _mesa_key_pointer_equal
);
2573 nir_builder_init(&b
->nb
, b
->impl
);
2574 nir_builder_insert_after_cf_list(&b
->nb
, &b
->impl
->body
);
2575 vtn_walk_blocks(b
, func
->start_block
, NULL
, NULL
, NULL
);
2576 vtn_foreach_instruction(b
, func
->start_block
->label
, func
->end
,
2577 vtn_handle_phi_second_pass
);