/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
26 #include "glsl/ir_uniform.h"
31 vec4_visitor::emit_nir_code()
33 nir_shader
*nir
= prog
->nir
;
35 if (nir
->num_inputs
> 0)
36 nir_setup_inputs(nir
);
38 if (nir
->num_uniforms
> 0)
39 nir_setup_uniforms(nir
);
41 nir_setup_system_values(nir
);
43 /* get the main function and emit it */
44 nir_foreach_overload(nir
, overload
) {
45 assert(strcmp(overload
->function
->name
, "main") == 0);
46 assert(overload
->impl
);
47 nir_emit_impl(overload
->impl
);
52 vec4_visitor::nir_setup_system_value_intrinsic(nir_intrinsic_instr
*instr
)
56 switch (instr
->intrinsic
) {
57 case nir_intrinsic_load_vertex_id
:
58 unreachable("should be lowered by lower_vertex_id().");
60 case nir_intrinsic_load_vertex_id_zero_base
:
61 reg
= &this->nir_system_values
[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
];
62 if (reg
->file
== BAD_FILE
)
64 *this->make_reg_for_system_value(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
,
68 case nir_intrinsic_load_base_vertex
:
69 reg
= &this->nir_system_values
[SYSTEM_VALUE_BASE_VERTEX
];
70 if (reg
->file
== BAD_FILE
)
71 *reg
= *this->make_reg_for_system_value(SYSTEM_VALUE_BASE_VERTEX
,
75 case nir_intrinsic_load_instance_id
:
76 reg
= &this->nir_system_values
[SYSTEM_VALUE_INSTANCE_ID
];
77 if (reg
->file
== BAD_FILE
)
78 *reg
= *this->make_reg_for_system_value(SYSTEM_VALUE_INSTANCE_ID
,
88 setup_system_values_block(nir_block
*block
, void *void_visitor
)
90 vec4_visitor
*v
= (vec4_visitor
*)void_visitor
;
92 nir_foreach_instr(block
, instr
) {
93 if (instr
->type
!= nir_instr_type_intrinsic
)
96 nir_intrinsic_instr
*intrin
= nir_instr_as_intrinsic(instr
);
97 v
->nir_setup_system_value_intrinsic(intrin
);
104 vec4_visitor::nir_setup_system_values(nir_shader
*shader
)
106 nir_system_values
= ralloc_array(mem_ctx
, dst_reg
, SYSTEM_VALUE_MAX
);
108 nir_foreach_overload(shader
, overload
) {
109 assert(strcmp(overload
->function
->name
, "main") == 0);
110 assert(overload
->impl
);
111 nir_foreach_block(overload
->impl
, setup_system_values_block
, this);
116 vec4_visitor::nir_setup_inputs(nir_shader
*shader
)
118 nir_inputs
= ralloc_array(mem_ctx
, src_reg
, shader
->num_inputs
);
120 foreach_list_typed(nir_variable
, var
, node
, &shader
->inputs
) {
121 int offset
= var
->data
.driver_location
;
122 unsigned size
= type_size_vec4(var
->type
);
123 for (unsigned i
= 0; i
< size
; i
++) {
124 src_reg src
= src_reg(ATTR
, var
->data
.location
+ i
, var
->type
);
125 nir_inputs
[offset
+ i
] = src
;
131 vec4_visitor::nir_setup_uniforms(nir_shader
*shader
)
136 foreach_list_typed(nir_variable
, var
, node
, &shader
->uniforms
) {
137 /* UBO's, atomics and samplers don't take up space in the
139 if (var
->interface_type
!= NULL
|| var
->type
->contains_atomic() ||
140 type_size_vec4(var
->type
) == 0) {
144 assert(uniforms
< uniform_array_size
);
145 this->uniform_size
[uniforms
] = type_size_vec4(var
->type
);
147 if (strncmp(var
->name
, "gl_", 3) == 0)
148 nir_setup_builtin_uniform(var
);
150 nir_setup_uniform(var
);
153 /* For ARB_vertex_program, only a single "parameters" variable is
154 * generated to support uniform data.
156 nir_variable
*var
= (nir_variable
*) shader
->uniforms
.get_head();
157 assert(shader
->uniforms
.length() == 1 &&
158 strcmp(var
->name
, "parameters") == 0);
160 assert(uniforms
< uniform_array_size
);
161 this->uniform_size
[uniforms
] = type_size_vec4(var
->type
);
163 struct gl_program_parameter_list
*plist
= prog
->Parameters
;
164 for (unsigned p
= 0; p
< plist
->NumParameters
; p
++) {
165 uniform_vector_size
[uniforms
] = plist
->Parameters
[p
].Size
;
167 /* Parameters should be either vec4 uniforms or single component
168 * constants; matrices and other larger types should have been broken
171 assert(uniform_vector_size
[uniforms
] <= 4);
174 for (i
= 0; i
< uniform_vector_size
[uniforms
]; i
++) {
175 stage_prog_data
->param
[uniforms
* 4 + i
] = &plist
->ParameterValues
[p
][i
];
178 static const gl_constant_value zero
= { 0.0 };
179 stage_prog_data
->param
[uniforms
* 4 + i
] = &zero
;
188 vec4_visitor::nir_setup_uniform(nir_variable
*var
)
190 int namelen
= strlen(var
->name
);
192 /* The data for our (non-builtin) uniforms is stored in a series of
193 * gl_uniform_driver_storage structs for each subcomponent that
194 * glGetUniformLocation() could name. We know it's been set up in the same
195 * order we'd walk the type, so walk the list of storage and find anything
196 * with our name, or the prefix of a component that starts with our name.
198 for (unsigned u
= 0; u
< shader_prog
->NumUniformStorage
; u
++) {
199 struct gl_uniform_storage
*storage
= &shader_prog
->UniformStorage
[u
];
201 if (storage
->builtin
)
204 if (strncmp(var
->name
, storage
->name
, namelen
) != 0 ||
205 (storage
->name
[namelen
] != 0 &&
206 storage
->name
[namelen
] != '.' &&
207 storage
->name
[namelen
] != '[')) {
211 gl_constant_value
*components
= storage
->storage
;
212 unsigned vector_count
= (MAX2(storage
->array_elements
, 1) *
213 storage
->type
->matrix_columns
);
215 for (unsigned s
= 0; s
< vector_count
; s
++) {
216 assert(uniforms
< uniform_array_size
);
217 uniform_vector_size
[uniforms
] = storage
->type
->vector_elements
;
220 for (i
= 0; i
< uniform_vector_size
[uniforms
]; i
++) {
221 stage_prog_data
->param
[uniforms
* 4 + i
] = components
;
225 static const gl_constant_value zero
= { 0.0 };
226 stage_prog_data
->param
[uniforms
* 4 + i
] = &zero
;
235 vec4_visitor::nir_setup_builtin_uniform(nir_variable
*var
)
237 const nir_state_slot
*const slots
= var
->state_slots
;
238 assert(var
->state_slots
!= NULL
);
240 for (unsigned int i
= 0; i
< var
->num_state_slots
; i
++) {
241 /* This state reference has already been setup by ir_to_mesa,
242 * but we'll get the same index back here. We can reference
243 * ParameterValues directly, since unlike brw_fs.cpp, we never
244 * add new state references during compile.
246 int index
= _mesa_add_state_reference(this->prog
->Parameters
,
247 (gl_state_index
*)slots
[i
].tokens
);
248 gl_constant_value
*values
=
249 &this->prog
->Parameters
->ParameterValues
[index
][0];
251 assert(uniforms
< uniform_array_size
);
253 for (unsigned j
= 0; j
< 4; j
++)
254 stage_prog_data
->param
[uniforms
* 4 + j
] =
255 &values
[GET_SWZ(slots
[i
].swizzle
, j
)];
257 this->uniform_vector_size
[uniforms
] =
258 (var
->type
->is_scalar() || var
->type
->is_vector() ||
259 var
->type
->is_matrix() ? var
->type
->vector_elements
: 4);
266 vec4_visitor::nir_emit_impl(nir_function_impl
*impl
)
268 nir_locals
= ralloc_array(mem_ctx
, dst_reg
, impl
->reg_alloc
);
270 foreach_list_typed(nir_register
, reg
, node
, &impl
->registers
) {
271 unsigned array_elems
=
272 reg
->num_array_elems
== 0 ? 1 : reg
->num_array_elems
;
274 nir_locals
[reg
->index
] = dst_reg(GRF
, alloc
.allocate(array_elems
));
277 nir_ssa_values
= ralloc_array(mem_ctx
, dst_reg
, impl
->ssa_alloc
);
279 nir_emit_cf_list(&impl
->body
);
283 vec4_visitor::nir_emit_cf_list(exec_list
*list
)
285 exec_list_validate(list
);
286 foreach_list_typed(nir_cf_node
, node
, node
, list
) {
287 switch (node
->type
) {
289 nir_emit_if(nir_cf_node_as_if(node
));
292 case nir_cf_node_loop
:
293 nir_emit_loop(nir_cf_node_as_loop(node
));
296 case nir_cf_node_block
:
297 nir_emit_block(nir_cf_node_as_block(node
));
301 unreachable("Invalid CFG node block");
307 vec4_visitor::nir_emit_if(nir_if
*if_stmt
)
309 /* First, put the condition in f0 */
310 src_reg condition
= get_nir_src(if_stmt
->condition
, BRW_REGISTER_TYPE_D
, 1);
311 vec4_instruction
*inst
= emit(MOV(dst_null_d(), condition
));
312 inst
->conditional_mod
= BRW_CONDITIONAL_NZ
;
314 emit(IF(BRW_PREDICATE_NORMAL
));
316 nir_emit_cf_list(&if_stmt
->then_list
);
318 /* note: if the else is empty, dead CF elimination will remove it */
319 emit(BRW_OPCODE_ELSE
);
321 nir_emit_cf_list(&if_stmt
->else_list
);
323 emit(BRW_OPCODE_ENDIF
);
327 vec4_visitor::nir_emit_loop(nir_loop
*loop
)
331 nir_emit_cf_list(&loop
->body
);
333 emit(BRW_OPCODE_WHILE
);
337 vec4_visitor::nir_emit_block(nir_block
*block
)
339 nir_foreach_instr(block
, instr
) {
340 nir_emit_instr(instr
);
345 vec4_visitor::nir_emit_instr(nir_instr
*instr
)
347 this->base_ir
= instr
;
349 switch (instr
->type
) {
350 case nir_instr_type_load_const
:
351 nir_emit_load_const(nir_instr_as_load_const(instr
));
354 case nir_instr_type_intrinsic
:
355 nir_emit_intrinsic(nir_instr_as_intrinsic(instr
));
358 case nir_instr_type_alu
:
359 nir_emit_alu(nir_instr_as_alu(instr
));
362 case nir_instr_type_jump
:
363 nir_emit_jump(nir_instr_as_jump(instr
));
366 case nir_instr_type_tex
:
367 nir_emit_texture(nir_instr_as_tex(instr
));
371 fprintf(stderr
, "VS instruction not yet implemented by NIR->vec4\n");
377 dst_reg_for_nir_reg(vec4_visitor
*v
, nir_register
*nir_reg
,
378 unsigned base_offset
, nir_src
*indirect
)
382 reg
= v
->nir_locals
[nir_reg
->index
];
383 reg
= offset(reg
, base_offset
);
386 new(v
->mem_ctx
) src_reg(v
->get_nir_src(*indirect
,
394 vec4_visitor::get_nir_dest(nir_dest dest
)
396 assert(!dest
.is_ssa
);
397 return dst_reg_for_nir_reg(this, dest
.reg
.reg
, dest
.reg
.base_offset
,
402 vec4_visitor::get_nir_dest(nir_dest dest
, enum brw_reg_type type
)
404 return retype(get_nir_dest(dest
), type
);
408 vec4_visitor::get_nir_dest(nir_dest dest
, nir_alu_type type
)
410 return get_nir_dest(dest
, brw_type_for_nir_type(type
));
414 vec4_visitor::get_nir_src(nir_src src
, enum brw_reg_type type
,
415 unsigned num_components
)
420 assert(src
.ssa
!= NULL
);
421 reg
= nir_ssa_values
[src
.ssa
->index
];
424 reg
= dst_reg_for_nir_reg(this, src
.reg
.reg
, src
.reg
.base_offset
,
428 reg
= retype(reg
, type
);
430 src_reg reg_as_src
= src_reg(reg
);
431 reg_as_src
.swizzle
= brw_swizzle_for_size(num_components
);
436 vec4_visitor::get_nir_src(nir_src src
, nir_alu_type type
,
437 unsigned num_components
)
439 return get_nir_src(src
, brw_type_for_nir_type(type
), num_components
);
443 vec4_visitor::get_nir_src(nir_src src
, unsigned num_components
)
445 /* if type is not specified, default to signed int */
446 return get_nir_src(src
, nir_type_int
, num_components
);
450 vec4_visitor::nir_emit_load_const(nir_load_const_instr
*instr
)
452 dst_reg reg
= dst_reg(GRF
, alloc
.allocate(1));
453 reg
.type
= BRW_REGISTER_TYPE_F
;
455 unsigned remaining
= brw_writemask_for_size(instr
->def
.num_components
);
457 /* @FIXME: consider emitting vector operations to save some MOVs in
458 * cases where the components are representable in 8 bits.
459 * For now, we emit a MOV for each distinct value.
461 for (unsigned i
= 0; i
< instr
->def
.num_components
; i
++) {
462 unsigned writemask
= 1 << i
;
464 if ((remaining
& writemask
) == 0)
467 for (unsigned j
= i
; j
< instr
->def
.num_components
; j
++) {
468 if (instr
->value
.u
[i
] == instr
->value
.u
[j
]) {
473 reg
.writemask
= writemask
;
474 emit(MOV(reg
, src_reg(instr
->value
.f
[i
])));
476 remaining
&= ~writemask
;
479 /* Set final writemask */
480 reg
.writemask
= brw_writemask_for_size(instr
->def
.num_components
);
482 nir_ssa_values
[instr
->def
.index
] = reg
;
486 vec4_visitor::nir_emit_intrinsic(nir_intrinsic_instr
*instr
)
491 bool has_indirect
= false;
493 switch (instr
->intrinsic
) {
495 case nir_intrinsic_load_input_indirect
:
498 case nir_intrinsic_load_input
: {
499 int offset
= instr
->const_index
[0];
500 src
= nir_inputs
[offset
];
503 dest
.reladdr
= new(mem_ctx
) src_reg(get_nir_src(instr
->src
[0],
507 dest
= get_nir_dest(instr
->dest
, src
.type
);
508 dest
.writemask
= brw_writemask_for_size(instr
->num_components
);
510 emit(MOV(dest
, src
));
514 case nir_intrinsic_store_output_indirect
:
517 case nir_intrinsic_store_output
: {
518 int varying
= instr
->const_index
[0];
520 src
= get_nir_src(instr
->src
[0], BRW_REGISTER_TYPE_F
,
521 instr
->num_components
);
525 dest
.reladdr
= new(mem_ctx
) src_reg(get_nir_src(instr
->src
[1],
529 output_reg
[varying
] = dest
;
533 case nir_intrinsic_load_vertex_id
:
534 unreachable("should be lowered by lower_vertex_id()");
536 case nir_intrinsic_load_vertex_id_zero_base
: {
538 src_reg(nir_system_values
[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE
]);
539 assert(vertex_id
.file
!= BAD_FILE
);
540 dest
= get_nir_dest(instr
->dest
, vertex_id
.type
);
541 emit(MOV(dest
, vertex_id
));
545 case nir_intrinsic_load_base_vertex
: {
546 src_reg base_vertex
=
547 src_reg(nir_system_values
[SYSTEM_VALUE_BASE_VERTEX
]);
548 assert(base_vertex
.file
!= BAD_FILE
);
549 dest
= get_nir_dest(instr
->dest
, base_vertex
.type
);
550 emit(MOV(dest
, base_vertex
));
554 case nir_intrinsic_load_instance_id
: {
555 src_reg instance_id
=
556 src_reg(nir_system_values
[SYSTEM_VALUE_INSTANCE_ID
]);
557 assert(instance_id
.file
!= BAD_FILE
);
558 dest
= get_nir_dest(instr
->dest
, instance_id
.type
);
559 emit(MOV(dest
, instance_id
));
563 case nir_intrinsic_load_uniform_indirect
:
566 case nir_intrinsic_load_uniform
: {
567 dest
= get_nir_dest(instr
->dest
);
569 src
= src_reg(dst_reg(UNIFORM
, instr
->const_index
[0]));
570 src
.reg_offset
= instr
->const_index
[1];
573 src_reg tmp
= get_nir_src(instr
->src
[0], BRW_REGISTER_TYPE_D
, 1);
574 src
.reladdr
= new(mem_ctx
) src_reg(tmp
);
577 emit(MOV(dest
, src
));
581 case nir_intrinsic_atomic_counter_read
:
582 case nir_intrinsic_atomic_counter_inc
:
583 case nir_intrinsic_atomic_counter_dec
: {
584 unsigned surf_index
= prog_data
->base
.binding_table
.abo_start
+
585 (unsigned) instr
->const_index
[0];
586 src_reg offset
= get_nir_src(instr
->src
[0], nir_type_int
,
587 instr
->num_components
);
588 dest
= get_nir_dest(instr
->dest
);
590 switch (instr
->intrinsic
) {
591 case nir_intrinsic_atomic_counter_inc
:
592 emit_untyped_atomic(BRW_AOP_INC
, surf_index
, dest
, offset
,
593 src_reg(), src_reg());
595 case nir_intrinsic_atomic_counter_dec
:
596 emit_untyped_atomic(BRW_AOP_PREDEC
, surf_index
, dest
, offset
,
597 src_reg(), src_reg());
599 case nir_intrinsic_atomic_counter_read
:
600 emit_untyped_surface_read(surf_index
, dest
, offset
);
603 unreachable("Unreachable");
606 brw_mark_surface_used(stage_prog_data
, surf_index
);
610 case nir_intrinsic_load_ubo_indirect
:
613 case nir_intrinsic_load_ubo
: {
614 nir_const_value
*const_block_index
= nir_src_as_const_value(instr
->src
[0]);
617 dest
= get_nir_dest(instr
->dest
);
619 if (const_block_index
) {
620 /* The block index is a constant, so just emit the binding table entry
623 surf_index
= src_reg(prog_data
->base
.binding_table
.ubo_start
+
624 const_block_index
->u
[0]);
626 /* The block index is not a constant. Evaluate the index expression
627 * per-channel and add the base UBO index; we have to select a value
628 * from any live channel.
630 surf_index
= src_reg(this, glsl_type::uint_type
);
631 emit(ADD(dst_reg(surf_index
), get_nir_src(instr
->src
[0], nir_type_int
,
632 instr
->num_components
),
633 src_reg(prog_data
->base
.binding_table
.ubo_start
)));
634 surf_index
= emit_uniformize(surf_index
);
636 /* Assume this may touch any UBO. It would be nice to provide
637 * a tighter bound, but the array information is already lowered away.
639 brw_mark_surface_used(&prog_data
->base
,
640 prog_data
->base
.binding_table
.ubo_start
+
641 shader_prog
->NumUniformBlocks
- 1);
644 unsigned const_offset
= instr
->const_index
[0];
648 offset
= src_reg(const_offset
/ 16);
650 offset
= src_reg(this, glsl_type::uint_type
);
651 emit(SHR(dst_reg(offset
), get_nir_src(instr
->src
[1], nir_type_int
, 1),
655 src_reg packed_consts
= src_reg(this, glsl_type::vec4_type
);
656 packed_consts
.type
= dest
.type
;
658 emit_pull_constant_load_reg(dst_reg(packed_consts
),
661 NULL
, NULL
/* before_block/inst */);
663 packed_consts
.swizzle
= brw_swizzle_for_size(instr
->num_components
);
664 packed_consts
.swizzle
+= BRW_SWIZZLE4(const_offset
% 16 / 4,
665 const_offset
% 16 / 4,
666 const_offset
% 16 / 4,
667 const_offset
% 16 / 4);
669 emit(MOV(dest
, packed_consts
));
674 unreachable("Unknown intrinsic");
679 brw_swizzle_for_nir_swizzle(uint8_t swizzle
[4])
681 return BRW_SWIZZLE4(swizzle
[0], swizzle
[1], swizzle
[2], swizzle
[3]);
684 static enum brw_conditional_mod
685 brw_conditional_for_nir_comparison(nir_op op
)
691 return BRW_CONDITIONAL_L
;
696 return BRW_CONDITIONAL_GE
;
700 case nir_op_ball_fequal2
:
701 case nir_op_ball_iequal2
:
702 case nir_op_ball_fequal3
:
703 case nir_op_ball_iequal3
:
704 case nir_op_ball_fequal4
:
705 case nir_op_ball_iequal4
:
706 return BRW_CONDITIONAL_Z
;
710 case nir_op_bany_fnequal2
:
711 case nir_op_bany_inequal2
:
712 case nir_op_bany_fnequal3
:
713 case nir_op_bany_inequal3
:
714 case nir_op_bany_fnequal4
:
715 case nir_op_bany_inequal4
:
716 return BRW_CONDITIONAL_NZ
;
719 unreachable("not reached: bad operation for comparison");
724 vec4_visitor::nir_emit_alu(nir_alu_instr
*instr
)
726 vec4_instruction
*inst
;
728 dst_reg dst
= get_nir_dest(instr
->dest
.dest
,
729 nir_op_infos
[instr
->op
].output_type
);
730 dst
.writemask
= instr
->dest
.write_mask
;
733 for (unsigned i
= 0; i
< nir_op_infos
[instr
->op
].num_inputs
; i
++) {
734 op
[i
] = get_nir_src(instr
->src
[i
].src
,
735 nir_op_infos
[instr
->op
].input_types
[i
], 4);
736 op
[i
].swizzle
= brw_swizzle_for_nir_swizzle(instr
->src
[i
].swizzle
);
737 op
[i
].abs
= instr
->src
[i
].abs
;
738 op
[i
].negate
= instr
->src
[i
].negate
;
744 inst
= emit(MOV(dst
, op
[0]));
745 inst
->saturate
= instr
->dest
.saturate
;
751 unreachable("not reached: should be handled by lower_vec_to_movs()");
755 inst
= emit(MOV(dst
, op
[0]));
756 inst
->saturate
= instr
->dest
.saturate
;
761 inst
= emit(MOV(dst
, op
[0]));
767 inst
= emit(ADD(dst
, op
[0], op
[1]));
768 inst
->saturate
= instr
->dest
.saturate
;
772 inst
= emit(MUL(dst
, op
[0], op
[1]));
773 inst
->saturate
= instr
->dest
.saturate
;
777 if (devinfo
->gen
< 8) {
778 nir_const_value
*value0
= nir_src_as_const_value(instr
->src
[0].src
);
779 nir_const_value
*value1
= nir_src_as_const_value(instr
->src
[1].src
);
781 /* For integer multiplication, the MUL uses the low 16 bits of one of
782 * the operands (src0 through SNB, src1 on IVB and later). The MACH
783 * accumulates in the contribution of the upper 16 bits of that
784 * operand. If we can determine that one of the args is in the low
785 * 16 bits, though, we can just emit a single MUL.
787 if (value0
&& value0
->u
[0] < (1 << 16)) {
788 if (devinfo
->gen
< 7)
789 emit(MUL(dst
, op
[0], op
[1]));
791 emit(MUL(dst
, op
[1], op
[0]));
792 } else if (value1
&& value1
->u
[0] < (1 << 16)) {
793 if (devinfo
->gen
< 7)
794 emit(MUL(dst
, op
[1], op
[0]));
796 emit(MUL(dst
, op
[0], op
[1]));
798 struct brw_reg acc
= retype(brw_acc_reg(8), dst
.type
);
800 emit(MUL(acc
, op
[0], op
[1]));
801 emit(MACH(dst_null_d(), op
[0], op
[1]));
802 emit(MOV(dst
, src_reg(acc
)));
805 emit(MUL(dst
, op
[0], op
[1]));
810 case nir_op_imul_high
:
811 case nir_op_umul_high
: {
812 struct brw_reg acc
= retype(brw_acc_reg(8), dst
.type
);
814 emit(MUL(acc
, op
[0], op
[1]));
815 emit(MACH(dst
, op
[0], op
[1]));
820 inst
= emit_math(SHADER_OPCODE_RCP
, dst
, op
[0]);
821 inst
->saturate
= instr
->dest
.saturate
;
825 inst
= emit_math(SHADER_OPCODE_EXP2
, dst
, op
[0]);
826 inst
->saturate
= instr
->dest
.saturate
;
830 inst
= emit_math(SHADER_OPCODE_LOG2
, dst
, op
[0]);
831 inst
->saturate
= instr
->dest
.saturate
;
835 inst
= emit_math(SHADER_OPCODE_SIN
, dst
, op
[0]);
836 inst
->saturate
= instr
->dest
.saturate
;
840 inst
= emit_math(SHADER_OPCODE_COS
, dst
, op
[0]);
841 inst
->saturate
= instr
->dest
.saturate
;
846 emit_math(SHADER_OPCODE_INT_QUOTIENT
, dst
, op
[0], op
[1]);
850 emit_math(SHADER_OPCODE_INT_REMAINDER
, dst
, op
[0], op
[1]);
854 unreachable("not reached: should be handled by ldexp_to_arith()");
857 inst
= emit_math(SHADER_OPCODE_SQRT
, dst
, op
[0]);
858 inst
->saturate
= instr
->dest
.saturate
;
862 inst
= emit_math(SHADER_OPCODE_RSQ
, dst
, op
[0]);
863 inst
->saturate
= instr
->dest
.saturate
;
867 inst
= emit_math(SHADER_OPCODE_POW
, dst
, op
[0], op
[1]);
868 inst
->saturate
= instr
->dest
.saturate
;
871 case nir_op_uadd_carry
: {
872 struct brw_reg acc
= retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD
);
874 emit(ADDC(dst_null_ud(), op
[0], op
[1]));
875 emit(MOV(dst
, src_reg(acc
)));
879 case nir_op_usub_borrow
: {
880 struct brw_reg acc
= retype(brw_acc_reg(8), BRW_REGISTER_TYPE_UD
);
882 emit(SUBB(dst_null_ud(), op
[0], op
[1]));
883 emit(MOV(dst
, src_reg(acc
)));
888 inst
= emit(RNDZ(dst
, op
[0]));
889 inst
->saturate
= instr
->dest
.saturate
;
893 src_reg tmp
= src_reg(this, glsl_type::float_type
);
895 brw_swizzle_for_size(instr
->src
[0].src
.is_ssa
?
896 instr
->src
[0].src
.ssa
->num_components
:
897 instr
->src
[0].src
.reg
.reg
->num_components
);
899 op
[0].negate
= !op
[0].negate
;
900 emit(RNDD(dst_reg(tmp
), op
[0]));
902 inst
= emit(MOV(dst
, tmp
));
903 inst
->saturate
= instr
->dest
.saturate
;
908 inst
= emit(RNDD(dst
, op
[0]));
909 inst
->saturate
= instr
->dest
.saturate
;
913 inst
= emit(FRC(dst
, op
[0]));
914 inst
->saturate
= instr
->dest
.saturate
;
917 case nir_op_fround_even
:
918 inst
= emit(RNDE(dst
, op
[0]));
919 inst
->saturate
= instr
->dest
.saturate
;
925 inst
= emit_minmax(BRW_CONDITIONAL_L
, dst
, op
[0], op
[1]);
926 inst
->saturate
= instr
->dest
.saturate
;
932 inst
= emit_minmax(BRW_CONDITIONAL_GE
, dst
, op
[0], op
[1]);
933 inst
->saturate
= instr
->dest
.saturate
;
937 case nir_op_fddx_coarse
:
938 case nir_op_fddx_fine
:
940 case nir_op_fddy_coarse
:
941 case nir_op_fddy_fine
:
942 unreachable("derivatives are not valid in vertex shaders");
954 emit(CMP(dst
, op
[0], op
[1],
955 brw_conditional_for_nir_comparison(instr
->op
)));
958 case nir_op_ball_fequal2
:
959 case nir_op_ball_iequal2
:
960 case nir_op_ball_fequal3
:
961 case nir_op_ball_iequal3
:
962 case nir_op_ball_fequal4
:
963 case nir_op_ball_iequal4
: {
964 dst_reg tmp
= dst_reg(this, glsl_type::bool_type
);
967 case nir_op_ball_fequal2
:
968 case nir_op_ball_iequal2
:
969 tmp
.writemask
= WRITEMASK_XY
;
971 case nir_op_ball_fequal3
:
972 case nir_op_ball_iequal3
:
973 tmp
.writemask
= WRITEMASK_XYZ
;
975 case nir_op_ball_fequal4
:
976 case nir_op_ball_iequal4
:
977 tmp
.writemask
= WRITEMASK_XYZW
;
980 unreachable("not reached");
983 emit(CMP(tmp
, op
[0], op
[1],
984 brw_conditional_for_nir_comparison(instr
->op
)));
985 emit(MOV(dst
, src_reg(0)));
986 inst
= emit(MOV(dst
, src_reg(~0)));
987 inst
->predicate
= BRW_PREDICATE_ALIGN16_ALL4H
;
991 case nir_op_bany_fnequal2
:
992 case nir_op_bany_inequal2
:
993 case nir_op_bany_fnequal3
:
994 case nir_op_bany_inequal3
:
995 case nir_op_bany_fnequal4
:
996 case nir_op_bany_inequal4
: {
997 dst_reg tmp
= dst_reg(this, glsl_type::bool_type
);
1000 case nir_op_bany_fnequal2
:
1001 case nir_op_bany_inequal2
:
1002 tmp
.writemask
= WRITEMASK_XY
;
1004 case nir_op_bany_fnequal3
:
1005 case nir_op_bany_inequal3
:
1006 tmp
.writemask
= WRITEMASK_XYZ
;
1008 case nir_op_bany_fnequal4
:
1009 case nir_op_bany_inequal4
:
1010 tmp
.writemask
= WRITEMASK_XYZW
;
1013 unreachable("not reached");
1016 emit(CMP(tmp
, op
[0], op
[1],
1017 brw_conditional_for_nir_comparison(instr
->op
)));
1019 emit(MOV(dst
, src_reg(0)));
1020 inst
= emit(MOV(dst
, src_reg(~0)));
1021 inst
->predicate
= BRW_PREDICATE_ALIGN16_ANY4H
;
1026 if (devinfo
->gen
>= 8) {
1027 op
[0] = resolve_source_modifiers(op
[0]);
1029 emit(NOT(dst
, op
[0]));
1033 if (devinfo
->gen
>= 8) {
1034 op
[0] = resolve_source_modifiers(op
[0]);
1035 op
[1] = resolve_source_modifiers(op
[1]);
1037 emit(XOR(dst
, op
[0], op
[1]));
1041 if (devinfo
->gen
>= 8) {
1042 op
[0] = resolve_source_modifiers(op
[0]);
1043 op
[1] = resolve_source_modifiers(op
[1]);
1045 emit(OR(dst
, op
[0], op
[1]));
1049 if (devinfo
->gen
>= 8) {
1050 op
[0] = resolve_source_modifiers(op
[0]);
1051 op
[1] = resolve_source_modifiers(op
[1]);
1053 emit(AND(dst
, op
[0], op
[1]));
1057 emit(AND(dst
, op
[0], src_reg(1)));
1061 op
[0].type
= BRW_REGISTER_TYPE_D
;
1062 dst
.type
= BRW_REGISTER_TYPE_D
;
1063 emit(AND(dst
, op
[0], src_reg(0x3f800000u
)));
1064 dst
.type
= BRW_REGISTER_TYPE_F
;
1068 emit(CMP(dst
, op
[0], src_reg(0.0f
), BRW_CONDITIONAL_NZ
));
1072 emit(CMP(dst
, op
[0], src_reg(0), BRW_CONDITIONAL_NZ
));
1075 case nir_op_fnoise1_1
:
1076 case nir_op_fnoise1_2
:
1077 case nir_op_fnoise1_3
:
1078 case nir_op_fnoise1_4
:
1079 case nir_op_fnoise2_1
:
1080 case nir_op_fnoise2_2
:
1081 case nir_op_fnoise2_3
:
1082 case nir_op_fnoise2_4
:
1083 case nir_op_fnoise3_1
:
1084 case nir_op_fnoise3_2
:
1085 case nir_op_fnoise3_3
:
1086 case nir_op_fnoise3_4
:
1087 case nir_op_fnoise4_1
:
1088 case nir_op_fnoise4_2
:
1089 case nir_op_fnoise4_3
:
1090 case nir_op_fnoise4_4
:
1091 unreachable("not reached: should be handled by lower_noise");
1093 case nir_op_unpack_half_2x16_split_x
:
1094 case nir_op_unpack_half_2x16_split_y
:
1095 case nir_op_pack_half_2x16_split
:
1096 unreachable("not reached: should not occur in vertex shader");
1098 case nir_op_unpack_snorm_2x16
:
1099 case nir_op_unpack_unorm_2x16
:
1100 case nir_op_pack_snorm_2x16
:
1101 case nir_op_pack_unorm_2x16
:
1102 unreachable("not reached: should be handled by lower_packing_builtins");
1104 case nir_op_unpack_half_2x16
:
1105 /* As NIR does not guarantee that we have a correct swizzle outside the
1106 * boundaries of a vector, and the implementation of emit_unpack_half_2x16
1107 * uses the source operand in an operation with WRITEMASK_Y while our
1108 * source operand has only size 1, it accessed incorrect data producing
1109 * regressions in Piglit. We repeat the swizzle of the first component on the
1110 * rest of components to avoid regressions. In the vec4_visitor IR code path
1111 * this is not needed because the operand has already the correct swizzle.
1113 op
[0].swizzle
= brw_compose_swizzle(BRW_SWIZZLE_XXXX
, op
[0].swizzle
);
1114 emit_unpack_half_2x16(dst
, op
[0]);
1117 case nir_op_pack_half_2x16
:
1118 emit_pack_half_2x16(dst
, op
[0]);
1121 case nir_op_unpack_unorm_4x8
:
1122 emit_unpack_unorm_4x8(dst
, op
[0]);
1125 case nir_op_pack_unorm_4x8
:
1126 emit_pack_unorm_4x8(dst
, op
[0]);
1129 case nir_op_unpack_snorm_4x8
:
1130 emit_unpack_snorm_4x8(dst
, op
[0]);
1133 case nir_op_pack_snorm_4x8
:
1134 emit_pack_snorm_4x8(dst
, op
[0]);
1137 case nir_op_bitfield_reverse
:
1138 emit(BFREV(dst
, op
[0]));
1141 case nir_op_bit_count
:
1142 emit(CBIT(dst
, op
[0]));
1145 case nir_op_ufind_msb
:
1146 case nir_op_ifind_msb
: {
1147 src_reg temp
= src_reg(this, glsl_type::uint_type
);
1149 inst
= emit(FBH(dst_reg(temp
), op
[0]));
1150 inst
->dst
.writemask
= WRITEMASK_XYZW
;
1152 /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
1153 * from the LSB side. If FBH didn't return an error (0xFFFFFFFF), then
1154 * subtract the result from 31 to convert the MSB count into an LSB count.
1157 /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
1158 temp
.swizzle
= BRW_SWIZZLE_NOOP
;
1159 emit(MOV(dst
, temp
));
1161 src_reg src_tmp
= src_reg(dst
);
1162 emit(CMP(dst_null_d(), src_tmp
, src_reg(-1), BRW_CONDITIONAL_NZ
));
1164 src_tmp
.negate
= true;
1165 inst
= emit(ADD(dst
, src_tmp
, src_reg(31)));
1166 inst
->predicate
= BRW_PREDICATE_NORMAL
;
1170 case nir_op_find_lsb
:
1171 emit(FBL(dst
, op
[0]));
1174 case nir_op_ubitfield_extract
:
1175 case nir_op_ibitfield_extract
:
1176 op
[0] = fix_3src_operand(op
[0]);
1177 op
[1] = fix_3src_operand(op
[1]);
1178 op
[2] = fix_3src_operand(op
[2]);
1180 emit(BFE(dst
, op
[2], op
[1], op
[0]));
1184 emit(BFI1(dst
, op
[0], op
[1]));
1188 op
[0] = fix_3src_operand(op
[0]);
1189 op
[1] = fix_3src_operand(op
[1]);
1190 op
[2] = fix_3src_operand(op
[2]);
1192 emit(BFI2(dst
, op
[0], op
[1], op
[2]));
1195 case nir_op_bitfield_insert
:
1196 unreachable("not reached: should be handled by "
1197 "lower_instructions::bitfield_insert_to_bfm_bfi");
1200 /* AND(val, 0x80000000) gives the sign bit.
1202 * Predicated OR ORs 1.0 (0x3f800000) with the sign bit if val is not
1205 emit(CMP(dst_null_f(), op
[0], src_reg(0.0f
), BRW_CONDITIONAL_NZ
));
1207 op
[0].type
= BRW_REGISTER_TYPE_UD
;
1208 dst
.type
= BRW_REGISTER_TYPE_UD
;
1209 emit(AND(dst
, op
[0], src_reg(0x80000000u
)));
1211 inst
= emit(OR(dst
, src_reg(dst
), src_reg(0x3f800000u
)));
1212 inst
->predicate
= BRW_PREDICATE_NORMAL
;
1213 dst
.type
= BRW_REGISTER_TYPE_F
;
1215 if (instr
->dest
.saturate
) {
1216 inst
= emit(MOV(dst
, src_reg(dst
)));
1217 inst
->saturate
= true;
1222 /* ASR(val, 31) -> negative val generates 0xffffffff (signed -1).
1223 * -> non-negative val generates 0x00000000.
1224 * Predicated OR sets 1 if val is positive.
1226 emit(CMP(dst_null_d(), op
[0], src_reg(0), BRW_CONDITIONAL_G
));
1227 emit(ASR(dst
, op
[0], src_reg(31)));
1228 inst
= emit(OR(dst
, src_reg(dst
), src_reg(1)));
1229 inst
->predicate
= BRW_PREDICATE_NORMAL
;
1233 emit(SHL(dst
, op
[0], op
[1]));
1237 emit(ASR(dst
, op
[0], op
[1]));
1241 emit(SHR(dst
, op
[0], op
[1]));
1245 op
[0] = fix_3src_operand(op
[0]);
1246 op
[1] = fix_3src_operand(op
[1]);
1247 op
[2] = fix_3src_operand(op
[2]);
1249 inst
= emit(MAD(dst
, op
[2], op
[1], op
[0]));
1250 inst
->saturate
= instr
->dest
.saturate
;
1254 inst
= emit_lrp(dst
, op
[0], op
[1], op
[2]);
1255 inst
->saturate
= instr
->dest
.saturate
;
1259 emit(CMP(dst_null_d(), op
[0], src_reg(0), BRW_CONDITIONAL_NZ
));
1260 inst
= emit(BRW_OPCODE_SEL
, dst
, op
[1], op
[2]);
1261 inst
->predicate
= BRW_PREDICATE_NORMAL
;
1265 inst
= emit(BRW_OPCODE_DP2
, dst
, op
[0], op
[1]);
1266 inst
->saturate
= instr
->dest
.saturate
;
1270 inst
= emit(BRW_OPCODE_DP3
, dst
, op
[0], op
[1]);
1271 inst
->saturate
= instr
->dest
.saturate
;
1275 inst
= emit(BRW_OPCODE_DP4
, dst
, op
[0], op
[1]);
1276 inst
->saturate
= instr
->dest
.saturate
;
1281 case nir_op_bany4
: {
1282 dst_reg tmp
= dst_reg(this, glsl_type::bool_type
);
1283 tmp
.writemask
= brw_writemask_for_size(nir_op_infos
[instr
->op
].input_sizes
[0]);
1285 emit(CMP(tmp
, op
[0], src_reg(0), BRW_CONDITIONAL_NZ
));
1287 emit(MOV(dst
, src_reg(0)));
1288 inst
= emit(MOV(dst
, src_reg(~0)));
1289 inst
->predicate
= BRW_PREDICATE_ALIGN16_ANY4H
;
1298 unreachable("not reached: should be lowered by lower_source mods");
1301 unreachable("not reached: should be lowered by DIV_TO_MUL_RCP in the compiler");
1304 unreachable("not reached: should be lowered by MOD_TO_FLOOR in the compiler");
1308 unreachable("not reached: should be handled by ir_sub_to_add_neg");
1311 unreachable("Unimplemented ALU operation");
1314 /* If we need to do a boolean resolve, replace the result with -(x & 1)
1315 * to sign extend the low bit to 0/~0
1317 if (devinfo
->gen
<= 5 &&
1318 (instr
->instr
.pass_flags
& BRW_NIR_BOOLEAN_MASK
) ==
1319 BRW_NIR_BOOLEAN_NEEDS_RESOLVE
) {
1320 dst_reg masked
= dst_reg(this, glsl_type::int_type
);
1321 masked
.writemask
= dst
.writemask
;
1322 emit(AND(masked
, src_reg(dst
), src_reg(1)));
1323 src_reg masked_neg
= src_reg(masked
);
1324 masked_neg
.negate
= true;
1325 emit(MOV(retype(dst
, BRW_REGISTER_TYPE_D
), masked_neg
));
1330 vec4_visitor::nir_emit_jump(nir_jump_instr
*instr
)
1332 switch (instr
->type
) {
1333 case nir_jump_break
:
1334 emit(BRW_OPCODE_BREAK
);
1337 case nir_jump_continue
:
1338 emit(BRW_OPCODE_CONTINUE
);
1341 case nir_jump_return
:
1344 unreachable("unknown jump");
1348 enum ir_texture_opcode
1349 ir_texture_opcode_for_nir_texop(nir_texop texop
)
1351 enum ir_texture_opcode op
;
1354 case nir_texop_lod
: op
= ir_lod
; break;
1355 case nir_texop_query_levels
: op
= ir_query_levels
; break;
1356 case nir_texop_tex
: op
= ir_tex
; break;
1357 case nir_texop_tg4
: op
= ir_tg4
; break;
1358 case nir_texop_txb
: op
= ir_txb
; break;
1359 case nir_texop_txd
: op
= ir_txd
; break;
1360 case nir_texop_txf
: op
= ir_txf
; break;
1361 case nir_texop_txf_ms
: op
= ir_txf_ms
; break;
1362 case nir_texop_txl
: op
= ir_txl
; break;
1363 case nir_texop_txs
: op
= ir_txs
; break;
1365 unreachable("unknown texture opcode");
1371 glsl_type_for_nir_alu_type(nir_alu_type alu_type
,
1372 unsigned components
)
1375 case nir_type_float
:
1376 return glsl_type::vec(components
);
1378 return glsl_type::ivec(components
);
1379 case nir_type_unsigned
:
1380 return glsl_type::uvec(components
);
1382 return glsl_type::bvec(components
);
1384 return glsl_type::error_type
;
1387 return glsl_type::error_type
;
1391 vec4_visitor::nir_emit_texture(nir_tex_instr
*instr
)
1393 unsigned sampler
= instr
->sampler_index
;
1394 src_reg sampler_reg
= src_reg(sampler
);
1396 const glsl_type
*coord_type
= NULL
;
1397 src_reg shadow_comparitor
;
1398 src_reg offset_value
;
1400 src_reg sample_index
;
1403 const glsl_type
*dest_type
=
1404 glsl_type_for_nir_alu_type(instr
->dest_type
,
1405 nir_tex_instr_dest_size(instr
));
1406 dst_reg dest
= get_nir_dest(instr
->dest
, instr
->dest_type
);
1408 /* When tg4 is used with the degenerate ZERO/ONE swizzles, don't bother
1409 * emitting anything other than setting up the constant result.
1411 if (instr
->op
== nir_texop_tg4
) {
1412 int swiz
= GET_SWZ(key_tex
->swizzles
[sampler
], instr
->component
);
1413 if (swiz
== SWIZZLE_ZERO
|| swiz
== SWIZZLE_ONE
) {
1414 emit(MOV(dest
, src_reg(swiz
== SWIZZLE_ONE
? 1.0f
: 0.0f
)));
1419 /* Load the texture operation sources */
1420 for (unsigned i
= 0; i
< instr
->num_srcs
; i
++) {
1421 switch (instr
->src
[i
].src_type
) {
1422 case nir_tex_src_comparitor
:
1423 shadow_comparitor
= get_nir_src(instr
->src
[i
].src
,
1424 BRW_REGISTER_TYPE_F
, 1);
1427 case nir_tex_src_coord
: {
1428 unsigned src_size
= nir_tex_instr_src_size(instr
, i
);
1430 switch (instr
->op
) {
1432 case nir_texop_txf_ms
:
1433 coordinate
= get_nir_src(instr
->src
[i
].src
, BRW_REGISTER_TYPE_D
,
1435 coord_type
= glsl_type::ivec(src_size
);
1439 coordinate
= get_nir_src(instr
->src
[i
].src
, BRW_REGISTER_TYPE_F
,
1441 coord_type
= glsl_type::vec(src_size
);
1447 case nir_tex_src_ddx
:
1448 lod
= get_nir_src(instr
->src
[i
].src
, BRW_REGISTER_TYPE_F
,
1449 nir_tex_instr_src_size(instr
, i
));
1452 case nir_tex_src_ddy
:
1453 lod2
= get_nir_src(instr
->src
[i
].src
, BRW_REGISTER_TYPE_F
,
1454 nir_tex_instr_src_size(instr
, i
));
1457 case nir_tex_src_lod
:
1458 switch (instr
->op
) {
1461 lod
= get_nir_src(instr
->src
[i
].src
, BRW_REGISTER_TYPE_D
, 1);
1465 lod
= get_nir_src(instr
->src
[i
].src
, BRW_REGISTER_TYPE_F
, 1);
1470 case nir_tex_src_ms_index
: {
1471 sample_index
= get_nir_src(instr
->src
[i
].src
, BRW_REGISTER_TYPE_D
, 1);
1472 assert(coord_type
!= NULL
);
1473 if (devinfo
->gen
>= 7 &&
1474 key_tex
->compressed_multisample_layout_mask
& (1 << sampler
)) {
1475 mcs
= emit_mcs_fetch(coord_type
, coordinate
, sampler_reg
);
1479 mcs
= retype(mcs
, BRW_REGISTER_TYPE_UD
);
1483 case nir_tex_src_offset
:
1484 offset_value
= get_nir_src(instr
->src
[i
].src
, BRW_REGISTER_TYPE_D
, 2);
1487 case nir_tex_src_sampler_offset
: {
1488 /* The highest sampler which may be used by this operation is
1489 * the last element of the array. Mark it here, because the generator
1490 * doesn't have enough information to determine the bound.
1492 uint32_t array_size
= instr
->sampler_array_size
;
1493 uint32_t max_used
= sampler
+ array_size
- 1;
1494 if (instr
->op
== nir_texop_tg4
) {
1495 max_used
+= prog_data
->base
.binding_table
.gather_texture_start
;
1497 max_used
+= prog_data
->base
.binding_table
.texture_start
;
1500 brw_mark_surface_used(&prog_data
->base
, max_used
);
1502 /* Emit code to evaluate the actual indexing expression */
1503 src_reg src
= get_nir_src(instr
->src
[i
].src
, 1);
1504 src_reg
temp(this, glsl_type::uint_type
);
1505 emit(ADD(dst_reg(temp
), src
, src_reg(sampler
)));
1506 sampler_reg
= emit_uniformize(temp
);
1510 case nir_tex_src_projector
:
1511 unreachable("Should be lowered by do_lower_texture_projection");
1513 case nir_tex_src_bias
:
1514 unreachable("LOD bias is not valid for vertex shaders.\n");
1517 unreachable("unknown texture source");
1521 uint32_t constant_offset
= 0;
1522 for (unsigned i
= 0; i
< 3; i
++) {
1523 if (instr
->const_offset
[i
] != 0) {
1524 constant_offset
= brw_texture_offset(instr
->const_offset
, 3);
1529 /* Stuff the channel select bits in the top of the texture offset */
1530 if (instr
->op
== nir_texop_tg4
)
1531 constant_offset
|= gather_channel(instr
->component
, sampler
) << 16;
1533 ir_texture_opcode op
= ir_texture_opcode_for_nir_texop(instr
->op
);
1535 bool is_cube_array
=
1536 instr
->op
== nir_texop_txs
&&
1537 instr
->sampler_dim
== GLSL_SAMPLER_DIM_CUBE
&&
1540 emit_texture(op
, dest
, dest_type
, coordinate
, instr
->coord_components
,
1542 lod
, lod2
, sample_index
,
1543 constant_offset
, offset_value
,
1544 mcs
, is_cube_array
, sampler
, sampler_reg
);