/*
 * Copyright © 2017 Ilia Mirkin
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include "compiler/nir/nir_builder.h"
27 /* A4XX has a broken GATHER4 operation. It performs the texture swizzle on the
28 * gather results, rather than before. As a result, it must be emulated with
29 * direct texture calls.
33 lower_tg4(nir_block
*block
, nir_builder
*b
, void *mem_ctx
)
35 bool progress
= false;
37 static const int offsets
[3][2] = { {0, 1}, {1, 1}, {1, 0} };
39 nir_foreach_instr_safe(instr
, block
) {
40 if (instr
->type
!= nir_instr_type_tex
)
43 nir_tex_instr
*tg4
= (nir_tex_instr
*)instr
;
45 if (tg4
->op
!= nir_texop_tg4
)
48 b
->cursor
= nir_before_instr(&tg4
->instr
);
50 nir_ssa_def
*results
[4];
51 int offset_index
= nir_tex_instr_src_index(tg4
, nir_tex_src_offset
);
52 for (int i
= 0; i
< 4; i
++) {
53 int num_srcs
= tg4
->num_srcs
+ 1 /* lod */;
54 if (offset_index
< 0 && i
< 3)
57 nir_tex_instr
*tex
= nir_tex_instr_create(b
->shader
, num_srcs
);
58 tex
->op
= nir_texop_txl
;
59 tex
->sampler_dim
= tg4
->sampler_dim
;
60 tex
->coord_components
= tg4
->coord_components
;
61 tex
->is_array
= tg4
->is_array
;
62 tex
->is_shadow
= tg4
->is_shadow
;
63 tex
->is_new_style_shadow
= tg4
->is_new_style_shadow
;
64 tex
->texture_index
= tg4
->texture_index
;
65 tex
->sampler_index
= tg4
->sampler_index
;
66 tex
->dest_type
= tg4
->dest_type
;
68 for (int j
= 0; j
< tg4
->num_srcs
; j
++) {
69 nir_src_copy(&tex
->src
[j
].src
, &tg4
->src
[j
].src
, tex
);
70 tex
->src
[j
].src_type
= tg4
->src
[j
].src_type
;
74 nir_vec2(b
, nir_imm_int(b
, offsets
[i
][0]),
75 nir_imm_int(b
, offsets
[i
][1]));
76 if (offset_index
< 0) {
77 tex
->src
[tg4
->num_srcs
].src
= nir_src_for_ssa(offset
);
78 tex
->src
[tg4
->num_srcs
].src_type
= nir_tex_src_offset
;
80 assert(nir_tex_instr_src_size(tex
, offset_index
) == 2);
81 nir_ssa_def
*orig
= nir_ssa_for_src(
82 b
, tex
->src
[offset_index
].src
, 2);
83 tex
->src
[offset_index
].src
=
84 nir_src_for_ssa(nir_iadd(b
, orig
, offset
));
87 tex
->src
[num_srcs
- 1].src
= nir_src_for_ssa(nir_imm_float(b
, 0));
88 tex
->src
[num_srcs
- 1].src_type
= nir_tex_src_lod
;
90 nir_ssa_dest_init(&tex
->instr
, &tex
->dest
,
91 nir_tex_instr_dest_size(tex
), 32, NULL
);
92 nir_builder_instr_insert(b
, &tex
->instr
);
94 results
[i
] = nir_channel(b
, &tex
->dest
.ssa
, tg4
->component
);
97 nir_ssa_def
*result
= nir_vec4(b
, results
[0], results
[1], results
[2], results
[3]);
98 nir_ssa_def_rewrite_uses(&tg4
->dest
.ssa
, nir_src_for_ssa(result
));
100 nir_instr_remove(&tg4
->instr
);
109 lower_tg4_func(nir_function_impl
*impl
)
111 void *mem_ctx
= ralloc_parent(impl
);
113 nir_builder_init(&b
, impl
);
115 bool progress
= false;
116 nir_foreach_block_safe(block
, impl
) {
117 progress
|= lower_tg4(block
, &b
, mem_ctx
);
121 nir_metadata_preserve(impl
, nir_metadata_block_index
|
122 nir_metadata_dominance
);
128 ir3_nir_lower_tg4_to_tex(nir_shader
*shader
)
130 bool progress
= false;
132 nir_foreach_function(function
, shader
) {
134 progress
|= lower_tg4_func(function
->impl
);