/*
 * Copyright © 2020 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"

/* A pass to split intrinsics with discontinuous writemasks into ones
 * with contiguous writemasks starting with .x, i.e.:
 *
 *   vec4 32 ssa_76 = vec4 ssa_35, ssa_35, ssa_35, ssa_35
 *   intrinsic store_ssbo (ssa_76, ssa_105, ssa_106) (2, 0, 4, 0) // wrmask=y
 *
 * is turned into:
 *
 *   vec4 32 ssa_76 = vec4 ssa_35, ssa_35, ssa_35, ssa_35
 *   vec1 32 ssa_107 = load_const (0x00000001)
 *   vec1 32 ssa_108 = iadd ssa_106, ssa_107
 *   vec1 32 ssa_109 = mov ssa_76.y
 *   intrinsic store_ssbo (ssa_109, ssa_105, ssa_108) (1, 0, 4, 0) // wrmask=x
 *
 * and likewise:
 *
 *   vec4 32 ssa_76 = vec4 ssa_35, ssa_35, ssa_35, ssa_35
 *   intrinsic store_ssbo (ssa_76, ssa_105, ssa_106) (13, 0, 4, 0) // wrmask=xzw
 *
 * is turned into a pair of stores with contiguous writemasks:
 *
 *   vec4 32 ssa_76 = vec4 ssa_35, ssa_35, ssa_35, ssa_35
 *   vec1 32 ssa_107 = load_const (0x00000000)
 *   vec1 32 ssa_108 = iadd ssa_106, ssa_107
 *   vec1 32 ssa_109 = mov ssa_76.x
 *   intrinsic store_ssbo (ssa_109, ssa_105, ssa_108) (1, 0, 4, 0) // wrmask=x
 *
 *   vec1 32 ssa_110 = load_const (0x00000002)
 *   vec1 32 ssa_111 = iadd ssa_106, ssa_110
 *   vec2 32 ssa_112 = mov ssa_76.zw
 *   intrinsic store_ssbo (ssa_112, ssa_105, ssa_111) (3, 0, 4, 0) // wrmask=xy
 */
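
/* The lowering below peels off one contiguous run of set writemask bits
 * per iteration, emitting a store for that run and shifting the offset
 * (or the BASE index, when the intrinsic has one) by the byte offset of
 * the run's first component.
 */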

static int
value_src(nir_intrinsic_op intrinsic)
{
   switch (intrinsic) {
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_store_shared:
   case nir_intrinsic_store_global:
   case nir_intrinsic_store_scratch:
      return 0;
   default:
      return -1;
   }
}
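
/* Index of the offset source.  store_ssbo's sources are
 * (value, block_index, offset) and store_per_vertex_output's are
 * (value, vertex_index, offset), so for those the offset is src[2];
 * for the rest it is src[1]:
 */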
static int
offset_src(nir_intrinsic_op intrinsic)
{
   switch (intrinsic) {
   case nir_intrinsic_store_output:
   case nir_intrinsic_store_shared:
   case nir_intrinsic_store_global:
   case nir_intrinsic_store_scratch:
      return 1;
   case nir_intrinsic_store_per_vertex_output:
   case nir_intrinsic_store_ssbo:
      return 2;
   default:
      return -1;
   }
}

static void
split_wrmask(nir_builder *b, nir_intrinsic_instr *intr)
{
   const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];

   b->cursor = nir_before_instr(&intr->instr);

   assert(!info->has_dest); /* expecting only store intrinsics */

   unsigned num_srcs = info->num_srcs;
   unsigned value_idx = value_src(intr->intrinsic);
   unsigned offset_idx = offset_src(intr->intrinsic);
   unsigned num_comp = nir_intrinsic_src_components(intr, value_idx);

   unsigned wrmask = nir_intrinsic_write_mask(intr);
   while (wrmask) {
      unsigned first_component = ffs(wrmask) - 1;
      unsigned length = ffs(~(wrmask >> first_component)) - 1;
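
      /* For example, with wrmask=0b1101 (xzw): the first pass through the
       * loop finds first_component=0 and length=1 (the lone .x write), and
       * the second finds first_component=2 and length=2 (the .zw write).
       */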

      nir_ssa_def *value = nir_ssa_for_src(b, intr->src[value_idx], num_comp);
      nir_ssa_def *offset = nir_ssa_for_src(b, intr->src[offset_idx], 1);

      /* swizzle out the consecutive components that we'll store
       * in this iteration:
       */
      unsigned cur_mask = (BITFIELD_MASK(length) << first_component);
      value = nir_channels(b, value, cur_mask);
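
      /* value is now a packed vector that starts at .x (nir_channels()
       * builds a new vector holding just the selected channels), which is
       * what lets the replacement store below use a contiguous writemask.
       */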

      /* and create the replacement intrinsic: */
      nir_intrinsic_instr *new_intr =
         nir_intrinsic_instr_create(b->shader, intr->intrinsic);

      nir_intrinsic_copy_const_indices(new_intr, intr);
      nir_intrinsic_set_write_mask(new_intr, BITFIELD_MASK(length));

      const int offset_units = value->bit_size / 8;

      if (info->index_map[NIR_INTRINSIC_ALIGN_MUL]) {
         assert(info->index_map[NIR_INTRINSIC_ALIGN_OFFSET]);
         unsigned align_mul = nir_intrinsic_align_mul(intr);
         unsigned align_off = nir_intrinsic_align_offset(intr);

         align_off += offset_units * first_component;
         align_off = align_off % align_mul;

         nir_intrinsic_set_align(new_intr, align_mul, align_off);
      }
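
      /* For instance, a 32-bit store with align_mul=4 and align_offset=0
       * that gets split at component 2 advances the offset by 8 bytes,
       * leaving the new align_offset at (0 + 8) % 4 = 0.
       */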

      /* if the instruction has a BASE, fold the offset adjustment
       * into that instead of adding alu instructions, otherwise add
       * instructions:
       */
      unsigned offset_adj = offset_units * first_component;
      if (info->index_map[NIR_INTRINSIC_BASE]) {
         nir_intrinsic_set_base(new_intr,
                                nir_intrinsic_base(intr) + offset_adj);
      } else {
         offset = nir_iadd(b, offset,
                           nir_imm_intN_t(b, offset_adj, offset->bit_size));
      }
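
      /* Folding the adjustment into BASE keeps the lowered stores free of
       * extra ALU instructions; the iadd fallback is taken by intrinsics
       * that carry no BASE index (e.g. store_ssbo).
       */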

      new_intr->num_components = length;

      /* Copy the sources, replacing value/offset, and passing everything
       * else through to the new instruction:
       */
      for (unsigned i = 0; i < num_srcs; i++) {
         if (i == value_idx) {
            new_intr->src[i] = nir_src_for_ssa(value);
         } else if (i == offset_idx) {
            new_intr->src[i] = nir_src_for_ssa(offset);
         } else {
            new_intr->src[i] = intr->src[i];
         }
      }

      nir_builder_instr_insert(b, &new_intr->instr);

      /* Clear the bits in the writemask that we just wrote, then try
       * again to see if more channels are left.
       */
      wrmask &= ~cur_mask;
   }

   /* Finally remove the original intrinsic. */
   nir_instr_remove(&intr->instr);
}
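
/* A backend typically runs this pass with a filter callback that returns
 * true only for the store intrinsics it cannot emit natively.  A minimal
 * sketch (the callback name and its always-lower policy are hypothetical):
 *
 *    static bool
 *    needs_wrmask_lowering(const nir_instr *instr, const void *data)
 *    {
 *       return true;
 *    }
 *
 *    NIR_PASS(progress, shader, nir_lower_wrmasks,
 *             needs_wrmask_lowering, NULL);
 */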

bool
nir_lower_wrmasks(nir_shader *shader, nir_instr_filter_cb cb, const void *data)
{
   bool progress = false;

   nir_foreach_function(function, shader) {
      nir_function_impl *impl = function->impl;

      if (!impl)
         continue;

      nir_builder b;

      nir_foreach_block(block, impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            const nir_intrinsic_info *info =
               &nir_intrinsic_infos[intr->intrinsic];

            /* if no wrmask, then skip it: */
            if (!info->index_map[NIR_INTRINSIC_WRMASK])
               continue;

            /* if wrmask is already contiguous, then nothing to do: */
            if (nir_intrinsic_write_mask(intr) ==
                BITFIELD_MASK(intr->num_components))
               continue;

            /* do we know how to lower this instruction? */
            if (value_src(intr->intrinsic) < 0)
               continue;

            assert(offset_src(intr->intrinsic) >= 0);

            /* does backend need us to lower this intrinsic? */
            if (cb && !cb(instr, data))
               continue;

            nir_builder_init(&b, impl);
            split_wrmask(&b, intr);
            progress = true;
         }
      }

      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   }

   return progress;
}