radv: only enable TC-compat HTILE for images readable by a shader
[mesa.git] / src / amd / vulkan / radv_meta_bufimage.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24 #include "radv_meta.h"
25 #include "nir/nir_builder.h"
26
27 /*
28 * GFX queue: Compute shader implementation of image->buffer copy
29 * Compute queue: implementation also of buffer->image, image->image, and image clear.
30 */
31
/* GFX9 needs to use a 3D sampler to access 3D resources, so the shader has the options
 * for that.
 */
static nir_shader *
build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
{
	/* Builds the image->buffer copy shader: each invocation of a 16x16x1
	 * workgroup fetches one texel from the source image and stores it to
	 * the destination texel buffer at y * stride + x.
	 */
	nir_builder b;
	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
	const struct glsl_type *sampler_type = glsl_sampler_type(dim,
								 false,
								 false,
								 GLSL_TYPE_FLOAT);
	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;
	/* Set 0, binding 0: source image; binding 1: destination texel buffer
	 * (must match the layout in radv_device_init_meta_itob_state). */
	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      sampler_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;

	/* global_id = workgroup_id * workgroup_size + local_id, i.e. the
	 * absolute texel coordinate handled by this invocation. */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Push constants: source image offset at byte 0 (2 or 3 components)
	 * and the buffer row stride at byte 12. */
	nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(offset, 0);
	nir_intrinsic_set_range(offset, 16);
	offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	offset->num_components = is_3d ? 3 : 2;
	nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
	nir_builder_instr_insert(&b, &offset->instr);

	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(stride, 0);
	nir_intrinsic_set_range(stride, 16);
	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	stride->num_components = 1;
	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
	nir_builder_instr_insert(&b, &stride->instr);

	/* Fetch the source texel at global_id + offset with txf (LOD 0). */
	nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
	tex->sampler_dim = dim;
	tex->op = nir_texop_txf;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, is_3d ? 0x7 : 0x3));
	tex->src[1].src_type = nir_tex_src_lod;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
	tex->src[2].src_type = nir_tex_src_texture_deref;
	tex->src[2].src = nir_src_for_ssa(input_img_deref);
	tex->dest_type = nir_type_float;
	tex->is_array = false;
	tex->coord_components = is_3d ? 3 : 2;

	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(&b, &tex->instr);

	/* Linear destination element index: y * stride + x. */
	nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
	nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);

	nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
	tmp = nir_iadd(&b, tmp, pos_x);

	nir_ssa_def *coord = nir_vec4(&b, tmp, tmp, tmp, tmp);

	/* Store the fetched texel to the buffer image.
	 * srcs: 0 = image deref, 1 = coord, 2 = sample index (undef here),
	 * 3 = value, 4 = presumably the LOD (0) — confirm against the
	 * nir_intrinsic_image_deref_store definition in this tree. */
	nir_ssa_def *outval = &tex->dest.ssa;
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
	store->num_components = 4;
	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
	store->src[1] = nir_src_for_ssa(coord);
	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
	store->src[3] = nir_src_for_ssa(outval);
	store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));

	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}
129
130 /* Image to buffer - don't write use image accessors */
131 static VkResult
132 radv_device_init_meta_itob_state(struct radv_device *device)
133 {
134 VkResult result;
135 struct radv_shader_module cs = { .nir = NULL };
136 struct radv_shader_module cs_3d = { .nir = NULL };
137
138 cs.nir = build_nir_itob_compute_shader(device, false);
139 if (device->physical_device->rad_info.chip_class >= GFX9)
140 cs_3d.nir = build_nir_itob_compute_shader(device, true);
141
142 /*
143 * two descriptors one for the image being sampled
144 * one for the buffer being written.
145 */
146 VkDescriptorSetLayoutCreateInfo ds_create_info = {
147 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
148 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
149 .bindingCount = 2,
150 .pBindings = (VkDescriptorSetLayoutBinding[]) {
151 {
152 .binding = 0,
153 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
154 .descriptorCount = 1,
155 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
156 .pImmutableSamplers = NULL
157 },
158 {
159 .binding = 1,
160 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
161 .descriptorCount = 1,
162 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
163 .pImmutableSamplers = NULL
164 },
165 }
166 };
167
168 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
169 &ds_create_info,
170 &device->meta_state.alloc,
171 &device->meta_state.itob.img_ds_layout);
172 if (result != VK_SUCCESS)
173 goto fail;
174
175
176 VkPipelineLayoutCreateInfo pl_create_info = {
177 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
178 .setLayoutCount = 1,
179 .pSetLayouts = &device->meta_state.itob.img_ds_layout,
180 .pushConstantRangeCount = 1,
181 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
182 };
183
184 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
185 &pl_create_info,
186 &device->meta_state.alloc,
187 &device->meta_state.itob.img_p_layout);
188 if (result != VK_SUCCESS)
189 goto fail;
190
191 /* compute shader */
192
193 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
194 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
195 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
196 .module = radv_shader_module_to_handle(&cs),
197 .pName = "main",
198 .pSpecializationInfo = NULL,
199 };
200
201 VkComputePipelineCreateInfo vk_pipeline_info = {
202 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
203 .stage = pipeline_shader_stage,
204 .flags = 0,
205 .layout = device->meta_state.itob.img_p_layout,
206 };
207
208 result = radv_CreateComputePipelines(radv_device_to_handle(device),
209 radv_pipeline_cache_to_handle(&device->meta_state.cache),
210 1, &vk_pipeline_info, NULL,
211 &device->meta_state.itob.pipeline);
212 if (result != VK_SUCCESS)
213 goto fail;
214
215 if (device->physical_device->rad_info.chip_class >= GFX9) {
216 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
217 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
218 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
219 .module = radv_shader_module_to_handle(&cs_3d),
220 .pName = "main",
221 .pSpecializationInfo = NULL,
222 };
223
224 VkComputePipelineCreateInfo vk_pipeline_info_3d = {
225 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
226 .stage = pipeline_shader_stage_3d,
227 .flags = 0,
228 .layout = device->meta_state.itob.img_p_layout,
229 };
230
231 result = radv_CreateComputePipelines(radv_device_to_handle(device),
232 radv_pipeline_cache_to_handle(&device->meta_state.cache),
233 1, &vk_pipeline_info_3d, NULL,
234 &device->meta_state.itob.pipeline_3d);
235 if (result != VK_SUCCESS)
236 goto fail;
237 ralloc_free(cs_3d.nir);
238 }
239 ralloc_free(cs.nir);
240
241 return VK_SUCCESS;
242 fail:
243 ralloc_free(cs.nir);
244 ralloc_free(cs_3d.nir);
245 return result;
246 }
247
248 static void
249 radv_device_finish_meta_itob_state(struct radv_device *device)
250 {
251 struct radv_meta_state *state = &device->meta_state;
252
253 radv_DestroyPipelineLayout(radv_device_to_handle(device),
254 state->itob.img_p_layout, &state->alloc);
255 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
256 state->itob.img_ds_layout,
257 &state->alloc);
258 radv_DestroyPipeline(radv_device_to_handle(device),
259 state->itob.pipeline, &state->alloc);
260 if (device->physical_device->rad_info.chip_class >= GFX9)
261 radv_DestroyPipeline(radv_device_to_handle(device),
262 state->itob.pipeline_3d, &state->alloc);
263 }
264
static nir_shader *
build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
{
	/* Builds the buffer->image copy shader: each invocation of a 16x16x1
	 * workgroup fetches one element from the source texel buffer at
	 * y * stride + x and stores it to the destination image at
	 * global_id + offset.
	 */
	nir_builder b;
	/* GFX9 needs a 3D image type to access 3D resources. */
	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
	const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	const struct glsl_type *img_type = glsl_sampler_type(dim,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;
	/* Set 0, binding 0: source texel buffer; binding 1: destination image
	 * (must match the layout in radv_device_init_meta_btoi_state). */
	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      buf_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;

	/* Absolute texel coordinate for this invocation. */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Push constants: destination image offset at byte 0 and the source
	 * buffer row stride at byte 12. */
	nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(offset, 0);
	nir_intrinsic_set_range(offset, 16);
	offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	offset->num_components = is_3d ? 3 : 2;
	nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
	nir_builder_instr_insert(&b, &offset->instr);

	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(stride, 0);
	nir_intrinsic_set_range(stride, 16);
	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	stride->num_components = 1;
	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
	nir_builder_instr_insert(&b, &stride->instr);

	/* Linear source element index: y * stride + x. */
	nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
	nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);

	nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
	tmp = nir_iadd(&b, tmp, pos_x);

	nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);

	nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

	/* Fetch one element from the source buffer (txf, LOD 0). */
	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
	tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
	tex->op = nir_texop_txf;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
	tex->src[1].src_type = nir_tex_src_lod;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
	tex->src[2].src_type = nir_tex_src_texture_deref;
	tex->src[2].src = nir_src_for_ssa(input_img_deref);
	tex->dest_type = nir_type_float;
	tex->is_array = false;
	tex->coord_components = 1;

	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(&b, &tex->instr);

	/* Store to the destination image.
	 * srcs: 0 = image deref, 1 = coord, 2 = sample index (undef here),
	 * 3 = value, 4 = presumably the LOD (0) — confirm against the
	 * nir_intrinsic_image_deref_store definition in this tree. */
	nir_ssa_def *outval = &tex->dest.ssa;
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
	store->num_components = 4;
	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
	store->src[1] = nir_src_for_ssa(img_coord);
	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
	store->src[3] = nir_src_for_ssa(outval);
	store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));

	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}
357
358 /* Buffer to image - don't write use image accessors */
359 static VkResult
360 radv_device_init_meta_btoi_state(struct radv_device *device)
361 {
362 VkResult result;
363 struct radv_shader_module cs = { .nir = NULL };
364 struct radv_shader_module cs_3d = { .nir = NULL };
365 cs.nir = build_nir_btoi_compute_shader(device, false);
366 if (device->physical_device->rad_info.chip_class >= GFX9)
367 cs_3d.nir = build_nir_btoi_compute_shader(device, true);
368 /*
369 * two descriptors one for the image being sampled
370 * one for the buffer being written.
371 */
372 VkDescriptorSetLayoutCreateInfo ds_create_info = {
373 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
374 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
375 .bindingCount = 2,
376 .pBindings = (VkDescriptorSetLayoutBinding[]) {
377 {
378 .binding = 0,
379 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
380 .descriptorCount = 1,
381 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
382 .pImmutableSamplers = NULL
383 },
384 {
385 .binding = 1,
386 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
387 .descriptorCount = 1,
388 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
389 .pImmutableSamplers = NULL
390 },
391 }
392 };
393
394 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
395 &ds_create_info,
396 &device->meta_state.alloc,
397 &device->meta_state.btoi.img_ds_layout);
398 if (result != VK_SUCCESS)
399 goto fail;
400
401
402 VkPipelineLayoutCreateInfo pl_create_info = {
403 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
404 .setLayoutCount = 1,
405 .pSetLayouts = &device->meta_state.btoi.img_ds_layout,
406 .pushConstantRangeCount = 1,
407 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
408 };
409
410 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
411 &pl_create_info,
412 &device->meta_state.alloc,
413 &device->meta_state.btoi.img_p_layout);
414 if (result != VK_SUCCESS)
415 goto fail;
416
417 /* compute shader */
418
419 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
420 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
421 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
422 .module = radv_shader_module_to_handle(&cs),
423 .pName = "main",
424 .pSpecializationInfo = NULL,
425 };
426
427 VkComputePipelineCreateInfo vk_pipeline_info = {
428 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
429 .stage = pipeline_shader_stage,
430 .flags = 0,
431 .layout = device->meta_state.btoi.img_p_layout,
432 };
433
434 result = radv_CreateComputePipelines(radv_device_to_handle(device),
435 radv_pipeline_cache_to_handle(&device->meta_state.cache),
436 1, &vk_pipeline_info, NULL,
437 &device->meta_state.btoi.pipeline);
438 if (result != VK_SUCCESS)
439 goto fail;
440
441 if (device->physical_device->rad_info.chip_class >= GFX9) {
442 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
443 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
444 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
445 .module = radv_shader_module_to_handle(&cs_3d),
446 .pName = "main",
447 .pSpecializationInfo = NULL,
448 };
449
450 VkComputePipelineCreateInfo vk_pipeline_info_3d = {
451 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
452 .stage = pipeline_shader_stage_3d,
453 .flags = 0,
454 .layout = device->meta_state.btoi.img_p_layout,
455 };
456
457 result = radv_CreateComputePipelines(radv_device_to_handle(device),
458 radv_pipeline_cache_to_handle(&device->meta_state.cache),
459 1, &vk_pipeline_info_3d, NULL,
460 &device->meta_state.btoi.pipeline_3d);
461 ralloc_free(cs_3d.nir);
462 }
463 ralloc_free(cs.nir);
464
465 return VK_SUCCESS;
466 fail:
467 ralloc_free(cs_3d.nir);
468 ralloc_free(cs.nir);
469 return result;
470 }
471
472 static void
473 radv_device_finish_meta_btoi_state(struct radv_device *device)
474 {
475 struct radv_meta_state *state = &device->meta_state;
476
477 radv_DestroyPipelineLayout(radv_device_to_handle(device),
478 state->btoi.img_p_layout, &state->alloc);
479 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
480 state->btoi.img_ds_layout,
481 &state->alloc);
482 radv_DestroyPipeline(radv_device_to_handle(device),
483 state->btoi.pipeline, &state->alloc);
484 radv_DestroyPipeline(radv_device_to_handle(device),
485 state->btoi.pipeline_3d, &state->alloc);
486 }
487
/* Buffer to image - special path for R32G32B32 */
static nir_shader *
build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
{
	/* Buffer->image copy for R32G32B32: the destination is addressed as
	 * individual 32-bit elements (see the x * 3 scaling and the three
	 * single-component stores below) — presumably because R32G32B32 is
	 * not usable as a regular storage-image format; confirm against the
	 * callers that bind the destination view.
	 */
	nir_builder b;
	const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_btoi_r32g32b32_cs");
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;
	/* Set 0, binding 0: source texel buffer; binding 1: destination
	 * texel buffer. */
	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      buf_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;

	/* Absolute texel coordinate for this invocation. */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Push constants: destination offset at byte 0 (2 components),
	 * destination pitch (in 32-bit elements) at byte 8, source row
	 * stride at byte 12. */
	nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(offset, 0);
	nir_intrinsic_set_range(offset, 16);
	offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	offset->num_components = 2;
	nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
	nir_builder_instr_insert(&b, &offset->instr);

	nir_intrinsic_instr *pitch = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(pitch, 0);
	nir_intrinsic_set_range(pitch, 16);
	pitch->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
	pitch->num_components = 1;
	nir_ssa_dest_init(&pitch->instr, &pitch->dest, 1, 32, "pitch");
	nir_builder_instr_insert(&b, &pitch->instr);

	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(stride, 0);
	nir_intrinsic_set_range(stride, 16);
	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	stride->num_components = 1;
	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
	nir_builder_instr_insert(&b, &stride->instr);

	/* Linear source element index: y * stride + x. */
	nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
	nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);

	nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
	tmp = nir_iadd(&b, tmp, pos_x);

	nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);

	nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);

	/* Destination base element: each R32G32B32 texel occupies three
	 * 32-bit elements, hence the x * 3 scaling. */
	nir_ssa_def *global_pos =
		nir_iadd(&b,
			 nir_imul(&b, nir_channel(&b, img_coord, 1), &pitch->dest.ssa),
			 nir_imul(&b, nir_channel(&b, img_coord, 0), nir_imm_int(&b, 3)));

	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

	/* Fetch the source texel (txf, LOD 0). */
	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
	tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
	tex->op = nir_texop_txf;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
	tex->src[1].src_type = nir_tex_src_lod;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
	tex->src[2].src_type = nir_tex_src_texture_deref;
	tex->src[2].src = nir_src_for_ssa(input_img_deref);
	tex->dest_type = nir_type_float;
	tex->is_array = false;
	tex->coord_components = 1;
	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(&b, &tex->instr);

	nir_ssa_def *outval = &tex->dest.ssa;

	/* Write the three channels as separate single-component stores. */
	for (int chan = 0; chan < 3; chan++) {
		nir_ssa_def *local_pos =
			nir_iadd(&b, global_pos, nir_imm_int(&b, chan));

		nir_ssa_def *coord =
			nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);

		nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
		store->num_components = 1;
		store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
		store->src[1] = nir_src_for_ssa(coord);
		store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
		store->src[3] = nir_src_for_ssa(nir_channel(&b, outval, chan));
		store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
		nir_builder_instr_insert(&b, &store->instr);
	}

	return b.shader;
}
602
603 static VkResult
604 radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device)
605 {
606 VkResult result;
607 struct radv_shader_module cs = { .nir = NULL };
608
609 cs.nir = build_nir_btoi_r32g32b32_compute_shader(device);
610
611 VkDescriptorSetLayoutCreateInfo ds_create_info = {
612 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
613 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
614 .bindingCount = 2,
615 .pBindings = (VkDescriptorSetLayoutBinding[]) {
616 {
617 .binding = 0,
618 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
619 .descriptorCount = 1,
620 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
621 .pImmutableSamplers = NULL
622 },
623 {
624 .binding = 1,
625 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
626 .descriptorCount = 1,
627 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
628 .pImmutableSamplers = NULL
629 },
630 }
631 };
632
633 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
634 &ds_create_info,
635 &device->meta_state.alloc,
636 &device->meta_state.btoi_r32g32b32.img_ds_layout);
637 if (result != VK_SUCCESS)
638 goto fail;
639
640
641 VkPipelineLayoutCreateInfo pl_create_info = {
642 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
643 .setLayoutCount = 1,
644 .pSetLayouts = &device->meta_state.btoi_r32g32b32.img_ds_layout,
645 .pushConstantRangeCount = 1,
646 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
647 };
648
649 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
650 &pl_create_info,
651 &device->meta_state.alloc,
652 &device->meta_state.btoi_r32g32b32.img_p_layout);
653 if (result != VK_SUCCESS)
654 goto fail;
655
656 /* compute shader */
657
658 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
659 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
660 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
661 .module = radv_shader_module_to_handle(&cs),
662 .pName = "main",
663 .pSpecializationInfo = NULL,
664 };
665
666 VkComputePipelineCreateInfo vk_pipeline_info = {
667 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
668 .stage = pipeline_shader_stage,
669 .flags = 0,
670 .layout = device->meta_state.btoi_r32g32b32.img_p_layout,
671 };
672
673 result = radv_CreateComputePipelines(radv_device_to_handle(device),
674 radv_pipeline_cache_to_handle(&device->meta_state.cache),
675 1, &vk_pipeline_info, NULL,
676 &device->meta_state.btoi_r32g32b32.pipeline);
677
678 fail:
679 ralloc_free(cs.nir);
680 return result;
681 }
682
683 static void
684 radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device)
685 {
686 struct radv_meta_state *state = &device->meta_state;
687
688 radv_DestroyPipelineLayout(radv_device_to_handle(device),
689 state->btoi_r32g32b32.img_p_layout, &state->alloc);
690 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
691 state->btoi_r32g32b32.img_ds_layout,
692 &state->alloc);
693 radv_DestroyPipeline(radv_device_to_handle(device),
694 state->btoi_r32g32b32.pipeline, &state->alloc);
695 }
696
static nir_shader *
build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)
{
	/* Builds the image->image copy shader: each invocation of a 16x16x1
	 * workgroup fetches the texel at global_id + src_offset and stores
	 * it at global_id + dst_offset.
	 */
	nir_builder b;
	/* GFX9 needs 3D sampler/image types to access 3D resources. */
	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
	const struct glsl_type *buf_type = glsl_sampler_type(dim,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	const struct glsl_type *img_type = glsl_sampler_type(dim,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itoi_cs_3d" : "meta_itoi_cs");
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;
	/* Set 0, binding 0: source image; binding 1: destination image
	 * (must match the layout in radv_device_init_meta_itoi_state). */
	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      buf_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;

	/* Absolute texel coordinate for this invocation. */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Push constants (24-byte range): source offset at byte 0,
	 * destination offset at byte 12. */
	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(src_offset, 0);
	nir_intrinsic_set_range(src_offset, 24);
	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	src_offset->num_components = is_3d ? 3 : 2;
	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, is_3d ? 3 : 2, 32, "src_offset");
	nir_builder_instr_insert(&b, &src_offset->instr);

	nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(dst_offset, 0);
	nir_intrinsic_set_range(dst_offset, 24);
	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	dst_offset->num_components = is_3d ? 3 : 2;
	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, is_3d ? 3 : 2, 32, "dst_offset");
	nir_builder_instr_insert(&b, &dst_offset->instr);

	nir_ssa_def *src_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

	nir_ssa_def *dst_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);

	/* Fetch the source texel at src_coord with txf (LOD 0). */
	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
	tex->sampler_dim = dim;
	tex->op = nir_texop_txf;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 0x7 : 0x3));
	tex->src[1].src_type = nir_tex_src_lod;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
	tex->src[2].src_type = nir_tex_src_texture_deref;
	tex->src[2].src = nir_src_for_ssa(input_img_deref);
	tex->dest_type = nir_type_float;
	tex->is_array = false;
	tex->coord_components = is_3d ? 3 : 2;

	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(&b, &tex->instr);

	/* Store the texel to the destination image at dst_coord.
	 * srcs: 0 = image deref, 1 = coord, 2 = sample index (undef here),
	 * 3 = value, 4 = presumably the LOD (0) — confirm against the
	 * nir_intrinsic_image_deref_store definition in this tree. */
	nir_ssa_def *outval = &tex->dest.ssa;
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
	store->num_components = 4;
	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
	store->src[1] = nir_src_for_ssa(dst_coord);
	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
	store->src[3] = nir_src_for_ssa(outval);
	store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));

	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}
783
784 /* image to image - don't write use image accessors */
785 static VkResult
786 radv_device_init_meta_itoi_state(struct radv_device *device)
787 {
788 VkResult result;
789 struct radv_shader_module cs = { .nir = NULL };
790 struct radv_shader_module cs_3d = { .nir = NULL };
791 cs.nir = build_nir_itoi_compute_shader(device, false);
792 if (device->physical_device->rad_info.chip_class >= GFX9)
793 cs_3d.nir = build_nir_itoi_compute_shader(device, true);
794 /*
795 * two descriptors one for the image being sampled
796 * one for the buffer being written.
797 */
798 VkDescriptorSetLayoutCreateInfo ds_create_info = {
799 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
800 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
801 .bindingCount = 2,
802 .pBindings = (VkDescriptorSetLayoutBinding[]) {
803 {
804 .binding = 0,
805 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
806 .descriptorCount = 1,
807 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
808 .pImmutableSamplers = NULL
809 },
810 {
811 .binding = 1,
812 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
813 .descriptorCount = 1,
814 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
815 .pImmutableSamplers = NULL
816 },
817 }
818 };
819
820 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
821 &ds_create_info,
822 &device->meta_state.alloc,
823 &device->meta_state.itoi.img_ds_layout);
824 if (result != VK_SUCCESS)
825 goto fail;
826
827
828 VkPipelineLayoutCreateInfo pl_create_info = {
829 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
830 .setLayoutCount = 1,
831 .pSetLayouts = &device->meta_state.itoi.img_ds_layout,
832 .pushConstantRangeCount = 1,
833 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
834 };
835
836 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
837 &pl_create_info,
838 &device->meta_state.alloc,
839 &device->meta_state.itoi.img_p_layout);
840 if (result != VK_SUCCESS)
841 goto fail;
842
843 /* compute shader */
844
845 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
846 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
847 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
848 .module = radv_shader_module_to_handle(&cs),
849 .pName = "main",
850 .pSpecializationInfo = NULL,
851 };
852
853 VkComputePipelineCreateInfo vk_pipeline_info = {
854 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
855 .stage = pipeline_shader_stage,
856 .flags = 0,
857 .layout = device->meta_state.itoi.img_p_layout,
858 };
859
860 result = radv_CreateComputePipelines(radv_device_to_handle(device),
861 radv_pipeline_cache_to_handle(&device->meta_state.cache),
862 1, &vk_pipeline_info, NULL,
863 &device->meta_state.itoi.pipeline);
864 if (result != VK_SUCCESS)
865 goto fail;
866
867 if (device->physical_device->rad_info.chip_class >= GFX9) {
868 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
869 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
870 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
871 .module = radv_shader_module_to_handle(&cs_3d),
872 .pName = "main",
873 .pSpecializationInfo = NULL,
874 };
875
876 VkComputePipelineCreateInfo vk_pipeline_info_3d = {
877 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
878 .stage = pipeline_shader_stage_3d,
879 .flags = 0,
880 .layout = device->meta_state.itoi.img_p_layout,
881 };
882
883 result = radv_CreateComputePipelines(radv_device_to_handle(device),
884 radv_pipeline_cache_to_handle(&device->meta_state.cache),
885 1, &vk_pipeline_info_3d, NULL,
886 &device->meta_state.itoi.pipeline_3d);
887
888 ralloc_free(cs_3d.nir);
889 }
890 ralloc_free(cs.nir);
891
892 return VK_SUCCESS;
893 fail:
894 ralloc_free(cs.nir);
895 ralloc_free(cs_3d.nir);
896 return result;
897 }
898
899 static void
900 radv_device_finish_meta_itoi_state(struct radv_device *device)
901 {
902 struct radv_meta_state *state = &device->meta_state;
903
904 radv_DestroyPipelineLayout(radv_device_to_handle(device),
905 state->itoi.img_p_layout, &state->alloc);
906 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
907 state->itoi.img_ds_layout,
908 &state->alloc);
909 radv_DestroyPipeline(radv_device_to_handle(device),
910 state->itoi.pipeline, &state->alloc);
911 if (device->physical_device->rad_info.chip_class >= GFX9)
912 radv_DestroyPipeline(radv_device_to_handle(device),
913 state->itoi.pipeline_3d, &state->alloc);
914 }
915
/* Builds the compute shader that copies R32G32B32 images. Both source and
 * destination are bound as single-channel (R32) texel buffers, and each of
 * the three components is copied with a separate fetch + store. One
 * invocation handles one texel.
 */
static nir_shader *
build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev)
{
	nir_builder b;
	const struct glsl_type *type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							 false,
							 false,
							 GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_itoi_r32g32b32_cs");
	/* 16x16 workgroup. */
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;
	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      type, "input_img");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       type, "output_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;

	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	/* Global (x, y) position of this invocation within the copy area. */
	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Push constants bytes 0..11: src offset x/y and src stride. */
	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(src_offset, 0);
	nir_intrinsic_set_range(src_offset, 24);
	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	src_offset->num_components = 3;
	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 3, 32, "src_offset");
	nir_builder_instr_insert(&b, &src_offset->instr);

	nir_ssa_def *src_stride = nir_channel(&b, &src_offset->dest.ssa, 2);

	/* Push constants bytes 12..23: dst offset x/y and dst stride. */
	nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(dst_offset, 0);
	nir_intrinsic_set_range(dst_offset, 24);
	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	dst_offset->num_components = 3;
	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 3, 32, "dst_offset");
	nir_builder_instr_insert(&b, &dst_offset->instr);

	nir_ssa_def *dst_stride = nir_channel(&b, &dst_offset->dest.ssa, 2);

	nir_ssa_def *src_img_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
	nir_ssa_def *dst_img_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);

	/* Linear texel-buffer index: y * stride + x * 3 (3 dwords per texel). */
	nir_ssa_def *src_global_pos =
		nir_iadd(&b,
			 nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
			 nir_imul(&b, nir_channel(&b, src_img_coord, 0), nir_imm_int(&b, 3)));

	nir_ssa_def *dst_global_pos =
		nir_iadd(&b,
			 nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
			 nir_imul(&b, nir_channel(&b, dst_img_coord, 0), nir_imm_int(&b, 3)));

	/* Copy each of the R, G and B dwords individually. */
	for (int chan = 0; chan < 3; chan++) {
		/* src */
		nir_ssa_def *src_local_pos =
			nir_iadd(&b, src_global_pos, nir_imm_int(&b, chan));

		nir_ssa_def *src_coord =
			nir_vec4(&b, src_local_pos, src_local_pos,
				 src_local_pos, src_local_pos);

		nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

		/* Texel fetch (txf) of one R32 element from the source view. */
		nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
		tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
		tex->op = nir_texop_txf;
		tex->src[0].src_type = nir_tex_src_coord;
		tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, 1));
		tex->src[1].src_type = nir_tex_src_lod;
		tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
		tex->src[2].src_type = nir_tex_src_texture_deref;
		tex->src[2].src = nir_src_for_ssa(input_img_deref);
		tex->dest_type = nir_type_float;
		tex->is_array = false;
		tex->coord_components = 1;
		nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
		nir_builder_instr_insert(&b, &tex->instr);

		nir_ssa_def *outval = &tex->dest.ssa;

		/* dst */
		nir_ssa_def *dst_local_pos =
			nir_iadd(&b, dst_global_pos, nir_imm_int(&b, chan));

		nir_ssa_def *dst_coord =
			nir_vec4(&b, dst_local_pos, dst_local_pos,
				 dst_local_pos, dst_local_pos);

		/* Store the single fetched component to the destination view. */
		nir_intrinsic_instr *store =
			nir_intrinsic_instr_create(b.shader,
						   nir_intrinsic_image_deref_store);
		store->num_components = 1;
		store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
		store->src[1] = nir_src_for_ssa(dst_coord);
		store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
		store->src[3] = nir_src_for_ssa(nir_channel(&b, outval, 0));
		store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
		nir_builder_instr_insert(&b, &store->instr);
	}

	return b.shader;
}
1031
1032 /* Image to image - special path for R32G32B32 */
1033 static VkResult
1034 radv_device_init_meta_itoi_r32g32b32_state(struct radv_device *device)
1035 {
1036 VkResult result;
1037 struct radv_shader_module cs = { .nir = NULL };
1038
1039 cs.nir = build_nir_itoi_r32g32b32_compute_shader(device);
1040
1041 VkDescriptorSetLayoutCreateInfo ds_create_info = {
1042 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1043 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1044 .bindingCount = 2,
1045 .pBindings = (VkDescriptorSetLayoutBinding[]) {
1046 {
1047 .binding = 0,
1048 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1049 .descriptorCount = 1,
1050 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1051 .pImmutableSamplers = NULL
1052 },
1053 {
1054 .binding = 1,
1055 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1056 .descriptorCount = 1,
1057 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1058 .pImmutableSamplers = NULL
1059 },
1060 }
1061 };
1062
1063 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
1064 &ds_create_info,
1065 &device->meta_state.alloc,
1066 &device->meta_state.itoi_r32g32b32.img_ds_layout);
1067 if (result != VK_SUCCESS)
1068 goto fail;
1069
1070
1071 VkPipelineLayoutCreateInfo pl_create_info = {
1072 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1073 .setLayoutCount = 1,
1074 .pSetLayouts = &device->meta_state.itoi_r32g32b32.img_ds_layout,
1075 .pushConstantRangeCount = 1,
1076 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
1077 };
1078
1079 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
1080 &pl_create_info,
1081 &device->meta_state.alloc,
1082 &device->meta_state.itoi_r32g32b32.img_p_layout);
1083 if (result != VK_SUCCESS)
1084 goto fail;
1085
1086 /* compute shader */
1087
1088 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
1089 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1090 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1091 .module = radv_shader_module_to_handle(&cs),
1092 .pName = "main",
1093 .pSpecializationInfo = NULL,
1094 };
1095
1096 VkComputePipelineCreateInfo vk_pipeline_info = {
1097 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1098 .stage = pipeline_shader_stage,
1099 .flags = 0,
1100 .layout = device->meta_state.itoi_r32g32b32.img_p_layout,
1101 };
1102
1103 result = radv_CreateComputePipelines(radv_device_to_handle(device),
1104 radv_pipeline_cache_to_handle(&device->meta_state.cache),
1105 1, &vk_pipeline_info, NULL,
1106 &device->meta_state.itoi_r32g32b32.pipeline);
1107
1108 fail:
1109 ralloc_free(cs.nir);
1110 return result;
1111 }
1112
1113 static void
1114 radv_device_finish_meta_itoi_r32g32b32_state(struct radv_device *device)
1115 {
1116 struct radv_meta_state *state = &device->meta_state;
1117
1118 radv_DestroyPipelineLayout(radv_device_to_handle(device),
1119 state->itoi_r32g32b32.img_p_layout, &state->alloc);
1120 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
1121 state->itoi_r32g32b32.img_ds_layout,
1122 &state->alloc);
1123 radv_DestroyPipeline(radv_device_to_handle(device),
1124 state->itoi_r32g32b32.pipeline, &state->alloc);
1125 }
1126
/* Builds the compute shader used for image clears on the compute queue:
 * each invocation writes the clear value (from push constants) to one
 * texel of the storage image. is_3d selects the GFX9+ 3D-image variant.
 */
static nir_shader *
build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d)
{
	nir_builder b;
	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
	const struct glsl_type *img_type = glsl_sampler_type(dim,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_cleari_cs_3d" : "meta_cleari_cs");
	/* 16x16 workgroup. */
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 0;

	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	/* Global texel position of this invocation. */
	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Push constants bytes 0..15: the 4-dword clear value. */
	nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(clear_val, 0);
	nir_intrinsic_set_range(clear_val, 20);
	clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	clear_val->num_components = 4;
	nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 4, 32, "clear_value");
	nir_builder_instr_insert(&b, &clear_val->instr);

	/* Push constants bytes 16..19: base layer/slice. */
	nir_intrinsic_instr *layer = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(layer, 0);
	nir_intrinsic_set_range(layer, 20);
	layer->src[0] = nir_src_for_ssa(nir_imm_int(&b, 16));
	layer->num_components = 1;
	nir_ssa_dest_init(&layer->instr, &layer->dest, 1, 32, "layer");
	nir_builder_instr_insert(&b, &layer->instr);

	/* z coordinate = invocation z offset by the base layer. */
	nir_ssa_def *global_z = nir_iadd(&b, nir_channel(&b, global_id, 2), &layer->dest.ssa);

	/* Rebuild the coordinate with the adjusted z. */
	nir_ssa_def *comps[4];
	comps[0] = nir_channel(&b, global_id, 0);
	comps[1] = nir_channel(&b, global_id, 1);
	comps[2] = global_z;
	comps[3] = nir_imm_int(&b, 0);
	global_id = nir_vec(&b, comps, 4);

	/* Store the clear value at the computed coordinate. */
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
	store->num_components = 4;
	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
	store->src[1] = nir_src_for_ssa(global_id);
	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
	store->src[3] = nir_src_for_ssa(&clear_val->dest.ssa);
	store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));

	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}
1192
1193 static VkResult
1194 radv_device_init_meta_cleari_state(struct radv_device *device)
1195 {
1196 VkResult result;
1197 struct radv_shader_module cs = { .nir = NULL };
1198 struct radv_shader_module cs_3d = { .nir = NULL };
1199 cs.nir = build_nir_cleari_compute_shader(device, false);
1200 if (device->physical_device->rad_info.chip_class >= GFX9)
1201 cs_3d.nir = build_nir_cleari_compute_shader(device, true);
1202
1203 /*
1204 * two descriptors one for the image being sampled
1205 * one for the buffer being written.
1206 */
1207 VkDescriptorSetLayoutCreateInfo ds_create_info = {
1208 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1209 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1210 .bindingCount = 1,
1211 .pBindings = (VkDescriptorSetLayoutBinding[]) {
1212 {
1213 .binding = 0,
1214 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1215 .descriptorCount = 1,
1216 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1217 .pImmutableSamplers = NULL
1218 },
1219 }
1220 };
1221
1222 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
1223 &ds_create_info,
1224 &device->meta_state.alloc,
1225 &device->meta_state.cleari.img_ds_layout);
1226 if (result != VK_SUCCESS)
1227 goto fail;
1228
1229
1230 VkPipelineLayoutCreateInfo pl_create_info = {
1231 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1232 .setLayoutCount = 1,
1233 .pSetLayouts = &device->meta_state.cleari.img_ds_layout,
1234 .pushConstantRangeCount = 1,
1235 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
1236 };
1237
1238 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
1239 &pl_create_info,
1240 &device->meta_state.alloc,
1241 &device->meta_state.cleari.img_p_layout);
1242 if (result != VK_SUCCESS)
1243 goto fail;
1244
1245 /* compute shader */
1246
1247 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
1248 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1249 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1250 .module = radv_shader_module_to_handle(&cs),
1251 .pName = "main",
1252 .pSpecializationInfo = NULL,
1253 };
1254
1255 VkComputePipelineCreateInfo vk_pipeline_info = {
1256 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1257 .stage = pipeline_shader_stage,
1258 .flags = 0,
1259 .layout = device->meta_state.cleari.img_p_layout,
1260 };
1261
1262 result = radv_CreateComputePipelines(radv_device_to_handle(device),
1263 radv_pipeline_cache_to_handle(&device->meta_state.cache),
1264 1, &vk_pipeline_info, NULL,
1265 &device->meta_state.cleari.pipeline);
1266 if (result != VK_SUCCESS)
1267 goto fail;
1268
1269
1270 if (device->physical_device->rad_info.chip_class >= GFX9) {
1271 /* compute shader */
1272 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
1273 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1274 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1275 .module = radv_shader_module_to_handle(&cs_3d),
1276 .pName = "main",
1277 .pSpecializationInfo = NULL,
1278 };
1279
1280 VkComputePipelineCreateInfo vk_pipeline_info_3d = {
1281 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1282 .stage = pipeline_shader_stage_3d,
1283 .flags = 0,
1284 .layout = device->meta_state.cleari.img_p_layout,
1285 };
1286
1287 result = radv_CreateComputePipelines(radv_device_to_handle(device),
1288 radv_pipeline_cache_to_handle(&device->meta_state.cache),
1289 1, &vk_pipeline_info_3d, NULL,
1290 &device->meta_state.cleari.pipeline_3d);
1291 if (result != VK_SUCCESS)
1292 goto fail;
1293
1294 ralloc_free(cs_3d.nir);
1295 }
1296 ralloc_free(cs.nir);
1297 return VK_SUCCESS;
1298 fail:
1299 ralloc_free(cs.nir);
1300 ralloc_free(cs_3d.nir);
1301 return result;
1302 }
1303
1304 static void
1305 radv_device_finish_meta_cleari_state(struct radv_device *device)
1306 {
1307 struct radv_meta_state *state = &device->meta_state;
1308
1309 radv_DestroyPipelineLayout(radv_device_to_handle(device),
1310 state->cleari.img_p_layout, &state->alloc);
1311 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
1312 state->cleari.img_ds_layout,
1313 &state->alloc);
1314 radv_DestroyPipeline(radv_device_to_handle(device),
1315 state->cleari.pipeline, &state->alloc);
1316 radv_DestroyPipeline(radv_device_to_handle(device),
1317 state->cleari.pipeline_3d, &state->alloc);
1318 }
1319
/* Special path for clearing R32G32B32 images using a compute shader. The
 * image memory is bound as an R32 storage texel buffer and each of the
 * three components is stored separately; one invocation clears one texel. */
static nir_shader *
build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
{
	nir_builder b;
	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_cleari_r32g32b32_cs");
	/* 16x16 workgroup. */
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 0;

	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	/* Global texel position of this invocation. */
	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Push constants bytes 0..11: the 3-dword clear value. */
	nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(clear_val, 0);
	nir_intrinsic_set_range(clear_val, 16);
	clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	clear_val->num_components = 3;
	nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 3, 32, "clear_value");
	nir_builder_instr_insert(&b, &clear_val->instr);

	/* Push constants bytes 12..15: the image stride. */
	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(stride, 0);
	nir_intrinsic_set_range(stride, 16);
	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	stride->num_components = 1;
	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
	nir_builder_instr_insert(&b, &stride->instr);

	nir_ssa_def *global_x = nir_channel(&b, global_id, 0);
	nir_ssa_def *global_y = nir_channel(&b, global_id, 1);

	/* Linear texel-buffer index: y * stride + x * 3 (3 dwords per texel). */
	nir_ssa_def *global_pos =
		nir_iadd(&b,
			 nir_imul(&b, global_y, &stride->dest.ssa),
			 nir_imul(&b, global_x, nir_imm_int(&b, 3)));

	/* Store each of the R, G and B dwords individually. */
	for (unsigned chan = 0; chan < 3; chan++) {
		nir_ssa_def *local_pos =
			nir_iadd(&b, global_pos, nir_imm_int(&b, chan));

		nir_ssa_def *coord =
			nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);

		nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
		store->num_components = 1;
		store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
		store->src[1] = nir_src_for_ssa(coord);
		store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
		store->src[3] = nir_src_for_ssa(nir_channel(&b, &clear_val->dest.ssa, chan));
		store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));
		nir_builder_instr_insert(&b, &store->instr);
	}

	return b.shader;
}
1392
1393 static VkResult
1394 radv_device_init_meta_cleari_r32g32b32_state(struct radv_device *device)
1395 {
1396 VkResult result;
1397 struct radv_shader_module cs = { .nir = NULL };
1398
1399 cs.nir = build_nir_cleari_r32g32b32_compute_shader(device);
1400
1401 VkDescriptorSetLayoutCreateInfo ds_create_info = {
1402 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1403 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1404 .bindingCount = 1,
1405 .pBindings = (VkDescriptorSetLayoutBinding[]) {
1406 {
1407 .binding = 0,
1408 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1409 .descriptorCount = 1,
1410 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1411 .pImmutableSamplers = NULL
1412 },
1413 }
1414 };
1415
1416 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
1417 &ds_create_info,
1418 &device->meta_state.alloc,
1419 &device->meta_state.cleari_r32g32b32.img_ds_layout);
1420 if (result != VK_SUCCESS)
1421 goto fail;
1422
1423 VkPipelineLayoutCreateInfo pl_create_info = {
1424 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1425 .setLayoutCount = 1,
1426 .pSetLayouts = &device->meta_state.cleari_r32g32b32.img_ds_layout,
1427 .pushConstantRangeCount = 1,
1428 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
1429 };
1430
1431 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
1432 &pl_create_info,
1433 &device->meta_state.alloc,
1434 &device->meta_state.cleari_r32g32b32.img_p_layout);
1435 if (result != VK_SUCCESS)
1436 goto fail;
1437
1438 /* compute shader */
1439 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
1440 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1441 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1442 .module = radv_shader_module_to_handle(&cs),
1443 .pName = "main",
1444 .pSpecializationInfo = NULL,
1445 };
1446
1447 VkComputePipelineCreateInfo vk_pipeline_info = {
1448 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1449 .stage = pipeline_shader_stage,
1450 .flags = 0,
1451 .layout = device->meta_state.cleari_r32g32b32.img_p_layout,
1452 };
1453
1454 result = radv_CreateComputePipelines(radv_device_to_handle(device),
1455 radv_pipeline_cache_to_handle(&device->meta_state.cache),
1456 1, &vk_pipeline_info, NULL,
1457 &device->meta_state.cleari_r32g32b32.pipeline);
1458
1459 fail:
1460 ralloc_free(cs.nir);
1461 return result;
1462 }
1463
1464 static void
1465 radv_device_finish_meta_cleari_r32g32b32_state(struct radv_device *device)
1466 {
1467 struct radv_meta_state *state = &device->meta_state;
1468
1469 radv_DestroyPipelineLayout(radv_device_to_handle(device),
1470 state->cleari_r32g32b32.img_p_layout,
1471 &state->alloc);
1472 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
1473 state->cleari_r32g32b32.img_ds_layout,
1474 &state->alloc);
1475 radv_DestroyPipeline(radv_device_to_handle(device),
1476 state->cleari_r32g32b32.pipeline, &state->alloc);
1477 }
1478
/* Tears down all of the buffer<->image / image copy / image clear meta
 * state created by radv_device_init_meta_bufimage_state(). */
void
radv_device_finish_meta_bufimage_state(struct radv_device *device)
{
	radv_device_finish_meta_itob_state(device);
	radv_device_finish_meta_btoi_state(device);
	radv_device_finish_meta_btoi_r32g32b32_state(device);
	radv_device_finish_meta_itoi_state(device);
	radv_device_finish_meta_itoi_r32g32b32_state(device);
	radv_device_finish_meta_cleari_state(device);
	radv_device_finish_meta_cleari_r32g32b32_state(device);
}
1490
/* Creates all of the compute-meta state used for buffer<->image copies,
 * image->image copies and image clears, including the special R32G32B32
 * paths. On failure, the failed component (which may be partially
 * initialized) and every component initialized before it are torn down
 * again via the cascading labels below before the error is returned. */
VkResult
radv_device_init_meta_bufimage_state(struct radv_device *device)
{
	VkResult result;

	result = radv_device_init_meta_itob_state(device);
	if (result != VK_SUCCESS)
		goto fail_itob;

	result = radv_device_init_meta_btoi_state(device);
	if (result != VK_SUCCESS)
		goto fail_btoi;

	result = radv_device_init_meta_btoi_r32g32b32_state(device);
	if (result != VK_SUCCESS)
		goto fail_btoi_r32g32b32;

	result = radv_device_init_meta_itoi_state(device);
	if (result != VK_SUCCESS)
		goto fail_itoi;

	result = radv_device_init_meta_itoi_r32g32b32_state(device);
	if (result != VK_SUCCESS)
		goto fail_itoi_r32g32b32;

	result = radv_device_init_meta_cleari_state(device);
	if (result != VK_SUCCESS)
		goto fail_cleari;

	result = radv_device_init_meta_cleari_r32g32b32_state(device);
	if (result != VK_SUCCESS)
		goto fail_cleari_r32g32b32;

	return VK_SUCCESS;
	/* Each label finishes its own component and then falls through to
	 * the labels of the components created before it. */
fail_cleari_r32g32b32:
	radv_device_finish_meta_cleari_r32g32b32_state(device);
fail_cleari:
	radv_device_finish_meta_cleari_state(device);
fail_itoi_r32g32b32:
	radv_device_finish_meta_itoi_r32g32b32_state(device);
fail_itoi:
	radv_device_finish_meta_itoi_state(device);
fail_btoi_r32g32b32:
	radv_device_finish_meta_btoi_r32g32b32_state(device);
fail_btoi:
	radv_device_finish_meta_btoi_state(device);
fail_itob:
	radv_device_finish_meta_itob_state(device);
	return result;
}
1541
1542 static void
1543 create_iview(struct radv_cmd_buffer *cmd_buffer,
1544 struct radv_meta_blit2d_surf *surf,
1545 struct radv_image_view *iview)
1546 {
1547 VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 ? VK_IMAGE_VIEW_TYPE_2D :
1548 radv_meta_get_view_type(surf->image);
1549 radv_image_view_init(iview, cmd_buffer->device,
1550 &(VkImageViewCreateInfo) {
1551 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1552 .image = radv_image_to_handle(surf->image),
1553 .viewType = view_type,
1554 .format = surf->format,
1555 .subresourceRange = {
1556 .aspectMask = surf->aspect_mask,
1557 .baseMipLevel = surf->level,
1558 .levelCount = 1,
1559 .baseArrayLayer = surf->layer,
1560 .layerCount = 1
1561 },
1562 }, NULL);
1563 }
1564
1565 static void
1566 create_bview(struct radv_cmd_buffer *cmd_buffer,
1567 struct radv_buffer *buffer,
1568 unsigned offset,
1569 VkFormat format,
1570 struct radv_buffer_view *bview)
1571 {
1572 radv_buffer_view_init(bview, cmd_buffer->device,
1573 &(VkBufferViewCreateInfo) {
1574 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
1575 .flags = 0,
1576 .buffer = radv_buffer_to_handle(buffer),
1577 .format = format,
1578 .offset = offset,
1579 .range = VK_WHOLE_SIZE,
1580 });
1581
1582 }
1583
1584 static void
1585 create_buffer_from_image(struct radv_cmd_buffer *cmd_buffer,
1586 struct radv_meta_blit2d_surf *surf,
1587 VkBufferUsageFlagBits usage,
1588 VkBuffer *buffer)
1589 {
1590 struct radv_device *device = cmd_buffer->device;
1591 struct radv_device_memory mem = { .bo = surf->image->bo };
1592
1593 radv_CreateBuffer(radv_device_to_handle(device),
1594 &(VkBufferCreateInfo) {
1595 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
1596 .flags = 0,
1597 .size = surf->image->size,
1598 .usage = usage,
1599 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
1600 }, NULL, buffer);
1601
1602 radv_BindBufferMemory2(radv_device_to_handle(device), 1,
1603 (VkBindBufferMemoryInfo[]) {
1604 {
1605 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
1606 .buffer = *buffer,
1607 .memory = radv_device_memory_to_handle(&mem),
1608 .memoryOffset = surf->image->offset,
1609 }
1610 });
1611 }
1612
1613 static void
1614 create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1615 struct radv_buffer *buffer,
1616 unsigned offset,
1617 VkFormat src_format,
1618 struct radv_buffer_view *bview)
1619 {
1620 VkFormat format;
1621
1622 switch (src_format) {
1623 case VK_FORMAT_R32G32B32_UINT:
1624 format = VK_FORMAT_R32_UINT;
1625 break;
1626 case VK_FORMAT_R32G32B32_SINT:
1627 format = VK_FORMAT_R32_SINT;
1628 break;
1629 case VK_FORMAT_R32G32B32_SFLOAT:
1630 format = VK_FORMAT_R32_SFLOAT;
1631 break;
1632 default:
1633 unreachable("invalid R32G32B32 format");
1634 }
1635
1636 radv_buffer_view_init(bview, cmd_buffer->device,
1637 &(VkBufferViewCreateInfo) {
1638 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
1639 .flags = 0,
1640 .buffer = radv_buffer_to_handle(buffer),
1641 .format = format,
1642 .offset = offset,
1643 .range = VK_WHOLE_SIZE,
1644 });
1645 }
1646
1647 static unsigned
1648 get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1649 struct radv_meta_blit2d_surf *surf)
1650 {
1651 unsigned stride;
1652
1653 if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
1654 stride = surf->image->planes[0].surface.u.gfx9.surf_pitch;
1655 } else {
1656 stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3;
1657 }
1658
1659 return stride;
1660 }
1661
/* Pushes the image-to-buffer copy descriptors:
 * binding 0 = source sampled image, binding 1 = destination storage
 * texel buffer. */
static void
itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
		      struct radv_image_view *src,
		      struct radv_buffer_view *dst)
{
	struct radv_device *device = cmd_buffer->device;

	radv_meta_push_descriptor_set(cmd_buffer,
				      VK_PIPELINE_BIND_POINT_COMPUTE,
				      device->meta_state.itob.img_p_layout,
				      0, /* set */
				      2, /* descriptorWriteCount */
				      (VkWriteDescriptorSet[]) {
					      {
						      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						      .dstBinding = 0,
						      .dstArrayElement = 0,
						      .descriptorCount = 1,
						      .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
						      .pImageInfo = (VkDescriptorImageInfo[]) {
							      {
								      .sampler = VK_NULL_HANDLE,
								      .imageView = radv_image_view_to_handle(src),
								      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
							      },
						      }
					      },
					      {
						      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						      .dstBinding = 1,
						      .dstArrayElement = 0,
						      .descriptorCount = 1,
						      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
						      .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
					      }
				      });
}
1699
1700 void
1701 radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
1702 struct radv_meta_blit2d_surf *src,
1703 struct radv_meta_blit2d_buffer *dst,
1704 unsigned num_rects,
1705 struct radv_meta_blit2d_rect *rects)
1706 {
1707 VkPipeline pipeline = cmd_buffer->device->meta_state.itob.pipeline;
1708 struct radv_device *device = cmd_buffer->device;
1709 struct radv_image_view src_view;
1710 struct radv_buffer_view dst_view;
1711
1712 create_iview(cmd_buffer, src, &src_view);
1713 create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view);
1714 itob_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1715
1716 if (device->physical_device->rad_info.chip_class >= GFX9 &&
1717 src->image->type == VK_IMAGE_TYPE_3D)
1718 pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d;
1719
1720 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1721 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1722
1723 for (unsigned r = 0; r < num_rects; ++r) {
1724 unsigned push_constants[4] = {
1725 rects[r].src_x,
1726 rects[r].src_y,
1727 src->layer,
1728 dst->pitch
1729 };
1730 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1731 device->meta_state.itob.img_p_layout,
1732 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
1733 push_constants);
1734
1735 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1736 }
1737 }
1738
/* Pushes the R32G32B32 buffer-to-image copy descriptors:
 * binding 0 = source uniform texel buffer, binding 1 = destination storage
 * texel buffer (an R32 view aliasing the image memory). */
static void
btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
				struct radv_buffer_view *src,
				struct radv_buffer_view *dst)
{
	struct radv_device *device = cmd_buffer->device;

	radv_meta_push_descriptor_set(cmd_buffer,
				      VK_PIPELINE_BIND_POINT_COMPUTE,
				      device->meta_state.btoi_r32g32b32.img_p_layout,
				      0, /* set */
				      2, /* descriptorWriteCount */
				      (VkWriteDescriptorSet[]) {
					      {
						      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						      .dstBinding = 0,
						      .dstArrayElement = 0,
						      .descriptorCount = 1,
						      .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
						      .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
					      },
					      {
						      .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						      .dstBinding = 1,
						      .dstArrayElement = 0,
						      .descriptorCount = 1,
						      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
						      .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
					      }
				      });
}
1770
/* Buffer->image copy for 96-bit (R32G32B32) destination formats.
 * The destination image memory is aliased as a texel buffer and written
 * one 32-bit component at a time (see the in-body comment below).
 */
static void
radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
				       struct radv_meta_blit2d_buffer *src,
				       struct radv_meta_blit2d_surf *dst,
				       unsigned num_rects,
				       struct radv_meta_blit2d_rect *rects)
{
	VkPipeline pipeline = cmd_buffer->device->meta_state.btoi_r32g32b32.pipeline;
	struct radv_device *device = cmd_buffer->device;
	struct radv_buffer_view src_view, dst_view;
	unsigned dst_offset = 0;
	unsigned stride;
	VkBuffer buffer;

	/* This special btoi path for R32G32B32 formats will write the linear
	 * image as a buffer with the same underlying memory. The compute
	 * shader will copy all components separately using a R32 format.
	 */
	create_buffer_from_image(cmd_buffer, dst,
				 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
				 &buffer);

	create_bview(cmd_buffer, src->buffer, src->offset,
		     src->format, &src_view);
	create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer),
				   dst_offset, dst->format, &dst_view);
	btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);

	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

	/* Row stride of the aliased destination image memory. */
	stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);

	for (unsigned r = 0; r < num_rects; ++r) {
		/* Shader consumes: dst offset (x, y), dst stride, src pitch. */
		unsigned push_constants[4] = {
			rects[r].dst_x,
			rects[r].dst_y,
			stride,
			src->pitch,
		};

		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
				      device->meta_state.btoi_r32g32b32.img_p_layout,
				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
				      push_constants);

		radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
	}

	/* Only the transient aliasing VkBuffer is destroyed; the underlying
	 * memory belongs to the destination image.
	 */
	radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
}
1822
1823 static void
1824 btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1825 struct radv_buffer_view *src,
1826 struct radv_image_view *dst)
1827 {
1828 struct radv_device *device = cmd_buffer->device;
1829
1830 radv_meta_push_descriptor_set(cmd_buffer,
1831 VK_PIPELINE_BIND_POINT_COMPUTE,
1832 device->meta_state.btoi.img_p_layout,
1833 0, /* set */
1834 2, /* descriptorWriteCount */
1835 (VkWriteDescriptorSet[]) {
1836 {
1837 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1838 .dstBinding = 0,
1839 .dstArrayElement = 0,
1840 .descriptorCount = 1,
1841 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1842 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
1843 },
1844 {
1845 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1846 .dstBinding = 1,
1847 .dstArrayElement = 0,
1848 .descriptorCount = 1,
1849 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1850 .pImageInfo = (VkDescriptorImageInfo[]) {
1851 {
1852 .sampler = VK_NULL_HANDLE,
1853 .imageView = radv_image_view_to_handle(dst),
1854 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1855 },
1856 }
1857 }
1858 });
1859 }
1860
1861 void
1862 radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
1863 struct radv_meta_blit2d_buffer *src,
1864 struct radv_meta_blit2d_surf *dst,
1865 unsigned num_rects,
1866 struct radv_meta_blit2d_rect *rects)
1867 {
1868 VkPipeline pipeline = cmd_buffer->device->meta_state.btoi.pipeline;
1869 struct radv_device *device = cmd_buffer->device;
1870 struct radv_buffer_view src_view;
1871 struct radv_image_view dst_view;
1872
1873 if (dst->image->vk_format == VK_FORMAT_R32G32B32_UINT ||
1874 dst->image->vk_format == VK_FORMAT_R32G32B32_SINT ||
1875 dst->image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
1876 radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst,
1877 num_rects, rects);
1878 return;
1879 }
1880
1881 create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
1882 create_iview(cmd_buffer, dst, &dst_view);
1883 btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1884
1885 if (device->physical_device->rad_info.chip_class >= GFX9 &&
1886 dst->image->type == VK_IMAGE_TYPE_3D)
1887 pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d;
1888 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1889 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1890
1891 for (unsigned r = 0; r < num_rects; ++r) {
1892 unsigned push_constants[4] = {
1893 rects[r].dst_x,
1894 rects[r].dst_y,
1895 dst->layer,
1896 src->pitch,
1897 };
1898 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1899 device->meta_state.btoi.img_p_layout,
1900 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
1901 push_constants);
1902
1903 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1904 }
1905 }
1906
1907 static void
1908 itoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1909 struct radv_buffer_view *src,
1910 struct radv_buffer_view *dst)
1911 {
1912 struct radv_device *device = cmd_buffer->device;
1913
1914 radv_meta_push_descriptor_set(cmd_buffer,
1915 VK_PIPELINE_BIND_POINT_COMPUTE,
1916 device->meta_state.itoi_r32g32b32.img_p_layout,
1917 0, /* set */
1918 2, /* descriptorWriteCount */
1919 (VkWriteDescriptorSet[]) {
1920 {
1921 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1922 .dstBinding = 0,
1923 .dstArrayElement = 0,
1924 .descriptorCount = 1,
1925 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1926 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
1927 },
1928 {
1929 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1930 .dstBinding = 1,
1931 .dstArrayElement = 0,
1932 .descriptorCount = 1,
1933 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1934 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
1935 }
1936 });
1937 }
1938
/* Image->image copy for 96-bit (R32G32B32) formats.
 * Both images are linear and get aliased as texel buffers over their own
 * memory, then copied one 32-bit component at a time (see comment below).
 */
static void
radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
				      struct radv_meta_blit2d_surf *src,
				      struct radv_meta_blit2d_surf *dst,
				      unsigned num_rects,
				      struct radv_meta_blit2d_rect *rects)
{
	VkPipeline pipeline = cmd_buffer->device->meta_state.itoi_r32g32b32.pipeline;
	struct radv_device *device = cmd_buffer->device;
	struct radv_buffer_view src_view, dst_view;
	unsigned src_offset = 0, dst_offset = 0;
	unsigned src_stride, dst_stride;
	VkBuffer src_buffer, dst_buffer;

	/* 96-bit formats are only compatible to themselves. */
	assert(dst->format == VK_FORMAT_R32G32B32_UINT ||
	       dst->format == VK_FORMAT_R32G32B32_SINT ||
	       dst->format == VK_FORMAT_R32G32B32_SFLOAT);

	/* This special itoi path for R32G32B32 formats will write the linear
	 * image as a buffer with the same underlying memory. The compute
	 * shader will copy all components separately using a R32 format.
	 */
	create_buffer_from_image(cmd_buffer, src,
				 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
				 &src_buffer);
	create_buffer_from_image(cmd_buffer, dst,
				 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
				 &dst_buffer);

	create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer),
				   src_offset, src->format, &src_view);
	create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer),
				   dst_offset, dst->format, &dst_view);
	itoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);

	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

	/* Row strides of the aliased source/destination image memory. */
	src_stride = get_image_stride_for_r32g32b32(cmd_buffer, src);
	dst_stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);

	for (unsigned r = 0; r < num_rects; ++r) {
		/* Shader consumes: src (x, y, stride) and dst (x, y, stride). */
		unsigned push_constants[6] = {
			rects[r].src_x,
			rects[r].src_y,
			src_stride,
			rects[r].dst_x,
			rects[r].dst_y,
			dst_stride,
		};
		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
				      device->meta_state.itoi_r32g32b32.img_p_layout,
				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
				      push_constants);

		radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
	}

	/* Only the transient aliasing VkBuffers are destroyed; the memory
	 * belongs to the images.
	 */
	radv_DestroyBuffer(radv_device_to_handle(device), src_buffer, NULL);
	radv_DestroyBuffer(radv_device_to_handle(device), dst_buffer, NULL);
}
2001
2002 static void
2003 itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
2004 struct radv_image_view *src,
2005 struct radv_image_view *dst)
2006 {
2007 struct radv_device *device = cmd_buffer->device;
2008
2009 radv_meta_push_descriptor_set(cmd_buffer,
2010 VK_PIPELINE_BIND_POINT_COMPUTE,
2011 device->meta_state.itoi.img_p_layout,
2012 0, /* set */
2013 2, /* descriptorWriteCount */
2014 (VkWriteDescriptorSet[]) {
2015 {
2016 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2017 .dstBinding = 0,
2018 .dstArrayElement = 0,
2019 .descriptorCount = 1,
2020 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
2021 .pImageInfo = (VkDescriptorImageInfo[]) {
2022 {
2023 .sampler = VK_NULL_HANDLE,
2024 .imageView = radv_image_view_to_handle(src),
2025 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
2026 },
2027 }
2028 },
2029 {
2030 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2031 .dstBinding = 1,
2032 .dstArrayElement = 0,
2033 .descriptorCount = 1,
2034 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
2035 .pImageInfo = (VkDescriptorImageInfo[]) {
2036 {
2037 .sampler = VK_NULL_HANDLE,
2038 .imageView = radv_image_view_to_handle(dst),
2039 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
2040 },
2041 }
2042 }
2043 });
2044 }
2045
2046 void
2047 radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
2048 struct radv_meta_blit2d_surf *src,
2049 struct radv_meta_blit2d_surf *dst,
2050 unsigned num_rects,
2051 struct radv_meta_blit2d_rect *rects)
2052 {
2053 VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline;
2054 struct radv_device *device = cmd_buffer->device;
2055 struct radv_image_view src_view, dst_view;
2056
2057 if (src->format == VK_FORMAT_R32G32B32_UINT ||
2058 src->format == VK_FORMAT_R32G32B32_SINT ||
2059 src->format == VK_FORMAT_R32G32B32_SFLOAT) {
2060 radv_meta_image_to_image_cs_r32g32b32(cmd_buffer, src, dst,
2061 num_rects, rects);
2062 return;
2063 }
2064
2065 create_iview(cmd_buffer, src, &src_view);
2066 create_iview(cmd_buffer, dst, &dst_view);
2067
2068 itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
2069
2070 if (device->physical_device->rad_info.chip_class >= GFX9 &&
2071 (src->image->type == VK_IMAGE_TYPE_3D || dst->image->type == VK_IMAGE_TYPE_3D))
2072 pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
2073 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
2074 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
2075
2076 for (unsigned r = 0; r < num_rects; ++r) {
2077 unsigned push_constants[6] = {
2078 rects[r].src_x,
2079 rects[r].src_y,
2080 src->layer,
2081 rects[r].dst_x,
2082 rects[r].dst_y,
2083 dst->layer,
2084 };
2085 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
2086 device->meta_state.itoi.img_p_layout,
2087 VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
2088 push_constants);
2089
2090 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
2091 }
2092 }
2093
2094 static void
2095 cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
2096 struct radv_buffer_view *view)
2097 {
2098 struct radv_device *device = cmd_buffer->device;
2099
2100 radv_meta_push_descriptor_set(cmd_buffer,
2101 VK_PIPELINE_BIND_POINT_COMPUTE,
2102 device->meta_state.cleari_r32g32b32.img_p_layout,
2103 0, /* set */
2104 1, /* descriptorWriteCount */
2105 (VkWriteDescriptorSet[]) {
2106 {
2107 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2108 .dstBinding = 0,
2109 .dstArrayElement = 0,
2110 .descriptorCount = 1,
2111 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
2112 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(view) },
2113 }
2114 });
2115 }
2116
/* Clear a linear image with a 96-bit (R32G32B32) format.
 * The image memory is aliased as a texel buffer and each 32-bit component
 * is cleared separately (see the in-body comment below).
 */
static void
radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
				   struct radv_meta_blit2d_surf *dst,
				   const VkClearColorValue *clear_color)
{
	VkPipeline pipeline = cmd_buffer->device->meta_state.cleari_r32g32b32.pipeline;
	struct radv_device *device = cmd_buffer->device;
	struct radv_buffer_view dst_view;
	unsigned stride;
	VkBuffer buffer;

	/* This special clear path for R32G32B32 formats will write the linear
	 * image as a buffer with the same underlying memory. The compute
	 * shader will clear all components separately using a R32 format.
	 */
	create_buffer_from_image(cmd_buffer, dst,
				 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
				 &buffer);

	create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer),
				   0, dst->format, &dst_view);
	cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view);

	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

	/* Row stride of the aliased destination image memory. */
	stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);

	/* Shader consumes: the three color components and the stride; the
	 * fourth component does not exist for 96-bit formats.
	 */
	unsigned push_constants[4] = {
		clear_color->uint32[0],
		clear_color->uint32[1],
		clear_color->uint32[2],
		stride,
	};

	radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
			      device->meta_state.cleari_r32g32b32.img_p_layout,
			      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
			      push_constants);

	/* One dispatch covers the whole image extent. */
	radv_unaligned_dispatch(cmd_buffer, dst->image->info.width,
				dst->image->info.height, 1);

	/* Only the transient aliasing VkBuffer is destroyed; the underlying
	 * memory belongs to the image.
	 */
	radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
}
2162
2163 static void
2164 cleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
2165 struct radv_image_view *dst_iview)
2166 {
2167 struct radv_device *device = cmd_buffer->device;
2168
2169 radv_meta_push_descriptor_set(cmd_buffer,
2170 VK_PIPELINE_BIND_POINT_COMPUTE,
2171 device->meta_state.cleari.img_p_layout,
2172 0, /* set */
2173 1, /* descriptorWriteCount */
2174 (VkWriteDescriptorSet[]) {
2175 {
2176 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2177 .dstBinding = 0,
2178 .dstArrayElement = 0,
2179 .descriptorCount = 1,
2180 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
2181 .pImageInfo = (VkDescriptorImageInfo[]) {
2182 {
2183 .sampler = VK_NULL_HANDLE,
2184 .imageView = radv_image_view_to_handle(dst_iview),
2185 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
2186 },
2187 }
2188 },
2189 });
2190 }
2191
2192 void
2193 radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
2194 struct radv_meta_blit2d_surf *dst,
2195 const VkClearColorValue *clear_color)
2196 {
2197 VkPipeline pipeline = cmd_buffer->device->meta_state.cleari.pipeline;
2198 struct radv_device *device = cmd_buffer->device;
2199 struct radv_image_view dst_iview;
2200
2201 if (dst->format == VK_FORMAT_R32G32B32_UINT ||
2202 dst->format == VK_FORMAT_R32G32B32_SINT ||
2203 dst->format == VK_FORMAT_R32G32B32_SFLOAT) {
2204 radv_meta_clear_image_cs_r32g32b32(cmd_buffer, dst, clear_color);
2205 return;
2206 }
2207
2208 create_iview(cmd_buffer, dst, &dst_iview);
2209 cleari_bind_descriptors(cmd_buffer, &dst_iview);
2210
2211 if (device->physical_device->rad_info.chip_class >= GFX9 &&
2212 dst->image->type == VK_IMAGE_TYPE_3D)
2213 pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;
2214
2215 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
2216 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
2217
2218 unsigned push_constants[5] = {
2219 clear_color->uint32[0],
2220 clear_color->uint32[1],
2221 clear_color->uint32[2],
2222 clear_color->uint32[3],
2223 dst->layer,
2224 };
2225
2226 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
2227 device->meta_state.cleari.img_p_layout,
2228 VK_SHADER_STAGE_COMPUTE_BIT, 0, 20,
2229 push_constants);
2230
2231 radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
2232 }