4c8bfa0462b1e72f7e7f04a244bb09ec318b5fe4
[mesa.git] / src / amd / vulkan / radv_meta_bufimage.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24 #include "radv_meta.h"
25 #include "nir/nir_builder.h"
26
27 /*
28 * GFX queue: Compute shader implementation of image->buffer copy
29 * Compute queue: implementation also of buffer->image, image->image, and image clear.
30 */
31
32 /* GFX9 needs to use a 3D sampler to access 3D resources, so the shader has the options
33 * for that.
34 */
/* Build the image->buffer copy compute shader.
 *
 * One invocation copies one texel: it fetches from the source sampled image
 * (2D, or 3D when is_3d) at global_id + push-constant offset, and writes it
 * to a storage texel buffer at the linear index y * stride + x.
 *
 * Interface (must match radv_device_init_meta_itob_state):
 *   set 0, binding 0: source sampled image ("s_tex")
 *   set 0, binding 1: destination storage texel buffer ("out_img")
 *   push constants:  src offset (ivec2/ivec3) at byte 0,
 *                    buffer row stride in texels at byte 12.
 */
35 static nir_shader *
36 build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
37 {
38 nir_builder b;
39 enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
40 const struct glsl_type *sampler_type = glsl_sampler_type(dim,
41 false,
42 false,
43 GLSL_TYPE_FLOAT);
/* Destination is accessed as a buffer-dimension image. */
44 const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
45 false,
46 false,
47 GLSL_TYPE_FLOAT);
48 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
49 b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
/* 16x16x1 workgroups; one invocation per texel. */
50 b.shader->info.cs.local_size[0] = 16;
51 b.shader->info.cs.local_size[1] = 16;
52 b.shader->info.cs.local_size[2] = 1;
53 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
54 sampler_type, "s_tex");
55 input_img->data.descriptor_set = 0;
56 input_img->data.binding = 0;
57
58 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
59 img_type, "out_img");
60 output_img->data.descriptor_set = 0;
61 output_img->data.binding = 1;
62
/* global_id = workgroup id * local size + local invocation id. */
63 nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
64 nir_ssa_def *wg_id = nir_load_work_group_id(&b);
65 nir_ssa_def *block_size = nir_imm_ivec4(&b,
66 b.shader->info.cs.local_size[0],
67 b.shader->info.cs.local_size[1],
68 b.shader->info.cs.local_size[2], 0);
69
70 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
71
72
73
/* Push constant at byte 0: source image offset (x, y[, z]). */
74 nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
75 nir_intrinsic_set_base(offset, 0);
76 nir_intrinsic_set_range(offset, 16);
77 offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
78 offset->num_components = is_3d ? 3 : 2;
79 nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
80 nir_builder_instr_insert(&b, &offset->instr);
81
/* Push constant at byte 12: destination buffer row stride (in texels). */
82 nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
83 nir_intrinsic_set_base(stride, 0);
84 nir_intrinsic_set_range(stride, 16);
85 stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
86 stride->num_components = 1;
87 nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
88 nir_builder_instr_insert(&b, &stride->instr);
89
90 nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
91 nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
92
/* txf: unfiltered integer-coordinate fetch at LOD 0 from the source image. */
93 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
94 tex->sampler_dim = dim;
95 tex->op = nir_texop_txf;
96 tex->src[0].src_type = nir_tex_src_coord;
97 tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, is_3d ? 0x7 : 0x3));
98 tex->src[1].src_type = nir_tex_src_lod;
99 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
100 tex->src[2].src_type = nir_tex_src_texture_deref;
101 tex->src[2].src = nir_src_for_ssa(input_img_deref);
102 tex->dest_type = nir_type_float;
103 tex->is_array = false;
104 tex->coord_components = is_3d ? 3 : 2;
105
106 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
107 nir_builder_instr_insert(&b, &tex->instr);
108
/* Linear destination index into the buffer: y * stride + x.
 * NOTE(review): the 3D variant still linearizes from x/y only — the caller
 * presumably dispatches per-slice; confirm against the dispatch code. */
109 nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
110 nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
111
112 nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
113 tmp = nir_iadd(&b, tmp, pos_x);
114
/* image store takes a 4-component coordinate; replicate the linear index. */
115 nir_ssa_def *coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
116
117 nir_ssa_def *outval = &tex->dest.ssa;
118 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
119 store->num_components = 4;
120 store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
121 store->src[1] = nir_src_for_ssa(coord);
/* src[2] is the sample index; unused for a buffer image, so undef. */
122 store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
123 store->src[3] = nir_src_for_ssa(outval);
124
125 nir_builder_instr_insert(&b, &store->instr);
126 return b.shader;
127 }
128
129 /* Image to buffer: build the compute pipeline state for the image->buffer copy path. */
130 static VkResult
131 radv_device_init_meta_itob_state(struct radv_device *device)
132 {
133 VkResult result;
134 struct radv_shader_module cs = { .nir = NULL };
135 struct radv_shader_module cs_3d = { .nir = NULL };
136
137 cs.nir = build_nir_itob_compute_shader(device, false);
/* GFX9 needs a dedicated 3D-sampler shader variant (see comment on
 * build_nir_itob_compute_shader). */
138 if (device->physical_device->rad_info.chip_class >= GFX9)
139 cs_3d.nir = build_nir_itob_compute_shader(device, true);
140
141 /*
142 * two descriptors: one for the image being sampled,
143 * one for the buffer being written.
144 */
145 VkDescriptorSetLayoutCreateInfo ds_create_info = {
146 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
147 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
148 .bindingCount = 2,
149 .pBindings = (VkDescriptorSetLayoutBinding[]) {
150 {
151 .binding = 0,
152 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
153 .descriptorCount = 1,
154 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
155 .pImmutableSamplers = NULL
156 },
157 {
158 .binding = 1,
159 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
160 .descriptorCount = 1,
161 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
162 .pImmutableSamplers = NULL
163 },
164 }
165 };
166
167 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
168 &ds_create_info,
169 &device->meta_state.alloc,
170 &device->meta_state.itob.img_ds_layout);
171 if (result != VK_SUCCESS)
172 goto fail;
173
174
/* 16 bytes of push constants: offset (12) + stride (4), matching the
 * loads in build_nir_itob_compute_shader. */
175 VkPipelineLayoutCreateInfo pl_create_info = {
176 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
177 .setLayoutCount = 1,
178 .pSetLayouts = &device->meta_state.itob.img_ds_layout,
179 .pushConstantRangeCount = 1,
180 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
181 };
182
183 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
184 &pl_create_info,
185 &device->meta_state.alloc,
186 &device->meta_state.itob.img_p_layout);
187 if (result != VK_SUCCESS)
188 goto fail;
189
190 /* compute shader */
191
192 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
193 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
194 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
195 .module = radv_shader_module_to_handle(&cs),
196 .pName = "main",
197 .pSpecializationInfo = NULL,
198 };
199
200 VkComputePipelineCreateInfo vk_pipeline_info = {
201 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
202 .stage = pipeline_shader_stage,
203 .flags = 0,
204 .layout = device->meta_state.itob.img_p_layout,
205 };
206
207 result = radv_CreateComputePipelines(radv_device_to_handle(device),
208 radv_pipeline_cache_to_handle(&device->meta_state.cache),
209 1, &vk_pipeline_info, NULL,
210 &device->meta_state.itob.pipeline);
211 if (result != VK_SUCCESS)
212 goto fail;
213
/* Second pipeline for the GFX9 3D-image variant. */
214 if (device->physical_device->rad_info.chip_class >= GFX9) {
215 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
216 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
217 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
218 .module = radv_shader_module_to_handle(&cs_3d),
219 .pName = "main",
220 .pSpecializationInfo = NULL,
221 };
222
223 VkComputePipelineCreateInfo vk_pipeline_info_3d = {
224 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
225 .stage = pipeline_shader_stage_3d,
226 .flags = 0,
227 .layout = device->meta_state.itob.img_p_layout,
228 };
229
230 result = radv_CreateComputePipelines(radv_device_to_handle(device),
231 radv_pipeline_cache_to_handle(&device->meta_state.cache),
232 1, &vk_pipeline_info_3d, NULL,
233 &device->meta_state.itob.pipeline_3d);
234 if (result != VK_SUCCESS)
235 goto fail;
236 ralloc_free(cs_3d.nir);
237 }
238 ralloc_free(cs.nir);
239
240 return VK_SUCCESS;
/* The NIR is only needed for pipeline creation; free it on all paths.
 * ralloc_free(NULL) is a no-op, so the unconditional frees are safe. */
241 fail:
242 ralloc_free(cs.nir);
243 ralloc_free(cs_3d.nir);
244 return result;
245 }
246
247 static void
248 radv_device_finish_meta_itob_state(struct radv_device *device)
249 {
250 struct radv_meta_state *state = &device->meta_state;
251
252 radv_DestroyPipelineLayout(radv_device_to_handle(device),
253 state->itob.img_p_layout, &state->alloc);
254 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
255 state->itob.img_ds_layout,
256 &state->alloc);
257 radv_DestroyPipeline(radv_device_to_handle(device),
258 state->itob.pipeline, &state->alloc);
259 if (device->physical_device->rad_info.chip_class >= GFX9)
260 radv_DestroyPipeline(radv_device_to_handle(device),
261 state->itob.pipeline_3d, &state->alloc);
262 }
263
/* Build the buffer->image copy compute shader.
 *
 * One invocation copies one texel: it fetches from the source texel buffer
 * at linear index y * stride + x and writes it to a storage image (2D, or
 * 3D when is_3d) at global_id + push-constant offset.
 *
 * Interface (must match radv_device_init_meta_btoi_state):
 *   set 0, binding 0: source texel buffer ("s_tex")
 *   set 0, binding 1: destination storage image ("out_img")
 *   push constants:  dst offset (ivec2/ivec3) at byte 0,
 *                    buffer row stride in texels at byte 12.
 */
264 static nir_shader *
265 build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
266 {
267 nir_builder b;
268 enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
269 const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
270 false,
271 false,
272 GLSL_TYPE_FLOAT);
273 const struct glsl_type *img_type = glsl_sampler_type(dim,
274 false,
275 false,
276 GLSL_TYPE_FLOAT);
277 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
278 b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
/* 16x16x1 workgroups; one invocation per texel. */
279 b.shader->info.cs.local_size[0] = 16;
280 b.shader->info.cs.local_size[1] = 16;
281 b.shader->info.cs.local_size[2] = 1;
282 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
283 buf_type, "s_tex");
284 input_img->data.descriptor_set = 0;
285 input_img->data.binding = 0;
286
287 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
288 img_type, "out_img");
289 output_img->data.descriptor_set = 0;
290 output_img->data.binding = 1;
291
/* global_id = workgroup id * local size + local invocation id. */
292 nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
293 nir_ssa_def *wg_id = nir_load_work_group_id(&b);
294 nir_ssa_def *block_size = nir_imm_ivec4(&b,
295 b.shader->info.cs.local_size[0],
296 b.shader->info.cs.local_size[1],
297 b.shader->info.cs.local_size[2], 0);
298
299 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
300
/* Push constant at byte 0: destination image offset (x, y[, z]). */
301 nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
302 nir_intrinsic_set_base(offset, 0);
303 nir_intrinsic_set_range(offset, 16);
304 offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
305 offset->num_components = is_3d ? 3 : 2;
306 nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
307 nir_builder_instr_insert(&b, &offset->instr);
308
/* Push constant at byte 12: source buffer row stride (in texels). */
309 nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
310 nir_intrinsic_set_base(stride, 0);
311 nir_intrinsic_set_range(stride, 16);
312 stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
313 stride->num_components = 1;
314 nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
315 nir_builder_instr_insert(&b, &stride->instr);
316
/* Linear source index into the buffer: y * stride + x. */
317 nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
318 nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
319
320 nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
321 tmp = nir_iadd(&b, tmp, pos_x);
322
323 nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
324
325 nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
326 nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
327
/* txf: fetch one texel from the source buffer (1-component coord, LOD 0). */
328 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
329 tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
330 tex->op = nir_texop_txf;
331 tex->src[0].src_type = nir_tex_src_coord;
332 tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
333 tex->src[1].src_type = nir_tex_src_lod;
334 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
335 tex->src[2].src_type = nir_tex_src_texture_deref;
336 tex->src[2].src = nir_src_for_ssa(input_img_deref);
337 tex->dest_type = nir_type_float;
338 tex->is_array = false;
339 tex->coord_components = 1;
340
341 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
342 nir_builder_instr_insert(&b, &tex->instr);
343
344 nir_ssa_def *outval = &tex->dest.ssa;
345 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
346 store->num_components = 4;
347 store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
348 store->src[1] = nir_src_for_ssa(img_coord);
/* src[2] is the sample index; not a multisampled image, so undef. */
349 store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
350 store->src[3] = nir_src_for_ssa(outval);
351
352 nir_builder_instr_insert(&b, &store->instr);
353 return b.shader;
354 }
355
356 /* Buffer to image - don't write use image accessors */
357 static VkResult
358 radv_device_init_meta_btoi_state(struct radv_device *device)
359 {
360 VkResult result;
361 struct radv_shader_module cs = { .nir = NULL };
362 struct radv_shader_module cs_3d = { .nir = NULL };
363 cs.nir = build_nir_btoi_compute_shader(device, false);
364 if (device->physical_device->rad_info.chip_class >= GFX9)
365 cs_3d.nir = build_nir_btoi_compute_shader(device, true);
366 /*
367 * two descriptors one for the image being sampled
368 * one for the buffer being written.
369 */
370 VkDescriptorSetLayoutCreateInfo ds_create_info = {
371 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
372 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
373 .bindingCount = 2,
374 .pBindings = (VkDescriptorSetLayoutBinding[]) {
375 {
376 .binding = 0,
377 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
378 .descriptorCount = 1,
379 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
380 .pImmutableSamplers = NULL
381 },
382 {
383 .binding = 1,
384 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
385 .descriptorCount = 1,
386 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
387 .pImmutableSamplers = NULL
388 },
389 }
390 };
391
392 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
393 &ds_create_info,
394 &device->meta_state.alloc,
395 &device->meta_state.btoi.img_ds_layout);
396 if (result != VK_SUCCESS)
397 goto fail;
398
399
400 VkPipelineLayoutCreateInfo pl_create_info = {
401 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
402 .setLayoutCount = 1,
403 .pSetLayouts = &device->meta_state.btoi.img_ds_layout,
404 .pushConstantRangeCount = 1,
405 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
406 };
407
408 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
409 &pl_create_info,
410 &device->meta_state.alloc,
411 &device->meta_state.btoi.img_p_layout);
412 if (result != VK_SUCCESS)
413 goto fail;
414
415 /* compute shader */
416
417 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
418 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
419 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
420 .module = radv_shader_module_to_handle(&cs),
421 .pName = "main",
422 .pSpecializationInfo = NULL,
423 };
424
425 VkComputePipelineCreateInfo vk_pipeline_info = {
426 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
427 .stage = pipeline_shader_stage,
428 .flags = 0,
429 .layout = device->meta_state.btoi.img_p_layout,
430 };
431
432 result = radv_CreateComputePipelines(radv_device_to_handle(device),
433 radv_pipeline_cache_to_handle(&device->meta_state.cache),
434 1, &vk_pipeline_info, NULL,
435 &device->meta_state.btoi.pipeline);
436 if (result != VK_SUCCESS)
437 goto fail;
438
439 if (device->physical_device->rad_info.chip_class >= GFX9) {
440 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
441 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
442 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
443 .module = radv_shader_module_to_handle(&cs_3d),
444 .pName = "main",
445 .pSpecializationInfo = NULL,
446 };
447
448 VkComputePipelineCreateInfo vk_pipeline_info_3d = {
449 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
450 .stage = pipeline_shader_stage_3d,
451 .flags = 0,
452 .layout = device->meta_state.btoi.img_p_layout,
453 };
454
455 result = radv_CreateComputePipelines(radv_device_to_handle(device),
456 radv_pipeline_cache_to_handle(&device->meta_state.cache),
457 1, &vk_pipeline_info_3d, NULL,
458 &device->meta_state.btoi.pipeline_3d);
459 ralloc_free(cs_3d.nir);
460 }
461 ralloc_free(cs.nir);
462
463 return VK_SUCCESS;
464 fail:
465 ralloc_free(cs_3d.nir);
466 ralloc_free(cs.nir);
467 return result;
468 }
469
470 static void
471 radv_device_finish_meta_btoi_state(struct radv_device *device)
472 {
473 struct radv_meta_state *state = &device->meta_state;
474
475 radv_DestroyPipelineLayout(radv_device_to_handle(device),
476 state->btoi.img_p_layout, &state->alloc);
477 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
478 state->btoi.img_ds_layout,
479 &state->alloc);
480 radv_DestroyPipeline(radv_device_to_handle(device),
481 state->btoi.pipeline, &state->alloc);
482 radv_DestroyPipeline(radv_device_to_handle(device),
483 state->btoi.pipeline_3d, &state->alloc);
484 }
485
486 /* Buffer to image - special path for R32G32B32 */
/* One invocation copies one R32G32B32 texel as three separate 32-bit
 * component stores into the destination texel buffer, at
 * (y * pitch + x * 3) + chan.  Both source and destination are accessed as
 * buffer-dimension images here.
 *
 * Interface (must match radv_device_init_meta_btoi_r32g32b32_state):
 *   set 0, binding 0: source texel buffer ("s_tex")
 *   set 0, binding 1: destination storage texel buffer ("out_img")
 *   push constants:  dst offset (ivec2) at byte 0, dst pitch at byte 8,
 *                    src row stride at byte 12 (all in texels).
 */
487 static nir_shader *
488 build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
489 {
490 nir_builder b;
491 const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
492 false,
493 false,
494 GLSL_TYPE_FLOAT);
495 const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
496 false,
497 false,
498 GLSL_TYPE_FLOAT);
499 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
500 b.shader->info.name = ralloc_strdup(b.shader, "meta_btoi_r32g32b32_cs");
501 b.shader->info.cs.local_size[0] = 16;
502 b.shader->info.cs.local_size[1] = 16;
503 b.shader->info.cs.local_size[2] = 1;
504 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
505 buf_type, "s_tex");
506 input_img->data.descriptor_set = 0;
507 input_img->data.binding = 0;
508
509 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
510 img_type, "out_img");
511 output_img->data.descriptor_set = 0;
512 output_img->data.binding = 1;
513
/* global_id = workgroup id * local size + local invocation id. */
514 nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
515 nir_ssa_def *wg_id = nir_load_work_group_id(&b);
516 nir_ssa_def *block_size = nir_imm_ivec4(&b,
517 b.shader->info.cs.local_size[0],
518 b.shader->info.cs.local_size[1],
519 b.shader->info.cs.local_size[2], 0);
520
521 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
522
/* Push constant at byte 0: destination offset (x, y). */
523 nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
524 nir_intrinsic_set_base(offset, 0);
525 nir_intrinsic_set_range(offset, 16);
526 offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
527 offset->num_components = 2;
528 nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
529 nir_builder_instr_insert(&b, &offset->instr);
530
/* Push constant at byte 8: destination pitch (in 32-bit components). */
531 nir_intrinsic_instr *pitch = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
532 nir_intrinsic_set_base(pitch, 0);
533 nir_intrinsic_set_range(pitch, 16);
534 pitch->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
535 pitch->num_components = 1;
536 nir_ssa_dest_init(&pitch->instr, &pitch->dest, 1, 32, "pitch");
537 nir_builder_instr_insert(&b, &pitch->instr);
538
/* Push constant at byte 12: source buffer row stride (in texels). */
539 nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
540 nir_intrinsic_set_base(stride, 0);
541 nir_intrinsic_set_range(stride, 16);
542 stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
543 stride->num_components = 1;
544 nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
545 nir_builder_instr_insert(&b, &stride->instr);
546
/* Linear source index: y * stride + x. */
547 nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
548 nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);
549
550 nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
551 tmp = nir_iadd(&b, tmp, pos_x);
552
553 nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);
554
555 nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
556
/* Destination base index: each texel occupies 3 consecutive 32-bit
 * components, hence x * 3. */
557 nir_ssa_def *global_pos =
558 nir_iadd(&b,
559 nir_imul(&b, nir_channel(&b, img_coord, 1), &pitch->dest.ssa),
560 nir_imul(&b, nir_channel(&b, img_coord, 0), nir_imm_int(&b, 3)));
561
562 nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
563
/* txf: fetch one texel (up to 4 components) from the source buffer. */
564 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
565 tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
566 tex->op = nir_texop_txf;
567 tex->src[0].src_type = nir_tex_src_coord;
568 tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
569 tex->src[1].src_type = nir_tex_src_lod;
570 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
571 tex->src[2].src_type = nir_tex_src_texture_deref;
572 tex->src[2].src = nir_src_for_ssa(input_img_deref);
573 tex->dest_type = nir_type_float;
574 tex->is_array = false;
575 tex->coord_components = 1;
576 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
577 nir_builder_instr_insert(&b, &tex->instr);
578
579 nir_ssa_def *outval = &tex->dest.ssa;
580
/* Store R, G and B as three single-component writes. */
581 for (int chan = 0; chan < 3; chan++) {
582 nir_ssa_def *local_pos =
583 nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
584
585 nir_ssa_def *coord =
586 nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
587
588 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
589 store->num_components = 1;
590 store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
591 store->src[1] = nir_src_for_ssa(coord);
/* Sample index: unused for a buffer image, so undef. */
592 store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
593 store->src[3] = nir_src_for_ssa(nir_channel(&b, outval, chan));
594 nir_builder_instr_insert(&b, &store->instr);
595 }
596
597 return b.shader;
598 }
599
/* Build the compute pipeline state for the R32G32B32 buffer->image path.
 * Single pipeline (no 3D variant). */
600 static VkResult
601 radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device)
602 {
603 VkResult result;
604 struct radv_shader_module cs = { .nir = NULL };
605
606 cs.nir = build_nir_btoi_r32g32b32_compute_shader(device);
607
/* Two push descriptors: binding 0 reads the source texel buffer,
 * binding 1 writes the destination texel buffer. */
608 VkDescriptorSetLayoutCreateInfo ds_create_info = {
609 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
610 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
611 .bindingCount = 2,
612 .pBindings = (VkDescriptorSetLayoutBinding[]) {
613 {
614 .binding = 0,
615 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
616 .descriptorCount = 1,
617 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
618 .pImmutableSamplers = NULL
619 },
620 {
621 .binding = 1,
622 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
623 .descriptorCount = 1,
624 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
625 .pImmutableSamplers = NULL
626 },
627 }
628 };
629
630 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
631 &ds_create_info,
632 &device->meta_state.alloc,
633 &device->meta_state.btoi_r32g32b32.img_ds_layout);
634 if (result != VK_SUCCESS)
635 goto fail;
636
637
/* 16 bytes of push constants: offset (8) + pitch (4) + stride (4),
 * matching the loads in build_nir_btoi_r32g32b32_compute_shader. */
638 VkPipelineLayoutCreateInfo pl_create_info = {
639 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
640 .setLayoutCount = 1,
641 .pSetLayouts = &device->meta_state.btoi_r32g32b32.img_ds_layout,
642 .pushConstantRangeCount = 1,
643 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
644 };
645
646 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
647 &pl_create_info,
648 &device->meta_state.alloc,
649 &device->meta_state.btoi_r32g32b32.img_p_layout);
650 if (result != VK_SUCCESS)
651 goto fail;
652
653 /* compute shader */
654
655 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
656 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
657 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
658 .module = radv_shader_module_to_handle(&cs),
659 .pName = "main",
660 .pSpecializationInfo = NULL,
661 };
662
663 VkComputePipelineCreateInfo vk_pipeline_info = {
664 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
665 .stage = pipeline_shader_stage,
666 .flags = 0,
667 .layout = device->meta_state.btoi_r32g32b32.img_p_layout,
668 };
669
670 result = radv_CreateComputePipelines(radv_device_to_handle(device),
671 radv_pipeline_cache_to_handle(&device->meta_state.cache),
672 1, &vk_pipeline_info, NULL,
673 &device->meta_state.btoi_r32g32b32.pipeline);
674
/* Note: the success path deliberately falls through to "fail" — result
 * already holds VK_SUCCESS (or the last error), and the NIR must be
 * freed on every path. */
675 fail:
676 ralloc_free(cs.nir);
677 return result;
678 }
679
680 static void
681 radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device)
682 {
683 struct radv_meta_state *state = &device->meta_state;
684
685 radv_DestroyPipelineLayout(radv_device_to_handle(device),
686 state->btoi_r32g32b32.img_p_layout, &state->alloc);
687 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
688 state->btoi_r32g32b32.img_ds_layout,
689 &state->alloc);
690 radv_DestroyPipeline(radv_device_to_handle(device),
691 state->btoi_r32g32b32.pipeline, &state->alloc);
692 }
693
/* Build the image->image copy compute shader.
 *
 * One invocation copies one texel from the source image at
 * global_id + src_offset to the destination image at global_id + dst_offset.
 * Both images are 2D, or 3D when is_3d (GFX9).
 *
 * Interface (must match radv_device_init_meta_itoi_state):
 *   set 0, binding 0: source sampled image ("s_tex")
 *   set 0, binding 1: destination storage image ("out_img")
 *   push constants:  src offset (ivec2/ivec3) at byte 0,
 *                    dst offset (ivec2/ivec3) at byte 12 (24 bytes total).
 */
694 static nir_shader *
695 build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)
696 {
697 nir_builder b;
698 enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
699 const struct glsl_type *buf_type = glsl_sampler_type(dim,
700 false,
701 false,
702 GLSL_TYPE_FLOAT);
703 const struct glsl_type *img_type = glsl_sampler_type(dim,
704 false,
705 false,
706 GLSL_TYPE_FLOAT);
707 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
708 b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itoi_cs_3d" : "meta_itoi_cs");
/* 16x16x1 workgroups; one invocation per texel. */
709 b.shader->info.cs.local_size[0] = 16;
710 b.shader->info.cs.local_size[1] = 16;
711 b.shader->info.cs.local_size[2] = 1;
712 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
713 buf_type, "s_tex");
714 input_img->data.descriptor_set = 0;
715 input_img->data.binding = 0;
716
717 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
718 img_type, "out_img");
719 output_img->data.descriptor_set = 0;
720 output_img->data.binding = 1;
721
/* global_id = workgroup id * local size + local invocation id. */
722 nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
723 nir_ssa_def *wg_id = nir_load_work_group_id(&b);
724 nir_ssa_def *block_size = nir_imm_ivec4(&b,
725 b.shader->info.cs.local_size[0],
726 b.shader->info.cs.local_size[1],
727 b.shader->info.cs.local_size[2], 0);
728
729 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
730
/* Push constant at byte 0: source image offset (x, y[, z]). */
731 nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
732 nir_intrinsic_set_base(src_offset, 0);
733 nir_intrinsic_set_range(src_offset, 24);
734 src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
735 src_offset->num_components = is_3d ? 3 : 2;
736 nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, is_3d ? 3 : 2, 32, "src_offset");
737 nir_builder_instr_insert(&b, &src_offset->instr);
738
/* Push constant at byte 12: destination image offset (x, y[, z]). */
739 nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
740 nir_intrinsic_set_base(dst_offset, 0);
741 nir_intrinsic_set_range(dst_offset, 24);
742 dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
743 dst_offset->num_components = is_3d ? 3 : 2;
744 nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, is_3d ? 3 : 2, 32, "dst_offset");
745 nir_builder_instr_insert(&b, &dst_offset->instr);
746
747 nir_ssa_def *src_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
748 nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
749
750 nir_ssa_def *dst_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
751
/* txf: unfiltered integer-coordinate fetch at LOD 0 from the source. */
752 nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
753 tex->sampler_dim = dim;
754 tex->op = nir_texop_txf;
755 tex->src[0].src_type = nir_tex_src_coord;
756 tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 0x7 : 0x3));
757 tex->src[1].src_type = nir_tex_src_lod;
758 tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
759 tex->src[2].src_type = nir_tex_src_texture_deref;
760 tex->src[2].src = nir_src_for_ssa(input_img_deref);
761 tex->dest_type = nir_type_float;
762 tex->is_array = false;
763 tex->coord_components = is_3d ? 3 : 2;
764
765 nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
766 nir_builder_instr_insert(&b, &tex->instr);
767
768 nir_ssa_def *outval = &tex->dest.ssa;
769 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
770 store->num_components = 4;
771 store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
772 store->src[1] = nir_src_for_ssa(dst_coord);
/* src[2] is the sample index; not a multisampled image, so undef. */
773 store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
774 store->src[3] = nir_src_for_ssa(outval);
775
776 nir_builder_instr_insert(&b, &store->instr);
777 return b.shader;
778 }
779
780 /* image to image - don't write use image accessors */
781 static VkResult
782 radv_device_init_meta_itoi_state(struct radv_device *device)
783 {
784 VkResult result;
785 struct radv_shader_module cs = { .nir = NULL };
786 struct radv_shader_module cs_3d = { .nir = NULL };
787 cs.nir = build_nir_itoi_compute_shader(device, false);
788 if (device->physical_device->rad_info.chip_class >= GFX9)
789 cs_3d.nir = build_nir_itoi_compute_shader(device, true);
790 /*
791 * two descriptors one for the image being sampled
792 * one for the buffer being written.
793 */
794 VkDescriptorSetLayoutCreateInfo ds_create_info = {
795 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
796 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
797 .bindingCount = 2,
798 .pBindings = (VkDescriptorSetLayoutBinding[]) {
799 {
800 .binding = 0,
801 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
802 .descriptorCount = 1,
803 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
804 .pImmutableSamplers = NULL
805 },
806 {
807 .binding = 1,
808 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
809 .descriptorCount = 1,
810 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
811 .pImmutableSamplers = NULL
812 },
813 }
814 };
815
816 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
817 &ds_create_info,
818 &device->meta_state.alloc,
819 &device->meta_state.itoi.img_ds_layout);
820 if (result != VK_SUCCESS)
821 goto fail;
822
823
824 VkPipelineLayoutCreateInfo pl_create_info = {
825 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
826 .setLayoutCount = 1,
827 .pSetLayouts = &device->meta_state.itoi.img_ds_layout,
828 .pushConstantRangeCount = 1,
829 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
830 };
831
832 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
833 &pl_create_info,
834 &device->meta_state.alloc,
835 &device->meta_state.itoi.img_p_layout);
836 if (result != VK_SUCCESS)
837 goto fail;
838
839 /* compute shader */
840
841 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
842 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
843 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
844 .module = radv_shader_module_to_handle(&cs),
845 .pName = "main",
846 .pSpecializationInfo = NULL,
847 };
848
849 VkComputePipelineCreateInfo vk_pipeline_info = {
850 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
851 .stage = pipeline_shader_stage,
852 .flags = 0,
853 .layout = device->meta_state.itoi.img_p_layout,
854 };
855
856 result = radv_CreateComputePipelines(radv_device_to_handle(device),
857 radv_pipeline_cache_to_handle(&device->meta_state.cache),
858 1, &vk_pipeline_info, NULL,
859 &device->meta_state.itoi.pipeline);
860 if (result != VK_SUCCESS)
861 goto fail;
862
863 if (device->physical_device->rad_info.chip_class >= GFX9) {
864 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
865 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
866 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
867 .module = radv_shader_module_to_handle(&cs_3d),
868 .pName = "main",
869 .pSpecializationInfo = NULL,
870 };
871
872 VkComputePipelineCreateInfo vk_pipeline_info_3d = {
873 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
874 .stage = pipeline_shader_stage_3d,
875 .flags = 0,
876 .layout = device->meta_state.itoi.img_p_layout,
877 };
878
879 result = radv_CreateComputePipelines(radv_device_to_handle(device),
880 radv_pipeline_cache_to_handle(&device->meta_state.cache),
881 1, &vk_pipeline_info_3d, NULL,
882 &device->meta_state.itoi.pipeline_3d);
883
884 ralloc_free(cs_3d.nir);
885 }
886 ralloc_free(cs.nir);
887
888 return VK_SUCCESS;
889 fail:
890 ralloc_free(cs.nir);
891 ralloc_free(cs_3d.nir);
892 return result;
893 }
894
895 static void
896 radv_device_finish_meta_itoi_state(struct radv_device *device)
897 {
898 struct radv_meta_state *state = &device->meta_state;
899
900 radv_DestroyPipelineLayout(radv_device_to_handle(device),
901 state->itoi.img_p_layout, &state->alloc);
902 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
903 state->itoi.img_ds_layout,
904 &state->alloc);
905 radv_DestroyPipeline(radv_device_to_handle(device),
906 state->itoi.pipeline, &state->alloc);
907 if (device->physical_device->rad_info.chip_class >= GFX9)
908 radv_DestroyPipeline(radv_device_to_handle(device),
909 state->itoi.pipeline_3d, &state->alloc);
910 }
911
/* Builds the compute shader for the R32G32B32 image->image copy path.
 *
 * There is no native 96-bit texel-buffer support, so both images are
 * accessed through single-channel R32 texel-buffer views and each
 * invocation copies the three 32-bit components of one texel one at a
 * time.
 *
 * Push constants (24 bytes):
 *   [0..7]   src offset (x, y)
 *   [8..11]  src stride in R32 texels
 *   [12..19] dst offset (x, y)
 *   [20..23] dst stride in R32 texels
 */
static nir_shader *
build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev)
{
	nir_builder b;
	/* Same R32 buffer view type for both the source and destination. */
	const struct glsl_type *type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							 false,
							 false,
							 GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_itoi_r32g32b32_cs");
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;
	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      type, "input_img");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       type, "output_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;

	/* global_id = wg_id * local_size + local_invocation_id */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* src offset.xy and src stride from push constants [0..11]. */
	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(src_offset, 0);
	nir_intrinsic_set_range(src_offset, 24);
	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	src_offset->num_components = 3;
	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 3, 32, "src_offset");
	nir_builder_instr_insert(&b, &src_offset->instr);

	nir_ssa_def *src_stride = nir_channel(&b, &src_offset->dest.ssa, 2);

	/* dst offset.xy and dst stride from push constants [12..23]. */
	nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(dst_offset, 0);
	nir_intrinsic_set_range(dst_offset, 24);
	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	dst_offset->num_components = 3;
	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 3, 32, "dst_offset");
	nir_builder_instr_insert(&b, &dst_offset->instr);

	nir_ssa_def *dst_stride = nir_channel(&b, &dst_offset->dest.ssa, 2);

	nir_ssa_def *src_img_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
	nir_ssa_def *dst_img_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);

	/* Linear position of the texel's first R32 component:
	 * y * stride + x * 3. */
	nir_ssa_def *src_global_pos =
		nir_iadd(&b,
			 nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
			 nir_imul(&b, nir_channel(&b, src_img_coord, 0), nir_imm_int(&b, 3)));

	nir_ssa_def *dst_global_pos =
		nir_iadd(&b,
			 nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
			 nir_imul(&b, nir_channel(&b, dst_img_coord, 0), nir_imm_int(&b, 3)));

	/* Copy the three 32-bit components separately. */
	for (int chan = 0; chan < 3; chan++) {
		/* src */
		nir_ssa_def *src_local_pos =
			nir_iadd(&b, src_global_pos, nir_imm_int(&b, chan));

		nir_ssa_def *src_coord =
			nir_vec4(&b, src_local_pos, src_local_pos,
				 src_local_pos, src_local_pos);

		nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

		/* txf of one R32 element from the source buffer view. */
		nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
		tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
		tex->op = nir_texop_txf;
		tex->src[0].src_type = nir_tex_src_coord;
		tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, 1));
		tex->src[1].src_type = nir_tex_src_lod;
		tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
		tex->src[2].src_type = nir_tex_src_texture_deref;
		tex->src[2].src = nir_src_for_ssa(input_img_deref);
		tex->dest_type = nir_type_float;
		tex->is_array = false;
		tex->coord_components = 1;
		nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
		nir_builder_instr_insert(&b, &tex->instr);

		nir_ssa_def *outval = &tex->dest.ssa;

		/* dst */
		nir_ssa_def *dst_local_pos =
			nir_iadd(&b, dst_global_pos, nir_imm_int(&b, chan));

		nir_ssa_def *dst_coord =
			nir_vec4(&b, dst_local_pos, dst_local_pos,
				 dst_local_pos, dst_local_pos);

		/* Store the fetched component to the destination buffer view. */
		nir_intrinsic_instr *store =
			nir_intrinsic_instr_create(b.shader,
						   nir_intrinsic_image_deref_store);
		store->num_components = 1;
		store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
		store->src[1] = nir_src_for_ssa(dst_coord);
		store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
		store->src[3] = nir_src_for_ssa(nir_channel(&b, outval, 0));
		nir_builder_instr_insert(&b, &store->instr);
	}

	return b.shader;
}
1026
1027 /* Image to image - special path for R32G32B32 */
1028 static VkResult
1029 radv_device_init_meta_itoi_r32g32b32_state(struct radv_device *device)
1030 {
1031 VkResult result;
1032 struct radv_shader_module cs = { .nir = NULL };
1033
1034 cs.nir = build_nir_itoi_r32g32b32_compute_shader(device);
1035
1036 VkDescriptorSetLayoutCreateInfo ds_create_info = {
1037 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1038 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1039 .bindingCount = 2,
1040 .pBindings = (VkDescriptorSetLayoutBinding[]) {
1041 {
1042 .binding = 0,
1043 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1044 .descriptorCount = 1,
1045 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1046 .pImmutableSamplers = NULL
1047 },
1048 {
1049 .binding = 1,
1050 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1051 .descriptorCount = 1,
1052 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1053 .pImmutableSamplers = NULL
1054 },
1055 }
1056 };
1057
1058 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
1059 &ds_create_info,
1060 &device->meta_state.alloc,
1061 &device->meta_state.itoi_r32g32b32.img_ds_layout);
1062 if (result != VK_SUCCESS)
1063 goto fail;
1064
1065
1066 VkPipelineLayoutCreateInfo pl_create_info = {
1067 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1068 .setLayoutCount = 1,
1069 .pSetLayouts = &device->meta_state.itoi_r32g32b32.img_ds_layout,
1070 .pushConstantRangeCount = 1,
1071 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
1072 };
1073
1074 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
1075 &pl_create_info,
1076 &device->meta_state.alloc,
1077 &device->meta_state.itoi_r32g32b32.img_p_layout);
1078 if (result != VK_SUCCESS)
1079 goto fail;
1080
1081 /* compute shader */
1082
1083 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
1084 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1085 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1086 .module = radv_shader_module_to_handle(&cs),
1087 .pName = "main",
1088 .pSpecializationInfo = NULL,
1089 };
1090
1091 VkComputePipelineCreateInfo vk_pipeline_info = {
1092 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1093 .stage = pipeline_shader_stage,
1094 .flags = 0,
1095 .layout = device->meta_state.itoi_r32g32b32.img_p_layout,
1096 };
1097
1098 result = radv_CreateComputePipelines(radv_device_to_handle(device),
1099 radv_pipeline_cache_to_handle(&device->meta_state.cache),
1100 1, &vk_pipeline_info, NULL,
1101 &device->meta_state.itoi_r32g32b32.pipeline);
1102
1103 fail:
1104 ralloc_free(cs.nir);
1105 return result;
1106 }
1107
1108 static void
1109 radv_device_finish_meta_itoi_r32g32b32_state(struct radv_device *device)
1110 {
1111 struct radv_meta_state *state = &device->meta_state;
1112
1113 radv_DestroyPipelineLayout(radv_device_to_handle(device),
1114 state->itoi_r32g32b32.img_p_layout, &state->alloc);
1115 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
1116 state->itoi_r32g32b32.img_ds_layout,
1117 &state->alloc);
1118 radv_DestroyPipeline(radv_device_to_handle(device),
1119 state->itoi_r32g32b32.pipeline, &state->alloc);
1120 }
1121
/* Builds the compute shader used by the image-clear meta path.  Each
 * invocation writes the clear value to one texel of the storage image.
 * The 3D variant is used on GFX9+ where 3D images need a 3D image type.
 *
 * Push constants (20 bytes): 16-byte clear value followed by the base
 * layer (or 3D slice) added to the invocation's z coordinate.
 */
static nir_shader *
build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d)
{
	nir_builder b;
	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
	const struct glsl_type *img_type = glsl_sampler_type(dim,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_cleari_cs_3d" : "meta_cleari_cs");
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 0;

	/* global_id = wg_id * local_size + local_invocation_id */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Clear value from push constants [0..15]. */
	nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(clear_val, 0);
	nir_intrinsic_set_range(clear_val, 20);
	clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	clear_val->num_components = 4;
	nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 4, 32, "clear_value");
	nir_builder_instr_insert(&b, &clear_val->instr);

	/* Base layer/slice from push constants [16..19]. */
	nir_intrinsic_instr *layer = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(layer, 0);
	nir_intrinsic_set_range(layer, 20);
	layer->src[0] = nir_src_for_ssa(nir_imm_int(&b, 16));
	layer->num_components = 1;
	nir_ssa_dest_init(&layer->instr, &layer->dest, 1, 32, "layer");
	nir_builder_instr_insert(&b, &layer->instr);

	nir_ssa_def *global_z = nir_iadd(&b, nir_channel(&b, global_id, 2), &layer->dest.ssa);

	/* Replace z with layer-adjusted z; w is unused. */
	nir_ssa_def *comps[4];
	comps[0] = nir_channel(&b, global_id, 0);
	comps[1] = nir_channel(&b, global_id, 1);
	comps[2] = global_z;
	comps[3] = nir_imm_int(&b, 0);
	global_id = nir_vec(&b, comps, 4);

	/* Write the clear value to the texel at global_id. */
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
	store->num_components = 4;
	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
	store->src[1] = nir_src_for_ssa(global_id);
	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
	store->src[3] = nir_src_for_ssa(&clear_val->dest.ssa);

	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}
1186
/* Creates the layouts and compute pipelines for the image-clear meta path
 * (2D pipeline always; an additional 3D pipeline on GFX9+).
 */
static VkResult
radv_device_init_meta_cleari_state(struct radv_device *device)
{
	VkResult result;
	struct radv_shader_module cs = { .nir = NULL };
	struct radv_shader_module cs_3d = { .nir = NULL };
	cs.nir = build_nir_cleari_compute_shader(device, false);
	/* GFX9 needs a separate 3D shader variant to address 3D images. */
	if (device->physical_device->rad_info.chip_class >= GFX9)
		cs_3d.nir = build_nir_cleari_compute_shader(device, true);

	/*
	 * A single descriptor for the storage image being cleared.
	 */
	VkDescriptorSetLayoutCreateInfo ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
		.bindingCount = 1,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.cleari.img_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;


	/* 20 bytes of push constants: 16-byte clear value + base layer. */
	VkPipelineLayoutCreateInfo pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.cleari.img_ds_layout,
		.pushConstantRangeCount = 1,
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					   &pl_create_info,
					   &device->meta_state.alloc,
					   &device->meta_state.cleari.img_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* compute shader */

	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.cleari.img_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &vk_pipeline_info, NULL,
					     &device->meta_state.cleari.pipeline);
	if (result != VK_SUCCESS)
		goto fail;


	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* compute shader */
		VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
			.stage = VK_SHADER_STAGE_COMPUTE_BIT,
			.module = radv_shader_module_to_handle(&cs_3d),
			.pName = "main",
			.pSpecializationInfo = NULL,
		};

		VkComputePipelineCreateInfo vk_pipeline_info_3d = {
			.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
			.stage = pipeline_shader_stage_3d,
			.flags = 0,
			.layout = device->meta_state.cleari.img_p_layout,
		};

		result = radv_CreateComputePipelines(radv_device_to_handle(device),
						     radv_pipeline_cache_to_handle(&device->meta_state.cache),
						     1, &vk_pipeline_info_3d, NULL,
						     &device->meta_state.cleari.pipeline_3d);
		if (result != VK_SUCCESS)
			goto fail;

		ralloc_free(cs_3d.nir);
	}
	ralloc_free(cs.nir);
	return VK_SUCCESS;
fail:
	/* ralloc_free(NULL) is a no-op, so this is safe pre-GFX9. */
	ralloc_free(cs.nir);
	ralloc_free(cs_3d.nir);
	return result;
}
1297
1298 static void
1299 radv_device_finish_meta_cleari_state(struct radv_device *device)
1300 {
1301 struct radv_meta_state *state = &device->meta_state;
1302
1303 radv_DestroyPipelineLayout(radv_device_to_handle(device),
1304 state->cleari.img_p_layout, &state->alloc);
1305 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
1306 state->cleari.img_ds_layout,
1307 &state->alloc);
1308 radv_DestroyPipeline(radv_device_to_handle(device),
1309 state->cleari.pipeline, &state->alloc);
1310 radv_DestroyPipeline(radv_device_to_handle(device),
1311 state->cleari.pipeline_3d, &state->alloc);
1312 }
1313
/* Special path for clearing R32G32B32 images using a compute shader.
 *
 * The linear image is bound as a R32 storage texel buffer and every
 * invocation writes the three 32-bit components of its texel individually.
 *
 * Push constants (16 bytes): 12-byte clear value followed by the image
 * stride in R32 texels.
 */
static nir_shader *
build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
{
	nir_builder b;
	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_cleari_r32g32b32_cs");
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 0;

	/* global_id = wg_id * local_size + local_invocation_id */
	nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
	nir_ssa_def *wg_id = nir_load_work_group_id(&b);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Clear value (3 components) from push constants [0..11]. */
	nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(clear_val, 0);
	nir_intrinsic_set_range(clear_val, 16);
	clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	clear_val->num_components = 3;
	nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 3, 32, "clear_value");
	nir_builder_instr_insert(&b, &clear_val->instr);

	/* Image stride from push constants [12..15]. */
	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(stride, 0);
	nir_intrinsic_set_range(stride, 16);
	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	stride->num_components = 1;
	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
	nir_builder_instr_insert(&b, &stride->instr);

	nir_ssa_def *global_x = nir_channel(&b, global_id, 0);
	nir_ssa_def *global_y = nir_channel(&b, global_id, 1);

	/* Linear position of the texel's first R32 component:
	 * y * stride + x * 3. */
	nir_ssa_def *global_pos =
		nir_iadd(&b,
			 nir_imul(&b, global_y, &stride->dest.ssa),
			 nir_imul(&b, global_x, nir_imm_int(&b, 3)));

	/* Store each of the three 32-bit components separately. */
	for (unsigned chan = 0; chan < 3; chan++) {
		nir_ssa_def *local_pos =
			nir_iadd(&b, global_pos, nir_imm_int(&b, chan));

		nir_ssa_def *coord =
			nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);

		nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
		store->num_components = 1;
		store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
		store->src[1] = nir_src_for_ssa(coord);
		store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
		store->src[3] = nir_src_for_ssa(nir_channel(&b, &clear_val->dest.ssa, chan));
		nir_builder_instr_insert(&b, &store->instr);
	}

	return b.shader;
}
1385
1386 static VkResult
1387 radv_device_init_meta_cleari_r32g32b32_state(struct radv_device *device)
1388 {
1389 VkResult result;
1390 struct radv_shader_module cs = { .nir = NULL };
1391
1392 cs.nir = build_nir_cleari_r32g32b32_compute_shader(device);
1393
1394 VkDescriptorSetLayoutCreateInfo ds_create_info = {
1395 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1396 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1397 .bindingCount = 1,
1398 .pBindings = (VkDescriptorSetLayoutBinding[]) {
1399 {
1400 .binding = 0,
1401 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1402 .descriptorCount = 1,
1403 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1404 .pImmutableSamplers = NULL
1405 },
1406 }
1407 };
1408
1409 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
1410 &ds_create_info,
1411 &device->meta_state.alloc,
1412 &device->meta_state.cleari_r32g32b32.img_ds_layout);
1413 if (result != VK_SUCCESS)
1414 goto fail;
1415
1416 VkPipelineLayoutCreateInfo pl_create_info = {
1417 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1418 .setLayoutCount = 1,
1419 .pSetLayouts = &device->meta_state.cleari_r32g32b32.img_ds_layout,
1420 .pushConstantRangeCount = 1,
1421 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
1422 };
1423
1424 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
1425 &pl_create_info,
1426 &device->meta_state.alloc,
1427 &device->meta_state.cleari_r32g32b32.img_p_layout);
1428 if (result != VK_SUCCESS)
1429 goto fail;
1430
1431 /* compute shader */
1432 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
1433 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1434 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
1435 .module = radv_shader_module_to_handle(&cs),
1436 .pName = "main",
1437 .pSpecializationInfo = NULL,
1438 };
1439
1440 VkComputePipelineCreateInfo vk_pipeline_info = {
1441 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1442 .stage = pipeline_shader_stage,
1443 .flags = 0,
1444 .layout = device->meta_state.cleari_r32g32b32.img_p_layout,
1445 };
1446
1447 result = radv_CreateComputePipelines(radv_device_to_handle(device),
1448 radv_pipeline_cache_to_handle(&device->meta_state.cache),
1449 1, &vk_pipeline_info, NULL,
1450 &device->meta_state.cleari_r32g32b32.pipeline);
1451
1452 fail:
1453 ralloc_free(cs.nir);
1454 return result;
1455 }
1456
1457 static void
1458 radv_device_finish_meta_cleari_r32g32b32_state(struct radv_device *device)
1459 {
1460 struct radv_meta_state *state = &device->meta_state;
1461
1462 radv_DestroyPipelineLayout(radv_device_to_handle(device),
1463 state->cleari_r32g32b32.img_p_layout,
1464 &state->alloc);
1465 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
1466 state->cleari_r32g32b32.img_ds_layout,
1467 &state->alloc);
1468 radv_DestroyPipeline(radv_device_to_handle(device),
1469 state->cleari_r32g32b32.pipeline, &state->alloc);
1470 }
1471
/* Destroys all buffer<->image meta states.  Also used by the init
 * function's unwind path, where some states may only be partially
 * created (destroying VK_NULL_HANDLE objects is a no-op).
 */
void
radv_device_finish_meta_bufimage_state(struct radv_device *device)
{
	radv_device_finish_meta_itob_state(device);
	radv_device_finish_meta_btoi_state(device);
	radv_device_finish_meta_btoi_r32g32b32_state(device);
	radv_device_finish_meta_itoi_state(device);
	radv_device_finish_meta_itoi_r32g32b32_state(device);
	radv_device_finish_meta_cleari_state(device);
	radv_device_finish_meta_cleari_r32g32b32_state(device);
}
1483
/* Creates all compute-based buffer<->image meta states.
 *
 * On failure, each fail_* label first finishes the state whose init
 * failed (cleaning up any objects it created before failing; destroying
 * VK_NULL_HANDLE is a no-op), then falls through to unwind the
 * previously initialized states in reverse order.
 */
VkResult
radv_device_init_meta_bufimage_state(struct radv_device *device)
{
	VkResult result;

	result = radv_device_init_meta_itob_state(device);
	if (result != VK_SUCCESS)
		goto fail_itob;

	result = radv_device_init_meta_btoi_state(device);
	if (result != VK_SUCCESS)
		goto fail_btoi;

	result = radv_device_init_meta_btoi_r32g32b32_state(device);
	if (result != VK_SUCCESS)
		goto fail_btoi_r32g32b32;

	result = radv_device_init_meta_itoi_state(device);
	if (result != VK_SUCCESS)
		goto fail_itoi;

	result = radv_device_init_meta_itoi_r32g32b32_state(device);
	if (result != VK_SUCCESS)
		goto fail_itoi_r32g32b32;

	result = radv_device_init_meta_cleari_state(device);
	if (result != VK_SUCCESS)
		goto fail_cleari;

	result = radv_device_init_meta_cleari_r32g32b32_state(device);
	if (result != VK_SUCCESS)
		goto fail_cleari_r32g32b32;

	return VK_SUCCESS;
fail_cleari_r32g32b32:
	radv_device_finish_meta_cleari_r32g32b32_state(device);
fail_cleari:
	radv_device_finish_meta_cleari_state(device);
fail_itoi_r32g32b32:
	radv_device_finish_meta_itoi_r32g32b32_state(device);
fail_itoi:
	radv_device_finish_meta_itoi_state(device);
fail_btoi_r32g32b32:
	radv_device_finish_meta_btoi_r32g32b32_state(device);
fail_btoi:
	radv_device_finish_meta_btoi_state(device);
fail_itob:
	radv_device_finish_meta_itob_state(device);
	return result;
}
1534
1535 static void
1536 create_iview(struct radv_cmd_buffer *cmd_buffer,
1537 struct radv_meta_blit2d_surf *surf,
1538 struct radv_image_view *iview)
1539 {
1540 VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 ? VK_IMAGE_VIEW_TYPE_2D :
1541 radv_meta_get_view_type(surf->image);
1542 radv_image_view_init(iview, cmd_buffer->device,
1543 &(VkImageViewCreateInfo) {
1544 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1545 .image = radv_image_to_handle(surf->image),
1546 .viewType = view_type,
1547 .format = surf->format,
1548 .subresourceRange = {
1549 .aspectMask = surf->aspect_mask,
1550 .baseMipLevel = surf->level,
1551 .levelCount = 1,
1552 .baseArrayLayer = surf->layer,
1553 .layerCount = 1
1554 },
1555 });
1556 }
1557
1558 static void
1559 create_bview(struct radv_cmd_buffer *cmd_buffer,
1560 struct radv_buffer *buffer,
1561 unsigned offset,
1562 VkFormat format,
1563 struct radv_buffer_view *bview)
1564 {
1565 radv_buffer_view_init(bview, cmd_buffer->device,
1566 &(VkBufferViewCreateInfo) {
1567 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
1568 .flags = 0,
1569 .buffer = radv_buffer_to_handle(buffer),
1570 .format = format,
1571 .offset = offset,
1572 .range = VK_WHOLE_SIZE,
1573 });
1574
1575 }
1576
/* Creates a VkBuffer that aliases the image's backing memory (same bo,
 * same offset, the image's full size), so the image data can be accessed
 * through texel buffer views — used by the R32G32B32 meta paths.  The
 * caller owns the returned buffer and must destroy it.
 *
 * NOTE(review): the radv_CreateBuffer / radv_BindBufferMemory2 results
 * are ignored here — presumably these internal calls cannot fail for
 * meta usage; confirm before relying on this elsewhere.
 */
static void
create_buffer_from_image(struct radv_cmd_buffer *cmd_buffer,
			 struct radv_meta_blit2d_surf *surf,
			 VkBufferUsageFlagBits usage,
			 VkBuffer *buffer)
{
	struct radv_device *device = cmd_buffer->device;
	/* Temporary stack-local memory object wrapping the image's bo. */
	struct radv_device_memory mem = { .bo = surf->image->bo };

	radv_CreateBuffer(radv_device_to_handle(device),
			  &(VkBufferCreateInfo) {
				.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
				.flags = 0,
				.size = surf->image->size,
				.usage = usage,
				.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
			  }, NULL, buffer);

	radv_BindBufferMemory2(radv_device_to_handle(device), 1,
			       (VkBindBufferMemoryInfo[]) {
				{
					.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
					.buffer = *buffer,
					.memory = radv_device_memory_to_handle(&mem),
					.memoryOffset = surf->image->offset,
				}
			       });
}
1605
1606 static void
1607 create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1608 struct radv_buffer *buffer,
1609 unsigned offset,
1610 VkFormat src_format,
1611 struct radv_buffer_view *bview)
1612 {
1613 VkFormat format;
1614
1615 switch (src_format) {
1616 case VK_FORMAT_R32G32B32_UINT:
1617 format = VK_FORMAT_R32_UINT;
1618 break;
1619 case VK_FORMAT_R32G32B32_SINT:
1620 format = VK_FORMAT_R32_SINT;
1621 break;
1622 case VK_FORMAT_R32G32B32_SFLOAT:
1623 format = VK_FORMAT_R32_SFLOAT;
1624 break;
1625 default:
1626 unreachable("invalid R32G32B32 format");
1627 }
1628
1629 radv_buffer_view_init(bview, cmd_buffer->device,
1630 &(VkBufferViewCreateInfo) {
1631 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
1632 .flags = 0,
1633 .buffer = radv_buffer_to_handle(buffer),
1634 .format = format,
1635 .offset = offset,
1636 .range = VK_WHOLE_SIZE,
1637 });
1638 }
1639
1640 static unsigned
1641 get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1642 struct radv_meta_blit2d_surf *surf)
1643 {
1644 unsigned stride;
1645
1646 if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
1647 stride = surf->image->planes[0].surface.u.gfx9.surf_pitch;
1648 } else {
1649 stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3;
1650 }
1651
1652 return stride;
1653 }
1654
1655 static void
1656 itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1657 struct radv_image_view *src,
1658 struct radv_buffer_view *dst)
1659 {
1660 struct radv_device *device = cmd_buffer->device;
1661
1662 radv_meta_push_descriptor_set(cmd_buffer,
1663 VK_PIPELINE_BIND_POINT_COMPUTE,
1664 device->meta_state.itob.img_p_layout,
1665 0, /* set */
1666 2, /* descriptorWriteCount */
1667 (VkWriteDescriptorSet[]) {
1668 {
1669 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1670 .dstBinding = 0,
1671 .dstArrayElement = 0,
1672 .descriptorCount = 1,
1673 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
1674 .pImageInfo = (VkDescriptorImageInfo[]) {
1675 {
1676 .sampler = VK_NULL_HANDLE,
1677 .imageView = radv_image_view_to_handle(src),
1678 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1679 },
1680 }
1681 },
1682 {
1683 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1684 .dstBinding = 1,
1685 .dstArrayElement = 0,
1686 .descriptorCount = 1,
1687 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1688 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
1689 }
1690 });
1691 }
1692
1693 void
1694 radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
1695 struct radv_meta_blit2d_surf *src,
1696 struct radv_meta_blit2d_buffer *dst,
1697 unsigned num_rects,
1698 struct radv_meta_blit2d_rect *rects)
1699 {
1700 VkPipeline pipeline = cmd_buffer->device->meta_state.itob.pipeline;
1701 struct radv_device *device = cmd_buffer->device;
1702 struct radv_image_view src_view;
1703 struct radv_buffer_view dst_view;
1704
1705 create_iview(cmd_buffer, src, &src_view);
1706 create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view);
1707 itob_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1708
1709 if (device->physical_device->rad_info.chip_class >= GFX9 &&
1710 src->image->type == VK_IMAGE_TYPE_3D)
1711 pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d;
1712
1713 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1714 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1715
1716 for (unsigned r = 0; r < num_rects; ++r) {
1717 unsigned push_constants[4] = {
1718 rects[r].src_x,
1719 rects[r].src_y,
1720 src->layer,
1721 dst->pitch
1722 };
1723 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1724 device->meta_state.itob.img_p_layout,
1725 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
1726 push_constants);
1727
1728 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1729 }
1730 }
1731
/* Pushes the two texel-buffer descriptors for the R32G32B32 buffer->image
 * path: the source uniform texel buffer (binding 0) and the destination
 * storage texel buffer aliasing the image (binding 1).
 */
static void
btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
				struct radv_buffer_view *src,
				struct radv_buffer_view *dst)
{
	struct radv_device *device = cmd_buffer->device;

	radv_meta_push_descriptor_set(cmd_buffer,
				      VK_PIPELINE_BIND_POINT_COMPUTE,
				      device->meta_state.btoi_r32g32b32.img_p_layout,
				      0, /* set */
				      2, /* descriptorWriteCount */
				      (VkWriteDescriptorSet[]) {
					{
						.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						.dstBinding = 0,
						.dstArrayElement = 0,
						.descriptorCount = 1,
						.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
						.pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
					},
					{
						.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
						.dstBinding = 1,
						.dstArrayElement = 0,
						.descriptorCount = 1,
						.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
						.pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
					}
				      });
}
1763
1764 static void
1765 radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1766 struct radv_meta_blit2d_buffer *src,
1767 struct radv_meta_blit2d_surf *dst,
1768 unsigned num_rects,
1769 struct radv_meta_blit2d_rect *rects)
1770 {
1771 VkPipeline pipeline = cmd_buffer->device->meta_state.btoi_r32g32b32.pipeline;
1772 struct radv_device *device = cmd_buffer->device;
1773 struct radv_buffer_view src_view, dst_view;
1774 unsigned dst_offset = 0;
1775 unsigned stride;
1776 VkBuffer buffer;
1777
1778 /* This special btoi path for R32G32B32 formats will write the linear
1779 * image as a buffer with the same underlying memory. The compute
1780 * shader will copy all components separately using a R32 format.
1781 */
1782 create_buffer_from_image(cmd_buffer, dst,
1783 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
1784 &buffer);
1785
1786 create_bview(cmd_buffer, src->buffer, src->offset,
1787 src->format, &src_view);
1788 create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer),
1789 dst_offset, dst->format, &dst_view);
1790 btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1791
1792 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1793 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1794
1795 stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
1796
1797 for (unsigned r = 0; r < num_rects; ++r) {
1798 unsigned push_constants[4] = {
1799 rects[r].dst_x,
1800 rects[r].dst_y,
1801 stride,
1802 src->pitch,
1803 };
1804
1805 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1806 device->meta_state.btoi_r32g32b32.img_p_layout,
1807 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
1808 push_constants);
1809
1810 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1811 }
1812
1813 radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
1814 }
1815
1816 static void
1817 btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1818 struct radv_buffer_view *src,
1819 struct radv_image_view *dst)
1820 {
1821 struct radv_device *device = cmd_buffer->device;
1822
1823 radv_meta_push_descriptor_set(cmd_buffer,
1824 VK_PIPELINE_BIND_POINT_COMPUTE,
1825 device->meta_state.btoi.img_p_layout,
1826 0, /* set */
1827 2, /* descriptorWriteCount */
1828 (VkWriteDescriptorSet[]) {
1829 {
1830 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1831 .dstBinding = 0,
1832 .dstArrayElement = 0,
1833 .descriptorCount = 1,
1834 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1835 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
1836 },
1837 {
1838 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1839 .dstBinding = 1,
1840 .dstArrayElement = 0,
1841 .descriptorCount = 1,
1842 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1843 .pImageInfo = (VkDescriptorImageInfo[]) {
1844 {
1845 .sampler = VK_NULL_HANDLE,
1846 .imageView = radv_image_view_to_handle(dst),
1847 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1848 },
1849 }
1850 }
1851 });
1852 }
1853
1854 void
1855 radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
1856 struct radv_meta_blit2d_buffer *src,
1857 struct radv_meta_blit2d_surf *dst,
1858 unsigned num_rects,
1859 struct radv_meta_blit2d_rect *rects)
1860 {
1861 VkPipeline pipeline = cmd_buffer->device->meta_state.btoi.pipeline;
1862 struct radv_device *device = cmd_buffer->device;
1863 struct radv_buffer_view src_view;
1864 struct radv_image_view dst_view;
1865
1866 if (dst->image->vk_format == VK_FORMAT_R32G32B32_UINT ||
1867 dst->image->vk_format == VK_FORMAT_R32G32B32_SINT ||
1868 dst->image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
1869 radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst,
1870 num_rects, rects);
1871 return;
1872 }
1873
1874 create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
1875 create_iview(cmd_buffer, dst, &dst_view);
1876 btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1877
1878 if (device->physical_device->rad_info.chip_class >= GFX9 &&
1879 dst->image->type == VK_IMAGE_TYPE_3D)
1880 pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d;
1881 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1882 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1883
1884 for (unsigned r = 0; r < num_rects; ++r) {
1885 unsigned push_constants[4] = {
1886 rects[r].dst_x,
1887 rects[r].dst_y,
1888 dst->layer,
1889 src->pitch,
1890 };
1891 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1892 device->meta_state.btoi.img_p_layout,
1893 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
1894 push_constants);
1895
1896 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1897 }
1898 }
1899
1900 static void
1901 itoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1902 struct radv_buffer_view *src,
1903 struct radv_buffer_view *dst)
1904 {
1905 struct radv_device *device = cmd_buffer->device;
1906
1907 radv_meta_push_descriptor_set(cmd_buffer,
1908 VK_PIPELINE_BIND_POINT_COMPUTE,
1909 device->meta_state.itoi_r32g32b32.img_p_layout,
1910 0, /* set */
1911 2, /* descriptorWriteCount */
1912 (VkWriteDescriptorSet[]) {
1913 {
1914 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1915 .dstBinding = 0,
1916 .dstArrayElement = 0,
1917 .descriptorCount = 1,
1918 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1919 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
1920 },
1921 {
1922 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1923 .dstBinding = 1,
1924 .dstArrayElement = 0,
1925 .descriptorCount = 1,
1926 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1927 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
1928 }
1929 });
1930 }
1931
1932 static void
1933 radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1934 struct radv_meta_blit2d_surf *src,
1935 struct radv_meta_blit2d_surf *dst,
1936 unsigned num_rects,
1937 struct radv_meta_blit2d_rect *rects)
1938 {
1939 VkPipeline pipeline = cmd_buffer->device->meta_state.itoi_r32g32b32.pipeline;
1940 struct radv_device *device = cmd_buffer->device;
1941 struct radv_buffer_view src_view, dst_view;
1942 unsigned src_offset = 0, dst_offset = 0;
1943 unsigned src_stride, dst_stride;
1944 VkBuffer src_buffer, dst_buffer;
1945
1946 /* 96-bit formats are only compatible to themselves. */
1947 assert(dst->format == VK_FORMAT_R32G32B32_UINT ||
1948 dst->format == VK_FORMAT_R32G32B32_SINT ||
1949 dst->format == VK_FORMAT_R32G32B32_SFLOAT);
1950
1951 /* This special itoi path for R32G32B32 formats will write the linear
1952 * image as a buffer with the same underlying memory. The compute
1953 * shader will copy all components separately using a R32 format.
1954 */
1955 create_buffer_from_image(cmd_buffer, src,
1956 VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT,
1957 &src_buffer);
1958 create_buffer_from_image(cmd_buffer, dst,
1959 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
1960 &dst_buffer);
1961
1962 create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer),
1963 src_offset, src->format, &src_view);
1964 create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer),
1965 dst_offset, dst->format, &dst_view);
1966 itoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1967
1968 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1969 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1970
1971 src_stride = get_image_stride_for_r32g32b32(cmd_buffer, src);
1972 dst_stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
1973
1974 for (unsigned r = 0; r < num_rects; ++r) {
1975 unsigned push_constants[6] = {
1976 rects[r].src_x,
1977 rects[r].src_y,
1978 src_stride,
1979 rects[r].dst_x,
1980 rects[r].dst_y,
1981 dst_stride,
1982 };
1983 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1984 device->meta_state.itoi_r32g32b32.img_p_layout,
1985 VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
1986 push_constants);
1987
1988 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1989 }
1990
1991 radv_DestroyBuffer(radv_device_to_handle(device), src_buffer, NULL);
1992 radv_DestroyBuffer(radv_device_to_handle(device), dst_buffer, NULL);
1993 }
1994
1995 static void
1996 itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1997 struct radv_image_view *src,
1998 struct radv_image_view *dst)
1999 {
2000 struct radv_device *device = cmd_buffer->device;
2001
2002 radv_meta_push_descriptor_set(cmd_buffer,
2003 VK_PIPELINE_BIND_POINT_COMPUTE,
2004 device->meta_state.itoi.img_p_layout,
2005 0, /* set */
2006 2, /* descriptorWriteCount */
2007 (VkWriteDescriptorSet[]) {
2008 {
2009 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2010 .dstBinding = 0,
2011 .dstArrayElement = 0,
2012 .descriptorCount = 1,
2013 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
2014 .pImageInfo = (VkDescriptorImageInfo[]) {
2015 {
2016 .sampler = VK_NULL_HANDLE,
2017 .imageView = radv_image_view_to_handle(src),
2018 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
2019 },
2020 }
2021 },
2022 {
2023 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2024 .dstBinding = 1,
2025 .dstArrayElement = 0,
2026 .descriptorCount = 1,
2027 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
2028 .pImageInfo = (VkDescriptorImageInfo[]) {
2029 {
2030 .sampler = VK_NULL_HANDLE,
2031 .imageView = radv_image_view_to_handle(dst),
2032 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
2033 },
2034 }
2035 }
2036 });
2037 }
2038
2039 void
2040 radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
2041 struct radv_meta_blit2d_surf *src,
2042 struct radv_meta_blit2d_surf *dst,
2043 unsigned num_rects,
2044 struct radv_meta_blit2d_rect *rects)
2045 {
2046 VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline;
2047 struct radv_device *device = cmd_buffer->device;
2048 struct radv_image_view src_view, dst_view;
2049
2050 if (src->format == VK_FORMAT_R32G32B32_UINT ||
2051 src->format == VK_FORMAT_R32G32B32_SINT ||
2052 src->format == VK_FORMAT_R32G32B32_SFLOAT) {
2053 radv_meta_image_to_image_cs_r32g32b32(cmd_buffer, src, dst,
2054 num_rects, rects);
2055 return;
2056 }
2057
2058 create_iview(cmd_buffer, src, &src_view);
2059 create_iview(cmd_buffer, dst, &dst_view);
2060
2061 itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
2062
2063 if (device->physical_device->rad_info.chip_class >= GFX9 &&
2064 (src->image->type == VK_IMAGE_TYPE_3D || dst->image->type == VK_IMAGE_TYPE_3D))
2065 pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
2066 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
2067 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
2068
2069 for (unsigned r = 0; r < num_rects; ++r) {
2070 unsigned push_constants[6] = {
2071 rects[r].src_x,
2072 rects[r].src_y,
2073 src->layer,
2074 rects[r].dst_x,
2075 rects[r].dst_y,
2076 dst->layer,
2077 };
2078 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
2079 device->meta_state.itoi.img_p_layout,
2080 VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
2081 push_constants);
2082
2083 radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
2084 }
2085 }
2086
2087 static void
2088 cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
2089 struct radv_buffer_view *view)
2090 {
2091 struct radv_device *device = cmd_buffer->device;
2092
2093 radv_meta_push_descriptor_set(cmd_buffer,
2094 VK_PIPELINE_BIND_POINT_COMPUTE,
2095 device->meta_state.cleari_r32g32b32.img_p_layout,
2096 0, /* set */
2097 1, /* descriptorWriteCount */
2098 (VkWriteDescriptorSet[]) {
2099 {
2100 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2101 .dstBinding = 0,
2102 .dstArrayElement = 0,
2103 .descriptorCount = 1,
2104 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
2105 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(view) },
2106 }
2107 });
2108 }
2109
2110 static void
2111 radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
2112 struct radv_meta_blit2d_surf *dst,
2113 const VkClearColorValue *clear_color)
2114 {
2115 VkPipeline pipeline = cmd_buffer->device->meta_state.cleari_r32g32b32.pipeline;
2116 struct radv_device *device = cmd_buffer->device;
2117 struct radv_buffer_view dst_view;
2118 unsigned stride;
2119 VkBuffer buffer;
2120
2121 /* This special clear path for R32G32B32 formats will write the linear
2122 * image as a buffer with the same underlying memory. The compute
2123 * shader will clear all components separately using a R32 format.
2124 */
2125 create_buffer_from_image(cmd_buffer, dst,
2126 VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
2127 &buffer);
2128
2129 create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer),
2130 0, dst->format, &dst_view);
2131 cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view);
2132
2133 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
2134 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
2135
2136 stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
2137
2138 unsigned push_constants[4] = {
2139 clear_color->uint32[0],
2140 clear_color->uint32[1],
2141 clear_color->uint32[2],
2142 stride,
2143 };
2144
2145 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
2146 device->meta_state.cleari_r32g32b32.img_p_layout,
2147 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
2148 push_constants);
2149
2150 radv_unaligned_dispatch(cmd_buffer, dst->image->info.width,
2151 dst->image->info.height, 1);
2152
2153 radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
2154 }
2155
2156 static void
2157 cleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
2158 struct radv_image_view *dst_iview)
2159 {
2160 struct radv_device *device = cmd_buffer->device;
2161
2162 radv_meta_push_descriptor_set(cmd_buffer,
2163 VK_PIPELINE_BIND_POINT_COMPUTE,
2164 device->meta_state.cleari.img_p_layout,
2165 0, /* set */
2166 1, /* descriptorWriteCount */
2167 (VkWriteDescriptorSet[]) {
2168 {
2169 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2170 .dstBinding = 0,
2171 .dstArrayElement = 0,
2172 .descriptorCount = 1,
2173 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
2174 .pImageInfo = (VkDescriptorImageInfo[]) {
2175 {
2176 .sampler = VK_NULL_HANDLE,
2177 .imageView = radv_image_view_to_handle(dst_iview),
2178 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
2179 },
2180 }
2181 },
2182 });
2183 }
2184
2185 void
2186 radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
2187 struct radv_meta_blit2d_surf *dst,
2188 const VkClearColorValue *clear_color)
2189 {
2190 VkPipeline pipeline = cmd_buffer->device->meta_state.cleari.pipeline;
2191 struct radv_device *device = cmd_buffer->device;
2192 struct radv_image_view dst_iview;
2193
2194 if (dst->format == VK_FORMAT_R32G32B32_UINT ||
2195 dst->format == VK_FORMAT_R32G32B32_SINT ||
2196 dst->format == VK_FORMAT_R32G32B32_SFLOAT) {
2197 radv_meta_clear_image_cs_r32g32b32(cmd_buffer, dst, clear_color);
2198 return;
2199 }
2200
2201 create_iview(cmd_buffer, dst, &dst_iview);
2202 cleari_bind_descriptors(cmd_buffer, &dst_iview);
2203
2204 if (device->physical_device->rad_info.chip_class >= GFX9 &&
2205 dst->image->type == VK_IMAGE_TYPE_3D)
2206 pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;
2207
2208 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
2209 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
2210
2211 unsigned push_constants[5] = {
2212 clear_color->uint32[0],
2213 clear_color->uint32[1],
2214 clear_color->uint32[2],
2215 clear_color->uint32[3],
2216 dst->layer,
2217 };
2218
2219 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
2220 device->meta_state.cleari.img_p_layout,
2221 VK_SHADER_STAGE_COMPUTE_BIT, 0, 20,
2222 push_constants);
2223
2224 radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
2225 }