radv: fix btoi for R32G32B32 when the dest offset is not 0
[mesa.git] / src / amd / vulkan / radv_meta_bufimage.c
1 /*
2 * Copyright © 2016 Red Hat.
3 * Copyright © 2016 Bas Nieuwenhuizen
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice (including the next
13 * paragraph) shall be included in all copies or substantial portions of the
14 * Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22 * IN THE SOFTWARE.
23 */
24 #include "radv_meta.h"
25 #include "nir/nir_builder.h"
26
27 /*
28 * GFX queue: Compute shader implementation of image->buffer copy
29 * Compute queue: implementation also of buffer->image, image->image, and image clear.
30 */
31
32 /* GFX9 needs to use a 3D sampler to access 3D resources, so the shader has the options
33 * for that.
34 */
/* Build the image->buffer (itob) copy compute shader.
 *
 * Each thread of a 16x16 workgroup copies one texel: it fetches from the
 * source image (2D, or 3D when is_3d — GFX9 accesses 3D resources through a
 * 3D sampler) and stores it into the destination texel buffer.
 *
 * Push constant layout (16 bytes):
 *   byte 0:  ivec2 (ivec3 when is_3d) source texel offset
 *   byte 12: destination buffer row stride, in texels
 */
static nir_shader *
build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
{
	nir_builder b;
	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
	/* Source: sampled image of the requested dimensionality. */
	const struct glsl_type *sampler_type = glsl_sampler_type(dim,
								 false,
								 false,
								 GLSL_TYPE_FLOAT);
	/* Destination: texel buffer. */
	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;

	/* Descriptor set 0: binding 0 = source image, binding 1 = dest buffer. */
	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      sampler_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;

	/* global_id = wg_id * local_size + local_invocation_id */
	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Source texel offset, from push constants at byte 0. */
	nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(offset, 0);
	nir_intrinsic_set_range(offset, 16);
	offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	offset->num_components = is_3d ? 3 : 2;
	nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
	nir_builder_instr_insert(&b, &offset->instr);

	/* Buffer row stride in texels, from push constants at byte 12. */
	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(stride, 0);
	nir_intrinsic_set_range(stride, 16);
	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	stride->num_components = 1;
	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
	nir_builder_instr_insert(&b, &stride->instr);

	/* Read from the source at global_id + offset. */
	nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

	/* txf (unfiltered fetch) of the source texel at lod 0. */
	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
	tex->sampler_dim = dim;
	tex->op = nir_texop_txf;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, is_3d ? 0x7 : 0x3));
	tex->src[1].src_type = nir_tex_src_lod;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
	tex->src[2].src_type = nir_tex_src_texture_deref;
	tex->src[2].src = nir_src_for_ssa(input_img_deref);
	tex->dest_type = nir_type_float;
	tex->is_array = false;
	tex->coord_components = is_3d ? 3 : 2;

	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(&b, &tex->instr);

	/* Linear destination index = global_id.y * stride + global_id.x
	 * (the buffer side is not offset by the push-constant offset). */
	nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
	nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);

	nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
	tmp = nir_iadd(&b, tmp, pos_x);

	nir_ssa_def *coord = nir_vec4(&b, tmp, tmp, tmp, tmp);

	/* Store the fetched texel into the destination buffer. */
	nir_ssa_def *outval = &tex->dest.ssa;
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
	store->num_components = 4;
	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
	store->src[1] = nir_src_for_ssa(coord);
	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32)); /* sample index (unused for buffers) */
	store->src[3] = nir_src_for_ssa(outval);

	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}
128
/* Image to buffer - don't write use image accessors */
/*
 * Create the meta state for image->buffer copies: descriptor set layout,
 * pipeline layout (16 bytes of push constants) and the compute pipeline(s).
 * On GFX9 a second pipeline is built from the 3D shader variant.
 *
 * Returns VK_SUCCESS or the first failing VkResult; partially created
 * objects are left for radv_device_finish_meta_itob_state to destroy.
 */
static VkResult
radv_device_init_meta_itob_state(struct radv_device *device)
{
	VkResult result;
	struct radv_shader_module cs = { .nir = NULL };
	struct radv_shader_module cs_3d = { .nir = NULL };

	cs.nir = build_nir_itob_compute_shader(device, false);
	/* GFX9 needs a dedicated 3D shader variant. */
	if (device->physical_device->rad_info.chip_class >= GFX9)
		cs_3d.nir = build_nir_itob_compute_shader(device, true);

	/*
	 * two descriptors one for the image being sampled
	 * one for the buffer being written.
	 */
	VkDescriptorSetLayoutCreateInfo ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
		.bindingCount = 2,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
			{
				.binding = 1,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.itob.img_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* 16 bytes of push constants: source offset + buffer stride. */
	VkPipelineLayoutCreateInfo pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.itob.img_ds_layout,
		.pushConstantRangeCount = 1,
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					   &pl_create_info,
					   &device->meta_state.alloc,
					   &device->meta_state.itob.img_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* compute shader */

	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.itob.img_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &vk_pipeline_info, NULL,
					     &device->meta_state.itob.pipeline);
	if (result != VK_SUCCESS)
		goto fail;

	if (device->physical_device->rad_info.chip_class >= GFX9) {
		VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
			.stage = VK_SHADER_STAGE_COMPUTE_BIT,
			.module = radv_shader_module_to_handle(&cs_3d),
			.pName = "main",
			.pSpecializationInfo = NULL,
		};

		VkComputePipelineCreateInfo vk_pipeline_info_3d = {
			.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
			.stage = pipeline_shader_stage_3d,
			.flags = 0,
			.layout = device->meta_state.itob.img_p_layout,
		};

		result = radv_CreateComputePipelines(radv_device_to_handle(device),
						     radv_pipeline_cache_to_handle(&device->meta_state.cache),
						     1, &vk_pipeline_info_3d, NULL,
						     &device->meta_state.itob.pipeline_3d);
		if (result != VK_SUCCESS)
			goto fail;
		ralloc_free(cs_3d.nir);
	}
	ralloc_free(cs.nir);

	return VK_SUCCESS;
fail:
	/* ralloc_free(NULL) is safe, so both frees are unconditional. */
	ralloc_free(cs.nir);
	ralloc_free(cs_3d.nir);
	return result;
}
246
247 static void
248 radv_device_finish_meta_itob_state(struct radv_device *device)
249 {
250 struct radv_meta_state *state = &device->meta_state;
251
252 radv_DestroyPipelineLayout(radv_device_to_handle(device),
253 state->itob.img_p_layout, &state->alloc);
254 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
255 state->itob.img_ds_layout,
256 &state->alloc);
257 radv_DestroyPipeline(radv_device_to_handle(device),
258 state->itob.pipeline, &state->alloc);
259 if (device->physical_device->rad_info.chip_class >= GFX9)
260 radv_DestroyPipeline(radv_device_to_handle(device),
261 state->itob.pipeline_3d, &state->alloc);
262 }
263
/* Build the buffer->image (btoi) copy compute shader.
 *
 * Each thread of a 16x16 workgroup copies one texel: it fetches from the
 * source texel buffer and stores into the destination image (2D, or 3D when
 * is_3d for GFX9).
 *
 * Push constant layout (16 bytes):
 *   byte 0:  ivec2 (ivec3 when is_3d) destination texel offset
 *   byte 12: source buffer row stride, in texels
 */
static nir_shader *
build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
{
	nir_builder b;
	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
	/* Source: texel buffer. */
	const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	/* Destination: image of the requested dimensionality. */
	const struct glsl_type *img_type = glsl_sampler_type(dim,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;

	/* Descriptor set 0: binding 0 = source buffer, binding 1 = dest image. */
	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      buf_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;

	/* global_id = wg_id * local_size + local_invocation_id */
	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Destination texel offset, from push constants at byte 0. */
	nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(offset, 0);
	nir_intrinsic_set_range(offset, 16);
	offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	offset->num_components = is_3d ? 3 : 2;
	nir_ssa_dest_init(&offset->instr, &offset->dest, is_3d ? 3 : 2, 32, "offset");
	nir_builder_instr_insert(&b, &offset->instr);

	/* Buffer row stride in texels, from push constants at byte 12. */
	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(stride, 0);
	nir_intrinsic_set_range(stride, 16);
	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	stride->num_components = 1;
	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
	nir_builder_instr_insert(&b, &stride->instr);

	/* Linear source index = global_id.y * stride + global_id.x. */
	nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
	nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);

	nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
	tmp = nir_iadd(&b, tmp, pos_x);

	nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);

	/* Destination coordinate includes the push-constant offset. */
	nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);
	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

	/* txf (unfiltered fetch) of one texel from the source buffer. */
	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
	tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
	tex->op = nir_texop_txf;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
	tex->src[1].src_type = nir_tex_src_lod;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
	tex->src[2].src_type = nir_tex_src_texture_deref;
	tex->src[2].src = nir_src_for_ssa(input_img_deref);
	tex->dest_type = nir_type_float;
	tex->is_array = false;
	tex->coord_components = 1;

	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(&b, &tex->instr);

	/* Store the fetched texel into the destination image. */
	nir_ssa_def *outval = &tex->dest.ssa;
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
	store->num_components = 4;
	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
	store->src[1] = nir_src_for_ssa(img_coord);
	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32)); /* sample index (unused) */
	store->src[3] = nir_src_for_ssa(outval);

	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}
355
356 /* Buffer to image - don't write use image accessors */
357 static VkResult
358 radv_device_init_meta_btoi_state(struct radv_device *device)
359 {
360 VkResult result;
361 struct radv_shader_module cs = { .nir = NULL };
362 struct radv_shader_module cs_3d = { .nir = NULL };
363 cs.nir = build_nir_btoi_compute_shader(device, false);
364 if (device->physical_device->rad_info.chip_class >= GFX9)
365 cs_3d.nir = build_nir_btoi_compute_shader(device, true);
366 /*
367 * two descriptors one for the image being sampled
368 * one for the buffer being written.
369 */
370 VkDescriptorSetLayoutCreateInfo ds_create_info = {
371 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
372 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
373 .bindingCount = 2,
374 .pBindings = (VkDescriptorSetLayoutBinding[]) {
375 {
376 .binding = 0,
377 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
378 .descriptorCount = 1,
379 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
380 .pImmutableSamplers = NULL
381 },
382 {
383 .binding = 1,
384 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
385 .descriptorCount = 1,
386 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
387 .pImmutableSamplers = NULL
388 },
389 }
390 };
391
392 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
393 &ds_create_info,
394 &device->meta_state.alloc,
395 &device->meta_state.btoi.img_ds_layout);
396 if (result != VK_SUCCESS)
397 goto fail;
398
399
400 VkPipelineLayoutCreateInfo pl_create_info = {
401 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
402 .setLayoutCount = 1,
403 .pSetLayouts = &device->meta_state.btoi.img_ds_layout,
404 .pushConstantRangeCount = 1,
405 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
406 };
407
408 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
409 &pl_create_info,
410 &device->meta_state.alloc,
411 &device->meta_state.btoi.img_p_layout);
412 if (result != VK_SUCCESS)
413 goto fail;
414
415 /* compute shader */
416
417 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
418 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
419 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
420 .module = radv_shader_module_to_handle(&cs),
421 .pName = "main",
422 .pSpecializationInfo = NULL,
423 };
424
425 VkComputePipelineCreateInfo vk_pipeline_info = {
426 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
427 .stage = pipeline_shader_stage,
428 .flags = 0,
429 .layout = device->meta_state.btoi.img_p_layout,
430 };
431
432 result = radv_CreateComputePipelines(radv_device_to_handle(device),
433 radv_pipeline_cache_to_handle(&device->meta_state.cache),
434 1, &vk_pipeline_info, NULL,
435 &device->meta_state.btoi.pipeline);
436 if (result != VK_SUCCESS)
437 goto fail;
438
439 if (device->physical_device->rad_info.chip_class >= GFX9) {
440 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
441 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
442 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
443 .module = radv_shader_module_to_handle(&cs_3d),
444 .pName = "main",
445 .pSpecializationInfo = NULL,
446 };
447
448 VkComputePipelineCreateInfo vk_pipeline_info_3d = {
449 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
450 .stage = pipeline_shader_stage_3d,
451 .flags = 0,
452 .layout = device->meta_state.btoi.img_p_layout,
453 };
454
455 result = radv_CreateComputePipelines(radv_device_to_handle(device),
456 radv_pipeline_cache_to_handle(&device->meta_state.cache),
457 1, &vk_pipeline_info_3d, NULL,
458 &device->meta_state.btoi.pipeline_3d);
459 ralloc_free(cs_3d.nir);
460 }
461 ralloc_free(cs.nir);
462
463 return VK_SUCCESS;
464 fail:
465 ralloc_free(cs_3d.nir);
466 ralloc_free(cs.nir);
467 return result;
468 }
469
/* Destroy all btoi meta objects created by radv_device_init_meta_btoi_state. */
static void
radv_device_finish_meta_btoi_state(struct radv_device *device)
{
	struct radv_meta_state *state = &device->meta_state;

	radv_DestroyPipelineLayout(radv_device_to_handle(device),
				   state->btoi.img_p_layout, &state->alloc);
	radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
					state->btoi.img_ds_layout,
					&state->alloc);
	radv_DestroyPipeline(radv_device_to_handle(device),
			     state->btoi.pipeline, &state->alloc);
	/* pipeline_3d is only created on GFX9 (unlike the itob finish path
	 * this destroy is unconditional) — presumably relies on destroying a
	 * never-created (NULL) handle being a no-op; verify the handle is
	 * zero-initialized on pre-GFX9. */
	radv_DestroyPipeline(radv_device_to_handle(device),
			     state->btoi.pipeline_3d, &state->alloc);
}
485
/* Buffer to image - special path for R32G32B32 */
/*
 * Build the buffer->image copy shader for R32G32B32 formats. Because the
 * destination is accessed through an R32 texel-buffer view rather than an
 * image, each thread fetches one RGB texel from the source buffer and emits
 * three separate single-component stores.
 *
 * Push constant layout (16 bytes):
 *   byte 0:  ivec2 destination texel offset
 *   byte 8:  destination pitch, in 32-bit components
 *   byte 12: source buffer row stride, in texels
 */
static nir_shader *
build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
{
	nir_builder b;
	/* Both source and destination are texel buffers. */
	const struct glsl_type *buf_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_btoi_r32g32b32_cs");
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;

	/* Descriptor set 0: binding 0 = source buffer, binding 1 = dest buffer. */
	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      buf_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;

	/* global_id = wg_id * local_size + local_invocation_id */
	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Destination texel offset, from push constants at byte 0. */
	nir_intrinsic_instr *offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(offset, 0);
	nir_intrinsic_set_range(offset, 16);
	offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	offset->num_components = 2;
	nir_ssa_dest_init(&offset->instr, &offset->dest, 2, 32, "offset");
	nir_builder_instr_insert(&b, &offset->instr);

	/* Destination pitch, from push constants at byte 8. */
	nir_intrinsic_instr *pitch = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(pitch, 0);
	nir_intrinsic_set_range(pitch, 16);
	pitch->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
	pitch->num_components = 1;
	nir_ssa_dest_init(&pitch->instr, &pitch->dest, 1, 32, "pitch");
	nir_builder_instr_insert(&b, &pitch->instr);

	/* Source buffer row stride, from push constants at byte 12. */
	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(stride, 0);
	nir_intrinsic_set_range(stride, 16);
	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	stride->num_components = 1;
	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
	nir_builder_instr_insert(&b, &stride->instr);

	/* Linear source index = global_id.y * stride + global_id.x. */
	nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
	nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);

	nir_ssa_def *tmp = nir_imul(&b, pos_y, &stride->dest.ssa);
	tmp = nir_iadd(&b, tmp, pos_x);

	nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);

	/* Destination coordinate includes the push-constant offset — this is
	 * what makes copies with a non-zero dest offset land correctly. */
	nir_ssa_def *img_coord = nir_iadd(&b, global_id, &offset->dest.ssa);

	/* First 32-bit component of the dest texel:
	 * img_coord.y * pitch + img_coord.x * 3. */
	nir_ssa_def *global_pos =
		nir_iadd(&b,
			 nir_imul(&b, nir_channel(&b, img_coord, 1), &pitch->dest.ssa),
			 nir_imul(&b, nir_channel(&b, img_coord, 0), nir_imm_int(&b, 3)));

	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

	/* txf (unfiltered fetch) of one texel from the source buffer. */
	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
	tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
	tex->op = nir_texop_txf;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
	tex->src[1].src_type = nir_tex_src_lod;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
	tex->src[2].src_type = nir_tex_src_texture_deref;
	tex->src[2].src = nir_src_for_ssa(input_img_deref);
	tex->dest_type = nir_type_float;
	tex->is_array = false;
	tex->coord_components = 1;
	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(&b, &tex->instr);

	nir_ssa_def *outval = &tex->dest.ssa;

	/* Emit one single-component store per R/G/B channel. */
	for (int chan = 0; chan < 3; chan++) {
		nir_ssa_def *local_pos =
			nir_iadd(&b, global_pos, nir_imm_int(&b, chan));

		nir_ssa_def *coord =
			nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);

		nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
		store->num_components = 1;
		store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
		store->src[1] = nir_src_for_ssa(coord);
		store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32)); /* sample index (unused) */
		store->src[3] = nir_src_for_ssa(nir_channel(&b, outval, chan));
		nir_builder_instr_insert(&b, &store->instr);
	}

	return b.shader;
}
599
/*
 * Create the meta state for the R32G32B32 buffer->image special path:
 * descriptor set layout (uniform texel buffer in, storage texel buffer out),
 * pipeline layout (16 bytes of push constants) and the compute pipeline.
 *
 * Returns VK_SUCCESS or the first failing VkResult.
 */
static VkResult
radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device)
{
	VkResult result;
	struct radv_shader_module cs = { .nir = NULL };

	cs.nir = build_nir_btoi_r32g32b32_compute_shader(device);

	VkDescriptorSetLayoutCreateInfo ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
		.bindingCount = 2,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
			{
				.binding = 1,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.btoi_r32g32b32.img_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* 16 bytes of push constants: offset, pitch and stride. */
	VkPipelineLayoutCreateInfo pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.btoi_r32g32b32.img_ds_layout,
		.pushConstantRangeCount = 1,
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					   &pl_create_info,
					   &device->meta_state.alloc,
					   &device->meta_state.btoi_r32g32b32.img_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* compute shader */

	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.btoi_r32g32b32.img_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &vk_pipeline_info, NULL,
					     &device->meta_state.btoi_r32g32b32.pipeline);

	/* Success deliberately falls through: the shader is freed either way
	 * and `result` is returned as-is. */
fail:
	ralloc_free(cs.nir);
	return result;
}
679
680 static void
681 radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device)
682 {
683 struct radv_meta_state *state = &device->meta_state;
684
685 radv_DestroyPipelineLayout(radv_device_to_handle(device),
686 state->btoi_r32g32b32.img_p_layout, &state->alloc);
687 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
688 state->btoi_r32g32b32.img_ds_layout,
689 &state->alloc);
690 radv_DestroyPipeline(radv_device_to_handle(device),
691 state->btoi_r32g32b32.pipeline, &state->alloc);
692 }
693
/* Build the image->image (itoi) copy compute shader.
 *
 * Each thread of a 16x16 workgroup copies one texel from the source image to
 * the destination image (both 2D, or both 3D when is_3d for GFX9).
 *
 * Push constant layout (24 bytes):
 *   byte 0:  ivec2 (ivec3 when is_3d) source texel offset
 *   byte 12: ivec2 (ivec3 when is_3d) destination texel offset
 */
static nir_shader *
build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d)
{
	nir_builder b;
	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
	/* NOTE: named buf_type but this is the source *image* type here. */
	const struct glsl_type *buf_type = glsl_sampler_type(dim,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	const struct glsl_type *img_type = glsl_sampler_type(dim,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_itoi_cs_3d" : "meta_itoi_cs");
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;

	/* Descriptor set 0: binding 0 = source image, binding 1 = dest image. */
	nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
						      buf_type, "s_tex");
	input_img->data.descriptor_set = 0;
	input_img->data.binding = 0;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 1;

	/* global_id = wg_id * local_size + local_invocation_id */
	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Source texel offset, from push constants at byte 0. */
	nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(src_offset, 0);
	nir_intrinsic_set_range(src_offset, 24);
	src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	src_offset->num_components = is_3d ? 3 : 2;
	nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, is_3d ? 3 : 2, 32, "src_offset");
	nir_builder_instr_insert(&b, &src_offset->instr);

	/* Destination texel offset, from push constants at byte 12. */
	nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(dst_offset, 0);
	nir_intrinsic_set_range(dst_offset, 24);
	dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	dst_offset->num_components = is_3d ? 3 : 2;
	nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, is_3d ? 3 : 2, 32, "dst_offset");
	nir_builder_instr_insert(&b, &dst_offset->instr);

	nir_ssa_def *src_coord = nir_iadd(&b, global_id, &src_offset->dest.ssa);
	nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

	nir_ssa_def *dst_coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);

	/* txf (unfiltered fetch) of the source texel at lod 0. */
	nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
	tex->sampler_dim = dim;
	tex->op = nir_texop_txf;
	tex->src[0].src_type = nir_tex_src_coord;
	tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 0x7 : 0x3));
	tex->src[1].src_type = nir_tex_src_lod;
	tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
	tex->src[2].src_type = nir_tex_src_texture_deref;
	tex->src[2].src = nir_src_for_ssa(input_img_deref);
	tex->dest_type = nir_type_float;
	tex->is_array = false;
	tex->coord_components = is_3d ? 3 : 2;

	nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
	nir_builder_instr_insert(&b, &tex->instr);

	/* Store the fetched texel at the destination coordinate. */
	nir_ssa_def *outval = &tex->dest.ssa;
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
	store->num_components = 4;
	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
	store->src[1] = nir_src_for_ssa(dst_coord);
	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32)); /* sample index (unused) */
	store->src[3] = nir_src_for_ssa(outval);

	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}
779
780 /* image to image - don't write use image accessors */
781 static VkResult
782 radv_device_init_meta_itoi_state(struct radv_device *device)
783 {
784 VkResult result;
785 struct radv_shader_module cs = { .nir = NULL };
786 struct radv_shader_module cs_3d = { .nir = NULL };
787 cs.nir = build_nir_itoi_compute_shader(device, false);
788 if (device->physical_device->rad_info.chip_class >= GFX9)
789 cs_3d.nir = build_nir_itoi_compute_shader(device, true);
790 /*
791 * two descriptors one for the image being sampled
792 * one for the buffer being written.
793 */
794 VkDescriptorSetLayoutCreateInfo ds_create_info = {
795 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
796 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
797 .bindingCount = 2,
798 .pBindings = (VkDescriptorSetLayoutBinding[]) {
799 {
800 .binding = 0,
801 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
802 .descriptorCount = 1,
803 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
804 .pImmutableSamplers = NULL
805 },
806 {
807 .binding = 1,
808 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
809 .descriptorCount = 1,
810 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
811 .pImmutableSamplers = NULL
812 },
813 }
814 };
815
816 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
817 &ds_create_info,
818 &device->meta_state.alloc,
819 &device->meta_state.itoi.img_ds_layout);
820 if (result != VK_SUCCESS)
821 goto fail;
822
823
824 VkPipelineLayoutCreateInfo pl_create_info = {
825 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
826 .setLayoutCount = 1,
827 .pSetLayouts = &device->meta_state.itoi.img_ds_layout,
828 .pushConstantRangeCount = 1,
829 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
830 };
831
832 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
833 &pl_create_info,
834 &device->meta_state.alloc,
835 &device->meta_state.itoi.img_p_layout);
836 if (result != VK_SUCCESS)
837 goto fail;
838
839 /* compute shader */
840
841 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
842 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
843 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
844 .module = radv_shader_module_to_handle(&cs),
845 .pName = "main",
846 .pSpecializationInfo = NULL,
847 };
848
849 VkComputePipelineCreateInfo vk_pipeline_info = {
850 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
851 .stage = pipeline_shader_stage,
852 .flags = 0,
853 .layout = device->meta_state.itoi.img_p_layout,
854 };
855
856 result = radv_CreateComputePipelines(radv_device_to_handle(device),
857 radv_pipeline_cache_to_handle(&device->meta_state.cache),
858 1, &vk_pipeline_info, NULL,
859 &device->meta_state.itoi.pipeline);
860 if (result != VK_SUCCESS)
861 goto fail;
862
863 if (device->physical_device->rad_info.chip_class >= GFX9) {
864 VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
865 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
866 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
867 .module = radv_shader_module_to_handle(&cs_3d),
868 .pName = "main",
869 .pSpecializationInfo = NULL,
870 };
871
872 VkComputePipelineCreateInfo vk_pipeline_info_3d = {
873 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
874 .stage = pipeline_shader_stage_3d,
875 .flags = 0,
876 .layout = device->meta_state.itoi.img_p_layout,
877 };
878
879 result = radv_CreateComputePipelines(radv_device_to_handle(device),
880 radv_pipeline_cache_to_handle(&device->meta_state.cache),
881 1, &vk_pipeline_info_3d, NULL,
882 &device->meta_state.itoi.pipeline_3d);
883
884 ralloc_free(cs_3d.nir);
885 }
886 ralloc_free(cs.nir);
887
888 return VK_SUCCESS;
889 fail:
890 ralloc_free(cs.nir);
891 ralloc_free(cs_3d.nir);
892 return result;
893 }
894
895 static void
896 radv_device_finish_meta_itoi_state(struct radv_device *device)
897 {
898 struct radv_meta_state *state = &device->meta_state;
899
900 radv_DestroyPipelineLayout(radv_device_to_handle(device),
901 state->itoi.img_p_layout, &state->alloc);
902 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
903 state->itoi.img_ds_layout,
904 &state->alloc);
905 radv_DestroyPipeline(radv_device_to_handle(device),
906 state->itoi.pipeline, &state->alloc);
907 if (device->physical_device->rad_info.chip_class >= GFX9)
908 radv_DestroyPipeline(radv_device_to_handle(device),
909 state->itoi.pipeline_3d, &state->alloc);
910 }
911
/* Build the compute shader used to clear a (2D or 3D) storage image.
 * One invocation clears one texel; the 3D variant exists because GFX9
 * accesses 3D resources with 3D coordinates.
 */
static nir_shader *
build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d)
{
	nir_builder b;
	enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
	const struct glsl_type *img_type = glsl_sampler_type(dim,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, is_3d ? "meta_cleari_cs_3d" : "meta_cleari_cs");
	/* 16x16 invocations per workgroup. */
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;

	/* Single binding: the storage image being cleared. */
	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 0;

	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	/* global_id = wg_id * local_size + local invocation id. */
	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Push constants, 20 bytes total: clear color at offset 0 (16 bytes),
	 * base layer/slice at offset 16. */
	nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(clear_val, 0);
	nir_intrinsic_set_range(clear_val, 20);
	clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	clear_val->num_components = 4;
	nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 4, 32, "clear_value");
	nir_builder_instr_insert(&b, &clear_val->instr);

	nir_intrinsic_instr *layer = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(layer, 0);
	nir_intrinsic_set_range(layer, 20);
	layer->src[0] = nir_src_for_ssa(nir_imm_int(&b, 16));
	layer->num_components = 1;
	nir_ssa_dest_init(&layer->instr, &layer->dest, 1, 32, "layer");
	nir_builder_instr_insert(&b, &layer->instr);

	/* Offset the Z coordinate by the base layer so array layers / 3D
	 * slices other than 0 can be cleared. */
	nir_ssa_def *global_z = nir_iadd(&b, nir_channel(&b, global_id, 2), &layer->dest.ssa);

	nir_ssa_def *comps[4];
	comps[0] = nir_channel(&b, global_id, 0);
	comps[1] = nir_channel(&b, global_id, 1);
	comps[2] = global_z;
	comps[3] = nir_imm_int(&b, 0);
	global_id = nir_vec(&b, comps, 4);

	/* Store the clear color at the computed coordinate. */
	nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
	store->num_components = 4;
	store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
	store->src[1] = nir_src_for_ssa(global_id);
	store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
	store->src[3] = nir_src_for_ssa(&clear_val->dest.ssa);

	nir_builder_instr_insert(&b, &store->instr);
	return b.shader;
}
976
/* Create the descriptor set layout, pipeline layout and compute
 * pipeline(s) for the image-clear meta path. Returns VK_SUCCESS or the
 * first failing Vulkan result; partially created objects are destroyed
 * by radv_device_finish_meta_cleari_state().
 */
static VkResult
radv_device_init_meta_cleari_state(struct radv_device *device)
{
	VkResult result;
	struct radv_shader_module cs = { .nir = NULL };
	struct radv_shader_module cs_3d = { .nir = NULL };
	cs.nir = build_nir_cleari_compute_shader(device, false);
	if (device->physical_device->rad_info.chip_class >= GFX9)
		cs_3d.nir = build_nir_cleari_compute_shader(device, true);

	/*
	 * One descriptor: the storage image being cleared.
	 */
	VkDescriptorSetLayoutCreateInfo ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
		.bindingCount = 1,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.cleari.img_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;


	/* 20 bytes of push constants: 16 for the clear color + 4 for the
	 * base layer, matching build_nir_cleari_compute_shader(). */
	VkPipelineLayoutCreateInfo pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.cleari.img_ds_layout,
		.pushConstantRangeCount = 1,
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					  &pl_create_info,
					  &device->meta_state.alloc,
					  &device->meta_state.cleari.img_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* compute shader */

	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.cleari.img_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &vk_pipeline_info, NULL,
					     &device->meta_state.cleari.pipeline);
	if (result != VK_SUCCESS)
		goto fail;


	/* GFX9 needs a separate pipeline for 3D images. */
	if (device->physical_device->rad_info.chip_class >= GFX9) {
		/* compute shader */
		VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
			.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
			.stage = VK_SHADER_STAGE_COMPUTE_BIT,
			.module = radv_shader_module_to_handle(&cs_3d),
			.pName = "main",
			.pSpecializationInfo = NULL,
		};

		VkComputePipelineCreateInfo vk_pipeline_info_3d = {
			.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
			.stage = pipeline_shader_stage_3d,
			.flags = 0,
			.layout = device->meta_state.cleari.img_p_layout,
		};

		result = radv_CreateComputePipelines(radv_device_to_handle(device),
						     radv_pipeline_cache_to_handle(&device->meta_state.cache),
						     1, &vk_pipeline_info_3d, NULL,
						     &device->meta_state.cleari.pipeline_3d);
		if (result != VK_SUCCESS)
			goto fail;

		ralloc_free(cs_3d.nir);
	}
	ralloc_free(cs.nir);
	return VK_SUCCESS;
fail:
	/* ralloc_free(NULL) is safe, so the 3D module may be unallocated. */
	ralloc_free(cs.nir);
	ralloc_free(cs_3d.nir);
	return result;
}
1087
1088 static void
1089 radv_device_finish_meta_cleari_state(struct radv_device *device)
1090 {
1091 struct radv_meta_state *state = &device->meta_state;
1092
1093 radv_DestroyPipelineLayout(radv_device_to_handle(device),
1094 state->cleari.img_p_layout, &state->alloc);
1095 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
1096 state->cleari.img_ds_layout,
1097 &state->alloc);
1098 radv_DestroyPipeline(radv_device_to_handle(device),
1099 state->cleari.pipeline, &state->alloc);
1100 radv_DestroyPipeline(radv_device_to_handle(device),
1101 state->cleari.pipeline_3d, &state->alloc);
1102 }
1103
1104 /* Special path for clearing R32G32B32 images using a compute shader. */
static nir_shader *
build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
{
	nir_builder b;
	/* The image is viewed as an R32 texel buffer; each invocation writes
	 * the three components of one R32G32B32 texel separately. */
	const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_BUF,
							     false,
							     false,
							     GLSL_TYPE_FLOAT);
	nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
	b.shader->info.name = ralloc_strdup(b.shader, "meta_cleari_r32g32b32_cs");
	b.shader->info.cs.local_size[0] = 16;
	b.shader->info.cs.local_size[1] = 16;
	b.shader->info.cs.local_size[2] = 1;

	nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
						       img_type, "out_img");
	output_img->data.descriptor_set = 0;
	output_img->data.binding = 0;

	nir_ssa_def *invoc_id = nir_load_system_value(&b, nir_intrinsic_load_local_invocation_id, 0);
	nir_ssa_def *wg_id = nir_load_system_value(&b, nir_intrinsic_load_work_group_id, 0);
	nir_ssa_def *block_size = nir_imm_ivec4(&b,
						b.shader->info.cs.local_size[0],
						b.shader->info.cs.local_size[1],
						b.shader->info.cs.local_size[2], 0);

	nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

	/* Push constants, 16 bytes: 3 x 32-bit clear value at offset 0,
	 * row stride (in R32 units) at offset 12. */
	nir_intrinsic_instr *clear_val = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(clear_val, 0);
	nir_intrinsic_set_range(clear_val, 16);
	clear_val->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
	clear_val->num_components = 3;
	nir_ssa_dest_init(&clear_val->instr, &clear_val->dest, 3, 32, "clear_value");
	nir_builder_instr_insert(&b, &clear_val->instr);

	nir_intrinsic_instr *stride = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
	nir_intrinsic_set_base(stride, 0);
	nir_intrinsic_set_range(stride, 16);
	stride->src[0] = nir_src_for_ssa(nir_imm_int(&b, 12));
	stride->num_components = 1;
	nir_ssa_dest_init(&stride->instr, &stride->dest, 1, 32, "stride");
	nir_builder_instr_insert(&b, &stride->instr);

	nir_ssa_def *global_x = nir_channel(&b, global_id, 0);
	nir_ssa_def *global_y = nir_channel(&b, global_id, 1);

	/* Linear position of the texel in R32 units: y * stride + x * 3. */
	nir_ssa_def *global_pos =
		nir_iadd(&b,
			 nir_imul(&b, global_y, &stride->dest.ssa),
			 nir_imul(&b, global_x, nir_imm_int(&b, 3)));

	/* Emit one R32 store per component. */
	for (unsigned chan = 0; chan < 3; chan++) {
		nir_ssa_def *local_pos =
			nir_iadd(&b, global_pos, nir_imm_int(&b, chan));

		nir_ssa_def *coord =
			nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);

		nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
		store->num_components = 1;
		store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
		store->src[1] = nir_src_for_ssa(coord);
		store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
		store->src[3] = nir_src_for_ssa(nir_channel(&b, &clear_val->dest.ssa, chan));
		nir_builder_instr_insert(&b, &store->instr);
	}

	return b.shader;
}
1175
/* Create the layouts and compute pipeline for the special R32G32B32
 * clear path. Objects created before a failure are destroyed by
 * radv_device_finish_meta_cleari_r32g32b32_state().
 */
static VkResult
radv_device_init_meta_cleari_r32g32b32_state(struct radv_device *device)
{
	VkResult result;
	struct radv_shader_module cs = { .nir = NULL };

	cs.nir = build_nir_cleari_r32g32b32_compute_shader(device);

	/* One descriptor: the storage texel buffer aliasing the image. */
	VkDescriptorSetLayoutCreateInfo ds_create_info = {
		.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
		.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
		.bindingCount = 1,
		.pBindings = (VkDescriptorSetLayoutBinding[]) {
			{
				.binding = 0,
				.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
				.descriptorCount = 1,
				.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
				.pImmutableSamplers = NULL
			},
		}
	};

	result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
						&ds_create_info,
						&device->meta_state.alloc,
						&device->meta_state.cleari_r32g32b32.img_ds_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* 16 bytes of push constants: 12 for the clear value + 4 for the
	 * stride, matching build_nir_cleari_r32g32b32_compute_shader(). */
	VkPipelineLayoutCreateInfo pl_create_info = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
		.setLayoutCount = 1,
		.pSetLayouts = &device->meta_state.cleari_r32g32b32.img_ds_layout,
		.pushConstantRangeCount = 1,
		.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
	};

	result = radv_CreatePipelineLayout(radv_device_to_handle(device),
					  &pl_create_info,
					  &device->meta_state.alloc,
					  &device->meta_state.cleari_r32g32b32.img_p_layout);
	if (result != VK_SUCCESS)
		goto fail;

	/* compute shader */
	VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
		.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
		.stage = VK_SHADER_STAGE_COMPUTE_BIT,
		.module = radv_shader_module_to_handle(&cs),
		.pName = "main",
		.pSpecializationInfo = NULL,
	};

	VkComputePipelineCreateInfo vk_pipeline_info = {
		.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
		.stage = pipeline_shader_stage,
		.flags = 0,
		.layout = device->meta_state.cleari_r32g32b32.img_p_layout,
	};

	result = radv_CreateComputePipelines(radv_device_to_handle(device),
					     radv_pipeline_cache_to_handle(&device->meta_state.cache),
					     1, &vk_pipeline_info, NULL,
					     &device->meta_state.cleari_r32g32b32.pipeline);

	/* Deliberate fall-through: on success 'result' is VK_SUCCESS and the
	 * NIR must be freed either way. */
fail:
	ralloc_free(cs.nir);
	return result;
}
1246
1247 static void
1248 radv_device_finish_meta_cleari_r32g32b32_state(struct radv_device *device)
1249 {
1250 struct radv_meta_state *state = &device->meta_state;
1251
1252 radv_DestroyPipelineLayout(radv_device_to_handle(device),
1253 state->cleari_r32g32b32.img_p_layout,
1254 &state->alloc);
1255 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
1256 state->cleari_r32g32b32.img_ds_layout,
1257 &state->alloc);
1258 radv_DestroyPipeline(radv_device_to_handle(device),
1259 state->cleari_r32g32b32.pipeline, &state->alloc);
1260 }
1261
/* Destroy all buffer<->image meta state (itob, btoi, itoi, cleari and the
 * R32G32B32 variants). Safe to call on partially initialized state.
 */
void
radv_device_finish_meta_bufimage_state(struct radv_device *device)
{
	radv_device_finish_meta_itob_state(device);
	radv_device_finish_meta_btoi_state(device);
	radv_device_finish_meta_btoi_r32g32b32_state(device);
	radv_device_finish_meta_itoi_state(device);
	radv_device_finish_meta_cleari_state(device);
	radv_device_finish_meta_cleari_r32g32b32_state(device);
}
1272
/* Initialize all buffer<->image meta state. On failure the goto ladder
 * unwinds in reverse init order, starting with the unit that failed
 * (each finish function tolerates partially created objects).
 */
VkResult
radv_device_init_meta_bufimage_state(struct radv_device *device)
{
	VkResult result;

	result = radv_device_init_meta_itob_state(device);
	if (result != VK_SUCCESS)
		goto fail_itob;

	result = radv_device_init_meta_btoi_state(device);
	if (result != VK_SUCCESS)
		goto fail_btoi;

	result = radv_device_init_meta_btoi_r32g32b32_state(device);
	if (result != VK_SUCCESS)
		goto fail_btoi_r32g32b32;

	result = radv_device_init_meta_itoi_state(device);
	if (result != VK_SUCCESS)
		goto fail_itoi;

	result = radv_device_init_meta_cleari_state(device);
	if (result != VK_SUCCESS)
		goto fail_cleari;

	result = radv_device_init_meta_cleari_r32g32b32_state(device);
	if (result != VK_SUCCESS)
		goto fail_cleari_r32g32b32;

	return VK_SUCCESS;
fail_cleari_r32g32b32:
	radv_device_finish_meta_cleari_r32g32b32_state(device);
fail_cleari:
	radv_device_finish_meta_cleari_state(device);
fail_itoi:
	radv_device_finish_meta_itoi_state(device);
fail_btoi_r32g32b32:
	radv_device_finish_meta_btoi_r32g32b32_state(device);
fail_btoi:
	radv_device_finish_meta_btoi_state(device);
fail_itob:
	radv_device_finish_meta_itob_state(device);
	return result;
}
1317
1318 static void
1319 create_iview(struct radv_cmd_buffer *cmd_buffer,
1320 struct radv_meta_blit2d_surf *surf,
1321 struct radv_image_view *iview)
1322 {
1323 VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9 ? VK_IMAGE_VIEW_TYPE_2D :
1324 radv_meta_get_view_type(surf->image);
1325 radv_image_view_init(iview, cmd_buffer->device,
1326 &(VkImageViewCreateInfo) {
1327 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1328 .image = radv_image_to_handle(surf->image),
1329 .viewType = view_type,
1330 .format = surf->format,
1331 .subresourceRange = {
1332 .aspectMask = surf->aspect_mask,
1333 .baseMipLevel = surf->level,
1334 .levelCount = 1,
1335 .baseArrayLayer = surf->layer,
1336 .layerCount = 1
1337 },
1338 });
1339 }
1340
1341 static void
1342 create_bview(struct radv_cmd_buffer *cmd_buffer,
1343 struct radv_buffer *buffer,
1344 unsigned offset,
1345 VkFormat format,
1346 struct radv_buffer_view *bview)
1347 {
1348 radv_buffer_view_init(bview, cmd_buffer->device,
1349 &(VkBufferViewCreateInfo) {
1350 .sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
1351 .flags = 0,
1352 .buffer = radv_buffer_to_handle(buffer),
1353 .format = format,
1354 .offset = offset,
1355 .range = VK_WHOLE_SIZE,
1356 });
1357
1358 }
1359
1360 static void
1361 itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1362 struct radv_image_view *src,
1363 struct radv_buffer_view *dst)
1364 {
1365 struct radv_device *device = cmd_buffer->device;
1366
1367 radv_meta_push_descriptor_set(cmd_buffer,
1368 VK_PIPELINE_BIND_POINT_COMPUTE,
1369 device->meta_state.itob.img_p_layout,
1370 0, /* set */
1371 2, /* descriptorWriteCount */
1372 (VkWriteDescriptorSet[]) {
1373 {
1374 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1375 .dstBinding = 0,
1376 .dstArrayElement = 0,
1377 .descriptorCount = 1,
1378 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
1379 .pImageInfo = (VkDescriptorImageInfo[]) {
1380 {
1381 .sampler = VK_NULL_HANDLE,
1382 .imageView = radv_image_view_to_handle(src),
1383 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1384 },
1385 }
1386 },
1387 {
1388 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1389 .dstBinding = 1,
1390 .dstArrayElement = 0,
1391 .descriptorCount = 1,
1392 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1393 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
1394 }
1395 });
1396 }
1397
/* Copy rectangles from an image to a buffer with the itob compute
 * pipeline. Descriptors are bound once; each rect gets its own push
 * constants and dispatch. The recording order (create views, bind
 * descriptors, bind pipeline, then per-rect push + dispatch) must be
 * preserved.
 */
void
radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer,
			  struct radv_meta_blit2d_surf *src,
			  struct radv_meta_blit2d_buffer *dst,
			  unsigned num_rects,
			  struct radv_meta_blit2d_rect *rects)
{
	VkPipeline pipeline = cmd_buffer->device->meta_state.itob.pipeline;
	struct radv_device *device = cmd_buffer->device;
	struct radv_image_view src_view;
	struct radv_buffer_view dst_view;

	create_iview(cmd_buffer, src, &src_view);
	create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view);
	itob_bind_descriptors(cmd_buffer, &src_view, &dst_view);

	/* GFX9 needs the 3D pipeline variant for 3D images. */
	if (device->physical_device->rad_info.chip_class >= GFX9 &&
	    src->image->type == VK_IMAGE_TYPE_3D)
		pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d;

	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

	for (unsigned r = 0; r < num_rects; ++r) {
		/* 16 bytes: src_x, src_y, src layer, dst pitch. */
		unsigned push_constants[4] = {
			rects[r].src_x,
			rects[r].src_y,
			src->layer,
			dst->pitch
		};
		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
				      device->meta_state.itob.img_p_layout,
				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
				      push_constants);

		radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
	}
}
1436
1437 static void
1438 btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1439 struct radv_buffer_view *src,
1440 struct radv_buffer_view *dst)
1441 {
1442 struct radv_device *device = cmd_buffer->device;
1443
1444 radv_meta_push_descriptor_set(cmd_buffer,
1445 VK_PIPELINE_BIND_POINT_COMPUTE,
1446 device->meta_state.btoi_r32g32b32.img_p_layout,
1447 0, /* set */
1448 2, /* descriptorWriteCount */
1449 (VkWriteDescriptorSet[]) {
1450 {
1451 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1452 .dstBinding = 0,
1453 .dstArrayElement = 0,
1454 .descriptorCount = 1,
1455 .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1456 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
1457 },
1458 {
1459 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1460 .dstBinding = 1,
1461 .dstArrayElement = 0,
1462 .descriptorCount = 1,
1463 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1464 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(dst) },
1465 }
1466 });
1467 }
1468
/* Buffer->image copy for R32G32B32 formats. Since the hw can't use
 * R32G32B32 as a storage format, the linear dest image is aliased by a
 * temporary buffer bound to the same memory, and the compute shader
 * writes each component separately through an R32 texel-buffer view.
 * The per-rect dst_x/dst_y push constants make copies to a non-zero
 * destination offset land at the right texels.
 */
static void
radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
				       struct radv_meta_blit2d_buffer *src,
				       struct radv_meta_blit2d_surf *dst,
				       unsigned num_rects,
				       struct radv_meta_blit2d_rect *rects)
{
	VkPipeline pipeline = cmd_buffer->device->meta_state.btoi_r32g32b32.pipeline;
	/* Stack-local memory object wrapping the image's BO, only used to
	 * bind the temporary buffer below. */
	struct radv_device_memory mem = { .bo = dst->image->bo };
	struct radv_device *device = cmd_buffer->device;
	struct radv_buffer_view src_view, dst_view;
	unsigned dst_offset = 0;
	unsigned stride;
	VkFormat dst_format;
	VkBuffer buffer;

	/* The dest view uses the single-component R32 equivalent format. */
	switch (dst->format) {
	case VK_FORMAT_R32G32B32_UINT:
		dst_format = VK_FORMAT_R32_UINT;
		break;
	case VK_FORMAT_R32G32B32_SINT:
		dst_format = VK_FORMAT_R32_SINT;
		break;
	case VK_FORMAT_R32G32B32_SFLOAT:
		dst_format = VK_FORMAT_R32_SFLOAT;
		break;
	default:
		unreachable("invalid R32G32B32 format");
	}

	/* This special btoi path for R32G32B32 formats will write the linear
	 * image as a buffer with the same underlying memory. The compute
	 * shader will clear all components separately using a R32 format.
	 */
	radv_CreateBuffer(radv_device_to_handle(device),
			  &(VkBufferCreateInfo) {
				.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
				.flags = 0,
				.size = dst->image->size,
				.usage = VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
				.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
			  }, NULL, &buffer);

	/* Bind the temporary buffer at the image's offset within its BO. */
	radv_BindBufferMemory2(radv_device_to_handle(device), 1,
			       (VkBindBufferMemoryInfoKHR[]) {
				{
					.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
					.buffer = buffer,
					.memory = radv_device_memory_to_handle(&mem),
					.memoryOffset = dst->image->offset,
				}
			       });

	create_bview(cmd_buffer, src->buffer, src->offset,
		     src->format, &src_view);
	create_bview(cmd_buffer, radv_buffer_from_handle(buffer), dst_offset,
		     dst_format, &dst_view);
	btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);

	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

	/* Row stride of the linear image, in units the shader expects. */
	if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
		stride = dst->image->surface.u.gfx9.surf_pitch;
	} else {
		stride = dst->image->surface.u.legacy.level[0].nblk_x * 3;
	}

	for (unsigned r = 0; r < num_rects; ++r) {
		/* 16 bytes: dst_x, dst_y, dest stride, src pitch. */
		unsigned push_constants[4] = {
			rects[r].dst_x,
			rects[r].dst_y,
			stride,
			src->pitch,
		};

		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
				      device->meta_state.btoi_r32g32b32.img_p_layout,
				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
				      push_constants);

		radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
	}

	radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
}
1555
1556 static void
1557 btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1558 struct radv_buffer_view *src,
1559 struct radv_image_view *dst)
1560 {
1561 struct radv_device *device = cmd_buffer->device;
1562
1563 radv_meta_push_descriptor_set(cmd_buffer,
1564 VK_PIPELINE_BIND_POINT_COMPUTE,
1565 device->meta_state.btoi.img_p_layout,
1566 0, /* set */
1567 2, /* descriptorWriteCount */
1568 (VkWriteDescriptorSet[]) {
1569 {
1570 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1571 .dstBinding = 0,
1572 .dstArrayElement = 0,
1573 .descriptorCount = 1,
1574 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1575 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(src) },
1576 },
1577 {
1578 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1579 .dstBinding = 1,
1580 .dstArrayElement = 0,
1581 .descriptorCount = 1,
1582 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1583 .pImageInfo = (VkDescriptorImageInfo[]) {
1584 {
1585 .sampler = VK_NULL_HANDLE,
1586 .imageView = radv_image_view_to_handle(dst),
1587 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1588 },
1589 }
1590 }
1591 });
1592 }
1593
/* Copy rectangles from a buffer to an image with the btoi compute
 * pipeline. R32G32B32 dest formats take the special texel-buffer path
 * since they cannot be used as storage images.
 */
void
radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
			     struct radv_meta_blit2d_buffer *src,
			     struct radv_meta_blit2d_surf *dst,
			     unsigned num_rects,
			     struct radv_meta_blit2d_rect *rects)
{
	VkPipeline pipeline = cmd_buffer->device->meta_state.btoi.pipeline;
	struct radv_device *device = cmd_buffer->device;
	struct radv_buffer_view src_view;
	struct radv_image_view dst_view;

	if (dst->image->vk_format == VK_FORMAT_R32G32B32_UINT ||
	    dst->image->vk_format == VK_FORMAT_R32G32B32_SINT ||
	    dst->image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
		radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst,
						       num_rects, rects);
		return;
	}

	create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
	create_iview(cmd_buffer, dst, &dst_view);
	btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);

	/* GFX9 needs the 3D pipeline variant for 3D images. */
	if (device->physical_device->rad_info.chip_class >= GFX9 &&
	    dst->image->type == VK_IMAGE_TYPE_3D)
		pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d;
	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

	for (unsigned r = 0; r < num_rects; ++r) {
		/* 16 bytes: dst_x, dst_y, dst layer, src pitch. */
		unsigned push_constants[4] = {
			rects[r].dst_x,
			rects[r].dst_y,
			dst->layer,
			src->pitch,
		};
		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
				      device->meta_state.btoi.img_p_layout,
				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
				      push_constants);

		radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
	}
}
1639
1640 static void
1641 itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1642 struct radv_image_view *src,
1643 struct radv_image_view *dst)
1644 {
1645 struct radv_device *device = cmd_buffer->device;
1646
1647 radv_meta_push_descriptor_set(cmd_buffer,
1648 VK_PIPELINE_BIND_POINT_COMPUTE,
1649 device->meta_state.itoi.img_p_layout,
1650 0, /* set */
1651 2, /* descriptorWriteCount */
1652 (VkWriteDescriptorSet[]) {
1653 {
1654 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1655 .dstBinding = 0,
1656 .dstArrayElement = 0,
1657 .descriptorCount = 1,
1658 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
1659 .pImageInfo = (VkDescriptorImageInfo[]) {
1660 {
1661 .sampler = VK_NULL_HANDLE,
1662 .imageView = radv_image_view_to_handle(src),
1663 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1664 },
1665 }
1666 },
1667 {
1668 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1669 .dstBinding = 1,
1670 .dstArrayElement = 0,
1671 .descriptorCount = 1,
1672 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1673 .pImageInfo = (VkDescriptorImageInfo[]) {
1674 {
1675 .sampler = VK_NULL_HANDLE,
1676 .imageView = radv_image_view_to_handle(dst),
1677 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1678 },
1679 }
1680 }
1681 });
1682 }
1683
/* Copy rectangles between two images with the itoi compute pipeline.
 * Each rect carries its own src/dst coordinates in push constants.
 */
void
radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
			    struct radv_meta_blit2d_surf *src,
			    struct radv_meta_blit2d_surf *dst,
			    unsigned num_rects,
			    struct radv_meta_blit2d_rect *rects)
{
	VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline;
	struct radv_device *device = cmd_buffer->device;
	struct radv_image_view src_view, dst_view;

	create_iview(cmd_buffer, src, &src_view);
	create_iview(cmd_buffer, dst, &dst_view);

	itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);

	/* GFX9 needs the 3D pipeline variant for 3D images. */
	if (device->physical_device->rad_info.chip_class >= GFX9 &&
	    src->image->type == VK_IMAGE_TYPE_3D)
		pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
	radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
			     VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);

	for (unsigned r = 0; r < num_rects; ++r) {
		/* 24 bytes: src_x/y, src layer, dst_x/y, dst layer. */
		unsigned push_constants[6] = {
			rects[r].src_x,
			rects[r].src_y,
			src->layer,
			rects[r].dst_x,
			rects[r].dst_y,
			dst->layer,
		};
		radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
				      device->meta_state.itoi.img_p_layout,
				      VK_SHADER_STAGE_COMPUTE_BIT, 0, 24,
				      push_constants);

		radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
	}
}
1723
1724 static void
1725 cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1726 struct radv_buffer_view *view)
1727 {
1728 struct radv_device *device = cmd_buffer->device;
1729
1730 radv_meta_push_descriptor_set(cmd_buffer,
1731 VK_PIPELINE_BIND_POINT_COMPUTE,
1732 device->meta_state.cleari_r32g32b32.img_p_layout,
1733 0, /* set */
1734 1, /* descriptorWriteCount */
1735 (VkWriteDescriptorSet[]) {
1736 {
1737 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1738 .dstBinding = 0,
1739 .dstArrayElement = 0,
1740 .descriptorCount = 1,
1741 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1742 .pTexelBufferView = (VkBufferView[]) { radv_buffer_view_to_handle(view) },
1743 }
1744 });
1745 }
1746
1747 static void
1748 radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1749 struct radv_meta_blit2d_surf *dst,
1750 const VkClearColorValue *clear_color)
1751 {
1752 VkPipeline pipeline = cmd_buffer->device->meta_state.cleari_r32g32b32.pipeline;
1753 struct radv_device_memory mem = { .bo = dst->image->bo };
1754 struct radv_device *device = cmd_buffer->device;
1755 struct radv_buffer_view dst_view;
1756 unsigned stride;
1757 VkFormat format;
1758 VkBuffer buffer;
1759
1760 switch (dst->format) {
1761 case VK_FORMAT_R32G32B32_UINT:
1762 format = VK_FORMAT_R32_UINT;
1763 break;
1764 case VK_FORMAT_R32G32B32_SINT:
1765 format = VK_FORMAT_R32_SINT;
1766 break;
1767 case VK_FORMAT_R32G32B32_SFLOAT:
1768 format = VK_FORMAT_R32_SFLOAT;
1769 break;
1770 default:
1771 unreachable("invalid R32G32B32 format");
1772 }
1773
1774 /* This special clear path for R32G32B32 formats will write the linear
1775 * image as a buffer with the same underlying memory. The compute
1776 * shader will clear all components separately using a R32 format.
1777 */
1778 radv_CreateBuffer(radv_device_to_handle(device),
1779 &(VkBufferCreateInfo) {
1780 .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
1781 .flags = 0,
1782 .size = dst->image->size,
1783 .usage = VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT,
1784 .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
1785 }, NULL, &buffer);
1786
1787 radv_BindBufferMemory2(radv_device_to_handle(device), 1,
1788 (VkBindBufferMemoryInfoKHR[]) {
1789 {
1790 .sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
1791 .buffer = buffer,
1792 .memory = radv_device_memory_to_handle(&mem),
1793 .memoryOffset = dst->image->offset,
1794 }
1795 });
1796
1797 create_bview(cmd_buffer, radv_buffer_from_handle(buffer), 0, format, &dst_view);
1798 cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view);
1799
1800 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1801 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1802
1803 if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
1804 stride = dst->image->surface.u.gfx9.surf_pitch;
1805 } else {
1806 stride = dst->image->surface.u.legacy.level[0].nblk_x * 3;
1807 }
1808
1809 unsigned push_constants[4] = {
1810 clear_color->uint32[0],
1811 clear_color->uint32[1],
1812 clear_color->uint32[2],
1813 stride,
1814 };
1815
1816 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1817 device->meta_state.cleari_r32g32b32.img_p_layout,
1818 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
1819 push_constants);
1820
1821 radv_unaligned_dispatch(cmd_buffer, dst->image->info.width,
1822 dst->image->info.height, 1);
1823
1824 radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
1825 }
1826
1827 static void
1828 cleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer,
1829 struct radv_image_view *dst_iview)
1830 {
1831 struct radv_device *device = cmd_buffer->device;
1832
1833 radv_meta_push_descriptor_set(cmd_buffer,
1834 VK_PIPELINE_BIND_POINT_COMPUTE,
1835 device->meta_state.cleari.img_p_layout,
1836 0, /* set */
1837 1, /* descriptorWriteCount */
1838 (VkWriteDescriptorSet[]) {
1839 {
1840 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1841 .dstBinding = 0,
1842 .dstArrayElement = 0,
1843 .descriptorCount = 1,
1844 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1845 .pImageInfo = (VkDescriptorImageInfo[]) {
1846 {
1847 .sampler = VK_NULL_HANDLE,
1848 .imageView = radv_image_view_to_handle(dst_iview),
1849 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1850 },
1851 }
1852 },
1853 });
1854 }
1855
1856 void
1857 radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer,
1858 struct radv_meta_blit2d_surf *dst,
1859 const VkClearColorValue *clear_color)
1860 {
1861 VkPipeline pipeline = cmd_buffer->device->meta_state.cleari.pipeline;
1862 struct radv_device *device = cmd_buffer->device;
1863 struct radv_image_view dst_iview;
1864
1865 if (dst->format == VK_FORMAT_R32G32B32_UINT ||
1866 dst->format == VK_FORMAT_R32G32B32_SINT ||
1867 dst->format == VK_FORMAT_R32G32B32_SFLOAT) {
1868 radv_meta_clear_image_cs_r32g32b32(cmd_buffer, dst, clear_color);
1869 return;
1870 }
1871
1872 create_iview(cmd_buffer, dst, &dst_iview);
1873 cleari_bind_descriptors(cmd_buffer, &dst_iview);
1874
1875 if (device->physical_device->rad_info.chip_class >= GFX9 &&
1876 dst->image->type == VK_IMAGE_TYPE_3D)
1877 pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;
1878
1879 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
1880 VK_PIPELINE_BIND_POINT_COMPUTE, pipeline);
1881
1882 unsigned push_constants[5] = {
1883 clear_color->uint32[0],
1884 clear_color->uint32[1],
1885 clear_color->uint32[2],
1886 clear_color->uint32[3],
1887 dst->layer,
1888 };
1889
1890 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1891 device->meta_state.cleari.img_p_layout,
1892 VK_SHADER_STAGE_COMPUTE_BIT, 0, 20,
1893 push_constants);
1894
1895 radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
1896 }