radv add radv_get_resolve_pipeline() in the compute path
[mesa.git] / src / amd / vulkan / radv_meta_resolve_cs.c
1 /*
2 * Copyright © 2016 Dave Airlie
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24
25 #include <assert.h>
26 #include <stdbool.h>
27
28 #include "radv_meta.h"
29 #include "radv_private.h"
30 #include "nir/nir_builder.h"
31 #include "sid.h"
32 #include "vk_format.h"
33
34 static nir_ssa_def *radv_meta_build_resolve_srgb_conversion(nir_builder *b,
35 nir_ssa_def *input)
36 {
37 unsigned i;
38
39 nir_ssa_def *cmp[3];
40 for (i = 0; i < 3; i++)
41 cmp[i] = nir_flt(b, nir_channel(b, input, i),
42 nir_imm_int(b, 0x3b4d2e1c));
43
44 nir_ssa_def *ltvals[3];
45 for (i = 0; i < 3; i++)
46 ltvals[i] = nir_fmul(b, nir_channel(b, input, i),
47 nir_imm_float(b, 12.92));
48
49 nir_ssa_def *gtvals[3];
50
51 for (i = 0; i < 3; i++) {
52 gtvals[i] = nir_fpow(b, nir_channel(b, input, i),
53 nir_imm_float(b, 1.0/2.4));
54 gtvals[i] = nir_fmul(b, gtvals[i],
55 nir_imm_float(b, 1.055));
56 gtvals[i] = nir_fsub(b, gtvals[i],
57 nir_imm_float(b, 0.055));
58 }
59
60 nir_ssa_def *comp[4];
61 for (i = 0; i < 3; i++)
62 comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
63 comp[3] = nir_channels(b, input, 1 << 3);
64 return nir_vec(b, comp, 4);
65 }
66
67 static nir_shader *
68 build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
69 {
70 nir_builder b;
71 char name[64];
72 const struct glsl_type *sampler_type = glsl_sampler_type(GLSL_SAMPLER_DIM_MS,
73 false,
74 false,
75 GLSL_TYPE_FLOAT);
76 const struct glsl_type *img_type = glsl_sampler_type(GLSL_SAMPLER_DIM_2D,
77 false,
78 false,
79 GLSL_TYPE_FLOAT);
80 snprintf(name, 64, "meta_resolve_cs-%d-%s", samples, is_integer ? "int" : (is_srgb ? "srgb" : "float"));
81 nir_builder_init_simple_shader(&b, NULL, MESA_SHADER_COMPUTE, NULL);
82 b.shader->info.name = ralloc_strdup(b.shader, name);
83 b.shader->info.cs.local_size[0] = 16;
84 b.shader->info.cs.local_size[1] = 16;
85 b.shader->info.cs.local_size[2] = 1;
86
87 nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform,
88 sampler_type, "s_tex");
89 input_img->data.descriptor_set = 0;
90 input_img->data.binding = 0;
91
92 nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform,
93 img_type, "out_img");
94 output_img->data.descriptor_set = 0;
95 output_img->data.binding = 1;
96 nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
97 nir_ssa_def *wg_id = nir_load_work_group_id(&b);
98 nir_ssa_def *block_size = nir_imm_ivec4(&b,
99 b.shader->info.cs.local_size[0],
100 b.shader->info.cs.local_size[1],
101 b.shader->info.cs.local_size[2], 0);
102
103 nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
104
105 nir_intrinsic_instr *src_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
106 nir_intrinsic_set_base(src_offset, 0);
107 nir_intrinsic_set_range(src_offset, 16);
108 src_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));
109 src_offset->num_components = 2;
110 nir_ssa_dest_init(&src_offset->instr, &src_offset->dest, 2, 32, "src_offset");
111 nir_builder_instr_insert(&b, &src_offset->instr);
112
113 nir_intrinsic_instr *dst_offset = nir_intrinsic_instr_create(b.shader, nir_intrinsic_load_push_constant);
114 nir_intrinsic_set_base(dst_offset, 0);
115 nir_intrinsic_set_range(dst_offset, 16);
116 dst_offset->src[0] = nir_src_for_ssa(nir_imm_int(&b, 8));
117 dst_offset->num_components = 2;
118 nir_ssa_dest_init(&dst_offset->instr, &dst_offset->dest, 2, 32, "dst_offset");
119 nir_builder_instr_insert(&b, &dst_offset->instr);
120
121 nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, &src_offset->dest.ssa), 0x3);
122 nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");
123
124 radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img,
125 color, img_coord);
126
127 nir_ssa_def *outval = nir_load_var(&b, color);
128 if (is_srgb)
129 outval = radv_meta_build_resolve_srgb_conversion(&b, outval);
130
131 nir_ssa_def *coord = nir_iadd(&b, global_id, &dst_offset->dest.ssa);
132 nir_intrinsic_instr *store = nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);
133 store->num_components = 4;
134 store->src[0] = nir_src_for_ssa(&nir_build_deref_var(&b, output_img)->dest.ssa);
135 store->src[1] = nir_src_for_ssa(coord);
136 store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));
137 store->src[3] = nir_src_for_ssa(outval);
138 nir_builder_instr_insert(&b, &store->instr);
139 return b.shader;
140 }
141
142
143 static VkResult
144 create_layout(struct radv_device *device)
145 {
146 VkResult result;
147 /*
148 * two descriptors one for the image being sampled
149 * one for the buffer being written.
150 */
151 VkDescriptorSetLayoutCreateInfo ds_create_info = {
152 .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
153 .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
154 .bindingCount = 2,
155 .pBindings = (VkDescriptorSetLayoutBinding[]) {
156 {
157 .binding = 0,
158 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
159 .descriptorCount = 1,
160 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
161 .pImmutableSamplers = NULL
162 },
163 {
164 .binding = 1,
165 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
166 .descriptorCount = 1,
167 .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
168 .pImmutableSamplers = NULL
169 },
170 }
171 };
172
173 result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device),
174 &ds_create_info,
175 &device->meta_state.alloc,
176 &device->meta_state.resolve_compute.ds_layout);
177 if (result != VK_SUCCESS)
178 goto fail;
179
180
181 VkPipelineLayoutCreateInfo pl_create_info = {
182 .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
183 .setLayoutCount = 1,
184 .pSetLayouts = &device->meta_state.resolve_compute.ds_layout,
185 .pushConstantRangeCount = 1,
186 .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
187 };
188
189 result = radv_CreatePipelineLayout(radv_device_to_handle(device),
190 &pl_create_info,
191 &device->meta_state.alloc,
192 &device->meta_state.resolve_compute.p_layout);
193 if (result != VK_SUCCESS)
194 goto fail;
195 return VK_SUCCESS;
196 fail:
197 return result;
198 }
199
200 static VkResult
201 create_resolve_pipeline(struct radv_device *device,
202 int samples,
203 bool is_integer,
204 bool is_srgb,
205 VkPipeline *pipeline)
206 {
207 VkResult result;
208 struct radv_shader_module cs = { .nir = NULL };
209
210 mtx_lock(&device->meta_state.mtx);
211 if (*pipeline) {
212 mtx_unlock(&device->meta_state.mtx);
213 return VK_SUCCESS;
214 }
215
216 cs.nir = build_resolve_compute_shader(device, is_integer, is_srgb, samples);
217
218 /* compute shader */
219
220 VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
221 .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
222 .stage = VK_SHADER_STAGE_COMPUTE_BIT,
223 .module = radv_shader_module_to_handle(&cs),
224 .pName = "main",
225 .pSpecializationInfo = NULL,
226 };
227
228 VkComputePipelineCreateInfo vk_pipeline_info = {
229 .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
230 .stage = pipeline_shader_stage,
231 .flags = 0,
232 .layout = device->meta_state.resolve_compute.p_layout,
233 };
234
235 result = radv_CreateComputePipelines(radv_device_to_handle(device),
236 radv_pipeline_cache_to_handle(&device->meta_state.cache),
237 1, &vk_pipeline_info, NULL,
238 pipeline);
239 if (result != VK_SUCCESS)
240 goto fail;
241
242 ralloc_free(cs.nir);
243 mtx_unlock(&device->meta_state.mtx);
244 return VK_SUCCESS;
245 fail:
246 ralloc_free(cs.nir);
247 mtx_unlock(&device->meta_state.mtx);
248 return result;
249 }
250
251 VkResult
252 radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)
253 {
254 struct radv_meta_state *state = &device->meta_state;
255 VkResult res;
256
257 res = create_layout(device);
258 if (res != VK_SUCCESS)
259 goto fail;
260
261 if (on_demand)
262 return VK_SUCCESS;
263
264 for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
265 uint32_t samples = 1 << i;
266
267 res = create_resolve_pipeline(device, samples, false, false,
268 &state->resolve_compute.rc[i].pipeline);
269 if (res != VK_SUCCESS)
270 goto fail;
271
272 res = create_resolve_pipeline(device, samples, true, false,
273 &state->resolve_compute.rc[i].i_pipeline);
274 if (res != VK_SUCCESS)
275 goto fail;
276
277 res = create_resolve_pipeline(device, samples, false, true,
278 &state->resolve_compute.rc[i].srgb_pipeline);
279 if (res != VK_SUCCESS)
280 goto fail;
281
282 }
283
284 return VK_SUCCESS;
285 fail:
286 radv_device_finish_meta_resolve_compute_state(device);
287 return res;
288 }
289
290 void
291 radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
292 {
293 struct radv_meta_state *state = &device->meta_state;
294 for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
295 radv_DestroyPipeline(radv_device_to_handle(device),
296 state->resolve_compute.rc[i].pipeline,
297 &state->alloc);
298
299 radv_DestroyPipeline(radv_device_to_handle(device),
300 state->resolve_compute.rc[i].i_pipeline,
301 &state->alloc);
302
303 radv_DestroyPipeline(radv_device_to_handle(device),
304 state->resolve_compute.rc[i].srgb_pipeline,
305 &state->alloc);
306 }
307
308 radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
309 state->resolve_compute.ds_layout,
310 &state->alloc);
311 radv_DestroyPipelineLayout(radv_device_to_handle(device),
312 state->resolve_compute.p_layout,
313 &state->alloc);
314 }
315
316 static VkPipeline *
317 radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer,
318 struct radv_image *src_image)
319 {
320 struct radv_device *device = cmd_buffer->device;
321 struct radv_meta_state *state = &device->meta_state;
322 uint32_t samples = src_image->info.samples;
323 uint32_t samples_log2 = ffs(samples) - 1;
324 VkPipeline *pipeline;
325
326 if (vk_format_is_int(src_image->vk_format))
327 pipeline = &state->resolve_compute.rc[samples_log2].i_pipeline;
328 else if (vk_format_is_srgb(src_image->vk_format))
329 pipeline = &state->resolve_compute.rc[samples_log2].srgb_pipeline;
330 else
331 pipeline = &state->resolve_compute.rc[samples_log2].pipeline;
332
333 if (!*pipeline) {
334 VkResult ret;
335
336 ret = create_resolve_pipeline(device, samples,
337 vk_format_is_int(src_image->vk_format),
338 vk_format_is_srgb(src_image->vk_format),
339 pipeline);
340 if (ret != VK_SUCCESS) {
341 cmd_buffer->record_result = ret;
342 return NULL;
343 }
344 }
345
346 return pipeline;
347 }
348
349 static void
350 emit_resolve(struct radv_cmd_buffer *cmd_buffer,
351 struct radv_image_view *src_iview,
352 struct radv_image_view *dest_iview,
353 const VkOffset2D *src_offset,
354 const VkOffset2D *dest_offset,
355 const VkExtent2D *resolve_extent)
356 {
357 struct radv_device *device = cmd_buffer->device;
358 VkPipeline *pipeline;
359
360 radv_meta_push_descriptor_set(cmd_buffer,
361 VK_PIPELINE_BIND_POINT_COMPUTE,
362 device->meta_state.resolve_compute.p_layout,
363 0, /* set */
364 2, /* descriptorWriteCount */
365 (VkWriteDescriptorSet[]) {
366 {
367 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
368 .dstBinding = 0,
369 .dstArrayElement = 0,
370 .descriptorCount = 1,
371 .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
372 .pImageInfo = (VkDescriptorImageInfo[]) {
373 {
374 .sampler = VK_NULL_HANDLE,
375 .imageView = radv_image_view_to_handle(src_iview),
376 .imageLayout = VK_IMAGE_LAYOUT_GENERAL },
377 }
378 },
379 {
380 .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
381 .dstBinding = 1,
382 .dstArrayElement = 0,
383 .descriptorCount = 1,
384 .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
385 .pImageInfo = (VkDescriptorImageInfo[]) {
386 {
387 .sampler = VK_NULL_HANDLE,
388 .imageView = radv_image_view_to_handle(dest_iview),
389 .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
390 },
391 }
392 }
393 });
394
395 pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview->image);
396
397 radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer),
398 VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
399
400 unsigned push_constants[4] = {
401 src_offset->x,
402 src_offset->y,
403 dest_offset->x,
404 dest_offset->y,
405 };
406 radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
407 device->meta_state.resolve_compute.p_layout,
408 VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,
409 push_constants);
410 radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
411
412 }
413
414 void radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer,
415 struct radv_image *src_image,
416 VkImageLayout src_image_layout,
417 struct radv_image *dest_image,
418 VkImageLayout dest_image_layout,
419 uint32_t region_count,
420 const VkImageResolve *regions)
421 {
422 struct radv_meta_saved_state saved_state;
423
424 radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout,
425 region_count, regions);
426
427 radv_meta_save(&saved_state, cmd_buffer,
428 RADV_META_SAVE_COMPUTE_PIPELINE |
429 RADV_META_SAVE_CONSTANTS |
430 RADV_META_SAVE_DESCRIPTORS);
431
432 for (uint32_t r = 0; r < region_count; ++r) {
433 const VkImageResolve *region = &regions[r];
434
435 assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
436 assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
437 assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);
438
439 const uint32_t src_base_layer =
440 radv_meta_get_iview_layer(src_image, &region->srcSubresource,
441 &region->srcOffset);
442
443 const uint32_t dest_base_layer =
444 radv_meta_get_iview_layer(dest_image, &region->dstSubresource,
445 &region->dstOffset);
446
447 const struct VkExtent3D extent =
448 radv_sanitize_image_extent(src_image->type, region->extent);
449 const struct VkOffset3D srcOffset =
450 radv_sanitize_image_offset(src_image->type, region->srcOffset);
451 const struct VkOffset3D dstOffset =
452 radv_sanitize_image_offset(dest_image->type, region->dstOffset);
453
454 for (uint32_t layer = 0; layer < region->srcSubresource.layerCount;
455 ++layer) {
456
457 struct radv_image_view src_iview;
458 radv_image_view_init(&src_iview, cmd_buffer->device,
459 &(VkImageViewCreateInfo) {
460 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
461 .image = radv_image_to_handle(src_image),
462 .viewType = radv_meta_get_view_type(src_image),
463 .format = src_image->vk_format,
464 .subresourceRange = {
465 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
466 .baseMipLevel = region->srcSubresource.mipLevel,
467 .levelCount = 1,
468 .baseArrayLayer = src_base_layer + layer,
469 .layerCount = 1,
470 },
471 });
472
473 struct radv_image_view dest_iview;
474 radv_image_view_init(&dest_iview, cmd_buffer->device,
475 &(VkImageViewCreateInfo) {
476 .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
477 .image = radv_image_to_handle(dest_image),
478 .viewType = radv_meta_get_view_type(dest_image),
479 .format = vk_to_non_srgb_format(dest_image->vk_format),
480 .subresourceRange = {
481 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
482 .baseMipLevel = region->dstSubresource.mipLevel,
483 .levelCount = 1,
484 .baseArrayLayer = dest_base_layer + layer,
485 .layerCount = 1,
486 },
487 });
488
489 emit_resolve(cmd_buffer,
490 &src_iview,
491 &dest_iview,
492 &(VkOffset2D) {srcOffset.x, srcOffset.y },
493 &(VkOffset2D) {dstOffset.x, dstOffset.y },
494 &(VkExtent2D) {extent.width, extent.height });
495 }
496 }
497 radv_meta_restore(&saved_state, cmd_buffer);
498 }
499
500 /**
501 * Emit any needed resolves for the current subpass.
502 */
503 void
504 radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
505 {
506 struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
507 const struct radv_subpass *subpass = cmd_buffer->state.subpass;
508 struct radv_subpass_barrier barrier;
509
510 /* Resolves happen before the end-of-subpass barriers get executed, so
511 * we have to make the attachment shader-readable.
512 */
513 barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
514 barrier.src_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
515 barrier.dst_access_mask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT;
516 radv_subpass_barrier(cmd_buffer, &barrier);
517
518 for (uint32_t i = 0; i < subpass->color_count; ++i) {
519 struct radv_subpass_attachment src_att = subpass->color_attachments[i];
520 struct radv_subpass_attachment dst_att = subpass->resolve_attachments[i];
521 struct radv_image_view *src_iview = fb->attachments[src_att.attachment].attachment;
522 struct radv_image_view *dst_iview = fb->attachments[dst_att.attachment].attachment;
523
524 if (dst_att.attachment == VK_ATTACHMENT_UNUSED)
525 continue;
526
527 VkImageResolve region = {
528 .extent = (VkExtent3D){ fb->width, fb->height, 0 },
529 .srcSubresource = (VkImageSubresourceLayers) {
530 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
531 .mipLevel = src_iview->base_mip,
532 .baseArrayLayer = 0,
533 .layerCount = src_iview->image->info.array_size
534 },
535 .dstSubresource = (VkImageSubresourceLayers) {
536 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
537 .mipLevel = dst_iview->base_mip,
538 .baseArrayLayer = 0,
539 .layerCount = dst_iview->image->info.array_size
540 },
541 .srcOffset = (VkOffset3D){ 0, 0, 0 },
542 .dstOffset = (VkOffset3D){ 0, 0, 0 },
543 };
544
545 radv_meta_resolve_compute_image(cmd_buffer,
546 src_iview->image,
547 src_att.layout,
548 dst_iview->image,
549 dst_att.layout,
550 1, &region);
551 }
552
553 cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |
554 RADV_CMD_FLAG_INV_VMEM_L1;
555 }