vk: Remove begin/end descriptor pool update
[mesa.git] / src / vulkan / device.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
#include <stdlib.h>     /* getenv(), strtol(), malloc(), free() */
#include <errno.h>      /* errno / ETIME in anv_WaitForFences() */
29
30 #include "private.h"
31
32 static int
33 anv_env_get_int(const char *name)
34 {
35 const char *val = getenv(name);
36
37 if (!val)
38 return 0;
39
40 return strtol(val, NULL, 0);
41 }
42
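/* Open the DRM render node, verify the kernel features the driver relies on
 * (execbuf2, wait-timeout, LLC and exec-constants), and fill in the chipset
 * id and device info. INTEL_DEVID_OVERRIDE forces a specific chipset id and
 * implies no_hw.
 */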
43 static VkResult
44 fill_physical_device(struct anv_physical_device *device,
45 struct anv_instance *instance,
46 const char *path)
47 {
48 int fd;
49
50 fd = open(path, O_RDWR | O_CLOEXEC);
51 if (fd < 0)
52 return vk_error(VK_ERROR_UNAVAILABLE);
53
54 device->instance = instance;
55 device->path = path;
56
57 device->chipset_id = anv_env_get_int("INTEL_DEVID_OVERRIDE");
58 device->no_hw = false;
59 if (device->chipset_id) {
60 /* INTEL_DEVID_OVERRIDE implies INTEL_NO_HW. */
61 device->no_hw = true;
62 } else {
63 device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID);
64 }
65 if (!device->chipset_id)
66 goto fail;
67
68 device->name = brw_get_device_name(device->chipset_id);
69 device->info = brw_get_device_info(device->chipset_id, -1);
70 if (!device->info)
71 goto fail;
72
73 if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT))
74 goto fail;
75
76 if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2))
77 goto fail;
78
79 if (!anv_gem_get_param(fd, I915_PARAM_HAS_LLC))
80 goto fail;
81
82 if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_CONSTANTS))
83 goto fail;
84
85 close(fd);
86
87 return VK_SUCCESS;
88
89 fail:
90 close(fd);
91
92 return vk_error(VK_ERROR_UNAVAILABLE);
93 }
94
95 static void *default_alloc(
96 void* pUserData,
97 size_t size,
98 size_t alignment,
99 VkSystemAllocType allocType)
100 {
101 return malloc(size);
102 }
103
104 static void default_free(
105 void* pUserData,
106 void* pMem)
107 {
108 free(pMem);
109 }
110
111 static const VkAllocCallbacks default_alloc_callbacks = {
112 .pUserData = NULL,
113 .pfnAlloc = default_alloc,
114 .pfnFree = default_free
115 };
116
117 VkResult anv_CreateInstance(
118 const VkInstanceCreateInfo* pCreateInfo,
119 VkInstance* pInstance)
120 {
121 struct anv_instance *instance;
122 const VkAllocCallbacks *alloc_callbacks = &default_alloc_callbacks;
123 void *user_data = NULL;
124 VkResult result;
125
126 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
127
128 if (pCreateInfo->pAllocCb) {
129 alloc_callbacks = pCreateInfo->pAllocCb;
130 user_data = pCreateInfo->pAllocCb->pUserData;
131 }
132 instance = alloc_callbacks->pfnAlloc(user_data, sizeof(*instance), 8,
133 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
134 if (!instance)
135 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
136
137 instance->pAllocUserData = alloc_callbacks->pUserData;
138 instance->pfnAlloc = alloc_callbacks->pfnAlloc;
139 instance->pfnFree = alloc_callbacks->pfnFree;
140 instance->apiVersion = pCreateInfo->pAppInfo->apiVersion;
141
142 instance->physicalDeviceCount = 0;
143 result = fill_physical_device(&instance->physicalDevice,
144 instance, "/dev/dri/renderD128");
145
146 if (result != VK_SUCCESS) {
instance->pfnFree(instance->pAllocUserData, instance);
return result;
}
148
149 instance->physicalDeviceCount++;
150 *pInstance = (VkInstance) instance;
151
152 return VK_SUCCESS;
153 }
154
155 VkResult anv_DestroyInstance(
156 VkInstance _instance)
157 {
158 struct anv_instance *instance = (struct anv_instance *) _instance;
159
160 instance->pfnFree(instance->pAllocUserData, instance);
161
162 return VK_SUCCESS;
163 }
164
165 VkResult anv_EnumeratePhysicalDevices(
166 VkInstance _instance,
167 uint32_t* pPhysicalDeviceCount,
168 VkPhysicalDevice* pPhysicalDevices)
169 {
170 struct anv_instance *instance = (struct anv_instance *) _instance;
171
172 if (*pPhysicalDeviceCount >= 1)
173 pPhysicalDevices[0] = (VkPhysicalDevice) &instance->physicalDevice;
174 *pPhysicalDeviceCount = instance->physicalDeviceCount;
175
176 return VK_SUCCESS;
177 }
178
179 VkResult anv_GetPhysicalDeviceInfo(
180 VkPhysicalDevice physicalDevice,
181 VkPhysicalDeviceInfoType infoType,
182 size_t* pDataSize,
183 void* pData)
184 {
185 struct anv_physical_device *device = (struct anv_physical_device *) physicalDevice;
186 VkPhysicalDeviceProperties *properties;
187 VkPhysicalDevicePerformance *performance;
188 VkPhysicalDeviceQueueProperties *queue_properties;
189 VkPhysicalDeviceMemoryProperties *memory_properties;
190 VkDisplayPropertiesWSI *display_properties;
191 uint64_t ns_per_tick = 80;
192
193 switch ((uint32_t) infoType) {
194 case VK_PHYSICAL_DEVICE_INFO_TYPE_PROPERTIES:
195 properties = pData;
196
197 *pDataSize = sizeof(*properties);
198 if (pData == NULL)
199 return VK_SUCCESS;
200
201 properties->apiVersion = 1;
202 properties->driverVersion = 1;
203 properties->vendorId = 0x8086;
204 properties->deviceId = device->chipset_id;
205 properties->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
206 strcpy(properties->deviceName, device->name);
207 properties->maxInlineMemoryUpdateSize = 0;
208 properties->maxBoundDescriptorSets = MAX_SETS;
209 properties->maxThreadGroupSize = 512;
210 properties->timestampFrequency = 1000 * 1000 * 1000 / ns_per_tick;
211 properties->multiColorAttachmentClears = true;
212 properties->maxDescriptorSets = 8;
213 properties->maxViewports = 16;
214 properties->maxColorAttachments = 8;
215 return VK_SUCCESS;
216
217 case VK_PHYSICAL_DEVICE_INFO_TYPE_PERFORMANCE:
218 performance = pData;
219
220 *pDataSize = sizeof(*performance);
221 if (pData == NULL)
222 return VK_SUCCESS;
223
224 performance->maxDeviceClock = 1.0;
225 performance->aluPerClock = 1.0;
226 performance->texPerClock = 1.0;
227 performance->primsPerClock = 1.0;
228 performance->pixelsPerClock = 1.0;
229 return VK_SUCCESS;
230
231 case VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PROPERTIES:
232 queue_properties = pData;
233
234 *pDataSize = sizeof(*queue_properties);
235 if (pData == NULL)
236 return VK_SUCCESS;
237
238 queue_properties->queueFlags = 0;
239 queue_properties->queueCount = 1;
240 queue_properties->supportsTimestamps = true;
241 return VK_SUCCESS;
242
243 case VK_PHYSICAL_DEVICE_INFO_TYPE_MEMORY_PROPERTIES:
244 memory_properties = pData;
245
246 *pDataSize = sizeof(*memory_properties);
247 if (pData == NULL)
248 return VK_SUCCESS;
249
250 memory_properties->supportsMigration = false;
251 memory_properties->supportsPinning = false;
252 return VK_SUCCESS;
253
254 case VK_PHYSICAL_DEVICE_INFO_TYPE_DISPLAY_PROPERTIES_WSI:
255 anv_finishme("VK_PHYSICAL_DEVICE_INFO_TYPE_DISPLAY_PROPERTIES_WSI");
256
257 *pDataSize = sizeof(*display_properties);
258 if (pData == NULL)
259 return VK_SUCCESS;
260
261 display_properties = pData;
262 display_properties->display = 0;
263 display_properties->physicalResolution = (VkExtent2D) { 0, 0 };
264 return VK_SUCCESS;
265
266 case VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PRESENT_PROPERTIES_WSI:
267 anv_finishme("VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PRESENT_PROPERTIES_WSI");
268 return VK_SUCCESS;
269
270
271 default:
272 return VK_UNSUPPORTED;
273 }
274
275 }
276
277 void * vkGetProcAddr(
278 VkPhysicalDevice physicalDevice,
279 const char* pName)
280 {
281 return anv_lookup_entrypoint(pName);
282 }
283
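/* Parse the comma-separated INTEL_DEBUG environment variable. Only two flags
 * are honored here: "aub" (dump submitted batches to an AUB trace) and
 * "no_hw" (skip actual execbuffer submission).
 */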
284 static void
285 parse_debug_flags(struct anv_device *device)
286 {
287 const char *debug, *p, *end;
288
289 debug = getenv("INTEL_DEBUG");
290 device->dump_aub = false;
291 if (debug) {
292 for (p = debug; *p; p = end + 1) {
293 end = strchrnul(p, ',');
294 if (end - p == 3 && memcmp(p, "aub", 3) == 0)
295 device->dump_aub = true;
296 if (end - p == 5 && memcmp(p, "no_hw", 5) == 0)
297 device->no_hw = true;
298 if (*end == '\0')
299 break;
300 }
301 }
302 }
303
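/* The queue tracks command buffer completion with a monotonically increasing
 * serial number; the last completed serial lives in a dword allocated from
 * the surface state pool.
 */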
304 static VkResult
305 anv_queue_init(struct anv_device *device, struct anv_queue *queue)
306 {
307 queue->device = device;
308 queue->pool = &device->surface_state_pool;
309
310 queue->completed_serial = anv_state_pool_alloc(queue->pool, 4, 4);
311 if (queue->completed_serial.map == NULL)
312 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
313
314 *(uint32_t *)queue->completed_serial.map = 0;
315 queue->next_serial = 1;
316
317 return VK_SUCCESS;
318 }
319
320 static void
321 anv_queue_finish(struct anv_queue *queue)
322 {
323 #ifdef HAVE_VALGRIND
324 /* This gets torn down with the device so we only need to do this if
325 * valgrind is present.
326 */
327 anv_state_pool_free(queue->pool, queue->completed_serial);
328 #endif
329 }
330
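/* Upload the three required VkBorderColor values (in both float and uint32
 * form) into dynamic state so SAMPLER_STATE can point at them through its
 * indirect state pointer.
 */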
331 static void
332 anv_device_init_border_colors(struct anv_device *device)
333 {
334 float float_border_colors[][4] = {
335 [VK_BORDER_COLOR_OPAQUE_WHITE] = { 1.0, 1.0, 1.0, 1.0 },
336 [VK_BORDER_COLOR_TRANSPARENT_BLACK] = { 0.0, 0.0, 0.0, 0.0 },
337 [VK_BORDER_COLOR_OPAQUE_BLACK] = { 0.0, 0.0, 0.0, 1.0 }
338 };
339
340 uint32_t uint32_border_colors[][4] = {
341 [VK_BORDER_COLOR_OPAQUE_WHITE] = { 1, 1, 1, 1 },
342 [VK_BORDER_COLOR_TRANSPARENT_BLACK] = { 0, 0, 0, 0 },
343 [VK_BORDER_COLOR_OPAQUE_BLACK] = { 0, 0, 0, 1 }
344 };
345
346 device->float_border_colors =
347 anv_state_pool_alloc(&device->dynamic_state_pool,
348 sizeof(float_border_colors), 32);
349 memcpy(device->float_border_colors.map,
350 float_border_colors, sizeof(float_border_colors));
351
352 device->uint32_border_colors =
353 anv_state_pool_alloc(&device->dynamic_state_pool,
354 sizeof(uint32_border_colors), 32);
355 memcpy(device->uint32_border_colors.map,
356 uint32_border_colors, sizeof(uint32_border_colors));
357
358 }
359
360 static const uint32_t BATCH_SIZE = 8192;
361
362 VkResult anv_CreateDevice(
363 VkPhysicalDevice _physicalDevice,
364 const VkDeviceCreateInfo* pCreateInfo,
365 VkDevice* pDevice)
366 {
367 struct anv_physical_device *physicalDevice =
368 (struct anv_physical_device *) _physicalDevice;
369 struct anv_instance *instance = physicalDevice->instance;
370 struct anv_device *device;
371
372 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
373
374 device = instance->pfnAlloc(instance->pAllocUserData,
375 sizeof(*device), 8,
376 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
377 if (!device)
378 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
379
380 device->no_hw = physicalDevice->no_hw;
381 parse_debug_flags(device);
382
383 device->instance = physicalDevice->instance;
384 device->fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC);
385 if (device->fd == -1)
386 goto fail_device;
387
388 device->context_id = anv_gem_create_context(device);
389 if (device->context_id == -1)
390 goto fail_fd;
391
392 anv_bo_pool_init(&device->batch_bo_pool, device, BATCH_SIZE);
393
394 anv_block_pool_init(&device->dynamic_state_block_pool, device, 2048);
395
396 anv_state_pool_init(&device->dynamic_state_pool,
397 &device->dynamic_state_block_pool);
398
399 anv_block_pool_init(&device->instruction_block_pool, device, 2048);
400 anv_block_pool_init(&device->surface_state_block_pool, device, 2048);
401
402 anv_state_pool_init(&device->surface_state_pool,
403 &device->surface_state_block_pool);
404
405 anv_block_pool_init(&device->scratch_block_pool, device, 0x10000);
406
407 device->info = *physicalDevice->info;
408
409 device->compiler = anv_compiler_create(device);
410 device->aub_writer = NULL;
411
412 pthread_mutex_init(&device->mutex, NULL);
413
414 anv_queue_init(device, &device->queue);
415
416 anv_device_init_meta(device);
417
418 anv_device_init_border_colors(device);
419
420 *pDevice = (VkDevice) device;
421
422 return VK_SUCCESS;
423
424 fail_fd:
425 close(device->fd);
426 fail_device:
427 anv_device_free(device, device);
428
429 return vk_error(VK_ERROR_UNAVAILABLE);
430 }
431
432 VkResult anv_DestroyDevice(
433 VkDevice _device)
434 {
435 struct anv_device *device = (struct anv_device *) _device;
436
437 anv_compiler_destroy(device->compiler);
438
439 anv_queue_finish(&device->queue);
440
441 anv_device_finish_meta(device);
442
443 #ifdef HAVE_VALGRIND
444 /* We only need to free these to prevent valgrind errors. The backing
445 * BO will go away in a couple of lines so we don't actually leak.
446 */
447 anv_state_pool_free(&device->dynamic_state_pool,
448 device->float_border_colors);
449 anv_state_pool_free(&device->dynamic_state_pool,
450 device->uint32_border_colors);
451 #endif
452
453 anv_bo_pool_finish(&device->batch_bo_pool);
454 anv_block_pool_finish(&device->dynamic_state_block_pool);
455 anv_block_pool_finish(&device->instruction_block_pool);
456 anv_block_pool_finish(&device->surface_state_block_pool);
457
458 close(device->fd);
459
460 if (device->aub_writer)
461 anv_aub_writer_destroy(device->aub_writer);
462
463 anv_device_free(device, device);
464
465 return VK_SUCCESS;
466 }
467
468 VkResult anv_GetGlobalExtensionInfo(
469 VkExtensionInfoType infoType,
470 uint32_t extensionIndex,
471 size_t* pDataSize,
472 void* pData)
473 {
474 static const VkExtensionProperties extensions[] = {
475 {
476 .extName = "VK_WSI_LunarG",
477 .version = 3
478 }
479 };
480 uint32_t count = ARRAY_SIZE(extensions);
481
482 switch (infoType) {
483 case VK_EXTENSION_INFO_TYPE_COUNT:
484 memcpy(pData, &count, sizeof(count));
485 *pDataSize = sizeof(count);
486 return VK_SUCCESS;
487
488 case VK_EXTENSION_INFO_TYPE_PROPERTIES:
489 if (extensionIndex >= count)
490 return vk_error(VK_ERROR_INVALID_EXTENSION);
491
492 memcpy(pData, &extensions[extensionIndex], sizeof(extensions[0]));
493 *pDataSize = sizeof(extensions[0]);
494 return VK_SUCCESS;
495
496 default:
497 return VK_UNSUPPORTED;
498 }
499 }
500
501 VkResult anv_GetPhysicalDeviceExtensionInfo(
502 VkPhysicalDevice physicalDevice,
503 VkExtensionInfoType infoType,
504 uint32_t extensionIndex,
505 size_t* pDataSize,
506 void* pData)
507 {
508 uint32_t *count;
509
510 switch (infoType) {
511 case VK_EXTENSION_INFO_TYPE_COUNT:
512 *pDataSize = 4;
513 if (pData == NULL)
514 return VK_SUCCESS;
515
516 count = pData;
517 *count = 0;
518 return VK_SUCCESS;
519
520 case VK_EXTENSION_INFO_TYPE_PROPERTIES:
521 return vk_error(VK_ERROR_INVALID_EXTENSION);
522
523 default:
524 return VK_UNSUPPORTED;
525 }
526 }
527
528 VkResult anv_EnumerateLayers(
529 VkPhysicalDevice physicalDevice,
530 size_t maxStringSize,
531 size_t* pLayerCount,
532 char* const* pOutLayers,
533 void* pReserved)
534 {
535 *pLayerCount = 0;
536
537 return VK_SUCCESS;
538 }
539
540 VkResult anv_GetDeviceQueue(
541 VkDevice _device,
542 uint32_t queueNodeIndex,
543 uint32_t queueIndex,
544 VkQueue* pQueue)
545 {
546 struct anv_device *device = (struct anv_device *) _device;
547
548 assert(queueIndex == 0);
549
550 *pQueue = (VkQueue) &device->queue;
551
552 return VK_SUCCESS;
553 }
554
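/* A reloc list holds the kernel-visible drm_i915_gem_relocation_entry array
 * plus a parallel array recording the anv_bo each entry targets.
 */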
555 VkResult
556 anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device)
557 {
558 list->num_relocs = 0;
559 list->array_length = 256;
560 list->relocs =
561 anv_device_alloc(device, list->array_length * sizeof(*list->relocs), 8,
562 VK_SYSTEM_ALLOC_TYPE_INTERNAL);
563
564 if (list->relocs == NULL)
565 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
566
567 list->reloc_bos =
568 anv_device_alloc(device, list->array_length * sizeof(*list->reloc_bos), 8,
569 VK_SYSTEM_ALLOC_TYPE_INTERNAL);
570
571 if (list->reloc_bos == NULL) {
572 anv_device_free(device, list->relocs);
573 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
574 }
575
576 return VK_SUCCESS;
577 }
578
579 void
580 anv_reloc_list_finish(struct anv_reloc_list *list, struct anv_device *device)
581 {
582 anv_device_free(device, list->relocs);
583 anv_device_free(device, list->reloc_bos);
584 }
585
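/* Grow both arrays geometrically (doubling) so repeated appends stay
 * amortized O(1). Existing entries are copied over and the old arrays freed.
 */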
586 static VkResult
587 anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device,
588 size_t num_additional_relocs)
589 {
590 if (list->num_relocs + num_additional_relocs <= list->array_length)
591 return VK_SUCCESS;
592
593 size_t new_length = list->array_length * 2;
594 while (new_length < list->num_relocs + num_additional_relocs)
595 new_length *= 2;
596
597 struct drm_i915_gem_relocation_entry *new_relocs =
598 anv_device_alloc(device, new_length * sizeof(*list->relocs), 8,
599 VK_SYSTEM_ALLOC_TYPE_INTERNAL);
600 if (new_relocs == NULL)
601 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
602
603 struct anv_bo **new_reloc_bos =
604 anv_device_alloc(device, new_length * sizeof(*list->reloc_bos), 8,
605 VK_SYSTEM_ALLOC_TYPE_INTERNAL);
606 if (new_reloc_bos == NULL) {
607 anv_device_free(device, new_relocs);
608 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
609 }
610
611 memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs));
612 memcpy(new_reloc_bos, list->reloc_bos,
613 list->num_relocs * sizeof(*list->reloc_bos));
614
615 anv_device_free(device, list->relocs);
616 anv_device_free(device, list->reloc_bos);
617
618 list->relocs = new_relocs;
619 list->reloc_bos = new_reloc_bos;
620
621 return VK_SUCCESS;
622 }
623
624 static VkResult
625 anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out)
626 {
627 VkResult result;
628
629 struct anv_batch_bo *bbo =
630 anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
631 if (bbo == NULL)
632 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
633
634 bbo->num_relocs = 0;
635 bbo->prev_batch_bo = NULL;
636
637 result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo);
638 if (result != VK_SUCCESS) {
639 anv_device_free(device, bbo);
640 return result;
641 }
642
643 *bbo_out = bbo;
644
645 return VK_SUCCESS;
646 }
647
648 static void
649 anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch,
650 size_t batch_padding)
651 {
652 batch->next = batch->start = bbo->bo.map;
653 batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
654 bbo->first_reloc = batch->relocs.num_relocs;
655 }
656
657 static void
658 anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch)
659 {
660 assert(batch->start == bbo->bo.map);
661 bbo->length = batch->next - batch->start;
662 VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length));
663 bbo->num_relocs = batch->relocs.num_relocs - bbo->first_reloc;
664 }
665
666 static void
667 anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_device *device)
668 {
669 anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo);
670 anv_device_free(device, bbo);
671 }
672
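/* Reserve space for num_dwords dwords in the batch, invoking the extend
 * callback (typically chaining to a fresh batch bo) if there is not enough
 * room left before batch->end.
 */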
673 void *
674 anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords)
675 {
676 if (batch->next + num_dwords * 4 > batch->end)
677 batch->extend_cb(batch, batch->user_data);
678
679 void *p = batch->next;
680
681 batch->next += num_dwords * 4;
682 assert(batch->next <= batch->end);
683
684 return p;
685 }
686
687 static void
688 anv_reloc_list_append(struct anv_reloc_list *list, struct anv_device *device,
689 struct anv_reloc_list *other, uint32_t offset)
690 {
691 anv_reloc_list_grow(list, device, other->num_relocs);
692 /* TODO: Handle failure */
693
694 memcpy(&list->relocs[list->num_relocs], &other->relocs[0],
695 other->num_relocs * sizeof(other->relocs[0]));
696 memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0],
697 other->num_relocs * sizeof(other->reloc_bos[0]));
698
699 for (uint32_t i = 0; i < other->num_relocs; i++)
700 list->relocs[i + list->num_relocs].offset += offset;
701
702 list->num_relocs += other->num_relocs;
703 }
704
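/* Append a single relocation and return the presumed 64-bit address
 * (target bo offset + delta) that should be written into the batch at
 * 'offset'.
 */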
705 static uint64_t
706 anv_reloc_list_add(struct anv_reloc_list *list, struct anv_device *device,
707 uint32_t offset, struct anv_bo *target_bo, uint32_t delta)
708 {
709 struct drm_i915_gem_relocation_entry *entry;
710 int index;
711
712 anv_reloc_list_grow(list, device, 1);
713 /* TODO: Handle failure */
714
715 /* XXX: Can we use I915_EXEC_HANDLE_LUT? */
716 index = list->num_relocs++;
717 list->reloc_bos[index] = target_bo;
718 entry = &list->relocs[index];
719 entry->target_handle = target_bo->gem_handle;
720 entry->delta = delta;
721 entry->offset = offset;
722 entry->presumed_offset = target_bo->offset;
723 entry->read_domains = 0;
724 entry->write_domain = 0;
725
726 return target_bo->offset + delta;
727 }
728
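/* Copy the contents of 'other' into 'batch' and carry its relocations
 * across, rebasing their offsets to the point at which the copy landed.
 */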
729 void
730 anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other)
731 {
732 uint32_t size, offset;
733
734 size = other->next - other->start;
735 assert(size % 4 == 0);
736
737 if (batch->next + size > batch->end)
738 batch->extend_cb(batch, batch->user_data);
739
740 assert(batch->next + size <= batch->end);
741
742 memcpy(batch->next, other->start, size);
743
744 offset = batch->next - batch->start;
745 anv_reloc_list_append(&batch->relocs, batch->device,
746 &other->relocs, offset);
747
748 batch->next += size;
749 }
750
751 uint64_t
752 anv_batch_emit_reloc(struct anv_batch *batch,
753 void *location, struct anv_bo *bo, uint32_t delta)
754 {
755 return anv_reloc_list_add(&batch->relocs, batch->device,
756 location - batch->start, bo, delta);
757 }
758
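/* Submit each command buffer using its pre-built execbuf. After a successful
 * submission the presumed bo offsets are updated with what the kernel chose.
 * In no_hw mode nothing is submitted and the queue serial is simply marked
 * complete; with INTEL_DEBUG=aub the batch is also dumped.
 */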
759 VkResult anv_QueueSubmit(
760 VkQueue _queue,
761 uint32_t cmdBufferCount,
762 const VkCmdBuffer* pCmdBuffers,
763 VkFence _fence)
764 {
765 struct anv_queue *queue = (struct anv_queue *) _queue;
766 struct anv_device *device = queue->device;
767 struct anv_fence *fence = (struct anv_fence *) _fence;
768 int ret;
769
770 for (uint32_t i = 0; i < cmdBufferCount; i++) {
771 struct anv_cmd_buffer *cmd_buffer =
772 (struct anv_cmd_buffer *) pCmdBuffers[i];
773
774 if (device->dump_aub)
775 anv_cmd_buffer_dump(cmd_buffer);
776
777 if (!device->no_hw) {
778 ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf);
779 if (ret != 0)
780 return vk_error(VK_ERROR_UNKNOWN);
781
782 if (fence) {
783 ret = anv_gem_execbuffer(device, &fence->execbuf);
784 if (ret != 0)
785 return vk_error(VK_ERROR_UNKNOWN);
786 }
787
788 for (uint32_t j = 0; j < cmd_buffer->bo_count; j++)
789 cmd_buffer->exec2_bos[j]->offset = cmd_buffer->exec2_objects[j].offset;
790 } else {
791 *(uint32_t *)queue->completed_serial.map = cmd_buffer->serial;
792 }
793 }
794
795 return VK_SUCCESS;
796 }
797
798 VkResult anv_QueueWaitIdle(
799 VkQueue _queue)
800 {
801 struct anv_queue *queue = (struct anv_queue *) _queue;
802
803 return vkDeviceWaitIdle((VkDevice) queue->device);
804 }
805
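/* Wait for the GPU to go idle by submitting a trivial batch (just
 * MI_BATCH_BUFFER_END) built in the dynamic state pool and then blocking on
 * its bo with an effectively infinite timeout.
 */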
806 VkResult anv_DeviceWaitIdle(
807 VkDevice _device)
808 {
809 struct anv_device *device = (struct anv_device *) _device;
810 struct anv_state state;
811 struct anv_batch batch;
812 struct drm_i915_gem_execbuffer2 execbuf;
813 struct drm_i915_gem_exec_object2 exec2_objects[1];
814 struct anv_bo *bo = NULL;
815 VkResult result;
816 int64_t timeout;
817 int ret;
818
819 state = anv_state_pool_alloc(&device->dynamic_state_pool, 32, 32);
820 bo = &device->dynamic_state_pool.block_pool->bo;
821 batch.start = batch.next = state.map;
822 batch.end = state.map + 32;
823 anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END);
824 anv_batch_emit(&batch, GEN8_MI_NOOP);
825
826 exec2_objects[0].handle = bo->gem_handle;
827 exec2_objects[0].relocation_count = 0;
828 exec2_objects[0].relocs_ptr = 0;
829 exec2_objects[0].alignment = 0;
830 exec2_objects[0].offset = bo->offset;
831 exec2_objects[0].flags = 0;
832 exec2_objects[0].rsvd1 = 0;
833 exec2_objects[0].rsvd2 = 0;
834
835 execbuf.buffers_ptr = (uintptr_t) exec2_objects;
836 execbuf.buffer_count = 1;
837 execbuf.batch_start_offset = state.offset;
838 execbuf.batch_len = batch.next - state.map;
839 execbuf.cliprects_ptr = 0;
840 execbuf.num_cliprects = 0;
841 execbuf.DR1 = 0;
842 execbuf.DR4 = 0;
843
844 execbuf.flags =
845 I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
846 execbuf.rsvd1 = device->context_id;
847 execbuf.rsvd2 = 0;
848
849 if (!device->no_hw) {
850 ret = anv_gem_execbuffer(device, &execbuf);
851 if (ret != 0) {
852 result = vk_error(VK_ERROR_UNKNOWN);
853 goto fail;
854 }
855
856 timeout = INT64_MAX;
857 ret = anv_gem_wait(device, bo->gem_handle, &timeout);
858 if (ret != 0) {
859 result = vk_error(VK_ERROR_UNKNOWN);
860 goto fail;
861 }
862 }
863
864 anv_state_pool_free(&device->dynamic_state_pool, state);
865
866 return VK_SUCCESS;
867
868 fail:
869 anv_state_pool_free(&device->dynamic_state_pool, state);
870
871 return result;
872 }
873
874 void *
875 anv_device_alloc(struct anv_device * device,
876 size_t size,
877 size_t alignment,
878 VkSystemAllocType allocType)
879 {
880 return device->instance->pfnAlloc(device->instance->pAllocUserData,
881 size,
882 alignment,
883 allocType);
884 }
885
886 void
887 anv_device_free(struct anv_device * device,
888 void * mem)
889 {
890 return device->instance->pfnFree(device->instance->pAllocUserData,
891 mem);
892 }
893
894 VkResult
895 anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
896 {
897 bo->gem_handle = anv_gem_create(device, size);
898 if (!bo->gem_handle)
899 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
900
901 bo->map = NULL;
902 bo->index = 0;
903 bo->offset = 0;
904 bo->size = size;
905
906 return VK_SUCCESS;
907 }
908
909 VkResult anv_AllocMemory(
910 VkDevice _device,
911 const VkMemoryAllocInfo* pAllocInfo,
912 VkDeviceMemory* pMem)
913 {
914 struct anv_device *device = (struct anv_device *) _device;
915 struct anv_device_memory *mem;
916 VkResult result;
917
918 assert(pAllocInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO);
919
920 mem = anv_device_alloc(device, sizeof(*mem), 8,
921 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
922 if (mem == NULL)
923 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
924
925 result = anv_bo_init_new(&mem->bo, device, pAllocInfo->allocationSize);
926 if (result != VK_SUCCESS)
927 goto fail;
928
929 *pMem = (VkDeviceMemory) mem;
930
931 return VK_SUCCESS;
932
933 fail:
934 anv_device_free(device, mem);
935
936 return result;
937 }
938
939 VkResult anv_FreeMemory(
940 VkDevice _device,
941 VkDeviceMemory _mem)
942 {
943 struct anv_device *device = (struct anv_device *) _device;
944 struct anv_device_memory *mem = (struct anv_device_memory *) _mem;
945
946 if (mem->bo.map)
947 anv_gem_munmap(mem->bo.map, mem->bo.size);
948
949 if (mem->bo.gem_handle != 0)
950 anv_gem_close(device, mem->bo.gem_handle);
951
952 anv_device_free(device, mem);
953
954 return VK_SUCCESS;
955 }
956
957 VkResult anv_MapMemory(
958 VkDevice _device,
959 VkDeviceMemory _mem,
960 VkDeviceSize offset,
961 VkDeviceSize size,
962 VkMemoryMapFlags flags,
963 void** ppData)
964 {
965 struct anv_device *device = (struct anv_device *) _device;
966 struct anv_device_memory *mem = (struct anv_device_memory *) _mem;
967
968 /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only
969 * takes a VkDeviceMemory pointer, it seems like only one map of the memory
970 * at a time is valid. We could just mmap up front and return an offset
971 * pointer here, but that may exhaust virtual memory on 32 bit
972 * userspace. */
973
974 mem->map = anv_gem_mmap(device, mem->bo.gem_handle, offset, size);
975 mem->map_size = size;
976
977 *ppData = mem->map;
978
979 return VK_SUCCESS;
980 }
981
982 VkResult anv_UnmapMemory(
983 VkDevice _device,
984 VkDeviceMemory _mem)
985 {
986 struct anv_device_memory *mem = (struct anv_device_memory *) _mem;
987
988 anv_gem_munmap(mem->map, mem->map_size);
989
990 return VK_SUCCESS;
991 }
992
993 VkResult anv_FlushMappedMemory(
994 VkDevice device,
995 VkDeviceMemory mem,
996 VkDeviceSize offset,
997 VkDeviceSize size)
998 {
999 /* clflush here for !llc platforms */
1000
1001 return VK_SUCCESS;
1002 }
1003
1004 VkResult anv_PinSystemMemory(
1005 VkDevice device,
1006 const void* pSysMem,
1007 size_t memSize,
1008 VkDeviceMemory* pMem)
1009 {
1010 return VK_SUCCESS;
1011 }
1012
1013 VkResult anv_GetMultiDeviceCompatibility(
1014 VkPhysicalDevice physicalDevice0,
1015 VkPhysicalDevice physicalDevice1,
1016 VkPhysicalDeviceCompatibilityInfo* pInfo)
1017 {
1018 return VK_UNSUPPORTED;
1019 }
1020
1021 VkResult anv_OpenSharedMemory(
1022 VkDevice device,
1023 const VkMemoryOpenInfo* pOpenInfo,
1024 VkDeviceMemory* pMem)
1025 {
1026 return VK_UNSUPPORTED;
1027 }
1028
1029 VkResult anv_OpenSharedSemaphore(
1030 VkDevice device,
1031 const VkSemaphoreOpenInfo* pOpenInfo,
1032 VkSemaphore* pSemaphore)
1033 {
1034 return VK_UNSUPPORTED;
1035 }
1036
1037 VkResult anv_OpenPeerMemory(
1038 VkDevice device,
1039 const VkPeerMemoryOpenInfo* pOpenInfo,
1040 VkDeviceMemory* pMem)
1041 {
1042 return VK_UNSUPPORTED;
1043 }
1044
1045 VkResult anv_OpenPeerImage(
1046 VkDevice device,
1047 const VkPeerImageOpenInfo* pOpenInfo,
1048 VkImage* pImage,
1049 VkDeviceMemory* pMem)
1050 {
1051 return VK_UNSUPPORTED;
1052 }
1053
1054 VkResult anv_DestroyObject(
1055 VkDevice _device,
1056 VkObjectType objType,
1057 VkObject _object)
1058 {
1059 struct anv_device *device = (struct anv_device *) _device;
1060 struct anv_object *object = (struct anv_object *) _object;
1061
1062 switch (objType) {
1063 case VK_OBJECT_TYPE_INSTANCE:
1064 return anv_DestroyInstance((VkInstance) _object);
1065
1066 case VK_OBJECT_TYPE_PHYSICAL_DEVICE:
1067 /* We don't want to actually destroy physical devices */
1068 return VK_SUCCESS;
1069
1070 case VK_OBJECT_TYPE_DEVICE:
1071 assert(_device == (VkDevice) _object);
1072 return anv_DestroyDevice((VkDevice) _object);
1073
1074 case VK_OBJECT_TYPE_QUEUE:
1075 /* TODO */
1076 return VK_SUCCESS;
1077
1078 case VK_OBJECT_TYPE_DEVICE_MEMORY:
1079 return anv_FreeMemory(_device, (VkDeviceMemory) _object);
1080
1081 case VK_OBJECT_TYPE_DESCRIPTOR_POOL:
1082 /* These are just dummies anyway, so we don't need to destroy them */
1083 return VK_SUCCESS;
1084
1085 case VK_OBJECT_TYPE_BUFFER:
1086 case VK_OBJECT_TYPE_IMAGE:
1087 case VK_OBJECT_TYPE_DEPTH_STENCIL_VIEW:
1088 case VK_OBJECT_TYPE_SHADER:
1089 case VK_OBJECT_TYPE_PIPELINE_LAYOUT:
1090 case VK_OBJECT_TYPE_SAMPLER:
1091 case VK_OBJECT_TYPE_DESCRIPTOR_SET:
1092 case VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT:
1093 case VK_OBJECT_TYPE_DYNAMIC_RS_STATE:
1094 case VK_OBJECT_TYPE_DYNAMIC_CB_STATE:
1095 case VK_OBJECT_TYPE_DYNAMIC_DS_STATE:
1096 case VK_OBJECT_TYPE_RENDER_PASS:
1097 /* These are trivially destroyable */
1098 anv_device_free(device, (void *) _object);
1099 return VK_SUCCESS;
1100
1101 case VK_OBJECT_TYPE_COMMAND_BUFFER:
1102 case VK_OBJECT_TYPE_PIPELINE:
1103 case VK_OBJECT_TYPE_DYNAMIC_VP_STATE:
1104 case VK_OBJECT_TYPE_FENCE:
1105 case VK_OBJECT_TYPE_QUERY_POOL:
1106 case VK_OBJECT_TYPE_FRAMEBUFFER:
1107 case VK_OBJECT_TYPE_BUFFER_VIEW:
1108 case VK_OBJECT_TYPE_IMAGE_VIEW:
1109 case VK_OBJECT_TYPE_COLOR_ATTACHMENT_VIEW:
1110 (object->destructor)(device, object, objType);
1111 return VK_SUCCESS;
1112
1113 case VK_OBJECT_TYPE_SEMAPHORE:
1114 case VK_OBJECT_TYPE_EVENT:
1115 stub_return(VK_UNSUPPORTED);
1116
1117 default:
1118 unreachable("Invalid object type");
1119 }
1120 }
1121
1122 static void
1123 fill_memory_requirements(
1124 VkObjectType objType,
1125 VkObject object,
1126 VkMemoryRequirements * memory_requirements)
1127 {
1128 struct anv_buffer *buffer;
1129 struct anv_image *image;
1130
1131 memory_requirements->memPropsAllowed =
1132 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1133 VK_MEMORY_PROPERTY_HOST_DEVICE_COHERENT_BIT |
1134 /* VK_MEMORY_PROPERTY_HOST_UNCACHED_BIT | */
1135 VK_MEMORY_PROPERTY_HOST_WRITE_COMBINED_BIT |
1136 VK_MEMORY_PROPERTY_PREFER_HOST_LOCAL |
1137 VK_MEMORY_PROPERTY_SHAREABLE_BIT;
1138
1139 memory_requirements->memPropsRequired = 0;
1140
1141 switch (objType) {
1142 case VK_OBJECT_TYPE_BUFFER:
1143 buffer = (struct anv_buffer *) object;
1144 memory_requirements->size = buffer->size;
1145 memory_requirements->alignment = 16;
1146 break;
1147 case VK_OBJECT_TYPE_IMAGE:
1148 image = (struct anv_image *) object;
1149 memory_requirements->size = image->size;
1150 memory_requirements->alignment = image->alignment;
1151 break;
1152 default:
1153 memory_requirements->size = 0;
1154 break;
1155 }
1156 }
1157
1158 static uint32_t
1159 get_allocation_count(VkObjectType objType)
1160 {
1161 switch (objType) {
1162 case VK_OBJECT_TYPE_BUFFER:
1163 case VK_OBJECT_TYPE_IMAGE:
1164 return 1;
1165 default:
1166 return 0;
1167 }
1168 }
1169
1170 VkResult anv_GetObjectInfo(
1171 VkDevice _device,
1172 VkObjectType objType,
1173 VkObject object,
1174 VkObjectInfoType infoType,
1175 size_t* pDataSize,
1176 void* pData)
1177 {
1178 VkMemoryRequirements memory_requirements;
1179 uint32_t *count;
1180
1181 switch (infoType) {
1182 case VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS:
1183 *pDataSize = sizeof(memory_requirements);
1184 if (pData == NULL)
1185 return VK_SUCCESS;
1186
1187 fill_memory_requirements(objType, object, pData);
1188 return VK_SUCCESS;
1189
1190 case VK_OBJECT_INFO_TYPE_MEMORY_ALLOCATION_COUNT:
1191 *pDataSize = sizeof(*count);
1192 if (pData == NULL)
1193 return VK_SUCCESS;
1194
1195 count = pData;
1196 *count = get_allocation_count(objType);
1197 return VK_SUCCESS;
1198
1199 default:
1200 return vk_error(VK_UNSUPPORTED);
1201 }
1202
1203 }
1204
1205 VkResult anv_QueueBindObjectMemory(
1206 VkQueue queue,
1207 VkObjectType objType,
1208 VkObject object,
1209 uint32_t allocationIdx,
1210 VkDeviceMemory _mem,
1211 VkDeviceSize memOffset)
1212 {
1213 struct anv_buffer *buffer;
1214 struct anv_image *image;
1215 struct anv_device_memory *mem = (struct anv_device_memory *) _mem;
1216
1217 switch (objType) {
1218 case VK_OBJECT_TYPE_BUFFER:
1219 buffer = (struct anv_buffer *) object;
1220 buffer->bo = &mem->bo;
1221 buffer->offset = memOffset;
1222 break;
1223 case VK_OBJECT_TYPE_IMAGE:
1224 image = (struct anv_image *) object;
1225 image->bo = &mem->bo;
1226 image->offset = memOffset;
1227 break;
1228 default:
1229 break;
1230 }
1231
1232 return VK_SUCCESS;
1233 }
1234
1235 VkResult anv_QueueBindObjectMemoryRange(
1236 VkQueue queue,
1237 VkObjectType objType,
1238 VkObject object,
1239 uint32_t allocationIdx,
1240 VkDeviceSize rangeOffset,
1241 VkDeviceSize rangeSize,
1242 VkDeviceMemory mem,
1243 VkDeviceSize memOffset)
1244 {
1245 stub_return(VK_UNSUPPORTED);
1246 }
1247
1248 VkResult anv_QueueBindImageMemoryRange(
1249 VkQueue queue,
1250 VkImage image,
1251 uint32_t allocationIdx,
1252 const VkImageMemoryBindInfo* pBindInfo,
1253 VkDeviceMemory mem,
1254 VkDeviceSize memOffset)
1255 {
1256 stub_return(VK_UNSUPPORTED);
1257 }
1258
1259 static void
1260 anv_fence_destroy(struct anv_device *device,
1261 struct anv_object *object,
1262 VkObjectType obj_type)
1263 {
1264 struct anv_fence *fence = (struct anv_fence *) object;
1265
1266 assert(obj_type == VK_OBJECT_TYPE_FENCE);
1267
1268 anv_gem_munmap(fence->bo.map, fence->bo.size);
1269 anv_gem_close(device, fence->bo.gem_handle);
1270 anv_device_free(device, fence);
1271 }
1272
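/* A fence is a small bo containing just MI_BATCH_BUFFER_END plus a
 * ready-to-submit execbuf. anv_QueueSubmit executes it after the real work,
 * and the fence is considered signaled once the kernel reports the bo idle.
 */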
1273 VkResult anv_CreateFence(
1274 VkDevice _device,
1275 const VkFenceCreateInfo* pCreateInfo,
1276 VkFence* pFence)
1277 {
1278 struct anv_device *device = (struct anv_device *) _device;
1279 struct anv_fence *fence;
1280 struct anv_batch batch;
1281 VkResult result;
1282
1283 const uint32_t fence_size = 128;
1284
1285 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
1286
1287 fence = anv_device_alloc(device, sizeof(*fence), 8,
1288 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1289 if (fence == NULL)
1290 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1291
1292 result = anv_bo_init_new(&fence->bo, device, fence_size);
1293 if (result != VK_SUCCESS)
1294 goto fail;
1295
1296 fence->base.destructor = anv_fence_destroy;
1297
1298 fence->bo.map =
1299 anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size);
1300 batch.next = batch.start = fence->bo.map;
1301 batch.end = fence->bo.map + fence->bo.size;
1302 anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END);
1303 anv_batch_emit(&batch, GEN8_MI_NOOP);
1304
1305 fence->exec2_objects[0].handle = fence->bo.gem_handle;
1306 fence->exec2_objects[0].relocation_count = 0;
1307 fence->exec2_objects[0].relocs_ptr = 0;
1308 fence->exec2_objects[0].alignment = 0;
1309 fence->exec2_objects[0].offset = fence->bo.offset;
1310 fence->exec2_objects[0].flags = 0;
1311 fence->exec2_objects[0].rsvd1 = 0;
1312 fence->exec2_objects[0].rsvd2 = 0;
1313
1314 fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects;
1315 fence->execbuf.buffer_count = 1;
1316 fence->execbuf.batch_start_offset = 0;
1317 fence->execbuf.batch_len = batch.next - fence->bo.map;
1318 fence->execbuf.cliprects_ptr = 0;
1319 fence->execbuf.num_cliprects = 0;
1320 fence->execbuf.DR1 = 0;
1321 fence->execbuf.DR4 = 0;
1322
1323 fence->execbuf.flags =
1324 I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
1325 fence->execbuf.rsvd1 = device->context_id;
1326 fence->execbuf.rsvd2 = 0;
1327
1328 *pFence = (VkFence) fence;
1329
1330 return VK_SUCCESS;
1331
1332 fail:
1333 anv_device_free(device, fence);
1334
1335 return result;
1336 }
1337
1338 VkResult anv_ResetFences(
1339 VkDevice _device,
1340 uint32_t fenceCount,
1341 VkFence* pFences)
1342 {
1343 struct anv_fence **fences = (struct anv_fence **) pFences;
1344
1345 for (uint32_t i = 0; i < fenceCount; i++)
1346 fences[i]->ready = false;
1347
1348 return VK_SUCCESS;
1349 }
1350
1351 VkResult anv_GetFenceStatus(
1352 VkDevice _device,
1353 VkFence _fence)
1354 {
1355 struct anv_device *device = (struct anv_device *) _device;
1356 struct anv_fence *fence = (struct anv_fence *) _fence;
1357 int64_t t = 0;
1358 int ret;
1359
1360 if (fence->ready)
1361 return VK_SUCCESS;
1362
1363 ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
1364 if (ret == 0) {
1365 fence->ready = true;
1366 return VK_SUCCESS;
1367 }
1368
1369 return VK_NOT_READY;
1370 }
1371
1372 VkResult anv_WaitForFences(
1373 VkDevice _device,
1374 uint32_t fenceCount,
1375 const VkFence* pFences,
1376 bool32_t waitAll,
1377 uint64_t timeout)
1378 {
1379 struct anv_device *device = (struct anv_device *) _device;
1380 struct anv_fence **fences = (struct anv_fence **) pFences;
1381 int64_t t = timeout;
1382 int ret;
1383
1384 /* FIXME: handle !waitAll */
1385
1386 for (uint32_t i = 0; i < fenceCount; i++) {
1387 ret = anv_gem_wait(device, fences[i]->bo.gem_handle, &t);
1388 if (ret == -1 && errno == ETIME)
1389 return VK_TIMEOUT;
1390 else if (ret == -1)
1391 return vk_error(VK_ERROR_UNKNOWN);
1392 }
1393
1394 return VK_SUCCESS;
1395 }
1396
1397 // Queue semaphore functions
1398
1399 VkResult anv_CreateSemaphore(
1400 VkDevice device,
1401 const VkSemaphoreCreateInfo* pCreateInfo,
1402 VkSemaphore* pSemaphore)
1403 {
1404 stub_return(VK_UNSUPPORTED);
1405 }
1406
1407 VkResult anv_QueueSignalSemaphore(
1408 VkQueue queue,
1409 VkSemaphore semaphore)
1410 {
1411 stub_return(VK_UNSUPPORTED);
1412 }
1413
1414 VkResult anv_QueueWaitSemaphore(
1415 VkQueue queue,
1416 VkSemaphore semaphore)
1417 {
1418 stub_return(VK_UNSUPPORTED);
1419 }
1420
1421 // Event functions
1422
1423 VkResult anv_CreateEvent(
1424 VkDevice device,
1425 const VkEventCreateInfo* pCreateInfo,
1426 VkEvent* pEvent)
1427 {
1428 stub_return(VK_UNSUPPORTED);
1429 }
1430
1431 VkResult anv_GetEventStatus(
1432 VkDevice device,
1433 VkEvent event)
1434 {
1435 stub_return(VK_UNSUPPORTED);
1436 }
1437
1438 VkResult anv_SetEvent(
1439 VkDevice device,
1440 VkEvent event)
1441 {
1442 stub_return(VK_UNSUPPORTED);
1443 }
1444
1445 VkResult anv_ResetEvent(
1446 VkDevice device,
1447 VkEvent event)
1448 {
1449 stub_return(VK_UNSUPPORTED);
1450 }
1451
1452 // Buffer functions
1453
1454 VkResult anv_CreateBuffer(
1455 VkDevice _device,
1456 const VkBufferCreateInfo* pCreateInfo,
1457 VkBuffer* pBuffer)
1458 {
1459 struct anv_device *device = (struct anv_device *) _device;
1460 struct anv_buffer *buffer;
1461
1462 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
1463
1464 buffer = anv_device_alloc(device, sizeof(*buffer), 8,
1465 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1466 if (buffer == NULL)
1467 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1468
1469 buffer->size = pCreateInfo->size;
1470 buffer->bo = NULL;
1471 buffer->offset = 0;
1472
1473 *pBuffer = (VkBuffer) buffer;
1474
1475 return VK_SUCCESS;
1476 }
1477
1478 // Buffer view functions
1479
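/* Build a GEN8 RENDER_SURFACE_STATE for a buffer view. For SURFTYPE_BUFFER
 * the element count is split across the Width (7 bits), Height (14 bits) and
 * Depth (6 bits) fields, and SurfacePitch holds the element stride minus one.
 */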
1480 static void
1481 fill_buffer_surface_state(void *state, VkFormat format,
1482 uint32_t offset, uint32_t range)
1483 {
1484 const struct anv_format *info;
1485
1486 info = anv_format_for_vk_format(format);
1487 /* This assumes RGBA float format. */
1488 uint32_t stride = 4;
1489 uint32_t num_elements = range / stride;
1490
1491 struct GEN8_RENDER_SURFACE_STATE surface_state = {
1492 .SurfaceType = SURFTYPE_BUFFER,
1493 .SurfaceArray = false,
1494 .SurfaceFormat = info->surface_format,
1495 .SurfaceVerticalAlignment = VALIGN4,
1496 .SurfaceHorizontalAlignment = HALIGN4,
1497 .TileMode = LINEAR,
1498 .VerticalLineStride = 0,
1499 .VerticalLineStrideOffset = 0,
1500 .SamplerL2BypassModeDisable = true,
1501 .RenderCacheReadWriteMode = WriteOnlyCache,
1502 .MemoryObjectControlState = GEN8_MOCS,
1503 .BaseMipLevel = 0.0,
1504 .SurfaceQPitch = 0,
1505 .Height = (num_elements >> 7) & 0x3fff,
1506 .Width = num_elements & 0x7f,
1507 .Depth = (num_elements >> 21) & 0x3f,
1508 .SurfacePitch = stride - 1,
1509 .MinimumArrayElement = 0,
1510 .NumberofMultisamples = MULTISAMPLECOUNT_1,
1511 .XOffset = 0,
1512 .YOffset = 0,
1513 .SurfaceMinLOD = 0,
1514 .MIPCountLOD = 0,
1515 .AuxiliarySurfaceMode = AUX_NONE,
1516 .RedClearColor = 0,
1517 .GreenClearColor = 0,
1518 .BlueClearColor = 0,
1519 .AlphaClearColor = 0,
1520 .ShaderChannelSelectRed = SCS_RED,
1521 .ShaderChannelSelectGreen = SCS_GREEN,
1522 .ShaderChannelSelectBlue = SCS_BLUE,
1523 .ShaderChannelSelectAlpha = SCS_ALPHA,
1524 .ResourceMinLOD = 0.0,
1525 /* FIXME: We assume that the image must be bound at this time. */
1526 .SurfaceBaseAddress = { NULL, offset },
1527 };
1528
1529 GEN8_RENDER_SURFACE_STATE_pack(NULL, state, &surface_state);
1530 }
1531
1532 VkResult anv_CreateBufferView(
1533 VkDevice _device,
1534 const VkBufferViewCreateInfo* pCreateInfo,
1535 VkBufferView* pView)
1536 {
1537 struct anv_device *device = (struct anv_device *) _device;
1538 struct anv_buffer *buffer = (struct anv_buffer *) pCreateInfo->buffer;
1539 struct anv_surface_view *view;
1540
1541 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO);
1542
1543 view = anv_device_alloc(device, sizeof(*view), 8,
1544 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1545 if (view == NULL)
1546 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1547
1548 view->base.destructor = anv_surface_view_destroy;
1549
1550 view->bo = buffer->bo;
1551 view->offset = buffer->offset + pCreateInfo->offset;
1552 view->surface_state =
1553 anv_state_pool_alloc(&device->surface_state_pool, 64, 64);
1554 view->format = pCreateInfo->format;
1555 view->range = pCreateInfo->range;
1556
1557 fill_buffer_surface_state(view->surface_state.map,
1558 pCreateInfo->format, view->offset, pCreateInfo->range);
1559
1560 *pView = (VkBufferView) view;
1561
1562 return VK_SUCCESS;
1563 }
1564
1565 // Sampler functions
1566
1567 VkResult anv_CreateSampler(
1568 VkDevice _device,
1569 const VkSamplerCreateInfo* pCreateInfo,
1570 VkSampler* pSampler)
1571 {
1572 struct anv_device *device = (struct anv_device *) _device;
1573 struct anv_sampler *sampler;
1574 uint32_t mag_filter, min_filter, max_anisotropy;
1575
1576 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
1577
1578 sampler = anv_device_alloc(device, sizeof(*sampler), 8,
1579 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1580 if (!sampler)
1581 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1582
1583 static const uint32_t vk_to_gen_tex_filter[] = {
1584 [VK_TEX_FILTER_NEAREST] = MAPFILTER_NEAREST,
1585 [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR
1586 };
1587
1588 static const uint32_t vk_to_gen_mipmap_mode[] = {
1589 [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE,
1590 [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST,
1591 [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR
1592 };
1593
1594 static const uint32_t vk_to_gen_tex_address[] = {
1595 [VK_TEX_ADDRESS_WRAP] = TCM_WRAP,
1596 [VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR,
1597 [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP,
1598 [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE,
1599 [VK_TEX_ADDRESS_CLAMP_BORDER] = TCM_CLAMP_BORDER,
1600 };
1601
1602 static const uint32_t vk_to_gen_compare_op[] = {
1603 [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER,
1604 [VK_COMPARE_OP_LESS] = PREFILTEROPLESS,
1605 [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL,
1606 [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL,
1607 [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER,
1608 [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL,
1609 [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL,
1610 [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS,
1611 };
1612
1613 if (pCreateInfo->maxAnisotropy > 1) {
1614 mag_filter = MAPFILTER_ANISOTROPIC;
1615 min_filter = MAPFILTER_ANISOTROPIC;
1616 max_anisotropy = (pCreateInfo->maxAnisotropy - 2) / 2;
1617 } else {
1618 mag_filter = vk_to_gen_tex_filter[pCreateInfo->magFilter];
1619 min_filter = vk_to_gen_tex_filter[pCreateInfo->minFilter];
1620 max_anisotropy = RATIO21;
1621 }
1622
1623 struct GEN8_SAMPLER_STATE sampler_state = {
1624 .SamplerDisable = false,
1625 .TextureBorderColorMode = DX10OGL,
1626 .LODPreClampMode = 0,
1627 .BaseMipLevel = 0.0,
1628 .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode],
1629 .MagModeFilter = mag_filter,
1630 .MinModeFilter = min_filter,
1631 .TextureLODBias = pCreateInfo->mipLodBias * 256,
1632 .AnisotropicAlgorithm = EWAApproximation,
1633 .MinLOD = pCreateInfo->minLod,
1634 .MaxLOD = pCreateInfo->maxLod,
1635 .ChromaKeyEnable = 0,
1636 .ChromaKeyIndex = 0,
1637 .ChromaKeyMode = 0,
1638 .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp],
1639 .CubeSurfaceControlMode = 0,
1640
1641 .IndirectStatePointer =
1642 device->float_border_colors.offset +
1643 pCreateInfo->borderColor * sizeof(float) * 4,
1644
1645 .LODClampMagnificationMode = MIPNONE,
1646 .MaximumAnisotropy = max_anisotropy,
1647 .RAddressMinFilterRoundingEnable = 0,
1648 .RAddressMagFilterRoundingEnable = 0,
1649 .VAddressMinFilterRoundingEnable = 0,
1650 .VAddressMagFilterRoundingEnable = 0,
1651 .UAddressMinFilterRoundingEnable = 0,
1652 .UAddressMagFilterRoundingEnable = 0,
1653 .TrilinearFilterQuality = 0,
1654 .NonnormalizedCoordinateEnable = 0,
1655 .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressU],
1656 .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressV],
1657 .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressW],
1658 };
1659
1660 GEN8_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state);
1661
1662 *pSampler = (VkSampler) sampler;
1663
1664 return VK_SUCCESS;
1665 }
1666
1667 // Descriptor set functions
1668
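/* A descriptor set layout is flattened into per-shader-stage arrays of
 * anv_descriptor_slot. The first pass counts samplers, surfaces and dynamic
 * buffers per stage; the second pass records, for every slot, the index of
 * the descriptor it reads and the dynamic-offset slot it uses (or -1).
 */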
1669 VkResult anv_CreateDescriptorSetLayout(
1670 VkDevice _device,
1671 const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
1672 VkDescriptorSetLayout* pSetLayout)
1673 {
1674 struct anv_device *device = (struct anv_device *) _device;
1675 struct anv_descriptor_set_layout *set_layout;
1676
1677 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO);
1678
1679 uint32_t sampler_count[VK_NUM_SHADER_STAGE] = { 0, };
1680 uint32_t surface_count[VK_NUM_SHADER_STAGE] = { 0, };
1681 uint32_t num_dynamic_buffers = 0;
1682 uint32_t count = 0;
1683 uint32_t stages = 0;
1684 uint32_t s;
1685
1686 for (uint32_t i = 0; i < pCreateInfo->count; i++) {
1687 switch (pCreateInfo->pBinding[i].descriptorType) {
1688 case VK_DESCRIPTOR_TYPE_SAMPLER:
1689 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1690 for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
1691 sampler_count[s] += pCreateInfo->pBinding[i].count;
1692 break;
1693 default:
1694 break;
1695 }
1696
1697 switch (pCreateInfo->pBinding[i].descriptorType) {
1698 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1699 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
1700 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
1701 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
1702 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
1703 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1704 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1705 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1706 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1707 for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
1708 surface_count[s] += pCreateInfo->pBinding[i].count;
1709 break;
1710 default:
1711 break;
1712 }
1713
1714 switch (pCreateInfo->pBinding[i].descriptorType) {
1715 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1716 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1717 num_dynamic_buffers += pCreateInfo->pBinding[i].count;
1718 break;
1719 default:
1720 break;
1721 }
1722
1723 stages |= pCreateInfo->pBinding[i].stageFlags;
1724 count += pCreateInfo->pBinding[i].count;
1725 }
1726
1727 uint32_t sampler_total = 0;
1728 uint32_t surface_total = 0;
1729 for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) {
1730 sampler_total += sampler_count[s];
1731 surface_total += surface_count[s];
1732 }
1733
1734 size_t size = sizeof(*set_layout) +
1735 (sampler_total + surface_total) * sizeof(set_layout->entries[0]);
1736 set_layout = anv_device_alloc(device, size, 8,
1737 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1738 if (!set_layout)
1739 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1740
1741 set_layout->num_dynamic_buffers = num_dynamic_buffers;
1742 set_layout->count = count;
1743 set_layout->shader_stages = stages;
1744
1745 struct anv_descriptor_slot *p = set_layout->entries;
1746 struct anv_descriptor_slot *sampler[VK_NUM_SHADER_STAGE];
1747 struct anv_descriptor_slot *surface[VK_NUM_SHADER_STAGE];
1748 for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) {
1749 set_layout->stage[s].surface_count = surface_count[s];
1750 set_layout->stage[s].surface_start = surface[s] = p;
1751 p += surface_count[s];
1752 set_layout->stage[s].sampler_count = sampler_count[s];
1753 set_layout->stage[s].sampler_start = sampler[s] = p;
1754 p += sampler_count[s];
1755 }
1756
1757 uint32_t descriptor = 0;
1758 int8_t dynamic_slot = 0;
1759 bool is_dynamic;
1760 for (uint32_t i = 0; i < pCreateInfo->count; i++) {
1761 switch (pCreateInfo->pBinding[i].descriptorType) {
1762 case VK_DESCRIPTOR_TYPE_SAMPLER:
1763 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1764 for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
1765 for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) {
1766 sampler[s]->index = descriptor + j;
1767 sampler[s]->dynamic_slot = -1;
1768 sampler[s]++;
1769 }
1770 break;
1771 default:
1772 break;
1773 }
1774
1775 switch (pCreateInfo->pBinding[i].descriptorType) {
1776 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1777 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1778 is_dynamic = true;
1779 break;
1780 default:
1781 is_dynamic = false;
1782 break;
1783 }
1784
1785 switch (pCreateInfo->pBinding[i].descriptorType) {
1786 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1787 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
1788 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
1789 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
1790 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
1791 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1792 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1793 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1794 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1795 for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
1796 for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) {
1797 surface[s]->index = descriptor + j;
1798 if (is_dynamic)
1799 surface[s]->dynamic_slot = dynamic_slot + j;
1800 else
1801 surface[s]->dynamic_slot = -1;
1802 surface[s]++;
1803 }
1804 break;
1805 default:
1806 break;
1807 }
1808
1809 if (is_dynamic)
1810 dynamic_slot += pCreateInfo->pBinding[i].count;
1811
1812 descriptor += pCreateInfo->pBinding[i].count;
1813 }
1814
1815 *pSetLayout = (VkDescriptorSetLayout) set_layout;
1816
1817 return VK_SUCCESS;
1818 }
1819
1820 VkResult anv_CreateDescriptorPool(
1821 VkDevice device,
1822 VkDescriptorPoolUsage poolUsage,
1823 uint32_t maxSets,
1824 const VkDescriptorPoolCreateInfo* pCreateInfo,
1825 VkDescriptorPool* pDescriptorPool)
1826 {
1827 *pDescriptorPool = 1;
1828
1829 return VK_SUCCESS;
1830 }
1831
1832 VkResult anv_ResetDescriptorPool(
1833 VkDevice device,
1834 VkDescriptorPool descriptorPool)
1835 {
1836 return VK_SUCCESS;
1837 }
1838
1839 VkResult anv_AllocDescriptorSets(
1840 VkDevice _device,
1841 VkDescriptorPool descriptorPool,
1842 VkDescriptorSetUsage setUsage,
1843 uint32_t count,
1844 const VkDescriptorSetLayout* pSetLayouts,
1845 VkDescriptorSet* pDescriptorSets,
1846 uint32_t* pCount)
1847 {
1848 struct anv_device *device = (struct anv_device *) _device;
1849 const struct anv_descriptor_set_layout *layout;
1850 struct anv_descriptor_set *set;
1851 size_t size;
1852
1853 for (uint32_t i = 0; i < count; i++) {
1854 layout = (struct anv_descriptor_set_layout *) pSetLayouts[i];
1855 size = sizeof(*set) + layout->count * sizeof(set->descriptors[0]);
1856 set = anv_device_alloc(device, size, 8,
1857 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1858 if (!set) {
1859 *pCount = i;
1860 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1861 }
1862
1863 /* Descriptor sets may not be 100% filled out so we need to memset to
1864 * ensure that we can properly detect and handle holes.
1865 */
1866 memset(set, 0, size);
1867
1868 pDescriptorSets[i] = (VkDescriptorSet) set;
1869 }
1870
1871 *pCount = count;
1872
1873 return VK_SUCCESS;
1874 }
1875
1876 void anv_ClearDescriptorSets(
1877 VkDevice device,
1878 VkDescriptorPool descriptorPool,
1879 uint32_t count,
1880 const VkDescriptorSet* pDescriptorSets)
1881 {
1882 }
1883
1884 void anv_UpdateDescriptors(
1885 VkDevice _device,
1886 VkDescriptorSet descriptorSet,
1887 uint32_t updateCount,
1888 const void** ppUpdateArray)
1889 {
1890 struct anv_descriptor_set *set = (struct anv_descriptor_set *) descriptorSet;
1891 VkUpdateSamplers *update_samplers;
1892 VkUpdateSamplerTextures *update_sampler_textures;
1893 VkUpdateImages *update_images;
1894 VkUpdateBuffers *update_buffers;
1895 VkUpdateAsCopy *update_as_copy;
1896
1897 for (uint32_t i = 0; i < updateCount; i++) {
1898 const struct anv_common *common = ppUpdateArray[i];
1899
1900 switch (common->sType) {
1901 case VK_STRUCTURE_TYPE_UPDATE_SAMPLERS:
1902 update_samplers = (VkUpdateSamplers *) common;
1903
1904 for (uint32_t j = 0; j < update_samplers->count; j++) {
1905 set->descriptors[update_samplers->binding + j].sampler =
1906 (struct anv_sampler *) update_samplers->pSamplers[j];
1907 }
1908 break;
1909
1910 case VK_STRUCTURE_TYPE_UPDATE_SAMPLER_TEXTURES:
1911 /* FIXME: Shouldn't this be *_UPDATE_SAMPLER_IMAGES? */
1912 update_sampler_textures = (VkUpdateSamplerTextures *) common;
1913
1914 for (uint32_t j = 0; j < update_sampler_textures->count; j++) {
1915 set->descriptors[update_sampler_textures->binding + j].view =
1916 (struct anv_surface_view *)
1917 update_sampler_textures->pSamplerImageViews[j].pImageView->view;
1918 set->descriptors[update_sampler_textures->binding + j].sampler =
1919 (struct anv_sampler *)
1920 update_sampler_textures->pSamplerImageViews[j].sampler;
1921 }
1922 break;
1923
1924 case VK_STRUCTURE_TYPE_UPDATE_IMAGES:
1925 update_images = (VkUpdateImages *) common;
1926
1927 for (uint32_t j = 0; j < update_images->count; j++) {
1928 set->descriptors[update_images->binding + j].view =
1929 (struct anv_surface_view *) update_images->pImageViews[j].view;
1930 }
1931 break;
1932
1933 case VK_STRUCTURE_TYPE_UPDATE_BUFFERS:
1934 update_buffers = (VkUpdateBuffers *) common;
1935
1936 for (uint32_t j = 0; j < update_buffers->count; j++) {
1937 set->descriptors[update_buffers->binding + j].view =
1938 (struct anv_surface_view *) update_buffers->pBufferViews[j].view;
1939 }
1940 /* FIXME: descriptor arrays? */
1941 break;
1942
1943 case VK_STRUCTURE_TYPE_UPDATE_AS_COPY:
1944 update_as_copy = (VkUpdateAsCopy *) common;
1945 (void) update_as_copy;
1946 break;
1947
1948 default:
1949 break;
1950 }
1951 }
1952 }
1953
1954 // State object functions
1955
1956 static inline int64_t
1957 clamp_int64(int64_t x, int64_t min, int64_t max)
1958 {
1959 if (x < min)
1960 return min;
1961 else if (x < max)
1962 return x;
1963 else
1964 return max;
1965 }
1966
1967 static void
1968 anv_dynamic_vp_state_destroy(struct anv_device *device,
1969 struct anv_object *object,
1970 VkObjectType obj_type)
1971 {
1972 struct anv_dynamic_vp_state *state = (void *)object;
1973
1974 assert(obj_type == VK_OBJECT_TYPE_DYNAMIC_VP_STATE);
1975
1976 anv_state_pool_free(&device->dynamic_state_pool, state->sf_clip_vp);
1977 anv_state_pool_free(&device->dynamic_state_pool, state->cc_vp);
1978 anv_state_pool_free(&device->dynamic_state_pool, state->scissor);
1979
1980 anv_device_free(device, state);
1981 }
1982
1983 VkResult anv_CreateDynamicViewportState(
1984 VkDevice _device,
1985 const VkDynamicVpStateCreateInfo* pCreateInfo,
1986 VkDynamicVpState* pState)
1987 {
1988 struct anv_device *device = (struct anv_device *) _device;
1989 struct anv_dynamic_vp_state *state;
1990
1991 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO);
1992
1993 state = anv_device_alloc(device, sizeof(*state), 8,
1994 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1995 if (state == NULL)
1996 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1997
1998 state->base.destructor = anv_dynamic_vp_state_destroy;
1999
2000 unsigned count = pCreateInfo->viewportAndScissorCount;
2001 state->sf_clip_vp = anv_state_pool_alloc(&device->dynamic_state_pool,
2002 count * 64, 64);
2003 state->cc_vp = anv_state_pool_alloc(&device->dynamic_state_pool,
2004 count * 8, 32);
2005 state->scissor = anv_state_pool_alloc(&device->dynamic_state_pool,
2006 count * 32, 32);
2007
2008 for (uint32_t i = 0; i < pCreateInfo->viewportAndScissorCount; i++) {
2009 const VkViewport *vp = &pCreateInfo->pViewports[i];
2010 const VkRect *s = &pCreateInfo->pScissors[i];
2011
2012 struct GEN8_SF_CLIP_VIEWPORT sf_clip_viewport = {
2013 .ViewportMatrixElementm00 = vp->width / 2,
2014 .ViewportMatrixElementm11 = vp->height / 2,
2015 .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) / 2,
2016 .ViewportMatrixElementm30 = vp->originX + vp->width / 2,
2017 .ViewportMatrixElementm31 = vp->originY + vp->height / 2,
2018 .ViewportMatrixElementm32 = (vp->maxDepth + vp->minDepth) / 2,
2019 .XMinClipGuardband = -1.0f,
2020 .XMaxClipGuardband = 1.0f,
2021 .YMinClipGuardband = -1.0f,
2022 .YMaxClipGuardband = 1.0f,
2023 .XMinViewPort = vp->originX,
2024 .XMaxViewPort = vp->originX + vp->width - 1,
2025 .YMinViewPort = vp->originY,
2026 .YMaxViewPort = vp->originY + vp->height - 1,
2027 };
2028
2029 struct GEN8_CC_VIEWPORT cc_viewport = {
2030 .MinimumDepth = vp->minDepth,
2031 .MaximumDepth = vp->maxDepth
2032 };
2033
2034 /* Since xmax and ymax are inclusive, we have to have xmax < xmin or
2035 * ymax < ymin for empty clips. If the clip x, y, width and height are all
2036 * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't
2037 * what we want. Just special case empty clips and produce a canonical
2038 * empty clip. */
2039 static const struct GEN8_SCISSOR_RECT empty_scissor = {
2040 .ScissorRectangleYMin = 1,
2041 .ScissorRectangleXMin = 1,
2042 .ScissorRectangleYMax = 0,
2043 .ScissorRectangleXMax = 0
2044 };
2045
2046 const int max = 0xffff;
2047 struct GEN8_SCISSOR_RECT scissor = {
2048 /* Do this math using int64_t so overflow gets clamped correctly. */
2049 .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max),
2050 .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max),
2051 .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max),
2052 .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max)
2053 };
2054
2055 GEN8_SF_CLIP_VIEWPORT_pack(NULL, state->sf_clip_vp.map + i * 64, &sf_clip_viewport);
2056 GEN8_CC_VIEWPORT_pack(NULL, state->cc_vp.map + i * 8, &cc_viewport);
2057
2058 if (s->extent.width <= 0 || s->extent.height <= 0) {
2059 GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &empty_scissor);
2060 } else {
2061 GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &scissor);
2062 }
2063 }
2064
2065 *pState = (VkDynamicVpState) state;
2066
2067 return VK_SUCCESS;
2068 }
2069
2070 VkResult anv_CreateDynamicRasterState(
2071 VkDevice _device,
2072 const VkDynamicRsStateCreateInfo* pCreateInfo,
2073 VkDynamicRsState* pState)
2074 {
2075 struct anv_device *device = (struct anv_device *) _device;
2076 struct anv_dynamic_rs_state *state;
2077
2078 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO);
2079
2080 state = anv_device_alloc(device, sizeof(*state), 8,
2081 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2082 if (state == NULL)
2083 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2084
2085 /* Missing these:
2086 * float pointFadeThreshold;
2087 * // optional (GL45) - Size of point fade threshold
2088 */
2089
2090 struct GEN8_3DSTATE_SF sf = {
2091 GEN8_3DSTATE_SF_header,
2092 .LineWidth = pCreateInfo->lineWidth,
2093 .PointWidth = pCreateInfo->pointSize,
2094 };
2095
2096 GEN8_3DSTATE_SF_pack(NULL, state->state_sf, &sf);
2097
2098 bool enable_bias = pCreateInfo->depthBias != 0.0f ||
2099 pCreateInfo->slopeScaledDepthBias != 0.0f;
2100 struct GEN8_3DSTATE_RASTER raster = {
2101 .GlobalDepthOffsetEnableSolid = enable_bias,
2102 .GlobalDepthOffsetEnableWireframe = enable_bias,
2103 .GlobalDepthOffsetEnablePoint = enable_bias,
2104 .GlobalDepthOffsetConstant = pCreateInfo->depthBias,
2105 .GlobalDepthOffsetScale = pCreateInfo->slopeScaledDepthBias,
2106 .GlobalDepthOffsetClamp = pCreateInfo->depthBiasClamp
2107 };
2108
2109 GEN8_3DSTATE_RASTER_pack(NULL, state->state_raster, &raster);
2110
2111 *pState = (VkDynamicRsState) state;
2112
2113 return VK_SUCCESS;
2114 }
2115
2116 VkResult anv_CreateDynamicColorBlendState(
2117 VkDevice _device,
2118 const VkDynamicCbStateCreateInfo* pCreateInfo,
2119 VkDynamicCbState* pState)
2120 {
2121 struct anv_device *device = (struct anv_device *) _device;
2122 struct anv_dynamic_cb_state *state;
2123
2124 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO);
2125
2126 state = anv_device_alloc(device, sizeof(*state), 8,
2127 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2128 if (state == NULL)
2129 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2130
2131 struct GEN8_COLOR_CALC_STATE color_calc_state = {
2132 .BlendConstantColorRed = pCreateInfo->blendConst[0],
2133 .BlendConstantColorGreen = pCreateInfo->blendConst[1],
2134 .BlendConstantColorBlue = pCreateInfo->blendConst[2],
2135 .BlendConstantColorAlpha = pCreateInfo->blendConst[3]
2136 };
2137
2138 GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state);
2139
2140 *pState = (VkDynamicCbState) state;
2141
2142 return VK_SUCCESS;
2143 }
2144
2145 VkResult anv_CreateDynamicDepthStencilState(
2146 VkDevice _device,
2147 const VkDynamicDsStateCreateInfo* pCreateInfo,
2148 VkDynamicDsState* pState)
2149 {
2150 struct anv_device *device = (struct anv_device *) _device;
2151 struct anv_dynamic_ds_state *state;
2152
2153 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_DS_STATE_CREATE_INFO);
2154
2155 state = anv_device_alloc(device, sizeof(*state), 8,
2156 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2157 if (state == NULL)
2158 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2159
2160 struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = {
2161 GEN8_3DSTATE_WM_DEPTH_STENCIL_header,
2162
2163 /* Is this what we need to do? */
2164 .StencilBufferWriteEnable = pCreateInfo->stencilWriteMask != 0,
2165
2166 .StencilTestMask = pCreateInfo->stencilReadMask & 0xff,
2167 .StencilWriteMask = pCreateInfo->stencilWriteMask & 0xff,
2168
2169 .BackfaceStencilTestMask = pCreateInfo->stencilReadMask & 0xff,
2170 .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask & 0xff,
2171 };
2172
2173 GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, state->state_wm_depth_stencil,
2174 &wm_depth_stencil);
2175
2176 struct GEN8_COLOR_CALC_STATE color_calc_state = {
2177 .StencilReferenceValue = pCreateInfo->stencilFrontRef,
2178 .BackFaceStencilReferenceValue = pCreateInfo->stencilBackRef
2179 };
2180
2181 GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state);
2182
2183 *pState = (VkDynamicDsState) state;
2184
2185 return VK_SUCCESS;
2186 }
2187
2188 // Command buffer functions
2189
2190 static void
2191 anv_cmd_buffer_destroy(struct anv_device *device,
2192 struct anv_object *object,
2193 VkObjectType obj_type)
2194 {
2195 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) object;
2196
2197 assert(obj_type == VK_OBJECT_TYPE_COMMAND_BUFFER);
2198
2199 /* Destroy all of the batch buffers */
2200 struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo;
2201 while (bbo) {
2202 struct anv_batch_bo *prev = bbo->prev_batch_bo;
2203 anv_batch_bo_destroy(bbo, device);
2204 bbo = prev;
2205 }
2206 anv_reloc_list_finish(&cmd_buffer->batch.relocs, device);
2207
2208 /* Destroy all of the surface state buffers */
2209 bbo = cmd_buffer->surface_batch_bo;
2210 while (bbo) {
2211 struct anv_batch_bo *prev = bbo->prev_batch_bo;
2212 anv_batch_bo_destroy(bbo, device);
2213 bbo = prev;
2214 }
2215 anv_reloc_list_finish(&cmd_buffer->surface_relocs, device);
2216
2217 anv_state_stream_finish(&cmd_buffer->surface_state_stream);
2218 anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
2219 anv_device_free(device, cmd_buffer->exec2_objects);
2220 anv_device_free(device, cmd_buffer->exec2_bos);
2221 anv_device_free(device, cmd_buffer);
2222 }
2223
2224 static VkResult
2225 anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data)
2226 {
2227 struct anv_cmd_buffer *cmd_buffer = _data;
2228
2229 struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->last_batch_bo;
2230
2231 VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo);
2232 if (result != VK_SUCCESS)
2233 return result;
2234
2235 /* We set the end of the batch a little short so that we are sure to
2236 * have room for the chaining command. Since we're about to emit the
2237 * chaining command, let's set it back where it should go.
2238 */
2239 batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4;
2240 assert(batch->end == old_bbo->bo.map + old_bbo->bo.size);
2241
2242 anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START,
2243 GEN8_MI_BATCH_BUFFER_START_header,
2244 ._2ndLevelBatchBuffer = _1stlevelbatch,
2245 .AddressSpaceIndicator = ASI_PPGTT,
2246 .BatchBufferStartAddress = { &new_bbo->bo, 0 },
2247 );
2248
2249 /* Pad out to a 2-dword aligned boundary with zeros */
2250 if ((uintptr_t)batch->next % 8 != 0) {
2251 *(uint32_t *)batch->next = 0;
2252 batch->next += 4;
2253 }
2254
2255 anv_batch_bo_finish(cmd_buffer->last_batch_bo, batch);
2256
2257 new_bbo->prev_batch_bo = old_bbo;
2258 cmd_buffer->last_batch_bo = new_bbo;
2259
2260 anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4);
2261
2262 return VK_SUCCESS;
2263 }
2264
2265 VkResult anv_CreateCommandBuffer(
2266 VkDevice _device,
2267 const VkCmdBufferCreateInfo* pCreateInfo,
2268 VkCmdBuffer* pCmdBuffer)
2269 {
2270 struct anv_device *device = (struct anv_device *) _device;
2271 struct anv_cmd_buffer *cmd_buffer;
2272 VkResult result;
2273
2274 cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8,
2275 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2276 if (cmd_buffer == NULL)
2277 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2278
2279 cmd_buffer->base.destructor = anv_cmd_buffer_destroy;
2280
2281 cmd_buffer->device = device;
2282 cmd_buffer->rs_state = NULL;
2283 cmd_buffer->vp_state = NULL;
2284 cmd_buffer->cb_state = NULL;
2285 cmd_buffer->ds_state = NULL;
2286 memset(&cmd_buffer->descriptors, 0, sizeof(cmd_buffer->descriptors));
2287
2288 result = anv_batch_bo_create(device, &cmd_buffer->last_batch_bo);
2289 if (result != VK_SUCCESS)
2290 goto fail;
2291
2292 result = anv_reloc_list_init(&cmd_buffer->batch.relocs, device);
2293 if (result != VK_SUCCESS)
2294 goto fail_batch_bo;
2295
2296 cmd_buffer->batch.device = device;
2297 cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch;
2298 cmd_buffer->batch.user_data = cmd_buffer;
2299
2300 anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch,
2301 GEN8_MI_BATCH_BUFFER_START_length * 4);
2302
2303 result = anv_batch_bo_create(device, &cmd_buffer->surface_batch_bo);
2304 if (result != VK_SUCCESS)
2305 goto fail_batch_relocs;
2306 cmd_buffer->surface_batch_bo->first_reloc = 0;
2307
2308 result = anv_reloc_list_init(&cmd_buffer->surface_relocs, device);
2309 if (result != VK_SUCCESS)
2310 goto fail_ss_batch_bo;
2311
2312 /* Start surface_next at 1 so surface offset 0 is invalid. */
2313 cmd_buffer->surface_next = 1;
2314
2315 cmd_buffer->exec2_objects = NULL;
2316 cmd_buffer->exec2_bos = NULL;
2317 cmd_buffer->exec2_array_length = 0;
2318
2319 anv_state_stream_init(&cmd_buffer->surface_state_stream,
2320 &device->surface_state_block_pool);
2321 anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
2322 &device->dynamic_state_block_pool);
2323
2324 cmd_buffer->dirty = 0;
2325 cmd_buffer->vb_dirty = 0;
2326 cmd_buffer->descriptors_dirty = 0;
2327 cmd_buffer->pipeline = NULL;
2328 cmd_buffer->vp_state = NULL;
2329 cmd_buffer->rs_state = NULL;
2330 cmd_buffer->ds_state = NULL;
2331
2332 *pCmdBuffer = (VkCmdBuffer) cmd_buffer;
2333
2334 return VK_SUCCESS;
2335
2336 fail_ss_batch_bo:
2337 anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, device);
2338 fail_batch_relocs:
2339 anv_reloc_list_finish(&cmd_buffer->batch.relocs, device);
2340 fail_batch_bo:
2341 anv_batch_bo_destroy(cmd_buffer->last_batch_bo, device);
2342 fail:
2343 anv_device_free(device, cmd_buffer);
2344
2345 return result;
2346 }
2347
2348 static void
2349 anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
2350 {
2351 struct anv_device *device = cmd_buffer->device;
2352 struct anv_bo *scratch_bo = NULL;
2353
2354 cmd_buffer->scratch_size = device->scratch_block_pool.size;
2355 if (cmd_buffer->scratch_size > 0)
2356 scratch_bo = &device->scratch_block_pool.bo;
2357
2358 anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS,
2359 .GeneralStateBaseAddress = { scratch_bo, 0 },
2360 .GeneralStateMemoryObjectControlState = GEN8_MOCS,
2361 .GeneralStateBaseAddressModifyEnable = true,
2362 .GeneralStateBufferSize = 0xfffff,
2363 .GeneralStateBufferSizeModifyEnable = true,
2364
2365 .SurfaceStateBaseAddress = { &cmd_buffer->surface_batch_bo->bo, 0 },
2366 .SurfaceStateMemoryObjectControlState = GEN8_MOCS,
2367 .SurfaceStateBaseAddressModifyEnable = true,
2368
2369 .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 },
2370 .DynamicStateMemoryObjectControlState = GEN8_MOCS,
2371 .DynamicStateBaseAddressModifyEnable = true,
2372 .DynamicStateBufferSize = 0xfffff,
2373 .DynamicStateBufferSizeModifyEnable = true,
2374
2375 .IndirectObjectBaseAddress = { NULL, 0 },
2376 .IndirectObjectMemoryObjectControlState = GEN8_MOCS,
2377 .IndirectObjectBaseAddressModifyEnable = true,
2378 .IndirectObjectBufferSize = 0xfffff,
2379 .IndirectObjectBufferSizeModifyEnable = true,
2380
2381 .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 },
2382 .InstructionMemoryObjectControlState = GEN8_MOCS,
2383 .InstructionBaseAddressModifyEnable = true,
2384 .InstructionBufferSize = 0xfffff,
2385 .InstructionBuffersizeModifyEnable = true);
2386 }
2387
2388 VkResult anv_BeginCommandBuffer(
2389 VkCmdBuffer cmdBuffer,
2390 const VkCmdBufferBeginInfo* pBeginInfo)
2391 {
2392 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2393
2394 anv_cmd_buffer_emit_state_base_address(cmd_buffer);
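/* Force a PIPELINE_SELECT on the first flush; the flush functions only
 * emit it when current_pipeline changes.
 */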
2395 cmd_buffer->current_pipeline = UINT32_MAX;
2396
2397 return VK_SUCCESS;
2398 }
2399
2400 static VkResult
2401 anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer,
2402 struct anv_bo *bo,
2403 struct drm_i915_gem_relocation_entry *relocs,
2404 size_t num_relocs)
2405 {
2406 struct drm_i915_gem_exec_object2 *obj;
2407
2408 if (bo->index < cmd_buffer->bo_count &&
2409 cmd_buffer->exec2_bos[bo->index] == bo)
2410 return VK_SUCCESS;
2411
2412 if (cmd_buffer->bo_count >= cmd_buffer->exec2_array_length) {
2413 uint32_t new_len = cmd_buffer->exec2_objects ?
2414 cmd_buffer->exec2_array_length * 2 : 64;
2415
2416 struct drm_i915_gem_exec_object2 *new_objects =
2417 anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_objects),
2418 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
2419 if (new_objects == NULL)
2420 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2421
2422 struct anv_bo **new_bos =
2423 anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_bos),
2424 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
2425 if (new_bos == NULL) {
2426 anv_device_free(cmd_buffer->device, new_objects);
2427 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2428 }
2429
2430 if (cmd_buffer->exec2_objects) {
2431 memcpy(new_objects, cmd_buffer->exec2_objects,
2432 cmd_buffer->bo_count * sizeof(*new_objects));
2433 memcpy(new_bos, cmd_buffer->exec2_bos,
2434 cmd_buffer->bo_count * sizeof(*new_bos));
2435 }
2436
2437 cmd_buffer->exec2_objects = new_objects;
2438 cmd_buffer->exec2_bos = new_bos;
2439 cmd_buffer->exec2_array_length = new_len;
2440 }
2441
2442 assert(cmd_buffer->bo_count < cmd_buffer->exec2_array_length);
2443
2444 bo->index = cmd_buffer->bo_count++;
2445 obj = &cmd_buffer->exec2_objects[bo->index];
2446 cmd_buffer->exec2_bos[bo->index] = bo;
2447
2448 obj->handle = bo->gem_handle;
2449 obj->relocation_count = 0;
2450 obj->relocs_ptr = 0;
2451 obj->alignment = 0;
2452 obj->offset = bo->offset;
2453 obj->flags = 0;
2454 obj->rsvd1 = 0;
2455 obj->rsvd2 = 0;
2456
2457 if (relocs) {
2458 obj->relocation_count = num_relocs;
2459 obj->relocs_ptr = (uintptr_t) relocs;
2460 }
2461
2462 return VK_SUCCESS;
2463 }
2464
2465 static void
2466 anv_cmd_buffer_add_validate_bos(struct anv_cmd_buffer *cmd_buffer,
2467 struct anv_reloc_list *list)
2468 {
2469 for (size_t i = 0; i < list->num_relocs; i++)
2470 anv_cmd_buffer_add_bo(cmd_buffer, list->reloc_bos[i], NULL, 0);
2471 }
2472
2473 static void
2474 anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer,
2475 struct anv_reloc_list *list)
2476 {
2477 struct anv_bo *bo;
2478
2479 /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in
2480 * struct drm_i915_gem_exec_object2 against the bo's current offset and, if
2481 * none of the bos have moved, it will skip relocation processing altogether.
2482 * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming
2483 * value of offset so we can set it either way. For that to work we need
2484 * to make sure all relocs use the same presumed offset.
2485 */
2486
2487 for (size_t i = 0; i < list->num_relocs; i++) {
2488 bo = list->reloc_bos[i];
2489 if (bo->offset != list->relocs[i].presumed_offset)
2490 cmd_buffer->need_reloc = true;
2491
2492 list->relocs[i].target_handle = bo->index;
2493 }
2494 }
2495
2496 VkResult anv_EndCommandBuffer(
2497 VkCmdBuffer cmdBuffer)
2498 {
2499 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2500 struct anv_device *device = cmd_buffer->device;
2501 struct anv_batch *batch = &cmd_buffer->batch;
2502
2503 anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_END);
2504
2505 /* Round batch up to an even number of dwords. */
2506 if ((batch->next - batch->start) & 4)
2507 anv_batch_emit(batch, GEN8_MI_NOOP);
2508
2509 anv_batch_bo_finish(cmd_buffer->last_batch_bo, &cmd_buffer->batch);
2510 cmd_buffer->surface_batch_bo->num_relocs =
2511 cmd_buffer->surface_relocs.num_relocs - cmd_buffer->surface_batch_bo->first_reloc;
2512 cmd_buffer->surface_batch_bo->length = cmd_buffer->surface_next;
2513
2514 cmd_buffer->bo_count = 0;
2515 cmd_buffer->need_reloc = false;
2516
2517 /* Lock for access to bo->index. */
2518 pthread_mutex_lock(&device->mutex);
2519
2520 /* Add surface state bos first so we can add them with their relocs. */
2521 for (struct anv_batch_bo *bbo = cmd_buffer->surface_batch_bo;
2522 bbo != NULL; bbo = bbo->prev_batch_bo) {
2523 anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo,
2524 &cmd_buffer->surface_relocs.relocs[bbo->first_reloc],
2525 bbo->num_relocs);
2526 }
2527
2528 /* Add all of the BOs referenced by surface state */
2529 anv_cmd_buffer_add_validate_bos(cmd_buffer, &cmd_buffer->surface_relocs);
2530
2531 /* Add all but the first batch BO */
2532 struct anv_batch_bo *batch_bo = cmd_buffer->last_batch_bo;
2533 while (batch_bo->prev_batch_bo) {
2534 anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo,
2535 &batch->relocs.relocs[batch_bo->first_reloc],
2536 batch_bo->num_relocs);
2537 batch_bo = batch_bo->prev_batch_bo;
2538 }
2539
2540 /* Add everything referenced by the batches */
2541 anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->relocs);
2542
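/* execbuf2 treats the last BO in the object list as the batch to execute,
 * so the first (root) batch bo has to be added last.
 */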
2543 /* Add the first batch bo last */
2544 assert(batch_bo->prev_batch_bo == NULL && batch_bo->first_reloc == 0);
2545 anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo,
2546 &batch->relocs.relocs[batch_bo->first_reloc],
2547 batch_bo->num_relocs);
2548 assert(batch_bo->bo.index == cmd_buffer->bo_count - 1);
2549
2550 anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
2551 anv_cmd_buffer_process_relocs(cmd_buffer, &batch->relocs);
2552
2553 cmd_buffer->execbuf.buffers_ptr = (uintptr_t) cmd_buffer->exec2_objects;
2554 cmd_buffer->execbuf.buffer_count = cmd_buffer->bo_count;
2555 cmd_buffer->execbuf.batch_start_offset = 0;
2556 cmd_buffer->execbuf.batch_len = batch->next - batch->start;
2557 cmd_buffer->execbuf.cliprects_ptr = 0;
2558 cmd_buffer->execbuf.num_cliprects = 0;
2559 cmd_buffer->execbuf.DR1 = 0;
2560 cmd_buffer->execbuf.DR4 = 0;
2561
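/* anv_cmd_buffer_process_relocs stored bo->index in each target_handle, so
 * use I915_EXEC_HANDLE_LUT to make the kernel treat target_handle as an
 * index into the exec object array rather than a gem handle.
 */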
2562 cmd_buffer->execbuf.flags = I915_EXEC_HANDLE_LUT;
2563 if (!cmd_buffer->need_reloc)
2564 cmd_buffer->execbuf.flags |= I915_EXEC_NO_RELOC;
2565 cmd_buffer->execbuf.flags |= I915_EXEC_RENDER;
2566 cmd_buffer->execbuf.rsvd1 = device->context_id;
2567 cmd_buffer->execbuf.rsvd2 = 0;
2568
2569 pthread_mutex_unlock(&device->mutex);
2570
2571 return VK_SUCCESS;
2572 }
2573
2574 VkResult anv_ResetCommandBuffer(
2575 VkCmdBuffer cmdBuffer)
2576 {
2577 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2578
2579 /* Delete all but the first batch bo */
2580 while (cmd_buffer->last_batch_bo->prev_batch_bo) {
2581 struct anv_batch_bo *prev = cmd_buffer->last_batch_bo->prev_batch_bo;
2582 anv_batch_bo_destroy(cmd_buffer->last_batch_bo, cmd_buffer->device);
2583 cmd_buffer->last_batch_bo = prev;
2584 }
2585 assert(cmd_buffer->last_batch_bo->prev_batch_bo == NULL);
2586
2587 cmd_buffer->batch.relocs.num_relocs = 0;
2588 anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch,
2589 GEN8_MI_BATCH_BUFFER_START_length * 4);
2590
2591 /* Delete all but the first batch bo */
2592 while (cmd_buffer->surface_batch_bo->prev_batch_bo) {
2593 struct anv_batch_bo *prev = cmd_buffer->surface_batch_bo->prev_batch_bo;
2594 anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, cmd_buffer->device);
2595 cmd_buffer->surface_batch_bo = prev;
2596 }
2597 assert(cmd_buffer->surface_batch_bo->prev_batch_bo == NULL);
2598
2599 cmd_buffer->surface_next = 1;
2600 cmd_buffer->surface_relocs.num_relocs = 0;
2601
2602 cmd_buffer->rs_state = NULL;
2603 cmd_buffer->vp_state = NULL;
2604 cmd_buffer->cb_state = NULL;
2605 cmd_buffer->ds_state = NULL;
2606
2607 return VK_SUCCESS;
2608 }
2609
2610 // Command buffer building functions
2611
2612 void anv_CmdBindPipeline(
2613 VkCmdBuffer cmdBuffer,
2614 VkPipelineBindPoint pipelineBindPoint,
2615 VkPipeline _pipeline)
2616 {
2617 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2618 struct anv_pipeline *pipeline = (struct anv_pipeline *) _pipeline;
2619
2620 switch (pipelineBindPoint) {
2621 case VK_PIPELINE_BIND_POINT_COMPUTE:
2622 cmd_buffer->compute_pipeline = pipeline;
2623 cmd_buffer->compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
2624 break;
2625
2626 case VK_PIPELINE_BIND_POINT_GRAPHICS:
2627 cmd_buffer->pipeline = pipeline;
2628 cmd_buffer->vb_dirty |= pipeline->vb_used;
2629 cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
2630 break;
2631
2632 default:
2633 assert(!"invalid bind point");
2634 break;
2635 }
2636 }
2637
2638 void anv_CmdBindDynamicStateObject(
2639 VkCmdBuffer cmdBuffer,
2640 VkStateBindPoint stateBindPoint,
2641 VkDynamicStateObject dynamicState)
2642 {
2643 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2644
2645 switch (stateBindPoint) {
2646 case VK_STATE_BIND_POINT_VIEWPORT:
2647 cmd_buffer->vp_state = (struct anv_dynamic_vp_state *) dynamicState;
2648 cmd_buffer->dirty |= ANV_CMD_BUFFER_VP_DIRTY;
2649 break;
2650 case VK_STATE_BIND_POINT_RASTER:
2651 cmd_buffer->rs_state = (struct anv_dynamic_rs_state *) dynamicState;
2652 cmd_buffer->dirty |= ANV_CMD_BUFFER_RS_DIRTY;
2653 break;
2654 case VK_STATE_BIND_POINT_COLOR_BLEND:
2655 cmd_buffer->cb_state = (struct anv_dynamic_cb_state *) dynamicState;
2656 cmd_buffer->dirty |= ANV_CMD_BUFFER_CB_DIRTY;
2657 break;
2658 case VK_STATE_BIND_POINT_DEPTH_STENCIL:
2659 cmd_buffer->ds_state = (struct anv_dynamic_ds_state *) dynamicState;
2660 cmd_buffer->dirty |= ANV_CMD_BUFFER_DS_DIRTY;
2661 break;
2662 default:
2663 break;
2664 };
2665 }
2666
2667 static struct anv_state
2668 anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer,
2669 uint32_t size, uint32_t alignment)
2670 {
2671 struct anv_state state;
2672
2673 state.offset = align_u32(cmd_buffer->surface_next, alignment);
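/* If this allocation won't fit in the current surface state BO, return a
 * zero state; the caller is expected to switch to a fresh BO with
 * anv_cmd_buffer_new_surface_state_bo() and retry.
 */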
2674 if (state.offset + size > cmd_buffer->surface_batch_bo->bo.size)
2675 return (struct anv_state) { 0 };
2676
2677 state.map = cmd_buffer->surface_batch_bo->bo.map + state.offset;
2678 state.alloc_size = size;
2679 cmd_buffer->surface_next = state.offset + size;
2680
2681 assert(state.offset + size <= cmd_buffer->surface_batch_bo->bo.size);
2682
2683 return state;
2684 }
2685
2686 static VkResult
2687 anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer)
2688 {
2689 struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->surface_batch_bo;
2690
2691 /* Finish off the old buffer */
2692 old_bbo->num_relocs =
2693 cmd_buffer->surface_relocs.num_relocs - old_bbo->first_reloc;
2694 old_bbo->length = cmd_buffer->surface_next;
2695
2696 VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo);
2697 if (result != VK_SUCCESS)
2698 return result;
2699
2700 new_bbo->first_reloc = cmd_buffer->surface_relocs.num_relocs;
2701 cmd_buffer->surface_next = 1;
2702
2703 new_bbo->prev_batch_bo = old_bbo;
2704 cmd_buffer->surface_batch_bo = new_bbo;
2705
2706 /* Re-emit state base addresses so we get the new surface state base
2707 * address before we start emitting binding tables etc.
2708 */
2709 anv_cmd_buffer_emit_state_base_address(cmd_buffer);
2710
2711 /* It seems like just changing the state base addresses isn't enough.
2712 * Invalidating the cache seems to be enough to cause things to
2713 * propagate. However, I'm not 100% sure what we're supposed to do.
2714 */
2715 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
2716 .TextureCacheInvalidationEnable = true);
2717
2718 return VK_SUCCESS;
2719 }
2720
2721 void anv_CmdBindDescriptorSets(
2722 VkCmdBuffer cmdBuffer,
2723 VkPipelineBindPoint pipelineBindPoint,
2724 uint32_t firstSet,
2725 uint32_t setCount,
2726 const VkDescriptorSet* pDescriptorSets,
2727 uint32_t dynamicOffsetCount,
2728 const uint32_t* pDynamicOffsets)
2729 {
2730 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2731 struct anv_pipeline_layout *layout;
2732 struct anv_descriptor_set *set;
2733 struct anv_descriptor_set_layout *set_layout;
2734
2735 assert(firstSet + setCount <= MAX_SETS);
2736
2737 if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
2738 layout = cmd_buffer->pipeline->layout;
2739 else
2740 layout = cmd_buffer->compute_pipeline->layout;
2741
2742 uint32_t dynamic_slot = 0;
2743 for (uint32_t i = 0; i < setCount; i++) {
2744 set = (struct anv_descriptor_set *) pDescriptorSets[i];
2745 set_layout = layout->set[firstSet + i].layout;
2746
2747 cmd_buffer->descriptors[firstSet + i].set = set;
2748
2749 assert(set_layout->num_dynamic_buffers <
2750 ARRAY_SIZE(cmd_buffer->descriptors[0].dynamic_offsets));
2751 memcpy(cmd_buffer->descriptors[firstSet + i].dynamic_offsets,
2752 pDynamicOffsets + dynamic_slot,
2753 set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets));
2754
2755 cmd_buffer->descriptors_dirty |= set_layout->shader_stages;
2756
2757 dynamic_slot += set_layout->num_dynamic_buffers;
2758 }
2759 }
2760
2761 void anv_CmdBindIndexBuffer(
2762 VkCmdBuffer cmdBuffer,
2763 VkBuffer _buffer,
2764 VkDeviceSize offset,
2765 VkIndexType indexType)
2766 {
2767 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2768 struct anv_buffer *buffer = (struct anv_buffer *) _buffer;
2769
2770 static const uint32_t vk_to_gen_index_type[] = {
2771 [VK_INDEX_TYPE_UINT8] = INDEX_BYTE,
2772 [VK_INDEX_TYPE_UINT16] = INDEX_WORD,
2773 [VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
2774 };
2775
2776 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER,
2777 .IndexFormat = vk_to_gen_index_type[indexType],
2778 .MemoryObjectControlState = GEN8_MOCS,
2779 .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
2780 .BufferSize = buffer->size - offset);
2781 }
2782
2783 void anv_CmdBindVertexBuffers(
2784 VkCmdBuffer cmdBuffer,
2785 uint32_t startBinding,
2786 uint32_t bindingCount,
2787 const VkBuffer* pBuffers,
2788 const VkDeviceSize* pOffsets)
2789 {
2790 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2791 struct anv_vertex_binding *vb = cmd_buffer->vertex_bindings;
2792
2793 /* We have to defer setting up the vertex buffers since we need the buffer
2794 * stride from the pipeline. */
2795
2796 assert(startBinding + bindingCount <= MAX_VBS);
2797 for (uint32_t i = 0; i < bindingCount; i++) {
2798 vb[startBinding + i].buffer = (struct anv_buffer *) pBuffers[i];
2799 vb[startBinding + i].offset = pOffsets[i];
2800 cmd_buffer->vb_dirty |= 1 << (startBinding + i);
2801 }
2802 }
2803
2804 static VkResult
2805 cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
2806 unsigned stage, struct anv_state *bt_state)
2807 {
2808 struct anv_pipeline_layout *layout;
2809 uint32_t color_attachments, bias, size;
2810
2811 if (stage == VK_SHADER_STAGE_COMPUTE)
2812 layout = cmd_buffer->compute_pipeline->layout;
2813 else
2814 layout = cmd_buffer->pipeline->layout;
2815
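/* For the fragment stage, the first MAX_RTS binding table entries are
 * reserved for render targets; descriptor set surfaces start after that
 * bias.
 */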
2816 if (stage == VK_SHADER_STAGE_FRAGMENT) {
2817 bias = MAX_RTS;
2818 color_attachments = cmd_buffer->framebuffer->color_attachment_count;
2819 } else {
2820 bias = 0;
2821 color_attachments = 0;
2822 }
2823
2824 /* This is a little awkward: layout can be NULL but we still have to
2825 * allocate and set a binding table for the PS stage for render
2826 * targets. */
2827 uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0;
2828
2829 if (color_attachments + surface_count == 0)
2830 return VK_SUCCESS;
2831
2832 size = (bias + surface_count) * sizeof(uint32_t);
2833 *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32);
2834 uint32_t *bt_map = bt_state->map;
2835
2836 if (bt_state->map == NULL)
2837 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2838
2839 for (uint32_t ca = 0; ca < color_attachments; ca++) {
2840 const struct anv_surface_view *view =
2841 cmd_buffer->framebuffer->color_attachments[ca];
2842
2843 struct anv_state state =
2844 anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);
2845
2846 if (state.map == NULL)
2847 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2848
2849 memcpy(state.map, view->surface_state.map, 64);
2850
2851 /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
2852 *(uint64_t *)(state.map + 8 * 4) =
2853 anv_reloc_list_add(&cmd_buffer->surface_relocs,
2854 cmd_buffer->device,
2855 state.offset + 8 * 4,
2856 view->bo, view->offset);
2857
2858 bt_map[ca] = state.offset;
2859 }
2860
2861 if (layout == NULL)
2862 return VK_SUCCESS;
2863
2864 for (uint32_t set = 0; set < layout->num_sets; set++) {
2865 struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set];
2866 struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
2867 struct anv_descriptor_slot *surface_slots =
2868 set_layout->stage[stage].surface_start;
2869
2870 uint32_t start = bias + layout->set[set].surface_start[stage];
2871
2872 for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) {
2873 struct anv_surface_view *view =
2874 d->set->descriptors[surface_slots[b].index].view;
2875
2876 if (!view)
2877 continue;
2878
2879 struct anv_state state =
2880 anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);
2881
2882 if (state.map == NULL)
2883 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2884
2885 uint32_t offset;
2886 if (surface_slots[b].dynamic_slot >= 0) {
2887 uint32_t dynamic_offset =
2888 d->dynamic_offsets[surface_slots[b].dynamic_slot];
2889
2890 offset = view->offset + dynamic_offset;
2891 fill_buffer_surface_state(state.map, view->format, offset,
2892 view->range - dynamic_offset);
2893 } else {
2894 offset = view->offset;
2895 memcpy(state.map, view->surface_state.map, 64);
2896 }
2897
2898 /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
2899 *(uint64_t *)(state.map + 8 * 4) =
2900 anv_reloc_list_add(&cmd_buffer->surface_relocs,
2901 cmd_buffer->device,
2902 state.offset + 8 * 4,
2903 view->bo, offset);
2904
2905 bt_map[start + b] = state.offset;
2906 }
2907 }
2908
2909 return VK_SUCCESS;
2910 }
2911
2912 static VkResult
2913 cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
2914 unsigned stage, struct anv_state *state)
2915 {
2916 struct anv_pipeline_layout *layout;
2917 uint32_t sampler_count;
2918
2919 if (stage == VK_SHADER_STAGE_COMPUTE)
2920 layout = cmd_buffer->compute_pipeline->layout;
2921 else
2922 layout = cmd_buffer->pipeline->layout;
2923
2924 sampler_count = layout ? layout->stage[stage].sampler_count : 0;
2925 if (sampler_count == 0)
2926 return VK_SUCCESS;
2927
2928 uint32_t size = sampler_count * 16;
2929 *state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32);
2930
2931 if (state->map == NULL)
2932 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2933
2934 for (uint32_t set = 0; set < layout->num_sets; set++) {
2935 struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set];
2936 struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
2937 struct anv_descriptor_slot *sampler_slots =
2938 set_layout->stage[stage].sampler_start;
2939
2940 uint32_t start = layout->set[set].sampler_start[stage];
2941
2942 for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) {
2943 struct anv_sampler *sampler =
2944 d->set->descriptors[sampler_slots[b].index].sampler;
2945
2946 if (!sampler)
2947 continue;
2948
2949 memcpy(state->map + (start + b) * 16,
2950 sampler->state, sizeof(sampler->state));
2951 }
2952 }
2953
2954 return VK_SUCCESS;
2955 }
2956
2957 static VkResult
2958 flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage)
2959 {
2960 struct anv_state surfaces = { 0, }, samplers = { 0, };
2961 VkResult result;
2962
2963 result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers);
2964 if (result != VK_SUCCESS)
2965 return result;
2966 result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces);
2967 if (result != VK_SUCCESS)
2968 return result;
2969
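/* These are the 3DCommandSubOpcode values for the per-stage
 * 3DSTATE_SAMPLER_STATE_POINTERS_* and 3DSTATE_BINDING_TABLE_POINTERS_*
 * packets; we emit the VS variant below and patch in the sub-opcode for
 * the stage actually being flushed.
 */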
2970 static const uint32_t sampler_state_opcodes[] = {
2971 [VK_SHADER_STAGE_VERTEX] = 43,
2972 [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */
2973 [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */
2974 [VK_SHADER_STAGE_GEOMETRY] = 46,
2975 [VK_SHADER_STAGE_FRAGMENT] = 47,
2976 [VK_SHADER_STAGE_COMPUTE] = 0,
2977 };
2978
2979 static const uint32_t binding_table_opcodes[] = {
2980 [VK_SHADER_STAGE_VERTEX] = 38,
2981 [VK_SHADER_STAGE_TESS_CONTROL] = 39,
2982 [VK_SHADER_STAGE_TESS_EVALUATION] = 40,
2983 [VK_SHADER_STAGE_GEOMETRY] = 41,
2984 [VK_SHADER_STAGE_FRAGMENT] = 42,
2985 [VK_SHADER_STAGE_COMPUTE] = 0,
2986 };
2987
2988 if (samplers.alloc_size > 0) {
2989 anv_batch_emit(&cmd_buffer->batch,
2990 GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS,
2991 ._3DCommandSubOpcode = sampler_state_opcodes[stage],
2992 .PointertoVSSamplerState = samplers.offset);
2993 }
2994
2995 if (surfaces.alloc_size > 0) {
2996 anv_batch_emit(&cmd_buffer->batch,
2997 GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS,
2998 ._3DCommandSubOpcode = binding_table_opcodes[stage],
2999 .PointertoVSBindingTable = surfaces.offset);
3000 }
3001
3002 return VK_SUCCESS;
3003 }
3004
3005 static void
3006 flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
3007 {
3008 uint32_t s, dirty = cmd_buffer->descriptors_dirty &
3009 cmd_buffer->pipeline->active_stages;
3010
3011 VkResult result = VK_SUCCESS;
3012 for_each_bit(s, dirty) {
3013 result = flush_descriptor_set(cmd_buffer, s);
3014 if (result != VK_SUCCESS)
3015 break;
3016 }
3017
3018 if (result != VK_SUCCESS) {
3019 assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);
3020
3021 result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer);
3022 assert(result == VK_SUCCESS);
3023
3024 /* Re-emit all active binding tables */
3025 for_each_bit(s, cmd_buffer->pipeline->active_stages) {
3026 result = flush_descriptor_set(cmd_buffer, s);
3027
3028 /* It had better succeed this time */
3029 assert(result == VK_SUCCESS);
3030 }
3031 }
3032
3033 cmd_buffer->descriptors_dirty &= ~cmd_buffer->pipeline->active_stages;
3034 }
3035
3036 static struct anv_state
3037 anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
3038 uint32_t *a, uint32_t dwords, uint32_t alignment)
3039 {
3040 struct anv_state state;
3041
3042 state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
3043 dwords * 4, alignment);
3044 memcpy(state.map, a, dwords * 4);
3045
3046 VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4));
3047
3048 return state;
3049 }
3050
3051 static struct anv_state
3052 anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
3053 uint32_t *a, uint32_t *b,
3054 uint32_t dwords, uint32_t alignment)
3055 {
3056 struct anv_state state;
3057 uint32_t *p;
3058
3059 state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
3060 dwords * 4, alignment);
3061 p = state.map;
3062 for (uint32_t i = 0; i < dwords; i++)
3063 p[i] = a[i] | b[i];
3064
3065 VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));
3066
3067 return state;
3068 }
3069
3070 static VkResult
3071 flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
3072 {
3073 struct anv_device *device = cmd_buffer->device;
3074 struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
3075 struct anv_state surfaces = { 0, }, samplers = { 0, };
3076 VkResult result;
3077
3078 result = cmd_buffer_emit_samplers(cmd_buffer,
3079 VK_SHADER_STAGE_COMPUTE, &samplers);
3080 if (result != VK_SUCCESS)
3081 return result;
3082 result = cmd_buffer_emit_binding_table(cmd_buffer,
3083 VK_SHADER_STAGE_COMPUTE, &surfaces);
3084 if (result != VK_SUCCESS)
3085 return result;
3086
3087 struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = {
3088 .KernelStartPointer = pipeline->cs_simd,
3089 .KernelStartPointerHigh = 0,
3090 .BindingTablePointer = surfaces.offset,
3091 .BindingTableEntryCount = 0,
3092 .SamplerStatePointer = samplers.offset,
3093 .SamplerCount = 0,
3094 .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
3095 };
3096
3097 uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
3098 struct anv_state state =
3099 anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);
3100
3101 GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);
3102
3103 anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
3104 .InterfaceDescriptorTotalLength = size,
3105 .InterfaceDescriptorDataStartAddress = state.offset);
3106
3107 return VK_SUCCESS;
3108 }
3109
3110 static void
3111 anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
3112 {
3113 struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
3114 VkResult result;
3115
3116 assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
3117
3118 if (cmd_buffer->current_pipeline != GPGPU) {
3119 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
3120 .PipelineSelection = GPGPU);
3121 cmd_buffer->current_pipeline = GPGPU;
3122 }
3123
3124 if (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
3125 anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
3126
3127 if ((cmd_buffer->descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
3128 (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) {
3129 result = flush_compute_descriptor_set(cmd_buffer);
3130 if (result != VK_SUCCESS) {
3131 result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer);
3132 assert(result == VK_SUCCESS);
3133 result = flush_compute_descriptor_set(cmd_buffer);
3134 assert(result == VK_SUCCESS);
3135 }
3136 cmd_buffer->descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE_BIT;
3137 }
3138
3139 cmd_buffer->compute_dirty = 0;
3140 }
3141
3142 static void
3143 anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
3144 {
3145 struct anv_pipeline *pipeline = cmd_buffer->pipeline;
3146 uint32_t *p;
3147
3148 uint32_t vb_emit = cmd_buffer->vb_dirty & pipeline->vb_used;
3149
3150 assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);
3151
3152 if (cmd_buffer->current_pipeline != _3D) {
3153 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
3154 .PipelineSelection = _3D);
3155 cmd_buffer->current_pipeline = _3D;
3156 }
3157
3158 if (vb_emit) {
3159 const uint32_t num_buffers = __builtin_popcount(vb_emit);
3160 const uint32_t num_dwords = 1 + num_buffers * 4;
3161
3162 p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
3163 GEN8_3DSTATE_VERTEX_BUFFERS);
3164 uint32_t vb, i = 0;
3165 for_each_bit(vb, vb_emit) {
3166 struct anv_buffer *buffer = cmd_buffer->vertex_bindings[vb].buffer;
3167 uint32_t offset = cmd_buffer->vertex_bindings[vb].offset;
3168
3169 struct GEN8_VERTEX_BUFFER_STATE state = {
3170 .VertexBufferIndex = vb,
3171 .MemoryObjectControlState = GEN8_MOCS,
3172 .AddressModifyEnable = true,
3173 .BufferPitch = pipeline->binding_stride[vb],
3174 .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
3175 .BufferSize = buffer->size - offset
3176 };
3177
3178 GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state);
3179 i++;
3180 }
3181 }
3182
3183 if (cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) {
3184 /* If somebody compiled a pipeline after starting a command buffer the
3185 * scratch bo may have grown since we started this cmd buffer (and
3186 * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now,
3187 * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
3188 if (cmd_buffer->scratch_size < pipeline->total_scratch)
3189 anv_cmd_buffer_emit_state_base_address(cmd_buffer);
3190
3191 anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
3192 }
3193
3194 if (cmd_buffer->descriptors_dirty)
3195 flush_descriptor_sets(cmd_buffer);
3196
3197 if (cmd_buffer->dirty & ANV_CMD_BUFFER_VP_DIRTY) {
3198 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS,
3199 .ScissorRectPointer = cmd_buffer->vp_state->scissor.offset);
3200 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
3201 .CCViewportPointer = cmd_buffer->vp_state->cc_vp.offset);
3202 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
3203 .SFClipViewportPointer = cmd_buffer->vp_state->sf_clip_vp.offset);
3204 }
3205
3206 if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) {
3207 anv_batch_emit_merge(&cmd_buffer->batch,
3208 cmd_buffer->rs_state->state_sf, pipeline->state_sf);
3209 anv_batch_emit_merge(&cmd_buffer->batch,
3210 cmd_buffer->rs_state->state_raster, pipeline->state_raster);
3211 }
3212
3213 if (cmd_buffer->ds_state &&
3214 (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)))
3215 anv_batch_emit_merge(&cmd_buffer->batch,
3216 cmd_buffer->ds_state->state_wm_depth_stencil,
3217 pipeline->state_wm_depth_stencil);
3218
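/* COLOR_CALC_STATE holds both the blend constants (from the CB state) and
 * the stencil reference values (from the DS state), so when both are bound
 * their partial packings are OR'd together into a single structure.
 */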
3219 if (cmd_buffer->dirty & (ANV_CMD_BUFFER_CB_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)) {
3220 struct anv_state state;
3221 if (cmd_buffer->ds_state == NULL)
3222 state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
3223 cmd_buffer->cb_state->state_color_calc,
3224 GEN8_COLOR_CALC_STATE_length, 64);
3225 else if (cmd_buffer->cb_state == NULL)
3226 state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
3227 cmd_buffer->ds_state->state_color_calc,
3228 GEN8_COLOR_CALC_STATE_length, 64);
3229 else
3230 state = anv_cmd_buffer_merge_dynamic(cmd_buffer,
3231 cmd_buffer->ds_state->state_color_calc,
3232 cmd_buffer->cb_state->state_color_calc,
3233 GEN8_COLOR_CALC_STATE_length, 64);
3234
3235 anv_batch_emit(&cmd_buffer->batch,
3236 GEN8_3DSTATE_CC_STATE_POINTERS,
3237 .ColorCalcStatePointer = state.offset,
3238 .ColorCalcStatePointerValid = true);
3239 }
3240
3241 cmd_buffer->vb_dirty &= ~vb_emit;
3242 cmd_buffer->dirty = 0;
3243 }
3244
3245 void anv_CmdDraw(
3246 VkCmdBuffer cmdBuffer,
3247 uint32_t firstVertex,
3248 uint32_t vertexCount,
3249 uint32_t firstInstance,
3250 uint32_t instanceCount)
3251 {
3252 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
3253
3254 anv_cmd_buffer_flush_state(cmd_buffer);
3255
3256 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
3257 .VertexAccessType = SEQUENTIAL,
3258 .VertexCountPerInstance = vertexCount,
3259 .StartVertexLocation = firstVertex,
3260 .InstanceCount = instanceCount,
3261 .StartInstanceLocation = firstInstance,
3262 .BaseVertexLocation = 0);
3263 }
3264
3265 void anv_CmdDrawIndexed(
3266 VkCmdBuffer cmdBuffer,
3267 uint32_t firstIndex,
3268 uint32_t indexCount,
3269 int32_t vertexOffset,
3270 uint32_t firstInstance,
3271 uint32_t instanceCount)
3272 {
3273 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
3274
3275 anv_cmd_buffer_flush_state(cmd_buffer);
3276
3277 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
3278 .VertexAccessType = RANDOM,
3279 .VertexCountPerInstance = indexCount,
3280 .StartVertexLocation = firstIndex,
3281 .InstanceCount = instanceCount,
3282 .StartInstanceLocation = firstInstance,
3283 .BaseVertexLocation = vertexOffset);
3284 }
3285
3286 static void
3287 anv_batch_lrm(struct anv_batch *batch,
3288 uint32_t reg, struct anv_bo *bo, uint32_t offset)
3289 {
3290 anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
3291 .RegisterAddress = reg,
3292 .MemoryAddress = { bo, offset });
3293 }
3294
3295 static void
3296 anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
3297 {
3298 anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM,
3299 .RegisterOffset = reg,
3300 .DataDWord = imm);
3301 }
3302
3303 /* Auto-Draw / Indirect Registers */
3304 #define GEN7_3DPRIM_END_OFFSET 0x2420
3305 #define GEN7_3DPRIM_START_VERTEX 0x2430
3306 #define GEN7_3DPRIM_VERTEX_COUNT 0x2434
3307 #define GEN7_3DPRIM_INSTANCE_COUNT 0x2438
3308 #define GEN7_3DPRIM_START_INSTANCE 0x243C
3309 #define GEN7_3DPRIM_BASE_VERTEX 0x2440
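/* The indirect draw commands below load the draw parameters from the
 * indirect buffer into these registers with MI_LOAD_REGISTER_MEM, then
 * issue a 3DPRIMITIVE with IndirectParameterEnable set so the hardware
 * sources its arguments from them.
 */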
3310
3311 void anv_CmdDrawIndirect(
3312 VkCmdBuffer cmdBuffer,
3313 VkBuffer _buffer,
3314 VkDeviceSize offset,
3315 uint32_t count,
3316 uint32_t stride)
3317 {
3318 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
3319 struct anv_buffer *buffer = (struct anv_buffer *) _buffer;
3320 struct anv_bo *bo = buffer->bo;
3321 uint32_t bo_offset = buffer->offset + offset;
3322
3323 anv_cmd_buffer_flush_state(cmd_buffer);
3324
3325 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
3326 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
3327 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
3328 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12);
3329 anv_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0);
3330
3331 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
3332 .IndirectParameterEnable = true,
3333 .VertexAccessType = SEQUENTIAL);
3334 }
3335
3336 void anv_CmdDrawIndexedIndirect(
3337 VkCmdBuffer cmdBuffer,
3338 VkBuffer _buffer,
3339 VkDeviceSize offset,
3340 uint32_t count,
3341 uint32_t stride)
3342 {
3343 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
3344 struct anv_buffer *buffer = (struct anv_buffer *) _buffer;
3345 struct anv_bo *bo = buffer->bo;
3346 uint32_t bo_offset = buffer->offset + offset;
3347
3348 anv_cmd_buffer_flush_state(cmd_buffer);
3349
3350 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
3351 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
3352 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
3353 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12);
3354 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16);
3355
3356 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
3357 .IndirectParameterEnable = true,
3358 .VertexAccessType = RANDOM);
3359 }
3360
3361 void anv_CmdDispatch(
3362 VkCmdBuffer cmdBuffer,
3363 uint32_t x,
3364 uint32_t y,
3365 uint32_t z)
3366 {
3367 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
3368 struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
3369 struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
3370
3371 anv_cmd_buffer_flush_compute_state(cmd_buffer);
3372
3373 anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
3374 .SIMDSize = prog_data->simd_size / 16,
3375 .ThreadDepthCounterMaximum = 0,
3376 .ThreadHeightCounterMaximum = 0,
3377 .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
3378 .ThreadGroupIDXDimension = x,
3379 .ThreadGroupIDYDimension = y,
3380 .ThreadGroupIDZDimension = z,
3381 .RightExecutionMask = pipeline->cs_right_mask,
3382 .BottomExecutionMask = 0xffffffff);
3383
3384 anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
3385 }
3386
3387 #define GPGPU_DISPATCHDIMX 0x2500
3388 #define GPGPU_DISPATCHDIMY 0x2504
3389 #define GPGPU_DISPATCHDIMZ 0x2508
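/* anv_CmdDispatchIndirect loads the x/y/z group counts from the indirect
 * buffer into these registers and sets IndirectParameterEnable on
 * GPGPU_WALKER so the walker reads its dimensions from them.
 */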
3390
3391 void anv_CmdDispatchIndirect(
3392 VkCmdBuffer cmdBuffer,
3393 VkBuffer _buffer,
3394 VkDeviceSize offset)
3395 {
3396 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
3397 struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
3398 struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
3399 struct anv_buffer *buffer = (struct anv_buffer *) _buffer;
3400 struct anv_bo *bo = buffer->bo;
3401 uint32_t bo_offset = buffer->offset + offset;
3402
3403 anv_cmd_buffer_flush_compute_state(cmd_buffer);
3404
3405 anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);
3406 anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
3407 anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);
3408
3409 anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
3410 .IndirectParameterEnable = true,
3411 .SIMDSize = prog_data->simd_size / 16,
3412 .ThreadDepthCounterMaximum = 0,
3413 .ThreadHeightCounterMaximum = 0,
3414 .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
3415 .RightExecutionMask = pipeline->cs_right_mask,
3416 .BottomExecutionMask = 0xffffffff);
3417
3418 anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
3419 }
3420
3421 void anv_CmdSetEvent(
3422 VkCmdBuffer cmdBuffer,
3423 VkEvent event,
3424 VkPipeEvent pipeEvent)
3425 {
3426 stub();
3427 }
3428
3429 void anv_CmdResetEvent(
3430 VkCmdBuffer cmdBuffer,
3431 VkEvent event,
3432 VkPipeEvent pipeEvent)
3433 {
3434 stub();
3435 }
3436
3437 void anv_CmdWaitEvents(
3438 VkCmdBuffer cmdBuffer,
3439 VkWaitEvent waitEvent,
3440 uint32_t eventCount,
3441 const VkEvent* pEvents,
3442 uint32_t memBarrierCount,
3443 const void** ppMemBarriers)
3444 {
3445 stub();
3446 }
3447
3448 void anv_CmdPipelineBarrier(
3449 VkCmdBuffer cmdBuffer,
3450 VkWaitEvent waitEvent,
3451 uint32_t pipeEventCount,
3452 const VkPipeEvent* pPipeEvents,
3453 uint32_t memBarrierCount,
3454 const void** ppMemBarriers)
3455 {
3456 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer;
3457 uint32_t b, *dw;
3458
3459 struct GEN8_PIPE_CONTROL cmd = {
3460 GEN8_PIPE_CONTROL_header,
3461 .PostSyncOperation = NoWrite,
3462 };
3463
3464 /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */
3465
3466 for (uint32_t i = 0; i < pipeEventCount; i++) {
3467 switch (pPipeEvents[i]) {
3468 case VK_PIPE_EVENT_TOP_OF_PIPE:
3469 /* This is just what PIPE_CONTROL does */
3470 break;
3471 case VK_PIPE_EVENT_VERTEX_PROCESSING_COMPLETE:
3472 case VK_PIPE_EVENT_LOCAL_FRAGMENT_PROCESSING_COMPLETE:
3473 case VK_PIPE_EVENT_FRAGMENT_PROCESSING_COMPLETE:
3474 cmd.StallAtPixelScoreboard = true;
3475 break;
3476 case VK_PIPE_EVENT_GRAPHICS_PIPELINE_COMPLETE:
3477 case VK_PIPE_EVENT_COMPUTE_PIPELINE_COMPLETE:
3478 case VK_PIPE_EVENT_TRANSFER_COMPLETE:
3479 case VK_PIPE_EVENT_COMMANDS_COMPLETE:
3480 cmd.CommandStreamerStallEnable = true;
3481 break;
3482 default:
3483 unreachable("Invalid VkPipeEvent");
3484 }
3485 }
3486
3487 /* XXX: Right now, we're really dumb and just flush whatever categories
3488 * the app asks for. One of these days we may make this a bit better
3489 * but right now that's all the hardware allows for in most areas.
3490 */
3491 VkMemoryOutputFlags out_flags = 0;
3492 VkMemoryInputFlags in_flags = 0;
3493
3494 for (uint32_t i = 0; i < memBarrierCount; i++) {
3495 const struct anv_common *common = ppMemBarriers[i];
3496 switch (common->sType) {
3497 case VK_STRUCTURE_TYPE_MEMORY_BARRIER: {
3498 const VkMemoryBarrier *barrier = (VkMemoryBarrier *)common;
3499 out_flags |= barrier->outputMask;
3500 in_flags |= barrier->inputMask;
3501 break;
3502 }
3503 case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: {
3504 const VkBufferMemoryBarrier *barrier = (VkBufferMemoryBarrier *)common;
3505 out_flags |= barrier->outputMask;
3506 in_flags |= barrier->inputMask;
3507 break;
3508 }
3509 case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: {
3510 const VkImageMemoryBarrier *barrier = (VkImageMemoryBarrier *)common;
3511 out_flags |= barrier->outputMask;
3512 in_flags |= barrier->inputMask;
3513 break;
3514 }
3515 default:
3516 unreachable("Invalid memory barrier type");
3517 }
3518 }
3519
3520 for_each_bit(b, out_flags) {
3521 switch ((VkMemoryOutputFlags)(1 << b)) {
3522 case VK_MEMORY_OUTPUT_CPU_WRITE_BIT:
3523 break; /* FIXME: Little-core systems */
3524 case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT:
3525 cmd.DCFlushEnable = true;
3526 break;
3527 case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT:
3528 cmd.RenderTargetCacheFlushEnable = true;
3529 break;
3530 case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
3531 cmd.DepthCacheFlushEnable = true;
3532 break;
3533 case VK_MEMORY_OUTPUT_TRANSFER_BIT:
3534 cmd.RenderTargetCacheFlushEnable = true;
3535 cmd.DepthCacheFlushEnable = true;
3536 break;
3537 default:
3538 unreachable("Invalid memory output flag");
3539 }
3540 }
3541
3542 for_each_bit(b, in_flags) {
3543 switch ((VkMemoryInputFlags)(1 << b)) {
3544 case VK_MEMORY_INPUT_CPU_READ_BIT:
3545 break; /* FIXME: Little-core systems */
3546 case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT:
3547 case VK_MEMORY_INPUT_INDEX_FETCH_BIT:
3548 case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT:
3549 cmd.VFCacheInvalidationEnable = true;
3550 break;
3551 case VK_MEMORY_INPUT_UNIFORM_READ_BIT:
3552 cmd.ConstantCacheInvalidationEnable = true;
3553 /* fallthrough */
3554 case VK_MEMORY_INPUT_SHADER_READ_BIT:
3555 cmd.DCFlushEnable = true;
3556 cmd.TextureCacheInvalidationEnable = true;
3557 break;
3558 case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT:
3559 case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
3560 break; /* XXX: Hunh? */
3561 case VK_MEMORY_INPUT_TRANSFER_BIT:
3562 cmd.TextureCacheInvalidationEnable = true;
3563 break;
3564 }
3565 }
3566
3567 dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length);
3568 GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd);
3569 }
3570
3571 static void
3572 anv_framebuffer_destroy(struct anv_device *device,
3573 struct anv_object *object,
3574 VkObjectType obj_type)
3575 {
3576 struct anv_framebuffer *fb = (struct anv_framebuffer *)object;
3577
3578 assert(obj_type == VK_OBJECT_TYPE_FRAMEBUFFER);
3579
3580 anv_DestroyObject((VkDevice) device,
3581 VK_OBJECT_TYPE_DYNAMIC_VP_STATE,
3582 fb->vp_state);
3583
3584 anv_device_free(device, fb);
3585 }
3586
3587 VkResult anv_CreateFramebuffer(
3588 VkDevice _device,
3589 const VkFramebufferCreateInfo* pCreateInfo,
3590 VkFramebuffer* pFramebuffer)
3591 {
3592 struct anv_device *device = (struct anv_device *) _device;
3593 struct anv_framebuffer *framebuffer;
3594
3595 static const struct anv_depth_stencil_view null_view =
3596 { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 };
3597
3598 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3599
3600 framebuffer = anv_device_alloc(device, sizeof(*framebuffer), 8,
3601 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
3602 if (framebuffer == NULL)
3603 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3604
3605 framebuffer->base.destructor = anv_framebuffer_destroy;
3606
3607 framebuffer->color_attachment_count = pCreateInfo->colorAttachmentCount;
3608 for (uint32_t i = 0; i < pCreateInfo->colorAttachmentCount; i++) {
3609 framebuffer->color_attachments[i] =
3610 (struct anv_surface_view *) pCreateInfo->pColorAttachments[i].view;
3611 }
3612
3613 if (pCreateInfo->pDepthStencilAttachment) {
3614 framebuffer->depth_stencil =
3615 (struct anv_depth_stencil_view *) pCreateInfo->pDepthStencilAttachment->view;
3616 } else {
3617 framebuffer->depth_stencil = &null_view;
3618 }
3619
3620 framebuffer->sample_count = pCreateInfo->sampleCount;
3621 framebuffer->width = pCreateInfo->width;
3622 framebuffer->height = pCreateInfo->height;
3623 framebuffer->layers = pCreateInfo->layers;
3624
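   /* Each framebuffer carries an implicit dynamic viewport/scissor state
    * covering its full extent with a 0..1 depth range; it is freed again in
    * anv_framebuffer_destroy().
    */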
3625 anv_CreateDynamicViewportState((VkDevice) device,
3626 &(VkDynamicVpStateCreateInfo) {
3627 .sType = VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO,
3628 .viewportAndScissorCount = 1,
3629 .pViewports = (VkViewport[]) {
3630 {
3631 .originX = 0,
3632 .originY = 0,
3633 .width = pCreateInfo->width,
3634 .height = pCreateInfo->height,
3635 .minDepth = 0,
3636 .maxDepth = 1
3637 },
3638 },
3639 .pScissors = (VkRect[]) {
3640 { { 0, 0 },
3641 { pCreateInfo->width, pCreateInfo->height } },
3642 }
3643 },
3644 &framebuffer->vp_state);
3645
3646 *pFramebuffer = (VkFramebuffer) framebuffer;
3647
3648 return VK_SUCCESS;
3649 }
3650
3651 VkResult anv_CreateRenderPass(
3652 VkDevice _device,
3653 const VkRenderPassCreateInfo* pCreateInfo,
3654 VkRenderPass* pRenderPass)
3655 {
3656 struct anv_device *device = (struct anv_device *) _device;
3657 struct anv_render_pass *pass;
3658 size_t size;
3659
3660 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);
3661
3662 size = sizeof(*pass) +
3663 pCreateInfo->layers * sizeof(struct anv_render_pass_layer);
3664 pass = anv_device_alloc(device, size, 8,
3665 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
3666 if (pass == NULL)
3667 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3668
3669 pass->render_area = pCreateInfo->renderArea;
3670
3671 pass->num_layers = pCreateInfo->layers;
3672
3673 pass->num_clear_layers = 0;
3674 for (uint32_t i = 0; i < pCreateInfo->layers; i++) {
3675 pass->layers[i].color_load_op = pCreateInfo->pColorLoadOps[i];
3676 pass->layers[i].clear_color = pCreateInfo->pColorLoadClearValues[i];
3677 if (pass->layers[i].color_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
3678 pass->num_clear_layers++;
3679 }
3680
3681 *pRenderPass = (VkRenderPass) pass;
3682
3683 return VK_SUCCESS;
3684 }
3685
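/*
 * Illustrative sketch (not part of this driver) of how an application might
 * drive the render pass entry points above under this pre-1.0 API.  Only the
 * fields that the implementations above actually consume are shown; the full
 * create-info structs are assumed to carry additional members, and
 * `load_ops', `clear_values', `fb', `cmd_buffer', `width' and `height' are
 * placeholder variables.
 *
 *    VkRenderPass pass;
 *    vkCreateRenderPass(device,
 *       &(VkRenderPassCreateInfo) {
 *          .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
 *          .renderArea = { .offset = { 0, 0 }, .extent = { width, height } },
 *          .layers = 1,
 *          .pColorLoadOps = load_ops,
 *          .pColorLoadClearValues = clear_values,
 *       }, &pass);
 *
 *    vkCmdBeginRenderPass(cmd_buffer,
 *       &(VkRenderPassBegin) {
 *          .renderPass = pass,
 *          .framebuffer = fb,
 *       });
 */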
3686 static void
3687 anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
3688 struct anv_render_pass *pass)
3689 {
3690 const struct anv_depth_stencil_view *view =
3691 cmd_buffer->framebuffer->depth_stencil;
3692
3693 /* FIXME: Implement the PMA stall W/A */
3694 /* FIXME: Width and Height are wrong */
3695
3696 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
3697 .SurfaceType = SURFTYPE_2D,
3698 .DepthWriteEnable = view->depth_stride > 0,
3699 .StencilWriteEnable = view->stencil_stride > 0,
3700 .HierarchicalDepthBufferEnable = false,
3701 .SurfaceFormat = view->depth_format,
3702 .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0,
3703 .SurfaceBaseAddress = { view->bo, view->depth_offset },
3704 .Height = pass->render_area.extent.height - 1,
3705 .Width = pass->render_area.extent.width - 1,
3706 .LOD = 0,
3707 .Depth = 1 - 1,
3708 .MinimumArrayElement = 0,
3709 .DepthBufferObjectControlState = GEN8_MOCS,
3710 .RenderTargetViewExtent = 1 - 1,
3711 .SurfaceQPitch = view->depth_qpitch >> 2);
3712
3713 /* Disable hierarchical depth buffers. */
3714 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER);
3715
3716 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER,
3717 .StencilBufferEnable = view->stencil_stride > 0,
3718 .StencilBufferObjectControlState = GEN8_MOCS,
3719 .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0,
3720 .SurfaceBaseAddress = { view->bo, view->stencil_offset },
3721 .SurfaceQPitch = view->stencil_qpitch >> 2);
3722
3723 /* Clear the clear params. */
3724 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS);
3725 }
3726
3727 void anv_CmdBeginRenderPass(
3728 VkCmdBuffer cmdBuffer,
3729 const VkRenderPassBegin* pRenderPassBegin)
3730 {
3731 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
3732 struct anv_render_pass *pass = (struct anv_render_pass *) pRenderPassBegin->renderPass;
3733 struct anv_framebuffer *framebuffer =
3734 (struct anv_framebuffer *) pRenderPassBegin->framebuffer;
3735
3736 cmd_buffer->framebuffer = framebuffer;
3737
3738 cmd_buffer->descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
3739
3740 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE,
3741 .ClippedDrawingRectangleYMin = pass->render_area.offset.y,
3742 .ClippedDrawingRectangleXMin = pass->render_area.offset.x,
3743 .ClippedDrawingRectangleYMax =
3744 pass->render_area.offset.y + pass->render_area.extent.height - 1,
3745 .ClippedDrawingRectangleXMax =
3746 pass->render_area.offset.x + pass->render_area.extent.width - 1,
3747 .DrawingRectangleOriginY = 0,
3748 .DrawingRectangleOriginX = 0);
3749
3750 anv_cmd_buffer_emit_depth_stencil(cmd_buffer, pass);
3751
3752 anv_cmd_buffer_clear(cmd_buffer, pass);
3753 }
3754
3755 void anv_CmdEndRenderPass(
3756 VkCmdBuffer cmdBuffer,
3757 VkRenderPass renderPass)
3758 {
3759 /* Emit a flushing pipe control at the end of a pass. This is kind of a
3760 * hack but it ensures that render targets always actually get written.
3761 * Eventually, we should do flushing based on image format transitions
3762 * or something of that nature.
3763 */
3764 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer;
3765 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
3766 .PostSyncOperation = NoWrite,
3767 .RenderTargetCacheFlushEnable = true,
3768 .InstructionCacheInvalidateEnable = true,
3769 .DepthCacheFlushEnable = true,
3770 .VFCacheInvalidationEnable = true,
3771 .TextureCacheInvalidationEnable = true,
3772 .CommandStreamerStallEnable = true);
3773 }
3774
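/* Stub entry points for the debug marker / object-tag API.  They are declared
 * with default visibility so they are exported directly from the driver, and
 * for now they simply accept and ignore their arguments.
 */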
3775 void vkCmdDbgMarkerBegin(
3776 VkCmdBuffer cmdBuffer,
3777 const char* pMarker)
3778 __attribute__ ((visibility ("default")));
3779
3780 void vkCmdDbgMarkerEnd(
3781 VkCmdBuffer cmdBuffer)
3782 __attribute__ ((visibility ("default")));
3783
3784 VkResult vkDbgSetObjectTag(
3785 VkDevice device,
3786 VkObject object,
3787 size_t tagSize,
3788 const void* pTag)
3789 __attribute__ ((visibility ("default")));
3790
3791
3792 void vkCmdDbgMarkerBegin(
3793 VkCmdBuffer cmdBuffer,
3794 const char* pMarker)
3795 {
3796 }
3797
3798 void vkCmdDbgMarkerEnd(
3799 VkCmdBuffer cmdBuffer)
3800 {
3801 }
3802
3803 VkResult vkDbgSetObjectTag(
3804 VkDevice device,
3805 VkObject object,
3806 size_t tagSize,
3807 const void* pTag)
3808 {
3809 return VK_SUCCESS;
3810 }