vk: Implement scratch buffers to make spilling work
[mesa.git] / src / vulkan / device.c
1 /*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 #include <assert.h>
25 #include <stdbool.h>
26 #include <string.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29
30 #include "private.h"
31
/* Read an integer setting from the environment variable `name`.
 *
 * The text is parsed by strtol() with base 0, so decimal, octal ("0...") and
 * hexadecimal ("0x...") spellings are all accepted.  Returns 0 when the
 * variable is not set.
 */
static int
anv_env_get_int(const char *name)
{
   const char *text = getenv(name);

   return text != NULL ? strtol(text, NULL, 0) : 0;
}
42
43 static VkResult
44 fill_physical_device(struct anv_physical_device *device,
45 struct anv_instance *instance,
46 const char *path)
47 {
48 int fd;
49
50 fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC);
51 if (fd < 0)
52 return vk_error(VK_ERROR_UNAVAILABLE);
53
54 device->instance = instance;
55 device->path = path;
56
57 device->chipset_id = anv_env_get_int("INTEL_DEVID_OVERRIDE");
58 device->no_hw = false;
59 if (device->chipset_id) {
60 /* INTEL_DEVID_OVERRIDE implies INTEL_NO_HW. */
61 device->no_hw = true;
62 } else {
63 device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID);
64 }
65 if (!device->chipset_id)
66 goto fail;
67
68 device->name = brw_get_device_name(device->chipset_id);
69 device->info = brw_get_device_info(device->chipset_id, -1);
70 if (!device->info)
71 goto fail;
72
73 if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT))
74 goto fail;
75
76 if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2))
77 goto fail;
78
79 if (!anv_gem_get_param(fd, I915_PARAM_HAS_LLC))
80 goto fail;
81
82 if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXEC_CONSTANTS))
83 goto fail;
84
85 close(fd);
86
87 return VK_SUCCESS;
88
89 fail:
90 close(fd);
91
92 return vk_error(VK_ERROR_UNAVAILABLE);
93 }
94
95 static void *default_alloc(
96 void* pUserData,
97 size_t size,
98 size_t alignment,
99 VkSystemAllocType allocType)
100 {
101 return malloc(size);
102 }
103
/* Fallback pfnFree paired with default_alloc(): hands `pMem` to free().
 * `pUserData` is ignored.
 */
static void
default_free(void *pUserData, void *pMem)
{
   free(pMem);
}
110
111 static const VkAllocCallbacks default_alloc_callbacks = {
112 .pUserData = NULL,
113 .pfnAlloc = default_alloc,
114 .pfnFree = default_free
115 };
116
117 VkResult anv_CreateInstance(
118 const VkInstanceCreateInfo* pCreateInfo,
119 VkInstance* pInstance)
120 {
121 struct anv_instance *instance;
122 const VkAllocCallbacks *alloc_callbacks = &default_alloc_callbacks;
123 void *user_data = NULL;
124 VkResult result;
125
126 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
127
128 if (pCreateInfo->pAllocCb) {
129 alloc_callbacks = pCreateInfo->pAllocCb;
130 user_data = pCreateInfo->pAllocCb->pUserData;
131 }
132 instance = alloc_callbacks->pfnAlloc(user_data, sizeof(*instance), 8,
133 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
134 if (!instance)
135 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
136
137 instance->pAllocUserData = alloc_callbacks->pUserData;
138 instance->pfnAlloc = alloc_callbacks->pfnAlloc;
139 instance->pfnFree = alloc_callbacks->pfnFree;
140 instance->apiVersion = pCreateInfo->pAppInfo->apiVersion;
141
142 instance->physicalDeviceCount = 0;
143 result = fill_physical_device(&instance->physicalDevice,
144 instance, "/dev/dri/renderD128");
145
146 if (result != VK_SUCCESS)
147 return result;
148
149 instance->physicalDeviceCount++;
150 *pInstance = (VkInstance) instance;
151
152 return VK_SUCCESS;
153 }
154
155 VkResult anv_DestroyInstance(
156 VkInstance _instance)
157 {
158 struct anv_instance *instance = (struct anv_instance *) _instance;
159
160 instance->pfnFree(instance->pAllocUserData, instance);
161
162 return VK_SUCCESS;
163 }
164
165 VkResult anv_EnumeratePhysicalDevices(
166 VkInstance _instance,
167 uint32_t* pPhysicalDeviceCount,
168 VkPhysicalDevice* pPhysicalDevices)
169 {
170 struct anv_instance *instance = (struct anv_instance *) _instance;
171
172 if (*pPhysicalDeviceCount >= 1)
173 pPhysicalDevices[0] = (VkPhysicalDevice) &instance->physicalDevice;
174 *pPhysicalDeviceCount = instance->physicalDeviceCount;
175
176 return VK_SUCCESS;
177 }
178
179 VkResult anv_GetPhysicalDeviceInfo(
180 VkPhysicalDevice physicalDevice,
181 VkPhysicalDeviceInfoType infoType,
182 size_t* pDataSize,
183 void* pData)
184 {
185 struct anv_physical_device *device = (struct anv_physical_device *) physicalDevice;
186 VkPhysicalDeviceProperties *properties;
187 VkPhysicalDevicePerformance *performance;
188 VkPhysicalDeviceQueueProperties *queue_properties;
189 VkPhysicalDeviceMemoryProperties *memory_properties;
190 VkDisplayPropertiesWSI *display_properties;
191 uint64_t ns_per_tick = 80;
192
193 switch ((uint32_t) infoType) {
194 case VK_PHYSICAL_DEVICE_INFO_TYPE_PROPERTIES:
195 properties = pData;
196
197 *pDataSize = sizeof(*properties);
198 if (pData == NULL)
199 return VK_SUCCESS;
200
201 properties->apiVersion = 1;
202 properties->driverVersion = 1;
203 properties->vendorId = 0x8086;
204 properties->deviceId = device->chipset_id;
205 properties->deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
206 strcpy(properties->deviceName, device->name);
207 properties->maxInlineMemoryUpdateSize = 0;
208 properties->maxBoundDescriptorSets = MAX_SETS;
209 properties->maxThreadGroupSize = 512;
210 properties->timestampFrequency = 1000 * 1000 * 1000 / ns_per_tick;
211 properties->multiColorAttachmentClears = true;
212 properties->maxDescriptorSets = 8;
213 properties->maxViewports = 16;
214 properties->maxColorAttachments = 8;
215 return VK_SUCCESS;
216
217 case VK_PHYSICAL_DEVICE_INFO_TYPE_PERFORMANCE:
218 performance = pData;
219
220 *pDataSize = sizeof(*performance);
221 if (pData == NULL)
222 return VK_SUCCESS;
223
224 performance->maxDeviceClock = 1.0;
225 performance->aluPerClock = 1.0;
226 performance->texPerClock = 1.0;
227 performance->primsPerClock = 1.0;
228 performance->pixelsPerClock = 1.0;
229 return VK_SUCCESS;
230
231 case VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PROPERTIES:
232 queue_properties = pData;
233
234 *pDataSize = sizeof(*queue_properties);
235 if (pData == NULL)
236 return VK_SUCCESS;
237
238 queue_properties->queueFlags = 0;
239 queue_properties->queueCount = 1;
240 queue_properties->maxAtomicCounters = 0;
241 queue_properties->supportsTimestamps = true;
242 queue_properties->maxMemReferences = 256;
243 return VK_SUCCESS;
244
245 case VK_PHYSICAL_DEVICE_INFO_TYPE_MEMORY_PROPERTIES:
246 memory_properties = pData;
247
248 *pDataSize = sizeof(*memory_properties);
249 if (pData == NULL)
250 return VK_SUCCESS;
251
252 memory_properties->supportsMigration = false;
253 memory_properties->supportsPinning = false;
254 return VK_SUCCESS;
255
256 case VK_PHYSICAL_DEVICE_INFO_TYPE_DISPLAY_PROPERTIES_WSI:
257 anv_finishme("VK_PHYSICAL_DEVICE_INFO_TYPE_DISPLAY_PROPERTIES_WSI");
258
259 *pDataSize = sizeof(*display_properties);
260 if (pData == NULL)
261 return VK_SUCCESS;
262
263 display_properties = pData;
264 display_properties->display = 0;
265 display_properties->physicalResolution = (VkExtent2D) { 0, 0 };
266 return VK_SUCCESS;
267
268 case VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PRESENT_PROPERTIES_WSI:
269 anv_finishme("VK_PHYSICAL_DEVICE_INFO_TYPE_QUEUE_PRESENT_PROPERTIES_WSI");
270 return VK_SUCCESS;
271
272
273 default:
274 return VK_UNSUPPORTED;
275 }
276
277 }
278
279 void * vkGetProcAddr(
280 VkPhysicalDevice physicalDevice,
281 const char* pName)
282 {
283 return anv_lookup_entrypoint(pName);
284 }
285
286 static void
287 parse_debug_flags(struct anv_device *device)
288 {
289 const char *debug, *p, *end;
290
291 debug = getenv("INTEL_DEBUG");
292 device->dump_aub = false;
293 if (debug) {
294 for (p = debug; *p; p = end + 1) {
295 end = strchrnul(p, ',');
296 if (end - p == 3 && memcmp(p, "aub", 3) == 0)
297 device->dump_aub = true;
298 if (end - p == 5 && memcmp(p, "no_hw", 5) == 0)
299 device->no_hw = true;
300 if (*end == '\0')
301 break;
302 }
303 }
304 }
305
306 static VkResult
307 anv_queue_init(struct anv_device *device, struct anv_queue *queue)
308 {
309 queue->device = device;
310 queue->pool = &device->surface_state_pool;
311
312 queue->completed_serial = anv_state_pool_alloc(queue->pool, 4, 4);
313 if (queue->completed_serial.map == NULL)
314 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
315
316 *(uint32_t *)queue->completed_serial.map = 0;
317 queue->next_serial = 1;
318
319 return VK_SUCCESS;
320 }
321
/* Tear down a queue.  The completed-serial slot is only handed back to its
 * pool in valgrind builds; otherwise this is a no-op.
 */
static void
anv_queue_finish(struct anv_queue *queue)
{
#ifdef HAVE_VALGRIND
   /* The slot's storage disappears together with the device, so returning
    * it explicitly only matters when valgrind is tracking allocations.
    */
   anv_state_pool_free(queue->pool, queue->completed_serial);
#endif
}
332
333 static void
334 anv_device_init_border_colors(struct anv_device *device)
335 {
336 float float_border_colors[][4] = {
337 [VK_BORDER_COLOR_OPAQUE_WHITE] = { 1.0, 1.0, 1.0, 1.0 },
338 [VK_BORDER_COLOR_TRANSPARENT_BLACK] = { 0.0, 0.0, 0.0, 0.0 },
339 [VK_BORDER_COLOR_OPAQUE_BLACK] = { 0.0, 0.0, 0.0, 1.0 }
340 };
341
342 uint32_t uint32_border_colors[][4] = {
343 [VK_BORDER_COLOR_OPAQUE_WHITE] = { 1, 1, 1, 1 },
344 [VK_BORDER_COLOR_TRANSPARENT_BLACK] = { 0, 0, 0, 0 },
345 [VK_BORDER_COLOR_OPAQUE_BLACK] = { 0, 0, 0, 1 }
346 };
347
348 device->float_border_colors =
349 anv_state_pool_alloc(&device->dynamic_state_pool,
350 sizeof(float_border_colors), 32);
351 memcpy(device->float_border_colors.map,
352 float_border_colors, sizeof(float_border_colors));
353
354 device->uint32_border_colors =
355 anv_state_pool_alloc(&device->dynamic_state_pool,
356 sizeof(uint32_border_colors), 32);
357 memcpy(device->uint32_border_colors.map,
358 uint32_border_colors, sizeof(uint32_border_colors));
359
360 }
361
362 static const uint32_t BATCH_SIZE = 8192;
363
364 VkResult anv_CreateDevice(
365 VkPhysicalDevice _physicalDevice,
366 const VkDeviceCreateInfo* pCreateInfo,
367 VkDevice* pDevice)
368 {
369 struct anv_physical_device *physicalDevice =
370 (struct anv_physical_device *) _physicalDevice;
371 struct anv_instance *instance = physicalDevice->instance;
372 struct anv_device *device;
373
374 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
375
376 device = instance->pfnAlloc(instance->pAllocUserData,
377 sizeof(*device), 8,
378 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
379 if (!device)
380 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
381
382 device->no_hw = physicalDevice->no_hw;
383 parse_debug_flags(device);
384
385 device->instance = physicalDevice->instance;
386 device->fd = open("/dev/dri/renderD128", O_RDWR | O_CLOEXEC);
387 if (device->fd == -1)
388 goto fail_device;
389
390 device->context_id = anv_gem_create_context(device);
391 if (device->context_id == -1)
392 goto fail_fd;
393
394 anv_bo_pool_init(&device->batch_bo_pool, device, BATCH_SIZE);
395
396 anv_block_pool_init(&device->dynamic_state_block_pool, device, 2048);
397
398 anv_state_pool_init(&device->dynamic_state_pool,
399 &device->dynamic_state_block_pool);
400
401 anv_block_pool_init(&device->instruction_block_pool, device, 2048);
402 anv_block_pool_init(&device->surface_state_block_pool, device, 2048);
403
404 anv_state_pool_init(&device->surface_state_pool,
405 &device->surface_state_block_pool);
406
407 anv_block_pool_init(&device->scratch_block_pool, device, 0x10000);
408
409 device->info = *physicalDevice->info;
410
411 device->compiler = anv_compiler_create(device);
412 device->aub_writer = NULL;
413
414 pthread_mutex_init(&device->mutex, NULL);
415
416 anv_queue_init(device, &device->queue);
417
418 anv_device_init_meta(device);
419
420 anv_device_init_border_colors(device);
421
422 *pDevice = (VkDevice) device;
423
424 return VK_SUCCESS;
425
426 fail_fd:
427 close(device->fd);
428 fail_device:
429 anv_device_free(device, device);
430
431 return vk_error(VK_ERROR_UNAVAILABLE);
432 }
433
434 VkResult anv_DestroyDevice(
435 VkDevice _device)
436 {
437 struct anv_device *device = (struct anv_device *) _device;
438
439 anv_compiler_destroy(device->compiler);
440
441 anv_queue_finish(&device->queue);
442
443 anv_device_finish_meta(device);
444
445 #ifdef HAVE_VALGRIND
446 /* We only need to free these to prevent valgrind errors. The backing
447 * BO will go away in a couple of lines so we don't actually leak.
448 */
449 anv_state_pool_free(&device->dynamic_state_pool,
450 device->float_border_colors);
451 anv_state_pool_free(&device->dynamic_state_pool,
452 device->uint32_border_colors);
453 #endif
454
455 anv_bo_pool_finish(&device->batch_bo_pool);
456 anv_block_pool_finish(&device->dynamic_state_block_pool);
457 anv_block_pool_finish(&device->instruction_block_pool);
458 anv_block_pool_finish(&device->surface_state_block_pool);
459
460 close(device->fd);
461
462 if (device->aub_writer)
463 anv_aub_writer_destroy(device->aub_writer);
464
465 anv_device_free(device, device);
466
467 return VK_SUCCESS;
468 }
469
470 VkResult anv_GetGlobalExtensionInfo(
471 VkExtensionInfoType infoType,
472 uint32_t extensionIndex,
473 size_t* pDataSize,
474 void* pData)
475 {
476 static const VkExtensionProperties extensions[] = {
477 {
478 .extName = "VK_WSI_LunarG",
479 .version = 3
480 }
481 };
482 uint32_t count = ARRAY_SIZE(extensions);
483
484 switch (infoType) {
485 case VK_EXTENSION_INFO_TYPE_COUNT:
486 memcpy(pData, &count, sizeof(count));
487 *pDataSize = sizeof(count);
488 return VK_SUCCESS;
489
490 case VK_EXTENSION_INFO_TYPE_PROPERTIES:
491 if (extensionIndex >= count)
492 return vk_error(VK_ERROR_INVALID_EXTENSION);
493
494 memcpy(pData, &extensions[extensionIndex], sizeof(extensions[0]));
495 *pDataSize = sizeof(extensions[0]);
496 return VK_SUCCESS;
497
498 default:
499 return VK_UNSUPPORTED;
500 }
501 }
502
503 VkResult anv_GetPhysicalDeviceExtensionInfo(
504 VkPhysicalDevice physicalDevice,
505 VkExtensionInfoType infoType,
506 uint32_t extensionIndex,
507 size_t* pDataSize,
508 void* pData)
509 {
510 uint32_t *count;
511
512 switch (infoType) {
513 case VK_EXTENSION_INFO_TYPE_COUNT:
514 *pDataSize = 4;
515 if (pData == NULL)
516 return VK_SUCCESS;
517
518 count = pData;
519 *count = 0;
520 return VK_SUCCESS;
521
522 case VK_EXTENSION_INFO_TYPE_PROPERTIES:
523 return vk_error(VK_ERROR_INVALID_EXTENSION);
524
525 default:
526 return VK_UNSUPPORTED;
527 }
528 }
529
530 VkResult anv_EnumerateLayers(
531 VkPhysicalDevice physicalDevice,
532 size_t maxStringSize,
533 size_t* pLayerCount,
534 char* const* pOutLayers,
535 void* pReserved)
536 {
537 *pLayerCount = 0;
538
539 return VK_SUCCESS;
540 }
541
542 VkResult anv_GetDeviceQueue(
543 VkDevice _device,
544 uint32_t queueNodeIndex,
545 uint32_t queueIndex,
546 VkQueue* pQueue)
547 {
548 struct anv_device *device = (struct anv_device *) _device;
549
550 assert(queueIndex == 0);
551
552 *pQueue = (VkQueue) &device->queue;
553
554 return VK_SUCCESS;
555 }
556
557 VkResult
558 anv_reloc_list_init(struct anv_reloc_list *list, struct anv_device *device)
559 {
560 list->num_relocs = 0;
561 list->array_length = 256;
562 list->relocs =
563 anv_device_alloc(device, list->array_length * sizeof(*list->relocs), 8,
564 VK_SYSTEM_ALLOC_TYPE_INTERNAL);
565
566 if (list->relocs == NULL)
567 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
568
569 list->reloc_bos =
570 anv_device_alloc(device, list->array_length * sizeof(*list->reloc_bos), 8,
571 VK_SYSTEM_ALLOC_TYPE_INTERNAL);
572
573 if (list->relocs == NULL) {
574 anv_device_free(device, list->relocs);
575 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
576 }
577
578 return VK_SUCCESS;
579 }
580
581 void
582 anv_reloc_list_finish(struct anv_reloc_list *list, struct anv_device *device)
583 {
584 anv_device_free(device, list->relocs);
585 anv_device_free(device, list->reloc_bos);
586 }
587
588 static VkResult
589 anv_reloc_list_grow(struct anv_reloc_list *list, struct anv_device *device,
590 size_t num_additional_relocs)
591 {
592 if (list->num_relocs + num_additional_relocs <= list->array_length)
593 return VK_SUCCESS;
594
595 size_t new_length = list->array_length * 2;
596 while (new_length < list->num_relocs + num_additional_relocs)
597 new_length *= 2;
598
599 struct drm_i915_gem_relocation_entry *new_relocs =
600 anv_device_alloc(device, new_length * sizeof(*list->relocs), 8,
601 VK_SYSTEM_ALLOC_TYPE_INTERNAL);
602 if (new_relocs == NULL)
603 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
604
605 struct anv_bo **new_reloc_bos =
606 anv_device_alloc(device, new_length * sizeof(*list->reloc_bos), 8,
607 VK_SYSTEM_ALLOC_TYPE_INTERNAL);
608 if (new_relocs == NULL) {
609 anv_device_free(device, new_relocs);
610 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
611 }
612
613 memcpy(new_relocs, list->relocs, list->num_relocs * sizeof(*list->relocs));
614 memcpy(new_reloc_bos, list->reloc_bos,
615 list->num_relocs * sizeof(*list->reloc_bos));
616
617 anv_device_free(device, list->relocs);
618 anv_device_free(device, list->reloc_bos);
619
620 list->relocs = new_relocs;
621 list->reloc_bos = new_reloc_bos;
622
623 return VK_SUCCESS;
624 }
625
626 static VkResult
627 anv_batch_bo_create(struct anv_device *device, struct anv_batch_bo **bbo_out)
628 {
629 VkResult result;
630
631 struct anv_batch_bo *bbo =
632 anv_device_alloc(device, sizeof(*bbo), 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
633 if (bbo == NULL)
634 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
635
636 bbo->num_relocs = 0;
637 bbo->prev_batch_bo = NULL;
638
639 result = anv_bo_pool_alloc(&device->batch_bo_pool, &bbo->bo);
640 if (result != VK_SUCCESS) {
641 anv_device_free(device, bbo);
642 return result;
643 }
644
645 *bbo_out = bbo;
646
647 return VK_SUCCESS;
648 }
649
650 static void
651 anv_batch_bo_start(struct anv_batch_bo *bbo, struct anv_batch *batch,
652 size_t batch_padding)
653 {
654 batch->next = batch->start = bbo->bo.map;
655 batch->end = bbo->bo.map + bbo->bo.size - batch_padding;
656 bbo->first_reloc = batch->relocs.num_relocs;
657 }
658
659 static void
660 anv_batch_bo_finish(struct anv_batch_bo *bbo, struct anv_batch *batch)
661 {
662 assert(batch->start == bbo->bo.map);
663 bbo->length = batch->next - batch->start;
664 VG(VALGRIND_CHECK_MEM_IS_DEFINED(batch->start, bbo->length));
665 bbo->num_relocs = batch->relocs.num_relocs - bbo->first_reloc;
666 }
667
668 static void
669 anv_batch_bo_destroy(struct anv_batch_bo *bbo, struct anv_device *device)
670 {
671 anv_bo_pool_free(&device->batch_bo_pool, &bbo->bo);
672 anv_device_free(device, bbo);
673 }
674
675 void *
676 anv_batch_emit_dwords(struct anv_batch *batch, int num_dwords)
677 {
678 if (batch->next + num_dwords * 4 > batch->end)
679 batch->extend_cb(batch, batch->user_data);
680
681 void *p = batch->next;
682
683 batch->next += num_dwords * 4;
684 assert(batch->next <= batch->end);
685
686 return p;
687 }
688
689 static void
690 anv_reloc_list_append(struct anv_reloc_list *list, struct anv_device *device,
691 struct anv_reloc_list *other, uint32_t offset)
692 {
693 anv_reloc_list_grow(list, device, other->num_relocs);
694 /* TODO: Handle failure */
695
696 memcpy(&list->relocs[list->num_relocs], &other->relocs[0],
697 other->num_relocs * sizeof(other->relocs[0]));
698 memcpy(&list->reloc_bos[list->num_relocs], &other->reloc_bos[0],
699 other->num_relocs * sizeof(other->reloc_bos[0]));
700
701 for (uint32_t i = 0; i < other->num_relocs; i++)
702 list->relocs[i + list->num_relocs].offset += offset;
703
704 list->num_relocs += other->num_relocs;
705 }
706
707 static uint64_t
708 anv_reloc_list_add(struct anv_reloc_list *list, struct anv_device *device,
709 uint32_t offset, struct anv_bo *target_bo, uint32_t delta)
710 {
711 struct drm_i915_gem_relocation_entry *entry;
712 int index;
713
714 anv_reloc_list_grow(list, device, 1);
715 /* TODO: Handle failure */
716
717 /* XXX: Can we use I915_EXEC_HANDLE_LUT? */
718 index = list->num_relocs++;
719 list->reloc_bos[index] = target_bo;
720 entry = &list->relocs[index];
721 entry->target_handle = target_bo->gem_handle;
722 entry->delta = delta;
723 entry->offset = offset;
724 entry->presumed_offset = target_bo->offset;
725 entry->read_domains = 0;
726 entry->write_domain = 0;
727
728 return target_bo->offset + delta;
729 }
730
731 void
732 anv_batch_emit_batch(struct anv_batch *batch, struct anv_batch *other)
733 {
734 uint32_t size, offset;
735
736 size = other->next - other->start;
737 assert(size % 4 == 0);
738
739 if (batch->next + size > batch->end)
740 batch->extend_cb(batch, batch->user_data);
741
742 assert(batch->next + size <= batch->end);
743
744 memcpy(batch->next, other->start, size);
745
746 offset = batch->next - batch->start;
747 anv_reloc_list_append(&batch->relocs, batch->device,
748 &other->relocs, offset);
749
750 batch->next += size;
751 }
752
753 uint64_t
754 anv_batch_emit_reloc(struct anv_batch *batch,
755 void *location, struct anv_bo *bo, uint32_t delta)
756 {
757 return anv_reloc_list_add(&batch->relocs, batch->device,
758 location - batch->start, bo, delta);
759 }
760
761 VkResult anv_QueueSubmit(
762 VkQueue _queue,
763 uint32_t cmdBufferCount,
764 const VkCmdBuffer* pCmdBuffers,
765 VkFence _fence)
766 {
767 struct anv_queue *queue = (struct anv_queue *) _queue;
768 struct anv_device *device = queue->device;
769 struct anv_fence *fence = (struct anv_fence *) _fence;
770 int ret;
771
772 for (uint32_t i = 0; i < cmdBufferCount; i++) {
773 struct anv_cmd_buffer *cmd_buffer =
774 (struct anv_cmd_buffer *) pCmdBuffers[i];
775
776 if (device->dump_aub)
777 anv_cmd_buffer_dump(cmd_buffer);
778
779 if (!device->no_hw) {
780 ret = anv_gem_execbuffer(device, &cmd_buffer->execbuf);
781 if (ret != 0)
782 return vk_error(VK_ERROR_UNKNOWN);
783
784 if (fence) {
785 ret = anv_gem_execbuffer(device, &fence->execbuf);
786 if (ret != 0)
787 return vk_error(VK_ERROR_UNKNOWN);
788 }
789
790 for (uint32_t i = 0; i < cmd_buffer->bo_count; i++)
791 cmd_buffer->exec2_bos[i]->offset = cmd_buffer->exec2_objects[i].offset;
792 } else {
793 *(uint32_t *)queue->completed_serial.map = cmd_buffer->serial;
794 }
795 }
796
797 return VK_SUCCESS;
798 }
799
800 VkResult anv_QueueAddMemReferences(
801 VkQueue queue,
802 uint32_t count,
803 const VkDeviceMemory* pMems)
804 {
805 return VK_SUCCESS;
806 }
807
808 VkResult anv_QueueRemoveMemReferences(
809 VkQueue queue,
810 uint32_t count,
811 const VkDeviceMemory* pMems)
812 {
813 return VK_SUCCESS;
814 }
815
816 VkResult anv_QueueWaitIdle(
817 VkQueue _queue)
818 {
819 struct anv_queue *queue = (struct anv_queue *) _queue;
820
821 return vkDeviceWaitIdle((VkDevice) queue->device);
822 }
823
824 VkResult anv_DeviceWaitIdle(
825 VkDevice _device)
826 {
827 struct anv_device *device = (struct anv_device *) _device;
828 struct anv_state state;
829 struct anv_batch batch;
830 struct drm_i915_gem_execbuffer2 execbuf;
831 struct drm_i915_gem_exec_object2 exec2_objects[1];
832 struct anv_bo *bo = NULL;
833 VkResult result;
834 int64_t timeout;
835 int ret;
836
837 state = anv_state_pool_alloc(&device->dynamic_state_pool, 32, 32);
838 bo = &device->dynamic_state_pool.block_pool->bo;
839 batch.start = batch.next = state.map;
840 batch.end = state.map + 32;
841 anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END);
842 anv_batch_emit(&batch, GEN8_MI_NOOP);
843
844 exec2_objects[0].handle = bo->gem_handle;
845 exec2_objects[0].relocation_count = 0;
846 exec2_objects[0].relocs_ptr = 0;
847 exec2_objects[0].alignment = 0;
848 exec2_objects[0].offset = bo->offset;
849 exec2_objects[0].flags = 0;
850 exec2_objects[0].rsvd1 = 0;
851 exec2_objects[0].rsvd2 = 0;
852
853 execbuf.buffers_ptr = (uintptr_t) exec2_objects;
854 execbuf.buffer_count = 1;
855 execbuf.batch_start_offset = state.offset;
856 execbuf.batch_len = batch.next - state.map;
857 execbuf.cliprects_ptr = 0;
858 execbuf.num_cliprects = 0;
859 execbuf.DR1 = 0;
860 execbuf.DR4 = 0;
861
862 execbuf.flags =
863 I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
864 execbuf.rsvd1 = device->context_id;
865 execbuf.rsvd2 = 0;
866
867 if (!device->no_hw) {
868 ret = anv_gem_execbuffer(device, &execbuf);
869 if (ret != 0) {
870 result = vk_error(VK_ERROR_UNKNOWN);
871 goto fail;
872 }
873
874 timeout = INT64_MAX;
875 ret = anv_gem_wait(device, bo->gem_handle, &timeout);
876 if (ret != 0) {
877 result = vk_error(VK_ERROR_UNKNOWN);
878 goto fail;
879 }
880 }
881
882 anv_state_pool_free(&device->dynamic_state_pool, state);
883
884 return VK_SUCCESS;
885
886 fail:
887 anv_state_pool_free(&device->dynamic_state_pool, state);
888
889 return result;
890 }
891
892 void *
893 anv_device_alloc(struct anv_device * device,
894 size_t size,
895 size_t alignment,
896 VkSystemAllocType allocType)
897 {
898 return device->instance->pfnAlloc(device->instance->pAllocUserData,
899 size,
900 alignment,
901 allocType);
902 }
903
904 void
905 anv_device_free(struct anv_device * device,
906 void * mem)
907 {
908 return device->instance->pfnFree(device->instance->pAllocUserData,
909 mem);
910 }
911
912 VkResult
913 anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
914 {
915 bo->gem_handle = anv_gem_create(device, size);
916 if (!bo->gem_handle)
917 return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
918
919 bo->map = NULL;
920 bo->index = 0;
921 bo->offset = 0;
922 bo->size = size;
923
924 return VK_SUCCESS;
925 }
926
927 VkResult anv_AllocMemory(
928 VkDevice _device,
929 const VkMemoryAllocInfo* pAllocInfo,
930 VkDeviceMemory* pMem)
931 {
932 struct anv_device *device = (struct anv_device *) _device;
933 struct anv_device_memory *mem;
934 VkResult result;
935
936 assert(pAllocInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOC_INFO);
937
938 mem = anv_device_alloc(device, sizeof(*mem), 8,
939 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
940 if (mem == NULL)
941 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
942
943 result = anv_bo_init_new(&mem->bo, device, pAllocInfo->allocationSize);
944 if (result != VK_SUCCESS)
945 goto fail;
946
947 *pMem = (VkDeviceMemory) mem;
948
949 return VK_SUCCESS;
950
951 fail:
952 anv_device_free(device, mem);
953
954 return result;
955 }
956
957 VkResult anv_FreeMemory(
958 VkDevice _device,
959 VkDeviceMemory _mem)
960 {
961 struct anv_device *device = (struct anv_device *) _device;
962 struct anv_device_memory *mem = (struct anv_device_memory *) _mem;
963
964 if (mem->bo.map)
965 anv_gem_munmap(mem->bo.map, mem->bo.size);
966
967 if (mem->bo.gem_handle != 0)
968 anv_gem_close(device, mem->bo.gem_handle);
969
970 anv_device_free(device, mem);
971
972 return VK_SUCCESS;
973 }
974
975 VkResult anv_SetMemoryPriority(
976 VkDevice device,
977 VkDeviceMemory mem,
978 VkMemoryPriority priority)
979 {
980 return VK_SUCCESS;
981 }
982
983 VkResult anv_MapMemory(
984 VkDevice _device,
985 VkDeviceMemory _mem,
986 VkDeviceSize offset,
987 VkDeviceSize size,
988 VkMemoryMapFlags flags,
989 void** ppData)
990 {
991 struct anv_device *device = (struct anv_device *) _device;
992 struct anv_device_memory *mem = (struct anv_device_memory *) _mem;
993
994 /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only
995 * takes a VkDeviceMemory pointer, it seems like only one map of the memory
996 * at a time is valid. We could just mmap up front and return an offset
997 * pointer here, but that may exhaust virtual memory on 32 bit
998 * userspace. */
999
1000 mem->map = anv_gem_mmap(device, mem->bo.gem_handle, offset, size);
1001 mem->map_size = size;
1002
1003 *ppData = mem->map;
1004
1005 return VK_SUCCESS;
1006 }
1007
1008 VkResult anv_UnmapMemory(
1009 VkDevice _device,
1010 VkDeviceMemory _mem)
1011 {
1012 struct anv_device_memory *mem = (struct anv_device_memory *) _mem;
1013
1014 anv_gem_munmap(mem->map, mem->map_size);
1015
1016 return VK_SUCCESS;
1017 }
1018
1019 VkResult anv_FlushMappedMemory(
1020 VkDevice device,
1021 VkDeviceMemory mem,
1022 VkDeviceSize offset,
1023 VkDeviceSize size)
1024 {
1025 /* clflush here for !llc platforms */
1026
1027 return VK_SUCCESS;
1028 }
1029
1030 VkResult anv_PinSystemMemory(
1031 VkDevice device,
1032 const void* pSysMem,
1033 size_t memSize,
1034 VkDeviceMemory* pMem)
1035 {
1036 return VK_SUCCESS;
1037 }
1038
1039 VkResult anv_GetMultiDeviceCompatibility(
1040 VkPhysicalDevice physicalDevice0,
1041 VkPhysicalDevice physicalDevice1,
1042 VkPhysicalDeviceCompatibilityInfo* pInfo)
1043 {
1044 return VK_UNSUPPORTED;
1045 }
1046
1047 VkResult anv_OpenSharedMemory(
1048 VkDevice device,
1049 const VkMemoryOpenInfo* pOpenInfo,
1050 VkDeviceMemory* pMem)
1051 {
1052 return VK_UNSUPPORTED;
1053 }
1054
1055 VkResult anv_OpenSharedSemaphore(
1056 VkDevice device,
1057 const VkSemaphoreOpenInfo* pOpenInfo,
1058 VkSemaphore* pSemaphore)
1059 {
1060 return VK_UNSUPPORTED;
1061 }
1062
1063 VkResult anv_OpenPeerMemory(
1064 VkDevice device,
1065 const VkPeerMemoryOpenInfo* pOpenInfo,
1066 VkDeviceMemory* pMem)
1067 {
1068 return VK_UNSUPPORTED;
1069 }
1070
1071 VkResult anv_OpenPeerImage(
1072 VkDevice device,
1073 const VkPeerImageOpenInfo* pOpenInfo,
1074 VkImage* pImage,
1075 VkDeviceMemory* pMem)
1076 {
1077 return VK_UNSUPPORTED;
1078 }
1079
1080 VkResult anv_DestroyObject(
1081 VkDevice _device,
1082 VkObjectType objType,
1083 VkObject _object)
1084 {
1085 struct anv_device *device = (struct anv_device *) _device;
1086 struct anv_object *object = (struct anv_object *) _object;
1087
1088 switch (objType) {
1089 case VK_OBJECT_TYPE_INSTANCE:
1090 return anv_DestroyInstance((VkInstance) _object);
1091
1092 case VK_OBJECT_TYPE_PHYSICAL_DEVICE:
1093 /* We don't want to actually destroy physical devices */
1094 return VK_SUCCESS;
1095
1096 case VK_OBJECT_TYPE_DEVICE:
1097 assert(_device == (VkDevice) _object);
1098 return anv_DestroyDevice((VkDevice) _object);
1099
1100 case VK_OBJECT_TYPE_QUEUE:
1101 /* TODO */
1102 return VK_SUCCESS;
1103
1104 case VK_OBJECT_TYPE_DEVICE_MEMORY:
1105 return anv_FreeMemory(_device, (VkDeviceMemory) _object);
1106
1107 case VK_OBJECT_TYPE_DESCRIPTOR_POOL:
1108 /* These are just dummys anyway, so we don't need to destroy them */
1109 return VK_SUCCESS;
1110
1111 case VK_OBJECT_TYPE_BUFFER:
1112 case VK_OBJECT_TYPE_IMAGE:
1113 case VK_OBJECT_TYPE_DEPTH_STENCIL_VIEW:
1114 case VK_OBJECT_TYPE_SHADER:
1115 case VK_OBJECT_TYPE_PIPELINE_LAYOUT:
1116 case VK_OBJECT_TYPE_SAMPLER:
1117 case VK_OBJECT_TYPE_DESCRIPTOR_SET:
1118 case VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT:
1119 case VK_OBJECT_TYPE_DYNAMIC_RS_STATE:
1120 case VK_OBJECT_TYPE_DYNAMIC_CB_STATE:
1121 case VK_OBJECT_TYPE_DYNAMIC_DS_STATE:
1122 case VK_OBJECT_TYPE_RENDER_PASS:
1123 /* These are trivially destroyable */
1124 anv_device_free(device, (void *) _object);
1125 return VK_SUCCESS;
1126
1127 case VK_OBJECT_TYPE_COMMAND_BUFFER:
1128 case VK_OBJECT_TYPE_PIPELINE:
1129 case VK_OBJECT_TYPE_DYNAMIC_VP_STATE:
1130 case VK_OBJECT_TYPE_FENCE:
1131 case VK_OBJECT_TYPE_QUERY_POOL:
1132 case VK_OBJECT_TYPE_FRAMEBUFFER:
1133 case VK_OBJECT_TYPE_BUFFER_VIEW:
1134 case VK_OBJECT_TYPE_IMAGE_VIEW:
1135 case VK_OBJECT_TYPE_COLOR_ATTACHMENT_VIEW:
1136 (object->destructor)(device, object, objType);
1137 return VK_SUCCESS;
1138
1139 case VK_OBJECT_TYPE_SEMAPHORE:
1140 case VK_OBJECT_TYPE_EVENT:
1141 stub_return(VK_UNSUPPORTED);
1142
1143 default:
1144 unreachable("Invalid object type");
1145 }
1146 }
1147
1148 static void
1149 fill_memory_requirements(
1150 VkObjectType objType,
1151 VkObject object,
1152 VkMemoryRequirements * memory_requirements)
1153 {
1154 struct anv_buffer *buffer;
1155 struct anv_image *image;
1156
1157 memory_requirements->memPropsAllowed =
1158 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
1159 VK_MEMORY_PROPERTY_HOST_DEVICE_COHERENT_BIT |
1160 /* VK_MEMORY_PROPERTY_HOST_UNCACHED_BIT | */
1161 VK_MEMORY_PROPERTY_HOST_WRITE_COMBINED_BIT |
1162 VK_MEMORY_PROPERTY_PREFER_HOST_LOCAL |
1163 VK_MEMORY_PROPERTY_SHAREABLE_BIT;
1164
1165 memory_requirements->memPropsRequired = 0;
1166
1167 switch (objType) {
1168 case VK_OBJECT_TYPE_BUFFER:
1169 buffer = (struct anv_buffer *) object;
1170 memory_requirements->size = buffer->size;
1171 memory_requirements->alignment = 16;
1172 break;
1173 case VK_OBJECT_TYPE_IMAGE:
1174 image = (struct anv_image *) object;
1175 memory_requirements->size = image->size;
1176 memory_requirements->alignment = image->alignment;
1177 break;
1178 default:
1179 memory_requirements->size = 0;
1180 break;
1181 }
1182 }
1183
1184 static uint32_t
1185 get_allocation_count(VkObjectType objType)
1186 {
1187 switch (objType) {
1188 case VK_OBJECT_TYPE_BUFFER:
1189 case VK_OBJECT_TYPE_IMAGE:
1190 return 1;
1191 default:
1192 return 0;
1193 }
1194 }
1195
1196 VkResult anv_GetObjectInfo(
1197 VkDevice _device,
1198 VkObjectType objType,
1199 VkObject object,
1200 VkObjectInfoType infoType,
1201 size_t* pDataSize,
1202 void* pData)
1203 {
1204 VkMemoryRequirements memory_requirements;
1205 uint32_t *count;
1206
1207 switch (infoType) {
1208 case VK_OBJECT_INFO_TYPE_MEMORY_REQUIREMENTS:
1209 *pDataSize = sizeof(memory_requirements);
1210 if (pData == NULL)
1211 return VK_SUCCESS;
1212
1213 fill_memory_requirements(objType, object, pData);
1214 return VK_SUCCESS;
1215
1216 case VK_OBJECT_INFO_TYPE_MEMORY_ALLOCATION_COUNT:
1217 *pDataSize = sizeof(count);
1218 if (pData == NULL)
1219 return VK_SUCCESS;
1220
1221 count = pData;
1222 *count = get_allocation_count(objType);
1223 return VK_SUCCESS;
1224
1225 default:
1226 return vk_error(VK_UNSUPPORTED);
1227 }
1228
1229 }
1230
1231 VkResult anv_QueueBindObjectMemory(
1232 VkQueue queue,
1233 VkObjectType objType,
1234 VkObject object,
1235 uint32_t allocationIdx,
1236 VkDeviceMemory _mem,
1237 VkDeviceSize memOffset)
1238 {
1239 struct anv_buffer *buffer;
1240 struct anv_image *image;
1241 struct anv_device_memory *mem = (struct anv_device_memory *) _mem;
1242
1243 switch (objType) {
1244 case VK_OBJECT_TYPE_BUFFER:
1245 buffer = (struct anv_buffer *) object;
1246 buffer->bo = &mem->bo;
1247 buffer->offset = memOffset;
1248 break;
1249 case VK_OBJECT_TYPE_IMAGE:
1250 image = (struct anv_image *) object;
1251 image->bo = &mem->bo;
1252 image->offset = memOffset;
1253 break;
1254 default:
1255 break;
1256 }
1257
1258 return VK_SUCCESS;
1259 }
1260
1261 VkResult anv_QueueBindObjectMemoryRange(
1262 VkQueue queue,
1263 VkObjectType objType,
1264 VkObject object,
1265 uint32_t allocationIdx,
1266 VkDeviceSize rangeOffset,
1267 VkDeviceSize rangeSize,
1268 VkDeviceMemory mem,
1269 VkDeviceSize memOffset)
1270 {
1271 stub_return(VK_UNSUPPORTED);
1272 }
1273
1274 VkResult anv_QueueBindImageMemoryRange(
1275 VkQueue queue,
1276 VkImage image,
1277 uint32_t allocationIdx,
1278 const VkImageMemoryBindInfo* pBindInfo,
1279 VkDeviceMemory mem,
1280 VkDeviceSize memOffset)
1281 {
1282 stub_return(VK_UNSUPPORTED);
1283 }
1284
1285 static void
1286 anv_fence_destroy(struct anv_device *device,
1287 struct anv_object *object,
1288 VkObjectType obj_type)
1289 {
1290 struct anv_fence *fence = (struct anv_fence *) object;
1291
1292 assert(obj_type == VK_OBJECT_TYPE_FENCE);
1293
1294 anv_gem_munmap(fence->bo.map, fence->bo.size);
1295 anv_gem_close(device, fence->bo.gem_handle);
1296 anv_device_free(device, fence);
1297 }
1298
1299 VkResult anv_CreateFence(
1300 VkDevice _device,
1301 const VkFenceCreateInfo* pCreateInfo,
1302 VkFence* pFence)
1303 {
1304 struct anv_device *device = (struct anv_device *) _device;
1305 struct anv_fence *fence;
1306 struct anv_batch batch;
1307 VkResult result;
1308
1309 const uint32_t fence_size = 128;
1310
1311 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
1312
1313 fence = anv_device_alloc(device, sizeof(*fence), 8,
1314 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1315 if (fence == NULL)
1316 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1317
1318 result = anv_bo_init_new(&fence->bo, device, fence_size);
1319 if (result != VK_SUCCESS)
1320 goto fail;
1321
1322 fence->base.destructor = anv_fence_destroy;
1323
1324 fence->bo.map =
1325 anv_gem_mmap(device, fence->bo.gem_handle, 0, fence->bo.size);
1326 batch.next = batch.start = fence->bo.map;
1327 batch.end = fence->bo.map + fence->bo.size;
1328 anv_batch_emit(&batch, GEN8_MI_BATCH_BUFFER_END);
1329 anv_batch_emit(&batch, GEN8_MI_NOOP);
1330
1331 fence->exec2_objects[0].handle = fence->bo.gem_handle;
1332 fence->exec2_objects[0].relocation_count = 0;
1333 fence->exec2_objects[0].relocs_ptr = 0;
1334 fence->exec2_objects[0].alignment = 0;
1335 fence->exec2_objects[0].offset = fence->bo.offset;
1336 fence->exec2_objects[0].flags = 0;
1337 fence->exec2_objects[0].rsvd1 = 0;
1338 fence->exec2_objects[0].rsvd2 = 0;
1339
1340 fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects;
1341 fence->execbuf.buffer_count = 1;
1342 fence->execbuf.batch_start_offset = 0;
1343 fence->execbuf.batch_len = batch.next - fence->bo.map;
1344 fence->execbuf.cliprects_ptr = 0;
1345 fence->execbuf.num_cliprects = 0;
1346 fence->execbuf.DR1 = 0;
1347 fence->execbuf.DR4 = 0;
1348
1349 fence->execbuf.flags =
1350 I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
1351 fence->execbuf.rsvd1 = device->context_id;
1352 fence->execbuf.rsvd2 = 0;
1353
1354 *pFence = (VkFence) fence;
1355
1356 return VK_SUCCESS;
1357
1358 fail:
1359 anv_device_free(device, fence);
1360
1361 return result;
1362 }
1363
1364 VkResult anv_ResetFences(
1365 VkDevice _device,
1366 uint32_t fenceCount,
1367 VkFence* pFences)
1368 {
1369 struct anv_fence **fences = (struct anv_fence **) pFences;
1370
1371 for (uint32_t i = 0; i < fenceCount; i++)
1372 fences[i]->ready = false;
1373
1374 return VK_SUCCESS;
1375 }
1376
1377 VkResult anv_GetFenceStatus(
1378 VkDevice _device,
1379 VkFence _fence)
1380 {
1381 struct anv_device *device = (struct anv_device *) _device;
1382 struct anv_fence *fence = (struct anv_fence *) _fence;
1383 int64_t t = 0;
1384 int ret;
1385
1386 if (fence->ready)
1387 return VK_SUCCESS;
1388
1389 ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
1390 if (ret == 0) {
1391 fence->ready = true;
1392 return VK_SUCCESS;
1393 }
1394
1395 return VK_NOT_READY;
1396 }
1397
1398 VkResult anv_WaitForFences(
1399 VkDevice _device,
1400 uint32_t fenceCount,
1401 const VkFence* pFences,
1402 bool32_t waitAll,
1403 uint64_t timeout)
1404 {
1405 struct anv_device *device = (struct anv_device *) _device;
1406 struct anv_fence **fences = (struct anv_fence **) pFences;
1407 int64_t t = timeout;
1408 int ret;
1409
1410 /* FIXME: handle !waitAll */
1411
1412 for (uint32_t i = 0; i < fenceCount; i++) {
1413 ret = anv_gem_wait(device, fences[i]->bo.gem_handle, &t);
1414 if (ret == -1 && errno == ETIME)
1415 return VK_TIMEOUT;
1416 else if (ret == -1)
1417 return vk_error(VK_ERROR_UNKNOWN);
1418 }
1419
1420 return VK_SUCCESS;
1421 }
1422
1423 // Queue semaphore functions
1424
1425 VkResult anv_CreateSemaphore(
1426 VkDevice device,
1427 const VkSemaphoreCreateInfo* pCreateInfo,
1428 VkSemaphore* pSemaphore)
1429 {
1430 stub_return(VK_UNSUPPORTED);
1431 }
1432
1433 VkResult anv_QueueSignalSemaphore(
1434 VkQueue queue,
1435 VkSemaphore semaphore)
1436 {
1437 stub_return(VK_UNSUPPORTED);
1438 }
1439
1440 VkResult anv_QueueWaitSemaphore(
1441 VkQueue queue,
1442 VkSemaphore semaphore)
1443 {
1444 stub_return(VK_UNSUPPORTED);
1445 }
1446
1447 // Event functions
1448
1449 VkResult anv_CreateEvent(
1450 VkDevice device,
1451 const VkEventCreateInfo* pCreateInfo,
1452 VkEvent* pEvent)
1453 {
1454 stub_return(VK_UNSUPPORTED);
1455 }
1456
1457 VkResult anv_GetEventStatus(
1458 VkDevice device,
1459 VkEvent event)
1460 {
1461 stub_return(VK_UNSUPPORTED);
1462 }
1463
1464 VkResult anv_SetEvent(
1465 VkDevice device,
1466 VkEvent event)
1467 {
1468 stub_return(VK_UNSUPPORTED);
1469 }
1470
1471 VkResult anv_ResetEvent(
1472 VkDevice device,
1473 VkEvent event)
1474 {
1475 stub_return(VK_UNSUPPORTED);
1476 }
1477
1478 // Buffer functions
1479
1480 VkResult anv_CreateBuffer(
1481 VkDevice _device,
1482 const VkBufferCreateInfo* pCreateInfo,
1483 VkBuffer* pBuffer)
1484 {
1485 struct anv_device *device = (struct anv_device *) _device;
1486 struct anv_buffer *buffer;
1487
1488 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
1489
1490 buffer = anv_device_alloc(device, sizeof(*buffer), 8,
1491 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1492 if (buffer == NULL)
1493 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1494
1495 buffer->size = pCreateInfo->size;
1496 buffer->bo = NULL;
1497 buffer->offset = 0;
1498
1499 *pBuffer = (VkBuffer) buffer;
1500
1501 return VK_SUCCESS;
1502 }
1503
1504 // Buffer view functions
1505
1506 static void
1507 fill_buffer_surface_state(void *state, VkFormat format,
1508 uint32_t offset, uint32_t range)
1509 {
1510 const struct anv_format *info;
1511
1512 info = anv_format_for_vk_format(format);
1513 /* This assumes RGBA float format. */
1514 uint32_t stride = 4;
1515 uint32_t num_elements = range / stride;
1516
1517 struct GEN8_RENDER_SURFACE_STATE surface_state = {
1518 .SurfaceType = SURFTYPE_BUFFER,
1519 .SurfaceArray = false,
1520 .SurfaceFormat = info->format,
1521 .SurfaceVerticalAlignment = VALIGN4,
1522 .SurfaceHorizontalAlignment = HALIGN4,
1523 .TileMode = LINEAR,
1524 .VerticalLineStride = 0,
1525 .VerticalLineStrideOffset = 0,
1526 .SamplerL2BypassModeDisable = true,
1527 .RenderCacheReadWriteMode = WriteOnlyCache,
1528 .MemoryObjectControlState = GEN8_MOCS,
1529 .BaseMipLevel = 0.0,
1530 .SurfaceQPitch = 0,
1531 .Height = (num_elements >> 7) & 0x3fff,
1532 .Width = num_elements & 0x7f,
1533 .Depth = (num_elements >> 21) & 0x3f,
1534 .SurfacePitch = stride - 1,
1535 .MinimumArrayElement = 0,
1536 .NumberofMultisamples = MULTISAMPLECOUNT_1,
1537 .XOffset = 0,
1538 .YOffset = 0,
1539 .SurfaceMinLOD = 0,
1540 .MIPCountLOD = 0,
1541 .AuxiliarySurfaceMode = AUX_NONE,
1542 .RedClearColor = 0,
1543 .GreenClearColor = 0,
1544 .BlueClearColor = 0,
1545 .AlphaClearColor = 0,
1546 .ShaderChannelSelectRed = SCS_RED,
1547 .ShaderChannelSelectGreen = SCS_GREEN,
1548 .ShaderChannelSelectBlue = SCS_BLUE,
1549 .ShaderChannelSelectAlpha = SCS_ALPHA,
1550 .ResourceMinLOD = 0.0,
1551 /* FIXME: We assume that the image must be bound at this time. */
1552 .SurfaceBaseAddress = { NULL, offset },
1553 };
1554
1555 GEN8_RENDER_SURFACE_STATE_pack(NULL, state, &surface_state);
1556 }
1557
1558 VkResult anv_CreateBufferView(
1559 VkDevice _device,
1560 const VkBufferViewCreateInfo* pCreateInfo,
1561 VkBufferView* pView)
1562 {
1563 struct anv_device *device = (struct anv_device *) _device;
1564 struct anv_buffer *buffer = (struct anv_buffer *) pCreateInfo->buffer;
1565 struct anv_surface_view *view;
1566
1567 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO);
1568
1569 view = anv_device_alloc(device, sizeof(*view), 8,
1570 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1571 if (view == NULL)
1572 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1573
1574 view->base.destructor = anv_surface_view_destroy;
1575
1576 view->bo = buffer->bo;
1577 view->offset = buffer->offset + pCreateInfo->offset;
1578 view->surface_state =
1579 anv_state_pool_alloc(&device->surface_state_pool, 64, 64);
1580 view->format = pCreateInfo->format;
1581 view->range = pCreateInfo->range;
1582
1583 fill_buffer_surface_state(view->surface_state.map,
1584 pCreateInfo->format, view->offset, pCreateInfo->range);
1585
1586 *pView = (VkBufferView) view;
1587
1588 return VK_SUCCESS;
1589 }
1590
1591 // Sampler functions
1592
1593 VkResult anv_CreateSampler(
1594 VkDevice _device,
1595 const VkSamplerCreateInfo* pCreateInfo,
1596 VkSampler* pSampler)
1597 {
1598 struct anv_device *device = (struct anv_device *) _device;
1599 struct anv_sampler *sampler;
1600 uint32_t mag_filter, min_filter, max_anisotropy;
1601
1602 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);
1603
1604 sampler = anv_device_alloc(device, sizeof(*sampler), 8,
1605 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1606 if (!sampler)
1607 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1608
1609 static const uint32_t vk_to_gen_tex_filter[] = {
1610 [VK_TEX_FILTER_NEAREST] = MAPFILTER_NEAREST,
1611 [VK_TEX_FILTER_LINEAR] = MAPFILTER_LINEAR
1612 };
1613
1614 static const uint32_t vk_to_gen_mipmap_mode[] = {
1615 [VK_TEX_MIPMAP_MODE_BASE] = MIPFILTER_NONE,
1616 [VK_TEX_MIPMAP_MODE_NEAREST] = MIPFILTER_NEAREST,
1617 [VK_TEX_MIPMAP_MODE_LINEAR] = MIPFILTER_LINEAR
1618 };
1619
1620 static const uint32_t vk_to_gen_tex_address[] = {
1621 [VK_TEX_ADDRESS_WRAP] = TCM_WRAP,
1622 [VK_TEX_ADDRESS_MIRROR] = TCM_MIRROR,
1623 [VK_TEX_ADDRESS_CLAMP] = TCM_CLAMP,
1624 [VK_TEX_ADDRESS_MIRROR_ONCE] = TCM_MIRROR_ONCE,
1625 [VK_TEX_ADDRESS_CLAMP_BORDER] = TCM_CLAMP_BORDER,
1626 };
1627
1628 static const uint32_t vk_to_gen_compare_op[] = {
1629 [VK_COMPARE_OP_NEVER] = PREFILTEROPNEVER,
1630 [VK_COMPARE_OP_LESS] = PREFILTEROPLESS,
1631 [VK_COMPARE_OP_EQUAL] = PREFILTEROPEQUAL,
1632 [VK_COMPARE_OP_LESS_EQUAL] = PREFILTEROPLEQUAL,
1633 [VK_COMPARE_OP_GREATER] = PREFILTEROPGREATER,
1634 [VK_COMPARE_OP_NOT_EQUAL] = PREFILTEROPNOTEQUAL,
1635 [VK_COMPARE_OP_GREATER_EQUAL] = PREFILTEROPGEQUAL,
1636 [VK_COMPARE_OP_ALWAYS] = PREFILTEROPALWAYS,
1637 };
1638
1639 if (pCreateInfo->maxAnisotropy > 1) {
1640 mag_filter = MAPFILTER_ANISOTROPIC;
1641 min_filter = MAPFILTER_ANISOTROPIC;
1642 max_anisotropy = (pCreateInfo->maxAnisotropy - 2) / 2;
1643 } else {
1644 mag_filter = vk_to_gen_tex_filter[pCreateInfo->magFilter];
1645 min_filter = vk_to_gen_tex_filter[pCreateInfo->minFilter];
1646 max_anisotropy = RATIO21;
1647 }
1648
1649 struct GEN8_SAMPLER_STATE sampler_state = {
1650 .SamplerDisable = false,
1651 .TextureBorderColorMode = DX10OGL,
1652 .LODPreClampMode = 0,
1653 .BaseMipLevel = 0.0,
1654 .MipModeFilter = vk_to_gen_mipmap_mode[pCreateInfo->mipMode],
1655 .MagModeFilter = mag_filter,
1656 .MinModeFilter = min_filter,
1657 .TextureLODBias = pCreateInfo->mipLodBias * 256,
1658 .AnisotropicAlgorithm = EWAApproximation,
1659 .MinLOD = pCreateInfo->minLod,
1660 .MaxLOD = pCreateInfo->maxLod,
1661 .ChromaKeyEnable = 0,
1662 .ChromaKeyIndex = 0,
1663 .ChromaKeyMode = 0,
1664 .ShadowFunction = vk_to_gen_compare_op[pCreateInfo->compareOp],
1665 .CubeSurfaceControlMode = 0,
1666
1667 .IndirectStatePointer =
1668 device->float_border_colors.offset +
1669 pCreateInfo->borderColor * sizeof(float) * 4,
1670
1671 .LODClampMagnificationMode = MIPNONE,
1672 .MaximumAnisotropy = max_anisotropy,
1673 .RAddressMinFilterRoundingEnable = 0,
1674 .RAddressMagFilterRoundingEnable = 0,
1675 .VAddressMinFilterRoundingEnable = 0,
1676 .VAddressMagFilterRoundingEnable = 0,
1677 .UAddressMinFilterRoundingEnable = 0,
1678 .UAddressMagFilterRoundingEnable = 0,
1679 .TrilinearFilterQuality = 0,
1680 .NonnormalizedCoordinateEnable = 0,
1681 .TCXAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressU],
1682 .TCYAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressV],
1683 .TCZAddressControlMode = vk_to_gen_tex_address[pCreateInfo->addressW],
1684 };
1685
1686 GEN8_SAMPLER_STATE_pack(NULL, sampler->state, &sampler_state);
1687
1688 *pSampler = (VkSampler) sampler;
1689
1690 return VK_SUCCESS;
1691 }
1692
1693 // Descriptor set functions
1694
1695 VkResult anv_CreateDescriptorSetLayout(
1696 VkDevice _device,
1697 const VkDescriptorSetLayoutCreateInfo* pCreateInfo,
1698 VkDescriptorSetLayout* pSetLayout)
1699 {
1700 struct anv_device *device = (struct anv_device *) _device;
1701 struct anv_descriptor_set_layout *set_layout;
1702
1703 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO);
1704
1705 uint32_t sampler_count[VK_NUM_SHADER_STAGE] = { 0, };
1706 uint32_t surface_count[VK_NUM_SHADER_STAGE] = { 0, };
1707 uint32_t num_dynamic_buffers = 0;
1708 uint32_t count = 0;
1709 uint32_t stages = 0;
1710 uint32_t s;
1711
1712 for (uint32_t i = 0; i < pCreateInfo->count; i++) {
1713 switch (pCreateInfo->pBinding[i].descriptorType) {
1714 case VK_DESCRIPTOR_TYPE_SAMPLER:
1715 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1716 for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
1717 sampler_count[s] += pCreateInfo->pBinding[i].count;
1718 break;
1719 default:
1720 break;
1721 }
1722
1723 switch (pCreateInfo->pBinding[i].descriptorType) {
1724 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1725 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
1726 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
1727 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
1728 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
1729 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1730 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1731 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1732 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1733 for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
1734 surface_count[s] += pCreateInfo->pBinding[i].count;
1735 break;
1736 default:
1737 break;
1738 }
1739
1740 switch (pCreateInfo->pBinding[i].descriptorType) {
1741 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1742 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1743 num_dynamic_buffers += pCreateInfo->pBinding[i].count;
1744 break;
1745 default:
1746 break;
1747 }
1748
1749 stages |= pCreateInfo->pBinding[i].stageFlags;
1750 count += pCreateInfo->pBinding[i].count;
1751 }
1752
1753 uint32_t sampler_total = 0;
1754 uint32_t surface_total = 0;
1755 for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) {
1756 sampler_total += sampler_count[s];
1757 surface_total += surface_count[s];
1758 }
1759
1760 size_t size = sizeof(*set_layout) +
1761 (sampler_total + surface_total) * sizeof(set_layout->entries[0]);
1762 set_layout = anv_device_alloc(device, size, 8,
1763 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1764 if (!set_layout)
1765 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1766
1767 set_layout->num_dynamic_buffers = num_dynamic_buffers;
1768 set_layout->count = count;
1769 set_layout->shader_stages = stages;
1770
1771 struct anv_descriptor_slot *p = set_layout->entries;
1772 struct anv_descriptor_slot *sampler[VK_NUM_SHADER_STAGE];
1773 struct anv_descriptor_slot *surface[VK_NUM_SHADER_STAGE];
1774 for (uint32_t s = 0; s < VK_NUM_SHADER_STAGE; s++) {
1775 set_layout->stage[s].surface_count = surface_count[s];
1776 set_layout->stage[s].surface_start = surface[s] = p;
1777 p += surface_count[s];
1778 set_layout->stage[s].sampler_count = sampler_count[s];
1779 set_layout->stage[s].sampler_start = sampler[s] = p;
1780 p += sampler_count[s];
1781 }
1782
1783 uint32_t descriptor = 0;
1784 int8_t dynamic_slot = 0;
1785 bool is_dynamic;
1786 for (uint32_t i = 0; i < pCreateInfo->count; i++) {
1787 switch (pCreateInfo->pBinding[i].descriptorType) {
1788 case VK_DESCRIPTOR_TYPE_SAMPLER:
1789 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1790 for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
1791 for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) {
1792 sampler[s]->index = descriptor + j;
1793 sampler[s]->dynamic_slot = -1;
1794 sampler[s]++;
1795 }
1796 break;
1797 default:
1798 break;
1799 }
1800
1801 switch (pCreateInfo->pBinding[i].descriptorType) {
1802 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1803 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1804 is_dynamic = true;
1805 break;
1806 default:
1807 is_dynamic = false;
1808 break;
1809 }
1810
1811 switch (pCreateInfo->pBinding[i].descriptorType) {
1812 case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:
1813 case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:
1814 case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:
1815 case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:
1816 case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:
1817 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:
1818 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:
1819 case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
1820 case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
1821 for_each_bit(s, pCreateInfo->pBinding[i].stageFlags)
1822 for (uint32_t j = 0; j < pCreateInfo->pBinding[i].count; j++) {
1823 surface[s]->index = descriptor + j;
1824 if (is_dynamic)
1825 surface[s]->dynamic_slot = dynamic_slot + j;
1826 else
1827 surface[s]->dynamic_slot = -1;
1828 surface[s]++;
1829 }
1830 break;
1831 default:
1832 break;
1833 }
1834
1835 if (is_dynamic)
1836 dynamic_slot += pCreateInfo->pBinding[i].count;
1837
1838 descriptor += pCreateInfo->pBinding[i].count;
1839 }
1840
1841 *pSetLayout = (VkDescriptorSetLayout) set_layout;
1842
1843 return VK_SUCCESS;
1844 }
1845
1846 VkResult anv_BeginDescriptorPoolUpdate(
1847 VkDevice device,
1848 VkDescriptorUpdateMode updateMode)
1849 {
1850 return VK_SUCCESS;
1851 }
1852
1853 VkResult anv_EndDescriptorPoolUpdate(
1854 VkDevice device,
1855 VkCmdBuffer cmd)
1856 {
1857 return VK_SUCCESS;
1858 }
1859
1860 VkResult anv_CreateDescriptorPool(
1861 VkDevice device,
1862 VkDescriptorPoolUsage poolUsage,
1863 uint32_t maxSets,
1864 const VkDescriptorPoolCreateInfo* pCreateInfo,
1865 VkDescriptorPool* pDescriptorPool)
1866 {
1867 *pDescriptorPool = 1;
1868
1869 return VK_SUCCESS;
1870 }
1871
1872 VkResult anv_ResetDescriptorPool(
1873 VkDevice device,
1874 VkDescriptorPool descriptorPool)
1875 {
1876 return VK_SUCCESS;
1877 }
1878
1879 VkResult anv_AllocDescriptorSets(
1880 VkDevice _device,
1881 VkDescriptorPool descriptorPool,
1882 VkDescriptorSetUsage setUsage,
1883 uint32_t count,
1884 const VkDescriptorSetLayout* pSetLayouts,
1885 VkDescriptorSet* pDescriptorSets,
1886 uint32_t* pCount)
1887 {
1888 struct anv_device *device = (struct anv_device *) _device;
1889 const struct anv_descriptor_set_layout *layout;
1890 struct anv_descriptor_set *set;
1891 size_t size;
1892
1893 for (uint32_t i = 0; i < count; i++) {
1894 layout = (struct anv_descriptor_set_layout *) pSetLayouts[i];
1895 size = sizeof(*set) + layout->count * sizeof(set->descriptors[0]);
1896 set = anv_device_alloc(device, size, 8,
1897 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
1898 if (!set) {
1899 *pCount = i;
1900 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
1901 }
1902
1903 /* Descriptor sets may not be 100% filled out so we need to memset to
1904 * ensure that we can properly detect and handle holes.
1905 */
1906 memset(set, 0, size);
1907
1908 pDescriptorSets[i] = (VkDescriptorSet) set;
1909 }
1910
1911 *pCount = count;
1912
1913 return VK_SUCCESS;
1914 }
1915
1916 void anv_ClearDescriptorSets(
1917 VkDevice device,
1918 VkDescriptorPool descriptorPool,
1919 uint32_t count,
1920 const VkDescriptorSet* pDescriptorSets)
1921 {
1922 }
1923
1924 void anv_UpdateDescriptors(
1925 VkDevice _device,
1926 VkDescriptorSet descriptorSet,
1927 uint32_t updateCount,
1928 const void** ppUpdateArray)
1929 {
1930 struct anv_descriptor_set *set = (struct anv_descriptor_set *) descriptorSet;
1931 VkUpdateSamplers *update_samplers;
1932 VkUpdateSamplerTextures *update_sampler_textures;
1933 VkUpdateImages *update_images;
1934 VkUpdateBuffers *update_buffers;
1935 VkUpdateAsCopy *update_as_copy;
1936
1937 for (uint32_t i = 0; i < updateCount; i++) {
1938 const struct anv_common *common = ppUpdateArray[i];
1939
1940 switch (common->sType) {
1941 case VK_STRUCTURE_TYPE_UPDATE_SAMPLERS:
1942 update_samplers = (VkUpdateSamplers *) common;
1943
1944 for (uint32_t j = 0; j < update_samplers->count; j++) {
1945 set->descriptors[update_samplers->binding + j].sampler =
1946 (struct anv_sampler *) update_samplers->pSamplers[j];
1947 }
1948 break;
1949
1950 case VK_STRUCTURE_TYPE_UPDATE_SAMPLER_TEXTURES:
1951 /* FIXME: Shouldn't this be *_UPDATE_SAMPLER_IMAGES? */
1952 update_sampler_textures = (VkUpdateSamplerTextures *) common;
1953
1954 for (uint32_t j = 0; j < update_sampler_textures->count; j++) {
1955 set->descriptors[update_sampler_textures->binding + j].view =
1956 (struct anv_surface_view *)
1957 update_sampler_textures->pSamplerImageViews[j].pImageView->view;
1958 set->descriptors[update_sampler_textures->binding + j].sampler =
1959 (struct anv_sampler *)
1960 update_sampler_textures->pSamplerImageViews[j].sampler;
1961 }
1962 break;
1963
1964 case VK_STRUCTURE_TYPE_UPDATE_IMAGES:
1965 update_images = (VkUpdateImages *) common;
1966
1967 for (uint32_t j = 0; j < update_images->count; j++) {
1968 set->descriptors[update_images->binding + j].view =
1969 (struct anv_surface_view *) update_images->pImageViews[j].view;
1970 }
1971 break;
1972
1973 case VK_STRUCTURE_TYPE_UPDATE_BUFFERS:
1974 update_buffers = (VkUpdateBuffers *) common;
1975
1976 for (uint32_t j = 0; j < update_buffers->count; j++) {
1977 set->descriptors[update_buffers->binding + j].view =
1978 (struct anv_surface_view *) update_buffers->pBufferViews[j].view;
1979 }
1980 /* FIXME: descriptor arrays? */
1981 break;
1982
1983 case VK_STRUCTURE_TYPE_UPDATE_AS_COPY:
1984 update_as_copy = (VkUpdateAsCopy *) common;
1985 (void) update_as_copy;
1986 break;
1987
1988 default:
1989 break;
1990 }
1991 }
1992 }
1993
1994 // State object functions
1995
/*
 * Clamp x to the range [min, max].
 *
 * Values below min yield min; values at or above max yield max; everything
 * in between is returned unchanged.
 */
static inline int64_t
clamp_int64(int64_t x, int64_t min, int64_t max)
{
   if (x < min)
      return min;

   return (x < max) ? x : max;
}
2006
2007 static void
2008 anv_dynamic_vp_state_destroy(struct anv_device *device,
2009 struct anv_object *object,
2010 VkObjectType obj_type)
2011 {
2012 struct anv_dynamic_vp_state *state = (void *)object;
2013
2014 assert(obj_type == VK_OBJECT_TYPE_DYNAMIC_VP_STATE);
2015
2016 anv_state_pool_free(&device->dynamic_state_pool, state->sf_clip_vp);
2017 anv_state_pool_free(&device->dynamic_state_pool, state->cc_vp);
2018 anv_state_pool_free(&device->dynamic_state_pool, state->scissor);
2019
2020 anv_device_free(device, state);
2021 }
2022
2023 VkResult anv_CreateDynamicViewportState(
2024 VkDevice _device,
2025 const VkDynamicVpStateCreateInfo* pCreateInfo,
2026 VkDynamicVpState* pState)
2027 {
2028 struct anv_device *device = (struct anv_device *) _device;
2029 struct anv_dynamic_vp_state *state;
2030
2031 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO);
2032
2033 state = anv_device_alloc(device, sizeof(*state), 8,
2034 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2035 if (state == NULL)
2036 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2037
2038 state->base.destructor = anv_dynamic_vp_state_destroy;
2039
2040 unsigned count = pCreateInfo->viewportAndScissorCount;
2041 state->sf_clip_vp = anv_state_pool_alloc(&device->dynamic_state_pool,
2042 count * 64, 64);
2043 state->cc_vp = anv_state_pool_alloc(&device->dynamic_state_pool,
2044 count * 8, 32);
2045 state->scissor = anv_state_pool_alloc(&device->dynamic_state_pool,
2046 count * 32, 32);
2047
2048 for (uint32_t i = 0; i < pCreateInfo->viewportAndScissorCount; i++) {
2049 const VkViewport *vp = &pCreateInfo->pViewports[i];
2050 const VkRect *s = &pCreateInfo->pScissors[i];
2051
2052 struct GEN8_SF_CLIP_VIEWPORT sf_clip_viewport = {
2053 .ViewportMatrixElementm00 = vp->width / 2,
2054 .ViewportMatrixElementm11 = vp->height / 2,
2055 .ViewportMatrixElementm22 = (vp->maxDepth - vp->minDepth) / 2,
2056 .ViewportMatrixElementm30 = vp->originX + vp->width / 2,
2057 .ViewportMatrixElementm31 = vp->originY + vp->height / 2,
2058 .ViewportMatrixElementm32 = (vp->maxDepth + vp->minDepth) / 2,
2059 .XMinClipGuardband = -1.0f,
2060 .XMaxClipGuardband = 1.0f,
2061 .YMinClipGuardband = -1.0f,
2062 .YMaxClipGuardband = 1.0f,
2063 .XMinViewPort = vp->originX,
2064 .XMaxViewPort = vp->originX + vp->width - 1,
2065 .YMinViewPort = vp->originY,
2066 .YMaxViewPort = vp->originY + vp->height - 1,
2067 };
2068
2069 struct GEN8_CC_VIEWPORT cc_viewport = {
2070 .MinimumDepth = vp->minDepth,
2071 .MaximumDepth = vp->maxDepth
2072 };
2073
2074 /* Since xmax and ymax are inclusive, we have to have xmax < xmin or
2075 * ymax < ymin for empty clips. In case clip x, y, width height are all
2076 * 0, the clamps below produce 0 for xmin, ymin, xmax, ymax, which isn't
2077 * what we want. Just special case empty clips and produce a canonical
2078 * empty clip. */
2079 static const struct GEN8_SCISSOR_RECT empty_scissor = {
2080 .ScissorRectangleYMin = 1,
2081 .ScissorRectangleXMin = 1,
2082 .ScissorRectangleYMax = 0,
2083 .ScissorRectangleXMax = 0
2084 };
2085
2086 const int max = 0xffff;
2087 struct GEN8_SCISSOR_RECT scissor = {
2088 /* Do this math using int64_t so overflow gets clamped correctly. */
2089 .ScissorRectangleYMin = clamp_int64(s->offset.y, 0, max),
2090 .ScissorRectangleXMin = clamp_int64(s->offset.x, 0, max),
2091 .ScissorRectangleYMax = clamp_int64((uint64_t) s->offset.y + s->extent.height - 1, 0, max),
2092 .ScissorRectangleXMax = clamp_int64((uint64_t) s->offset.x + s->extent.width - 1, 0, max)
2093 };
2094
2095 GEN8_SF_CLIP_VIEWPORT_pack(NULL, state->sf_clip_vp.map + i * 64, &sf_clip_viewport);
2096 GEN8_CC_VIEWPORT_pack(NULL, state->cc_vp.map + i * 32, &cc_viewport);
2097
2098 if (s->extent.width <= 0 || s->extent.height <= 0) {
2099 GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &empty_scissor);
2100 } else {
2101 GEN8_SCISSOR_RECT_pack(NULL, state->scissor.map + i * 32, &scissor);
2102 }
2103 }
2104
2105 *pState = (VkDynamicVpState) state;
2106
2107 return VK_SUCCESS;
2108 }
2109
2110 VkResult anv_CreateDynamicRasterState(
2111 VkDevice _device,
2112 const VkDynamicRsStateCreateInfo* pCreateInfo,
2113 VkDynamicRsState* pState)
2114 {
2115 struct anv_device *device = (struct anv_device *) _device;
2116 struct anv_dynamic_rs_state *state;
2117
2118 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_RS_STATE_CREATE_INFO);
2119
2120 state = anv_device_alloc(device, sizeof(*state), 8,
2121 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2122 if (state == NULL)
2123 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2124
2125 /* Missing these:
2126 * float pointFadeThreshold;
2127 * // optional (GL45) - Size of point fade threshold
2128 */
2129
2130 struct GEN8_3DSTATE_SF sf = {
2131 GEN8_3DSTATE_SF_header,
2132 .LineWidth = pCreateInfo->lineWidth,
2133 .PointWidth = pCreateInfo->pointSize,
2134 };
2135
2136 GEN8_3DSTATE_SF_pack(NULL, state->state_sf, &sf);
2137
2138 bool enable_bias = pCreateInfo->depthBias != 0.0f ||
2139 pCreateInfo->slopeScaledDepthBias != 0.0f;
2140 struct GEN8_3DSTATE_RASTER raster = {
2141 .GlobalDepthOffsetEnableSolid = enable_bias,
2142 .GlobalDepthOffsetEnableWireframe = enable_bias,
2143 .GlobalDepthOffsetEnablePoint = enable_bias,
2144 .GlobalDepthOffsetConstant = pCreateInfo->depthBias,
2145 .GlobalDepthOffsetScale = pCreateInfo->slopeScaledDepthBias,
2146 .GlobalDepthOffsetClamp = pCreateInfo->depthBiasClamp
2147 };
2148
2149 GEN8_3DSTATE_RASTER_pack(NULL, state->state_raster, &raster);
2150
2151 *pState = (VkDynamicRsState) state;
2152
2153 return VK_SUCCESS;
2154 }
2155
2156 VkResult anv_CreateDynamicColorBlendState(
2157 VkDevice _device,
2158 const VkDynamicCbStateCreateInfo* pCreateInfo,
2159 VkDynamicCbState* pState)
2160 {
2161 struct anv_device *device = (struct anv_device *) _device;
2162 struct anv_dynamic_cb_state *state;
2163
2164 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_CB_STATE_CREATE_INFO);
2165
2166 state = anv_device_alloc(device, sizeof(*state), 8,
2167 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2168 if (state == NULL)
2169 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2170
2171 struct GEN8_COLOR_CALC_STATE color_calc_state = {
2172 .BlendConstantColorRed = pCreateInfo->blendConst[0],
2173 .BlendConstantColorGreen = pCreateInfo->blendConst[1],
2174 .BlendConstantColorBlue = pCreateInfo->blendConst[2],
2175 .BlendConstantColorAlpha = pCreateInfo->blendConst[3]
2176 };
2177
2178 GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state);
2179
2180 *pState = (VkDynamicCbState) state;
2181
2182 return VK_SUCCESS;
2183 }
2184
2185 VkResult anv_CreateDynamicDepthStencilState(
2186 VkDevice _device,
2187 const VkDynamicDsStateCreateInfo* pCreateInfo,
2188 VkDynamicDsState* pState)
2189 {
2190 struct anv_device *device = (struct anv_device *) _device;
2191 struct anv_dynamic_ds_state *state;
2192
2193 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DYNAMIC_DS_STATE_CREATE_INFO);
2194
2195 state = anv_device_alloc(device, sizeof(*state), 8,
2196 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2197 if (state == NULL)
2198 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2199
2200 struct GEN8_3DSTATE_WM_DEPTH_STENCIL wm_depth_stencil = {
2201 GEN8_3DSTATE_WM_DEPTH_STENCIL_header,
2202
2203 /* Is this what we need to do? */
2204 .StencilBufferWriteEnable = pCreateInfo->stencilWriteMask != 0,
2205
2206 .StencilTestMask = pCreateInfo->stencilReadMask & 0xff,
2207 .StencilWriteMask = pCreateInfo->stencilWriteMask & 0xff,
2208
2209 .BackfaceStencilTestMask = pCreateInfo->stencilReadMask & 0xff,
2210 .BackfaceStencilWriteMask = pCreateInfo->stencilWriteMask & 0xff,
2211 };
2212
2213 GEN8_3DSTATE_WM_DEPTH_STENCIL_pack(NULL, state->state_wm_depth_stencil,
2214 &wm_depth_stencil);
2215
2216 struct GEN8_COLOR_CALC_STATE color_calc_state = {
2217 .StencilReferenceValue = pCreateInfo->stencilFrontRef,
2218 .BackFaceStencilReferenceValue = pCreateInfo->stencilBackRef
2219 };
2220
2221 GEN8_COLOR_CALC_STATE_pack(NULL, state->state_color_calc, &color_calc_state);
2222
2223 *pState = (VkDynamicDsState) state;
2224
2225 return VK_SUCCESS;
2226 }
2227
2228 // Command buffer functions
2229
2230 static void
2231 anv_cmd_buffer_destroy(struct anv_device *device,
2232 struct anv_object *object,
2233 VkObjectType obj_type)
2234 {
2235 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) object;
2236
2237 assert(obj_type == VK_OBJECT_TYPE_COMMAND_BUFFER);
2238
2239 /* Destroy all of the batch buffers */
2240 struct anv_batch_bo *bbo = cmd_buffer->last_batch_bo;
2241 while (bbo) {
2242 struct anv_batch_bo *prev = bbo->prev_batch_bo;
2243 anv_batch_bo_destroy(bbo, device);
2244 bbo = prev;
2245 }
2246 anv_reloc_list_finish(&cmd_buffer->batch.relocs, device);
2247
2248 /* Destroy all of the surface state buffers */
2249 bbo = cmd_buffer->surface_batch_bo;
2250 while (bbo) {
2251 struct anv_batch_bo *prev = bbo->prev_batch_bo;
2252 anv_batch_bo_destroy(bbo, device);
2253 bbo = prev;
2254 }
2255 anv_reloc_list_finish(&cmd_buffer->surface_relocs, device);
2256
2257 anv_state_stream_finish(&cmd_buffer->surface_state_stream);
2258 anv_state_stream_finish(&cmd_buffer->dynamic_state_stream);
2259 anv_device_free(device, cmd_buffer->exec2_objects);
2260 anv_device_free(device, cmd_buffer->exec2_bos);
2261 anv_device_free(device, cmd_buffer);
2262 }
2263
2264 static VkResult
2265 anv_cmd_buffer_chain_batch(struct anv_batch *batch, void *_data)
2266 {
2267 struct anv_cmd_buffer *cmd_buffer = _data;
2268
2269 struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->last_batch_bo;
2270
2271 VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo);
2272 if (result != VK_SUCCESS)
2273 return result;
2274
2275 /* We set the end of the batch a little short so we would be sure we
2276 * have room for the chaining command. Since we're about to emit the
2277 * chaining command, let's set it back where it should go.
2278 */
2279 batch->end += GEN8_MI_BATCH_BUFFER_START_length * 4;
2280 assert(batch->end == old_bbo->bo.map + old_bbo->bo.size);
2281
2282 anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_START,
2283 GEN8_MI_BATCH_BUFFER_START_header,
2284 ._2ndLevelBatchBuffer = _1stlevelbatch,
2285 .AddressSpaceIndicator = ASI_PPGTT,
2286 .BatchBufferStartAddress = { &new_bbo->bo, 0 },
2287 );
2288
2289 /* Pad out to a 2-dword aligned boundary with zeros */
2290 if ((uintptr_t)batch->next % 8 != 0) {
2291 *(uint32_t *)batch->next = 0;
2292 batch->next += 4;
2293 }
2294
2295 anv_batch_bo_finish(cmd_buffer->last_batch_bo, batch);
2296
2297 new_bbo->prev_batch_bo = old_bbo;
2298 cmd_buffer->last_batch_bo = new_bbo;
2299
2300 anv_batch_bo_start(new_bbo, batch, GEN8_MI_BATCH_BUFFER_START_length * 4);
2301
2302 return VK_SUCCESS;
2303 }
2304
2305 VkResult anv_CreateCommandBuffer(
2306 VkDevice _device,
2307 const VkCmdBufferCreateInfo* pCreateInfo,
2308 VkCmdBuffer* pCmdBuffer)
2309 {
2310 struct anv_device *device = (struct anv_device *) _device;
2311 struct anv_cmd_buffer *cmd_buffer;
2312 VkResult result;
2313
2314 cmd_buffer = anv_device_alloc(device, sizeof(*cmd_buffer), 8,
2315 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
2316 if (cmd_buffer == NULL)
2317 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2318
2319 cmd_buffer->base.destructor = anv_cmd_buffer_destroy;
2320
2321 cmd_buffer->device = device;
2322 cmd_buffer->rs_state = NULL;
2323 cmd_buffer->vp_state = NULL;
2324 cmd_buffer->cb_state = NULL;
2325 cmd_buffer->ds_state = NULL;
2326 memset(&cmd_buffer->descriptors, 0, sizeof(cmd_buffer->descriptors));
2327
2328 result = anv_batch_bo_create(device, &cmd_buffer->last_batch_bo);
2329 if (result != VK_SUCCESS)
2330 goto fail;
2331
2332 result = anv_reloc_list_init(&cmd_buffer->batch.relocs, device);
2333 if (result != VK_SUCCESS)
2334 goto fail_batch_bo;
2335
2336 cmd_buffer->batch.device = device;
2337 cmd_buffer->batch.extend_cb = anv_cmd_buffer_chain_batch;
2338 cmd_buffer->batch.user_data = cmd_buffer;
2339
2340 anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch,
2341 GEN8_MI_BATCH_BUFFER_START_length * 4);
2342
2343 result = anv_batch_bo_create(device, &cmd_buffer->surface_batch_bo);
2344 if (result != VK_SUCCESS)
2345 goto fail_batch_relocs;
2346 cmd_buffer->surface_batch_bo->first_reloc = 0;
2347
2348 result = anv_reloc_list_init(&cmd_buffer->surface_relocs, device);
2349 if (result != VK_SUCCESS)
2350 goto fail_ss_batch_bo;
2351
2352 /* Start surface_next at 1 so surface offset 0 is invalid. */
2353 cmd_buffer->surface_next = 1;
2354
2355 cmd_buffer->exec2_objects = NULL;
2356 cmd_buffer->exec2_bos = NULL;
2357 cmd_buffer->exec2_array_length = 0;
2358
2359 anv_state_stream_init(&cmd_buffer->surface_state_stream,
2360 &device->surface_state_block_pool);
2361 anv_state_stream_init(&cmd_buffer->dynamic_state_stream,
2362 &device->dynamic_state_block_pool);
2363
2364 cmd_buffer->dirty = 0;
2365 cmd_buffer->vb_dirty = 0;
2366 cmd_buffer->descriptors_dirty = 0;
2367 cmd_buffer->pipeline = NULL;
2368 cmd_buffer->vp_state = NULL;
2369 cmd_buffer->rs_state = NULL;
2370 cmd_buffer->ds_state = NULL;
2371
2372 *pCmdBuffer = (VkCmdBuffer) cmd_buffer;
2373
2374 return VK_SUCCESS;
2375
2376 fail_ss_batch_bo:
2377 anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, device);
2378 fail_batch_relocs:
2379 anv_reloc_list_finish(&cmd_buffer->batch.relocs, device);
2380 fail_batch_bo:
2381 anv_batch_bo_destroy(cmd_buffer->last_batch_bo, device);
2382 fail:
2383 anv_device_free(device, cmd_buffer);
2384
2385 return result;
2386 }
2387
2388 static void
2389 anv_cmd_buffer_emit_state_base_address(struct anv_cmd_buffer *cmd_buffer)
2390 {
2391 struct anv_device *device = cmd_buffer->device;
2392 struct anv_bo *scratch_bo = NULL;
2393
2394 cmd_buffer->scratch_size = device->scratch_block_pool.size;
2395 if (cmd_buffer->scratch_size > 0)
2396 scratch_bo = &device->scratch_block_pool.bo;
2397
2398 anv_batch_emit(&cmd_buffer->batch, GEN8_STATE_BASE_ADDRESS,
2399 .GeneralStateBaseAddress = { scratch_bo, 0 },
2400 .GeneralStateMemoryObjectControlState = GEN8_MOCS,
2401 .GeneralStateBaseAddressModifyEnable = true,
2402 .GeneralStateBufferSize = 0xfffff,
2403 .GeneralStateBufferSizeModifyEnable = true,
2404
2405 .SurfaceStateBaseAddress = { &cmd_buffer->surface_batch_bo->bo, 0 },
2406 .SurfaceStateMemoryObjectControlState = GEN8_MOCS,
2407 .SurfaceStateBaseAddressModifyEnable = true,
2408
2409 .DynamicStateBaseAddress = { &device->dynamic_state_block_pool.bo, 0 },
2410 .DynamicStateMemoryObjectControlState = GEN8_MOCS,
2411 .DynamicStateBaseAddressModifyEnable = true,
2412 .DynamicStateBufferSize = 0xfffff,
2413 .DynamicStateBufferSizeModifyEnable = true,
2414
2415 .IndirectObjectBaseAddress = { NULL, 0 },
2416 .IndirectObjectMemoryObjectControlState = GEN8_MOCS,
2417 .IndirectObjectBaseAddressModifyEnable = true,
2418 .IndirectObjectBufferSize = 0xfffff,
2419 .IndirectObjectBufferSizeModifyEnable = true,
2420
2421 .InstructionBaseAddress = { &device->instruction_block_pool.bo, 0 },
2422 .InstructionMemoryObjectControlState = GEN8_MOCS,
2423 .InstructionBaseAddressModifyEnable = true,
2424 .InstructionBufferSize = 0xfffff,
2425 .InstructionBuffersizeModifyEnable = true);
2426 }
2427
2428 VkResult anv_BeginCommandBuffer(
2429 VkCmdBuffer cmdBuffer,
2430 const VkCmdBufferBeginInfo* pBeginInfo)
2431 {
2432 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2433
2434 anv_cmd_buffer_emit_state_base_address(cmd_buffer);
2435 cmd_buffer->current_pipeline = UINT32_MAX;
2436
2437 return VK_SUCCESS;
2438 }
2439
2440 static VkResult
2441 anv_cmd_buffer_add_bo(struct anv_cmd_buffer *cmd_buffer,
2442 struct anv_bo *bo,
2443 struct drm_i915_gem_relocation_entry *relocs,
2444 size_t num_relocs)
2445 {
2446 struct drm_i915_gem_exec_object2 *obj;
2447
2448 if (bo->index < cmd_buffer->bo_count &&
2449 cmd_buffer->exec2_bos[bo->index] == bo)
2450 return VK_SUCCESS;
2451
2452 if (cmd_buffer->bo_count >= cmd_buffer->exec2_array_length) {
2453 uint32_t new_len = cmd_buffer->exec2_objects ?
2454 cmd_buffer->exec2_array_length * 2 : 64;
2455
2456 struct drm_i915_gem_exec_object2 *new_objects =
2457 anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_objects),
2458 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
2459 if (new_objects == NULL)
2460 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2461
2462 struct anv_bo **new_bos =
2463 anv_device_alloc(cmd_buffer->device, new_len * sizeof(*new_bos),
2464 8, VK_SYSTEM_ALLOC_TYPE_INTERNAL);
2465 if (new_objects == NULL) {
2466 anv_device_free(cmd_buffer->device, new_objects);
2467 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
2468 }
2469
2470 if (cmd_buffer->exec2_objects) {
2471 memcpy(new_objects, cmd_buffer->exec2_objects,
2472 cmd_buffer->bo_count * sizeof(*new_objects));
2473 memcpy(new_bos, cmd_buffer->exec2_bos,
2474 cmd_buffer->bo_count * sizeof(*new_bos));
2475 }
2476
2477 cmd_buffer->exec2_objects = new_objects;
2478 cmd_buffer->exec2_bos = new_bos;
2479 cmd_buffer->exec2_array_length = new_len;
2480 }
2481
2482 assert(cmd_buffer->bo_count < cmd_buffer->exec2_array_length);
2483
2484 bo->index = cmd_buffer->bo_count++;
2485 obj = &cmd_buffer->exec2_objects[bo->index];
2486 cmd_buffer->exec2_bos[bo->index] = bo;
2487
2488 obj->handle = bo->gem_handle;
2489 obj->relocation_count = 0;
2490 obj->relocs_ptr = 0;
2491 obj->alignment = 0;
2492 obj->offset = bo->offset;
2493 obj->flags = 0;
2494 obj->rsvd1 = 0;
2495 obj->rsvd2 = 0;
2496
2497 if (relocs) {
2498 obj->relocation_count = num_relocs;
2499 obj->relocs_ptr = (uintptr_t) relocs;
2500 }
2501
2502 return VK_SUCCESS;
2503 }
2504
2505 static void
2506 anv_cmd_buffer_add_validate_bos(struct anv_cmd_buffer *cmd_buffer,
2507 struct anv_reloc_list *list)
2508 {
2509 for (size_t i = 0; i < list->num_relocs; i++)
2510 anv_cmd_buffer_add_bo(cmd_buffer, list->reloc_bos[i], NULL, 0);
2511 }
2512
2513 static void
2514 anv_cmd_buffer_process_relocs(struct anv_cmd_buffer *cmd_buffer,
2515 struct anv_reloc_list *list)
2516 {
2517 struct anv_bo *bo;
2518
2519 /* If the kernel supports I915_EXEC_NO_RELOC, it will compare offset in
2520 * struct drm_i915_gem_exec_object2 against the bos current offset and if
2521 * all bos haven't moved it will skip relocation processing alltogether.
2522 * If I915_EXEC_NO_RELOC is not supported, the kernel ignores the incoming
2523 * value of offset so we can set it either way. For that to work we need
2524 * to make sure all relocs use the same presumed offset.
2525 */
2526
2527 for (size_t i = 0; i < list->num_relocs; i++) {
2528 bo = list->reloc_bos[i];
2529 if (bo->offset != list->relocs[i].presumed_offset)
2530 cmd_buffer->need_reloc = true;
2531
2532 list->relocs[i].target_handle = bo->index;
2533 }
2534 }
2535
2536 VkResult anv_EndCommandBuffer(
2537 VkCmdBuffer cmdBuffer)
2538 {
2539 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2540 struct anv_device *device = cmd_buffer->device;
2541 struct anv_batch *batch = &cmd_buffer->batch;
2542
2543 anv_batch_emit(batch, GEN8_MI_BATCH_BUFFER_END);
2544
2545 /* Round batch up to an even number of dwords. */
2546 if ((batch->next - batch->start) & 4)
2547 anv_batch_emit(batch, GEN8_MI_NOOP);
2548
2549 anv_batch_bo_finish(cmd_buffer->last_batch_bo, &cmd_buffer->batch);
2550 cmd_buffer->surface_batch_bo->num_relocs =
2551 cmd_buffer->surface_relocs.num_relocs - cmd_buffer->surface_batch_bo->first_reloc;
2552 cmd_buffer->surface_batch_bo->length = cmd_buffer->surface_next;
2553
2554 cmd_buffer->bo_count = 0;
2555 cmd_buffer->need_reloc = false;
2556
2557 /* Lock for access to bo->index. */
2558 pthread_mutex_lock(&device->mutex);
2559
2560 /* Add surface state bos first so we can add them with their relocs. */
2561 for (struct anv_batch_bo *bbo = cmd_buffer->surface_batch_bo;
2562 bbo != NULL; bbo = bbo->prev_batch_bo) {
2563 anv_cmd_buffer_add_bo(cmd_buffer, &bbo->bo,
2564 &cmd_buffer->surface_relocs.relocs[bbo->first_reloc],
2565 bbo->num_relocs);
2566 }
2567
2568 /* Add all of the BOs referenced by surface state */
2569 anv_cmd_buffer_add_validate_bos(cmd_buffer, &cmd_buffer->surface_relocs);
2570
2571 /* Add all but the first batch BO */
2572 struct anv_batch_bo *batch_bo = cmd_buffer->last_batch_bo;
2573 while (batch_bo->prev_batch_bo) {
2574 anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo,
2575 &batch->relocs.relocs[batch_bo->first_reloc],
2576 batch_bo->num_relocs);
2577 batch_bo = batch_bo->prev_batch_bo;
2578 }
2579
2580 /* Add everything referenced by the batches */
2581 anv_cmd_buffer_add_validate_bos(cmd_buffer, &batch->relocs);
2582
2583 /* Add the first batch bo last */
2584 assert(batch_bo->prev_batch_bo == NULL && batch_bo->first_reloc == 0);
2585 anv_cmd_buffer_add_bo(cmd_buffer, &batch_bo->bo,
2586 &batch->relocs.relocs[batch_bo->first_reloc],
2587 batch_bo->num_relocs);
2588 assert(batch_bo->bo.index == cmd_buffer->bo_count - 1);
2589
2590 anv_cmd_buffer_process_relocs(cmd_buffer, &cmd_buffer->surface_relocs);
2591 anv_cmd_buffer_process_relocs(cmd_buffer, &batch->relocs);
2592
2593 cmd_buffer->execbuf.buffers_ptr = (uintptr_t) cmd_buffer->exec2_objects;
2594 cmd_buffer->execbuf.buffer_count = cmd_buffer->bo_count;
2595 cmd_buffer->execbuf.batch_start_offset = 0;
2596 cmd_buffer->execbuf.batch_len = batch->next - batch->start;
2597 cmd_buffer->execbuf.cliprects_ptr = 0;
2598 cmd_buffer->execbuf.num_cliprects = 0;
2599 cmd_buffer->execbuf.DR1 = 0;
2600 cmd_buffer->execbuf.DR4 = 0;
2601
2602 cmd_buffer->execbuf.flags = I915_EXEC_HANDLE_LUT;
2603 if (!cmd_buffer->need_reloc)
2604 cmd_buffer->execbuf.flags |= I915_EXEC_NO_RELOC;
2605 cmd_buffer->execbuf.flags |= I915_EXEC_RENDER;
2606 cmd_buffer->execbuf.rsvd1 = device->context_id;
2607 cmd_buffer->execbuf.rsvd2 = 0;
2608
2609 pthread_mutex_unlock(&device->mutex);
2610
2611 return VK_SUCCESS;
2612 }
2613
2614 VkResult anv_ResetCommandBuffer(
2615 VkCmdBuffer cmdBuffer)
2616 {
2617 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2618
2619 /* Delete all but the first batch bo */
2620 while (cmd_buffer->last_batch_bo->prev_batch_bo) {
2621 struct anv_batch_bo *prev = cmd_buffer->last_batch_bo->prev_batch_bo;
2622 anv_batch_bo_destroy(cmd_buffer->last_batch_bo, cmd_buffer->device);
2623 cmd_buffer->last_batch_bo = prev;
2624 }
2625 assert(cmd_buffer->last_batch_bo->prev_batch_bo == NULL);
2626
2627 cmd_buffer->batch.relocs.num_relocs = 0;
2628 anv_batch_bo_start(cmd_buffer->last_batch_bo, &cmd_buffer->batch,
2629 GEN8_MI_BATCH_BUFFER_START_length * 4);
2630
2631 /* Delete all but the first batch bo */
2632 while (cmd_buffer->surface_batch_bo->prev_batch_bo) {
2633 struct anv_batch_bo *prev = cmd_buffer->surface_batch_bo->prev_batch_bo;
2634 anv_batch_bo_destroy(cmd_buffer->surface_batch_bo, cmd_buffer->device);
2635 cmd_buffer->surface_batch_bo = prev;
2636 }
2637 assert(cmd_buffer->surface_batch_bo->prev_batch_bo == NULL);
2638
2639 cmd_buffer->surface_next = 1;
2640 cmd_buffer->surface_relocs.num_relocs = 0;
2641
2642 cmd_buffer->rs_state = NULL;
2643 cmd_buffer->vp_state = NULL;
2644 cmd_buffer->cb_state = NULL;
2645 cmd_buffer->ds_state = NULL;
2646
2647 return VK_SUCCESS;
2648 }
2649
2650 // Command buffer building functions
2651
2652 void anv_CmdBindPipeline(
2653 VkCmdBuffer cmdBuffer,
2654 VkPipelineBindPoint pipelineBindPoint,
2655 VkPipeline _pipeline)
2656 {
2657 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2658 struct anv_pipeline *pipeline = (struct anv_pipeline *) _pipeline;
2659
2660 switch (pipelineBindPoint) {
2661 case VK_PIPELINE_BIND_POINT_COMPUTE:
2662 cmd_buffer->compute_pipeline = pipeline;
2663 cmd_buffer->compute_dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
2664 break;
2665
2666 case VK_PIPELINE_BIND_POINT_GRAPHICS:
2667 cmd_buffer->pipeline = pipeline;
2668 cmd_buffer->vb_dirty |= pipeline->vb_used;
2669 cmd_buffer->dirty |= ANV_CMD_BUFFER_PIPELINE_DIRTY;
2670 break;
2671
2672 default:
2673 assert(!"invalid bind point");
2674 break;
2675 }
2676 }
2677
2678 void anv_CmdBindDynamicStateObject(
2679 VkCmdBuffer cmdBuffer,
2680 VkStateBindPoint stateBindPoint,
2681 VkDynamicStateObject dynamicState)
2682 {
2683 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2684
2685 switch (stateBindPoint) {
2686 case VK_STATE_BIND_POINT_VIEWPORT:
2687 cmd_buffer->vp_state = (struct anv_dynamic_vp_state *) dynamicState;
2688 cmd_buffer->dirty |= ANV_CMD_BUFFER_VP_DIRTY;
2689 break;
2690 case VK_STATE_BIND_POINT_RASTER:
2691 cmd_buffer->rs_state = (struct anv_dynamic_rs_state *) dynamicState;
2692 cmd_buffer->dirty |= ANV_CMD_BUFFER_RS_DIRTY;
2693 break;
2694 case VK_STATE_BIND_POINT_COLOR_BLEND:
2695 cmd_buffer->cb_state = (struct anv_dynamic_cb_state *) dynamicState;
2696 cmd_buffer->dirty |= ANV_CMD_BUFFER_CB_DIRTY;
2697 break;
2698 case VK_STATE_BIND_POINT_DEPTH_STENCIL:
2699 cmd_buffer->ds_state = (struct anv_dynamic_ds_state *) dynamicState;
2700 cmd_buffer->dirty |= ANV_CMD_BUFFER_DS_DIRTY;
2701 break;
2702 default:
2703 break;
2704 };
2705 }
2706
2707 static struct anv_state
2708 anv_cmd_buffer_alloc_surface_state(struct anv_cmd_buffer *cmd_buffer,
2709 uint32_t size, uint32_t alignment)
2710 {
2711 struct anv_state state;
2712
2713 state.offset = ALIGN_U32(cmd_buffer->surface_next, alignment);
2714 if (state.offset + size > cmd_buffer->surface_batch_bo->bo.size)
2715 return (struct anv_state) { 0 };
2716
2717 state.map = cmd_buffer->surface_batch_bo->bo.map + state.offset;
2718 state.alloc_size = size;
2719 cmd_buffer->surface_next = state.offset + size;
2720
2721 assert(state.offset + size <= cmd_buffer->surface_batch_bo->bo.size);
2722
2723 return state;
2724 }
2725
2726 static VkResult
2727 anv_cmd_buffer_new_surface_state_bo(struct anv_cmd_buffer *cmd_buffer)
2728 {
2729 struct anv_batch_bo *new_bbo, *old_bbo = cmd_buffer->surface_batch_bo;
2730
2731 /* Finish off the old buffer */
2732 old_bbo->num_relocs =
2733 cmd_buffer->surface_relocs.num_relocs - old_bbo->first_reloc;
2734 old_bbo->length = cmd_buffer->surface_next;
2735
2736 VkResult result = anv_batch_bo_create(cmd_buffer->device, &new_bbo);
2737 if (result != VK_SUCCESS)
2738 return result;
2739
2740 new_bbo->first_reloc = cmd_buffer->surface_relocs.num_relocs;
2741 cmd_buffer->surface_next = 1;
2742
2743 new_bbo->prev_batch_bo = old_bbo;
2744 cmd_buffer->surface_batch_bo = new_bbo;
2745
2746 /* Re-emit state base addresses so we get the new surface state base
2747 * address before we start emitting binding tables etc.
2748 */
2749 anv_cmd_buffer_emit_state_base_address(cmd_buffer);
2750
2751 /* It seems like just changing the state base addresses isn't enough.
2752 * Invalidating the cache seems to be enough to cause things to
2753 * propagate. However, I'm not 100% sure what we're supposed to do.
2754 */
2755 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
2756 .TextureCacheInvalidationEnable = true);
2757
2758 return VK_SUCCESS;
2759 }
2760
2761 void anv_CmdBindDescriptorSets(
2762 VkCmdBuffer cmdBuffer,
2763 VkPipelineBindPoint pipelineBindPoint,
2764 uint32_t firstSet,
2765 uint32_t setCount,
2766 const VkDescriptorSet* pDescriptorSets,
2767 uint32_t dynamicOffsetCount,
2768 const uint32_t* pDynamicOffsets)
2769 {
2770 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2771 struct anv_pipeline_layout *layout;
2772 struct anv_descriptor_set *set;
2773 struct anv_descriptor_set_layout *set_layout;
2774
2775 assert(firstSet + setCount < MAX_SETS);
2776
2777 if (pipelineBindPoint == VK_PIPELINE_BIND_POINT_GRAPHICS)
2778 layout = cmd_buffer->pipeline->layout;
2779 else
2780 layout = cmd_buffer->compute_pipeline->layout;
2781
2782 uint32_t dynamic_slot = 0;
2783 for (uint32_t i = 0; i < setCount; i++) {
2784 set = (struct anv_descriptor_set *) pDescriptorSets[i];
2785 set_layout = layout->set[firstSet + i].layout;
2786
2787 cmd_buffer->descriptors[firstSet + i].set = set;
2788
2789 assert(set_layout->num_dynamic_buffers <
2790 ARRAY_SIZE(cmd_buffer->descriptors[0].dynamic_offsets));
2791 memcpy(cmd_buffer->descriptors[firstSet + i].dynamic_offsets,
2792 pDynamicOffsets + dynamic_slot,
2793 set_layout->num_dynamic_buffers * sizeof(*pDynamicOffsets));
2794
2795 cmd_buffer->descriptors_dirty |= set_layout->shader_stages;
2796
2797 dynamic_slot += set_layout->num_dynamic_buffers;
2798 }
2799 }
2800
2801 void anv_CmdBindIndexBuffer(
2802 VkCmdBuffer cmdBuffer,
2803 VkBuffer _buffer,
2804 VkDeviceSize offset,
2805 VkIndexType indexType)
2806 {
2807 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2808 struct anv_buffer *buffer = (struct anv_buffer *) _buffer;
2809
2810 static const uint32_t vk_to_gen_index_type[] = {
2811 [VK_INDEX_TYPE_UINT8] = INDEX_BYTE,
2812 [VK_INDEX_TYPE_UINT16] = INDEX_WORD,
2813 [VK_INDEX_TYPE_UINT32] = INDEX_DWORD,
2814 };
2815
2816 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_INDEX_BUFFER,
2817 .IndexFormat = vk_to_gen_index_type[indexType],
2818 .MemoryObjectControlState = GEN8_MOCS,
2819 .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
2820 .BufferSize = buffer->size - offset);
2821 }
2822
2823 void anv_CmdBindVertexBuffers(
2824 VkCmdBuffer cmdBuffer,
2825 uint32_t startBinding,
2826 uint32_t bindingCount,
2827 const VkBuffer* pBuffers,
2828 const VkDeviceSize* pOffsets)
2829 {
2830 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
2831 struct anv_vertex_binding *vb = cmd_buffer->vertex_bindings;
2832
2833 /* We have to defer setting up vertex buffer since we need the buffer
2834 * stride from the pipeline. */
2835
2836 assert(startBinding + bindingCount < MAX_VBS);
2837 for (uint32_t i = 0; i < bindingCount; i++) {
2838 vb[startBinding + i].buffer = (struct anv_buffer *) pBuffers[i];
2839 vb[startBinding + i].offset = pOffsets[i];
2840 cmd_buffer->vb_dirty |= 1 << (startBinding + i);
2841 }
2842 }
2843
2844 static VkResult
2845 cmd_buffer_emit_binding_table(struct anv_cmd_buffer *cmd_buffer,
2846 unsigned stage, struct anv_state *bt_state)
2847 {
2848 struct anv_pipeline_layout *layout;
2849 uint32_t color_attachments, bias, size;
2850
2851 if (stage == VK_SHADER_STAGE_COMPUTE)
2852 layout = cmd_buffer->compute_pipeline->layout;
2853 else
2854 layout = cmd_buffer->pipeline->layout;
2855
2856 if (stage == VK_SHADER_STAGE_FRAGMENT) {
2857 bias = MAX_RTS;
2858 color_attachments = cmd_buffer->framebuffer->color_attachment_count;
2859 } else {
2860 bias = 0;
2861 color_attachments = 0;
2862 }
2863
2864 /* This is a little awkward: layout can be NULL but we still have to
2865 * allocate and set a binding table for the PS stage for render
2866 * targets. */
2867 uint32_t surface_count = layout ? layout->stage[stage].surface_count : 0;
2868
2869 if (color_attachments + surface_count == 0)
2870 return VK_SUCCESS;
2871
2872 size = (bias + surface_count) * sizeof(uint32_t);
2873 *bt_state = anv_cmd_buffer_alloc_surface_state(cmd_buffer, size, 32);
2874 uint32_t *bt_map = bt_state->map;
2875
2876 if (bt_state->map == NULL)
2877 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2878
2879 for (uint32_t ca = 0; ca < color_attachments; ca++) {
2880 const struct anv_surface_view *view =
2881 cmd_buffer->framebuffer->color_attachments[ca];
2882
2883 struct anv_state state =
2884 anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);
2885
2886 if (state.map == NULL)
2887 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2888
2889 memcpy(state.map, view->surface_state.map, 64);
2890
2891 /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
2892 *(uint64_t *)(state.map + 8 * 4) =
2893 anv_reloc_list_add(&cmd_buffer->surface_relocs,
2894 cmd_buffer->device,
2895 state.offset + 8 * 4,
2896 view->bo, view->offset);
2897
2898 bt_map[ca] = state.offset;
2899 }
2900
2901 if (layout == NULL)
2902 return VK_SUCCESS;
2903
2904 for (uint32_t set = 0; set < layout->num_sets; set++) {
2905 struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set];
2906 struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
2907 struct anv_descriptor_slot *surface_slots =
2908 set_layout->stage[stage].surface_start;
2909
2910 uint32_t start = bias + layout->set[set].surface_start[stage];
2911
2912 for (uint32_t b = 0; b < set_layout->stage[stage].surface_count; b++) {
2913 struct anv_surface_view *view =
2914 d->set->descriptors[surface_slots[b].index].view;
2915
2916 if (!view)
2917 continue;
2918
2919 struct anv_state state =
2920 anv_cmd_buffer_alloc_surface_state(cmd_buffer, 64, 64);
2921
2922 if (state.map == NULL)
2923 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2924
2925 uint32_t offset;
2926 if (surface_slots[b].dynamic_slot >= 0) {
2927 uint32_t dynamic_offset =
2928 d->dynamic_offsets[surface_slots[b].dynamic_slot];
2929
2930 offset = view->offset + dynamic_offset;
2931 fill_buffer_surface_state(state.map, view->format, offset,
2932 view->range - dynamic_offset);
2933 } else {
2934 offset = view->offset;
2935 memcpy(state.map, view->surface_state.map, 64);
2936 }
2937
2938 /* The address goes in dwords 8 and 9 of the SURFACE_STATE */
2939 *(uint64_t *)(state.map + 8 * 4) =
2940 anv_reloc_list_add(&cmd_buffer->surface_relocs,
2941 cmd_buffer->device,
2942 state.offset + 8 * 4,
2943 view->bo, offset);
2944
2945 bt_map[start + b] = state.offset;
2946 }
2947 }
2948
2949 return VK_SUCCESS;
2950 }
2951
2952 static VkResult
2953 cmd_buffer_emit_samplers(struct anv_cmd_buffer *cmd_buffer,
2954 unsigned stage, struct anv_state *state)
2955 {
2956 struct anv_pipeline_layout *layout;
2957 uint32_t sampler_count;
2958
2959 if (stage == VK_SHADER_STAGE_COMPUTE)
2960 layout = cmd_buffer->compute_pipeline->layout;
2961 else
2962 layout = cmd_buffer->pipeline->layout;
2963
2964 sampler_count = layout ? layout->stage[stage].sampler_count : 0;
2965 if (sampler_count == 0)
2966 return VK_SUCCESS;
2967
2968 uint32_t size = sampler_count * 16;
2969 *state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream, size, 32);
2970
2971 if (state->map == NULL)
2972 return VK_ERROR_OUT_OF_DEVICE_MEMORY;
2973
2974 for (uint32_t set = 0; set < layout->num_sets; set++) {
2975 struct anv_descriptor_set_binding *d = &cmd_buffer->descriptors[set];
2976 struct anv_descriptor_set_layout *set_layout = layout->set[set].layout;
2977 struct anv_descriptor_slot *sampler_slots =
2978 set_layout->stage[stage].sampler_start;
2979
2980 uint32_t start = layout->set[set].sampler_start[stage];
2981
2982 for (uint32_t b = 0; b < set_layout->stage[stage].sampler_count; b++) {
2983 struct anv_sampler *sampler =
2984 d->set->descriptors[sampler_slots[b].index].sampler;
2985
2986 if (!sampler)
2987 continue;
2988
2989 memcpy(state->map + (start + b) * 16,
2990 sampler->state, sizeof(sampler->state));
2991 }
2992 }
2993
2994 return VK_SUCCESS;
2995 }
2996
2997 static VkResult
2998 flush_descriptor_set(struct anv_cmd_buffer *cmd_buffer, uint32_t stage)
2999 {
3000 struct anv_state surfaces = { 0, }, samplers = { 0, };
3001 VkResult result;
3002
3003 result = cmd_buffer_emit_samplers(cmd_buffer, stage, &samplers);
3004 if (result != VK_SUCCESS)
3005 return result;
3006 result = cmd_buffer_emit_binding_table(cmd_buffer, stage, &surfaces);
3007 if (result != VK_SUCCESS)
3008 return result;
3009
3010 static const uint32_t sampler_state_opcodes[] = {
3011 [VK_SHADER_STAGE_VERTEX] = 43,
3012 [VK_SHADER_STAGE_TESS_CONTROL] = 44, /* HS */
3013 [VK_SHADER_STAGE_TESS_EVALUATION] = 45, /* DS */
3014 [VK_SHADER_STAGE_GEOMETRY] = 46,
3015 [VK_SHADER_STAGE_FRAGMENT] = 47,
3016 [VK_SHADER_STAGE_COMPUTE] = 0,
3017 };
3018
3019 static const uint32_t binding_table_opcodes[] = {
3020 [VK_SHADER_STAGE_VERTEX] = 38,
3021 [VK_SHADER_STAGE_TESS_CONTROL] = 39,
3022 [VK_SHADER_STAGE_TESS_EVALUATION] = 40,
3023 [VK_SHADER_STAGE_GEOMETRY] = 41,
3024 [VK_SHADER_STAGE_FRAGMENT] = 42,
3025 [VK_SHADER_STAGE_COMPUTE] = 0,
3026 };
3027
3028 if (samplers.alloc_size > 0) {
3029 anv_batch_emit(&cmd_buffer->batch,
3030 GEN8_3DSTATE_SAMPLER_STATE_POINTERS_VS,
3031 ._3DCommandSubOpcode = sampler_state_opcodes[stage],
3032 .PointertoVSSamplerState = samplers.offset);
3033 }
3034
3035 if (surfaces.alloc_size > 0) {
3036 anv_batch_emit(&cmd_buffer->batch,
3037 GEN8_3DSTATE_BINDING_TABLE_POINTERS_VS,
3038 ._3DCommandSubOpcode = binding_table_opcodes[stage],
3039 .PointertoVSBindingTable = surfaces.offset);
3040 }
3041
3042 return VK_SUCCESS;
3043 }
3044
3045 static void
3046 flush_descriptor_sets(struct anv_cmd_buffer *cmd_buffer)
3047 {
3048 uint32_t s, dirty = cmd_buffer->descriptors_dirty &
3049 cmd_buffer->pipeline->active_stages;
3050
3051 VkResult result;
3052 for_each_bit(s, dirty) {
3053 result = flush_descriptor_set(cmd_buffer, s);
3054 if (result != VK_SUCCESS)
3055 break;
3056 }
3057
3058 if (result != VK_SUCCESS) {
3059 assert(result == VK_ERROR_OUT_OF_DEVICE_MEMORY);
3060
3061 result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer);
3062 assert(result == VK_SUCCESS);
3063
3064 /* Re-emit all active binding tables */
3065 for_each_bit(s, cmd_buffer->pipeline->active_stages) {
3066 result = flush_descriptor_set(cmd_buffer, s);
3067
3068 /* It had better succeed this time */
3069 assert(result == VK_SUCCESS);
3070 }
3071 }
3072
3073 cmd_buffer->descriptors_dirty &= ~cmd_buffer->pipeline->active_stages;
3074 }
3075
3076 static struct anv_state
3077 anv_cmd_buffer_emit_dynamic(struct anv_cmd_buffer *cmd_buffer,
3078 uint32_t *a, uint32_t dwords, uint32_t alignment)
3079 {
3080 struct anv_state state;
3081
3082 state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
3083 dwords * 4, alignment);
3084 memcpy(state.map, a, dwords * 4);
3085
3086 VG(VALGRIND_CHECK_MEM_IS_DEFINED(state.map, dwords * 4));
3087
3088 return state;
3089 }
3090
3091 static struct anv_state
3092 anv_cmd_buffer_merge_dynamic(struct anv_cmd_buffer *cmd_buffer,
3093 uint32_t *a, uint32_t *b,
3094 uint32_t dwords, uint32_t alignment)
3095 {
3096 struct anv_state state;
3097 uint32_t *p;
3098
3099 state = anv_state_stream_alloc(&cmd_buffer->dynamic_state_stream,
3100 dwords * 4, alignment);
3101 p = state.map;
3102 for (uint32_t i = 0; i < dwords; i++)
3103 p[i] = a[i] | b[i];
3104
3105 VG(VALGRIND_CHECK_MEM_IS_DEFINED(p, dwords * 4));
3106
3107 return state;
3108 }
3109
3110 static VkResult
3111 flush_compute_descriptor_set(struct anv_cmd_buffer *cmd_buffer)
3112 {
3113 struct anv_device *device = cmd_buffer->device;
3114 struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
3115 struct anv_state surfaces = { 0, }, samplers = { 0, };
3116 VkResult result;
3117
3118 result = cmd_buffer_emit_samplers(cmd_buffer,
3119 VK_SHADER_STAGE_COMPUTE, &samplers);
3120 if (result != VK_SUCCESS)
3121 return result;
3122 result = cmd_buffer_emit_binding_table(cmd_buffer,
3123 VK_SHADER_STAGE_COMPUTE, &surfaces);
3124 if (result != VK_SUCCESS)
3125 return result;
3126
3127 struct GEN8_INTERFACE_DESCRIPTOR_DATA desc = {
3128 .KernelStartPointer = pipeline->cs_simd,
3129 .KernelStartPointerHigh = 0,
3130 .BindingTablePointer = surfaces.offset,
3131 .BindingTableEntryCount = 0,
3132 .SamplerStatePointer = samplers.offset,
3133 .SamplerCount = 0,
3134 .NumberofThreadsinGPGPUThreadGroup = 0 /* FIXME: Really? */
3135 };
3136
3137 uint32_t size = GEN8_INTERFACE_DESCRIPTOR_DATA_length * sizeof(uint32_t);
3138 struct anv_state state =
3139 anv_state_pool_alloc(&device->dynamic_state_pool, size, 64);
3140
3141 GEN8_INTERFACE_DESCRIPTOR_DATA_pack(NULL, state.map, &desc);
3142
3143 anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_INTERFACE_DESCRIPTOR_LOAD,
3144 .InterfaceDescriptorTotalLength = size,
3145 .InterfaceDescriptorDataStartAddress = state.offset);
3146
3147 return VK_SUCCESS;
3148 }
3149
3150 static void
3151 anv_cmd_buffer_flush_compute_state(struct anv_cmd_buffer *cmd_buffer)
3152 {
3153 struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
3154 VkResult result;
3155
3156 assert(pipeline->active_stages == VK_SHADER_STAGE_COMPUTE_BIT);
3157
3158 if (cmd_buffer->current_pipeline != GPGPU) {
3159 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
3160 .PipelineSelection = GPGPU);
3161 cmd_buffer->current_pipeline = GPGPU;
3162 }
3163
3164 if (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)
3165 anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
3166
3167 if ((cmd_buffer->descriptors_dirty & VK_SHADER_STAGE_COMPUTE_BIT) ||
3168 (cmd_buffer->compute_dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY)) {
3169 result = flush_compute_descriptor_set(cmd_buffer);
3170 if (result != VK_SUCCESS) {
3171 result = anv_cmd_buffer_new_surface_state_bo(cmd_buffer);
3172 assert(result == VK_SUCCESS);
3173 result = flush_compute_descriptor_set(cmd_buffer);
3174 assert(result == VK_SUCCESS);
3175 }
3176 cmd_buffer->descriptors_dirty &= ~VK_SHADER_STAGE_COMPUTE;
3177 }
3178
3179 cmd_buffer->compute_dirty = 0;
3180 }
3181
3182 static void
3183 anv_cmd_buffer_flush_state(struct anv_cmd_buffer *cmd_buffer)
3184 {
3185 struct anv_pipeline *pipeline = cmd_buffer->pipeline;
3186 uint32_t *p;
3187
3188 uint32_t vb_emit = cmd_buffer->vb_dirty & pipeline->vb_used;
3189
3190 assert((pipeline->active_stages & VK_SHADER_STAGE_COMPUTE_BIT) == 0);
3191
3192 if (cmd_buffer->current_pipeline != _3D) {
3193 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPELINE_SELECT,
3194 .PipelineSelection = _3D);
3195 cmd_buffer->current_pipeline = _3D;
3196 }
3197
3198 if (vb_emit) {
3199 const uint32_t num_buffers = __builtin_popcount(vb_emit);
3200 const uint32_t num_dwords = 1 + num_buffers * 4;
3201
3202 p = anv_batch_emitn(&cmd_buffer->batch, num_dwords,
3203 GEN8_3DSTATE_VERTEX_BUFFERS);
3204 uint32_t vb, i = 0;
3205 for_each_bit(vb, vb_emit) {
3206 struct anv_buffer *buffer = cmd_buffer->vertex_bindings[vb].buffer;
3207 uint32_t offset = cmd_buffer->vertex_bindings[vb].offset;
3208
3209 struct GEN8_VERTEX_BUFFER_STATE state = {
3210 .VertexBufferIndex = vb,
3211 .MemoryObjectControlState = GEN8_MOCS,
3212 .AddressModifyEnable = true,
3213 .BufferPitch = pipeline->binding_stride[vb],
3214 .BufferStartingAddress = { buffer->bo, buffer->offset + offset },
3215 .BufferSize = buffer->size - offset
3216 };
3217
3218 GEN8_VERTEX_BUFFER_STATE_pack(&cmd_buffer->batch, &p[1 + i * 4], &state);
3219 i++;
3220 }
3221 }
3222
3223 if (cmd_buffer->dirty & ANV_CMD_BUFFER_PIPELINE_DIRTY) {
3224 /* If somebody compiled a pipeline after starting a command buffer the
3225 * scratch bo may have grown since we started this cmd buffer (and
3226 * emitted STATE_BASE_ADDRESS). If we're binding that pipeline now,
3227 * reemit STATE_BASE_ADDRESS so that we use the bigger scratch bo. */
3228 if (cmd_buffer->scratch_size < pipeline->total_scratch)
3229 anv_cmd_buffer_emit_state_base_address(cmd_buffer);
3230
3231 anv_batch_emit_batch(&cmd_buffer->batch, &pipeline->batch);
3232 }
3233
3234 if (cmd_buffer->descriptors_dirty)
3235 flush_descriptor_sets(cmd_buffer);
3236
3237 if (cmd_buffer->dirty & ANV_CMD_BUFFER_VP_DIRTY) {
3238 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_SCISSOR_STATE_POINTERS,
3239 .ScissorRectPointer = cmd_buffer->vp_state->scissor.offset);
3240 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_CC,
3241 .CCViewportPointer = cmd_buffer->vp_state->cc_vp.offset);
3242 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP,
3243 .SFClipViewportPointer = cmd_buffer->vp_state->sf_clip_vp.offset);
3244 }
3245
3246 if (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_RS_DIRTY)) {
3247 anv_batch_emit_merge(&cmd_buffer->batch,
3248 cmd_buffer->rs_state->state_sf, pipeline->state_sf);
3249 anv_batch_emit_merge(&cmd_buffer->batch,
3250 cmd_buffer->rs_state->state_raster, pipeline->state_raster);
3251 }
3252
3253 if (cmd_buffer->ds_state &&
3254 (cmd_buffer->dirty & (ANV_CMD_BUFFER_PIPELINE_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)))
3255 anv_batch_emit_merge(&cmd_buffer->batch,
3256 cmd_buffer->ds_state->state_wm_depth_stencil,
3257 pipeline->state_wm_depth_stencil);
3258
3259 if (cmd_buffer->dirty & (ANV_CMD_BUFFER_CB_DIRTY | ANV_CMD_BUFFER_DS_DIRTY)) {
3260 struct anv_state state;
3261 if (cmd_buffer->ds_state == NULL)
3262 state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
3263 cmd_buffer->cb_state->state_color_calc,
3264 GEN8_COLOR_CALC_STATE_length, 64);
3265 else if (cmd_buffer->cb_state == NULL)
3266 state = anv_cmd_buffer_emit_dynamic(cmd_buffer,
3267 cmd_buffer->ds_state->state_color_calc,
3268 GEN8_COLOR_CALC_STATE_length, 64);
3269 else
3270 state = anv_cmd_buffer_merge_dynamic(cmd_buffer,
3271 cmd_buffer->ds_state->state_color_calc,
3272 cmd_buffer->cb_state->state_color_calc,
3273 GEN8_COLOR_CALC_STATE_length, 64);
3274
3275 anv_batch_emit(&cmd_buffer->batch,
3276 GEN8_3DSTATE_CC_STATE_POINTERS,
3277 .ColorCalcStatePointer = state.offset,
3278 .ColorCalcStatePointerValid = true);
3279 }
3280
3281 cmd_buffer->vb_dirty &= ~vb_emit;
3282 cmd_buffer->dirty = 0;
3283 }
3284
3285 void anv_CmdDraw(
3286 VkCmdBuffer cmdBuffer,
3287 uint32_t firstVertex,
3288 uint32_t vertexCount,
3289 uint32_t firstInstance,
3290 uint32_t instanceCount)
3291 {
3292 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
3293
3294 anv_cmd_buffer_flush_state(cmd_buffer);
3295
3296 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
3297 .VertexAccessType = SEQUENTIAL,
3298 .VertexCountPerInstance = vertexCount,
3299 .StartVertexLocation = firstVertex,
3300 .InstanceCount = instanceCount,
3301 .StartInstanceLocation = firstInstance,
3302 .BaseVertexLocation = 0);
3303 }
3304
3305 void anv_CmdDrawIndexed(
3306 VkCmdBuffer cmdBuffer,
3307 uint32_t firstIndex,
3308 uint32_t indexCount,
3309 int32_t vertexOffset,
3310 uint32_t firstInstance,
3311 uint32_t instanceCount)
3312 {
3313 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
3314
3315 anv_cmd_buffer_flush_state(cmd_buffer);
3316
3317 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
3318 .VertexAccessType = RANDOM,
3319 .VertexCountPerInstance = indexCount,
3320 .StartVertexLocation = firstIndex,
3321 .InstanceCount = instanceCount,
3322 .StartInstanceLocation = firstInstance,
3323 .BaseVertexLocation = vertexOffset);
3324 }
3325
3326 static void
3327 anv_batch_lrm(struct anv_batch *batch,
3328 uint32_t reg, struct anv_bo *bo, uint32_t offset)
3329 {
3330 anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_MEM,
3331 .RegisterAddress = reg,
3332 .MemoryAddress = { bo, offset });
3333 }
3334
3335 static void
3336 anv_batch_lri(struct anv_batch *batch, uint32_t reg, uint32_t imm)
3337 {
3338 anv_batch_emit(batch, GEN8_MI_LOAD_REGISTER_IMM,
3339 .RegisterOffset = reg,
3340 .DataDWord = imm);
3341 }
3342
/*
 * Auto-Draw / Indirect Registers
 *
 * Register offsets loaded with MI_LOAD_REGISTER_* before emitting a
 * 3DPRIMITIVE that has IndirectParameterEnable set.
 */
#define GEN7_3DPRIM_END_OFFSET          0x2420
#define GEN7_3DPRIM_START_VERTEX        0x2430
#define GEN7_3DPRIM_VERTEX_COUNT        0x2434
#define GEN7_3DPRIM_INSTANCE_COUNT      0x2438
#define GEN7_3DPRIM_START_INSTANCE      0x243C
#define GEN7_3DPRIM_BASE_VERTEX         0x2440
3350
3351 void anv_CmdDrawIndirect(
3352 VkCmdBuffer cmdBuffer,
3353 VkBuffer _buffer,
3354 VkDeviceSize offset,
3355 uint32_t count,
3356 uint32_t stride)
3357 {
3358 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
3359 struct anv_buffer *buffer = (struct anv_buffer *) _buffer;
3360 struct anv_bo *bo = buffer->bo;
3361 uint32_t bo_offset = buffer->offset + offset;
3362
3363 anv_cmd_buffer_flush_state(cmd_buffer);
3364
3365 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
3366 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
3367 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
3368 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 12);
3369 anv_batch_lri(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, 0);
3370
3371 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
3372 .IndirectParameterEnable = true,
3373 .VertexAccessType = SEQUENTIAL);
3374 }
3375
3376 void anv_CmdDrawIndexedIndirect(
3377 VkCmdBuffer cmdBuffer,
3378 VkBuffer _buffer,
3379 VkDeviceSize offset,
3380 uint32_t count,
3381 uint32_t stride)
3382 {
3383 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
3384 struct anv_buffer *buffer = (struct anv_buffer *) _buffer;
3385 struct anv_bo *bo = buffer->bo;
3386 uint32_t bo_offset = buffer->offset + offset;
3387
3388 anv_cmd_buffer_flush_state(cmd_buffer);
3389
3390 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_VERTEX_COUNT, bo, bo_offset);
3391 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_INSTANCE_COUNT, bo, bo_offset + 4);
3392 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_VERTEX, bo, bo_offset + 8);
3393 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_BASE_VERTEX, bo, bo_offset + 12);
3394 anv_batch_lrm(&cmd_buffer->batch, GEN7_3DPRIM_START_INSTANCE, bo, bo_offset + 16);
3395
3396 anv_batch_emit(&cmd_buffer->batch, GEN8_3DPRIMITIVE,
3397 .IndirectParameterEnable = true,
3398 .VertexAccessType = RANDOM);
3399 }
3400
3401 void anv_CmdDispatch(
3402 VkCmdBuffer cmdBuffer,
3403 uint32_t x,
3404 uint32_t y,
3405 uint32_t z)
3406 {
3407 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
3408 struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
3409 struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
3410
3411 anv_cmd_buffer_flush_compute_state(cmd_buffer);
3412
3413 anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
3414 .SIMDSize = prog_data->simd_size / 16,
3415 .ThreadDepthCounterMaximum = 0,
3416 .ThreadHeightCounterMaximum = 0,
3417 .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
3418 .ThreadGroupIDXDimension = x,
3419 .ThreadGroupIDYDimension = y,
3420 .ThreadGroupIDZDimension = z,
3421 .RightExecutionMask = pipeline->cs_right_mask,
3422 .BottomExecutionMask = 0xffffffff);
3423
3424 anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
3425 }
3426
/* Registers loaded with the x/y/z group counts of an indirect dispatch. */
#define GPGPU_DISPATCHDIMX              0x2500
#define GPGPU_DISPATCHDIMY              0x2504
#define GPGPU_DISPATCHDIMZ              0x2508
3430
3431 void anv_CmdDispatchIndirect(
3432 VkCmdBuffer cmdBuffer,
3433 VkBuffer _buffer,
3434 VkDeviceSize offset)
3435 {
3436 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
3437 struct anv_pipeline *pipeline = cmd_buffer->compute_pipeline;
3438 struct brw_cs_prog_data *prog_data = &pipeline->cs_prog_data;
3439 struct anv_buffer *buffer = (struct anv_buffer *) _buffer;
3440 struct anv_bo *bo = buffer->bo;
3441 uint32_t bo_offset = buffer->offset + offset;
3442
3443 anv_cmd_buffer_flush_compute_state(cmd_buffer);
3444
3445 anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMX, bo, bo_offset);
3446 anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMY, bo, bo_offset + 4);
3447 anv_batch_lrm(&cmd_buffer->batch, GPGPU_DISPATCHDIMZ, bo, bo_offset + 8);
3448
3449 anv_batch_emit(&cmd_buffer->batch, GEN8_GPGPU_WALKER,
3450 .IndirectParameterEnable = true,
3451 .SIMDSize = prog_data->simd_size / 16,
3452 .ThreadDepthCounterMaximum = 0,
3453 .ThreadHeightCounterMaximum = 0,
3454 .ThreadWidthCounterMaximum = pipeline->cs_thread_width_max,
3455 .RightExecutionMask = pipeline->cs_right_mask,
3456 .BottomExecutionMask = 0xffffffff);
3457
3458 anv_batch_emit(&cmd_buffer->batch, GEN8_MEDIA_STATE_FLUSH);
3459 }
3460
3461 void anv_CmdSetEvent(
3462 VkCmdBuffer cmdBuffer,
3463 VkEvent event,
3464 VkPipeEvent pipeEvent)
3465 {
3466 stub();
3467 }
3468
3469 void anv_CmdResetEvent(
3470 VkCmdBuffer cmdBuffer,
3471 VkEvent event,
3472 VkPipeEvent pipeEvent)
3473 {
3474 stub();
3475 }
3476
3477 void anv_CmdWaitEvents(
3478 VkCmdBuffer cmdBuffer,
3479 VkWaitEvent waitEvent,
3480 uint32_t eventCount,
3481 const VkEvent* pEvents,
3482 uint32_t memBarrierCount,
3483 const void** ppMemBarriers)
3484 {
3485 stub();
3486 }
3487
3488 void anv_CmdPipelineBarrier(
3489 VkCmdBuffer cmdBuffer,
3490 VkWaitEvent waitEvent,
3491 uint32_t pipeEventCount,
3492 const VkPipeEvent* pPipeEvents,
3493 uint32_t memBarrierCount,
3494 const void** ppMemBarriers)
3495 {
3496 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer;
3497 uint32_t b, *dw;
3498
3499 struct GEN8_PIPE_CONTROL cmd = {
3500 GEN8_PIPE_CONTROL_header,
3501 .PostSyncOperation = NoWrite,
3502 };
3503
3504 /* XXX: I think waitEvent is a no-op on our HW. We should verify that. */
3505
3506 for (uint32_t i = 0; i < pipeEventCount; i++) {
3507 switch (pPipeEvents[i]) {
3508 case VK_PIPE_EVENT_TOP_OF_PIPE:
3509 /* This is just what PIPE_CONTROL does */
3510 break;
3511 case VK_PIPE_EVENT_VERTEX_PROCESSING_COMPLETE:
3512 case VK_PIPE_EVENT_LOCAL_FRAGMENT_PROCESSING_COMPLETE:
3513 case VK_PIPE_EVENT_FRAGMENT_PROCESSING_COMPLETE:
3514 cmd.StallAtPixelScoreboard = true;
3515 break;
3516 case VK_PIPE_EVENT_GRAPHICS_PIPELINE_COMPLETE:
3517 case VK_PIPE_EVENT_COMPUTE_PIPELINE_COMPLETE:
3518 case VK_PIPE_EVENT_TRANSFER_COMPLETE:
3519 case VK_PIPE_EVENT_COMMANDS_COMPLETE:
3520 cmd.CommandStreamerStallEnable = true;
3521 break;
3522 default:
3523 unreachable("Invalid VkPipeEvent");
3524 }
3525 }
3526
3527 /* XXX: Right now, we're really dumb and just flush whatever categories
3528 * the app asks for. One of these days we may make this a bit better
3529 * but right now that's all the hardware allows for in most areas.
3530 */
3531 VkMemoryOutputFlags out_flags = 0;
3532 VkMemoryInputFlags in_flags = 0;
3533
3534 for (uint32_t i = 0; i < memBarrierCount; i++) {
3535 const struct anv_common *common = ppMemBarriers[i];
3536 switch (common->sType) {
3537 case VK_STRUCTURE_TYPE_MEMORY_BARRIER: {
3538 const VkMemoryBarrier *barrier = (VkMemoryBarrier *)common;
3539 out_flags |= barrier->outputMask;
3540 in_flags |= barrier->inputMask;
3541 break;
3542 }
3543 case VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER: {
3544 const VkBufferMemoryBarrier *barrier = (VkBufferMemoryBarrier *)common;
3545 out_flags |= barrier->outputMask;
3546 in_flags |= barrier->inputMask;
3547 break;
3548 }
3549 case VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER: {
3550 const VkImageMemoryBarrier *barrier = (VkImageMemoryBarrier *)common;
3551 out_flags |= barrier->outputMask;
3552 in_flags |= barrier->inputMask;
3553 break;
3554 }
3555 default:
3556 unreachable("Invalid memory barrier type");
3557 }
3558 }
3559
3560 for_each_bit(b, out_flags) {
3561 switch ((VkMemoryOutputFlags)(1 << b)) {
3562 case VK_MEMORY_OUTPUT_CPU_WRITE_BIT:
3563 break; /* FIXME: Little-core systems */
3564 case VK_MEMORY_OUTPUT_SHADER_WRITE_BIT:
3565 cmd.DCFlushEnable = true;
3566 break;
3567 case VK_MEMORY_OUTPUT_COLOR_ATTACHMENT_BIT:
3568 cmd.RenderTargetCacheFlushEnable = true;
3569 break;
3570 case VK_MEMORY_OUTPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
3571 cmd.DepthCacheFlushEnable = true;
3572 break;
3573 case VK_MEMORY_OUTPUT_TRANSFER_BIT:
3574 cmd.RenderTargetCacheFlushEnable = true;
3575 cmd.DepthCacheFlushEnable = true;
3576 break;
3577 default:
3578 unreachable("Invalid memory output flag");
3579 }
3580 }
3581
3582 for_each_bit(b, out_flags) {
3583 switch ((VkMemoryInputFlags)(1 << b)) {
3584 case VK_MEMORY_INPUT_CPU_READ_BIT:
3585 break; /* FIXME: Little-core systems */
3586 case VK_MEMORY_INPUT_INDIRECT_COMMAND_BIT:
3587 case VK_MEMORY_INPUT_INDEX_FETCH_BIT:
3588 case VK_MEMORY_INPUT_VERTEX_ATTRIBUTE_FETCH_BIT:
3589 cmd.VFCacheInvalidationEnable = true;
3590 break;
3591 case VK_MEMORY_INPUT_UNIFORM_READ_BIT:
3592 cmd.ConstantCacheInvalidationEnable = true;
3593 /* fallthrough */
3594 case VK_MEMORY_INPUT_SHADER_READ_BIT:
3595 cmd.DCFlushEnable = true;
3596 cmd.TextureCacheInvalidationEnable = true;
3597 break;
3598 case VK_MEMORY_INPUT_COLOR_ATTACHMENT_BIT:
3599 case VK_MEMORY_INPUT_DEPTH_STENCIL_ATTACHMENT_BIT:
3600 break; /* XXX: Hunh? */
3601 case VK_MEMORY_INPUT_TRANSFER_BIT:
3602 cmd.TextureCacheInvalidationEnable = true;
3603 break;
3604 }
3605 }
3606
3607 dw = anv_batch_emit_dwords(&cmd_buffer->batch, GEN8_PIPE_CONTROL_length);
3608 GEN8_PIPE_CONTROL_pack(&cmd_buffer->batch, dw, &cmd);
3609 }
3610
3611 void anv_CmdInitAtomicCounters(
3612 VkCmdBuffer cmdBuffer,
3613 VkPipelineBindPoint pipelineBindPoint,
3614 uint32_t startCounter,
3615 uint32_t counterCount,
3616 const uint32_t* pData)
3617 {
3618 stub();
3619 }
3620
3621 void anv_CmdLoadAtomicCounters(
3622 VkCmdBuffer cmdBuffer,
3623 VkPipelineBindPoint pipelineBindPoint,
3624 uint32_t startCounter,
3625 uint32_t counterCount,
3626 VkBuffer srcBuffer,
3627 VkDeviceSize srcOffset)
3628 {
3629 stub();
3630 }
3631
3632 void anv_CmdSaveAtomicCounters(
3633 VkCmdBuffer cmdBuffer,
3634 VkPipelineBindPoint pipelineBindPoint,
3635 uint32_t startCounter,
3636 uint32_t counterCount,
3637 VkBuffer destBuffer,
3638 VkDeviceSize destOffset)
3639 {
3640 stub();
3641 }
3642
3643 static void
3644 anv_framebuffer_destroy(struct anv_device *device,
3645 struct anv_object *object,
3646 VkObjectType obj_type)
3647 {
3648 struct anv_framebuffer *fb = (struct anv_framebuffer *)object;
3649
3650 assert(obj_type == VK_OBJECT_TYPE_FRAMEBUFFER);
3651
3652 anv_DestroyObject((VkDevice) device,
3653 VK_OBJECT_TYPE_DYNAMIC_VP_STATE,
3654 fb->vp_state);
3655
3656 anv_device_free(device, fb);
3657 }
3658
3659 VkResult anv_CreateFramebuffer(
3660 VkDevice _device,
3661 const VkFramebufferCreateInfo* pCreateInfo,
3662 VkFramebuffer* pFramebuffer)
3663 {
3664 struct anv_device *device = (struct anv_device *) _device;
3665 struct anv_framebuffer *framebuffer;
3666
3667 static const struct anv_depth_stencil_view null_view =
3668 { .depth_format = D16_UNORM, .depth_stride = 0, .stencil_stride = 0 };
3669
3670 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
3671
3672 framebuffer = anv_device_alloc(device, sizeof(*framebuffer), 8,
3673 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
3674 if (framebuffer == NULL)
3675 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3676
3677 framebuffer->base.destructor = anv_framebuffer_destroy;
3678
3679 framebuffer->color_attachment_count = pCreateInfo->colorAttachmentCount;
3680 for (uint32_t i = 0; i < pCreateInfo->colorAttachmentCount; i++) {
3681 framebuffer->color_attachments[i] =
3682 (struct anv_surface_view *) pCreateInfo->pColorAttachments[i].view;
3683 }
3684
3685 if (pCreateInfo->pDepthStencilAttachment) {
3686 framebuffer->depth_stencil =
3687 (struct anv_depth_stencil_view *) pCreateInfo->pDepthStencilAttachment->view;
3688 } else {
3689 framebuffer->depth_stencil = &null_view;
3690 }
3691
3692 framebuffer->sample_count = pCreateInfo->sampleCount;
3693 framebuffer->width = pCreateInfo->width;
3694 framebuffer->height = pCreateInfo->height;
3695 framebuffer->layers = pCreateInfo->layers;
3696
3697 anv_CreateDynamicViewportState((VkDevice) device,
3698 &(VkDynamicVpStateCreateInfo) {
3699 .sType = VK_STRUCTURE_TYPE_DYNAMIC_VP_STATE_CREATE_INFO,
3700 .viewportAndScissorCount = 1,
3701 .pViewports = (VkViewport[]) {
3702 {
3703 .originX = 0,
3704 .originY = 0,
3705 .width = pCreateInfo->width,
3706 .height = pCreateInfo->height,
3707 .minDepth = 0,
3708 .maxDepth = 1
3709 },
3710 },
3711 .pScissors = (VkRect[]) {
3712 { { 0, 0 },
3713 { pCreateInfo->width, pCreateInfo->height } },
3714 }
3715 },
3716 &framebuffer->vp_state);
3717
3718 *pFramebuffer = (VkFramebuffer) framebuffer;
3719
3720 return VK_SUCCESS;
3721 }
3722
3723 VkResult anv_CreateRenderPass(
3724 VkDevice _device,
3725 const VkRenderPassCreateInfo* pCreateInfo,
3726 VkRenderPass* pRenderPass)
3727 {
3728 struct anv_device *device = (struct anv_device *) _device;
3729 struct anv_render_pass *pass;
3730 size_t size;
3731
3732 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);
3733
3734 size = sizeof(*pass) +
3735 pCreateInfo->layers * sizeof(struct anv_render_pass_layer);
3736 pass = anv_device_alloc(device, size, 8,
3737 VK_SYSTEM_ALLOC_TYPE_API_OBJECT);
3738 if (pass == NULL)
3739 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
3740
3741 pass->render_area = pCreateInfo->renderArea;
3742
3743 pass->num_layers = pCreateInfo->layers;
3744
3745 pass->num_clear_layers = 0;
3746 for (uint32_t i = 0; i < pCreateInfo->layers; i++) {
3747 pass->layers[i].color_load_op = pCreateInfo->pColorLoadOps[i];
3748 pass->layers[i].clear_color = pCreateInfo->pColorLoadClearValues[i];
3749 if (pass->layers[i].color_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR)
3750 pass->num_clear_layers++;
3751 }
3752
3753 *pRenderPass = (VkRenderPass) pass;
3754
3755 return VK_SUCCESS;
3756 }
3757
3758 static void
3759 anv_cmd_buffer_emit_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
3760 struct anv_render_pass *pass)
3761 {
3762 const struct anv_depth_stencil_view *view =
3763 cmd_buffer->framebuffer->depth_stencil;
3764
3765 /* FIXME: Implement the PMA stall W/A */
3766
3767 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DEPTH_BUFFER,
3768 .SurfaceType = SURFTYPE_2D,
3769 .DepthWriteEnable = view->depth_stride > 0,
3770 .StencilWriteEnable = view->stencil_stride > 0,
3771 .HierarchicalDepthBufferEnable = false,
3772 .SurfaceFormat = view->depth_format,
3773 .SurfacePitch = view->depth_stride > 0 ? view->depth_stride - 1 : 0,
3774 .SurfaceBaseAddress = { view->bo, view->depth_offset },
3775 .Height = pass->render_area.extent.height - 1,
3776 .Width = pass->render_area.extent.width - 1,
3777 .LOD = 0,
3778 .Depth = 1 - 1,
3779 .MinimumArrayElement = 0,
3780 .DepthBufferObjectControlState = GEN8_MOCS,
3781 .RenderTargetViewExtent = 1 - 1,
3782 .SurfaceQPitch = 0);
3783
3784 /* Disable hierarchial depth buffers. */
3785 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_HIER_DEPTH_BUFFER);
3786
3787 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_STENCIL_BUFFER,
3788 .StencilBufferEnable = view->stencil_stride > 0,
3789 .StencilBufferObjectControlState = GEN8_MOCS,
3790 .SurfacePitch = view->stencil_stride > 0 ? view->stencil_stride - 1 : 0,
3791 .SurfaceBaseAddress = { view->bo, view->stencil_offset },
3792 .SurfaceQPitch = 0);
3793
3794 /* Clear the clear params. */
3795 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_CLEAR_PARAMS);
3796 }
3797
3798 void anv_CmdBeginRenderPass(
3799 VkCmdBuffer cmdBuffer,
3800 const VkRenderPassBegin* pRenderPassBegin)
3801 {
3802 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *) cmdBuffer;
3803 struct anv_render_pass *pass = (struct anv_render_pass *) pRenderPassBegin->renderPass;
3804 struct anv_framebuffer *framebuffer =
3805 (struct anv_framebuffer *) pRenderPassBegin->framebuffer;
3806
3807 cmd_buffer->framebuffer = framebuffer;
3808
3809 cmd_buffer->descriptors_dirty |= VK_SHADER_STAGE_FRAGMENT_BIT;
3810
3811 anv_batch_emit(&cmd_buffer->batch, GEN8_3DSTATE_DRAWING_RECTANGLE,
3812 .ClippedDrawingRectangleYMin = pass->render_area.offset.y,
3813 .ClippedDrawingRectangleXMin = pass->render_area.offset.x,
3814 .ClippedDrawingRectangleYMax =
3815 pass->render_area.offset.y + pass->render_area.extent.height - 1,
3816 .ClippedDrawingRectangleXMax =
3817 pass->render_area.offset.x + pass->render_area.extent.width - 1,
3818 .DrawingRectangleOriginY = 0,
3819 .DrawingRectangleOriginX = 0);
3820
3821 anv_cmd_buffer_emit_depth_stencil(cmd_buffer, pass);
3822
3823 anv_cmd_buffer_clear(cmd_buffer, pass);
3824 }
3825
3826 void anv_CmdEndRenderPass(
3827 VkCmdBuffer cmdBuffer,
3828 VkRenderPass renderPass)
3829 {
3830 /* Emit a flushing pipe control at the end of a pass. This is kind of a
3831 * hack but it ensures that render targets always actually get written.
3832 * Eventually, we should do flushing based on image format transitions
3833 * or something of that nature.
3834 */
3835 struct anv_cmd_buffer *cmd_buffer = (struct anv_cmd_buffer *)cmdBuffer;
3836 anv_batch_emit(&cmd_buffer->batch, GEN8_PIPE_CONTROL,
3837 .PostSyncOperation = NoWrite,
3838 .RenderTargetCacheFlushEnable = true,
3839 .InstructionCacheInvalidateEnable = true,
3840 .DepthCacheFlushEnable = true,
3841 .VFCacheInvalidationEnable = true,
3842 .TextureCacheInvalidationEnable = true,
3843 .CommandStreamerStallEnable = true);
3844 }
3845
3846 void vkCmdDbgMarkerBegin(
3847 VkCmdBuffer cmdBuffer,
3848 const char* pMarker)
3849 __attribute__ ((visibility ("default")));
3850
3851 void vkCmdDbgMarkerEnd(
3852 VkCmdBuffer cmdBuffer)
3853 __attribute__ ((visibility ("default")));
3854
3855 VkResult vkDbgSetObjectTag(
3856 VkDevice device,
3857 VkObject object,
3858 size_t tagSize,
3859 const void* pTag)
3860 __attribute__ ((visibility ("default")));
3861
3862
3863 void vkCmdDbgMarkerBegin(
3864 VkCmdBuffer cmdBuffer,
3865 const char* pMarker)
3866 {
3867 }
3868
3869 void vkCmdDbgMarkerEnd(
3870 VkCmdBuffer cmdBuffer)
3871 {
3872 }
3873
3874 VkResult vkDbgSetObjectTag(
3875 VkDevice device,
3876 VkObject object,
3877 size_t tagSize,
3878 const void* pTag)
3879 {
3880 return VK_SUCCESS;
3881 }