2 * Copyright 2011 Adam Rak <adam.rak@streamnovation.com>
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * on the rights to use, copy, modify, merge, publish, distribute, sub
8 * license, and/or sell copies of the Software, and to permit persons to whom
9 * the Software is furnished to do so, subject to the following conditions:
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
24 * Adam Rak <adam.rak@streamnovation.com>
29 #include "pipe/p_defines.h"
30 #include "pipe/p_state.h"
31 #include "pipe/p_context.h"
32 #include "util/u_blitter.h"
33 #include "util/u_double_list.h"
34 #include "util/u_transfer.h"
35 #include "util/u_surface.h"
36 #include "util/u_pack_color.h"
37 #include "util/u_memory.h"
38 #include "util/u_inlines.h"
39 #include "util/u_framebuffer.h"
40 #include "pipebuffer/pb_buffer.h"
42 #include "evergreend.h"
43 #include "r600_resource.h"
44 #include "r600_shader.h"
45 #include "r600_pipe.h"
46 #include "r600_formats.h"
47 #include "evergreen_compute.h"
48 #include "r600_hw_context_priv.h"
49 #include "evergreen_compute_internal.h"
50 #include "compute_memory_pool.h"
52 #include "llvm_wrapper.h"
56 RAT0 is for global binding write
57 VTX1 is for global binding read
59 for writing images RAT1...
60 for reading images TEX2...
63 TEX2... consumes the same fetch resources, that VTX2... would consume
65 CONST0 and VTX0 is for parameters
66 CONST0 is binding smaller input parameter buffer, and for constant indexing,
68 VTX0 is for indirect/non-constant indexing, or if the input is bigger than
69 the constant cache can handle
71 RAT-s are limited to 12, so we can only bind at most 11 texture for writing
72 because we reserve RAT0 for global bindings. With byteaddressing enabled,
73 we should reserve another one too. => 10 image bindings for writing max.
76 CL_DEVICE_MAX_READ_IMAGE_ARGS: 128
77 CL_DEVICE_MAX_WRITE_IMAGE_ARGS: 8
79 so 10 for writing is enough. 176 is the max for reading according to the docs
81 writable images should be listed first < 10, so their id corresponds to RAT(id+1)
82 writable images will consume TEX slots, VTX slots too because of linear indexing
86 const struct u_resource_vtbl r600_global_buffer_vtbl
=
88 u_default_resource_get_handle
, /* get_handle */
89 r600_compute_global_buffer_destroy
, /* resource_destroy */
90 r600_compute_global_get_transfer
, /* get_transfer */
91 r600_compute_global_transfer_destroy
, /* transfer_destroy */
92 r600_compute_global_transfer_map
, /* transfer_map */
93 r600_compute_global_transfer_flush_region
,/* transfer_flush_region */
94 r600_compute_global_transfer_unmap
, /* transfer_unmap */
95 r600_compute_global_transfer_inline_write
/* transfer_inline_write */
99 void *evergreen_create_compute_state(
100 struct pipe_context
*ctx_
,
101 const const struct pipe_compute_state
*cso
)
103 struct r600_context
*ctx
= (struct r600_context
*)ctx_
;
106 const struct pipe_llvm_program_header
* header
;
107 const unsigned char * code
;
110 code
= cso
->prog
+ sizeof(struct pipe_llvm_program_header
);
113 if (!ctx
->screen
->screen
.get_param(&ctx
->screen
->screen
,
115 fprintf(stderr
, "Compute is not supported\n");
118 struct r600_pipe_compute
*shader
= CALLOC_STRUCT(r600_pipe_compute
);
120 shader
->ctx
= (struct r600_context
*)ctx
;
121 shader
->resources
= (struct evergreen_compute_resource
*)
122 CALLOC(sizeof(struct evergreen_compute_resource
),
123 get_compute_resource_num());
124 shader
->local_size
= cso
->req_local_mem
; ///TODO: assert it
125 shader
->private_size
= cso
->req_private_mem
;
126 shader
->input_size
= cso
->req_input_mem
;
129 shader
->mod
= llvm_parse_bitcode(code
, header
->num_bytes
);
131 r600_compute_shader_create(ctx_
, shader
->mod
, &shader
->bc
);
136 void evergreen_delete_compute_state(struct pipe_context
*ctx
, void* state
)
138 struct r600_pipe_compute
*shader
= (struct r600_pipe_compute
*)state
;
140 free(shader
->resources
);
144 static void evergreen_bind_compute_state(struct pipe_context
*ctx_
, void *state
)
146 struct r600_context
*ctx
= (struct r600_context
*)ctx_
;
148 ctx
->cs_shader
= (struct r600_pipe_compute
*)state
;
150 if (!ctx
->cs_shader
->shader_code_bo
) {
152 ctx
->cs_shader
->shader_code_bo
=
153 r600_compute_buffer_alloc_vram(ctx
->screen
,
154 ctx
->cs_shader
->bc
.ndw
* 4);
156 void *p
= ctx
->ws
->buffer_map(
157 ctx
->cs_shader
->shader_code_bo
->cs_buf
,
158 ctx
->cs
, PIPE_TRANSFER_WRITE
);
160 memcpy(p
, ctx
->cs_shader
->bc
.bytecode
, ctx
->cs_shader
->bc
.ndw
* 4);
162 ctx
->ws
->buffer_unmap(ctx
->cs_shader
->shader_code_bo
->cs_buf
);
166 evergreen_compute_init_config(ctx
);
168 struct evergreen_compute_resource
* res
= get_empty_res(ctx
->cs_shader
,
169 COMPUTE_RESOURCE_SHADER
, 0);
171 if (ctx
->chip_class
< CAYMAN
) {
172 evergreen_reg_set(res
, R_008C0C_SQ_GPR_RESOURCE_MGMT_3
,
173 S_008C0C_NUM_LS_GPRS(ctx
->cs_shader
->bc
.ngpr
));
176 ///maybe we can use it later
177 evergreen_reg_set(res
, R_0286C8_SPI_THREAD_GROUPING
, 0);
178 ///maybe we can use it later
179 evergreen_reg_set(res
, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2
, 0);
181 evergreen_reg_set(res
, R_0288D4_SQ_PGM_RESOURCES_LS
,
182 S_0288D4_NUM_GPRS(ctx
->cs_shader
->bc
.ngpr
)
183 | S_0288D4_STACK_SIZE(ctx
->cs_shader
->bc
.nstack
));
184 evergreen_reg_set(res
, R_0288D8_SQ_PGM_RESOURCES_LS_2
, 0);
186 evergreen_reg_set(res
, R_0288D0_SQ_PGM_START_LS
, 0);
187 res
->bo
= ctx
->cs_shader
->shader_code_bo
;
188 res
->usage
= RADEON_USAGE_READ
;
189 res
->coher_bo_size
= ctx
->cs_shader
->bc
.ndw
*4;
191 r600_inval_shader_cache(ctx
);
193 /* We can't always determine the
194 * number of iterations in a loop before it's executed,
195 * so we just need to set up the loop counter to give us the maximum
196 * number of iterations possible. Currently, loops in shader code
197 * ignore the loop counter and use a break instruction to exit the
198 * loop at the correct time.
200 evergreen_set_loop_const(ctx
->cs_shader
,
202 0xFFF, /* Maximum value of the loop counter (i.e. when the loop
203 * counter reaches this value, the program will break
204 * out of the loop. */
205 0x0, /* Starting value of the loop counter. */
206 0x1); /* Amount to increment the loop counter each iteration. */
209 /* The kernel parameters are stored in a vtx buffer (ID=0); besides the explicit
210 * kernel parameters there are implicit parameters that need to be stored
211 * in the vertex buffer as well. Here is how these parameters are organized in
214 * DWORDS 0-2: Number of work groups in each dimension (x,y,z)
215 * DWORDS 3-5: Number of global work items in each dimension (x,y,z)
216 * DWORDS 6-8: Number of work items within each work group in each dimension
218 * DWORDS 9+ : Kernel parameters
220 void evergreen_compute_upload_input(
221 struct pipe_context
*ctx_
,
222 const uint
*block_layout
,
223 const uint
*grid_layout
,
226 struct r600_context
*ctx
= (struct r600_context
*)ctx_
;
228 unsigned kernel_parameters_offset_bytes
= 36;
229 uint32_t * num_work_groups_start
;
230 uint32_t * global_size_start
;
231 uint32_t * local_size_start
;
232 uint32_t * kernel_parameters_start
;
234 if (ctx
->cs_shader
->input_size
== 0) {
238 if (!ctx
->cs_shader
->kernel_param
) {
239 unsigned buffer_size
= ctx
->cs_shader
->input_size
;
241 /* Add space for the grid dimensions */
242 buffer_size
+= kernel_parameters_offset_bytes
* sizeof(uint
);
243 ctx
->cs_shader
->kernel_param
=
244 r600_compute_buffer_alloc_vram(ctx
->screen
,
248 num_work_groups_start
= ctx
->ws
->buffer_map(
249 ctx
->cs_shader
->kernel_param
->cs_buf
,
250 ctx
->cs
, PIPE_TRANSFER_WRITE
);
251 global_size_start
= num_work_groups_start
+ (3 * (sizeof(uint
) /4));
252 local_size_start
= global_size_start
+ (3 * (sizeof(uint
)) / 4);
253 kernel_parameters_start
= local_size_start
+ (3 * (sizeof(uint
)) / 4);
255 /* Copy the work group size */
256 memcpy(num_work_groups_start
, grid_layout
, 3 * sizeof(uint
));
258 /* Copy the global size */
259 for (i
= 0; i
< 3; i
++) {
260 global_size_start
[i
] = grid_layout
[i
] * block_layout
[i
];
263 /* Copy the local dimensions */
264 memcpy(local_size_start
, block_layout
, 3 * sizeof(uint
));
266 /* Copy the kernel inputs */
267 memcpy(kernel_parameters_start
, input
, ctx
->cs_shader
->input_size
);
269 for (i
= 0; i
< (kernel_parameters_offset_bytes
/ 4) +
270 (ctx
->cs_shader
->input_size
/ 4); i
++) {
271 COMPUTE_DBG("input %i : %i\n", i
,
272 ((unsigned*)num_work_groups_start
)[i
]);
275 ctx
->ws
->buffer_unmap(ctx
->cs_shader
->kernel_param
->cs_buf
);
277 ///ID=0 is reserved for the parameters
278 evergreen_set_vtx_resource(ctx
->cs_shader
,
279 ctx
->cs_shader
->kernel_param
, 0, 0, 0);
280 ///ID=0 is reserved for parameters
281 evergreen_set_const_cache(ctx
->cs_shader
, 0,
282 ctx
->cs_shader
->kernel_param
, ctx
->cs_shader
->input_size
, 0);
285 void evergreen_direct_dispatch(
286 struct pipe_context
*ctx_
,
287 const uint
*block_layout
, const uint
*grid_layout
)
289 struct r600_context
*ctx
= (struct r600_context
*)ctx_
;
293 struct evergreen_compute_resource
* res
= get_empty_res(ctx
->cs_shader
,
294 COMPUTE_RESOURCE_DISPATCH
, 0);
296 evergreen_reg_set(res
, R_008958_VGT_PRIMITIVE_TYPE
, V_008958_DI_PT_POINTLIST
);
298 evergreen_reg_set(res
, R_00899C_VGT_COMPUTE_START_X
, 0);
299 evergreen_reg_set(res
, R_0089A0_VGT_COMPUTE_START_Y
, 0);
300 evergreen_reg_set(res
, R_0089A4_VGT_COMPUTE_START_Z
, 0);
302 evergreen_reg_set(res
, R_0286EC_SPI_COMPUTE_NUM_THREAD_X
, block_layout
[0]);
303 evergreen_reg_set(res
, R_0286F0_SPI_COMPUTE_NUM_THREAD_Y
, block_layout
[1]);
304 evergreen_reg_set(res
, R_0286F4_SPI_COMPUTE_NUM_THREAD_Z
, block_layout
[2]);
310 for (i
= 0; i
< 3; i
++) {
311 group_size
*= block_layout
[i
];
314 for (i
= 0; i
< 3; i
++) {
315 grid_size
*= grid_layout
[i
];
318 evergreen_reg_set(res
, R_008970_VGT_NUM_INDICES
, group_size
);
319 evergreen_reg_set(res
, R_0089AC_VGT_COMPUTE_THREAD_GROUP_SIZE
, group_size
);
321 evergreen_emit_raw_value(res
, PKT3C(PKT3_DISPATCH_DIRECT
, 3, 0));
322 evergreen_emit_raw_value(res
, grid_layout
[0]);
323 evergreen_emit_raw_value(res
, grid_layout
[1]);
324 evergreen_emit_raw_value(res
, grid_layout
[2]);
325 ///VGT_DISPATCH_INITIATOR = COMPUTE_SHADER_EN
326 evergreen_emit_raw_value(res
, 1);
329 static void compute_emit_cs(struct r600_context
*ctx
)
331 struct radeon_winsys_cs
*cs
= ctx
->cs
;
334 r600_emit_atom(ctx
, &ctx
->start_cs_cmd
.atom
);
336 struct r600_resource
*onebo
= NULL
;
338 for (i
= 0; i
< get_compute_resource_num(); i
++) {
339 if (ctx
->cs_shader
->resources
[i
].enabled
) {
341 COMPUTE_DBG("resnum: %i, cdw: %i\n", i
, cs
->cdw
);
343 for (j
= 0; j
< ctx
->cs_shader
->resources
[i
].cs_end
; j
++) {
344 if (ctx
->cs_shader
->resources
[i
].do_reloc
[j
]) {
345 assert(ctx
->cs_shader
->resources
[i
].bo
);
346 evergreen_emit_ctx_reloc(ctx
,
347 ctx
->cs_shader
->resources
[i
].bo
,
348 ctx
->cs_shader
->resources
[i
].usage
);
351 cs
->buf
[cs
->cdw
++] = ctx
->cs_shader
->resources
[i
].cs
[j
];
354 if (ctx
->cs_shader
->resources
[i
].bo
) {
355 onebo
= ctx
->cs_shader
->resources
[i
].bo
;
356 evergreen_emit_ctx_reloc(ctx
,
357 ctx
->cs_shader
->resources
[i
].bo
,
358 ctx
->cs_shader
->resources
[i
].usage
);
360 ///special case for textures
361 if (ctx
->cs_shader
->resources
[i
].do_reloc
362 [ctx
->cs_shader
->resources
[i
].cs_end
] == 2) {
363 evergreen_emit_ctx_reloc(ctx
,
364 ctx
->cs_shader
->resources
[i
].bo
,
365 ctx
->cs_shader
->resources
[i
].usage
);
371 /* r600_flush_framebuffer() updates the cb_flush_flags and then
372 * calls r600_emit_atom() on the ctx->surface_sync_cmd.atom, which emits
373 * a SURFACE_SYNC packet via r600_emit_surface_sync().
375 * XXX r600_emit_surface_sync() hardcodes the CP_COHER_SIZE to
376 * 0xffffffff, so we will need to add a field to struct
377 * r600_surface_sync_cmd if we want to manually set this value.
379 r600_flush_framebuffer(ctx
, true /* Flush now */);
382 COMPUTE_DBG("cdw: %i\n", cs
->cdw
);
383 for (i
= 0; i
< cs
->cdw
; i
++) {
384 COMPUTE_DBG("%4i : 0x%08X\n", i
, ctx
->cs
->buf
[i
]);
388 ctx
->ws
->cs_flush(ctx
->cs
, RADEON_FLUSH_ASYNC
| RADEON_FLUSH_COMPUTE
);
390 ctx
->pm4_dirty_cdwords
= 0;
393 COMPUTE_DBG("shader started\n");
395 ctx
->ws
->buffer_wait(onebo
->buf
, 0);
397 COMPUTE_DBG("...\n");
399 r600_emit_atom(ctx
, &ctx
->start_cs_cmd
.atom
);
401 ctx
->streamout_start
= TRUE
;
402 ctx
->streamout_append_bitmask
= ~0;
406 static void evergreen_launch_grid(
407 struct pipe_context
*ctx_
,
408 const uint
*block_layout
, const uint
*grid_layout
,
409 uint32_t pc
, const void *input
)
411 COMPUTE_DBG("PC: %i\n", pc
);
413 struct r600_context
*ctx
= (struct r600_context
*)ctx_
;
415 unsigned num_pipes
= ctx
->screen
->info
.r600_max_pipes
;
416 unsigned wave_divisor
= (16 * num_pipes
);
418 /* num_waves = ceil((tg_size.x * tg_size.y, tg_size.z) / (16 * num_pipes)) */
419 num_waves
= (block_layout
[0] * block_layout
[1] * block_layout
[2] +
420 wave_divisor
- 1) / wave_divisor
;
422 COMPUTE_DBG("Using %u pipes, there are %u wavefronts per thread block\n",
423 num_pipes
, num_waves
);
425 evergreen_set_lds(ctx
->cs_shader
, 0, 0, num_waves
);
426 evergreen_compute_upload_input(ctx_
, block_layout
, grid_layout
, input
);
427 evergreen_direct_dispatch(ctx_
, block_layout
, grid_layout
);
428 compute_emit_cs(ctx
);
431 static void evergreen_set_compute_resources(struct pipe_context
* ctx_
,
432 unsigned start
, unsigned count
,
433 struct pipe_surface
** surfaces
)
435 struct r600_context
*ctx
= (struct r600_context
*)ctx_
;
436 struct r600_surface
**resources
= (struct r600_surface
**)surfaces
;
437 for (int i
= 0; i
< count
; i
++) {
439 struct r600_resource_global
*buffer
=
440 (struct r600_resource_global
*)resources
[i
]->base
.texture
;
441 if (resources
[i
]->base
.writable
) {
443 struct r600_resource_global
*buffer
=
444 (struct r600_resource_global
*)
445 resources
[i
]->base
.texture
;
447 evergreen_set_rat(ctx
->cs_shader
, i
+1,
448 (struct r600_resource
*)resources
[i
]->base
.texture
,
449 buffer
->chunk
->start_in_dw
*4,
450 resources
[i
]->base
.texture
->width0
);
453 evergreen_set_vtx_resource(ctx
->cs_shader
,
454 (struct r600_resource
*)resources
[i
]->base
.texture
, i
+2,
455 buffer
->chunk
->start_in_dw
*4, resources
[i
]->base
.writable
);
461 static void evergreen_set_cs_sampler_view(struct pipe_context
*ctx_
,
462 unsigned start_slot
, unsigned count
,
463 struct pipe_sampler_view
**views
)
465 struct r600_context
*ctx
= (struct r600_context
*)ctx_
;
466 struct r600_pipe_sampler_view
**resource
=
467 (struct r600_pipe_sampler_view
**)views
;
469 for (int i
= 0; i
< count
; i
++) {
472 ///FETCH0 = VTX0 (param buffer),
473 //FETCH1 = VTX1 (global buffer pool), FETCH2... = TEX
474 evergreen_set_tex_resource(ctx
->cs_shader
, resource
[i
], i
+2);
479 static void evergreen_bind_compute_sampler_states(
480 struct pipe_context
*ctx_
,
482 unsigned num_samplers
,
485 struct r600_context
*ctx
= (struct r600_context
*)ctx_
;
486 struct compute_sampler_state
** samplers
=
487 (struct compute_sampler_state
**)samplers_
;
489 for (int i
= 0; i
< num_samplers
; i
++) {
491 evergreen_set_sampler_resource(ctx
->cs_shader
, samplers
[i
], i
);
496 static void evergreen_set_global_binding(
497 struct pipe_context
*ctx_
, unsigned first
, unsigned n
,
498 struct pipe_resource
**resources
,
501 struct r600_context
*ctx
= (struct r600_context
*)ctx_
;
502 struct compute_memory_pool
*pool
= ctx
->screen
->global_pool
;
503 struct r600_resource_global
**buffers
=
504 (struct r600_resource_global
**)resources
;
511 compute_memory_finalize_pending(pool
, ctx_
);
513 for (int i
= 0; i
< n
; i
++)
515 assert(resources
[i
]->target
== PIPE_BUFFER
);
516 assert(resources
[i
]->bind
& PIPE_BIND_GLOBAL
);
518 *(handles
[i
]) = buffers
[i
]->chunk
->start_in_dw
* 4;
521 evergreen_set_rat(ctx
->cs_shader
, 0, pool
->bo
, 0, pool
->size_in_dw
* 4);
522 evergreen_set_vtx_resource(ctx
->cs_shader
, pool
->bo
, 1, 0, 1);
526 void evergreen_compute_init_config(struct r600_context
*ctx
)
528 struct evergreen_compute_resource
* res
=
529 get_empty_res(ctx
->cs_shader
, COMPUTE_RESOURCE_CONFIG
, 0);
532 int num_stack_entries
;
535 enum radeon_family family
;
538 family
= ctx
->family
;
545 num_stack_entries
= 256;
550 num_stack_entries
= 256;
555 num_stack_entries
= 512;
561 num_stack_entries
= 512;
566 num_stack_entries
= 256;
571 num_stack_entries
= 256;
576 num_stack_entries
= 512;
581 num_stack_entries
= 512;
586 num_stack_entries
= 256;
591 num_stack_entries
= 256;
604 tmp
|= S_008C00_VC_ENABLE(1);
607 tmp
|= S_008C00_EXPORT_SRC_C(1);
608 tmp
|= S_008C00_CS_PRIO(0);
609 tmp
|= S_008C00_LS_PRIO(0);
610 tmp
|= S_008C00_HS_PRIO(0);
611 tmp
|= S_008C00_PS_PRIO(0);
612 tmp
|= S_008C00_VS_PRIO(0);
613 tmp
|= S_008C00_GS_PRIO(0);
614 tmp
|= S_008C00_ES_PRIO(0);
616 evergreen_reg_set(res
, R_008C00_SQ_CONFIG
, tmp
);
618 evergreen_reg_set(res
, R_008C04_SQ_GPR_RESOURCE_MGMT_1
,
619 S_008C04_NUM_CLAUSE_TEMP_GPRS(num_temp_gprs
));
620 if (ctx
->chip_class
< CAYMAN
) {
621 evergreen_reg_set(res
, R_008C08_SQ_GPR_RESOURCE_MGMT_2
, 0);
623 evergreen_reg_set(res
, R_008C10_SQ_GLOBAL_GPR_RESOURCE_MGMT_1
, 0);
624 evergreen_reg_set(res
, R_008C14_SQ_GLOBAL_GPR_RESOURCE_MGMT_2
, 0);
625 evergreen_reg_set(res
, R_008D8C_SQ_DYN_GPR_CNTL_PS_FLUSH_REQ
, (1 << 8));
627 /* workaround for hw issues with dyn gpr - must set all limits to 240
628 * instead of 0, 0x1e == 240/8 */
629 if (ctx
->chip_class
< CAYMAN
) {
630 evergreen_reg_set(res
, R_028838_SQ_DYN_GPR_RESOURCE_LIMIT_1
,
631 S_028838_PS_GPRS(0x1e) |
632 S_028838_VS_GPRS(0x1e) |
633 S_028838_GS_GPRS(0x1e) |
634 S_028838_ES_GPRS(0x1e) |
635 S_028838_HS_GPRS(0x1e) |
636 S_028838_LS_GPRS(0x1e));
638 evergreen_reg_set(res
, 0x286f8,
639 S_028838_PS_GPRS(0x1e) |
640 S_028838_VS_GPRS(0x1e) |
641 S_028838_GS_GPRS(0x1e) |
642 S_028838_ES_GPRS(0x1e) |
643 S_028838_HS_GPRS(0x1e) |
644 S_028838_LS_GPRS(0x1e));
647 if (ctx
->chip_class
< CAYMAN
) {
649 evergreen_reg_set(res
, R_008E20_SQ_STATIC_THREAD_MGMT1
, 0xFFFFFFFF);
650 evergreen_reg_set(res
, R_008E24_SQ_STATIC_THREAD_MGMT2
, 0xFFFFFFFF);
651 evergreen_reg_set(res
, R_008E20_SQ_STATIC_THREAD_MGMT1
, 0xFFFFFFFF);
652 evergreen_reg_set(res
, R_008E24_SQ_STATIC_THREAD_MGMT2
, 0xFFFFFFFF);
653 evergreen_reg_set(res
, R_008E28_SQ_STATIC_THREAD_MGMT3
, 0xFFFFFFFF);
654 evergreen_reg_set(res
, R_008C18_SQ_THREAD_RESOURCE_MGMT_1
, 0);
655 tmp
= S_008C1C_NUM_LS_THREADS(num_threads
);
656 evergreen_reg_set(res
, R_008C1C_SQ_THREAD_RESOURCE_MGMT_2
, tmp
);
657 evergreen_reg_set(res
, R_008C20_SQ_STACK_RESOURCE_MGMT_1
, 0);
658 evergreen_reg_set(res
, R_008C24_SQ_STACK_RESOURCE_MGMT_2
, 0);
659 tmp
= S_008C28_NUM_LS_STACK_ENTRIES(num_stack_entries
);
660 evergreen_reg_set(res
, R_008C28_SQ_STACK_RESOURCE_MGMT_3
, tmp
);
662 evergreen_reg_set(res
, R_0286CC_SPI_PS_IN_CONTROL_0
, S_0286CC_LINEAR_GRADIENT_ENA(1));
663 evergreen_reg_set(res
, R_0286D0_SPI_PS_IN_CONTROL_1
, 0);
664 evergreen_reg_set(res
, R_0286E4_SPI_PS_IN_CONTROL_2
, 0);
665 evergreen_reg_set(res
, R_0286D8_SPI_INPUT_Z
, 0);
666 evergreen_reg_set(res
, R_0286E0_SPI_BARYC_CNTL
, 1 << 20);
667 tmp
= S_0286E8_TID_IN_GROUP_ENA
| S_0286E8_TGID_ENA
| S_0286E8_DISABLE_INDEX_PACK
;
668 evergreen_reg_set(res
, R_0286E8_SPI_COMPUTE_INPUT_CNTL
, tmp
);
669 tmp
= S_028A40_COMPUTE_MODE(1) | S_028A40_PARTIAL_THD_AT_EOI(1);
670 evergreen_reg_set(res
, R_028A40_VGT_GS_MODE
, tmp
);
671 evergreen_reg_set(res
, R_028B54_VGT_SHADER_STAGES_EN
, 2/*CS_ON*/);
672 evergreen_reg_set(res
, R_028800_DB_DEPTH_CONTROL
, 0);
673 evergreen_reg_set(res
, R_02880C_DB_SHADER_CONTROL
, 0);
674 evergreen_reg_set(res
, R_028000_DB_RENDER_CONTROL
, S_028000_COLOR_DISABLE(1));
675 evergreen_reg_set(res
, R_02800C_DB_RENDER_OVERRIDE
, 0);
676 evergreen_reg_set(res
, R_0286E8_SPI_COMPUTE_INPUT_CNTL
,
677 S_0286E8_TID_IN_GROUP_ENA
679 | S_0286E8_DISABLE_INDEX_PACK
)
683 void evergreen_init_compute_state_functions(struct r600_context
*ctx
)
685 ctx
->context
.create_compute_state
= evergreen_create_compute_state
;
686 ctx
->context
.delete_compute_state
= evergreen_delete_compute_state
;
687 ctx
->context
.bind_compute_state
= evergreen_bind_compute_state
;
688 // ctx->context.create_sampler_view = evergreen_compute_create_sampler_view;
689 ctx
->context
.set_compute_resources
= evergreen_set_compute_resources
;
690 ctx
->context
.set_compute_sampler_views
= evergreen_set_cs_sampler_view
;
691 ctx
->context
.bind_compute_sampler_states
= evergreen_bind_compute_sampler_states
;
692 ctx
->context
.set_global_binding
= evergreen_set_global_binding
;
693 ctx
->context
.launch_grid
= evergreen_launch_grid
;
697 struct pipe_resource
*r600_compute_global_buffer_create(
698 struct pipe_screen
*screen
,
699 const struct pipe_resource
*templ
)
701 assert(templ
->target
== PIPE_BUFFER
);
702 assert(templ
->bind
& PIPE_BIND_GLOBAL
);
703 assert(templ
->array_size
== 1 || templ
->array_size
== 0);
704 assert(templ
->depth0
== 1 || templ
->depth0
== 0);
705 assert(templ
->height0
== 1 || templ
->height0
== 0);
707 struct r600_resource_global
* result
= (struct r600_resource_global
*)
708 CALLOC(sizeof(struct r600_resource_global
), 1);
709 struct r600_screen
* rscreen
= (struct r600_screen
*)screen
;
711 result
->base
.b
.vtbl
= &r600_global_buffer_vtbl
;
712 result
->base
.b
.b
.screen
= screen
;
713 result
->base
.b
.b
= *templ
;
714 pipe_reference_init(&result
->base
.b
.b
.reference
, 1);
716 int size_in_dw
= (templ
->width0
+3) / 4;
718 result
->chunk
= compute_memory_alloc(rscreen
->global_pool
, size_in_dw
);
720 if (result
->chunk
== NULL
)
726 return &result
->base
.b
.b
;
729 void r600_compute_global_buffer_destroy(
730 struct pipe_screen
*screen
,
731 struct pipe_resource
*res
)
733 assert(res
->target
== PIPE_BUFFER
);
734 assert(res
->bind
& PIPE_BIND_GLOBAL
);
736 struct r600_resource_global
* buffer
= (struct r600_resource_global
*)res
;
737 struct r600_screen
* rscreen
= (struct r600_screen
*)screen
;
739 compute_memory_free(rscreen
->global_pool
, buffer
->chunk
->id
);
741 buffer
->chunk
= NULL
;
745 void* r600_compute_global_transfer_map(
746 struct pipe_context
*ctx_
,
747 struct pipe_transfer
* transfer
)
749 assert(transfer
->resource
->target
== PIPE_BUFFER
);
750 assert(transfer
->resource
->bind
& PIPE_BIND_GLOBAL
);
751 assert(transfer
->box
.x
>= 0);
752 assert(transfer
->box
.y
== 0);
753 assert(transfer
->box
.z
== 0);
755 struct r600_context
*ctx
= (struct r600_context
*)ctx_
;
756 struct r600_resource_global
* buffer
=
757 (struct r600_resource_global
*)transfer
->resource
;
760 ///TODO: do it better, mapping is not possible if the pool is too big
762 if (!(map
= ctx
->ws
->buffer_map(buffer
->chunk
->pool
->bo
->cs_buf
,
763 ctx
->cs
, transfer
->usage
))) {
767 COMPUTE_DBG("buffer start: %lli\n", buffer
->chunk
->start_in_dw
);
768 return ((char*)(map
+ buffer
->chunk
->start_in_dw
)) + transfer
->box
.x
;
771 void r600_compute_global_transfer_unmap(
772 struct pipe_context
*ctx_
,
773 struct pipe_transfer
* transfer
)
775 assert(transfer
->resource
->target
== PIPE_BUFFER
);
776 assert(transfer
->resource
->bind
& PIPE_BIND_GLOBAL
);
778 struct r600_context
*ctx
= (struct r600_context
*)ctx_
;
779 struct r600_resource_global
* buffer
=
780 (struct r600_resource_global
*)transfer
->resource
;
782 ctx
->ws
->buffer_unmap(buffer
->chunk
->pool
->bo
->cs_buf
);
785 struct pipe_transfer
* r600_compute_global_get_transfer(
786 struct pipe_context
*ctx_
,
787 struct pipe_resource
*resource
,
790 const struct pipe_box
*box
)
792 struct r600_context
*ctx
= (struct r600_context
*)ctx_
;
793 struct compute_memory_pool
*pool
= ctx
->screen
->global_pool
;
795 compute_memory_finalize_pending(pool
, ctx_
);
797 assert(resource
->target
== PIPE_BUFFER
);
798 struct r600_context
*rctx
= (struct r600_context
*)ctx_
;
799 struct pipe_transfer
*transfer
= util_slab_alloc(&rctx
->pool_transfers
);
801 transfer
->resource
= resource
;
802 transfer
->level
= level
;
803 transfer
->usage
= usage
;
804 transfer
->box
= *box
;
805 transfer
->stride
= 0;
806 transfer
->layer_stride
= 0;
807 transfer
->data
= NULL
;
809 /* Note strides are zero, this is ok for buffers, but not for
810 * textures 2d & higher at least.
815 void r600_compute_global_transfer_destroy(
816 struct pipe_context
*ctx_
,
817 struct pipe_transfer
*transfer
)
819 struct r600_context
*rctx
= (struct r600_context
*)ctx_
;
820 util_slab_free(&rctx
->pool_transfers
, transfer
);
/* Flush-region hook for global buffers.
 * NOTE(review): the body lines were lost in the damaged source; the
 * vtbl requires the entry point, and an unimplemented-stub assert
 * matches the sibling inline_write stub — confirm against original. */
void r600_compute_global_transfer_flush_region(
	struct pipe_context *ctx_,
	struct pipe_transfer *transfer,
	const struct pipe_box *box)
{
	assert(0 && "TODO");
}
831 void r600_compute_global_transfer_inline_write(
832 struct pipe_context
*pipe
,
833 struct pipe_resource
*resource
,
836 const struct pipe_box
*box
,
839 unsigned layer_stride
)