r600g: Compute support for Cayman
[mesa.git] / src / gallium / drivers / r600 / evergreen_compute_internal.c
1 /*
2 * Permission is hereby granted, free of charge, to any person obtaining a
3 * copy of this software and associated documentation files (the "Software"),
4 * to deal in the Software without restriction, including without limitation
5 * on the rights to use, copy, modify, merge, publish, distribute, sub
6 * license, and/or sell copies of the Software, and to permit persons to whom
7 * the Software is furnished to do so, subject to the following conditions:
8 *
9 * The above copyright notice and this permission notice (including the next
10 * paragraph) shall be included in all copies or substantial portions of the
11 * Software.
12 *
13 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
16 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
17 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
18 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
19 * USE OR OTHER DEALINGS IN THE SOFTWARE.
20 *
21 * Authors:
22 * Adam Rak <adam.rak@streamnovation.com>
23 */
24
25 #include <stdlib.h>
26 #include <stdio.h>
27
28 #include "pipe/p_defines.h"
29 #include "pipe/p_state.h"
30 #include "pipe/p_context.h"
31 #include "util/u_blitter.h"
32 #include "util/u_double_list.h"
33 #include "util/u_transfer.h"
34 #include "util/u_surface.h"
35 #include "util/u_pack_color.h"
36 #include "util/u_memory.h"
37 #include "util/u_inlines.h"
38 #include "util/u_framebuffer.h"
39 #include "r600.h"
40 #include "r600_resource.h"
41 #include "r600_shader.h"
42 #include "r600_pipe.h"
43 #include "r600_formats.h"
44 #include "evergreend.h"
45 #include "evergreen_compute_internal.h"
46 #include "r600_hw_context_priv.h"
47
48 int get_compute_resource_num(void)
49 {
50 int num = 0;
51 #define DECL_COMPUTE_RESOURCE(name, n) num += n;
52 #include "compute_resource.def"
53 #undef DECL_COMPUTE_RESOURCE
54 return num;
55 }
56
57 void evergreen_emit_raw_value(
58 struct evergreen_compute_resource* res,
59 unsigned value)
60 {
61 res->cs[res->cs_end++] = value;
62 }
63
64 void evergreen_emit_ctx_value(struct r600_context *ctx, unsigned value)
65 {
66 ctx->cs->buf[ctx->cs->cdw++] = value;
67 }
68
69 void evergreen_mult_reg_set_(
70 struct evergreen_compute_resource* res,
71 int index,
72 u32* array,
73 int size)
74 {
75 int i = 0;
76
77 evergreen_emit_raw_reg_set(res, index, size / 4);
78
79 for (i = 0; i < size; i+=4) {
80 res->cs[res->cs_end++] = array[i / 4];
81 }
82 }
83
/*
 * Queues a write of a single register: emits the register-set header
 * for `index` with a count of one, then the value itself.
 */
void evergreen_reg_set(
	struct evergreen_compute_resource* res,
	unsigned index,
	unsigned value)
{
	evergreen_emit_raw_reg_set(res, index, 1);
	evergreen_emit_raw_value(res, value);
}
92
93 struct evergreen_compute_resource* get_empty_res(
94 struct r600_pipe_compute* pipe,
95 enum evergreen_compute_resources res_code,
96 int offset_index)
97 {
98 int code_index = -1;
99 int code_size = -1;
100
101 {
102 int i = 0;
103 #define DECL_COMPUTE_RESOURCE(name, n) if (COMPUTE_RESOURCE_ ## name == res_code) {code_index = i; code_size = n;} i += n;
104 #include "compute_resource.def"
105 #undef DECL_COMPUTE_RESOURCE
106 }
107
108 assert(code_index != -1 && "internal error: resouce index not found");
109 assert(offset_index < code_size && "internal error: overindexing resource");
110
111 int index = code_index + offset_index;
112
113 struct evergreen_compute_resource* res = &pipe->resources[index];
114
115 res->enabled = true;
116 res->bo = NULL;
117 res->cs_end = 0;
118 bzero(&res->do_reloc, sizeof(res->do_reloc));
119
120 return res;
121 }
122
123 void evergreen_emit_raw_reg_set(
124 struct evergreen_compute_resource* res,
125 unsigned index,
126 int num)
127 {
128 res->enabled = 1;
129 int cs_end = res->cs_end;
130
131 if (index >= EVERGREEN_CONFIG_REG_OFFSET
132 && index < EVERGREEN_CONFIG_REG_END) {
133 res->cs[cs_end] = PKT3C(PKT3_SET_CONFIG_REG, num, 0);
134 res->cs[cs_end+1] = (index - EVERGREEN_CONFIG_REG_OFFSET) >> 2;
135 } else if (index >= EVERGREEN_CONTEXT_REG_OFFSET
136 && index < EVERGREEN_CONTEXT_REG_END) {
137 res->cs[cs_end] = PKT3C(PKT3_SET_CONTEXT_REG, num, 0);
138 res->cs[cs_end+1] = (index - EVERGREEN_CONTEXT_REG_OFFSET) >> 2;
139 } else if (index >= EVERGREEN_RESOURCE_OFFSET
140 && index < EVERGREEN_RESOURCE_END) {
141 res->cs[cs_end] = PKT3C(PKT3_SET_RESOURCE, num, 0);
142 res->cs[cs_end+1] = (index - EVERGREEN_RESOURCE_OFFSET) >> 2;
143 } else if (index >= EVERGREEN_SAMPLER_OFFSET
144 && index < EVERGREEN_SAMPLER_END) {
145 res->cs[cs_end] = PKT3C(PKT3_SET_SAMPLER, num, 0);
146 res->cs[cs_end+1] = (index - EVERGREEN_SAMPLER_OFFSET) >> 2;
147 } else if (index >= EVERGREEN_CTL_CONST_OFFSET
148 && index < EVERGREEN_CTL_CONST_END) {
149 res->cs[cs_end] = PKT3C(PKT3_SET_CTL_CONST, num, 0);
150 res->cs[cs_end+1] = (index - EVERGREEN_CTL_CONST_OFFSET) >> 2;
151 } else if (index >= EVERGREEN_LOOP_CONST_OFFSET
152 && index < EVERGREEN_LOOP_CONST_END) {
153 res->cs[cs_end] = PKT3C(PKT3_SET_LOOP_CONST, num, 0);
154 res->cs[cs_end+1] = (index - EVERGREEN_LOOP_CONST_OFFSET) >> 2;
155 } else if (index >= EVERGREEN_BOOL_CONST_OFFSET
156 && index < EVERGREEN_BOOL_CONST_END) {
157 res->cs[cs_end] = PKT3C(PKT3_SET_BOOL_CONST, num, 0);
158 res->cs[cs_end+1] = (index - EVERGREEN_BOOL_CONST_OFFSET) >> 2;
159 } else {
160 res->cs[cs_end] = PKT0(index, num-1);
161 res->cs_end--;
162 }
163
164 res->cs_end += 2;
165 }
166
167 void evergreen_emit_force_reloc(struct evergreen_compute_resource* res)
168 {
169 res->do_reloc[res->cs_end] += 1;
170 }
171
172 void evergreen_emit_ctx_reg_set(
173 struct r600_context *ctx,
174 unsigned index,
175 int num)
176 {
177
178 if (index >= EVERGREEN_CONFIG_REG_OFFSET
179 && index < EVERGREEN_CONFIG_REG_END) {
180 ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CONFIG_REG, num, 0);
181 ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CONFIG_REG_OFFSET) >> 2;
182 } else if (index >= EVERGREEN_CONTEXT_REG_OFFSET
183 && index < EVERGREEN_CONTEXT_REG_END) {
184 ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CONTEXT_REG, num, 0);
185 ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CONTEXT_REG_OFFSET) >> 2;
186 } else if (index >= EVERGREEN_RESOURCE_OFFSET
187 && index < EVERGREEN_RESOURCE_END) {
188 ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_RESOURCE, num, 0);
189 ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_RESOURCE_OFFSET) >> 2;
190 } else if (index >= EVERGREEN_SAMPLER_OFFSET
191 && index < EVERGREEN_SAMPLER_END) {
192 ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_SAMPLER, num, 0);
193 ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_SAMPLER_OFFSET) >> 2;
194 } else if (index >= EVERGREEN_CTL_CONST_OFFSET
195 && index < EVERGREEN_CTL_CONST_END) {
196 ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_CTL_CONST, num, 0);
197 ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_CTL_CONST_OFFSET) >> 2;
198 } else if (index >= EVERGREEN_LOOP_CONST_OFFSET
199 && index < EVERGREEN_LOOP_CONST_END) {
200 ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_LOOP_CONST, num, 0);
201 ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_LOOP_CONST_OFFSET) >> 2;
202 } else if (index >= EVERGREEN_BOOL_CONST_OFFSET
203 && index < EVERGREEN_BOOL_CONST_END) {
204 ctx->cs->buf[ctx->cs->cdw++] = PKT3C(PKT3_SET_BOOL_CONST, num, 0);
205 ctx->cs->buf[ctx->cs->cdw++] = (index - EVERGREEN_BOOL_CONST_OFFSET) >> 2;
206 } else {
207 ctx->cs->buf[ctx->cs->cdw++] = PKT0(index, num-1);
208 }
209 }
210
211 void evergreen_emit_ctx_reloc(
212 struct r600_context *ctx,
213 struct r600_resource *bo,
214 enum radeon_bo_usage usage)
215 {
216 assert(bo);
217
218 ctx->cs->buf[ctx->cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
219 u32 rr = r600_context_bo_reloc(ctx, bo, usage);
220 ctx->cs->buf[ctx->cs->cdw++] = rr;
221 }
222
223 void evergreen_set_buffer_sync(
224 struct r600_context *ctx,
225 struct r600_resource* bo,
226 int size,
227 int flags,
228 enum radeon_bo_usage usage)
229 {
230 assert(bo);
231 int32_t cp_coher_size = 0;
232
233 if (size == 0xffffffff || size == 0) {
234 cp_coher_size = 0xffffffff;
235 }
236 else {
237 cp_coher_size = ((size + 255) >> 8);
238 }
239
240 uint32_t sync_flags = 0;
241
242 if ((flags & COMPUTE_RES_TC_FLUSH) == COMPUTE_RES_TC_FLUSH) {
243 sync_flags |= S_0085F0_TC_ACTION_ENA(1);
244 }
245
246 if ((flags & COMPUTE_RES_VC_FLUSH) == COMPUTE_RES_VC_FLUSH) {
247 sync_flags |= S_0085F0_VC_ACTION_ENA(1);
248 }
249
250 if ((flags & COMPUTE_RES_SH_FLUSH) == COMPUTE_RES_SH_FLUSH) {
251 sync_flags |= S_0085F0_SH_ACTION_ENA(1);
252 }
253
254 if ((flags & COMPUTE_RES_CB_FLUSH(0)) == COMPUTE_RES_CB_FLUSH(0)) {
255 sync_flags |= S_0085F0_CB_ACTION_ENA(1);
256
257 switch((flags >> 8) & 0xF) {
258 case 0:
259 sync_flags |= S_0085F0_CB0_DEST_BASE_ENA(1);
260 break;
261 case 1:
262 sync_flags |= S_0085F0_CB1_DEST_BASE_ENA(1);
263 break;
264 case 2:
265 sync_flags |= S_0085F0_CB2_DEST_BASE_ENA(1);
266 break;
267 case 3:
268 sync_flags |= S_0085F0_CB3_DEST_BASE_ENA(1);
269 break;
270 case 4:
271 sync_flags |= S_0085F0_CB4_DEST_BASE_ENA(1);
272 break;
273 case 5:
274 sync_flags |= S_0085F0_CB5_DEST_BASE_ENA(1);
275 break;
276 case 6:
277 sync_flags |= S_0085F0_CB6_DEST_BASE_ENA(1);
278 break;
279 case 7:
280 sync_flags |= S_0085F0_CB7_DEST_BASE_ENA(1);
281 break;
282 case 8:
283 sync_flags |= S_0085F0_CB8_DEST_BASE_ENA(1);
284 break;
285 case 9:
286 sync_flags |= S_0085F0_CB9_DEST_BASE_ENA(1);
287 break;
288 case 10:
289 sync_flags |= S_0085F0_CB10_DEST_BASE_ENA(1);
290 break;
291 case 11:
292 sync_flags |= S_0085F0_CB11_DEST_BASE_ENA(1);
293 break;
294 default:
295 assert(0);
296 }
297 }
298
299 int32_t poll_interval = 10;
300
301 ctx->cs->buf[ctx->cs->cdw++] = PKT3(PKT3_SURFACE_SYNC, 3, 0);
302 ctx->cs->buf[ctx->cs->cdw++] = sync_flags;
303 ctx->cs->buf[ctx->cs->cdw++] = cp_coher_size;
304 ctx->cs->buf[ctx->cs->cdw++] = 0;
305 ctx->cs->buf[ctx->cs->cdw++] = poll_interval;
306
307 if (cp_coher_size != 0xffffffff) {
308 evergreen_emit_ctx_reloc(ctx, bo, usage);
309 }
310 }
311
312 int evergreen_compute_get_gpu_format(
313 struct number_type_and_format* fmt,
314 struct r600_resource *bo)
315 {
316 switch (bo->b.b.format)
317 {
318 case PIPE_FORMAT_R8_UNORM:
319 case PIPE_FORMAT_R32_UNORM:
320 fmt->format = V_028C70_COLOR_32;
321 fmt->number_type = V_028C70_NUMBER_UNORM;
322 fmt->num_format_all = 0;
323 break;
324 case PIPE_FORMAT_R32_FLOAT:
325 fmt->format = V_028C70_COLOR_32_FLOAT;
326 fmt->number_type = V_028C70_NUMBER_FLOAT;
327 fmt->num_format_all = 0;
328 break;
329 case PIPE_FORMAT_R32G32B32A32_FLOAT:
330 fmt->format = V_028C70_COLOR_32_32_32_32_FLOAT;
331 fmt->number_type = V_028C70_NUMBER_FLOAT;
332 fmt->num_format_all = 0;
333 break;
334
335 ///TODO: other formats...
336
337 default:
338 return 0;
339 }
340
341 return 1;
342 }
343
344 void evergreen_set_rat(
345 struct r600_pipe_compute *pipe,
346 int id,
347 struct r600_resource* bo,
348 int start,
349 int size)
350 {
351 assert(id < 12);
352 assert((size & 3) == 0);
353 assert((start & 0xFF) == 0);
354
355 int offset;
356 COMPUTE_DBG("bind rat: %i \n", id);
357
358 if (id < 8) {
359 offset = id*0x3c;
360 }
361 else {
362 offset = 8*0x3c + (id-8)*0x1c;
363 }
364
365 int linear = 0;
366
367 if (bo->b.b.height0 <= 1 && bo->b.b.depth0 <= 1
368 && bo->b.b.target == PIPE_BUFFER) {
369 linear = 1;
370 }
371
372 struct evergreen_compute_resource* res =
373 get_empty_res(pipe, COMPUTE_RESOURCE_RAT, id);
374
375 evergreen_emit_force_reloc(res);
376
377 evergreen_reg_set(res, R_028C64_CB_COLOR0_PITCH, 0); ///TODO: for 2D?
378 evergreen_reg_set(res, R_028C68_CB_COLOR0_SLICE, 0);
379
380 struct number_type_and_format fmt;
381
382 ///default config
383 if (bo->b.b.format == PIPE_FORMAT_NONE) {
384 fmt.format = V_028C70_COLOR_32;
385 fmt.number_type = V_028C70_NUMBER_FLOAT;
386 } else {
387 evergreen_compute_get_gpu_format(&fmt, bo);
388 }
389
390 if (linear) {
391 evergreen_reg_set(res,
392 R_028C70_CB_COLOR0_INFO, S_028C70_RAT(1)
393 | S_028C70_ARRAY_MODE(V_028C70_ARRAY_LINEAR_ALIGNED)
394 | S_028C70_FORMAT(fmt.format)
395 | S_028C70_NUMBER_TYPE(fmt.number_type)
396 );
397 evergreen_emit_force_reloc(res);
398 } else {
399 assert(0 && "TODO");
400 ///TODO
401 // evergreen_reg_set(res, R_028C70_CB_COLOR0_INFO, S_028C70_RAT(1) | S_028C70_ARRAY_MODE(????));
402 // evergreen_emit_force_reloc(res);
403 }
404
405 evergreen_reg_set(res, R_028C74_CB_COLOR0_ATTRIB, S_028C74_NON_DISP_TILING_ORDER(1));
406 evergreen_emit_force_reloc(res);
407
408 if (linear) {
409 /* XXX: Why are we using size instead of bo->b.b.b.width0 ? */
410 evergreen_reg_set(res, R_028C78_CB_COLOR0_DIM, size);
411 } else {
412 evergreen_reg_set(res, R_028C78_CB_COLOR0_DIM,
413 S_028C78_WIDTH_MAX(bo->b.b.width0)
414 | S_028C78_HEIGHT_MAX(bo->b.b.height0));
415 }
416
417 if (id < 8) {
418 evergreen_reg_set(res, R_028C7C_CB_COLOR0_CMASK, 0);
419 evergreen_emit_force_reloc(res);
420 evergreen_reg_set(res, R_028C84_CB_COLOR0_FMASK, 0);
421 evergreen_emit_force_reloc(res);
422 }
423
424 evergreen_reg_set(res, R_028C60_CB_COLOR0_BASE + offset, start >> 8);
425
426 res->bo = bo;
427 res->usage = RADEON_USAGE_READWRITE;
428 res->coher_bo_size = size;
429 res->flags = COMPUTE_RES_CB_FLUSH(id);
430 }
431
432 void evergreen_set_lds(
433 struct r600_pipe_compute *pipe,
434 int num_lds,
435 int size,
436 int num_waves)
437 {
438 struct evergreen_compute_resource* res =
439 get_empty_res(pipe, COMPUTE_RESOURCE_LDS, 0);
440
441 if (pipe->ctx->chip_class < CAYMAN) {
442 evergreen_reg_set(res, R_008E2C_SQ_LDS_RESOURCE_MGMT,
443 S_008E2C_NUM_LS_LDS(num_lds));
444 } else {
445 evergreen_reg_set(res, CM_R_0286FC_SPI_LDS_MGMT,
446 S_0286FC_NUM_LS_LDS(num_lds));
447 }
448 evergreen_reg_set(res, CM_R_0288E8_SQ_LDS_ALLOC, size | num_waves << 14);
449 }
450
451 void evergreen_set_gds(
452 struct r600_pipe_compute *pipe,
453 uint32_t addr,
454 uint32_t size)
455 {
456 struct evergreen_compute_resource* res =
457 get_empty_res(pipe, COMPUTE_RESOURCE_GDS, 0);
458
459 evergreen_reg_set(res, R_028728_GDS_ORDERED_WAVE_PER_SE, 1);
460 evergreen_reg_set(res, R_028720_GDS_ADDR_BASE, addr);
461 evergreen_reg_set(res, R_028724_GDS_ADDR_SIZE, size);
462 }
463
464 void evergreen_set_export(
465 struct r600_pipe_compute *pipe,
466 struct r600_resource* bo,
467 int offset, int size)
468 {
469 #define SX_MEMORY_EXPORT_BASE 0x9010
470 #define SX_MEMORY_EXPORT_SIZE 0x9014
471
472 struct evergreen_compute_resource* res =
473 get_empty_res(pipe, COMPUTE_RESOURCE_EXPORT, 0);
474
475 evergreen_reg_set(res, SX_MEMORY_EXPORT_SIZE, size);
476
477 if (size) {
478 evergreen_reg_set(res, SX_MEMORY_EXPORT_BASE, offset);
479 res->bo = bo;
480 res->usage = RADEON_USAGE_WRITE;
481 res->coher_bo_size = size;
482 res->flags = 0;
483 }
484 }
485
486 void evergreen_set_loop_const(
487 struct r600_pipe_compute *pipe,
488 int id, int count, int init, int inc) {
489
490 struct evergreen_compute_resource* res =
491 get_empty_res(pipe, COMPUTE_RESOURCE_LOOP, id);
492
493 assert(id < 32);
494 assert(count <= 0xFFF);
495 assert(init <= 0xFF);
496 assert(inc <= 0xFF);
497
498 /* Compute shaders use LOOP_CONST registers SQ_LOOP_CONST_160 to
499 * SQ_LOOP_CONST_191 */
500 evergreen_reg_set(res, R_03A200_SQ_LOOP_CONST_0 + (160 * 4) + (id * 4),
501 count | init << 12 | inc << 24);
502 }
503
504 void evergreen_set_tmp_ring(
505 struct r600_pipe_compute *pipe,
506 struct r600_resource* bo,
507 int offset, int size, int se)
508 {
509 #define SQ_LSTMP_RING_BASE 0x00008e10
510 #define SQ_LSTMP_RING_SIZE 0x00008e14
511 #define GRBM_GFX_INDEX 0x802C
512 #define INSTANCE_INDEX(x) ((x) << 0)
513 #define SE_INDEX(x) ((x) << 16)
514 #define INSTANCE_BROADCAST_WRITES (1 << 30)
515 #define SE_BROADCAST_WRITES (1 << 31)
516
517 struct evergreen_compute_resource* res =
518 get_empty_res(pipe, COMPUTE_RESOURCE_TMPRING, se);
519
520 evergreen_reg_set(res,
521 GRBM_GFX_INDEX,INSTANCE_INDEX(0)
522 | SE_INDEX(se)
523 | INSTANCE_BROADCAST_WRITES);
524 evergreen_reg_set(res, SQ_LSTMP_RING_SIZE, size);
525
526 if (size) {
527 assert(bo);
528
529 evergreen_reg_set(res, SQ_LSTMP_RING_BASE, offset);
530 res->bo = bo;
531 res->usage = RADEON_USAGE_WRITE;
532 res->coher_bo_size = 0;
533 res->flags = 0;
534 }
535
536 if (size) {
537 evergreen_emit_force_reloc(res);
538 }
539
540 evergreen_reg_set(res,
541 GRBM_GFX_INDEX,INSTANCE_INDEX(0)
542 | SE_INDEX(0)
543 | INSTANCE_BROADCAST_WRITES
544 | SE_BROADCAST_WRITES);
545 }
546
547 static uint32_t r600_colorformat_endian_swap(uint32_t colorformat)
548 {
549 if (R600_BIG_ENDIAN) {
550 switch(colorformat) {
551 case V_028C70_COLOR_4_4:
552 return ENDIAN_NONE;
553
554 /* 8-bit buffers. */
555 case V_028C70_COLOR_8:
556 return ENDIAN_NONE;
557
558 /* 16-bit buffers. */
559 case V_028C70_COLOR_5_6_5:
560 case V_028C70_COLOR_1_5_5_5:
561 case V_028C70_COLOR_4_4_4_4:
562 case V_028C70_COLOR_16:
563 case V_028C70_COLOR_8_8:
564 return ENDIAN_8IN16;
565
566 /* 32-bit buffers. */
567 case V_028C70_COLOR_8_8_8_8:
568 case V_028C70_COLOR_2_10_10_10:
569 case V_028C70_COLOR_8_24:
570 case V_028C70_COLOR_24_8:
571 case V_028C70_COLOR_32_FLOAT:
572 case V_028C70_COLOR_16_16_FLOAT:
573 case V_028C70_COLOR_16_16:
574 return ENDIAN_8IN32;
575
576 /* 64-bit buffers. */
577 case V_028C70_COLOR_16_16_16_16:
578 case V_028C70_COLOR_16_16_16_16_FLOAT:
579 return ENDIAN_8IN16;
580
581 case V_028C70_COLOR_32_32_FLOAT:
582 case V_028C70_COLOR_32_32:
583 case V_028C70_COLOR_X24_8_32_FLOAT:
584 return ENDIAN_8IN32;
585
586 /* 96-bit buffers. */
587 case V_028C70_COLOR_32_32_32_FLOAT:
588 /* 128-bit buffers. */
589 case V_028C70_COLOR_32_32_32_32_FLOAT:
590 case V_028C70_COLOR_32_32_32_32:
591 return ENDIAN_8IN32;
592 default:
593 return ENDIAN_NONE; /* Unsupported. */
594 }
595 } else {
596 return ENDIAN_NONE;
597 }
598 }
599
600 static unsigned r600_tex_dim(unsigned dim)
601 {
602 switch (dim) {
603 default:
604 case PIPE_TEXTURE_1D:
605 return V_030000_SQ_TEX_DIM_1D;
606 case PIPE_TEXTURE_1D_ARRAY:
607 return V_030000_SQ_TEX_DIM_1D_ARRAY;
608 case PIPE_TEXTURE_2D:
609 case PIPE_TEXTURE_RECT:
610 return V_030000_SQ_TEX_DIM_2D;
611 case PIPE_TEXTURE_2D_ARRAY:
612 return V_030000_SQ_TEX_DIM_2D_ARRAY;
613 case PIPE_TEXTURE_3D:
614 return V_030000_SQ_TEX_DIM_3D;
615 case PIPE_TEXTURE_CUBE:
616 return V_030000_SQ_TEX_DIM_CUBEMAP;
617 }
618 }
619
620 void evergreen_set_vtx_resource(
621 struct r600_pipe_compute *pipe,
622 struct r600_resource* bo,
623 int id, uint64_t offset, int writable)
624 {
625 assert(id < 16);
626 uint32_t sq_vtx_constant_word2, sq_vtx_constant_word3, sq_vtx_constant_word4;
627 struct number_type_and_format fmt;
628 uint64_t va;
629
630 fmt.format = 0;
631
632 assert(bo->b.b.height0 <= 1);
633 assert(bo->b.b.depth0 <= 1);
634
635 int e = evergreen_compute_get_gpu_format(&fmt, bo);
636
637 assert(e && "unknown format");
638
639 struct evergreen_compute_resource* res =
640 get_empty_res(pipe, COMPUTE_RESOURCE_VERT, id);
641
642 unsigned size = bo->b.b.width0;
643 unsigned stride = 1;
644
645 // size = (size * util_format_get_blockwidth(bo->b.b.b.format) *
646 // util_format_get_blocksize(bo->b.b.b.format));
647
648 va = r600_resource_va(&pipe->ctx->screen->screen, &bo->b.b) + offset;
649
650 COMPUTE_DBG("id: %i vtx size: %i byte, width0: %i elem\n",
651 id, size, bo->b.b.width0);
652
653 sq_vtx_constant_word2 =
654 S_030008_BASE_ADDRESS_HI(va >> 32) |
655 S_030008_STRIDE(stride) |
656 S_030008_DATA_FORMAT(fmt.format) |
657 S_030008_NUM_FORMAT_ALL(fmt.num_format_all) |
658 S_030008_ENDIAN_SWAP(0);
659
660 COMPUTE_DBG("%08X %i %i %i %i\n", sq_vtx_constant_word2, offset,
661 stride, fmt.format, fmt.num_format_all);
662
663 sq_vtx_constant_word3 =
664 S_03000C_DST_SEL_X(0) |
665 S_03000C_DST_SEL_Y(1) |
666 S_03000C_DST_SEL_Z(2) |
667 S_03000C_DST_SEL_W(3);
668
669 sq_vtx_constant_word4 = 0;
670
671 evergreen_emit_raw_value(res, PKT3C(PKT3_SET_RESOURCE, 8, 0));
672 evergreen_emit_raw_value(res, (id+816)*32 >> 2);
673 evergreen_emit_raw_value(res, (unsigned)((va) & 0xffffffff));
674 evergreen_emit_raw_value(res, size - 1);
675 evergreen_emit_raw_value(res, sq_vtx_constant_word2);
676 evergreen_emit_raw_value(res, sq_vtx_constant_word3);
677 evergreen_emit_raw_value(res, sq_vtx_constant_word4);
678 evergreen_emit_raw_value(res, 0);
679 evergreen_emit_raw_value(res, 0);
680 evergreen_emit_raw_value(res, S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_BUFFER));
681
682 res->bo = bo;
683
684 if (writable) {
685 res->usage = RADEON_USAGE_READWRITE;
686 }
687 else {
688 res->usage = RADEON_USAGE_READ;
689 }
690
691 res->coher_bo_size = size;
692 res->flags = COMPUTE_RES_TC_FLUSH | COMPUTE_RES_VC_FLUSH;
693 }
694
695 void evergreen_set_tex_resource(
696 struct r600_pipe_compute *pipe,
697 struct r600_pipe_sampler_view* view,
698 int id)
699 {
700 struct evergreen_compute_resource* res =
701 get_empty_res(pipe, COMPUTE_RESOURCE_TEX, id);
702 struct r600_resource_texture *tmp =
703 (struct r600_resource_texture*)view->base.texture;
704
705 unsigned format, endian;
706 uint32_t word4 = 0, yuv_format = 0, pitch = 0;
707 unsigned char swizzle[4], array_mode = 0, tile_type = 0;
708 unsigned height, depth;
709
710 swizzle[0] = 0;
711 swizzle[1] = 1;
712 swizzle[2] = 2;
713 swizzle[3] = 3;
714
715 format = r600_translate_texformat((struct pipe_screen *)pipe->ctx->screen,
716 view->base.format, swizzle, &word4, &yuv_format);
717
718 if (format == ~0) {
719 format = 0;
720 }
721
722 endian = r600_colorformat_endian_swap(format);
723
724 height = view->base.texture->height0;
725 depth = view->base.texture->depth0;
726
727 pitch = align(tmp->pitch_in_blocks[0] *
728 util_format_get_blockwidth(tmp->real_format), 8);
729 array_mode = tmp->array_mode[0];
730 tile_type = tmp->tile_type;
731
732 assert(view->base.texture->target != PIPE_TEXTURE_1D_ARRAY);
733 assert(view->base.texture->target != PIPE_TEXTURE_2D_ARRAY);
734
735 evergreen_emit_raw_value(res, PKT3C(PKT3_SET_RESOURCE, 8, 0));
736 evergreen_emit_raw_value(res, (id+816)*32 >> 2); ///TODO: check this line
737 evergreen_emit_raw_value(res,
738 (S_030000_DIM(r600_tex_dim(view->base.texture->target)) |
739 S_030000_PITCH((pitch / 8) - 1) |
740 S_030000_NON_DISP_TILING_ORDER(tile_type) |
741 S_030000_TEX_WIDTH(view->base.texture->width0 - 1)));
742 evergreen_emit_raw_value(res, (S_030004_TEX_HEIGHT(height - 1) |
743 S_030004_TEX_DEPTH(depth - 1) |
744 S_030004_ARRAY_MODE(array_mode)));
745 evergreen_emit_raw_value(res, tmp->offset[0] >> 8);
746 evergreen_emit_raw_value(res, tmp->offset[0] >> 8);
747 evergreen_emit_raw_value(res, (word4 |
748 S_030010_SRF_MODE_ALL(V_030010_SRF_MODE_ZERO_CLAMP_MINUS_ONE) |
749 S_030010_ENDIAN_SWAP(endian) |
750 S_030010_BASE_LEVEL(0)));
751 evergreen_emit_raw_value(res, (S_030014_LAST_LEVEL(0) |
752 S_030014_BASE_ARRAY(0) |
753 S_030014_LAST_ARRAY(0)));
754 evergreen_emit_raw_value(res, (S_030018_MAX_ANISO(4 /* max 16 samples */)));
755 evergreen_emit_raw_value(res,
756 S_03001C_TYPE(V_03001C_SQ_TEX_VTX_VALID_TEXTURE)
757 | S_03001C_DATA_FORMAT(format));
758
759 res->bo = (struct r600_resource*)view->base.texture;
760
761 res->usage = RADEON_USAGE_READ;
762
763 res->coher_bo_size = tmp->offset[0] + util_format_get_blockwidth(tmp->real_format)*view->base.texture->width0*height*depth;
764 res->flags = COMPUTE_RES_TC_FLUSH;
765
766 evergreen_emit_force_reloc(res);
767 evergreen_emit_force_reloc(res);
768 }
769
770 void evergreen_set_sampler_resource(
771 struct r600_pipe_compute *pipe,
772 struct compute_sampler_state *sampler,
773 int id)
774 {
775 struct evergreen_compute_resource* res =
776 get_empty_res(pipe, COMPUTE_RESOURCE_SAMPLER, id);
777
778 unsigned aniso_flag_offset = sampler->state.max_anisotropy > 1 ? 2 : 0;
779
780 evergreen_emit_raw_value(res, PKT3C(PKT3_SET_SAMPLER, 3, 0));
781 evergreen_emit_raw_value(res, (id + 90)*3);
782 evergreen_emit_raw_value(res,
783 S_03C000_CLAMP_X(r600_tex_wrap(sampler->state.wrap_s)) |
784 S_03C000_CLAMP_Y(r600_tex_wrap(sampler->state.wrap_t)) |
785 S_03C000_CLAMP_Z(r600_tex_wrap(sampler->state.wrap_r)) |
786 S_03C000_XY_MAG_FILTER(r600_tex_filter(sampler->state.mag_img_filter) | aniso_flag_offset) |
787 S_03C000_XY_MIN_FILTER(r600_tex_filter(sampler->state.min_img_filter) | aniso_flag_offset) |
788 S_03C000_BORDER_COLOR_TYPE(V_03C000_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK)
789 );
790 evergreen_emit_raw_value(res,
791 S_03C004_MIN_LOD(S_FIXED(CLAMP(sampler->state.min_lod, 0, 15), 8)) |
792 S_03C004_MAX_LOD(S_FIXED(CLAMP(sampler->state.max_lod, 0, 15), 8))
793 );
794 evergreen_emit_raw_value(res,
795 S_03C008_LOD_BIAS(S_FIXED(CLAMP(sampler->state.lod_bias, -16, 16), 8)) |
796 (sampler->state.seamless_cube_map ? 0 : S_03C008_DISABLE_CUBE_WRAP(1)) |
797 S_03C008_TYPE(1)
798 );
799 }
800
801 void evergreen_set_const_cache(
802 struct r600_pipe_compute *pipe,
803 int cache_id,
804 struct r600_resource* cbo,
805 int size, int offset)
806 {
807 #define SQ_ALU_CONST_BUFFER_SIZE_LS_0 0x00028fc0
808 #define SQ_ALU_CONST_CACHE_LS_0 0x00028f40
809
810 struct evergreen_compute_resource* res =
811 get_empty_res(pipe, COMPUTE_RESOURCE_CONST_MEM, cache_id);
812
813 assert(size < 0x200);
814 assert((offset & 0xFF) == 0);
815 assert(cache_id < 16);
816
817 evergreen_reg_set(res, SQ_ALU_CONST_BUFFER_SIZE_LS_0 + cache_id*4, size);
818 evergreen_reg_set(res, SQ_ALU_CONST_CACHE_LS_0 + cache_id*4, offset >> 8);
819 res->bo = cbo;
820 res->usage = RADEON_USAGE_READ;
821 res->coher_bo_size = size;
822 res->flags = COMPUTE_RES_SH_FLUSH;
823 }
824
825 struct r600_resource* r600_compute_buffer_alloc_vram(
826 struct r600_screen *screen,
827 unsigned size)
828 {
829 assert(size);
830
831 struct pipe_resource * buffer = pipe_buffer_create(
832 (struct pipe_screen*) screen,
833 PIPE_BIND_CUSTOM,
834 PIPE_USAGE_IMMUTABLE,
835 size);
836
837 return (struct r600_resource *)buffer;
838 }