cell: add support for fragment shader constant buffers
[mesa.git] / src / gallium / drivers / cell / spu / spu_command.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 /**
30 * SPU command processing code
31 */
32
33
34 #include <stdio.h>
35 #include <libmisc.h>
36
37 #include "pipe/p_defines.h"
38
39 #include "spu_command.h"
40 #include "spu_main.h"
41 #include "spu_render.h"
42 #include "spu_per_fragment_op.h"
43 #include "spu_texture.h"
44 #include "spu_tile.h"
45 #include "spu_vertex_shader.h"
46 #include "spu_dcache.h"
47 #include "spu_debug.h"
48 #include "cell/common.h"
49
50
/** Per-SPU vertex shader execution context (array pointers, fetch code, etc.) */
struct spu_vs_context draw;


/**
 * Buffer containing dynamically generated SPU attribute-fetch code.
 * 136 bytes per attribute appears to be the maximum size of one
 * generated fetch routine -- TODO confirm against the code generator.
 */
static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]
    ALIGN16_ATTRIB;
59
60
61
62 /**
63 * Tell the PPU that this SPU has finished copying a buffer to
64 * local store and that it may be reused by the PPU.
65 * This is done by writting a 16-byte batch-buffer-status block back into
66 * main memory (in cell_context->buffer_status[]).
67 */
68 static void
69 release_buffer(uint buffer)
70 {
71 /* Evidently, using less than a 16-byte status doesn't work reliably */
72 static const uint status[4] ALIGN16_ATTRIB
73 = {CELL_BUFFER_STATUS_FREE, 0, 0, 0};
74
75 const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer);
76 uint *dst = spu.init.buffer_status + index;
77
78 ASSERT(buffer < CELL_NUM_BUFFERS);
79
80 mfc_put((void *) &status, /* src in local memory */
81 (unsigned int) dst, /* dst in main memory */
82 sizeof(status), /* size */
83 TAG_MISC, /* tag is unimportant */
84 0, /* tid */
85 0 /* rid */);
86 }
87
88
/**
 * Process a CELL_CMD_CLEAR_SURFACE command: record the clear value
 * for either the color (surface == 0) or depth/stencil surface and
 * mark tiles as needing clearing.
 */
static void
cmd_clear_surface(const struct cell_command_clear_surface *clear)
{
   DEBUG_PRINTF("CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value);

   if (clear->surface == 0) {
      spu.fb.color_clear_value = clear->value;
      /* In checker-debug mode, XOR a per-SPU pattern into the clear
       * color so each SPU's tiles are visually distinguishable.
       */
      if (spu.init.debug_flags & CELL_DEBUG_CHECKER) {
         uint x = (spu.init.id << 4) | (spu.init.id << 12) |
            (spu.init.id << 20) | (spu.init.id << 28);
         spu.fb.color_clear_value ^= x;
      }
   }
   else {
      spu.fb.depth_clear_value = clear->value;
   }

#define CLEAR_OPT 1
#if CLEAR_OPT

   /* Simply set all tiles' status to CLEAR.
    * When we actually begin rendering into a tile, we'll initialize it to
    * the clear value.  If any tiles go untouched during the frame,
    * really_clear_tiles() will set them to the clear value.
    */
   if (clear->surface == 0) {
      memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status));
   }
   else {
      memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status));
   }

#else

   /*
    * This path clears the whole framebuffer to the clear color right now.
    */

   /*
   printf("SPU: %s num=%d w=%d h=%d\n",
          __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles);
   */

   /* init a single tile to the clear value */
   if (clear->surface == 0) {
      clear_c_tile(&spu.ctile);
   }
   else {
      clear_z_tile(&spu.ztile);
   }

   /* walk over my tiles, writing the 'clear' tile's data */
   {
      const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
      uint i;
      /* tiles are distributed round-robin across SPUs by tile index */
      for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
         uint tx = i % spu.fb.width_tiles;
         uint ty = i / spu.fb.width_tiles;
         if (clear->surface == 0)
            put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
         else
            put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
      }
   }

   if (spu.init.debug_flags & CELL_DEBUG_SYNC) {
      wait_on_mask(1 << TAG_SURFACE_CLEAR);
   }

#endif /* CLEAR_OPT */

   DEBUG_PRINTF("CLEAR SURF done\n");
}
162
163
164 static void
165 cmd_release_verts(const struct cell_command_release_verts *release)
166 {
167 DEBUG_PRINTF("RELEASE VERTS %u\n", release->vertex_buf);
168 ASSERT(release->vertex_buf != ~0U);
169 release_buffer(release->vertex_buf);
170 }
171
172
173 /**
174 * Process a CELL_CMD_STATE_FRAGMENT_OPS command.
175 * This involves installing new fragment ops SPU code.
176 * If this function is never called, we'll use a regular C fallback function
177 * for fragment processing.
178 */
/**
 * Process a CELL_CMD_STATE_FRAGMENT_OPS command.
 * This involves installing new fragment ops SPU code.
 * If this function is never called, we'll use a regular C fallback function
 * for fragment processing.
 */
static void
cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
{
   static int warned = 0;   /* emit the fallback warning only once */

   DEBUG_PRINTF("CMD_STATE_FRAGMENT_OPS\n");
   /* Copy SPU code from batch buffer to spu buffer */
   memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
   /* Copy state info (for fallback case only) */
   memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
   memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));

   /* Parity twist!  For now, always use the fallback code by default,
    * only switching to codegen when specifically requested.  This
    * allows us to develop freely without risking taking down the
    * branch.
    *
    * Later, the parity of this check will be reversed, so that
    * codegen is *always* used, unless we specifically indicate that
    * we don't want it.
    *
    * Eventually, the option will be removed completely, because in
    * final code we'll always use codegen and won't even provide the
    * raw state records that the fallback code requires.
    */
   if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) == 0) {
      /* install the freshly-copied generated code as the fragment-ops func */
      spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code;
   }
   else {
      /* otherwise, the default fallback code remains in place */
      if (!warned) {
         fprintf(stderr, "Cell Warning: using fallback per-fragment code\n");
         warned = 1;
      }
   }

   /* cache flags so per-fragment code knows whether to fetch Z/stencil */
   spu.read_depth = spu.depth_stencil_alpha.depth.enabled;
   spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled;
}
218
219
220 static void
221 cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
222 {
223 DEBUG_PRINTF("CMD_STATE_FRAGMENT_PROGRAM\n");
224 /* Copy SPU code from batch buffer to spu buffer */
225 memcpy(spu.fragment_program_code, fp->code,
226 SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4);
227 #if 01
228 /* Point function pointer at new code */
229 spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code;
230 #endif
231 }
232
233
234 static uint
235 cmd_state_fs_constants(const uint64_t *buffer, uint pos)
236 {
237 const uint num_const = buffer[pos + 1];
238 const float *constants = (const float *) &buffer[pos + 2];
239 uint i;
240
241 DEBUG_PRINTF("CMD_STATE_FS_CONSTANTS (%u)\n", num_const);
242
243 /* Expand each float to float[4] for SOA execution */
244 for (i = 0; i < num_const; i++) {
245 spu.constants[i] = spu_splats(constants[i]);
246 }
247
248 /* return new buffer pos (in 8-byte words) */
249 return pos + 2 + num_const / 2;
250 }
251
252
253 static void
254 cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
255 {
256 DEBUG_PRINTF("FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n",
257 cmd->width,
258 cmd->height,
259 cmd->color_start,
260 cmd->color_format,
261 cmd->depth_format);
262
263 ASSERT_ALIGN16(cmd->color_start);
264 ASSERT_ALIGN16(cmd->depth_start);
265
266 spu.fb.color_start = cmd->color_start;
267 spu.fb.depth_start = cmd->depth_start;
268 spu.fb.color_format = cmd->color_format;
269 spu.fb.depth_format = cmd->depth_format;
270 spu.fb.width = cmd->width;
271 spu.fb.height = cmd->height;
272 spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE;
273 spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE;
274
275 switch (spu.fb.depth_format) {
276 case PIPE_FORMAT_Z32_UNORM:
277 spu.fb.zsize = 4;
278 spu.fb.zscale = (float) 0xffffffffu;
279 break;
280 case PIPE_FORMAT_Z24S8_UNORM:
281 case PIPE_FORMAT_S8Z24_UNORM:
282 case PIPE_FORMAT_Z24X8_UNORM:
283 case PIPE_FORMAT_X8Z24_UNORM:
284 spu.fb.zsize = 4;
285 spu.fb.zscale = (float) 0x00ffffffu;
286 break;
287 case PIPE_FORMAT_Z16_UNORM:
288 spu.fb.zsize = 2;
289 spu.fb.zscale = (float) 0xffffu;
290 break;
291 default:
292 spu.fb.zsize = 0;
293 break;
294 }
295 }
296
297
298 static void
299 cmd_state_sampler(const struct cell_command_sampler *sampler)
300 {
301 DEBUG_PRINTF("SAMPLER [%u]\n", sampler->unit);
302
303 spu.sampler[sampler->unit] = sampler->state;
304 if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR)
305 spu.sample_texture[sampler->unit] = sample_texture_bilinear;
306 else
307 spu.sample_texture[sampler->unit] = sample_texture_nearest;
308 }
309
310
/**
 * Process a CELL_CMD_STATE_TEXTURE command: record the texture image's
 * main-memory address and dimensions for the given unit, plus derived
 * vector constants used by the sampling code.
 */
static void
cmd_state_texture(const struct cell_command_texture *texture)
{
   const uint unit = texture->unit;
   const uint width = texture->width;
   const uint height = texture->height;

   DEBUG_PRINTF("TEXTURE [%u] at %p  size %u x %u\n",
             texture->unit, texture->start,
             texture->width, texture->height);

   spu.texture[unit].start = texture->start;
   spu.texture[unit].width = width;
   spu.texture[unit].height = height;

   spu.texture[unit].tiles_per_row = width / TILE_SIZE;

   spu.texture[unit].tex_size = (vector float) { width, height, 0.0, 0.0};
   /* NOTE(review): the (dim - 1) masks assume width and height are
    * powers of two (used for coordinate wrapping) -- verify the PPU
    * side only sends power-of-two textures.
    */
   spu.texture[unit].tex_size_mask = (vector unsigned int)
         { width - 1, height - 1, 0, 0 };
   spu.texture[unit].tex_size_x_mask = spu_splats(width - 1);
   spu.texture[unit].tex_size_y_mask = spu_splats(height - 1);
}
334
335
336 static void
337 cmd_state_vertex_info(const struct vertex_info *vinfo)
338 {
339 DEBUG_PRINTF("VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs);
340 ASSERT(vinfo->num_attribs >= 1);
341 ASSERT(vinfo->num_attribs <= 8);
342 memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo));
343 }
344
345
346 static void
347 cmd_state_vs_array_info(const struct cell_array_info *vs_info)
348 {
349 const unsigned attr = vs_info->attr;
350
351 ASSERT(attr < PIPE_MAX_ATTRIBS);
352 draw.vertex_fetch.src_ptr[attr] = vs_info->base;
353 draw.vertex_fetch.pitch[attr] = vs_info->pitch;
354 draw.vertex_fetch.size[attr] = vs_info->size;
355 draw.vertex_fetch.code_offset[attr] = vs_info->function_offset;
356 draw.vertex_fetch.dirty = 1;
357 }
358
359
/**
 * Process a CELL_CMD_STATE_ATTRIB_FETCH command: DMA the generated
 * attribute-fetch code from main memory into local store and install
 * it as the current fetch routine.  Blocks until the DMA completes.
 */
static void
cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code)
{
   mfc_get(attribute_fetch_code_buffer,
           (unsigned int) code->base,  /* src */
           code->size,
           TAG_BATCH_BUFFER,
           0, /* tid */
           0  /* rid */);
   /* wait for the code transfer to finish before using it */
   wait_on_mask(1 << TAG_BATCH_BUFFER);

   draw.vertex_fetch.code = attribute_fetch_code_buffer;
}
373
374
/**
 * Process a CELL_CMD_FINISH command: flush any pending clears, drain
 * all outstanding DMAs, then signal completion to the PPU.
 */
static void
cmd_finish(void)
{
   DEBUG_PRINTF("FINISH\n");
   really_clear_tiles(0);
   /* wait for all outstanding DMAs to finish */
   mfc_write_tag_mask(~0);
   mfc_read_tag_status_all();
   /* send mbox message to PPU */
   spu_write_out_mbox(CELL_CMD_FINISH);
}
386
387
388 /**
389 * Execute a batch of commands which was sent to us by the PPU.
390 * See the cell_emit_state.c code to see where the commands come from.
391 *
392 * The opcode param encodes the location of the buffer and its size.
393 */
394 static void
395 cmd_batch(uint opcode)
396 {
397 const uint buf = (opcode >> 8) & 0xff;
398 uint size = (opcode >> 16);
399 uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB;
400 const unsigned usize = size / sizeof(buffer[0]);
401 uint pos;
402
403 DEBUG_PRINTF("BATCH buffer %u, len %u, from %p\n",
404 buf, size, spu.init.buffers[buf]);
405
406 ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH);
407
408 ASSERT_ALIGN16(spu.init.buffers[buf]);
409
410 size = ROUNDUP16(size);
411
412 ASSERT_ALIGN16(spu.init.buffers[buf]);
413
414 mfc_get(buffer, /* dest */
415 (unsigned int) spu.init.buffers[buf], /* src */
416 size,
417 TAG_BATCH_BUFFER,
418 0, /* tid */
419 0 /* rid */);
420 wait_on_mask(1 << TAG_BATCH_BUFFER);
421
422 /* Tell PPU we're done copying the buffer to local store */
423 DEBUG_PRINTF("release batch buf %u\n", buf);
424 release_buffer(buf);
425
426 /*
427 * Loop over commands in the batch buffer
428 */
429 for (pos = 0; pos < usize; /* no incr */) {
430 switch (buffer[pos]) {
431 /*
432 * rendering commands
433 */
434 case CELL_CMD_CLEAR_SURFACE:
435 {
436 struct cell_command_clear_surface *clr
437 = (struct cell_command_clear_surface *) &buffer[pos];
438 cmd_clear_surface(clr);
439 pos += sizeof(*clr) / 8;
440 }
441 break;
442 case CELL_CMD_RENDER:
443 {
444 struct cell_command_render *render
445 = (struct cell_command_render *) &buffer[pos];
446 uint pos_incr;
447 cmd_render(render, &pos_incr);
448 pos += pos_incr;
449 }
450 break;
451 /*
452 * state-update commands
453 */
454 case CELL_CMD_STATE_FRAMEBUFFER:
455 {
456 struct cell_command_framebuffer *fb
457 = (struct cell_command_framebuffer *) &buffer[pos];
458 cmd_state_framebuffer(fb);
459 pos += sizeof(*fb) / 8;
460 }
461 break;
462 case CELL_CMD_STATE_FRAGMENT_OPS:
463 {
464 struct cell_command_fragment_ops *fops
465 = (struct cell_command_fragment_ops *) &buffer[pos];
466 cmd_state_fragment_ops(fops);
467 pos += sizeof(*fops) / 8;
468 }
469 break;
470 case CELL_CMD_STATE_FRAGMENT_PROGRAM:
471 {
472 struct cell_command_fragment_program *fp
473 = (struct cell_command_fragment_program *) &buffer[pos];
474 cmd_state_fragment_program(fp);
475 pos += sizeof(*fp) / 8;
476 }
477 break;
478 case CELL_CMD_STATE_FS_CONSTANTS:
479 pos = cmd_state_fs_constants(buffer, pos);
480 break;
481 case CELL_CMD_STATE_SAMPLER:
482 {
483 struct cell_command_sampler *sampler
484 = (struct cell_command_sampler *) &buffer[pos];
485 cmd_state_sampler(sampler);
486 pos += sizeof(*sampler) / 8;
487 }
488 break;
489 case CELL_CMD_STATE_TEXTURE:
490 {
491 struct cell_command_texture *texture
492 = (struct cell_command_texture *) &buffer[pos];
493 cmd_state_texture(texture);
494 pos += sizeof(*texture) / 8;
495 }
496 break;
497 case CELL_CMD_STATE_VERTEX_INFO:
498 cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]);
499 pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8);
500 break;
501 case CELL_CMD_STATE_VIEWPORT:
502 (void) memcpy(& draw.viewport, &buffer[pos+1],
503 sizeof(struct pipe_viewport_state));
504 pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8);
505 break;
506 case CELL_CMD_STATE_UNIFORMS:
507 draw.constants = (const float (*)[4]) (uintptr_t) buffer[pos + 1];
508 pos += 2;
509 break;
510 case CELL_CMD_STATE_VS_ARRAY_INFO:
511 cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]);
512 pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8);
513 break;
514 case CELL_CMD_STATE_BIND_VS:
515 #if 0
516 spu_bind_vertex_shader(&draw,
517 (struct cell_shader_info *) &buffer[pos+1]);
518 #endif
519 pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8);
520 break;
521 case CELL_CMD_STATE_ATTRIB_FETCH:
522 cmd_state_attrib_fetch((struct cell_attribute_fetch_code *)
523 &buffer[pos+1]);
524 pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8);
525 break;
526 /*
527 * misc commands
528 */
529 case CELL_CMD_FINISH:
530 cmd_finish();
531 pos += 1;
532 break;
533 case CELL_CMD_RELEASE_VERTS:
534 {
535 struct cell_command_release_verts *release
536 = (struct cell_command_release_verts *) &buffer[pos];
537 cmd_release_verts(release);
538 pos += sizeof(*release) / 8;
539 }
540 break;
541 case CELL_CMD_FLUSH_BUFFER_RANGE: {
542 struct cell_buffer_range *br = (struct cell_buffer_range *)
543 &buffer[pos+1];
544
545 spu_dcache_mark_dirty((unsigned) br->base, br->size);
546 pos += (1 + ROUNDUP8(sizeof(struct cell_buffer_range)) / 8);
547 break;
548 }
549 default:
550 printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]);
551 ASSERT(0);
552 break;
553 }
554 }
555
556 DEBUG_PRINTF("BATCH complete\n");
557 }
558
559
560
561 /**
562 * Main loop for SPEs: Get a command, execute it, repeat.
563 */
/**
 * Main loop for SPEs: Get a command, execute it, repeat.
 * Commands arrive via the inbound mailbox; the command payload is
 * DMA'd from the fixed address spu.init.cmd in main memory.
 * Loops until a CELL_CMD_EXIT command is received.
 */
void
command_loop(void)
{
   struct cell_command cmd;
   int exitFlag = 0;

   DEBUG_PRINTF("Enter command loop\n");

   /* the command struct must be 16-byte sized/aligned for DMA */
   ASSERT((sizeof(struct cell_command) & 0xf) == 0);
   ASSERT_ALIGN16(&cmd);

   while (!exitFlag) {
      unsigned opcode;
      int tag = 0;

      DEBUG_PRINTF("Wait for cmd...\n");

      /* read/wait from mailbox */
      opcode = (unsigned int) spu_read_in_mbox();

      DEBUG_PRINTF("got cmd 0x%x\n", opcode);

      /* command payload */
      mfc_get(&cmd,  /* dest */
              (unsigned int) spu.init.cmd, /* src */
              sizeof(struct cell_command), /* bytes */
              tag,
              0, /* tid */
              0  /* rid */);
      wait_on_mask( 1 << tag );

      /*
       * NOTE: most commands should be contained in a batch buffer
       */

      switch (opcode & CELL_CMD_OPCODE_MASK) {
      case CELL_CMD_EXIT:
         DEBUG_PRINTF("EXIT\n");
         exitFlag = 1;
         break;
      case CELL_CMD_VS_EXECUTE:
#if 0
         spu_execute_vertex_shader(&draw, &cmd.vs);
#endif
         break;
      case CELL_CMD_BATCH:
         cmd_batch(opcode);
         break;
      default:
         printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK);
      }

   }

   DEBUG_PRINTF("Exit command loop\n");

   /* dump data-cache statistics before exiting */
   spu_dcache_report();
}