8500d19754e03f2db1afc450693ae9ce3d5de650
[mesa.git] / src / gallium / drivers / cell / spu / spu_command.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 /**
30 * SPU command processing code
31 */
32
33
34 #include <stdio.h>
35 #include <libmisc.h>
36
37 #include "pipe/p_defines.h"
38
39 #include "spu_command.h"
40 #include "spu_main.h"
41 #include "spu_render.h"
42 #include "spu_per_fragment_op.h"
43 #include "spu_texture.h"
44 #include "spu_tile.h"
45 #include "spu_vertex_shader.h"
46 #include "spu_dcache.h"
47 #include "cell/common.h"
48
49
/* Per-SPU vertex shader / vertex fetch state (filled in by the
 * CELL_CMD_STATE_VS_* and CELL_CMD_STATE_ATTRIB_FETCH commands below).
 */
struct spu_vs_context draw;


/**
 * Buffer containing dynamically generated SPU code:
 * sized at 136 bytes per vertex attribute (presumably the max size of
 * one attribute's fetch routine — TODO confirm against the generator).
 */
static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]
   ALIGN16_ATTRIB;
58
59
60
61 static INLINE int
62 align(int value, int alignment)
63 {
64 return (value + alignment - 1) & ~(alignment - 1);
65 }
66
67
68
69 /**
70 * Tell the PPU that this SPU has finished copying a buffer to
71 * local store and that it may be reused by the PPU.
72 * This is done by writting a 16-byte batch-buffer-status block back into
73 * main memory (in cell_context->buffer_status[]).
74 */
75 static void
76 release_buffer(uint buffer)
77 {
78 /* Evidently, using less than a 16-byte status doesn't work reliably */
79 static const vector unsigned int status = {CELL_BUFFER_STATUS_FREE,
80 CELL_BUFFER_STATUS_FREE,
81 CELL_BUFFER_STATUS_FREE,
82 CELL_BUFFER_STATUS_FREE};
83 const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer);
84 uint *dst = spu.init.buffer_status + index;
85
86 ASSERT(buffer < CELL_NUM_BUFFERS);
87
88 mfc_put((void *) &status, /* src in local memory */
89 (unsigned int) dst, /* dst in main memory */
90 sizeof(status), /* size */
91 TAG_MISC, /* tag is unimportant */
92 0, /* tid */
93 0 /* rid */);
94 }
95
96
97 /**
98 * Write CELL_FENCE_SIGNALLED back to the fence status qword in main memory.
99 * There's a qword of status per SPU.
100 */
101 static void
102 cmd_fence(struct cell_command_fence *fence_cmd)
103 {
104 static const vector unsigned int status = {CELL_FENCE_SIGNALLED,
105 CELL_FENCE_SIGNALLED,
106 CELL_FENCE_SIGNALLED,
107 CELL_FENCE_SIGNALLED};
108 uint *dst = (uint *) fence_cmd->fence;
109 dst += 4 * spu.init.id; /* main store/memory address, not local store */
110 ASSERT_ALIGN16(dst);
111 mfc_put((void *) &status, /* src in local memory */
112 (unsigned int) dst, /* dst in main memory */
113 sizeof(status), /* size */
114 TAG_FENCE, /* tag */
115 0, /* tid */
116 0 /* rid */);
117 }
118
119
/**
 * Process CELL_CMD_CLEAR_SURFACE: record the clear value for the color
 * (surface == 0) or depth/stencil (surface != 0) buffer and mark this
 * SPU's tiles as needing a clear.
 */
static void
cmd_clear_surface(const struct cell_command_clear_surface *clear)
{
   D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value);

   if (clear->surface == 0) {
      spu.fb.color_clear_value = clear->value;
      /* In checker-board debug mode, XOR a per-SPU-id pattern into the
       * clear color so each SPU's tiles are visually distinguishable.
       */
      if (spu.init.debug_flags & CELL_DEBUG_CHECKER) {
         uint x = (spu.init.id << 4) | (spu.init.id << 12) |
            (spu.init.id << 20) | (spu.init.id << 28);
         spu.fb.color_clear_value ^= x;
      }
   }
   else {
      spu.fb.depth_clear_value = clear->value;
   }

#define CLEAR_OPT 1
#if CLEAR_OPT

   /* Optimized path: simply set all tiles' status to CLEAR.
    * When we actually begin rendering into a tile, we'll initialize it to
    * the clear value.  If any tiles go untouched during the frame,
    * really_clear_tiles() will set them to the clear value.
    */
   if (clear->surface == 0) {
      memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status));
   }
   else {
      memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status));
   }

#else

   /*
    * This path clears the whole framebuffer to the clear color right now.
    */

   /*
   printf("SPU: %s num=%d w=%d h=%d\n",
          __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles);
   */

   /* init a single tile to the clear value */
   if (clear->surface == 0) {
      clear_c_tile(&spu.ctile);
   }
   else {
      clear_z_tile(&spu.ztile);
   }

   /* walk over my tiles, writing the 'clear' tile's data;
    * tiles are distributed round-robin among SPUs by linear index
    */
   {
      const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
      uint i;
      for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
         uint tx = i % spu.fb.width_tiles;
         uint ty = i / spu.fb.width_tiles;
         if (clear->surface == 0)
            put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
         else
            put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
      }
   }

   /* in sync-debug mode, wait for the clear DMAs to complete now */
   if (spu.init.debug_flags & CELL_DEBUG_SYNC) {
      wait_on_mask(1 << TAG_SURFACE_CLEAR);
   }

#endif /* CLEAR_OPT */

   D_PRINTF(CELL_DEBUG_CMD, "CLEAR SURF done\n");
}
193
194
195 static void
196 cmd_release_verts(const struct cell_command_release_verts *release)
197 {
198 D_PRINTF(CELL_DEBUG_CMD, "RELEASE VERTS %u\n", release->vertex_buf);
199 ASSERT(release->vertex_buf != ~0U);
200 release_buffer(release->vertex_buf);
201 }
202
203
/**
 * Process a CELL_CMD_STATE_FRAGMENT_OPS command.
 * This involves installing new fragment ops SPU code (variant-sized,
 * appended to the command struct in the batch buffer).
 * If this function is never called, we'll use a regular C fallback function
 * for fragment processing.
 */
static void
cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
{
   D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n");

   /* Copy state info (for fallback case only - this will eventually
    * go away when the fallback case goes away)
    */
   memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
   memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));
   memcpy(&spu.blend_color, &fops->blend_color, sizeof(fops->blend_color));

   /* Make sure the SPU knows which buffers it's expected to read when
    * it's told to pull tiles: depth/stencil tiles are only needed when
    * depth or stencil testing is enabled.
    */
   spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled);

   /* If we're forcing the fallback code to be used (for debug purposes),
    * install that.  Otherwise install the incoming SPU code.
    */
   if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) != 0) {
      static unsigned int warned = 0;
      if (!warned) {
         fprintf(stderr, "Cell Warning: using fallback per-fragment code\n");
         warned = 1;
      }
      /* The following two lines aren't really necessary if you
       * know the debug flags won't change during a run, and if you
       * know that the function pointers are initialized correctly.
       * We set them here to allow a person to change the debug
       * flags during a run (from inside a debugger).
       */
      spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
      spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
      return;
   }

   /* Make sure the SPU code buffer is large enough to hold the incoming code.
    * Note that we *don't* use align_malloc() and align_free(), because
    * those utility functions are *not* available in SPU code.
    * */
   if (spu.fragment_ops_code_size < fops->total_code_size) {
      if (spu.fragment_ops_code != NULL) {
         free(spu.fragment_ops_code);
      }
      spu.fragment_ops_code_size = fops->total_code_size;
      spu.fragment_ops_code = malloc(fops->total_code_size);
      if (spu.fragment_ops_code == NULL) {
         /* Allocation failed: reset the buffer state and fall back to
          * the C per-fragment path.
          */
         fprintf(stderr, "CELL Warning: failed to allocate fragment ops code (%d bytes) - using fallback\n", fops->total_code_size);
         spu.fragment_ops_code = NULL;
         spu.fragment_ops_code_size = 0;
         spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
         spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
         return;
      }
   }

   /* Copy the SPU code from the command buffer to the spu buffer */
   memcpy(spu.fragment_ops_code, fops->code, fops->total_code_size);

   /* Set the pointers for the front-facing and back-facing fragments
    * to the specified offsets within the code.  Note that if the
    * front-facing and back-facing code are the same, they'll have
    * the same offset.
    */
   spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->front_code_index];
   spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->back_code_index];
}
279
280 static void
281 cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
282 {
283 D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_PROGRAM\n");
284 /* Copy SPU code from batch buffer to spu buffer */
285 memcpy(spu.fragment_program_code, fp->code,
286 SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4);
287 #if 01
288 /* Point function pointer at new code */
289 spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code;
290 #endif
291 }
292
293
/**
 * Process CELL_CMD_STATE_FS_CONSTANTS: load fragment shader constants
 * from the batch buffer, splatting each float across a 4-wide vector
 * for SOA execution.
 *
 * \param buffer  the batch buffer (array of 8-byte words)
 * \param pos     position of this command within the buffer
 * \return  new buffer position (in 8-byte words) past this command
 */
static uint
cmd_state_fs_constants(const uint64_t *buffer, uint pos)
{
   const uint num_const = buffer[pos + 1];
   const float *constants = (const float *) &buffer[pos + 2];
   uint i;

   D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FS_CONSTANTS (%u)\n", num_const);

   /* Expand each float to float[4] for SOA execution */
   for (i = 0; i < num_const; i++) {
      D_PRINTF(CELL_DEBUG_CMD, " const[%u] = %f\n", i, constants[i]);
      spu.constants[i] = spu_splats(constants[i]);
   }

   /* return new buffer pos (in 8-byte words).
    * NOTE(review): num_const / 2 rounds down when num_const is odd —
    * this assumes the PPU emitter pads the constant list to an even
    * number of floats; TODO confirm against cell_emit_state.c.
    */
   return pos + 2 + num_const / 2;
}
312
313
314 static void
315 cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
316 {
317 D_PRINTF(CELL_DEBUG_CMD, "FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n",
318 cmd->width,
319 cmd->height,
320 cmd->color_start,
321 cmd->color_format,
322 cmd->depth_format);
323
324 ASSERT_ALIGN16(cmd->color_start);
325 ASSERT_ALIGN16(cmd->depth_start);
326
327 spu.fb.color_start = cmd->color_start;
328 spu.fb.depth_start = cmd->depth_start;
329 spu.fb.color_format = cmd->color_format;
330 spu.fb.depth_format = cmd->depth_format;
331 spu.fb.width = cmd->width;
332 spu.fb.height = cmd->height;
333 spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE;
334 spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE;
335
336 switch (spu.fb.depth_format) {
337 case PIPE_FORMAT_Z32_UNORM:
338 spu.fb.zsize = 4;
339 spu.fb.zscale = (float) 0xffffffffu;
340 break;
341 case PIPE_FORMAT_Z24S8_UNORM:
342 case PIPE_FORMAT_S8Z24_UNORM:
343 case PIPE_FORMAT_Z24X8_UNORM:
344 case PIPE_FORMAT_X8Z24_UNORM:
345 spu.fb.zsize = 4;
346 spu.fb.zscale = (float) 0x00ffffffu;
347 break;
348 case PIPE_FORMAT_Z16_UNORM:
349 spu.fb.zsize = 2;
350 spu.fb.zscale = (float) 0xffffu;
351 break;
352 default:
353 spu.fb.zsize = 0;
354 break;
355 }
356 }
357
358
359 /**
360 * Tex texture mask_s/t and scale_s/t fields depend on the texture size and
361 * sampler wrap modes.
362 */
363 static void
364 update_tex_masks(struct spu_texture *texture,
365 const struct pipe_sampler_state *sampler)
366 {
367 uint i;
368
369 for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) {
370 int width = texture->level[i].width;
371 int height = texture->level[i].height;
372
373 if (sampler->wrap_s == PIPE_TEX_WRAP_REPEAT)
374 texture->level[i].mask_s = spu_splats(width - 1);
375 else
376 texture->level[i].mask_s = spu_splats(~0);
377
378 if (sampler->wrap_t == PIPE_TEX_WRAP_REPEAT)
379 texture->level[i].mask_t = spu_splats(height - 1);
380 else
381 texture->level[i].mask_t = spu_splats(~0);
382
383 if (sampler->normalized_coords) {
384 texture->level[i].scale_s = spu_splats((float) width);
385 texture->level[i].scale_t = spu_splats((float) height);
386 }
387 else {
388 texture->level[i].scale_s = spu_splats(1.0f);
389 texture->level[i].scale_t = spu_splats(1.0f);
390 }
391 }
392 }
393
394
395 static void
396 cmd_state_sampler(const struct cell_command_sampler *sampler)
397 {
398 uint unit = sampler->unit;
399
400 D_PRINTF(CELL_DEBUG_CMD, "SAMPLER [%u]\n", unit);
401
402 spu.sampler[unit] = sampler->state;
403
404 switch (spu.sampler[unit].min_img_filter) {
405 case PIPE_TEX_FILTER_LINEAR:
406 spu.min_sample_texture_2d[unit] = sample_texture_2d_bilinear;
407 break;
408 case PIPE_TEX_FILTER_ANISO:
409 /* fall-through, for now */
410 case PIPE_TEX_FILTER_NEAREST:
411 spu.min_sample_texture_2d[unit] = sample_texture_2d_nearest;
412 break;
413 default:
414 ASSERT(0);
415 }
416
417 switch (spu.sampler[sampler->unit].mag_img_filter) {
418 case PIPE_TEX_FILTER_LINEAR:
419 spu.mag_sample_texture_2d[unit] = sample_texture_2d_bilinear;
420 break;
421 case PIPE_TEX_FILTER_ANISO:
422 /* fall-through, for now */
423 case PIPE_TEX_FILTER_NEAREST:
424 spu.mag_sample_texture_2d[unit] = sample_texture_2d_nearest;
425 break;
426 default:
427 ASSERT(0);
428 }
429
430 switch (spu.sampler[sampler->unit].min_mip_filter) {
431 case PIPE_TEX_MIPFILTER_NEAREST:
432 case PIPE_TEX_MIPFILTER_LINEAR:
433 spu.sample_texture_2d[unit] = sample_texture_2d_lod;
434 break;
435 case PIPE_TEX_MIPFILTER_NONE:
436 spu.sample_texture_2d[unit] = spu.mag_sample_texture_2d[unit];
437 break;
438 default:
439 ASSERT(0);
440 }
441
442 update_tex_masks(&spu.texture[unit], &spu.sampler[unit]);
443 }
444
445
446 static void
447 cmd_state_texture(const struct cell_command_texture *texture)
448 {
449 const uint unit = texture->unit;
450 uint i;
451
452 D_PRINTF(CELL_DEBUG_CMD, "TEXTURE [%u]\n", texture->unit);
453
454 spu.texture[unit].max_level = 0;
455 spu.texture[unit].target = texture->target;
456
457 for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) {
458 uint width = texture->width[i];
459 uint height = texture->height[i];
460 uint depth = texture->depth[i];
461
462 D_PRINTF(CELL_DEBUG_CMD, " LEVEL %u: at %p size[0] %u x %u\n", i,
463 texture->start[i], texture->width[i], texture->height[i]);
464
465 spu.texture[unit].level[i].start = texture->start[i];
466 spu.texture[unit].level[i].width = width;
467 spu.texture[unit].level[i].height = height;
468 spu.texture[unit].level[i].depth = depth;
469
470 spu.texture[unit].level[i].tiles_per_row =
471 (width + TILE_SIZE - 1) / TILE_SIZE;
472
473 spu.texture[unit].level[i].bytes_per_image =
474 4 * align(width, TILE_SIZE) * align(height, TILE_SIZE) * depth;
475
476 spu.texture[unit].level[i].max_s = spu_splats((int) width - 1);
477 spu.texture[unit].level[i].max_t = spu_splats((int) height - 1);
478
479 if (texture->start[i])
480 spu.texture[unit].max_level = i;
481 }
482
483 update_tex_masks(&spu.texture[unit], &spu.sampler[unit]);
484 }
485
486
487 static void
488 cmd_state_vertex_info(const struct vertex_info *vinfo)
489 {
490 D_PRINTF(CELL_DEBUG_CMD, "VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs);
491 ASSERT(vinfo->num_attribs >= 1);
492 ASSERT(vinfo->num_attribs <= 8);
493 memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo));
494 }
495
496
497 static void
498 cmd_state_vs_array_info(const struct cell_array_info *vs_info)
499 {
500 const unsigned attr = vs_info->attr;
501
502 ASSERT(attr < PIPE_MAX_ATTRIBS);
503 draw.vertex_fetch.src_ptr[attr] = vs_info->base;
504 draw.vertex_fetch.pitch[attr] = vs_info->pitch;
505 draw.vertex_fetch.size[attr] = vs_info->size;
506 draw.vertex_fetch.code_offset[attr] = vs_info->function_offset;
507 draw.vertex_fetch.dirty = 1;
508 }
509
510
511 static void
512 cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code)
513 {
514 mfc_get(attribute_fetch_code_buffer,
515 (unsigned int) code->base, /* src */
516 code->size,
517 TAG_BATCH_BUFFER,
518 0, /* tid */
519 0 /* rid */);
520 wait_on_mask(1 << TAG_BATCH_BUFFER);
521
522 draw.vertex_fetch.code = attribute_fetch_code_buffer;
523 }
524
525
/**
 * Process CELL_CMD_FINISH: flush all pending work on this SPU, then
 * signal completion to the PPU via the outbound mailbox.
 */
static void
cmd_finish(void)
{
   D_PRINTF(CELL_DEBUG_CMD, "FINISH\n");
   /* write out any tiles still in the "clear" state */
   really_clear_tiles(0);
   /* wait for all outstanding DMAs to finish */
   mfc_write_tag_mask(~0);
   mfc_read_tag_status_all();
   /* send mbox message to PPU */
   spu_write_out_mbox(CELL_CMD_FINISH);
}
537
538
539 /**
540 * Execute a batch of commands which was sent to us by the PPU.
541 * See the cell_emit_state.c code to see where the commands come from.
542 *
543 * The opcode param encodes the location of the buffer and its size.
544 */
545 static void
546 cmd_batch(uint opcode)
547 {
548 const uint buf = (opcode >> 8) & 0xff;
549 uint size = (opcode >> 16);
550 uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB;
551 const unsigned usize = size / sizeof(buffer[0]);
552 uint pos;
553
554 D_PRINTF(CELL_DEBUG_CMD, "BATCH buffer %u, len %u, from %p\n",
555 buf, size, spu.init.buffers[buf]);
556
557 ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH);
558
559 ASSERT_ALIGN16(spu.init.buffers[buf]);
560
561 size = ROUNDUP16(size);
562
563 ASSERT_ALIGN16(spu.init.buffers[buf]);
564
565 mfc_get(buffer, /* dest */
566 (unsigned int) spu.init.buffers[buf], /* src */
567 size,
568 TAG_BATCH_BUFFER,
569 0, /* tid */
570 0 /* rid */);
571 wait_on_mask(1 << TAG_BATCH_BUFFER);
572
573 /* Tell PPU we're done copying the buffer to local store */
574 D_PRINTF(CELL_DEBUG_CMD, "release batch buf %u\n", buf);
575 release_buffer(buf);
576
577 /*
578 * Loop over commands in the batch buffer
579 */
580 for (pos = 0; pos < usize; /* no incr */) {
581 switch (buffer[pos]) {
582 /*
583 * rendering commands
584 */
585 case CELL_CMD_CLEAR_SURFACE:
586 {
587 struct cell_command_clear_surface *clr
588 = (struct cell_command_clear_surface *) &buffer[pos];
589 cmd_clear_surface(clr);
590 pos += sizeof(*clr) / 8;
591 }
592 break;
593 case CELL_CMD_RENDER:
594 {
595 struct cell_command_render *render
596 = (struct cell_command_render *) &buffer[pos];
597 uint pos_incr;
598 cmd_render(render, &pos_incr);
599 pos += pos_incr;
600 }
601 break;
602 /*
603 * state-update commands
604 */
605 case CELL_CMD_STATE_FRAMEBUFFER:
606 {
607 struct cell_command_framebuffer *fb
608 = (struct cell_command_framebuffer *) &buffer[pos];
609 cmd_state_framebuffer(fb);
610 pos += sizeof(*fb) / 8;
611 }
612 break;
613 case CELL_CMD_STATE_FRAGMENT_OPS:
614 {
615 struct cell_command_fragment_ops *fops
616 = (struct cell_command_fragment_ops *) &buffer[pos];
617 cmd_state_fragment_ops(fops);
618 /* This is a variant-sized command */
619 pos += (sizeof(*fops) + fops->total_code_size)/ 8;
620 }
621 break;
622 case CELL_CMD_STATE_FRAGMENT_PROGRAM:
623 {
624 struct cell_command_fragment_program *fp
625 = (struct cell_command_fragment_program *) &buffer[pos];
626 cmd_state_fragment_program(fp);
627 pos += sizeof(*fp) / 8;
628 }
629 break;
630 case CELL_CMD_STATE_FS_CONSTANTS:
631 pos = cmd_state_fs_constants(buffer, pos);
632 break;
633 case CELL_CMD_STATE_RASTERIZER:
634 {
635 struct cell_command_rasterizer *rast =
636 (struct cell_command_rasterizer *) &buffer[pos];
637 spu.rasterizer = rast->rasterizer;
638 pos += sizeof(*rast) / 8;
639 }
640 break;
641 case CELL_CMD_STATE_SAMPLER:
642 {
643 struct cell_command_sampler *sampler
644 = (struct cell_command_sampler *) &buffer[pos];
645 cmd_state_sampler(sampler);
646 pos += sizeof(*sampler) / 8;
647 }
648 break;
649 case CELL_CMD_STATE_TEXTURE:
650 {
651 struct cell_command_texture *texture
652 = (struct cell_command_texture *) &buffer[pos];
653 cmd_state_texture(texture);
654 pos += sizeof(*texture) / 8;
655 }
656 break;
657 case CELL_CMD_STATE_VERTEX_INFO:
658 cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]);
659 pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8);
660 break;
661 case CELL_CMD_STATE_VIEWPORT:
662 (void) memcpy(& draw.viewport, &buffer[pos+1],
663 sizeof(struct pipe_viewport_state));
664 pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8);
665 break;
666 case CELL_CMD_STATE_UNIFORMS:
667 draw.constants = (const float (*)[4]) (uintptr_t) buffer[pos + 1];
668 pos += 2;
669 break;
670 case CELL_CMD_STATE_VS_ARRAY_INFO:
671 cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]);
672 pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8);
673 break;
674 case CELL_CMD_STATE_BIND_VS:
675 #if 0
676 spu_bind_vertex_shader(&draw,
677 (struct cell_shader_info *) &buffer[pos+1]);
678 #endif
679 pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8);
680 break;
681 case CELL_CMD_STATE_ATTRIB_FETCH:
682 cmd_state_attrib_fetch((struct cell_attribute_fetch_code *)
683 &buffer[pos+1]);
684 pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8);
685 break;
686 /*
687 * misc commands
688 */
689 case CELL_CMD_FINISH:
690 cmd_finish();
691 pos += 1;
692 break;
693 case CELL_CMD_FENCE:
694 {
695 struct cell_command_fence *fence_cmd =
696 (struct cell_command_fence *) &buffer[pos];
697 cmd_fence(fence_cmd);
698 pos += sizeof(*fence_cmd) / 8;
699 }
700 break;
701 case CELL_CMD_RELEASE_VERTS:
702 {
703 struct cell_command_release_verts *release
704 = (struct cell_command_release_verts *) &buffer[pos];
705 cmd_release_verts(release);
706 pos += sizeof(*release) / 8;
707 }
708 break;
709 case CELL_CMD_FLUSH_BUFFER_RANGE: {
710 struct cell_buffer_range *br = (struct cell_buffer_range *)
711 &buffer[pos+1];
712
713 spu_dcache_mark_dirty((unsigned) br->base, br->size);
714 pos += (1 + ROUNDUP8(sizeof(struct cell_buffer_range)) / 8);
715 break;
716 }
717 default:
718 printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]);
719 ASSERT(0);
720 break;
721 }
722 }
723
724 D_PRINTF(CELL_DEBUG_CMD, "BATCH complete\n");
725 }
726
727
728 #define PERF 0
729
730
731 /**
732 * Main loop for SPEs: Get a command, execute it, repeat.
733 */
734 void
735 command_loop(void)
736 {
737 int exitFlag = 0;
738 uint t0, t1;
739
740 D_PRINTF(CELL_DEBUG_CMD, "Enter command loop\n");
741
742 while (!exitFlag) {
743 unsigned opcode;
744
745 D_PRINTF(CELL_DEBUG_CMD, "Wait for cmd...\n");
746
747 if (PERF)
748 spu_write_decrementer(~0);
749
750 /* read/wait from mailbox */
751 opcode = (unsigned int) spu_read_in_mbox();
752 D_PRINTF(CELL_DEBUG_CMD, "got cmd 0x%x\n", opcode);
753
754 if (PERF)
755 t0 = spu_read_decrementer();
756
757 switch (opcode & CELL_CMD_OPCODE_MASK) {
758 case CELL_CMD_EXIT:
759 D_PRINTF(CELL_DEBUG_CMD, "EXIT\n");
760 exitFlag = 1;
761 break;
762 case CELL_CMD_VS_EXECUTE:
763 #if 0
764 spu_execute_vertex_shader(&draw, &cmd.vs);
765 #endif
766 break;
767 case CELL_CMD_BATCH:
768 cmd_batch(opcode);
769 break;
770 default:
771 printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK);
772 }
773
774 if (PERF) {
775 t1 = spu_read_decrementer();
776 printf("wait mbox time: %gms batch time: %gms\n",
777 (~0u - t0) * spu.init.inv_timebase,
778 (t0 - t1) * spu.init.inv_timebase);
779 }
780 }
781
782 D_PRINTF(CELL_DEBUG_CMD, "Exit command loop\n");
783
784 if (spu.init.debug_flags & CELL_DEBUG_CACHE)
785 spu_dcache_report();
786 }
787
788 /* Initialize this module; we manage the fragment ops buffer here. */
789 void
790 spu_command_init(void)
791 {
792 /* Install default/fallback fragment processing function.
793 * This will normally be overriden by a code-gen'd function
794 * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set.
795 */
796 spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops;
797 spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops;
798
799 /* Set up the basic empty buffer for code-gen'ed fragment ops */
800 spu.fragment_ops_code = NULL;
801 spu.fragment_ops_code_size = 0;
802 }
803
804 void
805 spu_command_close(void)
806 {
807 /* Deallocate the code-gen buffer for fragment ops, and reset the
808 * fragment ops functions to their initial setting (just to leave
809 * things in a good state).
810 */
811 if (spu.fragment_ops_code != NULL) {
812 free(spu.fragment_ops_code);
813 }
814 spu_command_init();
815 }