cell: initial work for mipmap texture filtering
[mesa.git] src/gallium/drivers/cell/spu/spu_command.c
/**************************************************************************
 *
 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/


/**
 * SPU command processing code
 */


#include <stdio.h>
#include <libmisc.h>

#include "pipe/p_defines.h"

#include "spu_command.h"
#include "spu_main.h"
#include "spu_render.h"
#include "spu_per_fragment_op.h"
#include "spu_texture.h"
#include "spu_tile.h"
#include "spu_vertex_shader.h"
#include "spu_dcache.h"
#include "spu_debug.h"
#include "cell/common.h"

struct spu_vs_context draw;


/**
 * Buffer containing dynamically generated SPU code:
 */
static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS]
   ALIGN16_ATTRIB;


/**
 * Tell the PPU that this SPU has finished copying a buffer to
 * local store and that it may be reused by the PPU.
 * This is done by writing a 16-byte batch-buffer-status block back into
 * main memory (in cell_context->buffer_status[]).
 */
static void
release_buffer(uint buffer)
{
   /* Evidently, using less than a 16-byte status doesn't work reliably */
   static const uint status[4] ALIGN16_ATTRIB
      = {CELL_BUFFER_STATUS_FREE, 0, 0, 0};

   const uint index = 4 * (spu.init.id * CELL_NUM_BUFFERS + buffer);
   uint *dst = spu.init.buffer_status + index;

   ASSERT(buffer < CELL_NUM_BUFFERS);

   mfc_put((void *) &status,    /* src in local memory */
           (unsigned int) dst,  /* dst in main memory */
           sizeof(status),      /* size */
           TAG_MISC,            /* tag is unimportant */
           0,                   /* tid */
           0                    /* rid */);
}


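/**
 * Process a CELL_CMD_CLEAR_SURFACE command: record the clear value for the
 * color (surface 0) or depth/stencil surface, then either mark this SPU's
 * tiles as TILE_STATUS_CLEAR (CLEAR_OPT path) or write the clear value to
 * all of this SPU's tiles immediately.
 */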
static void
cmd_clear_surface(const struct cell_command_clear_surface *clear)
{
   DEBUG_PRINTF("CLEAR SURF %u to 0x%08x\n", clear->surface, clear->value);

   if (clear->surface == 0) {
      spu.fb.color_clear_value = clear->value;
      if (spu.init.debug_flags & CELL_DEBUG_CHECKER) {
         uint x = (spu.init.id << 4) | (spu.init.id << 12) |
                  (spu.init.id << 20) | (spu.init.id << 28);
         spu.fb.color_clear_value ^= x;
      }
   }
   else {
      spu.fb.depth_clear_value = clear->value;
   }

#define CLEAR_OPT 1
#if CLEAR_OPT

   /* Simply set all tiles' status to CLEAR.
    * When we actually begin rendering into a tile, we'll initialize it to
    * the clear value.  If any tiles go untouched during the frame,
    * really_clear_tiles() will set them to the clear value.
    */
   if (clear->surface == 0) {
      memset(spu.ctile_status, TILE_STATUS_CLEAR, sizeof(spu.ctile_status));
   }
   else {
      memset(spu.ztile_status, TILE_STATUS_CLEAR, sizeof(spu.ztile_status));
   }

#else

   /*
    * This path clears the whole framebuffer to the clear color right now.
    */

   /*
   printf("SPU: %s num=%d w=%d h=%d\n",
          __FUNCTION__, num_tiles, spu.fb.width_tiles, spu.fb.height_tiles);
   */

   /* init a single tile to the clear value */
   if (clear->surface == 0) {
      clear_c_tile(&spu.ctile);
   }
   else {
      clear_z_tile(&spu.ztile);
   }

   /* walk over my tiles, writing the 'clear' tile's data */
   {
      const uint num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
      uint i;
      for (i = spu.init.id; i < num_tiles; i += spu.init.num_spus) {
         uint tx = i % spu.fb.width_tiles;
         uint ty = i / spu.fb.width_tiles;
         if (clear->surface == 0)
            put_tile(tx, ty, &spu.ctile, TAG_SURFACE_CLEAR, 0);
         else
            put_tile(tx, ty, &spu.ztile, TAG_SURFACE_CLEAR, 1);
      }
   }

   if (spu.init.debug_flags & CELL_DEBUG_SYNC) {
      wait_on_mask(1 << TAG_SURFACE_CLEAR);
   }

#endif /* CLEAR_OPT */

   DEBUG_PRINTF("CLEAR SURF done\n");
}


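/**
 * Process a CELL_CMD_RELEASE_VERTS command: tell the PPU that the given
 * vertex buffer has been copied to local store and may be reused.
 */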
static void
cmd_release_verts(const struct cell_command_release_verts *release)
{
   DEBUG_PRINTF("RELEASE VERTS %u\n", release->vertex_buf);
   ASSERT(release->vertex_buf != ~0U);
   release_buffer(release->vertex_buf);
}


/**
 * Process a CELL_CMD_STATE_FRAGMENT_OPS command.
 * This involves installing new fragment ops SPU code.
 * If this function is never called, we'll use a regular C fallback function
 * for fragment processing.
 */
static void
cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops)
{
   static int warned = 0;

   DEBUG_PRINTF("CMD_STATE_FRAGMENT_OPS\n");
   /* Copy SPU code from batch buffer to spu buffer */
   memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4);
   /* Copy state info (for fallback case only) */
   memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa));
   memcpy(&spu.blend, &fops->blend, sizeof(fops->blend));

   /* Parity twist!  For now, always use the fallback code by default,
    * only switching to codegen when specifically requested.  This
    * allows us to develop freely without risking taking down the
    * branch.
    *
    * Later, the parity of this check will be reversed, so that
    * codegen is *always* used, unless we specifically indicate that
    * we don't want it.
    *
    * Eventually, the option will be removed completely, because in
    * final code we'll always use codegen and won't even provide the
    * raw state records that the fallback code requires.
    */
   if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) == 0) {
      spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code;
   }
   else {
      /* otherwise, the default fallback code remains in place */
      if (!warned) {
         fprintf(stderr, "Cell Warning: using fallback per-fragment code\n");
         warned = 1;
      }
   }

   spu.read_depth = spu.depth_stencil_alpha.depth.enabled;
   spu.read_stencil = spu.depth_stencil_alpha.stencil[0].enabled;
}


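/**
 * Process a CELL_CMD_STATE_FRAGMENT_PROGRAM command: copy the generated
 * fragment program code into local store and point spu.fragment_program
 * at it.
 */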
static void
cmd_state_fragment_program(const struct cell_command_fragment_program *fp)
{
   DEBUG_PRINTF("CMD_STATE_FRAGMENT_PROGRAM\n");
   /* Copy SPU code from batch buffer to spu buffer */
   memcpy(spu.fragment_program_code, fp->code,
          SPU_MAX_FRAGMENT_PROGRAM_INSTS * 4);
#if 01
   /* Point function pointer at new code */
   spu.fragment_program = (spu_fragment_program_func)spu.fragment_program_code;
#endif
}


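/**
 * Process a CELL_CMD_STATE_FS_CONSTANTS command: splat each float constant
 * across a 4-element vector for SOA execution.
 * Returns the new position (in 8-byte words) in the batch buffer.
 */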
static uint
cmd_state_fs_constants(const uint64_t *buffer, uint pos)
{
   const uint num_const = buffer[pos + 1];
   const float *constants = (const float *) &buffer[pos + 2];
   uint i;

   DEBUG_PRINTF("CMD_STATE_FS_CONSTANTS (%u)\n", num_const);

   /* Expand each float to float[4] for SOA execution */
   for (i = 0; i < num_const; i++) {
      spu.constants[i] = spu_splats(constants[i]);
   }

   /* return new buffer pos (in 8-byte words) */
   return pos + 2 + num_const / 2;
}


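/**
 * Process a CELL_CMD_STATE_FRAMEBUFFER command: record the color/depth
 * surface addresses, formats and size, and compute the tile counts and
 * Z scale factor.
 */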
static void
cmd_state_framebuffer(const struct cell_command_framebuffer *cmd)
{
   DEBUG_PRINTF("FRAMEBUFFER: %d x %d at %p, cformat 0x%x zformat 0x%x\n",
                cmd->width,
                cmd->height,
                cmd->color_start,
                cmd->color_format,
                cmd->depth_format);

   ASSERT_ALIGN16(cmd->color_start);
   ASSERT_ALIGN16(cmd->depth_start);

   spu.fb.color_start = cmd->color_start;
   spu.fb.depth_start = cmd->depth_start;
   spu.fb.color_format = cmd->color_format;
   spu.fb.depth_format = cmd->depth_format;
   spu.fb.width = cmd->width;
   spu.fb.height = cmd->height;
   spu.fb.width_tiles = (spu.fb.width + TILE_SIZE - 1) / TILE_SIZE;
   spu.fb.height_tiles = (spu.fb.height + TILE_SIZE - 1) / TILE_SIZE;

   switch (spu.fb.depth_format) {
   case PIPE_FORMAT_Z32_UNORM:
      spu.fb.zsize = 4;
      spu.fb.zscale = (float) 0xffffffffu;
      break;
   case PIPE_FORMAT_Z24S8_UNORM:
   case PIPE_FORMAT_S8Z24_UNORM:
   case PIPE_FORMAT_Z24X8_UNORM:
   case PIPE_FORMAT_X8Z24_UNORM:
      spu.fb.zsize = 4;
      spu.fb.zscale = (float) 0x00ffffffu;
      break;
   case PIPE_FORMAT_Z16_UNORM:
      spu.fb.zsize = 2;
      spu.fb.zscale = (float) 0xffffu;
      break;
   default:
      spu.fb.zsize = 0;
      break;
   }
}


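/**
 * Process a CELL_CMD_STATE_SAMPLER command: save the sampler state for the
 * given unit and choose the texture sampling function (bilinear vs. nearest).
 */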
static void
cmd_state_sampler(const struct cell_command_sampler *sampler)
{
   DEBUG_PRINTF("SAMPLER [%u]\n", sampler->unit);

   spu.sampler[sampler->unit] = sampler->state;
#if 0
   if (spu.sampler[sampler->unit].min_mip_filter != PIPE_TEX_MIPFILTER_NONE) {
      spu.sample_texture4[sampler->unit] = sample_texture4_lod;
   }
   else
#endif
   if (spu.sampler[sampler->unit].min_img_filter == PIPE_TEX_FILTER_LINEAR) {
      spu.sample_texture4[sampler->unit] = sample_texture4_bilinear;
   }
   else {
      spu.sample_texture4[sampler->unit] = sample_texture4_nearest;
   }
}


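/**
 * Process a CELL_CMD_STATE_TEXTURE command: record the address and size of
 * each mipmap level for the given texture unit and precompute the per-level
 * size vectors and masks used when sampling.
 */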
static void
cmd_state_texture(const struct cell_command_texture *texture)
{
   const uint unit = texture->unit;
   uint i;

   DEBUG_PRINTF("TEXTURE [%u]\n", texture->unit);

   for (i = 0; i < CELL_MAX_TEXTURE_LEVELS; i++) {
      uint width = texture->width[i];
      uint height = texture->height[i];

      DEBUG_PRINTF(" LEVEL %u: at %p size[0] %u x %u\n", i,
                   texture->start[i], texture->width[i], texture->height[i]);

      spu.texture[unit].level[i].start = texture->start[i];
      spu.texture[unit].level[i].width = width;
      spu.texture[unit].level[i].height = height;

      spu.texture[unit].level[i].tiles_per_row = width / TILE_SIZE;

      spu.texture[unit].level[i].width4 = spu_splats((float) width);
      spu.texture[unit].level[i].height4 = spu_splats((float) height);

      spu.texture[unit].level[i].tex_size_x_mask = spu_splats(width - 1);
      spu.texture[unit].level[i].tex_size_y_mask = spu_splats(height - 1);
   }
}


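/**
 * Process a CELL_CMD_STATE_VERTEX_INFO command: copy the vertex layout
 * info into SPU-local state.
 */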
static void
cmd_state_vertex_info(const struct vertex_info *vinfo)
{
   DEBUG_PRINTF("VERTEX_INFO num_attribs=%u\n", vinfo->num_attribs);
   ASSERT(vinfo->num_attribs >= 1);
   ASSERT(vinfo->num_attribs <= 8);
   memcpy(&spu.vertex_info, vinfo, sizeof(*vinfo));
}


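/**
 * Process a CELL_CMD_STATE_VS_ARRAY_INFO command: record the location,
 * pitch and size of one vertex attribute array and flag the vertex fetch
 * state as dirty.
 */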
static void
cmd_state_vs_array_info(const struct cell_array_info *vs_info)
{
   const unsigned attr = vs_info->attr;

   ASSERT(attr < PIPE_MAX_ATTRIBS);
   draw.vertex_fetch.src_ptr[attr] = vs_info->base;
   draw.vertex_fetch.pitch[attr] = vs_info->pitch;
   draw.vertex_fetch.size[attr] = vs_info->size;
   draw.vertex_fetch.code_offset[attr] = vs_info->function_offset;
   draw.vertex_fetch.dirty = 1;
}


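/**
 * Process a CELL_CMD_STATE_ATTRIB_FETCH command: DMA the generated
 * attribute-fetch code from main memory into local store and point the
 * vertex fetch code pointer at it.
 */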
static void
cmd_state_attrib_fetch(const struct cell_attribute_fetch_code *code)
{
   mfc_get(attribute_fetch_code_buffer,
           (unsigned int) code->base,  /* src */
           code->size,
           TAG_BATCH_BUFFER,
           0, /* tid */
           0  /* rid */);
   wait_on_mask(1 << TAG_BATCH_BUFFER);

   draw.vertex_fetch.code = attribute_fetch_code_buffer;
}


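/**
 * Process a CELL_CMD_FINISH command: flush any pending tile clears, wait
 * for all outstanding DMAs to complete, then signal the PPU via the
 * outbound mailbox.
 */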
static void
cmd_finish(void)
{
   DEBUG_PRINTF("FINISH\n");
   really_clear_tiles(0);
   /* wait for all outstanding DMAs to finish */
   mfc_write_tag_mask(~0);
   mfc_read_tag_status_all();
   /* send mbox message to PPU */
   spu_write_out_mbox(CELL_CMD_FINISH);
}


/**
 * Execute a batch of commands which was sent to us by the PPU.
 * See the cell_emit_state.c code to see where the commands come from.
 *
 * The opcode param encodes the location of the buffer and its size.
 */
static void
cmd_batch(uint opcode)
{
   const uint buf = (opcode >> 8) & 0xff;
   uint size = (opcode >> 16);
   uint64_t buffer[CELL_BUFFER_SIZE / 8] ALIGN16_ATTRIB;
   const unsigned usize = size / sizeof(buffer[0]);
   uint pos;

   DEBUG_PRINTF("BATCH buffer %u, len %u, from %p\n",
                buf, size, spu.init.buffers[buf]);

   ASSERT((opcode & CELL_CMD_OPCODE_MASK) == CELL_CMD_BATCH);

   ASSERT_ALIGN16(spu.init.buffers[buf]);

   size = ROUNDUP16(size);

   mfc_get(buffer,  /* dest */
           (unsigned int) spu.init.buffers[buf],  /* src */
           size,
           TAG_BATCH_BUFFER,
           0, /* tid */
           0  /* rid */);
   wait_on_mask(1 << TAG_BATCH_BUFFER);

   /* Tell PPU we're done copying the buffer to local store */
   DEBUG_PRINTF("release batch buf %u\n", buf);
   release_buffer(buf);

   /*
    * Loop over commands in the batch buffer
    */
   for (pos = 0; pos < usize; /* no incr */) {
      switch (buffer[pos]) {
      /*
       * rendering commands
       */
      case CELL_CMD_CLEAR_SURFACE:
         {
            struct cell_command_clear_surface *clr
               = (struct cell_command_clear_surface *) &buffer[pos];
            cmd_clear_surface(clr);
            pos += sizeof(*clr) / 8;
         }
         break;
      case CELL_CMD_RENDER:
         {
            struct cell_command_render *render
               = (struct cell_command_render *) &buffer[pos];
            uint pos_incr;
            cmd_render(render, &pos_incr);
            pos += pos_incr;
         }
         break;
      /*
       * state-update commands
       */
      case CELL_CMD_STATE_FRAMEBUFFER:
         {
            struct cell_command_framebuffer *fb
               = (struct cell_command_framebuffer *) &buffer[pos];
            cmd_state_framebuffer(fb);
            pos += sizeof(*fb) / 8;
         }
         break;
      case CELL_CMD_STATE_FRAGMENT_OPS:
         {
            struct cell_command_fragment_ops *fops
               = (struct cell_command_fragment_ops *) &buffer[pos];
            cmd_state_fragment_ops(fops);
            pos += sizeof(*fops) / 8;
         }
         break;
      case CELL_CMD_STATE_FRAGMENT_PROGRAM:
         {
            struct cell_command_fragment_program *fp
               = (struct cell_command_fragment_program *) &buffer[pos];
            cmd_state_fragment_program(fp);
            pos += sizeof(*fp) / 8;
         }
         break;
      case CELL_CMD_STATE_FS_CONSTANTS:
         pos = cmd_state_fs_constants(buffer, pos);
         break;
      case CELL_CMD_STATE_SAMPLER:
         {
            struct cell_command_sampler *sampler
               = (struct cell_command_sampler *) &buffer[pos];
            cmd_state_sampler(sampler);
            pos += sizeof(*sampler) / 8;
         }
         break;
      case CELL_CMD_STATE_TEXTURE:
         {
            struct cell_command_texture *texture
               = (struct cell_command_texture *) &buffer[pos];
            cmd_state_texture(texture);
            pos += sizeof(*texture) / 8;
         }
         break;
      case CELL_CMD_STATE_VERTEX_INFO:
         cmd_state_vertex_info((struct vertex_info *) &buffer[pos+1]);
         pos += (1 + ROUNDUP8(sizeof(struct vertex_info)) / 8);
         break;
      case CELL_CMD_STATE_VIEWPORT:
         (void) memcpy(&draw.viewport, &buffer[pos+1],
                       sizeof(struct pipe_viewport_state));
         pos += (1 + ROUNDUP8(sizeof(struct pipe_viewport_state)) / 8);
         break;
      case CELL_CMD_STATE_UNIFORMS:
         draw.constants = (const float (*)[4]) (uintptr_t) buffer[pos + 1];
         pos += 2;
         break;
      case CELL_CMD_STATE_VS_ARRAY_INFO:
         cmd_state_vs_array_info((struct cell_array_info *) &buffer[pos+1]);
         pos += (1 + ROUNDUP8(sizeof(struct cell_array_info)) / 8);
         break;
      case CELL_CMD_STATE_BIND_VS:
#if 0
         spu_bind_vertex_shader(&draw,
                                (struct cell_shader_info *) &buffer[pos+1]);
#endif
         pos += (1 + ROUNDUP8(sizeof(struct cell_shader_info)) / 8);
         break;
      case CELL_CMD_STATE_ATTRIB_FETCH:
         cmd_state_attrib_fetch((struct cell_attribute_fetch_code *)
                                &buffer[pos+1]);
         pos += (1 + ROUNDUP8(sizeof(struct cell_attribute_fetch_code)) / 8);
         break;
      /*
       * misc commands
       */
      case CELL_CMD_FINISH:
         cmd_finish();
         pos += 1;
         break;
      case CELL_CMD_RELEASE_VERTS:
         {
            struct cell_command_release_verts *release
               = (struct cell_command_release_verts *) &buffer[pos];
            cmd_release_verts(release);
            pos += sizeof(*release) / 8;
         }
         break;
      case CELL_CMD_FLUSH_BUFFER_RANGE: {
         struct cell_buffer_range *br = (struct cell_buffer_range *)
            &buffer[pos+1];

         spu_dcache_mark_dirty((unsigned) br->base, br->size);
         pos += (1 + ROUNDUP8(sizeof(struct cell_buffer_range)) / 8);
         break;
      }
      default:
         printf("SPU %u: bad opcode: 0x%llx\n", spu.init.id, buffer[pos]);
         ASSERT(0);
         break;
      }
   }

   DEBUG_PRINTF("BATCH complete\n");
}


/**
 * Main loop for SPEs: Get a command, execute it, repeat.
 */
void
command_loop(void)
{
   struct cell_command cmd;
   int exitFlag = 0;

   DEBUG_PRINTF("Enter command loop\n");

   ASSERT((sizeof(struct cell_command) & 0xf) == 0);
   ASSERT_ALIGN16(&cmd);

   while (!exitFlag) {
      unsigned opcode;
      int tag = 0;

      DEBUG_PRINTF("Wait for cmd...\n");

      /* read/wait from mailbox */
      opcode = (unsigned int) spu_read_in_mbox();

      DEBUG_PRINTF("got cmd 0x%x\n", opcode);

      /* command payload */
      mfc_get(&cmd,  /* dest */
              (unsigned int) spu.init.cmd, /* src */
              sizeof(struct cell_command), /* bytes */
              tag,
              0, /* tid */
              0  /* rid */);
      wait_on_mask(1 << tag);

      /*
       * NOTE: most commands should be contained in a batch buffer
       */

      switch (opcode & CELL_CMD_OPCODE_MASK) {
      case CELL_CMD_EXIT:
         DEBUG_PRINTF("EXIT\n");
         exitFlag = 1;
         break;
      case CELL_CMD_VS_EXECUTE:
#if 0
         spu_execute_vertex_shader(&draw, &cmd.vs);
#endif
         break;
      case CELL_CMD_BATCH:
         cmd_batch(opcode);
         break;
      default:
         printf("Bad opcode 0x%x!\n", opcode & CELL_CMD_OPCODE_MASK);
      }

   }

   DEBUG_PRINTF("Exit command loop\n");

   spu_dcache_report();
}