cell: send rasterizer state to SPUs in proper way, remove front_winding hack
[mesa.git] / src / gallium / drivers / cell / spu / spu_render.c
1 /**************************************************************************
2 *
3 * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28
29 #include <stdio.h>
30 #include <libmisc.h>
31 #include <spu_mfcio.h>
32
33 #include "spu_main.h"
34 #include "spu_render.h"
35 #include "spu_tri.h"
36 #include "spu_tile.h"
37 #include "cell/common.h"
38 #include "util/u_memory.h"
39
40
41 /**
42 * Given a rendering command's bounding box (in pixels) compute the
43 * location of the corresponding screen tile bounding box.
44 */
45 static INLINE void
46 tile_bounding_box(const struct cell_command_render *render,
47 uint *txmin, uint *tymin,
48 uint *box_num_tiles, uint *box_width_tiles)
49 {
50 #if 0
51 /* Debug: full-window bounding box */
52 uint txmax = spu.fb.width_tiles - 1;
53 uint tymax = spu.fb.height_tiles - 1;
54 *txmin = 0;
55 *tymin = 0;
56 *box_num_tiles = spu.fb.width_tiles * spu.fb.height_tiles;
57 *box_width_tiles = spu.fb.width_tiles;
58 (void) render;
59 (void) txmax;
60 (void) tymax;
61 #else
62 uint txmax, tymax, box_height_tiles;
63
64 *txmin = (uint) render->xmin / TILE_SIZE;
65 *tymin = (uint) render->ymin / TILE_SIZE;
66 txmax = (uint) render->xmax / TILE_SIZE;
67 tymax = (uint) render->ymax / TILE_SIZE;
68 if (txmax >= spu.fb.width_tiles)
69 txmax = spu.fb.width_tiles-1;
70 if (tymax >= spu.fb.height_tiles)
71 tymax = spu.fb.height_tiles-1;
72 *box_width_tiles = txmax - *txmin + 1;
73 box_height_tiles = tymax - *tymin + 1;
74 *box_num_tiles = *box_width_tiles * box_height_tiles;
75 #endif
76 #if 0
77 printf("SPU %u: bounds: %g, %g ... %g, %g\n", spu.init.id,
78 render->xmin, render->ymin, render->xmax, render->ymax);
79 printf("SPU %u: tiles: %u, %u .. %u, %u\n",
80 spu.init.id, *txmin, *tymin, txmax, tymax);
81 ASSERT(render->xmin <= render->xmax);
82 ASSERT(render->ymin <= render->ymax);
83 #endif
84 }
85
86
87 /** Check if the tile at (tx,ty) belongs to this SPU */
88 static INLINE boolean
89 my_tile(uint tx, uint ty)
90 {
91 return (spu.fb.width_tiles * ty + tx) % spu.init.num_spus == spu.init.id;
92 }
93
94
95 /**
96 * Start fetching non-clear color/Z tiles from main memory
97 */
98 static INLINE void
99 get_cz_tiles(uint tx, uint ty)
100 {
101 if (spu.read_depth) {
102 if (spu.cur_ztile_status != TILE_STATUS_CLEAR) {
103 //printf("SPU %u: getting Z tile %u, %u\n", spu.init.id, tx, ty);
104 get_tile(tx, ty, &spu.ztile, TAG_READ_TILE_Z, 1);
105 spu.cur_ztile_status = TILE_STATUS_GETTING;
106 }
107 }
108
109 if (spu.cur_ctile_status != TILE_STATUS_CLEAR) {
110 //printf("SPU %u: getting C tile %u, %u\n", spu.init.id, tx, ty);
111 get_tile(tx, ty, &spu.ctile, TAG_READ_TILE_COLOR, 0);
112 spu.cur_ctile_status = TILE_STATUS_GETTING;
113 }
114 }
115
116
117 /**
118 * Start putting dirty color/Z tiles back to main memory
119 */
120 static INLINE void
121 put_cz_tiles(uint tx, uint ty)
122 {
123 if (spu.cur_ztile_status == TILE_STATUS_DIRTY) {
124 /* tile was modified and needs to be written back */
125 //printf("SPU %u: put dirty Z tile %u, %u\n", spu.init.id, tx, ty);
126 put_tile(tx, ty, &spu.ztile, TAG_WRITE_TILE_Z, 1);
127 spu.cur_ztile_status = TILE_STATUS_DEFINED;
128 }
129 else if (spu.cur_ztile_status == TILE_STATUS_GETTING) {
130 /* tile was never used */
131 spu.cur_ztile_status = TILE_STATUS_DEFINED;
132 //printf("SPU %u: put getting Z tile %u, %u\n", spu.init.id, tx, ty);
133 }
134
135 if (spu.cur_ctile_status == TILE_STATUS_DIRTY) {
136 /* tile was modified and needs to be written back */
137 //printf("SPU %u: put dirty C tile %u, %u\n", spu.init.id, tx, ty);
138 put_tile(tx, ty, &spu.ctile, TAG_WRITE_TILE_COLOR, 0);
139 spu.cur_ctile_status = TILE_STATUS_DEFINED;
140 }
141 else if (spu.cur_ctile_status == TILE_STATUS_GETTING) {
142 /* tile was never used */
143 spu.cur_ctile_status = TILE_STATUS_DEFINED;
144 //printf("SPU %u: put getting C tile %u, %u\n", spu.init.id, tx, ty);
145 }
146 }
147
148
149 /**
150 * Wait for 'put' of color/z tiles to complete.
151 */
152 static INLINE void
153 wait_put_cz_tiles(void)
154 {
155 wait_on_mask(1 << TAG_WRITE_TILE_COLOR);
156 if (spu.read_depth) {
157 wait_on_mask(1 << TAG_WRITE_TILE_Z);
158 }
159 }
160
161
162 /**
163 * Render primitives
164 * \param pos_incr returns value indicating how may words to skip after
165 * this command in the batch buffer
166 */
167 void
168 cmd_render(const struct cell_command_render *render, uint *pos_incr)
169 {
170 /* we'll DMA into these buffers */
171 ubyte vertex_data[CELL_BUFFER_SIZE] ALIGN16_ATTRIB;
172 const uint vertex_size = render->vertex_size; /* in bytes */
173 /*const*/ uint total_vertex_bytes = render->num_verts * vertex_size;
174 uint index_bytes;
175 const ubyte *vertices;
176 const ushort *indexes;
177 uint i, j;
178
179
180 #if 0
181 printf("SPU %u: RENDER prim %u, num_vert=%u num_ind=%u "
182 "inline_vert=%u\n",
183 spu.init.id,
184 render->prim_type,
185 render->num_verts,
186 render->num_indexes,
187 render->inline_verts);
188
189 /*
190 printf(" bound: %g, %g .. %g, %g\n",
191 render->xmin, render->ymin, render->xmax, render->ymax);
192 */
193 #endif
194
195 ASSERT(sizeof(*render) % 4 == 0);
196 ASSERT(total_vertex_bytes % 16 == 0);
197 ASSERT(render->prim_type == PIPE_PRIM_TRIANGLES);
198 ASSERT(render->num_indexes % 3 == 0);
199
200
201 /* indexes are right after the render command in the batch buffer */
202 indexes = (const ushort *) (render + 1);
203 index_bytes = ROUNDUP8(render->num_indexes * 2);
204 *pos_incr = index_bytes / 8 + sizeof(*render) / 8;
205
206
207 if (render->inline_verts) {
208 /* Vertices are after indexes in batch buffer at next 16-byte addr */
209 vertices = (const ubyte *) render + (*pos_incr * 8);
210 vertices = (const ubyte *) align_pointer((void *) vertices, 16);
211 ASSERT_ALIGN16(vertices);
212 *pos_incr = ((vertices + total_vertex_bytes) - (ubyte *) render) / 8;
213 }
214 else {
215 /* Begin DMA fetch of vertex buffer */
216 ubyte *src = spu.init.buffers[render->vertex_buf];
217 ubyte *dest = vertex_data;
218
219 /* skip vertex data we won't use */
220 #if 01
221 src += render->min_index * vertex_size;
222 dest += render->min_index * vertex_size;
223 total_vertex_bytes -= render->min_index * vertex_size;
224 #endif
225 ASSERT(total_vertex_bytes % 16 == 0);
226 ASSERT_ALIGN16(dest);
227 ASSERT_ALIGN16(src);
228
229 mfc_get(dest, /* in vertex_data[] array */
230 (unsigned int) src, /* src in main memory */
231 total_vertex_bytes, /* size */
232 TAG_VERTEX_BUFFER,
233 0, /* tid */
234 0 /* rid */);
235
236 vertices = vertex_data;
237
238 wait_on_mask(1 << TAG_VERTEX_BUFFER);
239 }
240
241
242 /**
243 ** find tiles which intersect the prim bounding box
244 **/
245 uint txmin, tymin, box_width_tiles, box_num_tiles;
246 tile_bounding_box(render, &txmin, &tymin,
247 &box_num_tiles, &box_width_tiles);
248
249
250 /* make sure any pending clears have completed */
251 wait_on_mask(1 << TAG_SURFACE_CLEAR); /* XXX temporary */
252
253
254 /**
255 ** loop over tiles, rendering tris
256 **/
257 for (i = 0; i < box_num_tiles; i++) {
258 const uint tx = txmin + i % box_width_tiles;
259 const uint ty = tymin + i / box_width_tiles;
260
261 ASSERT(tx < spu.fb.width_tiles);
262 ASSERT(ty < spu.fb.height_tiles);
263
264 if (!my_tile(tx, ty))
265 continue;
266
267 spu.cur_ctile_status = spu.ctile_status[ty][tx];
268 spu.cur_ztile_status = spu.ztile_status[ty][tx];
269
270 get_cz_tiles(tx, ty);
271
272 uint drawn = 0;
273
274 /* loop over tris */
275 for (j = 0; j < render->num_indexes; j += 3) {
276 const float *v0, *v1, *v2;
277
278 v0 = (const float *) (vertices + indexes[j+0] * vertex_size);
279 v1 = (const float *) (vertices + indexes[j+1] * vertex_size);
280 v2 = (const float *) (vertices + indexes[j+2] * vertex_size);
281
282 drawn += tri_draw(v0, v1, v2, tx, ty);
283 }
284
285 //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3);
286
287 /* write color/z tiles back to main framebuffer, if dirtied */
288 put_cz_tiles(tx, ty);
289
290 wait_put_cz_tiles(); /* XXX seems unnecessary... */
291
292 spu.ctile_status[ty][tx] = spu.cur_ctile_status;
293 spu.ztile_status[ty][tx] = spu.cur_ztile_status;
294 }
295
296 #if 0
297 printf("SPU %u: RENDER done\n",
298 spu.init.id);
299 #endif
300 }