st/nine: Improve fallback when driver doesn't support user buffers.
[mesa.git] / src / gallium / state_trackers / nine / nine_ff.c
1
2 /* FF is big and ugly so feel free to write lines as long as you like.
3 * Aieeeeeeeee !
4 *
5 * Let me make that clearer:
6 * Aieeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee ! !! !!!
7 */
8
9 #include "device9.h"
10 #include "basetexture9.h"
11 #include "vertexdeclaration9.h"
12 #include "vertexshader9.h"
13 #include "pixelshader9.h"
14 #include "nine_ff.h"
15 #include "nine_defines.h"
16 #include "nine_helpers.h"
17 #include "nine_pipe.h"
18 #include "nine_dump.h"
19
20 #include "pipe/p_context.h"
21 #include "tgsi/tgsi_ureg.h"
22 #include "tgsi/tgsi_dump.h"
23 #include "util/u_box.h"
24 #include "util/u_hash_table.h"
25 #include "util/u_upload_mgr.h"
26
/* Defined, so the "#ifdef NINE_TGSI_LAZY_DEVS" paths in this file are taken:
 * NRM and CLAMP are emitted as DP3/RSQ/MUL and MAX/MIN sequences instead. */
#define NINE_TGSI_LAZY_DEVS 1

#define DBG_CHANNEL DBG_FF

/* NOTE(review): presumably the number of constant slots reserved for the
 * fixed-function vertex / pixel shaders -- confirm against the users. */
#define NINE_FF_NUM_VS_CONST 256
#define NINE_FF_NUM_PS_CONST 24

/* Texture coordinate generation modes, as stored per stage (3 bits each)
 * in nine_ff_vs_key.tc_gen. */
#define NINED3DTSS_TCI_DISABLE                       0
#define NINED3DTSS_TCI_PASSTHRU                      1
#define NINED3DTSS_TCI_CAMERASPACENORMAL             2
#define NINED3DTSS_TCI_CAMERASPACEPOSITION           3
#define NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR   4
#define NINED3DTSS_TCI_SPHEREMAP                     5
40
/* Four-component float vector. */
struct fvec4
{
    float x;
    float y;
    float z;
    float w;
};
45
/* Key identifying one fixed-function vertex shader variant.
 *
 * The bitfields describe the state the generated shader depends on; the
 * value64[] / value32[] views alias the same storage so the whole key can
 * be hashed and compared as plain words.
 */
struct nine_ff_vs_key
{
    union {
        struct {
            /* first 32-bit word */
            uint32_t position_t : 1;
            uint32_t lighting   : 1;
            uint32_t darkness   : 1; /* lighting enabled but no active lights */
            uint32_t localviewer : 1;
            uint32_t vertexpointsize : 1;
            uint32_t pointscale : 1;
            uint32_t vertexblend : 3;
            uint32_t vertexblend_indexed : 1;
            uint32_t vertextween : 1;
            uint32_t mtl_diffuse  : 2; /* 0 = material, 1 = color1, 2 = color2 */
            uint32_t mtl_ambient  : 2;
            uint32_t mtl_specular : 2;
            uint32_t mtl_emissive : 2;
            uint32_t fog_mode  : 2;
            uint32_t fog_range : 1;
            uint32_t color0in_one : 1;
            uint32_t color1in_one : 1;
            uint32_t pad1 : 8;
            /* second 32-bit word */
            uint32_t tc_gen : 24; /* 8 * 3 bits */
            uint32_t pad2 : 8;
            /* third 32-bit word */
            uint32_t tc_idx : 24;
            uint32_t pad3 : 8;
            /* fourth 32-bit word */
            uint32_t tc_dim : 24; /* 8 * 3 bits */
            uint32_t pad4 : 8;
        };
        uint64_t value64[2]; /* don't forget to resize VertexShader9.ff_key */
        uint32_t value32[4];
    };
};
79
/* Key identifying one fixed-function pixel shader variant.
 *
 * Per texture stage state, as declared in ts[]:
 *
 *   colorop      5 bit
 *   alphaop      5 bit
 *   colorarg0    3 bit
 *   colorarg1    3 bit
 *   colorarg2    3 bit
 *   alphaarg0    3 bit
 *   alphaarg1    3 bit
 *   alphaarg2    3 bit
 *   resultarg    1 bit (CURRENT:0 or TEMP:1)
 *   textarget    2 bit (1D/2D/3D/CUBE)
 *   projected    1 bit
 *   ===========================
 *               32 bit per stage
 *
 * The value64[] / value32[] views alias the same storage so the whole key
 * can be hashed and compared as plain words.
 */
struct nine_ff_ps_key
{
    union {
        struct {
            struct {
                uint32_t colorop   : 5;
                uint32_t alphaop   : 5;
                uint32_t colorarg0 : 3;
                uint32_t colorarg1 : 3;
                uint32_t colorarg2 : 3;
                uint32_t alphaarg0 : 3;
                uint32_t alphaarg1 : 3;
                uint32_t alphaarg2 : 3;
                uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */
                uint32_t textarget : 2; /* 1D/2D/3D/CUBE */
                uint32_t projected : 1;
                /* that's 32 bit exactly */
            } ts[8];
            uint32_t fog : 1; /* for vFog with programmable VS */
            uint32_t fog_mode : 2;
            uint32_t specular : 1; /* 9 32-bit words with this */
            /* NOTE(review): presumably extra per-stage argument bits
             * (3 bytes = 8 stages * 3 bits) -- confirm against the users. */
            uint8_t colorarg_b4[3];
            uint8_t colorarg_b5[3];
            uint8_t alphaarg_b4[3]; /* 11 32-bit words plus a byte */
        };
        uint64_t value64[6]; /* don't forget to resize PixelShader9.ff_key */
        uint32_t value32[12];
    };
};
124
125 static unsigned nine_ff_vs_key_hash(void *key)
126 {
127 struct nine_ff_vs_key *vs = key;
128 unsigned i;
129 uint32_t hash = vs->value32[0];
130 for (i = 1; i < Elements(vs->value32); ++i)
131 hash ^= vs->value32[i];
132 return hash;
133 }
134 static int nine_ff_vs_key_comp(void *key1, void *key2)
135 {
136 struct nine_ff_vs_key *a = (struct nine_ff_vs_key *)key1;
137 struct nine_ff_vs_key *b = (struct nine_ff_vs_key *)key2;
138
139 return memcmp(a->value64, b->value64, sizeof(a->value64));
140 }
141 static unsigned nine_ff_ps_key_hash(void *key)
142 {
143 struct nine_ff_ps_key *ps = key;
144 unsigned i;
145 uint32_t hash = ps->value32[0];
146 for (i = 1; i < Elements(ps->value32); ++i)
147 hash ^= ps->value32[i];
148 return hash;
149 }
150 static int nine_ff_ps_key_comp(void *key1, void *key2)
151 {
152 struct nine_ff_ps_key *a = (struct nine_ff_ps_key *)key1;
153 struct nine_ff_ps_key *b = (struct nine_ff_ps_key *)key2;
154
155 return memcmp(a->value64, b->value64, sizeof(a->value64));
156 }
157 static unsigned nine_ff_fvf_key_hash(void *key)
158 {
159 return *(DWORD *)key;
160 }
161 static int nine_ff_fvf_key_comp(void *key1, void *key2)
162 {
163 return *(DWORD *)key1 != *(DWORD *)key2;
164 }
165
/* Forward declarations; the definitions are not in this part of the file. */
static void nine_ff_prune_vs(struct NineDevice9 *device);
static void nine_ff_prune_ps(struct NineDevice9 *device);
168
169 static void nine_ureg_tgsi_dump(struct ureg_program *ureg, boolean override)
170 {
171 if (debug_get_bool_option("NINE_FF_DUMP", FALSE) || override) {
172 unsigned count;
173 const struct tgsi_token *toks = ureg_get_tokens(ureg, &count);
174 tgsi_dump(toks, 0);
175 ureg_free_tokens(toks);
176 }
177 }
178
/* Scalar (replicated single component) source read of a ureg_dst register. */
#define _X(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_X)
#define _Y(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Y)
#define _Z(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Z)
#define _W(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_W)

/* Same, for an operand that already is a ureg_src. */
#define _XXXX(r) ureg_scalar(r, TGSI_SWIZZLE_X)
#define _YYYY(r) ureg_scalar(r, TGSI_SWIZZLE_Y)
#define _ZZZZ(r) ureg_scalar(r, TGSI_SWIZZLE_Z)
#define _WWWW(r) ureg_scalar(r, TGSI_SWIZZLE_W)

/* Identity; only there so full-vector reads line up with the scalar ones. */
#define _XYZW(r) (r)

/* Constant i of the current light, addressed indirectly.
 * AL should contain base address of lights table.
 * Expects `ureg` and `AL` to be in scope at the point of use. */
#define LIGHT_CONST(i)                                        \
    ureg_src_indirect(ureg_DECL_constant(ureg, i), _X(AL))

/* Material constant i; the material constants start at CONST[19].
 * Expects `ureg` to be in scope at the point of use. */
#define MATERIAL_CONST(i)                                     \
    ureg_DECL_constant(ureg, 19 + (i))

/* Plain constant n. Expects `ureg` to be in scope at the point of use. */
#define _CONST(n) ureg_DECL_constant(ureg, n)
199
200 /* VS FF constants layout:
201 *
202 * CONST[ 0.. 3] D3DTS_WORLD * D3DTS_VIEW * D3DTS_PROJECTION
203 * CONST[ 4.. 7] D3DTS_WORLD * D3DTS_VIEW
204 * CONST[ 8..11] D3DTS_VIEW * D3DTS_PROJECTION
205 * CONST[12..15] D3DTS_VIEW
206 * CONST[16..18] Normal matrix
207 *
208 * CONST[19] MATERIAL.Emissive + Material.Ambient * RS.Ambient
209 * CONST[20] MATERIAL.Diffuse
210 * CONST[21] MATERIAL.Ambient
211 * CONST[22] MATERIAL.Specular
212 * CONST[23].x___ MATERIAL.Power
213 * CONST[24] MATERIAL.Emissive
214 * CONST[25] RS.Ambient
215 *
216 * CONST[26].x___ RS.PointSizeMin
217 * CONST[26]._y__ RS.PointSizeMax
218 * CONST[26].__z_ RS.PointSize
219 * CONST[26].___w RS.PointScaleA
220 * CONST[27].x___ RS.PointScaleB
221 * CONST[27]._y__ RS.PointScaleC
222 *
223 * CONST[28].x___ RS.FogEnd
224 * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
225 * CONST[28].__z_ RS.FogDensity
226 * CONST[29] RS.FogColor
227
228 * CONST[30].x___ TWEENFACTOR
229 *
230 * CONST[32].x___ LIGHT[0].Type
231 * CONST[32]._yzw LIGHT[0].Attenuation0,1,2
232 * CONST[33] LIGHT[0].Diffuse
233 * CONST[34] LIGHT[0].Specular
234 * CONST[35] LIGHT[0].Ambient
235 * CONST[36].xyz_ LIGHT[0].Position
236 * CONST[36].___w LIGHT[0].Range
237 * CONST[37].xyz_ LIGHT[0].Direction
238 * CONST[37].___w LIGHT[0].Falloff
239 * CONST[38].x___ cos(LIGHT[0].Theta / 2)
240 * CONST[38]._y__ cos(LIGHT[0].Phi / 2)
241 * CONST[38].__z_ 1.0f / (cos(LIGHT[0].Theta / 2) - cos(Light[0].Phi / 2))
242 * CONST[39].xyz_ LIGHT[0].HalfVector (for directional lights)
243 * CONST[39].___w 1 if this is the last active light, 0 if not
244 * CONST[40] LIGHT[1]
245 * CONST[48] LIGHT[2]
246 * CONST[56] LIGHT[3]
247 * CONST[64] LIGHT[4]
248 * CONST[72] LIGHT[5]
249 * CONST[80] LIGHT[6]
250 * CONST[88] LIGHT[7]
251 * NOTE: no lighting code is generated if there are no active lights
252 *
253 * CONST[100].x___ Viewport 2/width
254 * CONST[100]._y__ Viewport 2/height
255 * CONST[100].__z_ Viewport 1/(zmax - zmin)
256 * CONST[101].x___ Viewport x0
257 * CONST[101]._y__ Viewport y0
258 * CONST[101].__z_ Viewport z0
259 *
260 * CONST[128..131] D3DTS_TEXTURE0
261 * CONST[132..135] D3DTS_TEXTURE1
262 * CONST[136..139] D3DTS_TEXTURE2
263 * CONST[140..143] D3DTS_TEXTURE3
264 * CONST[144..147] D3DTS_TEXTURE4
265 * CONST[148..151] D3DTS_TEXTURE5
266 * CONST[152..155] D3DTS_TEXTURE6
267 * CONST[156..159] D3DTS_TEXTURE7
268 *
269 * CONST[224] D3DTS_WORLDMATRIX[0]
270 * CONST[228] D3DTS_WORLDMATRIX[1]
271 * ...
272 * CONST[252] D3DTS_WORLDMATRIX[7]
273 */
274 struct vs_build_ctx
275 {
276 struct ureg_program *ureg;
277 const struct nine_ff_vs_key *key;
278
279 uint16_t input[PIPE_MAX_ATTRIBS];
280 unsigned num_inputs;
281
282 struct ureg_src aVtx;
283 struct ureg_src aNrm;
284 struct ureg_src aCol[2];
285 struct ureg_src aTex[8];
286 struct ureg_src aPsz;
287 struct ureg_src aInd;
288 struct ureg_src aWgt;
289
290 struct ureg_src aVtx1; /* tweening */
291 struct ureg_src aNrm1;
292
293 struct ureg_src mtlA;
294 struct ureg_src mtlD;
295 struct ureg_src mtlS;
296 struct ureg_src mtlE;
297 };
298
299 static inline unsigned
300 get_texcoord_sn(struct pipe_screen *screen)
301 {
302 if (screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD))
303 return TGSI_SEMANTIC_TEXCOORD;
304 return TGSI_SEMANTIC_GENERIC;
305 }
306
307 static inline struct ureg_src
308 build_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl)
309 {
310 const unsigned i = vs->num_inputs++;
311 assert(i < PIPE_MAX_ATTRIBS);
312 vs->input[i] = ndecl;
313 return ureg_DECL_vs_input(vs->ureg, i);
314 }
315
316 /* NOTE: dst may alias src */
317 static inline void
318 ureg_normalize3(struct ureg_program *ureg,
319 struct ureg_dst dst, struct ureg_src src,
320 struct ureg_dst tmp)
321 {
322 #ifdef NINE_TGSI_LAZY_DEVS
323 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
324
325 ureg_DP3(ureg, tmp_x, src, src);
326 ureg_RSQ(ureg, tmp_x, _X(tmp));
327 ureg_MUL(ureg, dst, src, _X(tmp));
328 #else
329 ureg_NRM(ureg, dst, src);
330 #endif
331 }
332
333 static void *
334 nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
335 {
336 const struct nine_ff_vs_key *key = vs->key;
337 struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
338 struct ureg_dst oPos, oCol[2], oTex[8], oPsz, oFog;
339 struct ureg_dst rCol[2]; /* oCol if no fog, TEMP otherwise */
340 struct ureg_dst rVtx, rNrm;
341 struct ureg_dst r[8];
342 struct ureg_dst AR;
343 struct ureg_dst tmp, tmp_x, tmp_z;
344 unsigned i, c;
345 unsigned label[32], l = 0;
346 unsigned num_r = 8;
347 boolean need_rNrm = key->lighting || key->pointscale;
348 boolean need_rVtx = key->lighting || key->fog_mode;
349 const unsigned texcoord_sn = get_texcoord_sn(device->screen);
350
351 vs->ureg = ureg;
352
353 /* Check which inputs we should transform. */
354 for (i = 0; i < 8 * 3; i += 3) {
355 switch ((key->tc_gen >> i) & 0x3) {
356 case NINED3DTSS_TCI_CAMERASPACENORMAL:
357 need_rNrm = TRUE;
358 break;
359 case NINED3DTSS_TCI_CAMERASPACEPOSITION:
360 need_rVtx = TRUE;
361 break;
362 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
363 need_rVtx = need_rNrm = TRUE;
364 break;
365 default:
366 break;
367 }
368 }
369
370 /* Declare and record used inputs (needed for linkage with vertex format):
371 * (texture coordinates handled later)
372 */
373 vs->aVtx = build_vs_add_input(vs,
374 key->position_t ? NINE_DECLUSAGE_POSITIONT : NINE_DECLUSAGE_POSITION);
375
376 if (need_rNrm)
377 vs->aNrm = build_vs_add_input(vs, NINE_DECLUSAGE_NORMAL);
378
379 vs->aCol[0] = ureg_imm1f(ureg, 1.0f);
380 vs->aCol[1] = ureg_imm1f(ureg, 1.0f);
381
382 if (key->lighting || key->darkness) {
383 const unsigned mask = key->mtl_diffuse | key->mtl_specular |
384 key->mtl_ambient | key->mtl_emissive;
385 if ((mask & 0x1) && !key->color0in_one)
386 vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
387 if ((mask & 0x2) && !key->color1in_one)
388 vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
389
390 vs->mtlD = MATERIAL_CONST(1);
391 vs->mtlA = MATERIAL_CONST(2);
392 vs->mtlS = MATERIAL_CONST(3);
393 vs->mtlE = MATERIAL_CONST(5);
394 if (key->mtl_diffuse == 1) vs->mtlD = vs->aCol[0]; else
395 if (key->mtl_diffuse == 2) vs->mtlD = vs->aCol[1];
396 if (key->mtl_ambient == 1) vs->mtlA = vs->aCol[0]; else
397 if (key->mtl_ambient == 2) vs->mtlA = vs->aCol[1];
398 if (key->mtl_specular == 1) vs->mtlS = vs->aCol[0]; else
399 if (key->mtl_specular == 2) vs->mtlS = vs->aCol[1];
400 if (key->mtl_emissive == 1) vs->mtlE = vs->aCol[0]; else
401 if (key->mtl_emissive == 2) vs->mtlE = vs->aCol[1];
402 } else {
403 if (!key->color0in_one) vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
404 if (!key->color1in_one) vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
405 }
406
407 if (key->vertexpointsize)
408 vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE);
409
410 if (key->vertexblend_indexed)
411 vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES);
412 if (key->vertexblend)
413 vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT);
414 if (key->vertextween) {
415 vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1));
416 vs->aNrm1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(NORMAL,1));
417 }
418
419 /* Declare outputs:
420 */
421 oPos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */
422 oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0));
423 oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1));
424
425 if (key->vertexpointsize || key->pointscale) {
426 oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0,
427 TGSI_WRITEMASK_X, 0, 1);
428 oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X);
429 }
430 if (key->fog_mode) {
431 /* We apply fog to the vertex colors, oFog is for programmable shaders only ?
432 */
433 oFog = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_FOG, 0,
434 TGSI_WRITEMASK_X, 0, 1);
435 oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X);
436 }
437
438 /* Declare TEMPs:
439 */
440 for (i = 0; i < num_r; ++i)
441 r[i] = ureg_DECL_local_temporary(ureg);
442 tmp = r[0];
443 tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
444 tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
445 if (key->lighting || key->vertexblend)
446 AR = ureg_DECL_address(ureg);
447
448 if (key->fog_mode) {
449 rCol[0] = r[2];
450 rCol[1] = r[3];
451 } else {
452 rCol[0] = oCol[0];
453 rCol[1] = oCol[1];
454 }
455
456 rVtx = ureg_writemask(r[1], TGSI_WRITEMASK_XYZ);
457 rNrm = ureg_writemask(r[2], TGSI_WRITEMASK_XYZ);
458
459 /* === Vertex transformation / vertex blending:
460 */
461 if (key->vertextween) {
462 assert(!key->vertexblend);
463 ureg_LRP(ureg, r[2], _XXXX(_CONST(30)), vs->aVtx, vs->aVtx1);
464 if (need_rNrm)
465 ureg_LRP(ureg, r[3], _XXXX(_CONST(30)), vs->aNrm, vs->aNrm1);
466 vs->aVtx = ureg_src(r[2]);
467 vs->aNrm = ureg_src(r[3]);
468 }
469
470 if (key->vertexblend) {
471 struct ureg_src cWM[4];
472
473 for (i = 224; i <= 255; ++i)
474 ureg_DECL_constant(ureg, i);
475
476 /* translate world matrix index to constant file index */
477 if (key->vertexblend_indexed) {
478 ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 224.0f));
479 ureg_ARL(ureg, AR, ureg_src(tmp));
480 }
481 for (i = 0; i < key->vertexblend; ++i) {
482 for (c = 0; c < 4; ++c) {
483 cWM[c] = ureg_src_register(TGSI_FILE_CONSTANT, (224 + i * 4) * !key->vertexblend_indexed + c);
484 if (key->vertexblend_indexed)
485 cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i));
486 }
487 /* multiply by WORLD(index) */
488 ureg_MUL(ureg, r[0], _XXXX(vs->aVtx), cWM[0]);
489 ureg_MAD(ureg, r[0], _YYYY(vs->aVtx), cWM[1], ureg_src(r[0]));
490 ureg_MAD(ureg, r[0], _ZZZZ(vs->aVtx), cWM[2], ureg_src(r[0]));
491 ureg_MAD(ureg, r[0], _WWWW(vs->aVtx), cWM[3], ureg_src(r[0]));
492
493 /* accumulate weighted position value */
494 if (i)
495 ureg_MAD(ureg, r[2], ureg_src(r[0]), ureg_scalar(vs->aWgt, i), ureg_src(r[2]));
496 else
497 ureg_MUL(ureg, r[2], ureg_src(r[0]), ureg_scalar(vs->aWgt, 0));
498 }
499 /* multiply by VIEW_PROJ */
500 ureg_MUL(ureg, r[0], _X(r[2]), _CONST(8));
501 ureg_MAD(ureg, r[0], _Y(r[2]), _CONST(9), ureg_src(r[0]));
502 ureg_MAD(ureg, r[0], _Z(r[2]), _CONST(10), ureg_src(r[0]));
503 ureg_MAD(ureg, oPos, _W(r[2]), _CONST(11), ureg_src(r[0]));
504
505 if (need_rVtx)
506 vs->aVtx = ureg_src(r[2]);
507 } else
508 if (key->position_t && device->driver_caps.window_space_position_support) {
509 ureg_MOV(ureg, oPos, vs->aVtx);
510 } else if (key->position_t) {
511 /* vs->aVtx contains the coordinates buffer wise.
512 * later in the pipeline, clipping, viewport and division
513 * by w (rhw = 1/w) are going to be applied, so do the reverse
514 * of these transformations (except clipping) to have the good
515 * position at the end.*/
516 ureg_MOV(ureg, tmp, vs->aVtx);
517 /* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */
518 ureg_SUB(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(101));
519 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100));
520 ureg_SUB(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 1.0f));
521 /* Y needs to be reversed */
522 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp)));
523 /* inverse rhw */
524 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), _W(tmp));
525 /* multiply X, Y, Z by w */
526 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _W(tmp));
527 ureg_MOV(ureg, oPos, ureg_src(tmp));
528 } else {
529 /* position = vertex * WORLD_VIEW_PROJ */
530 ureg_MUL(ureg, r[0], _XXXX(vs->aVtx), _CONST(0));
531 ureg_MAD(ureg, r[0], _YYYY(vs->aVtx), _CONST(1), ureg_src(r[0]));
532 ureg_MAD(ureg, r[0], _ZZZZ(vs->aVtx), _CONST(2), ureg_src(r[0]));
533 ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(r[0]));
534 }
535
536 if (need_rVtx) {
537 ureg_MUL(ureg, rVtx, _XXXX(vs->aVtx), _CONST(4));
538 ureg_MAD(ureg, rVtx, _YYYY(vs->aVtx), _CONST(5), ureg_src(rVtx));
539 ureg_MAD(ureg, rVtx, _ZZZZ(vs->aVtx), _CONST(6), ureg_src(rVtx));
540 ureg_MAD(ureg, rVtx, _WWWW(vs->aVtx), _CONST(7), ureg_src(rVtx));
541 }
542 if (need_rNrm) {
543 ureg_MUL(ureg, rNrm, _XXXX(vs->aNrm), _CONST(16));
544 ureg_MAD(ureg, rNrm, _YYYY(vs->aNrm), _CONST(17), ureg_src(rNrm));
545 ureg_MAD(ureg, rNrm, _ZZZZ(vs->aNrm), _CONST(18), ureg_src(rNrm));
546 ureg_normalize3(ureg, rNrm, ureg_src(rNrm), tmp);
547 }
548 /* NOTE: don't use vs->aVtx, vs->aNrm after this line */
549
550 /* === Process point size:
551 */
552 if (key->vertexpointsize) {
553 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
554 #ifdef NINE_TGSI_LAZY_DEVS
555 struct ureg_dst tmp_clamp = ureg_DECL_temporary(ureg);
556
557 ureg_MAX(ureg, tmp_clamp, vs->aPsz, _XXXX(cPsz1));
558 ureg_MIN(ureg, oPsz, ureg_src(tmp_clamp), _YYYY(cPsz1));
559 ureg_release_temporary(ureg, tmp_clamp);
560 #else
561 ureg_CLAMP(ureg, oPsz, vs->aPsz, _XXXX(cPsz1), _YYYY(cPsz1));
562 #endif
563 } else if (key->pointscale) {
564 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
565 struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
566 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
567 struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27);
568
569 ureg_DP3(ureg, tmp_x, ureg_src(r[1]), ureg_src(r[1]));
570 ureg_SQRT(ureg, tmp_y, _X(tmp));
571 ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2));
572 ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1));
573 ureg_RCP(ureg, tmp_x, ureg_src(tmp));
574 ureg_MUL(ureg, tmp_x, ureg_src(tmp), _ZZZZ(cPsz1));
575 #ifdef NINE_TGSI_LAZY_DEVS
576 struct ureg_dst tmp_clamp = ureg_DECL_temporary(ureg);
577
578 ureg_MAX(ureg, tmp_clamp, _X(tmp), _XXXX(cPsz1));
579 ureg_MIN(ureg, oPsz, ureg_src(tmp_clamp), _YYYY(cPsz1));
580 ureg_release_temporary(ureg, tmp_clamp);
581 #else
582 ureg_CLAMP(ureg, oPsz, _X(tmp), _XXXX(cPsz1), _YYYY(cPsz1));
583 #endif
584 }
585
586 /* Texture coordinate generation:
587 * XXX: D3DTTFF_PROJECTED, transform matrix
588 */
589 for (i = 0; i < 8; ++i) {
590 struct ureg_dst dst[5];
591 struct ureg_src src;
592 unsigned c;
593 const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7;
594 const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7;
595 const unsigned dim = (key->tc_dim >> (i * 3)) & 0x7;
596
597 if (tci == NINED3DTSS_TCI_DISABLE)
598 continue;
599 oTex[i] = ureg_DECL_output(ureg, texcoord_sn, i);
600
601 if (tci == NINED3DTSS_TCI_PASSTHRU)
602 vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx));
603
604 if (!dim) {
605 dst[c = 4] = oTex[i];
606 } else {
607 dst[4] = r[5];
608 src = ureg_src(dst[4]);
609 for (c = 0; c < (dim - 1); ++c)
610 dst[c] = ureg_writemask(tmp, (1 << dim) - 1);
611 dst[c] = ureg_writemask(oTex[i], (1 << dim) - 1);
612 }
613
614 switch (tci) {
615 case NINED3DTSS_TCI_PASSTHRU:
616 ureg_MOV(ureg, dst[4], vs->aTex[idx]);
617 break;
618 case NINED3DTSS_TCI_CAMERASPACENORMAL:
619 assert(dim <= 3);
620 ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_XYZ), ureg_src(rNrm));
621 ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
622 break;
623 case NINED3DTSS_TCI_CAMERASPACEPOSITION:
624 ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_XYZ), ureg_src(rVtx));
625 ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
626 break;
627 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
628 tmp.WriteMask = TGSI_WRITEMASK_XYZ;
629 ureg_DP3(ureg, tmp_x, ureg_src(rVtx), ureg_src(rNrm));
630 ureg_MUL(ureg, tmp, ureg_src(rNrm), _X(tmp));
631 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
632 ureg_SUB(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_XYZ), ureg_src(rVtx), ureg_src(tmp));
633 ureg_MOV(ureg, ureg_writemask(dst[4], TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
634 tmp.WriteMask = TGSI_WRITEMASK_XYZW;
635 break;
636 case NINED3DTSS_TCI_SPHEREMAP:
637 assert(!"TODO");
638 break;
639 default:
640 break;
641 }
642 if (!dim)
643 continue;
644 dst[c].WriteMask = ~dst[c].WriteMask;
645 if (dst[c].WriteMask)
646 ureg_MOV(ureg, dst[c], src); /* store untransformed components */
647 dst[c].WriteMask = ~dst[c].WriteMask;
648 if (dim > 0) ureg_MUL(ureg, dst[0], _XXXX(src), _CONST(128 + i * 4));
649 if (dim > 1) ureg_MAD(ureg, dst[1], _YYYY(src), _CONST(129 + i * 4), ureg_src(tmp));
650 if (dim > 2) ureg_MAD(ureg, dst[2], _ZZZZ(src), _CONST(130 + i * 4), ureg_src(tmp));
651 if (dim > 3) ureg_MAD(ureg, dst[3], _WWWW(src), _CONST(131 + i * 4), ureg_src(tmp));
652 }
653
654 /* === Lighting:
655 *
656 * DIRECTIONAL: Light at infinite distance, parallel rays, no attenuation.
657 * POINT: Finite distance to scene, divergent rays, isotropic, attenuation.
658 * SPOT: Finite distance, divergent rays, angular dependence, attenuation.
659 *
660 * vec3 normal = normalize(in.Normal * NormalMatrix);
661 * vec3 hitDir = light.direction;
662 * float atten = 1.0;
663 *
664 * if (light.type != DIRECTIONAL)
665 * {
666 * vec3 hitVec = light.position - eyeVertex;
667 * float d = length(hitVec);
668 * hitDir = hitVec / d;
669 * atten = 1 / ((light.atten2 * d + light.atten1) * d + light.atten0);
670 * }
671 *
672 * if (light.type == SPOTLIGHT)
673 * {
674 * float rho = dp3(-hitVec, light.direction);
675 * if (rho < cos(light.phi / 2))
676 * atten = 0;
677 * if (rho < cos(light.theta / 2))
678 * atten *= pow(some_func(rho), light.falloff);
679 * }
680 *
681 * float nDotHit = dp3_sat(normal, hitVec);
682 * float powFact = 0.0;
683 *
684 * if (nDotHit > 0.0)
685 * {
686 * vec3 midVec = normalize(hitDir + eye);
687 * float nDotMid = dp3_sat(normal, midVec);
688 * pFact = pow(nDotMid, material.power);
689 * }
690 *
691 * ambient += light.ambient * atten;
692 * diffuse += light.diffuse * atten * nDotHit;
693 * specular += light.specular * atten * powFact;
694 */
695 if (key->lighting) {
696 struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
697
698 struct ureg_dst rAtt = ureg_writemask(r[1], TGSI_WRITEMASK_W);
699 struct ureg_dst rHit = ureg_writemask(r[3], TGSI_WRITEMASK_XYZ);
700 struct ureg_dst rMid = ureg_writemask(r[4], TGSI_WRITEMASK_XYZ);
701
702 struct ureg_dst rCtr = ureg_writemask(r[2], TGSI_WRITEMASK_W);
703
704 struct ureg_dst AL = ureg_writemask(AR, TGSI_WRITEMASK_X);
705
706 /* Light.*.Alpha is not used. */
707 struct ureg_dst rD = ureg_writemask(r[5], TGSI_WRITEMASK_XYZ);
708 struct ureg_dst rA = ureg_writemask(r[6], TGSI_WRITEMASK_XYZ);
709 struct ureg_dst rS = ureg_writemask(r[7], TGSI_WRITEMASK_XYZ);
710
711 struct ureg_src mtlP = _XXXX(MATERIAL_CONST(4));
712
713 struct ureg_src cLKind = _XXXX(LIGHT_CONST(0));
714 struct ureg_src cLAtt0 = _YYYY(LIGHT_CONST(0));
715 struct ureg_src cLAtt1 = _ZZZZ(LIGHT_CONST(0));
716 struct ureg_src cLAtt2 = _WWWW(LIGHT_CONST(0));
717 struct ureg_src cLColD = _XYZW(LIGHT_CONST(1));
718 struct ureg_src cLColS = _XYZW(LIGHT_CONST(2));
719 struct ureg_src cLColA = _XYZW(LIGHT_CONST(3));
720 struct ureg_src cLPos = _XYZW(LIGHT_CONST(4));
721 struct ureg_src cLRng = _WWWW(LIGHT_CONST(4));
722 struct ureg_src cLDir = _XYZW(LIGHT_CONST(5));
723 struct ureg_src cLFOff = _WWWW(LIGHT_CONST(5));
724 struct ureg_src cLTht = _XXXX(LIGHT_CONST(6));
725 struct ureg_src cLPhi = _YYYY(LIGHT_CONST(6));
726 struct ureg_src cLSDiv = _ZZZZ(LIGHT_CONST(6));
727 struct ureg_src cLLast = _WWWW(LIGHT_CONST(7));
728
729 const unsigned loop_label = l++;
730
731 ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */
732 ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f));
733 ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f));
734 ureg_MOV(ureg, rS, ureg_imm1f(ureg, 0.0f));
735 rD = ureg_saturate(rD);
736 rA = ureg_saturate(rA);
737 rS = ureg_saturate(rS);
738
739
740 /* loop management */
741 ureg_BGNLOOP(ureg, &label[loop_label]);
742 ureg_ARL(ureg, AL, _W(rCtr));
743
744 /* if (not DIRECTIONAL light): */
745 ureg_SNE(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_DIRECTIONAL));
746 ureg_MOV(ureg, rHit, ureg_negate(cLDir));
747 ureg_MOV(ureg, rAtt, ureg_imm1f(ureg, 1.0f));
748 ureg_IF(ureg, _X(tmp), &label[l++]);
749 {
750 /* hitDir = light.position - eyeVtx
751 * d = length(hitDir)
752 * hitDir /= d
753 */
754 ureg_SUB(ureg, rHit, cLPos, ureg_src(rVtx));
755 ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit));
756 ureg_RSQ(ureg, tmp_y, _X(tmp));
757 ureg_MUL(ureg, rHit, ureg_src(rHit), _Y(tmp)); /* normalize */
758 ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */
759
760 /* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */
761 ureg_MAD(ureg, rAtt, _X(tmp), cLAtt2, cLAtt1);
762 ureg_MAD(ureg, rAtt, _X(tmp), _W(rAtt), cLAtt0);
763 ureg_RCP(ureg, rAtt, _W(rAtt));
764 /* cut-off if distance exceeds Light.Range */
765 ureg_SLT(ureg, tmp_x, _X(tmp), cLRng);
766 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
767 }
768 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
769 ureg_ENDIF(ureg);
770
771 /* if (SPOT light) */
772 ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT));
773 ureg_IF(ureg, _X(tmp), &label[l++]);
774 {
775 /* rho = dp3(-hitDir, light.spotDir)
776 *
777 * if (rho > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi
778 * spotAtt = 1
779 * else
780 * if (rho <= light.cphi2)
781 * spotAtt = 0
782 * else
783 * spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff
784 */
785 ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */
786 ureg_SUB(ureg, tmp_x, _Y(tmp), cLPhi);
787 ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv);
788 ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */
789 ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */
790 ureg_SGE(ureg, tmp_y, _Y(tmp), cLPhi); /* if inside phi */
791 ureg_MAD(ureg, ureg_saturate(tmp_x), _X(tmp), _Y(tmp), _Z(tmp));
792 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
793 }
794 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
795 ureg_ENDIF(ureg);
796
797 /* directional factors, let's not use LIT because of clarity */
798 ureg_DP3(ureg, ureg_saturate(tmp_x), ureg_src(rNrm), ureg_src(rHit));
799 ureg_MOV(ureg, tmp_y, ureg_imm1f(ureg, 0.0f));
800 ureg_IF(ureg, _X(tmp), &label[l++]);
801 {
802 /* midVec = normalize(hitDir + eyeDir) */
803 if (key->localviewer) {
804 ureg_normalize3(ureg, rMid, ureg_src(rVtx), tmp);
805 ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_negate(ureg_src(rMid)));
806 } else {
807 ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, 1.0f));
808 }
809 ureg_normalize3(ureg, rMid, ureg_src(rMid), tmp);
810 ureg_DP3(ureg, ureg_saturate(tmp_y), ureg_src(rNrm), ureg_src(rMid));
811 ureg_POW(ureg, tmp_y, _Y(tmp), mtlP);
812
813 ureg_MUL(ureg, tmp_x, _W(rAtt), _X(tmp)); /* dp3(normal,hitDir) * att */
814 ureg_MUL(ureg, tmp_y, _W(rAtt), _Y(tmp)); /* power factor * att */
815 ureg_MAD(ureg, rD, cLColD, _X(tmp), ureg_src(rD)); /* accumulate diffuse */
816 ureg_MAD(ureg, rS, cLColS, _Y(tmp), ureg_src(rS)); /* accumulate specular */
817 }
818 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
819 ureg_ENDIF(ureg);
820
821 ureg_MAD(ureg, rA, cLColA, _W(rAtt), ureg_src(rA)); /* accumulate ambient */
822
823 /* break if this was the last light */
824 ureg_IF(ureg, cLLast, &label[l++]);
825 ureg_BRK(ureg);
826 ureg_ENDIF(ureg);
827 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
828
829 ureg_ADD(ureg, rCtr, _W(rCtr), ureg_imm1f(ureg, 8.0f));
830 ureg_fixup_label(ureg, label[loop_label], ureg_get_instruction_number(ureg));
831 ureg_ENDLOOP(ureg, &label[loop_label]);
832
833 /* Set alpha factors of illumination to 1.0 for the multiplications. */
834 rD.WriteMask = TGSI_WRITEMASK_W; rD.Saturate = 0;
835 rS.WriteMask = TGSI_WRITEMASK_W; rS.Saturate = 0;
836 rA.WriteMask = TGSI_WRITEMASK_W; rA.Saturate = 0;
837 ureg_MOV(ureg, rD, ureg_imm1f(ureg, 1.0f));
838 ureg_MOV(ureg, rS, ureg_imm1f(ureg, 1.0f));
839
840 /* Apply to material:
841 *
842 * oCol[0] = (material.emissive + material.ambient * rs.ambient) +
843 * material.ambient * ambient +
844 * material.diffuse * diffuse +
845 * oCol[1] = material.specular * specular;
846 */
847 if (key->mtl_emissive == 0 && key->mtl_ambient == 0) {
848 ureg_MOV(ureg, rA, ureg_imm1f(ureg, 1.0f));
849 ureg_MAD(ureg, tmp, ureg_src(rA), vs->mtlA, _CONST(19));
850 } else {
851 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), _CONST(25));
852 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE);
853 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W ), vs->mtlA, vs->mtlE);
854 }
855 ureg_MAD(ureg, rCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp));
856 ureg_MUL(ureg, rCol[1], ureg_src(rS), vs->mtlS);
857 } else
858 /* COLOR */
859 if (key->darkness) {
860 if (key->mtl_emissive == 0 && key->mtl_ambient == 0) {
861 ureg_MAD(ureg, rCol[0], vs->mtlD, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), _CONST(19));
862 } else {
863 ureg_MAD(ureg, ureg_writemask(rCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE);
864 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), vs->mtlA, vs->mtlE);
865 ureg_ADD(ureg, ureg_writemask(rCol[0], TGSI_WRITEMASK_W), vs->mtlD, _W(tmp));
866 }
867 ureg_MUL(ureg, rCol[1], ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), vs->mtlS);
868 } else {
869 ureg_MOV(ureg, rCol[0], vs->aCol[0]);
870 ureg_MOV(ureg, rCol[1], vs->aCol[1]);
871 }
872
873 /* === Process fog.
874 *
875 * exp(x) = ex2(log2(e) * x)
876 */
877 if (key->fog_mode) {
878 /* Fog doesn't affect alpha, TODO: combine with light code output */
879 ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), _W(rCol[0]));
880 ureg_MOV(ureg, ureg_writemask(oCol[1], TGSI_WRITEMASK_W), _W(rCol[1]));
881
882 if (key->position_t) {
883 ureg_MOV(ureg, ureg_saturate(tmp_x), ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));
884 } else
885 if (key->fog_range) {
886 ureg_DP3(ureg, tmp_x, ureg_src(rVtx), ureg_src(rVtx));
887 ureg_RSQ(ureg, tmp_z, _X(tmp));
888 ureg_MUL(ureg, tmp_z, _Z(tmp), _X(tmp));
889 } else {
890 ureg_MOV(ureg, tmp_z, ureg_abs(_Z(rVtx)));
891 }
892
893 if (key->fog_mode == D3DFOG_EXP) {
894 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
895 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
896 ureg_EX2(ureg, tmp_x, _X(tmp));
897 } else
898 if (key->fog_mode == D3DFOG_EXP2) {
899 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
900 ureg_MUL(ureg, tmp_x, _X(tmp), _X(tmp));
901 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
902 ureg_EX2(ureg, tmp_x, _X(tmp));
903 } else
904 if (key->fog_mode == D3DFOG_LINEAR && !key->position_t) {
905 ureg_SUB(ureg, tmp_x, _XXXX(_CONST(28)), _Z(tmp));
906 ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28)));
907 }
908 ureg_MOV(ureg, oFog, _X(tmp));
909 ureg_LRP(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), _X(tmp), ureg_src(rCol[0]), _CONST(29));
910 ureg_LRP(ureg, ureg_writemask(oCol[1], TGSI_WRITEMASK_XYZ), _X(tmp), ureg_src(rCol[1]), _CONST(29));
911 }
912
913 if (key->position_t && device->driver_caps.window_space_position_support)
914 ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
915
916 ureg_END(ureg);
917 nine_ureg_tgsi_dump(ureg, FALSE);
918 return ureg_create_shader_and_destroy(ureg, device->pipe);
919 }
920
921 /* PS FF constants layout:
922 *
923 * CONST[ 0.. 7] stage[i].D3DTSS_CONSTANT
924 * CONST[ 8..15].x___ stage[i].D3DTSS_BUMPENVMAT00
925 * CONST[ 8..15]._y__ stage[i].D3DTSS_BUMPENVMAT01
926 * CONST[ 8..15].__z_ stage[i].D3DTSS_BUMPENVMAT10
927 * CONST[ 8..15].___w stage[i].D3DTSS_BUMPENVMAT11
928 * CONST[16..19].x_z_ stage[i].D3DTSS_BUMPENVLSCALE
929 * CONST[16..19]._y_w stage[i].D3DTSS_BUMPENVLOFFSET
930 *
931 * CONST[20] D3DRS_TEXTUREFACTOR
932 * CONST[21] D3DRS_FOGCOLOR
933 * CONST[22].x___ RS.FogEnd
934 * CONST[22]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
935 * CONST[22].__z_ RS.FogDensity
936 */
937 struct ps_build_ctx
938 {
939 struct ureg_program *ureg;
940
941 struct ureg_src vC[2]; /* DIFFUSE, SPECULAR */
942 struct ureg_src vT[8]; /* TEXCOORD[i] */
943 struct ureg_dst r[6]; /* TEMPs */
944 struct ureg_dst rCur; /* D3DTA_CURRENT */
945 struct ureg_dst rMod;
946 struct ureg_src rCurSrc;
947 struct ureg_dst rTmp; /* D3DTA_TEMP */
948 struct ureg_src rTmpSrc;
949 struct ureg_dst rTex;
950 struct ureg_src rTexSrc;
951 struct ureg_src cBEM[8];
952 struct ureg_src s[8];
953
954 struct {
955 unsigned index;
956 unsigned index_pre_mod;
957 unsigned num_regs;
958 } stage;
959 };
960
961 static struct ureg_src
962 ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta)
963 {
964 struct ureg_src reg;
965
966 switch (ta & D3DTA_SELECTMASK) {
967 case D3DTA_CONSTANT:
968 reg = ureg_DECL_constant(ps->ureg, ps->stage.index);
969 break;
970 case D3DTA_CURRENT:
971 reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc;
972 break;
973 case D3DTA_DIFFUSE:
974 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_PERSPECTIVE);
975 break;
976 case D3DTA_SPECULAR:
977 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE);
978 break;
979 case D3DTA_TEMP:
980 reg = ps->rTmpSrc;
981 break;
982 case D3DTA_TEXTURE:
983 reg = ps->rTexSrc;
984 break;
985 case D3DTA_TFACTOR:
986 reg = ureg_DECL_constant(ps->ureg, 20);
987 break;
988 default:
989 assert(0);
990 reg = ureg_src_undef();
991 break;
992 }
993 if (ta & D3DTA_COMPLEMENT) {
994 struct ureg_dst dst = ps->r[ps->stage.num_regs++];
995 ureg_SUB(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), reg);
996 reg = ureg_src(dst);
997 }
998 if (ta & D3DTA_ALPHAREPLICATE)
999 reg = _WWWW(reg);
1000 return reg;
1001 }
1002
1003 static struct ureg_dst
1004 ps_get_ts_dst(struct ps_build_ctx *ps, unsigned ta)
1005 {
1006 assert(!(ta & (D3DTA_COMPLEMENT | D3DTA_ALPHAREPLICATE)));
1007
1008 switch (ta & D3DTA_SELECTMASK) {
1009 case D3DTA_CURRENT:
1010 return ps->rCur;
1011 case D3DTA_TEMP:
1012 return ps->rTmp;
1013 default:
1014 assert(0);
1015 return ureg_dst_undef();
1016 }
1017 }
1018
1019 static uint8_t ps_d3dtop_args_mask(D3DTEXTUREOP top)
1020 {
1021 switch (top) {
1022 case D3DTOP_DISABLE:
1023 return 0x0;
1024 case D3DTOP_SELECTARG1:
1025 case D3DTOP_PREMODULATE:
1026 return 0x2;
1027 case D3DTOP_SELECTARG2:
1028 return 0x4;
1029 case D3DTOP_MULTIPLYADD:
1030 case D3DTOP_LERP:
1031 return 0x7;
1032 default:
1033 return 0x6;
1034 }
1035 }
1036
1037 static inline boolean
1038 is_MOV_no_op(struct ureg_dst dst, struct ureg_src src)
1039 {
1040 return !dst.WriteMask ||
1041 (dst.File == src.File &&
1042 dst.Index == src.Index &&
1043 !dst.Indirect &&
1044 !dst.Saturate &&
1045 !src.Indirect &&
1046 !src.Negate &&
1047 !src.Absolute &&
1048 (!(dst.WriteMask & TGSI_WRITEMASK_X) || (src.SwizzleX == TGSI_SWIZZLE_X)) &&
1049 (!(dst.WriteMask & TGSI_WRITEMASK_Y) || (src.SwizzleY == TGSI_SWIZZLE_Y)) &&
1050 (!(dst.WriteMask & TGSI_WRITEMASK_Z) || (src.SwizzleZ == TGSI_SWIZZLE_Z)) &&
1051 (!(dst.WriteMask & TGSI_WRITEMASK_W) || (src.SwizzleW == TGSI_SWIZZLE_W)));
1052
1053 }
1054
1055 static void
1056 ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct ureg_src *arg)
1057 {
1058 struct ureg_program *ureg = ps->ureg;
1059 struct ureg_dst tmp = ps->r[ps->stage.num_regs];
1060 struct ureg_dst tmp2 = ps->r[ps->stage.num_regs+1];
1061 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
1062
1063 tmp.WriteMask = dst.WriteMask;
1064
1065 if (top != D3DTOP_SELECTARG1 && top != D3DTOP_SELECTARG2 &&
1066 top != D3DTOP_MODULATE && top != D3DTOP_PREMODULATE &&
1067 top != D3DTOP_BLENDDIFFUSEALPHA && top != D3DTOP_BLENDTEXTUREALPHA &&
1068 top != D3DTOP_BLENDFACTORALPHA && top != D3DTOP_BLENDCURRENTALPHA &&
1069 top != D3DTOP_BUMPENVMAP && top != D3DTOP_BUMPENVMAPLUMINANCE &&
1070 top != D3DTOP_LERP)
1071 dst = ureg_saturate(dst);
1072
1073 switch (top) {
1074 case D3DTOP_SELECTARG1:
1075 if (!is_MOV_no_op(dst, arg[1]))
1076 ureg_MOV(ureg, dst, arg[1]);
1077 break;
1078 case D3DTOP_SELECTARG2:
1079 if (!is_MOV_no_op(dst, arg[2]))
1080 ureg_MOV(ureg, dst, arg[2]);
1081 break;
1082 case D3DTOP_MODULATE:
1083 ureg_MUL(ureg, dst, arg[1], arg[2]);
1084 break;
1085 case D3DTOP_MODULATE2X:
1086 ureg_MUL(ureg, tmp, arg[1], arg[2]);
1087 ureg_ADD(ureg, dst, ureg_src(tmp), ureg_src(tmp));
1088 break;
1089 case D3DTOP_MODULATE4X:
1090 ureg_MUL(ureg, tmp, arg[1], arg[2]);
1091 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 4.0f));
1092 break;
1093 case D3DTOP_ADD:
1094 ureg_ADD(ureg, dst, arg[1], arg[2]);
1095 break;
1096 case D3DTOP_ADDSIGNED:
1097 ureg_ADD(ureg, tmp, arg[1], arg[2]);
1098 ureg_SUB(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 0.5f));
1099 break;
1100 case D3DTOP_ADDSIGNED2X:
1101 ureg_ADD(ureg, tmp, arg[1], arg[2]);
1102 ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1103 break;
1104 case D3DTOP_SUBTRACT:
1105 ureg_SUB(ureg, dst, arg[1], arg[2]);
1106 break;
1107 case D3DTOP_ADDSMOOTH:
1108 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), arg[1]);
1109 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]);
1110 break;
1111 case D3DTOP_BLENDDIFFUSEALPHA:
1112 ureg_LRP(ureg, dst, _WWWW(ps->vC[0]), arg[1], arg[2]);
1113 break;
1114 case D3DTOP_BLENDTEXTUREALPHA:
1115 /* XXX: alpha taken from previous stage, texture or result ? */
1116 ureg_LRP(ureg, dst, _W(ps->rTex), arg[1], arg[2]);
1117 break;
1118 case D3DTOP_BLENDFACTORALPHA:
1119 ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]);
1120 break;
1121 case D3DTOP_BLENDTEXTUREALPHAPM:
1122 ureg_SUB(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), _W(ps->rTex));
1123 ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]);
1124 break;
1125 case D3DTOP_BLENDCURRENTALPHA:
1126 ureg_LRP(ureg, dst, _WWWW(ps->rCurSrc), arg[1], arg[2]);
1127 break;
1128 case D3DTOP_PREMODULATE:
1129 ureg_MOV(ureg, dst, arg[1]);
1130 ps->stage.index_pre_mod = ps->stage.index + 1;
1131 break;
1132 case D3DTOP_MODULATEALPHA_ADDCOLOR:
1133 ureg_MAD(ureg, dst, _WWWW(arg[1]), arg[2], arg[1]);
1134 break;
1135 case D3DTOP_MODULATECOLOR_ADDALPHA:
1136 ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1]));
1137 break;
1138 case D3DTOP_MODULATEINVALPHA_ADDCOLOR:
1139 ureg_SUB(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), _WWWW(arg[1]));
1140 ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]);
1141 break;
1142 case D3DTOP_MODULATEINVCOLOR_ADDALPHA:
1143 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), arg[1]);
1144 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], _WWWW(arg[1]));
1145 break;
1146 case D3DTOP_BUMPENVMAP:
1147 break;
1148 case D3DTOP_BUMPENVMAPLUMINANCE:
1149 break;
1150 case D3DTOP_DOTPRODUCT3:
1151 ureg_SUB(ureg, tmp, arg[1], ureg_imm4f(ureg,0.5,0.5,0.5,0.5));
1152 ureg_SUB(ureg, tmp2, arg[2] , ureg_imm4f(ureg,0.5,0.5,0.5,0.5));
1153 ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2));
1154 ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0));
1155 break;
1156 case D3DTOP_MULTIPLYADD:
1157 ureg_MAD(ureg, dst, arg[1], arg[2], arg[0]);
1158 break;
1159 case D3DTOP_LERP:
1160 ureg_LRP(ureg, dst, arg[0], arg[1], arg[2]);
1161 break;
1162 case D3DTOP_DISABLE:
1163 /* no-op ? */
1164 break;
1165 default:
1166 assert(!"invalid D3DTOP");
1167 break;
1168 }
1169 }
1170
1171 static void *
1172 nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
1173 {
1174 struct ps_build_ctx ps;
1175 struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
1176 struct ureg_dst oCol;
1177 unsigned i, s;
1178 const unsigned texcoord_sn = get_texcoord_sn(device->screen);
1179
1180 memset(&ps, 0, sizeof(ps));
1181 ps.ureg = ureg;
1182 ps.stage.index_pre_mod = -1;
1183
1184 ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_PERSPECTIVE);
1185
1186 /* Declare all TEMPs we might need, serious drivers have a register allocator. */
1187 for (i = 0; i < Elements(ps.r); ++i)
1188 ps.r[i] = ureg_DECL_local_temporary(ureg);
1189 ps.rCur = ps.r[0];
1190 ps.rTmp = ps.r[1];
1191 ps.rTex = ps.r[2];
1192 ps.rCurSrc = ureg_src(ps.rCur);
1193 ps.rTmpSrc = ureg_src(ps.rTmp);
1194 ps.rTexSrc = ureg_src(ps.rTex);
1195
1196 for (s = 0; s < 8; ++s) {
1197 ps.s[s] = ureg_src_undef();
1198
1199 if (key->ts[s].colorop != D3DTOP_DISABLE) {
1200 if (key->ts[s].colorarg0 == D3DTA_SPECULAR ||
1201 key->ts[s].colorarg1 == D3DTA_SPECULAR ||
1202 key->ts[s].colorarg2 == D3DTA_SPECULAR)
1203 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE);
1204
1205 if (key->ts[s].colorarg0 == D3DTA_TEXTURE ||
1206 key->ts[s].colorarg1 == D3DTA_TEXTURE ||
1207 key->ts[s].colorarg2 == D3DTA_TEXTURE) {
1208 ps.s[s] = ureg_DECL_sampler(ureg, s);
1209 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1210 }
1211 if (s && (key->ts[s - 1].colorop == D3DTOP_PREMODULATE ||
1212 key->ts[s - 1].alphaop == D3DTOP_PREMODULATE))
1213 ps.s[s] = ureg_DECL_sampler(ureg, s);
1214 }
1215
1216 if (key->ts[s].alphaop != D3DTOP_DISABLE) {
1217 if (key->ts[s].alphaarg0 == D3DTA_SPECULAR ||
1218 key->ts[s].alphaarg1 == D3DTA_SPECULAR ||
1219 key->ts[s].alphaarg2 == D3DTA_SPECULAR)
1220 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE);
1221
1222 if (key->ts[s].alphaarg0 == D3DTA_TEXTURE ||
1223 key->ts[s].alphaarg1 == D3DTA_TEXTURE ||
1224 key->ts[s].alphaarg2 == D3DTA_TEXTURE) {
1225 ps.s[s] = ureg_DECL_sampler(ureg, s);
1226 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1227 }
1228 }
1229 }
1230 if (key->specular)
1231 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE);
1232
1233 oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
1234
1235 if (key->ts[0].colorop == D3DTOP_DISABLE &&
1236 key->ts[0].alphaop == D3DTOP_DISABLE)
1237 ureg_MOV(ureg, ps.rCur, ps.vC[0]);
1238 /* Or is it undefined then ? */
1239
1240 /* Run stages.
1241 */
1242 for (s = 0; s < 8; ++s) {
1243 unsigned colorarg[3];
1244 unsigned alphaarg[3];
1245 const uint8_t used_c = ps_d3dtop_args_mask(key->ts[s].colorop);
1246 const uint8_t used_a = ps_d3dtop_args_mask(key->ts[s].alphaop);
1247 struct ureg_dst dst;
1248 struct ureg_src arg[3];
1249
1250 if (key->ts[s].colorop == D3DTOP_DISABLE &&
1251 key->ts[s].alphaop == D3DTOP_DISABLE)
1252 continue;
1253 ps.stage.index = s;
1254 ps.stage.num_regs = 3;
1255
1256 DBG("STAGE[%u]: colorop=%s alphaop=%s\n", s,
1257 nine_D3DTOP_to_str(key->ts[s].colorop),
1258 nine_D3DTOP_to_str(key->ts[s].alphaop));
1259
1260 if (!ureg_src_is_undef(ps.s[s])) {
1261 unsigned target;
1262 switch (key->ts[s].textarget) {
1263 case 0: target = TGSI_TEXTURE_1D; break;
1264 case 1: target = TGSI_TEXTURE_2D; break;
1265 case 2: target = TGSI_TEXTURE_3D; break;
1266 case 3: target = TGSI_TEXTURE_CUBE; break;
1267 /* this is a 2 bit bitfield, do I really need a default case ? */
1268 }
1269
1270 /* sample the texture */
1271 if (key->ts[s].colorop == D3DTOP_BUMPENVMAP ||
1272 key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE) {
1273 }
1274 if (key->ts[s].projected)
1275 ureg_TXP(ureg, ps.rTex, target, ps.vT[s], ps.s[s]);
1276 else
1277 ureg_TEX(ureg, ps.rTex, target, ps.vT[s], ps.s[s]);
1278 }
1279
1280 if (s == 0 &&
1281 (key->ts[0].resultarg != 0 /* not current */ ||
1282 key->ts[0].colorop == D3DTOP_DISABLE ||
1283 key->ts[0].alphaop == D3DTOP_DISABLE ||
1284 key->ts[0].colorop == D3DTOP_BLENDCURRENTALPHA ||
1285 key->ts[0].alphaop == D3DTOP_BLENDCURRENTALPHA ||
1286 key->ts[0].colorarg0 == D3DTA_CURRENT ||
1287 key->ts[0].colorarg1 == D3DTA_CURRENT ||
1288 key->ts[0].colorarg2 == D3DTA_CURRENT ||
1289 key->ts[0].alphaarg0 == D3DTA_CURRENT ||
1290 key->ts[0].alphaarg1 == D3DTA_CURRENT ||
1291 key->ts[0].alphaarg2 == D3DTA_CURRENT)
1292 ) {
1293 /* Initialize D3DTA_CURRENT.
1294 * (Yes we can do this before the loop but not until
1295 * NVE4 has an instruction scheduling pass.)
1296 */
1297 ureg_MOV(ureg, ps.rCur, ps.vC[0]);
1298 }
1299
1300 dst = ps_get_ts_dst(&ps, key->ts[s].resultarg ? D3DTA_TEMP : D3DTA_CURRENT);
1301
1302 if (ps.stage.index_pre_mod == ps.stage.index) {
1303 ps.rMod = ps.r[ps.stage.num_regs++];
1304 ureg_MUL(ureg, ps.rMod, ps.rCurSrc, ps.rTexSrc);
1305 }
1306
1307 colorarg[0] = (key->ts[s].colorarg0 | ((key->colorarg_b4[0] >> s) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f;
1308 colorarg[1] = (key->ts[s].colorarg1 | ((key->colorarg_b4[1] >> s) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f;
1309 colorarg[2] = (key->ts[s].colorarg2 | ((key->colorarg_b4[2] >> s) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f;
1310 alphaarg[0] = (key->ts[s].alphaarg0 | ((key->alphaarg_b4[0] >> s) << 4)) & 0x1f;
1311 alphaarg[1] = (key->ts[s].alphaarg1 | ((key->alphaarg_b4[1] >> s) << 4)) & 0x1f;
1312 alphaarg[2] = (key->ts[s].alphaarg2 | ((key->alphaarg_b4[2] >> s) << 4)) & 0x1f;
1313
1314 if (key->ts[s].colorop != key->ts[s].alphaop ||
1315 colorarg[0] != alphaarg[0] ||
1316 colorarg[1] != alphaarg[1] ||
1317 colorarg[2] != alphaarg[2])
1318 dst.WriteMask = TGSI_WRITEMASK_XYZ;
1319
1320 if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]);
1321 if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]);
1322 if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]);
1323 ps_do_ts_op(&ps, key->ts[s].colorop, dst, arg);
1324
1325 if (dst.WriteMask != TGSI_WRITEMASK_XYZW) {
1326 dst.WriteMask = TGSI_WRITEMASK_W;
1327
1328 if (used_a & 0x1) arg[0] = ps_get_ts_arg(&ps, alphaarg[0]);
1329 if (used_a & 0x2) arg[1] = ps_get_ts_arg(&ps, alphaarg[1]);
1330 if (used_a & 0x4) arg[2] = ps_get_ts_arg(&ps, alphaarg[2]);
1331 ps_do_ts_op(&ps, key->ts[s].alphaop, dst, arg);
1332 }
1333 }
1334
1335 if (key->specular)
1336 ureg_ADD(ureg, ps.rCur, ps.rCurSrc, ps.vC[1]);
1337
1338 /* Fog.
1339 */
1340 if (key->fog_mode) {
1341 struct ureg_src vPos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_INTERPOLATE_LINEAR);
1342 struct ureg_dst rFog = ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X);
1343 if (key->fog_mode == D3DFOG_EXP) {
1344 ureg_MUL(ureg, rFog, _ZZZZ(vPos), _ZZZZ(_CONST(22)));
1345 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1346 ureg_EX2(ureg, rFog, _X(rFog));
1347 } else
1348 if (key->fog_mode == D3DFOG_EXP2) {
1349 ureg_MUL(ureg, rFog, _ZZZZ(vPos), _ZZZZ(_CONST(22)));
1350 ureg_MUL(ureg, rFog, _X(rFog), _X(rFog));
1351 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1352 ureg_EX2(ureg, rFog, _X(rFog));
1353 } else
1354 if (key->fog_mode == D3DFOG_LINEAR) {
1355 ureg_SUB(ureg, rFog, _XXXX(_CONST(22)), _ZZZZ(vPos));
1356 ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22)));
1357 }
1358 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21));
1359 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1360 } else
1361 if (key->fog) {
1362 struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0, TGSI_INTERPOLATE_PERSPECTIVE);
1363 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _XXXX(vFog), ps.rCurSrc, _CONST(21));
1364 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1365 } else {
1366 ureg_MOV(ureg, oCol, ps.rCurSrc);
1367 }
1368
1369 ureg_END(ureg);
1370 nine_ureg_tgsi_dump(ureg, FALSE);
1371 return ureg_create_shader_and_destroy(ureg, device->pipe);
1372 }
1373
1374 static struct NineVertexShader9 *
1375 nine_ff_get_vs(struct NineDevice9 *device)
1376 {
1377 const struct nine_state *state = &device->state;
1378 struct NineVertexShader9 *vs;
1379 enum pipe_error err;
1380 struct vs_build_ctx bld;
1381 struct nine_ff_vs_key key;
1382 unsigned s, i;
1383 char input_texture_coord[8];
1384
1385 assert(sizeof(key) <= sizeof(key.value32));
1386
1387 memset(&key, 0, sizeof(key));
1388 memset(&bld, 0, sizeof(bld));
1389 memset(&input_texture_coord, 0, sizeof(input_texture_coord));
1390
1391 bld.key = &key;
1392
1393 /* FIXME: this shouldn't be NULL, but it is on init */
1394 if (state->vdecl) {
1395 key.color0in_one = 1;
1396 key.color1in_one = 1;
1397 for (i = 0; i < state->vdecl->nelems; i++) {
1398 uint16_t usage = state->vdecl->usage_map[i];
1399 if (usage == NINE_DECLUSAGE_POSITIONT)
1400 key.position_t = 1;
1401 else if (usage == NINE_DECLUSAGE_i(COLOR, 0))
1402 key.color0in_one = 0;
1403 else if (usage == NINE_DECLUSAGE_i(COLOR, 1))
1404 key.color1in_one = 0;
1405 else if (usage == NINE_DECLUSAGE_PSIZE)
1406 key.vertexpointsize = 1;
1407 else if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) {
1408 s = usage / NINE_DECLUSAGE_COUNT;
1409 if (s < 8)
1410 input_texture_coord[s] = 1;
1411 else
1412 DBG("FF given texture coordinate >= 8. Ignoring\n");
1413 }
1414 }
1415 }
1416 if (!key.vertexpointsize)
1417 key.pointscale = !!state->rs[D3DRS_POINTSCALEENABLE];
1418
1419 key.lighting = !!state->rs[D3DRS_LIGHTING] && state->ff.num_lights_active;
1420 key.darkness = !!state->rs[D3DRS_LIGHTING] && !state->ff.num_lights_active;
1421 if (key.position_t) {
1422 key.darkness = 0; /* |= key.lighting; */ /* XXX ? */
1423 key.lighting = 0;
1424 }
1425 if ((key.lighting | key.darkness) && state->rs[D3DRS_COLORVERTEX]) {
1426 key.mtl_diffuse = state->rs[D3DRS_DIFFUSEMATERIALSOURCE];
1427 key.mtl_ambient = state->rs[D3DRS_AMBIENTMATERIALSOURCE];
1428 key.mtl_specular = state->rs[D3DRS_SPECULARMATERIALSOURCE];
1429 key.mtl_emissive = state->rs[D3DRS_EMISSIVEMATERIALSOURCE];
1430 }
1431 key.fog_mode = state->rs[D3DRS_FOGENABLE] ? state->rs[D3DRS_FOGVERTEXMODE] : 0;
1432 if (key.fog_mode)
1433 key.fog_range = !key.position_t && state->rs[D3DRS_RANGEFOGENABLE];
1434
1435 if (state->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1436 key.vertexblend_indexed = !!state->rs[D3DRS_INDEXEDVERTEXBLENDENABLE];
1437
1438 switch (state->rs[D3DRS_VERTEXBLEND]) {
1439 case D3DVBF_0WEIGHTS: key.vertexblend = key.vertexblend_indexed; break;
1440 case D3DVBF_1WEIGHTS: key.vertexblend = 2; break;
1441 case D3DVBF_2WEIGHTS: key.vertexblend = 3; break;
1442 case D3DVBF_3WEIGHTS: key.vertexblend = 4; break;
1443 case D3DVBF_TWEENING: key.vertextween = 1; break;
1444 default:
1445 assert(!"invalid D3DVBF");
1446 break;
1447 }
1448 }
1449
1450 for (s = 0; s < 8; ++s) {
1451 unsigned gen = (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1;
1452 unsigned dim = MIN2(state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7, 4);
1453
1454 if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU)
1455 gen = NINED3DTSS_TCI_PASSTHRU;
1456
1457 if (!input_texture_coord[s] && gen == NINED3DTSS_TCI_PASSTHRU)
1458 gen = NINED3DTSS_TCI_DISABLE;
1459
1460 key.tc_gen |= gen << (s * 3);
1461 key.tc_idx |= (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7) << (s * 3);
1462 key.tc_dim |= dim << (s * 3);
1463 }
1464
1465 vs = util_hash_table_get(device->ff.ht_vs, &key);
1466 if (vs)
1467 return vs;
1468 NineVertexShader9_new(device, &vs, NULL, nine_ff_build_vs(device, &bld));
1469
1470 nine_ff_prune_vs(device);
1471 if (vs) {
1472 unsigned n;
1473
1474 memcpy(&vs->ff_key, &key, sizeof(vs->ff_key));
1475
1476 err = util_hash_table_set(device->ff.ht_vs, &vs->ff_key, vs);
1477 assert(err == PIPE_OK);
1478 device->ff.num_vs++;
1479 NineUnknown_ConvertRefToBind(NineUnknown(vs));
1480
1481 vs->num_inputs = bld.num_inputs;
1482 for (n = 0; n < bld.num_inputs; ++n)
1483 vs->input_map[n].ndecl = bld.input[n];
1484
1485 vs->position_t = key.position_t;
1486 vs->point_size = key.vertexpointsize | key.pointscale;
1487 }
1488 return vs;
1489 }
1490
1491 static struct NinePixelShader9 *
1492 nine_ff_get_ps(struct NineDevice9 *device)
1493 {
1494 struct nine_state *state = &device->state;
1495 struct NinePixelShader9 *ps;
1496 enum pipe_error err;
1497 struct nine_ff_ps_key key;
1498 unsigned s;
1499 uint8_t sampler_mask = 0;
1500
1501 assert(sizeof(key) <= sizeof(key.value32));
1502
1503 memset(&key, 0, sizeof(key));
1504 for (s = 0; s < 8; ++s) {
1505 key.ts[s].colorop = state->ff.tex_stage[s][D3DTSS_COLOROP];
1506 key.ts[s].alphaop = state->ff.tex_stage[s][D3DTSS_ALPHAOP];
1507 /* MSDN says D3DTOP_DISABLE disables this and all subsequent stages. */
1508 /* ALPHAOP cannot be disabled if COLOROP is enabled. */
1509 if (key.ts[s].colorop == D3DTOP_DISABLE) {
1510 key.ts[s].alphaop = D3DTOP_DISABLE; /* DISABLE == 1, avoid degenerate keys */
1511 break;
1512 }
1513
1514 if (!state->texture[s] &&
1515 state->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE) {
1516 /* This should also disable the stage. */
1517 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1518 break;
1519 }
1520
1521 if (state->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE)
1522 sampler_mask |= (1 << s);
1523
1524 if (key.ts[s].colorop != D3DTOP_DISABLE) {
1525 uint8_t used_c = ps_d3dtop_args_mask(key.ts[s].colorop);
1526 if (used_c & 0x1) key.ts[s].colorarg0 = state->ff.tex_stage[s][D3DTSS_COLORARG0];
1527 if (used_c & 0x2) key.ts[s].colorarg1 = state->ff.tex_stage[s][D3DTSS_COLORARG1];
1528 if (used_c & 0x4) key.ts[s].colorarg2 = state->ff.tex_stage[s][D3DTSS_COLORARG2];
1529 if (used_c & 0x1) key.colorarg_b4[0] |= (state->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) << s;
1530 if (used_c & 0x1) key.colorarg_b5[0] |= (state->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) << s;
1531 if (used_c & 0x2) key.colorarg_b4[1] |= (state->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) << s;
1532 if (used_c & 0x2) key.colorarg_b5[1] |= (state->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) << s;
1533 if (used_c & 0x4) key.colorarg_b4[2] |= (state->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) << s;
1534 if (used_c & 0x4) key.colorarg_b5[2] |= (state->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) << s;
1535 }
1536 if (key.ts[s].alphaop != D3DTOP_DISABLE) {
1537 uint8_t used_a = ps_d3dtop_args_mask(key.ts[s].alphaop);
1538 if (used_a & 0x1) key.ts[s].alphaarg0 = state->ff.tex_stage[s][D3DTSS_ALPHAARG0];
1539 if (used_a & 0x2) key.ts[s].alphaarg1 = state->ff.tex_stage[s][D3DTSS_ALPHAARG1];
1540 if (used_a & 0x4) key.ts[s].alphaarg2 = state->ff.tex_stage[s][D3DTSS_ALPHAARG2];
1541 if (used_a & 0x1) key.alphaarg_b4[0] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) << s;
1542 if (used_a & 0x2) key.alphaarg_b4[1] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) << s;
1543 if (used_a & 0x4) key.alphaarg_b4[2] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) << s;
1544 }
1545 key.ts[s].resultarg = state->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP;
1546
1547 key.ts[s].projected = !!(state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & D3DTTFF_PROJECTED);
1548
1549 if (state->texture[s]) {
1550 switch (state->texture[s]->base.type) {
1551 case D3DRTYPE_TEXTURE: key.ts[s].textarget = 1; break;
1552 case D3DRTYPE_VOLUMETEXTURE: key.ts[s].textarget = 2; break;
1553 case D3DRTYPE_CUBETEXTURE: key.ts[s].textarget = 3; break;
1554 default:
1555 assert(!"unexpected texture type");
1556 break;
1557 }
1558 } else {
1559 key.ts[s].textarget = 1;
1560 }
1561 }
1562 for (; s < 8; ++s)
1563 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1564 if (state->rs[D3DRS_FOGENABLE])
1565 key.fog_mode = state->rs[D3DRS_FOGTABLEMODE];
1566
1567 ps = util_hash_table_get(device->ff.ht_ps, &key);
1568 if (ps)
1569 return ps;
1570 NinePixelShader9_new(device, &ps, NULL, nine_ff_build_ps(device, &key));
1571
1572 nine_ff_prune_ps(device);
1573 if (ps) {
1574 memcpy(&ps->ff_key, &key, sizeof(ps->ff_key));
1575
1576 err = util_hash_table_set(device->ff.ht_ps, &ps->ff_key, ps);
1577 assert(err == PIPE_OK);
1578 device->ff.num_ps++;
1579 NineUnknown_ConvertRefToBind(NineUnknown(ps));
1580
1581 ps->rt_mask = 0x1;
1582 ps->sampler_mask = sampler_mask;
1583 }
1584 return ps;
1585 }
1586
1587 #define GET_D3DTS(n) nine_state_access_transform(state, D3DTS_##n, FALSE)
1588 #define IS_D3DTS_DIRTY(s,n) ((s)->ff.changed.transform[(D3DTS_##n) / 32] & (1 << ((D3DTS_##n) % 32)))
1589 static void
1590 nine_ff_load_vs_transforms(struct NineDevice9 *device)
1591 {
1592 struct nine_state *state = &device->state;
1593 D3DMATRIX T;
1594 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1595 unsigned i;
1596
1597 /* TODO: make this nicer, and only upload the ones we need */
1598 /* TODO: use ff.vs_const as storage of W, V, P matrices */
1599
1600 if (IS_D3DTS_DIRTY(state, WORLD) ||
1601 IS_D3DTS_DIRTY(state, VIEW) ||
1602 IS_D3DTS_DIRTY(state, PROJECTION)) {
1603 /* WVP, WV matrices */
1604 nine_d3d_matrix_matrix_mul(&M[1], GET_D3DTS(WORLD), GET_D3DTS(VIEW));
1605 nine_d3d_matrix_matrix_mul(&M[0], &M[1], GET_D3DTS(PROJECTION));
1606
1607 /* normal matrix == transpose(inverse(WV)) */
1608 nine_d3d_matrix_inverse_3x3(&T, &M[1]);
1609 nine_d3d_matrix_transpose(&M[4], &T);
1610
1611 /* VP matrix */
1612 nine_d3d_matrix_matrix_mul(&M[2], GET_D3DTS(VIEW), GET_D3DTS(PROJECTION));
1613
1614 /* V and W matrix */
1615 M[3] = *GET_D3DTS(VIEW);
1616 M[56] = *GET_D3DTS(WORLD);
1617 }
1618
1619 if (state->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1620 /* load other world matrices */
1621 for (i = 1; i <= 7; ++i)
1622 M[56 + i] = *GET_D3DTS(WORLDMATRIX(i));
1623 }
1624
1625 device->ff.vs_const[30 * 4] = asfloat(state->rs[D3DRS_TWEENFACTOR]);
1626 }
1627
1628 static void
1629 nine_ff_load_lights(struct NineDevice9 *device)
1630 {
1631 struct nine_state *state = &device->state;
1632 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1633 unsigned l;
1634
1635 if (state->changed.group & NINE_STATE_FF_MATERIAL) {
1636 const D3DMATERIAL9 *mtl = &state->ff.material;
1637
1638 memcpy(&dst[20], &mtl->Diffuse, 4 * sizeof(float));
1639 memcpy(&dst[21], &mtl->Ambient, 4 * sizeof(float));
1640 memcpy(&dst[22], &mtl->Specular, 4 * sizeof(float));
1641 dst[23].x = mtl->Power;
1642 memcpy(&dst[24], &mtl->Emissive, 4 * sizeof(float));
1643 d3dcolor_to_rgba(&dst[25].x, state->rs[D3DRS_AMBIENT]);
1644 dst[19].x = dst[25].x * mtl->Ambient.r + mtl->Emissive.r;
1645 dst[19].y = dst[25].y * mtl->Ambient.g + mtl->Emissive.g;
1646 dst[19].z = dst[25].z * mtl->Ambient.b + mtl->Emissive.b;
1647 dst[19].w = mtl->Ambient.a + mtl->Emissive.a;
1648 }
1649
1650 if (!(state->changed.group & NINE_STATE_FF_LIGHTING))
1651 return;
1652
1653 for (l = 0; l < state->ff.num_lights_active; ++l) {
1654 const D3DLIGHT9 *light = &state->ff.light[state->ff.active_light[l]];
1655
1656 dst[32 + l * 8].x = light->Type;
1657 dst[32 + l * 8].y = light->Attenuation0;
1658 dst[32 + l * 8].z = light->Attenuation1;
1659 dst[32 + l * 8].w = light->Attenuation2;
1660 memcpy(&dst[33 + l * 8].x, &light->Diffuse, sizeof(light->Diffuse));
1661 memcpy(&dst[34 + l * 8].x, &light->Specular, sizeof(light->Specular));
1662 memcpy(&dst[35 + l * 8].x, &light->Ambient, sizeof(light->Ambient));
1663 nine_d3d_vector4_matrix_mul((D3DVECTOR *)&dst[36 + l * 8].x, &light->Position, GET_D3DTS(VIEW));
1664 nine_d3d_vector3_matrix_mul((D3DVECTOR *)&dst[37 + l * 8].x, &light->Direction, GET_D3DTS(VIEW));
1665 dst[36 + l * 8].w = light->Type == D3DLIGHT_DIRECTIONAL ? 1e9f : light->Range;
1666 dst[37 + l * 8].w = light->Falloff;
1667 dst[38 + l * 8].x = cosf(light->Theta * 0.5f);
1668 dst[38 + l * 8].y = cosf(light->Phi * 0.5f);
1669 dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y);
1670 dst[39 + l * 8].w = (l + 1) == state->ff.num_lights_active;
1671 }
1672 }
1673
1674 static void
1675 nine_ff_load_point_and_fog_params(struct NineDevice9 *device)
1676 {
1677 const struct nine_state *state = &device->state;
1678 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1679
1680 if (!(state->changed.group & NINE_STATE_FF_OTHER))
1681 return;
1682 dst[26].x = asfloat(state->rs[D3DRS_POINTSIZE_MIN]);
1683 dst[26].y = asfloat(state->rs[D3DRS_POINTSIZE_MAX]);
1684 dst[26].z = asfloat(state->rs[D3DRS_POINTSIZE]);
1685 dst[26].w = asfloat(state->rs[D3DRS_POINTSCALE_A]);
1686 dst[27].x = asfloat(state->rs[D3DRS_POINTSCALE_B]);
1687 dst[27].y = asfloat(state->rs[D3DRS_POINTSCALE_C]);
1688 dst[28].x = asfloat(state->rs[D3DRS_FOGEND]);
1689 dst[28].y = 1.0f / (asfloat(state->rs[D3DRS_FOGEND]) - asfloat(state->rs[D3DRS_FOGSTART]));
1690 if (isinf(dst[28].y))
1691 dst[28].y = 0.0f;
1692 dst[28].z = asfloat(state->rs[D3DRS_FOGDENSITY]);
1693 d3dcolor_to_rgba(&dst[29].x, state->rs[D3DRS_FOGCOLOR]);
1694 }
1695
1696 static void
1697 nine_ff_load_tex_matrices(struct NineDevice9 *device)
1698 {
1699 struct nine_state *state = &device->state;
1700 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1701 unsigned s;
1702
1703 if (!(state->ff.changed.transform[0] & 0xff0000))
1704 return;
1705 for (s = 0; s < 8; ++s) {
1706 if (IS_D3DTS_DIRTY(state, TEXTURE0 + s))
1707 M[32 + s] = *nine_state_access_transform(state, D3DTS_TEXTURE0 + s, FALSE);
1708 }
1709 }
1710
1711 static void
1712 nine_ff_load_ps_params(struct NineDevice9 *device)
1713 {
1714 const struct nine_state *state = &device->state;
1715 struct fvec4 *dst = (struct fvec4 *)device->ff.ps_const;
1716 unsigned s;
1717
1718 if (!(state->changed.group & (NINE_STATE_FF_PSSTAGES | NINE_STATE_FF_OTHER)))
1719 return;
1720
1721 for (s = 0; s < 8; ++s)
1722 d3dcolor_to_rgba(&dst[s].x, state->ff.tex_stage[s][D3DTSS_CONSTANT]);
1723
1724 for (s = 0; s < 8; ++s) {
1725 dst[8 + s].x = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT00]);
1726 dst[8 + s].y = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT01]);
1727 dst[8 + s].z = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT10]);
1728 dst[8 + s].w = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT11]);
1729 if (s & 1) {
1730 dst[8 + s / 2].z = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
1731 dst[8 + s / 2].w = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
1732 } else {
1733 dst[8 + s / 2].x = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
1734 dst[8 + s / 2].y = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
1735 }
1736 }
1737
1738 d3dcolor_to_rgba(&dst[20].x, state->rs[D3DRS_TEXTUREFACTOR]);
1739 d3dcolor_to_rgba(&dst[21].x, state->rs[D3DRS_FOGCOLOR]);
1740 dst[22].x = asfloat(state->rs[D3DRS_FOGEND]);
1741 dst[22].y = 1.0f / (asfloat(state->rs[D3DRS_FOGEND]) - asfloat(state->rs[D3DRS_FOGSTART]));
1742 dst[22].z = asfloat(state->rs[D3DRS_FOGDENSITY]);
1743 }
1744
1745 static void
1746 nine_ff_load_viewport_info(struct NineDevice9 *device)
1747 {
1748 D3DVIEWPORT9 *viewport = &device->state.viewport;
1749 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1750 float diffZ = viewport->MaxZ - viewport->MinZ;
1751
1752 /* Note: the other functions avoids to fill the const again if nothing changed.
1753 * But we don't have much to fill, and adding code to allow that may be complex
1754 * so just fill it always */
1755 dst[100].x = 2.0f / (float)(viewport->Width);
1756 dst[100].y = 2.0f / (float)(viewport->Height);
1757 dst[100].z = (diffZ == 0.0f) ? 0.0f : (1.0f / diffZ);
1758 dst[101].x = (float)(viewport->X);
1759 dst[101].y = (float)(viewport->Y);
1760 dst[101].z = (float)(viewport->MinZ);
1761 }
1762
1763 void
1764 nine_ff_update(struct NineDevice9 *device)
1765 {
1766 struct pipe_context *pipe = device->pipe;
1767 struct nine_state *state = &device->state;
1768
1769 DBG("vs=%p ps=%p\n", device->state.vs, device->state.ps);
1770
1771 /* NOTE: the only reference belongs to the hash table */
1772 if (!device->state.vs)
1773 device->ff.vs = nine_ff_get_vs(device);
1774 if (!device->state.ps)
1775 device->ff.ps = nine_ff_get_ps(device);
1776
1777 if (!device->state.vs) {
1778 if (device->state.ff.clobber.vs_const) {
1779 device->state.ff.clobber.vs_const = FALSE;
1780 device->state.changed.group |=
1781 NINE_STATE_FF_VSTRANSF |
1782 NINE_STATE_FF_MATERIAL |
1783 NINE_STATE_FF_LIGHTING |
1784 NINE_STATE_FF_OTHER;
1785 device->state.ff.changed.transform[0] |= 0xff000c;
1786 device->state.ff.changed.transform[8] |= 0xff;
1787 }
1788 nine_ff_load_vs_transforms(device);
1789 nine_ff_load_tex_matrices(device);
1790 nine_ff_load_lights(device);
1791 nine_ff_load_point_and_fog_params(device);
1792 nine_ff_load_viewport_info(device);
1793
1794 memset(state->ff.changed.transform, 0, sizeof(state->ff.changed.transform));
1795
1796 device->state.changed.group |= NINE_STATE_VS;
1797 device->state.changed.group |= NINE_STATE_VS_CONST;
1798
1799 if (device->prefer_user_constbuf) {
1800 struct pipe_context *pipe = device->pipe;
1801 struct pipe_constant_buffer cb;
1802 cb.buffer_offset = 0;
1803 cb.buffer = NULL;
1804 cb.user_buffer = device->ff.vs_const;
1805 cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float);
1806
1807 if (!device->driver_caps.user_cbufs) {
1808 u_upload_data(device->constbuf_uploader,
1809 0,
1810 cb.buffer_size,
1811 cb.user_buffer,
1812 &cb.buffer_offset,
1813 &cb.buffer);
1814 u_upload_unmap(device->constbuf_uploader);
1815 cb.user_buffer = NULL;
1816 }
1817 pipe->set_constant_buffer(pipe, PIPE_SHADER_VERTEX, 0, &cb);
1818 } else {
1819 struct pipe_box box;
1820 u_box_1d(0, NINE_FF_NUM_VS_CONST * 4 * sizeof(float), &box);
1821 pipe->transfer_inline_write(pipe, device->constbuf_vs, 0,
1822 0, &box,
1823 device->ff.vs_const, 0, 0);
1824 nine_ranges_insert(&device->state.changed.vs_const_f, 0, NINE_FF_NUM_VS_CONST,
1825 &device->range_pool);
1826 }
1827 }
1828
1829 if (!device->state.ps) {
1830 if (device->state.ff.clobber.ps_const) {
1831 device->state.ff.clobber.ps_const = FALSE;
1832 device->state.changed.group |=
1833 NINE_STATE_FF_PSSTAGES |
1834 NINE_STATE_FF_OTHER;
1835 }
1836 nine_ff_load_ps_params(device);
1837
1838 device->state.changed.group |= NINE_STATE_PS;
1839 device->state.changed.group |= NINE_STATE_PS_CONST;
1840
1841 if (device->prefer_user_constbuf) {
1842 struct pipe_context *pipe = device->pipe;
1843 struct pipe_constant_buffer cb;
1844 cb.buffer_offset = 0;
1845 cb.buffer = NULL;
1846 cb.user_buffer = device->ff.ps_const;
1847 cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float);
1848
1849 if (!device->driver_caps.user_cbufs) {
1850 u_upload_data(device->constbuf_uploader,
1851 0,
1852 cb.buffer_size,
1853 cb.user_buffer,
1854 &cb.buffer_offset,
1855 &cb.buffer);
1856 u_upload_unmap(device->constbuf_uploader);
1857 cb.user_buffer = NULL;
1858 }
1859 pipe->set_constant_buffer(pipe, PIPE_SHADER_FRAGMENT, 0, &cb);
1860 } else {
1861 struct pipe_box box;
1862 u_box_1d(0, NINE_FF_NUM_PS_CONST * 4 * sizeof(float), &box);
1863 pipe->transfer_inline_write(pipe, device->constbuf_ps, 0,
1864 0, &box,
1865 device->ff.ps_const, 0, 0);
1866 nine_ranges_insert(&device->state.changed.ps_const_f, 0, NINE_FF_NUM_PS_CONST,
1867 &device->range_pool);
1868 }
1869 }
1870
1871 device->state.changed.group &= ~NINE_STATE_FF;
1872 }
1873
1874
1875 boolean
1876 nine_ff_init(struct NineDevice9 *device)
1877 {
1878 device->ff.ht_vs = util_hash_table_create(nine_ff_vs_key_hash,
1879 nine_ff_vs_key_comp);
1880 device->ff.ht_ps = util_hash_table_create(nine_ff_ps_key_hash,
1881 nine_ff_ps_key_comp);
1882
1883 device->ff.ht_fvf = util_hash_table_create(nine_ff_fvf_key_hash,
1884 nine_ff_fvf_key_comp);
1885
1886 device->ff.vs_const = CALLOC(NINE_FF_NUM_VS_CONST, 4 * sizeof(float));
1887 device->ff.ps_const = CALLOC(NINE_FF_NUM_PS_CONST, 4 * sizeof(float));
1888
1889 return device->ff.ht_vs && device->ff.ht_ps &&
1890 device->ff.ht_fvf &&
1891 device->ff.vs_const && device->ff.ps_const;
1892 }
1893
1894 static enum pipe_error nine_ff_ht_delete_cb(void *key, void *value, void *data)
1895 {
1896 NineUnknown_Unbind(NineUnknown(value));
1897 return PIPE_OK;
1898 }
1899
1900 void
1901 nine_ff_fini(struct NineDevice9 *device)
1902 {
1903 if (device->ff.ht_vs) {
1904 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
1905 util_hash_table_destroy(device->ff.ht_vs);
1906 }
1907 if (device->ff.ht_ps) {
1908 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
1909 util_hash_table_destroy(device->ff.ht_ps);
1910 }
1911 if (device->ff.ht_fvf) {
1912 util_hash_table_foreach(device->ff.ht_fvf, nine_ff_ht_delete_cb, NULL);
1913 util_hash_table_destroy(device->ff.ht_fvf);
1914 }
1915 device->ff.vs = NULL; /* destroyed by unbinding from hash table */
1916 device->ff.ps = NULL;
1917
1918 FREE(device->ff.vs_const);
1919 FREE(device->ff.ps_const);
1920 }
1921
1922 static void
1923 nine_ff_prune_vs(struct NineDevice9 *device)
1924 {
1925 if (device->ff.num_vs > 100) {
1926 /* could destroy the bound one here, so unbind */
1927 device->pipe->bind_vs_state(device->pipe, NULL);
1928 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
1929 util_hash_table_clear(device->ff.ht_vs);
1930 device->ff.num_vs = 0;
1931 device->state.changed.group |= NINE_STATE_VS;
1932 }
1933 }
1934 static void
1935 nine_ff_prune_ps(struct NineDevice9 *device)
1936 {
1937 if (device->ff.num_ps > 100) {
1938 /* could destroy the bound one here, so unbind */
1939 device->pipe->bind_fs_state(device->pipe, NULL);
1940 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
1941 util_hash_table_clear(device->ff.ht_ps);
1942 device->ff.num_ps = 0;
1943 device->state.changed.group |= NINE_STATE_PS;
1944 }
1945 }
1946
1947 /* ========================================================================== */
1948
1949 /* Matrix multiplication:
1950 *
1951 * in memory: 0 1 2 3 (row major)
1952 * 4 5 6 7
1953 * 8 9 a b
1954 * c d e f
1955 *
1956 * cA cB cC cD
1957 * r0 = (r0 * cA) (r0 * cB) . .
1958 * r1 = (r1 * cA) (r1 * cB)
1959 * r2 = (r2 * cA) .
1960 * r3 = (r3 * cA) .
1961 *
1962 * r: (11) (12) (13) (14)
1963 * (21) (22) (23) (24)
1964 * (31) (32) (33) (34)
1965 * (41) (42) (43) (44)
1966 * l: (11 12 13 14)
1967 * (21 22 23 24)
1968 * (31 32 33 34)
1969 * (41 42 43 44)
1970 *
1971 * v: (x y z 1 )
1972 *
1973 * t.xyzw = MUL(v.xxxx, r[0]);
1974 * t.xyzw = MAD(v.yyyy, r[1], t.xyzw);
1975 * t.xyzw = MAD(v.zzzz, r[2], t.xyzw);
1976 * v.xyzw = MAD(v.wwww, r[3], t.xyzw);
1977 *
1978 * v.x = DP4(v, c[0]);
1979 * v.y = DP4(v, c[1]);
1980 * v.z = DP4(v, c[2]);
1981 * v.w = DP4(v, c[3]) = 1
1982 */
1983
1984 /*
1985 static void
1986 nine_D3DMATRIX_print(const D3DMATRIX *M)
1987 {
1988 DBG("\n(%f %f %f %f)\n"
1989 "(%f %f %f %f)\n"
1990 "(%f %f %f %f)\n"
1991 "(%f %f %f %f)\n",
1992 M->m[0][0], M->m[0][1], M->m[0][2], M->m[0][3],
1993 M->m[1][0], M->m[1][1], M->m[1][2], M->m[1][3],
1994 M->m[2][0], M->m[2][1], M->m[2][2], M->m[2][3],
1995 M->m[3][0], M->m[3][1], M->m[3][2], M->m[3][3]);
1996 }
1997 */
1998
1999 static inline float
2000 nine_DP4_row_col(const D3DMATRIX *A, int r, const D3DMATRIX *B, int c)
2001 {
2002 return A->m[r][0] * B->m[0][c] +
2003 A->m[r][1] * B->m[1][c] +
2004 A->m[r][2] * B->m[2][c] +
2005 A->m[r][3] * B->m[3][c];
2006 }
2007
2008 static inline float
2009 nine_DP4_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2010 {
2011 return v->x * M->m[0][c] +
2012 v->y * M->m[1][c] +
2013 v->z * M->m[2][c] +
2014 1.0f * M->m[3][c];
2015 }
2016
2017 static inline float
2018 nine_DP3_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2019 {
2020 return v->x * M->m[0][c] +
2021 v->y * M->m[1][c] +
2022 v->z * M->m[2][c];
2023 }
2024
2025 void
2026 nine_d3d_matrix_matrix_mul(D3DMATRIX *D, const D3DMATRIX *L, const D3DMATRIX *R)
2027 {
2028 D->_11 = nine_DP4_row_col(L, 0, R, 0);
2029 D->_12 = nine_DP4_row_col(L, 0, R, 1);
2030 D->_13 = nine_DP4_row_col(L, 0, R, 2);
2031 D->_14 = nine_DP4_row_col(L, 0, R, 3);
2032
2033 D->_21 = nine_DP4_row_col(L, 1, R, 0);
2034 D->_22 = nine_DP4_row_col(L, 1, R, 1);
2035 D->_23 = nine_DP4_row_col(L, 1, R, 2);
2036 D->_24 = nine_DP4_row_col(L, 1, R, 3);
2037
2038 D->_31 = nine_DP4_row_col(L, 2, R, 0);
2039 D->_32 = nine_DP4_row_col(L, 2, R, 1);
2040 D->_33 = nine_DP4_row_col(L, 2, R, 2);
2041 D->_34 = nine_DP4_row_col(L, 2, R, 3);
2042
2043 D->_41 = nine_DP4_row_col(L, 3, R, 0);
2044 D->_42 = nine_DP4_row_col(L, 3, R, 1);
2045 D->_43 = nine_DP4_row_col(L, 3, R, 2);
2046 D->_44 = nine_DP4_row_col(L, 3, R, 3);
2047 }
2048
2049 void
2050 nine_d3d_vector4_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2051 {
2052 d->x = nine_DP4_vec_col(v, M, 0);
2053 d->y = nine_DP4_vec_col(v, M, 1);
2054 d->z = nine_DP4_vec_col(v, M, 2);
2055 }
2056
2057 void
2058 nine_d3d_vector3_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2059 {
2060 d->x = nine_DP3_vec_col(v, M, 0);
2061 d->y = nine_DP3_vec_col(v, M, 1);
2062 d->z = nine_DP3_vec_col(v, M, 2);
2063 }
2064
2065 void
2066 nine_d3d_matrix_transpose(D3DMATRIX *D, const D3DMATRIX *M)
2067 {
2068 unsigned i, j;
2069 for (i = 0; i < 4; ++i)
2070 for (j = 0; j < 4; ++j)
2071 D->m[i][j] = M->m[j][i];
2072 }
2073
2074 #define _M_ADD_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2075 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2076 if (t > 0.0f) pos += t; else neg += t; } while(0)
2077
2078 #define _M_SUB_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2079 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2080 if (t > 0.0f) neg -= t; else pos -= t; } while(0)
2081 float
2082 nine_d3d_matrix_det(const D3DMATRIX *M)
2083 {
2084 float pos = 0.0f;
2085 float neg = 0.0f;
2086
2087 _M_ADD_PROD_1i_2j_3k_4l(1, 2, 3, 4);
2088 _M_ADD_PROD_1i_2j_3k_4l(1, 3, 4, 2);
2089 _M_ADD_PROD_1i_2j_3k_4l(1, 4, 2, 3);
2090
2091 _M_ADD_PROD_1i_2j_3k_4l(2, 1, 4, 3);
2092 _M_ADD_PROD_1i_2j_3k_4l(2, 3, 1, 4);
2093 _M_ADD_PROD_1i_2j_3k_4l(2, 4, 3, 1);
2094
2095 _M_ADD_PROD_1i_2j_3k_4l(3, 1, 2, 4);
2096 _M_ADD_PROD_1i_2j_3k_4l(3, 2, 4, 1);
2097 _M_ADD_PROD_1i_2j_3k_4l(3, 4, 1, 2);
2098
2099 _M_ADD_PROD_1i_2j_3k_4l(4, 1, 3, 2);
2100 _M_ADD_PROD_1i_2j_3k_4l(4, 2, 1, 3);
2101 _M_ADD_PROD_1i_2j_3k_4l(4, 3, 2, 1);
2102
2103 _M_SUB_PROD_1i_2j_3k_4l(1, 2, 4, 3);
2104 _M_SUB_PROD_1i_2j_3k_4l(1, 3, 2, 4);
2105 _M_SUB_PROD_1i_2j_3k_4l(1, 4, 3, 2);
2106
2107 _M_SUB_PROD_1i_2j_3k_4l(2, 1, 3, 4);
2108 _M_SUB_PROD_1i_2j_3k_4l(2, 3, 4, 1);
2109 _M_SUB_PROD_1i_2j_3k_4l(2, 4, 1, 3);
2110
2111 _M_SUB_PROD_1i_2j_3k_4l(3, 1, 4, 2);
2112 _M_SUB_PROD_1i_2j_3k_4l(3, 2, 1, 4);
2113 _M_SUB_PROD_1i_2j_3k_4l(3, 4, 2, 1);
2114
2115 _M_SUB_PROD_1i_2j_3k_4l(4, 1, 2, 3);
2116 _M_SUB_PROD_1i_2j_3k_4l(4, 2, 3, 1);
2117 _M_SUB_PROD_1i_2j_3k_4l(4, 3, 1, 2);
2118
2119 return pos + neg;
2120 }
2121
2122 /* XXX: Probably better to just use src/mesa/math/m_matrix.c because
2123 * I have no idea where this code came from.
2124 */
2125 void
2126 nine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M)
2127 {
2128 int i, k;
2129 float det;
2130
2131 D->m[0][0] =
2132 M->m[1][1] * M->m[2][2] * M->m[3][3] -
2133 M->m[1][1] * M->m[3][2] * M->m[2][3] -
2134 M->m[1][2] * M->m[2][1] * M->m[3][3] +
2135 M->m[1][2] * M->m[3][1] * M->m[2][3] +
2136 M->m[1][3] * M->m[2][1] * M->m[3][2] -
2137 M->m[1][3] * M->m[3][1] * M->m[2][2];
2138
2139 D->m[0][1] =
2140 -M->m[0][1] * M->m[2][2] * M->m[3][3] +
2141 M->m[0][1] * M->m[3][2] * M->m[2][3] +
2142 M->m[0][2] * M->m[2][1] * M->m[3][3] -
2143 M->m[0][2] * M->m[3][1] * M->m[2][3] -
2144 M->m[0][3] * M->m[2][1] * M->m[3][2] +
2145 M->m[0][3] * M->m[3][1] * M->m[2][2];
2146
2147 D->m[0][2] =
2148 M->m[0][1] * M->m[1][2] * M->m[3][3] -
2149 M->m[0][1] * M->m[3][2] * M->m[1][3] -
2150 M->m[0][2] * M->m[1][1] * M->m[3][3] +
2151 M->m[0][2] * M->m[3][1] * M->m[1][3] +
2152 M->m[0][3] * M->m[1][1] * M->m[3][2] -
2153 M->m[0][3] * M->m[3][1] * M->m[1][2];
2154
2155 D->m[0][3] =
2156 -M->m[0][1] * M->m[1][2] * M->m[2][3] +
2157 M->m[0][1] * M->m[2][2] * M->m[1][3] +
2158 M->m[0][2] * M->m[1][1] * M->m[2][3] -
2159 M->m[0][2] * M->m[2][1] * M->m[1][3] -
2160 M->m[0][3] * M->m[1][1] * M->m[2][2] +
2161 M->m[0][3] * M->m[2][1] * M->m[1][2];
2162
2163 D->m[1][0] =
2164 -M->m[1][0] * M->m[2][2] * M->m[3][3] +
2165 M->m[1][0] * M->m[3][2] * M->m[2][3] +
2166 M->m[1][2] * M->m[2][0] * M->m[3][3] -
2167 M->m[1][2] * M->m[3][0] * M->m[2][3] -
2168 M->m[1][3] * M->m[2][0] * M->m[3][2] +
2169 M->m[1][3] * M->m[3][0] * M->m[2][2];
2170
2171 D->m[1][1] =
2172 M->m[0][0] * M->m[2][2] * M->m[3][3] -
2173 M->m[0][0] * M->m[3][2] * M->m[2][3] -
2174 M->m[0][2] * M->m[2][0] * M->m[3][3] +
2175 M->m[0][2] * M->m[3][0] * M->m[2][3] +
2176 M->m[0][3] * M->m[2][0] * M->m[3][2] -
2177 M->m[0][3] * M->m[3][0] * M->m[2][2];
2178
2179 D->m[1][2] =
2180 -M->m[0][0] * M->m[1][2] * M->m[3][3] +
2181 M->m[0][0] * M->m[3][2] * M->m[1][3] +
2182 M->m[0][2] * M->m[1][0] * M->m[3][3] -
2183 M->m[0][2] * M->m[3][0] * M->m[1][3] -
2184 M->m[0][3] * M->m[1][0] * M->m[3][2] +
2185 M->m[0][3] * M->m[3][0] * M->m[1][2];
2186
2187 D->m[1][3] =
2188 M->m[0][0] * M->m[1][2] * M->m[2][3] -
2189 M->m[0][0] * M->m[2][2] * M->m[1][3] -
2190 M->m[0][2] * M->m[1][0] * M->m[2][3] +
2191 M->m[0][2] * M->m[2][0] * M->m[1][3] +
2192 M->m[0][3] * M->m[1][0] * M->m[2][2] -
2193 M->m[0][3] * M->m[2][0] * M->m[1][2];
2194
2195 D->m[2][0] =
2196 M->m[1][0] * M->m[2][1] * M->m[3][3] -
2197 M->m[1][0] * M->m[3][1] * M->m[2][3] -
2198 M->m[1][1] * M->m[2][0] * M->m[3][3] +
2199 M->m[1][1] * M->m[3][0] * M->m[2][3] +
2200 M->m[1][3] * M->m[2][0] * M->m[3][1] -
2201 M->m[1][3] * M->m[3][0] * M->m[2][1];
2202
2203 D->m[2][1] =
2204 -M->m[0][0] * M->m[2][1] * M->m[3][3] +
2205 M->m[0][0] * M->m[3][1] * M->m[2][3] +
2206 M->m[0][1] * M->m[2][0] * M->m[3][3] -
2207 M->m[0][1] * M->m[3][0] * M->m[2][3] -
2208 M->m[0][3] * M->m[2][0] * M->m[3][1] +
2209 M->m[0][3] * M->m[3][0] * M->m[2][1];
2210
2211 D->m[2][2] =
2212 M->m[0][0] * M->m[1][1] * M->m[3][3] -
2213 M->m[0][0] * M->m[3][1] * M->m[1][3] -
2214 M->m[0][1] * M->m[1][0] * M->m[3][3] +
2215 M->m[0][1] * M->m[3][0] * M->m[1][3] +
2216 M->m[0][3] * M->m[1][0] * M->m[3][1] -
2217 M->m[0][3] * M->m[3][0] * M->m[1][1];
2218
2219 D->m[2][3] =
2220 -M->m[0][0] * M->m[1][1] * M->m[2][3] +
2221 M->m[0][0] * M->m[2][1] * M->m[1][3] +
2222 M->m[0][1] * M->m[1][0] * M->m[2][3] -
2223 M->m[0][1] * M->m[2][0] * M->m[1][3] -
2224 M->m[0][3] * M->m[1][0] * M->m[2][1] +
2225 M->m[0][3] * M->m[2][0] * M->m[1][1];
2226
2227 D->m[3][0] =
2228 -M->m[1][0] * M->m[2][1] * M->m[3][2] +
2229 M->m[1][0] * M->m[3][1] * M->m[2][2] +
2230 M->m[1][1] * M->m[2][0] * M->m[3][2] -
2231 M->m[1][1] * M->m[3][0] * M->m[2][2] -
2232 M->m[1][2] * M->m[2][0] * M->m[3][1] +
2233 M->m[1][2] * M->m[3][0] * M->m[2][1];
2234
2235 D->m[3][1] =
2236 M->m[0][0] * M->m[2][1] * M->m[3][2] -
2237 M->m[0][0] * M->m[3][1] * M->m[2][2] -
2238 M->m[0][1] * M->m[2][0] * M->m[3][2] +
2239 M->m[0][1] * M->m[3][0] * M->m[2][2] +
2240 M->m[0][2] * M->m[2][0] * M->m[3][1] -
2241 M->m[0][2] * M->m[3][0] * M->m[2][1];
2242
2243 D->m[3][2] =
2244 -M->m[0][0] * M->m[1][1] * M->m[3][2] +
2245 M->m[0][0] * M->m[3][1] * M->m[1][2] +
2246 M->m[0][1] * M->m[1][0] * M->m[3][2] -
2247 M->m[0][1] * M->m[3][0] * M->m[1][2] -
2248 M->m[0][2] * M->m[1][0] * M->m[3][1] +
2249 M->m[0][2] * M->m[3][0] * M->m[1][1];
2250
2251 D->m[3][3] =
2252 M->m[0][0] * M->m[1][1] * M->m[2][2] -
2253 M->m[0][0] * M->m[2][1] * M->m[1][2] -
2254 M->m[0][1] * M->m[1][0] * M->m[2][2] +
2255 M->m[0][1] * M->m[2][0] * M->m[1][2] +
2256 M->m[0][2] * M->m[1][0] * M->m[2][1] -
2257 M->m[0][2] * M->m[2][0] * M->m[1][1];
2258
2259 det =
2260 M->m[0][0] * D->m[0][0] +
2261 M->m[1][0] * D->m[0][1] +
2262 M->m[2][0] * D->m[0][2] +
2263 M->m[3][0] * D->m[0][3];
2264
2265 det = 1.0 / det;
2266
2267 for (i = 0; i < 4; i++)
2268 for (k = 0; k < 4; k++)
2269 D->m[i][k] *= det;
2270
2271 #ifdef DEBUG
2272 {
2273 D3DMATRIX I;
2274
2275 nine_d3d_matrix_matrix_mul(&I, D, M);
2276
2277 for (i = 0; i < 4; ++i)
2278 for (k = 0; k < 4; ++k)
2279 if (fabsf(I.m[i][k] - (float)(i == k)) > 1e-3)
2280 DBG("Matrix inversion check FAILED !\n");
2281 }
2282 #endif
2283 }
2284
2285 /* TODO: don't use 4x4 inverse, unless this gets all nicely inlined ? */
2286 void
2287 nine_d3d_matrix_inverse_3x3(D3DMATRIX *D, const D3DMATRIX *M)
2288 {
2289 D3DMATRIX T;
2290 unsigned i, j;
2291
2292 for (i = 0; i < 3; ++i)
2293 for (j = 0; j < 3; ++j)
2294 T.m[i][j] = M->m[i][j];
2295 for (i = 0; i < 3; ++i) {
2296 T.m[i][3] = 0.0f;
2297 T.m[3][i] = 0.0f;
2298 }
2299 T.m[3][3] = 1.0f;
2300
2301 nine_d3d_matrix_inverse(D, &T);
2302 }