Merge remote-tracking branch 'mesa-public/master' into vulkan
[mesa.git] / src / gallium / state_trackers / nine / nine_ff.c
1
2 /* FF is big and ugly so feel free to write lines as long as you like.
3 * Aieeeeeeeee !
4 *
5 * Let me make that clearer:
6 * Aieeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee ! !! !!!
7 */
8
9 #include "device9.h"
10 #include "basetexture9.h"
11 #include "vertexdeclaration9.h"
12 #include "vertexshader9.h"
13 #include "pixelshader9.h"
14 #include "nine_ff.h"
15 #include "nine_defines.h"
16 #include "nine_helpers.h"
17 #include "nine_pipe.h"
18 #include "nine_dump.h"
19
20 #include "pipe/p_context.h"
21 #include "tgsi/tgsi_ureg.h"
22 #include "tgsi/tgsi_dump.h"
23 #include "util/u_box.h"
24 #include "util/u_hash_table.h"
25 #include "util/u_upload_mgr.h"
26
27 #define NINE_TGSI_LAZY_DEVS 1
28
29 #define DBG_CHANNEL DBG_FF
30
31 #define NINE_FF_NUM_VS_CONST 256
32 #define NINE_FF_NUM_PS_CONST 24
33
/* Plain four-component float vector (x, y, z, w in declaration order). */
struct fvec4
{
    float x;
    float y;
    float z;
    float w;
};
38
/* Key describing one fixed-function vertex shader variant.
 *
 * The named bitfields overlay value32[]/value64[]. The key hash function
 * folds value32[] and the key comparison function memcmp()s value64[], so
 * both arrays must cover every byte of the key; the static assertions after
 * the struct enforce this at compile time.
 */
struct nine_ff_vs_key
{
    union {
        struct {
            /* word 0 */
            uint32_t position_t : 1;
            uint32_t lighting   : 1;
            uint32_t darkness   : 1; /* lighting enabled but no active lights */
            uint32_t localviewer : 1;
            uint32_t vertexpointsize : 1;
            uint32_t pointscale : 1;
            uint32_t vertexblend : 3;
            uint32_t vertexblend_indexed : 1;
            uint32_t vertextween : 1;
            uint32_t mtl_diffuse : 2; /* 0 = material, 1 = color1, 2 = color2 */
            uint32_t mtl_ambient : 2;
            uint32_t mtl_specular : 2;
            uint32_t mtl_emissive : 2;
            uint32_t fog_mode : 2;
            uint32_t fog_range : 1;
            uint32_t color0in_one : 1;
            uint32_t color1in_one : 1;
            uint32_t fog : 1;
            uint32_t pad1 : 7;
            /* word 1 */
            uint32_t tc_dim_input: 16; /* 8 * 2 bits */
            uint32_t pad2 : 16;
            /* word 2 */
            uint32_t tc_dim_output: 24; /* 8 * 3 bits */
            uint32_t pad3 : 8;
            /* word 3 */
            uint32_t tc_gen : 24; /* 8 * 3 bits */
            uint32_t pad4 : 8;
            /* word 4 */
            uint32_t tc_idx : 24; /* 8 * 3 bits */
            uint32_t pad5 : 8;
            /* word 5: bitmask indexed by NINE_DECLUSAGE_* */
            uint32_t passthrough;
        };
        uint64_t value64[3]; /* don't forget to resize VertexShader9.ff_key */
        uint32_t value32[6];
    };
};

/* Adding a field without growing value64[]/value32[] would make the hash
 * and compare callbacks silently ignore part of the key. */
_Static_assert(sizeof(struct nine_ff_vs_key) ==
               sizeof(((struct nine_ff_vs_key *)0)->value64),
               "nine_ff_vs_key: value64[] must cover the whole key");
_Static_assert(sizeof(struct nine_ff_vs_key) ==
               sizeof(((struct nine_ff_vs_key *)0)->value32),
               "nine_ff_vs_key: value32[] must cover the whole key");
76
/* Texture stage state (one 32-bit word per stage, see ts[] below):
 *
 * COLOROP       D3DTOP 5 bit
 * ALPHAOP       D3DTOP 5 bit
 * COLORARG0     D3DTA  3 bit
 * COLORARG1     D3DTA  3 bit
 * COLORARG2     D3DTA  3 bit
 * ALPHAARG0     D3DTA  3 bit
 * ALPHAARG1     D3DTA  3 bit
 * ALPHAARG2     D3DTA  3 bit
 * RESULTARG     D3DTA  1 bit (CURRENT:0 or TEMP:1)
 * TEXTARGET            2 bit (1D/2D/3D/CUBE)
 * (padding)            1 bit
 * ===========================
 *         32 bit per stage
 */

/* Key describing one fixed-function pixel shader variant.
 *
 * The named fields overlay value32[]/value64[]. The key hash function folds
 * value32[] and the key comparison function memcmp()s value64[], so both
 * arrays must cover every byte of the key; the static assertions after the
 * struct enforce this at compile time.
 */
struct nine_ff_ps_key
{
    union {
        struct {
            struct {
                uint32_t colorop   : 5;
                uint32_t alphaop   : 5;
                uint32_t colorarg0 : 3;
                uint32_t colorarg1 : 3;
                uint32_t colorarg2 : 3;
                uint32_t alphaarg0 : 3;
                uint32_t alphaarg1 : 3;
                uint32_t alphaarg2 : 3;
                uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */
                uint32_t textarget : 2; /* 1D/2D/3D/CUBE */
                uint32_t pad       : 1;
                /* that's 32 bit exactly */
            } ts[8];
            uint32_t projected : 16;
            uint32_t fog : 1; /* for vFog coming from VS */
            uint32_t fog_mode : 2;
            uint32_t specular : 1;
            uint32_t pad1 : 12; /* 9 32-bit words with this */
            uint8_t colorarg_b4[3];
            uint8_t colorarg_b5[3];
            uint8_t alphaarg_b4[3]; /* 11 32-bit words plus a byte */
            uint8_t pad2[3];        /* rounds the key up to 12 32-bit words */
        };
        uint64_t value64[6]; /* don't forget to resize PixelShader9.ff_key */
        uint32_t value32[12];
    };
};

/* Adding a field without growing value64[]/value32[] would make the hash
 * and compare callbacks silently ignore part of the key. */
_Static_assert(sizeof(struct nine_ff_ps_key) ==
               sizeof(((struct nine_ff_ps_key *)0)->value64),
               "nine_ff_ps_key: value64[] must cover the whole key");
_Static_assert(sizeof(struct nine_ff_ps_key) ==
               sizeof(((struct nine_ff_ps_key *)0)->value32),
               "nine_ff_ps_key: value32[] must cover the whole key");
124
125 static unsigned nine_ff_vs_key_hash(void *key)
126 {
127 struct nine_ff_vs_key *vs = key;
128 unsigned i;
129 uint32_t hash = vs->value32[0];
130 for (i = 1; i < Elements(vs->value32); ++i)
131 hash ^= vs->value32[i];
132 return hash;
133 }
134 static int nine_ff_vs_key_comp(void *key1, void *key2)
135 {
136 struct nine_ff_vs_key *a = (struct nine_ff_vs_key *)key1;
137 struct nine_ff_vs_key *b = (struct nine_ff_vs_key *)key2;
138
139 return memcmp(a->value64, b->value64, sizeof(a->value64));
140 }
141 static unsigned nine_ff_ps_key_hash(void *key)
142 {
143 struct nine_ff_ps_key *ps = key;
144 unsigned i;
145 uint32_t hash = ps->value32[0];
146 for (i = 1; i < Elements(ps->value32); ++i)
147 hash ^= ps->value32[i];
148 return hash;
149 }
150 static int nine_ff_ps_key_comp(void *key1, void *key2)
151 {
152 struct nine_ff_ps_key *a = (struct nine_ff_ps_key *)key1;
153 struct nine_ff_ps_key *b = (struct nine_ff_ps_key *)key2;
154
155 return memcmp(a->value64, b->value64, sizeof(a->value64));
156 }
157 static unsigned nine_ff_fvf_key_hash(void *key)
158 {
159 return *(DWORD *)key;
160 }
161 static int nine_ff_fvf_key_comp(void *key1, void *key2)
162 {
163 return *(DWORD *)key1 != *(DWORD *)key2;
164 }
165
/* Forward declarations; both functions are defined later in this file. */
static void nine_ff_prune_vs(struct NineDevice9 *device);
static void nine_ff_prune_ps(struct NineDevice9 *device);
168
169 static void nine_ureg_tgsi_dump(struct ureg_program *ureg, boolean override)
170 {
171 if (debug_get_bool_option("NINE_FF_DUMP", FALSE) || override) {
172 unsigned count;
173 const struct tgsi_token *toks = ureg_get_tokens(ureg, &count);
174 tgsi_dump(toks, 0);
175 ureg_free_tokens(toks);
176 }
177 }
178
/* Replicate one component of a ureg_src across all four channels. */
#define _XXXX(r) ureg_scalar(r, TGSI_SWIZZLE_X)
#define _YYYY(r) ureg_scalar(r, TGSI_SWIZZLE_Y)
#define _ZZZZ(r) ureg_scalar(r, TGSI_SWIZZLE_Z)
#define _WWWW(r) ureg_scalar(r, TGSI_SWIZZLE_W)

/* Identity swizzle, only there so that call sites line up. */
#define _XYZW(r) (r)

/* Same as above, but starting from a ureg_dst that is read back as source. */
#define _X(r) _XXXX(ureg_src(r))
#define _Y(r) _YYYY(ureg_src(r))
#define _Z(r) _ZZZZ(ureg_src(r))
#define _W(r) _WWWW(ureg_src(r))

/* Constant file slot n; expects a local `ureg` at the expansion site. */
#define _CONST(n) ureg_DECL_constant(ureg, n)

/* Material constants start at slot 19. */
#define MATERIAL_CONST(i) \
    _CONST(19 + (i))

/* AL should contain base address of lights table. */
#define LIGHT_CONST(i) \
    ureg_src_indirect(_CONST(i), _X(AL))
199
200 /* VS FF constants layout:
201 *
202 * CONST[ 0.. 3] D3DTS_WORLD * D3DTS_VIEW * D3DTS_PROJECTION
203 * CONST[ 4.. 7] D3DTS_WORLD * D3DTS_VIEW
204 * CONST[ 8..11] D3DTS_VIEW * D3DTS_PROJECTION
205 * CONST[12..15] D3DTS_VIEW
206 * CONST[16..18] Normal matrix
207 *
208 * CONST[19] MATERIAL.Emissive + Material.Ambient * RS.Ambient
209 * CONST[20] MATERIAL.Diffuse
210 * CONST[21] MATERIAL.Ambient
211 * CONST[22] MATERIAL.Specular
212 * CONST[23].x___ MATERIAL.Power
213 * CONST[24] MATERIAL.Emissive
214 * CONST[25] RS.Ambient
215 *
216 * CONST[26].x___ RS.PointSizeMin
217 * CONST[26]._y__ RS.PointSizeMax
218 * CONST[26].__z_ RS.PointSize
219 * CONST[26].___w RS.PointScaleA
220 * CONST[27].x___ RS.PointScaleB
221 * CONST[27]._y__ RS.PointScaleC
222 *
223 * CONST[28].x___ RS.FogEnd
224 * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
225 * CONST[28].__z_ RS.FogDensity
 *
227 * CONST[30].x___ TWEENFACTOR
228 *
229 * CONST[32].x___ LIGHT[0].Type
230 * CONST[32]._yzw LIGHT[0].Attenuation0,1,2
231 * CONST[33] LIGHT[0].Diffuse
232 * CONST[34] LIGHT[0].Specular
233 * CONST[35] LIGHT[0].Ambient
234 * CONST[36].xyz_ LIGHT[0].Position
235 * CONST[36].___w LIGHT[0].Range
236 * CONST[37].xyz_ LIGHT[0].Direction
237 * CONST[37].___w LIGHT[0].Falloff
238 * CONST[38].x___ cos(LIGHT[0].Theta / 2)
239 * CONST[38]._y__ cos(LIGHT[0].Phi / 2)
240 * CONST[38].__z_ 1.0f / (cos(LIGHT[0].Theta / 2) - cos(Light[0].Phi / 2))
241 * CONST[39].xyz_ LIGHT[0].HalfVector (for directional lights)
242 * CONST[39].___w 1 if this is the last active light, 0 if not
243 * CONST[40] LIGHT[1]
244 * CONST[48] LIGHT[2]
245 * CONST[56] LIGHT[3]
246 * CONST[64] LIGHT[4]
247 * CONST[72] LIGHT[5]
248 * CONST[80] LIGHT[6]
249 * CONST[88] LIGHT[7]
250 * NOTE: no lighting code is generated if there are no active lights
251 *
252 * CONST[100].x___ Viewport 2/width
253 * CONST[100]._y__ Viewport 2/height
254 * CONST[100].__z_ Viewport 1/(zmax - zmin)
255 * CONST[101].x___ Viewport x0
256 * CONST[101]._y__ Viewport y0
257 * CONST[101].__z_ Viewport z0
258 *
259 * CONST[128..131] D3DTS_TEXTURE0
260 * CONST[132..135] D3DTS_TEXTURE1
261 * CONST[136..139] D3DTS_TEXTURE2
262 * CONST[140..143] D3DTS_TEXTURE3
263 * CONST[144..147] D3DTS_TEXTURE4
264 * CONST[148..151] D3DTS_TEXTURE5
265 * CONST[152..155] D3DTS_TEXTURE6
266 * CONST[156..159] D3DTS_TEXTURE7
267 *
268 * CONST[224] D3DTS_WORLDMATRIX[0]
269 * CONST[228] D3DTS_WORLDMATRIX[1]
270 * ...
271 * CONST[252] D3DTS_WORLDMATRIX[7]
272 */
273 struct vs_build_ctx
274 {
275 struct ureg_program *ureg;
276 const struct nine_ff_vs_key *key;
277
278 uint16_t input[PIPE_MAX_ATTRIBS];
279 unsigned num_inputs;
280
281 struct ureg_src aVtx;
282 struct ureg_src aNrm;
283 struct ureg_src aCol[2];
284 struct ureg_src aTex[8];
285 struct ureg_src aPsz;
286 struct ureg_src aInd;
287 struct ureg_src aWgt;
288
289 struct ureg_src aVtx1; /* tweening */
290 struct ureg_src aNrm1;
291
292 struct ureg_src mtlA;
293 struct ureg_src mtlD;
294 struct ureg_src mtlS;
295 struct ureg_src mtlE;
296 };
297
298 static inline unsigned
299 get_texcoord_sn(struct pipe_screen *screen)
300 {
301 if (screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD))
302 return TGSI_SEMANTIC_TEXCOORD;
303 return TGSI_SEMANTIC_GENERIC;
304 }
305
306 static inline struct ureg_src
307 build_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl)
308 {
309 const unsigned i = vs->num_inputs++;
310 assert(i < PIPE_MAX_ATTRIBS);
311 vs->input[i] = ndecl;
312 return ureg_DECL_vs_input(vs->ureg, i);
313 }
314
315 /* NOTE: dst may alias src */
316 static inline void
317 ureg_normalize3(struct ureg_program *ureg,
318 struct ureg_dst dst, struct ureg_src src,
319 struct ureg_dst tmp)
320 {
321 #ifdef NINE_TGSI_LAZY_DEVS
322 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
323
324 ureg_DP3(ureg, tmp_x, src, src);
325 ureg_RSQ(ureg, tmp_x, _X(tmp));
326 ureg_MUL(ureg, dst, src, _X(tmp));
327 #else
328 ureg_NRM(ureg, dst, src);
329 #endif
330 }
331
332 static void *
333 nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
334 {
335 const struct nine_ff_vs_key *key = vs->key;
336 struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
337 struct ureg_dst oPos, oCol[2], oPsz, oFog;
338 struct ureg_dst rVtx, rNrm;
339 struct ureg_dst r[8];
340 struct ureg_dst AR;
341 struct ureg_dst tmp, tmp_x, tmp_y, tmp_z;
342 unsigned i, c;
343 unsigned label[32], l = 0;
344 unsigned num_r = 8;
345 boolean need_rNrm = key->lighting || key->pointscale || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL);
346 boolean need_rVtx = key->lighting || key->fog_mode;
347 const unsigned texcoord_sn = get_texcoord_sn(device->screen);
348
349 vs->ureg = ureg;
350
351 /* Check which inputs we should transform. */
352 for (i = 0; i < 8 * 3; i += 3) {
353 switch ((key->tc_gen >> i) & 0x3) {
354 case NINED3DTSS_TCI_CAMERASPACENORMAL:
355 need_rNrm = TRUE;
356 break;
357 case NINED3DTSS_TCI_CAMERASPACEPOSITION:
358 need_rVtx = TRUE;
359 break;
360 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
361 need_rVtx = need_rNrm = TRUE;
362 break;
363 default:
364 break;
365 }
366 }
367
368 /* Declare and record used inputs (needed for linkage with vertex format):
369 * (texture coordinates handled later)
370 */
371 vs->aVtx = build_vs_add_input(vs,
372 key->position_t ? NINE_DECLUSAGE_POSITIONT : NINE_DECLUSAGE_POSITION);
373
374 if (need_rNrm)
375 vs->aNrm = build_vs_add_input(vs, NINE_DECLUSAGE_NORMAL);
376
377 vs->aCol[0] = ureg_imm1f(ureg, 1.0f);
378 vs->aCol[1] = ureg_imm1f(ureg, 1.0f);
379
380 if (key->lighting || key->darkness) {
381 const unsigned mask = key->mtl_diffuse | key->mtl_specular |
382 key->mtl_ambient | key->mtl_emissive;
383 if ((mask & 0x1) && !key->color0in_one)
384 vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
385 if ((mask & 0x2) && !key->color1in_one)
386 vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
387
388 vs->mtlD = MATERIAL_CONST(1);
389 vs->mtlA = MATERIAL_CONST(2);
390 vs->mtlS = MATERIAL_CONST(3);
391 vs->mtlE = MATERIAL_CONST(5);
392 if (key->mtl_diffuse == 1) vs->mtlD = vs->aCol[0]; else
393 if (key->mtl_diffuse == 2) vs->mtlD = vs->aCol[1];
394 if (key->mtl_ambient == 1) vs->mtlA = vs->aCol[0]; else
395 if (key->mtl_ambient == 2) vs->mtlA = vs->aCol[1];
396 if (key->mtl_specular == 1) vs->mtlS = vs->aCol[0]; else
397 if (key->mtl_specular == 2) vs->mtlS = vs->aCol[1];
398 if (key->mtl_emissive == 1) vs->mtlE = vs->aCol[0]; else
399 if (key->mtl_emissive == 2) vs->mtlE = vs->aCol[1];
400 } else {
401 if (!key->color0in_one) vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
402 if (!key->color1in_one) vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
403 }
404
405 if (key->vertexpointsize)
406 vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE);
407
408 if (key->vertexblend_indexed || key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES))
409 vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES);
410 if (key->vertexblend || key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT))
411 vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT);
412 if (key->vertextween) {
413 vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1));
414 vs->aNrm1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(NORMAL,1));
415 }
416
417 /* Declare outputs:
418 */
419 oPos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */
420 oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0));
421 oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1));
422 if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
423 oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 0);
424 oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X);
425 }
426
427 if (key->vertexpointsize || key->pointscale) {
428 oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0,
429 TGSI_WRITEMASK_X, 0, 1);
430 oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X);
431 }
432
433 /* Declare TEMPs:
434 */
435 for (i = 0; i < num_r; ++i)
436 r[i] = ureg_DECL_local_temporary(ureg);
437 tmp = r[0];
438 tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
439 tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
440 tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
441 if (key->lighting || key->vertexblend)
442 AR = ureg_DECL_address(ureg);
443
444 rVtx = ureg_writemask(r[1], TGSI_WRITEMASK_XYZ);
445 rNrm = ureg_writemask(r[2], TGSI_WRITEMASK_XYZ);
446
447 /* === Vertex transformation / vertex blending:
448 */
449 if (key->vertextween) {
450 assert(!key->vertexblend);
451 ureg_LRP(ureg, r[2], _XXXX(_CONST(30)), vs->aVtx, vs->aVtx1);
452 if (need_rNrm)
453 ureg_LRP(ureg, r[3], _XXXX(_CONST(30)), vs->aNrm, vs->aNrm1);
454 vs->aVtx = ureg_src(r[2]);
455 vs->aNrm = ureg_src(r[3]);
456 }
457
458 if (key->vertexblend) {
459 struct ureg_src cWM[4];
460
461 for (i = 224; i <= 255; ++i)
462 ureg_DECL_constant(ureg, i);
463
464 /* translate world matrix index to constant file index */
465 if (key->vertexblend_indexed) {
466 ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 224.0f));
467 ureg_ARL(ureg, AR, ureg_src(tmp));
468 }
469 for (i = 0; i < key->vertexblend; ++i) {
470 for (c = 0; c < 4; ++c) {
471 cWM[c] = ureg_src_register(TGSI_FILE_CONSTANT, (224 + i * 4) * !key->vertexblend_indexed + c);
472 if (key->vertexblend_indexed)
473 cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i));
474 }
475 /* multiply by WORLD(index) */
476 ureg_MUL(ureg, r[0], _XXXX(vs->aVtx), cWM[0]);
477 ureg_MAD(ureg, r[0], _YYYY(vs->aVtx), cWM[1], ureg_src(r[0]));
478 ureg_MAD(ureg, r[0], _ZZZZ(vs->aVtx), cWM[2], ureg_src(r[0]));
479 ureg_MAD(ureg, r[0], _WWWW(vs->aVtx), cWM[3], ureg_src(r[0]));
480
481 /* accumulate weighted position value */
482 if (i)
483 ureg_MAD(ureg, r[2], ureg_src(r[0]), ureg_scalar(vs->aWgt, i), ureg_src(r[2]));
484 else
485 ureg_MUL(ureg, r[2], ureg_src(r[0]), ureg_scalar(vs->aWgt, 0));
486 }
487 /* multiply by VIEW_PROJ */
488 ureg_MUL(ureg, r[0], _X(r[2]), _CONST(8));
489 ureg_MAD(ureg, r[0], _Y(r[2]), _CONST(9), ureg_src(r[0]));
490 ureg_MAD(ureg, r[0], _Z(r[2]), _CONST(10), ureg_src(r[0]));
491 ureg_MAD(ureg, oPos, _W(r[2]), _CONST(11), ureg_src(r[0]));
492
493 if (need_rVtx)
494 vs->aVtx = ureg_src(r[2]);
495 } else
496 if (key->position_t && device->driver_caps.window_space_position_support) {
497 ureg_MOV(ureg, oPos, vs->aVtx);
498 } else if (key->position_t) {
499 /* vs->aVtx contains the coordinates buffer wise.
500 * later in the pipeline, clipping, viewport and division
501 * by w (rhw = 1/w) are going to be applied, so do the reverse
502 * of these transformations (except clipping) to have the good
503 * position at the end.*/
504 ureg_MOV(ureg, tmp, vs->aVtx);
505 /* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */
506 ureg_SUB(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(101));
507 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100));
508 ureg_SUB(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 1.0f));
509 /* Y needs to be reversed */
510 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp)));
511 /* inverse rhw */
512 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), _W(tmp));
513 /* multiply X, Y, Z by w */
514 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _W(tmp));
515 ureg_MOV(ureg, oPos, ureg_src(tmp));
516 } else {
517 /* position = vertex * WORLD_VIEW_PROJ */
518 ureg_MUL(ureg, r[0], _XXXX(vs->aVtx), _CONST(0));
519 ureg_MAD(ureg, r[0], _YYYY(vs->aVtx), _CONST(1), ureg_src(r[0]));
520 ureg_MAD(ureg, r[0], _ZZZZ(vs->aVtx), _CONST(2), ureg_src(r[0]));
521 ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(r[0]));
522 }
523
524 if (need_rVtx) {
525 ureg_MUL(ureg, rVtx, _XXXX(vs->aVtx), _CONST(4));
526 ureg_MAD(ureg, rVtx, _YYYY(vs->aVtx), _CONST(5), ureg_src(rVtx));
527 ureg_MAD(ureg, rVtx, _ZZZZ(vs->aVtx), _CONST(6), ureg_src(rVtx));
528 ureg_MAD(ureg, rVtx, _WWWW(vs->aVtx), _CONST(7), ureg_src(rVtx));
529 }
530 if (need_rNrm) {
531 ureg_MUL(ureg, rNrm, _XXXX(vs->aNrm), _CONST(16));
532 ureg_MAD(ureg, rNrm, _YYYY(vs->aNrm), _CONST(17), ureg_src(rNrm));
533 ureg_MAD(ureg, rNrm, _ZZZZ(vs->aNrm), _CONST(18), ureg_src(rNrm));
534 ureg_normalize3(ureg, rNrm, ureg_src(rNrm), tmp);
535 }
536 /* NOTE: don't use vs->aVtx, vs->aNrm after this line */
537
538 /* === Process point size:
539 */
540 if (key->vertexpointsize) {
541 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
542 #ifdef NINE_TGSI_LAZY_DEVS
543 struct ureg_dst tmp_clamp = ureg_DECL_temporary(ureg);
544
545 ureg_MAX(ureg, tmp_clamp, vs->aPsz, _XXXX(cPsz1));
546 ureg_MIN(ureg, oPsz, ureg_src(tmp_clamp), _YYYY(cPsz1));
547 ureg_release_temporary(ureg, tmp_clamp);
548 #else
549 ureg_CLAMP(ureg, oPsz, vs->aPsz, _XXXX(cPsz1), _YYYY(cPsz1));
550 #endif
551 } else if (key->pointscale) {
552 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
553 struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27);
554
555 ureg_DP3(ureg, tmp_x, ureg_src(r[1]), ureg_src(r[1]));
556 ureg_SQRT(ureg, tmp_y, _X(tmp));
557 ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2));
558 ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1));
559 ureg_RCP(ureg, tmp_x, ureg_src(tmp));
560 ureg_MUL(ureg, tmp_x, ureg_src(tmp), _ZZZZ(cPsz1));
561 #ifdef NINE_TGSI_LAZY_DEVS
562 struct ureg_dst tmp_clamp = ureg_DECL_temporary(ureg);
563
564 ureg_MAX(ureg, tmp_clamp, _X(tmp), _XXXX(cPsz1));
565 ureg_MIN(ureg, oPsz, ureg_src(tmp_clamp), _YYYY(cPsz1));
566 ureg_release_temporary(ureg, tmp_clamp);
567 #else
568 ureg_CLAMP(ureg, oPsz, _X(tmp), _XXXX(cPsz1), _YYYY(cPsz1));
569 #endif
570 }
571
572 for (i = 0; i < 8; ++i) {
573 struct ureg_dst oTex, input_coord, transformed, t;
574 unsigned c, writemask;
575 const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7;
576 const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7;
577 unsigned dim_input = 1 + ((key->tc_dim_input >> (i * 2)) & 0x3);
578 const unsigned dim_output = (key->tc_dim_output >> (i * 3)) & 0x7;
579
580 /* No texture output of index s */
581 if (tci == NINED3DTSS_TCI_DISABLE)
582 continue;
583 oTex = ureg_DECL_output(ureg, texcoord_sn, i);
584 input_coord = r[5];
585 transformed = r[6];
586
587 /* Get the coordinate */
588 switch (tci) {
589 case NINED3DTSS_TCI_PASSTHRU:
590 /* NINED3DTSS_TCI_PASSTHRU => Use texcoord coming from index idx *
591 * Else the idx is used only to determine wrapping mode. */
592 vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx));
593 ureg_MOV(ureg, input_coord, vs->aTex[idx]);
594 break;
595 case NINED3DTSS_TCI_CAMERASPACENORMAL:
596 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rNrm));
597 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
598 dim_input = 4;
599 break;
600 case NINED3DTSS_TCI_CAMERASPACEPOSITION:
601 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rVtx));
602 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
603 dim_input = 4;
604 break;
605 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
606 tmp.WriteMask = TGSI_WRITEMASK_XYZ;
607 ureg_DP3(ureg, tmp_x, ureg_src(rVtx), ureg_src(rNrm));
608 ureg_MUL(ureg, tmp, ureg_src(rNrm), _X(tmp));
609 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
610 ureg_SUB(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rVtx), ureg_src(tmp));
611 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
612 dim_input = 4;
613 tmp.WriteMask = TGSI_WRITEMASK_XYZW;
614 break;
615 case NINED3DTSS_TCI_SPHEREMAP:
616 assert(!"TODO");
617 break;
618 default:
619 assert(0);
620 break;
621 }
622
623 /* Apply the transformation */
624 /* dim_output == 0 => do not transform the components.
625 * XYZRHW also disables transformation */
626 if (!dim_output || key->position_t) {
627 transformed = input_coord;
628 writemask = TGSI_WRITEMASK_XYZW;
629 } else {
630 for (c = 0; c < dim_output; c++) {
631 t = ureg_writemask(transformed, 1 << c);
632 switch (dim_input) {
633 /* dim_input = 1 2 3: -> we add trailing 1 to input*/
634 case 1: ureg_MAD(ureg, t, _X(input_coord), _XXXX(_CONST(128 + i * 4 + c)), _YYYY(_CONST(128 + i * 4 + c)));
635 break;
636 case 2: ureg_DP2(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
637 ureg_ADD(ureg, t, ureg_src(transformed), _ZZZZ(_CONST(128 + i * 4 + c)));
638 break;
639 case 3: ureg_DP3(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
640 ureg_ADD(ureg, t, ureg_src(transformed), _WWWW(_CONST(128 + i * 4 + c)));
641 break;
642 case 4: ureg_DP4(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); break;
643 default:
644 assert(0);
645 }
646 }
647 writemask = (1 << dim_output) - 1;
648 }
649
650 ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed));
651 }
652
653 /* === Lighting:
654 *
655 * DIRECTIONAL: Light at infinite distance, parallel rays, no attenuation.
656 * POINT: Finite distance to scene, divergent rays, isotropic, attenuation.
657 * SPOT: Finite distance, divergent rays, angular dependence, attenuation.
658 *
659 * vec3 normal = normalize(in.Normal * NormalMatrix);
660 * vec3 hitDir = light.direction;
661 * float atten = 1.0;
662 *
663 * if (light.type != DIRECTIONAL)
664 * {
665 * vec3 hitVec = light.position - eyeVertex;
666 * float d = length(hitVec);
667 * hitDir = hitVec / d;
668 * atten = 1 / ((light.atten2 * d + light.atten1) * d + light.atten0);
669 * }
670 *
671 * if (light.type == SPOTLIGHT)
672 * {
673 * float rho = dp3(-hitVec, light.direction);
674 * if (rho < cos(light.phi / 2))
675 * atten = 0;
676 * if (rho < cos(light.theta / 2))
677 * atten *= pow(some_func(rho), light.falloff);
678 * }
679 *
680 * float nDotHit = dp3_sat(normal, hitVec);
681 * float powFact = 0.0;
682 *
683 * if (nDotHit > 0.0)
684 * {
685 * vec3 midVec = normalize(hitDir + eye);
686 * float nDotMid = dp3_sat(normal, midVec);
687 * pFact = pow(nDotMid, material.power);
688 * }
689 *
690 * ambient += light.ambient * atten;
691 * diffuse += light.diffuse * atten * nDotHit;
692 * specular += light.specular * atten * powFact;
693 */
694 if (key->lighting) {
695 struct ureg_dst rAtt = ureg_writemask(r[1], TGSI_WRITEMASK_W);
696 struct ureg_dst rHit = ureg_writemask(r[3], TGSI_WRITEMASK_XYZ);
697 struct ureg_dst rMid = ureg_writemask(r[4], TGSI_WRITEMASK_XYZ);
698
699 struct ureg_dst rCtr = ureg_writemask(r[2], TGSI_WRITEMASK_W);
700
701 struct ureg_dst AL = ureg_writemask(AR, TGSI_WRITEMASK_X);
702
703 /* Light.*.Alpha is not used. */
704 struct ureg_dst rD = ureg_writemask(r[5], TGSI_WRITEMASK_XYZ);
705 struct ureg_dst rA = ureg_writemask(r[6], TGSI_WRITEMASK_XYZ);
706 struct ureg_dst rS = ureg_writemask(r[7], TGSI_WRITEMASK_XYZ);
707
708 struct ureg_src mtlP = _XXXX(MATERIAL_CONST(4));
709
710 struct ureg_src cLKind = _XXXX(LIGHT_CONST(0));
711 struct ureg_src cLAtt0 = _YYYY(LIGHT_CONST(0));
712 struct ureg_src cLAtt1 = _ZZZZ(LIGHT_CONST(0));
713 struct ureg_src cLAtt2 = _WWWW(LIGHT_CONST(0));
714 struct ureg_src cLColD = _XYZW(LIGHT_CONST(1));
715 struct ureg_src cLColS = _XYZW(LIGHT_CONST(2));
716 struct ureg_src cLColA = _XYZW(LIGHT_CONST(3));
717 struct ureg_src cLPos = _XYZW(LIGHT_CONST(4));
718 struct ureg_src cLRng = _WWWW(LIGHT_CONST(4));
719 struct ureg_src cLDir = _XYZW(LIGHT_CONST(5));
720 struct ureg_src cLFOff = _WWWW(LIGHT_CONST(5));
721 struct ureg_src cLTht = _XXXX(LIGHT_CONST(6));
722 struct ureg_src cLPhi = _YYYY(LIGHT_CONST(6));
723 struct ureg_src cLSDiv = _ZZZZ(LIGHT_CONST(6));
724 struct ureg_src cLLast = _WWWW(LIGHT_CONST(7));
725
726 const unsigned loop_label = l++;
727
728 ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */
729 ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f));
730 ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f));
731 ureg_MOV(ureg, rS, ureg_imm1f(ureg, 0.0f));
732 rD = ureg_saturate(rD);
733 rA = ureg_saturate(rA);
734 rS = ureg_saturate(rS);
735
736
737 /* loop management */
738 ureg_BGNLOOP(ureg, &label[loop_label]);
739 ureg_ARL(ureg, AL, _W(rCtr));
740
741 /* if (not DIRECTIONAL light): */
742 ureg_SNE(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_DIRECTIONAL));
743 ureg_MOV(ureg, rHit, ureg_negate(cLDir));
744 ureg_MOV(ureg, rAtt, ureg_imm1f(ureg, 1.0f));
745 ureg_IF(ureg, _X(tmp), &label[l++]);
746 {
747 /* hitDir = light.position - eyeVtx
748 * d = length(hitDir)
749 * hitDir /= d
750 */
751 ureg_SUB(ureg, rHit, cLPos, ureg_src(rVtx));
752 ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit));
753 ureg_RSQ(ureg, tmp_y, _X(tmp));
754 ureg_MUL(ureg, rHit, ureg_src(rHit), _Y(tmp)); /* normalize */
755 ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */
756
757 /* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */
758 ureg_MAD(ureg, rAtt, _X(tmp), cLAtt2, cLAtt1);
759 ureg_MAD(ureg, rAtt, _X(tmp), _W(rAtt), cLAtt0);
760 ureg_RCP(ureg, rAtt, _W(rAtt));
761 /* cut-off if distance exceeds Light.Range */
762 ureg_SLT(ureg, tmp_x, _X(tmp), cLRng);
763 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
764 }
765 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
766 ureg_ENDIF(ureg);
767
768 /* if (SPOT light) */
769 ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT));
770 ureg_IF(ureg, _X(tmp), &label[l++]);
771 {
772 /* rho = dp3(-hitDir, light.spotDir)
773 *
774 * if (rho > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi
775 * spotAtt = 1
776 * else
777 * if (rho <= light.cphi2)
778 * spotAtt = 0
779 * else
780 * spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff
781 */
782 ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */
783 ureg_SUB(ureg, tmp_x, _Y(tmp), cLPhi);
784 ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv);
785 ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */
786 ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */
787 ureg_SGE(ureg, tmp_y, _Y(tmp), cLPhi); /* if inside phi */
788 ureg_MAD(ureg, ureg_saturate(tmp_x), _X(tmp), _Y(tmp), _Z(tmp));
789 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
790 }
791 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
792 ureg_ENDIF(ureg);
793
794 /* directional factors, let's not use LIT because of clarity */
795 ureg_DP3(ureg, ureg_saturate(tmp_x), ureg_src(rNrm), ureg_src(rHit));
796 ureg_MOV(ureg, tmp_y, ureg_imm1f(ureg, 0.0f));
797 ureg_IF(ureg, _X(tmp), &label[l++]);
798 {
799 /* midVec = normalize(hitDir + eyeDir) */
800 if (key->localviewer) {
801 ureg_normalize3(ureg, rMid, ureg_src(rVtx), tmp);
802 ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_negate(ureg_src(rMid)));
803 } else {
804 ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, 1.0f));
805 }
806 ureg_normalize3(ureg, rMid, ureg_src(rMid), tmp);
807 ureg_DP3(ureg, ureg_saturate(tmp_y), ureg_src(rNrm), ureg_src(rMid));
808 ureg_POW(ureg, tmp_y, _Y(tmp), mtlP);
809
810 ureg_MUL(ureg, tmp_x, _W(rAtt), _X(tmp)); /* dp3(normal,hitDir) * att */
811 ureg_MUL(ureg, tmp_y, _W(rAtt), _Y(tmp)); /* power factor * att */
812 ureg_MAD(ureg, rD, cLColD, _X(tmp), ureg_src(rD)); /* accumulate diffuse */
813 ureg_MAD(ureg, rS, cLColS, _Y(tmp), ureg_src(rS)); /* accumulate specular */
814 }
815 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
816 ureg_ENDIF(ureg);
817
818 ureg_MAD(ureg, rA, cLColA, _W(rAtt), ureg_src(rA)); /* accumulate ambient */
819
820 /* break if this was the last light */
821 ureg_IF(ureg, cLLast, &label[l++]);
822 ureg_BRK(ureg);
823 ureg_ENDIF(ureg);
824 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
825
826 ureg_ADD(ureg, rCtr, _W(rCtr), ureg_imm1f(ureg, 8.0f));
827 ureg_fixup_label(ureg, label[loop_label], ureg_get_instruction_number(ureg));
828 ureg_ENDLOOP(ureg, &label[loop_label]);
829
830 /* Set alpha factors of illumination to 1.0 for the multiplications. */
831 rD.WriteMask = TGSI_WRITEMASK_W; rD.Saturate = 0;
832 rS.WriteMask = TGSI_WRITEMASK_W; rS.Saturate = 0;
833 rA.WriteMask = TGSI_WRITEMASK_W; rA.Saturate = 0;
834 ureg_MOV(ureg, rD, ureg_imm1f(ureg, 1.0f));
835 ureg_MOV(ureg, rS, ureg_imm1f(ureg, 1.0f));
836
837 /* Apply to material:
838 *
839 * oCol[0] = (material.emissive + material.ambient * rs.ambient) +
840 * material.ambient * ambient +
841 * material.diffuse * diffuse +
842 * oCol[1] = material.specular * specular;
843 */
844 if (key->mtl_emissive == 0 && key->mtl_ambient == 0) {
845 ureg_MOV(ureg, rA, ureg_imm1f(ureg, 1.0f));
846 ureg_MAD(ureg, tmp, ureg_src(rA), vs->mtlA, _CONST(19));
847 } else {
848 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), _CONST(25));
849 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE);
850 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W ), vs->mtlA, vs->mtlE);
851 }
852 ureg_MAD(ureg, oCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp));
853 ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS);
854 } else
855 /* COLOR */
856 if (key->darkness) {
857 if (key->mtl_emissive == 0 && key->mtl_ambient == 0) {
858 ureg_MAD(ureg, oCol[0], vs->mtlD, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), _CONST(19));
859 } else {
860 ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE);
861 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), vs->mtlA, vs->mtlE);
862 ureg_ADD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD, _W(tmp));
863 }
864 ureg_MUL(ureg, oCol[1], ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), vs->mtlS);
865 } else {
866 ureg_MOV(ureg, oCol[0], vs->aCol[0]);
867 ureg_MOV(ureg, oCol[1], vs->aCol[1]);
868 }
869
870 /* === Process fog.
871 *
872 * exp(x) = ex2(log2(e) * x)
873 */
874 if (key->fog_mode) {
875 if (key->position_t) {
876 ureg_MOV(ureg, ureg_saturate(tmp_x), ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));
877 } else
878 if (key->fog_range) {
879 ureg_DP3(ureg, tmp_x, ureg_src(rVtx), ureg_src(rVtx));
880 ureg_RSQ(ureg, tmp_z, _X(tmp));
881 ureg_MUL(ureg, tmp_z, _Z(tmp), _X(tmp));
882 } else {
883 ureg_MOV(ureg, tmp_z, ureg_abs(_Z(rVtx)));
884 }
885
886 if (key->fog_mode == D3DFOG_EXP) {
887 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
888 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
889 ureg_EX2(ureg, tmp_x, _X(tmp));
890 } else
891 if (key->fog_mode == D3DFOG_EXP2) {
892 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
893 ureg_MUL(ureg, tmp_x, _X(tmp), _X(tmp));
894 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
895 ureg_EX2(ureg, tmp_x, _X(tmp));
896 } else
897 if (key->fog_mode == D3DFOG_LINEAR && !key->position_t) {
898 ureg_SUB(ureg, tmp_x, _XXXX(_CONST(28)), _Z(tmp));
899 ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28)));
900 }
901 ureg_MOV(ureg, oFog, _X(tmp));
902 } else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) {
903 ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));
904 }
905
906 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) {
907 struct ureg_src input;
908 struct ureg_dst output;
909 input = vs->aWgt;
910 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 18);
911 ureg_MOV(ureg, output, input);
912 }
913 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) {
914 struct ureg_src input;
915 struct ureg_dst output;
916 input = vs->aInd;
917 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19);
918 ureg_MOV(ureg, output, input);
919 }
920 if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) {
921 struct ureg_src input;
922 struct ureg_dst output;
923 input = vs->aNrm;
924 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20);
925 ureg_MOV(ureg, output, input);
926 }
927 if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) {
928 struct ureg_src input;
929 struct ureg_dst output;
930 input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT);
931 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21);
932 ureg_MOV(ureg, output, input);
933 }
934 if (key->passthrough & (1 << NINE_DECLUSAGE_BINORMAL)) {
935 struct ureg_src input;
936 struct ureg_dst output;
937 input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL);
938 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22);
939 ureg_MOV(ureg, output, input);
940 }
941 if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
942 struct ureg_src input;
943 struct ureg_dst output;
944 input = build_vs_add_input(vs, NINE_DECLUSAGE_FOG);
945 input = ureg_scalar(input, TGSI_SWIZZLE_X);
946 output = oFog;
947 ureg_MOV(ureg, output, input);
948 }
949 if (key->passthrough & (1 << NINE_DECLUSAGE_DEPTH)) {
950 (void) 0; /* TODO: replace z of position output ? */
951 }
952
953
954 if (key->position_t && device->driver_caps.window_space_position_support)
955 ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
956
957 ureg_END(ureg);
958 nine_ureg_tgsi_dump(ureg, FALSE);
959 return ureg_create_shader_and_destroy(ureg, device->pipe);
960 }
961
962 /* PS FF constants layout:
963 *
964 * CONST[ 0.. 7] stage[i].D3DTSS_CONSTANT
965 * CONST[ 8..15].x___ stage[i].D3DTSS_BUMPENVMAT00
966 * CONST[ 8..15]._y__ stage[i].D3DTSS_BUMPENVMAT01
967 * CONST[ 8..15].__z_ stage[i].D3DTSS_BUMPENVMAT10
968 * CONST[ 8..15].___w stage[i].D3DTSS_BUMPENVMAT11
969 * CONST[16..19].x_z_ stage[i].D3DTSS_BUMPENVLSCALE
970 * CONST[16..19]._y_w stage[i].D3DTSS_BUMPENVLOFFSET
971 *
972 * CONST[20] D3DRS_TEXTUREFACTOR
973 * CONST[21] D3DRS_FOGCOLOR
974 * CONST[22].x___ RS.FogEnd
975 * CONST[22]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
976 * CONST[22].__z_ RS.FogDensity
977 */
978 struct ps_build_ctx
979 {
980 struct ureg_program *ureg;
981
982 struct ureg_src vC[2]; /* DIFFUSE, SPECULAR */
983 struct ureg_src vT[8]; /* TEXCOORD[i] */
984 struct ureg_dst r[6]; /* TEMPs */
985 struct ureg_dst rCur; /* D3DTA_CURRENT */
986 struct ureg_dst rMod;
987 struct ureg_src rCurSrc;
988 struct ureg_dst rTmp; /* D3DTA_TEMP */
989 struct ureg_src rTmpSrc;
990 struct ureg_dst rTex;
991 struct ureg_src rTexSrc;
992 struct ureg_src cBEM[8];
993 struct ureg_src s[8];
994
995 struct {
996 unsigned index;
997 unsigned index_pre_mod;
998 unsigned num_regs;
999 } stage;
1000 };
1001
1002 static struct ureg_src
1003 ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta)
1004 {
1005 struct ureg_src reg;
1006
1007 switch (ta & D3DTA_SELECTMASK) {
1008 case D3DTA_CONSTANT:
1009 reg = ureg_DECL_constant(ps->ureg, ps->stage.index);
1010 break;
1011 case D3DTA_CURRENT:
1012 reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc;
1013 break;
1014 case D3DTA_DIFFUSE:
1015 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_PERSPECTIVE);
1016 break;
1017 case D3DTA_SPECULAR:
1018 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE);
1019 break;
1020 case D3DTA_TEMP:
1021 reg = ps->rTmpSrc;
1022 break;
1023 case D3DTA_TEXTURE:
1024 reg = ps->rTexSrc;
1025 break;
1026 case D3DTA_TFACTOR:
1027 reg = ureg_DECL_constant(ps->ureg, 20);
1028 break;
1029 default:
1030 assert(0);
1031 reg = ureg_src_undef();
1032 break;
1033 }
1034 if (ta & D3DTA_COMPLEMENT) {
1035 struct ureg_dst dst = ps->r[ps->stage.num_regs++];
1036 ureg_SUB(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), reg);
1037 reg = ureg_src(dst);
1038 }
1039 if (ta & D3DTA_ALPHAREPLICATE)
1040 reg = _WWWW(reg);
1041 return reg;
1042 }
1043
1044 static struct ureg_dst
1045 ps_get_ts_dst(struct ps_build_ctx *ps, unsigned ta)
1046 {
1047 assert(!(ta & (D3DTA_COMPLEMENT | D3DTA_ALPHAREPLICATE)));
1048
1049 switch (ta & D3DTA_SELECTMASK) {
1050 case D3DTA_CURRENT:
1051 return ps->rCur;
1052 case D3DTA_TEMP:
1053 return ps->rTmp;
1054 default:
1055 assert(0);
1056 return ureg_dst_undef();
1057 }
1058 }
1059
1060 static uint8_t ps_d3dtop_args_mask(D3DTEXTUREOP top)
1061 {
1062 switch (top) {
1063 case D3DTOP_DISABLE:
1064 return 0x0;
1065 case D3DTOP_SELECTARG1:
1066 case D3DTOP_PREMODULATE:
1067 return 0x2;
1068 case D3DTOP_SELECTARG2:
1069 return 0x4;
1070 case D3DTOP_MULTIPLYADD:
1071 case D3DTOP_LERP:
1072 return 0x7;
1073 default:
1074 return 0x6;
1075 }
1076 }
1077
1078 static inline boolean
1079 is_MOV_no_op(struct ureg_dst dst, struct ureg_src src)
1080 {
1081 return !dst.WriteMask ||
1082 (dst.File == src.File &&
1083 dst.Index == src.Index &&
1084 !dst.Indirect &&
1085 !dst.Saturate &&
1086 !src.Indirect &&
1087 !src.Negate &&
1088 !src.Absolute &&
1089 (!(dst.WriteMask & TGSI_WRITEMASK_X) || (src.SwizzleX == TGSI_SWIZZLE_X)) &&
1090 (!(dst.WriteMask & TGSI_WRITEMASK_Y) || (src.SwizzleY == TGSI_SWIZZLE_Y)) &&
1091 (!(dst.WriteMask & TGSI_WRITEMASK_Z) || (src.SwizzleZ == TGSI_SWIZZLE_Z)) &&
1092 (!(dst.WriteMask & TGSI_WRITEMASK_W) || (src.SwizzleW == TGSI_SWIZZLE_W)));
1093
1094 }
1095
1096 static void
1097 ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct ureg_src *arg)
1098 {
1099 struct ureg_program *ureg = ps->ureg;
1100 struct ureg_dst tmp = ps->r[ps->stage.num_regs];
1101 struct ureg_dst tmp2 = ps->r[ps->stage.num_regs+1];
1102 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
1103
1104 tmp.WriteMask = dst.WriteMask;
1105
1106 if (top != D3DTOP_SELECTARG1 && top != D3DTOP_SELECTARG2 &&
1107 top != D3DTOP_MODULATE && top != D3DTOP_PREMODULATE &&
1108 top != D3DTOP_BLENDDIFFUSEALPHA && top != D3DTOP_BLENDTEXTUREALPHA &&
1109 top != D3DTOP_BLENDFACTORALPHA && top != D3DTOP_BLENDCURRENTALPHA &&
1110 top != D3DTOP_BUMPENVMAP && top != D3DTOP_BUMPENVMAPLUMINANCE &&
1111 top != D3DTOP_LERP)
1112 dst = ureg_saturate(dst);
1113
1114 switch (top) {
1115 case D3DTOP_SELECTARG1:
1116 if (!is_MOV_no_op(dst, arg[1]))
1117 ureg_MOV(ureg, dst, arg[1]);
1118 break;
1119 case D3DTOP_SELECTARG2:
1120 if (!is_MOV_no_op(dst, arg[2]))
1121 ureg_MOV(ureg, dst, arg[2]);
1122 break;
1123 case D3DTOP_MODULATE:
1124 ureg_MUL(ureg, dst, arg[1], arg[2]);
1125 break;
1126 case D3DTOP_MODULATE2X:
1127 ureg_MUL(ureg, tmp, arg[1], arg[2]);
1128 ureg_ADD(ureg, dst, ureg_src(tmp), ureg_src(tmp));
1129 break;
1130 case D3DTOP_MODULATE4X:
1131 ureg_MUL(ureg, tmp, arg[1], arg[2]);
1132 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 4.0f));
1133 break;
1134 case D3DTOP_ADD:
1135 ureg_ADD(ureg, dst, arg[1], arg[2]);
1136 break;
1137 case D3DTOP_ADDSIGNED:
1138 ureg_ADD(ureg, tmp, arg[1], arg[2]);
1139 ureg_SUB(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 0.5f));
1140 break;
1141 case D3DTOP_ADDSIGNED2X:
1142 ureg_ADD(ureg, tmp, arg[1], arg[2]);
1143 ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1144 break;
1145 case D3DTOP_SUBTRACT:
1146 ureg_SUB(ureg, dst, arg[1], arg[2]);
1147 break;
1148 case D3DTOP_ADDSMOOTH:
1149 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), arg[1]);
1150 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]);
1151 break;
1152 case D3DTOP_BLENDDIFFUSEALPHA:
1153 ureg_LRP(ureg, dst, _WWWW(ps->vC[0]), arg[1], arg[2]);
1154 break;
1155 case D3DTOP_BLENDTEXTUREALPHA:
1156 /* XXX: alpha taken from previous stage, texture or result ? */
1157 ureg_LRP(ureg, dst, _W(ps->rTex), arg[1], arg[2]);
1158 break;
1159 case D3DTOP_BLENDFACTORALPHA:
1160 ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]);
1161 break;
1162 case D3DTOP_BLENDTEXTUREALPHAPM:
1163 ureg_SUB(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), _W(ps->rTex));
1164 ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]);
1165 break;
1166 case D3DTOP_BLENDCURRENTALPHA:
1167 ureg_LRP(ureg, dst, _WWWW(ps->rCurSrc), arg[1], arg[2]);
1168 break;
1169 case D3DTOP_PREMODULATE:
1170 ureg_MOV(ureg, dst, arg[1]);
1171 ps->stage.index_pre_mod = ps->stage.index + 1;
1172 break;
1173 case D3DTOP_MODULATEALPHA_ADDCOLOR:
1174 ureg_MAD(ureg, dst, _WWWW(arg[1]), arg[2], arg[1]);
1175 break;
1176 case D3DTOP_MODULATECOLOR_ADDALPHA:
1177 ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1]));
1178 break;
1179 case D3DTOP_MODULATEINVALPHA_ADDCOLOR:
1180 ureg_SUB(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), _WWWW(arg[1]));
1181 ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]);
1182 break;
1183 case D3DTOP_MODULATEINVCOLOR_ADDALPHA:
1184 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), arg[1]);
1185 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], _WWWW(arg[1]));
1186 break;
1187 case D3DTOP_BUMPENVMAP:
1188 break;
1189 case D3DTOP_BUMPENVMAPLUMINANCE:
1190 break;
1191 case D3DTOP_DOTPRODUCT3:
1192 ureg_SUB(ureg, tmp, arg[1], ureg_imm4f(ureg,0.5,0.5,0.5,0.5));
1193 ureg_SUB(ureg, tmp2, arg[2] , ureg_imm4f(ureg,0.5,0.5,0.5,0.5));
1194 ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2));
1195 ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0));
1196 break;
1197 case D3DTOP_MULTIPLYADD:
1198 ureg_MAD(ureg, dst, arg[1], arg[2], arg[0]);
1199 break;
1200 case D3DTOP_LERP:
1201 ureg_LRP(ureg, dst, arg[0], arg[1], arg[2]);
1202 break;
1203 case D3DTOP_DISABLE:
1204 /* no-op ? */
1205 break;
1206 default:
1207 assert(!"invalid D3DTOP");
1208 break;
1209 }
1210 }
1211
1212 static void *
1213 nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
1214 {
1215 struct ps_build_ctx ps;
1216 struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
1217 struct ureg_dst oCol;
1218 unsigned i, s;
1219 const unsigned texcoord_sn = get_texcoord_sn(device->screen);
1220
1221 memset(&ps, 0, sizeof(ps));
1222 ps.ureg = ureg;
1223 ps.stage.index_pre_mod = -1;
1224
1225 ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_PERSPECTIVE);
1226
1227 /* Declare all TEMPs we might need, serious drivers have a register allocator. */
1228 for (i = 0; i < Elements(ps.r); ++i)
1229 ps.r[i] = ureg_DECL_local_temporary(ureg);
1230 ps.rCur = ps.r[0];
1231 ps.rTmp = ps.r[1];
1232 ps.rTex = ps.r[2];
1233 ps.rCurSrc = ureg_src(ps.rCur);
1234 ps.rTmpSrc = ureg_src(ps.rTmp);
1235 ps.rTexSrc = ureg_src(ps.rTex);
1236
1237 for (s = 0; s < 8; ++s) {
1238 ps.s[s] = ureg_src_undef();
1239
1240 if (key->ts[s].colorop != D3DTOP_DISABLE) {
1241 if (key->ts[s].colorarg0 == D3DTA_SPECULAR ||
1242 key->ts[s].colorarg1 == D3DTA_SPECULAR ||
1243 key->ts[s].colorarg2 == D3DTA_SPECULAR)
1244 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE);
1245
1246 if (key->ts[s].colorarg0 == D3DTA_TEXTURE ||
1247 key->ts[s].colorarg1 == D3DTA_TEXTURE ||
1248 key->ts[s].colorarg2 == D3DTA_TEXTURE) {
1249 ps.s[s] = ureg_DECL_sampler(ureg, s);
1250 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1251 }
1252 if (s && (key->ts[s - 1].colorop == D3DTOP_PREMODULATE ||
1253 key->ts[s - 1].alphaop == D3DTOP_PREMODULATE))
1254 ps.s[s] = ureg_DECL_sampler(ureg, s);
1255 }
1256
1257 if (key->ts[s].alphaop != D3DTOP_DISABLE) {
1258 if (key->ts[s].alphaarg0 == D3DTA_SPECULAR ||
1259 key->ts[s].alphaarg1 == D3DTA_SPECULAR ||
1260 key->ts[s].alphaarg2 == D3DTA_SPECULAR)
1261 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE);
1262
1263 if (key->ts[s].alphaarg0 == D3DTA_TEXTURE ||
1264 key->ts[s].alphaarg1 == D3DTA_TEXTURE ||
1265 key->ts[s].alphaarg2 == D3DTA_TEXTURE) {
1266 ps.s[s] = ureg_DECL_sampler(ureg, s);
1267 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1268 }
1269 }
1270 }
1271 if (key->specular)
1272 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_PERSPECTIVE);
1273
1274 oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
1275
1276 if (key->ts[0].colorop == D3DTOP_DISABLE &&
1277 key->ts[0].alphaop == D3DTOP_DISABLE)
1278 ureg_MOV(ureg, ps.rCur, ps.vC[0]);
1279 /* Or is it undefined then ? */
1280
1281 /* Run stages.
1282 */
1283 for (s = 0; s < 8; ++s) {
1284 unsigned colorarg[3];
1285 unsigned alphaarg[3];
1286 const uint8_t used_c = ps_d3dtop_args_mask(key->ts[s].colorop);
1287 const uint8_t used_a = ps_d3dtop_args_mask(key->ts[s].alphaop);
1288 struct ureg_dst dst;
1289 struct ureg_src arg[3];
1290
1291 if (key->ts[s].colorop == D3DTOP_DISABLE &&
1292 key->ts[s].alphaop == D3DTOP_DISABLE)
1293 continue;
1294 ps.stage.index = s;
1295 ps.stage.num_regs = 3;
1296
1297 DBG("STAGE[%u]: colorop=%s alphaop=%s\n", s,
1298 nine_D3DTOP_to_str(key->ts[s].colorop),
1299 nine_D3DTOP_to_str(key->ts[s].alphaop));
1300
1301 if (!ureg_src_is_undef(ps.s[s])) {
1302 unsigned target;
1303 switch (key->ts[s].textarget) {
1304 case 0: target = TGSI_TEXTURE_1D; break;
1305 case 1: target = TGSI_TEXTURE_2D; break;
1306 case 2: target = TGSI_TEXTURE_3D; break;
1307 case 3: target = TGSI_TEXTURE_CUBE; break;
1308 /* this is a 2 bit bitfield, do I really need a default case ? */
1309 }
1310
1311 /* sample the texture */
1312 if (key->ts[s].colorop == D3DTOP_BUMPENVMAP ||
1313 key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE) {
1314 }
1315 if (key->projected & (3 << (s *2))) {
1316 unsigned dim = 1 + ((key->projected >> (2 * s)) & 3);
1317 if (dim == 4)
1318 ureg_TXP(ureg, ps.rTex, target, ps.vT[s], ps.s[s]);
1319 else {
1320 ureg_RCP(ureg, ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X), ureg_scalar(ps.vT[s], dim-1));
1321 ureg_MUL(ureg, ps.rTmp, _XXXX(ps.rTmpSrc), ps.vT[s]);
1322 ureg_TEX(ureg, ps.rTex, target, ps.rTmpSrc, ps.s[s]);
1323 }
1324 } else {
1325 ureg_TEX(ureg, ps.rTex, target, ps.vT[s], ps.s[s]);
1326 }
1327 }
1328
1329 if (s == 0 &&
1330 (key->ts[0].resultarg != 0 /* not current */ ||
1331 key->ts[0].colorop == D3DTOP_DISABLE ||
1332 key->ts[0].alphaop == D3DTOP_DISABLE ||
1333 key->ts[0].colorop == D3DTOP_BLENDCURRENTALPHA ||
1334 key->ts[0].alphaop == D3DTOP_BLENDCURRENTALPHA ||
1335 key->ts[0].colorarg0 == D3DTA_CURRENT ||
1336 key->ts[0].colorarg1 == D3DTA_CURRENT ||
1337 key->ts[0].colorarg2 == D3DTA_CURRENT ||
1338 key->ts[0].alphaarg0 == D3DTA_CURRENT ||
1339 key->ts[0].alphaarg1 == D3DTA_CURRENT ||
1340 key->ts[0].alphaarg2 == D3DTA_CURRENT)
1341 ) {
1342 /* Initialize D3DTA_CURRENT.
1343 * (Yes we can do this before the loop but not until
1344 * NVE4 has an instruction scheduling pass.)
1345 */
1346 ureg_MOV(ureg, ps.rCur, ps.vC[0]);
1347 }
1348
1349 dst = ps_get_ts_dst(&ps, key->ts[s].resultarg ? D3DTA_TEMP : D3DTA_CURRENT);
1350
1351 if (ps.stage.index_pre_mod == ps.stage.index) {
1352 ps.rMod = ps.r[ps.stage.num_regs++];
1353 ureg_MUL(ureg, ps.rMod, ps.rCurSrc, ps.rTexSrc);
1354 }
1355
1356 colorarg[0] = (key->ts[s].colorarg0 | ((key->colorarg_b4[0] >> s) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f;
1357 colorarg[1] = (key->ts[s].colorarg1 | ((key->colorarg_b4[1] >> s) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f;
1358 colorarg[2] = (key->ts[s].colorarg2 | ((key->colorarg_b4[2] >> s) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f;
1359 alphaarg[0] = (key->ts[s].alphaarg0 | ((key->alphaarg_b4[0] >> s) << 4)) & 0x1f;
1360 alphaarg[1] = (key->ts[s].alphaarg1 | ((key->alphaarg_b4[1] >> s) << 4)) & 0x1f;
1361 alphaarg[2] = (key->ts[s].alphaarg2 | ((key->alphaarg_b4[2] >> s) << 4)) & 0x1f;
1362
1363 if (key->ts[s].colorop != key->ts[s].alphaop ||
1364 colorarg[0] != alphaarg[0] ||
1365 colorarg[1] != alphaarg[1] ||
1366 colorarg[2] != alphaarg[2])
1367 dst.WriteMask = TGSI_WRITEMASK_XYZ;
1368
1369 /* Special DOTPRODUCT behaviour (see wine tests) */
1370 if (key->ts[s].colorop == D3DTOP_DOTPRODUCT3)
1371 dst.WriteMask = TGSI_WRITEMASK_XYZW;
1372
1373 if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]);
1374 if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]);
1375 if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]);
1376 ps_do_ts_op(&ps, key->ts[s].colorop, dst, arg);
1377
1378 if (dst.WriteMask != TGSI_WRITEMASK_XYZW) {
1379 dst.WriteMask = TGSI_WRITEMASK_W;
1380
1381 if (used_a & 0x1) arg[0] = ps_get_ts_arg(&ps, alphaarg[0]);
1382 if (used_a & 0x2) arg[1] = ps_get_ts_arg(&ps, alphaarg[1]);
1383 if (used_a & 0x4) arg[2] = ps_get_ts_arg(&ps, alphaarg[2]);
1384 ps_do_ts_op(&ps, key->ts[s].alphaop, dst, arg);
1385 }
1386 }
1387
1388 if (key->specular)
1389 ureg_ADD(ureg, ps.rCur, ps.rCurSrc, ps.vC[1]);
1390
1391 /* Fog.
1392 */
1393 if (key->fog_mode) {
1394 struct ureg_src vPos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0, TGSI_INTERPOLATE_LINEAR);
1395 struct ureg_dst rFog = ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X);
1396 if (key->fog_mode == D3DFOG_EXP) {
1397 ureg_MUL(ureg, rFog, _ZZZZ(vPos), _ZZZZ(_CONST(22)));
1398 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1399 ureg_EX2(ureg, rFog, _X(rFog));
1400 } else
1401 if (key->fog_mode == D3DFOG_EXP2) {
1402 ureg_MUL(ureg, rFog, _ZZZZ(vPos), _ZZZZ(_CONST(22)));
1403 ureg_MUL(ureg, rFog, _X(rFog), _X(rFog));
1404 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1405 ureg_EX2(ureg, rFog, _X(rFog));
1406 } else
1407 if (key->fog_mode == D3DFOG_LINEAR) {
1408 ureg_SUB(ureg, rFog, _XXXX(_CONST(22)), _ZZZZ(vPos));
1409 ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22)));
1410 }
1411 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21));
1412 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1413 } else
1414 if (key->fog) {
1415 struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0, TGSI_INTERPOLATE_PERSPECTIVE);
1416 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _XXXX(vFog), ps.rCurSrc, _CONST(21));
1417 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1418 } else {
1419 ureg_MOV(ureg, oCol, ps.rCurSrc);
1420 }
1421
1422 ureg_END(ureg);
1423 nine_ureg_tgsi_dump(ureg, FALSE);
1424 return ureg_create_shader_and_destroy(ureg, device->pipe);
1425 }
1426
1427 static struct NineVertexShader9 *
1428 nine_ff_get_vs(struct NineDevice9 *device)
1429 {
1430 const struct nine_state *state = &device->state;
1431 struct NineVertexShader9 *vs;
1432 enum pipe_error err;
1433 struct vs_build_ctx bld;
1434 struct nine_ff_vs_key key;
1435 unsigned s, i;
1436 char input_texture_coord[8];
1437
1438 assert(sizeof(key) <= sizeof(key.value32));
1439
1440 memset(&key, 0, sizeof(key));
1441 memset(&bld, 0, sizeof(bld));
1442 memset(&input_texture_coord, 0, sizeof(input_texture_coord));
1443
1444 bld.key = &key;
1445
1446 /* FIXME: this shouldn't be NULL, but it is on init */
1447 if (state->vdecl) {
1448 key.color0in_one = 1;
1449 key.color1in_one = 1;
1450 for (i = 0; i < state->vdecl->nelems; i++) {
1451 uint16_t usage = state->vdecl->usage_map[i];
1452 if (usage == NINE_DECLUSAGE_POSITIONT)
1453 key.position_t = 1;
1454 else if (usage == NINE_DECLUSAGE_i(COLOR, 0))
1455 key.color0in_one = 0;
1456 else if (usage == NINE_DECLUSAGE_i(COLOR, 1))
1457 key.color1in_one = 0;
1458 else if (usage == NINE_DECLUSAGE_PSIZE)
1459 key.vertexpointsize = 1;
1460 else if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) {
1461 s = usage / NINE_DECLUSAGE_COUNT;
1462 if (s < 8)
1463 input_texture_coord[s] = nine_decltype_get_dim(state->vdecl->decls[i].Type);
1464 else
1465 DBG("FF given texture coordinate >= 8. Ignoring\n");
1466 } else if (usage < NINE_DECLUSAGE_NONE)
1467 key.passthrough |= 1 << usage;
1468 }
1469 }
1470 /* ff vs + ps 3.0: some elements are passed to the ps (wine test).
1471 * We do restrict to indices 0 */
1472 key.passthrough &= ~((1 << NINE_DECLUSAGE_POSITION) | (1 << NINE_DECLUSAGE_PSIZE) |
1473 (1 << NINE_DECLUSAGE_TEXCOORD) | (1 << NINE_DECLUSAGE_POSITIONT) |
1474 (1 << NINE_DECLUSAGE_TESSFACTOR) | (1 << NINE_DECLUSAGE_SAMPLE));
1475 if (!key.vertexpointsize)
1476 key.pointscale = !!state->rs[D3DRS_POINTSCALEENABLE];
1477
1478 key.lighting = !!state->rs[D3DRS_LIGHTING] && state->ff.num_lights_active;
1479 key.darkness = !!state->rs[D3DRS_LIGHTING] && !state->ff.num_lights_active;
1480 if (key.position_t) {
1481 key.darkness = 0; /* |= key.lighting; */ /* XXX ? */
1482 key.lighting = 0;
1483 }
1484 if ((key.lighting | key.darkness) && state->rs[D3DRS_COLORVERTEX]) {
1485 key.mtl_diffuse = state->rs[D3DRS_DIFFUSEMATERIALSOURCE];
1486 key.mtl_ambient = state->rs[D3DRS_AMBIENTMATERIALSOURCE];
1487 key.mtl_specular = state->rs[D3DRS_SPECULARMATERIALSOURCE];
1488 key.mtl_emissive = state->rs[D3DRS_EMISSIVEMATERIALSOURCE];
1489 }
1490 key.fog = !!state->rs[D3DRS_FOGENABLE];
1491 key.fog_mode = state->rs[D3DRS_FOGENABLE] ? state->rs[D3DRS_FOGVERTEXMODE] : 0;
1492 if (key.fog_mode)
1493 key.fog_range = !key.position_t && state->rs[D3DRS_RANGEFOGENABLE];
1494
1495 if (state->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1496 key.vertexblend_indexed = !!state->rs[D3DRS_INDEXEDVERTEXBLENDENABLE];
1497
1498 switch (state->rs[D3DRS_VERTEXBLEND]) {
1499 case D3DVBF_0WEIGHTS: key.vertexblend = key.vertexblend_indexed; break;
1500 case D3DVBF_1WEIGHTS: key.vertexblend = 2; break;
1501 case D3DVBF_2WEIGHTS: key.vertexblend = 3; break;
1502 case D3DVBF_3WEIGHTS: key.vertexblend = 4; break;
1503 case D3DVBF_TWEENING: key.vertextween = 1; break;
1504 default:
1505 assert(!"invalid D3DVBF");
1506 break;
1507 }
1508 }
1509
1510 for (s = 0; s < 8; ++s) {
1511 unsigned gen = (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1;
1512 unsigned dim;
1513
1514 if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU)
1515 gen = NINED3DTSS_TCI_PASSTHRU;
1516
1517 if (!input_texture_coord[s] && gen == NINED3DTSS_TCI_PASSTHRU)
1518 gen = NINED3DTSS_TCI_DISABLE;
1519
1520 key.tc_gen |= gen << (s * 3);
1521 key.tc_idx |= (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7) << (s * 3);
1522 key.tc_dim_input |= ((input_texture_coord[s]-1) & 0x3) << (s * 2);
1523
1524 dim = state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7;
1525 if (dim > 4)
1526 dim = input_texture_coord[s];
1527 if (dim == 1) /* NV behaviour */
1528 dim = 0;
1529 key.tc_dim_output |= dim << (s * 3);
1530 }
1531
1532 vs = util_hash_table_get(device->ff.ht_vs, &key);
1533 if (vs)
1534 return vs;
1535 NineVertexShader9_new(device, &vs, NULL, nine_ff_build_vs(device, &bld));
1536
1537 nine_ff_prune_vs(device);
1538 if (vs) {
1539 unsigned n;
1540
1541 memcpy(&vs->ff_key, &key, sizeof(vs->ff_key));
1542
1543 err = util_hash_table_set(device->ff.ht_vs, &vs->ff_key, vs);
1544 (void)err;
1545 assert(err == PIPE_OK);
1546 device->ff.num_vs++;
1547 NineUnknown_ConvertRefToBind(NineUnknown(vs));
1548
1549 vs->num_inputs = bld.num_inputs;
1550 for (n = 0; n < bld.num_inputs; ++n)
1551 vs->input_map[n].ndecl = bld.input[n];
1552
1553 vs->position_t = key.position_t;
1554 vs->point_size = key.vertexpointsize | key.pointscale;
1555 }
1556 return vs;
1557 }
1558
1559 static struct NinePixelShader9 *
1560 nine_ff_get_ps(struct NineDevice9 *device)
1561 {
1562 struct nine_state *state = &device->state;
1563 struct NinePixelShader9 *ps;
1564 enum pipe_error err;
1565 struct nine_ff_ps_key key;
1566 unsigned s;
1567 uint8_t sampler_mask = 0;
1568
1569 assert(sizeof(key) <= sizeof(key.value32));
1570
1571 memset(&key, 0, sizeof(key));
1572 for (s = 0; s < 8; ++s) {
1573 key.ts[s].colorop = state->ff.tex_stage[s][D3DTSS_COLOROP];
1574 key.ts[s].alphaop = state->ff.tex_stage[s][D3DTSS_ALPHAOP];
1575 /* MSDN says D3DTOP_DISABLE disables this and all subsequent stages. */
1576 /* ALPHAOP cannot be disabled if COLOROP is enabled. */
1577 if (key.ts[s].colorop == D3DTOP_DISABLE) {
1578 key.ts[s].alphaop = D3DTOP_DISABLE; /* DISABLE == 1, avoid degenerate keys */
1579 break;
1580 }
1581
1582 if (!state->texture[s] &&
1583 state->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE) {
1584 /* This should also disable the stage. */
1585 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1586 break;
1587 }
1588
1589 if (state->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE)
1590 sampler_mask |= (1 << s);
1591
1592 if (key.ts[s].colorop != D3DTOP_DISABLE) {
1593 uint8_t used_c = ps_d3dtop_args_mask(key.ts[s].colorop);
1594 if (used_c & 0x1) key.ts[s].colorarg0 = state->ff.tex_stage[s][D3DTSS_COLORARG0];
1595 if (used_c & 0x2) key.ts[s].colorarg1 = state->ff.tex_stage[s][D3DTSS_COLORARG1];
1596 if (used_c & 0x4) key.ts[s].colorarg2 = state->ff.tex_stage[s][D3DTSS_COLORARG2];
1597 if (used_c & 0x1) key.colorarg_b4[0] |= (state->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) << s;
1598 if (used_c & 0x1) key.colorarg_b5[0] |= (state->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) << s;
1599 if (used_c & 0x2) key.colorarg_b4[1] |= (state->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) << s;
1600 if (used_c & 0x2) key.colorarg_b5[1] |= (state->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) << s;
1601 if (used_c & 0x4) key.colorarg_b4[2] |= (state->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) << s;
1602 if (used_c & 0x4) key.colorarg_b5[2] |= (state->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) << s;
1603 }
1604 if (key.ts[s].alphaop != D3DTOP_DISABLE) {
1605 uint8_t used_a = ps_d3dtop_args_mask(key.ts[s].alphaop);
1606 if (used_a & 0x1) key.ts[s].alphaarg0 = state->ff.tex_stage[s][D3DTSS_ALPHAARG0];
1607 if (used_a & 0x2) key.ts[s].alphaarg1 = state->ff.tex_stage[s][D3DTSS_ALPHAARG1];
1608 if (used_a & 0x4) key.ts[s].alphaarg2 = state->ff.tex_stage[s][D3DTSS_ALPHAARG2];
1609 if (used_a & 0x1) key.alphaarg_b4[0] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) << s;
1610 if (used_a & 0x2) key.alphaarg_b4[1] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) << s;
1611 if (used_a & 0x4) key.alphaarg_b4[2] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) << s;
1612 }
1613 key.ts[s].resultarg = state->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP;
1614
1615 if (state->texture[s]) {
1616 switch (state->texture[s]->base.type) {
1617 case D3DRTYPE_TEXTURE: key.ts[s].textarget = 1; break;
1618 case D3DRTYPE_VOLUMETEXTURE: key.ts[s].textarget = 2; break;
1619 case D3DRTYPE_CUBETEXTURE: key.ts[s].textarget = 3; break;
1620 default:
1621 assert(!"unexpected texture type");
1622 break;
1623 }
1624 } else {
1625 key.ts[s].textarget = 1;
1626 }
1627 }
1628
1629 key.projected = nine_ff_get_projected_key(state);
1630
1631 for (; s < 8; ++s)
1632 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1633 if (state->rs[D3DRS_FOGENABLE])
1634 key.fog_mode = state->rs[D3DRS_FOGTABLEMODE];
1635 key.fog = !!state->rs[D3DRS_FOGENABLE];
1636
1637 ps = util_hash_table_get(device->ff.ht_ps, &key);
1638 if (ps)
1639 return ps;
1640 NinePixelShader9_new(device, &ps, NULL, nine_ff_build_ps(device, &key));
1641
1642 nine_ff_prune_ps(device);
1643 if (ps) {
1644 memcpy(&ps->ff_key, &key, sizeof(ps->ff_key));
1645
1646 err = util_hash_table_set(device->ff.ht_ps, &ps->ff_key, ps);
1647 (void)err;
1648 assert(err == PIPE_OK);
1649 device->ff.num_ps++;
1650 NineUnknown_ConvertRefToBind(NineUnknown(ps));
1651
1652 ps->rt_mask = 0x1;
1653 ps->sampler_mask = sampler_mask;
1654 }
1655 return ps;
1656 }
1657
/* Fetch the matrix for D3DTS_<n>.  Expects a variable named `state` to be in
 * scope at the point of use.
 */
#define GET_D3DTS(n) nine_state_access_transform(state, D3DTS_##n, FALSE)
/* Non-zero when the dirty bit of transform D3DTS_<n> is set in
 * (s)->ff.changed.transform[], which holds 32 flags per word.  The mask is
 * built from an unsigned constant so that bit 31 can be tested without
 * shifting into the sign bit.
 */
#define IS_D3DTS_DIRTY(s,n) ((s)->ff.changed.transform[(D3DTS_##n) / 32] & (1u << ((D3DTS_##n) % 32)))
1660 static void
1661 nine_ff_load_vs_transforms(struct NineDevice9 *device)
1662 {
1663 struct nine_state *state = &device->state;
1664 D3DMATRIX T;
1665 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1666 unsigned i;
1667
1668 /* TODO: make this nicer, and only upload the ones we need */
1669 /* TODO: use ff.vs_const as storage of W, V, P matrices */
1670
1671 if (IS_D3DTS_DIRTY(state, WORLD) ||
1672 IS_D3DTS_DIRTY(state, VIEW) ||
1673 IS_D3DTS_DIRTY(state, PROJECTION)) {
1674 /* WVP, WV matrices */
1675 nine_d3d_matrix_matrix_mul(&M[1], GET_D3DTS(WORLD), GET_D3DTS(VIEW));
1676 nine_d3d_matrix_matrix_mul(&M[0], &M[1], GET_D3DTS(PROJECTION));
1677
1678 /* normal matrix == transpose(inverse(WV)) */
1679 nine_d3d_matrix_inverse_3x3(&T, &M[1]);
1680 nine_d3d_matrix_transpose(&M[4], &T);
1681
1682 /* VP matrix */
1683 nine_d3d_matrix_matrix_mul(&M[2], GET_D3DTS(VIEW), GET_D3DTS(PROJECTION));
1684
1685 /* V and W matrix */
1686 M[3] = *GET_D3DTS(VIEW);
1687 M[56] = *GET_D3DTS(WORLD);
1688 }
1689
1690 if (state->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1691 /* load other world matrices */
1692 for (i = 1; i <= 7; ++i)
1693 M[56 + i] = *GET_D3DTS(WORLDMATRIX(i));
1694 }
1695
1696 device->ff.vs_const[30 * 4] = asfloat(state->rs[D3DRS_TWEENFACTOR]);
1697 }
1698
1699 static void
1700 nine_ff_load_lights(struct NineDevice9 *device)
1701 {
1702 struct nine_state *state = &device->state;
1703 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1704 unsigned l;
1705
1706 if (state->changed.group & NINE_STATE_FF_MATERIAL) {
1707 const D3DMATERIAL9 *mtl = &state->ff.material;
1708
1709 memcpy(&dst[20], &mtl->Diffuse, 4 * sizeof(float));
1710 memcpy(&dst[21], &mtl->Ambient, 4 * sizeof(float));
1711 memcpy(&dst[22], &mtl->Specular, 4 * sizeof(float));
1712 dst[23].x = mtl->Power;
1713 memcpy(&dst[24], &mtl->Emissive, 4 * sizeof(float));
1714 d3dcolor_to_rgba(&dst[25].x, state->rs[D3DRS_AMBIENT]);
1715 dst[19].x = dst[25].x * mtl->Ambient.r + mtl->Emissive.r;
1716 dst[19].y = dst[25].y * mtl->Ambient.g + mtl->Emissive.g;
1717 dst[19].z = dst[25].z * mtl->Ambient.b + mtl->Emissive.b;
1718 dst[19].w = mtl->Ambient.a + mtl->Emissive.a;
1719 }
1720
1721 if (!(state->changed.group & NINE_STATE_FF_LIGHTING))
1722 return;
1723
1724 for (l = 0; l < state->ff.num_lights_active; ++l) {
1725 const D3DLIGHT9 *light = &state->ff.light[state->ff.active_light[l]];
1726
1727 dst[32 + l * 8].x = light->Type;
1728 dst[32 + l * 8].y = light->Attenuation0;
1729 dst[32 + l * 8].z = light->Attenuation1;
1730 dst[32 + l * 8].w = light->Attenuation2;
1731 memcpy(&dst[33 + l * 8].x, &light->Diffuse, sizeof(light->Diffuse));
1732 memcpy(&dst[34 + l * 8].x, &light->Specular, sizeof(light->Specular));
1733 memcpy(&dst[35 + l * 8].x, &light->Ambient, sizeof(light->Ambient));
1734 nine_d3d_vector4_matrix_mul((D3DVECTOR *)&dst[36 + l * 8].x, &light->Position, GET_D3DTS(VIEW));
1735 nine_d3d_vector3_matrix_mul((D3DVECTOR *)&dst[37 + l * 8].x, &light->Direction, GET_D3DTS(VIEW));
1736 dst[36 + l * 8].w = light->Type == D3DLIGHT_DIRECTIONAL ? 1e9f : light->Range;
1737 dst[37 + l * 8].w = light->Falloff;
1738 dst[38 + l * 8].x = cosf(light->Theta * 0.5f);
1739 dst[38 + l * 8].y = cosf(light->Phi * 0.5f);
1740 dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y);
1741 dst[39 + l * 8].w = (l + 1) == state->ff.num_lights_active;
1742 }
1743 }
1744
1745 static void
1746 nine_ff_load_point_and_fog_params(struct NineDevice9 *device)
1747 {
1748 const struct nine_state *state = &device->state;
1749 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1750
1751 if (!(state->changed.group & NINE_STATE_FF_OTHER))
1752 return;
1753 dst[26].x = asfloat(state->rs[D3DRS_POINTSIZE_MIN]);
1754 dst[26].y = asfloat(state->rs[D3DRS_POINTSIZE_MAX]);
1755 dst[26].z = asfloat(state->rs[D3DRS_POINTSIZE]);
1756 dst[26].w = asfloat(state->rs[D3DRS_POINTSCALE_A]);
1757 dst[27].x = asfloat(state->rs[D3DRS_POINTSCALE_B]);
1758 dst[27].y = asfloat(state->rs[D3DRS_POINTSCALE_C]);
1759 dst[28].x = asfloat(state->rs[D3DRS_FOGEND]);
1760 dst[28].y = 1.0f / (asfloat(state->rs[D3DRS_FOGEND]) - asfloat(state->rs[D3DRS_FOGSTART]));
1761 if (isinf(dst[28].y))
1762 dst[28].y = 0.0f;
1763 dst[28].z = asfloat(state->rs[D3DRS_FOGDENSITY]);
1764 }
1765
1766 static void
1767 nine_ff_load_tex_matrices(struct NineDevice9 *device)
1768 {
1769 struct nine_state *state = &device->state;
1770 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1771 unsigned s;
1772
1773 if (!(state->ff.changed.transform[0] & 0xff0000))
1774 return;
1775 for (s = 0; s < 8; ++s) {
1776 if (IS_D3DTS_DIRTY(state, TEXTURE0 + s))
1777 nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(state, D3DTS_TEXTURE0 + s, FALSE));
1778 }
1779 }
1780
1781 static void
1782 nine_ff_load_ps_params(struct NineDevice9 *device)
1783 {
1784 const struct nine_state *state = &device->state;
1785 struct fvec4 *dst = (struct fvec4 *)device->ff.ps_const;
1786 unsigned s;
1787
1788 if (!(state->changed.group & (NINE_STATE_FF_PSSTAGES | NINE_STATE_FF_OTHER)))
1789 return;
1790
1791 for (s = 0; s < 8; ++s)
1792 d3dcolor_to_rgba(&dst[s].x, state->ff.tex_stage[s][D3DTSS_CONSTANT]);
1793
1794 for (s = 0; s < 8; ++s) {
1795 dst[8 + s].x = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT00]);
1796 dst[8 + s].y = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT01]);
1797 dst[8 + s].z = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT10]);
1798 dst[8 + s].w = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT11]);
1799 if (s & 1) {
1800 dst[8 + s / 2].z = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
1801 dst[8 + s / 2].w = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
1802 } else {
1803 dst[8 + s / 2].x = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
1804 dst[8 + s / 2].y = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
1805 }
1806 }
1807
1808 d3dcolor_to_rgba(&dst[20].x, state->rs[D3DRS_TEXTUREFACTOR]);
1809 d3dcolor_to_rgba(&dst[21].x, state->rs[D3DRS_FOGCOLOR]);
1810 dst[22].x = asfloat(state->rs[D3DRS_FOGEND]);
1811 dst[22].y = 1.0f / (asfloat(state->rs[D3DRS_FOGEND]) - asfloat(state->rs[D3DRS_FOGSTART]));
1812 dst[22].z = asfloat(state->rs[D3DRS_FOGDENSITY]);
1813 }
1814
1815 static void
1816 nine_ff_load_viewport_info(struct NineDevice9 *device)
1817 {
1818 D3DVIEWPORT9 *viewport = &device->state.viewport;
1819 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1820 float diffZ = viewport->MaxZ - viewport->MinZ;
1821
1822 /* Note: the other functions avoids to fill the const again if nothing changed.
1823 * But we don't have much to fill, and adding code to allow that may be complex
1824 * so just fill it always */
1825 dst[100].x = 2.0f / (float)(viewport->Width);
1826 dst[100].y = 2.0f / (float)(viewport->Height);
1827 dst[100].z = (diffZ == 0.0f) ? 0.0f : (1.0f / diffZ);
1828 dst[101].x = (float)(viewport->X);
1829 dst[101].y = (float)(viewport->Y);
1830 dst[101].z = (float)(viewport->MinZ);
1831 }
1832
1833 void
1834 nine_ff_update(struct NineDevice9 *device)
1835 {
1836 struct nine_state *state = &device->state;
1837 struct pipe_constant_buffer cb;
1838
1839 DBG("vs=%p ps=%p\n", device->state.vs, device->state.ps);
1840
1841 /* NOTE: the only reference belongs to the hash table */
1842 if (!device->state.vs) {
1843 device->ff.vs = nine_ff_get_vs(device);
1844 device->state.changed.group |= NINE_STATE_VS;
1845 }
1846 if (!device->state.ps) {
1847 device->ff.ps = nine_ff_get_ps(device);
1848 device->state.changed.group |= NINE_STATE_PS;
1849 }
1850
1851 if (!device->state.vs) {
1852 nine_ff_load_vs_transforms(device);
1853 nine_ff_load_tex_matrices(device);
1854 nine_ff_load_lights(device);
1855 nine_ff_load_point_and_fog_params(device);
1856 nine_ff_load_viewport_info(device);
1857
1858 memset(state->ff.changed.transform, 0, sizeof(state->ff.changed.transform));
1859
1860 cb.buffer_offset = 0;
1861 cb.buffer = NULL;
1862 cb.user_buffer = device->ff.vs_const;
1863 cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float);
1864
1865 if (!device->driver_caps.user_cbufs) {
1866 u_upload_data(device->constbuf_uploader,
1867 0,
1868 cb.buffer_size,
1869 cb.user_buffer,
1870 &cb.buffer_offset,
1871 &cb.buffer);
1872 u_upload_unmap(device->constbuf_uploader);
1873 cb.user_buffer = NULL;
1874 }
1875 state->pipe.cb_vs_ff = cb;
1876 state->commit |= NINE_STATE_COMMIT_CONST_VS;
1877 }
1878
1879 if (!device->state.ps) {
1880 nine_ff_load_ps_params(device);
1881
1882 cb.buffer_offset = 0;
1883 cb.buffer = NULL;
1884 cb.user_buffer = device->ff.ps_const;
1885 cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float);
1886
1887 if (!device->driver_caps.user_cbufs) {
1888 u_upload_data(device->constbuf_uploader,
1889 0,
1890 cb.buffer_size,
1891 cb.user_buffer,
1892 &cb.buffer_offset,
1893 &cb.buffer);
1894 u_upload_unmap(device->constbuf_uploader);
1895 cb.user_buffer = NULL;
1896 }
1897 state->pipe.cb_ps_ff = cb;
1898 state->commit |= NINE_STATE_COMMIT_CONST_PS;
1899 }
1900
1901 device->state.changed.group &= ~NINE_STATE_FF;
1902 }
1903
1904
1905 boolean
1906 nine_ff_init(struct NineDevice9 *device)
1907 {
1908 device->ff.ht_vs = util_hash_table_create(nine_ff_vs_key_hash,
1909 nine_ff_vs_key_comp);
1910 device->ff.ht_ps = util_hash_table_create(nine_ff_ps_key_hash,
1911 nine_ff_ps_key_comp);
1912
1913 device->ff.ht_fvf = util_hash_table_create(nine_ff_fvf_key_hash,
1914 nine_ff_fvf_key_comp);
1915
1916 device->ff.vs_const = CALLOC(NINE_FF_NUM_VS_CONST, 4 * sizeof(float));
1917 device->ff.ps_const = CALLOC(NINE_FF_NUM_PS_CONST, 4 * sizeof(float));
1918
1919 return device->ff.ht_vs && device->ff.ht_ps &&
1920 device->ff.ht_fvf &&
1921 device->ff.vs_const && device->ff.ps_const;
1922 }
1923
1924 static enum pipe_error nine_ff_ht_delete_cb(void *key, void *value, void *data)
1925 {
1926 NineUnknown_Unbind(NineUnknown(value));
1927 return PIPE_OK;
1928 }
1929
1930 void
1931 nine_ff_fini(struct NineDevice9 *device)
1932 {
1933 if (device->ff.ht_vs) {
1934 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
1935 util_hash_table_destroy(device->ff.ht_vs);
1936 }
1937 if (device->ff.ht_ps) {
1938 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
1939 util_hash_table_destroy(device->ff.ht_ps);
1940 }
1941 if (device->ff.ht_fvf) {
1942 util_hash_table_foreach(device->ff.ht_fvf, nine_ff_ht_delete_cb, NULL);
1943 util_hash_table_destroy(device->ff.ht_fvf);
1944 }
1945 device->ff.vs = NULL; /* destroyed by unbinding from hash table */
1946 device->ff.ps = NULL;
1947
1948 FREE(device->ff.vs_const);
1949 FREE(device->ff.ps_const);
1950 }
1951
1952 static void
1953 nine_ff_prune_vs(struct NineDevice9 *device)
1954 {
1955 if (device->ff.num_vs > 100) {
1956 /* could destroy the bound one here, so unbind */
1957 device->pipe->bind_vs_state(device->pipe, NULL);
1958 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
1959 util_hash_table_clear(device->ff.ht_vs);
1960 device->ff.num_vs = 0;
1961 device->state.changed.group |= NINE_STATE_VS;
1962 }
1963 }
1964 static void
1965 nine_ff_prune_ps(struct NineDevice9 *device)
1966 {
1967 if (device->ff.num_ps > 100) {
1968 /* could destroy the bound one here, so unbind */
1969 device->pipe->bind_fs_state(device->pipe, NULL);
1970 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
1971 util_hash_table_clear(device->ff.ht_ps);
1972 device->ff.num_ps = 0;
1973 device->state.changed.group |= NINE_STATE_PS;
1974 }
1975 }
1976
1977 /* ========================================================================== */
1978
1979 /* Matrix multiplication:
1980 *
1981 * in memory: 0 1 2 3 (row major)
1982 * 4 5 6 7
1983 * 8 9 a b
1984 * c d e f
1985 *
1986 * cA cB cC cD
1987 * r0 = (r0 * cA) (r0 * cB) . .
1988 * r1 = (r1 * cA) (r1 * cB)
1989 * r2 = (r2 * cA) .
1990 * r3 = (r3 * cA) .
1991 *
1992 * r: (11) (12) (13) (14)
1993 * (21) (22) (23) (24)
1994 * (31) (32) (33) (34)
1995 * (41) (42) (43) (44)
1996 * l: (11 12 13 14)
1997 * (21 22 23 24)
1998 * (31 32 33 34)
1999 * (41 42 43 44)
2000 *
2001 * v: (x y z 1 )
2002 *
2003 * t.xyzw = MUL(v.xxxx, r[0]);
2004 * t.xyzw = MAD(v.yyyy, r[1], t.xyzw);
2005 * t.xyzw = MAD(v.zzzz, r[2], t.xyzw);
2006 * v.xyzw = MAD(v.wwww, r[3], t.xyzw);
2007 *
2008 * v.x = DP4(v, c[0]);
2009 * v.y = DP4(v, c[1]);
2010 * v.z = DP4(v, c[2]);
2011 * v.w = DP4(v, c[3]) = 1
2012 */
2013
2014 /*
2015 static void
2016 nine_D3DMATRIX_print(const D3DMATRIX *M)
2017 {
2018 DBG("\n(%f %f %f %f)\n"
2019 "(%f %f %f %f)\n"
2020 "(%f %f %f %f)\n"
2021 "(%f %f %f %f)\n",
2022 M->m[0][0], M->m[0][1], M->m[0][2], M->m[0][3],
2023 M->m[1][0], M->m[1][1], M->m[1][2], M->m[1][3],
2024 M->m[2][0], M->m[2][1], M->m[2][2], M->m[2][3],
2025 M->m[3][0], M->m[3][1], M->m[3][2], M->m[3][3]);
2026 }
2027 */
2028
2029 static inline float
2030 nine_DP4_row_col(const D3DMATRIX *A, int r, const D3DMATRIX *B, int c)
2031 {
2032 return A->m[r][0] * B->m[0][c] +
2033 A->m[r][1] * B->m[1][c] +
2034 A->m[r][2] * B->m[2][c] +
2035 A->m[r][3] * B->m[3][c];
2036 }
2037
2038 static inline float
2039 nine_DP4_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2040 {
2041 return v->x * M->m[0][c] +
2042 v->y * M->m[1][c] +
2043 v->z * M->m[2][c] +
2044 1.0f * M->m[3][c];
2045 }
2046
2047 static inline float
2048 nine_DP3_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2049 {
2050 return v->x * M->m[0][c] +
2051 v->y * M->m[1][c] +
2052 v->z * M->m[2][c];
2053 }
2054
2055 void
2056 nine_d3d_matrix_matrix_mul(D3DMATRIX *D, const D3DMATRIX *L, const D3DMATRIX *R)
2057 {
2058 D->_11 = nine_DP4_row_col(L, 0, R, 0);
2059 D->_12 = nine_DP4_row_col(L, 0, R, 1);
2060 D->_13 = nine_DP4_row_col(L, 0, R, 2);
2061 D->_14 = nine_DP4_row_col(L, 0, R, 3);
2062
2063 D->_21 = nine_DP4_row_col(L, 1, R, 0);
2064 D->_22 = nine_DP4_row_col(L, 1, R, 1);
2065 D->_23 = nine_DP4_row_col(L, 1, R, 2);
2066 D->_24 = nine_DP4_row_col(L, 1, R, 3);
2067
2068 D->_31 = nine_DP4_row_col(L, 2, R, 0);
2069 D->_32 = nine_DP4_row_col(L, 2, R, 1);
2070 D->_33 = nine_DP4_row_col(L, 2, R, 2);
2071 D->_34 = nine_DP4_row_col(L, 2, R, 3);
2072
2073 D->_41 = nine_DP4_row_col(L, 3, R, 0);
2074 D->_42 = nine_DP4_row_col(L, 3, R, 1);
2075 D->_43 = nine_DP4_row_col(L, 3, R, 2);
2076 D->_44 = nine_DP4_row_col(L, 3, R, 3);
2077 }
2078
2079 void
2080 nine_d3d_vector4_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2081 {
2082 d->x = nine_DP4_vec_col(v, M, 0);
2083 d->y = nine_DP4_vec_col(v, M, 1);
2084 d->z = nine_DP4_vec_col(v, M, 2);
2085 }
2086
2087 void
2088 nine_d3d_vector3_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2089 {
2090 d->x = nine_DP3_vec_col(v, M, 0);
2091 d->y = nine_DP3_vec_col(v, M, 1);
2092 d->z = nine_DP3_vec_col(v, M, 2);
2093 }
2094
2095 void
2096 nine_d3d_matrix_transpose(D3DMATRIX *D, const D3DMATRIX *M)
2097 {
2098 unsigned i, j;
2099 for (i = 0; i < 4; ++i)
2100 for (j = 0; j < 4; ++j)
2101 D->m[i][j] = M->m[j][i];
2102 }
2103
2104 #define _M_ADD_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2105 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2106 if (t > 0.0f) pos += t; else neg += t; } while(0)
2107
2108 #define _M_SUB_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2109 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2110 if (t > 0.0f) neg -= t; else pos -= t; } while(0)
2111 float
2112 nine_d3d_matrix_det(const D3DMATRIX *M)
2113 {
2114 float pos = 0.0f;
2115 float neg = 0.0f;
2116
2117 _M_ADD_PROD_1i_2j_3k_4l(1, 2, 3, 4);
2118 _M_ADD_PROD_1i_2j_3k_4l(1, 3, 4, 2);
2119 _M_ADD_PROD_1i_2j_3k_4l(1, 4, 2, 3);
2120
2121 _M_ADD_PROD_1i_2j_3k_4l(2, 1, 4, 3);
2122 _M_ADD_PROD_1i_2j_3k_4l(2, 3, 1, 4);
2123 _M_ADD_PROD_1i_2j_3k_4l(2, 4, 3, 1);
2124
2125 _M_ADD_PROD_1i_2j_3k_4l(3, 1, 2, 4);
2126 _M_ADD_PROD_1i_2j_3k_4l(3, 2, 4, 1);
2127 _M_ADD_PROD_1i_2j_3k_4l(3, 4, 1, 2);
2128
2129 _M_ADD_PROD_1i_2j_3k_4l(4, 1, 3, 2);
2130 _M_ADD_PROD_1i_2j_3k_4l(4, 2, 1, 3);
2131 _M_ADD_PROD_1i_2j_3k_4l(4, 3, 2, 1);
2132
2133 _M_SUB_PROD_1i_2j_3k_4l(1, 2, 4, 3);
2134 _M_SUB_PROD_1i_2j_3k_4l(1, 3, 2, 4);
2135 _M_SUB_PROD_1i_2j_3k_4l(1, 4, 3, 2);
2136
2137 _M_SUB_PROD_1i_2j_3k_4l(2, 1, 3, 4);
2138 _M_SUB_PROD_1i_2j_3k_4l(2, 3, 4, 1);
2139 _M_SUB_PROD_1i_2j_3k_4l(2, 4, 1, 3);
2140
2141 _M_SUB_PROD_1i_2j_3k_4l(3, 1, 4, 2);
2142 _M_SUB_PROD_1i_2j_3k_4l(3, 2, 1, 4);
2143 _M_SUB_PROD_1i_2j_3k_4l(3, 4, 2, 1);
2144
2145 _M_SUB_PROD_1i_2j_3k_4l(4, 1, 2, 3);
2146 _M_SUB_PROD_1i_2j_3k_4l(4, 2, 3, 1);
2147 _M_SUB_PROD_1i_2j_3k_4l(4, 3, 1, 2);
2148
2149 return pos + neg;
2150 }
2151
2152 /* XXX: Probably better to just use src/mesa/math/m_matrix.c because
2153 * I have no idea where this code came from.
2154 */
2155 void
2156 nine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M)
2157 {
2158 int i, k;
2159 float det;
2160
2161 D->m[0][0] =
2162 M->m[1][1] * M->m[2][2] * M->m[3][3] -
2163 M->m[1][1] * M->m[3][2] * M->m[2][3] -
2164 M->m[1][2] * M->m[2][1] * M->m[3][3] +
2165 M->m[1][2] * M->m[3][1] * M->m[2][3] +
2166 M->m[1][3] * M->m[2][1] * M->m[3][2] -
2167 M->m[1][3] * M->m[3][1] * M->m[2][2];
2168
2169 D->m[0][1] =
2170 -M->m[0][1] * M->m[2][2] * M->m[3][3] +
2171 M->m[0][1] * M->m[3][2] * M->m[2][3] +
2172 M->m[0][2] * M->m[2][1] * M->m[3][3] -
2173 M->m[0][2] * M->m[3][1] * M->m[2][3] -
2174 M->m[0][3] * M->m[2][1] * M->m[3][2] +
2175 M->m[0][3] * M->m[3][1] * M->m[2][2];
2176
2177 D->m[0][2] =
2178 M->m[0][1] * M->m[1][2] * M->m[3][3] -
2179 M->m[0][1] * M->m[3][2] * M->m[1][3] -
2180 M->m[0][2] * M->m[1][1] * M->m[3][3] +
2181 M->m[0][2] * M->m[3][1] * M->m[1][3] +
2182 M->m[0][3] * M->m[1][1] * M->m[3][2] -
2183 M->m[0][3] * M->m[3][1] * M->m[1][2];
2184
2185 D->m[0][3] =
2186 -M->m[0][1] * M->m[1][2] * M->m[2][3] +
2187 M->m[0][1] * M->m[2][2] * M->m[1][3] +
2188 M->m[0][2] * M->m[1][1] * M->m[2][3] -
2189 M->m[0][2] * M->m[2][1] * M->m[1][3] -
2190 M->m[0][3] * M->m[1][1] * M->m[2][2] +
2191 M->m[0][3] * M->m[2][1] * M->m[1][2];
2192
2193 D->m[1][0] =
2194 -M->m[1][0] * M->m[2][2] * M->m[3][3] +
2195 M->m[1][0] * M->m[3][2] * M->m[2][3] +
2196 M->m[1][2] * M->m[2][0] * M->m[3][3] -
2197 M->m[1][2] * M->m[3][0] * M->m[2][3] -
2198 M->m[1][3] * M->m[2][0] * M->m[3][2] +
2199 M->m[1][3] * M->m[3][0] * M->m[2][2];
2200
2201 D->m[1][1] =
2202 M->m[0][0] * M->m[2][2] * M->m[3][3] -
2203 M->m[0][0] * M->m[3][2] * M->m[2][3] -
2204 M->m[0][2] * M->m[2][0] * M->m[3][3] +
2205 M->m[0][2] * M->m[3][0] * M->m[2][3] +
2206 M->m[0][3] * M->m[2][0] * M->m[3][2] -
2207 M->m[0][3] * M->m[3][0] * M->m[2][2];
2208
2209 D->m[1][2] =
2210 -M->m[0][0] * M->m[1][2] * M->m[3][3] +
2211 M->m[0][0] * M->m[3][2] * M->m[1][3] +
2212 M->m[0][2] * M->m[1][0] * M->m[3][3] -
2213 M->m[0][2] * M->m[3][0] * M->m[1][3] -
2214 M->m[0][3] * M->m[1][0] * M->m[3][2] +
2215 M->m[0][3] * M->m[3][0] * M->m[1][2];
2216
2217 D->m[1][3] =
2218 M->m[0][0] * M->m[1][2] * M->m[2][3] -
2219 M->m[0][0] * M->m[2][2] * M->m[1][3] -
2220 M->m[0][2] * M->m[1][0] * M->m[2][3] +
2221 M->m[0][2] * M->m[2][0] * M->m[1][3] +
2222 M->m[0][3] * M->m[1][0] * M->m[2][2] -
2223 M->m[0][3] * M->m[2][0] * M->m[1][2];
2224
2225 D->m[2][0] =
2226 M->m[1][0] * M->m[2][1] * M->m[3][3] -
2227 M->m[1][0] * M->m[3][1] * M->m[2][3] -
2228 M->m[1][1] * M->m[2][0] * M->m[3][3] +
2229 M->m[1][1] * M->m[3][0] * M->m[2][3] +
2230 M->m[1][3] * M->m[2][0] * M->m[3][1] -
2231 M->m[1][3] * M->m[3][0] * M->m[2][1];
2232
2233 D->m[2][1] =
2234 -M->m[0][0] * M->m[2][1] * M->m[3][3] +
2235 M->m[0][0] * M->m[3][1] * M->m[2][3] +
2236 M->m[0][1] * M->m[2][0] * M->m[3][3] -
2237 M->m[0][1] * M->m[3][0] * M->m[2][3] -
2238 M->m[0][3] * M->m[2][0] * M->m[3][1] +
2239 M->m[0][3] * M->m[3][0] * M->m[2][1];
2240
2241 D->m[2][2] =
2242 M->m[0][0] * M->m[1][1] * M->m[3][3] -
2243 M->m[0][0] * M->m[3][1] * M->m[1][3] -
2244 M->m[0][1] * M->m[1][0] * M->m[3][3] +
2245 M->m[0][1] * M->m[3][0] * M->m[1][3] +
2246 M->m[0][3] * M->m[1][0] * M->m[3][1] -
2247 M->m[0][3] * M->m[3][0] * M->m[1][1];
2248
2249 D->m[2][3] =
2250 -M->m[0][0] * M->m[1][1] * M->m[2][3] +
2251 M->m[0][0] * M->m[2][1] * M->m[1][3] +
2252 M->m[0][1] * M->m[1][0] * M->m[2][3] -
2253 M->m[0][1] * M->m[2][0] * M->m[1][3] -
2254 M->m[0][3] * M->m[1][0] * M->m[2][1] +
2255 M->m[0][3] * M->m[2][0] * M->m[1][1];
2256
2257 D->m[3][0] =
2258 -M->m[1][0] * M->m[2][1] * M->m[3][2] +
2259 M->m[1][0] * M->m[3][1] * M->m[2][2] +
2260 M->m[1][1] * M->m[2][0] * M->m[3][2] -
2261 M->m[1][1] * M->m[3][0] * M->m[2][2] -
2262 M->m[1][2] * M->m[2][0] * M->m[3][1] +
2263 M->m[1][2] * M->m[3][0] * M->m[2][1];
2264
2265 D->m[3][1] =
2266 M->m[0][0] * M->m[2][1] * M->m[3][2] -
2267 M->m[0][0] * M->m[3][1] * M->m[2][2] -
2268 M->m[0][1] * M->m[2][0] * M->m[3][2] +
2269 M->m[0][1] * M->m[3][0] * M->m[2][2] +
2270 M->m[0][2] * M->m[2][0] * M->m[3][1] -
2271 M->m[0][2] * M->m[3][0] * M->m[2][1];
2272
2273 D->m[3][2] =
2274 -M->m[0][0] * M->m[1][1] * M->m[3][2] +
2275 M->m[0][0] * M->m[3][1] * M->m[1][2] +
2276 M->m[0][1] * M->m[1][0] * M->m[3][2] -
2277 M->m[0][1] * M->m[3][0] * M->m[1][2] -
2278 M->m[0][2] * M->m[1][0] * M->m[3][1] +
2279 M->m[0][2] * M->m[3][0] * M->m[1][1];
2280
2281 D->m[3][3] =
2282 M->m[0][0] * M->m[1][1] * M->m[2][2] -
2283 M->m[0][0] * M->m[2][1] * M->m[1][2] -
2284 M->m[0][1] * M->m[1][0] * M->m[2][2] +
2285 M->m[0][1] * M->m[2][0] * M->m[1][2] +
2286 M->m[0][2] * M->m[1][0] * M->m[2][1] -
2287 M->m[0][2] * M->m[2][0] * M->m[1][1];
2288
2289 det =
2290 M->m[0][0] * D->m[0][0] +
2291 M->m[1][0] * D->m[0][1] +
2292 M->m[2][0] * D->m[0][2] +
2293 M->m[3][0] * D->m[0][3];
2294
2295 det = 1.0 / det;
2296
2297 for (i = 0; i < 4; i++)
2298 for (k = 0; k < 4; k++)
2299 D->m[i][k] *= det;
2300
2301 #ifdef DEBUG
2302 {
2303 D3DMATRIX I;
2304
2305 nine_d3d_matrix_matrix_mul(&I, D, M);
2306
2307 for (i = 0; i < 4; ++i)
2308 for (k = 0; k < 4; ++k)
2309 if (fabsf(I.m[i][k] - (float)(i == k)) > 1e-3)
2310 DBG("Matrix inversion check FAILED !\n");
2311 }
2312 #endif
2313 }
2314
2315 /* TODO: don't use 4x4 inverse, unless this gets all nicely inlined ? */
2316 void
2317 nine_d3d_matrix_inverse_3x3(D3DMATRIX *D, const D3DMATRIX *M)
2318 {
2319 D3DMATRIX T;
2320 unsigned i, j;
2321
2322 for (i = 0; i < 3; ++i)
2323 for (j = 0; j < 3; ++j)
2324 T.m[i][j] = M->m[i][j];
2325 for (i = 0; i < 3; ++i) {
2326 T.m[i][3] = 0.0f;
2327 T.m[3][i] = 0.0f;
2328 }
2329 T.m[3][3] = 1.0f;
2330
2331 nine_d3d_matrix_inverse(D, &T);
2332 }