a5466a7bdd4d00d92cac48a54f805b880de0fc09
[mesa.git] / src / gallium / state_trackers / nine / nine_ff.c
1
2 /* FF is big and ugly so feel free to write lines as long as you like.
3 * Aieeeeeeeee !
4 *
5 * Let me make that clearer:
6 * Aieeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee ! !! !!!
7 */
8
9 #include "device9.h"
10 #include "basetexture9.h"
11 #include "vertexdeclaration9.h"
12 #include "vertexshader9.h"
13 #include "pixelshader9.h"
14 #include "nine_ff.h"
15 #include "nine_defines.h"
16 #include "nine_helpers.h"
17 #include "nine_pipe.h"
18 #include "nine_dump.h"
19
20 #include "pipe/p_context.h"
21 #include "tgsi/tgsi_ureg.h"
22 #include "tgsi/tgsi_dump.h"
23 #include "util/u_box.h"
24 #include "util/u_hash_table.h"
25 #include "util/u_upload_mgr.h"
26
/* When defined, the shader builders in this file emit expanded instruction
 * sequences (DP3/RSQ/MUL for normalization, MAX/MIN for clamping) instead of
 * the single NRM / CLAMP opcodes. */
#define NINE_TGSI_LAZY_DEVS 1

/* NOTE(review): presumably selects the debug channel used by the nine debug
 * macros for this file -- confirm against nine_debug.h. */
#define DBG_CHANNEL DBG_FF

/* Sizes of the fixed-function constant arrays; the VS layout documented
 * further down in this file uses indices up to CONST[255]. */
#define NINE_FF_NUM_VS_CONST 256
#define NINE_FF_NUM_PS_CONST 24
33
/* Four-component float vector (x, y, z, w). */
struct fvec4
{
    float x, y, z, w;
};
38
/* Key describing one fixed-function vertex shader variant.
 *
 * The bit-fields are packed into six 32-bit words; value64[] / value32[]
 * alias the same storage so the key can be hashed word by word and compared
 * with memcmp(). The explicit pad fields fill every word completely.
 */
struct nine_ff_vs_key
{
    union {
        struct {
            uint32_t position_t : 1; /* input is POSITIONT instead of POSITION */
            uint32_t lighting : 1;
            uint32_t darkness : 1; /* lighting enabled but no active lights */
            uint32_t localviewer : 1; /* specular half vector uses the normalized eye-space vertex */
            uint32_t vertexpointsize : 1; /* point size comes from a PSIZE vertex input */
            uint32_t pointscale : 1;
            uint32_t vertexblend : 3; /* number of world matrices blended (0 = no blending) */
            uint32_t vertexblend_indexed : 1; /* world matrices selected through BLENDINDICES */
            uint32_t vertextween : 1; /* interpolate between POSITION0/1 (and NORMAL0/1) */
            uint32_t mtl_diffuse : 2; /* 0 = material, 1 = color1, 2 = color2 */
            uint32_t mtl_ambient : 2;
            uint32_t mtl_specular : 2;
            uint32_t mtl_emissive : 2;
            uint32_t fog_mode : 2; /* compared against D3DFOG_*; 0 = no fog computation */
            uint32_t fog_range : 1; /* use distance to the vertex instead of |z| */
            uint32_t color0in_one : 1; /* no COLOR0 input declared: constant 1.0 is used */
            uint32_t color1in_one : 1; /* no COLOR1 input declared: constant 1.0 is used */
            uint32_t fog : 1; /* a FOG output is declared */
            uint32_t specular_enable : 1; /* specular term is also added into color output 0 */
            uint32_t pad1 : 6;
            uint32_t tc_dim_input: 16; /* 8 * 2 bits */ /* stored as (dimension - 1) */
            uint32_t pad2 : 16;
            uint32_t tc_dim_output: 24; /* 8 * 3 bits */ /* 0 = do not transform */
            uint32_t pad3 : 8;
            uint32_t tc_gen : 24; /* 8 * 3 bits */ /* NINED3DTSS_TCI_* per stage */
            uint32_t pad4 : 8;
            uint32_t tc_idx : 24; /* 8 * 3 bits: texcoord input index per stage */
            uint32_t pad5 : 8;
            uint32_t passthrough; /* bitmask tested with (1 << NINE_DECLUSAGE_*) */
        };
        uint64_t value64[3]; /* don't forget to resize VertexShader9.ff_key */
        uint32_t value32[6];
    };
};
77
/* Texture stage state, as packed per stage into nine_ff_ps_key::ts[]:
 *
 * COLOROP    D3DTOP 5 bit
 * ALPHAOP    D3DTOP 5 bit
 * COLORARG0  D3DTA  3 bit
 * COLORARG1  D3DTA  3 bit
 * COLORARG2  D3DTA  3 bit
 * ALPHAARG0  D3DTA  3 bit
 * ALPHAARG1  D3DTA  3 bit
 * ALPHAARG2  D3DTA  3 bit
 * RESULTARG  D3DTA  1 bit (CURRENT:0 or TEMP:1)
 * TEXTARGET         2 bit (1D/2D/3D/CUBE)
 * (pad)             1 bit
 * ===========================
 *         32 bit per stage
 */
/* Key describing one fixed-function pixel shader variant.
 *
 * The fields occupy twelve 32-bit words; value64[] / value32[] alias the
 * same storage so the key can be hashed word by word and compared with
 * memcmp().
 */
struct nine_ff_ps_key
{
    union {
        struct {
            struct {
                uint32_t colorop : 5;
                uint32_t alphaop : 5;
                uint32_t colorarg0 : 3;
                uint32_t colorarg1 : 3;
                uint32_t colorarg2 : 3;
                uint32_t alphaarg0 : 3;
                uint32_t alphaarg1 : 3;
                uint32_t alphaarg2 : 3;
                uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */
                uint32_t textarget : 2; /* 1D/2D/3D/CUBE */
                uint32_t pad : 1;
                /* that's 32 bit exactly */
            } ts[8];
            uint32_t projected : 16;
            uint32_t fog : 1; /* for vFog coming from VS */
            uint32_t fog_mode : 2;
            uint32_t specular : 1;
            uint32_t pad1 : 12; /* 9 32-bit words with this */
            uint8_t colorarg_b4[3];
            uint8_t colorarg_b5[3];
            uint8_t alphaarg_b4[3]; /* 11 32-bit words plus a byte */
            uint8_t pad2[3]; /* rounds the key up to 12 32-bit words */
        };
        uint64_t value64[6]; /* don't forget to resize PixelShader9.ff_key */
        uint32_t value32[12];
    };
};
125
126 static unsigned nine_ff_vs_key_hash(void *key)
127 {
128 struct nine_ff_vs_key *vs = key;
129 unsigned i;
130 uint32_t hash = vs->value32[0];
131 for (i = 1; i < Elements(vs->value32); ++i)
132 hash ^= vs->value32[i];
133 return hash;
134 }
135 static int nine_ff_vs_key_comp(void *key1, void *key2)
136 {
137 struct nine_ff_vs_key *a = (struct nine_ff_vs_key *)key1;
138 struct nine_ff_vs_key *b = (struct nine_ff_vs_key *)key2;
139
140 return memcmp(a->value64, b->value64, sizeof(a->value64));
141 }
142 static unsigned nine_ff_ps_key_hash(void *key)
143 {
144 struct nine_ff_ps_key *ps = key;
145 unsigned i;
146 uint32_t hash = ps->value32[0];
147 for (i = 1; i < Elements(ps->value32); ++i)
148 hash ^= ps->value32[i];
149 return hash;
150 }
151 static int nine_ff_ps_key_comp(void *key1, void *key2)
152 {
153 struct nine_ff_ps_key *a = (struct nine_ff_ps_key *)key1;
154 struct nine_ff_ps_key *b = (struct nine_ff_ps_key *)key2;
155
156 return memcmp(a->value64, b->value64, sizeof(a->value64));
157 }
158 static unsigned nine_ff_fvf_key_hash(void *key)
159 {
160 return *(DWORD *)key;
161 }
162 static int nine_ff_fvf_key_comp(void *key1, void *key2)
163 {
164 return *(DWORD *)key1 != *(DWORD *)key2;
165 }
166
167 static void nine_ff_prune_vs(struct NineDevice9 *);
168 static void nine_ff_prune_ps(struct NineDevice9 *);
169
170 static void nine_ureg_tgsi_dump(struct ureg_program *ureg, boolean override)
171 {
172 if (debug_get_bool_option("NINE_FF_DUMP", FALSE) || override) {
173 unsigned count;
174 const struct tgsi_token *toks = ureg_get_tokens(ureg, &count);
175 tgsi_dump(toks, 0);
176 ureg_free_tokens(toks);
177 }
178 }
179
/* Single-component source operands built from a register that is first
 * converted with ureg_src() (in this file: ureg_dst temporaries). */
#define _X(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_X)
#define _Y(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Y)
#define _Z(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Z)
#define _W(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_W)

/* Same as above, but the operand is handed to ureg_scalar() unchanged
 * (in this file: values that already are ureg_src). */
#define _XXXX(r) ureg_scalar(r, TGSI_SWIZZLE_X)
#define _YYYY(r) ureg_scalar(r, TGSI_SWIZZLE_Y)
#define _ZZZZ(r) ureg_scalar(r, TGSI_SWIZZLE_Z)
#define _WWWW(r) ureg_scalar(r, TGSI_SWIZZLE_W)

/* Identity: the operand is used as-is. */
#define _XYZW(r) (r)

/* AL should contain base address of lights table. */
/* Constant `i` addressed indirectly through AL.x; expects `ureg` and `AL`
 * to be visible in the scope where the macro is expanded. */
#define LIGHT_CONST(i) \
    ureg_src_indirect(ureg_DECL_constant(ureg, i), _X(AL))

/* Material constant `i`, located at constant index 19 + i; expects `ureg`
 * in the expanding scope. */
#define MATERIAL_CONST(i) \
    ureg_DECL_constant(ureg, 19 + (i))

/* Plain constant `n`; expects `ureg` in the expanding scope. */
#define _CONST(n) ureg_DECL_constant(ureg, n)
200
201 /* VS FF constants layout:
202 *
203 * CONST[ 0.. 3] D3DTS_WORLD * D3DTS_VIEW * D3DTS_PROJECTION
204 * CONST[ 4.. 7] D3DTS_WORLD * D3DTS_VIEW
205 * CONST[ 8..11] D3DTS_VIEW * D3DTS_PROJECTION
206 * CONST[12..15] D3DTS_VIEW
207 * CONST[16..18] Normal matrix
208 *
209 * CONST[19] MATERIAL.Emissive + Material.Ambient * RS.Ambient
210 * CONST[20] MATERIAL.Diffuse
211 * CONST[21] MATERIAL.Ambient
212 * CONST[22] MATERIAL.Specular
213 * CONST[23].x___ MATERIAL.Power
214 * CONST[24] MATERIAL.Emissive
215 * CONST[25] RS.Ambient
216 *
217 * CONST[26].x___ RS.PointSizeMin
218 * CONST[26]._y__ RS.PointSizeMax
219 * CONST[26].__z_ RS.PointSize
220 * CONST[26].___w RS.PointScaleA
221 * CONST[27].x___ RS.PointScaleB
222 * CONST[27]._y__ RS.PointScaleC
223 *
224 * CONST[28].x___ RS.FogEnd
225 * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
226 * CONST[28].__z_ RS.FogDensity
227 *
228 * CONST[30].x___ TWEENFACTOR
229 *
230 * CONST[32].x___ LIGHT[0].Type
231 * CONST[32]._yzw LIGHT[0].Attenuation0,1,2
232 * CONST[33] LIGHT[0].Diffuse
233 * CONST[34] LIGHT[0].Specular
234 * CONST[35] LIGHT[0].Ambient
235 * CONST[36].xyz_ LIGHT[0].Position
236 * CONST[36].___w LIGHT[0].Range
237 * CONST[37].xyz_ LIGHT[0].Direction
238 * CONST[37].___w LIGHT[0].Falloff
239 * CONST[38].x___ cos(LIGHT[0].Theta / 2)
240 * CONST[38]._y__ cos(LIGHT[0].Phi / 2)
241 * CONST[38].__z_ 1.0f / (cos(LIGHT[0].Theta / 2) - cos(LIGHT[0].Phi / 2))
242 * CONST[39].xyz_ LIGHT[0].HalfVector (for directional lights)
243 * CONST[39].___w 1 if this is the last active light, 0 if not
244 * CONST[40] LIGHT[1]
245 * CONST[48] LIGHT[2]
246 * CONST[56] LIGHT[3]
247 * CONST[64] LIGHT[4]
248 * CONST[72] LIGHT[5]
249 * CONST[80] LIGHT[6]
250 * CONST[88] LIGHT[7]
251 * NOTE: no lighting code is generated if there are no active lights
252 *
253 * CONST[100].x___ Viewport 2/width
254 * CONST[100]._y__ Viewport 2/height
255 * CONST[100].__z_ Viewport 1/(zmax - zmin)
256 * CONST[101].x___ Viewport x0
257 * CONST[101]._y__ Viewport y0
258 * CONST[101].__z_ Viewport z0
259 *
260 * CONST[128..131] D3DTS_TEXTURE0
261 * CONST[132..135] D3DTS_TEXTURE1
262 * CONST[136..139] D3DTS_TEXTURE2
263 * CONST[140..143] D3DTS_TEXTURE3
264 * CONST[144..147] D3DTS_TEXTURE4
265 * CONST[148..151] D3DTS_TEXTURE5
266 * CONST[152..155] D3DTS_TEXTURE6
267 * CONST[156..159] D3DTS_TEXTURE7
268 *
269 * CONST[224] D3DTS_WORLDMATRIX[0]
270 * CONST[228] D3DTS_WORLDMATRIX[1]
271 * ...
272 * CONST[252] D3DTS_WORLDMATRIX[7]
273 */
/* Working state used while building one fixed-function vertex shader. */
struct vs_build_ctx
{
    struct ureg_program *ureg; /* program being assembled */
    const struct nine_ff_vs_key *key; /* variant description driving the build */

    /* input[i] holds the NINE_DECLUSAGE value declared for VS input slot i;
     * num_inputs is the number of slots handed out so far. */
    uint16_t input[PIPE_MAX_ATTRIBS];
    unsigned num_inputs;

    /* Source operands for the vertex attributes. */
    struct ureg_src aVtx; /* position */
    struct ureg_src aNrm; /* normal */
    struct ureg_src aCol[2]; /* COLOR0 / COLOR1 */
    struct ureg_src aTex[8]; /* texture coordinates */
    struct ureg_src aPsz; /* point size */
    struct ureg_src aInd; /* blend indices */
    struct ureg_src aWgt; /* blend weights */

    struct ureg_src aVtx1; /* tweening */
    struct ureg_src aNrm1;

    /* Material terms: ambient, diffuse, specular, emissive. */
    struct ureg_src mtlA;
    struct ureg_src mtlD;
    struct ureg_src mtlS;
    struct ureg_src mtlE;
};
298
299 static inline unsigned
300 get_texcoord_sn(struct pipe_screen *screen)
301 {
302 if (screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD))
303 return TGSI_SEMANTIC_TEXCOORD;
304 return TGSI_SEMANTIC_GENERIC;
305 }
306
307 static inline struct ureg_src
308 build_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl)
309 {
310 const unsigned i = vs->num_inputs++;
311 assert(i < PIPE_MAX_ATTRIBS);
312 vs->input[i] = ndecl;
313 return ureg_DECL_vs_input(vs->ureg, i);
314 }
315
316 /* NOTE: dst may alias src */
317 static inline void
318 ureg_normalize3(struct ureg_program *ureg,
319 struct ureg_dst dst, struct ureg_src src,
320 struct ureg_dst tmp)
321 {
322 #ifdef NINE_TGSI_LAZY_DEVS
323 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
324
325 ureg_DP3(ureg, tmp_x, src, src);
326 ureg_RSQ(ureg, tmp_x, _X(tmp));
327 ureg_MUL(ureg, dst, src, _X(tmp));
328 #else
329 ureg_NRM(ureg, dst, src);
330 #endif
331 }
332
333 static void *
334 nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
335 {
336 const struct nine_ff_vs_key *key = vs->key;
337 struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_VERTEX);
338 struct ureg_dst oPos, oCol[2], oPsz, oFog;
339 struct ureg_dst rVtx, rNrm;
340 struct ureg_dst r[8];
341 struct ureg_dst AR;
342 struct ureg_dst tmp, tmp_x, tmp_y, tmp_z;
343 unsigned i, c;
344 unsigned label[32], l = 0;
345 unsigned num_r = 8;
346 boolean need_rNrm = key->lighting || key->pointscale || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL);
347 boolean need_rVtx = key->lighting || key->fog_mode;
348 const unsigned texcoord_sn = get_texcoord_sn(device->screen);
349
350 vs->ureg = ureg;
351
352 /* Check which inputs we should transform. */
353 for (i = 0; i < 8 * 3; i += 3) {
354 switch ((key->tc_gen >> i) & 0x3) {
355 case NINED3DTSS_TCI_CAMERASPACENORMAL:
356 need_rNrm = TRUE;
357 break;
358 case NINED3DTSS_TCI_CAMERASPACEPOSITION:
359 need_rVtx = TRUE;
360 break;
361 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
362 need_rVtx = need_rNrm = TRUE;
363 break;
364 default:
365 break;
366 }
367 }
368
369 /* Declare and record used inputs (needed for linkage with vertex format):
370 * (texture coordinates handled later)
371 */
372 vs->aVtx = build_vs_add_input(vs,
373 key->position_t ? NINE_DECLUSAGE_POSITIONT : NINE_DECLUSAGE_POSITION);
374
375 if (need_rNrm)
376 vs->aNrm = build_vs_add_input(vs, NINE_DECLUSAGE_NORMAL);
377
378 vs->aCol[0] = ureg_imm1f(ureg, 1.0f);
379 vs->aCol[1] = ureg_imm1f(ureg, 1.0f);
380
381 if (key->lighting || key->darkness) {
382 const unsigned mask = key->mtl_diffuse | key->mtl_specular |
383 key->mtl_ambient | key->mtl_emissive;
384 if ((mask & 0x1) && !key->color0in_one)
385 vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
386 if ((mask & 0x2) && !key->color1in_one)
387 vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
388
389 vs->mtlD = MATERIAL_CONST(1);
390 vs->mtlA = MATERIAL_CONST(2);
391 vs->mtlS = MATERIAL_CONST(3);
392 vs->mtlE = MATERIAL_CONST(5);
393 if (key->mtl_diffuse == 1) vs->mtlD = vs->aCol[0]; else
394 if (key->mtl_diffuse == 2) vs->mtlD = vs->aCol[1];
395 if (key->mtl_ambient == 1) vs->mtlA = vs->aCol[0]; else
396 if (key->mtl_ambient == 2) vs->mtlA = vs->aCol[1];
397 if (key->mtl_specular == 1) vs->mtlS = vs->aCol[0]; else
398 if (key->mtl_specular == 2) vs->mtlS = vs->aCol[1];
399 if (key->mtl_emissive == 1) vs->mtlE = vs->aCol[0]; else
400 if (key->mtl_emissive == 2) vs->mtlE = vs->aCol[1];
401 } else {
402 if (!key->color0in_one) vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
403 if (!key->color1in_one) vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
404 }
405
406 if (key->vertexpointsize)
407 vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE);
408
409 if (key->vertexblend_indexed || key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES))
410 vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES);
411 if (key->vertexblend || key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT))
412 vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT);
413 if (key->vertextween) {
414 vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1));
415 vs->aNrm1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(NORMAL,1));
416 }
417
418 /* Declare outputs:
419 */
420 oPos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */
421 oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0));
422 oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1));
423 if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
424 oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 0);
425 oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X);
426 }
427
428 if (key->vertexpointsize || key->pointscale) {
429 oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0,
430 TGSI_WRITEMASK_X, 0, 1);
431 oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X);
432 }
433
434 /* Declare TEMPs:
435 */
436 for (i = 0; i < num_r; ++i)
437 r[i] = ureg_DECL_local_temporary(ureg);
438 tmp = r[0];
439 tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
440 tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
441 tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
442 if (key->lighting || key->vertexblend)
443 AR = ureg_DECL_address(ureg);
444
445 rVtx = ureg_writemask(r[1], TGSI_WRITEMASK_XYZ);
446 rNrm = ureg_writemask(r[2], TGSI_WRITEMASK_XYZ);
447
448 /* === Vertex transformation / vertex blending:
449 */
450 if (key->vertextween) {
451 assert(!key->vertexblend);
452 ureg_LRP(ureg, r[2], _XXXX(_CONST(30)), vs->aVtx, vs->aVtx1);
453 if (need_rNrm)
454 ureg_LRP(ureg, r[3], _XXXX(_CONST(30)), vs->aNrm, vs->aNrm1);
455 vs->aVtx = ureg_src(r[2]);
456 vs->aNrm = ureg_src(r[3]);
457 }
458
459 if (key->vertexblend) {
460 struct ureg_src cWM[4];
461
462 for (i = 224; i <= 255; ++i)
463 ureg_DECL_constant(ureg, i);
464
465 /* translate world matrix index to constant file index */
466 if (key->vertexblend_indexed) {
467 ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 224.0f));
468 ureg_ARL(ureg, AR, ureg_src(tmp));
469 }
470
471 ureg_MOV(ureg, r[2], ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
472 ureg_MOV(ureg, r[3], ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f));
473
474 for (i = 0; i < key->vertexblend; ++i) {
475 for (c = 0; c < 4; ++c) {
476 cWM[c] = ureg_src_register(TGSI_FILE_CONSTANT, (224 + i * 4) * !key->vertexblend_indexed + c);
477 if (key->vertexblend_indexed)
478 cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i));
479 }
480 /* multiply by WORLD(index) */
481 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), cWM[0]);
482 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), cWM[1], ureg_src(tmp));
483 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), cWM[2], ureg_src(tmp));
484 ureg_MAD(ureg, tmp, _WWWW(vs->aVtx), cWM[3], ureg_src(tmp));
485
486 if (i < (key->vertexblend - 1)) {
487 /* accumulate weighted position value */
488 ureg_MAD(ureg, r[2], ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(r[2]));
489 /* subtract weighted position value for last value */
490 ureg_SUB(ureg, r[3], ureg_src(r[3]), ureg_scalar(vs->aWgt, i));
491 }
492 }
493
494 /* the last weighted position is always 1 - sum_of_previous_weights */
495 ureg_MAD(ureg, r[2], ureg_src(tmp), ureg_scalar(ureg_src(r[3]), key->vertexblend - 1), ureg_src(r[2]));
496
497 /* multiply by VIEW_PROJ */
498 ureg_MUL(ureg, tmp, _X(r[2]), _CONST(8));
499 ureg_MAD(ureg, tmp, _Y(r[2]), _CONST(9), ureg_src(tmp));
500 ureg_MAD(ureg, tmp, _Z(r[2]), _CONST(10), ureg_src(tmp));
501 ureg_MAD(ureg, oPos, _W(r[2]), _CONST(11), ureg_src(tmp));
502
503 if (need_rVtx)
504 vs->aVtx = ureg_src(r[2]);
505 } else
506 if (key->position_t && device->driver_caps.window_space_position_support) {
507 ureg_MOV(ureg, oPos, vs->aVtx);
508 } else if (key->position_t) {
509 /* vs->aVtx contains the coordinates buffer wise.
510 * later in the pipeline, clipping, viewport and division
511 * by w (rhw = 1/w) are going to be applied, so do the reverse
512 * of these transformations (except clipping) to have the good
513 * position at the end.*/
514 ureg_MOV(ureg, tmp, vs->aVtx);
515 /* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */
516 ureg_SUB(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(101));
517 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100));
518 ureg_SUB(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 1.0f));
519 /* Y needs to be reversed */
520 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp)));
521 /* inverse rhw */
522 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), _W(tmp));
523 /* multiply X, Y, Z by w */
524 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _W(tmp));
525 ureg_MOV(ureg, oPos, ureg_src(tmp));
526 } else {
527 /* position = vertex * WORLD_VIEW_PROJ */
528 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(0));
529 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(1), ureg_src(tmp));
530 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(2), ureg_src(tmp));
531 ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(tmp));
532 }
533
534 if (need_rVtx) {
535 ureg_MUL(ureg, rVtx, _XXXX(vs->aVtx), _CONST(4));
536 ureg_MAD(ureg, rVtx, _YYYY(vs->aVtx), _CONST(5), ureg_src(rVtx));
537 ureg_MAD(ureg, rVtx, _ZZZZ(vs->aVtx), _CONST(6), ureg_src(rVtx));
538 ureg_MAD(ureg, rVtx, _WWWW(vs->aVtx), _CONST(7), ureg_src(rVtx));
539 }
540 if (need_rNrm) {
541 ureg_MUL(ureg, rNrm, _XXXX(vs->aNrm), _CONST(16));
542 ureg_MAD(ureg, rNrm, _YYYY(vs->aNrm), _CONST(17), ureg_src(rNrm));
543 ureg_MAD(ureg, rNrm, _ZZZZ(vs->aNrm), _CONST(18), ureg_src(rNrm));
544 ureg_normalize3(ureg, rNrm, ureg_src(rNrm), tmp);
545 }
546 /* NOTE: don't use vs->aVtx, vs->aNrm after this line */
547
548 /* === Process point size:
549 */
550 if (key->vertexpointsize) {
551 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
552 #ifdef NINE_TGSI_LAZY_DEVS
553 struct ureg_dst tmp_clamp = ureg_DECL_temporary(ureg);
554
555 ureg_MAX(ureg, tmp_clamp, vs->aPsz, _XXXX(cPsz1));
556 ureg_MIN(ureg, oPsz, ureg_src(tmp_clamp), _YYYY(cPsz1));
557 ureg_release_temporary(ureg, tmp_clamp);
558 #else
559 ureg_CLAMP(ureg, oPsz, vs->aPsz, _XXXX(cPsz1), _YYYY(cPsz1));
560 #endif
561 } else if (key->pointscale) {
562 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
563 struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27);
564
565 ureg_DP3(ureg, tmp_x, ureg_src(r[1]), ureg_src(r[1]));
566 ureg_SQRT(ureg, tmp_y, _X(tmp));
567 ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2));
568 ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1));
569 ureg_RCP(ureg, tmp_x, ureg_src(tmp));
570 ureg_MUL(ureg, tmp_x, ureg_src(tmp), _ZZZZ(cPsz1));
571 #ifdef NINE_TGSI_LAZY_DEVS
572 struct ureg_dst tmp_clamp = ureg_DECL_temporary(ureg);
573
574 ureg_MAX(ureg, tmp_clamp, _X(tmp), _XXXX(cPsz1));
575 ureg_MIN(ureg, oPsz, ureg_src(tmp_clamp), _YYYY(cPsz1));
576 ureg_release_temporary(ureg, tmp_clamp);
577 #else
578 ureg_CLAMP(ureg, oPsz, _X(tmp), _XXXX(cPsz1), _YYYY(cPsz1));
579 #endif
580 }
581
582 for (i = 0; i < 8; ++i) {
583 struct ureg_dst oTex, input_coord, transformed, t;
584 unsigned c, writemask;
585 const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7;
586 const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7;
587 unsigned dim_input = 1 + ((key->tc_dim_input >> (i * 2)) & 0x3);
588 const unsigned dim_output = (key->tc_dim_output >> (i * 3)) & 0x7;
589
590 /* No texture output of index s */
591 if (tci == NINED3DTSS_TCI_DISABLE)
592 continue;
593 oTex = ureg_DECL_output(ureg, texcoord_sn, i);
594 input_coord = r[5];
595 transformed = r[6];
596
597 /* Get the coordinate */
598 switch (tci) {
599 case NINED3DTSS_TCI_PASSTHRU:
600 /* NINED3DTSS_TCI_PASSTHRU => Use texcoord coming from index idx *
601 * Else the idx is used only to determine wrapping mode. */
602 vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx));
603 ureg_MOV(ureg, input_coord, vs->aTex[idx]);
604 break;
605 case NINED3DTSS_TCI_CAMERASPACENORMAL:
606 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rNrm));
607 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
608 dim_input = 4;
609 break;
610 case NINED3DTSS_TCI_CAMERASPACEPOSITION:
611 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rVtx));
612 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
613 dim_input = 4;
614 break;
615 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
616 tmp.WriteMask = TGSI_WRITEMASK_XYZ;
617 ureg_DP3(ureg, tmp_x, ureg_src(rVtx), ureg_src(rNrm));
618 ureg_MUL(ureg, tmp, ureg_src(rNrm), _X(tmp));
619 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
620 ureg_SUB(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(rVtx), ureg_src(tmp));
621 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
622 dim_input = 4;
623 tmp.WriteMask = TGSI_WRITEMASK_XYZW;
624 break;
625 case NINED3DTSS_TCI_SPHEREMAP:
626 assert(!"TODO");
627 break;
628 default:
629 assert(0);
630 break;
631 }
632
633 /* Apply the transformation */
634 /* dim_output == 0 => do not transform the components.
635 * XYZRHW also disables transformation */
636 if (!dim_output || key->position_t) {
637 transformed = input_coord;
638 writemask = TGSI_WRITEMASK_XYZW;
639 } else {
640 for (c = 0; c < dim_output; c++) {
641 t = ureg_writemask(transformed, 1 << c);
642 switch (dim_input) {
643 /* dim_input = 1 2 3: -> we add trailing 1 to input*/
644 case 1: ureg_MAD(ureg, t, _X(input_coord), _XXXX(_CONST(128 + i * 4 + c)), _YYYY(_CONST(128 + i * 4 + c)));
645 break;
646 case 2: ureg_DP2(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
647 ureg_ADD(ureg, t, ureg_src(transformed), _ZZZZ(_CONST(128 + i * 4 + c)));
648 break;
649 case 3: ureg_DP3(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
650 ureg_ADD(ureg, t, ureg_src(transformed), _WWWW(_CONST(128 + i * 4 + c)));
651 break;
652 case 4: ureg_DP4(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); break;
653 default:
654 assert(0);
655 }
656 }
657 writemask = (1 << dim_output) - 1;
658 }
659
660 ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed));
661 }
662
663 /* === Lighting:
664 *
665 * DIRECTIONAL: Light at infinite distance, parallel rays, no attenuation.
666 * POINT: Finite distance to scene, divergent rays, isotropic, attenuation.
667 * SPOT: Finite distance, divergent rays, angular dependence, attenuation.
668 *
669 * vec3 normal = normalize(in.Normal * NormalMatrix);
670 * vec3 hitDir = light.direction;
671 * float atten = 1.0;
672 *
673 * if (light.type != DIRECTIONAL)
674 * {
675 * vec3 hitVec = light.position - eyeVertex;
676 * float d = length(hitVec);
677 * hitDir = hitVec / d;
678 * atten = 1 / ((light.atten2 * d + light.atten1) * d + light.atten0);
679 * }
680 *
681 * if (light.type == SPOTLIGHT)
682 * {
683 * float rho = dp3(-hitVec, light.direction);
684 * if (rho < cos(light.phi / 2))
685 * atten = 0;
686 * if (rho < cos(light.theta / 2))
687 * atten *= pow(some_func(rho), light.falloff);
688 * }
689 *
690 * float nDotHit = dp3_sat(normal, hitVec);
691 * float powFact = 0.0;
692 *
693 * if (nDotHit > 0.0)
694 * {
695 * vec3 midVec = normalize(hitDir + eye);
696 * float nDotMid = dp3_sat(normal, midVec);
697 * pFact = pow(nDotMid, material.power);
698 * }
699 *
700 * ambient += light.ambient * atten;
701 * diffuse += light.diffuse * atten * nDotHit;
702 * specular += light.specular * atten * powFact;
703 */
704 if (key->lighting) {
705 struct ureg_dst rAtt = ureg_writemask(r[1], TGSI_WRITEMASK_W);
706 struct ureg_dst rHit = ureg_writemask(r[3], TGSI_WRITEMASK_XYZ);
707 struct ureg_dst rMid = ureg_writemask(r[4], TGSI_WRITEMASK_XYZ);
708
709 struct ureg_dst rCtr = ureg_writemask(r[2], TGSI_WRITEMASK_W);
710
711 struct ureg_dst AL = ureg_writemask(AR, TGSI_WRITEMASK_X);
712
713 /* Light.*.Alpha is not used. */
714 struct ureg_dst rD = ureg_writemask(r[5], TGSI_WRITEMASK_XYZ);
715 struct ureg_dst rA = ureg_writemask(r[6], TGSI_WRITEMASK_XYZ);
716 struct ureg_dst rS = ureg_writemask(r[7], TGSI_WRITEMASK_XYZ);
717
718 struct ureg_src mtlP = _XXXX(MATERIAL_CONST(4));
719
720 struct ureg_src cLKind = _XXXX(LIGHT_CONST(0));
721 struct ureg_src cLAtt0 = _YYYY(LIGHT_CONST(0));
722 struct ureg_src cLAtt1 = _ZZZZ(LIGHT_CONST(0));
723 struct ureg_src cLAtt2 = _WWWW(LIGHT_CONST(0));
724 struct ureg_src cLColD = _XYZW(LIGHT_CONST(1));
725 struct ureg_src cLColS = _XYZW(LIGHT_CONST(2));
726 struct ureg_src cLColA = _XYZW(LIGHT_CONST(3));
727 struct ureg_src cLPos = _XYZW(LIGHT_CONST(4));
728 struct ureg_src cLRng = _WWWW(LIGHT_CONST(4));
729 struct ureg_src cLDir = _XYZW(LIGHT_CONST(5));
730 struct ureg_src cLFOff = _WWWW(LIGHT_CONST(5));
731 struct ureg_src cLTht = _XXXX(LIGHT_CONST(6));
732 struct ureg_src cLPhi = _YYYY(LIGHT_CONST(6));
733 struct ureg_src cLSDiv = _ZZZZ(LIGHT_CONST(6));
734 struct ureg_src cLLast = _WWWW(LIGHT_CONST(7));
735
736 const unsigned loop_label = l++;
737
738 ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */
739 ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f));
740 ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f));
741 ureg_MOV(ureg, rS, ureg_imm1f(ureg, 0.0f));
742 rD = ureg_saturate(rD);
743 rA = ureg_saturate(rA);
744 rS = ureg_saturate(rS);
745
746
747 /* loop management */
748 ureg_BGNLOOP(ureg, &label[loop_label]);
749 ureg_ARL(ureg, AL, _W(rCtr));
750
751 /* if (not DIRECTIONAL light): */
752 ureg_SNE(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_DIRECTIONAL));
753 ureg_MOV(ureg, rHit, ureg_negate(cLDir));
754 ureg_MOV(ureg, rAtt, ureg_imm1f(ureg, 1.0f));
755 ureg_IF(ureg, _X(tmp), &label[l++]);
756 {
757 /* hitDir = light.position - eyeVtx
758 * d = length(hitDir)
759 */
760 ureg_SUB(ureg, rHit, cLPos, ureg_src(rVtx));
761 ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit));
762 ureg_RSQ(ureg, tmp_y, _X(tmp));
763 ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */
764
765 /* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */
766 ureg_MAD(ureg, rAtt, _X(tmp), cLAtt2, cLAtt1);
767 ureg_MAD(ureg, rAtt, _X(tmp), _W(rAtt), cLAtt0);
768 ureg_RCP(ureg, rAtt, _W(rAtt));
769 /* cut-off if distance exceeds Light.Range */
770 ureg_SLT(ureg, tmp_x, _X(tmp), cLRng);
771 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
772 }
773 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
774 ureg_ENDIF(ureg);
775
776 /* normalize hitDir */
777 ureg_normalize3(ureg, rHit, ureg_src(rHit), tmp);
778
779 /* if (SPOT light) */
780 ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT));
781 ureg_IF(ureg, _X(tmp), &label[l++]);
782 {
783 /* rho = dp3(-hitDir, light.spotDir)
784 *
785 * if (rho > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi
786 * spotAtt = 1
787 * else
788 * if (rho <= light.cphi2)
789 * spotAtt = 0
790 * else
791 * spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff
792 */
793 ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */
794 ureg_SUB(ureg, tmp_x, _Y(tmp), cLPhi);
795 ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv);
796 ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */
797 ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */
798 ureg_SGE(ureg, tmp_y, _Y(tmp), cLPhi); /* if inside phi */
799 ureg_MAD(ureg, ureg_saturate(tmp_x), _X(tmp), _Y(tmp), _Z(tmp));
800 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
801 }
802 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
803 ureg_ENDIF(ureg);
804
805 /* directional factors, let's not use LIT because of clarity */
806 ureg_DP3(ureg, ureg_saturate(tmp_x), ureg_src(rNrm), ureg_src(rHit));
807 ureg_MOV(ureg, tmp_y, ureg_imm1f(ureg, 0.0f));
808 ureg_IF(ureg, _X(tmp), &label[l++]);
809 {
810 /* midVec = normalize(hitDir + eyeDir) */
811 if (key->localviewer) {
812 ureg_normalize3(ureg, rMid, ureg_src(rVtx), tmp);
813 ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_src(rMid));
814 } else {
815 ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, 1.0f));
816 }
817 ureg_normalize3(ureg, rMid, ureg_src(rMid), tmp);
818 ureg_DP3(ureg, ureg_saturate(tmp_y), ureg_src(rNrm), ureg_src(rMid));
819 ureg_POW(ureg, tmp_y, _Y(tmp), mtlP);
820
821 ureg_MUL(ureg, tmp_x, _W(rAtt), _X(tmp)); /* dp3(normal,hitDir) * att */
822 ureg_MUL(ureg, tmp_y, _W(rAtt), _Y(tmp)); /* power factor * att */
823 ureg_MAD(ureg, rD, cLColD, _X(tmp), ureg_src(rD)); /* accumulate diffuse */
824 ureg_MAD(ureg, rS, cLColS, _Y(tmp), ureg_src(rS)); /* accumulate specular */
825 }
826 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
827 ureg_ENDIF(ureg);
828
829 ureg_MAD(ureg, rA, cLColA, _W(rAtt), ureg_src(rA)); /* accumulate ambient */
830
831 /* break if this was the last light */
832 ureg_IF(ureg, cLLast, &label[l++]);
833 ureg_BRK(ureg);
834 ureg_ENDIF(ureg);
835 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
836
837 ureg_ADD(ureg, rCtr, _W(rCtr), ureg_imm1f(ureg, 8.0f));
838 ureg_fixup_label(ureg, label[loop_label], ureg_get_instruction_number(ureg));
839 ureg_ENDLOOP(ureg, &label[loop_label]);
840
841 /* Set alpha factors of illumination to 1.0 for the multiplications. */
842 rD.WriteMask = TGSI_WRITEMASK_W; rD.Saturate = 0;
843 rS.WriteMask = TGSI_WRITEMASK_W; rS.Saturate = 0;
844 rA.WriteMask = TGSI_WRITEMASK_W; rA.Saturate = 0;
845 ureg_MOV(ureg, rD, ureg_imm1f(ureg, 1.0f));
846 ureg_MOV(ureg, rS, ureg_imm1f(ureg, 1.0f));
847
848 /* Apply to material:
849 *
850 * oCol[0] = (material.emissive + material.ambient * rs.ambient) +
851 * material.ambient * ambient +
852 * material.diffuse * diffuse +
853 * oCol[1] = material.specular * specular;
854 */
855 if (key->mtl_emissive == 0 && key->mtl_ambient == 0) {
856 ureg_MOV(ureg, rA, ureg_imm1f(ureg, 1.0f));
857 ureg_MAD(ureg, tmp, ureg_src(rA), vs->mtlA, _CONST(19));
858 } else {
859 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), _CONST(25));
860 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE);
861 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W ), vs->mtlA, vs->mtlE);
862 }
863
864 if (key->specular_enable) {
865 /* add oCol[1] to oCol[0] */
866 ureg_MAD(ureg, tmp, ureg_src(rD), vs->mtlD, ureg_src(tmp));
867 ureg_MAD(ureg, oCol[0], ureg_src(rS), vs->mtlS, ureg_src(tmp));
868 } else {
869 ureg_MAD(ureg, oCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp));
870 }
871 ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS);
872 } else
873 /* COLOR */
874 if (key->darkness) {
875 if (key->mtl_emissive == 0 && key->mtl_ambient == 0) {
876 ureg_MAD(ureg, oCol[0], vs->mtlD, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), _CONST(19));
877 } else {
878 ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE);
879 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), vs->mtlA, vs->mtlE);
880 ureg_ADD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD, _W(tmp));
881 }
882 ureg_MUL(ureg, oCol[1], ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), vs->mtlS);
883 } else {
884 ureg_MOV(ureg, oCol[0], vs->aCol[0]);
885 ureg_MOV(ureg, oCol[1], vs->aCol[1]);
886 }
887
888 /* === Process fog.
889 *
890 * exp(x) = ex2(log2(e) * x)
891 */
892 if (key->fog_mode) {
893 if (key->position_t) {
894 ureg_MOV(ureg, ureg_saturate(tmp_x), ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));
895 } else
896 if (key->fog_range) {
897 ureg_DP3(ureg, tmp_x, ureg_src(rVtx), ureg_src(rVtx));
898 ureg_RSQ(ureg, tmp_z, _X(tmp));
899 ureg_MUL(ureg, tmp_z, _Z(tmp), _X(tmp));
900 } else {
901 ureg_MOV(ureg, tmp_z, ureg_abs(_Z(rVtx)));
902 }
903
904 if (key->fog_mode == D3DFOG_EXP) {
905 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
906 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
907 ureg_EX2(ureg, tmp_x, _X(tmp));
908 } else
909 if (key->fog_mode == D3DFOG_EXP2) {
910 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
911 ureg_MUL(ureg, tmp_x, _X(tmp), _X(tmp));
912 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
913 ureg_EX2(ureg, tmp_x, _X(tmp));
914 } else
915 if (key->fog_mode == D3DFOG_LINEAR && !key->position_t) {
916 ureg_SUB(ureg, tmp_x, _XXXX(_CONST(28)), _Z(tmp));
917 ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28)));
918 }
919 ureg_MOV(ureg, oFog, _X(tmp));
920 } else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) {
921 ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));
922 }
923
924 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) {
925 struct ureg_src input;
926 struct ureg_dst output;
927 input = vs->aWgt;
928 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 18);
929 ureg_MOV(ureg, output, input);
930 }
931 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) {
932 struct ureg_src input;
933 struct ureg_dst output;
934 input = vs->aInd;
935 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19);
936 ureg_MOV(ureg, output, input);
937 }
938 if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) {
939 struct ureg_src input;
940 struct ureg_dst output;
941 input = vs->aNrm;
942 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20);
943 ureg_MOV(ureg, output, input);
944 }
945 if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) {
946 struct ureg_src input;
947 struct ureg_dst output;
948 input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT);
949 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21);
950 ureg_MOV(ureg, output, input);
951 }
952 if (key->passthrough & (1 << NINE_DECLUSAGE_BINORMAL)) {
953 struct ureg_src input;
954 struct ureg_dst output;
955 input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL);
956 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22);
957 ureg_MOV(ureg, output, input);
958 }
959 if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
960 struct ureg_src input;
961 struct ureg_dst output;
962 input = build_vs_add_input(vs, NINE_DECLUSAGE_FOG);
963 input = ureg_scalar(input, TGSI_SWIZZLE_X);
964 output = oFog;
965 ureg_MOV(ureg, output, input);
966 }
967 if (key->passthrough & (1 << NINE_DECLUSAGE_DEPTH)) {
968 (void) 0; /* TODO: replace z of position output ? */
969 }
970
971
972 if (key->position_t && device->driver_caps.window_space_position_support)
973 ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
974
975 ureg_END(ureg);
976 nine_ureg_tgsi_dump(ureg, FALSE);
977 return ureg_create_shader_and_destroy(ureg, device->pipe);
978 }
979
980 /* PS FF constants layout:
981 *
982 * CONST[ 0.. 7] stage[i].D3DTSS_CONSTANT
983 * CONST[ 8..15].x___ stage[i].D3DTSS_BUMPENVMAT00
984 * CONST[ 8..15]._y__ stage[i].D3DTSS_BUMPENVMAT01
985 * CONST[ 8..15].__z_ stage[i].D3DTSS_BUMPENVMAT10
986 * CONST[ 8..15].___w stage[i].D3DTSS_BUMPENVMAT11
987 * CONST[16..19].x_z_ stage[i].D3DTSS_BUMPENVLSCALE
988 * CONST[16..19]._y_w stage[i].D3DTSS_BUMPENVLOFFSET
989 *
990 * CONST[20] D3DRS_TEXTUREFACTOR
991 * CONST[21] D3DRS_FOGCOLOR
992 * CONST[22].x___ RS.FogEnd
993 * CONST[22]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
994 * CONST[22].__z_ RS.FogDensity
995 */
996 struct ps_build_ctx
997 {
998 struct ureg_program *ureg;
999
1000 struct ureg_src vC[2]; /* DIFFUSE, SPECULAR */
1001 struct ureg_src vT[8]; /* TEXCOORD[i] */
1002 struct ureg_dst r[6]; /* TEMPs */
1003 struct ureg_dst rCur; /* D3DTA_CURRENT */
1004 struct ureg_dst rMod;
1005 struct ureg_src rCurSrc;
1006 struct ureg_dst rTmp; /* D3DTA_TEMP */
1007 struct ureg_src rTmpSrc;
1008 struct ureg_dst rTex;
1009 struct ureg_src rTexSrc;
1010 struct ureg_src cBEM[8];
1011 struct ureg_src s[8];
1012
1013 struct {
1014 unsigned index;
1015 unsigned index_pre_mod;
1016 unsigned num_regs;
1017 } stage;
1018 };
1019
1020 static struct ureg_src
1021 ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta)
1022 {
1023 struct ureg_src reg;
1024
1025 switch (ta & D3DTA_SELECTMASK) {
1026 case D3DTA_CONSTANT:
1027 reg = ureg_DECL_constant(ps->ureg, ps->stage.index);
1028 break;
1029 case D3DTA_CURRENT:
1030 reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc;
1031 break;
1032 case D3DTA_DIFFUSE:
1033 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);
1034 break;
1035 case D3DTA_SPECULAR:
1036 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1037 break;
1038 case D3DTA_TEMP:
1039 reg = ps->rTmpSrc;
1040 break;
1041 case D3DTA_TEXTURE:
1042 reg = ps->rTexSrc;
1043 break;
1044 case D3DTA_TFACTOR:
1045 reg = ureg_DECL_constant(ps->ureg, 20);
1046 break;
1047 default:
1048 assert(0);
1049 reg = ureg_src_undef();
1050 break;
1051 }
1052 if (ta & D3DTA_COMPLEMENT) {
1053 struct ureg_dst dst = ps->r[ps->stage.num_regs++];
1054 ureg_SUB(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), reg);
1055 reg = ureg_src(dst);
1056 }
1057 if (ta & D3DTA_ALPHAREPLICATE)
1058 reg = _WWWW(reg);
1059 return reg;
1060 }
1061
1062 static struct ureg_dst
1063 ps_get_ts_dst(struct ps_build_ctx *ps, unsigned ta)
1064 {
1065 assert(!(ta & (D3DTA_COMPLEMENT | D3DTA_ALPHAREPLICATE)));
1066
1067 switch (ta & D3DTA_SELECTMASK) {
1068 case D3DTA_CURRENT:
1069 return ps->rCur;
1070 case D3DTA_TEMP:
1071 return ps->rTmp;
1072 default:
1073 assert(0);
1074 return ureg_dst_undef();
1075 }
1076 }
1077
1078 static uint8_t ps_d3dtop_args_mask(D3DTEXTUREOP top)
1079 {
1080 switch (top) {
1081 case D3DTOP_DISABLE:
1082 return 0x0;
1083 case D3DTOP_SELECTARG1:
1084 case D3DTOP_PREMODULATE:
1085 return 0x2;
1086 case D3DTOP_SELECTARG2:
1087 return 0x4;
1088 case D3DTOP_MULTIPLYADD:
1089 case D3DTOP_LERP:
1090 return 0x7;
1091 default:
1092 return 0x6;
1093 }
1094 }
1095
1096 static inline boolean
1097 is_MOV_no_op(struct ureg_dst dst, struct ureg_src src)
1098 {
1099 return !dst.WriteMask ||
1100 (dst.File == src.File &&
1101 dst.Index == src.Index &&
1102 !dst.Indirect &&
1103 !dst.Saturate &&
1104 !src.Indirect &&
1105 !src.Negate &&
1106 !src.Absolute &&
1107 (!(dst.WriteMask & TGSI_WRITEMASK_X) || (src.SwizzleX == TGSI_SWIZZLE_X)) &&
1108 (!(dst.WriteMask & TGSI_WRITEMASK_Y) || (src.SwizzleY == TGSI_SWIZZLE_Y)) &&
1109 (!(dst.WriteMask & TGSI_WRITEMASK_Z) || (src.SwizzleZ == TGSI_SWIZZLE_Z)) &&
1110 (!(dst.WriteMask & TGSI_WRITEMASK_W) || (src.SwizzleW == TGSI_SWIZZLE_W)));
1111
1112 }
1113
1114 static void
1115 ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct ureg_src *arg)
1116 {
1117 struct ureg_program *ureg = ps->ureg;
1118 struct ureg_dst tmp = ps->r[ps->stage.num_regs];
1119 struct ureg_dst tmp2 = ps->r[ps->stage.num_regs+1];
1120 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
1121
1122 tmp.WriteMask = dst.WriteMask;
1123
1124 if (top != D3DTOP_SELECTARG1 && top != D3DTOP_SELECTARG2 &&
1125 top != D3DTOP_MODULATE && top != D3DTOP_PREMODULATE &&
1126 top != D3DTOP_BLENDDIFFUSEALPHA && top != D3DTOP_BLENDTEXTUREALPHA &&
1127 top != D3DTOP_BLENDFACTORALPHA && top != D3DTOP_BLENDCURRENTALPHA &&
1128 top != D3DTOP_BUMPENVMAP && top != D3DTOP_BUMPENVMAPLUMINANCE &&
1129 top != D3DTOP_LERP)
1130 dst = ureg_saturate(dst);
1131
1132 switch (top) {
1133 case D3DTOP_SELECTARG1:
1134 if (!is_MOV_no_op(dst, arg[1]))
1135 ureg_MOV(ureg, dst, arg[1]);
1136 break;
1137 case D3DTOP_SELECTARG2:
1138 if (!is_MOV_no_op(dst, arg[2]))
1139 ureg_MOV(ureg, dst, arg[2]);
1140 break;
1141 case D3DTOP_MODULATE:
1142 ureg_MUL(ureg, dst, arg[1], arg[2]);
1143 break;
1144 case D3DTOP_MODULATE2X:
1145 ureg_MUL(ureg, tmp, arg[1], arg[2]);
1146 ureg_ADD(ureg, dst, ureg_src(tmp), ureg_src(tmp));
1147 break;
1148 case D3DTOP_MODULATE4X:
1149 ureg_MUL(ureg, tmp, arg[1], arg[2]);
1150 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 4.0f));
1151 break;
1152 case D3DTOP_ADD:
1153 ureg_ADD(ureg, dst, arg[1], arg[2]);
1154 break;
1155 case D3DTOP_ADDSIGNED:
1156 ureg_ADD(ureg, tmp, arg[1], arg[2]);
1157 ureg_SUB(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 0.5f));
1158 break;
1159 case D3DTOP_ADDSIGNED2X:
1160 ureg_ADD(ureg, tmp, arg[1], arg[2]);
1161 ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1162 break;
1163 case D3DTOP_SUBTRACT:
1164 ureg_SUB(ureg, dst, arg[1], arg[2]);
1165 break;
1166 case D3DTOP_ADDSMOOTH:
1167 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), arg[1]);
1168 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]);
1169 break;
1170 case D3DTOP_BLENDDIFFUSEALPHA:
1171 ureg_LRP(ureg, dst, _WWWW(ps->vC[0]), arg[1], arg[2]);
1172 break;
1173 case D3DTOP_BLENDTEXTUREALPHA:
1174 /* XXX: alpha taken from previous stage, texture or result ? */
1175 ureg_LRP(ureg, dst, _W(ps->rTex), arg[1], arg[2]);
1176 break;
1177 case D3DTOP_BLENDFACTORALPHA:
1178 ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]);
1179 break;
1180 case D3DTOP_BLENDTEXTUREALPHAPM:
1181 ureg_SUB(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), _W(ps->rTex));
1182 ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]);
1183 break;
1184 case D3DTOP_BLENDCURRENTALPHA:
1185 ureg_LRP(ureg, dst, _WWWW(ps->rCurSrc), arg[1], arg[2]);
1186 break;
1187 case D3DTOP_PREMODULATE:
1188 ureg_MOV(ureg, dst, arg[1]);
1189 ps->stage.index_pre_mod = ps->stage.index + 1;
1190 break;
1191 case D3DTOP_MODULATEALPHA_ADDCOLOR:
1192 ureg_MAD(ureg, dst, _WWWW(arg[1]), arg[2], arg[1]);
1193 break;
1194 case D3DTOP_MODULATECOLOR_ADDALPHA:
1195 ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1]));
1196 break;
1197 case D3DTOP_MODULATEINVALPHA_ADDCOLOR:
1198 ureg_SUB(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), _WWWW(arg[1]));
1199 ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]);
1200 break;
1201 case D3DTOP_MODULATEINVCOLOR_ADDALPHA:
1202 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), arg[1]);
1203 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], _WWWW(arg[1]));
1204 break;
1205 case D3DTOP_BUMPENVMAP:
1206 break;
1207 case D3DTOP_BUMPENVMAPLUMINANCE:
1208 break;
1209 case D3DTOP_DOTPRODUCT3:
1210 ureg_SUB(ureg, tmp, arg[1], ureg_imm4f(ureg,0.5,0.5,0.5,0.5));
1211 ureg_SUB(ureg, tmp2, arg[2] , ureg_imm4f(ureg,0.5,0.5,0.5,0.5));
1212 ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2));
1213 ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0));
1214 break;
1215 case D3DTOP_MULTIPLYADD:
1216 ureg_MAD(ureg, dst, arg[1], arg[2], arg[0]);
1217 break;
1218 case D3DTOP_LERP:
1219 ureg_LRP(ureg, dst, arg[0], arg[1], arg[2]);
1220 break;
1221 case D3DTOP_DISABLE:
1222 /* no-op ? */
1223 break;
1224 default:
1225 assert(!"invalid D3DTOP");
1226 break;
1227 }
1228 }
1229
1230 static void *
1231 nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
1232 {
1233 struct ps_build_ctx ps;
1234 struct ureg_program *ureg = ureg_create(TGSI_PROCESSOR_FRAGMENT);
1235 struct ureg_dst oCol;
1236 unsigned i, s;
1237 const unsigned texcoord_sn = get_texcoord_sn(device->screen);
1238
1239 memset(&ps, 0, sizeof(ps));
1240 ps.ureg = ureg;
1241 ps.stage.index_pre_mod = -1;
1242
1243 ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);
1244
1245 /* Declare all TEMPs we might need, serious drivers have a register allocator. */
1246 for (i = 0; i < Elements(ps.r); ++i)
1247 ps.r[i] = ureg_DECL_local_temporary(ureg);
1248 ps.rCur = ps.r[0];
1249 ps.rTmp = ps.r[1];
1250 ps.rTex = ps.r[2];
1251 ps.rCurSrc = ureg_src(ps.rCur);
1252 ps.rTmpSrc = ureg_src(ps.rTmp);
1253 ps.rTexSrc = ureg_src(ps.rTex);
1254
1255 for (s = 0; s < 8; ++s) {
1256 ps.s[s] = ureg_src_undef();
1257
1258 if (key->ts[s].colorop != D3DTOP_DISABLE) {
1259 if (key->ts[s].colorarg0 == D3DTA_SPECULAR ||
1260 key->ts[s].colorarg1 == D3DTA_SPECULAR ||
1261 key->ts[s].colorarg2 == D3DTA_SPECULAR)
1262 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1263
1264 if (key->ts[s].colorarg0 == D3DTA_TEXTURE ||
1265 key->ts[s].colorarg1 == D3DTA_TEXTURE ||
1266 key->ts[s].colorarg2 == D3DTA_TEXTURE) {
1267 ps.s[s] = ureg_DECL_sampler(ureg, s);
1268 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1269 }
1270 if (s && (key->ts[s - 1].colorop == D3DTOP_PREMODULATE ||
1271 key->ts[s - 1].alphaop == D3DTOP_PREMODULATE))
1272 ps.s[s] = ureg_DECL_sampler(ureg, s);
1273 }
1274
1275 if (key->ts[s].alphaop != D3DTOP_DISABLE) {
1276 if (key->ts[s].alphaarg0 == D3DTA_SPECULAR ||
1277 key->ts[s].alphaarg1 == D3DTA_SPECULAR ||
1278 key->ts[s].alphaarg2 == D3DTA_SPECULAR)
1279 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1280
1281 if (key->ts[s].alphaarg0 == D3DTA_TEXTURE ||
1282 key->ts[s].alphaarg1 == D3DTA_TEXTURE ||
1283 key->ts[s].alphaarg2 == D3DTA_TEXTURE) {
1284 ps.s[s] = ureg_DECL_sampler(ureg, s);
1285 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1286 }
1287 }
1288 }
1289 if (key->specular)
1290 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1291
1292 oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
1293
1294 if (key->ts[0].colorop == D3DTOP_DISABLE &&
1295 key->ts[0].alphaop == D3DTOP_DISABLE)
1296 ureg_MOV(ureg, ps.rCur, ps.vC[0]);
1297 /* Or is it undefined then ? */
1298
1299 /* Run stages.
1300 */
1301 for (s = 0; s < 8; ++s) {
1302 unsigned colorarg[3];
1303 unsigned alphaarg[3];
1304 const uint8_t used_c = ps_d3dtop_args_mask(key->ts[s].colorop);
1305 const uint8_t used_a = ps_d3dtop_args_mask(key->ts[s].alphaop);
1306 struct ureg_dst dst;
1307 struct ureg_src arg[3];
1308
1309 if (key->ts[s].colorop == D3DTOP_DISABLE &&
1310 key->ts[s].alphaop == D3DTOP_DISABLE)
1311 continue;
1312 ps.stage.index = s;
1313 ps.stage.num_regs = 3;
1314
1315 DBG("STAGE[%u]: colorop=%s alphaop=%s\n", s,
1316 nine_D3DTOP_to_str(key->ts[s].colorop),
1317 nine_D3DTOP_to_str(key->ts[s].alphaop));
1318
1319 if (!ureg_src_is_undef(ps.s[s])) {
1320 unsigned target;
1321 switch (key->ts[s].textarget) {
1322 case 0: target = TGSI_TEXTURE_1D; break;
1323 case 1: target = TGSI_TEXTURE_2D; break;
1324 case 2: target = TGSI_TEXTURE_3D; break;
1325 case 3: target = TGSI_TEXTURE_CUBE; break;
1326 /* this is a 2 bit bitfield, do I really need a default case ? */
1327 }
1328
1329 /* sample the texture */
1330 if (key->ts[s].colorop == D3DTOP_BUMPENVMAP ||
1331 key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE) {
1332 }
1333 if (key->projected & (3 << (s *2))) {
1334 unsigned dim = 1 + ((key->projected >> (2 * s)) & 3);
1335 if (dim == 4)
1336 ureg_TXP(ureg, ps.rTex, target, ps.vT[s], ps.s[s]);
1337 else {
1338 ureg_RCP(ureg, ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X), ureg_scalar(ps.vT[s], dim-1));
1339 ureg_MUL(ureg, ps.rTmp, _XXXX(ps.rTmpSrc), ps.vT[s]);
1340 ureg_TEX(ureg, ps.rTex, target, ps.rTmpSrc, ps.s[s]);
1341 }
1342 } else {
1343 ureg_TEX(ureg, ps.rTex, target, ps.vT[s], ps.s[s]);
1344 }
1345 }
1346
1347 if (s == 0 &&
1348 (key->ts[0].resultarg != 0 /* not current */ ||
1349 key->ts[0].colorop == D3DTOP_DISABLE ||
1350 key->ts[0].alphaop == D3DTOP_DISABLE ||
1351 key->ts[0].colorop == D3DTOP_BLENDCURRENTALPHA ||
1352 key->ts[0].alphaop == D3DTOP_BLENDCURRENTALPHA ||
1353 key->ts[0].colorarg0 == D3DTA_CURRENT ||
1354 key->ts[0].colorarg1 == D3DTA_CURRENT ||
1355 key->ts[0].colorarg2 == D3DTA_CURRENT ||
1356 key->ts[0].alphaarg0 == D3DTA_CURRENT ||
1357 key->ts[0].alphaarg1 == D3DTA_CURRENT ||
1358 key->ts[0].alphaarg2 == D3DTA_CURRENT)
1359 ) {
1360 /* Initialize D3DTA_CURRENT.
1361 * (Yes we can do this before the loop but not until
1362 * NVE4 has an instruction scheduling pass.)
1363 */
1364 ureg_MOV(ureg, ps.rCur, ps.vC[0]);
1365 }
1366
1367 dst = ps_get_ts_dst(&ps, key->ts[s].resultarg ? D3DTA_TEMP : D3DTA_CURRENT);
1368
1369 if (ps.stage.index_pre_mod == ps.stage.index) {
1370 ps.rMod = ps.r[ps.stage.num_regs++];
1371 ureg_MUL(ureg, ps.rMod, ps.rCurSrc, ps.rTexSrc);
1372 }
1373
1374 colorarg[0] = (key->ts[s].colorarg0 | ((key->colorarg_b4[0] >> s) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f;
1375 colorarg[1] = (key->ts[s].colorarg1 | ((key->colorarg_b4[1] >> s) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f;
1376 colorarg[2] = (key->ts[s].colorarg2 | ((key->colorarg_b4[2] >> s) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f;
1377 alphaarg[0] = (key->ts[s].alphaarg0 | ((key->alphaarg_b4[0] >> s) << 4)) & 0x1f;
1378 alphaarg[1] = (key->ts[s].alphaarg1 | ((key->alphaarg_b4[1] >> s) << 4)) & 0x1f;
1379 alphaarg[2] = (key->ts[s].alphaarg2 | ((key->alphaarg_b4[2] >> s) << 4)) & 0x1f;
1380
1381 if (key->ts[s].colorop != key->ts[s].alphaop ||
1382 colorarg[0] != alphaarg[0] ||
1383 colorarg[1] != alphaarg[1] ||
1384 colorarg[2] != alphaarg[2])
1385 dst.WriteMask = TGSI_WRITEMASK_XYZ;
1386
1387 /* Special DOTPRODUCT behaviour (see wine tests) */
1388 if (key->ts[s].colorop == D3DTOP_DOTPRODUCT3)
1389 dst.WriteMask = TGSI_WRITEMASK_XYZW;
1390
1391 if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]);
1392 if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]);
1393 if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]);
1394 ps_do_ts_op(&ps, key->ts[s].colorop, dst, arg);
1395
1396 if (dst.WriteMask != TGSI_WRITEMASK_XYZW) {
1397 dst.WriteMask = TGSI_WRITEMASK_W;
1398
1399 if (used_a & 0x1) arg[0] = ps_get_ts_arg(&ps, alphaarg[0]);
1400 if (used_a & 0x2) arg[1] = ps_get_ts_arg(&ps, alphaarg[1]);
1401 if (used_a & 0x4) arg[2] = ps_get_ts_arg(&ps, alphaarg[2]);
1402 ps_do_ts_op(&ps, key->ts[s].alphaop, dst, arg);
1403 }
1404 }
1405
1406 if (key->specular)
1407 ureg_ADD(ureg, ps.rCur, ps.rCurSrc, ps.vC[1]);
1408
1409 /* Fog.
1410 */
1411 if (key->fog_mode) {
1412 struct ureg_src vPos;
1413 if (device->screen->get_param(device->screen,
1414 PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) {
1415 vPos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
1416 } else {
1417 vPos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
1418 TGSI_INTERPOLATE_LINEAR);
1419 }
1420
1421 struct ureg_dst rFog = ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X);
1422 if (key->fog_mode == D3DFOG_EXP) {
1423 ureg_MUL(ureg, rFog, _ZZZZ(vPos), _ZZZZ(_CONST(22)));
1424 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1425 ureg_EX2(ureg, rFog, _X(rFog));
1426 } else
1427 if (key->fog_mode == D3DFOG_EXP2) {
1428 ureg_MUL(ureg, rFog, _ZZZZ(vPos), _ZZZZ(_CONST(22)));
1429 ureg_MUL(ureg, rFog, _X(rFog), _X(rFog));
1430 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1431 ureg_EX2(ureg, rFog, _X(rFog));
1432 } else
1433 if (key->fog_mode == D3DFOG_LINEAR) {
1434 ureg_SUB(ureg, rFog, _XXXX(_CONST(22)), _ZZZZ(vPos));
1435 ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22)));
1436 }
1437 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21));
1438 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1439 } else
1440 if (key->fog) {
1441 struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0, TGSI_INTERPOLATE_PERSPECTIVE);
1442 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _XXXX(vFog), ps.rCurSrc, _CONST(21));
1443 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1444 } else {
1445 ureg_MOV(ureg, oCol, ps.rCurSrc);
1446 }
1447
1448 ureg_END(ureg);
1449 nine_ureg_tgsi_dump(ureg, FALSE);
1450 return ureg_create_shader_and_destroy(ureg, device->pipe);
1451 }
1452
1453 static struct NineVertexShader9 *
1454 nine_ff_get_vs(struct NineDevice9 *device)
1455 {
1456 const struct nine_state *state = &device->state;
1457 struct NineVertexShader9 *vs;
1458 enum pipe_error err;
1459 struct vs_build_ctx bld;
1460 struct nine_ff_vs_key key;
1461 unsigned s, i;
1462 char input_texture_coord[8];
1463
1464 assert(sizeof(key) <= sizeof(key.value32));
1465
1466 memset(&key, 0, sizeof(key));
1467 memset(&bld, 0, sizeof(bld));
1468 memset(&input_texture_coord, 0, sizeof(input_texture_coord));
1469
1470 bld.key = &key;
1471
1472 /* FIXME: this shouldn't be NULL, but it is on init */
1473 if (state->vdecl) {
1474 key.color0in_one = 1;
1475 key.color1in_one = 1;
1476 for (i = 0; i < state->vdecl->nelems; i++) {
1477 uint16_t usage = state->vdecl->usage_map[i];
1478 if (usage == NINE_DECLUSAGE_POSITIONT)
1479 key.position_t = 1;
1480 else if (usage == NINE_DECLUSAGE_i(COLOR, 0))
1481 key.color0in_one = 0;
1482 else if (usage == NINE_DECLUSAGE_i(COLOR, 1))
1483 key.color1in_one = 0;
1484 else if (usage == NINE_DECLUSAGE_PSIZE)
1485 key.vertexpointsize = 1;
1486 else if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) {
1487 s = usage / NINE_DECLUSAGE_COUNT;
1488 if (s < 8)
1489 input_texture_coord[s] = nine_decltype_get_dim(state->vdecl->decls[i].Type);
1490 else
1491 DBG("FF given texture coordinate >= 8. Ignoring\n");
1492 } else if (usage < NINE_DECLUSAGE_NONE)
1493 key.passthrough |= 1 << usage;
1494 }
1495 }
1496 /* ff vs + ps 3.0: some elements are passed to the ps (wine test).
1497 * We do restrict to indices 0 */
1498 key.passthrough &= ~((1 << NINE_DECLUSAGE_POSITION) | (1 << NINE_DECLUSAGE_PSIZE) |
1499 (1 << NINE_DECLUSAGE_TEXCOORD) | (1 << NINE_DECLUSAGE_POSITIONT) |
1500 (1 << NINE_DECLUSAGE_TESSFACTOR) | (1 << NINE_DECLUSAGE_SAMPLE));
1501 if (!key.vertexpointsize)
1502 key.pointscale = !!state->rs[D3DRS_POINTSCALEENABLE];
1503
1504 key.lighting = !!state->rs[D3DRS_LIGHTING] && state->ff.num_lights_active;
1505 key.darkness = !!state->rs[D3DRS_LIGHTING] && !state->ff.num_lights_active;
1506 if (key.position_t) {
1507 key.darkness = 0; /* |= key.lighting; */ /* XXX ? */
1508 key.lighting = 0;
1509 }
1510 if ((key.lighting | key.darkness) && state->rs[D3DRS_COLORVERTEX]) {
1511 key.mtl_diffuse = state->rs[D3DRS_DIFFUSEMATERIALSOURCE];
1512 key.mtl_ambient = state->rs[D3DRS_AMBIENTMATERIALSOURCE];
1513 key.mtl_specular = state->rs[D3DRS_SPECULARMATERIALSOURCE];
1514 key.mtl_emissive = state->rs[D3DRS_EMISSIVEMATERIALSOURCE];
1515 }
1516 key.fog = !!state->rs[D3DRS_FOGENABLE];
1517 key.fog_mode = state->rs[D3DRS_FOGENABLE] ? state->rs[D3DRS_FOGVERTEXMODE] : 0;
1518 if (key.fog_mode)
1519 key.fog_range = !key.position_t && state->rs[D3DRS_RANGEFOGENABLE];
1520
1521 key.localviewer = !!state->rs[D3DRS_LOCALVIEWER];
1522 key.specular_enable = !!state->rs[D3DRS_SPECULARENABLE];
1523
1524 if (state->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1525 key.vertexblend_indexed = !!state->rs[D3DRS_INDEXEDVERTEXBLENDENABLE];
1526
1527 switch (state->rs[D3DRS_VERTEXBLEND]) {
1528 case D3DVBF_0WEIGHTS: key.vertexblend = key.vertexblend_indexed; break;
1529 case D3DVBF_1WEIGHTS: key.vertexblend = 2; break;
1530 case D3DVBF_2WEIGHTS: key.vertexblend = 3; break;
1531 case D3DVBF_3WEIGHTS: key.vertexblend = 4; break;
1532 case D3DVBF_TWEENING: key.vertextween = 1; break;
1533 default:
1534 assert(!"invalid D3DVBF");
1535 break;
1536 }
1537 }
1538
1539 for (s = 0; s < 8; ++s) {
1540 unsigned gen = (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1;
1541 unsigned dim;
1542
1543 if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU)
1544 gen = NINED3DTSS_TCI_PASSTHRU;
1545
1546 if (!input_texture_coord[s] && gen == NINED3DTSS_TCI_PASSTHRU)
1547 gen = NINED3DTSS_TCI_DISABLE;
1548
1549 key.tc_gen |= gen << (s * 3);
1550 key.tc_idx |= (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7) << (s * 3);
1551 key.tc_dim_input |= ((input_texture_coord[s]-1) & 0x3) << (s * 2);
1552
1553 dim = state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7;
1554 if (dim > 4)
1555 dim = input_texture_coord[s];
1556 if (dim == 1) /* NV behaviour */
1557 dim = 0;
1558 key.tc_dim_output |= dim << (s * 3);
1559 }
1560
1561 vs = util_hash_table_get(device->ff.ht_vs, &key);
1562 if (vs)
1563 return vs;
1564 NineVertexShader9_new(device, &vs, NULL, nine_ff_build_vs(device, &bld));
1565
1566 nine_ff_prune_vs(device);
1567 if (vs) {
1568 unsigned n;
1569
1570 memcpy(&vs->ff_key, &key, sizeof(vs->ff_key));
1571
1572 err = util_hash_table_set(device->ff.ht_vs, &vs->ff_key, vs);
1573 (void)err;
1574 assert(err == PIPE_OK);
1575 device->ff.num_vs++;
1576 NineUnknown_ConvertRefToBind(NineUnknown(vs));
1577
1578 vs->num_inputs = bld.num_inputs;
1579 for (n = 0; n < bld.num_inputs; ++n)
1580 vs->input_map[n].ndecl = bld.input[n];
1581
1582 vs->position_t = key.position_t;
1583 vs->point_size = key.vertexpointsize | key.pointscale;
1584 }
1585 return vs;
1586 }
1587
1588 static struct NinePixelShader9 *
1589 nine_ff_get_ps(struct NineDevice9 *device)
1590 {
1591 struct nine_state *state = &device->state;
1592 struct NinePixelShader9 *ps;
1593 enum pipe_error err;
1594 struct nine_ff_ps_key key;
1595 unsigned s;
1596 uint8_t sampler_mask = 0;
1597
1598 assert(sizeof(key) <= sizeof(key.value32));
1599
1600 memset(&key, 0, sizeof(key));
1601 for (s = 0; s < 8; ++s) {
1602 key.ts[s].colorop = state->ff.tex_stage[s][D3DTSS_COLOROP];
1603 key.ts[s].alphaop = state->ff.tex_stage[s][D3DTSS_ALPHAOP];
1604 /* MSDN says D3DTOP_DISABLE disables this and all subsequent stages. */
1605 /* ALPHAOP cannot be disabled if COLOROP is enabled. */
1606 if (key.ts[s].colorop == D3DTOP_DISABLE) {
1607 key.ts[s].alphaop = D3DTOP_DISABLE; /* DISABLE == 1, avoid degenerate keys */
1608 break;
1609 }
1610
1611 if (!state->texture[s] &&
1612 state->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE) {
1613 /* This should also disable the stage. */
1614 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1615 break;
1616 }
1617
1618 if (state->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE)
1619 sampler_mask |= (1 << s);
1620
1621 if (key.ts[s].colorop != D3DTOP_DISABLE) {
1622 uint8_t used_c = ps_d3dtop_args_mask(key.ts[s].colorop);
1623 if (used_c & 0x1) key.ts[s].colorarg0 = state->ff.tex_stage[s][D3DTSS_COLORARG0];
1624 if (used_c & 0x2) key.ts[s].colorarg1 = state->ff.tex_stage[s][D3DTSS_COLORARG1];
1625 if (used_c & 0x4) key.ts[s].colorarg2 = state->ff.tex_stage[s][D3DTSS_COLORARG2];
1626 if (used_c & 0x1) key.colorarg_b4[0] |= (state->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) << s;
1627 if (used_c & 0x1) key.colorarg_b5[0] |= (state->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) << s;
1628 if (used_c & 0x2) key.colorarg_b4[1] |= (state->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) << s;
1629 if (used_c & 0x2) key.colorarg_b5[1] |= (state->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) << s;
1630 if (used_c & 0x4) key.colorarg_b4[2] |= (state->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) << s;
1631 if (used_c & 0x4) key.colorarg_b5[2] |= (state->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) << s;
1632 }
1633 if (key.ts[s].alphaop != D3DTOP_DISABLE) {
1634 uint8_t used_a = ps_d3dtop_args_mask(key.ts[s].alphaop);
1635 if (used_a & 0x1) key.ts[s].alphaarg0 = state->ff.tex_stage[s][D3DTSS_ALPHAARG0];
1636 if (used_a & 0x2) key.ts[s].alphaarg1 = state->ff.tex_stage[s][D3DTSS_ALPHAARG1];
1637 if (used_a & 0x4) key.ts[s].alphaarg2 = state->ff.tex_stage[s][D3DTSS_ALPHAARG2];
1638 if (used_a & 0x1) key.alphaarg_b4[0] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) << s;
1639 if (used_a & 0x2) key.alphaarg_b4[1] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) << s;
1640 if (used_a & 0x4) key.alphaarg_b4[2] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) << s;
1641 }
1642 key.ts[s].resultarg = state->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP;
1643
1644 if (state->texture[s]) {
1645 switch (state->texture[s]->base.type) {
1646 case D3DRTYPE_TEXTURE: key.ts[s].textarget = 1; break;
1647 case D3DRTYPE_VOLUMETEXTURE: key.ts[s].textarget = 2; break;
1648 case D3DRTYPE_CUBETEXTURE: key.ts[s].textarget = 3; break;
1649 default:
1650 assert(!"unexpected texture type");
1651 break;
1652 }
1653 } else {
1654 key.ts[s].textarget = 1;
1655 }
1656 }
1657
1658 key.projected = nine_ff_get_projected_key(state);
1659
1660 for (; s < 8; ++s)
1661 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1662 if (state->rs[D3DRS_FOGENABLE])
1663 key.fog_mode = state->rs[D3DRS_FOGTABLEMODE];
1664 key.fog = !!state->rs[D3DRS_FOGENABLE];
1665
1666 ps = util_hash_table_get(device->ff.ht_ps, &key);
1667 if (ps)
1668 return ps;
1669 NinePixelShader9_new(device, &ps, NULL, nine_ff_build_ps(device, &key));
1670
1671 nine_ff_prune_ps(device);
1672 if (ps) {
1673 memcpy(&ps->ff_key, &key, sizeof(ps->ff_key));
1674
1675 err = util_hash_table_set(device->ff.ht_ps, &ps->ff_key, ps);
1676 (void)err;
1677 assert(err == PIPE_OK);
1678 device->ff.num_ps++;
1679 NineUnknown_ConvertRefToBind(NineUnknown(ps));
1680
1681 ps->rt_mask = 0x1;
1682 ps->sampler_mask = sampler_mask;
1683 }
1684 return ps;
1685 }
1686
1687 #define GET_D3DTS(n) nine_state_access_transform(state, D3DTS_##n, FALSE)
1688 #define IS_D3DTS_DIRTY(s,n) ((s)->ff.changed.transform[(D3DTS_##n) / 32] & (1 << ((D3DTS_##n) % 32)))
1689 static void
1690 nine_ff_load_vs_transforms(struct NineDevice9 *device)
1691 {
1692 struct nine_state *state = &device->state;
1693 D3DMATRIX T;
1694 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1695 unsigned i;
1696
1697 /* TODO: make this nicer, and only upload the ones we need */
1698 /* TODO: use ff.vs_const as storage of W, V, P matrices */
1699
1700 if (IS_D3DTS_DIRTY(state, WORLD) ||
1701 IS_D3DTS_DIRTY(state, VIEW) ||
1702 IS_D3DTS_DIRTY(state, PROJECTION)) {
1703 /* WVP, WV matrices */
1704 nine_d3d_matrix_matrix_mul(&M[1], GET_D3DTS(WORLD), GET_D3DTS(VIEW));
1705 nine_d3d_matrix_matrix_mul(&M[0], &M[1], GET_D3DTS(PROJECTION));
1706
1707 /* normal matrix == transpose(inverse(WV)) */
1708 nine_d3d_matrix_inverse_3x3(&T, &M[1]);
1709 nine_d3d_matrix_transpose(&M[4], &T);
1710
1711 /* VP matrix */
1712 nine_d3d_matrix_matrix_mul(&M[2], GET_D3DTS(VIEW), GET_D3DTS(PROJECTION));
1713
1714 /* V and W matrix */
1715 M[3] = *GET_D3DTS(VIEW);
1716 M[56] = *GET_D3DTS(WORLD);
1717 }
1718
1719 if (state->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1720 /* load other world matrices */
1721 for (i = 1; i <= 7; ++i)
1722 M[56 + i] = *GET_D3DTS(WORLDMATRIX(i));
1723 }
1724
1725 device->ff.vs_const[30 * 4] = asfloat(state->rs[D3DRS_TWEENFACTOR]);
1726 }
1727
1728 static void
1729 nine_ff_load_lights(struct NineDevice9 *device)
1730 {
1731 struct nine_state *state = &device->state;
1732 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1733 unsigned l;
1734
1735 if (state->changed.group & NINE_STATE_FF_MATERIAL) {
1736 const D3DMATERIAL9 *mtl = &state->ff.material;
1737
1738 memcpy(&dst[20], &mtl->Diffuse, 4 * sizeof(float));
1739 memcpy(&dst[21], &mtl->Ambient, 4 * sizeof(float));
1740 memcpy(&dst[22], &mtl->Specular, 4 * sizeof(float));
1741 dst[23].x = mtl->Power;
1742 memcpy(&dst[24], &mtl->Emissive, 4 * sizeof(float));
1743 d3dcolor_to_rgba(&dst[25].x, state->rs[D3DRS_AMBIENT]);
1744 dst[19].x = dst[25].x * mtl->Ambient.r + mtl->Emissive.r;
1745 dst[19].y = dst[25].y * mtl->Ambient.g + mtl->Emissive.g;
1746 dst[19].z = dst[25].z * mtl->Ambient.b + mtl->Emissive.b;
1747 dst[19].w = mtl->Ambient.a + mtl->Emissive.a;
1748 }
1749
1750 if (!(state->changed.group & NINE_STATE_FF_LIGHTING))
1751 return;
1752
1753 for (l = 0; l < state->ff.num_lights_active; ++l) {
1754 const D3DLIGHT9 *light = &state->ff.light[state->ff.active_light[l]];
1755
1756 dst[32 + l * 8].x = light->Type;
1757 dst[32 + l * 8].y = light->Attenuation0;
1758 dst[32 + l * 8].z = light->Attenuation1;
1759 dst[32 + l * 8].w = light->Attenuation2;
1760 memcpy(&dst[33 + l * 8].x, &light->Diffuse, sizeof(light->Diffuse));
1761 memcpy(&dst[34 + l * 8].x, &light->Specular, sizeof(light->Specular));
1762 memcpy(&dst[35 + l * 8].x, &light->Ambient, sizeof(light->Ambient));
1763 nine_d3d_vector4_matrix_mul((D3DVECTOR *)&dst[36 + l * 8].x, &light->Position, GET_D3DTS(VIEW));
1764 nine_d3d_vector3_matrix_mul((D3DVECTOR *)&dst[37 + l * 8].x, &light->Direction, GET_D3DTS(VIEW));
1765 dst[36 + l * 8].w = light->Type == D3DLIGHT_DIRECTIONAL ? 1e9f : light->Range;
1766 dst[37 + l * 8].w = light->Falloff;
1767 dst[38 + l * 8].x = cosf(light->Theta * 0.5f);
1768 dst[38 + l * 8].y = cosf(light->Phi * 0.5f);
1769 dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y);
1770 dst[39 + l * 8].w = (l + 1) == state->ff.num_lights_active;
1771 }
1772 }
1773
1774 static void
1775 nine_ff_load_point_and_fog_params(struct NineDevice9 *device)
1776 {
1777 const struct nine_state *state = &device->state;
1778 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1779
1780 if (!(state->changed.group & NINE_STATE_FF_OTHER))
1781 return;
1782 dst[26].x = asfloat(state->rs[D3DRS_POINTSIZE_MIN]);
1783 dst[26].y = asfloat(state->rs[D3DRS_POINTSIZE_MAX]);
1784 dst[26].z = asfloat(state->rs[D3DRS_POINTSIZE]);
1785 dst[26].w = asfloat(state->rs[D3DRS_POINTSCALE_A]);
1786 dst[27].x = asfloat(state->rs[D3DRS_POINTSCALE_B]);
1787 dst[27].y = asfloat(state->rs[D3DRS_POINTSCALE_C]);
1788 dst[28].x = asfloat(state->rs[D3DRS_FOGEND]);
1789 dst[28].y = 1.0f / (asfloat(state->rs[D3DRS_FOGEND]) - asfloat(state->rs[D3DRS_FOGSTART]));
1790 if (isinf(dst[28].y))
1791 dst[28].y = 0.0f;
1792 dst[28].z = asfloat(state->rs[D3DRS_FOGDENSITY]);
1793 }
1794
1795 static void
1796 nine_ff_load_tex_matrices(struct NineDevice9 *device)
1797 {
1798 struct nine_state *state = &device->state;
1799 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1800 unsigned s;
1801
1802 if (!(state->ff.changed.transform[0] & 0xff0000))
1803 return;
1804 for (s = 0; s < 8; ++s) {
1805 if (IS_D3DTS_DIRTY(state, TEXTURE0 + s))
1806 nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(state, D3DTS_TEXTURE0 + s, FALSE));
1807 }
1808 }
1809
1810 static void
1811 nine_ff_load_ps_params(struct NineDevice9 *device)
1812 {
1813 const struct nine_state *state = &device->state;
1814 struct fvec4 *dst = (struct fvec4 *)device->ff.ps_const;
1815 unsigned s;
1816
1817 if (!(state->changed.group & (NINE_STATE_FF_PSSTAGES | NINE_STATE_FF_OTHER)))
1818 return;
1819
1820 for (s = 0; s < 8; ++s)
1821 d3dcolor_to_rgba(&dst[s].x, state->ff.tex_stage[s][D3DTSS_CONSTANT]);
1822
1823 for (s = 0; s < 8; ++s) {
1824 dst[8 + s].x = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT00]);
1825 dst[8 + s].y = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT01]);
1826 dst[8 + s].z = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT10]);
1827 dst[8 + s].w = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT11]);
1828 if (s & 1) {
1829 dst[8 + s / 2].z = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
1830 dst[8 + s / 2].w = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
1831 } else {
1832 dst[8 + s / 2].x = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
1833 dst[8 + s / 2].y = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
1834 }
1835 }
1836
1837 d3dcolor_to_rgba(&dst[20].x, state->rs[D3DRS_TEXTUREFACTOR]);
1838 d3dcolor_to_rgba(&dst[21].x, state->rs[D3DRS_FOGCOLOR]);
1839 dst[22].x = asfloat(state->rs[D3DRS_FOGEND]);
1840 dst[22].y = 1.0f / (asfloat(state->rs[D3DRS_FOGEND]) - asfloat(state->rs[D3DRS_FOGSTART]));
1841 dst[22].z = asfloat(state->rs[D3DRS_FOGDENSITY]);
1842 }
1843
1844 static void
1845 nine_ff_load_viewport_info(struct NineDevice9 *device)
1846 {
1847 D3DVIEWPORT9 *viewport = &device->state.viewport;
1848 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1849 float diffZ = viewport->MaxZ - viewport->MinZ;
1850
1851 /* Note: the other functions avoids to fill the const again if nothing changed.
1852 * But we don't have much to fill, and adding code to allow that may be complex
1853 * so just fill it always */
1854 dst[100].x = 2.0f / (float)(viewport->Width);
1855 dst[100].y = 2.0f / (float)(viewport->Height);
1856 dst[100].z = (diffZ == 0.0f) ? 0.0f : (1.0f / diffZ);
1857 dst[101].x = (float)(viewport->X);
1858 dst[101].y = (float)(viewport->Y);
1859 dst[101].z = (float)(viewport->MinZ);
1860 }
1861
1862 void
1863 nine_ff_update(struct NineDevice9 *device)
1864 {
1865 struct nine_state *state = &device->state;
1866 struct pipe_constant_buffer cb;
1867
1868 DBG("vs=%p ps=%p\n", device->state.vs, device->state.ps);
1869
1870 /* NOTE: the only reference belongs to the hash table */
1871 if (!state->programmable_vs) {
1872 device->ff.vs = nine_ff_get_vs(device);
1873 device->state.changed.group |= NINE_STATE_VS;
1874 }
1875 if (!device->state.ps) {
1876 device->ff.ps = nine_ff_get_ps(device);
1877 device->state.changed.group |= NINE_STATE_PS;
1878 }
1879
1880 if (!state->programmable_vs) {
1881 nine_ff_load_vs_transforms(device);
1882 nine_ff_load_tex_matrices(device);
1883 nine_ff_load_lights(device);
1884 nine_ff_load_point_and_fog_params(device);
1885 nine_ff_load_viewport_info(device);
1886
1887 memset(state->ff.changed.transform, 0, sizeof(state->ff.changed.transform));
1888
1889 cb.buffer_offset = 0;
1890 cb.buffer = NULL;
1891 cb.user_buffer = device->ff.vs_const;
1892 cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float);
1893
1894 if (!device->driver_caps.user_cbufs) {
1895 u_upload_data(device->constbuf_uploader,
1896 0,
1897 cb.buffer_size,
1898 device->constbuf_alignment,
1899 cb.user_buffer,
1900 &cb.buffer_offset,
1901 &cb.buffer);
1902 u_upload_unmap(device->constbuf_uploader);
1903 cb.user_buffer = NULL;
1904 }
1905 state->pipe.cb_vs_ff = cb;
1906 state->commit |= NINE_STATE_COMMIT_CONST_VS;
1907 }
1908
1909 if (!device->state.ps) {
1910 nine_ff_load_ps_params(device);
1911
1912 cb.buffer_offset = 0;
1913 cb.buffer = NULL;
1914 cb.user_buffer = device->ff.ps_const;
1915 cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float);
1916
1917 if (!device->driver_caps.user_cbufs) {
1918 u_upload_data(device->constbuf_uploader,
1919 0,
1920 cb.buffer_size,
1921 device->constbuf_alignment,
1922 cb.user_buffer,
1923 &cb.buffer_offset,
1924 &cb.buffer);
1925 u_upload_unmap(device->constbuf_uploader);
1926 cb.user_buffer = NULL;
1927 }
1928 state->pipe.cb_ps_ff = cb;
1929 state->commit |= NINE_STATE_COMMIT_CONST_PS;
1930 }
1931
1932 device->state.changed.group &= ~NINE_STATE_FF;
1933 }
1934
1935
1936 boolean
1937 nine_ff_init(struct NineDevice9 *device)
1938 {
1939 device->ff.ht_vs = util_hash_table_create(nine_ff_vs_key_hash,
1940 nine_ff_vs_key_comp);
1941 device->ff.ht_ps = util_hash_table_create(nine_ff_ps_key_hash,
1942 nine_ff_ps_key_comp);
1943
1944 device->ff.ht_fvf = util_hash_table_create(nine_ff_fvf_key_hash,
1945 nine_ff_fvf_key_comp);
1946
1947 device->ff.vs_const = CALLOC(NINE_FF_NUM_VS_CONST, 4 * sizeof(float));
1948 device->ff.ps_const = CALLOC(NINE_FF_NUM_PS_CONST, 4 * sizeof(float));
1949
1950 return device->ff.ht_vs && device->ff.ht_ps &&
1951 device->ff.ht_fvf &&
1952 device->ff.vs_const && device->ff.ps_const;
1953 }
1954
1955 static enum pipe_error nine_ff_ht_delete_cb(void *key, void *value, void *data)
1956 {
1957 NineUnknown_Unbind(NineUnknown(value));
1958 return PIPE_OK;
1959 }
1960
1961 void
1962 nine_ff_fini(struct NineDevice9 *device)
1963 {
1964 if (device->ff.ht_vs) {
1965 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
1966 util_hash_table_destroy(device->ff.ht_vs);
1967 }
1968 if (device->ff.ht_ps) {
1969 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
1970 util_hash_table_destroy(device->ff.ht_ps);
1971 }
1972 if (device->ff.ht_fvf) {
1973 util_hash_table_foreach(device->ff.ht_fvf, nine_ff_ht_delete_cb, NULL);
1974 util_hash_table_destroy(device->ff.ht_fvf);
1975 }
1976 device->ff.vs = NULL; /* destroyed by unbinding from hash table */
1977 device->ff.ps = NULL;
1978
1979 FREE(device->ff.vs_const);
1980 FREE(device->ff.ps_const);
1981 }
1982
1983 static void
1984 nine_ff_prune_vs(struct NineDevice9 *device)
1985 {
1986 if (device->ff.num_vs > 100) {
1987 /* could destroy the bound one here, so unbind */
1988 device->pipe->bind_vs_state(device->pipe, NULL);
1989 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
1990 util_hash_table_clear(device->ff.ht_vs);
1991 device->ff.num_vs = 0;
1992 device->state.changed.group |= NINE_STATE_VS;
1993 }
1994 }
1995 static void
1996 nine_ff_prune_ps(struct NineDevice9 *device)
1997 {
1998 if (device->ff.num_ps > 100) {
1999 /* could destroy the bound one here, so unbind */
2000 device->pipe->bind_fs_state(device->pipe, NULL);
2001 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
2002 util_hash_table_clear(device->ff.ht_ps);
2003 device->ff.num_ps = 0;
2004 device->state.changed.group |= NINE_STATE_PS;
2005 }
2006 }
2007
2008 /* ========================================================================== */
2009
2010 /* Matrix multiplication:
2011 *
2012 * in memory: 0 1 2 3 (row major)
2013 * 4 5 6 7
2014 * 8 9 a b
2015 * c d e f
2016 *
2017 * cA cB cC cD
2018 * r0 = (r0 * cA) (r0 * cB) . .
2019 * r1 = (r1 * cA) (r1 * cB)
2020 * r2 = (r2 * cA) .
2021 * r3 = (r3 * cA) .
2022 *
2023 * r: (11) (12) (13) (14)
2024 * (21) (22) (23) (24)
2025 * (31) (32) (33) (34)
2026 * (41) (42) (43) (44)
2027 * l: (11 12 13 14)
2028 * (21 22 23 24)
2029 * (31 32 33 34)
2030 * (41 42 43 44)
2031 *
2032 * v: (x y z 1 )
2033 *
2034 * t.xyzw = MUL(v.xxxx, r[0]);
2035 * t.xyzw = MAD(v.yyyy, r[1], t.xyzw);
2036 * t.xyzw = MAD(v.zzzz, r[2], t.xyzw);
2037 * v.xyzw = MAD(v.wwww, r[3], t.xyzw);
2038 *
2039 * v.x = DP4(v, c[0]);
2040 * v.y = DP4(v, c[1]);
2041 * v.z = DP4(v, c[2]);
2042 * v.w = DP4(v, c[3]) = 1
2043 */
2044
2045 /*
2046 static void
2047 nine_D3DMATRIX_print(const D3DMATRIX *M)
2048 {
2049 DBG("\n(%f %f %f %f)\n"
2050 "(%f %f %f %f)\n"
2051 "(%f %f %f %f)\n"
2052 "(%f %f %f %f)\n",
2053 M->m[0][0], M->m[0][1], M->m[0][2], M->m[0][3],
2054 M->m[1][0], M->m[1][1], M->m[1][2], M->m[1][3],
2055 M->m[2][0], M->m[2][1], M->m[2][2], M->m[2][3],
2056 M->m[3][0], M->m[3][1], M->m[3][2], M->m[3][3]);
2057 }
2058 */
2059
2060 static inline float
2061 nine_DP4_row_col(const D3DMATRIX *A, int r, const D3DMATRIX *B, int c)
2062 {
2063 return A->m[r][0] * B->m[0][c] +
2064 A->m[r][1] * B->m[1][c] +
2065 A->m[r][2] * B->m[2][c] +
2066 A->m[r][3] * B->m[3][c];
2067 }
2068
2069 static inline float
2070 nine_DP4_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2071 {
2072 return v->x * M->m[0][c] +
2073 v->y * M->m[1][c] +
2074 v->z * M->m[2][c] +
2075 1.0f * M->m[3][c];
2076 }
2077
2078 static inline float
2079 nine_DP3_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2080 {
2081 return v->x * M->m[0][c] +
2082 v->y * M->m[1][c] +
2083 v->z * M->m[2][c];
2084 }
2085
2086 void
2087 nine_d3d_matrix_matrix_mul(D3DMATRIX *D, const D3DMATRIX *L, const D3DMATRIX *R)
2088 {
2089 D->_11 = nine_DP4_row_col(L, 0, R, 0);
2090 D->_12 = nine_DP4_row_col(L, 0, R, 1);
2091 D->_13 = nine_DP4_row_col(L, 0, R, 2);
2092 D->_14 = nine_DP4_row_col(L, 0, R, 3);
2093
2094 D->_21 = nine_DP4_row_col(L, 1, R, 0);
2095 D->_22 = nine_DP4_row_col(L, 1, R, 1);
2096 D->_23 = nine_DP4_row_col(L, 1, R, 2);
2097 D->_24 = nine_DP4_row_col(L, 1, R, 3);
2098
2099 D->_31 = nine_DP4_row_col(L, 2, R, 0);
2100 D->_32 = nine_DP4_row_col(L, 2, R, 1);
2101 D->_33 = nine_DP4_row_col(L, 2, R, 2);
2102 D->_34 = nine_DP4_row_col(L, 2, R, 3);
2103
2104 D->_41 = nine_DP4_row_col(L, 3, R, 0);
2105 D->_42 = nine_DP4_row_col(L, 3, R, 1);
2106 D->_43 = nine_DP4_row_col(L, 3, R, 2);
2107 D->_44 = nine_DP4_row_col(L, 3, R, 3);
2108 }
2109
2110 void
2111 nine_d3d_vector4_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2112 {
2113 d->x = nine_DP4_vec_col(v, M, 0);
2114 d->y = nine_DP4_vec_col(v, M, 1);
2115 d->z = nine_DP4_vec_col(v, M, 2);
2116 }
2117
2118 void
2119 nine_d3d_vector3_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2120 {
2121 d->x = nine_DP3_vec_col(v, M, 0);
2122 d->y = nine_DP3_vec_col(v, M, 1);
2123 d->z = nine_DP3_vec_col(v, M, 2);
2124 }
2125
2126 void
2127 nine_d3d_matrix_transpose(D3DMATRIX *D, const D3DMATRIX *M)
2128 {
2129 unsigned i, j;
2130 for (i = 0; i < 4; ++i)
2131 for (j = 0; j < 4; ++j)
2132 D->m[i][j] = M->m[j][i];
2133 }
2134
2135 #define _M_ADD_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2136 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2137 if (t > 0.0f) pos += t; else neg += t; } while(0)
2138
2139 #define _M_SUB_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2140 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2141 if (t > 0.0f) neg -= t; else pos -= t; } while(0)
2142 float
2143 nine_d3d_matrix_det(const D3DMATRIX *M)
2144 {
2145 float pos = 0.0f;
2146 float neg = 0.0f;
2147
2148 _M_ADD_PROD_1i_2j_3k_4l(1, 2, 3, 4);
2149 _M_ADD_PROD_1i_2j_3k_4l(1, 3, 4, 2);
2150 _M_ADD_PROD_1i_2j_3k_4l(1, 4, 2, 3);
2151
2152 _M_ADD_PROD_1i_2j_3k_4l(2, 1, 4, 3);
2153 _M_ADD_PROD_1i_2j_3k_4l(2, 3, 1, 4);
2154 _M_ADD_PROD_1i_2j_3k_4l(2, 4, 3, 1);
2155
2156 _M_ADD_PROD_1i_2j_3k_4l(3, 1, 2, 4);
2157 _M_ADD_PROD_1i_2j_3k_4l(3, 2, 4, 1);
2158 _M_ADD_PROD_1i_2j_3k_4l(3, 4, 1, 2);
2159
2160 _M_ADD_PROD_1i_2j_3k_4l(4, 1, 3, 2);
2161 _M_ADD_PROD_1i_2j_3k_4l(4, 2, 1, 3);
2162 _M_ADD_PROD_1i_2j_3k_4l(4, 3, 2, 1);
2163
2164 _M_SUB_PROD_1i_2j_3k_4l(1, 2, 4, 3);
2165 _M_SUB_PROD_1i_2j_3k_4l(1, 3, 2, 4);
2166 _M_SUB_PROD_1i_2j_3k_4l(1, 4, 3, 2);
2167
2168 _M_SUB_PROD_1i_2j_3k_4l(2, 1, 3, 4);
2169 _M_SUB_PROD_1i_2j_3k_4l(2, 3, 4, 1);
2170 _M_SUB_PROD_1i_2j_3k_4l(2, 4, 1, 3);
2171
2172 _M_SUB_PROD_1i_2j_3k_4l(3, 1, 4, 2);
2173 _M_SUB_PROD_1i_2j_3k_4l(3, 2, 1, 4);
2174 _M_SUB_PROD_1i_2j_3k_4l(3, 4, 2, 1);
2175
2176 _M_SUB_PROD_1i_2j_3k_4l(4, 1, 2, 3);
2177 _M_SUB_PROD_1i_2j_3k_4l(4, 2, 3, 1);
2178 _M_SUB_PROD_1i_2j_3k_4l(4, 3, 1, 2);
2179
2180 return pos + neg;
2181 }
2182
2183 /* XXX: Probably better to just use src/mesa/math/m_matrix.c because
2184 * I have no idea where this code came from.
2185 */
2186 void
2187 nine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M)
2188 {
2189 int i, k;
2190 float det;
2191
2192 D->m[0][0] =
2193 M->m[1][1] * M->m[2][2] * M->m[3][3] -
2194 M->m[1][1] * M->m[3][2] * M->m[2][3] -
2195 M->m[1][2] * M->m[2][1] * M->m[3][3] +
2196 M->m[1][2] * M->m[3][1] * M->m[2][3] +
2197 M->m[1][3] * M->m[2][1] * M->m[3][2] -
2198 M->m[1][3] * M->m[3][1] * M->m[2][2];
2199
2200 D->m[0][1] =
2201 -M->m[0][1] * M->m[2][2] * M->m[3][3] +
2202 M->m[0][1] * M->m[3][2] * M->m[2][3] +
2203 M->m[0][2] * M->m[2][1] * M->m[3][3] -
2204 M->m[0][2] * M->m[3][1] * M->m[2][3] -
2205 M->m[0][3] * M->m[2][1] * M->m[3][2] +
2206 M->m[0][3] * M->m[3][1] * M->m[2][2];
2207
2208 D->m[0][2] =
2209 M->m[0][1] * M->m[1][2] * M->m[3][3] -
2210 M->m[0][1] * M->m[3][2] * M->m[1][3] -
2211 M->m[0][2] * M->m[1][1] * M->m[3][3] +
2212 M->m[0][2] * M->m[3][1] * M->m[1][3] +
2213 M->m[0][3] * M->m[1][1] * M->m[3][2] -
2214 M->m[0][3] * M->m[3][1] * M->m[1][2];
2215
2216 D->m[0][3] =
2217 -M->m[0][1] * M->m[1][2] * M->m[2][3] +
2218 M->m[0][1] * M->m[2][2] * M->m[1][3] +
2219 M->m[0][2] * M->m[1][1] * M->m[2][3] -
2220 M->m[0][2] * M->m[2][1] * M->m[1][3] -
2221 M->m[0][3] * M->m[1][1] * M->m[2][2] +
2222 M->m[0][3] * M->m[2][1] * M->m[1][2];
2223
2224 D->m[1][0] =
2225 -M->m[1][0] * M->m[2][2] * M->m[3][3] +
2226 M->m[1][0] * M->m[3][2] * M->m[2][3] +
2227 M->m[1][2] * M->m[2][0] * M->m[3][3] -
2228 M->m[1][2] * M->m[3][0] * M->m[2][3] -
2229 M->m[1][3] * M->m[2][0] * M->m[3][2] +
2230 M->m[1][3] * M->m[3][0] * M->m[2][2];
2231
2232 D->m[1][1] =
2233 M->m[0][0] * M->m[2][2] * M->m[3][3] -
2234 M->m[0][0] * M->m[3][2] * M->m[2][3] -
2235 M->m[0][2] * M->m[2][0] * M->m[3][3] +
2236 M->m[0][2] * M->m[3][0] * M->m[2][3] +
2237 M->m[0][3] * M->m[2][0] * M->m[3][2] -
2238 M->m[0][3] * M->m[3][0] * M->m[2][2];
2239
2240 D->m[1][2] =
2241 -M->m[0][0] * M->m[1][2] * M->m[3][3] +
2242 M->m[0][0] * M->m[3][2] * M->m[1][3] +
2243 M->m[0][2] * M->m[1][0] * M->m[3][3] -
2244 M->m[0][2] * M->m[3][0] * M->m[1][3] -
2245 M->m[0][3] * M->m[1][0] * M->m[3][2] +
2246 M->m[0][3] * M->m[3][0] * M->m[1][2];
2247
2248 D->m[1][3] =
2249 M->m[0][0] * M->m[1][2] * M->m[2][3] -
2250 M->m[0][0] * M->m[2][2] * M->m[1][3] -
2251 M->m[0][2] * M->m[1][0] * M->m[2][3] +
2252 M->m[0][2] * M->m[2][0] * M->m[1][3] +
2253 M->m[0][3] * M->m[1][0] * M->m[2][2] -
2254 M->m[0][3] * M->m[2][0] * M->m[1][2];
2255
2256 D->m[2][0] =
2257 M->m[1][0] * M->m[2][1] * M->m[3][3] -
2258 M->m[1][0] * M->m[3][1] * M->m[2][3] -
2259 M->m[1][1] * M->m[2][0] * M->m[3][3] +
2260 M->m[1][1] * M->m[3][0] * M->m[2][3] +
2261 M->m[1][3] * M->m[2][0] * M->m[3][1] -
2262 M->m[1][3] * M->m[3][0] * M->m[2][1];
2263
2264 D->m[2][1] =
2265 -M->m[0][0] * M->m[2][1] * M->m[3][3] +
2266 M->m[0][0] * M->m[3][1] * M->m[2][3] +
2267 M->m[0][1] * M->m[2][0] * M->m[3][3] -
2268 M->m[0][1] * M->m[3][0] * M->m[2][3] -
2269 M->m[0][3] * M->m[2][0] * M->m[3][1] +
2270 M->m[0][3] * M->m[3][0] * M->m[2][1];
2271
2272 D->m[2][2] =
2273 M->m[0][0] * M->m[1][1] * M->m[3][3] -
2274 M->m[0][0] * M->m[3][1] * M->m[1][3] -
2275 M->m[0][1] * M->m[1][0] * M->m[3][3] +
2276 M->m[0][1] * M->m[3][0] * M->m[1][3] +
2277 M->m[0][3] * M->m[1][0] * M->m[3][1] -
2278 M->m[0][3] * M->m[3][0] * M->m[1][1];
2279
2280 D->m[2][3] =
2281 -M->m[0][0] * M->m[1][1] * M->m[2][3] +
2282 M->m[0][0] * M->m[2][1] * M->m[1][3] +
2283 M->m[0][1] * M->m[1][0] * M->m[2][3] -
2284 M->m[0][1] * M->m[2][0] * M->m[1][3] -
2285 M->m[0][3] * M->m[1][0] * M->m[2][1] +
2286 M->m[0][3] * M->m[2][0] * M->m[1][1];
2287
2288 D->m[3][0] =
2289 -M->m[1][0] * M->m[2][1] * M->m[3][2] +
2290 M->m[1][0] * M->m[3][1] * M->m[2][2] +
2291 M->m[1][1] * M->m[2][0] * M->m[3][2] -
2292 M->m[1][1] * M->m[3][0] * M->m[2][2] -
2293 M->m[1][2] * M->m[2][0] * M->m[3][1] +
2294 M->m[1][2] * M->m[3][0] * M->m[2][1];
2295
2296 D->m[3][1] =
2297 M->m[0][0] * M->m[2][1] * M->m[3][2] -
2298 M->m[0][0] * M->m[3][1] * M->m[2][2] -
2299 M->m[0][1] * M->m[2][0] * M->m[3][2] +
2300 M->m[0][1] * M->m[3][0] * M->m[2][2] +
2301 M->m[0][2] * M->m[2][0] * M->m[3][1] -
2302 M->m[0][2] * M->m[3][0] * M->m[2][1];
2303
2304 D->m[3][2] =
2305 -M->m[0][0] * M->m[1][1] * M->m[3][2] +
2306 M->m[0][0] * M->m[3][1] * M->m[1][2] +
2307 M->m[0][1] * M->m[1][0] * M->m[3][2] -
2308 M->m[0][1] * M->m[3][0] * M->m[1][2] -
2309 M->m[0][2] * M->m[1][0] * M->m[3][1] +
2310 M->m[0][2] * M->m[3][0] * M->m[1][1];
2311
2312 D->m[3][3] =
2313 M->m[0][0] * M->m[1][1] * M->m[2][2] -
2314 M->m[0][0] * M->m[2][1] * M->m[1][2] -
2315 M->m[0][1] * M->m[1][0] * M->m[2][2] +
2316 M->m[0][1] * M->m[2][0] * M->m[1][2] +
2317 M->m[0][2] * M->m[1][0] * M->m[2][1] -
2318 M->m[0][2] * M->m[2][0] * M->m[1][1];
2319
2320 det =
2321 M->m[0][0] * D->m[0][0] +
2322 M->m[1][0] * D->m[0][1] +
2323 M->m[2][0] * D->m[0][2] +
2324 M->m[3][0] * D->m[0][3];
2325
2326 det = 1.0 / det;
2327
2328 for (i = 0; i < 4; i++)
2329 for (k = 0; k < 4; k++)
2330 D->m[i][k] *= det;
2331
2332 #ifdef DEBUG
2333 {
2334 D3DMATRIX I;
2335
2336 nine_d3d_matrix_matrix_mul(&I, D, M);
2337
2338 for (i = 0; i < 4; ++i)
2339 for (k = 0; k < 4; ++k)
2340 if (fabsf(I.m[i][k] - (float)(i == k)) > 1e-3)
2341 DBG("Matrix inversion check FAILED !\n");
2342 }
2343 #endif
2344 }
2345
2346 /* TODO: don't use 4x4 inverse, unless this gets all nicely inlined ? */
2347 void
2348 nine_d3d_matrix_inverse_3x3(D3DMATRIX *D, const D3DMATRIX *M)
2349 {
2350 D3DMATRIX T;
2351 unsigned i, j;
2352
2353 for (i = 0; i < 3; ++i)
2354 for (j = 0; j < 3; ++j)
2355 T.m[i][j] = M->m[i][j];
2356 for (i = 0; i < 3; ++i) {
2357 T.m[i][3] = 0.0f;
2358 T.m[3][i] = 0.0f;
2359 }
2360 T.m[3][3] = 1.0f;
2361
2362 nine_d3d_matrix_inverse(D, &T);
2363 }