st/nine: Properly declare sampler states for ff
[mesa.git] / src / gallium / state_trackers / nine / nine_ff.c
1
2 /* FF is big and ugly so feel free to write lines as long as you like.
3 * Aieeeeeeeee !
4 *
5 * Let me make that clearer:
6 * Aieeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee ! !! !!!
7 */
8
9 #include "device9.h"
10 #include "basetexture9.h"
11 #include "vertexdeclaration9.h"
12 #include "vertexshader9.h"
13 #include "pixelshader9.h"
14 #include "nine_ff.h"
15 #include "nine_defines.h"
16 #include "nine_helpers.h"
17 #include "nine_pipe.h"
18 #include "nine_dump.h"
19
20 #include "pipe/p_context.h"
21 #include "tgsi/tgsi_ureg.h"
22 #include "tgsi/tgsi_dump.h"
23 #include "util/u_box.h"
24 #include "util/u_hash_table.h"
25 #include "util/u_upload_mgr.h"
26
27 #define DBG_CHANNEL DBG_FF
28
29 #define NINE_FF_NUM_VS_CONST 196
30 #define NINE_FF_NUM_PS_CONST 24
31
/* Plain four-component float vector (x, y, z, w). */
struct fvec4
{
    float x;
    float y;
    float z;
    float w;
};
36
/* Key describing one fixed-function vertex shader variant.
 *
 * The named bitfields fill exactly six 32-bit words. value32[] and value64[]
 * alias the same storage so the whole key can be processed as raw words.
 */
struct nine_ff_vs_key
{
    union {
        struct {
            /* word 0: feature flags */
            uint32_t position_t          : 1;
            uint32_t lighting            : 1;
            uint32_t darkness            : 1; /* lighting enabled but no active lights */
            uint32_t localviewer         : 1;
            uint32_t vertexpointsize     : 1;
            uint32_t pointscale          : 1;
            uint32_t vertexblend         : 3;
            uint32_t vertexblend_indexed : 1;
            uint32_t vertextween         : 1;
            /* material sources: 0 = material, 1 = color1, 2 = color2 */
            uint32_t mtl_diffuse         : 2;
            uint32_t mtl_ambient         : 2;
            uint32_t mtl_specular        : 2;
            uint32_t mtl_emissive        : 2;
            uint32_t fog_mode            : 2;
            uint32_t fog_range           : 1;
            uint32_t color0in_one        : 1;
            uint32_t color1in_zero       : 1;
            uint32_t has_normal          : 1;
            uint32_t fog                 : 1;
            uint32_t normalizenormals    : 1;
            uint32_t ucp                 : 1;
            uint32_t pad1                : 4;

            /* word 1: texcoord input dimensions, 8 * 2 bits */
            uint32_t tc_dim_input        : 16;
            uint32_t pad2                : 16;

            /* word 2: texcoord output dimensions, 8 * 3 bits */
            uint32_t tc_dim_output       : 24;
            uint32_t pad3                : 8;

            /* word 3: texcoord generation mode, 8 * 3 bits */
            uint32_t tc_gen              : 24;
            uint32_t pad4                : 8;

            /* word 4: texcoord index, 8 * 3 bits */
            uint32_t tc_idx              : 24;
            uint32_t pad5                : 8;

            /* word 5: bitmask indexed by NINE_DECLUSAGE_* */
            uint32_t passthrough;
        };
        /* Raw views of the key.
         * Don't forget to resize VertexShader9.ff_key when this grows. */
        uint64_t value64[3];
        uint32_t value32[6];
    };
};
77
/* Key describing one fixed-function pixel shader variant.
 *
 * Per texture stage (ts[i]), packed into one 32-bit word:
 *
 *   colorop       D3DTOP   5 bit
 *   alphaop       D3DTOP   5 bit
 *   colorarg0..2  D3DTA    3 bit each
 *   alphaarg0..2  D3DTA    3 bit each
 *   resultarg              1 bit (CURRENT:0 or TEMP:1)
 *   textarget              2 bit (1D/2D/3D/CUBE)
 *   pad                    1 bit
 *   ================================
 *                         32 bit per stage
 *
 * The eight stage words are followed by one word of global flags and twelve
 * bytes of byte arrays, for twelve 32-bit words in total. value32[] and
 * value64[] alias the same storage.
 */
struct nine_ff_ps_key
{
    union {
        struct {
            struct {
                uint32_t colorop   : 5;
                uint32_t alphaop   : 5;
                uint32_t colorarg0 : 3;
                uint32_t colorarg1 : 3;
                uint32_t colorarg2 : 3;
                uint32_t alphaarg0 : 3;
                uint32_t alphaarg1 : 3;
                uint32_t alphaarg2 : 3;
                uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */
                uint32_t textarget : 2; /* 1D/2D/3D/CUBE */
                uint32_t pad       : 1;
                /* that's 32 bit exactly */
            } ts[8];

            uint32_t projected : 16;
            uint32_t fog       : 1;  /* for vFog coming from VS */
            uint32_t fog_mode  : 2;
            uint32_t specular  : 1;
            uint32_t pad1      : 12; /* 9 32-bit words with this */

            uint8_t colorarg_b4[3];
            uint8_t colorarg_b5[3];
            uint8_t alphaarg_b4[3];  /* 11 32-bit words plus a byte */
            uint8_t pad2[3];
        };
        /* Raw views of the key.
         * Don't forget to resize PixelShader9.ff_key when this grows. */
        uint64_t value64[6];
        uint32_t value32[12];
    };
};
125
126 static unsigned nine_ff_vs_key_hash(void *key)
127 {
128 struct nine_ff_vs_key *vs = key;
129 unsigned i;
130 uint32_t hash = vs->value32[0];
131 for (i = 1; i < ARRAY_SIZE(vs->value32); ++i)
132 hash ^= vs->value32[i];
133 return hash;
134 }
135 static int nine_ff_vs_key_comp(void *key1, void *key2)
136 {
137 struct nine_ff_vs_key *a = (struct nine_ff_vs_key *)key1;
138 struct nine_ff_vs_key *b = (struct nine_ff_vs_key *)key2;
139
140 return memcmp(a->value64, b->value64, sizeof(a->value64));
141 }
142 static unsigned nine_ff_ps_key_hash(void *key)
143 {
144 struct nine_ff_ps_key *ps = key;
145 unsigned i;
146 uint32_t hash = ps->value32[0];
147 for (i = 1; i < ARRAY_SIZE(ps->value32); ++i)
148 hash ^= ps->value32[i];
149 return hash;
150 }
151 static int nine_ff_ps_key_comp(void *key1, void *key2)
152 {
153 struct nine_ff_ps_key *a = (struct nine_ff_ps_key *)key1;
154 struct nine_ff_ps_key *b = (struct nine_ff_ps_key *)key2;
155
156 return memcmp(a->value64, b->value64, sizeof(a->value64));
157 }
158 static unsigned nine_ff_fvf_key_hash(void *key)
159 {
160 return *(DWORD *)key;
161 }
162 static int nine_ff_fvf_key_comp(void *key1, void *key2)
163 {
164 return *(DWORD *)key1 != *(DWORD *)key2;
165 }
166
/* Forward declarations; both functions are static, so their definitions
 * live in this translation unit. */
static void nine_ff_prune_vs(struct NineDevice9 *device);
static void nine_ff_prune_ps(struct NineDevice9 *device);
169
170 static void nine_ureg_tgsi_dump(struct ureg_program *ureg, boolean override)
171 {
172 if (debug_get_bool_option("NINE_FF_DUMP", FALSE) || override) {
173 unsigned count;
174 const struct tgsi_token *toks = ureg_get_tokens(ureg, &count);
175 tgsi_dump(toks, 0);
176 ureg_free_tokens(toks);
177 }
178 }
179
/* Replicate one component of a ureg_src across all channels. */
#define _XXXX(r) ureg_scalar(r, TGSI_SWIZZLE_X)
#define _YYYY(r) ureg_scalar(r, TGSI_SWIZZLE_Y)
#define _ZZZZ(r) ureg_scalar(r, TGSI_SWIZZLE_Z)
#define _WWWW(r) ureg_scalar(r, TGSI_SWIZZLE_W)

/* Identity: use the source as-is. */
#define _XYZW(r) (r)

/* Same as the four-letter forms, but taking a ureg_dst that is first
 * converted with ureg_src(). */
#define _X(r) _XXXX(ureg_src(r))
#define _Y(r) _YYYY(ureg_src(r))
#define _Z(r) _ZZZZ(ureg_src(r))
#define _W(r) _WWWW(ureg_src(r))

/* Constant number n. Expects a variable named `ureg` in scope. */
#define _CONST(n) ureg_DECL_constant(ureg, n)

/* Material constants start at constant 19. */
#define MATERIAL_CONST(i) \
    _CONST(19 + (i))

/* Light constant i, addressed relative to AL.
 * AL should contain base address of lights table. */
#define LIGHT_CONST(i) \
    ureg_src_indirect(_CONST(i), _X(AL))
200
201 /* VS FF constants layout:
202 *
203 * CONST[ 0.. 3] D3DTS_WORLD * D3DTS_VIEW * D3DTS_PROJECTION
204 * CONST[ 4.. 7] D3DTS_WORLD * D3DTS_VIEW
205 * CONST[ 8..11] D3DTS_PROJECTION
206 * CONST[12..15] D3DTS_VIEW^(-1)
207 * CONST[16..18] Normal matrix
208 *
209 * CONST[19] MATERIAL.Emissive + Material.Ambient * RS.Ambient
210 * CONST[20] MATERIAL.Diffuse
211 * CONST[21] MATERIAL.Ambient
212 * CONST[22] MATERIAL.Specular
213 * CONST[23].x___ MATERIAL.Power
214 * CONST[24] MATERIAL.Emissive
215 * CONST[25] RS.Ambient
216 *
217 * CONST[26].x___ RS.PointSizeMin
218 * CONST[26]._y__ RS.PointSizeMax
219 * CONST[26].__z_ RS.PointSize
220 * CONST[26].___w RS.PointScaleA
221 * CONST[27].x___ RS.PointScaleB
222 * CONST[27]._y__ RS.PointScaleC
223 *
224 * CONST[28].x___ RS.FogEnd
225 * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
226 * CONST[28].__z_ RS.FogDensity
227
228 * CONST[30].x___ TWEENFACTOR
229 *
230 * CONST[32].x___ LIGHT[0].Type
231 * CONST[32]._yzw LIGHT[0].Attenuation0,1,2
232 * CONST[33] LIGHT[0].Diffuse
233 * CONST[34] LIGHT[0].Specular
234 * CONST[35] LIGHT[0].Ambient
235 * CONST[36].xyz_ LIGHT[0].Position
236 * CONST[36].___w LIGHT[0].Range
237 * CONST[37].xyz_ LIGHT[0].Direction
238 * CONST[37].___w LIGHT[0].Falloff
239 * CONST[38].x___ cos(LIGHT[0].Theta / 2)
240 * CONST[38]._y__ cos(LIGHT[0].Phi / 2)
241 * CONST[38].__z_ 1.0f / (cos(LIGHT[0].Theta / 2) - cos(Light[0].Phi / 2))
242 * CONST[39].xyz_ LIGHT[0].HalfVector (for directional lights)
243 * CONST[39].___w 1 if this is the last active light, 0 if not
244 * CONST[40] LIGHT[1]
245 * CONST[48] LIGHT[2]
246 * CONST[56] LIGHT[3]
247 * CONST[64] LIGHT[4]
248 * CONST[72] LIGHT[5]
249 * CONST[80] LIGHT[6]
250 * CONST[88] LIGHT[7]
251 * NOTE: no lighting code is generated if there are no active lights
252 *
253 * CONST[100].x___ Viewport 2/width
254 * CONST[100]._y__ Viewport 2/height
255 * CONST[100].__z_ Viewport 1/(zmax - zmin)
256 * CONST[100].___w Viewport width
257 * CONST[101].x___ Viewport x0
258 * CONST[101]._y__ Viewport y0
259 * CONST[101].__z_ Viewport z0
260 *
261 * CONST[128..131] D3DTS_TEXTURE0
262 * CONST[132..135] D3DTS_TEXTURE1
263 * CONST[136..139] D3DTS_TEXTURE2
264 * CONST[140..143] D3DTS_TEXTURE3
265 * CONST[144..147] D3DTS_TEXTURE4
266 * CONST[148..151] D3DTS_TEXTURE5
267 * CONST[152..155] D3DTS_TEXTURE6
268 * CONST[156..159] D3DTS_TEXTURE7
269 *
270 * CONST[160] D3DTS_WORLDMATRIX[0] * D3DTS_VIEW
271 * CONST[164] D3DTS_WORLDMATRIX[1] * D3DTS_VIEW
272 * ...
273 * CONST[192] D3DTS_WORLDMATRIX[8] * D3DTS_VIEW
274 */
275 struct vs_build_ctx
276 {
277 struct ureg_program *ureg;
278 const struct nine_ff_vs_key *key;
279
280 uint16_t input[PIPE_MAX_ATTRIBS];
281 unsigned num_inputs;
282
283 struct ureg_src aVtx;
284 struct ureg_src aNrm;
285 struct ureg_src aCol[2];
286 struct ureg_src aTex[8];
287 struct ureg_src aPsz;
288 struct ureg_src aInd;
289 struct ureg_src aWgt;
290
291 struct ureg_src aVtx1; /* tweening */
292 struct ureg_src aNrm1;
293
294 struct ureg_src mtlA;
295 struct ureg_src mtlD;
296 struct ureg_src mtlS;
297 struct ureg_src mtlE;
298 };
299
300 static inline unsigned
301 get_texcoord_sn(struct pipe_screen *screen)
302 {
303 if (screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD))
304 return TGSI_SEMANTIC_TEXCOORD;
305 return TGSI_SEMANTIC_GENERIC;
306 }
307
308 static inline struct ureg_src
309 build_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl)
310 {
311 const unsigned i = vs->num_inputs++;
312 assert(i < PIPE_MAX_ATTRIBS);
313 vs->input[i] = ndecl;
314 return ureg_DECL_vs_input(vs->ureg, i);
315 }
316
317 /* NOTE: dst may alias src */
318 static inline void
319 ureg_normalize3(struct ureg_program *ureg,
320 struct ureg_dst dst, struct ureg_src src)
321 {
322 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
323 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
324
325 ureg_DP3(ureg, tmp_x, src, src);
326 ureg_RSQ(ureg, tmp_x, _X(tmp));
327 ureg_MUL(ureg, dst, src, _X(tmp));
328 ureg_release_temporary(ureg, tmp);
329 }
330
331 static void *
332 nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs)
333 {
334 const struct nine_ff_vs_key *key = vs->key;
335 struct ureg_program *ureg = ureg_create(PIPE_SHADER_VERTEX);
336 struct ureg_dst oPos, oCol[2], oPsz, oFog;
337 struct ureg_dst AR;
338 unsigned i, c;
339 unsigned label[32], l = 0;
340 boolean need_aNrm = key->lighting || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL);
341 boolean has_aNrm = need_aNrm && key->has_normal;
342 boolean need_aVtx = key->lighting || key->fog_mode || key->pointscale || key->ucp;
343 const unsigned texcoord_sn = get_texcoord_sn(device->screen);
344
345 vs->ureg = ureg;
346
347 /* Check which inputs we should transform. */
348 for (i = 0; i < 8 * 3; i += 3) {
349 switch ((key->tc_gen >> i) & 0x7) {
350 case NINED3DTSS_TCI_CAMERASPACENORMAL:
351 need_aNrm = TRUE;
352 break;
353 case NINED3DTSS_TCI_CAMERASPACEPOSITION:
354 need_aVtx = TRUE;
355 break;
356 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
357 need_aVtx = need_aNrm = TRUE;
358 break;
359 case NINED3DTSS_TCI_SPHEREMAP:
360 need_aVtx = need_aNrm = TRUE;
361 break;
362 default:
363 break;
364 }
365 }
366
367 /* Declare and record used inputs (needed for linkage with vertex format):
368 * (texture coordinates handled later)
369 */
370 vs->aVtx = build_vs_add_input(vs,
371 key->position_t ? NINE_DECLUSAGE_POSITIONT : NINE_DECLUSAGE_POSITION);
372
373 vs->aNrm = ureg_imm1f(ureg, 0.0f);
374 if (has_aNrm)
375 vs->aNrm = build_vs_add_input(vs, NINE_DECLUSAGE_NORMAL);
376
377 vs->aCol[0] = ureg_imm1f(ureg, 1.0f);
378 vs->aCol[1] = ureg_imm1f(ureg, 0.0f);
379
380 if (key->lighting || key->darkness) {
381 const unsigned mask = key->mtl_diffuse | key->mtl_specular |
382 key->mtl_ambient | key->mtl_emissive;
383 if ((mask & 0x1) && !key->color0in_one)
384 vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
385 if ((mask & 0x2) && !key->color1in_zero)
386 vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
387
388 vs->mtlD = MATERIAL_CONST(1);
389 vs->mtlA = MATERIAL_CONST(2);
390 vs->mtlS = MATERIAL_CONST(3);
391 vs->mtlE = MATERIAL_CONST(5);
392 if (key->mtl_diffuse == 1) vs->mtlD = vs->aCol[0]; else
393 if (key->mtl_diffuse == 2) vs->mtlD = vs->aCol[1];
394 if (key->mtl_ambient == 1) vs->mtlA = vs->aCol[0]; else
395 if (key->mtl_ambient == 2) vs->mtlA = vs->aCol[1];
396 if (key->mtl_specular == 1) vs->mtlS = vs->aCol[0]; else
397 if (key->mtl_specular == 2) vs->mtlS = vs->aCol[1];
398 if (key->mtl_emissive == 1) vs->mtlE = vs->aCol[0]; else
399 if (key->mtl_emissive == 2) vs->mtlE = vs->aCol[1];
400 } else {
401 if (!key->color0in_one) vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0));
402 if (!key->color1in_zero) vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1));
403 }
404
405 if (key->vertexpointsize)
406 vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE);
407
408 if (key->vertexblend_indexed || key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES))
409 vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES);
410 if (key->vertexblend || key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT))
411 vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT);
412 if (key->vertextween) {
413 vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1));
414 vs->aNrm1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(NORMAL,1));
415 }
416
417 /* Declare outputs:
418 */
419 oPos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */
420 oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0));
421 oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1));
422 if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
423 oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_FOG, 0);
424 oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X);
425 }
426
427 if (key->vertexpointsize || key->pointscale) {
428 oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0,
429 TGSI_WRITEMASK_X, 0, 1);
430 oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X);
431 }
432
433 if (key->lighting || key->vertexblend)
434 AR = ureg_DECL_address(ureg);
435
436 /* === Vertex transformation / vertex blending:
437 */
438
439 if (key->position_t) {
440 if (device->driver_caps.window_space_position_support) {
441 ureg_MOV(ureg, oPos, vs->aVtx);
442 } else {
443 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
444 /* vs->aVtx contains the coordinates buffer wise.
445 * later in the pipeline, clipping, viewport and division
446 * by w (rhw = 1/w) are going to be applied, so do the reverse
447 * of these transformations (except clipping) to have the good
448 * position at the end.*/
449 ureg_MOV(ureg, tmp, vs->aVtx);
450 /* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */
451 ureg_SUB(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(101));
452 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100));
453 ureg_SUB(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 1.0f));
454 /* Y needs to be reversed */
455 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp)));
456 /* inverse rhw */
457 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), _W(tmp));
458 /* multiply X, Y, Z by w */
459 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _W(tmp));
460 ureg_MOV(ureg, oPos, ureg_src(tmp));
461 ureg_release_temporary(ureg, tmp);
462 }
463 } else if (key->vertexblend) {
464 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
465 struct ureg_dst tmp2 = ureg_DECL_temporary(ureg);
466 struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg);
467 struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg);
468 struct ureg_dst sum_blendweights = ureg_DECL_temporary(ureg);
469 struct ureg_src cWM[4];
470
471 for (i = 160; i <= 195; ++i)
472 ureg_DECL_constant(ureg, i);
473
474 /* translate world matrix index to constant file index */
475 if (key->vertexblend_indexed) {
476 ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 160.0f));
477 ureg_ARL(ureg, AR, ureg_src(tmp));
478 }
479
480 ureg_MOV(ureg, aVtx_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
481 ureg_MOV(ureg, aNrm_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f));
482 ureg_MOV(ureg, sum_blendweights, ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f));
483
484 for (i = 0; i < key->vertexblend; ++i) {
485 for (c = 0; c < 4; ++c) {
486 cWM[c] = ureg_src_register(TGSI_FILE_CONSTANT, (160 + i * 4) * !key->vertexblend_indexed + c);
487 if (key->vertexblend_indexed)
488 cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i));
489 }
490
491 /* multiply by WORLD(index) */
492 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), cWM[0]);
493 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), cWM[1], ureg_src(tmp));
494 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), cWM[2], ureg_src(tmp));
495 ureg_MAD(ureg, tmp, _WWWW(vs->aVtx), cWM[3], ureg_src(tmp));
496
497 if (has_aNrm) {
498 /* Note: the spec says the transpose of the inverse of the
499 * WorldView matrices should be used, but all tests show
500 * otherwise.
501 * Only case unknown: D3DVBF_0WEIGHTS */
502 ureg_MUL(ureg, tmp2, _XXXX(vs->aNrm), cWM[0]);
503 ureg_MAD(ureg, tmp2, _YYYY(vs->aNrm), cWM[1], ureg_src(tmp2));
504 ureg_MAD(ureg, tmp2, _ZZZZ(vs->aNrm), cWM[2], ureg_src(tmp2));
505 }
506
507 if (i < (key->vertexblend - 1)) {
508 /* accumulate weighted position value */
509 ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(aVtx_dst));
510 if (has_aNrm)
511 ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(vs->aWgt, i), ureg_src(aNrm_dst));
512 /* subtract weighted position value for last value */
513 ureg_SUB(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_scalar(vs->aWgt, i));
514 }
515 }
516
517 /* the last weighted position is always 1 - sum_of_previous_weights */
518 ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aVtx_dst));
519 if (has_aNrm)
520 ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aNrm_dst));
521
522 /* multiply by VIEW_PROJ */
523 ureg_MUL(ureg, tmp, _X(aVtx_dst), _CONST(8));
524 ureg_MAD(ureg, tmp, _Y(aVtx_dst), _CONST(9), ureg_src(tmp));
525 ureg_MAD(ureg, tmp, _Z(aVtx_dst), _CONST(10), ureg_src(tmp));
526 ureg_MAD(ureg, oPos, _W(aVtx_dst), _CONST(11), ureg_src(tmp));
527
528 if (need_aVtx)
529 vs->aVtx = ureg_src(aVtx_dst);
530
531 ureg_release_temporary(ureg, tmp);
532 ureg_release_temporary(ureg, tmp2);
533 ureg_release_temporary(ureg, sum_blendweights);
534 if (!need_aVtx)
535 ureg_release_temporary(ureg, aVtx_dst);
536
537 if (has_aNrm) {
538 if (key->normalizenormals)
539 ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst));
540 vs->aNrm = ureg_src(aNrm_dst);
541 } else
542 ureg_release_temporary(ureg, aNrm_dst);
543 } else {
544 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
545
546 if (key->vertextween) {
547 struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg);
548 ureg_LRP(ureg, aVtx_dst, _XXXX(_CONST(30)), vs->aVtx1, vs->aVtx);
549 vs->aVtx = ureg_src(aVtx_dst);
550 if (has_aNrm) {
551 struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg);
552 ureg_LRP(ureg, aNrm_dst, _XXXX(_CONST(30)), vs->aNrm1, vs->aNrm);
553 vs->aNrm = ureg_src(aNrm_dst);
554 }
555 }
556
557 /* position = vertex * WORLD_VIEW_PROJ */
558 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(0));
559 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(1), ureg_src(tmp));
560 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(2), ureg_src(tmp));
561 ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(tmp));
562 ureg_release_temporary(ureg, tmp);
563
564 if (need_aVtx) {
565 struct ureg_dst aVtx_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
566 ureg_MUL(ureg, aVtx_dst, _XXXX(vs->aVtx), _CONST(4));
567 ureg_MAD(ureg, aVtx_dst, _YYYY(vs->aVtx), _CONST(5), ureg_src(aVtx_dst));
568 ureg_MAD(ureg, aVtx_dst, _ZZZZ(vs->aVtx), _CONST(6), ureg_src(aVtx_dst));
569 ureg_MAD(ureg, aVtx_dst, _WWWW(vs->aVtx), _CONST(7), ureg_src(aVtx_dst));
570 vs->aVtx = ureg_src(aVtx_dst);
571 }
572 if (has_aNrm) {
573 struct ureg_dst aNrm_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
574 ureg_MUL(ureg, aNrm_dst, _XXXX(vs->aNrm), _CONST(16));
575 ureg_MAD(ureg, aNrm_dst, _YYYY(vs->aNrm), _CONST(17), ureg_src(aNrm_dst));
576 ureg_MAD(ureg, aNrm_dst, _ZZZZ(vs->aNrm), _CONST(18), ureg_src(aNrm_dst));
577 if (key->normalizenormals)
578 ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst));
579 vs->aNrm = ureg_src(aNrm_dst);
580 }
581 }
582
583 /* === Process point size:
584 */
585 if (key->vertexpointsize || key->pointscale) {
586 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
587 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
588 struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
589 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
590 if (key->vertexpointsize) {
591 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
592 ureg_MAX(ureg, tmp_z, _XXXX(vs->aPsz), _XXXX(cPsz1));
593 ureg_MIN(ureg, tmp_z, _Z(tmp), _YYYY(cPsz1));
594 } else {
595 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
596 ureg_MOV(ureg, tmp_z, _ZZZZ(cPsz1));
597 }
598
599 if (key->pointscale) {
600 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26);
601 struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27);
602
603 ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx);
604 ureg_RSQ(ureg, tmp_y, _X(tmp));
605 ureg_MUL(ureg, tmp_y, _Y(tmp), _X(tmp));
606 ureg_CMP(ureg, tmp_y, ureg_negate(_Y(tmp)), _Y(tmp), ureg_imm1f(ureg, 0.0f));
607 ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2));
608 ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1));
609 ureg_RSQ(ureg, tmp_x, _X(tmp));
610 ureg_MUL(ureg, tmp_x, _X(tmp), _Z(tmp));
611 ureg_MUL(ureg, tmp_x, _X(tmp), _WWWW(_CONST(100)));
612 ureg_MAX(ureg, tmp_x, _X(tmp), _XXXX(cPsz1));
613 ureg_MIN(ureg, tmp_z, _X(tmp), _YYYY(cPsz1));
614 }
615
616 ureg_MOV(ureg, oPsz, _Z(tmp));
617 ureg_release_temporary(ureg, tmp);
618 }
619
620 for (i = 0; i < 8; ++i) {
621 struct ureg_dst tmp, tmp_x, tmp2;
622 struct ureg_dst oTex, input_coord, transformed, t, aVtx_normed;
623 unsigned c, writemask;
624 const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7;
625 const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7;
626 unsigned dim_input = 1 + ((key->tc_dim_input >> (i * 2)) & 0x3);
627 const unsigned dim_output = (key->tc_dim_output >> (i * 3)) & 0x7;
628
629 /* No texture output of index s */
630 if (tci == NINED3DTSS_TCI_DISABLE)
631 continue;
632 oTex = ureg_DECL_output(ureg, texcoord_sn, i);
633 tmp = ureg_DECL_temporary(ureg);
634 tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
635 input_coord = ureg_DECL_temporary(ureg);
636 transformed = ureg_DECL_temporary(ureg);
637
638 /* Get the coordinate */
639 switch (tci) {
640 case NINED3DTSS_TCI_PASSTHRU:
641 /* NINED3DTSS_TCI_PASSTHRU => Use texcoord coming from index idx *
642 * Else the idx is used only to determine wrapping mode. */
643 vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx));
644 ureg_MOV(ureg, input_coord, vs->aTex[idx]);
645 break;
646 case NINED3DTSS_TCI_CAMERASPACENORMAL:
647 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aNrm);
648 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
649 dim_input = 4;
650 break;
651 case NINED3DTSS_TCI_CAMERASPACEPOSITION:
652 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aVtx);
653 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
654 dim_input = 4;
655 break;
656 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR:
657 tmp.WriteMask = TGSI_WRITEMASK_XYZ;
658 aVtx_normed = ureg_DECL_temporary(ureg);
659 ureg_normalize3(ureg, aVtx_normed, vs->aVtx);
660 ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);
661 ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
662 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
663 ureg_SUB(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(aVtx_normed), ureg_src(tmp));
664 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f));
665 ureg_release_temporary(ureg, aVtx_normed);
666 dim_input = 4;
667 tmp.WriteMask = TGSI_WRITEMASK_XYZW;
668 break;
669 case NINED3DTSS_TCI_SPHEREMAP:
670 /* Implement the formula of GL_SPHERE_MAP */
671 tmp.WriteMask = TGSI_WRITEMASK_XYZ;
672 aVtx_normed = ureg_DECL_temporary(ureg);
673 tmp2 = ureg_DECL_temporary(ureg);
674 ureg_normalize3(ureg, aVtx_normed, vs->aVtx);
675 ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm);
676 ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp));
677 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp));
678 ureg_SUB(ureg, tmp, ureg_src(aVtx_normed), ureg_src(tmp));
679 /* now tmp = normed(Vtx) - 2 dot3(normed(Vtx), Nrm) Nrm */
680 ureg_MOV(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_XYZ), ureg_src(tmp));
681 ureg_MUL(ureg, tmp2, ureg_src(tmp2), ureg_src(tmp2));
682 ureg_DP3(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_src(tmp2));
683 ureg_RSQ(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2));
684 ureg_MUL(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_imm1f(ureg, 0.5f));
685 /* tmp2 = 0.5 / sqrt(tmp.x^2 + tmp.y^2 + (tmp.z+1)^2)
686 * TODO: z coordinates are a bit different gl vs d3d, should the formula be adapted ? */
687 ureg_MUL(ureg, tmp, ureg_src(tmp), _X(tmp2));
688 ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 0.5f));
689 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_ZW), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
690 ureg_release_temporary(ureg, aVtx_normed);
691 ureg_release_temporary(ureg, tmp2);
692 dim_input = 4;
693 tmp.WriteMask = TGSI_WRITEMASK_XYZW;
694 break;
695 default:
696 assert(0);
697 break;
698 }
699
700 /* Apply the transformation */
701 /* dim_output == 0 => do not transform the components.
702 * XYZRHW also disables transformation */
703 if (!dim_output || key->position_t) {
704 ureg_release_temporary(ureg, transformed);
705 transformed = input_coord;
706 writemask = TGSI_WRITEMASK_XYZW;
707 } else {
708 for (c = 0; c < dim_output; c++) {
709 t = ureg_writemask(transformed, 1 << c);
710 switch (dim_input) {
711 /* dim_input = 1 2 3: -> we add trailing 1 to input*/
712 case 1: ureg_MAD(ureg, t, _X(input_coord), _XXXX(_CONST(128 + i * 4 + c)), _YYYY(_CONST(128 + i * 4 + c)));
713 break;
714 case 2: ureg_DP2(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
715 ureg_ADD(ureg, t, ureg_src(transformed), _ZZZZ(_CONST(128 + i * 4 + c)));
716 break;
717 case 3: ureg_DP3(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c));
718 ureg_ADD(ureg, t, ureg_src(transformed), _WWWW(_CONST(128 + i * 4 + c)));
719 break;
720 case 4: ureg_DP4(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); break;
721 default:
722 assert(0);
723 }
724 }
725 writemask = (1 << dim_output) - 1;
726 ureg_release_temporary(ureg, input_coord);
727 }
728
729 ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed));
730 ureg_release_temporary(ureg, transformed);
731 ureg_release_temporary(ureg, tmp);
732 }
733
734 /* === Lighting:
735 *
736 * DIRECTIONAL: Light at infinite distance, parallel rays, no attenuation.
737 * POINT: Finite distance to scene, divergent rays, isotropic, attenuation.
738 * SPOT: Finite distance, divergent rays, angular dependence, attenuation.
739 *
740 * vec3 normal = normalize(in.Normal * NormalMatrix);
741 * vec3 hitDir = light.direction;
742 * float atten = 1.0;
743 *
744 * if (light.type != DIRECTIONAL)
745 * {
746 * vec3 hitVec = light.position - eyeVertex;
747 * float d = length(hitVec);
748 * hitDir = hitVec / d;
749 * atten = 1 / ((light.atten2 * d + light.atten1) * d + light.atten0);
750 * }
751 *
752 * if (light.type == SPOTLIGHT)
753 * {
754 * float rho = dp3(-hitVec, light.direction);
755 * if (rho < cos(light.phi / 2))
756 * atten = 0;
757 * if (rho < cos(light.theta / 2))
758 * atten *= pow(some_func(rho), light.falloff);
759 * }
760 *
761 * float nDotHit = dp3_sat(normal, hitVec);
762 * float powFact = 0.0;
763 *
764 * if (nDotHit > 0.0)
765 * {
766 * vec3 midVec = normalize(hitDir + eye);
767 * float nDotMid = dp3_sat(normal, midVec);
768 * pFact = pow(nDotMid, material.power);
769 * }
770 *
771 * ambient += light.ambient * atten;
772 * diffuse += light.diffuse * atten * nDotHit;
773 * specular += light.specular * atten * powFact;
774 */
775 if (key->lighting) {
776 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
777 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
778 struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y);
779 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
780 struct ureg_dst rAtt = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W);
781 struct ureg_dst rHit = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
782 struct ureg_dst rMid = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
783
784 struct ureg_dst rCtr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W);
785
786 struct ureg_dst AL = ureg_writemask(AR, TGSI_WRITEMASK_X);
787
788 /* Light.*.Alpha is not used. */
789 struct ureg_dst rD = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
790 struct ureg_dst rA = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ);
791 struct ureg_dst rS = ureg_DECL_temporary(ureg);
792
793 struct ureg_src mtlP = _XXXX(MATERIAL_CONST(4));
794
795 struct ureg_src cLKind = _XXXX(LIGHT_CONST(0));
796 struct ureg_src cLAtt0 = _YYYY(LIGHT_CONST(0));
797 struct ureg_src cLAtt1 = _ZZZZ(LIGHT_CONST(0));
798 struct ureg_src cLAtt2 = _WWWW(LIGHT_CONST(0));
799 struct ureg_src cLColD = _XYZW(LIGHT_CONST(1));
800 struct ureg_src cLColS = _XYZW(LIGHT_CONST(2));
801 struct ureg_src cLColA = _XYZW(LIGHT_CONST(3));
802 struct ureg_src cLPos = _XYZW(LIGHT_CONST(4));
803 struct ureg_src cLRng = _WWWW(LIGHT_CONST(4));
804 struct ureg_src cLDir = _XYZW(LIGHT_CONST(5));
805 struct ureg_src cLFOff = _WWWW(LIGHT_CONST(5));
806 struct ureg_src cLTht = _XXXX(LIGHT_CONST(6));
807 struct ureg_src cLPhi = _YYYY(LIGHT_CONST(6));
808 struct ureg_src cLSDiv = _ZZZZ(LIGHT_CONST(6));
809 struct ureg_src cLLast = _WWWW(LIGHT_CONST(7));
810
811 const unsigned loop_label = l++;
812
813 ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */
814 ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f));
815 ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f));
816 ureg_MOV(ureg, rS, ureg_imm1f(ureg, 0.0f));
817 rD = ureg_saturate(rD);
818 rA = ureg_saturate(rA);
819 rS = ureg_saturate(rS);
820
821
822 /* loop management */
823 ureg_BGNLOOP(ureg, &label[loop_label]);
824 ureg_ARL(ureg, AL, _W(rCtr));
825
826 /* if (not DIRECTIONAL light): */
827 ureg_SNE(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_DIRECTIONAL));
828 ureg_MOV(ureg, rHit, ureg_negate(cLDir));
829 ureg_MOV(ureg, rAtt, ureg_imm1f(ureg, 1.0f));
830 ureg_IF(ureg, _X(tmp), &label[l++]);
831 {
832 /* hitDir = light.position - eyeVtx
833 * d = length(hitDir)
834 */
835 ureg_SUB(ureg, rHit, cLPos, vs->aVtx);
836 ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit));
837 ureg_RSQ(ureg, tmp_y, _X(tmp));
838 ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */
839
840 /* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */
841 ureg_MAD(ureg, rAtt, _X(tmp), cLAtt2, cLAtt1);
842 ureg_MAD(ureg, rAtt, _X(tmp), _W(rAtt), cLAtt0);
843 ureg_RCP(ureg, rAtt, _W(rAtt));
844 /* cut-off if distance exceeds Light.Range */
845 ureg_SLT(ureg, tmp_x, _X(tmp), cLRng);
846 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
847 }
848 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
849 ureg_ENDIF(ureg);
850
851 /* normalize hitDir */
852 ureg_normalize3(ureg, rHit, ureg_src(rHit));
853
854 /* if (SPOT light) */
855 ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT));
856 ureg_IF(ureg, _X(tmp), &label[l++]);
857 {
858 /* rho = dp3(-hitDir, light.spotDir)
859 *
860 * if (rho > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi
861 * spotAtt = 1
862 * else
863 * if (rho <= light.cphi2)
864 * spotAtt = 0
865 * else
866 * spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff
867 */
868 ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */
869 ureg_SUB(ureg, tmp_x, _Y(tmp), cLPhi);
870 ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv);
871 ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */
872 ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */
873 ureg_SGE(ureg, tmp_y, _Y(tmp), cLPhi); /* if inside phi */
874 ureg_MAD(ureg, ureg_saturate(tmp_x), _X(tmp), _Y(tmp), _Z(tmp));
875 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp));
876 }
877 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
878 ureg_ENDIF(ureg);
879
880 /* directional factors, let's not use LIT because of clarity */
881
882 if (has_aNrm) {
883 if (key->localviewer) {
884 ureg_normalize3(ureg, rMid, vs->aVtx);
885 ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_src(rMid));
886 } else {
887 ureg_SUB(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, 1.0f));
888 }
889 ureg_normalize3(ureg, rMid, ureg_src(rMid));
890 ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid));
891 ureg_IF(ureg, _Y(tmp), &label[l++]);
892 {
893 ureg_POW(ureg, tmp_y, _Y(tmp), mtlP);
894 ureg_MUL(ureg, tmp_y, _W(rAtt), _Y(tmp)); /* power factor * att */
895 ureg_MAD(ureg, rS, cLColS, _Y(tmp), ureg_src(rS)); /* accumulate specular */
896 }
897 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
898 ureg_ENDIF(ureg);
899
900 ureg_DP3(ureg, ureg_saturate(tmp_x), vs->aNrm, ureg_src(rHit));
901 ureg_MUL(ureg, tmp_x, _W(rAtt), _X(tmp)); /* dp3(normal,hitDir) * att */
902 ureg_MAD(ureg, rD, cLColD, _X(tmp), ureg_src(rD)); /* accumulate diffuse */
903 }
904
905 ureg_MAD(ureg, rA, cLColA, _W(rAtt), ureg_src(rA)); /* accumulate ambient */
906
907 /* break if this was the last light */
908 ureg_IF(ureg, cLLast, &label[l++]);
909 ureg_BRK(ureg);
910 ureg_ENDIF(ureg);
911 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg));
912
913 ureg_ADD(ureg, rCtr, _W(rCtr), ureg_imm1f(ureg, 8.0f));
914 ureg_fixup_label(ureg, label[loop_label], ureg_get_instruction_number(ureg));
915 ureg_ENDLOOP(ureg, &label[loop_label]);
916
917 /* Set alpha factors of illumination to 1.0 for the multiplications. */
918 rD.WriteMask = TGSI_WRITEMASK_W; rD.Saturate = 0;
919 rA.WriteMask = TGSI_WRITEMASK_W; rA.Saturate = 0;
920 ureg_MOV(ureg, rD, ureg_imm1f(ureg, 1.0f));
921
922 /* Apply to material:
923 *
924 * oCol[0] = (material.emissive + material.ambient * rs.ambient) +
925 * material.ambient * ambient +
926 * material.diffuse * diffuse +
927 * oCol[1] = material.specular * specular;
928 */
929 if (key->mtl_emissive == 0 && key->mtl_ambient == 0) {
930 ureg_MOV(ureg, rA, ureg_imm1f(ureg, 1.0f));
931 ureg_MAD(ureg, tmp, ureg_src(rA), vs->mtlA, _CONST(19));
932 } else {
933 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), _CONST(25));
934 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE);
935 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W ), vs->mtlA, vs->mtlE);
936 }
937
938 ureg_MAD(ureg, oCol[0], ureg_src(rD), vs->mtlD, ureg_src(tmp));
939 ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS);
940 ureg_release_temporary(ureg, rAtt);
941 ureg_release_temporary(ureg, rHit);
942 ureg_release_temporary(ureg, rMid);
943 ureg_release_temporary(ureg, rCtr);
944 ureg_release_temporary(ureg, rD);
945 ureg_release_temporary(ureg, rA);
946 ureg_release_temporary(ureg, rS);
947 ureg_release_temporary(ureg, rAtt);
948 ureg_release_temporary(ureg, tmp);
949 } else
950 /* COLOR */
951 if (key->darkness) {
952 if (key->mtl_emissive == 0 && key->mtl_ambient == 0) {
953 ureg_MAD(ureg, oCol[0], vs->mtlD, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f), _CONST(19));
954 } else {
955 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
956 ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE);
957 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), vs->mtlA, vs->mtlE);
958 ureg_ADD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD, _W(tmp));
959 ureg_release_temporary(ureg, tmp);
960 }
961 ureg_MOV(ureg, oCol[1], ureg_imm1f(ureg, 0.0f));
962 } else {
963 ureg_MOV(ureg, oCol[0], vs->aCol[0]);
964 ureg_MOV(ureg, oCol[1], vs->aCol[1]);
965 }
966
967 /* === Process fog.
968 *
969 * exp(x) = ex2(log2(e) * x)
970 */
971 if (key->fog_mode) {
972 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
973 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
974 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z);
975 if (key->fog_range) {
976 ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx);
977 ureg_RSQ(ureg, tmp_z, _X(tmp));
978 ureg_MUL(ureg, tmp_z, _Z(tmp), _X(tmp));
979 } else {
980 ureg_MOV(ureg, tmp_z, ureg_abs(_ZZZZ(vs->aVtx)));
981 }
982
983 if (key->fog_mode == D3DFOG_EXP) {
984 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
985 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
986 ureg_EX2(ureg, tmp_x, _X(tmp));
987 } else
988 if (key->fog_mode == D3DFOG_EXP2) {
989 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28)));
990 ureg_MUL(ureg, tmp_x, _X(tmp), _X(tmp));
991 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f));
992 ureg_EX2(ureg, tmp_x, _X(tmp));
993 } else
994 if (key->fog_mode == D3DFOG_LINEAR) {
995 ureg_SUB(ureg, tmp_x, _XXXX(_CONST(28)), _Z(tmp));
996 ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28)));
997 }
998 ureg_MOV(ureg, oFog, _X(tmp));
999 ureg_release_temporary(ureg, tmp);
1000 } else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) {
1001 ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W));
1002 }
1003
1004 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) {
1005 struct ureg_src input;
1006 struct ureg_dst output;
1007 input = vs->aWgt;
1008 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 18);
1009 ureg_MOV(ureg, output, input);
1010 }
1011 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) {
1012 struct ureg_src input;
1013 struct ureg_dst output;
1014 input = vs->aInd;
1015 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19);
1016 ureg_MOV(ureg, output, input);
1017 }
1018 if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) {
1019 struct ureg_src input;
1020 struct ureg_dst output;
1021 input = vs->aNrm;
1022 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20);
1023 ureg_MOV(ureg, output, input);
1024 }
1025 if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) {
1026 struct ureg_src input;
1027 struct ureg_dst output;
1028 input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT);
1029 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21);
1030 ureg_MOV(ureg, output, input);
1031 }
1032 if (key->passthrough & (1 << NINE_DECLUSAGE_BINORMAL)) {
1033 struct ureg_src input;
1034 struct ureg_dst output;
1035 input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL);
1036 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22);
1037 ureg_MOV(ureg, output, input);
1038 }
1039 if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) {
1040 struct ureg_src input;
1041 struct ureg_dst output;
1042 input = build_vs_add_input(vs, NINE_DECLUSAGE_FOG);
1043 input = ureg_scalar(input, TGSI_SWIZZLE_X);
1044 output = oFog;
1045 ureg_MOV(ureg, output, input);
1046 }
1047 if (key->passthrough & (1 << NINE_DECLUSAGE_DEPTH)) {
1048 (void) 0; /* TODO: replace z of position output ? */
1049 }
1050
1051 /* ucp for ff applies on world coordinates.
1052 * aVtx is in worldview coordinates. */
1053 if (key->ucp) {
1054 struct ureg_dst clipVect = ureg_DECL_output(ureg, TGSI_SEMANTIC_CLIPVERTEX, 0);
1055 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1056 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(12));
1057 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(13), ureg_src(tmp));
1058 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(14), ureg_src(tmp));
1059 ureg_ADD(ureg, clipVect, _CONST(15), ureg_src(tmp));
1060 ureg_release_temporary(ureg, tmp);
1061 }
1062
1063 if (key->position_t && device->driver_caps.window_space_position_support)
1064 ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE);
1065
1066 ureg_END(ureg);
1067 nine_ureg_tgsi_dump(ureg, FALSE);
1068 return ureg_create_shader_and_destroy(ureg, device->pipe);
1069 }
1070
/* PS FF constants layout:
 *
 * CONST[ 0.. 7]      stage[i].D3DTSS_CONSTANT
 * CONST[ 8..15].x___ stage[i].D3DTSS_BUMPENVMAT00
 * CONST[ 8..15]._y__ stage[i].D3DTSS_BUMPENVMAT01
 * CONST[ 8..15].__z_ stage[i].D3DTSS_BUMPENVMAT10
 * CONST[ 8..15].___w stage[i].D3DTSS_BUMPENVMAT11
 * CONST[16..19].x_z_ stage[i].D3DTSS_BUMPENVLSCALE  (two stages per constant)
 * CONST[16..19]._y_w stage[i].D3DTSS_BUMPENVLOFFSET (two stages per constant)
 *
 * CONST[20] D3DRS_TEXTUREFACTOR
 * CONST[21] D3DRS_FOGCOLOR
 * CONST[22].x___ RS.FogEnd
 * CONST[22]._y__ 1.0f / (RS.FogEnd - RS.FogStart)
 * CONST[22].__z_ RS.FogDensity
 */
1087 struct ps_build_ctx
1088 {
1089 struct ureg_program *ureg;
1090
1091 struct ureg_src vC[2]; /* DIFFUSE, SPECULAR */
1092 struct ureg_src vT[8]; /* TEXCOORD[i] */
1093 struct ureg_dst rCur; /* D3DTA_CURRENT */
1094 struct ureg_dst rMod;
1095 struct ureg_src rCurSrc;
1096 struct ureg_dst rTmp; /* D3DTA_TEMP */
1097 struct ureg_src rTmpSrc;
1098 struct ureg_dst rTex;
1099 struct ureg_src rTexSrc;
1100 struct ureg_src cBEM[8];
1101 struct ureg_src s[8];
1102
1103 struct {
1104 unsigned index;
1105 unsigned index_pre_mod;
1106 } stage;
1107 };
1108
1109 static struct ureg_src
1110 ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta)
1111 {
1112 struct ureg_src reg;
1113
1114 switch (ta & D3DTA_SELECTMASK) {
1115 case D3DTA_CONSTANT:
1116 reg = ureg_DECL_constant(ps->ureg, ps->stage.index);
1117 break;
1118 case D3DTA_CURRENT:
1119 reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc;
1120 break;
1121 case D3DTA_DIFFUSE:
1122 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);
1123 break;
1124 case D3DTA_SPECULAR:
1125 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1126 break;
1127 case D3DTA_TEMP:
1128 reg = ps->rTmpSrc;
1129 break;
1130 case D3DTA_TEXTURE:
1131 reg = ps->rTexSrc;
1132 break;
1133 case D3DTA_TFACTOR:
1134 reg = ureg_DECL_constant(ps->ureg, 20);
1135 break;
1136 default:
1137 assert(0);
1138 reg = ureg_src_undef();
1139 break;
1140 }
1141 if (ta & D3DTA_COMPLEMENT) {
1142 struct ureg_dst dst = ureg_DECL_temporary(ps->ureg);
1143 ureg_SUB(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), reg);
1144 reg = ureg_src(dst);
1145 }
1146 if (ta & D3DTA_ALPHAREPLICATE)
1147 reg = _WWWW(reg);
1148 return reg;
1149 }
1150
1151 static struct ureg_dst
1152 ps_get_ts_dst(struct ps_build_ctx *ps, unsigned ta)
1153 {
1154 assert(!(ta & (D3DTA_COMPLEMENT | D3DTA_ALPHAREPLICATE)));
1155
1156 switch (ta & D3DTA_SELECTMASK) {
1157 case D3DTA_CURRENT:
1158 return ps->rCur;
1159 case D3DTA_TEMP:
1160 return ps->rTmp;
1161 default:
1162 assert(0);
1163 return ureg_dst_undef();
1164 }
1165 }
1166
1167 static uint8_t ps_d3dtop_args_mask(D3DTEXTUREOP top)
1168 {
1169 switch (top) {
1170 case D3DTOP_DISABLE:
1171 return 0x0;
1172 case D3DTOP_SELECTARG1:
1173 case D3DTOP_PREMODULATE:
1174 return 0x2;
1175 case D3DTOP_SELECTARG2:
1176 return 0x4;
1177 case D3DTOP_MULTIPLYADD:
1178 case D3DTOP_LERP:
1179 return 0x7;
1180 default:
1181 return 0x6;
1182 }
1183 }
1184
1185 static inline boolean
1186 is_MOV_no_op(struct ureg_dst dst, struct ureg_src src)
1187 {
1188 return !dst.WriteMask ||
1189 (dst.File == src.File &&
1190 dst.Index == src.Index &&
1191 !dst.Indirect &&
1192 !dst.Saturate &&
1193 !src.Indirect &&
1194 !src.Negate &&
1195 !src.Absolute &&
1196 (!(dst.WriteMask & TGSI_WRITEMASK_X) || (src.SwizzleX == TGSI_SWIZZLE_X)) &&
1197 (!(dst.WriteMask & TGSI_WRITEMASK_Y) || (src.SwizzleY == TGSI_SWIZZLE_Y)) &&
1198 (!(dst.WriteMask & TGSI_WRITEMASK_Z) || (src.SwizzleZ == TGSI_SWIZZLE_Z)) &&
1199 (!(dst.WriteMask & TGSI_WRITEMASK_W) || (src.SwizzleW == TGSI_SWIZZLE_W)));
1200
1201 }
1202
1203 static void
1204 ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct ureg_src *arg)
1205 {
1206 struct ureg_program *ureg = ps->ureg;
1207 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1208 struct ureg_dst tmp2 = ureg_DECL_temporary(ureg);
1209 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X);
1210
1211 tmp.WriteMask = dst.WriteMask;
1212
1213 if (top != D3DTOP_SELECTARG1 && top != D3DTOP_SELECTARG2 &&
1214 top != D3DTOP_MODULATE && top != D3DTOP_PREMODULATE &&
1215 top != D3DTOP_BLENDDIFFUSEALPHA && top != D3DTOP_BLENDTEXTUREALPHA &&
1216 top != D3DTOP_BLENDFACTORALPHA && top != D3DTOP_BLENDCURRENTALPHA &&
1217 top != D3DTOP_BUMPENVMAP && top != D3DTOP_BUMPENVMAPLUMINANCE &&
1218 top != D3DTOP_LERP)
1219 dst = ureg_saturate(dst);
1220
1221 switch (top) {
1222 case D3DTOP_SELECTARG1:
1223 if (!is_MOV_no_op(dst, arg[1]))
1224 ureg_MOV(ureg, dst, arg[1]);
1225 break;
1226 case D3DTOP_SELECTARG2:
1227 if (!is_MOV_no_op(dst, arg[2]))
1228 ureg_MOV(ureg, dst, arg[2]);
1229 break;
1230 case D3DTOP_MODULATE:
1231 ureg_MUL(ureg, dst, arg[1], arg[2]);
1232 break;
1233 case D3DTOP_MODULATE2X:
1234 ureg_MUL(ureg, tmp, arg[1], arg[2]);
1235 ureg_ADD(ureg, dst, ureg_src(tmp), ureg_src(tmp));
1236 break;
1237 case D3DTOP_MODULATE4X:
1238 ureg_MUL(ureg, tmp, arg[1], arg[2]);
1239 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 4.0f));
1240 break;
1241 case D3DTOP_ADD:
1242 ureg_ADD(ureg, dst, arg[1], arg[2]);
1243 break;
1244 case D3DTOP_ADDSIGNED:
1245 ureg_ADD(ureg, tmp, arg[1], arg[2]);
1246 ureg_SUB(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 0.5f));
1247 break;
1248 case D3DTOP_ADDSIGNED2X:
1249 ureg_ADD(ureg, tmp, arg[1], arg[2]);
1250 ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f));
1251 break;
1252 case D3DTOP_SUBTRACT:
1253 ureg_SUB(ureg, dst, arg[1], arg[2]);
1254 break;
1255 case D3DTOP_ADDSMOOTH:
1256 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), arg[1]);
1257 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]);
1258 break;
1259 case D3DTOP_BLENDDIFFUSEALPHA:
1260 ureg_LRP(ureg, dst, _WWWW(ps->vC[0]), arg[1], arg[2]);
1261 break;
1262 case D3DTOP_BLENDTEXTUREALPHA:
1263 /* XXX: alpha taken from previous stage, texture or result ? */
1264 ureg_LRP(ureg, dst, _W(ps->rTex), arg[1], arg[2]);
1265 break;
1266 case D3DTOP_BLENDFACTORALPHA:
1267 ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]);
1268 break;
1269 case D3DTOP_BLENDTEXTUREALPHAPM:
1270 ureg_SUB(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), _W(ps->rTex));
1271 ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]);
1272 break;
1273 case D3DTOP_BLENDCURRENTALPHA:
1274 ureg_LRP(ureg, dst, _WWWW(ps->rCurSrc), arg[1], arg[2]);
1275 break;
1276 case D3DTOP_PREMODULATE:
1277 ureg_MOV(ureg, dst, arg[1]);
1278 ps->stage.index_pre_mod = ps->stage.index + 1;
1279 break;
1280 case D3DTOP_MODULATEALPHA_ADDCOLOR:
1281 ureg_MAD(ureg, dst, _WWWW(arg[1]), arg[2], arg[1]);
1282 break;
1283 case D3DTOP_MODULATECOLOR_ADDALPHA:
1284 ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1]));
1285 break;
1286 case D3DTOP_MODULATEINVALPHA_ADDCOLOR:
1287 ureg_SUB(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), _WWWW(arg[1]));
1288 ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]);
1289 break;
1290 case D3DTOP_MODULATEINVCOLOR_ADDALPHA:
1291 ureg_SUB(ureg, tmp, ureg_imm1f(ureg, 1.0f), arg[1]);
1292 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], _WWWW(arg[1]));
1293 break;
1294 case D3DTOP_BUMPENVMAP:
1295 break;
1296 case D3DTOP_BUMPENVMAPLUMINANCE:
1297 break;
1298 case D3DTOP_DOTPRODUCT3:
1299 ureg_SUB(ureg, tmp, arg[1], ureg_imm4f(ureg,0.5,0.5,0.5,0.5));
1300 ureg_SUB(ureg, tmp2, arg[2] , ureg_imm4f(ureg,0.5,0.5,0.5,0.5));
1301 ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2));
1302 ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0));
1303 break;
1304 case D3DTOP_MULTIPLYADD:
1305 ureg_MAD(ureg, dst, arg[1], arg[2], arg[0]);
1306 break;
1307 case D3DTOP_LERP:
1308 ureg_LRP(ureg, dst, arg[0], arg[1], arg[2]);
1309 break;
1310 case D3DTOP_DISABLE:
1311 /* no-op ? */
1312 break;
1313 default:
1314 assert(!"invalid D3DTOP");
1315 break;
1316 }
1317 ureg_release_temporary(ureg, tmp);
1318 ureg_release_temporary(ureg, tmp2);
1319 }
1320
1321 static void *
1322 nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key)
1323 {
1324 struct ps_build_ctx ps;
1325 struct ureg_program *ureg = ureg_create(PIPE_SHADER_FRAGMENT);
1326 struct ureg_dst oCol;
1327 unsigned s;
1328 const unsigned texcoord_sn = get_texcoord_sn(device->screen);
1329
1330 memset(&ps, 0, sizeof(ps));
1331 ps.ureg = ureg;
1332 ps.stage.index_pre_mod = -1;
1333
1334 ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR);
1335
1336 ps.rCur = ureg_DECL_temporary(ureg);
1337 ps.rTmp = ureg_DECL_temporary(ureg);
1338 ps.rTex = ureg_DECL_temporary(ureg);
1339 ps.rCurSrc = ureg_src(ps.rCur);
1340 ps.rTmpSrc = ureg_src(ps.rTmp);
1341 ps.rTexSrc = ureg_src(ps.rTex);
1342
1343 /* Initial values */
1344 ureg_MOV(ureg, ps.rCur, ureg_imm1f(ureg, 0.0f));
1345 ureg_MOV(ureg, ps.rTmp, ureg_imm1f(ureg, 0.0f));
1346 ureg_MOV(ureg, ps.rTex, ureg_imm1f(ureg, 0.0f));
1347
1348 for (s = 0; s < 8; ++s) {
1349 ps.s[s] = ureg_src_undef();
1350
1351 if (key->ts[s].colorop != D3DTOP_DISABLE) {
1352 if (key->ts[s].colorarg0 == D3DTA_SPECULAR ||
1353 key->ts[s].colorarg1 == D3DTA_SPECULAR ||
1354 key->ts[s].colorarg2 == D3DTA_SPECULAR)
1355 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1356
1357 if (key->ts[s].colorarg0 == D3DTA_TEXTURE ||
1358 key->ts[s].colorarg1 == D3DTA_TEXTURE ||
1359 key->ts[s].colorarg2 == D3DTA_TEXTURE) {
1360 ps.s[s] = ureg_DECL_sampler(ureg, s);
1361 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1362 }
1363 if (s && (key->ts[s - 1].colorop == D3DTOP_PREMODULATE ||
1364 key->ts[s - 1].alphaop == D3DTOP_PREMODULATE))
1365 ps.s[s] = ureg_DECL_sampler(ureg, s);
1366 }
1367
1368 if (key->ts[s].alphaop != D3DTOP_DISABLE) {
1369 if (key->ts[s].alphaarg0 == D3DTA_SPECULAR ||
1370 key->ts[s].alphaarg1 == D3DTA_SPECULAR ||
1371 key->ts[s].alphaarg2 == D3DTA_SPECULAR)
1372 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1373
1374 if (key->ts[s].alphaarg0 == D3DTA_TEXTURE ||
1375 key->ts[s].alphaarg1 == D3DTA_TEXTURE ||
1376 key->ts[s].alphaarg2 == D3DTA_TEXTURE) {
1377 ps.s[s] = ureg_DECL_sampler(ureg, s);
1378 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE);
1379 }
1380 }
1381 }
1382 if (key->specular)
1383 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR);
1384
1385 oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0);
1386
1387 if (key->ts[0].colorop == D3DTOP_DISABLE &&
1388 key->ts[0].alphaop == D3DTOP_DISABLE)
1389 ureg_MOV(ureg, ps.rCur, ps.vC[0]);
1390 /* Or is it undefined then ? */
1391
1392 /* Run stages.
1393 */
1394 for (s = 0; s < 8; ++s) {
1395 unsigned colorarg[3];
1396 unsigned alphaarg[3];
1397 const uint8_t used_c = ps_d3dtop_args_mask(key->ts[s].colorop);
1398 const uint8_t used_a = ps_d3dtop_args_mask(key->ts[s].alphaop);
1399 struct ureg_dst dst;
1400 struct ureg_src arg[3];
1401
1402 if (key->ts[s].colorop == D3DTOP_DISABLE &&
1403 key->ts[s].alphaop == D3DTOP_DISABLE)
1404 continue;
1405 ps.stage.index = s;
1406
1407 DBG("STAGE[%u]: colorop=%s alphaop=%s\n", s,
1408 nine_D3DTOP_to_str(key->ts[s].colorop),
1409 nine_D3DTOP_to_str(key->ts[s].alphaop));
1410
1411 if (!ureg_src_is_undef(ps.s[s])) {
1412 unsigned target;
1413 struct ureg_src texture_coord = ps.vT[s];
1414 struct ureg_dst delta;
1415 switch (key->ts[s].textarget) {
1416 case 0: target = TGSI_TEXTURE_1D; break;
1417 case 1: target = TGSI_TEXTURE_2D; break;
1418 case 2: target = TGSI_TEXTURE_3D; break;
1419 case 3: target = TGSI_TEXTURE_CUBE; break;
1420 /* this is a 2 bit bitfield, do I really need a default case ? */
1421 }
1422
1423 /* Modify coordinates */
1424 if (s >= 1 &&
1425 (key->ts[s-1].colorop == D3DTOP_BUMPENVMAP ||
1426 key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)) {
1427 delta = ureg_DECL_temporary(ureg);
1428 /* Du' = D3DTSS_BUMPENVMAT00(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT10(stage s-1)*t(s-1)G */
1429 ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _X(ps.rTex), _XXXX(_CONST(8 + s - 1)));
1430 ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _Y(ps.rTex), _ZZZZ(_CONST(8 + s - 1)), ureg_src(delta));
1431 /* Dv' = D3DTSS_BUMPENVMAT01(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT11(stage s-1)*t(s-1)G */
1432 ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _X(ps.rTex), _YYYY(_CONST(8 + s - 1)));
1433 ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _Y(ps.rTex), _WWWW(_CONST(8 + s - 1)), ureg_src(delta));
1434 texture_coord = ureg_src(ureg_DECL_temporary(ureg));
1435 ureg_MOV(ureg, ureg_writemask(ureg_dst(texture_coord), ureg_dst(ps.vT[s]).WriteMask), ps.vT[s]);
1436 ureg_ADD(ureg, ureg_writemask(ureg_dst(texture_coord), TGSI_WRITEMASK_XY), texture_coord, ureg_src(delta));
1437 /* Prepare luminance multiplier
1438 * t(s)RGBA = t(s)RGBA * clamp[(t(s-1)B * D3DTSS_BUMPENVLSCALE(stage s-1)) + D3DTSS_BUMPENVLOFFSET(stage s-1)] */
1439 if (key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE) {
1440 struct ureg_src bumpenvlscale = ((s-1) & 1) ? _ZZZZ(_CONST(16 + (s-1) / 2)) : _XXXX(_CONST(16 + (s-1) / 2));
1441 struct ureg_src bumpenvloffset = ((s-1) & 1) ? _WWWW(_CONST(16 + (s-1) / 2)) : _YYYY(_CONST(16 + (s-1) / 2));
1442
1443 ureg_MAD(ureg, ureg_saturate(ureg_writemask(delta, TGSI_WRITEMASK_X)), _Z(ps.rTex), bumpenvlscale, bumpenvloffset);
1444 }
1445 }
1446 if (key->projected & (3 << (s *2))) {
1447 unsigned dim = 1 + ((key->projected >> (2 * s)) & 3);
1448 if (dim == 4)
1449 ureg_TXP(ureg, ps.rTex, target, texture_coord, ps.s[s]);
1450 else {
1451 struct ureg_dst tmp = ureg_DECL_temporary(ureg);
1452 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(texture_coord, dim-1));
1453 ureg_MUL(ureg, ps.rTmp, _X(tmp), texture_coord);
1454 ureg_TEX(ureg, ps.rTex, target, ps.rTmpSrc, ps.s[s]);
1455 ureg_release_temporary(ureg, tmp);
1456 }
1457 } else {
1458 ureg_TEX(ureg, ps.rTex, target, texture_coord, ps.s[s]);
1459 }
1460 if (s >= 1 && key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)
1461 ureg_MUL(ureg, ps.rTex, ureg_src(ps.rTex), _X(delta));
1462 }
1463
1464 if (((s == 0 && key->ts[0].colorop != D3DTOP_BUMPENVMAP &&
1465 key->ts[0].colorop != D3DTOP_BUMPENVMAPLUMINANCE) ||
1466 (s == 1 &&
1467 (key->ts[0].colorop == D3DTOP_BUMPENVMAP ||
1468 key->ts[0].colorop == D3DTOP_BUMPENVMAPLUMINANCE)))&&
1469 (key->ts[s].resultarg != 0 /* not current */ ||
1470 key->ts[s].colorop == D3DTOP_DISABLE ||
1471 key->ts[s].alphaop == D3DTOP_DISABLE ||
1472 key->ts[s].colorop == D3DTOP_BLENDCURRENTALPHA ||
1473 key->ts[s].alphaop == D3DTOP_BLENDCURRENTALPHA ||
1474 key->ts[s].colorarg0 == D3DTA_CURRENT ||
1475 key->ts[s].colorarg1 == D3DTA_CURRENT ||
1476 key->ts[s].colorarg2 == D3DTA_CURRENT ||
1477 key->ts[s].alphaarg0 == D3DTA_CURRENT ||
1478 key->ts[s].alphaarg1 == D3DTA_CURRENT ||
1479 key->ts[s].alphaarg2 == D3DTA_CURRENT)) {
1480 /* Initialize D3DTA_CURRENT.
1481 * (Yes we can do this before the loop but not until
1482 * NVE4 has an instruction scheduling pass.)
1483 */
1484 ureg_MOV(ureg, ps.rCur, ps.vC[0]);
1485 }
1486
1487 if (key->ts[s].colorop == D3DTOP_BUMPENVMAP ||
1488 key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE)
1489 continue;
1490
1491 dst = ps_get_ts_dst(&ps, key->ts[s].resultarg ? D3DTA_TEMP : D3DTA_CURRENT);
1492
1493 if (ps.stage.index_pre_mod == ps.stage.index) {
1494 ps.rMod = ureg_DECL_temporary(ureg);
1495 ureg_MUL(ureg, ps.rMod, ps.rCurSrc, ps.rTexSrc);
1496 }
1497
1498 colorarg[0] = (key->ts[s].colorarg0 | ((key->colorarg_b4[0] >> s) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f;
1499 colorarg[1] = (key->ts[s].colorarg1 | ((key->colorarg_b4[1] >> s) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f;
1500 colorarg[2] = (key->ts[s].colorarg2 | ((key->colorarg_b4[2] >> s) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f;
1501 alphaarg[0] = (key->ts[s].alphaarg0 | ((key->alphaarg_b4[0] >> s) << 4)) & 0x1f;
1502 alphaarg[1] = (key->ts[s].alphaarg1 | ((key->alphaarg_b4[1] >> s) << 4)) & 0x1f;
1503 alphaarg[2] = (key->ts[s].alphaarg2 | ((key->alphaarg_b4[2] >> s) << 4)) & 0x1f;
1504
1505 if (key->ts[s].colorop != key->ts[s].alphaop ||
1506 colorarg[0] != alphaarg[0] ||
1507 colorarg[1] != alphaarg[1] ||
1508 colorarg[2] != alphaarg[2])
1509 dst.WriteMask = TGSI_WRITEMASK_XYZ;
1510
1511 /* Special DOTPRODUCT behaviour (see wine tests) */
1512 if (key->ts[s].colorop == D3DTOP_DOTPRODUCT3)
1513 dst.WriteMask = TGSI_WRITEMASK_XYZW;
1514
1515 if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]);
1516 if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]);
1517 if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]);
1518 ps_do_ts_op(&ps, key->ts[s].colorop, dst, arg);
1519
1520 if (dst.WriteMask != TGSI_WRITEMASK_XYZW) {
1521 dst.WriteMask = TGSI_WRITEMASK_W;
1522
1523 if (used_a & 0x1) arg[0] = ps_get_ts_arg(&ps, alphaarg[0]);
1524 if (used_a & 0x2) arg[1] = ps_get_ts_arg(&ps, alphaarg[1]);
1525 if (used_a & 0x4) arg[2] = ps_get_ts_arg(&ps, alphaarg[2]);
1526 ps_do_ts_op(&ps, key->ts[s].alphaop, dst, arg);
1527 }
1528 }
1529
1530 if (key->specular)
1531 ureg_ADD(ureg, ps.rCur, ps.rCurSrc, ps.vC[1]);
1532
1533 /* Fog.
1534 */
1535 if (key->fog_mode) {
1536 struct ureg_src vPos;
1537 if (device->screen->get_param(device->screen,
1538 PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) {
1539 vPos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0);
1540 } else {
1541 vPos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0,
1542 TGSI_INTERPOLATE_LINEAR);
1543 }
1544
1545 struct ureg_dst rFog = ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X);
1546 if (key->fog_mode == D3DFOG_EXP) {
1547 ureg_MUL(ureg, rFog, _ZZZZ(vPos), _ZZZZ(_CONST(22)));
1548 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1549 ureg_EX2(ureg, rFog, _X(rFog));
1550 } else
1551 if (key->fog_mode == D3DFOG_EXP2) {
1552 ureg_MUL(ureg, rFog, _ZZZZ(vPos), _ZZZZ(_CONST(22)));
1553 ureg_MUL(ureg, rFog, _X(rFog), _X(rFog));
1554 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f));
1555 ureg_EX2(ureg, rFog, _X(rFog));
1556 } else
1557 if (key->fog_mode == D3DFOG_LINEAR) {
1558 ureg_SUB(ureg, rFog, _XXXX(_CONST(22)), _ZZZZ(vPos));
1559 ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22)));
1560 }
1561 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21));
1562 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1563 } else
1564 if (key->fog) {
1565 struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_FOG, 0, TGSI_INTERPOLATE_PERSPECTIVE);
1566 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _XXXX(vFog), ps.rCurSrc, _CONST(21));
1567 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc);
1568 } else {
1569 ureg_MOV(ureg, oCol, ps.rCurSrc);
1570 }
1571
1572 ureg_END(ureg);
1573 nine_ureg_tgsi_dump(ureg, FALSE);
1574 return ureg_create_shader_and_destroy(ureg, device->pipe);
1575 }
1576
1577 static struct NineVertexShader9 *
1578 nine_ff_get_vs(struct NineDevice9 *device)
1579 {
1580 const struct nine_state *state = &device->state;
1581 struct NineVertexShader9 *vs;
1582 enum pipe_error err;
1583 struct vs_build_ctx bld;
1584 struct nine_ff_vs_key key;
1585 unsigned s, i;
1586 boolean has_indexes = false;
1587 boolean has_weights = false;
1588 char input_texture_coord[8];
1589
1590 assert(sizeof(key) <= sizeof(key.value32));
1591
1592 memset(&key, 0, sizeof(key));
1593 memset(&bld, 0, sizeof(bld));
1594 memset(&input_texture_coord, 0, sizeof(input_texture_coord));
1595
1596 bld.key = &key;
1597
1598 /* FIXME: this shouldn't be NULL, but it is on init */
1599 if (state->vdecl) {
1600 key.color0in_one = 1;
1601 key.color1in_zero = 1;
1602 for (i = 0; i < state->vdecl->nelems; i++) {
1603 uint16_t usage = state->vdecl->usage_map[i];
1604 if (usage == NINE_DECLUSAGE_POSITIONT)
1605 key.position_t = 1;
1606 else if (usage == NINE_DECLUSAGE_i(COLOR, 0))
1607 key.color0in_one = 0;
1608 else if (usage == NINE_DECLUSAGE_i(COLOR, 1))
1609 key.color1in_zero = 0;
1610 else if (usage == NINE_DECLUSAGE_i(BLENDINDICES, 0)) {
1611 has_indexes = true;
1612 key.passthrough |= 1 << usage;
1613 } else if (usage == NINE_DECLUSAGE_i(BLENDWEIGHT, 0)) {
1614 has_weights = true;
1615 key.passthrough |= 1 << usage;
1616 } else if (usage == NINE_DECLUSAGE_i(NORMAL, 0)) {
1617 key.has_normal = 1;
1618 key.passthrough |= 1 << usage;
1619 } else if (usage == NINE_DECLUSAGE_PSIZE)
1620 key.vertexpointsize = 1;
1621 else if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) {
1622 s = usage / NINE_DECLUSAGE_COUNT;
1623 if (s < 8)
1624 input_texture_coord[s] = nine_decltype_get_dim(state->vdecl->decls[i].Type);
1625 else
1626 DBG("FF given texture coordinate >= 8. Ignoring\n");
1627 } else if (usage < NINE_DECLUSAGE_NONE)
1628 key.passthrough |= 1 << usage;
1629 }
1630 }
1631 /* ff vs + ps 3.0: some elements are passed to the ps (wine test).
1632 * We do restrict to indices 0 */
1633 key.passthrough &= ~((1 << NINE_DECLUSAGE_POSITION) | (1 << NINE_DECLUSAGE_PSIZE) |
1634 (1 << NINE_DECLUSAGE_TEXCOORD) | (1 << NINE_DECLUSAGE_POSITIONT) |
1635 (1 << NINE_DECLUSAGE_TESSFACTOR) | (1 << NINE_DECLUSAGE_SAMPLE));
1636 if (!key.position_t)
1637 key.passthrough = 0;
1638 key.pointscale = !!state->rs[D3DRS_POINTSCALEENABLE];
1639
1640 key.lighting = !!state->rs[D3DRS_LIGHTING] && state->ff.num_lights_active;
1641 key.darkness = !!state->rs[D3DRS_LIGHTING] && !state->ff.num_lights_active;
1642 if (key.position_t) {
1643 key.darkness = 0; /* |= key.lighting; */ /* XXX ? */
1644 key.lighting = 0;
1645 }
1646 if ((key.lighting | key.darkness) && state->rs[D3DRS_COLORVERTEX]) {
1647 uint32_t mask = (key.color0in_one ? 0 : 1) | (key.color1in_zero ? 0 : 2);
1648 key.mtl_diffuse = state->rs[D3DRS_DIFFUSEMATERIALSOURCE] & mask;
1649 key.mtl_ambient = state->rs[D3DRS_AMBIENTMATERIALSOURCE] & mask;
1650 key.mtl_specular = state->rs[D3DRS_SPECULARMATERIALSOURCE] & mask;
1651 key.mtl_emissive = state->rs[D3DRS_EMISSIVEMATERIALSOURCE] & mask;
1652 }
1653 key.fog = !!state->rs[D3DRS_FOGENABLE];
1654 key.fog_mode = (!key.position_t && state->rs[D3DRS_FOGENABLE]) ? state->rs[D3DRS_FOGVERTEXMODE] : 0;
1655 if (key.fog_mode)
1656 key.fog_range = state->rs[D3DRS_RANGEFOGENABLE];
1657
1658 key.localviewer = !!state->rs[D3DRS_LOCALVIEWER];
1659 key.normalizenormals = !!state->rs[D3DRS_NORMALIZENORMALS];
1660 key.ucp = !!state->rs[D3DRS_CLIPPLANEENABLE];
1661
1662 if (state->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1663 key.vertexblend_indexed = !!state->rs[D3DRS_INDEXEDVERTEXBLENDENABLE] && has_indexes;
1664
1665 switch (state->rs[D3DRS_VERTEXBLEND]) {
1666 case D3DVBF_0WEIGHTS: key.vertexblend = key.vertexblend_indexed; break;
1667 case D3DVBF_1WEIGHTS: key.vertexblend = 2; break;
1668 case D3DVBF_2WEIGHTS: key.vertexblend = 3; break;
1669 case D3DVBF_3WEIGHTS: key.vertexblend = 4; break;
1670 case D3DVBF_TWEENING: key.vertextween = 1; break;
1671 default:
1672 assert(!"invalid D3DVBF");
1673 break;
1674 }
1675 if (!has_weights && state->rs[D3DRS_VERTEXBLEND] != D3DVBF_0WEIGHTS)
1676 key.vertexblend = 0; /* TODO: if key.vertexblend_indexed, perhaps it should use 1.0 as weight, or revert to D3DVBF_0WEIGHTS */
1677 }
1678
1679 for (s = 0; s < 8; ++s) {
1680 unsigned gen = (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1;
1681 unsigned dim;
1682
1683 if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU)
1684 gen = NINED3DTSS_TCI_PASSTHRU;
1685
1686 if (!input_texture_coord[s] && gen == NINED3DTSS_TCI_PASSTHRU)
1687 gen = NINED3DTSS_TCI_DISABLE;
1688
1689 key.tc_gen |= gen << (s * 3);
1690 key.tc_idx |= (state->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7) << (s * 3);
1691 key.tc_dim_input |= ((input_texture_coord[s]-1) & 0x3) << (s * 2);
1692
1693 dim = state->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7;
1694 if (dim > 4)
1695 dim = input_texture_coord[s];
1696 if (dim == 1) /* NV behaviour */
1697 dim = 0;
1698 key.tc_dim_output |= dim << (s * 3);
1699 }
1700
1701 vs = util_hash_table_get(device->ff.ht_vs, &key);
1702 if (vs)
1703 return vs;
1704 NineVertexShader9_new(device, &vs, NULL, nine_ff_build_vs(device, &bld));
1705
1706 nine_ff_prune_vs(device);
1707 if (vs) {
1708 unsigned n;
1709
1710 memcpy(&vs->ff_key, &key, sizeof(vs->ff_key));
1711
1712 err = util_hash_table_set(device->ff.ht_vs, &vs->ff_key, vs);
1713 (void)err;
1714 assert(err == PIPE_OK);
1715 device->ff.num_vs++;
1716 NineUnknown_ConvertRefToBind(NineUnknown(vs));
1717
1718 vs->num_inputs = bld.num_inputs;
1719 for (n = 0; n < bld.num_inputs; ++n)
1720 vs->input_map[n].ndecl = bld.input[n];
1721
1722 vs->position_t = key.position_t;
1723 vs->point_size = key.vertexpointsize | key.pointscale;
1724 }
1725 return vs;
1726 }
1727
1728 static struct NinePixelShader9 *
1729 nine_ff_get_ps(struct NineDevice9 *device)
1730 {
1731 struct nine_state *state = &device->state;
1732 struct NinePixelShader9 *ps;
1733 enum pipe_error err;
1734 struct nine_ff_ps_key key;
1735 unsigned s;
1736 uint8_t sampler_mask = 0;
1737
1738 assert(sizeof(key) <= sizeof(key.value32));
1739
1740 memset(&key, 0, sizeof(key));
1741 for (s = 0; s < 8; ++s) {
1742 key.ts[s].colorop = state->ff.tex_stage[s][D3DTSS_COLOROP];
1743 key.ts[s].alphaop = state->ff.tex_stage[s][D3DTSS_ALPHAOP];
1744 /* MSDN says D3DTOP_DISABLE disables this and all subsequent stages. */
1745 /* ALPHAOP cannot be disabled if COLOROP is enabled. */
1746 if (key.ts[s].colorop == D3DTOP_DISABLE) {
1747 key.ts[s].alphaop = D3DTOP_DISABLE; /* DISABLE == 1, avoid degenerate keys */
1748 break;
1749 }
1750
1751 if (!state->texture[s] &&
1752 state->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE) {
1753 /* This should also disable the stage. */
1754 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1755 break;
1756 }
1757
1758 if (state->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE ||
1759 state->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE ||
1760 state->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE ||
1761 state->ff.tex_stage[s][D3DTSS_ALPHAARG0] == D3DTA_TEXTURE ||
1762 state->ff.tex_stage[s][D3DTSS_ALPHAARG1] == D3DTA_TEXTURE ||
1763 state->ff.tex_stage[s][D3DTSS_ALPHAARG2] == D3DTA_TEXTURE)
1764 sampler_mask |= (1 << s);
1765
1766 if (key.ts[s].colorop != D3DTOP_DISABLE) {
1767 uint8_t used_c = ps_d3dtop_args_mask(key.ts[s].colorop);
1768 if (used_c & 0x1) key.ts[s].colorarg0 = state->ff.tex_stage[s][D3DTSS_COLORARG0];
1769 if (used_c & 0x2) key.ts[s].colorarg1 = state->ff.tex_stage[s][D3DTSS_COLORARG1];
1770 if (used_c & 0x4) key.ts[s].colorarg2 = state->ff.tex_stage[s][D3DTSS_COLORARG2];
1771 if (used_c & 0x1) key.colorarg_b4[0] |= (state->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) << s;
1772 if (used_c & 0x1) key.colorarg_b5[0] |= (state->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) << s;
1773 if (used_c & 0x2) key.colorarg_b4[1] |= (state->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) << s;
1774 if (used_c & 0x2) key.colorarg_b5[1] |= (state->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) << s;
1775 if (used_c & 0x4) key.colorarg_b4[2] |= (state->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) << s;
1776 if (used_c & 0x4) key.colorarg_b5[2] |= (state->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) << s;
1777 }
1778 if (key.ts[s].alphaop != D3DTOP_DISABLE) {
1779 uint8_t used_a = ps_d3dtop_args_mask(key.ts[s].alphaop);
1780 if (used_a & 0x1) key.ts[s].alphaarg0 = state->ff.tex_stage[s][D3DTSS_ALPHAARG0];
1781 if (used_a & 0x2) key.ts[s].alphaarg1 = state->ff.tex_stage[s][D3DTSS_ALPHAARG1];
1782 if (used_a & 0x4) key.ts[s].alphaarg2 = state->ff.tex_stage[s][D3DTSS_ALPHAARG2];
1783 if (used_a & 0x1) key.alphaarg_b4[0] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) << s;
1784 if (used_a & 0x2) key.alphaarg_b4[1] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) << s;
1785 if (used_a & 0x4) key.alphaarg_b4[2] |= (state->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) << s;
1786 }
1787 key.ts[s].resultarg = state->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP;
1788
1789 if (state->texture[s]) {
1790 switch (state->texture[s]->base.type) {
1791 case D3DRTYPE_TEXTURE: key.ts[s].textarget = 1; break;
1792 case D3DRTYPE_VOLUMETEXTURE: key.ts[s].textarget = 2; break;
1793 case D3DRTYPE_CUBETEXTURE: key.ts[s].textarget = 3; break;
1794 default:
1795 assert(!"unexpected texture type");
1796 break;
1797 }
1798 } else {
1799 key.ts[s].textarget = 1;
1800 }
1801 }
1802
1803 key.projected = nine_ff_get_projected_key(state);
1804 key.specular = !!state->rs[D3DRS_SPECULARENABLE];
1805
1806 for (; s < 8; ++s)
1807 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE;
1808 if (state->rs[D3DRS_FOGENABLE])
1809 key.fog_mode = state->rs[D3DRS_FOGTABLEMODE];
1810 key.fog = !!state->rs[D3DRS_FOGENABLE];
1811
1812 ps = util_hash_table_get(device->ff.ht_ps, &key);
1813 if (ps)
1814 return ps;
1815 NinePixelShader9_new(device, &ps, NULL, nine_ff_build_ps(device, &key));
1816
1817 nine_ff_prune_ps(device);
1818 if (ps) {
1819 memcpy(&ps->ff_key, &key, sizeof(ps->ff_key));
1820
1821 err = util_hash_table_set(device->ff.ht_ps, &ps->ff_key, ps);
1822 (void)err;
1823 assert(err == PIPE_OK);
1824 device->ff.num_ps++;
1825 NineUnknown_ConvertRefToBind(NineUnknown(ps));
1826
1827 ps->rt_mask = 0x1;
1828 ps->sampler_mask = sampler_mask;
1829 }
1830 return ps;
1831 }
1832
1833 #define GET_D3DTS(n) nine_state_access_transform(state, D3DTS_##n, FALSE)
1834 #define IS_D3DTS_DIRTY(s,n) ((s)->ff.changed.transform[(D3DTS_##n) / 32] & (1 << ((D3DTS_##n) % 32)))
1835 static void
1836 nine_ff_load_vs_transforms(struct NineDevice9 *device)
1837 {
1838 struct nine_state *state = &device->state;
1839 D3DMATRIX T;
1840 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1841 unsigned i;
1842
1843 /* TODO: make this nicer, and only upload the ones we need */
1844 /* TODO: use ff.vs_const as storage of W, V, P matrices */
1845
1846 if (IS_D3DTS_DIRTY(state, WORLD) ||
1847 IS_D3DTS_DIRTY(state, VIEW) ||
1848 IS_D3DTS_DIRTY(state, PROJECTION)) {
1849 /* WVP, WV matrices */
1850 nine_d3d_matrix_matrix_mul(&M[1], GET_D3DTS(WORLD), GET_D3DTS(VIEW));
1851 nine_d3d_matrix_matrix_mul(&M[0], &M[1], GET_D3DTS(PROJECTION));
1852
1853 /* normal matrix == transpose(inverse(WV)) */
1854 nine_d3d_matrix_inverse(&T, &M[1]);
1855 nine_d3d_matrix_transpose(&M[4], &T);
1856
1857 /* P matrix */
1858 M[2] = *GET_D3DTS(PROJECTION);
1859
1860 /* V and W matrix */
1861 nine_d3d_matrix_inverse(&M[3], GET_D3DTS(VIEW));
1862 M[40] = M[1];
1863 }
1864
1865 if (state->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) {
1866 /* load other world matrices */
1867 for (i = 1; i <= 8; ++i) {
1868 nine_d3d_matrix_matrix_mul(&M[40 + i], GET_D3DTS(WORLDMATRIX(i)), GET_D3DTS(VIEW));
1869 }
1870 }
1871
1872 device->ff.vs_const[30 * 4] = asfloat(state->rs[D3DRS_TWEENFACTOR]);
1873 }
1874
1875 static void
1876 nine_ff_load_lights(struct NineDevice9 *device)
1877 {
1878 struct nine_state *state = &device->state;
1879 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1880 unsigned l;
1881
1882 if (state->changed.group & NINE_STATE_FF_MATERIAL) {
1883 const D3DMATERIAL9 *mtl = &state->ff.material;
1884
1885 memcpy(&dst[20], &mtl->Diffuse, 4 * sizeof(float));
1886 memcpy(&dst[21], &mtl->Ambient, 4 * sizeof(float));
1887 memcpy(&dst[22], &mtl->Specular, 4 * sizeof(float));
1888 dst[23].x = mtl->Power;
1889 memcpy(&dst[24], &mtl->Emissive, 4 * sizeof(float));
1890 d3dcolor_to_rgba(&dst[25].x, state->rs[D3DRS_AMBIENT]);
1891 dst[19].x = dst[25].x * mtl->Ambient.r + mtl->Emissive.r;
1892 dst[19].y = dst[25].y * mtl->Ambient.g + mtl->Emissive.g;
1893 dst[19].z = dst[25].z * mtl->Ambient.b + mtl->Emissive.b;
1894 dst[19].w = mtl->Ambient.a + mtl->Emissive.a;
1895 }
1896
1897 if (!(state->changed.group & NINE_STATE_FF_LIGHTING))
1898 return;
1899
1900 for (l = 0; l < state->ff.num_lights_active; ++l) {
1901 const D3DLIGHT9 *light = &state->ff.light[state->ff.active_light[l]];
1902
1903 dst[32 + l * 8].x = light->Type;
1904 dst[32 + l * 8].y = light->Attenuation0;
1905 dst[32 + l * 8].z = light->Attenuation1;
1906 dst[32 + l * 8].w = light->Attenuation2;
1907 memcpy(&dst[33 + l * 8].x, &light->Diffuse, sizeof(light->Diffuse));
1908 memcpy(&dst[34 + l * 8].x, &light->Specular, sizeof(light->Specular));
1909 memcpy(&dst[35 + l * 8].x, &light->Ambient, sizeof(light->Ambient));
1910 nine_d3d_vector4_matrix_mul((D3DVECTOR *)&dst[36 + l * 8].x, &light->Position, GET_D3DTS(VIEW));
1911 nine_d3d_vector3_matrix_mul((D3DVECTOR *)&dst[37 + l * 8].x, &light->Direction, GET_D3DTS(VIEW));
1912 dst[36 + l * 8].w = light->Type == D3DLIGHT_DIRECTIONAL ? 1e9f : light->Range;
1913 dst[37 + l * 8].w = light->Falloff;
1914 dst[38 + l * 8].x = cosf(light->Theta * 0.5f);
1915 dst[38 + l * 8].y = cosf(light->Phi * 0.5f);
1916 dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y);
1917 dst[39 + l * 8].w = (l + 1) == state->ff.num_lights_active;
1918 }
1919 }
1920
1921 static void
1922 nine_ff_load_point_and_fog_params(struct NineDevice9 *device)
1923 {
1924 const struct nine_state *state = &device->state;
1925 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1926
1927 if (!(state->changed.group & NINE_STATE_FF_OTHER))
1928 return;
1929 dst[26].x = asfloat(state->rs[D3DRS_POINTSIZE_MIN]);
1930 dst[26].y = asfloat(state->rs[D3DRS_POINTSIZE_MAX]);
1931 dst[26].z = asfloat(state->rs[D3DRS_POINTSIZE]);
1932 dst[26].w = asfloat(state->rs[D3DRS_POINTSCALE_A]);
1933 dst[27].x = asfloat(state->rs[D3DRS_POINTSCALE_B]);
1934 dst[27].y = asfloat(state->rs[D3DRS_POINTSCALE_C]);
1935 dst[28].x = asfloat(state->rs[D3DRS_FOGEND]);
1936 dst[28].y = 1.0f / (asfloat(state->rs[D3DRS_FOGEND]) - asfloat(state->rs[D3DRS_FOGSTART]));
1937 if (isinf(dst[28].y))
1938 dst[28].y = 0.0f;
1939 dst[28].z = asfloat(state->rs[D3DRS_FOGDENSITY]);
1940 }
1941
1942 static void
1943 nine_ff_load_tex_matrices(struct NineDevice9 *device)
1944 {
1945 struct nine_state *state = &device->state;
1946 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const;
1947 unsigned s;
1948
1949 if (!(state->ff.changed.transform[0] & 0xff0000))
1950 return;
1951 for (s = 0; s < 8; ++s) {
1952 if (IS_D3DTS_DIRTY(state, TEXTURE0 + s))
1953 nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(state, D3DTS_TEXTURE0 + s, FALSE));
1954 }
1955 }
1956
1957 static void
1958 nine_ff_load_ps_params(struct NineDevice9 *device)
1959 {
1960 const struct nine_state *state = &device->state;
1961 struct fvec4 *dst = (struct fvec4 *)device->ff.ps_const;
1962 unsigned s;
1963
1964 if (!(state->changed.group & (NINE_STATE_FF_PSSTAGES | NINE_STATE_FF_OTHER)))
1965 return;
1966
1967 for (s = 0; s < 8; ++s)
1968 d3dcolor_to_rgba(&dst[s].x, state->ff.tex_stage[s][D3DTSS_CONSTANT]);
1969
1970 for (s = 0; s < 8; ++s) {
1971 dst[8 + s].x = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT00]);
1972 dst[8 + s].y = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT01]);
1973 dst[8 + s].z = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT10]);
1974 dst[8 + s].w = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVMAT11]);
1975 if (s & 1) {
1976 dst[16 + s / 2].z = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
1977 dst[16 + s / 2].w = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
1978 } else {
1979 dst[16 + s / 2].x = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]);
1980 dst[16 + s / 2].y = asfloat(state->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]);
1981 }
1982 }
1983
1984 d3dcolor_to_rgba(&dst[20].x, state->rs[D3DRS_TEXTUREFACTOR]);
1985 d3dcolor_to_rgba(&dst[21].x, state->rs[D3DRS_FOGCOLOR]);
1986 dst[22].x = asfloat(state->rs[D3DRS_FOGEND]);
1987 dst[22].y = 1.0f / (asfloat(state->rs[D3DRS_FOGEND]) - asfloat(state->rs[D3DRS_FOGSTART]));
1988 dst[22].z = asfloat(state->rs[D3DRS_FOGDENSITY]);
1989 }
1990
1991 static void
1992 nine_ff_load_viewport_info(struct NineDevice9 *device)
1993 {
1994 D3DVIEWPORT9 *viewport = &device->state.viewport;
1995 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const;
1996 float diffZ = viewport->MaxZ - viewport->MinZ;
1997
1998 /* Note: the other functions avoids to fill the const again if nothing changed.
1999 * But we don't have much to fill, and adding code to allow that may be complex
2000 * so just fill it always */
2001 dst[100].x = 2.0f / (float)(viewport->Width);
2002 dst[100].y = 2.0f / (float)(viewport->Height);
2003 dst[100].z = (diffZ == 0.0f) ? 0.0f : (1.0f / diffZ);
2004 dst[100].w = (float)(viewport->Width);
2005 dst[101].x = (float)(viewport->X);
2006 dst[101].y = (float)(viewport->Y);
2007 dst[101].z = (float)(viewport->MinZ);
2008 }
2009
2010 void
2011 nine_ff_update(struct NineDevice9 *device)
2012 {
2013 struct nine_state *state = &device->state;
2014 struct pipe_constant_buffer cb;
2015
2016 DBG("vs=%p ps=%p\n", device->state.vs, device->state.ps);
2017
2018 /* NOTE: the only reference belongs to the hash table */
2019 if (!state->programmable_vs) {
2020 device->ff.vs = nine_ff_get_vs(device);
2021 device->state.changed.group |= NINE_STATE_VS;
2022 }
2023 if (!device->state.ps) {
2024 device->ff.ps = nine_ff_get_ps(device);
2025 device->state.changed.group |= NINE_STATE_PS;
2026 }
2027
2028 if (!state->programmable_vs) {
2029 nine_ff_load_vs_transforms(device);
2030 nine_ff_load_tex_matrices(device);
2031 nine_ff_load_lights(device);
2032 nine_ff_load_point_and_fog_params(device);
2033 nine_ff_load_viewport_info(device);
2034
2035 memset(state->ff.changed.transform, 0, sizeof(state->ff.changed.transform));
2036
2037 cb.buffer_offset = 0;
2038 cb.buffer = NULL;
2039 cb.user_buffer = device->ff.vs_const;
2040 cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float);
2041
2042 if (!device->driver_caps.user_cbufs) {
2043 u_upload_data(device->constbuf_uploader,
2044 0,
2045 cb.buffer_size,
2046 device->constbuf_alignment,
2047 cb.user_buffer,
2048 &cb.buffer_offset,
2049 &cb.buffer);
2050 u_upload_unmap(device->constbuf_uploader);
2051 cb.user_buffer = NULL;
2052 }
2053 state->pipe.cb_vs_ff = cb;
2054 state->commit |= NINE_STATE_COMMIT_CONST_VS;
2055 }
2056
2057 if (!device->state.ps) {
2058 nine_ff_load_ps_params(device);
2059
2060 cb.buffer_offset = 0;
2061 cb.buffer = NULL;
2062 cb.user_buffer = device->ff.ps_const;
2063 cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float);
2064
2065 if (!device->driver_caps.user_cbufs) {
2066 u_upload_data(device->constbuf_uploader,
2067 0,
2068 cb.buffer_size,
2069 device->constbuf_alignment,
2070 cb.user_buffer,
2071 &cb.buffer_offset,
2072 &cb.buffer);
2073 u_upload_unmap(device->constbuf_uploader);
2074 cb.user_buffer = NULL;
2075 }
2076 state->pipe.cb_ps_ff = cb;
2077 state->commit |= NINE_STATE_COMMIT_CONST_PS;
2078 }
2079
2080 device->state.changed.group &= ~NINE_STATE_FF;
2081 }
2082
2083
2084 boolean
2085 nine_ff_init(struct NineDevice9 *device)
2086 {
2087 device->ff.ht_vs = util_hash_table_create(nine_ff_vs_key_hash,
2088 nine_ff_vs_key_comp);
2089 device->ff.ht_ps = util_hash_table_create(nine_ff_ps_key_hash,
2090 nine_ff_ps_key_comp);
2091
2092 device->ff.ht_fvf = util_hash_table_create(nine_ff_fvf_key_hash,
2093 nine_ff_fvf_key_comp);
2094
2095 device->ff.vs_const = CALLOC(NINE_FF_NUM_VS_CONST, 4 * sizeof(float));
2096 device->ff.ps_const = CALLOC(NINE_FF_NUM_PS_CONST, 4 * sizeof(float));
2097
2098 return device->ff.ht_vs && device->ff.ht_ps &&
2099 device->ff.ht_fvf &&
2100 device->ff.vs_const && device->ff.ps_const;
2101 }
2102
2103 static enum pipe_error nine_ff_ht_delete_cb(void *key, void *value, void *data)
2104 {
2105 NineUnknown_Unbind(NineUnknown(value));
2106 return PIPE_OK;
2107 }
2108
2109 void
2110 nine_ff_fini(struct NineDevice9 *device)
2111 {
2112 if (device->ff.ht_vs) {
2113 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
2114 util_hash_table_destroy(device->ff.ht_vs);
2115 }
2116 if (device->ff.ht_ps) {
2117 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
2118 util_hash_table_destroy(device->ff.ht_ps);
2119 }
2120 if (device->ff.ht_fvf) {
2121 util_hash_table_foreach(device->ff.ht_fvf, nine_ff_ht_delete_cb, NULL);
2122 util_hash_table_destroy(device->ff.ht_fvf);
2123 }
2124 device->ff.vs = NULL; /* destroyed by unbinding from hash table */
2125 device->ff.ps = NULL;
2126
2127 FREE(device->ff.vs_const);
2128 FREE(device->ff.ps_const);
2129 }
2130
2131 static void
2132 nine_ff_prune_vs(struct NineDevice9 *device)
2133 {
2134 if (device->ff.num_vs > 100) {
2135 /* could destroy the bound one here, so unbind */
2136 device->pipe->bind_vs_state(device->pipe, NULL);
2137 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL);
2138 util_hash_table_clear(device->ff.ht_vs);
2139 device->ff.num_vs = 0;
2140 device->state.changed.group |= NINE_STATE_VS;
2141 }
2142 }
2143 static void
2144 nine_ff_prune_ps(struct NineDevice9 *device)
2145 {
2146 if (device->ff.num_ps > 100) {
2147 /* could destroy the bound one here, so unbind */
2148 device->pipe->bind_fs_state(device->pipe, NULL);
2149 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL);
2150 util_hash_table_clear(device->ff.ht_ps);
2151 device->ff.num_ps = 0;
2152 device->state.changed.group |= NINE_STATE_PS;
2153 }
2154 }
2155
2156 /* ========================================================================== */
2157
2158 /* Matrix multiplication:
2159 *
2160 * in memory: 0 1 2 3 (row major)
2161 * 4 5 6 7
2162 * 8 9 a b
2163 * c d e f
2164 *
2165 * cA cB cC cD
2166 * r0 = (r0 * cA) (r0 * cB) . .
2167 * r1 = (r1 * cA) (r1 * cB)
2168 * r2 = (r2 * cA) .
2169 * r3 = (r3 * cA) .
2170 *
2171 * r: (11) (12) (13) (14)
2172 * (21) (22) (23) (24)
2173 * (31) (32) (33) (34)
2174 * (41) (42) (43) (44)
2175 * l: (11 12 13 14)
2176 * (21 22 23 24)
2177 * (31 32 33 34)
2178 * (41 42 43 44)
2179 *
2180 * v: (x y z 1 )
2181 *
2182 * t.xyzw = MUL(v.xxxx, r[0]);
2183 * t.xyzw = MAD(v.yyyy, r[1], t.xyzw);
2184 * t.xyzw = MAD(v.zzzz, r[2], t.xyzw);
2185 * v.xyzw = MAD(v.wwww, r[3], t.xyzw);
2186 *
2187 * v.x = DP4(v, c[0]);
2188 * v.y = DP4(v, c[1]);
2189 * v.z = DP4(v, c[2]);
2190 * v.w = DP4(v, c[3]) = 1
2191 */
2192
2193 /*
2194 static void
2195 nine_D3DMATRIX_print(const D3DMATRIX *M)
2196 {
2197 DBG("\n(%f %f %f %f)\n"
2198 "(%f %f %f %f)\n"
2199 "(%f %f %f %f)\n"
2200 "(%f %f %f %f)\n",
2201 M->m[0][0], M->m[0][1], M->m[0][2], M->m[0][3],
2202 M->m[1][0], M->m[1][1], M->m[1][2], M->m[1][3],
2203 M->m[2][0], M->m[2][1], M->m[2][2], M->m[2][3],
2204 M->m[3][0], M->m[3][1], M->m[3][2], M->m[3][3]);
2205 }
2206 */
2207
2208 static inline float
2209 nine_DP4_row_col(const D3DMATRIX *A, int r, const D3DMATRIX *B, int c)
2210 {
2211 return A->m[r][0] * B->m[0][c] +
2212 A->m[r][1] * B->m[1][c] +
2213 A->m[r][2] * B->m[2][c] +
2214 A->m[r][3] * B->m[3][c];
2215 }
2216
2217 static inline float
2218 nine_DP4_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2219 {
2220 return v->x * M->m[0][c] +
2221 v->y * M->m[1][c] +
2222 v->z * M->m[2][c] +
2223 1.0f * M->m[3][c];
2224 }
2225
2226 static inline float
2227 nine_DP3_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c)
2228 {
2229 return v->x * M->m[0][c] +
2230 v->y * M->m[1][c] +
2231 v->z * M->m[2][c];
2232 }
2233
2234 void
2235 nine_d3d_matrix_matrix_mul(D3DMATRIX *D, const D3DMATRIX *L, const D3DMATRIX *R)
2236 {
2237 D->_11 = nine_DP4_row_col(L, 0, R, 0);
2238 D->_12 = nine_DP4_row_col(L, 0, R, 1);
2239 D->_13 = nine_DP4_row_col(L, 0, R, 2);
2240 D->_14 = nine_DP4_row_col(L, 0, R, 3);
2241
2242 D->_21 = nine_DP4_row_col(L, 1, R, 0);
2243 D->_22 = nine_DP4_row_col(L, 1, R, 1);
2244 D->_23 = nine_DP4_row_col(L, 1, R, 2);
2245 D->_24 = nine_DP4_row_col(L, 1, R, 3);
2246
2247 D->_31 = nine_DP4_row_col(L, 2, R, 0);
2248 D->_32 = nine_DP4_row_col(L, 2, R, 1);
2249 D->_33 = nine_DP4_row_col(L, 2, R, 2);
2250 D->_34 = nine_DP4_row_col(L, 2, R, 3);
2251
2252 D->_41 = nine_DP4_row_col(L, 3, R, 0);
2253 D->_42 = nine_DP4_row_col(L, 3, R, 1);
2254 D->_43 = nine_DP4_row_col(L, 3, R, 2);
2255 D->_44 = nine_DP4_row_col(L, 3, R, 3);
2256 }
2257
2258 void
2259 nine_d3d_vector4_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2260 {
2261 d->x = nine_DP4_vec_col(v, M, 0);
2262 d->y = nine_DP4_vec_col(v, M, 1);
2263 d->z = nine_DP4_vec_col(v, M, 2);
2264 }
2265
2266 void
2267 nine_d3d_vector3_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M)
2268 {
2269 d->x = nine_DP3_vec_col(v, M, 0);
2270 d->y = nine_DP3_vec_col(v, M, 1);
2271 d->z = nine_DP3_vec_col(v, M, 2);
2272 }
2273
2274 void
2275 nine_d3d_matrix_transpose(D3DMATRIX *D, const D3DMATRIX *M)
2276 {
2277 unsigned i, j;
2278 for (i = 0; i < 4; ++i)
2279 for (j = 0; j < 4; ++j)
2280 D->m[i][j] = M->m[j][i];
2281 }
2282
2283 #define _M_ADD_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2284 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2285 if (t > 0.0f) pos += t; else neg += t; } while(0)
2286
2287 #define _M_SUB_PROD_1i_2j_3k_4l(i,j,k,l) do { \
2288 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \
2289 if (t > 0.0f) neg -= t; else pos -= t; } while(0)
2290 float
2291 nine_d3d_matrix_det(const D3DMATRIX *M)
2292 {
2293 float pos = 0.0f;
2294 float neg = 0.0f;
2295
2296 _M_ADD_PROD_1i_2j_3k_4l(1, 2, 3, 4);
2297 _M_ADD_PROD_1i_2j_3k_4l(1, 3, 4, 2);
2298 _M_ADD_PROD_1i_2j_3k_4l(1, 4, 2, 3);
2299
2300 _M_ADD_PROD_1i_2j_3k_4l(2, 1, 4, 3);
2301 _M_ADD_PROD_1i_2j_3k_4l(2, 3, 1, 4);
2302 _M_ADD_PROD_1i_2j_3k_4l(2, 4, 3, 1);
2303
2304 _M_ADD_PROD_1i_2j_3k_4l(3, 1, 2, 4);
2305 _M_ADD_PROD_1i_2j_3k_4l(3, 2, 4, 1);
2306 _M_ADD_PROD_1i_2j_3k_4l(3, 4, 1, 2);
2307
2308 _M_ADD_PROD_1i_2j_3k_4l(4, 1, 3, 2);
2309 _M_ADD_PROD_1i_2j_3k_4l(4, 2, 1, 3);
2310 _M_ADD_PROD_1i_2j_3k_4l(4, 3, 2, 1);
2311
2312 _M_SUB_PROD_1i_2j_3k_4l(1, 2, 4, 3);
2313 _M_SUB_PROD_1i_2j_3k_4l(1, 3, 2, 4);
2314 _M_SUB_PROD_1i_2j_3k_4l(1, 4, 3, 2);
2315
2316 _M_SUB_PROD_1i_2j_3k_4l(2, 1, 3, 4);
2317 _M_SUB_PROD_1i_2j_3k_4l(2, 3, 4, 1);
2318 _M_SUB_PROD_1i_2j_3k_4l(2, 4, 1, 3);
2319
2320 _M_SUB_PROD_1i_2j_3k_4l(3, 1, 4, 2);
2321 _M_SUB_PROD_1i_2j_3k_4l(3, 2, 1, 4);
2322 _M_SUB_PROD_1i_2j_3k_4l(3, 4, 2, 1);
2323
2324 _M_SUB_PROD_1i_2j_3k_4l(4, 1, 2, 3);
2325 _M_SUB_PROD_1i_2j_3k_4l(4, 2, 3, 1);
2326 _M_SUB_PROD_1i_2j_3k_4l(4, 3, 1, 2);
2327
2328 return pos + neg;
2329 }
2330
2331 /* XXX: Probably better to just use src/mesa/math/m_matrix.c because
2332 * I have no idea where this code came from.
2333 */
2334 void
2335 nine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M)
2336 {
2337 int i, k;
2338 float det;
2339
2340 D->m[0][0] =
2341 M->m[1][1] * M->m[2][2] * M->m[3][3] -
2342 M->m[1][1] * M->m[3][2] * M->m[2][3] -
2343 M->m[1][2] * M->m[2][1] * M->m[3][3] +
2344 M->m[1][2] * M->m[3][1] * M->m[2][3] +
2345 M->m[1][3] * M->m[2][1] * M->m[3][2] -
2346 M->m[1][3] * M->m[3][1] * M->m[2][2];
2347
2348 D->m[0][1] =
2349 -M->m[0][1] * M->m[2][2] * M->m[3][3] +
2350 M->m[0][1] * M->m[3][2] * M->m[2][3] +
2351 M->m[0][2] * M->m[2][1] * M->m[3][3] -
2352 M->m[0][2] * M->m[3][1] * M->m[2][3] -
2353 M->m[0][3] * M->m[2][1] * M->m[3][2] +
2354 M->m[0][3] * M->m[3][1] * M->m[2][2];
2355
2356 D->m[0][2] =
2357 M->m[0][1] * M->m[1][2] * M->m[3][3] -
2358 M->m[0][1] * M->m[3][2] * M->m[1][3] -
2359 M->m[0][2] * M->m[1][1] * M->m[3][3] +
2360 M->m[0][2] * M->m[3][1] * M->m[1][3] +
2361 M->m[0][3] * M->m[1][1] * M->m[3][2] -
2362 M->m[0][3] * M->m[3][1] * M->m[1][2];
2363
2364 D->m[0][3] =
2365 -M->m[0][1] * M->m[1][2] * M->m[2][3] +
2366 M->m[0][1] * M->m[2][2] * M->m[1][3] +
2367 M->m[0][2] * M->m[1][1] * M->m[2][3] -
2368 M->m[0][2] * M->m[2][1] * M->m[1][3] -
2369 M->m[0][3] * M->m[1][1] * M->m[2][2] +
2370 M->m[0][3] * M->m[2][1] * M->m[1][2];
2371
2372 D->m[1][0] =
2373 -M->m[1][0] * M->m[2][2] * M->m[3][3] +
2374 M->m[1][0] * M->m[3][2] * M->m[2][3] +
2375 M->m[1][2] * M->m[2][0] * M->m[3][3] -
2376 M->m[1][2] * M->m[3][0] * M->m[2][3] -
2377 M->m[1][3] * M->m[2][0] * M->m[3][2] +
2378 M->m[1][3] * M->m[3][0] * M->m[2][2];
2379
2380 D->m[1][1] =
2381 M->m[0][0] * M->m[2][2] * M->m[3][3] -
2382 M->m[0][0] * M->m[3][2] * M->m[2][3] -
2383 M->m[0][2] * M->m[2][0] * M->m[3][3] +
2384 M->m[0][2] * M->m[3][0] * M->m[2][3] +
2385 M->m[0][3] * M->m[2][0] * M->m[3][2] -
2386 M->m[0][3] * M->m[3][0] * M->m[2][2];
2387
2388 D->m[1][2] =
2389 -M->m[0][0] * M->m[1][2] * M->m[3][3] +
2390 M->m[0][0] * M->m[3][2] * M->m[1][3] +
2391 M->m[0][2] * M->m[1][0] * M->m[3][3] -
2392 M->m[0][2] * M->m[3][0] * M->m[1][3] -
2393 M->m[0][3] * M->m[1][0] * M->m[3][2] +
2394 M->m[0][3] * M->m[3][0] * M->m[1][2];
2395
2396 D->m[1][3] =
2397 M->m[0][0] * M->m[1][2] * M->m[2][3] -
2398 M->m[0][0] * M->m[2][2] * M->m[1][3] -
2399 M->m[0][2] * M->m[1][0] * M->m[2][3] +
2400 M->m[0][2] * M->m[2][0] * M->m[1][3] +
2401 M->m[0][3] * M->m[1][0] * M->m[2][2] -
2402 M->m[0][3] * M->m[2][0] * M->m[1][2];
2403
2404 D->m[2][0] =
2405 M->m[1][0] * M->m[2][1] * M->m[3][3] -
2406 M->m[1][0] * M->m[3][1] * M->m[2][3] -
2407 M->m[1][1] * M->m[2][0] * M->m[3][3] +
2408 M->m[1][1] * M->m[3][0] * M->m[2][3] +
2409 M->m[1][3] * M->m[2][0] * M->m[3][1] -
2410 M->m[1][3] * M->m[3][0] * M->m[2][1];
2411
2412 D->m[2][1] =
2413 -M->m[0][0] * M->m[2][1] * M->m[3][3] +
2414 M->m[0][0] * M->m[3][1] * M->m[2][3] +
2415 M->m[0][1] * M->m[2][0] * M->m[3][3] -
2416 M->m[0][1] * M->m[3][0] * M->m[2][3] -
2417 M->m[0][3] * M->m[2][0] * M->m[3][1] +
2418 M->m[0][3] * M->m[3][0] * M->m[2][1];
2419
2420 D->m[2][2] =
2421 M->m[0][0] * M->m[1][1] * M->m[3][3] -
2422 M->m[0][0] * M->m[3][1] * M->m[1][3] -
2423 M->m[0][1] * M->m[1][0] * M->m[3][3] +
2424 M->m[0][1] * M->m[3][0] * M->m[1][3] +
2425 M->m[0][3] * M->m[1][0] * M->m[3][1] -
2426 M->m[0][3] * M->m[3][0] * M->m[1][1];
2427
2428 D->m[2][3] =
2429 -M->m[0][0] * M->m[1][1] * M->m[2][3] +
2430 M->m[0][0] * M->m[2][1] * M->m[1][3] +
2431 M->m[0][1] * M->m[1][0] * M->m[2][3] -
2432 M->m[0][1] * M->m[2][0] * M->m[1][3] -
2433 M->m[0][3] * M->m[1][0] * M->m[2][1] +
2434 M->m[0][3] * M->m[2][0] * M->m[1][1];
2435
2436 D->m[3][0] =
2437 -M->m[1][0] * M->m[2][1] * M->m[3][2] +
2438 M->m[1][0] * M->m[3][1] * M->m[2][2] +
2439 M->m[1][1] * M->m[2][0] * M->m[3][2] -
2440 M->m[1][1] * M->m[3][0] * M->m[2][2] -
2441 M->m[1][2] * M->m[2][0] * M->m[3][1] +
2442 M->m[1][2] * M->m[3][0] * M->m[2][1];
2443
2444 D->m[3][1] =
2445 M->m[0][0] * M->m[2][1] * M->m[3][2] -
2446 M->m[0][0] * M->m[3][1] * M->m[2][2] -
2447 M->m[0][1] * M->m[2][0] * M->m[3][2] +
2448 M->m[0][1] * M->m[3][0] * M->m[2][2] +
2449 M->m[0][2] * M->m[2][0] * M->m[3][1] -
2450 M->m[0][2] * M->m[3][0] * M->m[2][1];
2451
2452 D->m[3][2] =
2453 -M->m[0][0] * M->m[1][1] * M->m[3][2] +
2454 M->m[0][0] * M->m[3][1] * M->m[1][2] +
2455 M->m[0][1] * M->m[1][0] * M->m[3][2] -
2456 M->m[0][1] * M->m[3][0] * M->m[1][2] -
2457 M->m[0][2] * M->m[1][0] * M->m[3][1] +
2458 M->m[0][2] * M->m[3][0] * M->m[1][1];
2459
2460 D->m[3][3] =
2461 M->m[0][0] * M->m[1][1] * M->m[2][2] -
2462 M->m[0][0] * M->m[2][1] * M->m[1][2] -
2463 M->m[0][1] * M->m[1][0] * M->m[2][2] +
2464 M->m[0][1] * M->m[2][0] * M->m[1][2] +
2465 M->m[0][2] * M->m[1][0] * M->m[2][1] -
2466 M->m[0][2] * M->m[2][0] * M->m[1][1];
2467
2468 det =
2469 M->m[0][0] * D->m[0][0] +
2470 M->m[1][0] * D->m[0][1] +
2471 M->m[2][0] * D->m[0][2] +
2472 M->m[3][0] * D->m[0][3];
2473
2474 if (det < 1e-30) {/* non inversible */
2475 *D = *M; /* wine tests */
2476 return;
2477 }
2478
2479 det = 1.0 / det;
2480
2481 for (i = 0; i < 4; i++)
2482 for (k = 0; k < 4; k++)
2483 D->m[i][k] *= det;
2484
2485 #ifdef DEBUG
2486 {
2487 D3DMATRIX I;
2488
2489 nine_d3d_matrix_matrix_mul(&I, D, M);
2490
2491 for (i = 0; i < 4; ++i)
2492 for (k = 0; k < 4; ++k)
2493 if (fabsf(I.m[i][k] - (float)(i == k)) > 1e-3)
2494 DBG("Matrix inversion check FAILED !\n");
2495 }
2496 #endif
2497 }