src/mesa/drivers/dri/r300/r300_vertprog.c

   1 /**************************************************************************
   2
   3 Copyright (C) 2005  Aapo Tahkola <aet@rasterburn.org>
   4 Copyright (C) 2008  Oliver McFadden <z3ro.geek@gmail.com>
   5
   6 All Rights Reserved.
   7
   8 Permission is hereby granted, free of charge, to any person obtaining a
   9 copy of this software and associated documentation files (the "Software"),
  10 to deal in the Software without restriction, including without limitation
  11 on the rights to use, copy, modify, merge, publish, distribute, sub
  12 license, and/or sell copies of the Software, and to permit persons to whom
  13 the Software is furnished to do so, subject to the following conditions:
  14
  15 The above copyright notice and this permission notice (including the next
  16 paragraph) shall be included in all copies or substantial portions of the
  17 Software.
  18
  19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21 FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  22 THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
  23 DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  24 OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  25 USE OR OTHER DEALINGS IN THE SOFTWARE.
  26
  27 **************************************************************************/
  28
  29 /* Radeon R5xx Acceleration, Revision 1.2 */
  30
  31 #include "main/glheader.h"
  32 #include "main/macros.h"
  33 #include "main/enums.h"
  34 #include "program/program.h"
  35 #include "program/programopt.h"
  36 #include "program/prog_instruction.h"
  37 #include "program/prog_parameter.h"
  38 #include "program/prog_print.h"
  39 #include "program/prog_statevars.h"
  40 #include "tnl/tnl.h"
  41
  42 #include "compiler/radeon_compiler.h"
  43 #include "radeon_mesa_to_rc.h"
  44 #include "r300_context.h"
  45 #include "r300_fragprog_common.h"
  46 #include "r300_state.h"
  47
  48 /**
  49  * Write parameter array for the given vertex program into dst.
  50  * Return the total number of components written.
  51  */
  52 static int r300VertexProgUpdateParams(GLcontext * ctx, struct r300_vertex_program *vp, float *dst)
  53 {
  54         int i;
  55
  56         if (vp->Base->IsNVProgram) {
  57                 _mesa_load_tracked_matrices(ctx);
  58         } else {
  59                 if (vp->Base->Base.Parameters) {
  60                         _mesa_load_state_parameters(ctx, vp->Base->Base.Parameters);
  61                 }
  62         }
  63
  64         for(i = 0; i < vp->code.constants.Count; ++i) {
  65                 const float * src = 0;
  66                 const struct rc_constant * constant = &vp->code.constants.Constants[i];
  67
  68                 switch(constant->Type) {
  69                 case RC_CONSTANT_EXTERNAL:
  70                         if (vp->Base->IsNVProgram) {
  71                                 src = ctx->VertexProgram.Parameters[constant->u.External];
  72                         } else {
  73                                 src = vp->Base->Base.Parameters->ParameterValues[constant->u.External];
  74                         }
  75                         break;
  76
  77                 case RC_CONSTANT_IMMEDIATE:
  78                         src = constant->u.Immediate;
  79                         break;
  80                 }
  81
  82                 assert(src);
  83                 dst[4*i] = src[0];
  84                 dst[4*i + 1] = src[1];
  85                 dst[4*i + 2] = src[2];
  86                 dst[4*i + 3] = src[3];
  87         }
  88
  89         return 4 * vp->code.constants.Count;
  90 }
  91
  92 static GLbitfield compute_required_outputs(struct gl_vertex_program * vp, GLbitfield fpreads)
  93 {
  94         GLbitfield outputs = 0;
  95         int i;
  96
  97 #define ADD_OUTPUT(fp_attr, vp_result) \
  98         do { \
  99                 if (fpreads & (1 << (fp_attr))) \
 100                         outputs |= (1 << (vp_result)); \
 101         } while (0)
 102
 103         ADD_OUTPUT(FRAG_ATTRIB_COL0, VERT_RESULT_COL0);
 104         ADD_OUTPUT(FRAG_ATTRIB_COL1, VERT_RESULT_COL1);
 105
 106         for (i = 0; i <= 7; ++i) {
 107                 ADD_OUTPUT(FRAG_ATTRIB_TEX0 + i, VERT_RESULT_TEX0 + i);
 108         }
 109
 110 #undef ADD_OUTPUT
 111
 112         if ((fpreads & (1 << FRAG_ATTRIB_COL0)) &&
 113             (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC0)))
 114                 outputs |= 1 << VERT_RESULT_BFC0;
 115         if ((fpreads & (1 << FRAG_ATTRIB_COL1)) &&
 116             (vp->Base.OutputsWritten & (1 << VERT_RESULT_BFC1)))
 117                 outputs |= 1 << VERT_RESULT_BFC1;
 118
 119         outputs |= 1 << VERT_RESULT_HPOS;
 120         if (vp->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ))
 121                 outputs |= 1 << VERT_RESULT_PSIZ;
 122
 123         return outputs;
 124 }
 125
 126
 127 static void t_inputs_outputs(struct r300_vertex_program_compiler * c)
 128 {
 129         int i;
 130         int cur_reg;
 131         GLuint OutputsWritten, InputsRead;
 132
 133         OutputsWritten = c->Base.Program.OutputsWritten;
 134         InputsRead = c->Base.Program.InputsRead;
 135
 136         cur_reg = -1;
 137         for (i = 0; i < VERT_ATTRIB_MAX; i++) {
 138                 if (InputsRead & (1 << i))
 139                         c->code->inputs[i] = ++cur_reg;
 140                 else
 141                         c->code->inputs[i] = -1;
 142         }
 143
 144         cur_reg = 0;
 145         for (i = 0; i < VERT_RESULT_MAX; i++)
 146                 c->code->outputs[i] = -1;
 147
 148         assert(OutputsWritten & (1 << VERT_RESULT_HPOS));
 149
 150         if (OutputsWritten & (1 << VERT_RESULT_HPOS)) {
 151                 c->code->outputs[VERT_RESULT_HPOS] = cur_reg++;
 152         }
 153
 154         if (OutputsWritten & (1 << VERT_RESULT_PSIZ)) {
 155                 c->code->outputs[VERT_RESULT_PSIZ] = cur_reg++;
 156         }
 157
 158         /* If we're writing back facing colors we need to send
 159          * four colors to make front/back face colors selection work.
 160          * If the vertex program doesn't write all 4 colors, lets
 161          * pretend it does by skipping output index reg so the colors
 162          * get written into appropriate output vectors.
 163          */
 164         if (OutputsWritten & (1 << VERT_RESULT_COL0)) {
 165                 c->code->outputs[VERT_RESULT_COL0] = cur_reg++;
 166         } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
 167                 OutputsWritten & (1 << VERT_RESULT_BFC1)) {
 168                 cur_reg++;
 169         }
 170
 171         if (OutputsWritten & (1 << VERT_RESULT_COL1)) {
 172                 c->code->outputs[VERT_RESULT_COL1] = cur_reg++;
 173         } else if (OutputsWritten & (1 << VERT_RESULT_BFC0) ||
 174                 OutputsWritten & (1 << VERT_RESULT_BFC1)) {
 175                 cur_reg++;
 176         }
 177
 178         if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
 179                 c->code->outputs[VERT_RESULT_BFC0] = cur_reg++;
 180         } else if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
 181                 cur_reg++;
 182         }
 183
 184         if (OutputsWritten & (1 << VERT_RESULT_BFC1)) {
 185                 c->code->outputs[VERT_RESULT_BFC1] = cur_reg++;
 186         } else if (OutputsWritten & (1 << VERT_RESULT_BFC0)) {
 187                 cur_reg++;
 188         }
 189
 190         for (i = VERT_RESULT_TEX0; i <= VERT_RESULT_TEX7; i++) {
 191                 if (OutputsWritten & (1 << i)) {
 192                         c->code->outputs[i] = cur_reg++;
 193                 }
 194         }
 195
 196         if (OutputsWritten & (1 << VERT_RESULT_FOGC)) {
 197                 c->code->outputs[VERT_RESULT_FOGC] = cur_reg++;
 198         }
 199 }
 200
 201 /**
 202  * The NV_vertex_program spec mandates that all registers be
 203  * initialized to zero. We do this here unconditionally.
 204  *
 205  * \note We rely on dead-code elimination in the compiler.
 206  */
 207 static void initialize_NV_registers(struct radeon_compiler * compiler)
 208 {
 209         unsigned int reg;
 210         struct rc_instruction * inst;
 211
 212         for(reg = 0; reg < 12; ++reg) {
 213                 inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
 214                 inst->U.I.Opcode = RC_OPCODE_MOV;
 215                 inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
 216                 inst->U.I.DstReg.Index = reg;
 217                 inst->U.I.SrcReg[0].File = RC_FILE_NONE;
 218                 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
 219         }
 220
 221         inst = rc_insert_new_instruction(compiler, &compiler->Program.Instructions);
 222         inst->U.I.Opcode = RC_OPCODE_ARL;
 223         inst->U.I.DstReg.File = RC_FILE_ADDRESS;
 224         inst->U.I.DstReg.Index = 0;
 225         inst->U.I.DstReg.WriteMask = WRITEMASK_X;
 226         inst->U.I.SrcReg[0].File = RC_FILE_NONE;
 227         inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000;
 228 }
 229
 230 static struct r300_vertex_program *build_program(GLcontext *ctx,
 231                                                  struct r300_vertex_program_key *wanted_key,
 232                                                  const struct gl_vertex_program *mesa_vp)
 233 {
 234         struct r300_vertex_program *vp;
 235         struct r300_vertex_program_compiler compiler;
 236
 237         vp = calloc(1, sizeof(*vp));
 238         vp->Base = _mesa_clone_vertex_program(ctx, mesa_vp);
 239         memcpy(&vp->key, wanted_key, sizeof(vp->key));
 240
 241         rc_init(&compiler.Base);
 242         compiler.Base.Debug = (RADEON_DEBUG & RADEON_VERTS) ? GL_TRUE : GL_FALSE;
 243
 244         compiler.code = &vp->code;
 245         compiler.RequiredOutputs = compute_required_outputs(vp->Base, vp->key.FpReads);
 246         compiler.SetHwInputOutput = &t_inputs_outputs;
 247
 248         if (compiler.Base.Debug) {
 249                 fprintf(stderr, "Initial vertex program:\n");
 250                 _mesa_print_program(&vp->Base->Base);
 251                 fflush(stderr);
 252         }
 253
 254         if (mesa_vp->IsPositionInvariant) {
 255                 _mesa_insert_mvp_code(ctx, vp->Base);
 256         }
 257
 258         radeon_mesa_to_rc_program(&compiler.Base, &vp->Base->Base);
 259
 260         if (mesa_vp->IsNVProgram)
 261                 initialize_NV_registers(&compiler.Base);
 262
 263         rc_move_output(&compiler.Base, VERT_RESULT_PSIZ, VERT_RESULT_PSIZ, WRITEMASK_X);
 264
 265         if (vp->key.WPosAttr != FRAG_ATTRIB_MAX) {
 266                 unsigned int vp_wpos_attr = vp->key.WPosAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0;
 267
 268                 /* Set empty writemask for instructions writing to vp_wpos_attr
 269                  * before moving the wpos attr there.
 270                  * Such instructions will be removed by DCE.
 271                  */
 272                 rc_move_output(&compiler.Base, vp_wpos_attr, vp->key.WPosAttr, 0);
 273                 rc_copy_output(&compiler.Base, VERT_RESULT_HPOS, vp_wpos_attr);
 274         }
 275
 276         if (vp->key.FogAttr != FRAG_ATTRIB_MAX) {
 277                 unsigned int vp_fog_attr = vp->key.FogAttr - FRAG_ATTRIB_TEX0 + VERT_RESULT_TEX0;
 278
 279                 /* Set empty writemask for instructions writing to vp_fog_attr
 280                  * before moving the fog attr there.
 281                  * Such instructions will be removed by DCE.
 282                  */
 283                 rc_move_output(&compiler.Base, vp_fog_attr, vp->key.FogAttr, 0);
 284                 rc_move_output(&compiler.Base, VERT_RESULT_FOGC, vp_fog_attr, WRITEMASK_X);
 285         }
 286
 287         r3xx_compile_vertex_program(&compiler);
 288
 289         if (vp->code.constants.Count > ctx->Const.VertexProgram.MaxParameters) {
 290                 rc_error(&compiler.Base, "Program exceeds constant buffer size limit\n");
 291         }
 292
 293         vp->error = compiler.Base.Error;
 294
 295         vp->Base->Base.InputsRead = vp->code.InputsRead;
 296         vp->Base->Base.OutputsWritten = vp->code.OutputsWritten;
 297
 298         rc_destroy(&compiler.Base);
 299
 300         return vp;
 301 }
 302
 303 struct r300_vertex_program * r300SelectAndTranslateVertexShader(GLcontext *ctx)
 304 {
 305         r300ContextPtr r300 = R300_CONTEXT(ctx);
 306         struct r300_vertex_program_key wanted_key = { 0 };
 307         struct r300_vertex_program_cont *vpc;
 308         struct r300_vertex_program *vp;
 309
 310         vpc = (struct r300_vertex_program_cont *)ctx->VertexProgram._Current;
 311
 312         if (!r300->selected_fp) {
 313                 /* This can happen when GetProgramiv is called to check
 314                  * whether the program runs natively.
 315                  *
 316                  * To be honest, this is not a very good solution,
 317                  * but solving the problem of reporting good values
 318                  * for those queries is tough anyway considering that
 319                  * we recompile vertex programs based on the precise
 320                  * fragment program that is in use.
 321                  */
 322                 r300SelectAndTranslateFragmentShader(ctx);
 323         }
 324
 325         assert(r300->selected_fp);
 326         wanted_key.FpReads = r300->selected_fp->InputsRead;
 327         wanted_key.FogAttr = r300->selected_fp->fog_attr;
 328         wanted_key.WPosAttr = r300->selected_fp->wpos_attr;
 329
 330         for (vp = vpc->progs; vp; vp = vp->next) {
 331                 if (memcmp(&vp->key, &wanted_key, sizeof(wanted_key)) == 0) {
 332                         return r300->selected_vp = vp;
 333                 }
 334         }
 335
 336         vp = build_program(ctx, &wanted_key, &vpc->mesa_program);
 337         vp->next = vpc->progs;
 338         vpc->progs = vp;
 339
 340         return r300->selected_vp = vp;
 341 }
 342
 343 #define bump_vpu_count(ptr, new_count)   do { \
 344                 drm_r300_cmd_header_t* _p=((drm_r300_cmd_header_t*)(ptr)); \
 345                 int _nc=(new_count)/4; \
 346                 if(_nc>_p->vpu.count)_p->vpu.count=_nc; \
 347         } while(0)
 348
 349 static void r300EmitVertexProgram(r300ContextPtr r300, int dest, struct r300_vertex_program_code *code)
 350 {
 351         int i;
 352
 353         assert((code->length > 0) && (code->length % 4 == 0));
 354
 355         switch ((dest >> 8) & 0xf) {
 356                 case 0:
 357                         R300_STATECHANGE(r300, vpi);
 358                         for (i = 0; i < code->length; i++)
 359                                 r300->hw.vpi.cmd[R300_VPI_INSTR_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
 360                         bump_vpu_count(r300->hw.vpi.cmd, code->length + 4 * (dest & 0xff));
 361                         break;
 362                 case 2:
 363                         R300_STATECHANGE(r300, vpp);
 364                         for (i = 0; i < code->length; i++)
 365                                 r300->hw.vpp.cmd[R300_VPP_PARAM_0 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
 366                         bump_vpu_count(r300->hw.vpp.cmd, code->length + 4 * (dest & 0xff));
 367                         break;
 368                 case 4:
 369                         R300_STATECHANGE(r300, vps);
 370                         for (i = 0; i < code->length; i++)
 371                                 r300->hw.vps.cmd[1 + i + 4 * (dest & 0xff)] = (code->body.d[i]);
 372                         bump_vpu_count(r300->hw.vps.cmd, code->length + 4 * (dest & 0xff));
 373                         break;
 374                 default:
 375                         fprintf(stderr, "%s:%s don't know how to handle dest %04x\n", __FILE__, __FUNCTION__, dest);
 376                         exit(-1);
 377         }
 378 }
 379
 380 void r300SetupVertexProgram(r300ContextPtr rmesa)
 381 {
 382         GLcontext *ctx = rmesa->radeon.glCtx;
 383         struct r300_vertex_program *prog = rmesa->selected_vp;
 384         int inst_count = 0;
 385         int param_count = 0;
 386
 387         /* Reset state, in case we don't use something */
 388         ((drm_r300_cmd_header_t *) rmesa->hw.vpp.cmd)->vpu.count = 0;
 389         ((drm_r300_cmd_header_t *) rmesa->hw.vpi.cmd)->vpu.count = 0;
 390         ((drm_r300_cmd_header_t *) rmesa->hw.vps.cmd)->vpu.count = 0;
 391
 392         R300_STATECHANGE(rmesa, vap_cntl);
 393         R300_STATECHANGE(rmesa, vpp);
 394         param_count = r300VertexProgUpdateParams(ctx, prog, (float *)&rmesa->hw.vpp.cmd[R300_VPP_PARAM_0]);
 395         if (!rmesa->radeon.radeonScreen->kernel_mm && param_count > 255 * 4) {
 396                 WARN_ONCE("Too many VP params, expect rendering errors\n");
 397         }
 398         /* Prevent the overflow (vpu.count is u8) */
 399         bump_vpu_count(rmesa->hw.vpp.cmd, MIN2(255 * 4, param_count));
 400         param_count /= 4;
 401
 402         r300EmitVertexProgram(rmesa, R300_PVS_CODE_START, &(prog->code));
 403         inst_count = (prog->code.length / 4) - 1;
 404
 405         r300VapCntl(rmesa, _mesa_bitcount(prog->code.InputsRead),
 406                                  _mesa_bitcount(prog->code.OutputsWritten), prog->code.num_temporaries);
 407
 408         R300_STATECHANGE(rmesa, pvs);
 409         rmesa->hw.pvs.cmd[R300_PVS_CNTL_1] = (0 << R300_PVS_FIRST_INST_SHIFT) | (inst_count << R300_PVS_XYZW_VALID_INST_SHIFT) |
 410                                 (inst_count << R300_PVS_LAST_INST_SHIFT);
 411
 412         rmesa->hw.pvs.cmd[R300_PVS_CNTL_2] = (0 << R300_PVS_CONST_BASE_OFFSET_SHIFT) | ((param_count - 1) << R300_PVS_MAX_CONST_ADDR_SHIFT);
 413         rmesa->hw.pvs.cmd[R300_PVS_CNTL_3] = (inst_count << R300_PVS_LAST_VTX_SRC_INST_SHIFT);
 414 }