r300g: allow unaligned vertex formats if the stride is dword-aligned
[mesa.git] / src / gallium / drivers / r300 / r300_state.c
index 654983398330ac84cdb7ea96e86911a63f89dab8..e8171e949030e30d1ec10b4257671cce9b0ec2b4 100644 (file)
 #include "pipe/p_config.h"
 
 #include "r300_context.h"
+#include "r300_emit.h"
 #include "r300_reg.h"
 #include "r300_screen.h"
 #include "r300_screen_buffer.h"
+#include "r300_state.h"
 #include "r300_state_inlines.h"
 #include "r300_fs.h"
+#include "r300_texture.h"
 #include "r300_vs.h"
 #include "r300_winsys.h"
 
@@ -640,16 +643,13 @@ static void
     if (!!old_state->zsbuf != !!state->zsbuf) {
         r300->dsa_state.dirty = TRUE;
     }
-    if (!r300->scissor_enabled) {
-        r300->scissor_state.dirty = TRUE;
-    }
 
     r300_fb_update_tiling_flags(r300, r300->fb_state.state, state);
 
     memcpy(r300->fb_state.state, state, sizeof(struct pipe_framebuffer_state));
 
     r300->fb_state.size = (10 * state->nr_cbufs) + (2 * (4 - state->nr_cbufs)) +
-                          (state->zsbuf ? 10 : 0) + 8;
+                          (state->zsbuf ? 10 : 0) + 11;
 
     /* Polygon offset depends on the zbuffer bit depth. */
     if (state->zsbuf && r300->polygon_offset_enabled) {
@@ -681,12 +681,28 @@ static void* r300_create_fs_state(struct pipe_context* pipe,
     fs->state = *shader;
     fs->state.tokens = tgsi_dup_tokens(shader->tokens);
 
-    tgsi_scan_shader(shader->tokens, &fs->info);
-    r300_shader_read_fs_inputs(&fs->info, &fs->inputs);
-
     return (void*)fs;
 }
 
+void r300_mark_fs_code_dirty(struct r300_context *r300)
+{
+    struct r300_fragment_shader* fs = r300_fs(r300);
+
+    r300->fs.dirty = TRUE;
+    r300->fs_rc_constant_state.dirty = TRUE;
+    r300->fs_constants.dirty = TRUE;
+
+    if (r300->screen->caps.is_r500) {
+        r300->fs.size = r500_get_fs_atom_size(r300);
+        r300->fs_rc_constant_state.size = fs->shader->rc_state_count * 7;
+        r300->fs_constants.size = fs->shader->externals_count * 4 + 3;
+    } else {
+        r300->fs.size = r300_get_fs_atom_size(r300);
+        r300->fs_rc_constant_state.size = fs->shader->rc_state_count * 5;
+        r300->fs_constants.size = fs->shader->externals_count * 4 + 1;
+    }
+}
+
 /* Bind fragment shader state. */
 static void r300_bind_fs_state(struct pipe_context* pipe, void* shader)
 {
@@ -694,20 +710,19 @@ static void r300_bind_fs_state(struct pipe_context* pipe, void* shader)
     struct r300_fragment_shader* fs = (struct r300_fragment_shader*)shader;
 
     if (fs == NULL) {
-        r300->fs = NULL;
+        r300->fs.state = NULL;
         return;
     }
 
-    r300->fs = fs;
+    r300->fs.state = fs;
     r300_pick_fragment_shader(r300);
+    r300_mark_fs_code_dirty(r300);
 
     r300->rs_block_state.dirty = TRUE; /* Will be updated before the emission. */
 
     if (r300->vs_state.state && r300_vertex_shader_setup_wpos(r300)) {
         r300->vap_output_state.dirty = TRUE;
     }
-
-    r300->dirty_state |= R300_NEW_FRAGMENT_SHADER | R300_NEW_FRAGMENT_SHADER_CONSTANTS;
 }
 
 /* Delete fragment shader state. */
@@ -743,6 +758,7 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
                                   const struct pipe_rasterizer_state* state)
 {
     struct r300_rs_state* rs = CALLOC_STRUCT(r300_rs_state);
+    int i;
 
     /* Copy rasterizer state for Draw. */
     rs->rs = *state;
@@ -835,6 +851,32 @@ static void* r300_create_rs_state(struct pipe_context* pipe,
         rs->color_control = R300_SHADE_MODEL_SMOOTH;
     }
 
+    rs->clip_rule = state->scissor ? 0xAAAA : 0xFFFF;
+
+    /* Point sprites */
+    if (state->sprite_coord_enable) {
+        rs->stuffing_enable = R300_GB_POINT_STUFF_ENABLE;
+       for (i = 0; i < 8; i++) {
+           if (state->sprite_coord_enable & (1 << i))
+               rs->stuffing_enable |=
+                   R300_GB_TEX_STR << (R300_GB_TEX0_SOURCE_SHIFT + (i*2));
+       }
+
+        rs->point_texcoord_left = 0.0f;
+        rs->point_texcoord_right = 1.0f;
+
+        switch (state->sprite_coord_mode) {
+            case PIPE_SPRITE_COORD_UPPER_LEFT:
+                rs->point_texcoord_top = 0.0f;
+                rs->point_texcoord_bottom = 1.0f;
+                break;
+            case PIPE_SPRITE_COORD_LOWER_LEFT:
+                rs->point_texcoord_top = 1.0f;
+                rs->point_texcoord_bottom = 0.0f;
+                break;
+        }
+    }
+
     return (void*)rs;
 }
 
@@ -843,26 +885,26 @@ static void r300_bind_rs_state(struct pipe_context* pipe, void* state)
 {
     struct r300_context* r300 = r300_context(pipe);
     struct r300_rs_state* rs = (struct r300_rs_state*)state;
-    boolean scissor_was_enabled = r300->scissor_enabled;
+    int last_sprite_coord_enable = r300->sprite_coord_enable;
 
     if (r300->draw) {
         draw_flush(r300->draw);
-        draw_set_rasterizer_state(r300->draw, &rs->rs);
+        draw_set_rasterizer_state(r300->draw, &rs->rs, state);
     }
 
     if (rs) {
         r300->polygon_offset_enabled = rs->rs.offset_cw || rs->rs.offset_ccw;
-        r300->scissor_enabled = rs->rs.scissor;
+        r300->sprite_coord_enable = rs->rs.sprite_coord_enable;
     } else {
         r300->polygon_offset_enabled = FALSE;
-        r300->scissor_enabled = FALSE;
+        r300->sprite_coord_enable = 0;
     }
 
     UPDATE_STATE(state, r300->rs_state);
-    r300->rs_state.size = 17 + (r300->polygon_offset_enabled ? 5 : 0);
+    r300->rs_state.size = 26 + (r300->polygon_offset_enabled ? 5 : 0);
 
-    if (scissor_was_enabled != r300->scissor_enabled) {
-        r300->scissor_state.dirty = TRUE;
+    if (last_sprite_coord_enable != r300->sprite_coord_enable) {
+        r300->rs_block_state.dirty = TRUE;
     }
 }
 
@@ -938,17 +980,9 @@ static void r300_bind_sampler_states(struct pipe_context* pipe,
     }
 
     memcpy(state->sampler_states, states, sizeof(void*) * count);
-    state->sampler_count = count;
+    state->sampler_state_count = count;
 
     r300->textures_state.dirty = TRUE;
-
-    /* Pick a fragment shader based on the texture compare state. */
-    if (r300->fs && count) {
-        if (r300_pick_fragment_shader(r300)) {
-            r300->dirty_state |= R300_NEW_FRAGMENT_SHADER |
-                                 R300_NEW_FRAGMENT_SHADER_CONSTANTS;
-        }
-    }
 }
 
 static void r300_lacks_vertex_textures(struct pipe_context* pipe,
@@ -972,7 +1006,6 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe,
     struct r300_texture *texture;
     unsigned i;
     unsigned tex_units = r300->screen->caps.num_tex_units;
-    boolean is_r500 = r300->screen->caps.is_r500;
     boolean dirty_tex = FALSE;
 
     if (count > tex_units) {
@@ -980,9 +1013,10 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe,
     }
 
     for (i = 0; i < count; i++) {
-        if (state->fragment_sampler_views[i] != views[i]) {
-            pipe_sampler_view_reference(&state->fragment_sampler_views[i],
-                                        views[i]);
+        if (&state->sampler_views[i]->base != views[i]) {
+            pipe_sampler_view_reference(
+                    (struct pipe_sampler_view**)&state->sampler_views[i],
+                    views[i]);
 
             if (!views[i]) {
                 continue;
@@ -991,24 +1025,24 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe,
             /* A new sampler view (= texture)... */
             dirty_tex = TRUE;
 
-            /* R300-specific - set the texrect factor in the fragment shader */
+            /* Set the texrect factor in the fragment shader.
+             * Needed for RECT and NPOT fallback. */
             texture = r300_texture(views[i]->texture);
-            if (!is_r500 && texture->uses_pitch) {
-                /* XXX It would be nice to re-emit just 1 constant,
-                 * XXX not all of them */
-                r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS;
+            if (texture->uses_pitch) {
+                r300->fs_rc_constant_state.dirty = TRUE;
             }
         }
     }
 
     for (i = count; i < tex_units; i++) {
-        if (state->fragment_sampler_views[i]) {
-            pipe_sampler_view_reference(&state->fragment_sampler_views[i],
-                                        NULL);
+        if (state->sampler_views[i]) {
+            pipe_sampler_view_reference(
+                    (struct pipe_sampler_view**)&state->sampler_views[i],
+                    NULL);
         }
     }
 
-    state->texture_count = count;
+    state->sampler_view_count = count;
 
     r300->textures_state.dirty = TRUE;
 
@@ -1019,27 +1053,43 @@ static void r300_set_fragment_sampler_views(struct pipe_context* pipe,
 
 static struct pipe_sampler_view *
 r300_create_sampler_view(struct pipe_context *pipe,
-                         struct pipe_texture *texture,
+                         struct pipe_resource *texture,
                          const struct pipe_sampler_view *templ)
 {
-   struct pipe_sampler_view *view = CALLOC_STRUCT(pipe_sampler_view);
-
-   if (view) {
-      *view = *templ;
-      view->reference.count = 1;
-      view->texture = NULL;
-      pipe_texture_reference(&view->texture, texture);
-      view->context = pipe;
-   }
+    struct r300_sampler_view *view = CALLOC_STRUCT(r300_sampler_view);
+    struct r300_texture *tex = r300_texture(texture);
+    unsigned char swizzle[4];
+
+    if (view) {
+        view->base = *templ;
+        view->base.reference.count = 1;
+        view->base.context = pipe;
+        view->base.texture = NULL;
+        pipe_resource_reference(&view->base.texture, texture);
+
+        swizzle[0] = templ->swizzle_r;
+        swizzle[1] = templ->swizzle_g;
+        swizzle[2] = templ->swizzle_b;
+        swizzle[3] = templ->swizzle_a;
+
+        /* XXX Enable swizzles when they become supported. Now we get RGBA
+         * everywhere. And do testing! */
+        view->format = tex->tx_format;
+        view->format.format1 |= r300_translate_texformat(templ->format,
+                                                         0); /*swizzle);*/
+        if (r300_screen(pipe->screen)->caps.is_r500) {
+            view->format.format2 |= r500_tx_format_msb_bit(templ->format);
+        }
+    }
 
-   return view;
+    return (struct pipe_sampler_view*)view;
 }
 
 static void
 r300_sampler_view_destroy(struct pipe_context *pipe,
                           struct pipe_sampler_view *view)
 {
-   pipe_texture_reference(&view->texture, NULL);
+   pipe_resource_reference(&view->texture, NULL);
    FREE(view);
 }
 
@@ -1051,9 +1101,7 @@ static void r300_set_scissor_state(struct pipe_context* pipe,
     memcpy(r300->scissor_state.state, state,
         sizeof(struct pipe_scissor_state));
 
-    if (r300->scissor_enabled) {
-        r300->scissor_state.dirty = TRUE;
-    }
+    r300->scissor_state.dirty = TRUE;
 }
 
 static void r300_set_viewport_state(struct pipe_context* pipe,
@@ -1094,8 +1142,8 @@ static void r300_set_viewport_state(struct pipe_context* pipe,
     }
 
     r300->viewport_state.dirty = TRUE;
-    if (r300->fs && r300->fs->inputs.wpos != ATTR_UNUSED) {
-        r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS;
+    if (r300->fs.state && r300_fs(r300)->shader->inputs.wpos != ATTR_UNUSED) {
+        r300->fs_rc_constant_state.dirty = TRUE;
     }
 }
 
@@ -1119,10 +1167,9 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
         if (buffers[i].buffer) {
             if (buffers[i].stride % 4 != 0) {
                 // XXX Shouldn't we align the buffer?
                 fprintf(stderr, "r300_set_vertex_buffers: "
                         "Unaligned buffer stride %i isn't supported.\n",
                         buffers[i].stride);
-                assert(0);
                 abort();
             }
         }
@@ -1133,7 +1180,7 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
         vbo = (struct pipe_vertex_buffer*)&buffers[i];
 
         /* Reference our buffer. */
-        pipe_buffer_reference(&r300->vertex_buffer[i].buffer, vbo->buffer);
+        pipe_resource_reference(&r300->vertex_buffer[i].buffer, vbo->buffer);
 
         /* Skip NULL buffers */
         if (!buffers[i].buffer) {
@@ -1145,9 +1192,13 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
         }
 
         if (vbo->max_index == ~0) {
-            /* Bogus value from broken state tracker; hax it. */
-            vbo->max_index =
-                (vbo->buffer->size - vbo->buffer_offset) / vbo->stride;
+           /* A zero stride means only one vertex value is read, so max index is 1. */
+           /* XXX Consider converting such attributes to VS constants, like svga does. */
+           if (!vbo->stride)
+               vbo->max_index = 1;
+           else
+               vbo->max_index =
+                                (vbo->buffer->width0 - vbo->buffer_offset) / vbo->stride;
         }
 
         max_index = MIN2(vbo->max_index, max_index);
@@ -1155,7 +1206,7 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
 
     for (; i < r300->vertex_buffer_count; i++) {
         /* Dereference any old buffers. */
-        pipe_buffer_reference(&r300->vertex_buffer[i].buffer, NULL);
+        pipe_resource_reference(&r300->vertex_buffer[i].buffer, NULL);
     }
 
     memcpy(r300->vertex_buffer, buffers,
@@ -1179,7 +1230,11 @@ static void r300_vertex_psc(struct r300_vertex_element_state *velems)
     enum pipe_format format;
     unsigned i;
 
-    assert(velems->count <= 16);
+    if (velems->count > 16) {
+        fprintf(stderr, "r300: More than 16 vertex elements are not supported,"
+                " requested %i, using 16.\n", velems->count);
+        velems->count = 16;
+    }
 
     /* Vertex shaders have no semantics on their inputs,
      * so PSC should just route stuff based on the vertex elements,
@@ -1216,6 +1271,7 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
 {
     struct r300_vertex_element_state *velems;
     unsigned i, size;
+    enum pipe_format *format;
 
     assert(count <= PIPE_MAX_ATTRIBS);
     velems = CALLOC_STRUCT(r300_vertex_element_state);
@@ -1226,13 +1282,46 @@ static void* r300_create_vertex_elements_state(struct pipe_context* pipe,
         if (r300_screen(pipe->screen)->caps.has_tcl) {
             /* Check if the format is aligned to the size of DWORD. */
             for (i = 0; i < count; i++) {
-                size = util_format_get_blocksize(attribs[i].src_format);
+                format = &velems->velem[i].src_format;
+
+                /* Replace some formats with their aligned counterparts,
+                 * this is OK because we check for aligned strides too. */
+                /* XXX We need X instead of A in the format names. */
+                switch (*format) {
+                    case PIPE_FORMAT_R8G8B8_UNORM:
+                        *format = PIPE_FORMAT_R8G8B8X8_UNORM;
+                        continue;
+                    case PIPE_FORMAT_R8G8B8_SNORM:
+                        *format = PIPE_FORMAT_R8G8B8A8_SNORM;
+                        continue;
+                    case PIPE_FORMAT_R8G8B8_USCALED:
+                        *format = PIPE_FORMAT_R8G8B8A8_USCALED;
+                        continue;
+                    case PIPE_FORMAT_R8G8B8_SSCALED:
+                        *format = PIPE_FORMAT_R8G8B8A8_SSCALED;
+                        continue;
+                    case PIPE_FORMAT_R16G16B16_UNORM:
+                        *format = PIPE_FORMAT_R16G16B16A16_UNORM;
+                        continue;
+                    case PIPE_FORMAT_R16G16B16_SNORM:
+                        *format = PIPE_FORMAT_R16G16B16A16_SNORM;
+                        continue;
+                    case PIPE_FORMAT_R16G16B16_USCALED:
+                        *format = PIPE_FORMAT_R16G16B16A16_USCALED;
+                        continue;
+                    case PIPE_FORMAT_R16G16B16_SSCALED:
+                        *format = PIPE_FORMAT_R16G16B16A16_SSCALED;
+                        continue;
+                    default:;
+                }
+
+                size = util_format_get_blocksize(*format);
 
                 if (size % 4 != 0) {
                     /* XXX Shouldn't we align the format? */
                     fprintf(stderr, "r300_create_vertex_elements_state: "
                             "Unaligned format %s:%i isn't supported\n",
-                            util_format_name(attribs[i].src_format), size);
+                            util_format_name(*format), size);
                     assert(0);
                     abort();
                 }
@@ -1276,10 +1365,13 @@ static void* r300_create_vs_state(struct pipe_context* pipe,
     struct r300_context* r300 = r300_context(pipe);
 
     struct r300_vertex_shader* vs = CALLOC_STRUCT(r300_vertex_shader);
-    r300_vertex_shader_common_init(vs, shader);
+
+    /* Copy state directly into shader. */
+    vs->state = *shader;
+    vs->state.tokens = tgsi_dup_tokens(shader->tokens);
 
     if (r300->screen->caps.has_tcl) {
-        r300_translate_vertex_shader(r300, vs);
+        r300_translate_vertex_shader(r300, vs, vs->state.tokens);
     } else {
         vs->draw_vs = draw_create_vertex_shader(r300->draw, shader);
     }
@@ -1302,7 +1394,7 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
     r300->vs_state.state = vs;
 
     // VS output mapping for HWTCL or stream mapping for SWTCL to the RS block
-    if (r300->fs) {
+    if (r300->fs.state) {
         r300_vertex_shader_setup_wpos(r300);
     }
     memcpy(r300->vap_output_state.state, &vs->vap_out,
@@ -1314,11 +1406,18 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
 
     if (r300->screen->caps.has_tcl) {
         r300->vs_state.dirty = TRUE;
-        r300->vs_state.size = vs->code.length + 9;
+        r300->vs_state.size =
+                vs->code.length + 9 +
+                (vs->immediates_count ? vs->immediates_count * 4 + 3 : 0);
 
-        r300->pvs_flush.dirty = TRUE;
+        if (vs->externals_count) {
+            r300->vs_constants.dirty = TRUE;
+            r300->vs_constants.size = vs->externals_count * 4 + 3;
+        } else {
+            r300->vs_constants.size = 0;
+        }
 
-        r300->dirty_state |= R300_NEW_VERTEX_SHADER_CONSTANTS;
+        r300->pvs_flush.dirty = TRUE;
     } else {
         draw_flush(r300->draw);
         draw_bind_vertex_shader(r300->draw,
@@ -1344,62 +1443,66 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader)
 
 static void r300_set_constant_buffer(struct pipe_context *pipe,
                                      uint shader, uint index,
-                                     struct pipe_buffer *buf)
+                                     struct pipe_resource *buf)
 {
     struct r300_context* r300 = r300_context(pipe);
+    struct r300_constant_buffer *cbuf;
+    struct pipe_transfer *tr;
     void *mapped;
     int max_size = 0;
 
-    if (buf == NULL || buf->size == 0 ||
-        (mapped = pipe_buffer_map(pipe->screen, buf, PIPE_BUFFER_USAGE_CPU_READ)) == NULL)
-    {
-        r300->shader_constants[shader].count = 0;
-        return;
-    }
-
-    assert((buf->size % 4 * sizeof(float)) == 0);
-
-    /* Check the size of the constant buffer. */
     switch (shader) {
         case PIPE_SHADER_VERTEX:
+            cbuf = (struct r300_constant_buffer*)r300->vs_constants.state;
             max_size = 256;
             break;
         case PIPE_SHADER_FRAGMENT:
+            cbuf = (struct r300_constant_buffer*)r300->fs_constants.state;
             if (r300->screen->caps.is_r500) {
                 max_size = 256;
-            /* XXX Implement emission of r400's extended constant buffer. */
-            /*} else if (r300->screen->caps.is_r400) {
-                max_size = 64;*/
             } else {
                 max_size = 32;
             }
             break;
         default:
             assert(0);
+            return;
+    }
+
+    if (buf == NULL || buf->width0 == 0 ||
+        (mapped = pipe_buffer_map(pipe, buf, PIPE_TRANSFER_READ, &tr)) == NULL)
+    {
+        cbuf->count = 0;
+        return;
     }
 
+    assert((buf->width0 % 4 * sizeof(float)) == 0);
+
+    /* Check the size of the constant buffer. */
     /* XXX Subtract immediates and RC_STATE_* variables. */
-    if (buf->size > (sizeof(float) * 4 * max_size)) {
+    if (buf->width0 > (sizeof(float) * 4 * max_size)) {
         fprintf(stderr, "r300: Max size of the constant buffer is "
                       "%i*4 floats.\n", max_size);
         abort();
     }
 
-    memcpy(r300->shader_constants[shader].constants, mapped, buf->size);
-    r300->shader_constants[shader].count = buf->size / (4 * sizeof(float));
-    pipe_buffer_unmap(pipe->screen, buf);
+    memcpy(cbuf->constants, mapped, buf->width0);
+    cbuf->count = buf->width0 / (4 * sizeof(float));
+    pipe_buffer_unmap(pipe, buf, tr);
 
     if (shader == PIPE_SHADER_VERTEX) {
         if (r300->screen->caps.has_tcl) {
-            r300->dirty_state |= R300_NEW_VERTEX_SHADER_CONSTANTS;
+            if (r300->vs_constants.size) {
+                r300->vs_constants.dirty = TRUE;
+            }
             r300->pvs_flush.dirty = TRUE;
         } else if (r300->draw) {
             draw_set_mapped_constant_buffer(r300->draw, PIPE_SHADER_VERTEX,
-                0, r300->shader_constants[PIPE_SHADER_VERTEX].constants,
-                buf->size);
+                0, cbuf->constants,
+                buf->width0);
         }
     } else if (shader == PIPE_SHADER_FRAGMENT) {
-        r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS;
+        r300->fs_constants.dirty = TRUE;
     }
 }