PCem
changeset 104:41134e597e51
S3D optimisations - between 15% and 30% faster on my machine.
| author | TomW |
|---|---|
| date | Wed Jun 04 19:45:12 2014 +0100 |
| parents | 354491040ce1 |
| children | eb624a751863 |
| files | src/ibm.h src/pc.rc src/vid_s3_virge.c src/win.c |
| diffstat | 4 files changed, 341 insertions(+), 174 deletions(-) [+] |
line diff
1.1 --- a/src/ibm.h Mon May 26 18:13:23 2014 +0100 1.2 +++ b/src/ibm.h Wed Jun 04 19:45:12 2014 +0100 1.3 @@ -455,3 +455,6 @@ 1.4 1.5 1.6 uint8_t *vramp; 1.7 + 1.8 +uint64_t timer_read(); 1.9 +extern uint64_t timer_freq;
2.1 --- a/src/pc.rc Mon May 26 18:13:23 2014 +0100 2.2 +++ b/src/pc.rc Wed Jun 04 19:45:12 2014 +0100 2.3 @@ -155,7 +155,7 @@ 2.4 LTEXT "", IDC_TEXT1, 7, 38, 136, 12 2.5 END 2.6 2.7 -StatusDlg DIALOGEX 0,0,186,186 2.8 +StatusDlg DIALOGEX 0,0,186,186+20 2.9 STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU 2.10 CAPTION "Status" 2.11 FONT 8, "MS Sans Serif"
3.1 --- a/src/vid_s3_virge.c Mon May 26 18:13:23 2014 +0100 3.2 +++ b/src/vid_s3_virge.c Wed Jun 04 19:45:12 2014 +0100 3.3 @@ -11,6 +11,10 @@ 3.4 #include "vid_svga.h" 3.5 #include "vid_svga_render.h" 3.6 3.7 +static uint64_t virge_time = 0; 3.8 +static uint64_t status_time = 0; 3.9 +static int reg_writes = 0; 3.10 + 3.11 typedef struct virge_t 3.12 { 3.13 mem_mapping_t linear_mapping; 3.14 @@ -559,6 +563,7 @@ 3.15 3.16 static uint8_t s3_virge_mmio_read(uint32_t addr, void *p) 3.17 { 3.18 + reg_writes++; 3.19 // pclog("New MMIO readb %08X\n", addr); 3.20 switch (addr & 0xffff) 3.21 { 3.22 @@ -580,6 +585,7 @@ 3.23 } 3.24 static uint16_t s3_virge_mmio_read_w(uint32_t addr, void *p) 3.25 { 3.26 + reg_writes++; 3.27 // pclog("New MMIO readw %08X\n", addr); 3.28 switch (addr & 0xfffe) 3.29 { 3.30 @@ -592,6 +598,7 @@ 3.31 { 3.32 virge_t *virge = (virge_t *)p; 3.33 uint32_t ret = 0xffffffff; 3.34 + reg_writes++; 3.35 // pclog("New MMIO readl %08X %04X(%08X):%08X ", addr, CS, cs, pc); 3.36 switch (addr & 0xfffc) 3.37 { 3.38 @@ -723,7 +730,7 @@ 3.39 svga_t *svga = &virge->svga; 3.40 3.41 // pclog("New MMIO writeb %08X %02X %04x(%08x):%08x\n", addr, val, CS, cs, pc); 3.42 - 3.43 + reg_writes++; 3.44 if ((addr & 0xfffc) < 0x8000) 3.45 s3_virge_bitblt(virge, 8, val); 3.46 else switch (addr & 0xffff) 3.47 @@ -749,6 +756,7 @@ 3.48 static void s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *p) 3.49 { 3.50 virge_t *virge = (virge_t *)p; 3.51 + reg_writes++; 3.52 // pclog("New MMIO writew %08X %04X %04x(%08x):%08x\n", addr, val, CS, cs, pc); 3.53 if ((addr & 0xfffc) < 0x8000) 3.54 { 3.55 @@ -769,6 +777,7 @@ 3.56 { 3.57 virge_t *virge = (virge_t *)p; 3.58 svga_t *svga = &virge->svga; 3.59 + reg_writes++; 3.60 // if ((addr & 0xfffc) >= 0x8000) 3.61 // pclog("New MMIO writel %08X %08X %04x(%08x):%08x\n", addr, val, CS, cs, pc); 3.62 3.63 @@ -1533,6 +1542,11 @@ 3.64 3.65 #define RGB24(r, g, b) ((b) | ((g) << 8) | ((r) << 16)) 3.66 3.67 +typedef struct rgba_t 3.68 +{ 3.69 + int r, g, b, a; 3.70 +} rgba_t; 3.71 + 3.72 typedef struct s3d_state_t 3.73 { 3.74 int32_t r, g, b, a, u, v, d, w; 3.75 @@ -1552,6 +1566,8 @@ 3.76 3.77 int32_t x1, x2; 3.78 int y; 3.79 + 3.80 + rgba_t dest_rgba; 3.81 } s3d_state_t; 3.82 3.83 typedef struct s3d_texture_state_t 3.84 @@ -1562,62 +1578,96 @@ 3.85 int32_t u, v; 3.86 } s3d_texture_state_t; 3.87 3.88 -static void (*tex_read)(s3d_state_t *state, s3d_texture_state_t *texture_state, int *r_out, int *g_out, int *b_out, int *a_out); 3.89 -static void (*tex_sample)(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out); 3.90 -static void (*dest_pixel)(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out); 3.91 +static void (*tex_read)(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out); 3.92 +static void (*tex_sample)(s3d_state_t *state); 3.93 +static void (*dest_pixel)(s3d_state_t *state); 3.94 3.95 #define MAX(a, b) ((a) > (b) ? (a) : (b)) 3.96 #define MIN(a, b) ((a) < (b) ? (a) : (b)) 3.97 3.98 static int _x, _y; 3.99 3.100 -static void tex_ARGB1555(s3d_state_t *state, s3d_texture_state_t *texture_state, int *r_out, int *g_out, int *b_out, int *a_out) 3.101 +static void tex_ARGB1555(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) 3.102 { 3.103 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + 3.104 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); 3.105 uint16_t val = state->texture[texture_state->level][offset]; 3.106 3.107 - if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000 && !(state->cmd_set & CMD_SET_TWE)) 3.108 - val = state->tex_bdr_clr; 3.109 - 3.110 - *r_out = ((val & 0x7c00) >> 7) | ((val & 0x7000) >> 12); 3.111 - *g_out = ((val & 0x03e0) >> 2) | ((val & 0x0380) >> 7); 3.112 - *b_out = ((val & 0x001f) << 3) | ((val & 0x001c) >> 2); 3.113 - *a_out = (val & 0x8000) ? 0xff : 0; 3.114 + out->r = ((val & 0x7c00) >> 7) | ((val & 0x7000) >> 12); 3.115 + out->g = ((val & 0x03e0) >> 2) | ((val & 0x0380) >> 7); 3.116 + out->b = ((val & 0x001f) << 3) | ((val & 0x001c) >> 2); 3.117 + out->a = (val & 0x8000) ? 0xff : 0; 3.118 } 3.119 3.120 -static void tex_ARGB4444(s3d_state_t *state, s3d_texture_state_t *texture_state, int *r_out, int *g_out, int *b_out, int *a_out) 3.121 +static void tex_ARGB1555_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) 3.122 { 3.123 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + 3.124 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); 3.125 uint16_t val = state->texture[texture_state->level][offset]; 3.126 3.127 - if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000 && !(state->cmd_set & CMD_SET_TWE)) 3.128 + if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000) 3.129 val = state->tex_bdr_clr; 3.130 3.131 - *r_out = ((val & 0x0f00) >> 4) | ((val & 0x0f00) >> 8); 3.132 - *g_out = (val & 0x00f0) | ((val & 0x00f0) >> 4); 3.133 - *b_out = ((val & 0x000f) << 4) | (val & 0x000f); 3.134 - *a_out = ((val & 0xf000) >> 8) | ((val & 0xf000) >> 12); 3.135 - 3.136 + out->r = ((val & 0x7c00) >> 7) | ((val & 0x7000) >> 12); 3.137 + out->g = ((val & 0x03e0) >> 2) | ((val & 0x0380) >> 7); 3.138 + out->b = ((val & 0x001f) << 3) | ((val & 0x001c) >> 2); 3.139 + out->a = (val & 0x8000) ? 0xff : 0; 3.140 } 3.141 3.142 -static void tex_ARGB8888(s3d_state_t *state, s3d_texture_state_t *texture_state, int *r_out, int *g_out, int *b_out, int *a_out) 3.143 +static void tex_ARGB4444(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) 3.144 +{ 3.145 + int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + 3.146 + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); 3.147 + uint16_t val = state->texture[texture_state->level][offset]; 3.148 + 3.149 + out->r = ((val & 0x0f00) >> 4) | ((val & 0x0f00) >> 8); 3.150 + out->g = (val & 0x00f0) | ((val & 0x00f0) >> 4); 3.151 + out->b = ((val & 0x000f) << 4) | (val & 0x000f); 3.152 + out->a = ((val & 0xf000) >> 8) | ((val & 0xf000) >> 12); 3.153 +} 3.154 + 3.155 +static void tex_ARGB4444_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) 3.156 +{ 3.157 + int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + 3.158 + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); 3.159 + uint16_t val = state->texture[texture_state->level][offset]; 3.160 + 3.161 + if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000) 3.162 + val = state->tex_bdr_clr; 3.163 + 3.164 + out->r = ((val & 0x0f00) >> 4) | ((val & 0x0f00) >> 8); 3.165 + out->g = (val & 0x00f0) | ((val & 0x00f0) >> 4); 3.166 + out->b = ((val & 0x000f) << 4) | (val & 0x000f); 3.167 + out->a = ((val & 0xf000) >> 8) | ((val & 0xf000) >> 12); 3.168 +} 3.169 + 3.170 +static void tex_ARGB8888(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) 3.171 { 3.172 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + 3.173 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); 3.174 uint32_t val = ((uint32_t *)state->texture[texture_state->level])[offset]; 3.175 3.176 - if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000 && !(state->cmd_set & CMD_SET_TWE)) 3.177 + out->r = (val >> 16) & 0xff; 3.178 + out->g = (val >> 8) & 0xff; 3.179 + out->b = val & 0xff; 3.180 + out->a = (val >> 24) & 0xff; 3.181 +} 3.182 +static void tex_ARGB8888_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out) 3.183 +{ 3.184 + int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) + 3.185 + (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level); 3.186 + uint32_t val = ((uint32_t *)state->texture[texture_state->level])[offset]; 3.187 + 3.188 + if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000) 3.189 val = state->tex_bdr_clr; 3.190 3.191 - *r_out = (val >> 16) & 0xff; 3.192 - *g_out = (val >> 8) & 0xff; 3.193 - *b_out = val & 0xff; 3.194 - *a_out = (val >> 24) & 0xff; 3.195 + out->r = (val >> 16) & 0xff; 3.196 + out->g = (val >> 8) & 0xff; 3.197 + out->b = val & 0xff; 3.198 + out->a = (val >> 24) & 0xff; 3.199 } 3.200 3.201 -static void tex_sample_normal(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out) 3.202 +static void tex_sample_normal(s3d_state_t *state) 3.203 { 3.204 s3d_texture_state_t texture_state; 3.205 3.206 @@ -1626,14 +1676,14 @@ 3.207 texture_state.u = state->u + state->tbu; 3.208 texture_state.v = state->v + state->tbv; 3.209 3.210 - tex_read(state, &texture_state, r_out, g_out, b_out, a_out); 3.211 + tex_read(state, &texture_state, &state->dest_rgba); 3.212 } 3.213 3.214 -static void tex_sample_normal_filter(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out) 3.215 +static void tex_sample_normal_filter(s3d_state_t *state) 3.216 { 3.217 s3d_texture_state_t texture_state; 3.218 int tex_offset; 3.219 - int r[4], g[4], b[4], a[4]; 3.220 + rgba_t tex_samples[4]; 3.221 int du, dv; 3.222 int d[4]; 3.223 3.224 @@ -1643,34 +1693,34 @@ 3.225 3.226 texture_state.u = state->u + state->tbu; 3.227 texture_state.v = state->v + state->tbv; 3.228 - tex_read(state, &texture_state, &r[0], &g[0], &b[0], &a[0]); 3.229 + tex_read(state, &texture_state, &tex_samples[0]); 3.230 du = (texture_state.u >> (texture_state.texture_shift - 8)) & 0xff; 3.231 dv = (texture_state.v >> (texture_state.texture_shift - 8)) & 0xff; 3.232 3.233 texture_state.u = state->u + state->tbu + tex_offset; 3.234 texture_state.v = state->v + state->tbv; 3.235 - tex_read(state, &texture_state, &r[1], &g[1], &b[1], &a[1]); 3.236 + tex_read(state, &texture_state, &tex_samples[1]); 3.237 3.238 texture_state.u = state->u + state->tbu; 3.239 texture_state.v = state->v + state->tbv + tex_offset; 3.240 - tex_read(state, &texture_state, &r[2], &g[2], &b[2], &a[2]); 3.241 + tex_read(state, &texture_state, &tex_samples[2]); 3.242 3.243 texture_state.u = state->u + state->tbu + tex_offset; 3.244 texture_state.v = state->v + state->tbv + tex_offset; 3.245 - tex_read(state, &texture_state, &r[3], &g[3], &b[3], &a[3]); 3.246 + tex_read(state, &texture_state, &tex_samples[3]); 3.247 3.248 d[0] = (256 - du) * (256 - dv); 3.249 d[1] = du * (256 - dv); 3.250 d[2] = (256 - du) * dv; 3.251 d[3] = du * dv; 3.252 3.253 - *r_out = (r[0] * d[0] + r[1] * d[1] + r[2] * d[2] + r[3] * d[3]) >> 16; 3.254 - *g_out = (g[0] * d[0] + g[1] * d[1] + g[2] * d[2] + g[3] * d[3]) >> 16; 3.255 - *b_out = (b[0] * d[0] + b[1] * d[1] + b[2] * d[2] + b[3] * d[3]) >> 16; 3.256 - *a_out = (a[0] * d[0] + a[1] * d[1] + a[2] * d[2] + a[3] * d[3]) >> 16; 3.257 + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; 3.258 + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; 3.259 + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; 3.260 + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; 3.261 } 3.262 3.263 -static void tex_sample_mipmap(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out) 3.264 +static void tex_sample_mipmap(s3d_state_t *state) 3.265 { 3.266 s3d_texture_state_t texture_state; 3.267 3.268 @@ -1679,14 +1729,14 @@ 3.269 texture_state.u = state->u + state->tbu; 3.270 texture_state.v = state->v + state->tbv; 3.271 3.272 - tex_read(state, &texture_state, r_out, g_out, b_out, a_out); 3.273 + tex_read(state, &texture_state, &state->dest_rgba); 3.274 } 3.275 3.276 -static void tex_sample_mipmap_filter(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out) 3.277 +static void tex_sample_mipmap_filter(s3d_state_t *state) 3.278 { 3.279 s3d_texture_state_t texture_state; 3.280 int tex_offset; 3.281 - int r[4], g[4], b[4], a[4]; 3.282 + rgba_t tex_samples[4]; 3.283 int du, dv; 3.284 int d[4]; 3.285 3.286 @@ -1696,34 +1746,34 @@ 3.287 3.288 texture_state.u = state->u + state->tbu; 3.289 texture_state.v = state->v + state->tbv; 3.290 - tex_read(state, &texture_state, &r[0], &g[0], &b[0], &a[0]); 3.291 + tex_read(state, &texture_state, &tex_samples[0]); 3.292 du = (texture_state.u >> (texture_state.texture_shift - 8)) & 0xff; 3.293 dv = (texture_state.v >> (texture_state.texture_shift - 8)) & 0xff; 3.294 3.295 texture_state.u = state->u + state->tbu + tex_offset; 3.296 texture_state.v = state->v + state->tbv; 3.297 - tex_read(state, &texture_state, &r[1], &g[1], &b[1], &a[1]); 3.298 + tex_read(state, &texture_state, &tex_samples[1]); 3.299 3.300 texture_state.u = state->u + state->tbu; 3.301 texture_state.v = state->v + state->tbv + tex_offset; 3.302 - tex_read(state, &texture_state, &r[2], &g[2], &b[2], &a[2]); 3.303 + tex_read(state, &texture_state, &tex_samples[2]); 3.304 3.305 texture_state.u = state->u + state->tbu + tex_offset; 3.306 texture_state.v = state->v + state->tbv + tex_offset; 3.307 - tex_read(state, &texture_state, &r[3], &g[3], &b[3], &a[3]); 3.308 + tex_read(state, &texture_state, &tex_samples[3]); 3.309 3.310 d[0] = (256 - du) * (256 - dv); 3.311 d[1] = du * (256 - dv); 3.312 d[2] = (256 - du) * dv; 3.313 d[3] = du * dv; 3.314 3.315 - *r_out = (r[0] * d[0] + r[1] * d[1] + r[2] * d[2] + r[3] * d[3]) >> 16; 3.316 - *g_out = (g[0] * d[0] + g[1] * d[1] + g[2] * d[2] + g[3] * d[3]) >> 16; 3.317 - *b_out = (b[0] * d[0] + b[1] * d[1] + b[2] * d[2] + b[3] * d[3]) >> 16; 3.318 - *a_out = (a[0] * d[0] + a[1] * d[1] + a[2] * d[2] + a[3] * d[3]) >> 16; 3.319 + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; 3.320 + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; 3.321 + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; 3.322 + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; 3.323 } 3.324 3.325 -static void tex_sample_persp_normal(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out) 3.326 +static void tex_sample_persp_normal(s3d_state_t *state) 3.327 { 3.328 s3d_texture_state_t texture_state; 3.329 int32_t w = 0; 3.330 @@ -1736,15 +1786,15 @@ 3.331 texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu; 3.332 texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv; 3.333 3.334 - tex_read(state, &texture_state, r_out, g_out, b_out, a_out); 3.335 + tex_read(state, &texture_state, &state->dest_rgba); 3.336 } 3.337 3.338 -static void tex_sample_persp_normal_filter(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out) 3.339 +static void tex_sample_persp_normal_filter(s3d_state_t *state) 3.340 { 3.341 s3d_texture_state_t texture_state; 3.342 int32_t w = 0, u, v; 3.343 int tex_offset; 3.344 - int r[4], g[4], b[4], a[4]; 3.345 + rgba_t tex_samples[4]; 3.346 int du, dv; 3.347 int d[4]; 3.348 3.349 @@ -1760,34 +1810,34 @@ 3.350 3.351 texture_state.u = u; 3.352 texture_state.v = v; 3.353 - tex_read(state, &texture_state, &r[0], &g[0], &b[0], &a[0]); 3.354 + tex_read(state, &texture_state, &tex_samples[0]); 3.355 du = (u >> (texture_state.texture_shift - 8)) & 0xff; 3.356 dv = (v >> (texture_state.texture_shift - 8)) & 0xff; 3.357 3.358 texture_state.u = u + tex_offset; 3.359 texture_state.v = v; 3.360 - tex_read(state, &texture_state, &r[1], &g[1], &b[1], &a[1]); 3.361 + tex_read(state, &texture_state, &tex_samples[1]); 3.362 3.363 texture_state.u = u; 3.364 texture_state.v = v + tex_offset; 3.365 - tex_read(state, &texture_state, &r[2], &g[2], &b[2], &a[2]); 3.366 + tex_read(state, &texture_state, &tex_samples[2]); 3.367 3.368 texture_state.u = u + tex_offset; 3.369 texture_state.v = v + tex_offset; 3.370 - tex_read(state, &texture_state, &r[3], &g[3], &b[3], &a[3]); 3.371 + tex_read(state, &texture_state, &tex_samples[3]); 3.372 3.373 d[0] = (256 - du) * (256 - dv); 3.374 d[1] = du * (256 - dv); 3.375 d[2] = (256 - du) * dv; 3.376 d[3] = du * dv; 3.377 3.378 - *r_out = (r[0] * d[0] + r[1] * d[1] + r[2] * d[2] + r[3] * d[3]) >> 16; 3.379 - *g_out = (g[0] * d[0] + g[1] * d[1] + g[2] * d[2] + g[3] * d[3]) >> 16; 3.380 - *b_out = (b[0] * d[0] + b[1] * d[1] + b[2] * d[2] + b[3] * d[3]) >> 16; 3.381 - *a_out = (a[0] * d[0] + a[1] * d[1] + a[2] * d[2] + a[3] * d[3]) >> 16; 3.382 + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; 3.383 + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; 3.384 + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; 3.385 + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; 3.386 } 3.387 3.388 -static void tex_sample_persp_normal_375(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out) 3.389 +static void tex_sample_persp_normal_375(s3d_state_t *state) 3.390 { 3.391 s3d_texture_state_t texture_state; 3.392 int32_t w = 0; 3.393 @@ -1800,15 +1850,15 @@ 3.394 texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu; 3.395 texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv; 3.396 3.397 - tex_read(state, &texture_state, r_out, g_out, b_out, a_out); 3.398 + tex_read(state, &texture_state, &state->dest_rgba); 3.399 } 3.400 3.401 -static void tex_sample_persp_normal_filter_375(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out) 3.402 +static void tex_sample_persp_normal_filter_375(s3d_state_t *state) 3.403 { 3.404 s3d_texture_state_t texture_state; 3.405 int32_t w = 0, u, v; 3.406 int tex_offset; 3.407 - int r[4], g[4], b[4], a[4]; 3.408 + rgba_t tex_samples[4]; 3.409 int du, dv; 3.410 int d[4]; 3.411 3.412 @@ -1824,35 +1874,35 @@ 3.413 3.414 texture_state.u = u; 3.415 texture_state.v = v; 3.416 - tex_read(state, &texture_state, &r[0], &g[0], &b[0], &a[0]); 3.417 + tex_read(state, &texture_state, &tex_samples[0]); 3.418 du = (u >> (texture_state.texture_shift - 8)) & 0xff; 3.419 dv = (v >> (texture_state.texture_shift - 8)) & 0xff; 3.420 3.421 texture_state.u = u + tex_offset; 3.422 texture_state.v = v; 3.423 - tex_read(state, &texture_state, &r[1], &g[1], &b[1], &a[1]); 3.424 + tex_read(state, &texture_state, &tex_samples[1]); 3.425 3.426 texture_state.u = u; 3.427 texture_state.v = v + tex_offset; 3.428 - tex_read(state, &texture_state, &r[2], &g[2], &b[2], &a[2]); 3.429 + tex_read(state, &texture_state, &tex_samples[2]); 3.430 3.431 texture_state.u = u + tex_offset; 3.432 texture_state.v = v + tex_offset; 3.433 - tex_read(state, &texture_state, &r[3], &g[3], &b[3], &a[3]); 3.434 + tex_read(state, &texture_state, &tex_samples[3]); 3.435 3.436 d[0] = (256 - du) * (256 - dv); 3.437 d[1] = du * (256 - dv); 3.438 d[2] = (256 - du) * dv; 3.439 d[3] = du * dv; 3.440 3.441 - *r_out = (r[0] * d[0] + r[1] * d[1] + r[2] * d[2] + r[3] * d[3]) >> 16; 3.442 - *g_out = (g[0] * d[0] + g[1] * d[1] + g[2] * d[2] + g[3] * d[3]) >> 16; 3.443 - *b_out = (b[0] * d[0] + b[1] * d[1] + b[2] * d[2] + b[3] * d[3]) >> 16; 3.444 - *a_out = (a[0] * d[0] + a[1] * d[1] + a[2] * d[2] + a[3] * d[3]) >> 16; 3.445 + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; 3.446 + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; 3.447 + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; 3.448 + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; 3.449 } 3.450 3.451 3.452 -static void tex_sample_persp_mipmap(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out) 3.453 +static void tex_sample_persp_mipmap(s3d_state_t *state) 3.454 { 3.455 s3d_texture_state_t texture_state; 3.456 int32_t w = 0; 3.457 @@ -1865,15 +1915,15 @@ 3.458 texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu; 3.459 texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv; 3.460 3.461 - tex_read(state, &texture_state, r_out, g_out, b_out, a_out); 3.462 + tex_read(state, &texture_state, &state->dest_rgba); 3.463 } 3.464 3.465 -static void tex_sample_persp_mipmap_filter(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out) 3.466 +static void tex_sample_persp_mipmap_filter(s3d_state_t *state) 3.467 { 3.468 s3d_texture_state_t texture_state; 3.469 int32_t w = 0, u, v; 3.470 int tex_offset; 3.471 - int r[4], g[4], b[4], a[4]; 3.472 + rgba_t tex_samples[4]; 3.473 int du, dv; 3.474 int d[4]; 3.475 3.476 @@ -1889,34 +1939,34 @@ 3.477 3.478 texture_state.u = u; 3.479 texture_state.v = v; 3.480 - tex_read(state, &texture_state, &r[0], &g[0], &b[0], &a[0]); 3.481 + tex_read(state, &texture_state, &tex_samples[0]); 3.482 du = (u >> (texture_state.texture_shift - 8)) & 0xff; 3.483 dv = (v >> (texture_state.texture_shift - 8)) & 0xff; 3.484 3.485 texture_state.u = u + tex_offset; 3.486 texture_state.v = v; 3.487 - tex_read(state, &texture_state, &r[1], &g[1], &b[1], &a[1]); 3.488 + tex_read(state, &texture_state, &tex_samples[1]); 3.489 3.490 texture_state.u = u; 3.491 texture_state.v = v + tex_offset; 3.492 - tex_read(state, &texture_state, &r[2], &g[2], &b[2], &a[2]); 3.493 + tex_read(state, &texture_state, &tex_samples[2]); 3.494 3.495 texture_state.u = u + tex_offset; 3.496 texture_state.v = v + tex_offset; 3.497 - tex_read(state, &texture_state, &r[3], &g[3], &b[3], &a[3]); 3.498 + tex_read(state, &texture_state, &tex_samples[3]); 3.499 3.500 d[0] = (256 - du) * (256 - dv); 3.501 d[1] = du * (256 - dv); 3.502 d[2] = (256 - du) * dv; 3.503 d[3] = du * dv; 3.504 3.505 - *r_out = (r[0] * d[0] + r[1] * d[1] + r[2] * d[2] + r[3] * d[3]) >> 16; 3.506 - *g_out = (g[0] * d[0] + g[1] * d[1] + g[2] * d[2] + g[3] * d[3]) >> 16; 3.507 - *b_out = (b[0] * d[0] + b[1] * d[1] + b[2] * d[2] + b[3] * d[3]) >> 16; 3.508 - *a_out = (a[0] * d[0] + a[1] * d[1] + a[2] * d[2] + a[3] * d[3]) >> 16; 3.509 + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; 3.510 + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; 3.511 + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; 3.512 + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; 3.513 } 3.514 3.515 -static void tex_sample_persp_mipmap_375(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out) 3.516 +static void tex_sample_persp_mipmap_375(s3d_state_t *state) 3.517 { 3.518 s3d_texture_state_t texture_state; 3.519 int32_t w = 0; 3.520 @@ -1929,15 +1979,15 @@ 3.521 texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu; 3.522 texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv; 3.523 3.524 - tex_read(state, &texture_state, r_out, g_out, b_out, a_out); 3.525 + tex_read(state, &texture_state, &state->dest_rgba); 3.526 } 3.527 3.528 -static void tex_sample_persp_mipmap_filter_375(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out) 3.529 +static void tex_sample_persp_mipmap_filter_375(s3d_state_t *state) 3.530 { 3.531 s3d_texture_state_t texture_state; 3.532 int32_t w = 0, u, v; 3.533 int tex_offset; 3.534 - int r[4], g[4], b[4], a[4]; 3.535 + rgba_t tex_samples[4]; 3.536 int du, dv; 3.537 int d[4]; 3.538 3.539 @@ -1953,55 +2003,51 @@ 3.540 3.541 texture_state.u = u; 3.542 texture_state.v = v; 3.543 - tex_read(state, &texture_state, &r[0], &g[0], &b[0], &a[0]); 3.544 + tex_read(state, &texture_state, &tex_samples[0]); 3.545 du = (u >> (texture_state.texture_shift - 8)) & 0xff; 3.546 dv = (v >> (texture_state.texture_shift - 8)) & 0xff; 3.547 3.548 texture_state.u = u + tex_offset; 3.549 texture_state.v = v; 3.550 - tex_read(state, &texture_state, &r[1], &g[1], &b[1], &a[1]); 3.551 + tex_read(state, &texture_state, &tex_samples[1]); 3.552 3.553 texture_state.u = u; 3.554 texture_state.v = v + tex_offset; 3.555 - tex_read(state, &texture_state, &r[2], &g[2], &b[2], &a[2]); 3.556 + tex_read(state, &texture_state, &tex_samples[2]); 3.557 3.558 texture_state.u = u + tex_offset; 3.559 texture_state.v = v + tex_offset; 3.560 - tex_read(state, &texture_state, &r[3], &g[3], &b[3], &a[3]); 3.561 + tex_read(state, &texture_state, &tex_samples[3]); 3.562 3.563 d[0] = (256 - du) * (256 - dv); 3.564 d[1] = du * (256 - dv); 3.565 d[2] = (256 - du) * dv; 3.566 d[3] = du * dv; 3.567 3.568 - *r_out = (r[0] * d[0] + r[1] * d[1] + r[2] * d[2] + r[3] * d[3]) >> 16; 3.569 - *g_out = (g[0] * d[0] + g[1] * d[1] + g[2] * d[2] + g[3] * d[3]) >> 16; 3.570 - *b_out = (b[0] * d[0] + b[1] * d[1] + b[2] * d[2] + b[3] * d[3]) >> 16; 3.571 - *a_out = (a[0] * d[0] + a[1] * d[1] + a[2] * d[2] + a[3] * d[3]) >> 16; 3.572 + state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16; 3.573 + state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16; 3.574 + state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16; 3.575 + state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16; 3.576 } 3.577 3.578 3.579 -#define CLAMP_RGBA(r, g, b, a) do \ 3.580 - { \ 3.581 - if ((r) < 0) \ 3.582 - r = 0; \ 3.583 - if ((r) > 0xff) \ 3.584 - r = 0xff; \ 3.585 - if ((g) < 0) \ 3.586 - g = 0; \ 3.587 - if ((g) > 0xff) \ 3.588 - g = 0xff; \ 3.589 - if ((b) < 0) \ 3.590 - b = 0; \ 3.591 - if ((b) > 0xff) \ 3.592 - b = 0xff; \ 3.593 - if ((a) < 0) \ 3.594 - a = 0; \ 3.595 - if ((a) > 0xff) \ 3.596 - a = 0xff; \ 3.597 +#define CLAMP(x) do \ 3.598 + { \ 3.599 + if ((x) & ~0xff) \ 3.600 + x = ((x) < 0) ? 0 : 0xff; \ 3.601 } \ 3.602 while (0) 3.603 3.604 +#define CLAMP_RGBA(r, g, b, a) \ 3.605 + if ((r) & ~0xff) \ 3.606 + r = ((r) < 0) ? 0 : 0xff; \ 3.607 + if ((g) & ~0xff) \ 3.608 + g = ((g) < 0) ? 0 : 0xff; \ 3.609 + if ((b) & ~0xff) \ 3.610 + b = ((b) < 0) ? 0 : 0xff; \ 3.611 + if ((a) & ~0xff) \ 3.612 + a = ((a) < 0) ? 0 : 0xff; 3.613 + 3.614 #define CLAMP_RGB(r, g, b) do \ 3.615 { \ 3.616 if ((r) < 0) \ 3.617 @@ -2019,67 +2065,64 @@ 3.618 } \ 3.619 while (0) 3.620 3.621 -static void dest_pixel_gouraud_shaded_triangle(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out) 3.622 +static void dest_pixel_gouraud_shaded_triangle(s3d_state_t *state) 3.623 { 3.624 - *r_out = state->r >> 7; 3.625 - *g_out = state->g >> 7; 3.626 - *b_out = state->b >> 7; 3.627 - *a_out = state->a >> 7; 3.628 - CLAMP_RGBA(*r_out, *g_out, *b_out, *a_out); 3.629 + state->dest_rgba.r = state->r >> 7; 3.630 + CLAMP(state->dest_rgba.r); 3.631 + 3.632 + state->dest_rgba.g = state->g >> 7; 3.633 + CLAMP(state->dest_rgba.g); 3.634 + 3.635 + state->dest_rgba.b = state->b >> 7; 3.636 + CLAMP(state->dest_rgba.b); 3.637 + 3.638 + state->dest_rgba.a = state->a >> 7; 3.639 + CLAMP(state->dest_rgba.a); 3.640 } 3.641 3.642 -static void dest_pixel_unlit_texture_triangle(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out) 3.643 +static void dest_pixel_unlit_texture_triangle(s3d_state_t *state) 3.644 { 3.645 - tex_sample(state, r_out, g_out, b_out, a_out); 3.646 + tex_sample(state); 3.647 3.648 if (state->cmd_set & CMD_SET_ABC_SRC) 3.649 - *a_out = state->a >> 7; 3.650 + state->dest_rgba.a = state->a >> 7; 3.651 } 3.652 3.653 -static void dest_pixel_lit_texture_decal(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out) 3.654 +static void dest_pixel_lit_texture_decal(s3d_state_t *state) 3.655 { 3.656 - tex_sample(state, r_out, g_out, b_out, a_out); 3.657 + tex_sample(state); 3.658 3.659 if (state->cmd_set & CMD_SET_ABC_SRC) 3.660 - *a_out = state->a >> 7; 3.661 + state->dest_rgba.a = state->a >> 7; 3.662 } 3.663 3.664 -static void dest_pixel_lit_texture_reflection(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out) 3.665 +static void dest_pixel_lit_texture_reflection(s3d_state_t *state) 3.666 { 3.667 - int tex_r, tex_g, tex_b, tex_a; 3.668 - 3.669 - tex_sample(state, &tex_r, &tex_g, &tex_b, &tex_a); 3.670 + tex_sample(state); 3.671 3.672 - *r_out = state->r >> 7; 3.673 - *g_out = state->g >> 7; 3.674 - *b_out = state->b >> 7; 3.675 - *a_out = state->a >> 7; 3.676 - CLAMP_RGBA(*r_out, *g_out, *b_out, *a_out); 3.677 + state->dest_rgba.r += (state->r >> 7); 3.678 + state->dest_rgba.g += (state->g >> 7); 3.679 + state->dest_rgba.b += (state->b >> 7); 3.680 + if (state->cmd_set & CMD_SET_ABC_SRC) 3.681 + state->dest_rgba.a += (state->a >> 7); 3.682 3.683 - *(r_out) += tex_r; 3.684 - *(g_out) += tex_g; 3.685 - *(b_out) += tex_b; 3.686 - 3.687 - CLAMP_RGB(*r_out, *g_out, *b_out); 3.688 - 3.689 - if (!(state->cmd_set & CMD_SET_ABC_SRC)) 3.690 - *a_out = tex_a; 3.691 + CLAMP_RGBA(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b, state->dest_rgba.a); 3.692 } 3.693 3.694 -static void dest_pixel_lit_texture_modulate(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out) 3.695 +static void dest_pixel_lit_texture_modulate(s3d_state_t *state) 3.696 { 3.697 int r = state->r >> 7, g = state->g >> 7, b = state->b >> 7, a = state->a >> 7; 3.698 3.699 - tex_sample(state, r_out, g_out, b_out, a_out); 3.700 + tex_sample(state); 3.701 3.702 CLAMP_RGBA(r, g, b, a); 3.703 3.704 - *r_out = ((*r_out) * r) >> 8; 3.705 - *g_out = ((*g_out) * g) >> 8; 3.706 - *b_out = ((*b_out) * b) >> 8; 3.707 + state->dest_rgba.r = ((state->dest_rgba.r) * r) >> 8; 3.708 + state->dest_rgba.g = ((state->dest_rgba.g) * g) >> 8; 3.709 + state->dest_rgba.b = ((state->dest_rgba.b) * b) >> 8; 3.710 3.711 if (state->cmd_set & CMD_SET_ABC_SRC) 3.712 - *a_out = a; 3.713 + state->dest_rgba.a = a; 3.714 } 3.715 3.716 static void tri(virge_t *virge, s3d_state_t *state, int yc, int32_t dx1, int32_t dx2) 3.717 @@ -2094,9 +2137,40 @@ 3.718 3.719 int bpp = 1; 3.720 3.721 - uint32_t dest_offset = virge->s3d.dest_base + (state->y * virge->s3d.dest_str); 3.722 - uint32_t z_offset = virge->s3d.z_base + (state->y * virge->s3d.z_str); 3.723 - 3.724 + uint32_t dest_offset, z_offset; 3.725 + 3.726 + if (virge->s3d.cmd_set & CMD_SET_HC) 3.727 + { 3.728 + if (state->y < virge->s3d.clip_t) 3.729 + return; 3.730 + if (state->y > virge->s3d.clip_b) 3.731 + { 3.732 + int diff_y = state->y - virge->s3d.clip_b; 3.733 + 3.734 + if (diff_y > y_count) 3.735 + diff_y = y_count; 3.736 + 3.737 + state->base_u += (virge->s3d.TdUdY * diff_y); 3.738 + state->base_v += (virge->s3d.TdVdY * diff_y); 3.739 + state->base_z += (virge->s3d.TdZdY * diff_y); 3.740 + state->base_r += (virge->s3d.TdRdY * diff_y); 3.741 + state->base_g += (virge->s3d.TdGdY * diff_y); 3.742 + state->base_b += (virge->s3d.TdBdY * diff_y); 3.743 + state->base_a += (virge->s3d.TdAdY * diff_y); 3.744 + state->base_d += (virge->s3d.TdDdY * diff_y); 3.745 + state->base_w += (virge->s3d.TdWdY * diff_y); 3.746 + state->x1 += (dx1 * diff_y); 3.747 + state->x2 += (dx2 * diff_y); 3.748 + state->y -= diff_y; 3.749 + dest_offset -= virge->s3d.dest_str; 3.750 + z_offset -= virge->s3d.z_str; 3.751 + y_count -= diff_y; 3.752 + } 3.753 + } 3.754 + 3.755 + dest_offset = virge->s3d.dest_base + (state->y * virge->s3d.dest_str); 3.756 + z_offset = virge->s3d.z_base + (state->y * virge->s3d.z_str); 3.757 + 3.758 for (; y_count > 0; y_count--) 3.759 { 3.760 int x = state->x1 >> 20; 3.761 @@ -2104,7 +2178,10 @@ 3.762 uint32_t z = state->base_z; 3.763 if (x != xe && (x_dir > 0 && x < xe) || (x_dir < 0 && x > xe)) 3.764 { 3.765 + uint32_t dest_addr, z_addr; 3.766 int dx = (x_dir > 0) ? 8 - ((state->x1 >> 16) & 0xf) : ((state->x1 >> 16) & 0xf) - 8; 3.767 + int x_offset = x_dir << bpp; 3.768 + 3.769 state->r = state->base_r + ((virge->s3d.TdRdX * dx) >> 4); 3.770 state->g = state->base_g + ((virge->s3d.TdGdX * dx) >> 4); 3.771 state->b = state->base_b + ((virge->s3d.TdBdX * dx) >> 4); 3.772 @@ -2116,10 +2193,67 @@ 3.773 z += ((virge->s3d.TdZdX * dx) >> 4); 3.774 // pclog("Draw Y=%i X=%i to XE=%i %i %08x %08x %08x %08x %08x %08x %08x %08x %i %08x\n", state->y, x, xe, dx, state->x1, state->x2, dx1, virge->s3d.TdWdX, state->u, state->v, virge->s3d.TdUdX, virge->s3d.TdUdY, dx, (virge->s3d.TdUdX * dx) >> 4); 3.775 3.776 + if (virge->s3d.cmd_set & CMD_SET_HC) 3.777 + { 3.778 + if (x_dir > 0) 3.779 + { 3.780 + if (x > virge->s3d.clip_r) 3.781 + goto tri_skip_line; 3.782 + if (xe < virge->s3d.clip_l) 3.783 + goto tri_skip_line; 3.784 + if (xe > virge->s3d.clip_r) 3.785 + xe = virge->s3d.clip_r; 3.786 + if (x < virge->s3d.clip_l) 3.787 + { 3.788 + int diff_x = virge->s3d.clip_l - x; 3.789 + 3.790 + z += (virge->s3d.TdZdX * diff_x); 3.791 + state->u += (virge->s3d.TdUdX * diff_x); 3.792 + state->v += (virge->s3d.TdVdX * diff_x); 3.793 + state->r += (virge->s3d.TdRdX * diff_x); 3.794 + state->g += (virge->s3d.TdGdX * diff_x); 3.795 + state->b += (virge->s3d.TdBdX * diff_x); 3.796 + state->a += (virge->s3d.TdAdX * diff_x); 3.797 + state->d += (virge->s3d.TdDdX * diff_x); 3.798 + state->w += (virge->s3d.TdWdX * diff_x); 3.799 + 3.800 + x = virge->s3d.clip_l; 3.801 + } 3.802 + } 3.803 + else 3.804 + { 3.805 + if (x < virge->s3d.clip_l) 3.806 + goto tri_skip_line; 3.807 + if (xe > virge->s3d.clip_r) 3.808 + goto tri_skip_line; 3.809 + if (xe < virge->s3d.clip_l) 3.810 + xe = virge->s3d.clip_l; 3.811 + if (x > virge->s3d.clip_r) 3.812 + { 3.813 + int diff_x = x - virge->s3d.clip_r; 3.814 + 3.815 + z += (virge->s3d.TdZdX * diff_x); 3.816 + state->u += (virge->s3d.TdUdX * diff_x); 3.817 + state->v += (virge->s3d.TdVdX * diff_x); 3.818 + state->r += (virge->s3d.TdRdX * diff_x); 3.819 + state->g += (virge->s3d.TdGdX * diff_x); 3.820 + state->b += (virge->s3d.TdBdX * diff_x); 3.821 + state->a += (virge->s3d.TdAdX * diff_x); 3.822 + state->d += (virge->s3d.TdDdX * diff_x); 3.823 + state->w += (virge->s3d.TdWdX * diff_x); 3.824 + 3.825 + x = virge->s3d.clip_r; 3.826 + } 3.827 + } 3.828 + } 3.829 + 3.830 + virge->svga.changedvram[(dest_offset & 0x3fffff) >> 12] = changeframecount; 3.831 + 3.832 + dest_addr = dest_offset + (x << bpp); 3.833 + z_addr = z_offset + (x << bpp); 3.834 + 3.835 for (; x != ((xe + x_dir) & 0xfff); x = (x + x_dir) & 0xfff) 3.836 { 3.837 - uint32_t dest_addr = dest_offset + (x << bpp); 3.838 - uint32_t z_addr = z_offset + (x << bpp); 3.839 int update = 1; 3.840 int16_t src_z; 3.841 _x = x; _y = state->y; 3.842 @@ -2129,14 +2263,12 @@ 3.843 src_z = Z_READ(z_addr); 3.844 Z_CLIP(src_z, z >> 16); 3.845 } 3.846 - CLIP(x, state->y); 3.847 3.848 if (update) 3.849 { 3.850 - int dest_r, dest_g, dest_b, dest_a; 3.851 uint32_t dest_col; 3.852 3.853 - dest_pixel(state, &dest_r, &dest_g, &dest_b, &dest_a); 3.854 + dest_pixel(state); 3.855 3.856 if (virge->s3d.cmd_set & CMD_SET_ABC_ENABLE) 3.857 { 3.858 @@ -2158,9 +2290,9 @@ 3.859 break; 3.860 } 3.861 3.862 - dest_r = ((dest_r * dest_a) + (src_r * (255 - dest_a))) / 255; 3.863 - dest_g = ((dest_g * dest_a) + (src_g * (255 - dest_a))) / 255; 3.864 - dest_b = ((dest_b * dest_a) + (src_b * (255 - dest_a))) / 255; 3.865 + state->dest_rgba.r = ((state->dest_rgba.r * state->dest_rgba.a) + (src_r * (255 - state->dest_rgba.a))) / 255; 3.866 + state->dest_rgba.g = ((state->dest_rgba.g * state->dest_rgba.a) + (src_g * (255 - state->dest_rgba.a))) / 255; 3.867 + state->dest_rgba.b = ((state->dest_rgba.b * state->dest_rgba.a) + (src_b * (255 - state->dest_rgba.a))) / 255; 3.868 } 3.869 3.870 switch (bpp) 3.871 @@ -2169,14 +2301,12 @@ 3.872 /*Not implemented yet*/ 3.873 break; 3.874 case 1: /*16 bpp*/ 3.875 - dest_col = RGB15(dest_r, dest_g, dest_b); 3.876 - *(uint16_t *)&vram[dest_addr & 0x3fffff] = dest_col; 3.877 - virge->svga.changedvram[(dest_addr & 0x3fffff) >> 12] = changeframecount; 3.878 + dest_col = RGB15(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b); 3.879 + *(uint16_t *)&vram[dest_addr] = dest_col; 3.880 break; 3.881 case 2: /*24 bpp*/ 3.882 - dest_col = RGB24(dest_r, dest_g, dest_b); 3.883 - *(uint32_t *)&vram[dest_addr & 0x3fffff] = dest_col; 3.884 - virge->svga.changedvram[(dest_addr & 0x3fffff) >> 12] = changeframecount; 3.885 + dest_col = RGB24(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b); 3.886 + *(uint32_t *)&vram[dest_addr] = dest_col; 3.887 break; 3.888 } 3.889 3.890 @@ -2193,9 +2323,12 @@ 3.891 state->a += virge->s3d.TdAdX; 3.892 state->d += virge->s3d.TdDdX; 3.893 state->w += virge->s3d.TdWdX; 3.894 + dest_addr += x_offset; 3.895 + z_addr += x_offset; 3.896 virge->pixel_count++; 3.897 } 3.898 } 3.899 +tri_skip_line: 3.900 state->x1 += dx1; 3.901 state->x2 += dx2; 3.902 state->base_u += virge->s3d.TdUdY; 3.903 @@ -2231,7 +2364,10 @@ 3.904 3.905 uint32_t tex_base; 3.906 int c; 3.907 - 3.908 + 3.909 + uint64_t start_time = timer_read(); 3.910 + uint64_t end_time; 3.911 + 3.912 state.tbu = virge->s3d.tbu << 11; 3.913 state.tbv = virge->s3d.tbv << 11; 3.914 3.915 @@ -2347,19 +2483,20 @@ 3.916 switch ((virge->s3d.cmd_set >> 5) & 7) 3.917 { 3.918 case 0: 3.919 - tex_read = tex_ARGB8888; 3.920 + tex_read = (virge->s3d.cmd_set & CMD_SET_TWE) ? tex_ARGB8888 : tex_ARGB8888_nowrap; 3.921 break; 3.922 case 1: 3.923 - tex_read = tex_ARGB4444; 3.924 + tex_read = (virge->s3d.cmd_set & CMD_SET_TWE) ? tex_ARGB4444 : tex_ARGB4444_nowrap; 3.925 // pclog("tex_ARGB4444\n"); 3.926 break; 3.927 case 2: 3.928 - tex_read = tex_ARGB1555; 3.929 + tex_read = (virge->s3d.cmd_set & CMD_SET_TWE) ? tex_ARGB1555 : tex_ARGB1555_nowrap; 3.930 // pclog("tex_ARGB1555 %i\n", (virge->s3d.cmd_set >> 5) & 7); 3.931 break; 3.932 default: 3.933 pclog("bad texture type %i\n", (virge->s3d.cmd_set >> 5) & 7); 3.934 - tex_read = tex_ARGB1555; 3.935 + tex_read = (virge->s3d.cmd_set & CMD_SET_TWE) ? tex_ARGB1555 : tex_ARGB1555_nowrap; 3.936 + break; 3.937 } 3.938 3.939 // pclog("Triangle %i %i,%i to %i,%i %08x\n", y, x1 >> 20, y, virge->s3d.txend01 >> 20, y - (virge->s3d.ty01 + virge->s3d.ty12), state.cmd_set); 3.940 @@ -2372,6 +2509,10 @@ 3.941 tri(virge, &state, virge->s3d.ty12, virge->s3d.TdXdY02, virge->s3d.TdXdY12); 3.942 3.943 virge->tri_count++; 3.944 + 3.945 + end_time = timer_read(); 3.946 + 3.947 + virge_time += end_time - start_time; 3.948 } 3.949 3.950 3.951 @@ -2693,11 +2834,20 @@ 3.952 int cur_len; 3.953 char temps[256]; 3.954 3.955 + uint64_t new_time = timer_read(); 3.956 + uint64_t status_diff = new_time - status_time; 3.957 + status_time = new_time; 3.958 + 3.959 + if (!status_diff) 3.960 + status_diff = 1; 3.961 + 3.962 cur_len = svga_add_status_info(s, cur_len, &virge->svga); 3.963 - sprintf(temps, "%f Mpixels/sec\n%f ktris/sec\n", (double)virge->pixel_count/1000000.0, (double)virge->tri_count/1000.0); 3.964 + sprintf(temps, "%f Mpixels/sec\n%f ktris/sec\n%f%% CPU\n%f%% CPU (real)\n%d writes", (double)virge->pixel_count/1000000.0, (double)virge->tri_count/1000.0, ((double)virge_time * 100.0) / timer_freq, ((double)virge_time * 100.0) / status_diff, reg_writes); 3.965 strncat(s, temps, cur_len); 3.966 cur_len -= strlen(temps); 3.967 virge->pixel_count = virge->tri_count = 0; 3.968 + virge_time = 0; 3.969 + reg_writes = 0; 3.970 3.971 return max_len - cur_len; 3.972 }
4.1 --- a/src/win.c Mon May 26 18:13:23 2014 +0100 4.2 +++ b/src/win.c Wed Jun 04 19:45:12 2014 +0100 4.3 @@ -31,6 +31,8 @@ 4.4 #include "win-d3d-fs.h" 4.5 //#include "win-opengl.h" 4.6 4.7 +uint64_t timer_freq; 4.8 + 4.9 static struct 4.10 { 4.11 void (*init)(HWND h); 4.12 @@ -264,6 +266,13 @@ 4.13 SetWindowText(ghwnd, s); 4.14 } 4.15 4.16 +uint64_t timer_read() 4.17 +{ 4.18 + LARGE_INTEGER qpc_time; 4.19 + QueryPerformanceCounter(&qpc_time); 4.20 + return qpc_time.QuadPart; 4.21 +} 4.22 + 4.23 int WINAPI WinMain (HINSTANCE hThisInstance, 4.24 HINSTANCE hPrevInstance, 4.25 LPSTR lpszArgument, 4.26 @@ -274,6 +283,7 @@ 4.27 MSG messages; /* Here messages to the application are saved */ 4.28 WNDCLASSEX wincl; /* Data structure for the windowclass */ 4.29 int c, d; 4.30 + LARGE_INTEGER qpc_freq; 4.31 4.32 hinstance=hThisInstance; 4.33 /* The Window structure */ 4.34 @@ -436,6 +446,10 @@ 4.35 install_int_ex(vsyncint,BPS_TO_TIMER(100)); 4.36 4.37 updatewindowsize(640, 480); 4.38 + 4.39 + QueryPerformanceFrequency(&qpc_freq); 4.40 + timer_freq = qpc_freq.QuadPart; 4.41 + 4.42 // focus=1; 4.43 // setrefresh(100); 4.44
