PCem

changeset 103:354491040ce1

S3 ViRGE S3D emulation. Currently very slow and buggy, with features missing. Added S3 ViRGE/DX emulation, as the DX drivers are better than those for the original chip.
author TomW
date Mon May 26 18:13:23 2014 +0100
parents 3d3d486d5fce
children 41134e597e51
files src/ibm.h src/vid_s3_virge.c src/vid_s3_virge.h src/video.c src/win.c
diffstat 5 files changed, 1208 insertions(+), 44 deletions(-) [+]
line diff
     1.1 --- a/src/ibm.h	Sun May 11 14:09:13 2014 +0100
     1.2 +++ b/src/ibm.h	Mon May 26 18:13:23 2014 +0100
     1.3 @@ -343,6 +343,7 @@
     1.4  #define GFX_OTI067     15 /*Oak OTI-067*/
     1.5  #define GFX_MACH64GX   16 /*ATI Graphics Pro Turbo (Mach64)*/
     1.6  #define GFX_CL_GD5429  17 /*Cirrus Logic CL-GD5429*/
     1.7 +#define GFX_VIRGEDX    18 /*S3 Virge/DX*/
     1.8  
     1.9  int gfxcard;
    1.10  
     2.1 --- a/src/vid_s3_virge.c	Sun May 11 14:09:13 2014 +0100
     2.2 +++ b/src/vid_s3_virge.c	Mon May 26 18:13:23 2014 +0100
     2.3 @@ -1,6 +1,4 @@
     2.4 -/*S3 ViRGE emulation
     2.5 -
     2.6 -  The SVGA core is largely the same as older S3 chips, but the blitter is totally different*/
     2.7 +/*S3 ViRGE emulation*/
     2.8  #include <stdlib.h>
     2.9  #include "ibm.h"
    2.10  #include "device.h"
    2.11 @@ -12,7 +10,6 @@
    2.12  #include "vid_s3_virge.h"
    2.13  #include "vid_svga.h"
    2.14  #include "vid_svga_render.h"
    2.15 -//#include "vid_sdac_ramdac.h"
    2.16  
    2.17  typedef struct virge_t
    2.18  {
    2.19 @@ -34,7 +31,11 @@
    2.20          uint32_t linear_base, linear_size;
    2.21  
    2.22          uint8_t pci_regs[256];
    2.23 -        
    2.24 +
    2.25 +        int is_375;
    2.26 +
    2.27 +        int pixel_count, tri_count;
    2.28 +                        
    2.29          struct
    2.30          {
    2.31                  uint32_t src_base;
    2.32 @@ -69,6 +70,39 @@
    2.33                  uint32_t pattern_8[8*8];
    2.34                  uint32_t pattern_16[8*8];
    2.35                  uint32_t pattern_32[8*8];
    2.36 +                
    2.37 +                
    2.38 +                uint32_t z_base;
    2.39 +                uint32_t z_str;
    2.40 +
    2.41 +                uint32_t tex_base;
    2.42 +                uint32_t tex_bdr_clr;
    2.43 +                uint32_t tbv, tbu;
    2.44 +                int32_t TdVdX, TdUdX;
    2.45 +                int32_t TdVdY, TdUdY;
    2.46 +                uint32_t tus, tvs;
    2.47 +
    2.48 +                int32_t TdZdX, TdZdY;
    2.49 +                uint32_t tzs;
    2.50 +
    2.51 +                int32_t TdWdX, TdWdY;
    2.52 +                uint32_t tws;
    2.53 +                
    2.54 +                int32_t TdDdX, TdDdY;
    2.55 +                uint32_t tds;
    2.56 +                
    2.57 +                int16_t TdGdX, TdBdX, TdRdX, TdAdX;
    2.58 +                int16_t TdGdY, TdBdY, TdRdY, TdAdY;
    2.59 +                uint32_t tgs, tbs, trs, tas;
    2.60 +                                
    2.61 +                uint32_t TdXdY12;
    2.62 +                uint32_t txend12;
    2.63 +                uint32_t TdXdY01;
    2.64 +                uint32_t txend01;
    2.65 +                uint32_t TdXdY02;
    2.66 +                uint32_t txs;
    2.67 +                uint32_t tys;
    2.68 +                int ty01, ty12, tlr;
    2.69          } s3d;
    2.70          
    2.71          struct
    2.72 @@ -104,6 +138,8 @@
    2.73  
    2.74  static void s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat);
    2.75  
    2.76 +static void s3_virge_triangle(virge_t *virge);
    2.77 +
    2.78  static uint8_t  s3_virge_mmio_read(uint32_t addr, void *p);
    2.79  static uint16_t s3_virge_mmio_read_w(uint32_t addr, void *p);
    2.80  static uint32_t s3_virge_mmio_read_l(uint32_t addr, void *p);
    2.81 @@ -130,6 +166,8 @@
    2.82          CMD_SET_ITA_BYTE = (0 << 10),
    2.83          CMD_SET_ITA_WORD = (1 << 10),
    2.84          CMD_SET_ITA_DWORD = (2 << 10),
    2.85 +        
    2.86 +        CMD_SET_ZB_MODE = (3 << 24),
    2.87  
    2.88          CMD_SET_XP = (1 << 25),
    2.89          CMD_SET_YP = (1 << 26),
    2.90 @@ -137,6 +175,10 @@
    2.91          CMD_SET_COMMAND_MASK = (15 << 27)
    2.92  };
    2.93  
    2.94 +#define CMD_SET_ABC_SRC    (1 << 18)
    2.95 +#define CMD_SET_ABC_ENABLE (1 << 19)
    2.96 +#define CMD_SET_TWE        (1 << 26)
    2.97 +
    2.98  enum
    2.99  {
   2.100          CMD_SET_COMMAND_BITBLT = (0 << 27),
   2.101 @@ -281,11 +323,12 @@
   2.102  {
   2.103          virge_t *virge = (virge_t *)p;
   2.104          svga_t *svga = &virge->svga;
   2.105 +        uint8_t ret;
   2.106          
   2.107          if (((addr & 0xfff0) == 0x3d0 || (addr & 0xfff0) == 0x3b0) && !(svga->miscout & 1)) 
   2.108                  addr ^= 0x60;
   2.109  
   2.110 -//        if (addr != 0x3da) pclog("S3 in %04X %04X:%08X\n", addr, CS, pc);
   2.111 +//        if (addr != 0x3da) pclog("S3 in %04X %04X:%08X  ", addr, CS, pc);
   2.112          switch (addr)
   2.113          {
   2.114                  //case 0x3C6: case 0x3C7: case 0x3C8: case 0x3C9:
   2.115 @@ -294,29 +337,38 @@
   2.116  
   2.117                  case 0x3c5:
   2.118                  if (svga->seqaddr >= 0x10)
   2.119 -                   return svga->seqregs[svga->seqaddr & 0x1f];
   2.120 +                        ret = svga->seqregs[svga->seqaddr & 0x1f];
   2.121 +                else
   2.122 +                        ret = svga_in(addr, svga);
   2.123                  break;
   2.124  
   2.125                  case 0x3D4:
   2.126 -                return svga->crtcreg;
   2.127 +                ret = svga->crtcreg;
   2.128 +                break;
   2.129                  case 0x3D5:
   2.130  //                pclog("Read CRTC R%02X %04X:%04X (%02x)\n", svga->crtcreg, CS, pc, svga->crtc[svga->crtcreg]);
   2.131                  switch (svga->crtcreg)
   2.132                  {
   2.133 -                        case 0x2d: return virge->virge_id_high; /*Extended chip ID*/
   2.134 -                        case 0x2e: return virge->virge_id_low;  /*New chip ID*/
   2.135 -                        case 0x2f: return virge->virge_rev;
   2.136 -                        case 0x30: return virge->virge_id;      /*Chip ID*/
   2.137 -                        case 0x31: return (svga->crtc[0x31] & 0xcf) | ((virge->ma_ext & 3) << 4);
   2.138 -                        case 0x35: return (svga->crtc[0x35] & 0xf0) | (virge->bank & 0xf);
   2.139 -                        case 0x36: return (svga->crtc[0x36] & 0xfc) | 2; /*PCI bus*/
   2.140 -                        case 0x51: return (svga->crtc[0x51] & 0xf0) | ((virge->bank >> 2) & 0xc) | ((virge->ma_ext >> 2) & 3);
   2.141 -                        case 0x69: return virge->ma_ext;
   2.142 -                        case 0x6a: return virge->bank;
   2.143 +                        case 0x2d: ret = virge->virge_id_high; break; /*Extended chip ID*/
   2.144 +                        case 0x2e: ret = virge->virge_id_low;  break; /*New chip ID*/
   2.145 +                        case 0x2f: ret = virge->virge_rev;     break; 
   2.146 +                        case 0x30: ret = virge->virge_id;      break; /*Chip ID*/
   2.147 +                        case 0x31: ret = (svga->crtc[0x31] & 0xcf) | ((virge->ma_ext & 3) << 4); break;
   2.148 +                        case 0x35: ret = (svga->crtc[0x35] & 0xf0) | (virge->bank & 0xf); break;
   2.149 +                        case 0x36: ret = (svga->crtc[0x36] & 0xfc) | 2; break; /*PCI bus*/
   2.150 +                        case 0x51: ret = (svga->crtc[0x51] & 0xf0) | ((virge->bank >> 2) & 0xc) | ((virge->ma_ext >> 2) & 3); break;
   2.151 +                        case 0x69: ret = virge->ma_ext; break;
   2.152 +                        case 0x6a: ret = virge->bank; break;
   2.153 +                        default:   ret = svga->crtc[svga->crtcreg]; break;
   2.154                  }
   2.155 -                return svga->crtc[svga->crtcreg];
   2.156 +                break;
   2.157 +                
   2.158 +                default:
   2.159 +                ret = svga_in(addr, svga);
   2.160 +                break; 
   2.161          }
   2.162 -        return svga_in(addr, svga);
   2.163 +//        if (addr != 0x3da) pclog("%02X\n", ret);
   2.164 +        return ret;
   2.165  }
   2.166  
   2.167  static void s3_virge_recalctimings(svga_t *svga)
   2.168 @@ -335,7 +387,7 @@
   2.169          if ((svga->crtc[0x67] & 0xc) != 0xc) /*VGA mode*/
   2.170          {
   2.171                  svga->ma_latch |= (virge->ma_ext << 16);
   2.172 -
   2.173 +pclog("VGA mode\n");
   2.174                  if (svga->crtc[0x51] & 0x30)      svga->rowoffset += (svga->crtc[0x51] & 0x30) << 4;
   2.175                  else if (svga->crtc[0x43] & 0x04) svga->rowoffset += 0x100;
   2.176                  if (!svga->rowoffset) svga->rowoffset = 256;
   2.177 @@ -379,7 +431,9 @@
   2.178                          svga->ma_latch = virge->streams.pri_fb1 >> 2;
   2.179                  else
   2.180                          svga->ma_latch = virge->streams.pri_fb0 >> 2;
   2.181 -                
   2.182 +                        
   2.183 +                svga->hdisp = virge->streams.pri_w;
   2.184 +pclog("Streams mode   x_disp=%i\n", svga->hdisp);                
   2.185                  svga->rowoffset = virge->streams.pri_stride >> 3;
   2.186  
   2.187                  switch ((virge->streams.pri_ctrl >> 24) & 0x7)
   2.188 @@ -388,9 +442,11 @@
   2.189                          svga->render = svga_render_8bpp_highres; 
   2.190                          break;
   2.191                          case 3: /*KRGB-16 (1.5.5.5)*/ 
   2.192 +                        svga->htotal >>= 1;
   2.193                          svga->render = svga_render_15bpp_highres; 
   2.194                          break;
   2.195                          case 5: /*RGB-16 (5.6.5)*/ 
   2.196 +                        svga->htotal >>= 1;
   2.197                          svga->render = svga_render_16bpp_highres; 
   2.198                          break;
   2.199                          case 6: /*RGB-24 (8.8.8)*/ 
   2.200 @@ -536,7 +592,7 @@
   2.201  {
   2.202          virge_t *virge = (virge_t *)p;
   2.203          uint32_t ret = 0xffffffff;
   2.204 -//        pclog("New MMIO readl %08X\n", addr);
   2.205 +//        pclog("New MMIO readl %08X %04X(%08X):%08X  ", addr, CS, cs, pc);
   2.206          switch (addr & 0xfffc)
   2.207          {
   2.208                  case 0x8180:
   2.209 @@ -607,7 +663,7 @@
   2.210                  break;
   2.211                  
   2.212                  case 0x8504:
   2.213 -                ret = (0x1f << 8) | (1 << 13);
   2.214 +                ret = (0x10 << 8) | (1 << 13);
   2.215                  break;
   2.216                  case 0xa4d4:
   2.217                  ret = virge->s3d.src_base;
   2.218 @@ -656,8 +712,9 @@
   2.219                  break;
   2.220                  
   2.221                  default:
   2.222 -                return s3_virge_mmio_read_w(addr, p) | (s3_virge_mmio_read_w(addr + 2, p) << 16);
   2.223 +                ret = s3_virge_mmio_read_w(addr, p) | (s3_virge_mmio_read_w(addr + 2, p) << 16);
   2.224          }
   2.225 +//        pclog("%02x\n", ret);
   2.226          return ret;
   2.227  }
   2.228  static void s3_virge_mmio_write(uint32_t addr, uint8_t val, void *p)
   2.229 @@ -665,8 +722,8 @@
   2.230          virge_t *virge = (virge_t *)p;
   2.231          svga_t *svga = &virge->svga;
   2.232          
   2.233 -//        pclog("New MMIO writeb %08X %02X\n", addr, val);
   2.234 -        
   2.235 +//        pclog("New MMIO writeb %08X %02X %04x(%08x):%08x\n", addr, val, CS, cs, pc);
   2.236 +       
   2.237          if ((addr & 0xfffc) < 0x8000)
   2.238                  s3_virge_bitblt(virge, 8, val);
   2.239          else switch (addr & 0xffff)
   2.240 @@ -692,7 +749,7 @@
   2.241  static void s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *p)
   2.242  {
   2.243          virge_t *virge = (virge_t *)p;
   2.244 -//        pclog("New MMIO writew %08X %04X\n", addr, val);
   2.245 +//        pclog("New MMIO writew %08X %04X %04x(%08x):%08x\n", addr, val, CS, cs, pc);
   2.246          if ((addr & 0xfffc) < 0x8000)
   2.247          {
   2.248                  if (virge->s3d.cmd_set & CMD_SET_MS)
   2.249 @@ -713,7 +770,7 @@
   2.250          virge_t *virge = (virge_t *)p;
   2.251          svga_t *svga = &virge->svga;
   2.252  //        if ((addr & 0xfffc) >= 0x8000)
   2.253 -//                pclog("New MMIO writel %08X %08X\n", addr, val);
   2.254 +//                pclog("New MMIO writel %08X %08X %04x(%08x):%08x\n", addr, val, CS, cs, pc);
   2.255  
   2.256          if ((addr & 0xfffc) < 0x8000)
   2.257          {
   2.258 @@ -760,6 +817,7 @@
   2.259                  svga->fullchange = changeframecount;
   2.260                  break;
   2.261                  case 0x81cc:
   2.262 +                        pclog("Write buffer_ctrl %08x\n", val);
   2.263                  virge->streams.buffer_ctrl = val;
   2.264                  s3_virge_recalctimings(svga);
   2.265                  break;
   2.266 @@ -867,18 +925,18 @@
   2.267                  case 0xa4d4: case 0xa8d4:
   2.268                  virge->s3d.src_base = val & 0x3ffff8;
   2.269                  break;
   2.270 -                case 0xa4d8: case 0xa8d8:
   2.271 +                case 0xa4d8: case 0xa8d8: case 0xb4d8:
   2.272                  virge->s3d.dest_base = val & 0x3ffff8;
   2.273                  break;
   2.274 -                case 0xa4dc: case 0xa8dc:
   2.275 +                case 0xa4dc: case 0xa8dc: case 0xb4dc:
   2.276                  virge->s3d.clip_l = (val >> 16) & 0x7ff;
   2.277                  virge->s3d.clip_r = val & 0x7ff;
   2.278                  break;
   2.279 -                case 0xa4e0: case 0xa8e0:
   2.280 +                case 0xa4e0: case 0xa8e0: case 0xb4e0:
   2.281                  virge->s3d.clip_t = (val >> 16) & 0x7ff;
   2.282                  virge->s3d.clip_b = val & 0x7ff;
   2.283                  break;
   2.284 -                case 0xa4e4: case 0xa8e4:
   2.285 +                case 0xa4e4: case 0xa8e4: case 0xb4e4:
   2.286                  virge->s3d.dest_str = (val >> 16) & 0xff8;
   2.287                  virge->s3d.src_str = val & 0xff8;
   2.288                  break;
   2.289 @@ -938,6 +996,128 @@
   2.290                  if (virge->s3d.cmd_set & CMD_SET_AE)
   2.291                          s3_virge_bitblt(virge, -1, 0);
   2.292                  break;
   2.293 +                
   2.294 +                case 0xb4d4:
   2.295 +                virge->s3d.z_base = val & 0x3ffff8;
   2.296 +                break;
   2.297 +                case 0xb4e8:
   2.298 +                virge->s3d.z_str = val & 0xff8;
   2.299 +                break;
   2.300 +                case 0xb4ec:
   2.301 +                virge->s3d.tex_base = val & 0x3ffff8;
   2.302 +                break;
   2.303 +                case 0xb4f0:
   2.304 +                virge->s3d.tex_bdr_clr = val & 0xffffff;
   2.305 +                break;
   2.306 +                case 0xb500:
   2.307 +                virge->s3d.cmd_set = val;
   2.308 +                if (!(val & CMD_SET_AE))
   2.309 +                        s3_virge_triangle(virge);
   2.310 +                break;
   2.311 +                case 0xb504:
   2.312 +                virge->s3d.tbv = val & 0xfffff;
   2.313 +                break;
   2.314 +                case 0xb508:
   2.315 +                virge->s3d.tbu = val & 0xfffff;
   2.316 +                break;
   2.317 +                case 0xb50c:
   2.318 +                virge->s3d.TdWdX = val;
   2.319 +                break;
   2.320 +                case 0xb510:
   2.321 +                virge->s3d.TdWdY = val;
   2.322 +                break;
   2.323 +                case 0xb514:
   2.324 +                virge->s3d.tws = val;
   2.325 +                break;
   2.326 +                case 0xb518:
   2.327 +                virge->s3d.TdDdX = val;
   2.328 +                break;
   2.329 +                case 0xb51c:
   2.330 +                virge->s3d.TdVdX = val;
   2.331 +                break;
   2.332 +                case 0xb520:
   2.333 +                virge->s3d.TdUdX = val;
   2.334 +                break;
   2.335 +                case 0xb524:
   2.336 +                virge->s3d.TdDdY = val;
   2.337 +                break;
   2.338 +                case 0xb528:
   2.339 +                virge->s3d.TdVdY = val;
   2.340 +                break;
   2.341 +                case 0xb52c:
   2.342 +                virge->s3d.TdUdY = val;
   2.343 +                break;
   2.344 +                case 0xb530:
   2.345 +                virge->s3d.tds = val;
   2.346 +                break;
   2.347 +                case 0xb534:
   2.348 +                virge->s3d.tvs = val;
   2.349 +                break;
   2.350 +                case 0xb538:
   2.351 +                virge->s3d.tus = val;
   2.352 +                break;
   2.353 +                case 0xb53c:
   2.354 +                virge->s3d.TdGdX = val >> 16;
   2.355 +                virge->s3d.TdBdX = val & 0xffff;
   2.356 +                break;
   2.357 +                case 0xb540:
   2.358 +                virge->s3d.TdAdX = val >> 16;
   2.359 +                virge->s3d.TdRdX = val & 0xffff;
   2.360 +                break;
   2.361 +                case 0xb544:
   2.362 +                virge->s3d.TdGdY = val >> 16;
   2.363 +                virge->s3d.TdBdY = val & 0xffff;
   2.364 +                break;
   2.365 +                case 0xb548:
   2.366 +                virge->s3d.TdAdY = val >> 16;
   2.367 +                virge->s3d.TdRdY = val & 0xffff;
   2.368 +                break;
   2.369 +                case 0xb54c:
   2.370 +                virge->s3d.tgs = (val >> 16) & 0xffff;
   2.371 +                virge->s3d.tbs = val & 0xffff;
   2.372 +                break;
   2.373 +                case 0xb550:
   2.374 +                virge->s3d.tas = (val >> 16) & 0xffff;
   2.375 +                virge->s3d.trs = val & 0xffff;
   2.376 +                break;
   2.377 +                
   2.378 +                case 0xb554:
   2.379 +                virge->s3d.TdZdX = val;
   2.380 +                break;
   2.381 +                case 0xb558:
   2.382 +                virge->s3d.TdZdY = val;
   2.383 +                break;
   2.384 +                case 0xb55c:
   2.385 +                virge->s3d.tzs = val;
   2.386 +                break;
   2.387 +                case 0xb560:
   2.388 +                virge->s3d.TdXdY12 = val;
   2.389 +                break;
   2.390 +                case 0xb564:
   2.391 +                virge->s3d.txend12 = val;
   2.392 +                break;
   2.393 +                case 0xb568:
   2.394 +                virge->s3d.TdXdY01 = val;
   2.395 +                break;
   2.396 +                case 0xb56c:
   2.397 +                virge->s3d.txend01 = val;
   2.398 +                break;
   2.399 +                case 0xb570:
   2.400 +                virge->s3d.TdXdY02 = val;
   2.401 +                break;
   2.402 +                case 0xb574:
   2.403 +                virge->s3d.txs = val;
   2.404 +                break;
   2.405 +                case 0xb578:
   2.406 +                virge->s3d.tys = val;
   2.407 +                break;
   2.408 +                case 0xb57c:
   2.409 +                virge->s3d.ty01 = (val >> 16) & 0x7ff;
   2.410 +                virge->s3d.ty12 = val & 0x7ff;
   2.411 +                virge->s3d.tlr = val >> 31;
   2.412 +                if (virge->s3d.cmd_set & CMD_SET_AE)
   2.413 +                        s3_virge_triangle(virge);
   2.414 +                break;
   2.415          }
   2.416  }
   2.417  
   2.418 @@ -958,6 +1138,10 @@
   2.419                  }                                                                               \
   2.420          } while (0)
   2.421  
   2.422 +#define Z_READ(addr) *(uint16_t *)&vram[addr & 0x3fffff]
   2.423 +
   2.424 +#define Z_WRITE(addr, val) if (!(virge->s3d.cmd_set & CMD_SET_ZB_MODE)) *(uint16_t *)&vram[addr & 0x3fffff] = val
   2.425 +
   2.426  #define CLIP(x, y)                                              \
   2.427          do                                                      \
   2.428          {                                                       \
   2.429 @@ -969,6 +1153,23 @@
   2.430                          update = 0;                             \
   2.431          } while (0)
   2.432  
   2.433 +#define Z_CLIP(Zzb, Zs)                                                 \
   2.434 +        do                                                              \
   2.435 +        {                                                               \
   2.436 +                if (!(virge->s3d.cmd_set & CMD_SET_ZB_MODE))            \
   2.437 +                switch ((virge->s3d.cmd_set >> 20) & 7)                 \
   2.438 +                {                                                       \
   2.439 +                        case 0: update = 0; break;                      \
   2.440 +                        case 1: if (Zs <= Zzb) update = 0; else Zzb = Zs; break;       \
   2.441 +                        case 2: if (Zs != Zzb) update = 0; else Zzb = Zs; break;       \
   2.442 +                        case 3: if (Zs <  Zzb) update = 0; else Zzb = Zs; break;       \
   2.443 +                        case 4: if (Zs >= Zzb) update = 0; else Zzb = Zs; break;       \
   2.444 +                        case 5: if (Zs == Zzb) update = 0; else Zzb = Zs; break;       \
   2.445 +                        case 6: if (Zs >  Zzb) update = 0; else Zzb = Zs; break;       \
   2.446 +                        case 7: update = 1; Zzb = Zs; break;                      \
   2.447 +                }                                                       \
   2.448 +        } while (0)
   2.449 +        
   2.450  #define MIX()                                                   \
   2.451          do                                                      \
   2.452          {                                                       \
   2.453 @@ -1089,11 +1290,16 @@
   2.454                          virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff;
   2.455                          virge->s3d.data_left_count = 0;
   2.456                          
   2.457 -  /*                      pclog("BitBlt start %i,%i %i,%i %02X\n", virge->s3d.dest_x,
   2.458 +/*                        pclog("BitBlt start %i,%i %i,%i %i,%i %02X %x %x\n",
   2.459 +                                                                 virge->s3d.src_x,
   2.460 +                                                                 virge->s3d.src_y,
   2.461 +                                                                 virge->s3d.dest_x,
   2.462                                                                   virge->s3d.dest_y,
   2.463                                                                   virge->s3d.w,
   2.464                                                                   virge->s3d.h,
   2.465 -                                                                 virge->s3d.rop);*/
   2.466 +                                                                 virge->s3d.rop,
   2.467 +                                                                 virge->s3d.src_base,
   2.468 +                                                                 virge->s3d.dest_base);*/
   2.469                          
   2.470                          if (virge->s3d.cmd_set & CMD_SET_IDS)
   2.471                                  return;
   2.472 @@ -1214,11 +1420,11 @@
   2.473                          virge->s3d.h = virge->s3d.r_height;
   2.474                          virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff;
   2.475                          
   2.476 -/*                        pclog("RctFll start %i,%i %i,%i %02X\n", virge->s3d.dest_x,
   2.477 +/*                        pclog("RctFll start %i,%i %i,%i %02X %08x\n", virge->s3d.dest_x,
   2.478                                                                   virge->s3d.dest_y,
   2.479                                                                   virge->s3d.w,
   2.480                                                                   virge->s3d.h,
   2.481 -                                                                 virge->s3d.rop);*/
   2.482 +                                                                 virge->s3d.rop, virge->s3d.dest_base);*/
   2.483                  }
   2.484  
   2.485                  while (count)
   2.486 @@ -1315,6 +1521,859 @@
   2.487          }
   2.488  }
   2.489  
   2.490 +#define RGB15_TO_24(val, r, g, b) b = (val & 0x1f) << 3;      \
   2.491 +                                  g = (val & 0x3e0) >> 2;     \
   2.492 +                                  r = (val & 0x7c00) >> 7
   2.493 +
   2.494 +#define RGB24_TO_24(val, r, g, b) b = val & 0xff;             \
   2.495 +                                  g = (val & 0xff00) >> 8;    \
   2.496 +                                  r = (val & 0xff0000) >> 16
   2.497 +
   2.498 +#define RGB15(r, g, b) ((((b) >> 3) & 0x1f) | ((((g) >> 3) & 0x1f) << 5) | ((((r) >> 3) & 0x1f) << 10))
   2.499 +
   2.500 +#define RGB24(r, g, b) ((b) | ((g) << 8) | ((r) << 16))
   2.501 +
   2.502 +typedef struct s3d_state_t
   2.503 +{
   2.504 +        int32_t r, g, b, a, u, v, d, w;
   2.505 +
   2.506 +        int32_t base_r, base_g, base_b, base_a, base_u, base_v, base_d, base_w;
   2.507 +        
   2.508 +        uint32_t base_z;
   2.509 +
   2.510 +        uint32_t tbu, tbv;
   2.511 +
   2.512 +        uint32_t cmd_set;
   2.513 +        int max_d;
   2.514 +        
   2.515 +        uint16_t *texture[10];
   2.516 +        
   2.517 +        uint32_t tex_bdr_clr;
   2.518 +        
   2.519 +        int32_t x1, x2;
   2.520 +        int y;
   2.521 +} s3d_state_t;
   2.522 +
   2.523 +typedef struct s3d_texture_state_t
   2.524 +{
   2.525 +        int level;
   2.526 +        int texture_shift;
   2.527 +        
   2.528 +        int32_t u, v;
   2.529 +} s3d_texture_state_t;
   2.530 +
   2.531 +static void (*tex_read)(s3d_state_t *state, s3d_texture_state_t *texture_state, int *r_out, int *g_out, int *b_out, int *a_out);
   2.532 +static void (*tex_sample)(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out);
   2.533 +static void (*dest_pixel)(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out);
   2.534 +
   2.535 +#define MAX(a, b) ((a) > (b) ? (a) : (b))
   2.536 +#define MIN(a, b) ((a) < (b) ? (a) : (b))
   2.537 +
   2.538 +static int _x, _y;
   2.539 +
   2.540 +static void tex_ARGB1555(s3d_state_t *state, s3d_texture_state_t *texture_state, int *r_out, int *g_out, int *b_out, int *a_out)
   2.541 +{
   2.542 +        int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
   2.543 +                     (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
   2.544 +        uint16_t val = state->texture[texture_state->level][offset];
   2.545 +
   2.546 +        if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000 && !(state->cmd_set & CMD_SET_TWE))
   2.547 +                val = state->tex_bdr_clr;
   2.548 +
   2.549 +        *r_out = ((val & 0x7c00) >> 7) | ((val & 0x7000) >> 12);
   2.550 +        *g_out = ((val & 0x03e0) >> 2) | ((val & 0x0380) >> 7);
   2.551 +        *b_out = ((val & 0x001f) << 3) | ((val & 0x001c) >> 2);
   2.552 +        *a_out = (val & 0x8000) ? 0xff : 0;
   2.553 +}
   2.554 +
   2.555 +static void tex_ARGB4444(s3d_state_t *state, s3d_texture_state_t *texture_state, int *r_out, int *g_out, int *b_out, int *a_out)
   2.556 +{
   2.557 +        int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
   2.558 +                     (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
   2.559 +        uint16_t val = state->texture[texture_state->level][offset];
   2.560 +
   2.561 +        if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000 && !(state->cmd_set & CMD_SET_TWE))
   2.562 +                val = state->tex_bdr_clr;
   2.563 +
   2.564 +        *r_out = ((val & 0x0f00) >> 4) | ((val & 0x0f00) >> 8);
   2.565 +        *g_out = (val & 0x00f0) | ((val & 0x00f0) >> 4);
   2.566 +        *b_out = ((val & 0x000f) << 4) | (val & 0x000f);
   2.567 +        *a_out = ((val & 0xf000) >> 8) | ((val & 0xf000) >> 12);
   2.568 +
   2.569 +}
   2.570 +
   2.571 +static void tex_ARGB8888(s3d_state_t *state, s3d_texture_state_t *texture_state, int *r_out, int *g_out, int *b_out, int *a_out)
   2.572 +{
   2.573 +        int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
   2.574 +                     (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
   2.575 +        uint32_t val = ((uint32_t *)state->texture[texture_state->level])[offset];
   2.576 +
   2.577 +        if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000 && !(state->cmd_set & CMD_SET_TWE))
   2.578 +                val = state->tex_bdr_clr;
   2.579 +
   2.580 +        *r_out = (val >> 16) & 0xff;
   2.581 +        *g_out = (val >> 8)  & 0xff;
   2.582 +        *b_out =  val        & 0xff;
   2.583 +        *a_out = (val >> 24) & 0xff;
   2.584 +}
   2.585 +
   2.586 +static void tex_sample_normal(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out)
   2.587 +{
   2.588 +        s3d_texture_state_t texture_state;
   2.589 +        
   2.590 +        texture_state.level = state->max_d;
   2.591 +        texture_state.texture_shift = 18 + (9 - texture_state.level);
   2.592 +        texture_state.u = state->u + state->tbu;
   2.593 +        texture_state.v = state->v + state->tbv;
   2.594 +
   2.595 +        tex_read(state, &texture_state, r_out, g_out, b_out, a_out);
   2.596 +}
   2.597 +
   2.598 +static void tex_sample_normal_filter(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out)
   2.599 +{
   2.600 +        s3d_texture_state_t texture_state;
   2.601 +        int tex_offset;
   2.602 +        int r[4], g[4], b[4], a[4];
   2.603 +        int du, dv;
   2.604 +        int d[4];
   2.605 +
   2.606 +        texture_state.level = state->max_d;
   2.607 +        texture_state.texture_shift = 18 + (9 - texture_state.level);
   2.608 +        tex_offset = 1 << texture_state.texture_shift;
   2.609 +
   2.610 +        texture_state.u = state->u + state->tbu;
   2.611 +        texture_state.v = state->v + state->tbv;
   2.612 +        tex_read(state, &texture_state, &r[0], &g[0], &b[0], &a[0]);
   2.613 +        du = (texture_state.u >> (texture_state.texture_shift - 8)) & 0xff;
   2.614 +        dv = (texture_state.v >> (texture_state.texture_shift - 8)) & 0xff;
   2.615 +
   2.616 +        texture_state.u = state->u + state->tbu + tex_offset;
   2.617 +        texture_state.v = state->v + state->tbv;
   2.618 +        tex_read(state, &texture_state, &r[1], &g[1], &b[1], &a[1]);
   2.619 +
   2.620 +        texture_state.u = state->u + state->tbu;
   2.621 +        texture_state.v = state->v + state->tbv + tex_offset;
   2.622 +        tex_read(state, &texture_state, &r[2], &g[2], &b[2], &a[2]);
   2.623 +
   2.624 +        texture_state.u = state->u + state->tbu + tex_offset;
   2.625 +        texture_state.v = state->v + state->tbv + tex_offset;
   2.626 +        tex_read(state, &texture_state, &r[3], &g[3], &b[3], &a[3]);
   2.627 +        
   2.628 +        d[0] = (256 - du) * (256 - dv);
   2.629 +        d[1] =  du * (256 - dv);
   2.630 +        d[2] = (256 - du) * dv;
   2.631 +        d[3] = du * dv;
   2.632 +        
   2.633 +        *r_out = (r[0] * d[0] + r[1] * d[1] + r[2] * d[2] + r[3] * d[3]) >> 16;
   2.634 +        *g_out = (g[0] * d[0] + g[1] * d[1] + g[2] * d[2] + g[3] * d[3]) >> 16;
   2.635 +        *b_out = (b[0] * d[0] + b[1] * d[1] + b[2] * d[2] + b[3] * d[3]) >> 16;
   2.636 +        *a_out = (a[0] * d[0] + a[1] * d[1] + a[2] * d[2] + a[3] * d[3]) >> 16;
   2.637 +}
   2.638 +
   2.639 +static void tex_sample_mipmap(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out)
   2.640 +{
   2.641 +        s3d_texture_state_t texture_state;
   2.642 +
   2.643 +        texture_state.level = MAX(MIN(9 - ((state->d >> 27) & 0xf), state->max_d), 0);
   2.644 +        texture_state.texture_shift = 18 + (9 - texture_state.level);
   2.645 +        texture_state.u = state->u + state->tbu;
   2.646 +        texture_state.v = state->v + state->tbv;
   2.647 +
   2.648 +        tex_read(state, &texture_state, r_out, g_out, b_out, a_out);
   2.649 +}
   2.650 +
   2.651 +static void tex_sample_mipmap_filter(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out)
   2.652 +{
   2.653 +        s3d_texture_state_t texture_state;
   2.654 +        int tex_offset;
   2.655 +        int r[4], g[4], b[4], a[4];
   2.656 +        int du, dv;
   2.657 +        int d[4];
   2.658 +
   2.659 +        texture_state.level = MAX(MIN(9 - ((state->d >> 27) & 0xf), state->max_d), 0);
   2.660 +        texture_state.texture_shift = 18 + (9 - texture_state.level);
   2.661 +        tex_offset = 1 << texture_state.texture_shift;
   2.662 +        
   2.663 +        texture_state.u = state->u + state->tbu;
   2.664 +        texture_state.v = state->v + state->tbv;
   2.665 +        tex_read(state, &texture_state, &r[0], &g[0], &b[0], &a[0]);
   2.666 +        du = (texture_state.u >> (texture_state.texture_shift - 8)) & 0xff;
   2.667 +        dv = (texture_state.v >> (texture_state.texture_shift - 8)) & 0xff;
   2.668 +
   2.669 +        texture_state.u = state->u + state->tbu + tex_offset;
   2.670 +        texture_state.v = state->v + state->tbv;
   2.671 +        tex_read(state, &texture_state, &r[1], &g[1], &b[1], &a[1]);
   2.672 +
   2.673 +        texture_state.u = state->u + state->tbu;
   2.674 +        texture_state.v = state->v + state->tbv + tex_offset;
   2.675 +        tex_read(state, &texture_state, &r[2], &g[2], &b[2], &a[2]);
   2.676 +
   2.677 +        texture_state.u = state->u + state->tbu + tex_offset;
   2.678 +        texture_state.v = state->v + state->tbv + tex_offset;
   2.679 +        tex_read(state, &texture_state, &r[3], &g[3], &b[3], &a[3]);
   2.680 +
   2.681 +        d[0] = (256 - du) * (256 - dv);
   2.682 +        d[1] =  du * (256 - dv);
   2.683 +        d[2] = (256 - du) * dv;
   2.684 +        d[3] = du * dv;
   2.685 +        
   2.686 +        *r_out = (r[0] * d[0] + r[1] * d[1] + r[2] * d[2] + r[3] * d[3]) >> 16;
   2.687 +        *g_out = (g[0] * d[0] + g[1] * d[1] + g[2] * d[2] + g[3] * d[3]) >> 16;
   2.688 +        *b_out = (b[0] * d[0] + b[1] * d[1] + b[2] * d[2] + b[3] * d[3]) >> 16;
   2.689 +        *a_out = (a[0] * d[0] + a[1] * d[1] + a[2] * d[2] + a[3] * d[3]) >> 16;
   2.690 +}
   2.691 +
   2.692 +static void tex_sample_persp_normal(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out)
   2.693 +{
   2.694 +        s3d_texture_state_t texture_state;
   2.695 +        int32_t w = 0;
   2.696 +
   2.697 +        if (state->w)
   2.698 +                w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
   2.699 +        
   2.700 +        texture_state.level = state->max_d;
   2.701 +        texture_state.texture_shift = 18 + (9 - texture_state.level);      
   2.702 +        texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu;
   2.703 +        texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv;
   2.704 +
   2.705 +        tex_read(state, &texture_state, r_out, g_out, b_out, a_out);
   2.706 +}
   2.707 +
   2.708 +static void tex_sample_persp_normal_filter(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out)
   2.709 +{
   2.710 +        s3d_texture_state_t texture_state;
   2.711 +        int32_t w = 0, u, v;
   2.712 +        int tex_offset;
   2.713 +        int r[4], g[4], b[4], a[4];
   2.714 +        int du, dv;
   2.715 +        int d[4];
   2.716 +
   2.717 +        if (state->w)
   2.718 +                w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
   2.719 +
   2.720 +        u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu;
   2.721 +        v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv;
   2.722 +
   2.723 +        texture_state.level = state->max_d;
   2.724 +        texture_state.texture_shift = 18 + (9 - texture_state.level);
   2.725 +        tex_offset = 1 << texture_state.texture_shift;
   2.726 +        
   2.727 +        texture_state.u = u;
   2.728 +        texture_state.v = v;
   2.729 +        tex_read(state, &texture_state, &r[0], &g[0], &b[0], &a[0]);
   2.730 +        du = (u >> (texture_state.texture_shift - 8)) & 0xff;
   2.731 +        dv = (v >> (texture_state.texture_shift - 8)) & 0xff;
   2.732 +
   2.733 +        texture_state.u = u + tex_offset;
   2.734 +        texture_state.v = v;
   2.735 +        tex_read(state, &texture_state, &r[1], &g[1], &b[1], &a[1]);
   2.736 +
   2.737 +        texture_state.u = u;
   2.738 +        texture_state.v = v + tex_offset;
   2.739 +        tex_read(state, &texture_state, &r[2], &g[2], &b[2], &a[2]);
   2.740 +
   2.741 +        texture_state.u = u + tex_offset;
   2.742 +        texture_state.v = v + tex_offset;
   2.743 +        tex_read(state, &texture_state, &r[3], &g[3], &b[3], &a[3]);
   2.744 +
   2.745 +        d[0] = (256 - du) * (256 - dv);
   2.746 +        d[1] =  du * (256 - dv);
   2.747 +        d[2] = (256 - du) * dv;
   2.748 +        d[3] = du * dv;
   2.749 +        
   2.750 +        *r_out = (r[0] * d[0] + r[1] * d[1] + r[2] * d[2] + r[3] * d[3]) >> 16;
   2.751 +        *g_out = (g[0] * d[0] + g[1] * d[1] + g[2] * d[2] + g[3] * d[3]) >> 16;
   2.752 +        *b_out = (b[0] * d[0] + b[1] * d[1] + b[2] * d[2] + b[3] * d[3]) >> 16;
   2.753 +        *a_out = (a[0] * d[0] + a[1] * d[1] + a[2] * d[2] + a[3] * d[3]) >> 16;
   2.754 +}
   2.755 +
   2.756 +static void tex_sample_persp_normal_375(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out)
   2.757 +{
   2.758 +        s3d_texture_state_t texture_state;
   2.759 +        int32_t w = 0;
   2.760 +
   2.761 +        if (state->w)
   2.762 +                w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
   2.763 +        
   2.764 +        texture_state.level = state->max_d;
   2.765 +        texture_state.texture_shift = 18 + (9 - texture_state.level);      
   2.766 +        texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu;
   2.767 +        texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv;
   2.768 +
   2.769 +        tex_read(state, &texture_state, r_out, g_out, b_out, a_out);
   2.770 +}
   2.771 +
   2.772 +static void tex_sample_persp_normal_filter_375(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out)
   2.773 +{
   2.774 +        s3d_texture_state_t texture_state;
   2.775 +        int32_t w = 0, u, v;
   2.776 +        int tex_offset;
   2.777 +        int r[4], g[4], b[4], a[4];
   2.778 +        int du, dv;
   2.779 +        int d[4];
   2.780 +
   2.781 +        if (state->w)
   2.782 +                w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
   2.783 +
   2.784 +        u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu;
   2.785 +        v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv;
   2.786 +        
   2.787 +        texture_state.level = state->max_d;
   2.788 +        texture_state.texture_shift = 18 + (9 - texture_state.level);
   2.789 +        tex_offset = 1 << texture_state.texture_shift;
   2.790 +
   2.791 +        texture_state.u = u;
   2.792 +        texture_state.v = v;
   2.793 +        tex_read(state, &texture_state, &r[0], &g[0], &b[0], &a[0]);
   2.794 +        du = (u >> (texture_state.texture_shift - 8)) & 0xff;
   2.795 +        dv = (v >> (texture_state.texture_shift - 8)) & 0xff;
   2.796 +
   2.797 +        texture_state.u = u + tex_offset;
   2.798 +        texture_state.v = v;
   2.799 +        tex_read(state, &texture_state, &r[1], &g[1], &b[1], &a[1]);
   2.800 +
   2.801 +        texture_state.u = u;
   2.802 +        texture_state.v = v + tex_offset;
   2.803 +        tex_read(state, &texture_state, &r[2], &g[2], &b[2], &a[2]);
   2.804 +
   2.805 +        texture_state.u = u + tex_offset;
   2.806 +        texture_state.v = v + tex_offset;
   2.807 +        tex_read(state, &texture_state, &r[3], &g[3], &b[3], &a[3]);
   2.808 +
   2.809 +        d[0] = (256 - du) * (256 - dv);
   2.810 +        d[1] =  du * (256 - dv);
   2.811 +        d[2] = (256 - du) * dv;
   2.812 +        d[3] = du * dv;
   2.813 +        
   2.814 +        *r_out = (r[0] * d[0] + r[1] * d[1] + r[2] * d[2] + r[3] * d[3]) >> 16;
   2.815 +        *g_out = (g[0] * d[0] + g[1] * d[1] + g[2] * d[2] + g[3] * d[3]) >> 16;
   2.816 +        *b_out = (b[0] * d[0] + b[1] * d[1] + b[2] * d[2] + b[3] * d[3]) >> 16;
   2.817 +        *a_out = (a[0] * d[0] + a[1] * d[1] + a[2] * d[2] + a[3] * d[3]) >> 16;
   2.818 +}
   2.819 +
   2.820 +
   2.821 +static void tex_sample_persp_mipmap(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out)
   2.822 +{
   2.823 +        s3d_texture_state_t texture_state;
   2.824 +        int32_t w = 0;
   2.825 +
   2.826 +        if (state->w)
   2.827 +                w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
   2.828 +        
   2.829 +        texture_state.level = MAX(MIN(9 - ((state->d >> 27) & 0xf), state->max_d), 0);
   2.830 +        texture_state.texture_shift = 18 + (9 - texture_state.level);
   2.831 +        texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu;
   2.832 +        texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv;
   2.833 +
   2.834 +        tex_read(state, &texture_state, r_out, g_out, b_out, a_out);
   2.835 +}
   2.836 +
   2.837 +static void tex_sample_persp_mipmap_filter(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out)
   2.838 +{
   2.839 +        s3d_texture_state_t texture_state;
   2.840 +        int32_t w = 0, u, v;
   2.841 +        int tex_offset;
   2.842 +        int r[4], g[4], b[4], a[4];
   2.843 +        int du, dv;
   2.844 +        int d[4];
   2.845 +
   2.846 +        if (state->w)
   2.847 +                w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
   2.848 +
   2.849 +        u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu;
   2.850 +        v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv;
   2.851 +        
   2.852 +        texture_state.level = MAX(MIN(9 - ((state->d >> 27) & 0xf), state->max_d), 0);
   2.853 +        texture_state.texture_shift = 18 + (9 - texture_state.level);
   2.854 +        tex_offset = 1 << texture_state.texture_shift;
   2.855 +
   2.856 +        texture_state.u = u;
   2.857 +        texture_state.v = v;
   2.858 +        tex_read(state, &texture_state, &r[0], &g[0], &b[0], &a[0]);
   2.859 +        du = (u >> (texture_state.texture_shift - 8)) & 0xff;
   2.860 +        dv = (v >> (texture_state.texture_shift - 8)) & 0xff;
   2.861 +
   2.862 +        texture_state.u = u + tex_offset;
   2.863 +        texture_state.v = v;
   2.864 +        tex_read(state, &texture_state, &r[1], &g[1], &b[1], &a[1]);
   2.865 +
   2.866 +        texture_state.u = u;
   2.867 +        texture_state.v = v + tex_offset;
   2.868 +        tex_read(state, &texture_state, &r[2], &g[2], &b[2], &a[2]);
   2.869 +
   2.870 +        texture_state.u = u + tex_offset;
   2.871 +        texture_state.v = v + tex_offset;
   2.872 +        tex_read(state, &texture_state, &r[3], &g[3], &b[3], &a[3]);
   2.873 +
   2.874 +        d[0] = (256 - du) * (256 - dv);
   2.875 +        d[1] =  du * (256 - dv);
   2.876 +        d[2] = (256 - du) * dv;
   2.877 +        d[3] = du * dv;
   2.878 +        
   2.879 +        *r_out = (r[0] * d[0] + r[1] * d[1] + r[2] * d[2] + r[3] * d[3]) >> 16;
   2.880 +        *g_out = (g[0] * d[0] + g[1] * d[1] + g[2] * d[2] + g[3] * d[3]) >> 16;
   2.881 +        *b_out = (b[0] * d[0] + b[1] * d[1] + b[2] * d[2] + b[3] * d[3]) >> 16;
   2.882 +        *a_out = (a[0] * d[0] + a[1] * d[1] + a[2] * d[2] + a[3] * d[3]) >> 16;
   2.883 +}
   2.884 +
   2.885 +static void tex_sample_persp_mipmap_375(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out)
   2.886 +{
   2.887 +        s3d_texture_state_t texture_state;
   2.888 +        int32_t w = 0;
   2.889 +
   2.890 +        if (state->w)
   2.891 +                w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
   2.892 +        
   2.893 +        texture_state.level = MAX(MIN(9 - ((state->d >> 27) & 0xf), state->max_d), 0);
   2.894 +        texture_state.texture_shift = 18 + (9 - texture_state.level);
   2.895 +        texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu;
   2.896 +        texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv;
   2.897 +
   2.898 +        tex_read(state, &texture_state, r_out, g_out, b_out, a_out);
   2.899 +}
   2.900 +
   2.901 +static void tex_sample_persp_mipmap_filter_375(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out)
   2.902 +{
   2.903 +        s3d_texture_state_t texture_state;
   2.904 +        int32_t w = 0, u, v;
   2.905 +        int tex_offset;
   2.906 +        int r[4], g[4], b[4], a[4];
   2.907 +        int du, dv;
   2.908 +        int d[4];
   2.909 +
   2.910 +        if (state->w)
   2.911 +                w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
   2.912 +
   2.913 +        u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu;
   2.914 +        v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv;
   2.915 +        
   2.916 +        texture_state.level = MAX(MIN(9 - ((state->d >> 27) & 0xf), state->max_d), 0);
   2.917 +        texture_state.texture_shift = 18 + (9 - texture_state.level);
   2.918 +        tex_offset = 1 << texture_state.texture_shift;
   2.919 +        
   2.920 +        texture_state.u = u;
   2.921 +        texture_state.v = v;
   2.922 +        tex_read(state, &texture_state, &r[0], &g[0], &b[0], &a[0]);
   2.923 +        du = (u >> (texture_state.texture_shift - 8)) & 0xff;
   2.924 +        dv = (v >> (texture_state.texture_shift - 8)) & 0xff;
   2.925 +
   2.926 +        texture_state.u = u + tex_offset;
   2.927 +        texture_state.v = v;
   2.928 +        tex_read(state, &texture_state, &r[1], &g[1], &b[1], &a[1]);
   2.929 +
   2.930 +        texture_state.u = u;
   2.931 +        texture_state.v = v + tex_offset;
   2.932 +        tex_read(state, &texture_state, &r[2], &g[2], &b[2], &a[2]);
   2.933 +
   2.934 +        texture_state.u = u + tex_offset;
   2.935 +        texture_state.v = v + tex_offset;
   2.936 +        tex_read(state, &texture_state, &r[3], &g[3], &b[3], &a[3]);
   2.937 +
   2.938 +        d[0] = (256 - du) * (256 - dv);
   2.939 +        d[1] =  du * (256 - dv);
   2.940 +        d[2] = (256 - du) * dv;
   2.941 +        d[3] = du * dv;
   2.942 +        
   2.943 +        *r_out = (r[0] * d[0] + r[1] * d[1] + r[2] * d[2] + r[3] * d[3]) >> 16;
   2.944 +        *g_out = (g[0] * d[0] + g[1] * d[1] + g[2] * d[2] + g[3] * d[3]) >> 16;
   2.945 +        *b_out = (b[0] * d[0] + b[1] * d[1] + b[2] * d[2] + b[3] * d[3]) >> 16;
   2.946 +        *a_out = (a[0] * d[0] + a[1] * d[1] + a[2] * d[2] + a[3] * d[3]) >> 16;
   2.947 +}
   2.948 +
   2.949 +
   2.950 +#define CLAMP_RGBA(r, g, b, a) do       \
   2.951 +        {                               \
   2.952 +                if ((r) < 0)            \
   2.953 +                        r = 0;          \
   2.954 +                if ((r) > 0xff)         \
   2.955 +                        r = 0xff;       \
   2.956 +                if ((g) < 0)            \
   2.957 +                        g = 0;          \
   2.958 +                if ((g) > 0xff)         \
   2.959 +                        g = 0xff;       \
   2.960 +                if ((b) < 0)            \
   2.961 +                        b = 0;          \
   2.962 +                if ((b) > 0xff)         \
   2.963 +                        b = 0xff;       \
   2.964 +                if ((a) < 0)            \
   2.965 +                        a = 0;          \
   2.966 +                if ((a) > 0xff)         \
   2.967 +                        a = 0xff;       \
   2.968 +        }                               \
   2.969 +        while (0)
   2.970 +
   2.971 +#define CLAMP_RGB(r, g, b) do           \
   2.972 +        {                               \
   2.973 +                if ((r) < 0)            \
   2.974 +                        r = 0;          \
   2.975 +                if ((r) > 0xff)         \
   2.976 +                        r = 0xff;       \
   2.977 +                if ((g) < 0)            \
   2.978 +                        g = 0;          \
   2.979 +                if ((g) > 0xff)         \
   2.980 +                        g = 0xff;       \
   2.981 +                if ((b) < 0)            \
   2.982 +                        b = 0;          \
   2.983 +                if ((b) > 0xff)         \
   2.984 +                        b = 0xff;       \
   2.985 +        }                               \
   2.986 +        while (0)
   2.987 +
   2.988 +static void dest_pixel_gouraud_shaded_triangle(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out)
   2.989 +{
   2.990 +        *r_out = state->r >> 7;
   2.991 +        *g_out = state->g >> 7;
   2.992 +        *b_out = state->b >> 7;
   2.993 +        *a_out = state->a >> 7;
   2.994 +        CLAMP_RGBA(*r_out, *g_out, *b_out, *a_out);
   2.995 +}
   2.996 +
   2.997 +static void dest_pixel_unlit_texture_triangle(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out)
   2.998 +{
   2.999 +        tex_sample(state, r_out, g_out, b_out, a_out);
  2.1000 +
  2.1001 +        if (state->cmd_set & CMD_SET_ABC_SRC)
  2.1002 +                *a_out = state->a >> 7;
  2.1003 +}
  2.1004 +
  2.1005 +static void dest_pixel_lit_texture_decal(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out)
  2.1006 +{
  2.1007 +        tex_sample(state, r_out, g_out, b_out, a_out);
  2.1008 +
  2.1009 +        if (state->cmd_set & CMD_SET_ABC_SRC)
  2.1010 +                *a_out = state->a >> 7;
  2.1011 +}
  2.1012 +
  2.1013 +static void dest_pixel_lit_texture_reflection(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out)
  2.1014 +{
  2.1015 +        int tex_r, tex_g, tex_b, tex_a;
  2.1016 +        
  2.1017 +        tex_sample(state, &tex_r, &tex_g, &tex_b, &tex_a);
  2.1018 +
  2.1019 +        *r_out = state->r >> 7;
  2.1020 +        *g_out = state->g >> 7;
  2.1021 +        *b_out = state->b >> 7;
  2.1022 +        *a_out = state->a >> 7;
  2.1023 +        CLAMP_RGBA(*r_out, *g_out, *b_out, *a_out);
  2.1024 +
  2.1025 +        *(r_out) += tex_r;
  2.1026 +        *(g_out) += tex_g;
  2.1027 +        *(b_out) += tex_b;
  2.1028 +
  2.1029 +        CLAMP_RGB(*r_out, *g_out, *b_out);
  2.1030 +
  2.1031 +        if (!(state->cmd_set & CMD_SET_ABC_SRC))
  2.1032 +                *a_out = tex_a;
  2.1033 +}
  2.1034 +
  2.1035 +static void dest_pixel_lit_texture_modulate(s3d_state_t *state, int *r_out, int *g_out, int *b_out, int *a_out)
  2.1036 +{
  2.1037 +        int r = state->r >> 7, g = state->g >> 7, b = state->b >> 7, a = state->a >> 7;
  2.1038 +        
  2.1039 +        tex_sample(state, r_out, g_out, b_out, a_out);
  2.1040 +        
  2.1041 +        CLAMP_RGBA(r, g, b, a);
  2.1042 +        
  2.1043 +        *r_out = ((*r_out) * r) >> 8;
  2.1044 +        *g_out = ((*g_out) * g) >> 8;
  2.1045 +        *b_out = ((*b_out) * b) >> 8;
  2.1046 +
  2.1047 +        if (state->cmd_set & CMD_SET_ABC_SRC)
  2.1048 +                *a_out = a;               
  2.1049 +}
  2.1050 +
  2.1051 +static void tri(virge_t *virge, s3d_state_t *state, int yc, int32_t dx1, int32_t dx2)
  2.1052 +{
  2.1053 +        uint8_t *vram = virge->svga.vram;
  2.1054 +
  2.1055 +        int x_dir = virge->s3d.tlr ? 1 : -1;
  2.1056 +        
  2.1057 +        int use_z = !(virge->s3d.cmd_set & CMD_SET_ZB_MODE);
  2.1058 +
  2.1059 +        int y_count = yc;
  2.1060 +        
  2.1061 +        int bpp = 1;
  2.1062 +        
  2.1063 +        uint32_t dest_offset = virge->s3d.dest_base + (state->y * virge->s3d.dest_str);
  2.1064 +        uint32_t z_offset = virge->s3d.z_base + (state->y * virge->s3d.z_str);
  2.1065 +                
  2.1066 +        for (; y_count > 0; y_count--)
  2.1067 +        {
  2.1068 +                int x = state->x1 >> 20;
  2.1069 +                int xe = state->x2 >> 20;
  2.1070 +                uint32_t z = state->base_z;
  2.1071 +                if (x != xe && (x_dir > 0 && x < xe) || (x_dir < 0 && x > xe))
  2.1072 +                {
  2.1073 +                        int dx = (x_dir > 0) ? 8 - ((state->x1 >> 16) & 0xf) : ((state->x1 >> 16) & 0xf) - 8;
  2.1074 +                        state->r = state->base_r + ((virge->s3d.TdRdX * dx) >> 4);
  2.1075 +                        state->g = state->base_g + ((virge->s3d.TdGdX * dx) >> 4);
  2.1076 +                        state->b = state->base_b + ((virge->s3d.TdBdX * dx) >> 4);
  2.1077 +                        state->a = state->base_a + ((virge->s3d.TdAdX * dx) >> 4);
  2.1078 +                        state->u = state->base_u + ((virge->s3d.TdUdX * dx) >> 4);
  2.1079 +                        state->v = state->base_v + ((virge->s3d.TdVdX * dx) >> 4);
  2.1080 +                        state->w = state->base_w + ((virge->s3d.TdWdX * dx) >> 4);
  2.1081 +                        state->d = state->base_d + ((virge->s3d.TdDdX * dx) >> 4);
  2.1082 +                        z += ((virge->s3d.TdZdX * dx) >> 4);
  2.1083 +//                        pclog("Draw Y=%i X=%i to XE=%i  %i   %08x %08x %08x %08x  %08x %08x %08x %08x  %i %08x\n", state->y, x, xe, dx, state->x1, state->x2, dx1, virge->s3d.TdWdX, state->u, state->v, virge->s3d.TdUdX, virge->s3d.TdUdY, dx, (virge->s3d.TdUdX * dx) >> 4);
  2.1084 +
  2.1085 +                        for (; x != ((xe + x_dir) & 0xfff); x = (x + x_dir) & 0xfff)
  2.1086 +                        {
  2.1087 +                                uint32_t dest_addr = dest_offset + (x << bpp);
  2.1088 +                                uint32_t z_addr = z_offset + (x << bpp);
  2.1089 +                                int update = 1;
  2.1090 +                                int16_t src_z;
  2.1091 +                                _x = x; _y = state->y;
  2.1092 +
  2.1093 +                                if (use_z)
  2.1094 +                                {
  2.1095 +                                        src_z = Z_READ(z_addr);
  2.1096 +                                        Z_CLIP(src_z, z >> 16);
  2.1097 +                                }
  2.1098 +                                CLIP(x, state->y);
  2.1099 +
  2.1100 +                                if (update)
  2.1101 +                                {
  2.1102 +                                        int dest_r, dest_g, dest_b, dest_a;
  2.1103 +                                        uint32_t dest_col;
  2.1104 +
  2.1105 +                                        dest_pixel(state, &dest_r, &dest_g, &dest_b, &dest_a);
  2.1106 +
  2.1107 +                                        if (virge->s3d.cmd_set & CMD_SET_ABC_ENABLE)
  2.1108 +                                        {
  2.1109 +                                                uint32_t src_col;
  2.1110 +                                                int src_r, src_g, src_b;
  2.1111 +                                                
  2.1112 +                                                switch (bpp)
  2.1113 +                                                {
  2.1114 +                                                        case 0: /*8 bpp*/
  2.1115 +                                                        /*Not implemented yet*/
  2.1116 +                                                        break;
  2.1117 +                                                        case 1: /*16 bpp*/
  2.1118 +                                                        src_col = *(uint16_t *)&vram[dest_addr & 0x3fffff];
  2.1119 +                                                        RGB15_TO_24(src_col, src_r, src_g, src_b);
  2.1120 +                                                        break;
  2.1121 +                                                        case 2: /*24 bpp*/
  2.1122 +                                                        src_col = (*(uint32_t *)&vram[dest_addr & 0x3fffff]) & 0xffffff;
  2.1123 +                                                        RGB24_TO_24(src_col, src_r, src_g, src_b);
  2.1124 +                                                        break;
  2.1125 +                                                }
  2.1126 +
  2.1127 +                                                dest_r = ((dest_r * dest_a) + (src_r * (255 - dest_a))) / 255;
  2.1128 +                                                dest_g = ((dest_g * dest_a) + (src_g * (255 - dest_a))) / 255;
  2.1129 +                                                dest_b = ((dest_b * dest_a) + (src_b * (255 - dest_a))) / 255;
  2.1130 +                                        }
  2.1131 +
  2.1132 +                                        switch (bpp)
  2.1133 +                                        {
  2.1134 +                                                case 0: /*8 bpp*/ 
  2.1135 +                                                /*Not implemented yet*/
  2.1136 +                                                break;
  2.1137 +                                                case 1: /*16 bpp*/
  2.1138 +                                                dest_col = RGB15(dest_r, dest_g, dest_b);
  2.1139 +                                                *(uint16_t *)&vram[dest_addr & 0x3fffff] = dest_col;
  2.1140 +                                                virge->svga.changedvram[(dest_addr & 0x3fffff) >> 12] = changeframecount;
  2.1141 +                                                break;
  2.1142 +                                                case 2: /*24 bpp*/
  2.1143 +                                                dest_col = RGB24(dest_r, dest_g, dest_b);
  2.1144 +                                                *(uint32_t *)&vram[dest_addr & 0x3fffff] = dest_col;
  2.1145 +                                                virge->svga.changedvram[(dest_addr & 0x3fffff) >> 12] = changeframecount;
  2.1146 +                                                break;
  2.1147 +                                        }
  2.1148 +
  2.1149 +                                        if (use_z)
  2.1150 +                                                Z_WRITE(z_addr, src_z);
  2.1151 +                                }
  2.1152 +
  2.1153 +                                z += virge->s3d.TdZdX;
  2.1154 +                                state->u += virge->s3d.TdUdX;
  2.1155 +                                state->v += virge->s3d.TdVdX;
  2.1156 +                                state->r += virge->s3d.TdRdX;
  2.1157 +                                state->g += virge->s3d.TdGdX;
  2.1158 +                                state->b += virge->s3d.TdBdX;
  2.1159 +                                state->a += virge->s3d.TdAdX;
  2.1160 +                                state->d += virge->s3d.TdDdX;
  2.1161 +                                state->w += virge->s3d.TdWdX;
  2.1162 +                                virge->pixel_count++;
  2.1163 +                        }
  2.1164 +                }
  2.1165 +                state->x1 += dx1;
  2.1166 +                state->x2 += dx2;
  2.1167 +                state->base_u += virge->s3d.TdUdY;
  2.1168 +                state->base_v += virge->s3d.TdVdY;
  2.1169 +                state->base_z += virge->s3d.TdZdY;
  2.1170 +                state->base_r += virge->s3d.TdRdY;
  2.1171 +                state->base_g += virge->s3d.TdGdY;
  2.1172 +                state->base_b += virge->s3d.TdBdY;
  2.1173 +                state->base_a += virge->s3d.TdAdY;
  2.1174 +                state->base_d += virge->s3d.TdDdY;
  2.1175 +                state->base_w += virge->s3d.TdWdY;
  2.1176 +                state->y--;
  2.1177 +                dest_offset -= virge->s3d.dest_str;
  2.1178 +                z_offset -= virge->s3d.z_str;
  2.1179 +        }
  2.1180 +}
  2.1181 +
  2.1182 +static int tex_size[8] =
  2.1183 +{
  2.1184 +        4*2,
  2.1185 +        2*2,
  2.1186 +        2*2,
  2.1187 +        1*2,
  2.1188 +        2/1,
  2.1189 +        2/1,
  2.1190 +        1*2,
  2.1191 +        1*2
  2.1192 +};
  2.1193 +
  2.1194 +static void s3_virge_triangle(virge_t *virge)
  2.1195 +{
  2.1196 +        s3d_state_t state;
  2.1197 +
  2.1198 +        uint32_t tex_base;
  2.1199 +        int c;
  2.1200 +        
  2.1201 +        state.tbu = virge->s3d.tbu << 11;
  2.1202 +        state.tbv = virge->s3d.tbv << 11;
  2.1203 +        
  2.1204 +        state.max_d = (virge->s3d.cmd_set >> 8) & 15;
  2.1205 +        
  2.1206 +        state.tex_bdr_clr = virge->s3d.tex_bdr_clr;
  2.1207 +        
  2.1208 +        state.cmd_set = virge->s3d.cmd_set;
  2.1209 +
  2.1210 +        state.base_u = virge->s3d.tus;
  2.1211 +        state.base_v = virge->s3d.tvs;
  2.1212 +        state.base_z = virge->s3d.tzs;
  2.1213 +        state.base_r = (int32_t)virge->s3d.trs;
  2.1214 +        state.base_g = (int32_t)virge->s3d.tgs;
  2.1215 +        state.base_b = (int32_t)virge->s3d.tbs;
  2.1216 +        state.base_a = (int32_t)virge->s3d.tas;
  2.1217 +        state.base_d = virge->s3d.tds;
  2.1218 +        state.base_w = virge->s3d.tws;
  2.1219 +        
  2.1220 +        tex_base = virge->s3d.tex_base;
  2.1221 +        for (c = 9; c >= 0; c--)
  2.1222 +        {
  2.1223 +                state.texture[c] = (uint16_t *)&virge->svga.vram[tex_base];
  2.1224 +                if (c <= state.max_d)
  2.1225 +                        tex_base += ((1 << (c*2)) * tex_size[(virge->s3d.cmd_set >> 5) & 7]) / 2;
  2.1226 +        }
  2.1227 +
  2.1228 +        switch ((virge->s3d.cmd_set >> 27) & 0xf)
  2.1229 +        {
  2.1230 +                case 0:
  2.1231 +                dest_pixel = dest_pixel_gouraud_shaded_triangle;
  2.1232 +//                pclog("dest_pixel_gouraud_shaded_triangle\n");
  2.1233 +                break;
  2.1234 +                case 1:
  2.1235 +                case 5:
  2.1236 +                switch ((virge->s3d.cmd_set >> 15) & 0x3)
  2.1237 +                {
  2.1238 +                        case 0:
  2.1239 +                        dest_pixel = dest_pixel_lit_texture_reflection;
  2.1240 +//                        pclog("dest_pixel_lit_texture_reflection\n");
  2.1241 +                        break;
  2.1242 +                        case 1:
  2.1243 +                        dest_pixel = dest_pixel_lit_texture_modulate;
  2.1244 +//                        pclog("dest_pixel_lit_texture_modulate\n");
  2.1245 +                        break;
  2.1246 +                        case 2:
  2.1247 +                        dest_pixel = dest_pixel_lit_texture_decal;
  2.1248 +//                        pclog("dest_pixel_lit_texture_decal\n");
  2.1249 +                        break;
  2.1250 +                        default:
  2.1251 +                        pclog("bad triangle type %x\n", (virge->s3d.cmd_set >> 27) & 0xf);
  2.1252 +                        return;
  2.1253 +                }
  2.1254 +                break;
  2.1255 +                case 2:
  2.1256 +                case 6:
  2.1257 +                dest_pixel = dest_pixel_unlit_texture_triangle;
  2.1258 +//                pclog("dest_pixel_unlit_texture_triangle\n");
  2.1259 +                break;
  2.1260 +                default:
  2.1261 +                pclog("bad triangle type %x\n", (virge->s3d.cmd_set >> 27) & 0xf);
  2.1262 +                return;
  2.1263 +        }        
  2.1264 +        
  2.1265 +        switch (((virge->s3d.cmd_set >> 12) & 7) | ((virge->s3d.cmd_set & (1 << 29)) ? 8 : 0))
  2.1266 +        {
  2.1267 +                case 0: case 1:
  2.1268 +                tex_sample = tex_sample_mipmap;
  2.1269 +//                pclog("use tex_sample_mipmap\n");
  2.1270 +                break;
  2.1271 +                case 2: case 3:
  2.1272 +                tex_sample = tex_sample_mipmap_filter;
  2.1273 +//                pclog("use tex_sample_mipmap_filter\n");
  2.1274 +                break;
  2.1275 +                case 4: case 5:
  2.1276 +                tex_sample = tex_sample_normal;
  2.1277 +//                pclog("use tex_sample_normal\n");
  2.1278 +                break;
  2.1279 +                case 6: case 7:
  2.1280 +                tex_sample = tex_sample_normal_filter;
  2.1281 +//                pclog("use tex_sample_normal_filter\n");
  2.1282 +                break;
  2.1283 +                case (0 | 8): case (1 | 8):
  2.1284 +                if (virge->is_375)
  2.1285 +                        tex_sample = tex_sample_persp_mipmap_375;
  2.1286 +                else
  2.1287 +                        tex_sample = tex_sample_persp_mipmap;
  2.1288 +//                pclog("use tex_sample_persp_mipmap\n");
  2.1289 +                break;
  2.1290 +                case (2 | 8): case (3 | 8):
  2.1291 +                if (virge->is_375)
  2.1292 +                        tex_sample = tex_sample_persp_mipmap_filter_375;
  2.1293 +                else
  2.1294 +                        tex_sample = tex_sample_persp_mipmap_filter;
  2.1295 +//                pclog("use tex_sample_persp_mipmap_filter\n");
  2.1296 +                break;
  2.1297 +                case (4 | 8): case (5 | 8):
  2.1298 +                if (virge->is_375)
  2.1299 +                        tex_sample = tex_sample_persp_normal_375;
  2.1300 +                else
  2.1301 +                        tex_sample = tex_sample_persp_normal;
  2.1302 +//                pclog("use tex_sample_persp_normal\n");
  2.1303 +                break;
  2.1304 +                case (6 | 8): case (7 | 8):
  2.1305 +                if (virge->is_375)
  2.1306 +                        tex_sample = tex_sample_persp_normal_filter_375;
  2.1307 +                else
  2.1308 +                        tex_sample = tex_sample_persp_normal_filter;
  2.1309 +//                pclog("use tex_sample_persp_normal_filter\n");
  2.1310 +                break;
  2.1311 +        }
  2.1312 +        
  2.1313 +        switch ((virge->s3d.cmd_set >> 5) & 7)
  2.1314 +        {
  2.1315 +                case 0:
  2.1316 +                tex_read = tex_ARGB8888;
  2.1317 +                break;
  2.1318 +                case 1:
  2.1319 +                tex_read = tex_ARGB4444;
  2.1320 +//                pclog("tex_ARGB4444\n");
  2.1321 +                break;
  2.1322 +                case 2:
  2.1323 +                tex_read = tex_ARGB1555;
  2.1324 +//                pclog("tex_ARGB1555 %i\n", (virge->s3d.cmd_set >> 5) & 7);
  2.1325 +                break;
  2.1326 +                default:
  2.1327 +                pclog("bad texture type %i\n", (virge->s3d.cmd_set >> 5) & 7);
  2.1328 +                tex_read = tex_ARGB1555;
  2.1329 +        }
  2.1330 +        
  2.1331 +//        pclog("Triangle %i %i,%i to %i,%i  %08x\n", y, x1 >> 20, y, virge->s3d.txend01 >> 20, y - (virge->s3d.ty01 + virge->s3d.ty12), state.cmd_set);
  2.1332 +
  2.1333 +        state.y  = virge->s3d.tys;
  2.1334 +        state.x1 = virge->s3d.txs;
  2.1335 +        state.x2 = virge->s3d.txend01;
  2.1336 +        tri(virge, &state, virge->s3d.ty01, virge->s3d.TdXdY02, virge->s3d.TdXdY01);
  2.1337 +        state.x2 = virge->s3d.txend12;
  2.1338 +        tri(virge, &state, virge->s3d.ty12, virge->s3d.TdXdY02, virge->s3d.TdXdY12);
  2.1339 +
  2.1340 +        virge->tri_count++;
  2.1341 +}
  2.1342 +
  2.1343  
  2.1344  static void s3_virge_hwcursor_draw(svga_t *svga, int displine)
  2.1345  {
  2.1346 @@ -1434,13 +2493,13 @@
  2.1347                  if (virge->pci_regs[0x30] & 0x01)
  2.1348                  {
  2.1349                          uint32_t addr = (virge->pci_regs[0x32] << 16) | (virge->pci_regs[0x33] << 24);
  2.1350 -                        pclog("Virge bios_rom enabled at %08x\n", addr);
  2.1351 +//                        pclog("Virge bios_rom enabled at %08x\n", addr);
  2.1352                          mem_mapping_set_addr(&virge->bios_rom.mapping, addr, 0x8000);
  2.1353                          mem_mapping_enable(&virge->bios_rom.mapping);
  2.1354                  }
  2.1355                  else
  2.1356                  {
  2.1357 -                        pclog("Virge bios_rom disabled\n");
  2.1358 +//                        pclog("Virge bios_rom disabled\n");
  2.1359                          mem_mapping_disable(&virge->bios_rom.mapping);
  2.1360                  }
  2.1361                  return;
  2.1362 @@ -1512,6 +2571,80 @@
  2.1363          virge->svga.crtc[0x37] = 1;// | (7 << 5);
  2.1364          virge->svga.crtc[0x53] = 1 << 3;
  2.1365          virge->svga.crtc[0x59] = 0x70;
  2.1366 +
  2.1367 +        virge->is_375 = 0;
  2.1368 +        
  2.1369 +        pci_add(s3_virge_pci_read, s3_virge_pci_write, virge);
  2.1370 + 
  2.1371 +        return virge;
  2.1372 +}
  2.1373 +
  2.1374 +static void *s3_virge_375_init()
  2.1375 +{
  2.1376 +        virge_t *virge = malloc(sizeof(virge_t));
  2.1377 +        memset(virge, 0, sizeof(virge_t));
  2.1378 +        
  2.1379 +        svga_init(&virge->svga, virge, 1 << 22, /*4mb*/
  2.1380 +                   s3_virge_recalctimings,
  2.1381 +                   s3_virge_in, s3_virge_out,
  2.1382 +                   s3_virge_hwcursor_draw);
  2.1383 +
  2.1384 +        rom_init(&virge->bios_rom, "roms/86c375_1.bin", 0xc0000, 0x8000, 0x7fff, 0, MEM_MAPPING_EXTERNAL);
  2.1385 +        if (PCI)
  2.1386 +                mem_mapping_disable(&virge->bios_rom.mapping);
  2.1387 +
  2.1388 +        mem_mapping_add(&virge->mmio_mapping,     0, 0, s3_virge_mmio_read,
  2.1389 +                                                        s3_virge_mmio_read_w,
  2.1390 +                                                        s3_virge_mmio_read_l,
  2.1391 +                                                        s3_virge_mmio_write,
  2.1392 +                                                        s3_virge_mmio_write_w,
  2.1393 +                                                        s3_virge_mmio_write_l,
  2.1394 +                                                        NULL,
  2.1395 +                                                        0,
  2.1396 +                                                        virge);
  2.1397 +        mem_mapping_add(&virge->new_mmio_mapping, 0, 0, s3_virge_mmio_read,
  2.1398 +                                                        s3_virge_mmio_read_w,
  2.1399 +                                                        s3_virge_mmio_read_l,
  2.1400 +                                                        s3_virge_mmio_write,
  2.1401 +                                                        s3_virge_mmio_write_w,
  2.1402 +                                                        s3_virge_mmio_write_l,
  2.1403 +                                                        NULL,
  2.1404 +                                                        0,
  2.1405 +                                                        virge);
  2.1406 +        mem_mapping_add(&virge->linear_mapping,   0, 0, svga_read_linear,
  2.1407 +                                                        svga_readw_linear,
  2.1408 +                                                        svga_readl_linear,
  2.1409 +                                                        svga_write_linear,
  2.1410 +                                                        svga_writew_linear,
  2.1411 +                                                        svga_writel_linear,
  2.1412 +                                                        NULL,
  2.1413 +                                                        0,
  2.1414 +                                                        &virge->svga);
  2.1415 +
  2.1416 +        io_sethandler(0x03c0, 0x0020, s3_virge_in, NULL, NULL, s3_virge_out, NULL, NULL, virge);
  2.1417 +
  2.1418 +        virge->pci_regs[4] = 3;
  2.1419 +        virge->pci_regs[5] = 0;        
  2.1420 +        virge->pci_regs[6] = 0;
  2.1421 +        virge->pci_regs[7] = 2;
  2.1422 +        virge->pci_regs[0x32] = 0x0c;
  2.1423 +        virge->pci_regs[0x3d] = 1; 
  2.1424 +        virge->pci_regs[0x3e] = 4;
  2.1425 +        virge->pci_regs[0x3f] = 0xff;
  2.1426 +        
  2.1427 +        virge->virge_id_high = 0x8a;
  2.1428 +        virge->virge_id_low = 0x01;
  2.1429 +        virge->virge_rev = 0;
  2.1430 +        virge->virge_id = 0xe1;
  2.1431 +
  2.1432 +        virge->svga.crtc[0x36] = 2 | (0 << 2) | (1 << 4);
  2.1433 +        virge->svga.crtc[0x37] = 1;// | (7 << 5);
  2.1434 +        virge->svga.crtc[0x53] = 1 << 3;
  2.1435 +        virge->svga.crtc[0x59] = 0x70;
  2.1436 +        
  2.1437 +        virge->svga.crtc[0x6c] = 0x01;
  2.1438 +        
  2.1439 +        virge->is_375 = 1;
  2.1440          
  2.1441          pci_add(s3_virge_pci_read, s3_virge_pci_write, virge);
  2.1442   
  2.1443 @@ -1521,6 +2654,9 @@
  2.1444  static void s3_virge_close(void *p)
  2.1445  {
  2.1446          virge_t *virge = (virge_t *)p;
  2.1447 +        FILE *f = fopen("vram.dmp", "wb");
  2.1448 +        fwrite(virge->svga.vram, 4 << 20, 1, f);
  2.1449 +        fclose(f);
  2.1450  
  2.1451          svga_close(&virge->svga);
  2.1452          
  2.1453 @@ -1532,6 +2668,11 @@
  2.1454          return rom_present("roms/s3virge.bin");
  2.1455  }
  2.1456  
  2.1457 +static int s3_virge_375_available() /*Returns the result of rom_present() for the BIOS image that s3_virge_375_init() loads*/
  2.1458 +{
  2.1459 +        return rom_present("roms/86c375_1.bin");
  2.1460 +}
  2.1461 +
  2.1462  static void s3_virge_speed_changed(void *p)
  2.1463  {
  2.1464          virge_t *virge = (virge_t *)p;
  2.1465 @@ -1549,13 +2690,21 @@
  2.1466  static int s3_virge_add_status_info(char *s, int max_len, void *p)
  2.1467  {
  2.1468          virge_t *virge = (virge_t *)p;
  2.1469 +        int cur_len; /*Holds the value returned by svga_add_status_info(), then reduced by the length of the ViRGE text*/
  2.1470 +        char temps[256]; /*Scratch buffer for the ViRGE-specific status lines*/
  2.1471 +        /*Append the SVGA status text to s, then the pixel and triangle counters, then clear the counters*/
  2.1472 +        cur_len = svga_add_status_info(s, max_len, &virge->svga); /*Pass the caller's max_len; cur_len has no value yet at this point*/
  2.1473 +        sprintf(temps, "%f Mpixels/sec\n%f ktris/sec\n", (double)virge->pixel_count/1000000.0, (double)virge->tri_count/1000.0);
  2.1474 +        strncat(s, temps, cur_len); /*NOTE(review): assumes cur_len is the space still free in s - confirm against svga_add_status_info()*/
  2.1475 +        cur_len -= strlen(temps);
  2.1476 +        virge->pixel_count = virge->tri_count = 0; /*Counters restart from zero after every report*/
  2.1477          
  2.1478 -        return svga_add_status_info(s, max_len, &virge->svga);
  2.1479 +        return max_len - cur_len; /*NOTE(review): meaning depends on what svga_add_status_info() returns - confirm*/
  2.1480  }
  2.1481  
  2.1482  device_t s3_virge_device =
  2.1483  {
  2.1484 -        "Diamond Stealth 3D 2000 (S3 VIRGE)",
  2.1485 +        "Diamond Stealth 3D 2000 (S3 ViRGE)",
  2.1486          DEVICE_NOT_WORKING,
  2.1487          s3_virge_init,
  2.1488          s3_virge_close,
  2.1489 @@ -1564,3 +2713,15 @@
  2.1490          s3_virge_force_redraw,
  2.1491          s3_virge_add_status_info
  2.1492  };
  2.1493 +/*Device entry for the S3 ViRGE/DX card. It has its own init and ROM-availability functions; the close, force_redraw and add_status_info callbacks are the same ones s3_virge_device lists.*/
  2.1494 +device_t s3_virge_375_device =
  2.1495 +{
  2.1496 +        "S3 ViRGE/DX", /*Same string as the card table entry added to video.c*/
  2.1497 +        DEVICE_NOT_WORKING,
  2.1498 +        s3_virge_375_init,
  2.1499 +        s3_virge_close,
  2.1500 +        s3_virge_375_available,
  2.1501 +        s3_virge_speed_changed,
  2.1502 +        s3_virge_force_redraw,
  2.1503 +        s3_virge_add_status_info
  2.1504 +};
     3.1 --- a/src/vid_s3_virge.h	Sun May 11 14:09:13 2014 +0100
     3.2 +++ b/src/vid_s3_virge.h	Mon May 26 18:13:23 2014 +0100
     3.3 @@ -1,1 +1,2 @@
     3.4  extern device_t s3_virge_device;
     3.5 +extern device_t s3_virge_375_device;
     4.1 --- a/src/video.c	Sun May 11 14:09:13 2014 +0100
     4.2 +++ b/src/video.c	Mon May 26 18:13:23 2014 +0100
     4.3 @@ -54,6 +54,7 @@
     4.4          {"Paradise Bahamas 64 (S3 Vision864)",     &s3_bahamas64_device, GFX_BAHAMAS64},
     4.5          {"Number Nine 9FX (S3 Trio64)",            &s3_9fx_device,       GFX_N9_9FX},
     4.6          {"Diamond Stealth 3D 2000 (S3 ViRGE)",     &s3_virge_device,     GFX_VIRGE},
     4.7 +        {"S3 ViRGE/DX",                            &s3_virge_375_device, GFX_VIRGEDX},
     4.8          {"Trident TGUI9440",                       &tgui9440_device,     GFX_TGUI9440},
     4.9          {"VGA",                                    &vga_device,          GFX_VGA},
    4.10          {"ATI VGA Edge-16 (ATI-18800)",            &ati18800_device,     GFX_VGAEDGE16},
     5.1 --- a/src/win.c	Sun May 11 14:09:13 2014 +0100
     5.2 +++ b/src/win.c	Mon May 26 18:13:23 2014 +0100
     5.3 @@ -51,7 +51,7 @@
     5.4  #define TIMER_1SEC 1
     5.5  
     5.6  int winsizex=640,winsizey=480;
     5.7 -int gfx_present[18];
     5.8 +int gfx_present[19];
     5.9  int wakeups,wokeups;
    5.10  #undef cs
    5.11  CRITICAL_SECTION cs;
    5.12 @@ -387,13 +387,13 @@
    5.13          }
    5.14          
    5.15  
    5.16 -        for (c = 0; c < 18; c++)
    5.17 +        for (c = 0; c < 19; c++)
    5.18                  gfx_present[c] = video_card_available(video_old_to_new(c));
    5.19  
    5.20          if (!video_card_available(video_old_to_new(gfxcard)))
    5.21          {
    5.22                  if (romset!=-1) MessageBox(hwnd,"Configured video BIOS not available.\nDefaulting to available romset.","PCem error",MB_OK);
    5.23 -                for (c = 17; c >= 0; c--)
    5.24 +                for (c = 18; c >= 0; c--)
    5.25                  {
    5.26                          if (gfx_present[c])
    5.27                          {