PCem
view src/vid_s3_virge.c @ 114:9834054948fc
ViRGE S3D improvements :
- 24 bpp mode implemented
- Improved subpixel correction - fixes polygon gaps in some games
- Implemented dithering
- Z Update bit now has an effect - fixes Tomb Raider 2
- Improved mipmapping
| author | TomW |
|---|---|
| date | Sun Jul 06 12:45:35 2014 +0100 |
| parents | f749363ad763 |
| children | 24b744b9a632 |
line source
1 /*S3 ViRGE emulation*/
2 #include <stdlib.h>
3 #include "ibm.h"
4 #include "device.h"
5 #include "io.h"
6 #include "mem.h"
7 #include "pci.h"
8 #include "rom.h"
9 #include "video.h"
10 #include "vid_s3_virge.h"
11 #include "vid_svga.h"
12 #include "vid_svga_render.h"
/*Timing accumulators. Nothing in the visible part of this file updates them;
  presumably they are maintained by the rendering code further down.*/
static uint64_t virge_time = 0;
static uint64_t status_time = 0;

/*Incremented by every MMIO handler below - reads as well as writes*/
static int reg_writes = 0;
/*4x4 dither matrix with entries in the range 0..7.
  NOTE(review): presumably indexed as dither[y & 3][x & 3] by the S3D
  pixel writer - confirm against the users further down the file.*/
static int dither[4][4] =
{
        { 0, 4, 1, 5 },
        { 6, 2, 7, 3 },
        { 1, 5, 0, 4 },
        { 7, 3, 6, 2 },
};
26 typedef struct virge_t
27 {
28 mem_mapping_t linear_mapping;
29 mem_mapping_t mmio_mapping;
30 mem_mapping_t new_mmio_mapping;
32 rom_t bios_rom;
34 svga_t svga;
36 uint8_t bank;
37 uint8_t ma_ext;
38 int width;
39 int bpp;
41 uint8_t virge_id, virge_id_high, virge_id_low, virge_rev;
43 uint32_t linear_base, linear_size;
45 uint8_t pci_regs[256];
47 int is_375;
49 int bilinear_enabled;
50 int dithering_enabled;
51 int memory_size;
53 int pixel_count, tri_count;
55 struct
56 {
57 uint32_t src_base;
58 uint32_t dest_base;
59 int clip_l, clip_r, clip_t, clip_b;
60 int dest_str, src_str;
61 uint32_t mono_pat_0;
62 uint32_t mono_pat_1;
63 uint32_t pat_bg_clr;
64 uint32_t pat_fg_clr;
65 uint32_t src_bg_clr;
66 uint32_t src_fg_clr;
67 uint32_t cmd_set;
68 int r_width, r_height;
69 int rsrc_x, rsrc_y;
70 int rdest_x, rdest_y;
72 int lxend0, lxend1;
73 int32_t ldx;
74 uint32_t lxstart, lystart;
75 int lycnt;
76 int line_dir;
78 int src_x, src_y;
79 int dest_x, dest_y;
80 int w, h;
81 uint8_t rop;
83 int data_left_count;
84 uint32_t data_left;
86 uint32_t pattern_8[8*8];
87 uint32_t pattern_16[8*8];
88 uint32_t pattern_32[8*8];
91 uint32_t z_base;
92 uint32_t z_str;
94 uint32_t tex_base;
95 uint32_t tex_bdr_clr;
96 uint32_t tbv, tbu;
97 int32_t TdVdX, TdUdX;
98 int32_t TdVdY, TdUdY;
99 uint32_t tus, tvs;
101 int32_t TdZdX, TdZdY;
102 uint32_t tzs;
104 int32_t TdWdX, TdWdY;
105 uint32_t tws;
107 int32_t TdDdX, TdDdY;
108 uint32_t tds;
110 int16_t TdGdX, TdBdX, TdRdX, TdAdX;
111 int16_t TdGdY, TdBdY, TdRdY, TdAdY;
112 uint32_t tgs, tbs, trs, tas;
114 uint32_t TdXdY12;
115 uint32_t txend12;
116 uint32_t TdXdY01;
117 uint32_t txend01;
118 uint32_t TdXdY02;
119 uint32_t txs;
120 uint32_t tys;
121 int ty01, ty12, tlr;
122 } s3d;
124 struct
125 {
126 uint32_t pri_ctrl;
127 uint32_t chroma_ctrl;
128 uint32_t sec_ctrl;
129 uint32_t chroma_upper_bound;
130 uint32_t sec_filter;
131 uint32_t blend_ctrl;
132 uint32_t pri_fb0, pri_fb1;
133 uint32_t pri_stride;
134 uint32_t buffer_ctrl;
135 uint32_t sec_fb0, sec_fb1;
136 uint32_t sec_stride;
137 uint32_t overlay_ctrl;
138 int32_t k1_vert_scale;
139 int32_t k2_vert_scale;
140 int32_t dda_vert_accumulator;
141 int32_t k1_horiz_scale;
142 int32_t k2_horiz_scale;
143 int32_t dda_horiz_accumulator;
144 uint32_t fifo_ctrl;
145 uint32_t pri_start;
146 uint32_t pri_size;
147 uint32_t sec_start;
148 uint32_t sec_size;
150 int sdif;
152 int pri_x, pri_y, pri_w, pri_h;
153 int sec_x, sec_y, sec_w, sec_h;
154 } streams;
155 } virge_t;
157 static void s3_virge_recalctimings(svga_t *svga);
158 static void s3_virge_updatemapping(virge_t *virge);
160 static void s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat);
162 static void s3_virge_triangle(virge_t *virge);
164 static uint8_t s3_virge_mmio_read(uint32_t addr, void *p);
165 static uint16_t s3_virge_mmio_read_w(uint32_t addr, void *p);
166 static uint32_t s3_virge_mmio_read_l(uint32_t addr, void *p);
167 static void s3_virge_mmio_write(uint32_t addr, uint8_t val, void *p);
168 static void s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *p);
169 static void s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *p);
/*Bit fields of the S3D command set register (s3d.cmd_set)*/
enum
{
        CMD_SET_AE           = 0x00000001, /*Bit 0 - when set, a command starts on the write
                                             of its last parameter register rather than on
                                             the write of the command register itself*/
        CMD_SET_HC           = 0x00000002, /*Bit 1*/

        CMD_SET_FORMAT_MASK  = 0x0000001c, /*Bits 4:2*/
        CMD_SET_FORMAT_8     = 0x00000000,
        CMD_SET_FORMAT_16    = 0x00000004,
        CMD_SET_FORMAT_24    = 0x00000008,

        CMD_SET_MS           = 0x00000040, /*Bit 6 - CPU-supplied word/dword data is byte-swapped*/
        CMD_SET_IDS          = 0x00000080, /*Bit 7*/
        CMD_SET_MP           = 0x00000100, /*Bit 8*/
        CMD_SET_TP           = 0x00000200, /*Bit 9*/

        CMD_SET_ITA_MASK     = 0x00000c00, /*Bits 11:10*/
        CMD_SET_ITA_BYTE     = 0x00000000,
        CMD_SET_ITA_WORD     = 0x00000400,
        CMD_SET_ITA_DWORD    = 0x00000800,

        CMD_SET_ZUP          = 0x00800000, /*Bit 23*/

        CMD_SET_ZB_MODE      = 0x03000000, /*Bits 25:24*/

        CMD_SET_XP           = 0x02000000, /*Bit 25*/
        CMD_SET_YP           = 0x04000000, /*Bit 26*/

        CMD_SET_COMMAND_MASK = 0x78000000  /*Bits 30:27*/
};

#define CMD_SET_ABC_SRC    0x00040000 /*Bit 18*/
#define CMD_SET_ABC_ENABLE 0x00080000 /*Bit 19*/
#define CMD_SET_TWE        0x04000000 /*Bit 26 (same bit as CMD_SET_YP)*/

/*Values of the CMD_SET_COMMAND_MASK field*/
enum
{
        CMD_SET_COMMAND_BITBLT   = 0x00000000,
        CMD_SET_COMMAND_RECTFILL = 0x10000000,
        CMD_SET_COMMAND_LINE     = 0x18000000,
        CMD_SET_COMMAND_NOP      = 0x78000000
};
213 static void s3_virge_out(uint16_t addr, uint8_t val, void *p)
214 {
215 virge_t *virge = (virge_t *)p;
216 svga_t *svga = &virge->svga;
217 uint8_t old;
219 if (((addr & 0xfff0) == 0x3d0 || (addr & 0xfff0) == 0x3b0) && !(svga->miscout & 1))
220 addr ^= 0x60;
222 // pclog("S3 out %04X %02X %04X:%08X %04X %04X %i\n", addr, val, CS, pc, ES, BX, ins);
224 switch (addr)
225 {
226 case 0x3c5:
227 if (svga->seqaddr >= 0x10)
228 {
229 svga->seqregs[svga->seqaddr & 0x1f]=val;
230 s3_virge_recalctimings(svga);
231 return;
232 }
233 if (svga->seqaddr == 4) /*Chain-4 - update banking*/
234 {
235 if (val & 8) svga->write_bank = svga->read_bank = virge->bank << 16;
236 else svga->write_bank = svga->read_bank = virge->bank << 14;
237 }
238 break;
240 //case 0x3C6: case 0x3C7: case 0x3C8: case 0x3C9:
241 // pclog("Write RAMDAC %04X %02X %04X:%04X\n", addr, val, CS, pc);
242 //sdac_ramdac_out(addr,val);
243 //return;
245 case 0x3d4:
246 svga->crtcreg = val;// & 0x7f;
247 return;
248 case 0x3d5:
249 //pclog("Write CRTC R%02X %02X %04x(%08x):%08x\n", svga->crtcreg, val, CS, cs, pc);
250 if (svga->crtcreg <= 7 && svga->crtc[0x11] & 0x80)
251 return;
252 if (svga->crtcreg >= 0x20 && svga->crtcreg != 0x38 && (svga->crtc[0x38] & 0xcc) != 0x48)
253 return;
254 if (svga->crtcreg >= 0x80)
255 return;
256 old = svga->crtc[svga->crtcreg];
257 svga->crtc[svga->crtcreg] = val;
258 switch (svga->crtcreg)
259 {
260 case 0x31:
261 virge->ma_ext = (virge->ma_ext & 0x1c) | ((val & 0x30) >> 4);
262 svga->vrammask = (val & 8) ? 0x3fffff : 0x3ffff;
263 break;
265 case 0x50:
266 switch (svga->crtc[0x50] & 0xc1)
267 {
268 case 0x00: virge->width = (svga->crtc[0x31] & 2) ? 2048 : 1024; break;
269 case 0x01: virge->width = 1152; break;
270 case 0x40: virge->width = 640; break;
271 case 0x80: virge->width = 800; break;
272 case 0x81: virge->width = 1600; break;
273 case 0xc0: virge->width = 1280; break;
274 }
275 virge->bpp = (svga->crtc[0x50] >> 4) & 3;
276 break;
277 case 0x69:
278 virge->ma_ext = val & 0x1f;
279 break;
281 case 0x35:
282 virge->bank = (virge->bank & 0x70) | (val & 0xf);
283 // pclog("CRTC write R35 %02X\n", val);
284 if (svga->chain4) svga->write_bank = svga->read_bank = virge->bank << 16;
285 else svga->write_bank = svga->read_bank = virge->bank << 14;
286 break;
287 case 0x51:
288 virge->bank = (virge->bank & 0x4f) | ((val & 0xc) << 2);
289 if (svga->chain4) svga->write_bank = svga->read_bank = virge->bank << 16;
290 else svga->write_bank = svga->read_bank = virge->bank << 14;
291 virge->ma_ext = (virge->ma_ext & ~0xc) | ((val & 3) << 2);
292 break;
293 case 0x6a:
294 virge->bank = val;
295 // pclog("CRTC write R6a %02X\n", val);
296 if (svga->chain4) svga->write_bank = svga->read_bank = virge->bank << 16;
297 else svga->write_bank = svga->read_bank = virge->bank << 14;
298 break;
300 case 0x3a:
301 if (val & 0x10) svga->gdcreg[5] |= 0x40; /*Horrible cheat*/
302 break;
304 case 0x45:
305 svga->hwcursor.ena = val & 1;
306 break;
307 case 0x46: case 0x47: case 0x48: case 0x49:
308 case 0x4c: case 0x4d: case 0x4e: case 0x4f:
309 svga->hwcursor.x = ((svga->crtc[0x46] << 8) | svga->crtc[0x47]) & 0x7ff;
310 svga->hwcursor.y = ((svga->crtc[0x48] << 8) | svga->crtc[0x49]) & 0x7ff;
311 svga->hwcursor.xoff = svga->crtc[0x4e] & 63;
312 svga->hwcursor.yoff = svga->crtc[0x4f] & 63;
313 svga->hwcursor.addr = ((((svga->crtc[0x4c] << 8) | svga->crtc[0x4d]) & 0xfff) * 1024) + (svga->hwcursor.yoff * 16);
314 break;
316 case 0x53:
317 case 0x58: case 0x59: case 0x5a:
318 s3_virge_updatemapping(virge);
319 break;
321 case 0x67:
322 switch (val >> 4)
323 {
324 case 3: svga->bpp = 15; break;
325 case 5: svga->bpp = 16; break;
326 case 7: svga->bpp = 24; break;
327 case 13: svga->bpp = 32; break;
328 default: svga->bpp = 8; break;
329 }
330 break;
331 //case 0x55: case 0x43:
332 // pclog("Write CRTC R%02X %02X\n", crtcreg, val);
333 }
334 if (old != val)
335 {
336 if (svga->crtcreg < 0xe || svga->crtcreg > 0x10)
337 {
338 svga->fullchange = changeframecount;
339 svga_recalctimings(svga);
340 }
341 }
342 break;
343 }
344 svga_out(addr, val, svga);
345 }
347 static uint8_t s3_virge_in(uint16_t addr, void *p)
348 {
349 virge_t *virge = (virge_t *)p;
350 svga_t *svga = &virge->svga;
351 uint8_t ret;
353 if (((addr & 0xfff0) == 0x3d0 || (addr & 0xfff0) == 0x3b0) && !(svga->miscout & 1))
354 addr ^= 0x60;
356 // if (addr != 0x3da) pclog("S3 in %04X %04X:%08X ", addr, CS, pc);
357 switch (addr)
358 {
359 //case 0x3C6: case 0x3C7: case 0x3C8: case 0x3C9:
360 // pclog("Read RAMDAC %04X %04X:%04X\n", addr, CS, pc);
361 //return sdac_ramdac_in(addr);
363 case 0x3c5:
364 if (svga->seqaddr >= 0x10)
365 ret = svga->seqregs[svga->seqaddr & 0x1f];
366 else
367 ret = svga_in(addr, svga);
368 break;
370 case 0x3D4:
371 ret = svga->crtcreg;
372 break;
373 case 0x3D5:
374 //pclog("Read CRTC R%02X %04X:%04X (%02x)\n", svga->crtcreg, CS, pc, svga->crtc[svga->crtcreg]);
375 switch (svga->crtcreg)
376 {
377 case 0x2d: ret = virge->virge_id_high; break; /*Extended chip ID*/
378 case 0x2e: ret = virge->virge_id_low; break; /*New chip ID*/
379 case 0x2f: ret = virge->virge_rev; break;
380 case 0x30: ret = virge->virge_id; break; /*Chip ID*/
381 case 0x31: ret = (svga->crtc[0x31] & 0xcf) | ((virge->ma_ext & 3) << 4); break;
382 case 0x35: ret = (svga->crtc[0x35] & 0xf0) | (virge->bank & 0xf); break;
383 case 0x36: ret = (svga->crtc[0x36] & 0xfc) | 2; break; /*PCI bus*/
384 case 0x51: ret = (svga->crtc[0x51] & 0xf0) | ((virge->bank >> 2) & 0xc) | ((virge->ma_ext >> 2) & 3); break;
385 case 0x69: ret = virge->ma_ext; break;
386 case 0x6a: ret = virge->bank; break;
387 default: ret = svga->crtc[svga->crtcreg]; break;
388 }
389 break;
391 default:
392 ret = svga_in(addr, svga);
393 break;
394 }
395 // if (addr != 0x3da) pclog("%02X\n", ret);
396 return ret;
397 }
399 static void s3_virge_recalctimings(svga_t *svga)
400 {
401 virge_t *virge = (virge_t *)svga->p;
403 if (svga->crtc[0x5d] & 0x01) svga->htotal += 0x100;
404 if (svga->crtc[0x5d] & 0x02) svga->hdisp += 0x100;
405 if (svga->crtc[0x5e] & 0x01) svga->vtotal += 0x400;
406 if (svga->crtc[0x5e] & 0x02) svga->dispend += 0x400;
407 if (svga->crtc[0x5e] & 0x04) svga->vblankstart += 0x400;
408 if (svga->crtc[0x5e] & 0x10) svga->vsyncstart += 0x400;
409 if (svga->crtc[0x5e] & 0x40) svga->split += 0x400;
410 svga->interlace = svga->crtc[0x42] & 0x20;
412 if ((svga->crtc[0x67] & 0xc) != 0xc) /*VGA mode*/
413 {
414 svga->ma_latch |= (virge->ma_ext << 16);
415 //pclog("VGA mode\n");
416 if (svga->crtc[0x51] & 0x30) svga->rowoffset += (svga->crtc[0x51] & 0x30) << 4;
417 else if (svga->crtc[0x43] & 0x04) svga->rowoffset += 0x100;
418 if (!svga->rowoffset) svga->rowoffset = 256;
420 if ((svga->gdcreg[5] & 0x40) && (svga->crtc[0x3a] & 0x10))
421 {
422 switch (svga->bpp)
423 {
424 case 8:
425 svga->render = svga_render_8bpp_highres;
426 break;
427 case 15:
428 svga->render = svga_render_15bpp_highres;
429 break;
430 case 16:
431 svga->render = svga_render_16bpp_highres;
432 break;
433 case 24:
434 svga->render = svga_render_24bpp_highres;
435 break;
436 case 32:
437 svga->render = svga_render_32bpp_highres;
438 break;
439 }
440 }
442 // pclog("svga->rowoffset = %i bpp=%i\n", svga->rowoffset, svga->bpp);
443 if (svga->bpp == 15 || svga->bpp == 16)
444 {
445 svga->htotal >>= 1;
446 svga->hdisp >>= 1;
447 }
448 if (svga->bpp == 24)
449 {
450 svga->rowoffset = (svga->rowoffset * 3) / 4; /*Hack*/
451 }
452 //pclog("VGA mode x_disp=%i dispend=%i vtotal=%i\n", svga->hdisp, svga->dispend, svga->vtotal);
453 }
454 else /*Streams mode*/
455 {
456 if (virge->streams.buffer_ctrl & 1)
457 svga->ma_latch = virge->streams.pri_fb1 >> 2;
458 else
459 svga->ma_latch = virge->streams.pri_fb0 >> 2;
461 svga->hdisp = virge->streams.pri_w + 1;
462 svga->dispend = virge->streams.pri_h;
464 svga->overlay.x = virge->streams.sec_x - virge->streams.pri_x;
465 svga->overlay.y = virge->streams.sec_y - virge->streams.pri_y;
466 svga->overlay.ysize = virge->streams.sec_h;
468 if (virge->streams.buffer_ctrl & 2)
469 svga->overlay.addr = virge->streams.sec_fb1;
470 else
471 svga->overlay.addr = virge->streams.sec_fb0;
473 svga->overlay.ena = (svga->overlay.x >= 0);
474 svga->overlay.v_acc = virge->streams.dda_vert_accumulator;
475 //pclog("Streams mode x_disp=%i dispend=%i vtotal=%i x=%i y=%i ysize=%i\n", svga->hdisp, svga->dispend, svga->vtotal, svga->overlay.x, svga->overlay.y, svga->overlay.ysize);
476 svga->rowoffset = virge->streams.pri_stride >> 3;
478 switch ((virge->streams.pri_ctrl >> 24) & 0x7)
479 {
480 case 0: /*RGB-8 (CLUT)*/
481 svga->render = svga_render_8bpp_highres;
482 break;
483 case 3: /*KRGB-16 (1.5.5.5)*/
484 svga->htotal >>= 1;
485 svga->render = svga_render_15bpp_highres;
486 break;
487 case 5: /*RGB-16 (5.6.5)*/
488 svga->htotal >>= 1;
489 svga->render = svga_render_16bpp_highres;
490 break;
491 case 6: /*RGB-24 (8.8.8)*/
492 svga->render = svga_render_24bpp_highres;
493 break;
494 case 7: /*XRGB-32 (X.8.8.8)*/
495 svga->render = svga_render_32bpp_highres;
496 break;
497 }
498 }
500 if (((svga->miscout >> 2) & 3) == 3)
501 {
502 int n = svga->seqregs[0x12] & 0x1f;
503 int r = (svga->seqregs[0x12] >> 5) & 3;
504 int m = svga->seqregs[0x13] & 0x7f;
505 double freq = (((double)m + 2) / (((double)n + 2) * (double)(1 << r))) * 14318184.0;
507 svga->clock = cpuclock / freq;
508 }
509 }
511 static void s3_virge_updatemapping(virge_t *virge)
512 {
513 svga_t *svga = &virge->svga;
515 if (!(virge->pci_regs[PCI_REG_COMMAND] & PCI_COMMAND_MEM))
516 {
517 // pclog("Update mapping - PCI disabled\n");
518 mem_mapping_disable(&svga->mapping);
519 mem_mapping_disable(&virge->linear_mapping);
520 mem_mapping_disable(&virge->mmio_mapping);
521 mem_mapping_disable(&virge->new_mmio_mapping);
522 return;
523 }
525 pclog("Update mapping - bank %02X ", svga->gdcreg[6] & 0xc);
526 switch (svga->gdcreg[6] & 0xc) /*Banked framebuffer*/
527 {
528 case 0x0: /*128k at A0000*/
529 mem_mapping_set_addr(&svga->mapping, 0xa0000, 0x20000);
530 svga->banked_mask = 0xffff;
531 break;
532 case 0x4: /*64k at A0000*/
533 mem_mapping_set_addr(&svga->mapping, 0xa0000, 0x10000);
534 svga->banked_mask = 0xffff;
535 break;
536 case 0x8: /*32k at B0000*/
537 mem_mapping_set_addr(&svga->mapping, 0xb0000, 0x08000);
538 svga->banked_mask = 0x7fff;
539 break;
540 case 0xC: /*32k at B8000*/
541 mem_mapping_set_addr(&svga->mapping, 0xb8000, 0x08000);
542 svga->banked_mask = 0x7fff;
543 break;
544 }
546 virge->linear_base = (svga->crtc[0x5a] << 16) | (svga->crtc[0x59] << 24);
548 pclog("Linear framebuffer %02X ", svga->crtc[0x58] & 0x10);
549 if (svga->crtc[0x58] & 0x10) /*Linear framebuffer*/
550 {
551 switch (svga->crtc[0x58] & 3)
552 {
553 case 0: /*64k*/
554 virge->linear_size = 0x10000;
555 break;
556 case 1: /*1mb*/
557 virge->linear_size = 0x100000;
558 break;
559 case 2: /*2mb*/
560 virge->linear_size = 0x200000;
561 break;
562 case 3: /*8mb*/
563 virge->linear_size = 0x400000;
564 break;
565 }
566 virge->linear_base &= ~(virge->linear_size - 1);
567 // pclog("%08X %08X %02X %02X %02X\n", linear_base, linear_size, crtc[0x58], crtc[0x59], crtc[0x5a]);
568 pclog("Linear framebuffer at %08X size %08X\n", virge->linear_base, virge->linear_size);
569 if (virge->linear_base == 0xa0000)
570 {
571 mem_mapping_set_addr(&svga->mapping, 0xa0000, 0x10000);
572 mem_mapping_disable(&virge->linear_mapping);
573 }
574 else
575 mem_mapping_set_addr(&virge->linear_mapping, virge->linear_base, virge->linear_size);
576 }
577 else
578 mem_mapping_disable(&virge->linear_mapping);
580 pclog("Memory mapped IO %02X\n", svga->crtc[0x53] & 0x18);
581 if (svga->crtc[0x53] & 0x10) /*Old MMIO*/
582 {
583 if (svga->crtc[0x53] & 0x20)
584 mem_mapping_set_addr(&virge->mmio_mapping, 0xb8000, 0x8000);
585 else
586 mem_mapping_set_addr(&virge->mmio_mapping, 0xa0000, 0x10000);
587 }
588 else
589 mem_mapping_disable(&virge->mmio_mapping);
591 if (svga->crtc[0x53] & 0x08) /*New MMIO*/
592 mem_mapping_set_addr(&virge->new_mmio_mapping, virge->linear_base + 0x1000000, 0x10000);
593 else
594 mem_mapping_disable(&virge->new_mmio_mapping);
596 }
599 static uint8_t s3_virge_mmio_read(uint32_t addr, void *p)
600 {
601 reg_writes++;
602 // pclog("New MMIO readb %08X\n", addr);
603 switch (addr & 0xffff)
604 {
605 case 0x83b0: case 0x83b1: case 0x83b2: case 0x83b3:
606 case 0x83b4: case 0x83b5: case 0x83b6: case 0x83b7:
607 case 0x83b8: case 0x83b9: case 0x83ba: case 0x83bb:
608 case 0x83bc: case 0x83bd: case 0x83be: case 0x83bf:
609 case 0x83c0: case 0x83c1: case 0x83c2: case 0x83c3:
610 case 0x83c4: case 0x83c5: case 0x83c6: case 0x83c7:
611 case 0x83c8: case 0x83c9: case 0x83ca: case 0x83cb:
612 case 0x83cc: case 0x83cd: case 0x83ce: case 0x83cf:
613 case 0x83d0: case 0x83d1: case 0x83d2: case 0x83d3:
614 case 0x83d4: case 0x83d5: case 0x83d6: case 0x83d7:
615 case 0x83d8: case 0x83d9: case 0x83da: case 0x83db:
616 case 0x83dc: case 0x83dd: case 0x83de: case 0x83df:
617 return s3_virge_in(addr & 0x3ff, p);
618 }
619 return 0xff;
620 }
621 static uint16_t s3_virge_mmio_read_w(uint32_t addr, void *p)
622 {
623 reg_writes++;
624 // pclog("New MMIO readw %08X\n", addr);
625 switch (addr & 0xfffe)
626 {
627 default:
628 return s3_virge_mmio_read(addr, p) | (s3_virge_mmio_read(addr + 1, p) << 8);
629 }
630 return 0xffff;
631 }
632 static uint32_t s3_virge_mmio_read_l(uint32_t addr, void *p)
633 {
634 virge_t *virge = (virge_t *)p;
635 uint32_t ret = 0xffffffff;
636 reg_writes++;
637 // pclog("New MMIO readl %08X %04X(%08X):%08X ", addr, CS, cs, pc);
638 switch (addr & 0xfffc)
639 {
640 case 0x8180:
641 ret = virge->streams.pri_ctrl;
642 break;
643 case 0x8184:
644 ret = virge->streams.chroma_ctrl;
645 break;
646 case 0x8190:
647 ret = virge->streams.sec_ctrl;
648 break;
649 case 0x8194:
650 ret = virge->streams.chroma_upper_bound;
651 break;
652 case 0x8198:
653 ret = virge->streams.sec_filter;
654 break;
655 case 0x81a0:
656 ret = virge->streams.blend_ctrl;
657 break;
658 case 0x81c0:
659 ret = virge->streams.pri_fb0;
660 break;
661 case 0x81c4:
662 ret = virge->streams.pri_fb1;
663 break;
664 case 0x81c8:
665 ret = virge->streams.pri_stride;
666 break;
667 case 0x81cc:
668 ret = virge->streams.buffer_ctrl;
669 break;
670 case 0x81d0:
671 ret = virge->streams.sec_fb0;
672 break;
673 case 0x81d4:
674 ret = virge->streams.sec_fb1;
675 break;
676 case 0x81d8:
677 ret = virge->streams.sec_stride;
678 break;
679 case 0x81dc:
680 ret = virge->streams.overlay_ctrl;
681 break;
682 case 0x81e0:
683 ret = virge->streams.k1_vert_scale;
684 break;
685 case 0x81e4:
686 ret = virge->streams.k2_vert_scale;
687 break;
688 case 0x81e8:
689 ret = virge->streams.dda_vert_accumulator;
690 break;
691 case 0x81ec:
692 ret = virge->streams.fifo_ctrl;
693 break;
694 case 0x81f0:
695 ret = virge->streams.pri_start;
696 break;
697 case 0x81f4:
698 ret = virge->streams.pri_size;
699 break;
700 case 0x81f8:
701 ret = virge->streams.sec_start;
702 break;
703 case 0x81fc:
704 ret = virge->streams.sec_size;
705 break;
707 case 0x8504:
708 ret = (0x10 << 8) | (1 << 13);
709 break;
710 case 0xa4d4:
711 ret = virge->s3d.src_base;
712 break;
713 case 0xa4d8:
714 ret = virge->s3d.dest_base;
715 break;
716 case 0xa4dc:
717 ret = (virge->s3d.clip_l << 16) | virge->s3d.clip_r;
718 break;
719 case 0xa4e0:
720 ret = (virge->s3d.clip_t << 16) | virge->s3d.clip_b;
721 break;
722 case 0xa4e4:
723 ret = (virge->s3d.dest_str << 16) | virge->s3d.src_str;
724 break;
725 case 0xa4e8:
726 ret = virge->s3d.mono_pat_0;
727 break;
728 case 0xa4ec:
729 ret = virge->s3d.mono_pat_1;
730 break;
731 case 0xa4f0:
732 ret = virge->s3d.pat_bg_clr;
733 break;
734 case 0xa4f4:
735 ret = virge->s3d.pat_fg_clr;
736 break;
737 case 0xa4f8:
738 ret = virge->s3d.src_bg_clr;
739 break;
740 case 0xa4fc:
741 ret = virge->s3d.src_fg_clr;
742 break;
743 case 0xa500:
744 ret = virge->s3d.cmd_set;
745 break;
746 case 0xa504:
747 ret = (virge->s3d.r_width << 16) | virge->s3d.r_height;
748 break;
749 case 0xa508:
750 ret = (virge->s3d.rsrc_x << 16) | virge->s3d.rsrc_y;
751 break;
752 case 0xa50c:
753 ret = (virge->s3d.rdest_x << 16) | virge->s3d.rdest_y;
754 break;
756 default:
757 ret = s3_virge_mmio_read_w(addr, p) | (s3_virge_mmio_read_w(addr + 2, p) << 16);
758 }
759 // pclog("%02x\n", ret);
760 return ret;
761 }
762 static void s3_virge_mmio_write(uint32_t addr, uint8_t val, void *p)
763 {
764 virge_t *virge = (virge_t *)p;
765 svga_t *svga = &virge->svga;
767 // pclog("New MMIO writeb %08X %02X %04x(%08x):%08x\n", addr, val, CS, cs, pc);
768 reg_writes++;
769 if ((addr & 0xfffc) < 0x8000)
770 s3_virge_bitblt(virge, 8, val);
771 else switch (addr & 0xffff)
772 {
773 case 0x83b0: case 0x83b1: case 0x83b2: case 0x83b3:
774 case 0x83b4: case 0x83b5: case 0x83b6: case 0x83b7:
775 case 0x83b8: case 0x83b9: case 0x83ba: case 0x83bb:
776 case 0x83bc: case 0x83bd: case 0x83be: case 0x83bf:
777 case 0x83c0: case 0x83c1: case 0x83c2: case 0x83c3:
778 case 0x83c4: case 0x83c5: case 0x83c6: case 0x83c7:
779 case 0x83c8: case 0x83c9: case 0x83ca: case 0x83cb:
780 case 0x83cc: case 0x83cd: case 0x83ce: case 0x83cf:
781 case 0x83d0: case 0x83d1: case 0x83d2: case 0x83d3:
782 case 0x83d4: case 0x83d5: case 0x83d6: case 0x83d7:
783 case 0x83d8: case 0x83d9: case 0x83da: case 0x83db:
784 case 0x83dc: case 0x83dd: case 0x83de: case 0x83df:
785 s3_virge_out(addr & 0x3ff, val, p);
786 break;
787 }
790 }
791 static void s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *p)
792 {
793 virge_t *virge = (virge_t *)p;
794 reg_writes++;
795 // pclog("New MMIO writew %08X %04X %04x(%08x):%08x\n", addr, val, CS, cs, pc);
796 if ((addr & 0xfffc) < 0x8000)
797 {
798 if (virge->s3d.cmd_set & CMD_SET_MS)
799 s3_virge_bitblt(virge, 16, ((val >> 8) | (val << 8)) << 16);
800 else
801 s3_virge_bitblt(virge, 16, val);
802 }
803 else switch (addr & 0xfffe)
804 {
805 case 0x83d4:
806 s3_virge_mmio_write(addr, val, p);
807 s3_virge_mmio_write(addr + 1, val >> 8, p);
808 break;
809 }
810 }
811 static void s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *p)
812 {
813 virge_t *virge = (virge_t *)p;
814 svga_t *svga = &virge->svga;
815 reg_writes++;
816 // if ((addr & 0xfffc) >= 0xb400 && (addr & 0xfffc) < 0xb800)
817 // pclog("New MMIO writel %08X %08X %04x(%08x):%08x\n", addr, val, CS, cs, pc);
819 if ((addr & 0xfffc) < 0x8000)
820 {
821 if (virge->s3d.cmd_set & CMD_SET_MS)
822 s3_virge_bitblt(virge, 32, ((val & 0xff000000) >> 24) | ((val & 0x00ff0000) >> 8) | ((val & 0x0000ff00) << 8) | ((val & 0x000000ff) << 24));
823 else
824 s3_virge_bitblt(virge, 32, val);
825 }
826 else switch (addr & 0xfffc)
827 {
828 case 0x8180:
829 virge->streams.pri_ctrl = val;
830 s3_virge_recalctimings(svga);
831 svga->fullchange = changeframecount;
832 break;
833 case 0x8184:
834 virge->streams.chroma_ctrl = val;
835 break;
836 case 0x8190:
837 virge->streams.sec_ctrl = val;
838 virge->streams.dda_horiz_accumulator = val & 0xfff;
839 if (val & (1 << 11))
840 virge->streams.dda_horiz_accumulator |= 0xfffff800;
841 virge->streams.sdif = (val >> 24) & 7;
842 break;
843 case 0x8194:
844 virge->streams.chroma_upper_bound = val;
845 break;
846 case 0x8198:
847 virge->streams.sec_filter = val;
848 virge->streams.k1_horiz_scale = val & 0x7ff;
849 if (val & (1 << 10))
850 virge->streams.k1_horiz_scale |= 0xfffff800;
851 virge->streams.k2_horiz_scale = (val >> 16) & 0x7ff;
852 if ((val >> 16) & (1 << 10))
853 virge->streams.k2_horiz_scale |= 0xfffff800;
854 break;
855 case 0x81a0:
856 virge->streams.blend_ctrl = val;
857 break;
858 case 0x81c0:
859 virge->streams.pri_fb0 = val & 0x3fffff;
860 s3_virge_recalctimings(svga);
861 svga->fullchange = changeframecount;
862 break;
863 case 0x81c4:
864 virge->streams.pri_fb1 = val & 0x3fffff;
865 s3_virge_recalctimings(svga);
866 svga->fullchange = changeframecount;
867 break;
868 case 0x81c8:
869 virge->streams.pri_stride = val & 0xfff;
870 s3_virge_recalctimings(svga);
871 svga->fullchange = changeframecount;
872 break;
873 case 0x81cc:
874 pclog("Write buffer_ctrl %08x\n", val);
875 virge->streams.buffer_ctrl = val;
876 s3_virge_recalctimings(svga);
877 break;
878 case 0x81d0:
879 virge->streams.sec_fb0 = val;
880 s3_virge_recalctimings(svga);
881 svga->fullchange = changeframecount;
882 break;
883 case 0x81d4:
884 virge->streams.sec_fb1 = val;
885 s3_virge_recalctimings(svga);
886 svga->fullchange = changeframecount;
887 break;
888 case 0x81d8:
889 virge->streams.sec_stride = val;
890 s3_virge_recalctimings(svga);
891 svga->fullchange = changeframecount;
892 break;
893 case 0x81dc:
894 virge->streams.overlay_ctrl = val;
895 break;
896 case 0x81e0:
897 virge->streams.k1_vert_scale = val & 0x7ff;
898 if (val & (1 << 10))
899 virge->streams.k1_vert_scale |= 0xfffff800;
900 break;
901 case 0x81e4:
902 virge->streams.k2_vert_scale = val & 0x7ff;
903 if (val & (1 << 10))
904 virge->streams.k2_vert_scale |= 0xfffff800;
905 break;
906 case 0x81e8:
907 virge->streams.dda_vert_accumulator = val & 0xfff;
908 if (val & (1 << 11))
909 virge->streams.dda_vert_accumulator |= 0xfffff800;
910 break;
911 case 0x81ec:
912 virge->streams.fifo_ctrl = val;
913 break;
914 case 0x81f0:
915 virge->streams.pri_start = val;
916 virge->streams.pri_x = (val >> 16) & 0x7ff;
917 virge->streams.pri_y = val & 0x7ff;
918 s3_virge_recalctimings(svga);
919 svga->fullchange = changeframecount;
920 break;
921 case 0x81f4:
922 virge->streams.pri_size = val;
923 virge->streams.pri_w = (val >> 16) & 0x7ff;
924 virge->streams.pri_h = val & 0x7ff;
925 s3_virge_recalctimings(svga);
926 svga->fullchange = changeframecount;
927 break;
928 case 0x81f8:
929 virge->streams.sec_start = val;
930 virge->streams.sec_x = (val >> 16) & 0x7ff;
931 virge->streams.sec_y = val & 0x7ff;
932 s3_virge_recalctimings(svga);
933 svga->fullchange = changeframecount;
934 break;
935 case 0x81fc:
936 virge->streams.sec_size = val;
937 virge->streams.sec_w = (val >> 16) & 0x7ff;
938 virge->streams.sec_h = val & 0x7ff;
939 s3_virge_recalctimings(svga);
940 svga->fullchange = changeframecount;
941 break;
943 case 0xa000: case 0xa004: case 0xa008: case 0xa00c:
944 case 0xa010: case 0xa014: case 0xa018: case 0xa01c:
945 case 0xa020: case 0xa024: case 0xa028: case 0xa02c:
946 case 0xa030: case 0xa034: case 0xa038: case 0xa03c:
947 case 0xa040: case 0xa044: case 0xa048: case 0xa04c:
948 case 0xa050: case 0xa054: case 0xa058: case 0xa05c:
949 case 0xa060: case 0xa064: case 0xa068: case 0xa06c:
950 case 0xa070: case 0xa074: case 0xa078: case 0xa07c:
951 case 0xa080: case 0xa084: case 0xa088: case 0xa08c:
952 case 0xa090: case 0xa094: case 0xa098: case 0xa09c:
953 case 0xa0a0: case 0xa0a4: case 0xa0a8: case 0xa0ac:
954 case 0xa0b0: case 0xa0b4: case 0xa0b8: case 0xa0bc:
955 case 0xa0c0: case 0xa0c4: case 0xa0c8: case 0xa0cc:
956 case 0xa0d0: case 0xa0d4: case 0xa0d8: case 0xa0dc:
957 case 0xa0e0: case 0xa0e4: case 0xa0e8: case 0xa0ec:
958 case 0xa0f0: case 0xa0f4: case 0xa0f8: case 0xa0fc:
959 case 0xa100: case 0xa104: case 0xa108: case 0xa10c:
960 case 0xa110: case 0xa114: case 0xa118: case 0xa11c:
961 case 0xa120: case 0xa124: case 0xa128: case 0xa12c:
962 case 0xa130: case 0xa134: case 0xa138: case 0xa13c:
963 case 0xa140: case 0xa144: case 0xa148: case 0xa14c:
964 case 0xa150: case 0xa154: case 0xa158: case 0xa15c:
965 case 0xa160: case 0xa164: case 0xa168: case 0xa16c:
966 case 0xa170: case 0xa174: case 0xa178: case 0xa17c:
967 case 0xa180: case 0xa184: case 0xa188: case 0xa18c:
968 case 0xa190: case 0xa194: case 0xa198: case 0xa19c:
969 case 0xa1a0: case 0xa1a4: case 0xa1a8: case 0xa1ac:
970 case 0xa1b0: case 0xa1b4: case 0xa1b8: case 0xa1bc:
971 case 0xa1c0: case 0xa1c4: case 0xa1c8: case 0xa1cc:
972 case 0xa1d0: case 0xa1d4: case 0xa1d8: case 0xa1dc:
973 case 0xa1e0: case 0xa1e4: case 0xa1e8: case 0xa1ec:
974 case 0xa1f0: case 0xa1f4: case 0xa1f8: case 0xa1fc:
975 {
976 int x = addr & 4;
977 int y = (addr >> 3) & 7;
978 virge->s3d.pattern_8[y*8 + x] = val & 0xff;
979 virge->s3d.pattern_8[y*8 + x + 1] = val >> 8;
980 virge->s3d.pattern_8[y*8 + x + 2] = val >> 16;
981 virge->s3d.pattern_8[y*8 + x + 3] = val >> 24;
983 x = (addr >> 1) & 6;
984 y = (addr >> 4) & 7;
985 virge->s3d.pattern_16[y*8 + x] = val & 0xffff;
986 virge->s3d.pattern_16[y*8 + x + 1] = val >> 16;
988 x = (addr >> 2) & 7;
989 y = (addr >> 5) & 7;
990 virge->s3d.pattern_32[y*8 + x] = val & 0xffffff;
991 }
992 break;
995 case 0xa4d4: case 0xa8d4:
996 virge->s3d.src_base = val & 0x3ffff8;
997 break;
998 case 0xa4d8: case 0xa8d8: case 0xb4d8:
999 virge->s3d.dest_base = val & 0x3ffff8;
1000 break;
1001 case 0xa4dc: case 0xa8dc: case 0xb4dc:
1002 virge->s3d.clip_l = (val >> 16) & 0x7ff;
1003 virge->s3d.clip_r = val & 0x7ff;
1004 break;
1005 case 0xa4e0: case 0xa8e0: case 0xb4e0:
1006 virge->s3d.clip_t = (val >> 16) & 0x7ff;
1007 virge->s3d.clip_b = val & 0x7ff;
1008 break;
1009 case 0xa4e4: case 0xa8e4: case 0xb4e4:
1010 virge->s3d.dest_str = (val >> 16) & 0xff8;
1011 virge->s3d.src_str = val & 0xff8;
1012 break;
1013 case 0xa4e8:
1014 virge->s3d.mono_pat_0 = val;
1015 break;
1016 case 0xa4ec:
1017 virge->s3d.mono_pat_1 = val;
1018 break;
1019 case 0xa4f0:
1020 virge->s3d.pat_bg_clr = val;
1021 break;
1022 case 0xa4f4: case 0xa8f4:
1023 virge->s3d.pat_fg_clr = val;
1024 break;
1025 case 0xa4f8:
1026 virge->s3d.src_bg_clr = val;
1027 break;
1028 case 0xa4fc:
1029 virge->s3d.src_fg_clr = val;
1030 break;
1031 case 0xa500: case 0xa900:
1032 virge->s3d.cmd_set = val;
1033 if (!(val & CMD_SET_AE))
1034 s3_virge_bitblt(virge, -1, 0);
1035 break;
1036 case 0xa504:
1037 virge->s3d.r_width = (val >> 16) & 0x7ff;
1038 virge->s3d.r_height = val & 0x7ff;
1039 break;
1040 case 0xa508:
1041 virge->s3d.rsrc_x = (val >> 16) & 0x7ff;
1042 virge->s3d.rsrc_y = val & 0x7ff;
1043 break;
1044 case 0xa50c:
1045 virge->s3d.rdest_x = (val >> 16) & 0x7ff;
1046 virge->s3d.rdest_y = val & 0x7ff;
1047 if (virge->s3d.cmd_set & CMD_SET_AE)
1048 s3_virge_bitblt(virge, -1, 0);
1049 break;
1050 case 0xa96c:
1051 virge->s3d.lxend0 = (val >> 16) & 0x7ff;
1052 virge->s3d.lxend1 = val & 0x7ff;
1053 break;
1054 case 0xa970:
1055 virge->s3d.ldx = (int32_t)val;
1056 break;
1057 case 0xa974:
1058 virge->s3d.lxstart = val;
1059 break;
1060 case 0xa978:
1061 virge->s3d.lystart = val & 0x7ff;
1062 break;
1063 case 0xa97c:
1064 virge->s3d.lycnt = val & 0x7ff;
1065 virge->s3d.line_dir = val >> 31;
1066 if (virge->s3d.cmd_set & CMD_SET_AE)
1067 s3_virge_bitblt(virge, -1, 0);
1068 break;
1070 case 0xb4d4:
1071 virge->s3d.z_base = val & 0x3ffff8;
1072 break;
1073 case 0xb4e8:
1074 virge->s3d.z_str = val & 0xff8;
1075 break;
1076 case 0xb4ec:
1077 virge->s3d.tex_base = val & 0x3ffff8;
1078 break;
1079 case 0xb4f0:
1080 virge->s3d.tex_bdr_clr = val & 0xffffff;
1081 break;
1082 case 0xb500:
1083 virge->s3d.cmd_set = val;
1084 if (!(val & CMD_SET_AE))
1085 s3_virge_triangle(virge);
1086 break;
1087 case 0xb504:
1088 virge->s3d.tbv = val & 0xfffff;
1089 break;
1090 case 0xb508:
1091 virge->s3d.tbu = val & 0xfffff;
1092 break;
1093 case 0xb50c:
1094 virge->s3d.TdWdX = val;
1095 break;
1096 case 0xb510:
1097 virge->s3d.TdWdY = val;
1098 break;
1099 case 0xb514:
1100 virge->s3d.tws = val;
1101 break;
1102 case 0xb518:
1103 virge->s3d.TdDdX = val;
1104 break;
1105 case 0xb51c:
1106 virge->s3d.TdVdX = val;
1107 break;
1108 case 0xb520:
1109 virge->s3d.TdUdX = val;
1110 break;
1111 case 0xb524:
1112 virge->s3d.TdDdY = val;
1113 break;
1114 case 0xb528:
1115 virge->s3d.TdVdY = val;
1116 break;
1117 case 0xb52c:
1118 virge->s3d.TdUdY = val;
1119 break;
1120 case 0xb530:
1121 virge->s3d.tds = val;
1122 break;
1123 case 0xb534:
1124 virge->s3d.tvs = val;
1125 break;
1126 case 0xb538:
1127 virge->s3d.tus = val;
1128 break;
1129 case 0xb53c:
1130 virge->s3d.TdGdX = val >> 16;
1131 virge->s3d.TdBdX = val & 0xffff;
1132 break;
1133 case 0xb540:
1134 virge->s3d.TdAdX = val >> 16;
1135 virge->s3d.TdRdX = val & 0xffff;
1136 break;
1137 case 0xb544:
1138 virge->s3d.TdGdY = val >> 16;
1139 virge->s3d.TdBdY = val & 0xffff;
1140 break;
1141 case 0xb548:
1142 virge->s3d.TdAdY = val >> 16;
1143 virge->s3d.TdRdY = val & 0xffff;
1144 break;
1145 case 0xb54c:
1146 virge->s3d.tgs = (val >> 16) & 0xffff;
1147 virge->s3d.tbs = val & 0xffff;
1148 break;
1149 case 0xb550:
1150 virge->s3d.tas = (val >> 16) & 0xffff;
1151 virge->s3d.trs = val & 0xffff;
1152 break;
1154 case 0xb554:
1155 virge->s3d.TdZdX = val;
1156 break;
1157 case 0xb558:
1158 virge->s3d.TdZdY = val;
1159 break;
1160 case 0xb55c:
1161 virge->s3d.tzs = val;
1162 break;
1163 case 0xb560:
1164 virge->s3d.TdXdY12 = val;
1165 break;
1166 case 0xb564:
1167 virge->s3d.txend12 = val;
1168 break;
1169 case 0xb568:
1170 virge->s3d.TdXdY01 = val;
1171 break;
1172 case 0xb56c:
1173 virge->s3d.txend01 = val;
1174 break;
1175 case 0xb570:
1176 virge->s3d.TdXdY02 = val;
1177 break;
1178 case 0xb574:
1179 virge->s3d.txs = val;
1180 break;
1181 case 0xb578:
1182 virge->s3d.tys = val;
1183 break;
1184 case 0xb57c:
1185 virge->s3d.ty01 = (val >> 16) & 0x7ff;
1186 virge->s3d.ty12 = val & 0x7ff;
1187 virge->s3d.tlr = val >> 31;
1188 if (virge->s3d.cmd_set & CMD_SET_AE)
1189 s3_virge_triangle(virge);
1190 break;
1191 }
1192 }
/*Read one pixel from VRAM at byte address 'addr' into 'val'.
  Relies on 'bpp' (0 = 8 bpp, 1 = 16 bpp, 2 = 24 bpp) and 'vram' being in the
  caller's scope. The address is wrapped with the 0x3fffff mask; any other bpp
  value leaves 'val' untouched.
  Both arguments are parenthesised so that an address expression such as
  'base | offset' is masked as a whole.*/
#define READ(addr, val)                                                                 \
        do                                                                              \
        {                                                                               \
                switch (bpp)                                                            \
                {                                                                       \
                        case 0: /*8 bpp*/                                               \
                        (val) = vram[(addr) & 0x3fffff];                                \
                        break;                                                          \
                        case 1: /*16 bpp*/                                              \
                        (val) = *(uint16_t *)&vram[(addr) & 0x3fffff];                  \
                        break;                                                          \
                        case 2: /*24 bpp - fetch 32 bits, keep the low 24*/             \
                        (val) = (*(uint32_t *)&vram[(addr) & 0x3fffff]) & 0xffffff;     \
                        break;                                                          \
                }                                                                       \
        } while (0)
/*16-bit Z-buffer value at VRAM byte address 'addr' (wrapped with the 0x3fffff mask).
  Relies on 'vram' being in the caller's scope. The argument and the whole
  expansion are parenthesised so the macro behaves as a single operand.*/
#define Z_READ(addr) (*(uint16_t *)&vram[(addr) & 0x3fffff])
/*Store the 16-bit Z value 'val' at VRAM byte address 'addr' (wrapped with the
  0x3fffff mask), but only when no CMD_SET_ZB_MODE bit is set in the current command.
  Relies on 'virge' and 'vram' being in the caller's scope.
  Wrapped in do/while(0) so a following 'else' cannot bind to the macro's own 'if'.*/
#define Z_WRITE(addr, val)                                                      \
        do                                                                      \
        {                                                                       \
                if (!(virge->s3d.cmd_set & CMD_SET_ZB_MODE))                    \
                        *(uint16_t *)&vram[(addr) & 0x3fffff] = (val);          \
        } while (0)
/*Clear the caller's 'update' flag when hardware clipping (CMD_SET_HC) is enabled
  and (x, y) lies outside the clip rectangle. The rectangle bounds are inclusive :
  a pixel exactly on clip_l/clip_r/clip_t/clip_b is kept.
  Relies on 'virge' and 'update' being in the caller's scope.
  x and y are parenthesised so expressions such as 'a & b' compare as a whole.*/
#define CLIP(x, y)                                              \
        do                                                      \
        {                                                       \
                if ((virge->s3d.cmd_set & CMD_SET_HC) &&        \
                    ((x) < virge->s3d.clip_l ||                 \
                     (x) > virge->s3d.clip_r ||                 \
                     (y) < virge->s3d.clip_t ||                 \
                     (y) > virge->s3d.clip_b))                  \
                        update = 0;                             \
        } while (0)
/*Z-buffer test. Does nothing when a CMD_SET_ZB_MODE bit is set; otherwise bits
  20-22 of cmd_set pick the comparison between the source depth Zs and the
  buffer depth Zzb :
    0 never pass, 1 Zs > Zzb, 2 Zs == Zzb, 3 Zs >= Zzb,
    4 Zs < Zzb,   5 Zs != Zzb, 6 Zs <= Zzb, 7 always pass.
  On failure the caller's 'update' flag is cleared; on success Zzb (which must be
  an lvalue) is overwritten with Zs. Mode 7 also forces 'update' to 1.
  Relies on 'virge' and 'update' being in the caller's scope.
  Both arguments are parenthesised so compound expressions compare as a whole.*/
#define Z_CLIP(Zzb, Zs)                                                                         \
        do                                                                                      \
        {                                                                                       \
                if (!(virge->s3d.cmd_set & CMD_SET_ZB_MODE))                                    \
                        switch ((virge->s3d.cmd_set >> 20) & 7)                                 \
                        {                                                                       \
                                case 0: update = 0; break;                                      \
                                case 1: if ((Zs) <= (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 2: if ((Zs) != (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 3: if ((Zs) <  (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 4: if ((Zs) >= (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 5: if ((Zs) == (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 6: if ((Zs) >  (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 7: update = 1; (Zzb) = (Zs); break;                        \
                        }                                                                       \
        } while (0)
/*Apply the 8-bit raster operation in virge->s3d.rop to the low 24 bits of the
  caller's 'dest', 'source' and 'pattern', OR-ing the result into 'out'
  (which the caller is expected to have cleared).
  For each bit position a 3-bit index is formed - dest = bit 0, source = bit 1,
  pattern = bit 2 - and the matching bit of the ROP byte decides the output bit.*/
#define MIX()                                                           \
        do                                                              \
        {                                                               \
                int bit;                                                \
                for (bit = 0; bit < 24; bit++)                          \
                {                                                       \
                        int rop_index = 0;                              \
                        if (dest & (1 << bit))                          \
                                rop_index |= 1;                         \
                        if (source & (1 << bit))                        \
                                rop_index |= 2;                         \
                        if (pattern & (1 << bit))                       \
                                rop_index |= 4;                         \
                        if (virge->s3d.rop & (1 << rop_index))          \
                                out |= (1 << bit);                      \
                }                                                       \
        } while (0)
/*Write one pixel 'val' to VRAM at byte address 'addr' (wrapped with the 0x3fffff
  mask) and stamp the touched 4 KB page in changedvram with changeframecount.
  Relies on 'bpp' (0 = 8 bpp, 1 = 16 bpp, 2 = 24 bpp), 'vram', 'virge' and
  'changeframecount' being in the caller's scope.
  In 24 bpp mode a 32-bit store is used; the byte that follows the pixel is read
  back first and merged into the top 8 bits so it is preserved.
  Arguments are parenthesised, and the preserved byte is widened to uint32_t
  before the shift so it never shifts into the sign bit of an int.*/
#define WRITE(addr, val)                                                                        \
        do                                                                                      \
        {                                                                                       \
                switch (bpp)                                                                    \
                {                                                                               \
                        case 0: /*8 bpp*/                                                       \
                        vram[(addr) & 0x3fffff] = (val);                                        \
                        virge->svga.changedvram[((addr) & 0x3fffff) >> 12] = changeframecount;  \
                        break;                                                                  \
                        case 1: /*16 bpp*/                                                      \
                        *(uint16_t *)&vram[(addr) & 0x3fffff] = (val);                          \
                        virge->svga.changedvram[((addr) & 0x3fffff) >> 12] = changeframecount;  \
                        break;                                                                  \
                        case 2: /*24 bpp*/                                                      \
                        *(uint32_t *)&vram[(addr) & 0x3fffff] = ((val) & 0xffffff) |            \
                                ((uint32_t)vram[((addr) + 3) & 0x3fffff] << 24);                \
                        virge->svga.changedvram[((addr) & 0x3fffff) >> 12] = changeframecount;  \
                        break;                                                                  \
                }                                                                               \
        } while (0)
1277 static void s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat)
1278 {
1279 int cpu_input = (count != -1);
1280 uint8_t *vram = virge->svga.vram;
1281 uint32_t mono_pattern[64];
1282 int count_mask;
1283 int x_inc = (virge->s3d.cmd_set & CMD_SET_XP) ? 1 : -1;
1284 int y_inc = (virge->s3d.cmd_set & CMD_SET_YP) ? 1 : -1;
1285 int bpp;
1286 int x_mul;
1287 int cpu_dat_shift;
1288 uint32_t *pattern_data;
1290 switch (virge->s3d.cmd_set & CMD_SET_FORMAT_MASK)
1291 {
1292 case CMD_SET_FORMAT_8:
1293 bpp = 0;
1294 x_mul = 1;
1295 cpu_dat_shift = 8;
1296 pattern_data = virge->s3d.pattern_8;
1297 break;
1298 case CMD_SET_FORMAT_16:
1299 bpp = 1;
1300 x_mul = 2;
1301 cpu_dat_shift = 16;
1302 pattern_data = virge->s3d.pattern_16;
1303 break;
1304 case CMD_SET_FORMAT_24:
1305 default:
1306 bpp = 2;
1307 x_mul = 3;
1308 cpu_dat_shift = 24;
1309 pattern_data = virge->s3d.pattern_32;
1310 break;
1311 }
1312 if (virge->s3d.cmd_set & CMD_SET_MP)
1313 pattern_data = mono_pattern;
1315 switch (virge->s3d.cmd_set & CMD_SET_ITA_MASK)
1316 {
1317 case CMD_SET_ITA_BYTE:
1318 count_mask = ~0x7;
1319 break;
1320 case CMD_SET_ITA_WORD:
1321 count_mask = ~0xf;
1322 break;
1323 case CMD_SET_ITA_DWORD:
1324 default:
1325 count_mask = ~0x1f;
1326 break;
1327 }
1328 if (virge->s3d.cmd_set & CMD_SET_MP)
1329 {
1330 int x, y;
1331 for (y = 0; y < 4; y++)
1332 {
1333 for (x = 0; x < 8; x++)
1334 {
1335 if (virge->s3d.mono_pat_0 & (1 << (x + y*8)))
1336 mono_pattern[y*8 + x] = virge->s3d.pat_fg_clr;
1337 else
1338 mono_pattern[y*8 + x] = virge->s3d.pat_bg_clr;
1339 if (virge->s3d.mono_pat_1 & (1 << (x + y*8)))
1340 mono_pattern[(y+4)*8 + x] = virge->s3d.pat_fg_clr;
1341 else
1342 mono_pattern[(y+4)*8 + x] = virge->s3d.pat_bg_clr;
1343 }
1344 }
1345 }
1346 switch (virge->s3d.cmd_set & CMD_SET_COMMAND_MASK)
1347 {
1348 case CMD_SET_COMMAND_NOP:
1349 break;
1351 case CMD_SET_COMMAND_BITBLT:
1352 if (count == -1)
1353 {
1354 virge->s3d.src_x = virge->s3d.rsrc_x;
1355 virge->s3d.src_y = virge->s3d.rsrc_y;
1356 virge->s3d.dest_x = virge->s3d.rdest_x;
1357 virge->s3d.dest_y = virge->s3d.rdest_y;
1358 virge->s3d.w = virge->s3d.r_width;
1359 virge->s3d.h = virge->s3d.r_height;
1360 virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff;
1361 virge->s3d.data_left_count = 0;
1363 /* pclog("BitBlt start %i,%i %i,%i %i,%i %02X %x %x\n",
1364 virge->s3d.src_x,
1365 virge->s3d.src_y,
1366 virge->s3d.dest_x,
1367 virge->s3d.dest_y,
1368 virge->s3d.w,
1369 virge->s3d.h,
1370 virge->s3d.rop,
1371 virge->s3d.src_base,
1372 virge->s3d.dest_base);*/
1374 if (virge->s3d.cmd_set & CMD_SET_IDS)
1375 return;
1376 }
1377 if (!virge->s3d.h)
1378 return;
1379 while (count)
1380 {
1381 uint32_t src_addr = virge->s3d.src_base + (virge->s3d.src_x * x_mul) + (virge->s3d.src_y * virge->s3d.src_str);
1382 uint32_t dest_addr = virge->s3d.dest_base + (virge->s3d.dest_x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str);
1383 uint32_t source, dest, pattern;
1384 uint32_t out = 0;
1385 int update = 1;
1387 switch (virge->s3d.cmd_set & (CMD_SET_MS | CMD_SET_IDS))
1388 {
1389 case 0:
1390 case CMD_SET_MS:
1391 READ(src_addr, source);
1392 if ((virge->s3d.cmd_set & CMD_SET_TP) && source == virge->s3d.src_fg_clr)
1393 update = 0;
1394 break;
1395 case CMD_SET_IDS:
1396 if (virge->s3d.data_left_count)
1397 {
1398 /*Handle shifting for 24-bit data*/
1399 source = virge->s3d.data_left;
1400 source |= ((cpu_dat << virge->s3d.data_left_count) & ~0xff000000);
1401 cpu_dat >>= (cpu_dat_shift - virge->s3d.data_left_count);
1402 count -= (cpu_dat_shift - virge->s3d.data_left_count);
1403 virge->s3d.data_left_count = 0;
1404 if (count < cpu_dat_shift)
1405 {
1406 virge->s3d.data_left = cpu_dat;
1407 virge->s3d.data_left_count = count;
1408 count = 0;
1409 }
1410 }
1411 else
1412 {
1413 source = cpu_dat;
1414 cpu_dat >>= cpu_dat_shift;
1415 count -= cpu_dat_shift;
1416 if (count < cpu_dat_shift)
1417 {
1418 virge->s3d.data_left = cpu_dat;
1419 virge->s3d.data_left_count = count;
1420 count = 0;
1421 }
1422 }
1423 if ((virge->s3d.cmd_set & CMD_SET_TP) && source == virge->s3d.src_fg_clr)
1424 update = 0;
1425 break;
1426 case CMD_SET_IDS | CMD_SET_MS:
1427 source = (cpu_dat & (1 << 31)) ? virge->s3d.src_fg_clr : virge->s3d.src_bg_clr;
1428 if ((virge->s3d.cmd_set & CMD_SET_TP) && !(cpu_dat & (1 << 31)))
1429 update = 0;
1430 cpu_dat <<= 1;
1431 count--;
1432 break;
1433 }
1435 CLIP(virge->s3d.dest_x, virge->s3d.dest_y);
1437 if (update)
1438 {
1439 READ(dest_addr, dest);
1440 pattern = pattern_data[(virge->s3d.dest_y & 7)*8 + (virge->s3d.dest_x & 7)];
1441 MIX();
1443 WRITE(dest_addr, out);
1444 }
1446 virge->s3d.src_x += x_inc;
1447 virge->s3d.dest_x += x_inc;
1448 if (!virge->s3d.w)
1449 {
1450 virge->s3d.src_x = virge->s3d.rsrc_x;
1451 virge->s3d.dest_x = virge->s3d.rdest_x;
1452 virge->s3d.w = virge->s3d.r_width;
1454 virge->s3d.src_y += y_inc;
1455 virge->s3d.dest_y += y_inc;
1456 virge->s3d.h--;
1458 switch (virge->s3d.cmd_set & (CMD_SET_MS | CMD_SET_IDS))
1459 {
1460 case CMD_SET_IDS:
1461 cpu_dat >>= (count - (count & count_mask));
1462 count &= count_mask;
1463 virge->s3d.data_left_count = 0;
1464 break;
1466 case CMD_SET_IDS | CMD_SET_MS:
1467 cpu_dat <<= (count - (count & count_mask));
1468 count &= count_mask;
1469 break;
1470 }
1471 if (!virge->s3d.h)
1472 {
1473 return;
1474 }
1475 }
1476 else
1477 virge->s3d.w--;
1478 }
1479 break;
1481 case CMD_SET_COMMAND_RECTFILL:
1482 /*No source, pattern = pat_fg_clr*/
1483 if (count == -1)
1484 {
1485 virge->s3d.src_x = virge->s3d.rsrc_x;
1486 virge->s3d.src_y = virge->s3d.rsrc_y;
1487 virge->s3d.dest_x = virge->s3d.rdest_x;
1488 virge->s3d.dest_y = virge->s3d.rdest_y;
1489 virge->s3d.w = virge->s3d.r_width;
1490 virge->s3d.h = virge->s3d.r_height;
1491 virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff;
1493 /* pclog("RctFll start %i,%i %i,%i %02X %08x\n", virge->s3d.dest_x,
1494 virge->s3d.dest_y,
1495 virge->s3d.w,
1496 virge->s3d.h,
1497 virge->s3d.rop, virge->s3d.dest_base);*/
1498 }
1500 while (count)
1501 {
1502 uint32_t dest_addr = virge->s3d.dest_base + (virge->s3d.dest_x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str);
1503 uint32_t source = 0, dest, pattern = virge->s3d.pat_fg_clr;
1504 uint32_t out = 0;
1505 int update = 1;
1507 CLIP(virge->s3d.dest_x, virge->s3d.dest_y);
1509 if (update)
1510 {
1511 READ(dest_addr, dest);
1513 MIX();
1515 WRITE(dest_addr, out);
1516 }
1518 virge->s3d.src_x += x_inc;
1519 virge->s3d.dest_x += x_inc;
1520 if (!virge->s3d.w)
1521 {
1522 virge->s3d.src_x = virge->s3d.rsrc_x;
1523 virge->s3d.dest_x = virge->s3d.rdest_x;
1524 virge->s3d.w = virge->s3d.r_width;
1526 virge->s3d.src_y += y_inc;
1527 virge->s3d.dest_y += y_inc;
1528 virge->s3d.h--;
1529 if (!virge->s3d.h)
1530 {
1531 return;
1532 }
1533 }
1534 else
1535 virge->s3d.w--;
1536 count--;
1537 }
1538 break;
1540 case CMD_SET_COMMAND_LINE:
1541 if (count == -1)
1542 {
1543 virge->s3d.dest_x = virge->s3d.lxstart;
1544 virge->s3d.dest_y = virge->s3d.lystart;
1545 virge->s3d.h = virge->s3d.lycnt;
1546 virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff;
1547 if (virge->s3d.ldx >= 0)
1548 virge->s3d.dest_x -= virge->s3d.ldx / 2;
1549 else
1550 virge->s3d.dest_x += virge->s3d.ldx / 2;
1551 //virge->s3d.dest_dest_x = virge->s3d.dest_x + virge->s3d.ldx;
1552 }
1553 while (virge->s3d.h)
1554 {
1555 int x = virge->s3d.dest_x >> 20;
1556 int new_x = (virge->s3d.dest_x + virge->s3d.ldx) >> 20;
1558 do
1559 {
1560 uint32_t dest_addr = virge->s3d.dest_base + (x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str);
1561 uint32_t source = 0, dest, pattern;
1562 uint32_t out = 0;
1563 int update = 1;
1565 CLIP(x, virge->s3d.dest_y);
1567 if (update)
1568 {
1569 READ(dest_addr, dest);
1570 pattern = virge->s3d.pat_fg_clr;
1572 MIX();
1574 WRITE(dest_addr, out);
1575 }
1577 if (x < new_x)
1578 x++;
1579 else if (x > new_x)
1580 x--;
1581 } while (x != new_x);
1583 virge->s3d.dest_x += virge->s3d.ldx;
1584 virge->s3d.dest_y--;
1585 virge->s3d.h--;
1586 }
1587 break;
1589 default:
1590 fatal("s3_virge_bitblt : blit command %i %08x\n", (virge->s3d.cmd_set >> 27) & 0xf, virge->s3d.cmd_set);
1591 }
1592 }
/*Unpack a 15-bit pixel 'val' (5 bits each of red, green, blue at bits 10, 5, 0)
  into 8-bit r, g, b. The top three bits of each 5-bit field are replicated into
  the low bits so that 0x1f expands to 0xff.
  The assignments are grouped in one compound statement so the whole conversion
  is governed by an enclosing unbraced 'if' or loop. The expansion is still
  complete without a trailing ';' at the call site.*/
#define RGB15_TO_24(val, r, g, b)                                               \
        {                                                                       \
                (b) = (((val) & 0x001f) << 3) | (((val) & 0x001f) >> 2);        \
                (g) = (((val) & 0x03e0) >> 2) | (((val) & 0x03e0) >> 7);        \
                (r) = (((val) & 0x7c00) >> 7) | (((val) & 0x7c00) >> 12);       \
        }
/*Split a 24-bit pixel 'val' (red in bits 16-23, green in bits 8-15, blue in
  bits 0-7) into separate 8-bit r, g, b values.
  Wrapped in do/while(0) so it acts as a single statement, and 'val' is
  parenthesised so compound expressions are masked as a whole.*/
#define RGB24_TO_24(val, r, g, b)                       \
        do                                              \
        {                                               \
                (b) = (val) & 0xff;                     \
                (g) = ((val) & 0xff00) >> 8;            \
                (r) = ((val) & 0xff0000) >> 16;         \
        } while (0)
/*Pack 8-bit r, g, b into a 15-bit pixel (5 bits each, red at bit 10) in 'dest'.
  When virge->dithering_enabled is set, an offset from the 4x4 'dither' table,
  indexed by the low two bits of the file-level _y and _x, is added to each
  channel before truncation. Channels above 248 are set to 248 instead.
  Relies on 'virge', 'dither', '_x' and '_y' being in scope.
  Wrapped in do/while(0) and with every argument parenthesised so it behaves as
  a single statement and accepts compound expressions.*/
#define RGB15(r, g, b, dest)                                                                                            \
        do                                                                                                              \
        {                                                                                                               \
                if (virge->dithering_enabled)                                                                           \
                {                                                                                                       \
                        int add = dither[_y & 3][_x & 3];                                                               \
                        int _r = ((r) > 248) ? 248 : (r)+add;                                                           \
                        int _g = ((g) > 248) ? 248 : (g)+add;                                                           \
                        int _b = ((b) > 248) ? 248 : (b)+add;                                                           \
                        (dest) = ((_b >> 3) & 0x1f) | (((_g >> 3) & 0x1f) << 5) | (((_r >> 3) & 0x1f) << 10);           \
                }                                                                                                       \
                else                                                                                                    \
                        (dest) = (((b) >> 3) & 0x1f) | ((((g) >> 3) & 0x1f) << 5) | ((((r) >> 3) & 0x1f) << 10);        \
        } while (0)
/*Pack 8-bit r, g, b into one value : red in bits 16-23, green in bits 8-15, blue in bits 0-7*/
#define RGB24(r, g, b) (((r) << 16) | ((g) << 8) | (b))
/*One colour sample with a separate int per channel*/
typedef struct rgba_t
{
        int r;  /*Red*/
        int g;  /*Green*/
        int b;  /*Blue*/
        int a;  /*Alpha*/
} rgba_t;
1621 typedef struct s3d_state_t
1622 {
1623 int32_t r, g, b, a, u, v, d, w;
1625 int32_t base_r, base_g, base_b, base_a, base_u, base_v, base_d, base_w;
1627 uint32_t base_z;
1629 uint32_t tbu, tbv;
1631 uint32_t cmd_set;
1632 int max_d;
1634 uint16_t *texture[10];
1636 uint32_t tex_bdr_clr;
1638 int32_t x1, x2;
1639 int y;
1641 rgba_t dest_rgba;
1642 } s3d_state_t;
/*Parameters of a single texel lookup, filled in by the tex_sample_* routines
  and consumed by the tex_read implementations*/
typedef struct s3d_texture_state_t
{
        int level;              /*Mip level to read from*/
        int texture_shift;      /*Right shift turning a masked u/v into a texel index*/

        int32_t u;              /*Texture coordinates of the texel*/
        int32_t v;
} s3d_texture_state_t;
1652 static void (*tex_read)(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out);
1653 static void (*tex_sample)(s3d_state_t *state);
1654 static void (*dest_pixel)(s3d_state_t *state);
/*Larger / smaller of two values. Each argument may be evaluated twice, so do
  not pass expressions with side effects*/
#define MAX(a, b) ((b) < (a) ? (a) : (b))
#define MIN(a, b) ((b) > (a) ? (a) : (b))
/*Coordinates whose low two bits index the dither table in RGB15.
  Presumably the pixel currently being drawn - they are set outside this section*/
static int _x;
static int _y;
1661 static void tex_ARGB1555(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1662 {
1663 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1664 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1665 uint16_t val = state->texture[texture_state->level][offset];
1667 out->r = ((val & 0x7c00) >> 7) | ((val & 0x7000) >> 12);
1668 out->g = ((val & 0x03e0) >> 2) | ((val & 0x0380) >> 7);
1669 out->b = ((val & 0x001f) << 3) | ((val & 0x001c) >> 2);
1670 out->a = (val & 0x8000) ? 0xff : 0;
1671 }
1673 static void tex_ARGB1555_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1674 {
1675 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1676 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1677 uint16_t val = state->texture[texture_state->level][offset];
1679 if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000)
1680 val = state->tex_bdr_clr;
1682 out->r = ((val & 0x7c00) >> 7) | ((val & 0x7000) >> 12);
1683 out->g = ((val & 0x03e0) >> 2) | ((val & 0x0380) >> 7);
1684 out->b = ((val & 0x001f) << 3) | ((val & 0x001c) >> 2);
1685 out->a = (val & 0x8000) ? 0xff : 0;
1686 }
1688 static void tex_ARGB4444(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1689 {
1690 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1691 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1692 uint16_t val = state->texture[texture_state->level][offset];
1694 out->r = ((val & 0x0f00) >> 4) | ((val & 0x0f00) >> 8);
1695 out->g = (val & 0x00f0) | ((val & 0x00f0) >> 4);
1696 out->b = ((val & 0x000f) << 4) | (val & 0x000f);
1697 out->a = ((val & 0xf000) >> 8) | ((val & 0xf000) >> 12);
1698 }
1700 static void tex_ARGB4444_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1701 {
1702 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1703 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1704 uint16_t val = state->texture[texture_state->level][offset];
1706 if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000)
1707 val = state->tex_bdr_clr;
1709 out->r = ((val & 0x0f00) >> 4) | ((val & 0x0f00) >> 8);
1710 out->g = (val & 0x00f0) | ((val & 0x00f0) >> 4);
1711 out->b = ((val & 0x000f) << 4) | (val & 0x000f);
1712 out->a = ((val & 0xf000) >> 8) | ((val & 0xf000) >> 12);
1713 }
1715 static void tex_ARGB8888(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1716 {
1717 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1718 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1719 uint32_t val = ((uint32_t *)state->texture[texture_state->level])[offset];
1721 out->r = (val >> 16) & 0xff;
1722 out->g = (val >> 8) & 0xff;
1723 out->b = val & 0xff;
1724 out->a = (val >> 24) & 0xff;
1725 }
1726 static void tex_ARGB8888_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1727 {
1728 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1729 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1730 uint32_t val = ((uint32_t *)state->texture[texture_state->level])[offset];
1732 if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000)
1733 val = state->tex_bdr_clr;
1735 out->r = (val >> 16) & 0xff;
1736 out->g = (val >> 8) & 0xff;
1737 out->b = val & 0xff;
1738 out->a = (val >> 24) & 0xff;
1739 }
1741 static void tex_sample_normal(s3d_state_t *state)
1742 {
1743 s3d_texture_state_t texture_state;
1745 texture_state.level = state->max_d;
1746 texture_state.texture_shift = 18 + (9 - texture_state.level);
1747 texture_state.u = state->u + state->tbu;
1748 texture_state.v = state->v + state->tbv;
1750 tex_read(state, &texture_state, &state->dest_rgba);
1751 }
1753 static void tex_sample_normal_filter(s3d_state_t *state)
1754 {
1755 s3d_texture_state_t texture_state;
1756 int tex_offset;
1757 rgba_t tex_samples[4];
1758 int du, dv;
1759 int d[4];
1761 texture_state.level = state->max_d;
1762 texture_state.texture_shift = 18 + (9 - texture_state.level);
1763 tex_offset = 1 << texture_state.texture_shift;
1765 texture_state.u = state->u + state->tbu;
1766 texture_state.v = state->v + state->tbv;
1767 tex_read(state, &texture_state, &tex_samples[0]);
1768 du = (texture_state.u >> (texture_state.texture_shift - 8)) & 0xff;
1769 dv = (texture_state.v >> (texture_state.texture_shift - 8)) & 0xff;
1771 texture_state.u = state->u + state->tbu + tex_offset;
1772 texture_state.v = state->v + state->tbv;
1773 tex_read(state, &texture_state, &tex_samples[1]);
1775 texture_state.u = state->u + state->tbu;
1776 texture_state.v = state->v + state->tbv + tex_offset;
1777 tex_read(state, &texture_state, &tex_samples[2]);
1779 texture_state.u = state->u + state->tbu + tex_offset;
1780 texture_state.v = state->v + state->tbv + tex_offset;
1781 tex_read(state, &texture_state, &tex_samples[3]);
1783 d[0] = (256 - du) * (256 - dv);
1784 d[1] = du * (256 - dv);
1785 d[2] = (256 - du) * dv;
1786 d[3] = du * dv;
1788 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
1789 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
1790 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
1791 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
1792 }
1794 static void tex_sample_mipmap(s3d_state_t *state)
1795 {
1796 s3d_texture_state_t texture_state;
1798 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
1799 if (texture_state.level < 0)
1800 texture_state.level = 0;
1801 texture_state.texture_shift = 18 + (9 - texture_state.level);
1802 texture_state.u = state->u + state->tbu;
1803 texture_state.v = state->v + state->tbv;
1805 tex_read(state, &texture_state, &state->dest_rgba);
1806 }
1808 static void tex_sample_mipmap_filter(s3d_state_t *state)
1809 {
1810 s3d_texture_state_t texture_state;
1811 int tex_offset;
1812 rgba_t tex_samples[4];
1813 int du, dv;
1814 int d[4];
1816 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
1817 if (texture_state.level < 0)
1818 texture_state.level = 0;
1819 texture_state.texture_shift = 18 + (9 - texture_state.level);
1820 tex_offset = 1 << texture_state.texture_shift;
1822 texture_state.u = state->u + state->tbu;
1823 texture_state.v = state->v + state->tbv;
1824 tex_read(state, &texture_state, &tex_samples[0]);
1825 du = (texture_state.u >> (texture_state.texture_shift - 8)) & 0xff;
1826 dv = (texture_state.v >> (texture_state.texture_shift - 8)) & 0xff;
1828 texture_state.u = state->u + state->tbu + tex_offset;
1829 texture_state.v = state->v + state->tbv;
1830 tex_read(state, &texture_state, &tex_samples[1]);
1832 texture_state.u = state->u + state->tbu;
1833 texture_state.v = state->v + state->tbv + tex_offset;
1834 tex_read(state, &texture_state, &tex_samples[2]);
1836 texture_state.u = state->u + state->tbu + tex_offset;
1837 texture_state.v = state->v + state->tbv + tex_offset;
1838 tex_read(state, &texture_state, &tex_samples[3]);
1840 d[0] = (256 - du) * (256 - dv);
1841 d[1] = du * (256 - dv);
1842 d[2] = (256 - du) * dv;
1843 d[3] = du * dv;
1845 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
1846 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
1847 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
1848 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
1849 }
1851 static void tex_sample_persp_normal(s3d_state_t *state)
1852 {
1853 s3d_texture_state_t texture_state;
1854 int32_t w = 0;
1856 if (state->w)
1857 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
1859 texture_state.level = state->max_d;
1860 texture_state.texture_shift = 18 + (9 - texture_state.level);
1861 texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu;
1862 texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv;
1864 tex_read(state, &texture_state, &state->dest_rgba);
1865 }
1867 static void tex_sample_persp_normal_filter(s3d_state_t *state)
1868 {
1869 s3d_texture_state_t texture_state;
1870 int32_t w = 0, u, v;
1871 int tex_offset;
1872 rgba_t tex_samples[4];
1873 int du, dv;
1874 int d[4];
1876 if (state->w)
1877 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
1879 u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu;
1880 v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv;
1882 texture_state.level = state->max_d;
1883 texture_state.texture_shift = 18 + (9 - texture_state.level);
1884 tex_offset = 1 << texture_state.texture_shift;
1886 texture_state.u = u;
1887 texture_state.v = v;
1888 tex_read(state, &texture_state, &tex_samples[0]);
1889 du = (u >> (texture_state.texture_shift - 8)) & 0xff;
1890 dv = (v >> (texture_state.texture_shift - 8)) & 0xff;
1892 texture_state.u = u + tex_offset;
1893 texture_state.v = v;
1894 tex_read(state, &texture_state, &tex_samples[1]);
1896 texture_state.u = u;
1897 texture_state.v = v + tex_offset;
1898 tex_read(state, &texture_state, &tex_samples[2]);
1900 texture_state.u = u + tex_offset;
1901 texture_state.v = v + tex_offset;
1902 tex_read(state, &texture_state, &tex_samples[3]);
1904 d[0] = (256 - du) * (256 - dv);
1905 d[1] = du * (256 - dv);
1906 d[2] = (256 - du) * dv;
1907 d[3] = du * dv;
1909 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
1910 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
1911 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
1912 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
1913 }
1915 static void tex_sample_persp_normal_375(s3d_state_t *state)
1916 {
1917 s3d_texture_state_t texture_state;
1918 int32_t w = 0;
1920 if (state->w)
1921 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
1923 texture_state.level = state->max_d;
1924 texture_state.texture_shift = 18 + (9 - texture_state.level);
1925 texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu;
1926 texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv;
1928 tex_read(state, &texture_state, &state->dest_rgba);
1929 }
1931 static void tex_sample_persp_normal_filter_375(s3d_state_t *state)
1932 {
1933 s3d_texture_state_t texture_state;
1934 int32_t w = 0, u, v;
1935 int tex_offset;
1936 rgba_t tex_samples[4];
1937 int du, dv;
1938 int d[4];
1940 if (state->w)
1941 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
1943 u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu;
1944 v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv;
1946 texture_state.level = state->max_d;
1947 texture_state.texture_shift = 18 + (9 - texture_state.level);
1948 tex_offset = 1 << texture_state.texture_shift;
1950 texture_state.u = u;
1951 texture_state.v = v;
1952 tex_read(state, &texture_state, &tex_samples[0]);
1953 du = (u >> (texture_state.texture_shift - 8)) & 0xff;
1954 dv = (v >> (texture_state.texture_shift - 8)) & 0xff;
1956 texture_state.u = u + tex_offset;
1957 texture_state.v = v;
1958 tex_read(state, &texture_state, &tex_samples[1]);
1960 texture_state.u = u;
1961 texture_state.v = v + tex_offset;
1962 tex_read(state, &texture_state, &tex_samples[2]);
1964 texture_state.u = u + tex_offset;
1965 texture_state.v = v + tex_offset;
1966 tex_read(state, &texture_state, &tex_samples[3]);
1968 d[0] = (256 - du) * (256 - dv);
1969 d[1] = du * (256 - dv);
1970 d[2] = (256 - du) * dv;
1971 d[3] = du * dv;
1973 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
1974 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
1975 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
1976 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
1977 }
1980 static void tex_sample_persp_mipmap(s3d_state_t *state)
1981 {
1982 s3d_texture_state_t texture_state;
1983 int32_t w = 0;
1985 if (state->w)
1986 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
1988 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
1989 if (texture_state.level < 0)
1990 texture_state.level = 0;
1991 texture_state.texture_shift = 18 + (9 - texture_state.level);
1992 texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu;
1993 texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv;
1995 tex_read(state, &texture_state, &state->dest_rgba);
1996 }
1998 static void tex_sample_persp_mipmap_filter(s3d_state_t *state)
1999 {
2000 s3d_texture_state_t texture_state;
2001 int32_t w = 0, u, v;
2002 int tex_offset;
2003 rgba_t tex_samples[4];
2004 int du, dv;
2005 int d[4];
2007 if (state->w)
2008 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
2010 u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu;
2011 v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv;
2013 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
2014 if (texture_state.level < 0)
2015 texture_state.level = 0;
2016 texture_state.texture_shift = 18 + (9 - texture_state.level);
2017 tex_offset = 1 << texture_state.texture_shift;
2019 texture_state.u = u;
2020 texture_state.v = v;
2021 tex_read(state, &texture_state, &tex_samples[0]);
2022 du = (u >> (texture_state.texture_shift - 8)) & 0xff;
2023 dv = (v >> (texture_state.texture_shift - 8)) & 0xff;
2025 texture_state.u = u + tex_offset;
2026 texture_state.v = v;
2027 tex_read(state, &texture_state, &tex_samples[1]);
2029 texture_state.u = u;
2030 texture_state.v = v + tex_offset;
2031 tex_read(state, &texture_state, &tex_samples[2]);
2033 texture_state.u = u + tex_offset;
2034 texture_state.v = v + tex_offset;
2035 tex_read(state, &texture_state, &tex_samples[3]);
2037 d[0] = (256 - du) * (256 - dv);
2038 d[1] = du * (256 - dv);
2039 d[2] = (256 - du) * dv;
2040 d[3] = du * dv;
2042 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
2043 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
2044 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
2045 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
2046 }
2048 static void tex_sample_persp_mipmap_375(s3d_state_t *state)
2049 {
2050 s3d_texture_state_t texture_state;
2051 int32_t w = 0;
2053 if (state->w)
2054 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
2056 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
2057 if (texture_state.level < 0)
2058 texture_state.level = 0;
2059 texture_state.texture_shift = 18 + (9 - texture_state.level);
2060 texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu;
2061 texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv;
2063 tex_read(state, &texture_state, &state->dest_rgba);
2064 }
2066 static void tex_sample_persp_mipmap_filter_375(s3d_state_t *state)
2067 {
2068 s3d_texture_state_t texture_state;
2069 int32_t w = 0, u, v;
2070 int tex_offset;
2071 rgba_t tex_samples[4];
2072 int du, dv;
2073 int d[4];
2075 if (state->w)
2076 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
2078 u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu;
2079 v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv;
2081 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
2082 if (texture_state.level < 0)
2083 texture_state.level = 0;
2084 texture_state.texture_shift = 18 + (9 - texture_state.level);
2085 tex_offset = 1 << texture_state.texture_shift;
2087 texture_state.u = u;
2088 texture_state.v = v;
2089 tex_read(state, &texture_state, &tex_samples[0]);
2090 du = (u >> (texture_state.texture_shift - 8)) & 0xff;
2091 dv = (v >> (texture_state.texture_shift - 8)) & 0xff;
2093 texture_state.u = u + tex_offset;
2094 texture_state.v = v;
2095 tex_read(state, &texture_state, &tex_samples[1]);
2097 texture_state.u = u;
2098 texture_state.v = v + tex_offset;
2099 tex_read(state, &texture_state, &tex_samples[2]);
2101 texture_state.u = u + tex_offset;
2102 texture_state.v = v + tex_offset;
2103 tex_read(state, &texture_state, &tex_samples[3]);
2105 d[0] = (256 - du) * (256 - dv);
2106 d[1] = du * (256 - dv);
2107 d[2] = (256 - du) * dv;
2108 d[3] = du * dv;
2110 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
2111 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
2112 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
2113 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
2114 }
/*Clamp an integer lvalue to the range 0..0xff in place.
  The argument is evaluated more than once, so it must have no side effects.*/
#define CLAMP(x) do                             \
        {                                       \
                if ((x) < 0)                    \
                        x = 0;                  \
                else if ((x) > 0xff)            \
                        x = 0xff;               \
        }                                       \
        while (0)
/*Clamp four integer colour components to the range 0..0xff in place.
  Wrapped in do/while(0) so the macro expands to a single statement and is
  safe under an unbraced if/else; callers terminate it with a semicolon.
  Each argument is evaluated more than once, so pass plain lvalues only.*/
#define CLAMP_RGBA(r, g, b, a) do                       \
        {                                               \
                if ((r) & ~0xff)                        \
                        r = ((r) < 0) ? 0 : 0xff;       \
                if ((g) & ~0xff)                        \
                        g = ((g) < 0) ? 0 : 0xff;       \
                if ((b) & ~0xff)                        \
                        b = ((b) < 0) ? 0 : 0xff;       \
                if ((a) & ~0xff)                        \
                        a = ((a) < 0) ? 0 : 0xff;       \
        }                                               \
        while (0)
/*Clamp three colour components to the range 0..0xff in place.
  Each argument is evaluated more than once, so pass plain lvalues only.*/
#define CLAMP_RGB(r, g, b) do                   \
        {                                       \
                if ((r) < 0)                    \
                        r = 0;                  \
                else if ((r) > 0xff)            \
                        r = 0xff;               \
                                                \
                if ((g) < 0)                    \
                        g = 0;                  \
                else if ((g) > 0xff)            \
                        g = 0xff;               \
                                                \
                if ((b) < 0)                    \
                        b = 0;                  \
                else if ((b) > 0xff)            \
                        b = 0xff;               \
        }                                       \
        while (0)
2151 static void dest_pixel_gouraud_shaded_triangle(s3d_state_t *state)
2152 {
2153 state->dest_rgba.r = state->r >> 7;
2154 CLAMP(state->dest_rgba.r);
2156 state->dest_rgba.g = state->g >> 7;
2157 CLAMP(state->dest_rgba.g);
2159 state->dest_rgba.b = state->b >> 7;
2160 CLAMP(state->dest_rgba.b);
2162 state->dest_rgba.a = state->a >> 7;
2163 CLAMP(state->dest_rgba.a);
2164 }
2166 static void dest_pixel_unlit_texture_triangle(s3d_state_t *state)
2167 {
2168 tex_sample(state);
2170 if (state->cmd_set & CMD_SET_ABC_SRC)
2171 state->dest_rgba.a = state->a >> 7;
2172 }
2174 static void dest_pixel_lit_texture_decal(s3d_state_t *state)
2175 {
2176 tex_sample(state);
2178 if (state->cmd_set & CMD_SET_ABC_SRC)
2179 state->dest_rgba.a = state->a >> 7;
2180 }
2182 static void dest_pixel_lit_texture_reflection(s3d_state_t *state)
2183 {
2184 tex_sample(state);
2186 state->dest_rgba.r += (state->r >> 7);
2187 state->dest_rgba.g += (state->g >> 7);
2188 state->dest_rgba.b += (state->b >> 7);
2189 if (state->cmd_set & CMD_SET_ABC_SRC)
2190 state->dest_rgba.a += (state->a >> 7);
2192 CLAMP_RGBA(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b, state->dest_rgba.a);
2193 }
2195 static void dest_pixel_lit_texture_modulate(s3d_state_t *state)
2196 {
2197 int r = state->r >> 7, g = state->g >> 7, b = state->b >> 7, a = state->a >> 7;
2199 tex_sample(state);
2201 CLAMP_RGBA(r, g, b, a);
2203 state->dest_rgba.r = ((state->dest_rgba.r) * r) >> 8;
2204 state->dest_rgba.g = ((state->dest_rgba.g) * g) >> 8;
2205 state->dest_rgba.b = ((state->dest_rgba.b) * b) >> 8;
2207 if (state->cmd_set & CMD_SET_ABC_SRC)
2208 state->dest_rgba.a = a;
2209 }
2211 static void tri(virge_t *virge, s3d_state_t *state, int yc, int32_t dx1, int32_t dx2)
2212 {
2213 uint8_t *vram = virge->svga.vram;
2215 int x_dir = virge->s3d.tlr ? 1 : -1;
2217 int use_z = !(virge->s3d.cmd_set & CMD_SET_ZB_MODE);
2219 int y_count = yc;
2221 int bpp = (virge->s3d.cmd_set >> 2) & 7;
2223 uint32_t dest_offset, z_offset;
2225 if (virge->s3d.cmd_set & CMD_SET_HC)
2226 {
2227 if (state->y < virge->s3d.clip_t)
2228 return;
2229 if (state->y > virge->s3d.clip_b)
2230 {
2231 int diff_y = state->y - virge->s3d.clip_b;
2233 if (diff_y > y_count)
2234 diff_y = y_count;
2236 state->base_u += (virge->s3d.TdUdY * diff_y);
2237 state->base_v += (virge->s3d.TdVdY * diff_y);
2238 state->base_z += (virge->s3d.TdZdY * diff_y);
2239 state->base_r += (virge->s3d.TdRdY * diff_y);
2240 state->base_g += (virge->s3d.TdGdY * diff_y);
2241 state->base_b += (virge->s3d.TdBdY * diff_y);
2242 state->base_a += (virge->s3d.TdAdY * diff_y);
2243 state->base_d += (virge->s3d.TdDdY * diff_y);
2244 state->base_w += (virge->s3d.TdWdY * diff_y);
2245 state->x1 += (dx1 * diff_y);
2246 state->x2 += (dx2 * diff_y);
2247 state->y -= diff_y;
2248 dest_offset -= virge->s3d.dest_str;
2249 z_offset -= virge->s3d.z_str;
2250 y_count -= diff_y;
2251 }
2252 }
2254 dest_offset = virge->s3d.dest_base + (state->y * virge->s3d.dest_str);
2255 z_offset = virge->s3d.z_base + (state->y * virge->s3d.z_str);
2257 for (; y_count > 0; y_count--)
2258 {
2259 int x = (state->x1 + ((1 << 20) - 1)) >> 20;
2260 int xe = (state->x2 + ((1 << 20) - 1)) >> 20;
2261 uint32_t z = state->base_z;
2262 if (x_dir < 0)
2263 {
2264 x--;
2265 xe--;
2266 }
2268 if (x != xe && (x_dir > 0 && x < xe) || (x_dir < 0 && x > xe))
2269 {
2270 uint32_t dest_addr, z_addr;
2271 int dx = (x_dir > 0) ? ((31 - ((state->x1-1) >> 15)) & 0x1f) : (((state->x1-1) >> 15) & 0x1f);
2272 int x_offset = x_dir * (bpp + 1);
2273 int xz_offset = x_dir << 1;
2274 if (x_dir > 0)
2275 dx += 1;
2276 state->r = state->base_r + ((virge->s3d.TdRdX * dx) >> 5);
2277 state->g = state->base_g + ((virge->s3d.TdGdX * dx) >> 5);
2278 state->b = state->base_b + ((virge->s3d.TdBdX * dx) >> 5);
2279 state->a = state->base_a + ((virge->s3d.TdAdX * dx) >> 5);
2280 state->u = state->base_u + ((virge->s3d.TdUdX * dx) >> 5);
2281 state->v = state->base_v + ((virge->s3d.TdVdX * dx) >> 5);
2282 state->w = state->base_w + ((virge->s3d.TdWdX * dx) >> 5);
2283 state->d = state->base_d + ((virge->s3d.TdDdX * dx) >> 5);
2284 z += ((virge->s3d.TdZdX * dx) >> 5);
2286 // pclog("Draw Y=%i X=%i to XE=%i %i %08x %08x %08x %08x %08x %08x %08x %08x %i %08x\n", state->y, x, xe, dx, state->x1, state->x2, dx1, virge->s3d.TdWdX, state->u, state->v, virge->s3d.TdUdX, virge->s3d.TdUdY, dx, (virge->s3d.TdUdX * dx) >> 4);
2288 if (virge->s3d.cmd_set & CMD_SET_HC)
2289 {
2290 if (x_dir > 0)
2291 {
2292 if (x > virge->s3d.clip_r)
2293 goto tri_skip_line;
2294 if (xe < virge->s3d.clip_l)
2295 goto tri_skip_line;
2296 if (xe > virge->s3d.clip_r)
2297 xe = virge->s3d.clip_r;
2298 if (x < virge->s3d.clip_l)
2299 {
2300 int diff_x = virge->s3d.clip_l - x;
2302 z += (virge->s3d.TdZdX * diff_x);
2303 state->u += (virge->s3d.TdUdX * diff_x);
2304 state->v += (virge->s3d.TdVdX * diff_x);
2305 state->r += (virge->s3d.TdRdX * diff_x);
2306 state->g += (virge->s3d.TdGdX * diff_x);
2307 state->b += (virge->s3d.TdBdX * diff_x);
2308 state->a += (virge->s3d.TdAdX * diff_x);
2309 state->d += (virge->s3d.TdDdX * diff_x);
2310 state->w += (virge->s3d.TdWdX * diff_x);
2312 x = virge->s3d.clip_l;
2313 }
2314 }
2315 else
2316 {
2317 if (x < virge->s3d.clip_l)
2318 goto tri_skip_line;
2319 if (xe > virge->s3d.clip_r)
2320 goto tri_skip_line;
2321 if (xe < virge->s3d.clip_l)
2322 xe = virge->s3d.clip_l;
2323 if (x > virge->s3d.clip_r)
2324 {
2325 int diff_x = x - virge->s3d.clip_r;
2327 z += (virge->s3d.TdZdX * diff_x);
2328 state->u += (virge->s3d.TdUdX * diff_x);
2329 state->v += (virge->s3d.TdVdX * diff_x);
2330 state->r += (virge->s3d.TdRdX * diff_x);
2331 state->g += (virge->s3d.TdGdX * diff_x);
2332 state->b += (virge->s3d.TdBdX * diff_x);
2333 state->a += (virge->s3d.TdAdX * diff_x);
2334 state->d += (virge->s3d.TdDdX * diff_x);
2335 state->w += (virge->s3d.TdWdX * diff_x);
2337 x = virge->s3d.clip_r;
2338 }
2339 }
2340 }
2342 virge->svga.changedvram[(dest_offset & 0x3fffff) >> 12] = changeframecount;
2344 dest_addr = dest_offset + (x * (bpp + 1));
2345 z_addr = z_offset + (x << 1);
2347 for (; x != xe; x = (x + x_dir) & 0xfff)
2348 {
2349 int update = 1;
2350 int16_t src_z;
2351 _x = x; _y = state->y;
2353 if (use_z)
2354 {
2355 src_z = Z_READ(z_addr);
2356 Z_CLIP(src_z, z >> 16);
2357 }
2359 if (update)
2360 {
2361 uint32_t dest_col;
2363 dest_pixel(state);
2365 if (virge->s3d.cmd_set & CMD_SET_ABC_ENABLE)
2366 {
2367 uint32_t src_col;
2368 int src_r, src_g, src_b;
2370 switch (bpp)
2371 {
2372 case 0: /*8 bpp*/
2373 /*Not implemented yet*/
2374 break;
2375 case 1: /*16 bpp*/
2376 src_col = *(uint16_t *)&vram[dest_addr & 0x3fffff];
2377 RGB15_TO_24(src_col, src_r, src_g, src_b);
2378 break;
2379 case 2: /*24 bpp*/
2380 src_col = (*(uint32_t *)&vram[dest_addr & 0x3fffff]) & 0xffffff;
2381 RGB24_TO_24(src_col, src_r, src_g, src_b);
2382 break;
2383 }
2385 state->dest_rgba.r = ((state->dest_rgba.r * state->dest_rgba.a) + (src_r * (255 - state->dest_rgba.a))) / 255;
2386 state->dest_rgba.g = ((state->dest_rgba.g * state->dest_rgba.a) + (src_g * (255 - state->dest_rgba.a))) / 255;
2387 state->dest_rgba.b = ((state->dest_rgba.b * state->dest_rgba.a) + (src_b * (255 - state->dest_rgba.a))) / 255;
2388 }
2390 switch (bpp)
2391 {
2392 case 0: /*8 bpp*/
2393 /*Not implemented yet*/
2394 break;
2395 case 1: /*16 bpp*/
2396 RGB15(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b, dest_col);
2397 *(uint16_t *)&vram[dest_addr] = dest_col;
2398 break;
2399 case 2: /*24 bpp*/
2400 dest_col = RGB24(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b);
2401 *(uint8_t *)&vram[dest_addr] = dest_col & 0xff;
2402 *(uint8_t *)&vram[dest_addr + 1] = (dest_col >> 8) & 0xff;
2403 *(uint8_t *)&vram[dest_addr + 2] = (dest_col >> 16) & 0xff;
2404 break;
2405 }
2407 if (use_z && (virge->s3d.cmd_set & CMD_SET_ZUP))
2408 Z_WRITE(z_addr, src_z);
2409 }
2411 z += virge->s3d.TdZdX;
2412 state->u += virge->s3d.TdUdX;
2413 state->v += virge->s3d.TdVdX;
2414 state->r += virge->s3d.TdRdX;
2415 state->g += virge->s3d.TdGdX;
2416 state->b += virge->s3d.TdBdX;
2417 state->a += virge->s3d.TdAdX;
2418 state->d += virge->s3d.TdDdX;
2419 state->w += virge->s3d.TdWdX;
2420 dest_addr += x_offset;
2421 z_addr += xz_offset;
2422 virge->pixel_count++;
2423 }
2424 }
2425 tri_skip_line:
2426 state->x1 += dx1;
2427 state->x2 += dx2;
2428 state->base_u += virge->s3d.TdUdY;
2429 state->base_v += virge->s3d.TdVdY;
2430 state->base_z += virge->s3d.TdZdY;
2431 state->base_r += virge->s3d.TdRdY;
2432 state->base_g += virge->s3d.TdGdY;
2433 state->base_b += virge->s3d.TdBdY;
2434 state->base_a += virge->s3d.TdAdY;
2435 state->base_d += virge->s3d.TdDdY;
2436 state->base_w += virge->s3d.TdWdY;
2437 state->y--;
2438 dest_offset -= virge->s3d.dest_str;
2439 z_offset -= virge->s3d.z_str;
2440 }
2441 }
/*Size factor per texture format, indexed by bits 5-7 of the command word.
  s3_virge_triangle multiplies the texel count of a mip level by this value
  and halves the product to get the byte size of the level.*/
static int tex_size[8] =
{
        8,      /*format 0*/
        4,      /*format 1*/
        4,      /*format 2*/
        2,      /*format 3*/
        2,      /*format 4*/
        2,      /*format 5*/
        2,      /*format 6*/
        2       /*format 7*/
};
2455 static void s3_virge_triangle(virge_t *virge)
2456 {
2457 s3d_state_t state;
2459 uint32_t tex_base;
2460 int c;
2462 uint64_t start_time = timer_read();
2463 uint64_t end_time;
2465 state.tbu = virge->s3d.tbu << 11;
2466 state.tbv = virge->s3d.tbv << 11;
2468 state.max_d = (virge->s3d.cmd_set >> 8) & 15;
2470 state.tex_bdr_clr = virge->s3d.tex_bdr_clr;
2472 state.cmd_set = virge->s3d.cmd_set;
2474 state.base_u = virge->s3d.tus;
2475 state.base_v = virge->s3d.tvs;
2476 state.base_z = virge->s3d.tzs;
2477 state.base_r = (int32_t)virge->s3d.trs;
2478 state.base_g = (int32_t)virge->s3d.tgs;
2479 state.base_b = (int32_t)virge->s3d.tbs;
2480 state.base_a = (int32_t)virge->s3d.tas;
2481 state.base_d = virge->s3d.tds;
2482 state.base_w = virge->s3d.tws;
2484 tex_base = virge->s3d.tex_base;
2485 for (c = 9; c >= 0; c--)
2486 {
2487 state.texture[c] = (uint16_t *)&virge->svga.vram[tex_base];
2488 if (c <= state.max_d)
2489 tex_base += ((1 << (c*2)) * tex_size[(virge->s3d.cmd_set >> 5) & 7]) / 2;
2490 }
2492 switch ((virge->s3d.cmd_set >> 27) & 0xf)
2493 {
2494 case 0:
2495 dest_pixel = dest_pixel_gouraud_shaded_triangle;
2496 // pclog("dest_pixel_gouraud_shaded_triangle\n");
2497 break;
2498 case 1:
2499 case 5:
2500 switch ((virge->s3d.cmd_set >> 15) & 0x3)
2501 {
2502 case 0:
2503 dest_pixel = dest_pixel_lit_texture_reflection;
2504 // pclog("dest_pixel_lit_texture_reflection\n");
2505 break;
2506 case 1:
2507 dest_pixel = dest_pixel_lit_texture_modulate;
2508 // pclog("dest_pixel_lit_texture_modulate\n");
2509 break;
2510 case 2:
2511 dest_pixel = dest_pixel_lit_texture_decal;
2512 // pclog("dest_pixel_lit_texture_decal\n");
2513 break;
2514 default:
2515 pclog("bad triangle type %x\n", (virge->s3d.cmd_set >> 27) & 0xf);
2516 return;
2517 }
2518 break;
2519 case 2:
2520 case 6:
2521 dest_pixel = dest_pixel_unlit_texture_triangle;
2522 // pclog("dest_pixel_unlit_texture_triangle\n");
2523 break;
2524 default:
2525 pclog("bad triangle type %x\n", (virge->s3d.cmd_set >> 27) & 0xf);
2526 return;
2527 }
2529 switch (((virge->s3d.cmd_set >> 12) & 7) | ((virge->s3d.cmd_set & (1 << 29)) ? 8 : 0))
2530 {
2531 case 0: case 1:
2532 tex_sample = tex_sample_mipmap;
2533 // pclog("use tex_sample_mipmap\n");
2534 break;
2535 case 2: case 3:
2536 tex_sample = virge->bilinear_enabled ? tex_sample_mipmap_filter : tex_sample_mipmap;
2537 // pclog("use tex_sample_mipmap_filter\n");
2538 break;
2539 case 4: case 5:
2540 tex_sample = tex_sample_normal;
2541 // pclog("use tex_sample_normal\n");
2542 break;
2543 case 6: case 7:
2544 tex_sample = virge->bilinear_enabled ? tex_sample_normal_filter : tex_sample_normal;
2545 // pclog("use tex_sample_normal_filter\n");
2546 break;
2547 case (0 | 8): case (1 | 8):
2548 if (virge->is_375)
2549 tex_sample = tex_sample_persp_mipmap_375;
2550 else
2551 tex_sample = tex_sample_persp_mipmap;
2552 // pclog("use tex_sample_persp_mipmap\n");
2553 break;
2554 case (2 | 8): case (3 | 8):
2555 if (virge->is_375)
2556 tex_sample = virge->bilinear_enabled ? tex_sample_persp_mipmap_filter_375 : tex_sample_persp_mipmap_375;
2557 else
2558 tex_sample = virge->bilinear_enabled ? tex_sample_persp_mipmap_filter : tex_sample_persp_mipmap;
2559 // pclog("use tex_sample_persp_mipmap_filter\n");
2560 break;
2561 case (4 | 8): case (5 | 8):
2562 if (virge->is_375)
2563 tex_sample = tex_sample_persp_normal_375;
2564 else
2565 tex_sample = tex_sample_persp_normal;
2566 // pclog("use tex_sample_persp_normal\n");
2567 break;
2568 case (6 | 8): case (7 | 8):
2569 if (virge->is_375)
2570 tex_sample = virge->bilinear_enabled ? tex_sample_persp_normal_filter_375 : tex_sample_persp_normal_375;
2571 else
2572 tex_sample = virge->bilinear_enabled ? tex_sample_persp_normal_filter : tex_sample_persp_normal;
2573 // pclog("use tex_sample_persp_normal_filter\n");
2574 break;
2575 }
2577 switch ((virge->s3d.cmd_set >> 5) & 7)
2578 {
2579 case 0:
2580 tex_read = (virge->s3d.cmd_set & CMD_SET_TWE) ? tex_ARGB8888 : tex_ARGB8888_nowrap;
2581 break;
2582 case 1:
2583 tex_read = (virge->s3d.cmd_set & CMD_SET_TWE) ? tex_ARGB4444 : tex_ARGB4444_nowrap;
2584 // pclog("tex_ARGB4444\n");
2585 break;
2586 case 2:
2587 tex_read = (virge->s3d.cmd_set & CMD_SET_TWE) ? tex_ARGB1555 : tex_ARGB1555_nowrap;
2588 // pclog("tex_ARGB1555 %i\n", (virge->s3d.cmd_set >> 5) & 7);
2589 break;
2590 default:
2591 pclog("bad texture type %i\n", (virge->s3d.cmd_set >> 5) & 7);
2592 tex_read = (virge->s3d.cmd_set & CMD_SET_TWE) ? tex_ARGB1555 : tex_ARGB1555_nowrap;
2593 break;
2594 }
2596 // pclog("Triangle %i %i,%i to %i,%i %08x\n", y, x1 >> 20, y, virge->s3d.txend01 >> 20, y - (virge->s3d.ty01 + virge->s3d.ty12), state.cmd_set);
2598 state.y = virge->s3d.tys;
2599 state.x1 = virge->s3d.txs;
2600 state.x2 = virge->s3d.txend01;
2601 tri(virge, &state, virge->s3d.ty01, virge->s3d.TdXdY02, virge->s3d.TdXdY01);
2602 state.x2 = virge->s3d.txend12;
2603 tri(virge, &state, virge->s3d.ty12, virge->s3d.TdXdY02, virge->s3d.TdXdY12);
2605 virge->tri_count++;
2607 end_time = timer_read();
2609 virge_time += end_time - start_time;
2610 }
2613 static void s3_virge_hwcursor_draw(svga_t *svga, int displine)
2614 {
2615 int x;
2616 uint16_t dat[2];
2617 int xx;
2618 int offset = svga->hwcursor_latch.x - svga->hwcursor_latch.xoff;
2620 // pclog("HWcursor %i %i\n", svga->hwcursor_latch.x, svga->hwcursor_latch.y);
2621 for (x = 0; x < 64; x += 16)
2622 {
2623 dat[0] = (svga->vram[svga->hwcursor_latch.addr] << 8) | svga->vram[svga->hwcursor_latch.addr + 1];
2624 dat[1] = (svga->vram[svga->hwcursor_latch.addr + 2] << 8) | svga->vram[svga->hwcursor_latch.addr + 3];
2625 for (xx = 0; xx < 16; xx++)
2626 {
2627 if (offset >= svga->hwcursor_latch.x)
2628 {
2629 if (!(dat[0] & 0x8000))
2630 ((uint32_t *)buffer32->line[displine])[offset + 32] = (dat[1] & 0x8000) ? 0xffffff : 0;
2631 else if (dat[1] & 0x8000)
2632 ((uint32_t *)buffer32->line[displine])[offset + 32] ^= 0xffffff;
2633 // pclog("Plot %i, %i (%i %i) %04X %04X\n", offset, displine, x+xx, svga->hwcursor_on, dat[0], dat[1]);
2634 }
2636 offset++;
2637 dat[0] <<= 1;
2638 dat[1] <<= 1;
2639 }
2640 svga->hwcursor_latch.addr += 4;
2641 }
2642 }
/*Decode eight source bytes into four pixels.
  Each group of four bytes is read as luma, Cr, luma, Cb and yields two
  pixels sharing the chroma pair. Results are clamped to 0..0xff and stored
  in the caller's r[]/g[]/b[] at x_write; the caller's src advances by eight
  bytes and x_write by four, modulo 8.*/
#define DECODE_YCbCr()                                                  \
        do                                                              \
        {                                                               \
                int pair;                                               \
                                                                        \
                for (pair = 0; pair < 2; pair++)                        \
                {                                                       \
                        uint8_t luma0 = src[0];                         \
                        int8_t Cr = src[1] - 0x80;                      \
                        uint8_t luma1 = src[2];                         \
                        int8_t Cb = src[3] - 0x80;                      \
                        int dR = (359*Cr) >> 8;                         \
                        int dG = (88*Cb + 183*Cr) >> 8;                 \
                        int dB = (453*Cb) >> 8;                         \
                                                                        \
                        src += 4;                                       \
                                                                        \
                        r[x_write] = luma0 + dR;                        \
                        g[x_write] = luma0 - dG;                        \
                        b[x_write] = luma0 + dB;                        \
                        CLAMP(r[x_write]);                              \
                        CLAMP(g[x_write]);                              \
                        CLAMP(b[x_write]);                              \
                                                                        \
                        r[x_write+1] = luma1 + dR;                      \
                        g[x_write+1] = luma1 - dG;                      \
                        b[x_write+1] = luma1 + dB;                      \
                        CLAMP(r[x_write+1]);                            \
                        CLAMP(g[x_write+1]);                            \
                        CLAMP(b[x_write+1]);                            \
                                                                        \
                        x_write = (x_write + 2) & 7;                    \
                }                                                       \
        } while (0)
2683 /*Both YUV formats are untested*/
/*Decode six source bytes, read as U, Y1, Y2, V, Y3, Y4, into four pixels
  that share one chroma pair. Each pixel uses its own luma sample: Y1..Y4
  map to pixels x_write..x_write+3. Results are clamped to 0..0xff and
  stored in the caller's r[]/g[]/b[]; the caller's src advances by six bytes
  and x_write by four, modulo 8.
  NOTE(review): the scaled luma values are held in uint8_t, so inputs far
  outside 16..235 wrap before the clamp - confirm whether that is intended.*/
#define DECODE_YUV211()                                         \
        do                                                      \
        {                                                       \
                uint8_t y1, y2, y3, y4;                         \
                int8_t U, V;                                    \
                int dR, dG, dB;                                 \
                                                                \
                U = src[0] - 0x80;                              \
                y1 = (298 * (src[1] - 16)) >> 8;                \
                y2 = (298 * (src[2] - 16)) >> 8;                \
                V = src[3] - 0x80;                              \
                y3 = (298 * (src[4] - 16)) >> 8;                \
                y4 = (298 * (src[5] - 16)) >> 8;                \
                src += 6;                                       \
                                                                \
                dR = (309*V) >> 8;                              \
                dG = (100*U + 208*V) >> 8;                      \
                dB = (516*U) >> 8;                              \
                                                                \
                r[x_write] = y1 + dR;                           \
                CLAMP(r[x_write]);                              \
                g[x_write] = y1 - dG;                           \
                CLAMP(g[x_write]);                              \
                b[x_write] = y1 + dB;                           \
                CLAMP(b[x_write]);                              \
                                                                \
                r[x_write+1] = y2 + dR;                         \
                CLAMP(r[x_write+1]);                            \
                g[x_write+1] = y2 - dG;                         \
                CLAMP(g[x_write+1]);                            \
                b[x_write+1] = y2 + dB;                         \
                CLAMP(b[x_write+1]);                            \
                                                                \
                r[x_write+2] = y3 + dR;                         \
                CLAMP(r[x_write+2]);                            \
                g[x_write+2] = y3 - dG;                         \
                CLAMP(g[x_write+2]);                            \
                b[x_write+2] = y3 + dB;                         \
                CLAMP(b[x_write+2]);                            \
                                                                \
                r[x_write+3] = y4 + dR;                         \
                CLAMP(r[x_write+3]);                            \
                g[x_write+3] = y4 - dG;                         \
                CLAMP(g[x_write+3]);                            \
                b[x_write+3] = y4 + dB;                         \
                CLAMP(b[x_write+3]);                            \
                                                                \
                x_write = (x_write + 4) & 7;                    \
        } while (0)
/*Decode eight source bytes into four pixels.
  Each group of four bytes is read as U, Y, V, Y; the luma samples are
  rescaled by 298/256 after subtracting 16 and the two pixels of a group
  share the chroma pair. Results are clamped to 0..0xff and stored in the
  caller's r[]/g[]/b[]; the caller's src advances by eight bytes and
  x_write by four, modulo 8.*/
#define DECODE_YUV422()                                                 \
        do                                                              \
        {                                                               \
                int pair;                                               \
                                                                        \
                for (pair = 0; pair < 2; pair++)                        \
                {                                                       \
                        int8_t U = src[0] - 0x80;                       \
                        uint8_t luma0 = (298 * (src[1] - 16)) >> 8;     \
                        int8_t V = src[2] - 0x80;                       \
                        uint8_t luma1 = (298 * (src[3] - 16)) >> 8;     \
                        int dR = (309*V) >> 8;                          \
                        int dG = (100*U + 208*V) >> 8;                  \
                        int dB = (516*U) >> 8;                          \
                                                                        \
                        src += 4;                                       \
                                                                        \
                        r[x_write] = luma0 + dR;                        \
                        g[x_write] = luma0 - dG;                        \
                        b[x_write] = luma0 + dB;                        \
                        CLAMP(r[x_write]);                              \
                        CLAMP(g[x_write]);                              \
                        CLAMP(b[x_write]);                              \
                                                                        \
                        r[x_write+1] = luma1 + dR;                      \
                        g[x_write+1] = luma1 - dG;                      \
                        b[x_write+1] = luma1 + dB;                      \
                        CLAMP(r[x_write+1]);                            \
                        CLAMP(g[x_write+1]);                            \
                        CLAMP(b[x_write+1]);                            \
                                                                        \
                        x_write = (x_write + 2) & 7;                    \
                }                                                       \
        } while (0)
/*Decode four 16-bit source pixels with three 5-bit fields (bits 0-4, 5-9
  and 10-14) into the caller's r[], g[] and b[] respectively. Each field is
  widened to 8 bits by replicating its top bits. The caller's src advances
  by eight bytes and x_write by four, modulo 8.*/
#define DECODE_RGB555()                                                 \
        do                                                              \
        {                                                               \
                int px;                                                 \
                                                                        \
                for (px = 0; px < 4; px++)                              \
                {                                                       \
                        uint16_t dat = *(uint16_t *)src;                \
                        int lo5 = dat & 0x1f;                           \
                        int mid5 = (dat >> 5) & 0x1f;                   \
                        int hi5 = (dat >> 10) & 0x1f;                   \
                                                                        \
                        src += 2;                                       \
                                                                        \
                        r[x_write + px] = (lo5 << 3) | (lo5 >> 2);      \
                        g[x_write + px] = (mid5 << 3) | (mid5 >> 2);    \
                        b[x_write + px] = (hi5 << 3) | (hi5 >> 2);      \
                }                                                       \
                x_write = (x_write + 4) & 7;                            \
        } while (0)
/*Decode four 16-bit source pixels with a 5-bit field in bits 0-4, a 6-bit
  field in bits 5-10 and a 5-bit field in bits 11-15 into the caller's r[],
  g[] and b[] respectively. Each field is widened to 8 bits by replicating
  its top bits. The caller's src advances by eight bytes and x_write by
  four, modulo 8.*/
#define DECODE_RGB565()                                                 \
        do                                                              \
        {                                                               \
                int px;                                                 \
                                                                        \
                for (px = 0; px < 4; px++)                              \
                {                                                       \
                        uint16_t dat = *(uint16_t *)src;                \
                        int lo5 = dat & 0x1f;                           \
                        int mid6 = (dat >> 5) & 0x3f;                   \
                        int hi5 = (dat >> 11) & 0x1f;                   \
                                                                        \
                        src += 2;                                       \
                                                                        \
                        r[x_write + px] = (lo5 << 3) | (lo5 >> 2);      \
                        g[x_write + px] = (mid6 << 2) | (mid6 >> 4);    \
                        b[x_write + px] = (hi5 << 3) | (hi5 >> 2);      \
                }                                                       \
                x_write = (x_write + 4) & 7;                            \
        } while (0)
/*Decode four packed 3-byte source pixels: byte 0 goes to the caller's r[],
  byte 1 to g[] and byte 2 to b[]. The caller's src advances by twelve
  bytes and x_write by four, modulo 8.*/
#define DECODE_RGB888()                                         \
        do                                                      \
        {                                                       \
                int px;                                         \
                                                                \
                for (px = 0; px < 4; px++)                      \
                {                                               \
                        r[x_write + px] = src[px*3];            \
                        g[x_write + px] = src[px*3 + 1];        \
                        b[x_write + px] = src[px*3 + 2];        \
                }                                               \
                src += 12;                                      \
                x_write = (x_write + 4) & 7;                    \
        } while (0)
/*Decode four 4-byte source pixels: byte 0 goes to the caller's r[], byte 1
  to g[] and byte 2 to b[]; byte 3 is ignored. The caller's src advances by
  sixteen bytes and x_write by four, modulo 8.*/
#define DECODE_XRGB8888()                                       \
        do                                                      \
        {                                                       \
                int px;                                         \
                                                                \
                for (px = 0; px < 4; px++)                      \
                {                                               \
                        r[x_write + px] = src[px*4];            \
                        g[x_write + px] = src[px*4 + 1];        \
                        b[x_write + px] = src[px*4 + 2];        \
                }                                               \
                src += 16;                                      \
                x_write = (x_write + 4) & 7;                    \
        } while (0)
/*Decode the next four overlay pixels using the decoder that matches
  virge->streams.sdif. Any value without a dedicated decoder (including 7)
  is decoded as XRGB8888. Relies on the caller's virge, src, r, g, b and
  x_write variables.*/
#define OVERLAY_SAMPLE()                                        \
        do                                                      \
        {                                                       \
                if (virge->streams.sdif == 1)                   \
                        DECODE_YCbCr();                         \
                else if (virge->streams.sdif == 2)              \
                        DECODE_YUV422();                        \
                else if (virge->streams.sdif == 3)              \
                        DECODE_RGB555();                        \
                else if (virge->streams.sdif == 4)              \
                        DECODE_YUV211();                        \
                else if (virge->streams.sdif == 5)              \
                        DECODE_RGB565();                        \
                else if (virge->streams.sdif == 6)              \
                        DECODE_RGB888();                        \
                else                                            \
                        DECODE_XRGB8888();                      \
        } while (0)
2871 static void s3_virge_overlay_draw(svga_t *svga, int displine)
2872 {
2873 virge_t *virge = (virge_t *)svga->p;
2874 int offset = (virge->streams.sec_x - virge->streams.pri_x) + 1;
2875 int h_acc = virge->streams.dda_horiz_accumulator;
2876 int r[8], g[8], b[8];
2877 int r_samp[8] = {0, 0, 0, 0, 0, 0, 0, 0};
2878 int g_samp[8] = {0, 0, 0, 0, 0, 0, 0, 0};
2879 int b_samp[8] = {0, 0, 0, 0, 0, 0, 0, 0};
2880 int x_size, x_read = 4, x_write = 4;
2881 int x;
2882 uint32_t *p;
2883 uint8_t *src = &svga->vram[svga->overlay_latch.addr];
2885 p = &((uint32_t *)buffer32->line[displine])[offset + 32];
2887 if ((offset + virge->streams.sec_w) > virge->streams.pri_w)
2888 x_size = (virge->streams.pri_w - virge->streams.sec_x) + 1;
2889 else
2890 x_size = virge->streams.sec_w + 1;
2892 OVERLAY_SAMPLE();
2894 for (x = 0; x < x_size; x++)
2895 {
2896 *p++ = r[x_read] | (g[x_read] << 8) | (b[x_read] << 16);
2898 h_acc += virge->streams.k1_horiz_scale;
2899 if (h_acc >= 0)
2900 {
2901 if ((x_read ^ (x_read + 1)) & ~3)
2902 OVERLAY_SAMPLE();
2903 x_read = (x_read + 1) & 7;
2905 h_acc += (virge->streams.k2_horiz_scale - virge->streams.k1_horiz_scale);
2906 }
2907 }
2909 svga->overlay_latch.v_acc += virge->streams.k1_vert_scale;
2910 if (svga->overlay_latch.v_acc >= 0)
2911 {
2912 svga->overlay_latch.v_acc += (virge->streams.k2_vert_scale - virge->streams.k1_vert_scale);
2913 svga->overlay_latch.addr += virge->streams.sec_stride;
2914 }
2915 }
2917 static uint8_t s3_virge_pci_read(int func, int addr, void *p)
2918 {
2919 virge_t *virge = (virge_t *)p;
2920 svga_t *svga = &virge->svga;
2921 uint8_t ret = 0;
2922 // pclog("S3 PCI read %08X ", addr);
2923 switch (addr)
2924 {
2925 case 0x00: ret = 0x33; break; /*'S3'*/
2926 case 0x01: ret = 0x53; break;
2928 case 0x02: ret = virge->virge_id_low; break;
2929 case 0x03: ret = virge->virge_id_high; break;
2931 case 0x04: ret = virge->pci_regs[0x04] & 0x27; break;
2933 case 0x07: ret = virge->pci_regs[0x07] & 0x36; break;
2935 case 0x08: ret = 0; break; /*Revision ID*/
2936 case 0x09: ret = 0; break; /*Programming interface*/
2938 case 0x0a: ret = 0x00; break; /*Supports VGA interface*/
2939 case 0x0b: ret = 0x03; /*output = 3; */break;
2941 case 0x0d: ret = virge->pci_regs[0x0d] & 0xf8; break;
2943 case 0x10: ret = 0x00; break;/*Linear frame buffer address*/
2944 case 0x11: ret = 0x00; break;
2945 case 0x12: ret = 0x00; break;
2946 case 0x13: ret = svga->crtc[0x59] & 0xfc; break;
2948 case 0x30: ret = virge->pci_regs[0x30] & 0x01; break; /*BIOS ROM address*/
2949 case 0x31: ret = 0x00; break;
2950 case 0x32: ret = virge->pci_regs[0x32]; break;
2951 case 0x33: ret = virge->pci_regs[0x33]; break;
2953 case 0x3c: ret = virge->pci_regs[0x3c]; break;
2955 case 0x3d: ret = 0x01; break; /*INTA*/
2957 case 0x3e: ret = 0x04; break;
2958 case 0x3f: ret = 0xff; break;
2960 }
2961 // pclog("%02X\n", ret);
2962 return ret;
2963 }
2965 static void s3_virge_pci_write(int func, int addr, uint8_t val, void *p)
2966 {
2967 virge_t *virge = (virge_t *)p;
2968 svga_t *svga = &virge->svga;
2969 // pclog("S3 PCI write %08X %02X %04X:%08X\n", addr, val, CS, pc);
2970 switch (addr)
2971 {
2972 case 0x00: case 0x01: case 0x02: case 0x03:
2973 case 0x08: case 0x09: case 0x0a: case 0x0b:
2974 case 0x3d: case 0x3e: case 0x3f:
2975 return;
2977 case PCI_REG_COMMAND:
2978 if (val & PCI_COMMAND_IO)
2979 {
2980 io_removehandler(0x03c0, 0x0020, s3_virge_in, NULL, NULL, s3_virge_out, NULL, NULL, virge);
2981 io_sethandler(0x03c0, 0x0020, s3_virge_in, NULL, NULL, s3_virge_out, NULL, NULL, virge);
2982 }
2983 else
2984 io_removehandler(0x03c0, 0x0020, s3_virge_in, NULL, NULL, s3_virge_out, NULL, NULL, virge);
2985 virge->pci_regs[PCI_REG_COMMAND] = val & 0x27;
2986 return;
2987 case 0x07:
2988 virge->pci_regs[0x07] = val & 0x3e;
2989 return;
2990 case 0x0d:
2991 virge->pci_regs[0x0d] = val & 0xf8;
2992 return;
2994 case 0x13:
2995 svga->crtc[0x59] = val & 0xfc;
2996 s3_virge_updatemapping(virge);
2997 return;
2999 case 0x30: case 0x32: case 0x33:
3000 virge->pci_regs[addr] = val;
3001 if (virge->pci_regs[0x30] & 0x01)
3002 {
3003 uint32_t addr = (virge->pci_regs[0x32] << 16) | (virge->pci_regs[0x33] << 24);
3004 // pclog("Virge bios_rom enabled at %08x\n", addr);
3005 mem_mapping_set_addr(&virge->bios_rom.mapping, addr, 0x8000);
3006 mem_mapping_enable(&virge->bios_rom.mapping);
3007 }
3008 else
3009 {
3010 // pclog("Virge bios_rom disabled\n");
3011 mem_mapping_disable(&virge->bios_rom.mapping);
3012 }
3013 return;
3014 case 0x3c:
3015 virge->pci_regs[0x3c] = val;
3016 return;
3017 }
3018 }
3020 static void *s3_virge_init()
3021 {
3022 virge_t *virge = malloc(sizeof(virge_t));
3023 memset(virge, 0, sizeof(virge_t));
3025 virge->bilinear_enabled = device_get_config_int("bilinear");
3026 virge->dithering_enabled = device_get_config_int("dithering");
3027 virge->memory_size = device_get_config_int("memory");
3029 svga_init(&virge->svga, virge, virge->memory_size << 20,
3030 s3_virge_recalctimings,
3031 s3_virge_in, s3_virge_out,
3032 s3_virge_hwcursor_draw,
3033 s3_virge_overlay_draw);
3035 rom_init(&virge->bios_rom, "roms/s3virge.bin", 0xc0000, 0x8000, 0x7fff, 0, MEM_MAPPING_EXTERNAL);
3036 if (PCI)
3037 mem_mapping_disable(&virge->bios_rom.mapping);
3039 mem_mapping_add(&virge->mmio_mapping, 0, 0, s3_virge_mmio_read,
3040 s3_virge_mmio_read_w,
3041 s3_virge_mmio_read_l,
3042 s3_virge_mmio_write,
3043 s3_virge_mmio_write_w,
3044 s3_virge_mmio_write_l,
3045 NULL,
3046 0,
3047 virge);
3048 mem_mapping_add(&virge->new_mmio_mapping, 0, 0, s3_virge_mmio_read,
3049 s3_virge_mmio_read_w,
3050 s3_virge_mmio_read_l,
3051 s3_virge_mmio_write,
3052 s3_virge_mmio_write_w,
3053 s3_virge_mmio_write_l,
3054 NULL,
3055 0,
3056 virge);
3057 mem_mapping_add(&virge->linear_mapping, 0, 0, svga_read_linear,
3058 svga_readw_linear,
3059 svga_readl_linear,
3060 svga_write_linear,
3061 svga_writew_linear,
3062 svga_writel_linear,
3063 NULL,
3064 0,
3065 &virge->svga);
3067 io_sethandler(0x03c0, 0x0020, s3_virge_in, NULL, NULL, s3_virge_out, NULL, NULL, virge);
3069 virge->pci_regs[4] = 3;
3070 virge->pci_regs[5] = 0;
3071 virge->pci_regs[6] = 0;
3072 virge->pci_regs[7] = 2;
3073 virge->pci_regs[0x32] = 0x0c;
3074 virge->pci_regs[0x3d] = 1;
3075 virge->pci_regs[0x3e] = 4;
3076 virge->pci_regs[0x3f] = 0xff;
3078 virge->virge_id_high = 0x56;
3079 virge->virge_id_low = 0x31;
3080 virge->virge_rev = 0;
3081 virge->virge_id = 0xe1;
3083 switch (virge->memory_size)
3084 {
3085 case 2:
3086 virge->svga.crtc[0x36] = 2 | (0 << 2) | (1 << 4) | (4 << 5);
3087 break;
3088 case 4:
3089 default:
3090 virge->svga.crtc[0x36] = 2 | (0 << 2) | (1 << 4) | (0 << 5);
3091 break;
3092 }
3094 virge->svga.crtc[0x37] = 1;// | (7 << 5);
3095 virge->svga.crtc[0x53] = 1 << 3;
3096 virge->svga.crtc[0x59] = 0x70;
3098 virge->is_375 = 0;
3100 pci_add(s3_virge_pci_read, s3_virge_pci_write, virge);
3102 return virge;
3103 }
3105 static void *s3_virge_375_init()
3106 {
3107 virge_t *virge = malloc(sizeof(virge_t));
3108 memset(virge, 0, sizeof(virge_t));
3110 virge->bilinear_enabled = device_get_config_int("bilinear");
3111 virge->dithering_enabled = device_get_config_int("dithering");
3112 virge->memory_size = device_get_config_int("memory");
3114 svga_init(&virge->svga, virge, virge->memory_size << 20,
3115 s3_virge_recalctimings,
3116 s3_virge_in, s3_virge_out,
3117 s3_virge_hwcursor_draw,
3118 s3_virge_overlay_draw);
3120 rom_init(&virge->bios_rom, "roms/86c375_1.bin", 0xc0000, 0x8000, 0x7fff, 0, MEM_MAPPING_EXTERNAL);
3121 if (PCI)
3122 mem_mapping_disable(&virge->bios_rom.mapping);
3124 mem_mapping_add(&virge->mmio_mapping, 0, 0, s3_virge_mmio_read,
3125 s3_virge_mmio_read_w,
3126 s3_virge_mmio_read_l,
3127 s3_virge_mmio_write,
3128 s3_virge_mmio_write_w,
3129 s3_virge_mmio_write_l,
3130 NULL,
3131 0,
3132 virge);
3133 mem_mapping_add(&virge->new_mmio_mapping, 0, 0, s3_virge_mmio_read,
3134 s3_virge_mmio_read_w,
3135 s3_virge_mmio_read_l,
3136 s3_virge_mmio_write,
3137 s3_virge_mmio_write_w,
3138 s3_virge_mmio_write_l,
3139 NULL,
3140 0,
3141 virge);
3142 mem_mapping_add(&virge->linear_mapping, 0, 0, svga_read_linear,
3143 svga_readw_linear,
3144 svga_readl_linear,
3145 svga_write_linear,
3146 svga_writew_linear,
3147 svga_writel_linear,
3148 NULL,
3149 0,
3150 &virge->svga);
3152 io_sethandler(0x03c0, 0x0020, s3_virge_in, NULL, NULL, s3_virge_out, NULL, NULL, virge);
3154 virge->pci_regs[4] = 3;
3155 virge->pci_regs[5] = 0;
3156 virge->pci_regs[6] = 0;
3157 virge->pci_regs[7] = 2;
3158 virge->pci_regs[0x32] = 0x0c;
3159 virge->pci_regs[0x3d] = 1;
3160 virge->pci_regs[0x3e] = 4;
3161 virge->pci_regs[0x3f] = 0xff;
3163 virge->virge_id_high = 0x8a;
3164 virge->virge_id_low = 0x01;
3165 virge->virge_rev = 0;
3166 virge->virge_id = 0xe1;
3168 switch (virge->memory_size)
3169 {
3170 case 2:
3171 virge->svga.crtc[0x36] = 2 | (0 << 2) | (1 << 4) | (4 << 5);
3172 break;
3173 case 4:
3174 default:
3175 virge->svga.crtc[0x36] = 2 | (0 << 2) | (1 << 4) | (0 << 5);
3176 break;
3177 }
3178 // virge->svga.crtc[0x36] = 2 | (0 << 2) | (1 << 4);
3179 virge->svga.crtc[0x37] = 1;// | (7 << 5);
3180 virge->svga.crtc[0x53] = 1 << 3;
3181 virge->svga.crtc[0x59] = 0x70;
3183 virge->svga.crtc[0x6c] = 0x01;
3185 virge->is_375 = 1;
3187 pci_add(s3_virge_pci_read, s3_virge_pci_write, virge);
3189 return virge;
3190 }
3192 static void s3_virge_close(void *p)
3193 {
3194 virge_t *virge = (virge_t *)p;
3195 FILE *f = fopen("vram.dmp", "wb");
3196 fwrite(virge->svga.vram, 4 << 20, 1, f);
3197 fclose(f);
3199 svga_close(&virge->svga);
3201 free(virge);
3202 }
/*Report whether the ViRGE BIOS image is present; returns the result of
  rom_present() for roms/s3virge.bin.*/
static int s3_virge_available()
{
        int present = rom_present("roms/s3virge.bin");

        return present;
}
/*Report whether the ViRGE/DX BIOS image is present; returns the result of
  rom_present() for roms/86c375_1.bin.*/
static int s3_virge_375_available()
{
        int present = rom_present("roms/86c375_1.bin");

        return present;
}
3214 static void s3_virge_speed_changed(void *p)
3215 {
3216 virge_t *virge = (virge_t *)p;
3218 svga_recalctimings(&virge->svga);
3219 }
3221 static void s3_virge_force_redraw(void *p)
3222 {
3223 virge_t *virge = (virge_t *)p;
3225 virge->svga.fullchange = changeframecount;
3226 }
3228 static int s3_virge_add_status_info(char *s, int max_len, void *p)
3229 {
3230 virge_t *virge = (virge_t *)p;
3231 int cur_len;
3232 char temps[256];
3234 uint64_t new_time = timer_read();
3235 uint64_t status_diff = new_time - status_time;
3236 status_time = new_time;
3238 if (!status_diff)
3239 status_diff = 1;
3241 cur_len = svga_add_status_info(s, cur_len, &virge->svga);
3242 sprintf(temps, "%f Mpixels/sec\n%f ktris/sec\n%f%% CPU\n%f%% CPU (real)\n%d writes", (double)virge->pixel_count/1000000.0, (double)virge->tri_count/1000.0, ((double)virge_time * 100.0) / timer_freq, ((double)virge_time * 100.0) / status_diff, reg_writes);
3243 strncat(s, temps, cur_len);
3244 cur_len -= strlen(temps);
3245 virge->pixel_count = virge->tri_count = 0;
3246 virge_time = 0;
3247 reg_writes = 0;
3249 return max_len - cur_len;
3250 }
3252 static device_config_t s3_virge_config[] =
3253 {
3254 {
3255 .name = "memory",
3256 .description = "Memory size",
3257 .type = CONFIG_SELECTION,
3258 .selection =
3259 {
3260 {
3261 .description = "2 MB",
3262 .value = 2
3263 },
3264 {
3265 .description = "4 MB",
3266 .value = 4
3267 },
3268 {
3269 .description = ""
3270 }
3271 },
3272 .default_int = 4
3273 },
3274 {
3275 .name = "bilinear",
3276 .description = "Bilinear filtering",
3277 .type = CONFIG_BINARY,
3278 .default_int = 1
3279 },
3280 {
3281 .name = "dithering",
3282 .description = "Dithering",
3283 .type = CONFIG_BINARY,
3284 .default_int = 1
3285 },
3286 {
3287 .type = -1
3288 }
3289 };
3291 device_t s3_virge_device =
3292 {
3293 "Diamond Stealth 3D 2000 (S3 ViRGE)",
3294 DEVICE_NOT_WORKING,
3295 s3_virge_init,
3296 s3_virge_close,
3297 s3_virge_available,
3298 s3_virge_speed_changed,
3299 s3_virge_force_redraw,
3300 s3_virge_add_status_info,
3301 s3_virge_config
3302 };
3304 device_t s3_virge_375_device =
3305 {
3306 "S3 ViRGE/DX",
3307 DEVICE_NOT_WORKING,
3308 s3_virge_375_init,
3309 s3_virge_close,
3310 s3_virge_375_available,
3311 s3_virge_speed_changed,
3312 s3_virge_force_redraw,
3313 s3_virge_add_status_info,
3314 s3_virge_config
3315 };
