PCem

view src/vid_s3_virge.c @ 159:7dbd14c4adac

Restricted available range of attribute and sequencer registers on ViRGE, XFree86 now detects card correctly.
author TomW
date Thu Sep 18 21:14:15 2014 +0100
parents ad7d877a3b53
children b6874c4f3917
line source
1 /*S3 ViRGE emulation*/
2 #include <stdlib.h>
3 #include "ibm.h"
4 #include "device.h"
5 #include "io.h"
6 #include "mem.h"
7 #include "pci.h"
8 #include "rom.h"
9 #include "thread.h"
10 #include "video.h"
11 #include "vid_s3_virge.h"
12 #include "vid_svga.h"
13 #include "vid_svga_render.h"
/* NOTE(review): not referenced in the visible part of the file; presumably
   timing accumulators used further down - confirm before removing. */
static uint64_t virge_time = 0;
static uint64_t status_time = 0;
/* Counters bumped by every MMIO write / read handler invocation. */
static int reg_writes = 0, reg_reads = 0;

/* 4x4 table of values 0..7. Presumably an ordered-dither threshold matrix;
   its consumers are outside this view. Rows are braced explicitly so the
   initializer matches the declared [4][4] shape. */
static int dither[4][4] =
{
    {0, 4, 1, 5},
    {6, 2, 7, 3},
    {1, 5, 0, 4},
    {7, 3, 6, 2},
};

/* Ring buffer of queued s3d_t entries (virge_t.s3d_buffer). RB_SIZE must stay
   a power of two for RB_MASK to work as an index mask. */
#define RB_SIZE 256
#define RB_MASK (RB_SIZE - 1)

/* These macros expect a `virge_t *virge` to be in scope at the point of use.
   The indices are free-running; their difference is the fill level. */
#define RB_ENTRIES (virge->s3d_write_idx - virge->s3d_read_idx)
#define RB_FULL (RB_ENTRIES == RB_SIZE)
#define RB_EMPTY (!RB_ENTRIES)
/*
 * Snapshot of the triangle register set. One live copy (virge_t.s3d_tri) is
 * filled by MMIO writes in the 0xb4d4+ range; copies are also stored in the
 * virge_t.s3d_buffer ring. Offsets in the comments are the MMIO addresses the
 * write handler decodes for each field, where that is visible in this file.
 */
typedef struct s3d_t
{
    uint32_t cmd_set;      /* 0xb500 */

    int clip_l;            /* 0xb4dc, bits 26..16 */
    int clip_r;            /* 0xb4dc, bits 10..0  */
    int clip_t;            /* 0xb4e0, bits 26..16 */
    int clip_b;            /* 0xb4e0, bits 10..0  */

    uint32_t dest_base;    /* 0xb4d8 */
    uint32_t dest_str;     /* 0xb4e4, upper half */

    uint32_t z_base;       /* 0xb4d4 */
    uint32_t z_str;        /* 0xb4e8 */

    uint32_t tex_base;     /* 0xb4ec */
    uint32_t tex_bdr_clr;  /* 0xb4f0 */
    uint32_t tbv;          /* 0xb504 */
    uint32_t tbu;          /* 0xb508 */
    int32_t TdVdX;         /* 0xb51c */
    int32_t TdUdX;         /* 0xb520 */
    int32_t TdVdY;         /* 0xb528 */
    int32_t TdUdY;         /* 0xb52c */
    uint32_t tus;
    uint32_t tvs;          /* 0xb534 */

    int32_t TdZdX;
    int32_t TdZdY;
    uint32_t tzs;

    int32_t TdWdX;         /* 0xb50c */
    int32_t TdWdY;         /* 0xb510 */
    uint32_t tws;          /* 0xb514 */

    int32_t TdDdX;         /* 0xb518 */
    int32_t TdDdY;         /* 0xb524 */
    uint32_t tds;          /* 0xb530 */

    /* Per-channel X and Y deltas, 16 bits each. */
    int16_t TdGdX;
    int16_t TdBdX;
    int16_t TdRdX;
    int16_t TdAdX;
    int16_t TdGdY;
    int16_t TdBdY;
    int16_t TdRdY;
    int16_t TdAdY;
    uint32_t tgs;
    uint32_t tbs;
    uint32_t trs;
    uint32_t tas;

    /* Edge-walk parameters; the code that consumes them is outside this view. */
    uint32_t TdXdY12;
    uint32_t txend12;
    uint32_t TdXdY01;
    uint32_t txend01;
    uint32_t TdXdY02;
    uint32_t txs;
    uint32_t tys;
    int ty01;
    int ty12;
    int tlr;
} s3d_t;
75 typedef struct virge_t
76 {
77 mem_mapping_t linear_mapping;
78 mem_mapping_t mmio_mapping;
79 mem_mapping_t new_mmio_mapping;
81 rom_t bios_rom;
83 svga_t svga;
85 uint8_t bank;
86 uint8_t ma_ext;
87 int width;
88 int bpp;
90 uint8_t virge_id, virge_id_high, virge_id_low, virge_rev;
92 uint32_t linear_base, linear_size;
94 uint8_t pci_regs[256];
96 int is_375;
98 int bilinear_enabled;
99 int dithering_enabled;
100 int memory_size;
102 int pixel_count, tri_count;
104 thread_t *render_thread;
105 event_t *wake_render_thread;
106 event_t *wake_main_thread;
107 event_t *not_full_event;
109 struct
110 {
111 uint32_t src_base;
112 uint32_t dest_base;
113 int clip_l, clip_r, clip_t, clip_b;
114 int dest_str, src_str;
115 uint32_t mono_pat_0;
116 uint32_t mono_pat_1;
117 uint32_t pat_bg_clr;
118 uint32_t pat_fg_clr;
119 uint32_t src_bg_clr;
120 uint32_t src_fg_clr;
121 uint32_t cmd_set;
122 int r_width, r_height;
123 int rsrc_x, rsrc_y;
124 int rdest_x, rdest_y;
126 int lxend0, lxend1;
127 int32_t ldx;
128 uint32_t lxstart, lystart;
129 int lycnt;
130 int line_dir;
132 int src_x, src_y;
133 int dest_x, dest_y;
134 int w, h;
135 uint8_t rop;
137 int data_left_count;
138 uint32_t data_left;
140 uint32_t pattern_8[8*8];
141 uint32_t pattern_16[8*8];
142 uint32_t pattern_32[8*8];
143 } s3d;
145 s3d_t s3d_tri;
147 s3d_t s3d_buffer[RB_SIZE];
148 int s3d_read_idx, s3d_write_idx;
149 int s3d_busy;
151 struct
152 {
153 uint32_t pri_ctrl;
154 uint32_t chroma_ctrl;
155 uint32_t sec_ctrl;
156 uint32_t chroma_upper_bound;
157 uint32_t sec_filter;
158 uint32_t blend_ctrl;
159 uint32_t pri_fb0, pri_fb1;
160 uint32_t pri_stride;
161 uint32_t buffer_ctrl;
162 uint32_t sec_fb0, sec_fb1;
163 uint32_t sec_stride;
164 uint32_t overlay_ctrl;
165 int32_t k1_vert_scale;
166 int32_t k2_vert_scale;
167 int32_t dda_vert_accumulator;
168 int32_t k1_horiz_scale;
169 int32_t k2_horiz_scale;
170 int32_t dda_horiz_accumulator;
171 uint32_t fifo_ctrl;
172 uint32_t pri_start;
173 uint32_t pri_size;
174 uint32_t sec_start;
175 uint32_t sec_size;
177 int sdif;
179 int pri_x, pri_y, pri_w, pri_h;
180 int sec_x, sec_y, sec_w, sec_h;
181 } streams;
182 } virge_t;
184 static void queue_triangle(virge_t *virge);
186 static void s3_virge_recalctimings(svga_t *svga);
187 static void s3_virge_updatemapping(virge_t *virge);
189 static void s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat);
191 static uint8_t s3_virge_mmio_read(uint32_t addr, void *p);
192 static uint16_t s3_virge_mmio_read_w(uint32_t addr, void *p);
193 static uint32_t s3_virge_mmio_read_l(uint32_t addr, void *p);
194 static void s3_virge_mmio_write(uint32_t addr, uint8_t val, void *p);
195 static void s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *p);
196 static void s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *p);
198 enum
199 {
200 CMD_SET_AE = 1,
201 CMD_SET_HC = (1 << 1),
203 CMD_SET_FORMAT_MASK = (7 << 2),
204 CMD_SET_FORMAT_8 = (0 << 2),
205 CMD_SET_FORMAT_16 = (1 << 2),
206 CMD_SET_FORMAT_24 = (2 << 2),
208 CMD_SET_MS = (1 << 6),
209 CMD_SET_IDS = (1 << 7),
210 CMD_SET_MP = (1 << 8),
211 CMD_SET_TP = (1 << 9),
213 CMD_SET_ITA_MASK = (3 << 10),
214 CMD_SET_ITA_BYTE = (0 << 10),
215 CMD_SET_ITA_WORD = (1 << 10),
216 CMD_SET_ITA_DWORD = (2 << 10),
218 CMD_SET_ZUP = (1 << 23),
220 CMD_SET_ZB_MODE = (3 << 24),
222 CMD_SET_XP = (1 << 25),
223 CMD_SET_YP = (1 << 26),
225 CMD_SET_COMMAND_MASK = (15 << 27)
226 };
228 #define CMD_SET_ABC_SRC (1 << 18)
229 #define CMD_SET_ABC_ENABLE (1 << 19)
230 #define CMD_SET_TWE (1 << 26)
232 enum
233 {
234 CMD_SET_COMMAND_BITBLT = (0 << 27),
235 CMD_SET_COMMAND_RECTFILL = (2 << 27),
236 CMD_SET_COMMAND_LINE = (3 << 27),
237 CMD_SET_COMMAND_NOP = (15 << 27)
238 };
240 static void s3_virge_out(uint16_t addr, uint8_t val, void *p)
241 {
242 virge_t *virge = (virge_t *)p;
243 svga_t *svga = &virge->svga;
244 uint8_t old;
246 if (((addr & 0xfff0) == 0x3d0 || (addr & 0xfff0) == 0x3b0) && !(svga->miscout & 1))
247 addr ^= 0x60;
249 // pclog("S3 out %04X %02X %04X:%08X %04X %04X %i\n", addr, val, CS, pc, ES, BX, ins);
251 switch (addr)
252 {
253 case 0x3c5:
254 if (svga->seqaddr >= 0x10)
255 {
256 svga->seqregs[svga->seqaddr & 0x1f]=val;
257 s3_virge_recalctimings(svga);
258 return;
259 }
260 if (svga->seqaddr == 4) /*Chain-4 - update banking*/
261 {
262 if (val & 8) svga->write_bank = svga->read_bank = virge->bank << 16;
263 else svga->write_bank = svga->read_bank = virge->bank << 14;
264 }
265 break;
267 //case 0x3C6: case 0x3C7: case 0x3C8: case 0x3C9:
268 // pclog("Write RAMDAC %04X %02X %04X:%04X\n", addr, val, CS, pc);
269 //sdac_ramdac_out(addr,val);
270 //return;
272 case 0x3d4:
273 svga->crtcreg = val;// & 0x7f;
274 return;
275 case 0x3d5:
276 //pclog("Write CRTC R%02X %02X %04x(%08x):%08x\n", svga->crtcreg, val, CS, cs, pc);
277 if (svga->crtcreg <= 7 && svga->crtc[0x11] & 0x80)
278 return;
279 if (svga->crtcreg >= 0x20 && svga->crtcreg != 0x38 && (svga->crtc[0x38] & 0xcc) != 0x48)
280 return;
281 if (svga->crtcreg >= 0x80)
282 return;
283 old = svga->crtc[svga->crtcreg];
284 svga->crtc[svga->crtcreg] = val;
285 switch (svga->crtcreg)
286 {
287 case 0x31:
288 virge->ma_ext = (virge->ma_ext & 0x1c) | ((val & 0x30) >> 4);
289 svga->vrammask = (val & 8) ? 0x3fffff : 0x3ffff;
290 break;
292 case 0x50:
293 switch (svga->crtc[0x50] & 0xc1)
294 {
295 case 0x00: virge->width = (svga->crtc[0x31] & 2) ? 2048 : 1024; break;
296 case 0x01: virge->width = 1152; break;
297 case 0x40: virge->width = 640; break;
298 case 0x80: virge->width = 800; break;
299 case 0x81: virge->width = 1600; break;
300 case 0xc0: virge->width = 1280; break;
301 }
302 virge->bpp = (svga->crtc[0x50] >> 4) & 3;
303 break;
304 case 0x69:
305 virge->ma_ext = val & 0x1f;
306 break;
308 case 0x35:
309 virge->bank = (virge->bank & 0x70) | (val & 0xf);
310 // pclog("CRTC write R35 %02X\n", val);
311 if (svga->chain4) svga->write_bank = svga->read_bank = virge->bank << 16;
312 else svga->write_bank = svga->read_bank = virge->bank << 14;
313 break;
314 case 0x51:
315 virge->bank = (virge->bank & 0x4f) | ((val & 0xc) << 2);
316 if (svga->chain4) svga->write_bank = svga->read_bank = virge->bank << 16;
317 else svga->write_bank = svga->read_bank = virge->bank << 14;
318 virge->ma_ext = (virge->ma_ext & ~0xc) | ((val & 3) << 2);
319 break;
320 case 0x6a:
321 virge->bank = val;
322 // pclog("CRTC write R6a %02X\n", val);
323 if (svga->chain4) svga->write_bank = svga->read_bank = virge->bank << 16;
324 else svga->write_bank = svga->read_bank = virge->bank << 14;
325 break;
327 case 0x3a:
328 if (val & 0x10) svga->gdcreg[5] |= 0x40; /*Horrible cheat*/
329 break;
331 case 0x45:
332 svga->hwcursor.ena = val & 1;
333 break;
334 case 0x46: case 0x47: case 0x48: case 0x49:
335 case 0x4c: case 0x4d: case 0x4e: case 0x4f:
336 svga->hwcursor.x = ((svga->crtc[0x46] << 8) | svga->crtc[0x47]) & 0x7ff;
337 svga->hwcursor.y = ((svga->crtc[0x48] << 8) | svga->crtc[0x49]) & 0x7ff;
338 svga->hwcursor.xoff = svga->crtc[0x4e] & 63;
339 svga->hwcursor.yoff = svga->crtc[0x4f] & 63;
340 svga->hwcursor.addr = ((((svga->crtc[0x4c] << 8) | svga->crtc[0x4d]) & 0xfff) * 1024) + (svga->hwcursor.yoff * 16);
341 break;
343 case 0x53:
344 case 0x58: case 0x59: case 0x5a:
345 s3_virge_updatemapping(virge);
346 break;
348 case 0x67:
349 switch (val >> 4)
350 {
351 case 3: svga->bpp = 15; break;
352 case 5: svga->bpp = 16; break;
353 case 7: svga->bpp = 24; break;
354 case 13: svga->bpp = 32; break;
355 default: svga->bpp = 8; break;
356 }
357 break;
358 //case 0x55: case 0x43:
359 // pclog("Write CRTC R%02X %02X\n", crtcreg, val);
360 }
361 if (old != val)
362 {
363 if (svga->crtcreg < 0xe || svga->crtcreg > 0x10)
364 {
365 svga->fullchange = changeframecount;
366 svga_recalctimings(svga);
367 }
368 }
369 break;
370 }
371 svga_out(addr, val, svga);
372 }
374 static uint8_t s3_virge_in(uint16_t addr, void *p)
375 {
376 virge_t *virge = (virge_t *)p;
377 svga_t *svga = &virge->svga;
378 uint8_t ret;
380 if (((addr & 0xfff0) == 0x3d0 || (addr & 0xfff0) == 0x3b0) && !(svga->miscout & 1))
381 addr ^= 0x60;
383 // if (addr != 0x3da) pclog("S3 in %04X %04X:%08X ", addr, CS, pc);
384 switch (addr)
385 {
386 case 0x3c1:
387 if (svga->attraddr > 0x14)
388 ret = 0xff;
389 else
390 ret = svga_in(addr, svga);
391 break;
392 //case 0x3C6: case 0x3C7: case 0x3C8: case 0x3C9:
393 // pclog("Read RAMDAC %04X %04X:%04X\n", addr, CS, pc);
394 //return sdac_ramdac_in(addr);
396 case 0x3c5:
397 if (svga->seqaddr >= 8)
398 ret = svga->seqregs[svga->seqaddr & 0x1f];
399 else if (svga->seqaddr <= 4)
400 ret = svga_in(addr, svga);
401 else
402 ret = 0xff;
403 break;
405 case 0x3D4:
406 ret = svga->crtcreg;
407 break;
408 case 0x3D5:
409 //pclog("Read CRTC R%02X %04X:%04X (%02x)\n", svga->crtcreg, CS, pc, svga->crtc[svga->crtcreg]);
410 switch (svga->crtcreg)
411 {
412 case 0x2d: ret = virge->virge_id_high; break; /*Extended chip ID*/
413 case 0x2e: ret = virge->virge_id_low; break; /*New chip ID*/
414 case 0x2f: ret = virge->virge_rev; break;
415 case 0x30: ret = virge->virge_id; break; /*Chip ID*/
416 case 0x31: ret = (svga->crtc[0x31] & 0xcf) | ((virge->ma_ext & 3) << 4); break;
417 case 0x35: ret = (svga->crtc[0x35] & 0xf0) | (virge->bank & 0xf); break;
418 case 0x36: ret = (svga->crtc[0x36] & 0xfc) | 2; break; /*PCI bus*/
419 case 0x51: ret = (svga->crtc[0x51] & 0xf0) | ((virge->bank >> 2) & 0xc) | ((virge->ma_ext >> 2) & 3); break;
420 case 0x69: ret = virge->ma_ext; break;
421 case 0x6a: ret = virge->bank; break;
422 default: ret = svga->crtc[svga->crtcreg]; break;
423 }
424 break;
426 default:
427 ret = svga_in(addr, svga);
428 break;
429 }
430 // if (addr != 0x3da) pclog("%02X\n", ret);
431 return ret;
432 }
434 static void s3_virge_recalctimings(svga_t *svga)
435 {
436 virge_t *virge = (virge_t *)svga->p;
438 if (svga->crtc[0x5d] & 0x01) svga->htotal += 0x100;
439 if (svga->crtc[0x5d] & 0x02) svga->hdisp += 0x100;
440 if (svga->crtc[0x5e] & 0x01) svga->vtotal += 0x400;
441 if (svga->crtc[0x5e] & 0x02) svga->dispend += 0x400;
442 if (svga->crtc[0x5e] & 0x04) svga->vblankstart += 0x400;
443 if (svga->crtc[0x5e] & 0x10) svga->vsyncstart += 0x400;
444 if (svga->crtc[0x5e] & 0x40) svga->split += 0x400;
445 svga->interlace = svga->crtc[0x42] & 0x20;
447 if ((svga->crtc[0x67] & 0xc) != 0xc) /*VGA mode*/
448 {
449 svga->ma_latch |= (virge->ma_ext << 16);
450 //pclog("VGA mode\n");
451 if (svga->crtc[0x51] & 0x30) svga->rowoffset += (svga->crtc[0x51] & 0x30) << 4;
452 else if (svga->crtc[0x43] & 0x04) svga->rowoffset += 0x100;
453 if (!svga->rowoffset) svga->rowoffset = 256;
455 if ((svga->gdcreg[5] & 0x40) && (svga->crtc[0x3a] & 0x10))
456 {
457 switch (svga->bpp)
458 {
459 case 8:
460 svga->render = svga_render_8bpp_highres;
461 break;
462 case 15:
463 svga->render = svga_render_15bpp_highres;
464 break;
465 case 16:
466 svga->render = svga_render_16bpp_highres;
467 break;
468 case 24:
469 svga->render = svga_render_24bpp_highres;
470 break;
471 case 32:
472 svga->render = svga_render_32bpp_highres;
473 break;
474 }
475 }
477 // pclog("svga->rowoffset = %i bpp=%i\n", svga->rowoffset, svga->bpp);
478 if (svga->bpp == 15 || svga->bpp == 16)
479 {
480 svga->htotal >>= 1;
481 svga->hdisp >>= 1;
482 }
483 if (svga->bpp == 24)
484 {
485 svga->rowoffset = (svga->rowoffset * 3) / 4; /*Hack*/
486 }
487 //pclog("VGA mode x_disp=%i dispend=%i vtotal=%i\n", svga->hdisp, svga->dispend, svga->vtotal);
488 }
489 else /*Streams mode*/
490 {
491 if (virge->streams.buffer_ctrl & 1)
492 svga->ma_latch = virge->streams.pri_fb1 >> 2;
493 else
494 svga->ma_latch = virge->streams.pri_fb0 >> 2;
496 svga->hdisp = virge->streams.pri_w + 1;
497 svga->dispend = virge->streams.pri_h;
499 svga->overlay.x = virge->streams.sec_x - virge->streams.pri_x;
500 svga->overlay.y = virge->streams.sec_y - virge->streams.pri_y;
501 svga->overlay.ysize = virge->streams.sec_h;
503 if (virge->streams.buffer_ctrl & 2)
504 svga->overlay.addr = virge->streams.sec_fb1;
505 else
506 svga->overlay.addr = virge->streams.sec_fb0;
508 svga->overlay.ena = (svga->overlay.x >= 0);
509 svga->overlay.v_acc = virge->streams.dda_vert_accumulator;
510 //pclog("Streams mode x_disp=%i dispend=%i vtotal=%i x=%i y=%i ysize=%i\n", svga->hdisp, svga->dispend, svga->vtotal, svga->overlay.x, svga->overlay.y, svga->overlay.ysize);
511 svga->rowoffset = virge->streams.pri_stride >> 3;
513 switch ((virge->streams.pri_ctrl >> 24) & 0x7)
514 {
515 case 0: /*RGB-8 (CLUT)*/
516 svga->render = svga_render_8bpp_highres;
517 break;
518 case 3: /*KRGB-16 (1.5.5.5)*/
519 svga->htotal >>= 1;
520 svga->render = svga_render_15bpp_highres;
521 break;
522 case 5: /*RGB-16 (5.6.5)*/
523 svga->htotal >>= 1;
524 svga->render = svga_render_16bpp_highres;
525 break;
526 case 6: /*RGB-24 (8.8.8)*/
527 svga->render = svga_render_24bpp_highres;
528 break;
529 case 7: /*XRGB-32 (X.8.8.8)*/
530 svga->render = svga_render_32bpp_highres;
531 break;
532 }
533 }
535 if (((svga->miscout >> 2) & 3) == 3)
536 {
537 int n = svga->seqregs[0x12] & 0x1f;
538 int r = (svga->seqregs[0x12] >> 5) & 3;
539 int m = svga->seqregs[0x13] & 0x7f;
540 double freq = (((double)m + 2) / (((double)n + 2) * (double)(1 << r))) * 14318184.0;
542 svga->clock = cpuclock / freq;
543 }
544 }
546 static void s3_virge_updatemapping(virge_t *virge)
547 {
548 svga_t *svga = &virge->svga;
550 if (!(virge->pci_regs[PCI_REG_COMMAND] & PCI_COMMAND_MEM))
551 {
552 // pclog("Update mapping - PCI disabled\n");
553 mem_mapping_disable(&svga->mapping);
554 mem_mapping_disable(&virge->linear_mapping);
555 mem_mapping_disable(&virge->mmio_mapping);
556 mem_mapping_disable(&virge->new_mmio_mapping);
557 return;
558 }
560 pclog("Update mapping - bank %02X ", svga->gdcreg[6] & 0xc);
561 switch (svga->gdcreg[6] & 0xc) /*Banked framebuffer*/
562 {
563 case 0x0: /*128k at A0000*/
564 mem_mapping_set_addr(&svga->mapping, 0xa0000, 0x20000);
565 svga->banked_mask = 0xffff;
566 break;
567 case 0x4: /*64k at A0000*/
568 mem_mapping_set_addr(&svga->mapping, 0xa0000, 0x10000);
569 svga->banked_mask = 0xffff;
570 break;
571 case 0x8: /*32k at B0000*/
572 mem_mapping_set_addr(&svga->mapping, 0xb0000, 0x08000);
573 svga->banked_mask = 0x7fff;
574 break;
575 case 0xC: /*32k at B8000*/
576 mem_mapping_set_addr(&svga->mapping, 0xb8000, 0x08000);
577 svga->banked_mask = 0x7fff;
578 break;
579 }
581 virge->linear_base = (svga->crtc[0x5a] << 16) | (svga->crtc[0x59] << 24);
583 pclog("Linear framebuffer %02X ", svga->crtc[0x58] & 0x10);
584 if (svga->crtc[0x58] & 0x10) /*Linear framebuffer*/
585 {
586 switch (svga->crtc[0x58] & 3)
587 {
588 case 0: /*64k*/
589 virge->linear_size = 0x10000;
590 break;
591 case 1: /*1mb*/
592 virge->linear_size = 0x100000;
593 break;
594 case 2: /*2mb*/
595 virge->linear_size = 0x200000;
596 break;
597 case 3: /*8mb*/
598 virge->linear_size = 0x400000;
599 break;
600 }
601 virge->linear_base &= ~(virge->linear_size - 1);
602 // pclog("%08X %08X %02X %02X %02X\n", linear_base, linear_size, crtc[0x58], crtc[0x59], crtc[0x5a]);
603 pclog("Linear framebuffer at %08X size %08X\n", virge->linear_base, virge->linear_size);
604 if (virge->linear_base == 0xa0000)
605 {
606 mem_mapping_set_addr(&svga->mapping, 0xa0000, 0x10000);
607 mem_mapping_disable(&virge->linear_mapping);
608 }
609 else
610 mem_mapping_set_addr(&virge->linear_mapping, virge->linear_base, virge->linear_size);
611 }
612 else
613 mem_mapping_disable(&virge->linear_mapping);
615 pclog("Memory mapped IO %02X\n", svga->crtc[0x53] & 0x18);
616 if (svga->crtc[0x53] & 0x10) /*Old MMIO*/
617 {
618 if (svga->crtc[0x53] & 0x20)
619 mem_mapping_set_addr(&virge->mmio_mapping, 0xb8000, 0x8000);
620 else
621 mem_mapping_set_addr(&virge->mmio_mapping, 0xa0000, 0x10000);
622 }
623 else
624 mem_mapping_disable(&virge->mmio_mapping);
626 if (svga->crtc[0x53] & 0x08) /*New MMIO*/
627 mem_mapping_set_addr(&virge->new_mmio_mapping, virge->linear_base + 0x1000000, 0x10000);
628 else
629 mem_mapping_disable(&virge->new_mmio_mapping);
631 }
634 static uint8_t s3_virge_mmio_read(uint32_t addr, void *p)
635 {
636 reg_reads++;
637 // pclog("New MMIO readb %08X\n", addr);
638 switch (addr & 0xffff)
639 {
640 case 0x83b0: case 0x83b1: case 0x83b2: case 0x83b3:
641 case 0x83b4: case 0x83b5: case 0x83b6: case 0x83b7:
642 case 0x83b8: case 0x83b9: case 0x83ba: case 0x83bb:
643 case 0x83bc: case 0x83bd: case 0x83be: case 0x83bf:
644 case 0x83c0: case 0x83c1: case 0x83c2: case 0x83c3:
645 case 0x83c4: case 0x83c5: case 0x83c6: case 0x83c7:
646 case 0x83c8: case 0x83c9: case 0x83ca: case 0x83cb:
647 case 0x83cc: case 0x83cd: case 0x83ce: case 0x83cf:
648 case 0x83d0: case 0x83d1: case 0x83d2: case 0x83d3:
649 case 0x83d4: case 0x83d5: case 0x83d6: case 0x83d7:
650 case 0x83d8: case 0x83d9: case 0x83da: case 0x83db:
651 case 0x83dc: case 0x83dd: case 0x83de: case 0x83df:
652 return s3_virge_in(addr & 0x3ff, p);
653 }
654 return 0xff;
655 }
656 static uint16_t s3_virge_mmio_read_w(uint32_t addr, void *p)
657 {
658 reg_reads++;
659 // pclog("New MMIO readw %08X\n", addr);
660 switch (addr & 0xfffe)
661 {
662 default:
663 return s3_virge_mmio_read(addr, p) | (s3_virge_mmio_read(addr + 1, p) << 8);
664 }
665 return 0xffff;
666 }
667 static uint32_t s3_virge_mmio_read_l(uint32_t addr, void *p)
668 {
669 virge_t *virge = (virge_t *)p;
670 uint32_t ret = 0xffffffff;
671 reg_reads++;
672 // pclog("New MMIO readl %08X %04X(%08X):%08X ", addr, CS, cs, pc);
673 switch (addr & 0xfffc)
674 {
675 case 0x8180:
676 ret = virge->streams.pri_ctrl;
677 break;
678 case 0x8184:
679 ret = virge->streams.chroma_ctrl;
680 break;
681 case 0x8190:
682 ret = virge->streams.sec_ctrl;
683 break;
684 case 0x8194:
685 ret = virge->streams.chroma_upper_bound;
686 break;
687 case 0x8198:
688 ret = virge->streams.sec_filter;
689 break;
690 case 0x81a0:
691 ret = virge->streams.blend_ctrl;
692 break;
693 case 0x81c0:
694 ret = virge->streams.pri_fb0;
695 break;
696 case 0x81c4:
697 ret = virge->streams.pri_fb1;
698 break;
699 case 0x81c8:
700 ret = virge->streams.pri_stride;
701 break;
702 case 0x81cc:
703 ret = virge->streams.buffer_ctrl;
704 break;
705 case 0x81d0:
706 ret = virge->streams.sec_fb0;
707 break;
708 case 0x81d4:
709 ret = virge->streams.sec_fb1;
710 break;
711 case 0x81d8:
712 ret = virge->streams.sec_stride;
713 break;
714 case 0x81dc:
715 ret = virge->streams.overlay_ctrl;
716 break;
717 case 0x81e0:
718 ret = virge->streams.k1_vert_scale;
719 break;
720 case 0x81e4:
721 ret = virge->streams.k2_vert_scale;
722 break;
723 case 0x81e8:
724 ret = virge->streams.dda_vert_accumulator;
725 break;
726 case 0x81ec:
727 ret = virge->streams.fifo_ctrl;
728 break;
729 case 0x81f0:
730 ret = virge->streams.pri_start;
731 break;
732 case 0x81f4:
733 ret = virge->streams.pri_size;
734 break;
735 case 0x81f8:
736 ret = virge->streams.sec_start;
737 break;
738 case 0x81fc:
739 ret = virge->streams.sec_size;
740 break;
742 case 0x8504:
743 if (virge->s3d_busy)
744 ret = (0x10 << 8);
745 else
746 ret = (0x10 << 8) | (1 << 13);
747 // pclog("Read status %04x %i\n", ret, virge->s3d_busy);
748 break;
749 case 0xa4d4:
750 ret = virge->s3d.src_base;
751 break;
752 case 0xa4d8:
753 ret = virge->s3d.dest_base;
754 break;
755 case 0xa4dc:
756 ret = (virge->s3d.clip_l << 16) | virge->s3d.clip_r;
757 break;
758 case 0xa4e0:
759 ret = (virge->s3d.clip_t << 16) | virge->s3d.clip_b;
760 break;
761 case 0xa4e4:
762 ret = (virge->s3d.dest_str << 16) | virge->s3d.src_str;
763 break;
764 case 0xa4e8:
765 ret = virge->s3d.mono_pat_0;
766 break;
767 case 0xa4ec:
768 ret = virge->s3d.mono_pat_1;
769 break;
770 case 0xa4f0:
771 ret = virge->s3d.pat_bg_clr;
772 break;
773 case 0xa4f4:
774 ret = virge->s3d.pat_fg_clr;
775 break;
776 case 0xa4f8:
777 ret = virge->s3d.src_bg_clr;
778 break;
779 case 0xa4fc:
780 ret = virge->s3d.src_fg_clr;
781 break;
782 case 0xa500:
783 ret = virge->s3d.cmd_set;
784 break;
785 case 0xa504:
786 ret = (virge->s3d.r_width << 16) | virge->s3d.r_height;
787 break;
788 case 0xa508:
789 ret = (virge->s3d.rsrc_x << 16) | virge->s3d.rsrc_y;
790 break;
791 case 0xa50c:
792 ret = (virge->s3d.rdest_x << 16) | virge->s3d.rdest_y;
793 break;
795 default:
796 ret = s3_virge_mmio_read_w(addr, p) | (s3_virge_mmio_read_w(addr + 2, p) << 16);
797 }
798 // /*if ((addr & 0xfffc) != 0x8504) */pclog("%02x\n", ret);
799 return ret;
800 }
801 static void s3_virge_mmio_write(uint32_t addr, uint8_t val, void *p)
802 {
803 virge_t *virge = (virge_t *)p;
804 svga_t *svga = &virge->svga;
806 // pclog("New MMIO writeb %08X %02X %04x(%08x):%08x\n", addr, val, CS, cs, pc);
807 reg_writes++;
808 if ((addr & 0xfffc) < 0x8000)
809 s3_virge_bitblt(virge, 8, val);
810 else switch (addr & 0xffff)
811 {
812 case 0x83b0: case 0x83b1: case 0x83b2: case 0x83b3:
813 case 0x83b4: case 0x83b5: case 0x83b6: case 0x83b7:
814 case 0x83b8: case 0x83b9: case 0x83ba: case 0x83bb:
815 case 0x83bc: case 0x83bd: case 0x83be: case 0x83bf:
816 case 0x83c0: case 0x83c1: case 0x83c2: case 0x83c3:
817 case 0x83c4: case 0x83c5: case 0x83c6: case 0x83c7:
818 case 0x83c8: case 0x83c9: case 0x83ca: case 0x83cb:
819 case 0x83cc: case 0x83cd: case 0x83ce: case 0x83cf:
820 case 0x83d0: case 0x83d1: case 0x83d2: case 0x83d3:
821 case 0x83d4: case 0x83d5: case 0x83d6: case 0x83d7:
822 case 0x83d8: case 0x83d9: case 0x83da: case 0x83db:
823 case 0x83dc: case 0x83dd: case 0x83de: case 0x83df:
824 s3_virge_out(addr & 0x3ff, val, p);
825 break;
826 }
829 }
830 static void s3_virge_mmio_write_w(uint32_t addr, uint16_t val, void *p)
831 {
832 virge_t *virge = (virge_t *)p;
833 reg_writes++;
834 // pclog("New MMIO writew %08X %04X %04x(%08x):%08x\n", addr, val, CS, cs, pc);
835 if ((addr & 0xfffc) < 0x8000)
836 {
837 if (virge->s3d.cmd_set & CMD_SET_MS)
838 s3_virge_bitblt(virge, 16, ((val >> 8) | (val << 8)) << 16);
839 else
840 s3_virge_bitblt(virge, 16, val);
841 }
842 else switch (addr & 0xfffe)
843 {
844 case 0x83d4:
845 s3_virge_mmio_write(addr, val, p);
846 s3_virge_mmio_write(addr + 1, val >> 8, p);
847 break;
848 }
849 }
850 static void s3_virge_mmio_write_l(uint32_t addr, uint32_t val, void *p)
851 {
852 virge_t *virge = (virge_t *)p;
853 svga_t *svga = &virge->svga;
854 reg_writes++;
855 // if ((addr & 0xfffc) >= 0xb400 && (addr & 0xfffc) < 0xb800)
856 // pclog("New MMIO writel %08X %08X %04x(%08x):%08x\n", addr, val, CS, cs, pc);
858 if ((addr & 0xfffc) < 0x8000)
859 {
860 if (virge->s3d.cmd_set & CMD_SET_MS)
861 s3_virge_bitblt(virge, 32, ((val & 0xff000000) >> 24) | ((val & 0x00ff0000) >> 8) | ((val & 0x0000ff00) << 8) | ((val & 0x000000ff) << 24));
862 else
863 s3_virge_bitblt(virge, 32, val);
864 }
865 else switch (addr & 0xfffc)
866 {
867 case 0x8180:
868 virge->streams.pri_ctrl = val;
869 s3_virge_recalctimings(svga);
870 svga->fullchange = changeframecount;
871 break;
872 case 0x8184:
873 virge->streams.chroma_ctrl = val;
874 break;
875 case 0x8190:
876 virge->streams.sec_ctrl = val;
877 virge->streams.dda_horiz_accumulator = val & 0xfff;
878 if (val & (1 << 11))
879 virge->streams.dda_horiz_accumulator |= 0xfffff800;
880 virge->streams.sdif = (val >> 24) & 7;
881 break;
882 case 0x8194:
883 virge->streams.chroma_upper_bound = val;
884 break;
885 case 0x8198:
886 virge->streams.sec_filter = val;
887 virge->streams.k1_horiz_scale = val & 0x7ff;
888 if (val & (1 << 10))
889 virge->streams.k1_horiz_scale |= 0xfffff800;
890 virge->streams.k2_horiz_scale = (val >> 16) & 0x7ff;
891 if ((val >> 16) & (1 << 10))
892 virge->streams.k2_horiz_scale |= 0xfffff800;
893 break;
894 case 0x81a0:
895 virge->streams.blend_ctrl = val;
896 break;
897 case 0x81c0:
898 // pclog("Write pri_fb0 %08x\n", val);
899 virge->streams.pri_fb0 = val & 0x3fffff;
900 s3_virge_recalctimings(svga);
901 svga->fullchange = changeframecount;
902 break;
903 case 0x81c4:
904 // pclog("Write pri_fb1 %08x\n", val);
905 virge->streams.pri_fb1 = val & 0x3fffff;
906 s3_virge_recalctimings(svga);
907 svga->fullchange = changeframecount;
908 break;
909 case 0x81c8:
910 virge->streams.pri_stride = val & 0xfff;
911 s3_virge_recalctimings(svga);
912 svga->fullchange = changeframecount;
913 break;
914 case 0x81cc:
915 // pclog("Write buffer_ctrl %08x\n", val);
916 virge->streams.buffer_ctrl = val;
917 s3_virge_recalctimings(svga);
918 svga->fullchange = changeframecount;
919 break;
920 case 0x81d0:
921 virge->streams.sec_fb0 = val;
922 s3_virge_recalctimings(svga);
923 svga->fullchange = changeframecount;
924 break;
925 case 0x81d4:
926 virge->streams.sec_fb1 = val;
927 s3_virge_recalctimings(svga);
928 svga->fullchange = changeframecount;
929 break;
930 case 0x81d8:
931 virge->streams.sec_stride = val;
932 s3_virge_recalctimings(svga);
933 svga->fullchange = changeframecount;
934 break;
935 case 0x81dc:
936 virge->streams.overlay_ctrl = val;
937 break;
938 case 0x81e0:
939 virge->streams.k1_vert_scale = val & 0x7ff;
940 if (val & (1 << 10))
941 virge->streams.k1_vert_scale |= 0xfffff800;
942 break;
943 case 0x81e4:
944 virge->streams.k2_vert_scale = val & 0x7ff;
945 if (val & (1 << 10))
946 virge->streams.k2_vert_scale |= 0xfffff800;
947 break;
948 case 0x81e8:
949 virge->streams.dda_vert_accumulator = val & 0xfff;
950 if (val & (1 << 11))
951 virge->streams.dda_vert_accumulator |= 0xfffff800;
952 break;
953 case 0x81ec:
954 virge->streams.fifo_ctrl = val;
955 break;
956 case 0x81f0:
957 virge->streams.pri_start = val;
958 virge->streams.pri_x = (val >> 16) & 0x7ff;
959 virge->streams.pri_y = val & 0x7ff;
960 s3_virge_recalctimings(svga);
961 svga->fullchange = changeframecount;
962 break;
963 case 0x81f4:
964 virge->streams.pri_size = val;
965 virge->streams.pri_w = (val >> 16) & 0x7ff;
966 virge->streams.pri_h = val & 0x7ff;
967 s3_virge_recalctimings(svga);
968 svga->fullchange = changeframecount;
969 break;
970 case 0x81f8:
971 virge->streams.sec_start = val;
972 virge->streams.sec_x = (val >> 16) & 0x7ff;
973 virge->streams.sec_y = val & 0x7ff;
974 s3_virge_recalctimings(svga);
975 svga->fullchange = changeframecount;
976 break;
977 case 0x81fc:
978 virge->streams.sec_size = val;
979 virge->streams.sec_w = (val >> 16) & 0x7ff;
980 virge->streams.sec_h = val & 0x7ff;
981 s3_virge_recalctimings(svga);
982 svga->fullchange = changeframecount;
983 break;
985 case 0xa000: case 0xa004: case 0xa008: case 0xa00c:
986 case 0xa010: case 0xa014: case 0xa018: case 0xa01c:
987 case 0xa020: case 0xa024: case 0xa028: case 0xa02c:
988 case 0xa030: case 0xa034: case 0xa038: case 0xa03c:
989 case 0xa040: case 0xa044: case 0xa048: case 0xa04c:
990 case 0xa050: case 0xa054: case 0xa058: case 0xa05c:
991 case 0xa060: case 0xa064: case 0xa068: case 0xa06c:
992 case 0xa070: case 0xa074: case 0xa078: case 0xa07c:
993 case 0xa080: case 0xa084: case 0xa088: case 0xa08c:
994 case 0xa090: case 0xa094: case 0xa098: case 0xa09c:
995 case 0xa0a0: case 0xa0a4: case 0xa0a8: case 0xa0ac:
996 case 0xa0b0: case 0xa0b4: case 0xa0b8: case 0xa0bc:
997 case 0xa0c0: case 0xa0c4: case 0xa0c8: case 0xa0cc:
998 case 0xa0d0: case 0xa0d4: case 0xa0d8: case 0xa0dc:
999 case 0xa0e0: case 0xa0e4: case 0xa0e8: case 0xa0ec:
1000 case 0xa0f0: case 0xa0f4: case 0xa0f8: case 0xa0fc:
1001 case 0xa100: case 0xa104: case 0xa108: case 0xa10c:
1002 case 0xa110: case 0xa114: case 0xa118: case 0xa11c:
1003 case 0xa120: case 0xa124: case 0xa128: case 0xa12c:
1004 case 0xa130: case 0xa134: case 0xa138: case 0xa13c:
1005 case 0xa140: case 0xa144: case 0xa148: case 0xa14c:
1006 case 0xa150: case 0xa154: case 0xa158: case 0xa15c:
1007 case 0xa160: case 0xa164: case 0xa168: case 0xa16c:
1008 case 0xa170: case 0xa174: case 0xa178: case 0xa17c:
1009 case 0xa180: case 0xa184: case 0xa188: case 0xa18c:
1010 case 0xa190: case 0xa194: case 0xa198: case 0xa19c:
1011 case 0xa1a0: case 0xa1a4: case 0xa1a8: case 0xa1ac:
1012 case 0xa1b0: case 0xa1b4: case 0xa1b8: case 0xa1bc:
1013 case 0xa1c0: case 0xa1c4: case 0xa1c8: case 0xa1cc:
1014 case 0xa1d0: case 0xa1d4: case 0xa1d8: case 0xa1dc:
1015 case 0xa1e0: case 0xa1e4: case 0xa1e8: case 0xa1ec:
1016 case 0xa1f0: case 0xa1f4: case 0xa1f8: case 0xa1fc:
1018 int x = addr & 4;
1019 int y = (addr >> 3) & 7;
1020 virge->s3d.pattern_8[y*8 + x] = val & 0xff;
1021 virge->s3d.pattern_8[y*8 + x + 1] = val >> 8;
1022 virge->s3d.pattern_8[y*8 + x + 2] = val >> 16;
1023 virge->s3d.pattern_8[y*8 + x + 3] = val >> 24;
1025 x = (addr >> 1) & 6;
1026 y = (addr >> 4) & 7;
1027 virge->s3d.pattern_16[y*8 + x] = val & 0xffff;
1028 virge->s3d.pattern_16[y*8 + x + 1] = val >> 16;
1030 x = (addr >> 2) & 7;
1031 y = (addr >> 5) & 7;
1032 virge->s3d.pattern_32[y*8 + x] = val & 0xffffff;
1034 break;
1036 case 0xa4d4: case 0xa8d4:
1037 virge->s3d.src_base = val & 0x3ffff8;
1038 break;
1039 case 0xa4d8: case 0xa8d8:
1040 virge->s3d.dest_base = val & 0x3ffff8;
1041 break;
1042 case 0xa4dc: case 0xa8dc:
1043 virge->s3d.clip_l = (val >> 16) & 0x7ff;
1044 virge->s3d.clip_r = val & 0x7ff;
1045 break;
1046 case 0xa4e0: case 0xa8e0:
1047 virge->s3d.clip_t = (val >> 16) & 0x7ff;
1048 virge->s3d.clip_b = val & 0x7ff;
1049 break;
1050 case 0xa4e4: case 0xa8e4:
1051 virge->s3d.dest_str = (val >> 16) & 0xff8;
1052 virge->s3d.src_str = val & 0xff8;
1053 break;
1054 case 0xa4e8:
1055 virge->s3d.mono_pat_0 = val;
1056 break;
1057 case 0xa4ec:
1058 virge->s3d.mono_pat_1 = val;
1059 break;
1060 case 0xa4f0:
1061 virge->s3d.pat_bg_clr = val;
1062 break;
1063 case 0xa4f4: case 0xa8f4:
1064 virge->s3d.pat_fg_clr = val;
1065 break;
1066 case 0xa4f8:
1067 virge->s3d.src_bg_clr = val;
1068 break;
1069 case 0xa4fc:
1070 virge->s3d.src_fg_clr = val;
1071 break;
1072 case 0xa500: case 0xa900:
1073 virge->s3d.cmd_set = val;
1074 if (!(val & CMD_SET_AE))
1075 s3_virge_bitblt(virge, -1, 0);
1076 break;
1077 case 0xa504:
1078 virge->s3d.r_width = (val >> 16) & 0x7ff;
1079 virge->s3d.r_height = val & 0x7ff;
1080 break;
1081 case 0xa508:
1082 virge->s3d.rsrc_x = (val >> 16) & 0x7ff;
1083 virge->s3d.rsrc_y = val & 0x7ff;
1084 break;
1085 case 0xa50c:
1086 virge->s3d.rdest_x = (val >> 16) & 0x7ff;
1087 virge->s3d.rdest_y = val & 0x7ff;
1088 if (virge->s3d.cmd_set & CMD_SET_AE)
1089 s3_virge_bitblt(virge, -1, 0);
1090 break;
1091 case 0xa96c:
1092 virge->s3d.lxend0 = (val >> 16) & 0x7ff;
1093 virge->s3d.lxend1 = val & 0x7ff;
1094 break;
1095 case 0xa970:
1096 virge->s3d.ldx = (int32_t)val;
1097 break;
1098 case 0xa974:
1099 virge->s3d.lxstart = val;
1100 break;
1101 case 0xa978:
1102 virge->s3d.lystart = val & 0x7ff;
1103 break;
1104 case 0xa97c:
1105 virge->s3d.lycnt = val & 0x7ff;
1106 virge->s3d.line_dir = val >> 31;
1107 if (virge->s3d.cmd_set & CMD_SET_AE)
1108 s3_virge_bitblt(virge, -1, 0);
1109 break;
1111 case 0xb4d4:
1112 virge->s3d_tri.z_base = val & 0x3ffff8;
1113 break;
1114 case 0xb4d8:
1115 virge->s3d_tri.dest_base = val & 0x3ffff8;
1116 break;
1117 case 0xb4dc:
1118 virge->s3d_tri.clip_l = (val >> 16) & 0x7ff;
1119 virge->s3d_tri.clip_r = val & 0x7ff;
1120 break;
1121 case 0xb4e0:
1122 virge->s3d_tri.clip_t = (val >> 16) & 0x7ff;
1123 virge->s3d_tri.clip_b = val & 0x7ff;
1124 break;
1125 case 0xb4e4:
1126 virge->s3d_tri.dest_str = (val >> 16) & 0xff8;
1127 virge->s3d.src_str = val & 0xff8;
1128 break;
1129 case 0xb4e8:
1130 virge->s3d_tri.z_str = val & 0xff8;
1131 break;
1132 case 0xb4ec:
1133 virge->s3d_tri.tex_base = val & 0x3ffff8;
1134 break;
1135 case 0xb4f0:
1136 virge->s3d_tri.tex_bdr_clr = val & 0xffffff;
1137 break;
1138 case 0xb500:
1139 virge->s3d_tri.cmd_set = val;
1140 if (!(val & CMD_SET_AE))
1141 queue_triangle(virge);
1142 /* {
1143 thread_set_event(virge->wake_render_thread);
1144 thread_wait_event(virge->wake_main_thread, -1);
1145 } */
1146 // s3_virge_triangle(virge);
1147 break;
1148 case 0xb504:
1149 virge->s3d_tri.tbv = val & 0xfffff;
1150 break;
1151 case 0xb508:
1152 virge->s3d_tri.tbu = val & 0xfffff;
1153 break;
1154 case 0xb50c:
1155 virge->s3d_tri.TdWdX = val;
1156 break;
1157 case 0xb510:
1158 virge->s3d_tri.TdWdY = val;
1159 break;
1160 case 0xb514:
1161 virge->s3d_tri.tws = val;
1162 break;
1163 case 0xb518:
1164 virge->s3d_tri.TdDdX = val;
1165 break;
1166 case 0xb51c:
1167 virge->s3d_tri.TdVdX = val;
1168 break;
1169 case 0xb520:
1170 virge->s3d_tri.TdUdX = val;
1171 break;
1172 case 0xb524:
1173 virge->s3d_tri.TdDdY = val;
1174 break;
1175 case 0xb528:
1176 virge->s3d_tri.TdVdY = val;
1177 break;
1178 case 0xb52c:
1179 virge->s3d_tri.TdUdY = val;
1180 break;
1181 case 0xb530:
1182 virge->s3d_tri.tds = val;
1183 break;
1184 case 0xb534:
1185 virge->s3d_tri.tvs = val;
1186 break;
1187 case 0xb538:
1188 virge->s3d_tri.tus = val;
1189 break;
1190 case 0xb53c:
1191 virge->s3d_tri.TdGdX = val >> 16;
1192 virge->s3d_tri.TdBdX = val & 0xffff;
1193 break;
1194 case 0xb540:
1195 virge->s3d_tri.TdAdX = val >> 16;
1196 virge->s3d_tri.TdRdX = val & 0xffff;
1197 break;
1198 case 0xb544:
1199 virge->s3d_tri.TdGdY = val >> 16;
1200 virge->s3d_tri.TdBdY = val & 0xffff;
1201 break;
1202 case 0xb548:
1203 virge->s3d_tri.TdAdY = val >> 16;
1204 virge->s3d_tri.TdRdY = val & 0xffff;
1205 break;
1206 case 0xb54c:
1207 virge->s3d_tri.tgs = (val >> 16) & 0xffff;
1208 virge->s3d_tri.tbs = val & 0xffff;
1209 break;
1210 case 0xb550:
1211 virge->s3d_tri.tas = (val >> 16) & 0xffff;
1212 virge->s3d_tri.trs = val & 0xffff;
1213 break;
1215 case 0xb554:
1216 virge->s3d_tri.TdZdX = val;
1217 break;
1218 case 0xb558:
1219 virge->s3d_tri.TdZdY = val;
1220 break;
1221 case 0xb55c:
1222 virge->s3d_tri.tzs = val;
1223 break;
1224 case 0xb560:
1225 virge->s3d_tri.TdXdY12 = val;
1226 break;
1227 case 0xb564:
1228 virge->s3d_tri.txend12 = val;
1229 break;
1230 case 0xb568:
1231 virge->s3d_tri.TdXdY01 = val;
1232 break;
1233 case 0xb56c:
1234 virge->s3d_tri.txend01 = val;
1235 break;
1236 case 0xb570:
1237 virge->s3d_tri.TdXdY02 = val;
1238 break;
1239 case 0xb574:
1240 virge->s3d_tri.txs = val;
1241 break;
1242 case 0xb578:
1243 virge->s3d_tri.tys = val;
1244 break;
1245 case 0xb57c:
1246 virge->s3d_tri.ty01 = (val >> 16) & 0x7ff;
1247 virge->s3d_tri.ty12 = val & 0x7ff;
1248 virge->s3d_tri.tlr = val >> 31;
1249 if (virge->s3d_tri.cmd_set & CMD_SET_AE)
1250 queue_triangle(virge);
1251 /* {
1252 thread_set_event(virge->wake_render_thread);
1253 thread_wait_event(virge->wake_main_thread, -1);
1254 }*/
1256 // s3_virge_triangle(virge);
1257 break;
/*Read one pixel from video memory into val.

  addr - byte address of the pixel; every byte access is wrapped to the
         0x3fffff window
  val  - lvalue receiving the pixel (8, 16 or 24 significant bits)

  Expects `vram` (byte pointer) and `bpp` (0 = 8 bpp, 1 = 16 bpp, 2 = 24 bpp)
  to be in scope at the point of use. Any other bpp leaves val untouched.

  The pixel is assembled byte by byte, low byte first, so the access never
  depends on alignment and a pixel at the very top of the window wraps round
  rather than reading past it.
  NOTE(review): wrap-around at the top of the window is assumed to be the
  wanted behaviour (WRITE already wrapped its fourth byte) - confirm.*/
#define READ(addr, val)                                                         \
        do                                                                      \
        {                                                                       \
                const uint32_t read_addr_ = (uint32_t)(addr);                   \
                switch (bpp)                                                    \
                {                                                               \
                        case 0: /*8 bpp*/                                       \
                        (val) = vram[read_addr_ & 0x3fffff];                    \
                        break;                                                  \
                        case 1: /*16 bpp*/                                      \
                        (val) = (uint32_t)vram[read_addr_ & 0x3fffff] |         \
                                ((uint32_t)vram[(read_addr_ + 1) & 0x3fffff] << 8); \
                        break;                                                  \
                        case 2: /*24 bpp*/                                      \
                        (val) = (uint32_t)vram[read_addr_ & 0x3fffff] |         \
                                ((uint32_t)vram[(read_addr_ + 1) & 0x3fffff] << 8) | \
                                ((uint32_t)vram[(read_addr_ + 2) & 0x3fffff] << 16); \
                        break;                                                  \
                        default:                                                \
                        break;                                                  \
                }                                                               \
        } while (0)
/*16-bit Z-buffer entry at byte address addr (wrapped to the 0x3fffff window).
  Expands to an lvalue, exactly as before; the argument and the whole
  expansion are now parenthesised so expression arguments parse as intended.
  Expects `vram` to be in scope.*/
#define Z_READ(addr) (*(uint16_t *)&vram[(addr) & 0x3fffff])
/*Store the 16-bit value val in the Z buffer at byte address addr (wrapped to
  the 0x3fffff window), unless CMD_SET_ZB_MODE is set in s3d_tri->cmd_set.
  Wrapped in do/while(0) so the macro is a single statement and cannot
  capture an `else` that follows it.
  Expects `vram` and `s3d_tri` to be in scope.*/
#define Z_WRITE(addr, val)                                              \
        do                                                              \
        {                                                               \
                if (!(s3d_tri->cmd_set & CMD_SET_ZB_MODE))              \
                        *(uint16_t *)&vram[(addr) & 0x3fffff] = (val);  \
        } while (0)
/*2D engine clip test: clears `update` when CMD_SET_HC is set in
  virge->s3d.cmd_set and (x, y) lies outside the inclusive rectangle
  clip_l..clip_r / clip_t..clip_b. `update` is never set, only cleared.
  x and y may be evaluated more than once, so pass side-effect-free
  expressions. Expects `virge` and `update` to be in scope.*/
#define CLIP(x, y)                                              \
        do                                                      \
        {                                                       \
                if ((virge->s3d.cmd_set & CMD_SET_HC) &&        \
                    ((x) < virge->s3d.clip_l ||                 \
                     (x) > virge->s3d.clip_r ||                 \
                     (y) < virge->s3d.clip_t ||                 \
                     (y) > virge->s3d.clip_b))                  \
                        update = 0;                             \
        } while (0)
/*3D engine clip test: same as CLIP but against the clip rectangle and
  CMD_SET_HC flag held in s3d_tri. Clears `update` when (x, y) is outside the
  inclusive rectangle; never sets it.
  x and y may be evaluated more than once. Expects `s3d_tri` and `update` to
  be in scope.*/
#define CLIP_3D(x, y)                                           \
        do                                                      \
        {                                                       \
                if ((s3d_tri->cmd_set & CMD_SET_HC) &&          \
                    ((x) < s3d_tri->clip_l ||                   \
                     (x) > s3d_tri->clip_r ||                   \
                     (y) < s3d_tri->clip_t ||                   \
                     (y) > s3d_tri->clip_b))                    \
                        update = 0;                             \
        } while (0)
/*Depth test. Does nothing when CMD_SET_ZB_MODE is set in s3d_tri->cmd_set;
  otherwise bits 20-22 of cmd_set choose the comparison:

    0  always fail              4  pass when Zs <  Zzb
    1  pass when Zs >  Zzb      5  pass when Zs != Zzb
    2  pass when Zs == Zzb      6  pass when Zs <= Zzb
    3  pass when Zs >= Zzb      7  always pass

  Zzb - lvalue holding the value read from the Z buffer; overwritten with Zs
        when the test passes
  Zs  - depth of the incoming pixel

  A failed test clears `update`. Mode 7 also forces `update` to 1.
  NOTE(review): mode 7 therefore overrides an earlier clip rejection if the
  caller clips before depth testing - confirm that ordering at the call site.
  Arguments may be evaluated more than once. Expects `s3d_tri` and `update`
  to be in scope.*/
#define Z_CLIP(Zzb, Zs)                                                                 \
        do                                                                              \
        {                                                                               \
                if (!(s3d_tri->cmd_set & CMD_SET_ZB_MODE))                              \
                {                                                                       \
                        switch ((s3d_tri->cmd_set >> 20) & 7)                           \
                        {                                                               \
                                case 0: update = 0; break;                              \
                                case 1: if ((Zs) <= (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 2: if ((Zs) != (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 3: if ((Zs) <  (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 4: if ((Zs) >= (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 5: if ((Zs) == (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 6: if ((Zs) >  (Zzb)) update = 0; else (Zzb) = (Zs); break; \
                                case 7: update = 1; (Zzb) = (Zs); break;                \
                        }                                                               \
                }                                                                       \
        } while (0)
/*Apply the 8-bit raster operation virge->s3d.rop to `dest`, `source` and
  `pattern`, OR-ing the 24-bit result into `out`.

  The ROP is a truth table: for each pixel bit, dest selects bit 0, source
  bit 1 and pattern bit 2 of an index, and the ROP bit at that index is the
  output bit. Instead of walking all 24 pixel bits, each of the 8 table
  entries that is set contributes the mask of pixel bits matching that entry,
  which gives the same result in 8 steps.

  Expects `virge`, `dest`, `source`, `pattern` and `out` to be in scope;
  `out` is only OR-ed into, so the caller clears it first.*/
#define MIX()                                                                           \
        do                                                                              \
        {                                                                               \
                int mix_term_;                                                          \
                for (mix_term_ = 0; mix_term_ < 8; mix_term_++)                         \
                {                                                                       \
                        uint32_t mix_sel_;                                              \
                        if (!(virge->s3d.rop & (1 << mix_term_)))                       \
                                continue;                                               \
                        mix_sel_  = (mix_term_ & 1) ? (uint32_t)dest    : ~(uint32_t)dest;    \
                        mix_sel_ &= (mix_term_ & 2) ? (uint32_t)source  : ~(uint32_t)source;  \
                        mix_sel_ &= (mix_term_ & 4) ? (uint32_t)pattern : ~(uint32_t)pattern; \
                        out |= mix_sel_ & 0xffffff;                                     \
                }                                                                       \
        } while (0)
/*Write one pixel to video memory and flag the touched memory as changed.

  addr - byte address of the pixel; every byte access is wrapped to the
         0x3fffff window
  val  - pixel value; only the low 8, 16 or 24 bits are stored

  Expects `vram`, `bpp` (0 = 8 bpp, 1 = 16 bpp, 2 = 24 bpp), `virge` and
  `changeframecount` to be in scope. Any other bpp writes nothing.

  Bytes are stored individually, low byte first, so a 24 bpp pixel touches
  exactly three bytes and the access does not depend on alignment. The 4K
  page of the last byte is flagged as well as that of the first, so a pixel
  straddling a page boundary marks both pages.*/
#define WRITE(addr, val)                                                                \
        do                                                                              \
        {                                                                               \
                const uint32_t write_addr_ = (uint32_t)(addr);                          \
                const uint32_t write_val_ = (uint32_t)(val);                            \
                switch (bpp)                                                            \
                {                                                                       \
                        case 0: /*8 bpp*/                                               \
                        vram[write_addr_ & 0x3fffff] = write_val_ & 0xff;               \
                        virge->svga.changedvram[(write_addr_ & 0x3fffff) >> 12] = changeframecount; \
                        break;                                                          \
                        case 1: /*16 bpp*/                                              \
                        vram[write_addr_ & 0x3fffff] = write_val_ & 0xff;               \
                        vram[(write_addr_ + 1) & 0x3fffff] = (write_val_ >> 8) & 0xff;  \
                        virge->svga.changedvram[(write_addr_ & 0x3fffff) >> 12] = changeframecount; \
                        virge->svga.changedvram[((write_addr_ + 1) & 0x3fffff) >> 12] = changeframecount; \
                        break;                                                          \
                        case 2: /*24 bpp*/                                              \
                        vram[write_addr_ & 0x3fffff] = write_val_ & 0xff;               \
                        vram[(write_addr_ + 1) & 0x3fffff] = (write_val_ >> 8) & 0xff;  \
                        vram[(write_addr_ + 2) & 0x3fffff] = (write_val_ >> 16) & 0xff; \
                        virge->svga.changedvram[(write_addr_ & 0x3fffff) >> 12] = changeframecount; \
                        virge->svga.changedvram[((write_addr_ + 2) & 0x3fffff) >> 12] = changeframecount; \
                        break;                                                          \
                        default:                                                        \
                        break;                                                          \
                }                                                                       \
        } while (0)
1355 static void s3_virge_bitblt(virge_t *virge, int count, uint32_t cpu_dat)
1357 int cpu_input = (count != -1);
1358 uint8_t *vram = virge->svga.vram;
1359 uint32_t mono_pattern[64];
1360 int count_mask;
1361 int x_inc = (virge->s3d.cmd_set & CMD_SET_XP) ? 1 : -1;
1362 int y_inc = (virge->s3d.cmd_set & CMD_SET_YP) ? 1 : -1;
1363 int bpp;
1364 int x_mul;
1365 int cpu_dat_shift;
1366 uint32_t *pattern_data;
1368 switch (virge->s3d.cmd_set & CMD_SET_FORMAT_MASK)
1370 case CMD_SET_FORMAT_8:
1371 bpp = 0;
1372 x_mul = 1;
1373 cpu_dat_shift = 8;
1374 pattern_data = virge->s3d.pattern_8;
1375 break;
1376 case CMD_SET_FORMAT_16:
1377 bpp = 1;
1378 x_mul = 2;
1379 cpu_dat_shift = 16;
1380 pattern_data = virge->s3d.pattern_16;
1381 break;
1382 case CMD_SET_FORMAT_24:
1383 default:
1384 bpp = 2;
1385 x_mul = 3;
1386 cpu_dat_shift = 24;
1387 pattern_data = virge->s3d.pattern_32;
1388 break;
1390 if (virge->s3d.cmd_set & CMD_SET_MP)
1391 pattern_data = mono_pattern;
1393 switch (virge->s3d.cmd_set & CMD_SET_ITA_MASK)
1395 case CMD_SET_ITA_BYTE:
1396 count_mask = ~0x7;
1397 break;
1398 case CMD_SET_ITA_WORD:
1399 count_mask = ~0xf;
1400 break;
1401 case CMD_SET_ITA_DWORD:
1402 default:
1403 count_mask = ~0x1f;
1404 break;
1406 if (virge->s3d.cmd_set & CMD_SET_MP)
1408 int x, y;
1409 for (y = 0; y < 4; y++)
1411 for (x = 0; x < 8; x++)
1413 if (virge->s3d.mono_pat_0 & (1 << (x + y*8)))
1414 mono_pattern[y*8 + x] = virge->s3d.pat_fg_clr;
1415 else
1416 mono_pattern[y*8 + x] = virge->s3d.pat_bg_clr;
1417 if (virge->s3d.mono_pat_1 & (1 << (x + y*8)))
1418 mono_pattern[(y+4)*8 + x] = virge->s3d.pat_fg_clr;
1419 else
1420 mono_pattern[(y+4)*8 + x] = virge->s3d.pat_bg_clr;
1424 switch (virge->s3d.cmd_set & CMD_SET_COMMAND_MASK)
1426 case CMD_SET_COMMAND_NOP:
1427 break;
1429 case CMD_SET_COMMAND_BITBLT:
1430 if (count == -1)
1432 virge->s3d.src_x = virge->s3d.rsrc_x;
1433 virge->s3d.src_y = virge->s3d.rsrc_y;
1434 virge->s3d.dest_x = virge->s3d.rdest_x;
1435 virge->s3d.dest_y = virge->s3d.rdest_y;
1436 virge->s3d.w = virge->s3d.r_width;
1437 virge->s3d.h = virge->s3d.r_height;
1438 virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff;
1439 virge->s3d.data_left_count = 0;
1441 /* pclog("BitBlt start %i,%i %i,%i %i,%i %02X %x %x\n",
1442 virge->s3d.src_x,
1443 virge->s3d.src_y,
1444 virge->s3d.dest_x,
1445 virge->s3d.dest_y,
1446 virge->s3d.w,
1447 virge->s3d.h,
1448 virge->s3d.rop,
1449 virge->s3d.src_base,
1450 virge->s3d.dest_base);*/
1452 if (virge->s3d.cmd_set & CMD_SET_IDS)
1453 return;
1455 if (!virge->s3d.h)
1456 return;
1457 while (count)
1459 uint32_t src_addr = virge->s3d.src_base + (virge->s3d.src_x * x_mul) + (virge->s3d.src_y * virge->s3d.src_str);
1460 uint32_t dest_addr = virge->s3d.dest_base + (virge->s3d.dest_x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str);
1461 uint32_t source, dest, pattern;
1462 uint32_t out = 0;
1463 int update = 1;
1465 switch (virge->s3d.cmd_set & (CMD_SET_MS | CMD_SET_IDS))
1467 case 0:
1468 case CMD_SET_MS:
1469 READ(src_addr, source);
1470 if ((virge->s3d.cmd_set & CMD_SET_TP) && source == virge->s3d.src_fg_clr)
1471 update = 0;
1472 break;
1473 case CMD_SET_IDS:
1474 if (virge->s3d.data_left_count)
1476 /*Handle shifting for 24-bit data*/
1477 source = virge->s3d.data_left;
1478 source |= ((cpu_dat << virge->s3d.data_left_count) & ~0xff000000);
1479 cpu_dat >>= (cpu_dat_shift - virge->s3d.data_left_count);
1480 count -= (cpu_dat_shift - virge->s3d.data_left_count);
1481 virge->s3d.data_left_count = 0;
1482 if (count < cpu_dat_shift)
1484 virge->s3d.data_left = cpu_dat;
1485 virge->s3d.data_left_count = count;
1486 count = 0;
1489 else
1491 source = cpu_dat;
1492 cpu_dat >>= cpu_dat_shift;
1493 count -= cpu_dat_shift;
1494 if (count < cpu_dat_shift)
1496 virge->s3d.data_left = cpu_dat;
1497 virge->s3d.data_left_count = count;
1498 count = 0;
1501 if ((virge->s3d.cmd_set & CMD_SET_TP) && source == virge->s3d.src_fg_clr)
1502 update = 0;
1503 break;
1504 case CMD_SET_IDS | CMD_SET_MS:
1505 source = (cpu_dat & (1 << 31)) ? virge->s3d.src_fg_clr : virge->s3d.src_bg_clr;
1506 if ((virge->s3d.cmd_set & CMD_SET_TP) && !(cpu_dat & (1 << 31)))
1507 update = 0;
1508 cpu_dat <<= 1;
1509 count--;
1510 break;
1513 CLIP(virge->s3d.dest_x, virge->s3d.dest_y);
1515 if (update)
1517 READ(dest_addr, dest);
1518 pattern = pattern_data[(virge->s3d.dest_y & 7)*8 + (virge->s3d.dest_x & 7)];
1519 MIX();
1521 WRITE(dest_addr, out);
1524 virge->s3d.src_x += x_inc;
1525 virge->s3d.dest_x += x_inc;
1526 if (!virge->s3d.w)
1528 virge->s3d.src_x = virge->s3d.rsrc_x;
1529 virge->s3d.dest_x = virge->s3d.rdest_x;
1530 virge->s3d.w = virge->s3d.r_width;
1532 virge->s3d.src_y += y_inc;
1533 virge->s3d.dest_y += y_inc;
1534 virge->s3d.h--;
1536 switch (virge->s3d.cmd_set & (CMD_SET_MS | CMD_SET_IDS))
1538 case CMD_SET_IDS:
1539 cpu_dat >>= (count - (count & count_mask));
1540 count &= count_mask;
1541 virge->s3d.data_left_count = 0;
1542 break;
1544 case CMD_SET_IDS | CMD_SET_MS:
1545 cpu_dat <<= (count - (count & count_mask));
1546 count &= count_mask;
1547 break;
1549 if (!virge->s3d.h)
1551 return;
1554 else
1555 virge->s3d.w--;
1557 break;
1559 case CMD_SET_COMMAND_RECTFILL:
1560 /*No source, pattern = pat_fg_clr*/
1561 if (count == -1)
1563 virge->s3d.src_x = virge->s3d.rsrc_x;
1564 virge->s3d.src_y = virge->s3d.rsrc_y;
1565 virge->s3d.dest_x = virge->s3d.rdest_x;
1566 virge->s3d.dest_y = virge->s3d.rdest_y;
1567 virge->s3d.w = virge->s3d.r_width;
1568 virge->s3d.h = virge->s3d.r_height;
1569 virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff;
1571 /* pclog("RctFll start %i,%i %i,%i %02X %08x\n", virge->s3d.dest_x,
1572 virge->s3d.dest_y,
1573 virge->s3d.w,
1574 virge->s3d.h,
1575 virge->s3d.rop, virge->s3d.dest_base);*/
1578 while (count)
1580 uint32_t dest_addr = virge->s3d.dest_base + (virge->s3d.dest_x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str);
1581 uint32_t source = 0, dest, pattern = virge->s3d.pat_fg_clr;
1582 uint32_t out = 0;
1583 int update = 1;
1585 CLIP(virge->s3d.dest_x, virge->s3d.dest_y);
1587 if (update)
1589 READ(dest_addr, dest);
1591 MIX();
1593 WRITE(dest_addr, out);
1596 virge->s3d.src_x += x_inc;
1597 virge->s3d.dest_x += x_inc;
1598 if (!virge->s3d.w)
1600 virge->s3d.src_x = virge->s3d.rsrc_x;
1601 virge->s3d.dest_x = virge->s3d.rdest_x;
1602 virge->s3d.w = virge->s3d.r_width;
1604 virge->s3d.src_y += y_inc;
1605 virge->s3d.dest_y += y_inc;
1606 virge->s3d.h--;
1607 if (!virge->s3d.h)
1609 return;
1612 else
1613 virge->s3d.w--;
1614 count--;
1616 break;
1618 case CMD_SET_COMMAND_LINE:
1619 if (count == -1)
1621 virge->s3d.dest_x = virge->s3d.lxstart;
1622 virge->s3d.dest_y = virge->s3d.lystart;
1623 virge->s3d.h = virge->s3d.lycnt;
1624 virge->s3d.rop = (virge->s3d.cmd_set >> 17) & 0xff;
1625 if (virge->s3d.ldx >= 0)
1626 virge->s3d.dest_x -= virge->s3d.ldx / 2;
1627 else
1628 virge->s3d.dest_x += virge->s3d.ldx / 2;
1629 //virge->s3d.dest_dest_x = virge->s3d.dest_x + virge->s3d.ldx;
1631 while (virge->s3d.h)
1633 int x = virge->s3d.dest_x >> 20;
1634 int new_x = (virge->s3d.dest_x + virge->s3d.ldx) >> 20;
1636 do
1638 uint32_t dest_addr = virge->s3d.dest_base + (x * x_mul) + (virge->s3d.dest_y * virge->s3d.dest_str);
1639 uint32_t source = 0, dest, pattern;
1640 uint32_t out = 0;
1641 int update = 1;
1643 CLIP(x, virge->s3d.dest_y);
1645 if (update)
1647 READ(dest_addr, dest);
1648 pattern = virge->s3d.pat_fg_clr;
1650 MIX();
1652 WRITE(dest_addr, out);
1655 if (x < new_x)
1656 x++;
1657 else if (x > new_x)
1658 x--;
1659 } while (x != new_x);
1661 virge->s3d.dest_x += virge->s3d.ldx;
1662 virge->s3d.dest_y--;
1663 virge->s3d.h--;
1665 break;
1667 default:
1668 fatal("s3_virge_bitblt : blit command %i %08x\n", (virge->s3d.cmd_set >> 27) & 0xf, virge->s3d.cmd_set);
/*Expand a 15-bit 1:5:5:5 pixel into 8-bit r, g and b. Each 5-bit channel is
  shifted up to 8 bits and its top three bits are copied into the low bits, so
  0x1f becomes 0xff.
  Wrapped in do/while(0) so all three assignments stay under a controlling
  `if`. val may be evaluated more than once.*/
#define RGB15_TO_24(val, r, g, b)                                               \
        do                                                                      \
        {                                                                       \
                (b) = (((val) & 0x001f) << 3) | (((val) & 0x001f) >> 2);        \
                (g) = (((val) & 0x03e0) >> 2) | (((val) & 0x03e0) >> 7);        \
                (r) = (((val) & 0x7c00) >> 7) | (((val) & 0x7c00) >> 12);       \
        } while (0)
/*Split a 24-bit pixel into 8-bit r (bits 16-23), g (bits 8-15) and b
  (bits 0-7).
  Wrapped in do/while(0) so all three assignments stay under a controlling
  `if`. val may be evaluated more than once.*/
#define RGB24_TO_24(val, r, g, b)                       \
        do                                              \
        {                                               \
                (b) = (val) & 0xff;                     \
                (g) = ((val) & 0xff00) >> 8;            \
                (r) = ((val) & 0xff0000) >> 16;         \
        } while (0)
/*Pack 8-bit r, g, b into a 15-bit 5:5:5 pixel stored in dest.

  When virge->dithering_enabled is set, the dither[][] entry for the current
  pixel (indexed by the low two bits of _y and _x) is added to each channel
  before the low three bits are dropped. Channels above 248 are pinned to 248
  instead, so the addition cannot carry out of 8 bits.

  Each colour argument is evaluated exactly once. Expects `virge`, `dither`,
  `_x` and `_y` to be in scope.*/
#define RGB15(r, g, b, dest)                                                            \
        do                                                                              \
        {                                                                               \
                int rgb15_r_ = (r);                                                     \
                int rgb15_g_ = (g);                                                     \
                int rgb15_b_ = (b);                                                     \
                if (virge->dithering_enabled)                                           \
                {                                                                       \
                        int rgb15_add_ = dither[_y & 3][_x & 3];                        \
                        rgb15_r_ = (rgb15_r_ > 248) ? 248 : rgb15_r_ + rgb15_add_;      \
                        rgb15_g_ = (rgb15_g_ > 248) ? 248 : rgb15_g_ + rgb15_add_;      \
                        rgb15_b_ = (rgb15_b_ > 248) ? 248 : rgb15_b_ + rgb15_add_;      \
                }                                                                       \
                (dest) = ((rgb15_b_ >> 3) & 0x1f) |                                     \
                         (((rgb15_g_ >> 3) & 0x1f) << 5) |                              \
                         (((rgb15_r_ >> 3) & 0x1f) << 10);                              \
        } while (0)
/*Pack 8-bit r, g, b into a 24-bit pixel: r in bits 16-23, g in bits 8-15,
  b in bits 0-7*/
#define RGB24(r, g, b) (((r) << 16) | ((g) << 8) | (b))
/*One colour sample with separate red, green, blue and alpha channels. The
  texture readers fill each channel with a value in the range 0-255.*/
typedef struct rgba_t
{
        int r, g, b, a;
} rgba_t;
1699 typedef struct s3d_state_t
1701 int32_t r, g, b, a, u, v, d, w;
1703 int32_t base_r, base_g, base_b, base_a, base_u, base_v, base_d, base_w;
1705 uint32_t base_z;
1707 uint32_t tbu, tbv;
1709 uint32_t cmd_set;
1710 int max_d;
1712 uint16_t *texture[10];
1714 uint32_t tex_bdr_clr;
1716 int32_t x1, x2;
1717 int y;
1719 rgba_t dest_rgba;
1720 } s3d_state_t;
/*Parameters for a single texel fetch, filled in by the tex_sample routines
  and consumed by the tex_read routines*/
typedef struct s3d_texture_state_t
{
        /*Mip level to read; a row of this level is 1 << level texels long*/
        int level;
        /*Right shift that turns the masked u/v coordinate into a texel index;
          the samplers set it to 18 + (9 - level)*/
        int texture_shift;

        /*Texture coordinates of the texel*/
        int32_t u, v;
} s3d_texture_state_t;
1730 static void (*tex_read)(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out);
1731 static void (*tex_sample)(s3d_state_t *state);
1732 static void (*dest_pixel)(s3d_state_t *state);
/*Larger / smaller of two values. Both arguments are evaluated twice, so do
  not pass expressions with side effects.*/
#define MAX(a, b) ((b) < (a) ? (a) : (b))
#define MIN(a, b) ((b) > (a) ? (a) : (b))
/*Position of the pixel being drawn; RGB15 uses the low two bits of each to
  pick its dither value*/
static int _x;
static int _y;
1739 static void tex_ARGB1555(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1741 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1742 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1743 uint16_t val = state->texture[texture_state->level][offset];
1745 out->r = ((val & 0x7c00) >> 7) | ((val & 0x7000) >> 12);
1746 out->g = ((val & 0x03e0) >> 2) | ((val & 0x0380) >> 7);
1747 out->b = ((val & 0x001f) << 3) | ((val & 0x001c) >> 2);
1748 out->a = (val & 0x8000) ? 0xff : 0;
1751 static void tex_ARGB1555_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1753 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1754 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1755 uint16_t val = state->texture[texture_state->level][offset];
1757 if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000)
1758 val = state->tex_bdr_clr;
1760 out->r = ((val & 0x7c00) >> 7) | ((val & 0x7000) >> 12);
1761 out->g = ((val & 0x03e0) >> 2) | ((val & 0x0380) >> 7);
1762 out->b = ((val & 0x001f) << 3) | ((val & 0x001c) >> 2);
1763 out->a = (val & 0x8000) ? 0xff : 0;
1766 static void tex_ARGB4444(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1768 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1769 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1770 uint16_t val = state->texture[texture_state->level][offset];
1772 out->r = ((val & 0x0f00) >> 4) | ((val & 0x0f00) >> 8);
1773 out->g = (val & 0x00f0) | ((val & 0x00f0) >> 4);
1774 out->b = ((val & 0x000f) << 4) | (val & 0x000f);
1775 out->a = ((val & 0xf000) >> 8) | ((val & 0xf000) >> 12);
1778 static void tex_ARGB4444_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1780 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1781 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1782 uint16_t val = state->texture[texture_state->level][offset];
1784 if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000)
1785 val = state->tex_bdr_clr;
1787 out->r = ((val & 0x0f00) >> 4) | ((val & 0x0f00) >> 8);
1788 out->g = (val & 0x00f0) | ((val & 0x00f0) >> 4);
1789 out->b = ((val & 0x000f) << 4) | (val & 0x000f);
1790 out->a = ((val & 0xf000) >> 8) | ((val & 0xf000) >> 12);
1793 static void tex_ARGB8888(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1795 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1796 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1797 uint32_t val = ((uint32_t *)state->texture[texture_state->level])[offset];
1799 out->r = (val >> 16) & 0xff;
1800 out->g = (val >> 8) & 0xff;
1801 out->b = val & 0xff;
1802 out->a = (val >> 24) & 0xff;
1804 static void tex_ARGB8888_nowrap(s3d_state_t *state, s3d_texture_state_t *texture_state, rgba_t *out)
1806 int offset = ((texture_state->u & 0x7fc0000) >> texture_state->texture_shift) +
1807 (((texture_state->v & 0x7fc0000) >> texture_state->texture_shift) << texture_state->level);
1808 uint32_t val = ((uint32_t *)state->texture[texture_state->level])[offset];
1810 if (((texture_state->u | texture_state->v) & 0xf8000000) == 0xf8000000)
1811 val = state->tex_bdr_clr;
1813 out->r = (val >> 16) & 0xff;
1814 out->g = (val >> 8) & 0xff;
1815 out->b = val & 0xff;
1816 out->a = (val >> 24) & 0xff;
1819 static void tex_sample_normal(s3d_state_t *state)
1821 s3d_texture_state_t texture_state;
1823 texture_state.level = state->max_d;
1824 texture_state.texture_shift = 18 + (9 - texture_state.level);
1825 texture_state.u = state->u + state->tbu;
1826 texture_state.v = state->v + state->tbv;
1828 tex_read(state, &texture_state, &state->dest_rgba);
1831 static void tex_sample_normal_filter(s3d_state_t *state)
1833 s3d_texture_state_t texture_state;
1834 int tex_offset;
1835 rgba_t tex_samples[4];
1836 int du, dv;
1837 int d[4];
1839 texture_state.level = state->max_d;
1840 texture_state.texture_shift = 18 + (9 - texture_state.level);
1841 tex_offset = 1 << texture_state.texture_shift;
1843 texture_state.u = state->u + state->tbu;
1844 texture_state.v = state->v + state->tbv;
1845 tex_read(state, &texture_state, &tex_samples[0]);
1846 du = (texture_state.u >> (texture_state.texture_shift - 8)) & 0xff;
1847 dv = (texture_state.v >> (texture_state.texture_shift - 8)) & 0xff;
1849 texture_state.u = state->u + state->tbu + tex_offset;
1850 texture_state.v = state->v + state->tbv;
1851 tex_read(state, &texture_state, &tex_samples[1]);
1853 texture_state.u = state->u + state->tbu;
1854 texture_state.v = state->v + state->tbv + tex_offset;
1855 tex_read(state, &texture_state, &tex_samples[2]);
1857 texture_state.u = state->u + state->tbu + tex_offset;
1858 texture_state.v = state->v + state->tbv + tex_offset;
1859 tex_read(state, &texture_state, &tex_samples[3]);
1861 d[0] = (256 - du) * (256 - dv);
1862 d[1] = du * (256 - dv);
1863 d[2] = (256 - du) * dv;
1864 d[3] = du * dv;
1866 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
1867 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
1868 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
1869 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
1872 static void tex_sample_mipmap(s3d_state_t *state)
1874 s3d_texture_state_t texture_state;
1876 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
1877 if (texture_state.level < 0)
1878 texture_state.level = 0;
1879 texture_state.texture_shift = 18 + (9 - texture_state.level);
1880 texture_state.u = state->u + state->tbu;
1881 texture_state.v = state->v + state->tbv;
1883 tex_read(state, &texture_state, &state->dest_rgba);
1886 static void tex_sample_mipmap_filter(s3d_state_t *state)
1888 s3d_texture_state_t texture_state;
1889 int tex_offset;
1890 rgba_t tex_samples[4];
1891 int du, dv;
1892 int d[4];
1894 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
1895 if (texture_state.level < 0)
1896 texture_state.level = 0;
1897 texture_state.texture_shift = 18 + (9 - texture_state.level);
1898 tex_offset = 1 << texture_state.texture_shift;
1900 texture_state.u = state->u + state->tbu;
1901 texture_state.v = state->v + state->tbv;
1902 tex_read(state, &texture_state, &tex_samples[0]);
1903 du = (texture_state.u >> (texture_state.texture_shift - 8)) & 0xff;
1904 dv = (texture_state.v >> (texture_state.texture_shift - 8)) & 0xff;
1906 texture_state.u = state->u + state->tbu + tex_offset;
1907 texture_state.v = state->v + state->tbv;
1908 tex_read(state, &texture_state, &tex_samples[1]);
1910 texture_state.u = state->u + state->tbu;
1911 texture_state.v = state->v + state->tbv + tex_offset;
1912 tex_read(state, &texture_state, &tex_samples[2]);
1914 texture_state.u = state->u + state->tbu + tex_offset;
1915 texture_state.v = state->v + state->tbv + tex_offset;
1916 tex_read(state, &texture_state, &tex_samples[3]);
1918 d[0] = (256 - du) * (256 - dv);
1919 d[1] = du * (256 - dv);
1920 d[2] = (256 - du) * dv;
1921 d[3] = du * dv;
1923 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
1924 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
1925 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
1926 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
1929 static void tex_sample_persp_normal(s3d_state_t *state)
1931 s3d_texture_state_t texture_state;
1932 int32_t w = 0;
1934 if (state->w)
1935 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
1937 texture_state.level = state->max_d;
1938 texture_state.texture_shift = 18 + (9 - texture_state.level);
1939 texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu;
1940 texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv;
1942 tex_read(state, &texture_state, &state->dest_rgba);
1945 static void tex_sample_persp_normal_filter(s3d_state_t *state)
1947 s3d_texture_state_t texture_state;
1948 int32_t w = 0, u, v;
1949 int tex_offset;
1950 rgba_t tex_samples[4];
1951 int du, dv;
1952 int d[4];
1954 if (state->w)
1955 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
1957 u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu;
1958 v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv;
1960 texture_state.level = state->max_d;
1961 texture_state.texture_shift = 18 + (9 - texture_state.level);
1962 tex_offset = 1 << texture_state.texture_shift;
1964 texture_state.u = u;
1965 texture_state.v = v;
1966 tex_read(state, &texture_state, &tex_samples[0]);
1967 du = (u >> (texture_state.texture_shift - 8)) & 0xff;
1968 dv = (v >> (texture_state.texture_shift - 8)) & 0xff;
1970 texture_state.u = u + tex_offset;
1971 texture_state.v = v;
1972 tex_read(state, &texture_state, &tex_samples[1]);
1974 texture_state.u = u;
1975 texture_state.v = v + tex_offset;
1976 tex_read(state, &texture_state, &tex_samples[2]);
1978 texture_state.u = u + tex_offset;
1979 texture_state.v = v + tex_offset;
1980 tex_read(state, &texture_state, &tex_samples[3]);
1982 d[0] = (256 - du) * (256 - dv);
1983 d[1] = du * (256 - dv);
1984 d[2] = (256 - du) * dv;
1985 d[3] = du * dv;
1987 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
1988 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
1989 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
1990 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
1993 static void tex_sample_persp_normal_375(s3d_state_t *state)
1995 s3d_texture_state_t texture_state;
1996 int32_t w = 0;
1998 if (state->w)
1999 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
2001 texture_state.level = state->max_d;
2002 texture_state.texture_shift = 18 + (9 - texture_state.level);
2003 texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu;
2004 texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv;
2006 tex_read(state, &texture_state, &state->dest_rgba);
2009 static void tex_sample_persp_normal_filter_375(s3d_state_t *state)
2011 s3d_texture_state_t texture_state;
2012 int32_t w = 0, u, v;
2013 int tex_offset;
2014 rgba_t tex_samples[4];
2015 int du, dv;
2016 int d[4];
2018 if (state->w)
2019 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
2021 u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu;
2022 v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv;
2024 texture_state.level = state->max_d;
2025 texture_state.texture_shift = 18 + (9 - texture_state.level);
2026 tex_offset = 1 << texture_state.texture_shift;
2028 texture_state.u = u;
2029 texture_state.v = v;
2030 tex_read(state, &texture_state, &tex_samples[0]);
2031 du = (u >> (texture_state.texture_shift - 8)) & 0xff;
2032 dv = (v >> (texture_state.texture_shift - 8)) & 0xff;
2034 texture_state.u = u + tex_offset;
2035 texture_state.v = v;
2036 tex_read(state, &texture_state, &tex_samples[1]);
2038 texture_state.u = u;
2039 texture_state.v = v + tex_offset;
2040 tex_read(state, &texture_state, &tex_samples[2]);
2042 texture_state.u = u + tex_offset;
2043 texture_state.v = v + tex_offset;
2044 tex_read(state, &texture_state, &tex_samples[3]);
2046 d[0] = (256 - du) * (256 - dv);
2047 d[1] = du * (256 - dv);
2048 d[2] = (256 - du) * dv;
2049 d[3] = du * dv;
2051 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
2052 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
2053 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
2054 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
2058 static void tex_sample_persp_mipmap(s3d_state_t *state)
2060 s3d_texture_state_t texture_state;
2061 int32_t w = 0;
2063 if (state->w)
2064 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
2066 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
2067 if (texture_state.level < 0)
2068 texture_state.level = 0;
2069 texture_state.texture_shift = 18 + (9 - texture_state.level);
2070 texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu;
2071 texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv;
2073 tex_read(state, &texture_state, &state->dest_rgba);
2076 static void tex_sample_persp_mipmap_filter(s3d_state_t *state)
2078 s3d_texture_state_t texture_state;
2079 int32_t w = 0, u, v;
2080 int tex_offset;
2081 rgba_t tex_samples[4];
2082 int du, dv;
2083 int d[4];
2085 if (state->w)
2086 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
2088 u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (12 + state->max_d)) + state->tbu;
2089 v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (12 + state->max_d)) + state->tbv;
2091 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
2092 if (texture_state.level < 0)
2093 texture_state.level = 0;
2094 texture_state.texture_shift = 18 + (9 - texture_state.level);
2095 tex_offset = 1 << texture_state.texture_shift;
2097 texture_state.u = u;
2098 texture_state.v = v;
2099 tex_read(state, &texture_state, &tex_samples[0]);
2100 du = (u >> (texture_state.texture_shift - 8)) & 0xff;
2101 dv = (v >> (texture_state.texture_shift - 8)) & 0xff;
2103 texture_state.u = u + tex_offset;
2104 texture_state.v = v;
2105 tex_read(state, &texture_state, &tex_samples[1]);
2107 texture_state.u = u;
2108 texture_state.v = v + tex_offset;
2109 tex_read(state, &texture_state, &tex_samples[2]);
2111 texture_state.u = u + tex_offset;
2112 texture_state.v = v + tex_offset;
2113 tex_read(state, &texture_state, &tex_samples[3]);
2115 d[0] = (256 - du) * (256 - dv);
2116 d[1] = du * (256 - dv);
2117 d[2] = (256 - du) * dv;
2118 d[3] = du * dv;
2120 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
2121 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
2122 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
2123 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
2126 static void tex_sample_persp_mipmap_375(s3d_state_t *state)
2128 s3d_texture_state_t texture_state;
2129 int32_t w = 0;
2131 if (state->w)
2132 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
2134 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
2135 if (texture_state.level < 0)
2136 texture_state.level = 0;
2137 texture_state.texture_shift = 18 + (9 - texture_state.level);
2138 texture_state.u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu;
2139 texture_state.v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv;
2141 tex_read(state, &texture_state, &state->dest_rgba);
2144 static void tex_sample_persp_mipmap_filter_375(s3d_state_t *state)
2146 s3d_texture_state_t texture_state;
2147 int32_t w = 0, u, v;
2148 int tex_offset;
2149 rgba_t tex_samples[4];
2150 int du, dv;
2151 int d[4];
2153 if (state->w)
2154 w = (int32_t)(((1ULL << 27) << 19) / (int64_t)state->w);
2156 u = (int32_t)(((int64_t)state->u * (int64_t)w) >> (8 + state->max_d)) + state->tbu;
2157 v = (int32_t)(((int64_t)state->v * (int64_t)w) >> (8 + state->max_d)) + state->tbv;
2159 texture_state.level = (state->d < 0) ? state->max_d : state->max_d - ((state->d >> 27) & 0xf);
2160 if (texture_state.level < 0)
2161 texture_state.level = 0;
2162 texture_state.texture_shift = 18 + (9 - texture_state.level);
2163 tex_offset = 1 << texture_state.texture_shift;
2165 texture_state.u = u;
2166 texture_state.v = v;
2167 tex_read(state, &texture_state, &tex_samples[0]);
2168 du = (u >> (texture_state.texture_shift - 8)) & 0xff;
2169 dv = (v >> (texture_state.texture_shift - 8)) & 0xff;
2171 texture_state.u = u + tex_offset;
2172 texture_state.v = v;
2173 tex_read(state, &texture_state, &tex_samples[1]);
2175 texture_state.u = u;
2176 texture_state.v = v + tex_offset;
2177 tex_read(state, &texture_state, &tex_samples[2]);
2179 texture_state.u = u + tex_offset;
2180 texture_state.v = v + tex_offset;
2181 tex_read(state, &texture_state, &tex_samples[3]);
2183 d[0] = (256 - du) * (256 - dv);
2184 d[1] = du * (256 - dv);
2185 d[2] = (256 - du) * dv;
2186 d[3] = du * dv;
2188 state->dest_rgba.r = (tex_samples[0].r * d[0] + tex_samples[1].r * d[1] + tex_samples[2].r * d[2] + tex_samples[3].r * d[3]) >> 16;
2189 state->dest_rgba.g = (tex_samples[0].g * d[0] + tex_samples[1].g * d[1] + tex_samples[2].g * d[2] + tex_samples[3].g * d[3]) >> 16;
2190 state->dest_rgba.b = (tex_samples[0].b * d[0] + tex_samples[1].b * d[1] + tex_samples[2].b * d[2] + tex_samples[3].b * d[3]) >> 16;
2191 state->dest_rgba.a = (tex_samples[0].a * d[0] + tex_samples[1].a * d[1] + tex_samples[2].a * d[2] + tex_samples[3].a * d[3]) >> 16;
/*Saturate the lvalue x to the range 0..255.
  The argument is evaluated more than once, so pass a plain lvalue.*/
#define CLAMP(x) do                                             \
        {                                                       \
                if ((x) & ~0xff)                                \
                {                                               \
                        if ((x) < 0)                            \
                                x = 0;                          \
                        else                                    \
                                x = 0xff;                       \
                }                                               \
        }                                                       \
        while (0)
/*Saturate four lvalues (red, green, blue, alpha) to the range 0..255.
  Wrapped in do { } while (0) so the macro behaves as a single statement and is
  safe as the body of an unbraced if/else. Arguments are evaluated more than
  once, so pass plain lvalues.*/
#define CLAMP_RGBA(r, g, b, a) do                               \
        {                                                       \
                if ((r) & ~0xff)                                \
                        r = ((r) < 0) ? 0 : 0xff;               \
                if ((g) & ~0xff)                                \
                        g = ((g) < 0) ? 0 : 0xff;               \
                if ((b) & ~0xff)                                \
                        b = ((b) < 0) ? 0 : 0xff;               \
                if ((a) & ~0xff)                                \
                        a = ((a) < 0) ? 0 : 0xff;               \
        }                                                       \
        while (0)
/*Saturate three lvalues (red, green, blue) to the range 0..255.
  Arguments are evaluated more than once, so pass plain lvalues.*/
#define CLAMP_RGB(r, g, b) do                                           \
        {                                                               \
                r = ((r) < 0) ? 0 : (((r) > 0xff) ? 0xff : (r));        \
                g = ((g) < 0) ? 0 : (((g) > 0xff) ? 0xff : (g));        \
                b = ((b) < 0) ? 0 : (((b) > 0xff) ? 0xff : (b));        \
        }                                                               \
        while (0)
2229 static void dest_pixel_gouraud_shaded_triangle(s3d_state_t *state)
2231 state->dest_rgba.r = state->r >> 7;
2232 CLAMP(state->dest_rgba.r);
2234 state->dest_rgba.g = state->g >> 7;
2235 CLAMP(state->dest_rgba.g);
2237 state->dest_rgba.b = state->b >> 7;
2238 CLAMP(state->dest_rgba.b);
2240 state->dest_rgba.a = state->a >> 7;
2241 CLAMP(state->dest_rgba.a);
2244 static void dest_pixel_unlit_texture_triangle(s3d_state_t *state)
2246 tex_sample(state);
2248 if (state->cmd_set & CMD_SET_ABC_SRC)
2249 state->dest_rgba.a = state->a >> 7;
2252 static void dest_pixel_lit_texture_decal(s3d_state_t *state)
2254 tex_sample(state);
2256 if (state->cmd_set & CMD_SET_ABC_SRC)
2257 state->dest_rgba.a = state->a >> 7;
2260 static void dest_pixel_lit_texture_reflection(s3d_state_t *state)
2262 tex_sample(state);
2264 state->dest_rgba.r += (state->r >> 7);
2265 state->dest_rgba.g += (state->g >> 7);
2266 state->dest_rgba.b += (state->b >> 7);
2267 if (state->cmd_set & CMD_SET_ABC_SRC)
2268 state->dest_rgba.a += (state->a >> 7);
2270 CLAMP_RGBA(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b, state->dest_rgba.a);
2273 static void dest_pixel_lit_texture_modulate(s3d_state_t *state)
2275 int r = state->r >> 7, g = state->g >> 7, b = state->b >> 7, a = state->a >> 7;
2277 tex_sample(state);
2279 CLAMP_RGBA(r, g, b, a);
2281 state->dest_rgba.r = ((state->dest_rgba.r) * r) >> 8;
2282 state->dest_rgba.g = ((state->dest_rgba.g) * g) >> 8;
2283 state->dest_rgba.b = ((state->dest_rgba.b) * b) >> 8;
2285 if (state->cmd_set & CMD_SET_ABC_SRC)
2286 state->dest_rgba.a = a;
2289 static void tri(virge_t *virge, s3d_t *s3d_tri, s3d_state_t *state, int yc, int32_t dx1, int32_t dx2)
2291 uint8_t *vram = virge->svga.vram;
2293 int x_dir = s3d_tri->tlr ? 1 : -1;
2295 int use_z = !(s3d_tri->cmd_set & CMD_SET_ZB_MODE);
2297 int y_count = yc;
2299 int bpp = (s3d_tri->cmd_set >> 2) & 7;
2301 uint32_t dest_offset, z_offset;
2303 if (s3d_tri->cmd_set & CMD_SET_HC)
2305 if (state->y < s3d_tri->clip_t)
2306 return;
2307 if (state->y > s3d_tri->clip_b)
2309 int diff_y = state->y - s3d_tri->clip_b;
2311 if (diff_y > y_count)
2312 diff_y = y_count;
2314 state->base_u += (s3d_tri->TdUdY * diff_y);
2315 state->base_v += (s3d_tri->TdVdY * diff_y);
2316 state->base_z += (s3d_tri->TdZdY * diff_y);
2317 state->base_r += (s3d_tri->TdRdY * diff_y);
2318 state->base_g += (s3d_tri->TdGdY * diff_y);
2319 state->base_b += (s3d_tri->TdBdY * diff_y);
2320 state->base_a += (s3d_tri->TdAdY * diff_y);
2321 state->base_d += (s3d_tri->TdDdY * diff_y);
2322 state->base_w += (s3d_tri->TdWdY * diff_y);
2323 state->x1 += (dx1 * diff_y);
2324 state->x2 += (dx2 * diff_y);
2325 state->y -= diff_y;
2326 dest_offset -= s3d_tri->dest_str;
2327 z_offset -= s3d_tri->z_str;
2328 y_count -= diff_y;
2330 if ((state->y - y_count) < s3d_tri->clip_t)
2331 y_count = state->y - s3d_tri->clip_t;
2334 dest_offset = s3d_tri->dest_base + (state->y * s3d_tri->dest_str);
2335 z_offset = s3d_tri->z_base + (state->y * s3d_tri->z_str);
2337 for (; y_count > 0; y_count--)
2339 int x = (state->x1 + ((1 << 20) - 1)) >> 20;
2340 int xe = (state->x2 + ((1 << 20) - 1)) >> 20;
2341 uint32_t z = (state->base_z > 0) ? (state->base_z << 1) : 0;
2342 if (x_dir < 0)
2344 x--;
2345 xe--;
2348 if (x != xe && (x_dir > 0 && x < xe) || (x_dir < 0 && x > xe))
2350 uint32_t dest_addr, z_addr;
2351 int dx = (x_dir > 0) ? ((31 - ((state->x1-1) >> 15)) & 0x1f) : (((state->x1-1) >> 15) & 0x1f);
2352 int x_offset = x_dir * (bpp + 1);
2353 int xz_offset = x_dir << 1;
2354 if (x_dir > 0)
2355 dx += 1;
2356 state->r = state->base_r + ((s3d_tri->TdRdX * dx) >> 5);
2357 state->g = state->base_g + ((s3d_tri->TdGdX * dx) >> 5);
2358 state->b = state->base_b + ((s3d_tri->TdBdX * dx) >> 5);
2359 state->a = state->base_a + ((s3d_tri->TdAdX * dx) >> 5);
2360 state->u = state->base_u + ((s3d_tri->TdUdX * dx) >> 5);
2361 state->v = state->base_v + ((s3d_tri->TdVdX * dx) >> 5);
2362 state->w = state->base_w + ((s3d_tri->TdWdX * dx) >> 5);
2363 state->d = state->base_d + ((s3d_tri->TdDdX * dx) >> 5);
2364 z += ((s3d_tri->TdZdX * dx) >> 5);
2366 // pclog("Draw Y=%i X=%i to XE=%i %i %08x %08x %08x %08x %08x %08x %08x %08x %i %08x\n", state->y, x, xe, dx, state->x1, state->x2, dx1, virge->s3d.TdWdX, state->u, state->v, virge->s3d.TdUdX, virge->s3d.TdUdY, dx, (virge->s3d.TdUdX * dx) >> 4);
2368 if (s3d_tri->cmd_set & CMD_SET_HC)
2370 if (x_dir > 0)
2372 if (x > s3d_tri->clip_r)
2373 goto tri_skip_line;
2374 if (xe < s3d_tri->clip_l)
2375 goto tri_skip_line;
2376 if (xe > s3d_tri->clip_r)
2377 xe = s3d_tri->clip_r;
2378 if (x < s3d_tri->clip_l)
2380 int diff_x = s3d_tri->clip_l - x;
2382 z += (s3d_tri->TdZdX * diff_x);
2383 state->u += (s3d_tri->TdUdX * diff_x);
2384 state->v += (s3d_tri->TdVdX * diff_x);
2385 state->r += (s3d_tri->TdRdX * diff_x);
2386 state->g += (s3d_tri->TdGdX * diff_x);
2387 state->b += (s3d_tri->TdBdX * diff_x);
2388 state->a += (s3d_tri->TdAdX * diff_x);
2389 state->d += (s3d_tri->TdDdX * diff_x);
2390 state->w += (s3d_tri->TdWdX * diff_x);
2392 x = s3d_tri->clip_l;
2395 else
2397 if (x < s3d_tri->clip_l)
2398 goto tri_skip_line;
2399 if (xe > s3d_tri->clip_r)
2400 goto tri_skip_line;
2401 if (xe < s3d_tri->clip_l)
2402 xe = s3d_tri->clip_l;
2403 if (x > s3d_tri->clip_r)
2405 int diff_x = x - s3d_tri->clip_r;
2407 z += (s3d_tri->TdZdX * diff_x);
2408 state->u += (s3d_tri->TdUdX * diff_x);
2409 state->v += (s3d_tri->TdVdX * diff_x);
2410 state->r += (s3d_tri->TdRdX * diff_x);
2411 state->g += (s3d_tri->TdGdX * diff_x);
2412 state->b += (s3d_tri->TdBdX * diff_x);
2413 state->a += (s3d_tri->TdAdX * diff_x);
2414 state->d += (s3d_tri->TdDdX * diff_x);
2415 state->w += (s3d_tri->TdWdX * diff_x);
2417 x = s3d_tri->clip_r;
2422 virge->svga.changedvram[(dest_offset & 0x3fffff) >> 12] = changeframecount;
2424 dest_addr = dest_offset + (x * (bpp + 1));
2425 z_addr = z_offset + (x << 1);
2427 for (; x != xe; x = (x + x_dir) & 0xfff)
2429 int update = 1;
2430 uint16_t src_z;
2431 _x = x; _y = state->y;
2433 if (use_z)
2435 src_z = Z_READ(z_addr);
2436 Z_CLIP(src_z, z >> 16);
2439 if (update)
2441 uint32_t dest_col;
2443 dest_pixel(state);
2445 if (s3d_tri->cmd_set & CMD_SET_ABC_ENABLE)
2447 uint32_t src_col;
2448 int src_r, src_g, src_b;
2450 switch (bpp)
2452 case 0: /*8 bpp*/
2453 /*Not implemented yet*/
2454 break;
2455 case 1: /*16 bpp*/
2456 src_col = *(uint16_t *)&vram[dest_addr & 0x3fffff];
2457 RGB15_TO_24(src_col, src_r, src_g, src_b);
2458 break;
2459 case 2: /*24 bpp*/
2460 src_col = (*(uint32_t *)&vram[dest_addr & 0x3fffff]) & 0xffffff;
2461 RGB24_TO_24(src_col, src_r, src_g, src_b);
2462 break;
2465 state->dest_rgba.r = ((state->dest_rgba.r * state->dest_rgba.a) + (src_r * (255 - state->dest_rgba.a))) / 255;
2466 state->dest_rgba.g = ((state->dest_rgba.g * state->dest_rgba.a) + (src_g * (255 - state->dest_rgba.a))) / 255;
2467 state->dest_rgba.b = ((state->dest_rgba.b * state->dest_rgba.a) + (src_b * (255 - state->dest_rgba.a))) / 255;
2470 switch (bpp)
2472 case 0: /*8 bpp*/
2473 /*Not implemented yet*/
2474 break;
2475 case 1: /*16 bpp*/
2476 RGB15(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b, dest_col);
2477 *(uint16_t *)&vram[dest_addr] = dest_col;
2478 break;
2479 case 2: /*24 bpp*/
2480 dest_col = RGB24(state->dest_rgba.r, state->dest_rgba.g, state->dest_rgba.b);
2481 *(uint8_t *)&vram[dest_addr] = dest_col & 0xff;
2482 *(uint8_t *)&vram[dest_addr + 1] = (dest_col >> 8) & 0xff;
2483 *(uint8_t *)&vram[dest_addr + 2] = (dest_col >> 16) & 0xff;
2484 break;
2487 if (use_z && (s3d_tri->cmd_set & CMD_SET_ZUP))
2488 Z_WRITE(z_addr, src_z);
2491 z += s3d_tri->TdZdX;
2492 state->u += s3d_tri->TdUdX;
2493 state->v += s3d_tri->TdVdX;
2494 state->r += s3d_tri->TdRdX;
2495 state->g += s3d_tri->TdGdX;
2496 state->b += s3d_tri->TdBdX;
2497 state->a += s3d_tri->TdAdX;
2498 state->d += s3d_tri->TdDdX;
2499 state->w += s3d_tri->TdWdX;
2500 dest_addr += x_offset;
2501 z_addr += xz_offset;
2502 virge->pixel_count++;
2505 tri_skip_line:
2506 state->x1 += dx1;
2507 state->x2 += dx2;
2508 state->base_u += s3d_tri->TdUdY;
2509 state->base_v += s3d_tri->TdVdY;
2510 state->base_z += s3d_tri->TdZdY;
2511 state->base_r += s3d_tri->TdRdY;
2512 state->base_g += s3d_tri->TdGdY;
2513 state->base_b += s3d_tri->TdBdY;
2514 state->base_a += s3d_tri->TdAdY;
2515 state->base_d += s3d_tri->TdDdY;
2516 state->base_w += s3d_tri->TdWdY;
2517 state->y--;
2518 dest_offset -= s3d_tri->dest_str;
2519 z_offset -= s3d_tri->z_str;
/*Texel size for each of the eight texture format codes (cmd_set bits 5-7).
  The values are doubled; the mip-map address calculation halves the product
  again.*/
static int tex_size[8] =
{
        8,      /*format 0*/
        4,      /*format 1*/
        4,      /*format 2*/
        2,      /*format 3*/
        2,      /*format 4*/
        2,      /*format 5*/
        2,      /*format 6*/
        2       /*format 7*/
};
2535 static void s3_virge_triangle(virge_t *virge, s3d_t *s3d_tri)
2537 s3d_state_t state;
2539 uint32_t tex_base;
2540 int c;
2542 uint64_t start_time = timer_read();
2543 uint64_t end_time;
2545 state.tbu = s3d_tri->tbu << 11;
2546 state.tbv = s3d_tri->tbv << 11;
2548 state.max_d = (s3d_tri->cmd_set >> 8) & 15;
2550 state.tex_bdr_clr = s3d_tri->tex_bdr_clr;
2552 state.cmd_set = s3d_tri->cmd_set;
2554 state.base_u = s3d_tri->tus;
2555 state.base_v = s3d_tri->tvs;
2556 state.base_z = s3d_tri->tzs;
2557 state.base_r = (int32_t)s3d_tri->trs;
2558 state.base_g = (int32_t)s3d_tri->tgs;
2559 state.base_b = (int32_t)s3d_tri->tbs;
2560 state.base_a = (int32_t)s3d_tri->tas;
2561 state.base_d = s3d_tri->tds;
2562 state.base_w = s3d_tri->tws;
2564 tex_base = s3d_tri->tex_base;
2565 for (c = 9; c >= 0; c--)
2567 state.texture[c] = (uint16_t *)&virge->svga.vram[tex_base];
2568 if (c <= state.max_d)
2569 tex_base += ((1 << (c*2)) * tex_size[(s3d_tri->cmd_set >> 5) & 7]) / 2;
2572 switch ((s3d_tri->cmd_set >> 27) & 0xf)
2574 case 0:
2575 dest_pixel = dest_pixel_gouraud_shaded_triangle;
2576 // pclog("dest_pixel_gouraud_shaded_triangle\n");
2577 break;
2578 case 1:
2579 case 5:
2580 switch ((s3d_tri->cmd_set >> 15) & 0x3)
2582 case 0:
2583 dest_pixel = dest_pixel_lit_texture_reflection;
2584 // pclog("dest_pixel_lit_texture_reflection\n");
2585 break;
2586 case 1:
2587 dest_pixel = dest_pixel_lit_texture_modulate;
2588 // pclog("dest_pixel_lit_texture_modulate\n");
2589 break;
2590 case 2:
2591 dest_pixel = dest_pixel_lit_texture_decal;
2592 // pclog("dest_pixel_lit_texture_decal\n");
2593 break;
2594 default:
2595 pclog("bad triangle type %x\n", (s3d_tri->cmd_set >> 27) & 0xf);
2596 return;
2598 break;
2599 case 2:
2600 case 6:
2601 dest_pixel = dest_pixel_unlit_texture_triangle;
2602 // pclog("dest_pixel_unlit_texture_triangle\n");
2603 break;
2604 default:
2605 pclog("bad triangle type %x\n", (s3d_tri->cmd_set >> 27) & 0xf);
2606 return;
2609 switch (((s3d_tri->cmd_set >> 12) & 7) | ((s3d_tri->cmd_set & (1 << 29)) ? 8 : 0))
2611 case 0: case 1:
2612 tex_sample = tex_sample_mipmap;
2613 // pclog("use tex_sample_mipmap\n");
2614 break;
2615 case 2: case 3:
2616 tex_sample = virge->bilinear_enabled ? tex_sample_mipmap_filter : tex_sample_mipmap;
2617 // pclog("use tex_sample_mipmap_filter\n");
2618 break;
2619 case 4: case 5:
2620 tex_sample = tex_sample_normal;
2621 // pclog("use tex_sample_normal\n");
2622 break;
2623 case 6: case 7:
2624 tex_sample = virge->bilinear_enabled ? tex_sample_normal_filter : tex_sample_normal;
2625 // pclog("use tex_sample_normal_filter\n");
2626 break;
2627 case (0 | 8): case (1 | 8):
2628 if (virge->is_375)
2629 tex_sample = tex_sample_persp_mipmap_375;
2630 else
2631 tex_sample = tex_sample_persp_mipmap;
2632 // pclog("use tex_sample_persp_mipmap\n");
2633 break;
2634 case (2 | 8): case (3 | 8):
2635 if (virge->is_375)
2636 tex_sample = virge->bilinear_enabled ? tex_sample_persp_mipmap_filter_375 : tex_sample_persp_mipmap_375;
2637 else
2638 tex_sample = virge->bilinear_enabled ? tex_sample_persp_mipmap_filter : tex_sample_persp_mipmap;
2639 // pclog("use tex_sample_persp_mipmap_filter\n");
2640 break;
2641 case (4 | 8): case (5 | 8):
2642 if (virge->is_375)
2643 tex_sample = tex_sample_persp_normal_375;
2644 else
2645 tex_sample = tex_sample_persp_normal;
2646 // pclog("use tex_sample_persp_normal\n");
2647 break;
2648 case (6 | 8): case (7 | 8):
2649 if (virge->is_375)
2650 tex_sample = virge->bilinear_enabled ? tex_sample_persp_normal_filter_375 : tex_sample_persp_normal_375;
2651 else
2652 tex_sample = virge->bilinear_enabled ? tex_sample_persp_normal_filter : tex_sample_persp_normal;
2653 // pclog("use tex_sample_persp_normal_filter\n");
2654 break;
2657 switch ((s3d_tri->cmd_set >> 5) & 7)
2659 case 0:
2660 tex_read = (s3d_tri->cmd_set & CMD_SET_TWE) ? tex_ARGB8888 : tex_ARGB8888_nowrap;
2661 break;
2662 case 1:
2663 tex_read = (s3d_tri->cmd_set & CMD_SET_TWE) ? tex_ARGB4444 : tex_ARGB4444_nowrap;
2664 // pclog("tex_ARGB4444\n");
2665 break;
2666 case 2:
2667 tex_read = (s3d_tri->cmd_set & CMD_SET_TWE) ? tex_ARGB1555 : tex_ARGB1555_nowrap;
2668 // pclog("tex_ARGB1555 %i\n", (s3d_tri->cmd_set >> 5) & 7);
2669 break;
2670 default:
2671 pclog("bad texture type %i\n", (s3d_tri->cmd_set >> 5) & 7);
2672 tex_read = (s3d_tri->cmd_set & CMD_SET_TWE) ? tex_ARGB1555 : tex_ARGB1555_nowrap;
2673 break;
2676 // pclog("Triangle %i %i,%i to %i,%i %08x\n", y, x1 >> 20, y, s3d_tri->txend01 >> 20, y - (s3d_tri->ty01 + s3d_tri->ty12), state.cmd_set);
2678 state.y = s3d_tri->tys;
2679 state.x1 = s3d_tri->txs;
2680 state.x2 = s3d_tri->txend01;
2681 tri(virge, s3d_tri, &state, s3d_tri->ty01, s3d_tri->TdXdY02, s3d_tri->TdXdY01);
2682 state.x2 = s3d_tri->txend12;
2683 tri(virge, s3d_tri, &state, s3d_tri->ty12, s3d_tri->TdXdY02, s3d_tri->TdXdY12);
2685 virge->tri_count++;
2687 end_time = timer_read();
2689 virge_time += end_time - start_time;
2692 static void render_thread(void *param)
2694 virge_t *virge = (virge_t *)param;
2696 while (1)
2698 thread_wait_event(virge->wake_render_thread, -1);
2699 thread_reset_event(virge->wake_render_thread);
2700 virge->s3d_busy = 1;
2701 while (!RB_EMPTY)
2703 s3_virge_triangle(virge, &virge->s3d_buffer[virge->s3d_read_idx & RB_MASK]);
2704 virge->s3d_read_idx++;
2706 if (RB_ENTRIES == RB_SIZE - 1)
2707 thread_set_event(virge->not_full_event);
2709 virge->s3d_busy = 0;
2713 static void queue_triangle(virge_t *virge)
2715 int c;
2716 // pclog("queue_triangle: read=%i write=%i RB_ENTRIES=%i RB_FULL=%i\n", virge->s3d_read_idx, virge->s3d_write_idx, RB_ENTRIES, RB_FULL);
2717 if (RB_FULL)
2719 thread_reset_event(virge->not_full_event);
2720 if (RB_FULL)
2721 thread_wait_event(virge->not_full_event, -1); /*Wait for room in ringbuffer*/
2723 // pclog(" add at read=%i write=%i %i\n", virge->s3d_read_idx, virge->s3d_write_idx, virge->s3d_write_idx & RB_MASK);
2724 virge->s3d_buffer[virge->s3d_write_idx & RB_MASK] = virge->s3d_tri;
2725 virge->s3d_write_idx++;
2726 if (!virge->s3d_busy)
2727 thread_set_event(virge->wake_render_thread); /*Wake up render thread if moving from idle*/
2730 static void s3_virge_hwcursor_draw(svga_t *svga, int displine)
2732 int x;
2733 uint16_t dat[2];
2734 int xx;
2735 int offset = svga->hwcursor_latch.x - svga->hwcursor_latch.xoff;
2737 // pclog("HWcursor %i %i\n", svga->hwcursor_latch.x, svga->hwcursor_latch.y);
2738 for (x = 0; x < 64; x += 16)
2740 dat[0] = (svga->vram[svga->hwcursor_latch.addr] << 8) | svga->vram[svga->hwcursor_latch.addr + 1];
2741 dat[1] = (svga->vram[svga->hwcursor_latch.addr + 2] << 8) | svga->vram[svga->hwcursor_latch.addr + 3];
2742 for (xx = 0; xx < 16; xx++)
2744 if (offset >= svga->hwcursor_latch.x)
2746 if (!(dat[0] & 0x8000))
2747 ((uint32_t *)buffer32->line[displine])[offset + 32] = (dat[1] & 0x8000) ? 0xffffff : 0;
2748 else if (dat[1] & 0x8000)
2749 ((uint32_t *)buffer32->line[displine])[offset + 32] ^= 0xffffff;
2750 // pclog("Plot %i, %i (%i %i) %04X %04X\n", offset, displine, x+xx, svga->hwcursor_on, dat[0], dat[1]);
2753 offset++;
2754 dat[0] <<= 1;
2755 dat[1] <<= 1;
2757 svga->hwcursor_latch.addr += 4;
/*Decode 8 source bytes (two Y/Cr/Y/Cb groups) into four RGB pixels.
  Uses src, r[], g[], b[] and x_write from the enclosing scope: src advances
  by 8 and x_write by 4 (modulo 8).*/
#define DECODE_YCbCr()                                          \
        do                                                      \
        {                                                       \
                int pair;                                       \
                                                                \
                for (pair = 0; pair < 2; pair++)                \
                {                                               \
                        uint8_t luma0 = src[0];                 \
                        int8_t Cr = src[1] - 0x80;              \
                        uint8_t luma1 = src[2];                 \
                        int8_t Cb = src[3] - 0x80;              \
                        int dR = (359*Cr) >> 8;                 \
                        int dG = (88*Cb + 183*Cr) >> 8;         \
                        int dB = (453*Cb) >> 8;                 \
                                                                \
                        src += 4;                               \
                                                                \
                        r[x_write] = luma0 + dR;                \
                        CLAMP(r[x_write]);                      \
                        g[x_write] = luma0 - dG;                \
                        CLAMP(g[x_write]);                      \
                        b[x_write] = luma0 + dB;                \
                        CLAMP(b[x_write]);                      \
                                                                \
                        r[x_write+1] = luma1 + dR;              \
                        CLAMP(r[x_write+1]);                    \
                        g[x_write+1] = luma1 - dG;              \
                        CLAMP(g[x_write+1]);                    \
                        b[x_write+1] = luma1 + dB;              \
                        CLAMP(b[x_write+1]);                    \
                                                                \
                        x_write = (x_write + 2) & 7;            \
                }                                               \
        } while (0)
/*Both YUV formats are untested*/
/*Decode 6 source bytes (U, Y1, Y2, V, Y3, Y4) into four RGB pixels that share
  one chroma pair. Uses src, r[], g[], b[] and x_write from the enclosing
  scope: src advances by 6 and x_write by 4 (modulo 8).
  The scaled luma is kept as int: 298 * (Y - 16) >> 8 can fall outside 0..255
  and must reach CLAMP unwrapped. Pixels 3 and 4 use Y3 and Y4.*/
#define DECODE_YUV211()                                         \
        do                                                      \
        {                                                       \
                int y1, y2, y3, y4;                             \
                int8_t U, V;                                    \
                int dR, dG, dB;                                 \
                                                                \
                U = src[0] - 0x80;                              \
                y1 = (298 * (src[1] - 16)) >> 8;                \
                y2 = (298 * (src[2] - 16)) >> 8;                \
                V = src[3] - 0x80;                              \
                y3 = (298 * (src[4] - 16)) >> 8;                \
                y4 = (298 * (src[5] - 16)) >> 8;                \
                src += 6;                                       \
                                                                \
                dR = (309*V) >> 8;                              \
                dG = (100*U + 208*V) >> 8;                      \
                dB = (516*U) >> 8;                              \
                                                                \
                r[x_write] = y1 + dR;                           \
                CLAMP(r[x_write]);                              \
                g[x_write] = y1 - dG;                           \
                CLAMP(g[x_write]);                              \
                b[x_write] = y1 + dB;                           \
                CLAMP(b[x_write]);                              \
                                                                \
                r[x_write+1] = y2 + dR;                         \
                CLAMP(r[x_write+1]);                            \
                g[x_write+1] = y2 - dG;                         \
                CLAMP(g[x_write+1]);                            \
                b[x_write+1] = y2 + dB;                         \
                CLAMP(b[x_write+1]);                            \
                                                                \
                r[x_write+2] = y3 + dR;                         \
                CLAMP(r[x_write+2]);                            \
                g[x_write+2] = y3 - dG;                         \
                CLAMP(g[x_write+2]);                            \
                b[x_write+2] = y3 + dB;                         \
                CLAMP(b[x_write+2]);                            \
                                                                \
                r[x_write+3] = y4 + dR;                         \
                CLAMP(r[x_write+3]);                            \
                g[x_write+3] = y4 - dG;                         \
                CLAMP(g[x_write+3]);                            \
                b[x_write+3] = y4 + dB;                         \
                CLAMP(b[x_write+3]);                            \
                                                                \
                x_write = (x_write + 4) & 7;                    \
        } while (0)
/*Decode 8 source bytes (two U/Y1/V/Y2 groups) into four RGB pixels.
  Uses src, r[], g[], b[] and x_write from the enclosing scope: src advances
  by 8 and x_write by 4 (modulo 8).
  The scaled luma is kept as int: 298 * (Y - 16) >> 8 can fall outside 0..255
  and must reach CLAMP unwrapped.*/
#define DECODE_YUV422()                                         \
        do                                                      \
        {                                                       \
                int c;                                          \
                                                                \
                for (c = 0; c < 2; c++)                         \
                {                                               \
                        int y1, y2;                             \
                        int8_t U, V;                            \
                        int dR, dG, dB;                         \
                                                                \
                        U = src[0] - 0x80;                      \
                        y1 = (298 * (src[1] - 16)) >> 8;        \
                        V = src[2] - 0x80;                      \
                        y2 = (298 * (src[3] - 16)) >> 8;        \
                        src += 4;                               \
                                                                \
                        dR = (309*V) >> 8;                      \
                        dG = (100*U + 208*V) >> 8;              \
                        dB = (516*U) >> 8;                      \
                                                                \
                        r[x_write] = y1 + dR;                   \
                        CLAMP(r[x_write]);                      \
                        g[x_write] = y1 - dG;                   \
                        CLAMP(g[x_write]);                      \
                        b[x_write] = y1 + dB;                   \
                        CLAMP(b[x_write]);                      \
                                                                \
                        r[x_write+1] = y2 + dR;                 \
                        CLAMP(r[x_write+1]);                    \
                        g[x_write+1] = y2 - dG;                 \
                        CLAMP(g[x_write+1]);                    \
                        b[x_write+1] = y2 + dB;                 \
                        CLAMP(b[x_write+1]);                    \
                                                                \
                        x_write = (x_write + 2) & 7;            \
                }                                               \
        } while (0)
/*Decode four 16-bit 5:5:5 pixels. Each 5-bit field is widened to 8 bits by
  repeating its top bits. Uses src, r[], g[], b[] and x_write from the
  enclosing scope: src advances by 8 and x_write by 4 (modulo 8).*/
#define DECODE_RGB555()                                                 \
        do                                                              \
        {                                                               \
                int px;                                                 \
                                                                        \
                for (px = 0; px < 4; px++, src += 2)                    \
                {                                                       \
                        uint16_t pix = *(uint16_t *)src;                \
                        int lo  = pix & 0x1f;                           \
                        int mid = (pix >> 5) & 0x1f;                    \
                        int hi  = (pix >> 10) & 0x1f;                   \
                                                                        \
                        r[x_write + px] = (lo << 3) | (lo >> 2);        \
                        g[x_write + px] = (mid << 3) | (mid >> 2);      \
                        b[x_write + px] = (hi << 3) | (hi >> 2);        \
                }                                                       \
                x_write = (x_write + 4) & 7;                            \
        } while (0)
/*Decode four 16-bit 5:6:5 pixels. Each field is widened to 8 bits by
  repeating its top bits. Uses src, r[], g[], b[] and x_write from the
  enclosing scope: src advances by 8 and x_write by 4 (modulo 8).*/
#define DECODE_RGB565()                                                 \
        do                                                              \
        {                                                               \
                int px;                                                 \
                                                                        \
                for (px = 0; px < 4; px++, src += 2)                    \
                {                                                       \
                        uint16_t pix = *(uint16_t *)src;                \
                        int lo  = pix & 0x1f;                           \
                        int mid = (pix >> 5) & 0x3f;                    \
                        int hi  = (pix >> 11) & 0x1f;                   \
                                                                        \
                        r[x_write + px] = (lo << 3) | (lo >> 2);        \
                        g[x_write + px] = (mid << 2) | (mid >> 4);      \
                        b[x_write + px] = (hi << 3) | (hi >> 2);        \
                }                                                       \
                x_write = (x_write + 4) & 7;                            \
        } while (0)
/*Decode four packed 3-byte pixels; byte 0 goes to r[], byte 1 to g[] and
  byte 2 to b[]. Uses src, r[], g[], b[] and x_write from the enclosing scope:
  src advances by 12 and x_write by 4 (modulo 8).*/
#define DECODE_RGB888()                                         \
        do                                                      \
        {                                                       \
                int px;                                         \
                                                                \
                for (px = 0; px < 4; px++, src += 3)            \
                {                                               \
                        r[x_write + px] = src[0];               \
                        g[x_write + px] = src[1];               \
                        b[x_write + px] = src[2];               \
                }                                               \
                x_write = (x_write + 4) & 7;                    \
        } while (0)
/*Decode four 4-byte pixels; byte 0 goes to r[], byte 1 to g[] and byte 2 to
  b[], byte 3 is ignored. Uses src, r[], g[], b[] and x_write from the
  enclosing scope: src advances by 16 and x_write by 4 (modulo 8).*/
#define DECODE_XRGB8888()                                       \
        do                                                      \
        {                                                       \
                int px;                                         \
                                                                \
                for (px = 0; px < 4; px++, src += 4)            \
                {                                               \
                        r[x_write + px] = src[0];               \
                        g[x_write + px] = src[1];               \
                        b[x_write + px] = src[2];               \
                }                                               \
                x_write = (x_write + 4) & 7;                    \
        } while (0)
/*Decode the next four overlay pixels using the decoder selected by
  virge->streams.sdif. Any value without its own decoder (including 0) is
  treated as XRGB8888. Uses virge plus the variables required by the DECODE_*
  macros from the enclosing scope.*/
#define OVERLAY_SAMPLE()                                        \
        do                                                      \
        {                                                       \
                switch (virge->streams.sdif)                    \
                {                                               \
                        /*YUV formats*/                         \
                        case 1: DECODE_YCbCr();    break;       \
                        case 2: DECODE_YUV422();   break;       \
                        case 4: DECODE_YUV211();   break;       \
                                                                \
                        /*16-bit RGB formats*/                  \
                        case 3: DECODE_RGB555();   break;       \
                        case 5: DECODE_RGB565();   break;       \
                                                                \
                        /*24/32-bit RGB formats*/               \
                        case 6: DECODE_RGB888();   break;       \
                        case 7:                                 \
                        default:                                \
                                DECODE_XRGB8888(); break;       \
                }                                               \
        } while (0)
2988 static void s3_virge_overlay_draw(svga_t *svga, int displine)
2990 virge_t *virge = (virge_t *)svga->p;
2991 int offset = (virge->streams.sec_x - virge->streams.pri_x) + 1;
2992 int h_acc = virge->streams.dda_horiz_accumulator;
2993 int r[8], g[8], b[8];
2994 int r_samp[8] = {0, 0, 0, 0, 0, 0, 0, 0};
2995 int g_samp[8] = {0, 0, 0, 0, 0, 0, 0, 0};
2996 int b_samp[8] = {0, 0, 0, 0, 0, 0, 0, 0};
2997 int x_size, x_read = 4, x_write = 4;
2998 int x;
2999 uint32_t *p;
3000 uint8_t *src = &svga->vram[svga->overlay_latch.addr];
3002 p = &((uint32_t *)buffer32->line[displine])[offset + 32];
3004 if ((offset + virge->streams.sec_w) > virge->streams.pri_w)
3005 x_size = (virge->streams.pri_w - virge->streams.sec_x) + 1;
3006 else
3007 x_size = virge->streams.sec_w + 1;
3009 OVERLAY_SAMPLE();
3011 for (x = 0; x < x_size; x++)
3013 *p++ = r[x_read] | (g[x_read] << 8) | (b[x_read] << 16);
3015 h_acc += virge->streams.k1_horiz_scale;
3016 if (h_acc >= 0)
3018 if ((x_read ^ (x_read + 1)) & ~3)
3019 OVERLAY_SAMPLE();
3020 x_read = (x_read + 1) & 7;
3022 h_acc += (virge->streams.k2_horiz_scale - virge->streams.k1_horiz_scale);
3026 svga->overlay_latch.v_acc += virge->streams.k1_vert_scale;
3027 if (svga->overlay_latch.v_acc >= 0)
3029 svga->overlay_latch.v_acc += (virge->streams.k2_vert_scale - virge->streams.k1_vert_scale);
3030 svga->overlay_latch.addr += virge->streams.sec_stride;
3034 static uint8_t s3_virge_pci_read(int func, int addr, void *p)
3036 virge_t *virge = (virge_t *)p;
3037 svga_t *svga = &virge->svga;
3038 uint8_t ret = 0;
3039 // pclog("S3 PCI read %08X ", addr);
3040 switch (addr)
3042 case 0x00: ret = 0x33; break; /*'S3'*/
3043 case 0x01: ret = 0x53; break;
3045 case 0x02: ret = virge->virge_id_low; break;
3046 case 0x03: ret = virge->virge_id_high; break;
3048 case 0x04: ret = virge->pci_regs[0x04] & 0x27; break;
3050 case 0x07: ret = virge->pci_regs[0x07] & 0x36; break;
3052 case 0x08: ret = 0; break; /*Revision ID*/
3053 case 0x09: ret = 0; break; /*Programming interface*/
3055 case 0x0a: ret = 0x00; break; /*Supports VGA interface*/
3056 case 0x0b: ret = 0x03; /*output = 3; */break;
3058 case 0x0d: ret = virge->pci_regs[0x0d] & 0xf8; break;
3060 case 0x10: ret = 0x00; break;/*Linear frame buffer address*/
3061 case 0x11: ret = 0x00; break;
3062 case 0x12: ret = 0x00; break;
3063 case 0x13: ret = svga->crtc[0x59] & 0xfc; break;
3065 case 0x30: ret = virge->pci_regs[0x30] & 0x01; break; /*BIOS ROM address*/
3066 case 0x31: ret = 0x00; break;
3067 case 0x32: ret = virge->pci_regs[0x32]; break;
3068 case 0x33: ret = virge->pci_regs[0x33]; break;
3070 case 0x3c: ret = virge->pci_regs[0x3c]; break;
3072 case 0x3d: ret = 0x01; break; /*INTA*/
3074 case 0x3e: ret = 0x04; break;
3075 case 0x3f: ret = 0xff; break;
3078 // pclog("%02X\n", ret);
3079 return ret;
3082 static void s3_virge_pci_write(int func, int addr, uint8_t val, void *p)
3084 virge_t *virge = (virge_t *)p;
3085 svga_t *svga = &virge->svga;
3086 // pclog("S3 PCI write %08X %02X %04X:%08X\n", addr, val, CS, pc);
3087 switch (addr)
3089 case 0x00: case 0x01: case 0x02: case 0x03:
3090 case 0x08: case 0x09: case 0x0a: case 0x0b:
3091 case 0x3d: case 0x3e: case 0x3f:
3092 return;
3094 case PCI_REG_COMMAND:
3095 if (val & PCI_COMMAND_IO)
3097 io_removehandler(0x03c0, 0x0020, s3_virge_in, NULL, NULL, s3_virge_out, NULL, NULL, virge);
3098 io_sethandler(0x03c0, 0x0020, s3_virge_in, NULL, NULL, s3_virge_out, NULL, NULL, virge);
3100 else
3101 io_removehandler(0x03c0, 0x0020, s3_virge_in, NULL, NULL, s3_virge_out, NULL, NULL, virge);
3102 virge->pci_regs[PCI_REG_COMMAND] = val & 0x27;
3103 return;
3104 case 0x07:
3105 virge->pci_regs[0x07] = val & 0x3e;
3106 return;
3107 case 0x0d:
3108 virge->pci_regs[0x0d] = val & 0xf8;
3109 return;
3111 case 0x13:
3112 svga->crtc[0x59] = val & 0xfc;
3113 s3_virge_updatemapping(virge);
3114 return;
3116 case 0x30: case 0x32: case 0x33:
3117 virge->pci_regs[addr] = val;
3118 if (virge->pci_regs[0x30] & 0x01)
3120 uint32_t addr = (virge->pci_regs[0x32] << 16) | (virge->pci_regs[0x33] << 24);
3121 // pclog("Virge bios_rom enabled at %08x\n", addr);
3122 mem_mapping_set_addr(&virge->bios_rom.mapping, addr, 0x8000);
3123 mem_mapping_enable(&virge->bios_rom.mapping);
3125 else
3127 // pclog("Virge bios_rom disabled\n");
3128 mem_mapping_disable(&virge->bios_rom.mapping);
3130 return;
3131 case 0x3c:
3132 virge->pci_regs[0x3c] = val;
3133 return;
3137 static void *s3_virge_init()
3139 virge_t *virge = malloc(sizeof(virge_t));
3140 memset(virge, 0, sizeof(virge_t));
3142 virge->bilinear_enabled = device_get_config_int("bilinear");
3143 virge->dithering_enabled = device_get_config_int("dithering");
3144 virge->memory_size = device_get_config_int("memory");
3146 svga_init(&virge->svga, virge, virge->memory_size << 20,
3147 s3_virge_recalctimings,
3148 s3_virge_in, s3_virge_out,
3149 s3_virge_hwcursor_draw,
3150 s3_virge_overlay_draw);
3152 rom_init(&virge->bios_rom, "roms/s3virge.bin", 0xc0000, 0x8000, 0x7fff, 0, MEM_MAPPING_EXTERNAL);
3153 if (PCI)
3154 mem_mapping_disable(&virge->bios_rom.mapping);
3156 mem_mapping_add(&virge->mmio_mapping, 0, 0, s3_virge_mmio_read,
3157 s3_virge_mmio_read_w,
3158 s3_virge_mmio_read_l,
3159 s3_virge_mmio_write,
3160 s3_virge_mmio_write_w,
3161 s3_virge_mmio_write_l,
3162 NULL,
3163 0,
3164 virge);
3165 mem_mapping_add(&virge->new_mmio_mapping, 0, 0, s3_virge_mmio_read,
3166 s3_virge_mmio_read_w,
3167 s3_virge_mmio_read_l,
3168 s3_virge_mmio_write,
3169 s3_virge_mmio_write_w,
3170 s3_virge_mmio_write_l,
3171 NULL,
3172 0,
3173 virge);
3174 mem_mapping_add(&virge->linear_mapping, 0, 0, svga_read_linear,
3175 svga_readw_linear,
3176 svga_readl_linear,
3177 svga_write_linear,
3178 svga_writew_linear,
3179 svga_writel_linear,
3180 NULL,
3181 0,
3182 &virge->svga);
3184 io_sethandler(0x03c0, 0x0020, s3_virge_in, NULL, NULL, s3_virge_out, NULL, NULL, virge);
3186 virge->pci_regs[4] = 3;
3187 virge->pci_regs[5] = 0;
3188 virge->pci_regs[6] = 0;
3189 virge->pci_regs[7] = 2;
3190 virge->pci_regs[0x32] = 0x0c;
3191 virge->pci_regs[0x3d] = 1;
3192 virge->pci_regs[0x3e] = 4;
3193 virge->pci_regs[0x3f] = 0xff;
3195 virge->virge_id_high = 0x56;
3196 virge->virge_id_low = 0x31;
3197 virge->virge_rev = 0;
3198 virge->virge_id = 0xe1;
3200 switch (virge->memory_size)
3202 case 2:
3203 virge->svga.crtc[0x36] = 2 | (0 << 2) | (1 << 4) | (4 << 5);
3204 break;
3205 case 4:
3206 default:
3207 virge->svga.crtc[0x36] = 2 | (0 << 2) | (1 << 4) | (0 << 5);
3208 break;
3211 virge->svga.crtc[0x37] = 1;// | (7 << 5);
3212 virge->svga.crtc[0x53] = 1 << 3;
3213 virge->svga.crtc[0x59] = 0x70;
3215 virge->is_375 = 0;
3217 pci_add(s3_virge_pci_read, s3_virge_pci_write, virge);
3219 virge->wake_render_thread = thread_create_event();
3220 virge->wake_main_thread = thread_create_event();
3221 virge->not_full_event = thread_create_event();
3222 virge->render_thread = thread_create(render_thread, virge);
3224 return virge;
3227 static void *s3_virge_375_init()
3229 virge_t *virge = malloc(sizeof(virge_t));
3230 memset(virge, 0, sizeof(virge_t));
3232 virge->bilinear_enabled = device_get_config_int("bilinear");
3233 virge->dithering_enabled = device_get_config_int("dithering");
3234 virge->memory_size = device_get_config_int("memory");
3236 svga_init(&virge->svga, virge, virge->memory_size << 20,
3237 s3_virge_recalctimings,
3238 s3_virge_in, s3_virge_out,
3239 s3_virge_hwcursor_draw,
3240 s3_virge_overlay_draw);
3242 rom_init(&virge->bios_rom, "roms/86c375_1.bin", 0xc0000, 0x8000, 0x7fff, 0, MEM_MAPPING_EXTERNAL);
3243 if (PCI)
3244 mem_mapping_disable(&virge->bios_rom.mapping);
3246 mem_mapping_add(&virge->mmio_mapping, 0, 0, s3_virge_mmio_read,
3247 s3_virge_mmio_read_w,
3248 s3_virge_mmio_read_l,
3249 s3_virge_mmio_write,
3250 s3_virge_mmio_write_w,
3251 s3_virge_mmio_write_l,
3252 NULL,
3253 0,
3254 virge);
3255 mem_mapping_add(&virge->new_mmio_mapping, 0, 0, s3_virge_mmio_read,
3256 s3_virge_mmio_read_w,
3257 s3_virge_mmio_read_l,
3258 s3_virge_mmio_write,
3259 s3_virge_mmio_write_w,
3260 s3_virge_mmio_write_l,
3261 NULL,
3262 0,
3263 virge);
3264 mem_mapping_add(&virge->linear_mapping, 0, 0, svga_read_linear,
3265 svga_readw_linear,
3266 svga_readl_linear,
3267 svga_write_linear,
3268 svga_writew_linear,
3269 svga_writel_linear,
3270 NULL,
3271 0,
3272 &virge->svga);
3274 io_sethandler(0x03c0, 0x0020, s3_virge_in, NULL, NULL, s3_virge_out, NULL, NULL, virge);
3276 virge->pci_regs[4] = 3;
3277 virge->pci_regs[5] = 0;
3278 virge->pci_regs[6] = 0;
3279 virge->pci_regs[7] = 2;
3280 virge->pci_regs[0x32] = 0x0c;
3281 virge->pci_regs[0x3d] = 1;
3282 virge->pci_regs[0x3e] = 4;
3283 virge->pci_regs[0x3f] = 0xff;
3285 virge->virge_id_high = 0x8a;
3286 virge->virge_id_low = 0x01;
3287 virge->virge_rev = 0;
3288 virge->virge_id = 0xe1;
3290 switch (virge->memory_size)
3292 case 2:
3293 virge->svga.crtc[0x36] = 2 | (0 << 2) | (1 << 4) | (4 << 5);
3294 break;
3295 case 4:
3296 default:
3297 virge->svga.crtc[0x36] = 2 | (0 << 2) | (1 << 4) | (0 << 5);
3298 break;
3300 // virge->svga.crtc[0x36] = 2 | (0 << 2) | (1 << 4);
3301 virge->svga.crtc[0x37] = 1;// | (7 << 5);
3302 virge->svga.crtc[0x53] = 1 << 3;
3303 virge->svga.crtc[0x59] = 0x70;
3305 virge->svga.crtc[0x6c] = 0x01;
3307 virge->is_375 = 1;
3309 pci_add(s3_virge_pci_read, s3_virge_pci_write, virge);
3311 virge->wake_render_thread = thread_create_event();
3312 virge->wake_main_thread = thread_create_event();
3313 virge->not_full_event = thread_create_event();
3314 virge->render_thread = thread_create(render_thread, virge);
3316 return virge;
3319 static void s3_virge_close(void *p)
3321 virge_t *virge = (virge_t *)p;
3322 FILE *f = fopen("vram.dmp", "wb");
3323 fwrite(virge->svga.vram, 4 << 20, 1, f);
3324 fclose(f);
3326 thread_kill(virge->render_thread);
3327 thread_destroy_event(virge->not_full_event);
3328 thread_destroy_event(virge->wake_main_thread);
3329 thread_destroy_event(virge->wake_render_thread);
3331 svga_close(&virge->svga);
3333 free(virge);
/* Returns the result of rom_present() for the ROM image "roms/s3virge.bin". */
static int s3_virge_available()
{
        int found = rom_present("roms/s3virge.bin");

        return found;
}
/* Returns the result of rom_present() for the ROM image "roms/86c375_1.bin". */
static int s3_virge_375_available()
{
        int found = rom_present("roms/86c375_1.bin");

        return found;
}
3346 static void s3_virge_speed_changed(void *p)
3348 virge_t *virge = (virge_t *)p;
3350 svga_recalctimings(&virge->svga);
3353 static void s3_virge_force_redraw(void *p)
3355 virge_t *virge = (virge_t *)p;
3357 virge->svga.fullchange = changeframecount;
3360 static void s3_virge_add_status_info(char *s, int max_len, void *p)
3362 virge_t *virge = (virge_t *)p;
3363 char temps[256];
3364 uint64_t new_time = timer_read();
3365 uint64_t status_diff = new_time - status_time;
3366 status_time = new_time;
3368 if (!status_diff)
3369 status_diff = 1;
3371 svga_add_status_info(s, max_len, &virge->svga);
3372 sprintf(temps, "%f Mpixels/sec\n%f ktris/sec\n%f%% CPU\n%f%% CPU (real)\n%d writes %i reads\n\n", (double)virge->pixel_count/1000000.0, (double)virge->tri_count/1000.0, ((double)virge_time * 100.0) / timer_freq, ((double)virge_time * 100.0) / status_diff, reg_writes, reg_reads);
3373 strncat(s, temps, max_len);
3375 virge->pixel_count = virge->tri_count = 0;
3376 virge_time = 0;
3377 reg_reads = 0;
3378 reg_writes = 0;
3381 static device_config_t s3_virge_config[] =
3384 .name = "memory",
3385 .description = "Memory size",
3386 .type = CONFIG_SELECTION,
3387 .selection =
3390 .description = "2 MB",
3391 .value = 2
3392 },
3394 .description = "4 MB",
3395 .value = 4
3396 },
3398 .description = ""
3400 },
3401 .default_int = 4
3402 },
3404 .name = "bilinear",
3405 .description = "Bilinear filtering",
3406 .type = CONFIG_BINARY,
3407 .default_int = 1
3408 },
3410 .name = "dithering",
3411 .description = "Dithering",
3412 .type = CONFIG_BINARY,
3413 .default_int = 1
3414 },
3416 .type = -1
3418 };
3420 device_t s3_virge_device =
3422 "Diamond Stealth 3D 2000 (S3 ViRGE)",
3423 DEVICE_NOT_WORKING,
3424 s3_virge_init,
3425 s3_virge_close,
3426 s3_virge_available,
3427 s3_virge_speed_changed,
3428 s3_virge_force_redraw,
3429 s3_virge_add_status_info,
3430 s3_virge_config
3431 };
3433 device_t s3_virge_375_device =
3435 "S3 ViRGE/DX",
3436 DEVICE_NOT_WORKING,
3437 s3_virge_375_init,
3438 s3_virge_close,
3439 s3_virge_375_available,
3440 s3_virge_speed_changed,
3441 s3_virge_force_redraw,
3442 s3_virge_add_status_info,
3443 s3_virge_config
3444 };