From 7b768b4355e35537dc33be17b22c84b84523edeb Mon Sep 17 00:00:00 2001 From: iann Date: Wed, 1 Oct 2025 08:43:02 +0900 Subject: [PATCH 01/13] Begin revival attempt of ps2gl with new pvc no lights implementation (WIP, very slow it seems) --- Makefile | 22 +++- README.md | 6 + examples/nehe/lesson02/lesson2.cpp | 1 + examples/nehe/lesson03/lesson3.cpp | 7 +- examples/nehe/lesson04/lesson4.cpp | 9 +- examples/nehe/lesson05/lesson5.cpp | 17 ++- examples/pvc_box/Makefile | 37 ++++++ examples/pvc_box/lit_pvc_box.cpp | 150 ++++++++++++++++++++++ examples/pvc_box/unlit_pvc_box.cpp | 137 ++++++++++++++++++++ include/ps2gl/base_renderer.h | 3 +- include/ps2gl/gmanager.h | 4 +- include/ps2gl/immgmanager.h | 2 +- include/ps2gl/material.h | 6 +- src/base_renderer.cpp | 57 +++++++-- src/gmanager.cpp | 15 +++ src/immgmanager.cpp | 35 +++--- src/indexed_renderer.cpp | 2 +- src/material.cpp | 6 +- src/renderermanager.cpp | 33 +++++ vu1/fast_no_lights_pvc_tri.vcl | 88 +++++++++++++ vu1/fast_no_lights_pvc_tri_vcl.vsm | 193 +++++++++++++++++++++++++++++ vu1/general.i | 4 +- vu1/geometry.i | 2 +- vu1/io.i | 2 +- vu1/math.i | 2 +- vu1/vu1_context.i | 54 ++++++++ vu1/vu1_mem_linear.i | 17 +++ vu1/vu1renderers.h | 1 + 28 files changed, 853 insertions(+), 59 deletions(-) create mode 100644 examples/pvc_box/Makefile create mode 100644 examples/pvc_box/lit_pvc_box.cpp create mode 100644 examples/pvc_box/unlit_pvc_box.cpp create mode 100644 vu1/fast_no_lights_pvc_tri.vcl create mode 100644 vu1/fast_no_lights_pvc_tri_vcl.vsm create mode 100644 vu1/vu1_context.i create mode 100644 vu1/vu1_mem_linear.i diff --git a/Makefile b/Makefile index e18b72c0..d5b28dea 100644 --- a/Makefile +++ b/Makefile @@ -50,7 +50,8 @@ RENDERERS = \ general_tri \ general \ indexed \ - scei + scei \ + fast_no_lights_pvc_tri EE_OBJS += $(addsuffix .vo, $(addprefix vu1/, $(RENDERERS))) @@ -76,23 +77,42 @@ realclean: clean include $(PS2SDK)/Defs.make include $(PS2SDK)/samples/Makefile.eeglobal +## dvp-as origin in ps2dev 
toolchain: https://github.com/ps2dev/ps2toolchain/blob/master/scripts/001-dvp.sh +## Build .vo (VU object) from a compiled .vsm %.vo: %_vcl.vsm dvp-as -o $@ $< +# VCL (Vector Command Language): https://ps2linux.no-ip.info/playstation2-linux.com/projects/vcl.html +# for documentation download the x86 or win32 tar above and read the VCL_User_Manual_E_v1.4_1.pdf +# more resources on vsm: http://lukasz.dk/files/vu-instruction-manual.pdf %_vcl.vsm: %_pp4.vcl vcl -o$@ $< +# GCC / CPP flags (-E, -P, -imacros): https://gcc.gnu.org/onlinedocs/cpp/Invocation.html#Invocation +# -E = preprocess only, -P = strip #line, -imacros includes macros without writing #include %indexed_pp4.vcl: %indexed_pp3.vcl cat $< | cc -E -P -imacros vu1/vu1_mem_indexed.h -o $@ - +# GCC / CPP flags (-E, -P, -imacros): https://gcc.gnu.org/onlinedocs/cpp/Invocation.html#Invocation +# -E = preprocess only, -P = strip #line, -imacros includes macros without writing #include %_pp4.vcl: %_pp3.vcl cat $< | cc -E -P -imacros vu1/vu1_mem_linear.h -o $@ - +#TODO: remove this step? This could be covered simply from writing correct vcl code... unless intending to allow new and old syntax? +# you can standardize syntax by using ".syntax old" or ".syntax new" or by passing `-n` to VCL for "new" and writing sources +# accordingly, it might be better to allow for correcting towards that to avoid confusion... %_pp3.vcl: %_pp2.vcl cat $< | sed 's/\[\([0-9]\)\]/_\1/g ; s/\[\([w-zW-Z]\)\]/\1/g' - > $@ +# Expand assembly-style macros and .include with GASP +# -c ';' uses ';' as the comment char; -Ivu1 resolves local .include files. +# GASP (GNU assembler preprocessor) manpage: https://manpages.debian.org/unstable/binutils-m68hc1x/gasp.1.en.html %_pp2.vcl: %_pp1.vcl gasp -c ';' -Ivu1 -o $@ $< +# this is in order to normalize sources for GASP by removing C preprocessor stuff (#include/#define), +# and then fix local .include paths so GASP can resolve them relative to the source dir. 
+# if the .vcl file ALREADY avoids #include/#define and only uses .include/.macro etc., +# you can wire %.vcl -> %_pp2.vcl directly and drop this rule %_pp1.vcl: %.vcl cat $< | sed 's/#include[ ]\+.\+// ; s/#define[ ]\+.\+// ; s|\(\.include[ ]\+\)"\([^/].\+\)"|\1"$( $@ diff --git a/README.md b/README.md index b91dbc8e..204c2d54 100644 --- a/README.md +++ b/README.md @@ -152,6 +152,12 @@ Bug reports should be submitted to the appropriate homepage, which hosts a bug t ## Changelog +### 0.3.x (beginning revival attempts) +- Added documentation to the Makefile with references for using vcl and other tools in 2025 (also updated some vcl for potential deprecation of vcl preprocess step 2: + `cat $< | sed 's/\[\([0-9]\)\]/_\1/g ; s/\[\([w-zW-Z]\)\]/\1/g' - > $@ `) +- Added a fast no-lights per-vertex-color renderer with pvc_box examples (lit with `general_pv_diff_tri` and unlit with `fast_no_lights_pvc_tri`) -- WIP, could be very slow for some reason +- Began testing against https://github.com/raylib4Consoles/raylib4PlayStation2 integration (will be useful for testing expected OpenGL 1.1 behavior) + ### 0.3 - Can now define custom prim types and attributes, tying them to custom renderers and override default renderers. - Lots of bug fixes! 
diff --git a/examples/nehe/lesson02/lesson2.cpp b/examples/nehe/lesson02/lesson2.cpp index 1eb77de6..5bff9cf4 100644 --- a/examples/nehe/lesson02/lesson2.cpp +++ b/examples/nehe/lesson02/lesson2.cpp @@ -38,6 +38,7 @@ void display(void) // Create The Display Fu glVertex3f(1.0f, -1.0f, 0.0f); // Bottom Right glEnd(); // Finished Drawing The Triangle glTranslatef(3.0f, 0.0f, 0.0f); // Move Right 3 Units + //TODO: renderer ordering here in renderermanager causing confusion (non trivial path finding for renderers) glBegin(GL_QUADS); // Draw A Quad glVertex3f(-1.0f, 1.0f, 0.0f); // Top Left glVertex3f(1.0f, 1.0f, 0.0f); // Top Right diff --git a/examples/nehe/lesson03/lesson3.cpp b/examples/nehe/lesson03/lesson3.cpp index b4a35732..d3553942 100644 --- a/examples/nehe/lesson03/lesson3.cpp +++ b/examples/nehe/lesson03/lesson3.cpp @@ -25,11 +25,7 @@ void init(GLvoid) // Create Some Everyday Functions glDepthFunc(GL_LEQUAL); // The Type Of Depth Testing To Do glHint(GL_PERSPECTIVE_CORRECTION_HINT, GL_NICEST); - // ps2gl needs lighting + color_material for per-vertex colors - glEnable(GL_COLOR_MATERIAL); - glColorMaterial(GL_FRONT_AND_BACK, GL_DIFFUSE); - glEnable(GL_LIGHTING); - glEnable(GL_LIGHT0); + // TODO: next make a fast no lights for QUADS } void display(void) // Create The Display Function @@ -47,6 +43,7 @@ void display(void) // Create The Display Fu glEnd(); // Finished Drawing The Triangle glTranslatef(3.0f, 0.0f, 0.0f); // Move Right 3 Units glColor3f(0.5f, 0.5f, 1.0f); // Set The Color To Blue One Time Only + //TODO: like lesson02, renderermanager causing potential issues with ordering glBegin(GL_QUADS); // Draw A Quad glVertex3f(-1.0f, 1.0f, 0.0f); // Top Left glVertex3f(1.0f, 1.0f, 0.0f); // Top Right diff --git a/examples/nehe/lesson04/lesson4.cpp b/examples/nehe/lesson04/lesson4.cpp index 2b44825d..d2e254c0 100644 --- a/examples/nehe/lesson04/lesson4.cpp +++ b/examples/nehe/lesson04/lesson4.cpp @@ -28,11 +28,7 @@ void init(GLvoid) // Create Some Everyday 
Functions glDepthFunc(GL_LEQUAL); // The Type Of Depth Testing To Do glHint(GL_PERSPECTIVE_CORRECTION_HINT, GL_NICEST); - // ps2gl needs lighting + color_material for per-vertex colors - glEnable(GL_COLOR_MATERIAL); - glColorMaterial(GL_FRONT_AND_BACK, GL_DIFFUSE); - glEnable(GL_LIGHTING); - glEnable(GL_LIGHT0); + // TODO: next make a fast no lights for QUADS } void idle(void) { @@ -45,6 +41,8 @@ void display(void) // Create The Display Fu glPushMatrix(); glTranslatef(-1.5f, 0.0f, -6.0f); // Move Left 1.5 Units And Into The Screen 6.0 glRotatef(rtri, 0.0f, 1.0f, 0.0f); // Rotate The Triangle On The Y axis + constexpr float default_normal_direction_alignment[4] = {0.f, 0.f, 1.f, 0.f}; + glLightfv(GL_LIGHT0, GL_POSITION, default_normal_direction_alignment); glBegin(GL_TRIANGLES); // Drawing Using Triangles glColor3f(1.0f, 0.0f, 0.0f); // Set The Color To Red glVertex3f(0.0f, 1.0f, 0.0f); // Top @@ -58,6 +56,7 @@ void display(void) // Create The Display Fu glTranslatef(1.5f, 0.0f, -6.0f); // Move Right 1.5 Units And Into The Screen 6.0 glRotatef(rquad, 1.0f, 0.0f, 0.0f); // Rotate The Quad On The X axis glColor3f(0.5f, 0.5f, 1.0f); // Set The Color To Blue One Time Only + glLightfv(GL_LIGHT0, GL_POSITION, default_normal_direction_alignment); glBegin(GL_QUADS); // Draw A Quad glVertex3f(-1.0f, 1.0f, 0.0f); // Top Left glVertex3f(1.0f, 1.0f, 0.0f); // Top Right diff --git a/examples/nehe/lesson05/lesson5.cpp b/examples/nehe/lesson05/lesson5.cpp index 7edaa082..2c872d8c 100644 --- a/examples/nehe/lesson05/lesson5.cpp +++ b/examples/nehe/lesson05/lesson5.cpp @@ -27,12 +27,6 @@ void InitGL(GLvoid) // Create Some Everyday Functions glEnable(GL_DEPTH_TEST); // Enables Depth Testing glDepthFunc(GL_LEQUAL); // The Type Of Depth Testing To Do glHint(GL_PERSPECTIVE_CORRECTION_HINT, GL_NICEST); - - // ps2gl needs lighting + color_material for per-vertex colors - glEnable(GL_COLOR_MATERIAL); - glColorMaterial(GL_FRONT_AND_BACK, GL_DIFFUSE); - glEnable(GL_LIGHTING); - 
glEnable(GL_LIGHT0); } void idle(void) { @@ -41,6 +35,9 @@ void idle(void) { void display(void) // Create The Display Function { + glDisable(GL_COLOR_MATERIAL); + glDisable(GL_LIGHTING); + glDisable(GL_LIGHT0); glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Clear Screen And Depth Buffer glLoadIdentity(); // Reset The Current Modelview Matrix glPushMatrix(); @@ -73,10 +70,18 @@ void display(void) // Create The Display Fu glVertex3f(-1.0f, -1.0f, 1.0f); // Right Of Triangle (Left) glEnd(); // Finished Drawing The Triangle + // TODO: next make a fast no lights for QUADS + glEnable(GL_COLOR_MATERIAL); + glColorMaterial(GL_FRONT_AND_BACK, GL_DIFFUSE); + glEnable(GL_LIGHTING); + glEnable(GL_LIGHT0); + glLoadIdentity(); // Reset The Current Modelview Matrix glTranslatef(1.5f, 0.0f, -6.0f); // Move Right 1.5 Units And Into The Screen 6.0 glRotatef(rquad, 1.0f, 0.0f, 0.0f); // Rotate The Quad On The X axis glColor3f(0.5f, 0.5f, 1.0f); // Set The Color To Blue One Time Only + constexpr float default_normal_direction_alignment[4] = {0.f, 0.f, 1.f, 0.f}; + glLightfv(GL_LIGHT0, GL_POSITION, default_normal_direction_alignment); glBegin(GL_QUADS); // Draw A Quad glColor3f(0.0f, 1.0f, 0.0f); // Set The Color To Blue glVertex3f(1.0f, 1.0f, -1.0f); // Top Right Of The Quad (Top) diff --git a/examples/pvc_box/Makefile b/examples/pvc_box/Makefile new file mode 100644 index 00000000..40f926c6 --- /dev/null +++ b/examples/pvc_box/Makefile @@ -0,0 +1,37 @@ +EE_BIN = box.elf +EE_CFLAGS := -I$(PS2SDK)/ports/include -I../shared_code/ $(EE_CFLAGS) +EE_CXXFLAGS := -I$(PS2SDK)/ports/include -I../shared_code/ $(EE_CXXFLAGS) +EE_OBJS = lit_pvc_box.o ../shared_code/text_stuff.o +#EE_OBJS = unlit_pvc_box.o ../shared_code/text_stuff.o +EE_LDFLAGS += -L$(PS2SDK)/ports/lib +EE_LIBS = -lps2glut -lps2gl -lps2stuff -lpad -ldma + +ifeq ($(DEBUG), 1) + EE_CFLAGS += -D_DEBUG + EE_CXXFLAGS += -D_DEBUG +endif + +# Disabling warnings +WARNING_FLAGS = -Wno-strict-aliasing -Wno-conversion-null + +# VU0 
code is broken so disable for now +EE_CFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM +EE_CXXFLAGS += $(WARNING_FLAGS) -DNO_VU0_VECTORS -DNO_ASM + +all: $(EE_BIN) + $(EE_STRIP) --strip-all $(EE_BIN) + +clean: + rm -f $(EE_BIN) $(EE_OBJS) + +run: $(EE_BIN) + ps2client -h 192.168.1.10 execee host:$(EE_BIN) + +reset: + ps2client -h 192.168.1.10 reset + +sim: $(EE_BIN) + PCSX2 --elf=$(PWD)/$(EE_BIN) + +include $(PS2SDK)/samples/Makefile.pref +include $(PS2SDK)/samples/Makefile.eeglobal_cpp diff --git a/examples/pvc_box/lit_pvc_box.cpp b/examples/pvc_box/lit_pvc_box.cpp new file mode 100644 index 00000000..eaaf065b --- /dev/null +++ b/examples/pvc_box/lit_pvc_box.cpp @@ -0,0 +1,150 @@ +#include +#include +#include +#include +#include +#include "ps2gl/renderermanager.h" +#include + +void init_lights_and_color(); +void display(); +void cube_position_and_rotation(); +void draw_rgb_cube(); +static void colored_vertex(float r, float g, float b, float x, float y, float z); +void reshape(int width, int height); +void perspective(float fov, float aspect, float nearClip, float farClip); + +static float cube_spin_angle = 0.0f; +static float cube_z = -6.0f, cube_forward_rotation = -18.0f; + +int main(int argc, char** argv) +{ + glutInit(&argc, argv); + glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE); + glutInitWindowSize(640, 448); + glutCreateWindow("RGB Cube"); + init_lights_and_color(); + glutDisplayFunc(display); + glutReshapeFunc(reshape); + glutMainLoop(); + return 0; +} + +void init_lights_and_color() +{ + glEnable(GL_COLOR_MATERIAL); + glColorMaterial(GL_FRONT, GL_DIFFUSE); + glEnable(GL_LIGHTING); + glEnable(GL_LIGHT0); +} + +void display() +{ + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); + mDebugPrint("[display() function] Renderer = %s\n", pglGetCurRendererName()); + cube_spin_angle += 0.2f; + draw_rgb_cube(); + glLoadIdentity(); +} + +void draw_rgb_cube() +{ + cube_position_and_rotation(); + //See gmanager.cpp + constexpr float 
default_normal_direction_alignment[4] = {0.f, 0.f, 1.f, 0.f}; + glLightfv(GL_LIGHT0, GL_POSITION, default_normal_direction_alignment); + glBegin(GL_TRIANGLES); + { + // +Z (front): A(1,1,1) B(-1,1,1) C(-1,-1,1) D(1,-1,1) + // tri1: (A,B,C) = (R,G,B) + colored_vertex(1, 0, 0, 1, 1, 1); + colored_vertex(0, 1, 0,-1, 1, 1); + colored_vertex(0, 0, 1, -1, -1, 1); + // tri2: (A,C,D) = (R,B,G) + colored_vertex(1, 0, 0, 1, 1, 1); + colored_vertex(0, 0, 1, -1, -1, 1); + colored_vertex(0, 1, 0, 1, -1, 1); + + // -Z (back): A(1,-1,-1) B(-1,-1,-1) C(-1,1,-1) D(1,1,-1) + // tri1: (A,B,C) = (R,G,B) + colored_vertex(1, 0, 0, 1, -1, -1); + colored_vertex(0, 1, 0,-1, -1, -1); + colored_vertex(0, 0, 1,-1, 1, -1); + // tri2: (A,C,D) = (R,B,G) + colored_vertex(1, 0, 0, 1, -1, -1); + colored_vertex(0, 0, 1,-1, 1, -1); + colored_vertex(0, 1, 0, 1, 1, -1); + + // +Y (top): A(1,1,-1) B(-1,1,-1) C(-1,1,1) D(1,1,1) + // tri1: (A,B,C) = (R,G,B) + colored_vertex(1, 0, 0, 1, 1, -1); + colored_vertex(0, 1, 0,-1, 1, -1); + colored_vertex(0, 0, 1,-1, 1, 1); + // tri2: (A,C,D) = (R,B,G) + colored_vertex(1, 0, 0, 1, 1, -1); + colored_vertex(0, 0, 1,-1, 1, 1); + colored_vertex(0, 1, 0, 1, 1, 1); + + // -Y (bottom): A(1,-1,1) B(-1,-1,1) C(-1,-1,-1) D(1,-1,-1) + // tri1: (A,B,C) = (R,G,B) + colored_vertex(1, 0, 0, 1, -1, 1); + colored_vertex(0, 1, 0,-1, -1, 1); + colored_vertex(0, 0, 1,-1, -1, -1); + // tri2: (A,C,D) = (R,B,G) + colored_vertex(1, 0, 0, 1, -1, 1); + colored_vertex(0, 0, 1,-1, -1, -1); + colored_vertex(0, 1, 0, 1, -1, -1); + + // -X (left): A(-1,1,1) B(-1,1,-1) C(-1,-1,-1) D(-1,-1,1) + // tri1: (A,B,C) = (R,G,B) + colored_vertex(1, 0, 0,-1, 1, 1); + colored_vertex(0, 1, 0,-1, 1, -1); + colored_vertex(0, 0, 1,-1, -1, -1); + // tri2: (A,C,D) = (R,B,G) + colored_vertex(1, 0, 0,-1, 1, 1); + colored_vertex(0, 0, 1,-1, -1, -1); + colored_vertex(0, 1, 0,-1, -1, 1); + + // +X (right): A(1,1,-1) B(1,1,1) C(1,-1,1) D(1,-1,-1) + // tri1: (A,B,C) = (R,G,B) + colored_vertex(1, 0, 0, 1, 1, -1); + 
colored_vertex(0, 1, 0, 1, 1, 1); + colored_vertex(0, 0, 1, 1, -1, 1); + // tri2: (A,C,D) = (R,B,G) + colored_vertex(1, 0, 0, 1, 1, -1); + colored_vertex(0, 0, 1, 1, -1, 1); + colored_vertex(0, 1, 0, 1, -1, -1); + } + glEnd(); +} + +void cube_position_and_rotation() +{ + glTranslatef(0.0f, 0.0f, cube_z); + glRotatef(cube_forward_rotation, -1, 0, 0); + glRotatef(cube_spin_angle, 0.0f, 1.0f, 0.0f); +} + +static void colored_vertex(const float r, const float g, const float b, const float x, const float y, const float z) +{ + glColor3f(r, g, b); + glVertex3f(x, y, z); +} + +void reshape(const int width, int height) +{ + if (height == 0) + height = 1; + glViewport(0, 0, width, height); + glMatrixMode(GL_PROJECTION); + perspective(40.0f, (float)width / (float)height, 0.1f, 4000.0f); + glMatrixMode(GL_MODELVIEW); +} + +void perspective(float fov, const float aspect, const float nearClip, const float farClip) +{ + fov *= 3.141592654f / 180.0f; + const float height = 2.0f * nearClip * tanf(fov / 2.0f); + const float width = height * aspect; + glFrustum(-width / 2.0f, width / 2.0f, -height / 2.0f, height / 2.0f, nearClip, farClip); +} \ No newline at end of file diff --git a/examples/pvc_box/unlit_pvc_box.cpp b/examples/pvc_box/unlit_pvc_box.cpp new file mode 100644 index 00000000..6c6a121a --- /dev/null +++ b/examples/pvc_box/unlit_pvc_box.cpp @@ -0,0 +1,137 @@ +#include +#include +#include +#include +#include +#include +#include "ps2gl/renderermanager.h" +#include + +void init_lights_and_color(); +void display(); +void cube_position_and_rotation(); +void draw_rgb_cube(); +static void colored_vertex(float r, float g, float b, float nx, float ny, float nz, float x, float y, float z); +void reshape(int width, int height); +void perspective(float fov, float aspect, float nearClip, float farClip); + +static float cube_spin_angle = 0.0f; +static float cube_z = -6.0f, cube_forward_rotation = -18.0f; + +int main(int argc, char** argv) +{ + glutInit(&argc, argv); + 
glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);// | GLUT_DEPTH); + glutInitWindowSize(640, 448); + glutCreateWindow("RGB Cube"); + init_lights_and_color(); + mDebugPrint("Renderer = %s\n", pglGetCurRendererName()); + glutDisplayFunc(display); + glutReshapeFunc(reshape); + glutMainLoop(); + return 0; +} + +void init_lights_and_color() +{ + glDisable(GL_LIGHTING); + glDisable(GL_COLOR_MATERIAL); +} + +void display() +{ + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); //REQUIRED + cube_spin_angle += 0.2f; + draw_rgb_cube(); + glLoadIdentity(); + glutSwapBuffers(); +} + +void draw_rgb_cube() +{ + cube_position_and_rotation(); + glBegin(GL_TRIANGLES); + { + // +Z (front): + colored_vertex(1, 0, 0, 0, 0, 1, 1, 1, 1); + colored_vertex(0, 1, 0, 0, 0, 1, -1, 1, 1); + colored_vertex(0, 0, 1, 0, 0, 1, -1, -1, 1); + colored_vertex(1, 0, 0, 0, 0, 1, 1, 1, 1); + colored_vertex(0, 0, 1, 0, 0, 1, -1, -1, 1); + colored_vertex(0, 1, 0, 0, 0, 1, 1, -1, 1); + + // -Z (back): + colored_vertex(1, 0, 0, 0, 0, -1, 1, -1, -1); + colored_vertex(0, 1, 0, 0, 0, -1, -1, -1, -1); + colored_vertex(0, 0, 1, 0, 0, -1, -1, 1, -1); + colored_vertex(1, 0, 0, 0, 0, -1, 1, -1, -1); + colored_vertex(0, 0, 1, 0, 0, -1, -1, 1, -1); + colored_vertex(0, 1, 0, 0, 0, -1, 1, 1, -1); + + // +Y (top): + colored_vertex(1, 0, 0, 0, 1, 0, 1, 1, -1); + colored_vertex(0, 1, 0, 0, 1, 0, -1, 1, -1); + colored_vertex(0, 0, 1, 0, 1, 0, -1, 1, 1); + colored_vertex(1, 0, 0, 0, 1, 0, 1, 1, -1); + colored_vertex(0, 0, 1, 0, 1, 0, -1, 1, 1); + colored_vertex(0, 1, 0, 0, 1, 0, 1, 1, 1); + + // -Y (bottom) + colored_vertex(1, 0, 0, 0, -1, 0, 1, -1, 1); + colored_vertex(0, 1, 0, 0, -1, 0, -1, -1, 1); + colored_vertex(0, 0, 1, 0, -1, 0, -1, -1, -1); + colored_vertex(1, 0, 0, 0, -1, 0, 1, -1, 1); + colored_vertex(0, 0, 1, 0, -1, 0, -1, -1, -1); + colored_vertex(0, 1, 0, 0, -1, 0, 1, -1, -1); + + // -X (left) + colored_vertex(1, 0, 0, -1, 0, 0, -1, 1, 1); + colored_vertex(0, 1, 0, -1, 0, 0, -1, 1, -1); + colored_vertex(0, 0, 1, 
-1, 0, 0, -1, -1, -1); + colored_vertex(1, 0, 0, -1, 0, 0, -1, 1, 1); + colored_vertex(0, 0, 1, -1, 0, 0, -1, -1, -1); + colored_vertex(0, 1, 0, -1, 0, 0, -1, -1, 1); + + // +X (right) + colored_vertex(1, 0, 0, 1, 0, 0, 1, 1, -1); + colored_vertex(0, 1, 0, 1, 0, 0, 1, 1, 1); + colored_vertex(0, 0, 1, 1, 0, 0, 1, -1, 1); + colored_vertex(1, 0, 0, 1, 0, 0, 1, 1, -1); + colored_vertex(0, 0, 1, 1, 0, 0, 1, -1, 1); + colored_vertex(0, 1, 0, 1, 0, 0, 1, -1, -1); + } + glEnd(); +} + +void cube_position_and_rotation() +{ + glTranslatef(0.0f, 0.0f, cube_z); + glRotatef(cube_forward_rotation, -1, 0, 0); + glRotatef(cube_spin_angle, 0.0f, 1.0f, 0.0f); +} + +static void colored_vertex(const float r, const float g, const float b, const float nx, const float ny, const float nz, + const float x, const float y, const float z) +{ + glColor3f(r, g, b); + // glNormal3f(nx, ny, nz); //for sending in ignored normals when we want to + glVertex3f(x, y, z); +} + +void reshape(const int width, int height) +{ + if (height == 0) + height = 1; + glViewport(0, 0, width, height); + glMatrixMode(GL_PROJECTION); //REQUIRED (or else black screen) + perspective(40.0f, (float)width / (float)height, 0.1f, 4000.0f); + glMatrixMode(GL_MODELVIEW); //REQUIRED (or else black screen) +} + +void perspective(float fov, const float aspect, const float nearClip, const float farClip) +{ + fov *= 3.141592654f / 180.0f; + const float height = 2.0f * nearClip * tanf(fov / 2.0f); + const float width = height * aspect; + glFrustum(-width / 2.0f, width / 2.0f, -height / 2.0f, height / 2.0f, nearClip, farClip); +} \ No newline at end of file diff --git a/include/ps2gl/base_renderer.h b/include/ps2gl/base_renderer.h index a6e4859a..3abc52f9 100644 --- a/include/ps2gl/base_renderer.h +++ b/include/ps2gl/base_renderer.h @@ -26,7 +26,8 @@ class CBaseRenderer : public CRenderer { // cached in DrawArrays from geometry manager for XferBlock float CurTexCoord[2]; cpu_vec_xyz CurNormal; - CDmaPacket *TexCoordBuf, *NormalBuf; + 
cpu_vec_xyzw CurGeomColor; + CDmaPacket *TexCoordBuf, *NormalBuf, *ColorBuf; int WordsPerVertex, WordsPerNormal, WordsPerTexCoord, WordsPerColor; unsigned int VertexUnpackMode, NormalUnpackMode; diff --git a/include/ps2gl/gmanager.h b/include/ps2gl/gmanager.h index bc110fb7..931f6c67 100644 --- a/include/ps2gl/gmanager.h +++ b/include/ps2gl/gmanager.h @@ -104,6 +104,7 @@ class CGeomManager { static tUserPrimEntry UserPrimTypes[kMaxUserPrimTypes]; // GL state + cpu_vec_xyzw CurGeomColor; cpu_vec_xyz CurNormal; float CurTexCoord[2]; static bool DoNormalize; @@ -167,8 +168,9 @@ class CGeomManager { void SetUserRenderContextChanged() { UserRenderContextChanged = true; } // GL state - + inline cpu_vec_xyzw GetCurGeomColor() const { return CurGeomColor; } inline cpu_vec_xyz GetCurNormal() const { return CurNormal; } + inline void SetCurGeomColor(cpu_vec_xyzw color) { CurGeomColor = color; } inline void SetCurNormal(cpu_vec_xyz normal) { CurNormal = normal; } inline const float* GetCurTexCoord() const { return CurTexCoord; } diff --git a/include/ps2gl/immgmanager.h b/include/ps2gl/immgmanager.h index cb37e4c3..f0bac2b9 100644 --- a/include/ps2gl/immgmanager.h +++ b/include/ps2gl/immgmanager.h @@ -61,7 +61,7 @@ class CImmGeomManager : public CGeomManager { // normal, tex coord or vertex color is supplied for each vertex inline CDmaPacket& GetNormalBuf() { return *CurNormalBuf; } inline CDmaPacket& GetTexCoordBuf() { return *CurTexCoordBuf; } - + inline CDmaPacket& GetColorBuf() { return *CurColorBuf; } // user state void EnableCustom(uint64_t flag) { RendererManager.EnableCustom(flag); } diff --git a/include/ps2gl/material.h b/include/ps2gl/material.h index efefa761..20012674 100644 --- a/include/ps2gl/material.h +++ b/include/ps2gl/material.h @@ -104,7 +104,7 @@ class CMaterialManager { CDListMaterial DListMaterial; CMaterial* CurMaterial; - cpu_vec_xyzw CurColor; + cpu_vec_xyzw CurMatColor; GLenum ColorMaterialMode; bool UseColorMaterial; bool InDListDef; @@ -115,7 
+115,7 @@ class CMaterialManager { , ImmMaterial(context) , DListMaterial(context) , CurMaterial(&ImmMaterial) - , CurColor(1, 1, 1, 1) + , CurMatColor(1, 1, 1, 1) , ColorMaterialMode(GL_AMBIENT_AND_DIFFUSE) , UseColorMaterial(false) , InDListDef(false) @@ -126,7 +126,7 @@ class CMaterialManager { CMaterial& GetCurMaterial() { return *CurMaterial; } CImmMaterial& GetImmMaterial() { return ImmMaterial; } CDListMaterial& GetDListMaterial() { return DListMaterial; } - cpu_vec_xyzw GetCurColor() const { return CurColor; } + cpu_vec_xyzw GetCurMatColor() const { return CurMatColor; } GLenum GetColorMaterialMode() const { return ColorMaterialMode; } bool GetColorMaterialEnabled() const { return UseColorMaterial; } diff --git a/src/base_renderer.cpp b/src/base_renderer.cpp index 82887290..742ba3ef 100644 --- a/src/base_renderer.cpp +++ b/src/base_renderer.cpp @@ -9,6 +9,7 @@ #include "ps2s/packet.h" #include +#include #include "ps2gl/base_renderer.h" #include "ps2gl/drawcontext.h" @@ -65,12 +66,14 @@ void CBaseRenderer::InitXferBlock(CVifSCDmaPacket& packet, NormalBuf = &gmanager.GetNormalBuf(); TexCoordBuf = &gmanager.GetTexCoordBuf(); + ColorBuf = &gmanager.GetColorBuf(); CurNormal = gmanager.GetCurNormal(); const float* texCoord = gmanager.GetCurTexCoord(); CurTexCoord[0] = texCoord[0]; CurTexCoord[1] = texCoord[1]; + CurGeomColor = gmanager.GetCurGeomColor(); // get unpack modes/masks WordsPerVertex = wordsPerVertex; @@ -85,6 +88,11 @@ void CBaseRenderer::InitXferBlock(CVifSCDmaPacket& packet, WordsPerColor = (wordsPerColor > 0) ? 
wordsPerColor : 3; GetUnpackAttribs(WordsPerColor, ColorUnpackMode, ColorUnpackMask); + XferVertices = (wordsPerVertex > 0); + XferNormals = (wordsPerNormal > 0) && pGLContext->GetImmLighting().GetLightingEnabled(); + XferTexCoords = (wordsPerTex > 0); + XferColors = (WordsPerColor > 0) && (!pGLContext->GetImmLighting().GetLightingEnabled() || pGLContext->GetMaterialManager().GetColorMaterialEnabled()); + // set up the row register to expand vectors with fewer than 4 elements packet.Cnt(); @@ -116,6 +124,12 @@ void CBaseRenderer::XferBlock(CVifSCDmaPacket& packet, const void* texCoords, const void* colors, int vu1Offset, int firstElement, int numToAdd) { + //TODO: lane mapping V|T|C, V|C|T, V|N|T|C is difficult to figure out with the vu code sometimes + // should be super super clear somewhere probably better than here + const int laneV = 0; + const int laneN = 1; + const int laneT = 2; + const int laneC = 3; // // vertices // @@ -125,7 +139,7 @@ void CBaseRenderer::XferBlock(CVifSCDmaPacket& packet, XferVectors(packet, (unsigned int*)vertices, firstElement, numToAdd, WordsPerVertex, VertexUnpackMask, VertexUnpackMode, - vu1Offset); + vu1Offset + laneV); } // @@ -146,11 +160,12 @@ void CBaseRenderer::XferBlock(CVifSCDmaPacket& packet, normalBuf += CurNormal; } - if (XferNormals) + if (XferNormals) { XferVectors(packet, (unsigned int*)normals, firstNormal, numToAdd, WordsPerNormal, NormalUnpackMask, NormalUnpackMode, - vu1Offset + 1); + vu1Offset + laneN); + } // // tex coords @@ -169,22 +184,37 @@ void CBaseRenderer::XferBlock(CVifSCDmaPacket& packet, texCoordBuf += CurTexCoord[1]; } } - if (XferTexCoords) + if (XferTexCoords) { XferVectors(packet, (unsigned int*)texCoords, firstTexCoord, numToAdd, WordsPerTexCoord, TexCoordUnpackMask, TexCoordUnpackMode, - vu1Offset + 2); + vu1Offset + laneT); + } // // colors // int firstColor = firstElement; + // TODO: perhaps this is not good for EE? 
but i think this should also somehow default to white or the pushed registered color + // needs more testing with OpenGL expected behavior... + if (XferColors && colors == NULL) { + CDmaPacket& colorBuf = *ColorBuf; + colors = (void*)colorBuf.GetNextPtr(); + firstColor = 0; + for (int i = 0; i < numToAdd; ++i) { + colorBuf += CurGeomColor[0]; + colorBuf += CurGeomColor[1]; + colorBuf += CurGeomColor[2]; + } + } + if (colors != NULL && XferColors) { + mErrorIf(colors == NULL, "XferColors=true but no color data present"); XferVectors(packet, (unsigned int*)colors, firstColor, numToAdd, WordsPerColor, ColorUnpackMask, ColorUnpackMode, - vu1Offset + 3); + vu1Offset + laneC); } } @@ -314,10 +344,18 @@ void CBaseRenderer::AddVu1RendererContext(CVifSCDmaPacket& packet, GLenum primTy // add emissive component cpu_vec_4 emission; + if (doLighting) { + emission = material.GetEmission() * maxColorValue; + } else { + emission = glContext.GetGeomManager().GetCurGeomColor() * maxColorValue; + // emission = glContext.GetMaterialManager().GetCurMatColor() * maxColorValue; + } + /* if (doLighting) emission = material.GetEmission() * maxColorValue; else emission = glContext.GetMaterialManager().GetCurColor() * maxColorValue; + */ packet += emission; // ambient @@ -328,7 +366,8 @@ void CBaseRenderer::AddVu1RendererContext(CVifSCDmaPacket& packet, GLenum primTy // the alpha value is set to the alpha of the diffuse in the renderers; // this should be the current color alpha if lighting is disabled if (!doLighting) - matDiffuse[3] = glContext.GetMaterialManager().GetCurColor()[3]; + matDiffuse[3] = glContext.GetGeomManager().GetCurGeomColor()[3]; + // matDiffuse[3] = glContext.GetMaterialManager().GetCurMatColor()[3]; packet += matDiffuse; // specular @@ -395,7 +434,9 @@ void CBaseRenderer::CacheRendererState() { XferNormals = pGLContext->GetImmLighting().GetLightingEnabled(); XferTexCoords = pGLContext->GetTexManager().GetTexEnabled(); - XferColors = 
pGLContext->GetMaterialManager().GetColorMaterialEnabled(); + //TODO: something tells me this needs to be restructured, I think; it feels too hidden for where it's important... + XferColors = false; + // XferColors = pGLContext->GetMaterialManager().GetColorMaterialEnabled(); } void CBaseRenderer::Load() diff --git a/src/gmanager.cpp b/src/gmanager.cpp index 2636b1c3..20af3914 100644 --- a/src/gmanager.cpp +++ b/src/gmanager.cpp @@ -46,6 +46,21 @@ bool CGeomManager::DoNormalize = false; CGeomManager::CGeomManager(CGLContext& context) : GLContext(context) + , CurGeomColor(1.0f, 1.0f, 1.0f, 1.0f) //TODO: I think this is the GL color behavior? for missing colors case... + //NOTE: this default CurNormal allows for an "appearing" unlit PVC effect when: + // - ColorMaterial Enabled + // - Lighting Enabled + // - Light0 Enabled + // - Normals are never set during the glBegin/glEnd -> Default CurNormal of {0.0, 0.0, 1.0} (set in this constructor) is used + // VU Renderer: "linear, pvc, tris" is then targeted + // IMPORTANT: EVERY FRAME Light0's direction is set via + // constexpr float direction_towards_per_vertex_normal[4] = {0.0, 0.0, 1.0, 0.0}; + // glLightfv(GL_LIGHT0, GL_POSITION, direction_towards_per_vertex_normal); + // Why the dot product in the VU1 renderer GeneralPVDiff cancels out the Diffuse lighting effect: + // ps2gl converts lights into object/model space for VU1: + // AddVu1RendererContext: lighting calculations are done in object/model space (via worldToObjXfrm). + // so LIGHT0's direction {0, 0, 1, 0} remains aligned with every vertex's default CurNormal = {0,0,1}. + // Therefore N·L = 1 for the whole object/model , CurNormal(0.0f, 0.0f, 1.0f) , Prim(GL_INVALID_VALUE) , InsideBeginEnd(false) diff --git a/src/immgmanager.cpp b/src/immgmanager.cpp index 378f0706..f5754d39 100644 --- a/src/immgmanager.cpp +++ b/src/immgmanager.cpp @@ -5,6 +5,7 @@ main directory of this archive for more details. 
*/ #include +#include #include "ps2s/cpu_matrix.h" #include "ps2s/displayenv.h" @@ -117,11 +118,14 @@ void CImmGeomManager::Vertex(cpu_vec_xyzw newVert) *CurTexCoordBuf += texCoord[0]; *CurTexCoordBuf += texCoord[1]; + cpu_vec_xyzw color = GetCurGeomColor(); + *CurColorBuf += color; *CurVertexBuf += newVert; Geometry.AddVertices(); Geometry.AddNormals(); Geometry.AddTexCoords(); + Geometry.AddColors(); } void CImmGeomManager::Normal(cpu_vec_xyz normal) @@ -133,10 +137,8 @@ void CImmGeomManager::Normal(cpu_vec_xyz normal) void CImmGeomManager::Color(cpu_vec_xyzw color) { - if (InsideBeginEnd) { - *CurColorBuf += color; - Geometry.AddColors(); - } else { + SetCurGeomColor(color); + if (!InsideBeginEnd) { GLContext.GetMaterialManager().Color(color); } } @@ -156,22 +158,13 @@ void CImmGeomManager::EndGeom() Geometry.SetTexCoordsAreValid(true); // check colors - Geometry.SetColorsAreValid(false); - if (Geometry.GetNumNewColors() > 0) { - mErrorIf(Geometry.GetNumNewVertices() != Geometry.GetNumNewColors(), - "Sorry, but inside glBegin/glEnd you need " - "to specify either one color for each vertex given, or none."); - Geometry.SetColorsAreValid(true); - - SyncColorMaterial(true); - } else { - SyncColorMaterial(false); - } + Geometry.SetColorsAreValid(Geometry.GetNumNewColors() > 0); + SyncColorMaterial(Geometry.GetNumNewColors() > 0); Geometry.SetWordsPerVertex(4); Geometry.SetWordsPerNormal(3); Geometry.SetWordsPerTexCoord(2); - Geometry.SetWordsPerColor(4); + Geometry.SetWordsPerColor(Geometry.GetNumNewColors() > 0 ? 
4 : 0); CommitNewGeom(); } @@ -196,7 +189,9 @@ void CImmGeomManager::DrawArrays(GLenum mode, int first, int count) Geometry.SetVerticesAreValid(VertArray->GetVerticesAreValid()); Geometry.SetNormalsAreValid(VertArray->GetNormalsAreValid()); Geometry.SetTexCoordsAreValid(VertArray->GetTexCoordsAreValid()); - Geometry.SetColorsAreValid(VertArray->GetColorsAreValid()); + Geometry.SetColorsAreValid(Geometry.GetNumNewColors() > 0); + SyncColorMaterial(Geometry.GetNumNewColors() > 0); + // Geometry.SetColorsAreValid(VertArray->GetColorsAreValid()); Geometry.SetWordsPerVertex(VertArray->GetWordsPerVertex()); Geometry.SetWordsPerNormal(VertArray->GetWordsPerNormal()); @@ -211,7 +206,8 @@ void CImmGeomManager::DrawArrays(GLenum mode, int first, int count) Geometry.AdjustNewGeomPtrs(first); // do this before sync'ing the vu1 renderer in CommitNewGeom - SyncColorMaterial(VertArray->GetColors() != NULL); + SyncColorMaterial(VertArray->GetColorsAreValid()); + // SyncColorMaterial(VertArray->GetColors() != NULL); CommitNewGeom(); } @@ -409,7 +405,8 @@ void CImmGeomManager::SyncGsContext() void CImmGeomManager::SyncColorMaterial(bool pvColorsArePresent) { CMaterialManager& mm = GLContext.GetMaterialManager(); - if (pvColorsArePresent && mm.GetColorMaterialEnabled()) { + //if (pvColorsArePresent && mm.GetColorMaterialEnabled()) { + if (GLContext.GetImmLighting().GetLightingEnabled() && pvColorsArePresent && mm.GetColorMaterialEnabled()) { switch (mm.GetColorMaterialMode()) { case GL_EMISSION: mNotImplemented("Only GL_DIFFUSE can change per-vertex"); diff --git a/src/indexed_renderer.cpp b/src/indexed_renderer.cpp index 075dd21f..ab4e4ff2 100644 --- a/src/indexed_renderer.cpp +++ b/src/indexed_renderer.cpp @@ -66,7 +66,7 @@ void CIndexedRenderer::InitContext(GLenum primType, uint32_t rcChanges, bool use if (doLighting) materialEmm = material.GetEmission() * maxColorValue; else - materialEmm = glContext.GetMaterialManager().GetCurColor() * maxColorValue; + materialEmm = 
glContext.GetMaterialManager().GetCurMatColor() * maxColorValue; ConstantVertColor = materialAmb * globalAmb + materialEmm; } diff --git a/src/material.cpp b/src/material.cpp index dfef9a7d..eec318b3 100644 --- a/src/material.cpp +++ b/src/material.cpp @@ -107,7 +107,7 @@ void CDListMaterial::SetShininess(float shine) void CMaterialManager::Color(cpu_vec_xyzw color) { - CurColor = color; + CurMatColor = color; if (UseColorMaterial) { switch (ColorMaterialMode) { @@ -152,7 +152,7 @@ void CMaterialManager::SetUseColorMaterial(bool yesNo) if (!InDListDef) { UseColorMaterial = yesNo; if (yesNo) - Color(CurColor); + Color(CurMatColor); GLContext.CurMaterialChanged(); } else { CDList& dlist = GLContext.GetDListManager().GetOpenDList(); @@ -180,7 +180,7 @@ void CMaterialManager::SetColorMaterialMode(GLenum mode) { if (!InDListDef) { ColorMaterialMode = mode; - Color(CurColor); + Color(CurMatColor); GLContext.CurMaterialChanged(); } else { CDList& dlist = GLContext.GetDListManager().GetOpenDList(); diff --git a/src/renderermanager.cpp b/src/renderermanager.cpp index dda444ca..fc8b0fe3 100644 --- a/src/renderermanager.cpp +++ b/src/renderermanager.cpp @@ -68,6 +68,39 @@ CRendererManager::CRendererManager(CGLContext& context) "indexed")); } + // unlit renderer per vertex color + // TODO: make sure this actually is ordered in here to work with other examples + // (the pathing for this renderermanager approach is concerning how maybe this + // could capture cases that dont want this renderer? + + //TODO: something could be making this horrendously slow, will need to continue to investigate performance of the lighting vcl and the no lights pvc... 
+ { + CRendererProps capabilities = { + .PrimType = kTriangles, + .Lighting = 0, + .NumDirLights = k3DirLights, + .NumPtLights = 0, + .Texture = 1, + .Specular = 0, + .PerVtxMaterial = kNoMaterial, + .Clipping = kNonClipped | kClipped, + .CullFace = 0, + .TwoSidedLighting = 0, + .ArrayAccess = kLinear + }; + RegisterDefaultRenderer( + new CLinearRenderer( + mVsmAddr(FastNoLightsPVCTri), + mVsmSize(FastNoLightsPVCTri), + capabilities, + no_reqs, + 4, + 3, + kInputStart, + kInputBufSize - kInputStart, + "fast no lights, pvc, tri") + ); + } // fast, no lights renderer { CRendererProps capabilities = { diff --git a/vu1/fast_no_lights_pvc_tri.vcl b/vu1/fast_no_lights_pvc_tri.vcl new file mode 100644 index 00000000..535cd166 --- /dev/null +++ b/vu1/fast_no_lights_pvc_tri.vcl @@ -0,0 +1,88 @@ +/* Per-vertex color, no lights, triangles -- based off of fast_no_lights */ + + #include "vu1_mem_linear.h" + + .include "db_in_db_out.i" + .include "math.i" + .include "lighting.i" + .include "clip_cull.i" + .include "geometry.i" + .include "io.i" + .include "general.i" + +kInputQPerV .equ 4 +kOutputQPerV .equ 3 + + .init_vf_all + .init_vi_all + + .name vsmFastNoLightsPVCTri + + --enter + --endenter + + ; ------------------------ initialization --------------------------------- + + load_vert_xfrm vert_xform + + init_constants + + sub trans[0], vf00, vf00 + sub trans[1], vf00, vf00 + sub trans[2], vf00, vf00 + maxw.x trans[0], trans[0], vf00 + maxw.y trans[1], trans[1], vf00 + maxw.z trans[2], trans[2], vf00 + move.xyz trans[3], gs_offsets + move.w trans[3], vf00 + + mul_vec_mat_44 new_xform[0], trans, vert_xform[0] + mul_vec_mat_44 new_xform[1], trans, vert_xform[1] + mul_vec_mat_44 new_xform[2], trans, vert_xform[2] + mul_vec_mat_44 new_xform[3], trans, vert_xform[3] + + --cont + + ; -------------------- transform & texture loop --------------------------- + +main_loop_lid: + + init_io_loop + init_out_buf + + set_strip_adcs + +xform_loop_lid: + --LoopCS 1,3 + + load_vert vert + 
xform_vert xformed_vert, new_xform, vert + ftoi4.xyz gs_vert, xformed_vert + + load_strip_adc strip_adc + set_adc_s gs_vert, strip_adc + store_xyzf gs_vert + + load_pvcolor vtx_color + loi 255.0 + muli.xyz vtx_color, vtx_color, i + addi.w vtx_color, vf00, i + max vtx_color, vtx_color, vf00 + ftoi0 vtx_color, vtx_color + store_rgba vtx_color + + load_stq tex_stq + xform_tex_stq tex_stq, tex_stq, Q + store_stq tex_stq + + next_io + loop_io xform_loop_lid + + ; -------------------- done! ------------------------------- + +done_lid: + kick_to_gs + --cont + b main_loop_lid + +.END diff --git a/vu1/fast_no_lights_pvc_tri_vcl.vsm b/vu1/fast_no_lights_pvc_tri_vcl.vsm new file mode 100644 index 00000000..ecbdb0af --- /dev/null +++ b/vu1/fast_no_lights_pvc_tri_vcl.vsm @@ -0,0 +1,193 @@ +; === __LP__ EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__MAIN_LOOP: +; === ldumb : optimal=14 clid=0 mlid=2 size=(14) +; === normal1 : optimal=14 clid=0 mlid=3 size=(14) +; === hDown : optimal=14 clid=0 mlid=4 size=(14) +; === vuta : optimal=14 clid=0 mlid=2 size=(14) +; === dUp : optimal=14 clid=1 mlid=2 size=(14) +; === normal : optimal=14 clid=0 mlid=4 size=(14) +; === another : optimal=14 clid=0 mlid=4 size=(14) +; ================================================= +; flowMon::Emit() vcl 1.4beta7 produced this code: + .vu + .align 4 + .global vsmFastNoLightsPVCTri_CodeStart + .global vsmFastNoLightsPVCTri_CodeEnd +vsmFastNoLightsPVCTri_CodeStart: +__v_vu1_fast_no_lights_pvc_tri_pp4_vcl_4: +; _LNOPT_w=[ normal2 ] 25 [25 0] 25 [__v_vu1_fast_no_lights_pvc_tri_pp4_vcl_4] + sub VF06,VF00,VF00 lq.w VF07,57(VI00) + sub VF05,VF00,VF00 NOP + sub VF04,VF00,VF00 loi 0x44fff000 + addi.xy VF07,VF00,I NOP + maxw.x VF06,VF06,VF00w mr32.z VF07,VF07 + maxw.y VF05,VF05,VF00w lq VF01,62(VI00) + maxw.z VF04,VF04,VF00w NOP + max.w VF07,VF00,VF00 NOP + max.xyz VF07,VF07,VF07 NOP + mulax ACC,VF06,VF01x lq VF02,63(VI00) + madday ACC,VF05,VF01y NOP + maddaz ACC,VF04,VF01z NOP + maddw VF01,VF07,VF01w 
NOP + mulax ACC,VF06,VF02x lq VF03,64(VI00) + madday ACC,VF05,VF02y NOP + maddaz ACC,VF04,VF02z NOP + maddw VF02,VF07,VF02w NOP + mulax ACC,VF06,VF03x lq VF08,65(VI00) + madday ACC,VF05,VF03y NOP + maddaz ACC,VF04,VF03z NOP + maddw VF03,VF07,VF03w NOP + mulax ACC,VF06,VF08x NOP + madday ACC,VF05,VF08y NOP + maddaz[E] ACC,VF04,VF08z NOP + maddw VF04,VF07,VF08w NOP +main_loop_lid: +; _LNOPT_w=[ normal2 ] 20 [20 0] 20 [main_loop_lid] + NOP xtop VI01 + NOP lq VF05,75(VI00) + NOP ilw.x VI04,0(VI01) + NOP iaddiu VI06,VI01,0x00000001 + NOP iaddiu VI02,VI01,0x00000005 + NOP mtir VI05,VF05x + NOP iadd VI03,VI02,VI04 + NOP ior VI05,VI05,VI04 + NOP iadd VI03,VI03,VI04 + NOP mfir.x VF05,VI05 + NOP iadd VI05,VI03,VI04 + NOP iaddiu VI03,VI01,0 + NOP iadd VI04,VI05,VI04 + NOP sq VF05,236(VI03) + NOP iaddiu VI05,VI01,0x00000005 + NOP iaddiu VI07,VI06,0x00000004 + NOP iaddiu VI08,VI00,0x000003ff + NOP iaddiu VI09,VI00,0x00000800 + NOP iaddiu VI10,VI00,0x00000400 + NOP iaddiu VI11,VI00,0x00000020 +adcLoop_lid: +; _LNOPT_w=[ normal2 ] 7 [13 0] 13 [adcLoop_lid] + NOP lq VF05,0(VI06) + ftoi0 VF05,VF05 NOP ; STALL_LATENCY ?3 + NOP mtir VI12,VF05x ; STALL_LATENCY ?3 + NOP iand VI13,VI12,VI10 + NOP NOP + NOP ibeq VI13,VI10,EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__ENTRY_POINT + NOP NOP +; _LNOPT_w=[ normal2 ] 8 [8 0] 8 [__v_vu1_fast_no_lights_pvc_tri_pp4_vcl_8] + NOP iand VI14,VI12,VI08 + NOP mtir VI13,VF05y + NOP iand VI12,VI12,VI09 + NOP iadd VI14,VI14,VI05 + NOP iand VI15,VI13,VI10 + NOP isw.w VI11,0(VI14) + NOP ibeq VI15,VI10,EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__ENTRY_POINT + NOP isw.w VI12,4(VI14) +; _LNOPT_w=[ normal2 ] 8 [8 0] 8 [__v_vu1_fast_no_lights_pvc_tri_pp4_vcl_9] + NOP iand VI14,VI13,VI08 + NOP mtir VI12,VF05z + NOP iand VI13,VI13,VI09 + NOP iadd VI14,VI14,VI05 + NOP iand VI15,VI12,VI10 + NOP isw.w VI11,0(VI14) + NOP ibeq VI15,VI10,EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__ENTRY_POINT + NOP isw.w VI13,4(VI14) +; 
_LNOPT_w=[ normal2 ] 8 [8 0] 8 [__v_vu1_fast_no_lights_pvc_tri_pp4_vcl_10] + NOP iand VI14,VI12,VI08 + NOP mtir VI13,VF05w + NOP iand VI12,VI12,VI09 + NOP iadd VI14,VI14,VI05 + NOP iand VI15,VI13,VI10 + NOP isw.w VI11,0(VI14) + NOP ibeq VI15,VI10,EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__ENTRY_POINT + NOP isw.w VI12,4(VI14) +; _LNOPT_w=[ normal2 ] 7 [7 0] 7 [__v_vu1_fast_no_lights_pvc_tri_pp4_vcl_11] + NOP iand VI12,VI13,VI08 + NOP iaddiu VI06,VI06,0x00000001 + NOP iand VI13,VI13,VI09 + NOP iadd VI12,VI12,VI05 + NOP isw.w VI11,0(VI12) + NOP ibne VI06,VI07,adcLoop_lid + NOP isw.w VI13,4(VI12) +EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__ENTRY_POINT: +; _LNOPT_w=[ ] 8 [12 0] 12 [EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__ENTRY_POINT] + NOP lq.xyz VF05,0(VI02) + mulax ACC,VF01,VF05x iaddiu VI03,VI03,0 ; STALL_LATENCY ?3 + madday ACC,VF02,VF05y lq.xyz VF08,3(VI02) + maddaz ACC,VF03,VF05z iaddiu VI02,VI02,0x00000004 + maddw VF05,VF04,VF00w loi 0x437f0000 + muli.xyz VF08,VF08,I ilw.w VI05,-4(VI02) ; STALL_LATENCY ?1 + addi.w VF08,VF00,I ibeq VI02,VI04,EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__EPI1 + NOP div Q,VF00w,VF05w +; _LNOPT_w=[ ] 14 [12 0] 14 [EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__PRO1] + NOP lq.xyz VF07,0(VI02) + NOP NOP + max VF06,VF08,VF00 NOP + max.xyz VF08,VF05,VF05 NOP + mulax ACC,VF01,VF07x iaddiu VI05,VI05,0x00007fff + madday ACC,VF02,VF07y lq.xyz VF10,3(VI02) + maddaz ACC,VF03,VF07z iaddiu VI02,VI02,0x00000004 + maddw VF05,VF04,VF00w loi 0x437f0000 + mulq.xyz VF09,VF08,Q lq.xyz VF07,-6(VI02) + muli.xyz VF08,VF10,I iaddiu VI03,VI03,0x000000f3 + addi.w VF08,VF00,I mfir.w VF07,VI05 + ftoi0 VF10,VF06 div Q,VF00w,VF05w + mulq.xyz VF06,VF07,Q ibeq VI02,VI04,EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__EPI0 + ftoi4.xyz VF07,VF09 ilw.w VI05,-4(VI02) +EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 14 [14 14] 14 
[EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__MAIN_LOOP] + NOP lq.xyz VF09,0(VI02) + NOP sq VF10,-5(VI03) + max VF06,VF08,VF00 sq.xyz VF06,-6(VI03) + max.xyz VF08,VF05,VF05 sq VF07,-4(VI03) + mulax ACC,VF01,VF09x iaddiu VI05,VI05,0x00007fff + madday ACC,VF02,VF09y lq.xyz VF10,3(VI02) + maddaz ACC,VF03,VF09z lq.xyz VF07,-2(VI02) + maddw VF05,VF04,VF00w iaddiu VI02,VI02,0x00000004 + mulq.xyz VF09,VF08,Q loi 0x437f0000 + muli.xyz VF08,VF10,I iaddiu VI03,VI03,0x00000003 + addi.w VF08,VF00,I mfir.w VF07,VI05 + ftoi0 VF10,VF06 div Q,VF00w,VF05w + mulq.xyz VF06,VF07,Q ibne VI02,VI04,EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__MAIN_LOOP + ftoi4.xyz VF07,VF09 ilw.w VI05,-4(VI02) +EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__EPI0: +; _LNOPT_w=[ ] 14 [13 0] 16 [EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__EPI0] + NOP NOP + NOP NOP + max VF06,VF08,VF00 sq.xyz VF06,-6(VI03) + max.xyz VF08,VF05,VF05 sq VF07,-4(VI03) + NOP lq.xyz VF07,-2(VI02) + mulq.xyz VF08,VF08,Q NOP ; STALL_LATENCY ?2 + ftoi0 VF10,VF06 sq VF10,-5(VI03) + mulq.xyz VF06,VF07,Q iaddiu VI05,VI05,0x00007fff + NOP mfir.w VF07,VI05 + ftoi4.xyz VF07,VF08 NOP + NOP sq VF10,-2(VI03) + NOP sq.xyz VF06,-3(VI03) + NOP b EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__EXIT_POINT + NOP sq VF07,-1(VI03) +EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__EPI1: +; _LNOPT_w=[ ] 11 [13 0] 16 [EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__EPI1] + NOP NOP + NOP NOP + max VF06,VF08,VF00 NOP + max.xyz VF08,VF05,VF05 lq.xyz VF05,-2(VI02) + mulq.xyz VF08,VF08,Q NOP ; STALL_LATENCY ?3 + ftoi0 VF07,VF06 iaddiu VI05,VI05,0x00007fff + mulq.xyz VF06,VF05,Q mfir.w VF08,VI05 + ftoi4.xyz VF08,VF08 NOP ; STALL_LATENCY ?1 + NOP sq VF07,238(VI03) + NOP sq.xyz VF06,237(VI03) + NOP sq VF08,239(VI03) ; STALL_LATENCY ?1 +EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [2 0] 4 
[EXPL_vu1_fast_no_lights_pvc_tri_pp4_vcl_adcLoop_done_lid__EXIT_POINT] + NOP iaddiu VI01,VI01,0x000000ec + NOP xgkick VI01 + NOP[E] NOP + NOP NOP +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_fast_no_lights_pvc_tri_pp4_vcl_15] + NOP b main_loop_lid + NOP NOP + .align 4 +vsmFastNoLightsPVCTri_CodeEnd: +; iCount=150 +; register stats: +; 16 VU User integer +; 11 VU User floating point diff --git a/vu1/general.i b/vu1/general.i index d197f314..51c315e1 100644 --- a/vu1/general.i +++ b/vu1/general.i @@ -85,13 +85,13 @@ sqrt q, \atten[z] addw.x \atten, vf00, vf00 addq.y \atten, vf00, q - div q, vf00w, \atten[y] + div q, vf00[w], \atten[y] mulq.xyz \vert_to_light, \vert_to_light, q dot3_to_w \atten, \atten, \atten_coeff .endm .macro atten_color output, vert_color, atten - div q, vf00w, \atten[w] + div q, vf00[w], \atten[w] mulq.xyz \output, \vert_color, q .endm diff --git a/vu1/geometry.i b/vu1/geometry.i index 8abf73d9..44afa4bc 100644 --- a/vu1/geometry.i +++ b/vu1/geometry.i @@ -97,7 +97,7 @@ .macro xform_vert xformed_vert, vert_xform, vert mul_pt_mat_44 \xformed_vert, \vert_xform, \vert - div q, vf00w, \xformed_vert[w] + div q, vf00[w], \xformed_vert[w] mulq.xyz \xformed_vert, \xformed_vert, q ; FIXME: visible vertices are now in range (+-320, +-112, +-2^24-1) .endm diff --git a/vu1/io.i b/vu1/io.i index 1ac0571f..8e2271e3 100644 --- a/vu1/io.i +++ b/vu1/io.i @@ -13,7 +13,7 @@ ; fill in the nloop field in the giftag and store at ; top of the output buffer lq gif_tag\@, kGifTag(vi00) - mtir eop\@, gif_tag\@x + mtir eop\@, gif_tag\@[x] ior eop\@, eop\@, num_verts mfir.x gif_tag\@, eop\@ sq gif_tag\@, -1(next_output) diff --git a/vu1/math.i b/vu1/math.i index 53cb771a..ad0762c9 100644 --- a/vu1/math.i +++ b/vu1/math.i @@ -39,7 +39,7 @@ .endm .macro normalize_3 output, input, scalar - div q, vf00w, \scalar + div q, vf00[w], \scalar mulq.xyz \output, \input, q .endm diff --git a/vu1/vu1_context.i b/vu1/vu1_context.i new file mode 100644 index 00000000..41478f6c --- /dev/null 
+++ b/vu1/vu1_context.i @@ -0,0 +1,54 @@ +tLightPtrs_dir .equ 0 +tLightPtrs_point .equ 1 +tLightPtrs_spot .equ 2 +tLightPtrs_dummy .equ 3 + +kNumLights .equ kContextStart +kBackFaceCullMult .equ kNumLights + +kLightPointers0 .equ (kNumLights + 1) +kLightPointers1 .equ (kLightPointers0 + 1) +kLightPointers2 .equ (kLightPointers1 + 1) +kLightPointers3 .equ (kLightPointers2 + 1) +kLightPointers4 .equ (kLightPointers3 + 1) +kLightPointers5 .equ (kLightPointers4 + 1) +kLightPointers6 .equ (kLightPointers5 + 1) +kLightPointers7 .equ (kLightPointers6 + 1) + +kLightAmbientOffset .equ 0 +kLightDiffuseOffset .equ 1 +kLightSpecularOffset .equ 2 +kLightPosOffset .equ 3 +kLightSpotDirOffset .equ 4 +kLightAttenCoeffOffset .equ 5 + +kLightStructSize .equ 6 + +kLight0Base .equ (kLightPointers7 + 1) +kLight1Base .equ (kLight0Base + kLightStructSize) +kLight2Base .equ (kLight1Base + kLightStructSize) +kLight3Base .equ (kLight2Base + kLightStructSize) +kLight4Base .equ (kLight3Base + kLightStructSize) +kLight5Base .equ (kLight4Base + kLightStructSize) +kLight6Base .equ (kLight5Base + kLightStructSize) +kLight7Base .equ (kLight6Base + kLightStructSize) + +kGlobalAmbient .equ (kLight7Base + kLightStructSize) + +kClipToGsDepthOffset .equ kGlobalAmbient + +kMaterialEmission .equ (kGlobalAmbient + 1) +kMaterialAmbient .equ (kMaterialEmission + 1) +kMaterialDiffuse .equ (kMaterialAmbient + 1) +kMaterialSpecular .equ (kMaterialDiffuse + 1) + +kVertexXfrm .equ (kMaterialSpecular + 1) +kFixedVertToEye .equ (kVertexXfrm + 4) +kObjToWorldXfrmTrans .equ (kFixedVertToEye + 1) +kWorldToObjXfrm .equ (kObjToWorldXfrmTrans + 4) + +kGifTag .equ (kWorldToObjXfrm + 4) + +kClipInfo .equ (kGifTag + 1) + +kContextLength .equ (kClipInfo - kContextStart + 1) diff --git a/vu1/vu1_mem_linear.i b/vu1/vu1_mem_linear.i new file mode 100644 index 00000000..5d7a9e8d --- /dev/null +++ b/vu1/vu1_mem_linear.i @@ -0,0 +1,17 @@ +kContextStart .equ 0 + +.include "vu1_context.i" + +kDoubleBufBase .equ (kContextStart + 
kContextLength) +kDoubleBufOffset .equ ((1024 - kDoubleBufBase) / 2) +kDoubleBufSize .equ kDoubleBufOffset + +kNumVertices .equ 0 + +kStripADCs .equ (kNumVertices + 1) + +kInputStart .equ (kStripADCs + 4) + +kInputBufSize .equ (kDoubleBufSize / 2) +kOutputStart .equ (0 + kInputBufSize) +kOutputBufSize .equ (kDoubleBufSize - kOutputStart) diff --git a/vu1/vu1renderers.h b/vu1/vu1renderers.h index 9c269524..44e8c280 100644 --- a/vu1/vu1renderers.h +++ b/vu1/vu1renderers.h @@ -30,6 +30,7 @@ VU_FUNCTIONS(GeneralPVDiffQuad); VU_FUNCTIONS(SCEI); VU_FUNCTIONS(Fast); VU_FUNCTIONS(FastNoLights); +VU_FUNCTIONS(FastNoLightsPVCTri); VU_FUNCTIONS(Indexed); } From b1b682ec1ab6c50f12bb8ef6a6526821d233f3a0 Mon Sep 17 00:00:00 2001 From: iann Date: Wed, 1 Oct 2025 22:55:16 +0900 Subject: [PATCH 02/13] CI compile error fix --- examples/tricked_out/billboard_renderer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/tricked_out/billboard_renderer.cpp b/examples/tricked_out/billboard_renderer.cpp index 1cab68ff..e4395470 100644 --- a/examples/tricked_out/billboard_renderer.cpp +++ b/examples/tricked_out/billboard_renderer.cpp @@ -143,7 +143,7 @@ void CBillboardRenderer::InitContext(GLenum primType, uint32_t rcChanges, bool u packet += cpu_vec_4(0, 0, 1, 0); // set the color, max is 128 because this is unity when texture mapping is enabled - cpu_vec_4 color = glContext.GetMaterialManager().GetCurColor() * 128.0f; + cpu_vec_4 color = glContext.GetMaterialManager().GetCurMatColor() * 128.0f; packet += (unsigned int)color[0]; packet += (unsigned int)color[1]; packet += (unsigned int)color[2]; From 366246ff44a8f70606a661e4802e1c96a6c60e8f Mon Sep 17 00:00:00 2001 From: iann Date: Thu, 2 Oct 2025 00:14:35 +0900 Subject: [PATCH 03/13] add missing color function for opengl1.1 tests --- src/gmanager.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/src/gmanager.cpp b/src/gmanager.cpp index 20af3914..958ca625 100644 --- a/src/gmanager.cpp +++ 
b/src/gmanager.cpp @@ -403,6 +403,17 @@ void glColor4f(GLfloat red, GLfloat green, GLfloat blue, GLfloat alpha) gmanager.Color(cpu_vec_xyzw(red, green, blue, alpha)); } +//raylib need this function +void glColor4ub(GLubyte red, GLubyte green, GLubyte blue, GLubyte alpha) +{ + GL_FUNC_DEBUG("%s\n", __FUNCTION__); + float r = (float)red/255.0; + float b = (float)blue/255.0; + float g = (float)green/255.0; + float a = (float)alpha/255.0; + glColor4f(r,g,b,a); +} + void glColor4fv(const GLfloat* color) { GL_FUNC_DEBUG("%s\n", __FUNCTION__); From 091d2ebafced2c26c925b2b6b5148c90e196d453 Mon Sep 17 00:00:00 2001 From: iann Date: Thu, 2 Oct 2025 07:35:19 +0900 Subject: [PATCH 04/13] Fixed some incorrect logic, tested lesson03, lesson04, lesson05 now working a bit better. add potential TODOs, pushing to test on raylib cleanly from fork --- Makefile | 39 +++++++++++++++ examples/nehe/lesson02/lesson2.cpp | 2 +- examples/nehe/lesson03/lesson3.cpp | 3 -- examples/nehe/lesson04/lesson4.cpp | 5 -- examples/nehe/lesson05/lesson5.cpp | 6 ++- examples/pvc_box/Makefile | 6 +-- include/ps2gl/gblock.h | 4 +- include/ps2gl/immgmanager.h | 1 + src/base_renderer.cpp | 53 ++++++++++----------- src/immgmanager.cpp | 76 +++++++++++++++++++----------- src/renderermanager.cpp | 42 ++++++++--------- 11 files changed, 144 insertions(+), 93 deletions(-) diff --git a/Makefile b/Makefile index d5b28dea..7e203718 100644 --- a/Makefile +++ b/Makefile @@ -116,3 +116,42 @@ include $(PS2SDK)/samples/Makefile.eeglobal # you can wire %.vcl -> %_pp2.vcl directly and drop this rule %_pp1.vcl: %.vcl cat $< | sed 's/#include[ ]\+.\+// ; s/#define[ ]\+.\+// ; s|\(\.include[ ]\+\)"\([^/].\+\)"|\1"$( $@ + +# ---- build examples and create ./bin launchers --------------------- + +SHELL := /bin/bash +EXAMPLES_DIR := examples +BIN_DIR := bin +PCSX2_BIN ?= pcsx2 +PCSX2_FLAGS ?= -nogui -batch -fastboot -earlyconsolelog -logfile /dev/null + +.PHONY: examples clean-examples + +examples: + mkdir -p $(BIN_DIR) + # 
build each example (skip shared_code) + find $(EXAMPLES_DIR) -type f -name Makefile ! -path '*/shared_code/*' -print0 \ + | while IFS= read -r -d '' mf; do \ + dir=$$(dirname "$$mf"); \ + echo "==> make -C $$dir"; \ + $(MAKE) -C "$$dir"; \ + done; \ + # create launchers for every .elf (absolute path!) + find $(EXAMPLES_DIR) -type f -name '*.elf' -print0 \ + | while IFS= read -r -d '' elf; do \ + name=$$(basename "$${elf%.elf}"); \ + abs=$$(readlink -f "$$elf"); \ + echo "==> writing $(BIN_DIR)/$$name -> $$abs"; \ + printf '#!/usr/bin/env bash\n%s %s -elf %s "$$@"\n' \ + '$(PCSX2_BIN)' '$(PCSX2_FLAGS)' "$$abs" > "$(BIN_DIR)/$$name"; \ + chmod +x "$(BIN_DIR)/$$name"; \ + done + +clean-examples: + rm -rf $(BIN_DIR) + find $(EXAMPLES_DIR) -type f -name Makefile ! -path '*/shared_code/*' -print0 \ + | while IFS= read -r -d '' mf; do \ + dir=$$(dirname "$$mf"); \ + echo "==> clean $$dir"; \ + $(MAKE) -C "$$dir" clean || true; \ + done diff --git a/examples/nehe/lesson02/lesson2.cpp b/examples/nehe/lesson02/lesson2.cpp index 5bff9cf4..233c8c80 100644 --- a/examples/nehe/lesson02/lesson2.cpp +++ b/examples/nehe/lesson02/lesson2.cpp @@ -32,13 +32,13 @@ void display(void) // Create The Display Fu glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Clear Screen And Depth Buffer glLoadIdentity(); // Reset The Current Modelview Matrix glTranslatef(-1.5f, 0.0f, -6.0f); // Move Left 1.5 Units And Into The Screen 6.0 + //TODO: colors appearing when they shouldnt be!!! 
glBegin(GL_TRIANGLES); // Drawing Using Triangles glVertex3f(0.0f, 1.0f, 0.0f); // Top glVertex3f(-1.0f, -1.0f, 0.0f); // Bottom Left glVertex3f(1.0f, -1.0f, 0.0f); // Bottom Right glEnd(); // Finished Drawing The Triangle glTranslatef(3.0f, 0.0f, 0.0f); // Move Right 3 Units - //TODO: renderer ordering here in renderermanager causing confusion (non trivial path finding for renderers) glBegin(GL_QUADS); // Draw A Quad glVertex3f(-1.0f, 1.0f, 0.0f); // Top Left glVertex3f(1.0f, 1.0f, 0.0f); // Top Right diff --git a/examples/nehe/lesson03/lesson3.cpp b/examples/nehe/lesson03/lesson3.cpp index d3553942..d1783972 100644 --- a/examples/nehe/lesson03/lesson3.cpp +++ b/examples/nehe/lesson03/lesson3.cpp @@ -24,8 +24,6 @@ void init(GLvoid) // Create Some Everyday Functions glEnable(GL_DEPTH_TEST); // Enables Depth Testing glDepthFunc(GL_LEQUAL); // The Type Of Depth Testing To Do glHint(GL_PERSPECTIVE_CORRECTION_HINT, GL_NICEST); - - // TODO: next make a fast no lights for QUADS } void display(void) // Create The Display Function @@ -43,7 +41,6 @@ void display(void) // Create The Display Fu glEnd(); // Finished Drawing The Triangle glTranslatef(3.0f, 0.0f, 0.0f); // Move Right 3 Units glColor3f(0.5f, 0.5f, 1.0f); // Set The Color To Blue One Time Only - //TODO: like lesson02, renderermanager causing potential issues with ordering glBegin(GL_QUADS); // Draw A Quad glVertex3f(-1.0f, 1.0f, 0.0f); // Top Left glVertex3f(1.0f, 1.0f, 0.0f); // Top Right diff --git a/examples/nehe/lesson04/lesson4.cpp b/examples/nehe/lesson04/lesson4.cpp index d2e254c0..756ea468 100644 --- a/examples/nehe/lesson04/lesson4.cpp +++ b/examples/nehe/lesson04/lesson4.cpp @@ -27,8 +27,6 @@ void init(GLvoid) // Create Some Everyday Functions glEnable(GL_DEPTH_TEST); // Enables Depth Testing glDepthFunc(GL_LEQUAL); // The Type Of Depth Testing To Do glHint(GL_PERSPECTIVE_CORRECTION_HINT, GL_NICEST); - - // TODO: next make a fast no lights for QUADS } void idle(void) { @@ -41,8 +39,6 @@ void 
display(void) // Create The Display Fu glPushMatrix(); glTranslatef(-1.5f, 0.0f, -6.0f); // Move Left 1.5 Units And Into The Screen 6.0 glRotatef(rtri, 0.0f, 1.0f, 0.0f); // Rotate The Triangle On The Y axis - constexpr float default_normal_direction_alignment[4] = {0.f, 0.f, 1.f, 0.f}; - glLightfv(GL_LIGHT0, GL_POSITION, default_normal_direction_alignment); glBegin(GL_TRIANGLES); // Drawing Using Triangles glColor3f(1.0f, 0.0f, 0.0f); // Set The Color To Red glVertex3f(0.0f, 1.0f, 0.0f); // Top @@ -56,7 +52,6 @@ void display(void) // Create The Display Fu glTranslatef(1.5f, 0.0f, -6.0f); // Move Right 1.5 Units And Into The Screen 6.0 glRotatef(rquad, 1.0f, 0.0f, 0.0f); // Rotate The Quad On The X axis glColor3f(0.5f, 0.5f, 1.0f); // Set The Color To Blue One Time Only - glLightfv(GL_LIGHT0, GL_POSITION, default_normal_direction_alignment); glBegin(GL_QUADS); // Draw A Quad glVertex3f(-1.0f, 1.0f, 0.0f); // Top Left glVertex3f(1.0f, 1.0f, 0.0f); // Top Right diff --git a/examples/nehe/lesson05/lesson5.cpp b/examples/nehe/lesson05/lesson5.cpp index 2c872d8c..2e5dc5c5 100644 --- a/examples/nehe/lesson05/lesson5.cpp +++ b/examples/nehe/lesson05/lesson5.cpp @@ -38,6 +38,7 @@ void display(void) // Create The Display Fu glDisable(GL_COLOR_MATERIAL); glDisable(GL_LIGHTING); glDisable(GL_LIGHT0); + glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Clear Screen And Depth Buffer glLoadIdentity(); // Reset The Current Modelview Matrix glPushMatrix(); @@ -71,17 +72,18 @@ void display(void) // Create The Display Fu glEnd(); // Finished Drawing The Triangle // TODO: next make a fast no lights for QUADS + // for now its interesting to see the lighting based one for quads only here: glEnable(GL_COLOR_MATERIAL); glColorMaterial(GL_FRONT_AND_BACK, GL_DIFFUSE); glEnable(GL_LIGHTING); glEnable(GL_LIGHT0); + constexpr float default_normal_direction_alignment[4] = {0.f, 0.f, 1.f, 0.f}; + glLightfv(GL_LIGHT0, GL_POSITION, default_normal_direction_alignment); glLoadIdentity(); // 
Reset The Current Modelview Matrix glTranslatef(1.5f, 0.0f, -6.0f); // Move Right 1.5 Units And Into The Screen 6.0 glRotatef(rquad, 1.0f, 0.0f, 0.0f); // Rotate The Quad On The X axis glColor3f(0.5f, 0.5f, 1.0f); // Set The Color To Blue One Time Only - constexpr float default_normal_direction_alignment[4] = {0.f, 0.f, 1.f, 0.f}; - glLightfv(GL_LIGHT0, GL_POSITION, default_normal_direction_alignment); glBegin(GL_QUADS); // Draw A Quad glColor3f(0.0f, 1.0f, 0.0f); // Set The Color To Blue glVertex3f(1.0f, 1.0f, -1.0f); // Top Right Of The Quad (Top) diff --git a/examples/pvc_box/Makefile b/examples/pvc_box/Makefile index 40f926c6..208cd0de 100644 --- a/examples/pvc_box/Makefile +++ b/examples/pvc_box/Makefile @@ -1,8 +1,8 @@ -EE_BIN = box.elf +EE_BIN = pvc_box.elf EE_CFLAGS := -I$(PS2SDK)/ports/include -I../shared_code/ $(EE_CFLAGS) EE_CXXFLAGS := -I$(PS2SDK)/ports/include -I../shared_code/ $(EE_CXXFLAGS) -EE_OBJS = lit_pvc_box.o ../shared_code/text_stuff.o -#EE_OBJS = unlit_pvc_box.o ../shared_code/text_stuff.o +#EE_OBJS = lit_pvc_box.o ../shared_code/text_stuff.o +EE_OBJS = unlit_pvc_box.o ../shared_code/text_stuff.o EE_LDFLAGS += -L$(PS2SDK)/ports/lib EE_LIBS = -lps2glut -lps2gl -lps2stuff -lpad -ldma diff --git a/include/ps2gl/gblock.h b/include/ps2gl/gblock.h index b121bfe8..c9107729 100644 --- a/include/ps2gl/gblock.h +++ b/include/ps2gl/gblock.h @@ -72,12 +72,12 @@ class CGeometryBlock { CGeometryBlock() { Reset(); } // get/set info about geometry - + //TODO: make these an actual proper validation based on the prim? inline void SetVerticesAreValid(bool valid) { AreNewVerticesValid = valid; } inline void SetNormalsAreValid(bool valid) { AreNewNormalsValid = valid; } inline void SetTexCoordsAreValid(bool valid) { AreNewTexCoordsValid = valid; } inline void SetColorsAreValid(bool valid) { AreNewColorsValid = valid; } - + //TODO: make these an actual proper validation based on the prim? 
inline bool GetVerticesAreValid() const { return AreVerticesValid; } inline bool GetNormalsAreValid() const { return AreNormalsValid; } inline bool GetTexCoordsAreValid() const { return AreTexCoordsValid; } diff --git a/include/ps2gl/immgmanager.h b/include/ps2gl/immgmanager.h index f0bac2b9..011b6491 100644 --- a/include/ps2gl/immgmanager.h +++ b/include/ps2gl/immgmanager.h @@ -27,6 +27,7 @@ class CImmGeomManager : public CGeomManager { CGeometryBlock Geometry; void CommitNewGeom(); + bool ColorVariesInPrim = false; public: CImmGeomManager(CGLContext& context, int immBufferQwordSize); diff --git a/src/base_renderer.cpp b/src/base_renderer.cpp index 742ba3ef..d3a6cb36 100644 --- a/src/base_renderer.cpp +++ b/src/base_renderer.cpp @@ -79,19 +79,29 @@ void CBaseRenderer::InitXferBlock(CVifSCDmaPacket& packet, WordsPerVertex = wordsPerVertex; GetUnpackAttribs(WordsPerVertex, VertexUnpackMode, VertexUnpackMask); - WordsPerNormal = (wordsPerNormal > 0) ? wordsPerNormal : 3; - GetUnpackAttribs(WordsPerNormal, NormalUnpackMode, NormalUnpackMask); - - WordsPerTexCoord = (wordsPerTex > 0) ? wordsPerTex : 2; - GetUnpackAttribs(WordsPerTexCoord, TexCoordUnpackMode, TexCoordUnpackMask); - - WordsPerColor = (wordsPerColor > 0) ? wordsPerColor : 3; - GetUnpackAttribs(WordsPerColor, ColorUnpackMode, ColorUnpackMask); - + // WordsPerNormal = (wordsPerNormal > 0) ? wordsPerNormal : 3; + // GetUnpackAttribs(WordsPerNormal, NormalUnpackMode, NormalUnpackMask); + WordsPerNormal = wordsPerNormal; + if (WordsPerNormal > 0) GetUnpackAttribs(WordsPerNormal, NormalUnpackMode, NormalUnpackMask); + + // WordsPerTexCoord = (wordsPerTex > 0) ? wordsPerTex : 2; + // GetUnpackAttribs(WordsPerTexCoord, TexCoordUnpackMode, TexCoordUnpackMask); + WordsPerTexCoord = wordsPerTex; + if (WordsPerTexCoord > 0) GetUnpackAttribs(WordsPerTexCoord, TexCoordUnpackMode, TexCoordUnpackMask); + + //TODO: still confusing, could be improved along with the above ones.. 
(also confirm this whole 4 vs 3 thing for RGBA and VU1...) + WordsPerColor = wordsPerColor; + if (WordsPerColor > 0) GetUnpackAttribs(WordsPerColor, ColorUnpackMode, ColorUnpackMask); + //NOTE FOR YOU: we can just remove these aswell? because they are decided at the CacheRendererState function which is called immediately after all the glEnable/glDisable context flags are called? + // where as Colors are not known to be Per vertex or constant yet until here? XferVertices = (wordsPerVertex > 0); XferNormals = (wordsPerNormal > 0) && pGLContext->GetImmLighting().GetLightingEnabled(); XferTexCoords = (wordsPerTex > 0); - XferColors = (WordsPerColor > 0) && (!pGLContext->GetImmLighting().GetLightingEnabled() || pGLContext->GetMaterialManager().GetColorMaterialEnabled()); + // NOTE FOR YOU:XferColors means: we need to send full 4 QuadWord Widths of colors PER VERTEX down a lane) thus: + // TRUE: when color material is enabled AND per vertex (is material always per vertex?) + // TRUE: when color geometry is set per vertex + // FALSE: when color geometry is CONSTANT -> colors will be set via a single value, not sent down the lanes...? + XferColors = (WordsPerColor > 0); // set up the row register to expand vectors with fewer than 4 elements @@ -196,21 +206,7 @@ void CBaseRenderer::XferBlock(CVifSCDmaPacket& packet, // int firstColor = firstElement; - // TODO: perhaps this is not good for EE? but i think this should also somehow default to white or the pushed registered color - // needs more testing with OpenGL expected behavior... 
- if (XferColors && colors == NULL) { - CDmaPacket& colorBuf = *ColorBuf; - colors = (void*)colorBuf.GetNextPtr(); - firstColor = 0; - for (int i = 0; i < numToAdd; ++i) { - colorBuf += CurGeomColor[0]; - colorBuf += CurGeomColor[1]; - colorBuf += CurGeomColor[2]; - } - } - if (colors != NULL && XferColors) { - mErrorIf(colors == NULL, "XferColors=true but no color data present"); XferVectors(packet, (unsigned int*)colors, firstColor, numToAdd, WordsPerColor, ColorUnpackMask, ColorUnpackMode, @@ -432,11 +428,10 @@ CBaseRenderer::BuildGiftag(GLenum primType) void CBaseRenderer::CacheRendererState() { - XferNormals = pGLContext->GetImmLighting().GetLightingEnabled(); - XferTexCoords = pGLContext->GetTexManager().GetTexEnabled(); - //TODO: something tells me this needs to be restructured i think, it feels too hidden for where its important... - XferColors = false; - // XferColors = pGLContext->GetMaterialManager().GetColorMaterialEnabled(); + //TODO: these are too confusing??? look at CommitNewGeom, and SyncRenderer and all that stuff + //XferNormals = pGLContext->GetImmLighting().GetLightingEnabled(); + //XferTexCoords = pGLContext->GetTexManager().GetTexEnabled(); + //TODO: cannot decide Xfercolor state yet because we dont know if its per vertex or constant yet...???? 
} void CBaseRenderer::Load() diff --git a/src/immgmanager.cpp b/src/immgmanager.cpp index f5754d39..0ff8e9d4 100644 --- a/src/immgmanager.cpp +++ b/src/immgmanager.cpp @@ -100,6 +100,7 @@ void CImmGeomManager::BeginGeom(GLenum mode) Geometry.SetPrimType(mode); Geometry.SetArrayType(kLinear); + ColorVariesInPrim = false; Geometry.SetNormals(CurNormalBuf->GetNextPtr()); Geometry.SetVertices(CurVertexBuf->GetNextPtr()); @@ -118,14 +119,16 @@ void CImmGeomManager::Vertex(cpu_vec_xyzw newVert) *CurTexCoordBuf += texCoord[0]; *CurTexCoordBuf += texCoord[1]; - cpu_vec_xyzw color = GetCurGeomColor(); - *CurColorBuf += color; + if (ColorVariesInPrim) { + const cpu_vec_xyzw color = GetCurGeomColor(); + *CurColorBuf += color; + Geometry.AddColors(); + } *CurVertexBuf += newVert; Geometry.AddVertices(); Geometry.AddNormals(); Geometry.AddTexCoords(); - Geometry.AddColors(); } void CImmGeomManager::Normal(cpu_vec_xyz normal) @@ -137,8 +140,19 @@ void CImmGeomManager::Normal(cpu_vec_xyz normal) void CImmGeomManager::Color(cpu_vec_xyzw color) { - SetCurGeomColor(color); - if (!InsideBeginEnd) { + if (InsideBeginEnd) { + if (!ColorVariesInPrim) { + int backFillVertexCount = Geometry.GetNumNewVertices() - Geometry.GetNumNewColors(); + const cpu_vec_xyzw currentColor = GetCurGeomColor(); + for (int i = 0; i < backFillVertexCount; ++i) { + *CurColorBuf += currentColor; + Geometry.AddColors(); + } + ColorVariesInPrim = true; + } + SetCurGeomColor(color); + } else { + SetCurGeomColor(color); GLContext.GetMaterialManager().Color(color); } } @@ -157,15 +171,18 @@ void CImmGeomManager::EndGeom() Geometry.SetNormalsAreValid(true); Geometry.SetTexCoordsAreValid(true); - // check colors - Geometry.SetColorsAreValid(Geometry.GetNumNewColors() > 0); - SyncColorMaterial(Geometry.GetNumNewColors() > 0); - Geometry.SetWordsPerVertex(4); Geometry.SetWordsPerNormal(3); Geometry.SetWordsPerTexCoord(2); - Geometry.SetWordsPerColor(Geometry.GetNumNewColors() > 0 ? 
4 : 0); - + // check colors + // CHANGES!!! + const bool useColorLane = (!GLContext.GetImmLighting().GetLightingEnabled() && ColorVariesInPrim) || + (GLContext.GetImmLighting().GetLightingEnabled() && + GLContext.GetMaterialManager().GetColorMaterialEnabled() && + ColorVariesInPrim); + Geometry.SetColorsAreValid(useColorLane); + Geometry.SetWordsPerColor(useColorLane ? 4 : 0); + SyncColorMaterial(useColorLane); CommitNewGeom(); } @@ -189,26 +206,35 @@ void CImmGeomManager::DrawArrays(GLenum mode, int first, int count) Geometry.SetVerticesAreValid(VertArray->GetVerticesAreValid()); Geometry.SetNormalsAreValid(VertArray->GetNormalsAreValid()); Geometry.SetTexCoordsAreValid(VertArray->GetTexCoordsAreValid()); - Geometry.SetColorsAreValid(Geometry.GetNumNewColors() > 0); - SyncColorMaterial(Geometry.GetNumNewColors() > 0); - // Geometry.SetColorsAreValid(VertArray->GetColorsAreValid()); + Geometry.SetColorsAreValid(VertArray->GetColorsAreValid()); Geometry.SetWordsPerVertex(VertArray->GetWordsPerVertex()); Geometry.SetWordsPerNormal(VertArray->GetWordsPerNormal()); Geometry.SetWordsPerTexCoord(VertArray->GetWordsPerTexCoord()); - Geometry.SetWordsPerColor(VertArray->GetWordsPerColor()); + // CHANGES!!! + // Geometry.SetWordsPerColor(VertArray->GetWordsPerColor()); + //OLD WAY!!! something about glColorPointer is what sets Colors for the VertArray!!?? + // SyncColorMaterial(VertArray->GetColors() != NULL); + const bool lighting = GLContext.GetImmLighting().GetLightingEnabled(); + const bool colormat = GLContext.GetMaterialManager().GetColorMaterialEnabled(); + + const bool arrayHasColors = + VertArray->GetColorsAreValid() && (VertArray->GetWordsPerColor() > 0); + + const bool useColorLane = + (!lighting && arrayHasColors) || + ( lighting && colormat && arrayHasColors); + + Geometry.SetColorsAreValid(useColorLane); + Geometry.SetWordsPerColor(useColorLane ? 
4 : 0); + SyncColorMaterial(useColorLane); Geometry.AddVertices(count); Geometry.AddNormals(count); Geometry.AddTexCoords(count); - Geometry.AddColors(count); - + if (useColorLane) Geometry.AddColors(count); Geometry.AdjustNewGeomPtrs(first); - // do this before sync'ing the vu1 renderer in CommitNewGeom - SyncColorMaterial(VertArray->GetColorsAreValid()); - // SyncColorMaterial(VertArray->GetColors() != NULL); - CommitNewGeom(); } @@ -359,12 +385,8 @@ void CImmGeomManager::SyncRenderer() void CImmGeomManager::SyncRendererContext(GLenum primType) { // resend the rendering context if necessary - if (GLContext.GetRendererContextChanged() - || (RendererManager.IsCurRendererCustom() && UserRenderContextChanged)) { - RendererManager.GetCurRenderer().InitContext(primType, - GLContext.GetRendererContextChanged(), - UserRenderContextChanged); - + if (GLContext.GetRendererContextChanged() || (RendererManager.IsCurRendererCustom() && UserRenderContextChanged)) { + RendererManager.GetCurRenderer().InitContext(primType, GLContext.GetRendererContextChanged(), UserRenderContextChanged); GLContext.SetRendererContextChanged(false); UserRenderContextChanged = false; Prim = primType; diff --git a/src/renderermanager.cpp b/src/renderermanager.cpp index fc8b0fe3..00143883 100644 --- a/src/renderermanager.cpp +++ b/src/renderermanager.cpp @@ -68,6 +68,26 @@ CRendererManager::CRendererManager(CGLContext& context) "indexed")); } + // fast, no lights renderer + { + CRendererProps capabilities = { + PrimType : kPtsLinesStripsFans, + Lighting : 0, + NumDirLights : k3DirLights, + NumPtLights : 0, + Texture : 1, + Specular : 0, + PerVtxMaterial : kNoMaterial, + Clipping : kNonClipped, + CullFace : 0, + TwoSidedLighting : 0, + ArrayAccess : kLinear + }; + + RegisterDefaultRenderer(new CLinearRenderer(mVsmAddr(FastNoLights), mVsmSize(FastNoLights), capabilities, no_reqs, 3, 3, + kInputStart, kInputBufSize - kInputStart, + "fast, no lights")); + } // unlit renderer per vertex color // TODO: make 
sure this actually is ordered in here to work with other examples // (the pathing for this renderermanager approach is concerning how maybe this @@ -80,7 +100,7 @@ CRendererManager::CRendererManager(CGLContext& context) .Lighting = 0, .NumDirLights = k3DirLights, .NumPtLights = 0, - .Texture = 1, + .Texture = 0, .Specular = 0, .PerVtxMaterial = kNoMaterial, .Clipping = kNonClipped | kClipped, @@ -101,26 +121,6 @@ CRendererManager::CRendererManager(CGLContext& context) "fast no lights, pvc, tri") ); } - // fast, no lights renderer - { - CRendererProps capabilities = { - PrimType : kPtsLinesStripsFans, - Lighting : 0, - NumDirLights : k3DirLights, - NumPtLights : 0, - Texture : 1, - Specular : 0, - PerVtxMaterial : kNoMaterial, - Clipping : kNonClipped, - CullFace : 0, - TwoSidedLighting : 0, - ArrayAccess : kLinear - }; - - RegisterDefaultRenderer(new CLinearRenderer(mVsmAddr(FastNoLights), mVsmSize(FastNoLights), capabilities, no_reqs, 3, 3, - kInputStart, kInputBufSize - kInputStart, - "fast, no lights")); - } // fast renderer { CRendererProps capabilities = { From e6c832eb231146297abaf6aee3902344fe55b4ea Mon Sep 17 00:00:00 2001 From: iann Date: Thu, 2 Oct 2025 10:16:02 +0900 Subject: [PATCH 05/13] added the beginnings of a more clear lanes and quadword explicitness, i hope to try to also integrate it more throughout the full pipeline, (I havent gotten to the alignment and stride part yet, but i need to study the .i macros i think to be able to understand that) --- examples/nehe/lesson04/lesson4.cpp | 1 + include/ps2gl/gmanager.h | 41 ++++++++++++++++ src/base_renderer.cpp | 37 ++++++++------- src/immgmanager.cpp | 76 ++++++++++++++++++------------ 4 files changed, 107 insertions(+), 48 deletions(-) diff --git a/examples/nehe/lesson04/lesson4.cpp b/examples/nehe/lesson04/lesson4.cpp index 756ea468..26a0506f 100644 --- a/examples/nehe/lesson04/lesson4.cpp +++ b/examples/nehe/lesson04/lesson4.cpp @@ -52,6 +52,7 @@ void display(void) // Create The Display Fu 
glTranslatef(1.5f, 0.0f, -6.0f); // Move Right 1.5 Units And Into The Screen 6.0 glRotatef(rquad, 1.0f, 0.0f, 0.0f); // Rotate The Quad On The X axis glColor3f(0.5f, 0.5f, 1.0f); // Set The Color To Blue One Time Only + //TODO: add fast no lights PVC for QUADS! glBegin(GL_QUADS); // Draw A Quad glVertex3f(-1.0f, 1.0f, 0.0f); // Top Left glVertex3f(1.0f, 1.0f, 0.0f); // Top Right diff --git a/include/ps2gl/gmanager.h b/include/ps2gl/gmanager.h index 931f6c67..cce547d4 100644 --- a/include/ps2gl/gmanager.h +++ b/include/ps2gl/gmanager.h @@ -19,6 +19,47 @@ /******************************************** * constants */ +typedef enum { + QW_NONE = 0x0, // ---- + QW_X = 0x1, // X--- + QW_XY = 0x3, // XY-- + QW_XYZ = 0x7, // XYZ- + QW_XYZW = 0xF // XYZW +} QuadWords; + +typedef struct { + QuadWords vertices; // legal: QW_XYZ or QW_XYZW + QuadWords normals; // legal: QW_NONE or QW_XYZ + QuadWords texcoords; // legal: QW_NONE or QW_XY + QuadWords colors; // legal: QW_NONE or QW_XYZW +} LaneConfig; + +static inline int verticesOk(QuadWords qw) { return (qw == QW_XYZ) || (qw == QW_XYZW); } +static inline int normalsOk(QuadWords qw) { return (qw == QW_NONE) || (qw == QW_XYZ); } +static inline int texcoordsOk(QuadWords qw) { return (qw == QW_NONE) || (qw == QW_XY); } +static inline int colorsOk(QuadWords qw) { return (qw == QW_NONE) || (qw == QW_XYZW); } + +static inline int ValidateLaneConfig(const LaneConfig* lanes, const char* where) { + if (!verticesOk(lanes->vertices) || !normalsOk(lanes->normals) || !texcoordsOk(lanes->texcoords) || !colorsOk(lanes->colors)) { + mError("%s: illegal lane masks (V=%x N=%x T=%x C=%x)", where, lanes->vertices, lanes->normals, lanes->texcoords, lanes->colors); + return 0; + } + return 1; +} + +// Helper: map QW -> float components per vertex +static inline int QWToWords(QuadWords qw) { + switch (qw) { + case QW_NONE: return 0; + case QW_X: return 1; // rarely used; we don’t select it for V/N/T/C + case QW_XY: return 2; + case QW_XYZ: return 3; + 
case QW_XYZW: return 4; + default: return 0; + } +} + +static inline int LanePresent(QuadWords qw) { return (qw != QW_NONE); } /******************************************** * CVertArray diff --git a/src/base_renderer.cpp b/src/base_renderer.cpp index d3a6cb36..30808e17 100644 --- a/src/base_renderer.cpp +++ b/src/base_renderer.cpp @@ -76,32 +76,35 @@ void CBaseRenderer::InitXferBlock(CVifSCDmaPacket& packet, CurGeomColor = gmanager.GetCurGeomColor(); // get unpack modes/masks - WordsPerVertex = wordsPerVertex; + LaneConfig lanes; + lanes.vertices = (wordsPerVertex == 4) ? QW_XYZW : + (wordsPerVertex == 3) ? QW_XYZ : QW_NONE; + lanes.normals = (wordsPerNormal == 0) ? QW_NONE : + (wordsPerNormal == 3) ? QW_XYZ : QW_NONE; + lanes.texcoords = (wordsPerTex == 0) ? QW_NONE : + (wordsPerTex == 2) ? QW_XY : QW_NONE; + lanes.colors = (wordsPerColor == 0) ? QW_NONE : + (wordsPerColor == 4) ? QW_XYZW : QW_NONE; + + ValidateLaneConfig(&lanes, "InitXferBlock"); + WordsPerVertex = QWToWords(lanes.vertices); GetUnpackAttribs(WordsPerVertex, VertexUnpackMode, VertexUnpackMask); // WordsPerNormal = (wordsPerNormal > 0) ? wordsPerNormal : 3; - // GetUnpackAttribs(WordsPerNormal, NormalUnpackMode, NormalUnpackMask); - WordsPerNormal = wordsPerNormal; + WordsPerNormal = QWToWords(lanes.normals); if (WordsPerNormal > 0) GetUnpackAttribs(WordsPerNormal, NormalUnpackMode, NormalUnpackMask); // WordsPerTexCoord = (wordsPerTex > 0) ? wordsPerTex : 2; - // GetUnpackAttribs(WordsPerTexCoord, TexCoordUnpackMode, TexCoordUnpackMask); - WordsPerTexCoord = wordsPerTex; + WordsPerTexCoord = QWToWords(lanes.texcoords); if (WordsPerTexCoord > 0) GetUnpackAttribs(WordsPerTexCoord, TexCoordUnpackMode, TexCoordUnpackMask); - //TODO: still confusing, could be improved along with the above ones.. (also confirm this whole 4 vs 3 thing for RGBA and VU1...) 
- WordsPerColor = wordsPerColor; + WordsPerColor = QWToWords(lanes.colors); if (WordsPerColor > 0) GetUnpackAttribs(WordsPerColor, ColorUnpackMode, ColorUnpackMask); - //NOTE FOR YOU: we can just remove these aswell? because they are decided at the CacheRendererState function which is called immediately after all the glEnable/glDisable context flags are called? - // where as Colors are not known to be Per vertex or constant yet until here? - XferVertices = (wordsPerVertex > 0); - XferNormals = (wordsPerNormal > 0) && pGLContext->GetImmLighting().GetLightingEnabled(); - XferTexCoords = (wordsPerTex > 0); - // NOTE FOR YOU:XferColors means: we need to send full 4 QuadWord Widths of colors PER VERTEX down a lane) thus: - // TRUE: when color material is enabled AND per vertex (is material always per vertex?) - // TRUE: when color geometry is set per vertex - // FALSE: when color geometry is CONSTANT -> colors will be set via a single value, not sent down the lanes...? - XferColors = (WordsPerColor > 0); + + XferVertices = LanePresent(lanes.vertices); + XferNormals = LanePresent(lanes.normals); + XferTexCoords = LanePresent(lanes.texcoords); + XferColors = LanePresent(lanes.colors); // set up the row register to expand vectors with fewer than 4 elements diff --git a/src/immgmanager.cpp b/src/immgmanager.cpp index 0ff8e9d4..bd30c63f 100644 --- a/src/immgmanager.cpp +++ b/src/immgmanager.cpp @@ -171,18 +171,28 @@ void CImmGeomManager::EndGeom() Geometry.SetNormalsAreValid(true); Geometry.SetTexCoordsAreValid(true); - Geometry.SetWordsPerVertex(4); - Geometry.SetWordsPerNormal(3); - Geometry.SetWordsPerTexCoord(2); - // check colors - // CHANGES!!! - const bool useColorLane = (!GLContext.GetImmLighting().GetLightingEnabled() && ColorVariesInPrim) || - (GLContext.GetImmLighting().GetLightingEnabled() && - GLContext.GetMaterialManager().GetColorMaterialEnabled() && - ColorVariesInPrim); - Geometry.SetColorsAreValid(useColorLane); - Geometry.SetWordsPerColor(useColorLane ? 
4 : 0); - SyncColorMaterial(useColorLane); + const bool useColorLane = + (!GLContext.GetImmLighting().GetLightingEnabled() && ColorVariesInPrim) || + ( GLContext.GetImmLighting().GetLightingEnabled() && + GLContext.GetMaterialManager().GetColorMaterialEnabled() && + ColorVariesInPrim); + + LaneConfig lanes; + lanes.vertices = QW_XYZW; + lanes.normals = GLContext.GetImmLighting().GetLightingEnabled() ? QW_XYZ : QW_NONE; + lanes.texcoords = GLContext.GetTexManager().GetTexEnabled() ? QW_XY : QW_NONE; + lanes.colors = useColorLane ? QW_XYZW : QW_NONE; + + ValidateLaneConfig(&lanes, "EndGeom"); + + Geometry.SetColorsAreValid(LanePresent(lanes.colors)); + SyncColorMaterial(LanePresent(lanes.colors)); + + Geometry.SetWordsPerVertex(QWToWords(lanes.vertices)); + Geometry.SetWordsPerNormal(QWToWords(lanes.normals)); + Geometry.SetWordsPerTexCoord(QWToWords(lanes.texcoords)); + Geometry.SetWordsPerColor(QWToWords(lanes.colors)); + CommitNewGeom(); } @@ -206,33 +216,37 @@ void CImmGeomManager::DrawArrays(GLenum mode, int first, int count) Geometry.SetVerticesAreValid(VertArray->GetVerticesAreValid()); Geometry.SetNormalsAreValid(VertArray->GetNormalsAreValid()); Geometry.SetTexCoordsAreValid(VertArray->GetTexCoordsAreValid()); - Geometry.SetColorsAreValid(VertArray->GetColorsAreValid()); - Geometry.SetWordsPerVertex(VertArray->GetWordsPerVertex()); - Geometry.SetWordsPerNormal(VertArray->GetWordsPerNormal()); - Geometry.SetWordsPerTexCoord(VertArray->GetWordsPerTexCoord()); - // CHANGES!!! - // Geometry.SetWordsPerColor(VertArray->GetWordsPerColor()); - //OLD WAY!!! something about glColorPointer is what sets Colors for the VertArray!!?? 
- // SyncColorMaterial(VertArray->GetColors() != NULL); - const bool lighting = GLContext.GetImmLighting().GetLightingEnabled(); - const bool colormat = GLContext.GetMaterialManager().GetColorMaterialEnabled(); + const bool arrayHasColors = VertArray->GetColorsAreValid() && (VertArray->GetWordsPerColor() > 0); - const bool arrayHasColors = - VertArray->GetColorsAreValid() && (VertArray->GetWordsPerColor() > 0); + LaneConfig lanes; + lanes.vertices = (VertArray->GetWordsPerVertex() == 4) ? QW_XYZW : + (VertArray->GetWordsPerVertex() == 3) ? QW_XYZ : QW_NONE; + lanes.normals = (VertArray->GetNormalsAreValid() && + VertArray->GetWordsPerNormal() == 3 && + GLContext.GetImmLighting().GetLightingEnabled()) ? QW_XYZ : QW_NONE; + lanes.texcoords = (VertArray->GetTexCoordsAreValid() && + VertArray->GetWordsPerTexCoord() == 2) ? QW_XY : QW_NONE; + lanes.colors = + ((!GLContext.GetImmLighting().GetLightingEnabled() && arrayHasColors) || + ( GLContext.GetImmLighting().GetLightingEnabled() && + GLContext.GetMaterialManager().GetColorMaterialEnabled() && + arrayHasColors)) ? QW_XYZW : QW_NONE; - const bool useColorLane = - (!lighting && arrayHasColors) || - ( lighting && colormat && arrayHasColors); + ValidateLaneConfig(&lanes, "DrawArrays"); + + Geometry.SetWordsPerVertex(QWToWords(lanes.vertices)); + Geometry.SetWordsPerNormal(QWToWords(lanes.normals)); + Geometry.SetWordsPerTexCoord(QWToWords(lanes.texcoords)); + Geometry.SetWordsPerColor(QWToWords(lanes.colors)); - Geometry.SetColorsAreValid(useColorLane); - Geometry.SetWordsPerColor(useColorLane ? 
4 : 0); - SyncColorMaterial(useColorLane); + Geometry.SetColorsAreValid(LanePresent(lanes.colors)); + SyncColorMaterial(LanePresent(lanes.colors)); Geometry.AddVertices(count); Geometry.AddNormals(count); Geometry.AddTexCoords(count); - if (useColorLane) Geometry.AddColors(count); + if (LanePresent(lanes.colors)) Geometry.AddColors(count); Geometry.AdjustNewGeomPtrs(first); CommitNewGeom(); From 91ad7026cf65070f16e0e07e857b5288c7cc757a Mon Sep 17 00:00:00 2001 From: iann Date: Sat, 4 Oct 2025 02:26:10 +0900 Subject: [PATCH 06/13] raylib tests for pvc cube working, needs clean up, test for remote now --- examples/nehe/lesson02/lesson2.cpp | 1 - examples/nehe/lesson04/lesson4.cpp | 1 - examples/nehe/lesson05/lesson5.cpp | 11 ++++++----- include/ps2gl/gblock.h | 2 -- include/ps2gl/renderer.h | 2 +- src/base_renderer.cpp | 4 ++-- src/immgmanager.cpp | 2 ++ src/renderermanager.cpp | 7 +------ 8 files changed, 12 insertions(+), 18 deletions(-) diff --git a/examples/nehe/lesson02/lesson2.cpp b/examples/nehe/lesson02/lesson2.cpp index 233c8c80..1eb77de6 100644 --- a/examples/nehe/lesson02/lesson2.cpp +++ b/examples/nehe/lesson02/lesson2.cpp @@ -32,7 +32,6 @@ void display(void) // Create The Display Fu glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT); // Clear Screen And Depth Buffer glLoadIdentity(); // Reset The Current Modelview Matrix glTranslatef(-1.5f, 0.0f, -6.0f); // Move Left 1.5 Units And Into The Screen 6.0 - //TODO: colors appearing when they shouldnt be!!! 
glBegin(GL_TRIANGLES); // Drawing Using Triangles glVertex3f(0.0f, 1.0f, 0.0f); // Top glVertex3f(-1.0f, -1.0f, 0.0f); // Bottom Left diff --git a/examples/nehe/lesson04/lesson4.cpp b/examples/nehe/lesson04/lesson4.cpp index 26a0506f..756ea468 100644 --- a/examples/nehe/lesson04/lesson4.cpp +++ b/examples/nehe/lesson04/lesson4.cpp @@ -52,7 +52,6 @@ void display(void) // Create The Display Fu glTranslatef(1.5f, 0.0f, -6.0f); // Move Right 1.5 Units And Into The Screen 6.0 glRotatef(rquad, 1.0f, 0.0f, 0.0f); // Rotate The Quad On The X axis glColor3f(0.5f, 0.5f, 1.0f); // Set The Color To Blue One Time Only - //TODO: add fast no lights PVC for QUADS! glBegin(GL_QUADS); // Draw A Quad glVertex3f(-1.0f, 1.0f, 0.0f); // Top Left glVertex3f(1.0f, 1.0f, 0.0f); // Top Right diff --git a/examples/nehe/lesson05/lesson5.cpp b/examples/nehe/lesson05/lesson5.cpp index 2e5dc5c5..ea69e05b 100644 --- a/examples/nehe/lesson05/lesson5.cpp +++ b/examples/nehe/lesson05/lesson5.cpp @@ -71,6 +71,12 @@ void display(void) // Create The Display Fu glVertex3f(-1.0f, -1.0f, 1.0f); // Right Of Triangle (Left) glEnd(); // Finished Drawing The Triangle + + + glLoadIdentity(); // Reset The Current Modelview Matrix + glTranslatef(1.5f, 0.0f, -6.0f); // Move Right 1.5 Units And Into The Screen 6.0 + glRotatef(rquad, 1.0f, 0.0f, 0.0f); // Rotate The Quad On The X axis + glColor3f(0.5f, 0.5f, 1.0f); // Set The Color To Blue One Time Only // TODO: next make a fast no lights for QUADS // for now its interesting to see the lighting based one for quads only here: glEnable(GL_COLOR_MATERIAL); @@ -79,11 +85,6 @@ void display(void) // Create The Display Fu glEnable(GL_LIGHT0); constexpr float default_normal_direction_alignment[4] = {0.f, 0.f, 1.f, 0.f}; glLightfv(GL_LIGHT0, GL_POSITION, default_normal_direction_alignment); - - glLoadIdentity(); // Reset The Current Modelview Matrix - glTranslatef(1.5f, 0.0f, -6.0f); // Move Right 1.5 Units And Into The Screen 6.0 - glRotatef(rquad, 1.0f, 0.0f, 0.0f); // 
Rotate The Quad On The X axis - glColor3f(0.5f, 0.5f, 1.0f); // Set The Color To Blue One Time Only glBegin(GL_QUADS); // Draw A Quad glColor3f(0.0f, 1.0f, 0.0f); // Set The Color To Blue glVertex3f(1.0f, 1.0f, -1.0f); // Top Right Of The Quad (Top) diff --git a/include/ps2gl/gblock.h b/include/ps2gl/gblock.h index c9107729..2567b36b 100644 --- a/include/ps2gl/gblock.h +++ b/include/ps2gl/gblock.h @@ -72,12 +72,10 @@ class CGeometryBlock { CGeometryBlock() { Reset(); } // get/set info about geometry - //TODO: make these an actual proper validation based on the prim? inline void SetVerticesAreValid(bool valid) { AreNewVerticesValid = valid; } inline void SetNormalsAreValid(bool valid) { AreNewNormalsValid = valid; } inline void SetTexCoordsAreValid(bool valid) { AreNewTexCoordsValid = valid; } inline void SetColorsAreValid(bool valid) { AreNewColorsValid = valid; } - //TODO: make these an actual proper validation based on the prim? inline bool GetVerticesAreValid() const { return AreVerticesValid; } inline bool GetNormalsAreValid() const { return AreNormalsValid; } inline bool GetTexCoordsAreValid() const { return AreTexCoordsValid; } diff --git a/include/ps2gl/renderer.h b/include/ps2gl/renderer.h index 37557b62..f4970c4f 100644 --- a/include/ps2gl/renderer.h +++ b/include/ps2gl/renderer.h @@ -68,7 +68,7 @@ typedef enum { k1PtLight = 1 << 0, k8PtLights = 1 << 2 } tNumPtLights; typedef enum { kNoMaterial = 1 << 0, - kDiffuse = 1 << 1, + kDiffuse = 1 << 1, //TODO: it seems important to consolidate where "colors" and "light" differ still kSpecular = 1 << 2 } tPerVtxMaterial; typedef enum { kLinear = 1 << 0, diff --git a/src/base_renderer.cpp b/src/base_renderer.cpp index 30808e17..c7b099e7 100644 --- a/src/base_renderer.cpp +++ b/src/base_renderer.cpp @@ -424,8 +424,8 @@ CBaseRenderer::BuildGiftag(GLenum primType) bool alpha = drawContext.GetBlendEnabled(); unsigned int nreg = OutputQuadsPerVert; - GS::tPrim prim = { prim_type : primType, iip : smoothShading, tme : 
useTexture, fge : 0, abe : alpha, aa1 : 0, fst : 0, ctxt : 0, fix : 0 }; - tGifTag giftag = { NLOOP : 0, EOP : 1, pad0 : 0, id : 0, PRE : 1, PRIM : *(uint64_t*)&prim, FLG : 0, NREG : nreg, REGS0 : 2, REGS1 : 1, REGS2 : 4 }; + GS::tPrim prim = { .prim_type = primType, .iip = smoothShading, .tme = useTexture, .fge = 0, .abe = alpha, .aa1 = 0, .fst = 0, .ctxt = 0, .fix = 0 }; + tGifTag giftag = { .NLOOP = 0, .EOP = 1, .pad0 = 0, .id = 0, .PRE = 1, .PRIM = *(uint64_t*)&prim, .FLG = 0, .NREG = nreg, .REGS0 = 2, .REGS1 = 1, .REGS2 = 4 }; return giftag; } diff --git a/src/immgmanager.cpp b/src/immgmanager.cpp index bd30c63f..900fe4e4 100644 --- a/src/immgmanager.cpp +++ b/src/immgmanager.cpp @@ -187,6 +187,7 @@ void CImmGeomManager::EndGeom() Geometry.SetColorsAreValid(LanePresent(lanes.colors)); SyncColorMaterial(LanePresent(lanes.colors)); + RendererManager.PerVtxMaterialChanged(useColorLane ? RendererProps::kDiffuse : RendererProps::kNoMaterial); Geometry.SetWordsPerVertex(QWToWords(lanes.vertices)); Geometry.SetWordsPerNormal(QWToWords(lanes.normals)); @@ -242,6 +243,7 @@ void CImmGeomManager::DrawArrays(GLenum mode, int first, int count) Geometry.SetColorsAreValid(LanePresent(lanes.colors)); SyncColorMaterial(LanePresent(lanes.colors)); + RendererManager.PerVtxMaterialChanged(LanePresent(lanes.colors) ? RendererProps::kDiffuse : RendererProps::kNoMaterial); Geometry.AddVertices(count); Geometry.AddNormals(count); diff --git a/src/renderermanager.cpp b/src/renderermanager.cpp index 00143883..37600e9c 100644 --- a/src/renderermanager.cpp +++ b/src/renderermanager.cpp @@ -89,11 +89,6 @@ CRendererManager::CRendererManager(CGLContext& context) "fast, no lights")); } // unlit renderer per vertex color - // TODO: make sure this actually is ordered in here to work with other examples - // (the pathing for this renderermanager approach is concerning how maybe this - // could capture cases that dont want this renderer? 
- - //TODO: something could be making this horrendously slow, will need to continue to investigate performance of the lighting vcl and the no lights pvc... { CRendererProps capabilities = { .PrimType = kTriangles, @@ -102,7 +97,7 @@ CRendererManager::CRendererManager(CGLContext& context) .NumPtLights = 0, .Texture = 0, .Specular = 0, - .PerVtxMaterial = kNoMaterial, + .PerVtxMaterial = kDiffuse, //TODO: this is just to allow for only certain targets to get pvc (its a hack to get behavior, clean up next .Clipping = kNonClipped | kClipped, .CullFace = 0, .TwoSidedLighting = 0, From bc2c66f1f7b51c6864f079aeb97989a319577e58 Mon Sep 17 00:00:00 2001 From: iann Date: Wed, 8 Oct 2025 11:07:18 +0900 Subject: [PATCH 07/13] Adding quite a lot of experimental code, will need to come back through and clean this up --- Makefile | 8 +- include/ps2gl/dlgmanager.h | 4 +- include/ps2gl/fixed_function.h | 237 ++++++++++++++++++ include/ps2gl/glcontext.h | 9 + include/ps2gl/gmanager.h | 52 +--- include/ps2gl/immgmanager.h | 5 +- src/base_renderer.cpp | 6 + src/dlgmanager.cpp | 4 +- src/glcontext.cpp | 30 +++ src/gmanager.cpp | 61 ++++- src/immgmanager.cpp | 162 +++++++++---- src/indexed_renderer.cpp | 55 +++-- src/renderermanager.cpp | 52 +++- vu1/indexed.vcl | 25 ++ vu1/indexed_no_lights_pvc.vcl | 152 ++++++++++++ vu1/indexed_vcl.vsm | 426 +++++++++++++++++---------------- vu1/vu1renderers.h | 1 + 17 files changed, 970 insertions(+), 319 deletions(-) create mode 100644 include/ps2gl/fixed_function.h create mode 100644 vu1/indexed_no_lights_pvc.vcl diff --git a/Makefile b/Makefile index 7e203718..a2fa9187 100644 --- a/Makefile +++ b/Makefile @@ -50,6 +50,7 @@ RENDERERS = \ general_tri \ general \ indexed \ + indexed_no_lights_pvc \ scei \ fast_no_lights_pvc_tri @@ -77,6 +78,11 @@ realclean: clean include $(PS2SDK)/Defs.make include $(PS2SDK)/samples/Makefile.eeglobal +.PHONY: one +one: + @test -n "$(VCL)" || (echo "Usage: make $@ VCL=path/to/foo.vcl"; exit 1) + $(MAKE) 
$(VCL:.vcl=_vcl.vsm) $(VCL:.vcl=.vo) + ## dvp-as origin in ps2dev toolchain: https://github.com/ps2dev/ps2toolchain/blob/master/scripts/001-dvp.sh ## Build .vo (VU object) from a compiled .vsm %.vo: %_vcl.vsm @@ -90,7 +96,7 @@ include $(PS2SDK)/samples/Makefile.eeglobal # GCC / CPP flags (-E, -P, -imacros): https://gcc.gnu.org/onlinedocs/cpp/Invocation.html#Invocation # -E = preprocess only, -P = strip #line, -imacros includes macros without writing #include -%indexed_pp4.vcl: %indexed_pp3.vcl +vu1/indexed%_pp4.vcl: vu1/indexed%_pp3.vcl cat $< | cc -E -P -imacros vu1/vu1_mem_indexed.h -o $@ - # GCC / CPP flags (-E, -P, -imacros): https://gcc.gnu.org/onlinedocs/cpp/Invocation.html#Invocation diff --git a/include/ps2gl/dlgmanager.h b/include/ps2gl/dlgmanager.h index 4136c77b..466878f3 100644 --- a/include/ps2gl/dlgmanager.h +++ b/include/ps2gl/dlgmanager.h @@ -54,8 +54,8 @@ class CDListGeomManager : public CGeomManager { void TexCoord(float u, float v); void Color(cpu_vec_xyzw color); void EndGeom(); - void DrawArrays(GLenum mode, int first, int count); - void DrawIndexedArrays(GLenum primType, + void LinearArraysGeomStage(GLenum mode, int first, int count); + void IndexedArraysGeomStage(GLenum primType, int numIndices, const unsigned char* indices, int numVertices); void Flush(); diff --git a/include/ps2gl/fixed_function.h b/include/ps2gl/fixed_function.h new file mode 100644 index 00000000..55bd348f --- /dev/null +++ b/include/ps2gl/fixed_function.h @@ -0,0 +1,237 @@ +#ifndef ps2gl_fixed_function_h +#define ps2gl_fixed_function_h + +#pragma once +#include +#include +#ifndef GL_DIFFUSE + #define GL_DIFFUSE 0x1201 +#endif +// typedef unsigned int GLenum; //TODO??? 
+#include +#include "ps2gl/gmanager.h" +#include "ps2gl/gblock.h" +#include "ps2gl/renderermanager.h" +#include "ps2gl/glcontext.h" + +typedef enum { + FIXED_FUNCTION_ATTR_NONE = 0, + FIXED_FUNCTION_ATTR_CONSTANT, + FIXED_FUNCTION_ATTR_ARRAY +} FixedFunctionDataSrc; + +typedef enum { + FIXED_FUNCTION_COLOR_CONSTANT = 0, // (currentColor * tint), lighting OFF + FIXED_FUNCTION_COLOR_ARRAY, // (vertexColor * tint), lighting OFF + FIXED_FUNCTION_COLOR_LIT // lighting ON +} FixedFunctionColor; //TODO I get that this is for emphasis but it seems ugly idk + +typedef struct { + bool texture2dEnabled; + bool lightingEnabled; + bool colorMaterialEnabled; + GLenum colorMaterialMode; // expected: GL_DIFFUSE or 0 + + bool vertexArrayEnabled; + bool normalArrayEnabled; + bool texcoordArrayEnabled; + bool colorArrayEnabled; + + bool diffuseTextureBound; // a valid diffuse map is actually bound + + float currentColor[4]; // current GL color + float currentNormal[3]; // current GL normal (rarely used in this policy) + float currentTexCoord[2]; // current GL texcoord (we do not rely on this) + + // App-level tint (raylib tint, or 1,1,1,1 if already folded into currentColor) + float tintRgba[4]; + + bool immediateColorVariesInPrimitive; +} FixedFunctionConditions; //TODO: should we merge this with state somehow? 
+ +typedef struct { + FixedFunctionDataSrc vertexSrc; // always ARRAY + FixedFunctionDataSrc normalSrc; // ARRAY or NONE + FixedFunctionDataSrc texcoordSrc; // ARRAY or NONE + FixedFunctionDataSrc colorSrc; // CONSTANT or ARRAY (ARRAY also means “varies per vertex” in immediate mode) + + FixedFunctionColor ffColor; + + bool V, N, T, C; + + bool textureFlag; + bool lightingFlag; + bool colorMaterialAffectsDiffuse; // true if per-vertex color should drive diffuse when lighting is on +} FixedFunctionState; + +typedef enum { + QW_NONE = 0x0, // ---- + QW_X = 0x1, // X--- + QW_XY = 0x3, // XY-- + QW_XYZ = 0x7, // XYZ- + QW_XYZW = 0xF // XYZW +} QuadWords; + +typedef struct { + QuadWords vertices; // legal: QW_XYZ or QW_XYZW // TODO: remove (qw == QW_XYZ) I THINK!! + QuadWords normals; // legal: QW_NONE or QW_XYZ + QuadWords texcoords; // legal: QW_NONE or QW_XY + QuadWords colors; // legal: QW_NONE or QW_XYZW +} LaneConfig; + +static inline int verticesOk(QuadWords qw) { return (qw == QW_XYZ) || (qw == QW_XYZW); } // TODO: remove (qw == QW_XYZ) +static inline int normalsOk(QuadWords qw) { return (qw == QW_NONE) || (qw == QW_XYZ); } +static inline int texcoordsOk(QuadWords qw) { return (qw == QW_NONE) || (qw == QW_XY); } +static inline int colorsOk(QuadWords qw) { return (qw == QW_NONE) || (qw == QW_XYZW); } + +static inline int ValidateLaneConfig(const LaneConfig* lanes, const char* where) { + if (!verticesOk(lanes->vertices) || !normalsOk(lanes->normals) || !texcoordsOk(lanes->texcoords) || !colorsOk(lanes->colors)) { + mError("%s: illegal lane masks (V=%x N=%x T=%x C=%x)", where, lanes->vertices, lanes->normals, lanes->texcoords, lanes->colors); + return 0; + } + return 1; +} + +static inline int QWToWords(QuadWords qw) { + switch (qw) { + case QW_NONE: return 0; + case QW_X: return 1; //TODO: just for brevity + case QW_XY: return 2; + case QW_XYZ: return 3; + case QW_XYZW: return 4; + default: return 0; + } +} + +static inline int LanePresent(QuadWords qw) { return 
(qw != QW_NONE); } + +static inline FixedFunctionState evaluate(const FixedFunctionConditions* conditions) +{ + FixedFunctionState state; + memset(&state, 0, sizeof(state)); + + // V: always array & lane present + state.vertexSrc = FIXED_FUNCTION_ATTR_ARRAY; + state.V = true; + + // T: requires GL texture enable, a real diffuse bound, AND a texcoord array + state.textureFlag = (conditions->texture2dEnabled && conditions->diffuseTextureBound && conditions->texcoordArrayEnabled); + state.texcoordSrc = state.textureFlag ? FIXED_FUNCTION_ATTR_ARRAY : FIXED_FUNCTION_ATTR_NONE; + state.T = (state.texcoordSrc == FIXED_FUNCTION_ATTR_ARRAY); + + // N: lighting is only meaningful if we have a normal array + const bool lightingFeasible = (conditions->lightingEnabled && conditions->normalArrayEnabled); + state.lightingFlag = lightingFeasible; + state.normalSrc = state.lightingFlag ? FIXED_FUNCTION_ATTR_ARRAY : FIXED_FUNCTION_ATTR_NONE; + state.N = (state.normalSrc == FIXED_FUNCTION_ATTR_ARRAY); + + // C: color path & lane selection + const bool perVertexColorSupplyPresent = conditions->colorArrayEnabled || conditions->immediateColorVariesInPrimitive; + + if (!state.lightingFlag) { + if (perVertexColorSupplyPresent) { + state.colorSrc = FIXED_FUNCTION_ATTR_ARRAY; + state.ffColor = FIXED_FUNCTION_COLOR_ARRAY; // (v.color * tint) + state.C = true; // need per-vertex color lane + } else { + state.colorSrc = FIXED_FUNCTION_ATTR_CONSTANT; + state.ffColor = FIXED_FUNCTION_COLOR_CONSTANT;// (currentColor * tint) + state.C = false; // constant → no C lane + } + } else { + // Lighting ON: final color is computed. PVC only matters for material mapping. + state.colorSrc = perVertexColorSupplyPresent ? 
FIXED_FUNCTION_ATTR_ARRAY : FIXED_FUNCTION_ATTR_CONSTANT; + state.ffColor = FIXED_FUNCTION_COLOR_LIT; + state.C = false; + } + + // ColorMaterial routing (lighting must be ON, color material enabled and mode=DIFFUSE, + // and there must be a per-vertex supply to make it meaningful) + state.colorMaterialAffectsDiffuse = + (state.lightingFlag && + conditions->colorMaterialEnabled && + conditions->colorMaterialMode == GL_DIFFUSE && + perVertexColorSupplyPresent); + + return state; +} + +static inline void capture( + FixedFunctionConditions* conditions, + CGLContext& glContext, + const CVertArray& vertArray, + bool diffuseTextureIsBound, // see §3 for how to feed this + const float tintRgba[4], // see §3 for how to feed this + bool immediateColorVariesInPrimitive) // pass true in EndGeom() if colors changed mid-primitive +{ + memset(conditions, 0, sizeof(*conditions)); + + conditions->texture2dEnabled = glContext.IsTextureEnabled(); + conditions->lightingEnabled = glContext.IsLightingEnabled(); + conditions->colorMaterialEnabled = glContext.IsColorMaterialEnabled(); + conditions->colorMaterialMode = glContext.GetColorMaterialMode(); + + // Arrays + conditions->vertexArrayEnabled = vertArray.GetVerticesAreValid(); + conditions->normalArrayEnabled = vertArray.GetNormalsAreValid(); + conditions->texcoordArrayEnabled = vertArray.GetTexCoordsAreValid(); + conditions->colorArrayEnabled = vertArray.GetColorsAreValid() && vertArray.GetWordsPerColor() == 4; + + conditions->diffuseTextureBound = diffuseTextureIsBound; + + //cpu_vec_xyzw current = glContext.GetGeomManager().GetCurGeomColor(); + cpu_vec_xyzw currentColor = glContext.GetCurrentGeomColor(); + conditions->currentColor[0] = currentColor[0]; + conditions->currentColor[1] = currentColor[1]; + conditions->currentColor[2] = currentColor[2]; + conditions->currentColor[3] = currentColor[3]; + + //TODO WHAT? WHY WOULDNT WE JUST GET FROM THE actual defaults? (I ADDED BELOW AND FOR TEXCOORd/... 
+ // conditions->currentNormal[0] = 0.0f; + // conditions->currentNormal[1] = 0.0f; + // conditions->currentNormal[2] = 1.0f; + cpu_vec_xyz currentNormal = glContext.GetCurrentNormal(); + conditions->currentNormal[0] = currentNormal[0]; + conditions->currentNormal[1] = currentNormal[1]; + conditions->currentNormal[2] = currentNormal[2]; + + const float* currentTexCoord = glContext.GetCurrentTexCoord(); + conditions->currentTexCoord[0] = currentTexCoord[0]; + conditions->currentTexCoord[1] = currentTexCoord[1]; + + memcpy(conditions->tintRgba, tintRgba, sizeof(float)*4); + conditions->immediateColorVariesInPrimitive = immediateColorVariesInPrimitive; +} + +static inline void apply( + const FixedFunctionState& state, + CGeometryBlock& geometry, + CRendererManager& rendererManager, + CGLContext& glContext) //TODO what would we use glContext here for again??? +{ + //TODO: can we integrate the QuadWord stuff and validation here? would it be helpful? i would like to have at least those enums rather than the shitty ints here that are ambigious... + geometry.SetWordsPerVertex(4); + geometry.SetWordsPerNormal(state.N ? 3 : 0); + geometry.SetWordsPerTexCoord(state.T ? 2 : 0); + geometry.SetWordsPerColor(state.C ? 4 : 0); + + geometry.SetVerticesAreValid(true); + geometry.SetNormalsAreValid(state.N); + geometry.SetTexCoordsAreValid(state.T); + geometry.SetColorsAreValid(state.C); + //TODO: still this i am ughhhhhhhhhhhhhhhh + // Hook ColorMaterial (diffuse) mapping in lit path + // This mirrors what your SyncColorMaterial used to do but is now explicit and single-sourced. + if (state.colorMaterialAffectsDiffuse) { + rendererManager.PerVtxMaterialChanged(RendererProps::kDiffuse); + } else { + rendererManager.PerVtxMaterialChanged(RendererProps::kNoMaterial); + } + + // TODO: do something here? 
to strictly align renderer choice with *effective* lighting, + // you can keep using GLContext as-is (works functionally), or go further: + // - add an "effective lighting" override the renderers consult. + // OTHERWISE DO NOTHING???? WHAT??? +} + +#endif // ps2gl_fixed_function_h diff --git a/include/ps2gl/glcontext.h b/include/ps2gl/glcontext.h index d9ed0f62..dae779e3 100644 --- a/include/ps2gl/glcontext.h +++ b/include/ps2gl/glcontext.h @@ -13,6 +13,7 @@ #include "ps2s/gsmem.h" #include "ps2s/packet.h" +#include "ps2s/cpu_vector.h" #include "GL/gl.h" @@ -408,6 +409,14 @@ class CGLContext { void WaitForVSync(); void SwapBuffers(); + + bool IsTextureEnabled(); + bool IsLightingEnabled(); + bool IsColorMaterialEnabled(); + GLenum GetColorMaterialMode(); + cpu_vec_xyzw GetCurrentGeomColor(); + cpu_vec_xyz GetCurrentNormal(); + const float* GetCurrentTexCoord(); }; // global pointer to the GLContext diff --git a/include/ps2gl/gmanager.h b/include/ps2gl/gmanager.h index cce547d4..92ec036d 100644 --- a/include/ps2gl/gmanager.h +++ b/include/ps2gl/gmanager.h @@ -19,48 +19,7 @@ /******************************************** * constants */ -typedef enum { - QW_NONE = 0x0, // ---- - QW_X = 0x1, // X--- - QW_XY = 0x3, // XY-- - QW_XYZ = 0x7, // XYZ- - QW_XYZW = 0xF // XYZW -} QuadWords; - -typedef struct { - QuadWords vertices; // legal: QW_XYZ or QW_XYZW - QuadWords normals; // legal: QW_NONE or QW_XYZ - QuadWords texcoords; // legal: QW_NONE or QW_XY - QuadWords colors; // legal: QW_NONE or QW_XYZW -} LaneConfig; - -static inline int verticesOk(QuadWords qw) { return (qw == QW_XYZ) || (qw == QW_XYZW); } -static inline int normalsOk(QuadWords qw) { return (qw == QW_NONE) || (qw == QW_XYZ); } -static inline int texcoordsOk(QuadWords qw) { return (qw == QW_NONE) || (qw == QW_XY); } -static inline int colorsOk(QuadWords qw) { return (qw == QW_NONE) || (qw == QW_XYZW); } - -static inline int ValidateLaneConfig(const LaneConfig* lanes, const char* where) { - if 
(!verticesOk(lanes->vertices) || !normalsOk(lanes->normals) || !texcoordsOk(lanes->texcoords) || !colorsOk(lanes->colors)) { - mError("%s: illegal lane masks (V=%x N=%x T=%x C=%x)", where, lanes->vertices, lanes->normals, lanes->texcoords, lanes->colors); - return 0; - } - return 1; -} - -// Helper: map QW -> float components per vertex -static inline int QWToWords(QuadWords qw) { - switch (qw) { - case QW_NONE: return 0; - case QW_X: return 1; // rarely used; we don’t select it for V/N/T/C - case QW_XY: return 2; - case QW_XYZ: return 3; - case QW_XYZW: return 4; - default: return 0; - } -} - -static inline int LanePresent(QuadWords qw) { return (qw != QW_NONE); } - +enum ColorSrc : uint8_t { kColor_Float = 0, kColor_UByte = 1 }; /******************************************** * CVertArray */ @@ -69,6 +28,7 @@ class CVertArray { void *Vertices, *Normals, *TexCoords, *Colors; bool VerticesAreValid, NormalsAreValid, TexCoordsAreValid, ColorsAreValid; char WordsPerVertex, WordsPerNormal, WordsPerTexCoord, WordsPerColor; + ColorSrc ColorSrcType; public: CVertArray(); @@ -92,6 +52,8 @@ class CVertArray { inline void SetNormals(void* newPtr) { Normals = newPtr; } inline void SetTexCoords(void* newPtr) { TexCoords = newPtr; } inline void SetColors(void* newPtr) { Colors = newPtr; } + inline ColorSrc GetColorSrcType() const { return ColorSrcType; } + inline void SetColorSrc(ColorSrc src) { ColorSrcType = src; } inline int GetWordsPerVertex() const { return WordsPerVertex; } inline int GetWordsPerNormal() const { return WordsPerNormal; } @@ -239,8 +201,8 @@ class CGeomManager { virtual void TexCoord(float u, float v) = 0; virtual void Color(cpu_vec_xyzw color) = 0; virtual void EndGeom() = 0; - virtual void DrawArrays(GLenum mode, int first, int count) = 0; - virtual void DrawIndexedArrays(GLenum primType, + virtual void LinearArraysGeomStage(GLenum mode, int first, int count) = 0; + virtual void IndexedArraysGeomStage(GLenum primType, int numIndices, const unsigned char* 
indices, int numVertices) = 0; @@ -248,3 +210,5 @@ class CGeomManager { }; #endif // ps2gl_gmanager_h + +// #include "ps2gl/fixed_function.h" \ No newline at end of file diff --git a/include/ps2gl/immgmanager.h b/include/ps2gl/immgmanager.h index 011b6491..9b61de7b 100644 --- a/include/ps2gl/immgmanager.h +++ b/include/ps2gl/immgmanager.h @@ -8,6 +8,7 @@ #define ps2gl_immgmanager_h #include "ps2gl/gmanager.h" +#include "ps2gl/fixed_function.h" /******************************************** * CImmGeomManager - the immediate renderer @@ -76,8 +77,8 @@ class CImmGeomManager : public CGeomManager { void TexCoord(float u, float v); void Color(cpu_vec_xyzw color); void EndGeom(); - void DrawArrays(GLenum mode, int first, int count); - void DrawIndexedArrays(GLenum primType, + void LinearArraysGeomStage(GLenum mode, int first, int count); + void IndexedArraysGeomStage(GLenum primType, int numIndices, const unsigned char* indices, int numVertices); void Flush(); diff --git a/src/base_renderer.cpp b/src/base_renderer.cpp index c7b099e7..0dbf52f6 100644 --- a/src/base_renderer.cpp +++ b/src/base_renderer.cpp @@ -281,7 +281,11 @@ void CBaseRenderer::AddVu1RendererContext(CVifSCDmaPacket& packet, GLenum primTy packet += numPts; packet += numSpots; } else { + /* packet += (uint64_t)0; + */ + packet += 0; + packet += 0; packet += 0; } @@ -423,6 +427,8 @@ CBaseRenderer::BuildGiftag(GLenum primType) bool useTexture = glContext.GetTexManager().GetTexEnabled(); bool alpha = drawContext.GetBlendEnabled(); unsigned int nreg = OutputQuadsPerVert; + // bool flip = drawContext.CurFrameMem != drawContext.Frame0Mem; + // GS::tPrim prim = { .prim_type = primType, .iip = smoothShading, .tme = useTexture, .fge = 0, .abe = alpha, .aa1 = 0, .fst = 0, .ctxt = flip, .fix = 0 }; GS::tPrim prim = { .prim_type = primType, .iip = smoothShading, .tme = useTexture, .fge = 0, .abe = alpha, .aa1 = 0, .fst = 0, .ctxt = 0, .fix = 0 }; tGifTag giftag = { .NLOOP = 0, .EOP = 1, .pad0 = 0, .id = 0, .PRE = 1, 
.PRIM = *(uint64_t*)&prim, .FLG = 0, .NREG = nreg, .REGS0 = 2, .REGS1 = 1, .REGS2 = 4 }; diff --git a/src/dlgmanager.cpp b/src/dlgmanager.cpp index 1b5c3d2a..6fa96d53 100644 --- a/src/dlgmanager.cpp +++ b/src/dlgmanager.cpp @@ -247,7 +247,7 @@ void CDListGeomManager::EndGeom() * DrawArrays */ -void CDListGeomManager::DrawArrays(GLenum mode, int first, int count) +void CDListGeomManager::LinearArraysGeomStage(GLenum mode, int first, int count) { if (Prim != mode) PrimChanged(mode); @@ -280,7 +280,7 @@ void CDListGeomManager::DrawArrays(GLenum mode, int first, int count) CommitNewGeom(); } -void CDListGeomManager::DrawIndexedArrays(GLenum primType, +void CDListGeomManager::IndexedArraysGeomStage(GLenum primType, int numIndices, const unsigned char* indices, int numVertices) { diff --git a/src/glcontext.cpp b/src/glcontext.cpp index b513dd05..43fd1dea 100644 --- a/src/glcontext.cpp +++ b/src/glcontext.cpp @@ -789,3 +789,33 @@ const GLubyte* glGetString(GLenum name) mNotImplemented(); return (GLubyte*)"not implemented"; } + +bool CGLContext::IsTextureEnabled() { + return GetTexManager().GetTexEnabled(); +} + +bool CGLContext::IsLightingEnabled() { + return GetImmLighting().GetLightingEnabled(); +} + +bool CGLContext::IsColorMaterialEnabled() { + return GetMaterialManager().GetColorMaterialEnabled(); +} + +GLenum CGLContext::GetColorMaterialMode() { + return GetMaterialManager().GetColorMaterialMode(); +} + +cpu_vec_xyzw CGLContext::GetCurrentGeomColor() { + return GetGeomManager().GetCurGeomColor(); +} + +cpu_vec_xyz CGLContext::GetCurrentNormal() { + return GetGeomManager().GetCurNormal(); +} + +const float* CGLContext::GetCurrentTexCoord() { + return GetGeomManager().GetCurTexCoord(); +} + + diff --git a/src/gmanager.cpp b/src/gmanager.cpp index 958ca625..056e3ec6 100644 --- a/src/gmanager.cpp +++ b/src/gmanager.cpp @@ -30,6 +30,7 @@ CVertArray::CVertArray() VerticesAreValid = NormalsAreValid = TexCoordsAreValid = ColorsAreValid = false; WordsPerVertex = 
WordsPerTexCoord = WordsPerColor = 0; WordsPerNormal = 3; // not set by NormalPointer + ColorSrcType = kColor_Float; } /******************************************** @@ -155,23 +156,41 @@ void glTexCoordPointer(GLint size, GLenum type, * @param stride must be zero. Non-zero strides are unsupported and likely * to remain so. */ -void glColorPointer(GLint size, GLenum type, - GLsizei stride, const GLvoid* ptr) +void glColorPointer(GLint size, GLenum type, GLsizei stride, const GLvoid* ptr) { GL_FUNC_DEBUG("%s\n", __FUNCTION__); + mDebugPrint("glColorPointer: size=%d type=0x%X stride=%d ptr=%p\n", (int)size, (unsigned)type, (int)stride, ptr); if (stride != 0) { mNotImplemented("stride must be 0"); return; } if (type != GL_FLOAT) { + if (type == GL_UNSIGNED_BYTE) { + if (ptr) { + const unsigned char* colorSample = (const unsigned char*)ptr; + mDebugPrint("glColorPointer: SAMPLE u8=(%u,%u,%u,%u)\n", colorSample[0], colorSample[1], colorSample[2], colorSample[3]); + } + CVertArray& vertArray = pGLContext->GetGeomManager().GetVertArray(); + vertArray.SetColors((void*)ptr); + vertArray.SetWordsPerColor(4); + vertArray.SetColorSrc(kColor_UByte); + return; + } mNotImplemented("type must be float"); return; } + if (ptr) { + const float* colorSample = (const float*)ptr; + mDebugPrint("glColorPointer: SAMPLE f32=(%.3f,%.3f,%.3f,%.3f)\n", colorSample[0], colorSample[1], colorSample[2], colorSample[3]); + } CVertArray& vertArray = pGLContext->GetGeomManager().GetVertArray(); vertArray.SetColors((void*)ptr); vertArray.SetWordsPerColor(size); + // Reset the source type: a prior GL_UNSIGNED_BYTE binding leaves kColor_UByte set, and the draw stages would then run their u8->float conversion over this float data. + vertArray.SetColorSrc(kColor_Float); + mDebugPrint("glColorPointer: BOUND F32 colors (wpc=%d)\n", (int)size); } /** @@ -190,17 +207,49 @@ void glDrawArrays(GLenum mode, GLint first, GLsizei count) GL_FUNC_DEBUG("%s\n", __FUNCTION__); CGeomManager& gmanager = pGLContext->GetGeomManager(); - gmanager.DrawArrays(mode, first, count); + gmanager.LinearArraysGeomStage(mode, first, count); } /** - * This is not implemented yet + * This is now being implemented/experimental */ void 
glDrawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid* indices) { GL_FUNC_DEBUG("%s\n", __FUNCTION__); - mError("glDrawElements is a placeholder ATM and should not be called"); + if (type != GL_UNSIGNED_SHORT) { + mNotImplemented("glDrawElements only supports GL_UNSIGNED_SHORT for now"); + return; + } + + const GLushort* indices_u16 = (const GLushort*)indices; + GLushort max = 0; + + for (GLsizei i = 0; i < count; ++i) { + if (indices_u16[i] > max) max = indices_u16[i]; + } + + const int numVertices = (int)max + 1; + + if (max <= 255) { + static uint8_t* indices_u8_scratch = NULL; + static int scratchCapacity = 0; + if (scratchCapacity < count) { + delete[] indices_u8_scratch; + indices_u8_scratch = new uint8_t[count]; + scratchCapacity = (int)count; + } + for (GLsizei i = 0; i < count; ++i) { + indices_u8_scratch[i] = (uint8_t)indices_u16[i]; + } + mDebugPrint("glDrawElements: primType=%d count=%d type=GL_UNSIGNED_SHORT maxIndex=%u numVertices=%d (u8 path)\n", (int)mode, (int)count, (unsigned)max, (int)numVertices); + CGeomManager& gmanager = pGLContext->GetGeomManager(); + gmanager.IndexedArraysGeomStage(mode, (int)count, indices_u8_scratch, numVertices); + } else { + mDebugPrint("glDrawElements: primType=%d count=%d type=GL_UNSIGNED_SHORT maxIndex=%u numVertices=%d (u16 path)\n", (int)mode, (int)count, (unsigned)max, (int)numVertices); + CGeomManager& gmanager = pGLContext->GetGeomManager(); + gmanager.IndexedArraysGeomStage(mode, (int)count, (const unsigned char*)indices, numVertices); + } } /** @@ -464,7 +513,7 @@ void pglDrawIndexedArrays(GLenum primType, int numIndices, const unsigned char* indices, int numVertices) { - pGLContext->GetGeomManager().DrawIndexedArrays(primType, numIndices, indices, numVertices); + pGLContext->GetGeomManager().IndexedArraysGeomStage(primType, numIndices, indices, numVertices); } /** diff --git a/src/immgmanager.cpp b/src/immgmanager.cpp index 900fe4e4..1a8ad487 100644 --- a/src/immgmanager.cpp +++ 
b/src/immgmanager.cpp @@ -198,11 +198,32 @@ void CImmGeomManager::EndGeom() } /******************************************** - * DrawArrays + * LinearArraysGeomStage */ -void CImmGeomManager::DrawArrays(GLenum mode, int first, int count) +void CImmGeomManager::LinearArraysGeomStage(GLenum mode, int first, int count) { + //DrawingLinearArray(); + mDebugPrint("LinearArraysGeomStage: ENTER mode=%d first=%d count=%d\n", (int)mode, first, count); + void* beforeColors = VertArray->GetColors(); + mDebugPrint("LinearArraysGeomStage: BEFORE colors=%p valid=%d wpc=%d src=%d\n", + beforeColors, + (int)VertArray->GetColorsAreValid(), + VertArray->GetWordsPerColor(), + VertArray->GetColorSrcType()); + if (beforeColors && VertArray->GetColorsAreValid() && VertArray->GetWordsPerColor() >= 4) { + if (VertArray->GetColorSrcType() == kColor_UByte) { + const unsigned char* sampleColor = (const unsigned char*)beforeColors + 4 * first; + mDebugPrint("LinearArraysGeomStage: BEFORE sample[%d] u8=(%u,%u,%u,%u)\n", + first, sampleColor[0], sampleColor[1], sampleColor[2], sampleColor[3]); + } else { + //TODO: fix the casting insanity to make it more clear? 
+ const float* sampleColor = (const float*)beforeColors + 4 * first; + mDebugPrint("LinearArraysGeomStage: BEFORE sample[%d] f32=(%.3f,%.3f,%.3f,%.3f)\n", + first, sampleColor[0], sampleColor[1], sampleColor[2], sampleColor[3]); + } + } + if (Prim != mode) PrimChanged(mode); @@ -212,7 +233,32 @@ void CImmGeomManager::DrawArrays(GLenum mode, int first, int count) Geometry.SetVertices(VertArray->GetVertices()); Geometry.SetNormals(VertArray->GetNormals()); Geometry.SetTexCoords(VertArray->GetTexCoords()); - Geometry.SetColors(VertArray->GetColors()); + + void* colorsPtr = VertArray->GetColors(); + const bool colorArrayEnabled = VertArray->GetColorsAreValid() && VertArray->GetWordsPerColor() == 4; + + if (colorArrayEnabled && VertArray->GetColorSrcType() == kColor_UByte) { + float* bufStart = (float*)CurColorBuf->GetNextPtr(); + const unsigned char* srcColorBuf_U8 = (const unsigned char*)colorsPtr; + const int totalCount = first + count; + for (int i = 0; i < totalCount; ++i) { + const unsigned char* colorChannels = srcColorBuf_U8 + 4*i; + *CurColorBuf += colorChannels[0] / 255.0f; + *CurColorBuf += colorChannels[1] / 255.0f; + *CurColorBuf += colorChannels[2] / 255.0f; + *CurColorBuf += colorChannels[3] / 255.0f; + } + const float* sampleColor = bufStart + 4*first; + mDebugPrint("LinearArraysGeomStage: AFTER-CONVERT colors=%p sample[%d] f32=(%.3f,%.3f,%.3f,%.3f)\n", + (void*)bufStart, first, sampleColor[0], sampleColor[1], sampleColor[2], sampleColor[3]); + colorsPtr = bufStart; + } else { + mDebugPrint("LinearArraysGeomStage: no UBYTE->FLOAT conversion (enabled=%d src=%d wpc=%d)\n", + (int)colorArrayEnabled, VertArray->GetColorSrcType(), VertArray->GetWordsPerColor()); + } + + Geometry.SetColors(colorsPtr); + mDebugPrint("LinearArraysGeomStage: Geometry.SetColors(%p)\n", colorsPtr); Geometry.SetVerticesAreValid(VertArray->GetVerticesAreValid()); Geometry.SetNormalsAreValid(VertArray->GetNormalsAreValid()); @@ -234,7 +280,7 @@ void CImmGeomManager::DrawArrays(GLenum 
mode, int first, int count) GLContext.GetMaterialManager().GetColorMaterialEnabled() && arrayHasColors)) ? QW_XYZW : QW_NONE; - ValidateLaneConfig(&lanes, "DrawArrays"); + ValidateLaneConfig(&lanes, "LinearArraysGeomStage"); Geometry.SetWordsPerVertex(QWToWords(lanes.vertices)); Geometry.SetWordsPerNormal(QWToWords(lanes.normals)); @@ -251,46 +297,17 @@ void CImmGeomManager::DrawArrays(GLenum mode, int first, int count) if (LanePresent(lanes.colors)) Geometry.AddColors(count); Geometry.AdjustNewGeomPtrs(first); + mDebugPrint("LinearArraysGeomStage: EXIT\n"); CommitNewGeom(); } -void CImmGeomManager::DrawingIndexedArray() -{ - if (!LastArrayAccessIsValid || !LastArrayAccessWasIndexed) { - GLContext.ArrayAccessChanged(); - RendererManager.ArrayAccessChanged(RendererProps::kIndexed); - LastArrayAccessIsValid = true; - } - LastArrayAccessWasIndexed = true; -} - -void CImmGeomManager::DrawIndexedArrays(GLenum primType, +void CImmGeomManager::IndexedArraysGeomStage(GLenum primType, int numIndices, const unsigned char* indices, int numVertices) { - /* - // make sure there's no pending geometry - Flush(); - - // do these before sync'ing the vu1 renderer - SyncColorMaterial(VertArray->GetColors() != NULL); - DrawingIndexedArray(); + mDebugPrint("IndexedArraysGeomStage: ENTER primType=%d numIdx=%d numVtx=%d\n", (int)primType, (int)numIndices, (int)numVertices); - // now update the renderer and render - - bool rendererChanged = RendererManager.UpdateRenderer(); - - if ( rendererChanged ) { - RendererManager.LoadRenderer(GLContext.GetVif1Packet()); - } - SyncRendererContext(primType); - SyncGsContext(); - - RendererManager.GetCurRenderer().DrawIndexedArrays( primType, numIndices, indices, - numVertices, *VertArray ); - */ - if (Prim != primType) - PrimChanged(primType); + if (Prim != primType) PrimChanged(primType); Geometry.SetPrimType(primType); Geometry.SetArrayType(kIndexed); @@ -298,30 +315,71 @@ void CImmGeomManager::DrawIndexedArrays(GLenum primType, 
Geometry.SetVertices(VertArray->GetVertices()); Geometry.SetNormals(VertArray->GetNormals()); Geometry.SetTexCoords(VertArray->GetTexCoords()); - Geometry.SetColors(VertArray->GetColors()); + + void* colorsPtr = VertArray->GetColors(); + const bool colorArrayEnabled = VertArray->GetColorsAreValid() && VertArray->GetWordsPerColor() == 4; + if (colorArrayEnabled && VertArray->GetColorSrcType() == kColor_UByte) { + float* bufStart = (float*)CurColorBuf->GetNextPtr(); + const unsigned char* srcColorBuf_U8 = (const unsigned char*)colorsPtr; + for (int i = 0; i < numVertices; ++i) { + const unsigned char* colorChannels = srcColorBuf_U8 + 4*i; + *CurColorBuf += (float)colorChannels[0] / 255.0f; + *CurColorBuf += (float)colorChannels[1] / 255.0f; + *CurColorBuf += (float)colorChannels[2] / 255.0f; + *CurColorBuf += (float)colorChannels[3] / 255.0f; + } + if (numVertices > 0) { + const float* sampleColor = bufStart; + mDebugPrint("IndexedArraysGeomStage: AFTER-CONVERT colors=%p sample[0] f32=(%.3f,%.3f,%.3f,%.3f)\n", (void*)bufStart, sampleColor[0], sampleColor[1], sampleColor[2], sampleColor[3]); + } + colorsPtr = bufStart; + } else { + mDebugPrint("IndexedArraysGeomStage: no UBYTE->FLOAT conversion (enabled=%d src=%d wpc=%d)\n",(int)colorArrayEnabled, VertArray->GetColorSrcType(), VertArray->GetWordsPerColor()); + } + Geometry.SetColors(colorsPtr); Geometry.SetVerticesAreValid(VertArray->GetVerticesAreValid()); Geometry.SetNormalsAreValid(VertArray->GetNormalsAreValid()); Geometry.SetTexCoordsAreValid(VertArray->GetTexCoordsAreValid()); - Geometry.SetColorsAreValid(VertArray->GetColorsAreValid()); - Geometry.SetWordsPerVertex(VertArray->GetWordsPerVertex()); - Geometry.SetWordsPerNormal(VertArray->GetWordsPerNormal()); - Geometry.SetWordsPerTexCoord(VertArray->GetWordsPerTexCoord()); - Geometry.SetWordsPerColor(VertArray->GetWordsPerColor()); + const bool arrayHasColors = VertArray->GetColorsAreValid() && (VertArray->GetWordsPerColor() > 0); + + LaneConfig lanes; + 
lanes.vertices = (VertArray->GetWordsPerVertex() == 4) ? QW_XYZW : + (VertArray->GetWordsPerVertex() == 3) ? QW_XYZ : QW_NONE; + lanes.normals = (VertArray->GetNormalsAreValid() && + VertArray->GetWordsPerNormal() == 3 && + GLContext.GetImmLighting().GetLightingEnabled()) ? QW_XYZ : QW_NONE; + lanes.texcoords = (VertArray->GetTexCoordsAreValid() && + VertArray->GetWordsPerTexCoord() == 2) ? QW_XY : QW_NONE; + lanes.colors = + ((!GLContext.GetImmLighting().GetLightingEnabled() && arrayHasColors) || + ( GLContext.GetImmLighting().GetLightingEnabled() && + GLContext.GetMaterialManager().GetColorMaterialEnabled() && + arrayHasColors)) ? QW_XYZW : QW_NONE; + + ValidateLaneConfig(&lanes, "IndexedArraysGeomStage"); + + Geometry.SetWordsPerVertex(QWToWords(lanes.vertices)); + Geometry.SetWordsPerNormal(QWToWords(lanes.normals)); + Geometry.SetWordsPerTexCoord(QWToWords(lanes.texcoords)); + Geometry.SetWordsPerColor(QWToWords(lanes.colors)); + + Geometry.SetColorsAreValid(LanePresent(lanes.colors)); + SyncColorMaterial(LanePresent(lanes.colors)); + RendererManager.PerVtxMaterialChanged(LanePresent(lanes.colors) ? 
RendererProps::kDiffuse + : RendererProps::kNoMaterial); Geometry.AddVertices(numVertices); Geometry.AddNormals(numVertices); Geometry.AddTexCoords(numVertices); - Geometry.AddColors(numVertices); + if (LanePresent(lanes.colors)) Geometry.AddColors(numVertices); Geometry.SetNumIndices(numIndices); Geometry.SetIndices(indices); Geometry.SetIStripLengths(NULL); - // do this before sync'ing the vu1 renderer in CommitNewGeom - SyncColorMaterial(VertArray->GetColors() != NULL); - + mDebugPrint("IndexedArraysGeomStage: EXIT\n"); CommitNewGeom(); } @@ -329,6 +387,16 @@ void CImmGeomManager::DrawIndexedArrays(GLenum primType, * common and synchronization code */ +void CImmGeomManager::DrawingIndexedArray() +{ + if (!LastArrayAccessIsValid || !LastArrayAccessWasIndexed) { + GLContext.ArrayAccessChanged(); + RendererManager.ArrayAccessChanged(RendererProps::kIndexed); + LastArrayAccessIsValid = true; + } + LastArrayAccessWasIndexed = true; +} + void CImmGeomManager::DrawingLinearArray() { if (!LastArrayAccessIsValid || LastArrayAccessWasIndexed) { diff --git a/src/indexed_renderer.cpp b/src/indexed_renderer.cpp index ab4e4ff2..02301423 100644 --- a/src/indexed_renderer.cpp +++ b/src/indexed_renderer.cpp @@ -105,28 +105,53 @@ void CIndexedRenderer::DrawIndexedArrays(CGeometryBlock& block) CVifSCDmaPacket& packet = pGLContext->GetVif1Packet(); + const int kLaneWords = 3; // xyz for all lanes used here + int wordsPerVert = block.GetWordsPerVertex(); - int wordsPerNormal = (block.GetNormalsAreValid()) ? block.GetWordsPerNormal() : 0; - int wordsPerTex = (block.GetTexCoordsAreValid()) ? block.GetWordsPerTexCoord() : 0; - int wordsPerColor = (block.GetColorsAreValid()) ? block.GetWordsPerColor() : 0; + int wordsPerNormal = block.GetNormalsAreValid() ? block.GetWordsPerNormal() : kLaneWords; + int wordsPerTex = block.GetTexCoordsAreValid() ? block.GetWordsPerTexCoord() : kLaneWords; + int wordsPerColor = block.GetColorsAreValid() ? 
block.GetWordsPerColor() : kLaneWords; InitXferBlock(packet, wordsPerVert, wordsPerNormal, wordsPerTex, wordsPerColor); - for (int curArray = 0; curArray < block.GetNumArrays(); curArray++) { + // Static scratch for dummy lanes (no STL allowed) + static float* sDummyN = nullptr; + static float* sDummySTQ = nullptr; + static float* sDummyPVC = nullptr; + static int sDummyCap = 0; + + for (int curArray = 0; curArray < block.GetNumArrays(); ++curArray) { + + int numVertices = block.GetArrayLength(curArray); + int needFloats = numVertices * kLaneWords; + + if (sDummyCap < needFloats) { + delete[] sDummyN; sDummyN = new float[needFloats]; + delete[] sDummySTQ; sDummySTQ = new float[needFloats]; + delete[] sDummyPVC; sDummyPVC = new float[needFloats]; + sDummyCap = needFloats; + } + + // Fill dummies: N=(0,0,0), STQ=(0,0,1), PVC=(1,1,1) + for (int i = 0; i < numVertices; ++i) { + sDummyN[i*3+0] = 0.0f; sDummyN[i*3+1] = 0.0f; sDummyN[i*3+2] = 0.0f; + sDummySTQ[i*3+0] = 0.0f; sDummySTQ[i*3+1] = 0.0f; sDummySTQ[i*3+2] = 1.0f; + sDummyPVC[i*3+0] = 1.0f; sDummyPVC[i*3+1] = 1.0f; sDummyPVC[i*3+2] = 1.0f; + } - const void *normals, *vertices, *texCoords, *colors; - vertices = block.GetVerticesAreValid() ? block.GetVertices(curArray) : NULL; - normals = block.GetNormalsAreValid() ? block.GetNormals(curArray) : NULL; - texCoords = block.GetTexCoordsAreValid() ? block.GetTexCoords(curArray) : NULL; - colors = block.GetColorsAreValid() ? block.GetColors(curArray) : NULL; + const void* vertices = block.GetVerticesAreValid() ? block.GetVertices(curArray) : nullptr; + const void* normals = block.GetNormalsAreValid() ? block.GetNormals(curArray) : sDummyN; + const void* texCoords= block.GetTexCoordsAreValid() ? block.GetTexCoords(curArray) : sDummySTQ; + const void* colors = block.GetColorsAreValid() ? 
block.GetColors(curArray) : sDummyPVC; + // Set lane stride: 4 qwords per vertex (V,N,STQ,PVC) packet.Cnt(); - packet.Stcycl(1, 3).Nop(); + packet.Stcycl(1, InputQuadsPerVert).Nop(); packet.CloseTag(); int numIndices = block.GetNumIndices(curArray); const void* indices = block.GetIndices(curArray); - int numVertices = block.GetArrayLength(curArray); + // int numVertices = block.GetArrayLength(curArray); XferBlock(packet, vertices, normals, texCoords, colors, @@ -150,14 +175,16 @@ void CIndexedRenderer::DrawIndexedArrays(CGeometryBlock& block) } packet.CloseTag(); - int numIndexQwords = numIndices / 16 + (numIndices % 16 > 0); + // int numIndexQwords = numIndices / 16 + (numIndices % 16 > 0); + int numIndexQwords = (numIndices + 7) / 8; packet.Ref(Core::MakePtrNormal((const unsigned int*)indices), numIndexQwords); { packet.Stcycl(1, 1); packet.OpenUnpack(Vifs::UnpackModes::s_16, kInputGeomStart, Packet::kDoubleBuff, Packet::kMasked); // packet.CloseUnpack( numIndices/2 ); - packet.CloseUnpack(numIndexQwords * 8); + // packet.CloseUnpack(numIndexQwords * 8); + packet.CloseUnpack(numIndices); } // transfer a buffer header & start renderer @@ -170,7 +197,7 @@ void CIndexedRenderer::DrawIndexedArrays(CGeometryBlock& block) packet.OpenUnpack(Vifs::UnpackModes::v4_32, 0, Packet::kDoubleBuff); { packet += numVertices; - packet += numIndices / 2 + (numIndices & 1); + packet += (numIndices + 1) / 2; // number of 32-bit words carrying two s16 indices packet += numIndices; packet += 0; } diff --git a/src/renderermanager.cpp b/src/renderermanager.cpp index 37600e9c..1b5f5916 100644 --- a/src/renderermanager.cpp +++ b/src/renderermanager.cpp @@ -88,6 +88,56 @@ CRendererManager::CRendererManager(CGLContext& context) kInputStart, kInputBufSize - kInputStart, "fast, no lights")); } + { + CRendererProps capabilities = { + .PrimType = kTriangles, + .Lighting = 0, + .NumDirLights = k3DirLights, + .NumPtLights = 0, + .Texture = 1, //TODO: huh??? 
+ .Specular = 0, + .PerVtxMaterial = kNoMaterial, + .Clipping = kNonClipped | kClipped, + .CullFace = 0, + .TwoSidedLighting = 0, + .ArrayAccess = kIndexed + }; + RegisterDefaultRenderer( + new CIndexedRenderer( + mVsmAddr(Indexed), + mVsmSize(Indexed), + capabilities, + no_reqs, + 3, + 3, + "indexed, constant color, tri") + ); + } + { + CRendererProps capabilities = { + .PrimType = kTriangles, + .Lighting = 0, + .NumDirLights = k3DirLights, + .NumPtLights = 0, + .Texture = 1, + .Specular = 0, + .PerVtxMaterial = kDiffuse, // <-- PVC path stilll + .Clipping = kNonClipped | kClipped, + .CullFace = 0, + .TwoSidedLighting = 0, + .ArrayAccess = kIndexed + }; + RegisterDefaultRenderer( + new CIndexedRenderer( + mVsmAddr(IndexedPVC), + mVsmSize(IndexedPVC), + capabilities, + no_reqs, + 4, + 3, // output quads per vert + "indexed, pvc, tri") + ); + } // unlit renderer per vertex color { CRendererProps capabilities = { @@ -113,7 +163,7 @@ CRendererManager::CRendererManager(CGLContext& context) 3, kInputStart, kInputBufSize - kInputStart, - "fast no lights, pvc, tri") + "linear fast no lights, pvc, tri") ); } // fast renderer diff --git a/vu1/indexed.vcl b/vu1/indexed.vcl index 678b3f9d..ad7d2575 100644 --- a/vu1/indexed.vcl +++ b/vu1/indexed.vcl @@ -164,6 +164,31 @@ done_lighting_lid: iaddiu input_start, buffer_top, kInputGeomStart iaddiu color_start, buffer_top, kTempAreaStart + ; If lighting is OFF (no dir lights AND no point lights), seed the + ; per-vertex color accumulation buffer at kTempAreaStart with a constant + ; color = emission + (globalAmbient ⊙ materialAmbient). + ; This makes the later "lq.xyz vert_color, (next_color)" path valid even + ; when the lighting loops above were skipped entirely. 
+ + ibne num_dir_lights, vi00, colors_ready_lid + ibne num_pt_lights, vi00, colors_ready_lid + + ; Load vertex count and set write ptr to the temp color buffer + ilw.x num_vertices, kNumVertices(buffer_top) + iaddiu next_color_acc, buffer_top, kTempAreaStart + + ; Build the constant base color once + get_cnst_color vert_color + + ; for (seed_i = 0; seed_i < num_vertices; ++seed_i) + iaddiu seed_i, vi00, 0 +seed_const_colors_lid: + sqi.xyz vert_color, (next_color_acc++) + iaddiu seed_i, seed_i, 1 + ibne seed_i, num_vertices, seed_const_colors_lid + +colors_ready_lid: + ; set up index-decompression iaddiu next_index, buffer_top, kInputGeomStart diff --git a/vu1/indexed_no_lights_pvc.vcl b/vu1/indexed_no_lights_pvc.vcl new file mode 100644 index 00000000..fd3909cf --- /dev/null +++ b/vu1/indexed_no_lights_pvc.vcl @@ -0,0 +1,152 @@ +/* Indexed, per-vertex color, no lights */ + + #include "vu1_mem_indexed.h" + + .include "db_in_sb_out.i" + .include "math.i" + .include "lighting.i" + .include "clip_cull.i" + .include "geometry.i" + .include "io.i" + .include "general.i" + +kInputQPerV .equ 4 +kOutputQPerV .equ 3 + + .init_vf_all + .init_vi_all + + .name vsmIndexedPVC + + --enter + --endenter + + ; ------------------------ init --------------------------------- + + load_vert_xfrm vert_xform + init_constants + init_clip_cnst + +main_loop_lid: + + ; -------------------- set up decompression ---------------------- + + xtop buffer_top + iaddiu next_output, vi00, kOutputGeomStart + iaddiu input_start, buffer_top, kInputGeomStart + + ; num indices / vertex count + ilw.y num_indices_d2, kNumIndicesD2(buffer_top) + ilw.z num_indices, kNumIndices(buffer_top) + ilw.x num_vertices, kNumVertices(buffer_top) + + ; VI mask for low 8 bits of first index (needed by iand) + iaddiu first_index_mask, vi00, 0xff + + ; stride constants for index unpack (kInputQPerV == 4 here) + loi 253.0 + maxi.w index_constants, vf00, i ; keep w=253.0 (used in the 2nd-index trick) + loi 4.0 + maxi.z 
index_constants, vf00, i ; z=4 → offsets are 4 * index + loi 255.0 + maxi.y index_constants, vf00, i ; y=255 (8-bit mask for second index path) + + ; decompression pointers + iaddiu next_index, vi00, kInputGeomStart + iadd next_index, next_index, buffer_top + iadd last_index, next_index, num_indices_d2 + + ; giftag + lq gif_tag, kGifTag(vi00) + mtir eop, gif_tagx + ior eop, eop, num_indices + mfir.x gif_tag, eop + mfir.w gif_tag, next_output + sq gif_tag, kOutputBufStart(vi00) + + ; -------- figure out where the color lane lives -------- + ; Layout for this kernel = [ V(0) | N(1) | STQ(2) | PVC(3) ] + ; PVC is lane 3 → base is +3 * num_vertices + + iadd color_start, input_start, num_vertices ; +1 + iadd color_start, color_start, num_vertices ; +2 + iadd color_start, color_start, num_vertices ; +3 + + + ; alpha policy: use material diffuse alpha (matches constant-color path) + loi 128.0 + load_mat_diff vert_color, w + muli.w vert_color, vert_color, i + loi 255.0 + minii.w vert_color, vert_color, i + ftoi0.w vert_color, vert_color + maxi.w max_color_val, vf00, i + + ; wait for other buffers + iaddiu zero_giftag, vi00, kGifTag + xgkick zero_giftag + +xform_loop_lid: --LoopCS 1,3 + + ; first index + ilw.w first_index, 0(next_index) + iand first_index, first_index, first_index_mask + + ; first_offset = first_index * 4 (was *3) + iadd first_offset, first_index, first_index ; *2 + iadd first_offset, first_offset, first_offset ; *4 + + + ; second index in packed word + lqi.w indices, (next_index++) + addy.w second_ind, indices, index_constants[y] + mtir second_index, second_ind[w] + mulz.w second_off, indices, index_constants[z] + add.w second_off, second_off, index_constants[w] + mtir second_offset, second_off[w] + + .macro do_vert + ; vertex + iadd next_input, first_offset, input_start + load_vert vert + xform_vert xformed_vert, vert_xform, vert + vert_to_gs gs_vert, xformed_vert + clip_vert xformed_vert + fcand vi01, 0x003ffff + iand vi01, vi01, do_clipping + set_adc_fs 
gs_vert, vi00 + store_xyzf gs_vert + + ; color (PVC 0..1 → 0..255, alpha from mat diffuse) + iadd next_color, first_index, color_start + lq vert_color, (next_color) + loi 255.0 + muli.xyz vert_color, vert_color, i + miniw.xyz vert_color, vert_color, max_color_val[w] + ftoi0.xyz vert_color, vert_color + store_rgba vert_color + + ; texcoords (if enabled, STQ lane is present) + load_stq tex_stq + xform_tex_stq tex_stq, tex_stq, q + store_stq tex_stq + .endm + + ; first vertex + do_vert + + ; second vertex (just swap to second_* values) + iadd first_offset, second_offset, vi00 + iadd first_index, second_index, vi00 + iaddiu next_output, next_output, kOutputQPerV + do_vert + + ; end loop + next_o + ibne next_index, last_index, xform_loop_lid + + kick_to_gs + --cont + b main_loop_lid + +.END diff --git a/vu1/indexed_vcl.vsm b/vu1/indexed_vcl.vsm index ba7a6c8d..c61d2ec8 100644 --- a/vu1/indexed_vcl.vsm +++ b/vu1/indexed_vcl.vsm @@ -204,10 +204,10 @@ EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: NOP ilw.x VI03,0(VI02) pt_lights_lid: ; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] - NOP ilw.y VI01,0(VI00) - NOP iaddiu VI02,VI00,0x00000001 - NOP ibeq VI01,VI00,done_lighting_lid ; STALL_LATENCY ?2 - NOP ilw.y VI03,0(VI02) + NOP ilw.y VI02,0(VI00) + NOP iaddiu VI03,VI00,0x00000001 + NOP ibeq VI02,VI00,done_lighting_lid ; STALL_LATENCY ?2 + NOP ilw.y VI04,0(VI03) ; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_indexed_pp4_vcl_13] maxw.z VF09,VF00,VF00w NOP pt_light_loop_lid: @@ -215,19 +215,19 @@ pt_light_loop_lid: NOP NOP NOP NOP NOP lq.xyz VF10,71(VI00) - NOP lq.xyz VF11,3(VI03) + NOP lq.xyz VF11,3(VI04) NOP lq.xyz VF14,72(VI00) mulax.xyz ACC,VF10,VF11x lq.xyz VF13,73(VI00) ; STALL_LATENCY ?2 madday.xyz ACC,VF14,VF11y lq.xyz VF14,74(VI00) - NOP xtop VI06 - maddaz.xyz ACC,VF13,VF11z iaddiu VI04,VI06,0x00000005 ; STALL_LATENCY ?1 - maddw.xyz VF14,VF14,VF00w lq.xyz VF17,0(VI04) - sub.xyz VF17,VF14,VF17 ilw.x VI05,0(VI06) ; STALL_LATENCY ?3 - mul.xyz VF16,VF17,VF17 
lq.xyz VF12,2(VI03) ; STALL_LATENCY ?3 - adday.z ACC,VF16,VF16y lq.xyz VF11,1(VI03) ; STALL_LATENCY ?3 - maddx.z VF16,VF09,VF16x iadd VI07,VI04,VI05 - NOP iadd VI07,VI07,VI05 - NOP iadd VI05,VI07,VI05 + NOP xtop VI07 + maddaz.xyz ACC,VF13,VF11z iaddiu VI05,VI07,0x00000005 ; STALL_LATENCY ?1 + maddw.xyz VF14,VF14,VF00w lq.xyz VF17,0(VI05) + sub.xyz VF17,VF14,VF17 ilw.x VI06,0(VI07) ; STALL_LATENCY ?3 + mul.xyz VF16,VF17,VF17 lq.xyz VF12,2(VI04) ; STALL_LATENCY ?3 + adday.z ACC,VF16,VF16y lq.xyz VF11,1(VI04) ; STALL_LATENCY ?3 + maddx.z VF16,VF09,VF16x iadd VI08,VI05,VI06 + NOP iadd VI08,VI08,VI06 + NOP iadd VI06,VI08,VI06 NOP sqrt Q,VF16z ; STALL_LATENCY ?1 NOP NOP NOP NOP @@ -235,22 +235,22 @@ pt_light_loop_lid: NOP NOP NOP NOP NOP NOP - addq.y VF16,VF00,Q lq.xyz VF10,0(VI03) + addq.y VF16,VF00,Q lq.xyz VF10,0(VI04) NOP div Q,VF00w,VF16y ; STALL_LATENCY ?3 NOP NOP NOP NOP NOP NOP NOP NOP NOP NOP - addw.x VF16,VF00,VF00w lq.xyz VF13,5(VI03) - mulq.xyz VF17,VF17,Q iaddiu VI04,VI04,0x00000003 + addw.x VF16,VF00,VF00w lq.xyz VF13,5(VI04) + mulq.xyz VF17,VF17,Q iaddiu VI05,VI05,0x00000003 NOP NOP - mul.xyz VF12,VF12,VF07 ibeq VI04,VI05,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI2 - mul.xyz VF16,VF16,VF13 iaddiu VI06,VI06,0x000000ac + mul.xyz VF12,VF12,VF07 ibeq VI05,VI06,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI2 + mul.xyz VF16,VF16,VF13 iaddiu VI07,VI07,0x000000ac ; _LNOPT_w=[ ] 21 [38 0] 39 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__PRO1] - add.xyz VF18,VF08,VF17 lq.xyz VF19,0(VI04) + add.xyz VF18,VF08,VF17 lq.xyz VF19,0(VI05) sub.xyz VF19,VF14,VF19 esadd P,VF18 ; STALL_LATENCY ?3 - mul.xyz VF21,VF19,VF19 lq.xyz VF20,-2(VI04) ; STALL_LATENCY ?3 + mul.xyz VF21,VF19,VF19 lq.xyz VF20,-2(VI05) ; STALL_LATENCY ?3 adday.z ACC,VF21,VF21y NOP ; STALL_LATENCY ?3 maddx.z VF21,VF09,VF21x NOP NOP waitp @@ -265,12 +265,12 @@ pt_light_loop_lid: NOP NOP mul.xyz VF16,VF21,VF13 move.xyz VF15,VF16 mul.xyz VF21,VF17,VF20 NOP - mulq.xyz VF17,VF19,Q iaddiu 
VI04,VI04,0x00000003 + mulq.xyz VF17,VF19,Q iaddiu VI05,VI05,0x00000003 NOP move.xyz VF19,VF18 - NOP ibeq VI04,VI05,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1 + NOP ibeq VI05,VI06,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1 mulax.w ACC,VF00,VF21x mfp.w VF06,P ; _LNOPT_w=[ ] 32 [38 0] 39 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__PRO2] - add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI04) + add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI05) NOP NOP NOP NOP mulw.xyz VF22,VF19,VF06w NOP @@ -293,18 +293,18 @@ pt_light_loop_lid: addq.y VF21,VF00,Q waitq mul.w VF06,VF06,VF06 div Q,VF00w,VF21y ; STALL_LATENCY ?3 addw.x VF21,VF00,VF00w NOP - mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) + mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI05) maddz.w VF05,VF00,VF15z NOP mul.w VF06,VF06,VF06 NOP mul.xyz VF16,VF21,VF13 move.xyz VF15,VF16 mul.xyz VF21,VF17,VF20 NOP - mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 + mulq.xyz VF17,VF19,Q iaddiu VI05,VI05,0x00000003 mul.w VF07,VF06,VF06 move.xyz VF19,VF18 - NOP ibeq VI04,VI05,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0 + NOP ibeq VI05,VI06,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0 mulax.w ACC,VF00,VF21x mfp.w VF06,P EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: ; _LPOPT_w=[ vuta ] 39 [38 34] 39 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] - add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI04) + add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI05) mul.w VF07,VF07,VF07 NOP madday.w ACC,VF00,VF21y NOP mulw.xyz VF22,VF19,VF06w NOP @@ -320,7 +320,7 @@ EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: maddx.z VF21,VF09,VF21x NOP madday.w ACC,VF00,VF20y NOP maddz.w VF07,VF00,VF20z mfp.w VF06,P - mulq.xyz VF23,VF22,Q lq.xyz VF22,0(VI06) + mulq.xyz VF23,VF22,Q lq.xyz VF22,0(VI07) mulax.w ACC,VF00,VF15x sqrt Q,VF21z madday.w ACC,VF00,VF15y NOP maxx.w VF07,VF07,VF00x NOP @@ -329,9 +329,9 @@ EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: addw.x VF21,VF00,VF00w NOP mul.w VF06,VF07,VF07 
NOP addq.y VF21,VF00,Q NOP - mula.xyz ACC,VF20,VF06 sqi.xyz VF22,(VI06++) + mula.xyz ACC,VF20,VF06 sqi.xyz VF22,(VI07++) maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 - mul.w VF06,VF06,VF06 lq.xyz VF20,-2(VI04) + mul.w VF06,VF06,VF06 lq.xyz VF20,-2(VI05) mul.xyz VF16,VF21,VF13 div Q,VF00w,VF21y NOP NOP NOP NOP @@ -339,9 +339,9 @@ EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: mul.xyz VF21,VF17,VF20 NOP NOP NOP NOP NOP - mulq.xyz VF17,VF19,Q iaddiu VI04,VI04,0x00000003 + mulq.xyz VF17,VF19,Q iaddiu VI05,VI05,0x00000003 mul.w VF07,VF06,VF06 move.xyz VF19,VF18 - NOP ibne VI04,VI05,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP + NOP ibne VI05,VI06,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP mulax.w ACC,VF00,VF21x mfp.w VF06,P EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0: ; _LNOPT_w=[ ] 60 [93 0] 93 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0] @@ -366,12 +366,12 @@ EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0: maxx.w VF07,VF07,VF00x NOP mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulq.xyz VF19,VF21,Q lq.xyz VF21,0(VI06) + mulq.xyz VF19,VF21,Q lq.xyz VF21,0(VI07) mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 - mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) + mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI05) add.xyz VF21,VF21,VF19 move.xyz VF19,VF13 mul.w VF07,VF06,VF06 mfp.w VF06,P ; STALL_LATENCY ?1 - mul.xyz VF21,VF17,VF20 sqi.xyz VF21,(VI06++) ; STALL_LATENCY ?1 + mul.xyz VF21,VF17,VF20 sqi.xyz VF21,(VI07++) ; STALL_LATENCY ?1 mulw.xyz VF13,VF19,VF06w NOP ; STALL_LATENCY ?1 maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 mul.w VF07,VF07,VF07 NOP @@ -395,16 +395,16 @@ EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0: mulax.w ACC,VF00,VF15x NOP madday.w ACC,VF00,VF15y NOP mul.w VF07,VF06,VF06 NOP - maddz.w VF05,VF00,VF15z lq.xyz VF13,0(VI06) + maddz.w VF05,VF00,VF15z lq.xyz VF13,0(VI07) mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 add.xyz VF11,VF13,VF17 div Q,VF00w,VF05w 
maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 madd.xyz VF10,VF10,VF05 NOP - NOP sqi.xyz VF11,(VI06++) - mulq.xyz VF12,VF10,Q lq.xyz VF10,0(VI06) ; STALL_LATENCY ?2 + NOP sqi.xyz VF11,(VI07++) + mulq.xyz VF12,VF10,Q lq.xyz VF10,0(VI07) ; STALL_LATENCY ?2 add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 NOP b EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sqi.xyz VF10,(VI06++) ; STALL_LATENCY ?2 + NOP sqi.xyz VF10,(VI07++) ; STALL_LATENCY ?2 EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1: ; _LNOPT_w=[ ] 53 [93 0] 93 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1] add.xyz VF13,VF08,VF17 NOP @@ -423,7 +423,7 @@ EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1: mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI04) + mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI05) NOP waitp ; STALL_LATENCY ?1 mul.w VF07,VF06,VF06 mfp.w VF06,P mul.xyz VF21,VF17,VF20 NOP @@ -450,19 +450,19 @@ EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1: mulax.w ACC,VF00,VF15x NOP madday.w ACC,VF00,VF15y NOP mul.w VF07,VF06,VF06 NOP - maddz.w VF05,VF00,VF15z lq.xyz VF13,0(VI06) + maddz.w VF05,VF00,VF15z lq.xyz VF13,0(VI07) mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 add.xyz VF11,VF13,VF17 div Q,VF00w,VF05w maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 madd.xyz VF10,VF10,VF05 NOP - NOP sqi.xyz VF11,(VI06++) - mulq.xyz VF12,VF10,Q lq.xyz VF10,0(VI06) ; STALL_LATENCY ?2 + NOP sqi.xyz VF11,(VI07++) + mulq.xyz VF12,VF10,Q lq.xyz VF10,0(VI07) ; STALL_LATENCY ?2 add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 NOP b EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sqi.xyz VF10,(VI06++) ; STALL_LATENCY ?2 + NOP sqi.xyz VF10,(VI07++) ; STALL_LATENCY ?2 EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI2: ; _LNOPT_w=[ ] 49 [93 0] 93 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI2] - add.xyz VF13,VF08,VF17 lq.xyz VF14,-2(VI04) 
+ add.xyz VF13,VF08,VF17 lq.xyz VF14,-2(VI05) mul.xyz VF17,VF17,VF14 esadd P,VF13 ; STALL_LATENCY ?3 mulax.w ACC,VF00,VF17x move.xyz VF16,VF16 ; STALL_LATENCY ?3 madday.w ACC,VF00,VF17y NOP @@ -508,211 +508,237 @@ EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI2: NOP div Q,VF00w,VF05w maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 madd.xyz VF10,VF10,VF05 NOP - mulq.xyz VF12,VF10,Q lq.xyz VF10,0(VI06) ; STALL_LATENCY ?3 + mulq.xyz VF12,VF10,Q lq.xyz VF10,0(VI07) ; STALL_LATENCY ?3 add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 - NOP sqi.xyz VF10,(VI06++) ; STALL_LATENCY ?3 + NOP sqi.xyz VF10,(VI07++) ; STALL_LATENCY ?3 EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: ; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,pt_light_loop_lid - NOP ilw.y VI03,0(VI02) + NOP isubiu VI02,VI02,0x00000001 + NOP iaddiu VI03,VI03,0x00000001 + NOP ibne VI02,VI00,pt_light_loop_lid + NOP ilw.y VI04,0(VI03) done_lighting_lid: -; _LNOPT_w=[ vuta ] 51 [50 0] 58 [done_lighting_lid] - NOP lq.w VF05,60(VI00) - NOP loi 0x43000000 - muli.w VF10,VF05,I xtop VI04 ; STALL_LATENCY ?2 - NOP ilw.y VI08,0(VI04) - NOP loi 0x437f0000 - maxi.w VF12,VF00,I ilw.z VI03,0(VI04) - minii.w VF10,VF10,I loi 0x437f0000 - NOP lq.xyz VF05,75(VI00) +; _LNOPT_w=[ normal2 ] 12 [12 0] 12 [done_lighting_lid] + NOP loi 0x44fff000 + NOP lq.w VF06,57(VI00) + addi.xy VF05,VF00,I loi 0x45000000 + NOP xtop VI04 + NOP fcset 0 + NOP mr32.z VF05,VF06 + NOP lq.xyz VF06,76(VI00) + NOP ilw.w VI03,76(VI00) + NOP iaddiu VI05,VI00,0x0000004e NOP iaddiu VI06,VI04,0x00000005 - maxi.y VF10,VF00,I loi 0x40400000 - NOP mtir VI02,VF05x ; STALL_LATENCY ?1 - NOP ior VI03,VI02,VI03 - NOP mfir.x VF05,VI03 - NOP iaddiu VI03,VI00,0x0000004e - NOP mfir.w VF05,VI03 - maxi.z VF09,VF00,I loi 0x437d0000 - NOP iadd VI08,VI06,VI08 - NOP ilw.w VI09,0(VI06) - NOP sq VF05,77(VI00) - NOP lqi.w VF05,(VI06++) - NOP 
iaddiu VI05,VI04,0x000000ac - NOP iaddiu VI04,VI04,0x00000005 - NOP iaddiu VI07,VI00,0x000000ff - NOP iand VI09,VI09,VI07 - maxi.w VF08,VF00,I iadd VI01,VI09,VI09 - addy.w VF06,VF05,VF10y iadd VI01,VI01,VI09 - mulz.w VF05,VF05,VF09z iadd VI10,VI01,VI04 - NOP lq.xyz VF11,0(VI10) - add.w VF05,VF05,VF08 lq.w VF09,57(VI00) ; STALL_LATENCY ?2 - mulax ACC,VF01,VF11x loi 0x45000000 - madday ACC,VF02,VF11y NOP - maddaz ACC,VF03,VF11z iadd VI09,VI09,VI05 - maddw VF13,VF04,VF00w lq.xyz VF07,0(VI09) - NOP mtir VI11,VF05w - NOP div Q,VF00w,VF13w ; STALL_LATENCY ?2 - NOP iadd VI09,VI11,VI04 - NOP lq.xyz VF12,0(VI09) - maxi.w VF07,VF00,I mr32.z VF05,VF09 - miniw.xyz VF11,VF07,VF12w loi 0x44fff000 - addi.xy VF05,VF00,I iaddiu VI02,VI00,0x0000004b - mulax ACC,VF01,VF12x xgkick VI02 - mulq.xyz VF08,VF13,Q ilw.w VI02,76(VI00) - madday ACC,VF02,VF12y lq.xyz VF06,76(VI00) - maddaz ACC,VF03,VF12z fcset 0 - maddw VF16,VF04,VF00w mtir VI01,VF06w - add.xyz VF13,VF08,VF05 lq.xyz VF14,2(VI10) - mul.xyz VF08,VF08,VF06 iadd VI01,VI01,VI05 + NOP ibne VI01,VI00,colors_ready_lid + maxi.w VF07,VF00,I iaddiu VI07,VI04,0x000000ac +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_indexed_pp4_vcl_19] + NOP ibne VI02,VI00,colors_ready_lid + NOP NOP +; _LNOPT_w=[ normal2 ] 6 [9 0] 10 [__v_vu1_indexed_pp4_vcl_20] + NOP lq.xyz VF08,57(VI00) + NOP lq.xyz VF09,59(VI00) + mul.xyz VF08,VF08,VF09 lq.xyz VF07,58(VI00) ; STALL_LATENCY ?3 + NOP ilw.x VI01,0(VI04) + NOP iaddiu VI02,VI04,0x000000ac + add.xyz VF07,VF07,VF08 iaddiu VI08,VI00,0 ; STALL_LATENCY ?1 +seed_const_colors_lid: +; _LNOPT_w=[ normal2 ] 4 [4 3] 4 [seed_const_colors_lid] + NOP iaddiu VI08,VI08,0x00000001 + NOP NOP + NOP ibne VI08,VI01,seed_const_colors_lid + NOP sqi.xyz VF07,(VI02++) +colors_ready_lid: +; _LNOPT_w=[ vuta1 ] 47 [40 0] 53 [colors_ready_lid] + NOP loi 0x40400000 + maxi.z VF07,VF00,I loi 0x437f0000 + NOP lq.w VF05,60(VI00) + NOP lq.xyz VF09,75(VI00) + NOP ilw.z VI02,0(VI04) + maxi.y VF08,VF00,I loi 0x43000000 + NOP ilw.y VI09,0(VI04) + 
NOP mtir VI08,VF09x + NOP ior VI02,VI08,VI02 + muli.w VF05,VF05,I loi 0x437f0000 + NOP mfir.x VF09,VI02 + NOP mfir.w VF09,VI05 + NOP iaddiu VI02,VI04,0x00000005 + NOP iadd VI09,VI02,VI09 + NOP iaddiu VI04,VI00,0x0000004b + NOP sq VF09,77(VI00) + NOP xgkick VI04 + NOP ilw.w VI04,0(VI02) + minii.w VF08,VF05,I lqi.w VF14,(VI02++) + maxi.w VF06,VF00,I loi 0x437d0000 + NOP iaddiu VI08,VI00,0x000000ff + NOP iand VI04,VI04,VI08 + addy.w VF12,VF14,VF08y iadd VI01,VI04,VI04 + maxi.w VF05,VF00,I iadd VI01,VI01,VI04 + mulz.w VF11,VF14,VF07z iadd VI10,VI01,VI06 + NOP lq.xyz VF09,0(VI10) + add.w VF11,VF11,VF05 NOP ; STALL_LATENCY ?2 + mulax ACC,VF01,VF09x NOP + madday ACC,VF02,VF09y NOP + maddaz ACC,VF03,VF09z iadd VI04,VI04,VI07 + maddw VF13,VF04,VF00w mtir VI11,VF11w + NOP lq.xyz VF09,0(VI04) + NOP iadd VI04,VI11,VI06 + NOP lq.xyz VF12,0(VI04) + NOP div Q,VF00w,VF13w + miniw.xyz VF09,VF09,VF06w NOP + mulax ACC,VF01,VF12x NOP ; STALL_LATENCY ?1 + madday ACC,VF02,VF12y NOP + maddaz ACC,VF03,VF12z NOP + mulq.xyz VF11,VF13,Q waitq ; STALL_LATENCY ?1 + maddw VF16,VF04,VF00w mtir VI01,VF12w + add.xyz VF13,VF11,VF05 lq.xyz VF14,2(VI10) ; STALL_LATENCY ?2 + mul.xyz VF11,VF11,VF06 iadd VI01,VI01,VI07 NOP lq.xyz VF12,0(VI01) - ftoi0.w VF11,VF10 div Q,VF00w,VF16w - ftoi0.xyz VF11,VF11 ibeq VI06,VI08,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI1 - clipw.xyz VF08xyz,VF07w lq.xyz VF08,2(VI09) + ftoi0.w VF09,VF08 div Q,VF00w,VF16w + ftoi0.xyz VF09,VF09 ibeq VI02,VI09,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI1 + clipw.xyz VF11xyz,VF07w lq.xyz VF11,2(VI04) ; _LNOPT_w=[ ] 38 [37 0] 38 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__PRO1] - mulq.xyz VF15,VF14,Q ilw.w VI11,0(VI06) + mulq.xyz VF15,VF14,Q ilw.w VI11,0(VI02) NOP NOP - NOP lqi.w VF05,(VI06++) - NOP sq VF11,1(VI03) - mulq.xyz VF11,VF16,Q fcand VI01,262143 - NOP iand VI11,VI11,VI07 + NOP lqi.w VF14,(VI02++) + NOP sq VF09,1(VI05) + mulq.xyz VF09,VF16,Q fcand VI01,262143 + NOP iand VI11,VI11,VI08 NOP iadd VI10,VI11,VI11 - addy.w 
VF06,VF05,VF10y iadd VI10,VI10,VI11 - mulz.w VF05,VF05,VF09z iadd VI12,VI10,VI04 - mul.xyz VF07,VF11,VF06 iadd VI11,VI11,VI05 - add.xyz VF14,VF11,VF05 lq.xyz VF11,0(VI12) - ftoi4.xyz VF16,VF13 iand VI09,VI01,VI02 - add.w VF05,VF05,VF08 ior VI09,VI09,VI00 - clipw.xyz VF07xyz,VF07w iaddiu VI01,VI09,0x00007fff - mulax ACC,VF01,VF11x mfir.w VF16,VI01 - madday ACC,VF02,VF11y mtir VI10,VF06w - maddaz ACC,VF03,VF11z mtir VI09,VF05w - maddw VF13,VF04,VF00w lq.xyz VF07,0(VI11) - mulq.xyz VF16,VF08,Q sq VF16,2(VI03) - NOP iadd VI11,VI09,VI04 - NOP sq.xyz VF15,0(VI03) + mulz.w VF11,VF14,VF07z iadd VI10,VI10,VI11 + mul.xyz VF10,VF09,VF06 iadd VI12,VI10,VI06 + add.xyz VF14,VF09,VF05 iadd VI11,VI11,VI07 + addy.w VF12,VF14,VF08y lq.xyz VF09,0(VI12) + ftoi4.xyz VF16,VF13 iand VI04,VI01,VI03 + add.w VF11,VF11,VF05 ior VI04,VI04,VI00 + clipw.xyz VF10xyz,VF07w iaddiu VI01,VI04,0x00007fff + mulax ACC,VF01,VF09x mfir.w VF16,VI01 + madday ACC,VF02,VF09y mtir VI10,VF12w + maddaz ACC,VF03,VF09z mtir VI04,VF11w + maddw VF13,VF04,VF00w lq.xyz VF10,0(VI11) + mulq.xyz VF16,VF11,Q sq VF16,2(VI05) + NOP iadd VI11,VI04,VI06 + NOP sq.xyz VF15,0(VI05) NOP div Q,VF00w,VF13w - miniw.xyz VF11,VF07,VF12w iaddiu VI09,VI03,0 - miniw.xyz VF07,VF12,VF12w lq.xyz VF12,0(VI11) + miniw.xyz VF09,VF10,VF06w iaddiu VI04,VI05,0 + miniw.xyz VF10,VF12,VF06w lq.xyz VF12,0(VI11) NOP fcand VI01,262143 ftoi4.xyz VF15,VF14 lq.xyz VF14,2(VI12) - NOP iand VI03,VI01,VI02 - mulax ACC,VF01,VF12x ior VI03,VI03,VI00 - mulq.xyz VF08,VF13,Q iaddiu VI03,VI03,0x00007fff - madday ACC,VF02,VF12y mfir.w VF15,VI03 - maddaz ACC,VF03,VF12z iadd VI10,VI10,VI05 - maddw VF16,VF04,VF00w sq.xyz VF16,3(VI09) - add.xyz VF13,VF08,VF05 iaddiu VI03,VI09,0x00000006 - mul.xyz VF08,VF08,VF06 lq.xyz VF12,0(VI10) - NOP sq VF15,5(VI09) + NOP iand VI05,VI01,VI03 + mulax ACC,VF01,VF12x ior VI05,VI05,VI00 + mulq.xyz VF11,VF13,Q iaddiu VI05,VI05,0x00007fff + madday ACC,VF02,VF12y mfir.w VF15,VI05 + maddaz ACC,VF03,VF12z iadd VI10,VI10,VI07 + maddw 
VF16,VF04,VF00w sq.xyz VF16,3(VI04) + add.xyz VF13,VF11,VF05 iaddiu VI05,VI04,0x00000006 + mul.xyz VF11,VF11,VF06 lq.xyz VF12,0(VI10) + NOP sq VF15,5(VI04) NOP div Q,VF00w,VF16w - ftoi0.xyz VF11,VF11 ibeq VI06,VI08,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI0 - clipw.xyz VF08xyz,VF07w lq.xyz VF08,2(VI11) + ftoi0.xyz VF09,VF09 ibeq VI02,VI09,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI0 + clipw.xyz VF11xyz,VF07w lq.xyz VF11,2(VI11) EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP: ; _LPOPT_w=[ ldumb ] 38 [38 38] 38 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP] - NOP ilw.w VI11,0(VI06) - NOP lqi.w VF05,(VI06++) - NOP sq VF11,1(VI03) + NOP ilw.w VI11,0(VI02) + NOP lqi.w VF14,(VI02++) + NOP sq VF09,1(VI05) mulq.xyz VF15,VF14,Q fcand VI01,262143 - mulq.xyz VF11,VF16,Q iand VI11,VI11,VI07 - addy.w VF06,VF05,VF10y iadd VI10,VI11,VI11 - mulz.w VF05,VF05,VF09z iadd VI10,VI10,VI11 - NOP iadd VI12,VI10,VI04 - add.xyz VF14,VF11,VF05 iadd VI11,VI11,VI05 - mul.xyz VF18,VF11,VF06 mtir VI10,VF06w - add.w VF05,VF05,VF08 lq.xyz VF17,0(VI12) - ftoi4.xyz VF16,VF13 iand VI01,VI01,VI02 - ftoi0.xyz VF11,VF07 ior VI01,VI01,VI00 + mulq.xyz VF09,VF16,Q iand VI11,VI11,VI08 + addy.w VF12,VF14,VF08y iadd VI10,VI11,VI11 + mulz.w VF11,VF14,VF07z iadd VI10,VI10,VI11 + NOP iadd VI12,VI10,VI06 + add.xyz VF14,VF09,VF05 iadd VI11,VI11,VI07 + mul.xyz VF18,VF09,VF06 mtir VI10,VF12w + add.w VF11,VF11,VF05 lq.xyz VF17,0(VI12) + ftoi4.xyz VF16,VF13 iand VI01,VI01,VI03 + ftoi0.xyz VF09,VF10 ior VI01,VI01,VI00 clipw.xyz VF18xyz,VF07w iaddiu VI13,VI01,0x00007fff - mulax ACC,VF01,VF17x mtir VI01,VF05w + mulax ACC,VF01,VF17x mtir VI01,VF11w madday ACC,VF02,VF17y mfir.w VF16,VI13 - maddaz ACC,VF03,VF17z lq.xyz VF07,0(VI11) - maddw VF13,VF04,VF00w iadd VI11,VI01,VI04 - NOP sq.xyz VF15,0(VI03) - NOP sq VF16,2(VI03) - miniw.xyz VF11,VF07,VF12w sq VF11,4(VI09) + maddaz ACC,VF03,VF17z lq.xyz VF10,0(VI11) + maddw VF13,VF04,VF00w iadd VI11,VI01,VI06 + NOP sq.xyz VF15,0(VI05) + NOP sq VF16,2(VI05) + miniw.xyz 
VF09,VF10,VF06w sq VF09,4(VI04) NOP div Q,VF00w,VF13w - miniw.xyz VF07,VF12,VF12w iaddiu VI09,VI03,0 + miniw.xyz VF10,VF12,VF06w iaddiu VI04,VI05,0 NOP lq.xyz VF12,0(VI11) NOP fcand VI01,262143 ftoi4.xyz VF15,VF14 lq.xyz VF14,2(VI12) - mulq.xyz VF16,VF08,Q iand VI03,VI01,VI02 - mulax ACC,VF01,VF12x ior VI03,VI03,VI00 - mulq.xyz VF08,VF13,Q iaddiu VI03,VI03,0x00007fff - madday ACC,VF02,VF12y mfir.w VF15,VI03 - maddaz ACC,VF03,VF12z iadd VI10,VI10,VI05 - maddw VF16,VF04,VF00w sq.xyz VF16,3(VI09) - add.xyz VF13,VF08,VF05 iaddiu VI03,VI09,0x00000006 - mul.xyz VF08,VF08,VF06 lq.xyz VF12,0(VI10) - NOP sq VF15,5(VI09) + mulq.xyz VF16,VF11,Q iand VI05,VI01,VI03 + mulax ACC,VF01,VF12x ior VI05,VI05,VI00 + mulq.xyz VF11,VF13,Q iaddiu VI05,VI05,0x00007fff + madday ACC,VF02,VF12y mfir.w VF15,VI05 + maddaz ACC,VF03,VF12z iadd VI10,VI10,VI07 + maddw VF16,VF04,VF00w sq.xyz VF16,3(VI04) + add.xyz VF13,VF11,VF05 iaddiu VI05,VI04,0x00000006 + mul.xyz VF11,VF11,VF06 lq.xyz VF12,0(VI10) + NOP sq VF15,5(VI04) NOP div Q,VF00w,VF16w - ftoi0.xyz VF11,VF11 ibne VI06,VI08,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP - clipw.xyz VF08xyz,VF07w lq.xyz VF08,2(VI11) + ftoi0.xyz VF09,VF09 ibne VI02,VI09,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP + clipw.xyz VF11xyz,VF07w lq.xyz VF11,2(VI11) EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI0: ; _LNOPT_w=[ ] 25 [21 0] 26 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI0] - mulq.xyz VF09,VF14,Q NOP + mulq.xyz VF07,VF14,Q NOP NOP NOP NOP NOP NOP NOP - mulq.xyz VF11,VF16,Q sq VF11,1(VI03) + mulq.xyz VF09,VF16,Q sq VF09,1(VI05) NOP NOP NOP fcand VI01,262143 - NOP iand VI01,VI01,VI02 + NOP iand VI01,VI01,VI03 ftoi4.xyz VF16,VF13 ior VI01,VI01,VI00 - mul.xyz VF06,VF11,VF06 iaddiu VI01,VI01,0x00007fff - add.xyz VF14,VF11,VF05 mfir.w VF16,VI01 - ftoi0.xyz VF11,VF07 NOP - clipw.xyz VF06xyz,VF07w sq.xyz VF09,0(VI03) ; STALL_LATENCY ?1 - mulq.xyz VF16,VF08,Q sq VF16,2(VI03) - miniw.xyz VF07,VF12,VF12w sq VF11,4(VI09) - NOP iaddiu VI09,VI03,0 + mul.xyz 
VF06,VF09,VF06 iaddiu VI01,VI01,0x00007fff + add.xyz VF14,VF09,VF05 mfir.w VF16,VI01 + ftoi0.xyz VF09,VF10 NOP + clipw.xyz VF06xyz,VF07w sq.xyz VF07,0(VI05) ; STALL_LATENCY ?1 + mulq.xyz VF16,VF11,Q sq VF16,2(VI05) + miniw.xyz VF10,VF12,VF06w sq VF09,4(VI04) + NOP iaddiu VI04,VI05,0 NOP fcand VI01,262143 - NOP iand VI03,VI01,VI02 - ftoi0.xyz VF11,VF07 ior VI03,VI03,VI00 - ftoi4.xyz VF12,VF14 iaddiu VI03,VI03,0x00007fff - NOP mfir.w VF12,VI03 - NOP sq.xyz VF16,3(VI09) - NOP sq VF11,4(VI09) + NOP iand VI05,VI01,VI03 + ftoi0.xyz VF09,VF10 ior VI05,VI05,VI00 + ftoi4.xyz VF12,VF14 iaddiu VI05,VI05,0x00007fff + NOP mfir.w VF12,VI05 + NOP sq.xyz VF16,3(VI04) + NOP sq VF09,4(VI04) NOP b EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EXIT_POINT - NOP sq VF12,5(VI09) + NOP sq VF12,5(VI04) EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI1: ; _LNOPT_w=[ ] 23 [21 0] 26 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI1] mulq.xyz VF07,VF14,Q NOP NOP NOP - NOP sq VF11,1(VI03) + NOP sq VF09,1(VI05) NOP fcand VI01,262143 - mulq.xyz VF11,VF16,Q iand VI04,VI01,VI02 - NOP ior VI04,VI04,VI00 - ftoi4.xyz VF16,VF13 iaddiu VI04,VI04,0x00007fff - NOP mfir.w VF16,VI04 - add.xyz VF14,VF11,VF05 sq.xyz VF07,0(VI03) - mul.xyz VF11,VF11,VF06 NOP - mulq.xyz VF16,VF08,Q sq VF16,2(VI03) ; STALL_LATENCY ?1 - clipw.xyz VF11xyz,VF07w NOP ; STALL_LATENCY ?1 + mulq.xyz VF09,VF16,Q iand VI02,VI01,VI03 + NOP ior VI02,VI02,VI00 + ftoi4.xyz VF16,VF13 iaddiu VI02,VI02,0x00007fff + NOP mfir.w VF16,VI02 + add.xyz VF14,VF09,VF05 sq.xyz VF07,0(VI05) + mul.xyz VF09,VF09,VF06 NOP + mulq.xyz VF16,VF11,Q sq VF16,2(VI05) ; STALL_LATENCY ?1 + clipw.xyz VF09xyz,VF07w NOP ; STALL_LATENCY ?1 NOP NOP NOP NOP - miniw.xyz VF07,VF12,VF12w iaddiu VI04,VI03,0 + miniw.xyz VF09,VF12,VF06w iaddiu VI02,VI05,0 NOP fcand VI01,262143 - NOP iand VI03,VI01,VI02 - ftoi4.xyz VF12,VF14 ior VI03,VI03,VI00 - ftoi0.xyz VF11,VF07 iaddiu VI03,VI03,0x00007fff - NOP mfir.w VF12,VI03 - NOP sq.xyz VF16,3(VI04) - NOP sq VF11,4(VI04) ; STALL_LATENCY ?1 - NOP sq 
VF12,5(VI04) + NOP iand VI05,VI01,VI03 + ftoi4.xyz VF12,VF14 ior VI05,VI05,VI00 + ftoi0.xyz VF09,VF09 iaddiu VI05,VI05,0x00007fff + NOP mfir.w VF12,VI05 + NOP sq.xyz VF16,3(VI02) + NOP sq VF09,4(VI02) ; STALL_LATENCY ?1 + NOP sq VF12,5(VI02) EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EXIT_POINT: ; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EXIT_POINT] NOP iaddiu VI01,VI00,0x0000004d NOP xgkick VI01 NOP[E] NOP NOP NOP -; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_indexed_pp4_vcl_21] +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_indexed_pp4_vcl_26] NOP b main_loop_lid NOP NOP .align 4 vsmIndexed_CodeEnd: -; iCount=637 +; iCount=657 ; register stats: ; 14 VU User integer ; 24 VU User floating point diff --git a/vu1/vu1renderers.h b/vu1/vu1renderers.h index 44e8c280..b2f9dcfa 100644 --- a/vu1/vu1renderers.h +++ b/vu1/vu1renderers.h @@ -33,6 +33,7 @@ VU_FUNCTIONS(FastNoLights); VU_FUNCTIONS(FastNoLightsPVCTri); VU_FUNCTIONS(Indexed); +VU_FUNCTIONS(IndexedPVC); } #endif // ps2gl_vu1code_h From eac8259bb11da94d8dcda66493e16201878c6fd6 Mon Sep 17 00:00:00 2001 From: iann Date: Wed, 8 Oct 2025 13:26:15 +0900 Subject: [PATCH 08/13] fix magenta box indexed mesh working again --- src/gmanager.cpp | 14 +++++-- src/indexed_renderer.cpp | 90 +++++++++++++++------------------------- 2 files changed, 43 insertions(+), 61 deletions(-) diff --git a/src/gmanager.cpp b/src/gmanager.cpp index 056e3ec6..55d3cf59 100644 --- a/src/gmanager.cpp +++ b/src/gmanager.cpp @@ -232,17 +232,23 @@ void glDrawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid* indic const int numVertices = (int)max + 1; if (max <= 255) { + const int numIndexQwords = (int)((count + 15) / 16); + const int bytesNeeded = numIndexQwords * 16; + static uint8_t* indices_u8_scratch = NULL; static int scratchCapacity = 0; - if (scratchCapacity < count) { + if (scratchCapacity < bytesNeeded) { delete[] indices_u8_scratch; - indices_u8_scratch = new uint8_t[count]; - scratchCapacity = (int)count; + 
indices_u8_scratch = new uint8_t[bytesNeeded]; + scratchCapacity = bytesNeeded; } for (GLsizei i = 0; i < count; ++i) { indices_u8_scratch[i] = (uint8_t)indices_u16[i]; } - mDebugPrint("glDrawElements: primType=%d count=%d type=GL_UNSIGNED_SHORT maxIndex=%u numVertices=%d (u8 path)\n", (int)mode, (int)count, (unsigned)max, (int)numVertices); + if (bytesNeeded > count) { + memset(indices_u8_scratch + count, 0, (size_t)(bytesNeeded - count)); + } + mDebugPrint("glDrawElements: primType=%d count=%d type=GL_UNSIGNED_SHORT ""maxIndex=%u numVertices=%d (u8 path, %d QW / %d bytes)\n", (int)mode, (int)count, (unsigned)max, (int)numVertices, numIndexQwords, bytesNeeded); CGeomManager& gmanager = pGLContext->GetGeomManager(); gmanager.IndexedArraysGeomStage(mode, (int)count, indices_u8_scratch, numVertices); } else { diff --git a/src/indexed_renderer.cpp b/src/indexed_renderer.cpp index 02301423..2117ab92 100644 --- a/src/indexed_renderer.cpp +++ b/src/indexed_renderer.cpp @@ -105,53 +105,29 @@ void CIndexedRenderer::DrawIndexedArrays(CGeometryBlock& block) CVifSCDmaPacket& packet = pGLContext->GetVif1Packet(); - const int kLaneWords = 3; // xyz for all lanes used here - int wordsPerVert = block.GetWordsPerVertex(); - int wordsPerNormal = block.GetNormalsAreValid() ? block.GetWordsPerNormal() : kLaneWords; - int wordsPerTex = block.GetTexCoordsAreValid() ? block.GetWordsPerTexCoord() : kLaneWords; - int wordsPerColor = block.GetColorsAreValid() ? block.GetWordsPerColor() : kLaneWords; + int wordsPerNormal = (block.GetNormalsAreValid()) ? block.GetWordsPerNormal() : 0; + int wordsPerTex = (block.GetTexCoordsAreValid()) ? block.GetWordsPerTexCoord() : 0; + int wordsPerColor = (block.GetColorsAreValid()) ? 
block.GetWordsPerColor() : 0; InitXferBlock(packet, wordsPerVert, wordsPerNormal, wordsPerTex, wordsPerColor); - // Static scratch for dummy lanes (no STL allowed) - static float* sDummyN = nullptr; - static float* sDummySTQ = nullptr; - static float* sDummyPVC = nullptr; - static int sDummyCap = 0; - - for (int curArray = 0; curArray < block.GetNumArrays(); ++curArray) { - - int numVertices = block.GetArrayLength(curArray); - int needFloats = numVertices * kLaneWords; - - if (sDummyCap < needFloats) { - delete[] sDummyN; sDummyN = new float[needFloats]; - delete[] sDummySTQ; sDummySTQ = new float[needFloats]; - delete[] sDummyPVC; sDummyPVC = new float[needFloats]; - sDummyCap = needFloats; - } - - // Fill dummies: N=(0,0,0), STQ=(0,0,1), PVC=(1,1,1) - for (int i = 0; i < numVertices; ++i) { - sDummyN[i*3+0] = 0.0f; sDummyN[i*3+1] = 0.0f; sDummyN[i*3+2] = 0.0f; - sDummySTQ[i*3+0] = 0.0f; sDummySTQ[i*3+1] = 0.0f; sDummySTQ[i*3+2] = 1.0f; - sDummyPVC[i*3+0] = 1.0f; sDummyPVC[i*3+1] = 1.0f; sDummyPVC[i*3+2] = 1.0f; - } + for (int curArray = 0; curArray < block.GetNumArrays(); curArray++) { - const void* vertices = block.GetVerticesAreValid() ? block.GetVertices(curArray) : nullptr; - const void* normals = block.GetNormalsAreValid() ? block.GetNormals(curArray) : sDummyN; - const void* texCoords= block.GetTexCoordsAreValid() ? block.GetTexCoords(curArray) : sDummySTQ; - const void* colors = block.GetColorsAreValid() ? block.GetColors(curArray) : sDummyPVC; + const void *normals, *vertices, *texCoords, *colors; + vertices = block.GetVerticesAreValid() ? block.GetVertices(curArray) : NULL; + normals = block.GetNormalsAreValid() ? block.GetNormals(curArray) : NULL; + texCoords = block.GetTexCoordsAreValid() ? block.GetTexCoords(curArray) : NULL; + colors = block.GetColorsAreValid() ? 
block.GetColors(curArray) : NULL; - // Set lane stride: 4 qwords per vertex (V,N,STQ,PVC) packet.Cnt(); + // Use the renderer’s configured per-vertex lane count (3 for const-color kernels, 4 for PVC). packet.Stcycl(1, InputQuadsPerVert).Nop(); packet.CloseTag(); int numIndices = block.GetNumIndices(curArray); const void* indices = block.GetIndices(curArray); - // int numVertices = block.GetArrayLength(curArray); + int numVertices = block.GetArrayLength(curArray); XferBlock(packet, vertices, normals, texCoords, colors, @@ -165,9 +141,9 @@ void CIndexedRenderer::DrawIndexedArrays(CGeometryBlock& block) packet.Stmod(Vifs::AddModes::kOffset); // don't write over the xyz's Vifs::tMask mask = { 3, 3, 3, 0, - 3, 3, 3, 0, - 3, 3, 3, 0, - 3, 3, 3, 0 }; + 3, 3, 3, 0, + 3, 3, 3, 0, + 3, 3, 3, 0 }; packet.Stmask(mask); static const float row[4] = { 1.0f, 1.0f, 1.0f, 1.0f }; packet.Strow(row); @@ -175,16 +151,15 @@ void CIndexedRenderer::DrawIndexedArrays(CGeometryBlock& block) } packet.CloseTag(); - // int numIndexQwords = numIndices / 16 + (numIndices % 16 > 0); - int numIndexQwords = (numIndices + 7) / 8; + // NOTE: original layout packs 16 u8 indices per qword; keep that accounting. 
+ int numIndexQwords = numIndices / 16 + (numIndices % 16 > 0); packet.Ref(Core::MakePtrNormal((const unsigned int*)indices), numIndexQwords); { packet.Stcycl(1, 1); packet.OpenUnpack(Vifs::UnpackModes::s_16, kInputGeomStart, - Packet::kDoubleBuff, Packet::kMasked); - // packet.CloseUnpack( numIndices/2 ); - // packet.CloseUnpack(numIndexQwords * 8); - packet.CloseUnpack(numIndices); + Packet::kDoubleBuff, Packet::kMasked); + // close with the number of s16 elements (8 per qword) + packet.CloseUnpack(numIndexQwords * 8); } // transfer a buffer header & start renderer @@ -197,24 +172,25 @@ void CIndexedRenderer::DrawIndexedArrays(CGeometryBlock& block) packet.OpenUnpack(Vifs::UnpackModes::v4_32, 0, Packet::kDoubleBuff); { packet += numVertices; - packet += (numIndices + 1) / 2; // number of 32-bit words carrying two s16 indices + packet += numIndices / 2 + (numIndices & 1); packet += numIndices; packet += 0; } packet.CloseUnpack(); - // constant color of each vertex - - packet.Strow(&ConstantVertColor); - packet.Stcycl(numVertices, 0); - Vifs::tMask mask = { 1, 1, 1, 3, - 1, 1, 1, 3, - 1, 1, 1, 3, - 1, 1, 1, 3 }; - packet.Stmask(mask); - packet.OpenUnpack(Vifs::UnpackModes::v4_32, kTempAreaStart, - Packet::kDoubleBuff, Packet::kMasked); - packet.CloseUnpack(numVertices); + // constant color of each vertex (only when no per-vertex color lane is present) + if (!XferColors) { + packet.Strow(&ConstantVertColor); + packet.Stcycl(numVertices, 0); + Vifs::tMask mask = { 1, 1, 1, 3, + 1, 1, 1, 3, + 1, 1, 1, 3, + 1, 1, 1, 3 }; + packet.Stmask(mask); + packet.OpenUnpack(Vifs::UnpackModes::v4_32, kTempAreaStart, + Packet::kDoubleBuff, Packet::kMasked); + packet.CloseUnpack(numVertices); + } // start renderer From 9870c0faca075c46cdc0fa58b445d17aa163ec17 Mon Sep 17 00:00:00 2001 From: iann Date: Sat, 11 Oct 2025 00:20:10 +0900 Subject: [PATCH 09/13] undo unncessary garbage changes, back to only begining constant color index renderer (for preparing for pvc renderer next) --- 
src/indexed_renderer.cpp | 51 +++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 27 deletions(-) diff --git a/src/indexed_renderer.cpp b/src/indexed_renderer.cpp index 2117ab92..59b4c9a5 100644 --- a/src/indexed_renderer.cpp +++ b/src/indexed_renderer.cpp @@ -106,22 +106,21 @@ void CIndexedRenderer::DrawIndexedArrays(CGeometryBlock& block) CVifSCDmaPacket& packet = pGLContext->GetVif1Packet(); int wordsPerVert = block.GetWordsPerVertex(); - int wordsPerNormal = (block.GetNormalsAreValid()) ? block.GetWordsPerNormal() : 0; + int wordsPerNormal = (block.GetNormalsAreValid()) ? block.GetWordsPerNormal() : 0; int wordsPerTex = (block.GetTexCoordsAreValid()) ? block.GetWordsPerTexCoord() : 0; - int wordsPerColor = (block.GetColorsAreValid()) ? block.GetWordsPerColor() : 0; + int wordsPerColor = (block.GetColorsAreValid()) ? block.GetWordsPerColor() : 0; InitXferBlock(packet, wordsPerVert, wordsPerNormal, wordsPerTex, wordsPerColor); for (int curArray = 0; curArray < block.GetNumArrays(); curArray++) { const void *normals, *vertices, *texCoords, *colors; - vertices = block.GetVerticesAreValid() ? block.GetVertices(curArray) : NULL; - normals = block.GetNormalsAreValid() ? block.GetNormals(curArray) : NULL; - texCoords = block.GetTexCoordsAreValid() ? block.GetTexCoords(curArray) : NULL; - colors = block.GetColorsAreValid() ? block.GetColors(curArray) : NULL; + vertices = block.GetVerticesAreValid() ? block.GetVertices(curArray) : NULL; + normals = block.GetNormalsAreValid() ? block.GetNormals(curArray) : NULL; + texCoords = block.GetTexCoordsAreValid() ? block.GetTexCoords(curArray) : NULL; + colors = block.GetColorsAreValid() ? block.GetColors(curArray) : NULL; packet.Cnt(); - // Use the renderer’s configured per-vertex lane count (3 for const-color kernels, 4 for PVC). 
packet.Stcycl(1, InputQuadsPerVert).Nop(); packet.CloseTag(); @@ -141,9 +140,9 @@ void CIndexedRenderer::DrawIndexedArrays(CGeometryBlock& block) packet.Stmod(Vifs::AddModes::kOffset); // don't write over the xyz's Vifs::tMask mask = { 3, 3, 3, 0, - 3, 3, 3, 0, - 3, 3, 3, 0, - 3, 3, 3, 0 }; + 3, 3, 3, 0, + 3, 3, 3, 0, + 3, 3, 3, 0 }; packet.Stmask(mask); static const float row[4] = { 1.0f, 1.0f, 1.0f, 1.0f }; packet.Strow(row); @@ -151,14 +150,13 @@ void CIndexedRenderer::DrawIndexedArrays(CGeometryBlock& block) } packet.CloseTag(); - // NOTE: original layout packs 16 u8 indices per qword; keep that accounting. int numIndexQwords = numIndices / 16 + (numIndices % 16 > 0); packet.Ref(Core::MakePtrNormal((const unsigned int*)indices), numIndexQwords); { packet.Stcycl(1, 1); packet.OpenUnpack(Vifs::UnpackModes::s_16, kInputGeomStart, - Packet::kDoubleBuff, Packet::kMasked); - // close with the number of s16 elements (8 per qword) + Packet::kDoubleBuff, Packet::kMasked); + // packet.CloseUnpack( numIndices/2 ); packet.CloseUnpack(numIndexQwords * 8); } @@ -178,19 +176,18 @@ void CIndexedRenderer::DrawIndexedArrays(CGeometryBlock& block) } packet.CloseUnpack(); - // constant color of each vertex (only when no per-vertex color lane is present) - if (!XferColors) { - packet.Strow(&ConstantVertColor); - packet.Stcycl(numVertices, 0); - Vifs::tMask mask = { 1, 1, 1, 3, - 1, 1, 1, 3, - 1, 1, 1, 3, - 1, 1, 1, 3 }; - packet.Stmask(mask); - packet.OpenUnpack(Vifs::UnpackModes::v4_32, kTempAreaStart, - Packet::kDoubleBuff, Packet::kMasked); - packet.CloseUnpack(numVertices); - } + // constant color of each vertex + + packet.Strow(&ConstantVertColor); + packet.Stcycl(numVertices, 0); + Vifs::tMask mask = { 1, 1, 1, 3, + 1, 1, 1, 3, + 1, 1, 1, 3, + 1, 1, 1, 3 }; + packet.Stmask(mask); + packet.OpenUnpack(Vifs::UnpackModes::v4_32, kTempAreaStart, + Packet::kDoubleBuff, Packet::kMasked); + packet.CloseUnpack(numVertices); // start renderer @@ -199,4 +196,4 @@ void 
CIndexedRenderer::DrawIndexedArrays(CGeometryBlock& block) } packet.CloseTag(); } -} +} \ No newline at end of file From d393b6e5e004835076476e94a4ed48d8fff0f9fc Mon Sep 17 00:00:00 2001 From: iann Date: Sat, 11 Oct 2025 07:42:29 +0900 Subject: [PATCH 10/13] indexed pvc working, now time for massive clean up --- Makefile | 7 ++ include/ps2gl/fixed_function.h | 59 ++++----- src/gmanager.cpp | 14 +-- src/immgmanager.cpp | 38 ------ src/indexed_renderer.cpp | 29 ++--- src/renderermanager.cpp | 10 +- vu1/indexed_constant_color.vcl | 116 +++++++++++++++++ vu1/indexed_no_lights_pvc.vcl | 224 +++++++++++++-------------------- vu1/vu1_mem_indexed.h | 1 + vu1/vu1renderers.h | 1 + 10 files changed, 260 insertions(+), 239 deletions(-) create mode 100644 vu1/indexed_constant_color.vcl diff --git a/Makefile b/Makefile index a2fa9187..fd0e8cf1 100644 --- a/Makefile +++ b/Makefile @@ -50,6 +50,7 @@ RENDERERS = \ general_tri \ general \ indexed \ + indexed_constant_color \ indexed_no_lights_pvc \ scei \ fast_no_lights_pvc_tri @@ -99,6 +100,11 @@ one: vu1/indexed%_pp4.vcl: vu1/indexed%_pp3.vcl cat $< | cc -E -P -imacros vu1/vu1_mem_indexed.h -o $@ - +#TODO: this is too allow for the old "single indexed.vcl naming, figure out a way thats better to integrate all namings of indexed. +# also shouldnt even match this on naming, it should be controlled better when adding new indexed renderers. +vu1/indexed_pp4.vcl: vu1/indexed_pp3.vcl + cat $< | cc -E -P -imacros vu1/vu1_mem_indexed.h -o $@ - + # GCC / CPP flags (-E, -P, -imacros): https://gcc.gnu.org/onlinedocs/cpp/Invocation.html#Invocation # -E = preprocess only, -P = strip #line, -imacros includes macros without writing #include %_pp4.vcl: %_pp3.vcl @@ -154,6 +160,7 @@ examples: done clean-examples: + rm -rf ./GS_DUMP/hard/* rm -rf $(BIN_DIR) find $(EXAMPLES_DIR) -type f -name Makefile ! 
-path '*/shared_code/*' -print0 \ | while IFS= read -r -d '' mf; do \ diff --git a/include/ps2gl/fixed_function.h b/include/ps2gl/fixed_function.h index 55bd348f..f0469951 100644 --- a/include/ps2gl/fixed_function.h +++ b/include/ps2gl/fixed_function.h @@ -1,6 +1,9 @@ #ifndef ps2gl_fixed_function_h #define ps2gl_fixed_function_h +//TODO: this is half composed, i need to really justify the existinance of this if going this course... +// it may purely be something that would "help" me, and thus i am uncertain if its truly justified to rewrite all the logic. +// This will be the biggest merge conflict once i perhaps integrate it. #pragma once #include #include @@ -21,39 +24,38 @@ typedef enum { } FixedFunctionDataSrc; typedef enum { - FIXED_FUNCTION_COLOR_CONSTANT = 0, // (currentColor * tint), lighting OFF - FIXED_FUNCTION_COLOR_ARRAY, // (vertexColor * tint), lighting OFF - FIXED_FUNCTION_COLOR_LIT // lighting ON + FIXED_FUNCTION_COLOR_CONSTANT = 0, + FIXED_FUNCTION_COLOR_ARRAY, + FIXED_FUNCTION_COLOR_LIT } FixedFunctionColor; //TODO I get that this is for emphasis but it seems ugly idk typedef struct { bool texture2dEnabled; bool lightingEnabled; bool colorMaterialEnabled; - GLenum colorMaterialMode; // expected: GL_DIFFUSE or 0 + GLenum colorMaterialMode; bool vertexArrayEnabled; bool normalArrayEnabled; bool texcoordArrayEnabled; bool colorArrayEnabled; - bool diffuseTextureBound; // a valid diffuse map is actually bound + bool diffuseTextureBound; - float currentColor[4]; // current GL color - float currentNormal[3]; // current GL normal (rarely used in this policy) - float currentTexCoord[2]; // current GL texcoord (we do not rely on this) + float currentColor[4]; + float currentNormal[3]; + float currentTexCoord[2]; - // App-level tint (raylib tint, or 1,1,1,1 if already folded into currentColor) float tintRgba[4]; bool immediateColorVariesInPrimitive; } FixedFunctionConditions; //TODO: should we merge this with state somehow? 
typedef struct { - FixedFunctionDataSrc vertexSrc; // always ARRAY - FixedFunctionDataSrc normalSrc; // ARRAY or NONE - FixedFunctionDataSrc texcoordSrc; // ARRAY or NONE - FixedFunctionDataSrc colorSrc; // CONSTANT or ARRAY (ARRAY also means “varies per vertex” in immediate mode) + FixedFunctionDataSrc vertexSrc; + FixedFunctionDataSrc normalSrc; + FixedFunctionDataSrc texcoordSrc; + FixedFunctionDataSrc colorSrc; FixedFunctionColor ffColor; @@ -61,7 +63,7 @@ typedef struct { bool textureFlag; bool lightingFlag; - bool colorMaterialAffectsDiffuse; // true if per-vertex color should drive diffuse when lighting is on + bool colorMaterialAffectsDiffuse; } FixedFunctionState; typedef enum { @@ -110,43 +112,36 @@ static inline FixedFunctionState evaluate(const FixedFunctionConditions* conditi FixedFunctionState state; memset(&state, 0, sizeof(state)); - // V: always array & lane present state.vertexSrc = FIXED_FUNCTION_ATTR_ARRAY; state.V = true; - // T: requires GL texture enable, a real diffuse bound, AND a texcoord array state.textureFlag = (conditions->texture2dEnabled && conditions->diffuseTextureBound && conditions->texcoordArrayEnabled); state.texcoordSrc = state.textureFlag ? FIXED_FUNCTION_ATTR_ARRAY : FIXED_FUNCTION_ATTR_NONE; state.T = (state.texcoordSrc == FIXED_FUNCTION_ATTR_ARRAY); - // N: lighting is only meaningful if we have a normal array const bool lightingFeasible = (conditions->lightingEnabled && conditions->normalArrayEnabled); state.lightingFlag = lightingFeasible; state.normalSrc = state.lightingFlag ? 
FIXED_FUNCTION_ATTR_ARRAY : FIXED_FUNCTION_ATTR_NONE; state.N = (state.normalSrc == FIXED_FUNCTION_ATTR_ARRAY); - // C: color path & lane selection const bool perVertexColorSupplyPresent = conditions->colorArrayEnabled || conditions->immediateColorVariesInPrimitive; if (!state.lightingFlag) { if (perVertexColorSupplyPresent) { state.colorSrc = FIXED_FUNCTION_ATTR_ARRAY; - state.ffColor = FIXED_FUNCTION_COLOR_ARRAY; // (v.color * tint) - state.C = true; // need per-vertex color lane + state.ffColor = FIXED_FUNCTION_COLOR_ARRAY; + state.C = true; } else { state.colorSrc = FIXED_FUNCTION_ATTR_CONSTANT; - state.ffColor = FIXED_FUNCTION_COLOR_CONSTANT;// (currentColor * tint) - state.C = false; // constant → no C lane + state.ffColor = FIXED_FUNCTION_COLOR_CONSTANT; + state.C = false; } } else { - // Lighting ON: final color is computed. PVC only matters for material mapping. state.colorSrc = perVertexColorSupplyPresent ? FIXED_FUNCTION_ATTR_ARRAY : FIXED_FUNCTION_ATTR_CONSTANT; state.ffColor = FIXED_FUNCTION_COLOR_LIT; state.C = false; } - // ColorMaterial routing (lighting must be ON, color material enabled and mode=DIFFUSE, - // and there must be a per-vertex supply to make it meaningful) state.colorMaterialAffectsDiffuse = (state.lightingFlag && conditions->colorMaterialEnabled && @@ -160,9 +155,9 @@ static inline void capture( FixedFunctionConditions* conditions, CGLContext& glContext, const CVertArray& vertArray, - bool diffuseTextureIsBound, // see §3 for how to feed this - const float tintRgba[4], // see §3 for how to feed this - bool immediateColorVariesInPrimitive) // pass true in EndGeom() if colors changed mid-primitive + bool diffuseTextureIsBound, + const float tintRgba[4], + bool immediateColorVariesInPrimitive) { memset(conditions, 0, sizeof(*conditions)); @@ -171,7 +166,6 @@ static inline void capture( conditions->colorMaterialEnabled = glContext.IsColorMaterialEnabled(); conditions->colorMaterialMode = glContext.GetColorMaterialMode(); - // Arrays 
conditions->vertexArrayEnabled = vertArray.GetVerticesAreValid(); conditions->normalArrayEnabled = vertArray.GetNormalsAreValid(); conditions->texcoordArrayEnabled = vertArray.GetTexCoordsAreValid(); @@ -219,19 +213,12 @@ static inline void apply( geometry.SetNormalsAreValid(state.N); geometry.SetTexCoordsAreValid(state.T); geometry.SetColorsAreValid(state.C); - //TODO: still this i am ughhhhhhhhhhhhhhhh - // Hook ColorMaterial (diffuse) mapping in lit path - // This mirrors what your SyncColorMaterial used to do but is now explicit and single-sourced. if (state.colorMaterialAffectsDiffuse) { rendererManager.PerVtxMaterialChanged(RendererProps::kDiffuse); } else { rendererManager.PerVtxMaterialChanged(RendererProps::kNoMaterial); } - // TODO: do something here? to strictly align renderer choice with *effective* lighting, - // you can keep using GLContext as-is (works functionally), or go further: - // - add an "effective lighting" override the renderers consult. - // OTHERWISE DO NOTHING???? WHAT??? 
} #endif // ps2gl_fixed_function_h diff --git a/src/gmanager.cpp b/src/gmanager.cpp index 55d3cf59..a5052d40 100644 --- a/src/gmanager.cpp +++ b/src/gmanager.cpp @@ -232,27 +232,19 @@ void glDrawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid* indic const int numVertices = (int)max + 1; if (max <= 255) { - const int numIndexQwords = (int)((count + 15) / 16); - const int bytesNeeded = numIndexQwords * 16; - static uint8_t* indices_u8_scratch = NULL; static int scratchCapacity = 0; - if (scratchCapacity < bytesNeeded) { + if (scratchCapacity < count) { delete[] indices_u8_scratch; - indices_u8_scratch = new uint8_t[bytesNeeded]; - scratchCapacity = bytesNeeded; + indices_u8_scratch = new uint8_t[count]; + scratchCapacity = (int)count; } for (GLsizei i = 0; i < count; ++i) { indices_u8_scratch[i] = (uint8_t)indices_u16[i]; } - if (bytesNeeded > count) { - memset(indices_u8_scratch + count, 0, (size_t)(bytesNeeded - count)); - } - mDebugPrint("glDrawElements: primType=%d count=%d type=GL_UNSIGNED_SHORT ""maxIndex=%u numVertices=%d (u8 path, %d QW / %d bytes)\n", (int)mode, (int)count, (unsigned)max, (int)numVertices, numIndexQwords, bytesNeeded); CGeomManager& gmanager = pGLContext->GetGeomManager(); gmanager.IndexedArraysGeomStage(mode, (int)count, indices_u8_scratch, numVertices); } else { - mDebugPrint("glDrawElements: primType=%d count=%d type=GL_UNSIGNED_SHORT maxIndex=%u numVertices=%d (u16 path)\n", (int)mode, (int)count, (unsigned)max, (int)numVertices); CGeomManager& gmanager = pGLContext->GetGeomManager(); gmanager.IndexedArraysGeomStage(mode, (int)count, (const unsigned char*)indices, numVertices); } diff --git a/src/immgmanager.cpp b/src/immgmanager.cpp index 1a8ad487..3dfc0540 100644 --- a/src/immgmanager.cpp +++ b/src/immgmanager.cpp @@ -203,27 +203,6 @@ void CImmGeomManager::EndGeom() void CImmGeomManager::LinearArraysGeomStage(GLenum mode, int first, int count) { - //DrawingLinearArray(); - mDebugPrint("LinearArraysGeomStage: ENTER 
mode=%d first=%d count=%d\n", (int)mode, first, count); - void* beforeColors = VertArray->GetColors(); - mDebugPrint("LinearArraysGeomStage: BEFORE colors=%p valid=%d wpc=%d src=%d\n", - beforeColors, - (int)VertArray->GetColorsAreValid(), - VertArray->GetWordsPerColor(), - VertArray->GetColorSrcType()); - if (beforeColors && VertArray->GetColorsAreValid() && VertArray->GetWordsPerColor() >= 4) { - if (VertArray->GetColorSrcType() == kColor_UByte) { - const unsigned char* sampleColor = (const unsigned char*)beforeColors + 4 * first; - mDebugPrint("LinearArraysGeomStage: BEFORE sample[%d] u8=(%u,%u,%u,%u)\n", - first, sampleColor[0], sampleColor[1], sampleColor[2], sampleColor[3]); - } else { - //TODO: fix the casting insanity to make it more clear? - const float* sampleColor = (const float*)beforeColors + 4 * first; - mDebugPrint("LinearArraysGeomStage: BEFORE sample[%d] f32=(%.3f,%.3f,%.3f,%.3f)\n", - first, sampleColor[0], sampleColor[1], sampleColor[2], sampleColor[3]); - } - } - if (Prim != mode) PrimChanged(mode); @@ -248,17 +227,10 @@ void CImmGeomManager::LinearArraysGeomStage(GLenum mode, int first, int count) *CurColorBuf += colorChannels[2] / 255.0f; *CurColorBuf += colorChannels[3] / 255.0f; } - const float* sampleColor = bufStart + 4*first; - mDebugPrint("LinearArraysGeomStage: AFTER-CONVERT colors=%p sample[%d] f32=(%.3f,%.3f,%.3f,%.3f)\n", - (void*)bufStart, first, sampleColor[0], sampleColor[1], sampleColor[2], sampleColor[3]); colorsPtr = bufStart; - } else { - mDebugPrint("LinearArraysGeomStage: no UBYTE->FLOAT conversion (enabled=%d src=%d wpc=%d)\n", - (int)colorArrayEnabled, VertArray->GetColorSrcType(), VertArray->GetWordsPerColor()); } Geometry.SetColors(colorsPtr); - mDebugPrint("LinearArraysGeomStage: Geometry.SetColors(%p)\n", colorsPtr); Geometry.SetVerticesAreValid(VertArray->GetVerticesAreValid()); Geometry.SetNormalsAreValid(VertArray->GetNormalsAreValid()); @@ -297,7 +269,6 @@ void CImmGeomManager::LinearArraysGeomStage(GLenum mode, 
int first, int count) if (LanePresent(lanes.colors)) Geometry.AddColors(count); Geometry.AdjustNewGeomPtrs(first); - mDebugPrint("LinearArraysGeomStage: EXIT\n"); CommitNewGeom(); } @@ -305,8 +276,6 @@ void CImmGeomManager::IndexedArraysGeomStage(GLenum primType, int numIndices, const unsigned char* indices, int numVertices) { - mDebugPrint("IndexedArraysGeomStage: ENTER primType=%d numIdx=%d numVtx=%d\n", (int)primType, (int)numIndices, (int)numVertices); - if (Prim != primType) PrimChanged(primType); Geometry.SetPrimType(primType); @@ -328,13 +297,7 @@ void CImmGeomManager::IndexedArraysGeomStage(GLenum primType, *CurColorBuf += (float)colorChannels[2] / 255.0f; *CurColorBuf += (float)colorChannels[3] / 255.0f; } - if (numVertices > 0) { - const float* sampleColor = bufStart; - mDebugPrint("IndexedArraysGeomStage: AFTER-CONVERT colors=%p sample[0] f32=(%.3f,%.3f,%.3f,%.3f)\n", (void*)bufStart, sampleColor[0], sampleColor[1], sampleColor[2], sampleColor[3]); - } colorsPtr = bufStart; - } else { - mDebugPrint("IndexedArraysGeomStage: no UBYTE->FLOAT conversion (enabled=%d src=%d wpc=%d)\n",(int)colorArrayEnabled, VertArray->GetColorSrcType(), VertArray->GetWordsPerColor()); } Geometry.SetColors(colorsPtr); @@ -379,7 +342,6 @@ void CImmGeomManager::IndexedArraysGeomStage(GLenum primType, Geometry.SetIndices(indices); Geometry.SetIStripLengths(NULL); - mDebugPrint("IndexedArraysGeomStage: EXIT\n"); CommitNewGeom(); } diff --git a/src/indexed_renderer.cpp b/src/indexed_renderer.cpp index 59b4c9a5..bf1045d4 100644 --- a/src/indexed_renderer.cpp +++ b/src/indexed_renderer.cpp @@ -175,20 +175,21 @@ void CIndexedRenderer::DrawIndexedArrays(CGeometryBlock& block) packet += 0; } packet.CloseUnpack(); - - // constant color of each vertex - - packet.Strow(&ConstantVertColor); - packet.Stcycl(numVertices, 0); - Vifs::tMask mask = { 1, 1, 1, 3, - 1, 1, 1, 3, - 1, 1, 1, 3, - 1, 1, 1, 3 }; - packet.Stmask(mask); - packet.OpenUnpack(Vifs::UnpackModes::v4_32, kTempAreaStart, - 
Packet::kDoubleBuff, Packet::kMasked); - packet.CloseUnpack(numVertices); - + if (!colors) + { + // constant color of each vertex + + packet.Strow(&ConstantVertColor); + packet.Stcycl(numVertices, 0); + Vifs::tMask mask = { 1, 1, 1, 3, + 1, 1, 1, 3, + 1, 1, 1, 3, + 1, 1, 1, 3 }; + packet.Stmask(mask); + packet.OpenUnpack(Vifs::UnpackModes::v4_32, kTempAreaStart, + Packet::kDoubleBuff, Packet::kMasked); + packet.CloseUnpack(numVertices); + } // start renderer packet.Mscnt(); diff --git a/src/renderermanager.cpp b/src/renderermanager.cpp index 1b5f5916..94d2da15 100644 --- a/src/renderermanager.cpp +++ b/src/renderermanager.cpp @@ -94,7 +94,7 @@ CRendererManager::CRendererManager(CGLContext& context) .Lighting = 0, .NumDirLights = k3DirLights, .NumPtLights = 0, - .Texture = 1, //TODO: huh??? + .Texture = 1, .Specular = 0, .PerVtxMaterial = kNoMaterial, .Clipping = kNonClipped | kClipped, @@ -104,8 +104,8 @@ CRendererManager::CRendererManager(CGLContext& context) }; RegisterDefaultRenderer( new CIndexedRenderer( - mVsmAddr(Indexed), - mVsmSize(Indexed), + mVsmAddr(IndexedConstColor), + mVsmSize(IndexedConstColor), capabilities, no_reqs, 3, @@ -121,7 +121,7 @@ CRendererManager::CRendererManager(CGLContext& context) .NumPtLights = 0, .Texture = 1, .Specular = 0, - .PerVtxMaterial = kDiffuse, // <-- PVC path stilll + .PerVtxMaterial = kDiffuse, //TODO: this is just to allow for only certain targets to get pvc (its a hack to get behavior, clean up next .Clipping = kNonClipped | kClipped, .CullFace = 0, .TwoSidedLighting = 0, @@ -134,7 +134,7 @@ CRendererManager::CRendererManager(CGLContext& context) capabilities, no_reqs, 4, - 3, // output quads per vert + 3, "indexed, pvc, tri") ); } diff --git a/vu1/indexed_constant_color.vcl b/vu1/indexed_constant_color.vcl new file mode 100644 index 00000000..007bccc9 --- /dev/null +++ b/vu1/indexed_constant_color.vcl @@ -0,0 +1,116 @@ + #include "vu1_mem_indexed.h" + + .include "db_in_sb_out.i" + .include "math.i" + .include 
"lighting.i" + .include "clip_cull.i" + .include "geometry.i" + .include "io.i" + .include "general.i" + +kInputQPerV .equ 3 +kOutputQPerV .equ 3 + + .init_vf_all + .init_vi_all + + .name vsmIndexedConstColor + + --enter + --endenter + + load_vert_xfrm vert_xform + + --cont + +main_loop_lid: + init_constants + init_clip_cnst + + xtop buffer_top + iaddiu next_output, vi00, kOutputGeomStart + iaddiu input_start, buffer_top, kInputGeomStart + iaddiu color_start, buffer_top, kTempAreaStart ; CPU prefilled RGB per vertex + + iaddiu next_index, buffer_top, kInputGeomStart + iaddiu first_index_mask, vi00, 0xff + loi 253.0 + maxi.w index_constants, vf00, i + loi 3.0 + maxi.z index_constants, vf00, i + loi 255.0 + maxi.y index_constants, vf00, i + + ilw.y num_indices_d2, kNumIndicesD2(buffer_top) + iadd last_index, next_index, num_indices_d2 + ilw.z num_indices, kNumIndices(buffer_top) + + lq gif_tag, kGifTag(vi00) + mtir eop, gif_tagx + ior eop, eop, num_indices + mfir.x gif_tag, eop + mfir.w gif_tag, next_output + sq gif_tag, kOutputBufStart(vi00) + + loi 255.0 ; TODO: this is a flag or like control logicked during VIF packing actually?? 
+ load_mat_diff vert_color, w + muli.w vert_color, vert_color, i + minii.w vert_color, vert_color, i + ftoi0.w vert_color, vert_color + + + + iaddiu zero_giftag, vi00, kGifTag + xgkick zero_giftag + +xform_loop_lid: --LoopCS 1,3 + ilw.w first_index, 0(next_index) + iand first_index, first_index, first_index_mask + iadd first_offset, first_index, first_index + iadd first_offset, first_offset, first_index + + lqi.w indices, (next_index++) + addy.w second_ind, indices, index_constants[y] + mtir second_index, second_ind[w] + mulz.w second_off, indices, index_constants[z] + add.w second_off, second_off, index_constants[w] + mtir second_offset, second_off[w] + + .macro do_vert + load_vert vert + xform_vert xformed_vert, vert_xform, vert + vert_to_gs gs_vert, xformed_vert + clip_vert xformed_vert + fcand vi01, 0x003ffff + iand vi01, vi01, do_clipping + set_adc_fs gs_vert, vi00 + store_xyzf gs_vert + + lq.xyz vert_color, (next_color) + muli.xyz vert_color, vert_color, i + minii.xyz vert_color, vert_color, i + ftoi0.xyz vert_color, vert_color + store_rgba vert_color + + + load_stq tex_stq + xform_tex_stq tex_stq, tex_stq, q + store_stq tex_stq + .endm + + iadd next_input, first_offset, input_start + iadd next_color, first_index, color_start + do_vert + + iadd next_input, second_offset, input_start + iadd next_color, second_index, color_start + iaddiu next_output, next_output, kOutputQPerV + do_vert + + next_o + ibne next_index, last_index, xform_loop_lid + kick_to_gs + +--cont + b main_loop_lid +.END diff --git a/vu1/indexed_no_lights_pvc.vcl b/vu1/indexed_no_lights_pvc.vcl index fd3909cf..a0f7b202 100644 --- a/vu1/indexed_no_lights_pvc.vcl +++ b/vu1/indexed_no_lights_pvc.vcl @@ -1,152 +1,106 @@ -/* Indexed, per-vertex color, no lights */ + #include "vu1_mem_indexed.h" - #include "vu1_mem_indexed.h" + .include "db_in_sb_out.i" + .include "math.i" + .include "lighting.i" + .include "clip_cull.i" + .include "geometry.i" + .include "io.i" + .include "general.i" - .include 
"db_in_sb_out.i" - .include "math.i" - .include "lighting.i" - .include "clip_cull.i" - .include "geometry.i" - .include "io.i" - .include "general.i" +kInputQPerV .equ 4 +kOutputQPerV .equ 3 -kInputQPerV .equ 4 -kOutputQPerV .equ 3 + .init_vf_all + .init_vi_all - .init_vf_all - .init_vi_all + .name vsmIndexedPVC - .name vsmIndexedPVC + --enter + --endenter - --enter - --endenter + load_vert_xfrm vert_xform - ; ------------------------ init --------------------------------- - - load_vert_xfrm vert_xform - init_constants - init_clip_cnst + --cont main_loop_lid: + init_constants + init_clip_cnst - ; -------------------- set up decompression ---------------------- - - xtop buffer_top - iaddiu next_output, vi00, kOutputGeomStart - iaddiu input_start, buffer_top, kInputGeomStart - - ; num indices / vertex count - ilw.y num_indices_d2, kNumIndicesD2(buffer_top) - ilw.z num_indices, kNumIndices(buffer_top) - ilw.x num_vertices, kNumVertices(buffer_top) + xtop buffer_top + iaddiu next_output, vi00, kOutputGeomStart + iaddiu input_start, buffer_top, kInputGeomStart - ; VI mask for low 8 bits of first index (needed by iand) - iaddiu first_index_mask, vi00, 0xff - - ; stride constants for index unpack (kInputQPerV == 4 here) + iaddiu next_index, buffer_top, kInputGeomStart + iaddiu first_index_mask, vi00, 0xff loi 253.0 - maxi.w index_constants, vf00, i ; keep w=253.0 (used in the 2nd-index trick) + maxi.w index_constants, vf00, i loi 4.0 - maxi.z index_constants, vf00, i ; z=4 → offsets are 4 * index + maxi.z index_constants, vf00, i loi 255.0 - maxi.y index_constants, vf00, i ; y=255 (8-bit mask for second index path) - - ; decompression pointers - iaddiu next_index, vi00, kInputGeomStart - iadd next_index, next_index, buffer_top - iadd last_index, next_index, num_indices_d2 - - ; giftag - lq gif_tag, kGifTag(vi00) - mtir eop, gif_tagx - ior eop, eop, num_indices - mfir.x gif_tag, eop - mfir.w gif_tag, next_output - sq gif_tag, kOutputBufStart(vi00) - - ; -------- figure 
out where the color lane lives -------- - ; Layout for this kernel = [ V(0) | N(1) | STQ(2) | PVC(3) ] - ; PVC is lane 3 → base is +3 * num_vertices - - iadd color_start, input_start, num_vertices ; +1 - iadd color_start, color_start, num_vertices ; +2 - iadd color_start, color_start, num_vertices ; +3 - - - ; alpha policy: use material diffuse alpha (matches constant-color path) - loi 128.0 - load_mat_diff vert_color, w - muli.w vert_color, vert_color, i - loi 255.0 - minii.w vert_color, vert_color, i - ftoi0.w vert_color, vert_color - maxi.w max_color_val, vf00, i - - ; wait for other buffers - iaddiu zero_giftag, vi00, kGifTag - xgkick zero_giftag - -xform_loop_lid: --LoopCS 1,3 - - ; first index - ilw.w first_index, 0(next_index) - iand first_index, first_index, first_index_mask + maxi.y index_constants, vf00, i + + ilw.y num_indices_d2, kNumIndicesD2(buffer_top) + iadd last_index, next_index, num_indices_d2 + ilw.z num_indices, kNumIndices(buffer_top) + + lq gif_tag, kGifTag(vi00) + mtir eop, gif_tagx + ior eop, eop, num_indices + mfir.x gif_tag, eop + mfir.w gif_tag, next_output + sq gif_tag, kOutputBufStart(vi00) - ; first_offset = first_index * 4 (was *3) - iadd first_offset, first_index, first_index ; *2 - iadd first_offset, first_offset, first_offset ; *4 - - - ; second index in packed word - lqi.w indices, (next_index++) - addy.w second_ind, indices, index_constants[y] - mtir second_index, second_ind[w] - mulz.w second_off, indices, index_constants[z] - add.w second_off, second_off, index_constants[w] - mtir second_offset, second_off[w] - - .macro do_vert - ; vertex - iadd next_input, first_offset, input_start - load_vert vert - xform_vert xformed_vert, vert_xform, vert - vert_to_gs gs_vert, xformed_vert - clip_vert xformed_vert - fcand vi01, 0x003ffff - iand vi01, vi01, do_clipping - set_adc_fs gs_vert, vi00 - store_xyzf gs_vert - - ; color (PVC 0..1 → 0..255, alpha from mat diffuse) - iadd next_color, first_index, color_start - lq vert_color, 
(next_color) - loi 255.0 - muli.xyz vert_color, vert_color, i - miniw.xyz vert_color, vert_color, max_color_val[w] - ftoi0.xyz vert_color, vert_color - store_rgba vert_color - - ; texcoords (if enabled, STQ lane is present) - load_stq tex_stq - xform_tex_stq tex_stq, tex_stq, q - store_stq tex_stq - .endm - - ; first vertex - do_vert - - ; second vertex (just swap to second_* values) - iadd first_offset, second_offset, vi00 - iadd first_index, second_index, vi00 - iaddiu next_output, next_output, kOutputQPerV - do_vert - - ; end loop - next_o - ibne next_index, last_index, xform_loop_lid - - kick_to_gs - --cont - b main_loop_lid + iaddiu zero_giftag, vi00, kGifTag + xgkick zero_giftag +xform_loop_lid: --LoopCS 1,3 + ilw.w first_index, 0(next_index) + iand first_index, first_index, first_index_mask + iadd first_offset, first_index, first_index + iadd first_offset, first_offset, first_offset + + lqi.w indices, (next_index++) + addy.w second_ind, indices, index_constants[y] + mtir second_index, second_ind[w] + mulz.w second_off, indices, index_constants[z] + add.w second_off, second_off, index_constants[w] + mtir second_offset, second_off[w] + + .macro do_vert + load_vert vert + xform_vert xformed_vert, vert_xform, vert + vert_to_gs gs_vert, xformed_vert + clip_vert xformed_vert + fcand vi01, 0x003ffff + iand vi01, vi01, do_clipping + set_adc_fs gs_vert, vi00 + store_xyzf gs_vert + + iaddiu color_qw, next_input, kColorQwOff + lq vert_color, 0(color_qw) + loi 255.0 + muli vert_color, vert_color, i + max vert_color, vert_color, vf00 + ftoi0 vert_color, vert_color + store_rgba vert_color + + load_stq tex_stq + xform_tex_stq tex_stq, tex_stq, q + store_stq tex_stq + .endm + + iadd next_input, first_offset, input_start + do_vert + + iadd next_input, second_offset, input_start + iaddiu next_output, next_output, kOutputQPerV + do_vert + + next_o + ibne next_index, last_index, xform_loop_lid + kick_to_gs + +--cont + b main_loop_lid .END diff --git a/vu1/vu1_mem_indexed.h 
b/vu1/vu1_mem_indexed.h index a4a3165d..c214af0e 100644 --- a/vu1/vu1_mem_indexed.h +++ b/vu1/vu1_mem_indexed.h @@ -57,3 +57,4 @@ #define kInputStart kInputGeomStart // temp #define kTempAreaStart (kInputGeomStart + kInputBufSize) +#define kColorQwOff 3 diff --git a/vu1/vu1renderers.h b/vu1/vu1renderers.h index b2f9dcfa..d9a42a0c 100644 --- a/vu1/vu1renderers.h +++ b/vu1/vu1renderers.h @@ -33,6 +33,7 @@ VU_FUNCTIONS(FastNoLights); VU_FUNCTIONS(FastNoLightsPVCTri); VU_FUNCTIONS(Indexed); +VU_FUNCTIONS(IndexedConstColor); VU_FUNCTIONS(IndexedPVC); } From b716d3f67f6f1b188bbc302374b7484da5cd2381 Mon Sep 17 00:00:00 2001 From: iann Date: Sat, 11 Oct 2025 08:52:23 +0900 Subject: [PATCH 11/13] make file clean --- Makefile | 20 +- vu1/indexed_no_lights_pvc.vcl | 2 +- vu1/indexed_vcl.vsm | 744 ---------------------------------- 3 files changed, 8 insertions(+), 758 deletions(-) delete mode 100644 vu1/indexed_vcl.vsm diff --git a/Makefile b/Makefile index fd0e8cf1..e0d2fb7f 100644 --- a/Makefile +++ b/Makefile @@ -97,18 +97,12 @@ one: # GCC / CPP flags (-E, -P, -imacros): https://gcc.gnu.org/onlinedocs/cpp/Invocation.html#Invocation # -E = preprocess only, -P = strip #line, -imacros includes macros without writing #include -vu1/indexed%_pp4.vcl: vu1/indexed%_pp3.vcl - cat $< | cc -E -P -imacros vu1/vu1_mem_indexed.h -o $@ - - -#TODO: this is too allow for the old "single indexed.vcl naming, figure out a way thats better to integrate all namings of indexed. -# also shouldnt even match this on naming, it should be controlled better when adding new indexed renderers. 
-vu1/indexed_pp4.vcl: vu1/indexed_pp3.vcl - cat $< | cc -E -P -imacros vu1/vu1_mem_indexed.h -o $@ - - -# GCC / CPP flags (-E, -P, -imacros): https://gcc.gnu.org/onlinedocs/cpp/Invocation.html#Invocation -# -E = preprocess only, -P = strip #line, -imacros includes macros without writing #include -%_pp4.vcl: %_pp3.vcl - cat $< | cc -E -P -imacros vu1/vu1_mem_linear.h -o $@ - +vu1/%_pp4.vcl: vu1/%_pp3.vcl + @hdr=vu1/vu1_mem_linear.h; \ + case "$*" in \ + indexed|indexed_*) hdr=vu1/vu1_mem_indexed.h ;; \ + esac; \ + cat $< | cc -E -P -imacros $$hdr -o $@ - #TODO: remove this step? This could be covered simply from writing correct vcl code... unless intending to allow new and old syntax? # you can standardize syntax by using ".syntax old" or ".syntax new" or by passing `-n` to VCL for "new" and writing sources @@ -125,7 +119,7 @@ vu1/indexed_pp4.vcl: vu1/indexed_pp3.vcl # this is in order to normalize sources for GASP by removing C preprocessor stuff (#include/#define), # and then fix local .include paths so GASP can resolve them relative to the source dir. # if the .vcl file ALREADY avoids #include/#define and only use .include/.macro etc -# you can wire %.vcl -> %_pp2.vcl directly and drop this rule +# and wire %.vcl -> %_pp2.vcl directly and drop this rule?? %_pp1.vcl: %.vcl cat $< | sed 's/#include[ ]\+.\+// ; s/#define[ ]\+.\+// ; s|\(\.include[ ]\+\)"\([^/].\+\)"|\1"$( $@ diff --git a/vu1/indexed_no_lights_pvc.vcl b/vu1/indexed_no_lights_pvc.vcl index a0f7b202..cd79be85 100644 --- a/vu1/indexed_no_lights_pvc.vcl +++ b/vu1/indexed_no_lights_pvc.vcl @@ -77,7 +77,7 @@ xform_loop_lid: --LoopCS 1,3 set_adc_fs gs_vert, vi00 store_xyzf gs_vert - iaddiu color_qw, next_input, kColorQwOff + iaddiu color_qw, next_input, kColorQwOff ;TODO: this probably as a macro, but just emphatic for now... 
lq vert_color, 0(color_qw) loi 255.0 muli vert_color, vert_color, i diff --git a/vu1/indexed_vcl.vsm b/vu1/indexed_vcl.vsm deleted file mode 100644 index c61d2ec8..00000000 --- a/vu1/indexed_vcl.vsm +++ /dev/null @@ -1,744 +0,0 @@ -; === __LP__ EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: -; === dumb : optimal=18 clid=10 mlid=10 size=(18) -; === ldumb : optimal=18 clid=0 mlid=3 size=(18) -; === normal1 : optimal=18 clid=0 mlid=3 size=(18) -; === hDown : optimal=18 clid=0 mlid=3 size=(18) -; === vuta : optimal=18 clid=0 mlid=3 size=(18) -; === normal : optimal=18 clid=0 mlid=4 size=(18) -; === another : optimal=18 clid=0 mlid=3 size=(18) -; === vuta1 : optimal=18 clid=0 mlid=5 size=(18) -; === normal2 : optimal=18 clid=0 mlid=5 size=(18) -; === dumb2 : optimal=18 clid=5 mlid=5 size=(18) -; === __LP__ EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: -; === normal1 : optimal=34 clid=0 mlid=4 size=(39) -; === hDown : optimal=34 clid=0 mlid=4 size=(39) -; === vuta : optimal=34 clid=0 mlid=3 size=(39) -; === another : optimal=34 clid=0 mlid=4 size=(39) -; === vuta1 : optimal=34 clid=0 mlid=3 size=(39) -; === __LP__ EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP: -; === ldumb : optimal=38 clid=0 mlid=2 size=(38) -; === vuta : optimal=38 clid=0 mlid=2 size=(38) -; ================================================= -; flowMon::Emit() vcl 1.4beta7 produced this code: - .vu - .align 4 - .global vsmIndexed_CodeStart - .global vsmIndexed_CodeEnd -vsmIndexed_CodeStart: -__v_vu1_indexed_pp4_vcl_4: -; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_indexed_pp4_vcl_4] - NOP lq VF01,62(VI00) - NOP lq VF02,63(VI00) - NOP lq VF03,64(VI00) - NOP[E] lq VF04,65(VI00) - NOP NOP -main_loop_lid: -; _LNOPT_w=[ normal2 ] 8 [8 0] 8 [main_loop_lid] - NOP ilw.x VI01,0(VI00) - NOP iaddiu VI02,VI00,0x00000001 - NOP lq.xyz VF05,59(VI00) - NOP lq.xyz VF06,60(VI00) - NOP lq.xyz VF07,61(VI00) - NOP lq.xyz VF08,66(VI00) - NOP ibeq VI01,VI00,pt_lights_lid - NOP ilw.x VI03,0(VI02) 
-; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_indexed_pp4_vcl_7] - maxw.z VF09,VF00,VF00w NOP -dir_light_loop_lid: -; _LNOPT_w=[ another ] 26 [64 0] 67 [dir_light_loop_lid] - NOP NOP - NOP NOP - NOP lq.xyz VF12,67(VI00) - NOP lq.xyz VF13,3(VI03) - NOP lq.xyz VF14,68(VI00) - NOP lq.xyz VF10,69(VI00) - mulax.xyz ACC,VF12,VF13x xtop VI06 ; STALL_LATENCY ?1 - madday.xyz ACC,VF14,VF13y ilw.x VI05,0(VI06) - maddz.xyz VF13,VF10,VF13z iaddiu VI04,VI06,0x00000005 - add.xyz VF14,VF08,VF13 lq.xyz VF11,2(VI03) ; STALL_LATENCY ?3 - NOP esadd P,VF14 ; STALL_LATENCY ?3 - NOP waitp ; STALL_LATENCY ?9 - NOP mfp.w VF05,P - NOP ersqrt P,VF05w ; STALL_LATENCY ?3 - NOP waitp ; STALL_LATENCY ?16 - NOP mfp.w VF05,P - mulw.xyz VF14,VF14,VF05w lq.xyz VF18,1(VI04) ; STALL_LATENCY ?3 - mul.xyz VF15,VF14,VF18 iadd VI07,VI04,VI05 ; STALL_LATENCY ?3 - NOP iadd VI07,VI07,VI05 - NOP iadd VI05,VI07,VI05 - mul.xyz VF12,VF11,VF07 lq.xyz VF11,1(VI03) - NOP mr32.xyw VF09,VF15 - NOP iaddiu VI04,VI04,0x00000003 - mul.xyz VF18,VF13,VF18 lq.xyz VF10,0(VI03) - NOP ibeq VI04,VI05,EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI2 - addax.w ACC,VF09,VF09x iaddiu VI06,VI06,0x000000ac -; _LNOPT_w=[ ] 13 [17 0] 17 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__PRO1] - maddy.w VF07,VF00,VF09y lq.xyz VF17,1(VI04) - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 - maxx.z VF15,VF18,VF00x NOP ; STALL_LATENCY ?1 - mul.xyz VF18,VF14,VF17 NOP - mul.w VF05,VF05,VF05 NOP - NOP mr32.xyw VF09,VF18 ; STALL_LATENCY ?2 - mul.w VF05,VF05,VF05 NOP - mul.xyz VF18,VF13,VF17 iaddiu VI04,VI04,0x00000003 - mulz.xyz VF17,VF11,VF15z NOP - addax.w ACC,VF09,VF09x ibeq VI04,VI05,EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI1 - mul.w VF05,VF05,VF05 NOP -; _LNOPT_w=[ ] 17 [17 0] 18 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__PRO2] - maddy.w VF07,VF00,VF09y NOP - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - mul.w VF06,VF05,VF05 NOP - maxx.w 
VF05,VF07,VF00x NOP - mula.xyz ACC,VF17,VF06 lq.xyz VF17,1(VI04) - maxx.z VF16,VF18,VF00x NOP - mul.w VF06,VF06,VF06 NOP - mul.w VF05,VF05,VF05 NOP - mul.xyz VF18,VF14,VF17 NOP - maddaw.xyz ACC,VF12,VF06w NOP ; STALL_LATENCY ?1 - mul.w VF05,VF05,VF05 NOP - madd.xyz VF15,VF10,VF05 mr32.xyw VF09,VF18 - mul.xyz VF18,VF13,VF17 iaddiu VI04,VI04,0x00000003 - mulz.xyz VF17,VF11,VF16z NOP - mul.w VF05,VF05,VF05 ibeq VI04,VI05,EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI0 - addax.w ACC,VF09,VF09x NOP -EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 18 [18 18] 18 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP] - maddy.w VF07,VF00,VF09y NOP - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - mul.w VF06,VF05,VF05 NOP - maxx.w VF05,VF07,VF00x NOP - mula.xyz ACC,VF17,VF06 lq.xyz VF17,1(VI04) - maxx.z VF16,VF18,VF00x lq.xyz VF18,0(VI06) - mul.w VF06,VF06,VF06 NOP - mul.w VF05,VF05,VF05 NOP - mul.xyz VF19,VF14,VF17 NOP - add.xyz VF18,VF18,VF15 NOP - maddaw.xyz ACC,VF12,VF06w NOP - mul.w VF05,VF05,VF05 iaddiu VI04,VI04,0x00000003 - madd.xyz VF15,VF10,VF05 mr32.xyw VF09,VF19 - mul.xyz VF18,VF13,VF17 sqi.xyz VF18,(VI06++) - mulz.xyz VF17,VF11,VF16z NOP - mul.w VF05,VF05,VF05 ibne VI04,VI05,EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP - addax.w ACC,VF09,VF09x NOP -EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI0: -; _LNOPT_w=[ ] 25 [38 0] 38 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI0] - maddy.w VF07,VF00,VF09y NOP - NOP NOP - mul.w VF06,VF05,VF05 NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - mul.w VF06,VF06,VF06 NOP - mul.w VF05,VF05,VF05 NOP - mula.xyz ACC,VF17,VF06 NOP - maxx.z VF17,VF18,VF00x lq.xyz VF18,0(VI06) - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 - add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?1 - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 - maddaw.xyz ACC,VF12,VF06w sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?1 
- madd.xyz VF15,VF10,VF05 lq.xyz VF18,0(VI06) - mul.w VF06,VF05,VF05 NOP - mulz.xyz VF17,VF11,VF17z NOP - add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?1 - mul.w VF06,VF06,VF06 NOP - mula.xyz ACC,VF17,VF06 NOP - maddaw.xyz ACC,VF12,VF06w sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?2 - madd.xyz VF15,VF10,VF05 lq.xyz VF18,0(VI06) - add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?2 -EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI1: -; _LNOPT_w=[ ] 25 [38 0] 38 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI1] - maddy.w VF07,VF00,VF09y NOP - NOP NOP - NOP NOP - mul.w VF06,VF05,VF05 NOP - maxx.w VF05,VF07,VF00x NOP - adday.z ACC,VF18,VF18y NOP - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 - maddx.z VF18,VF09,VF18x NOP - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 - maxx.z VF18,VF18,VF00x NOP - mul.w VF06,VF06,VF06 NOP - mula.xyz ACC,VF17,VF06 NOP - mul.w VF05,VF05,VF05 NOP - mulz.xyz VF17,VF11,VF18z NOP - maddaw.xyz ACC,VF12,VF06w NOP - madd.xyz VF13,VF10,VF05 lq.xyz VF18,0(VI06) - mul.w VF06,VF05,VF05 NOP - mula.xyz ACC,VF17,VF06 NOP - add.xyz VF18,VF18,VF13 NOP ; STALL_LATENCY ?1 - mul.w VF06,VF06,VF06 NOP - maddaw.xyz ACC,VF12,VF06w sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?3 - madd.xyz VF10,VF10,VF05 lq.xyz VF18,0(VI06) - add.xyz VF18,VF18,VF10 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT - NOP sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?2 -EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI2: -; _LNOPT_w=[ ] 16 [38 0] 38 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI2] - maddy.w VF07,VF00,VF09y NOP - maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?3 - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?3 - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?3 - adday.z ACC,VF18,VF18y NOP - maddx.z VF18,VF09,VF18x NOP - mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 - maxx.z VF18,VF18,VF00x NOP ; STALL_LATENCY ?1 - mul.w 
VF06,VF05,VF05 NOP ; STALL_LATENCY ?1 - mulz.xyz VF11,VF11,VF18z NOP ; STALL_LATENCY ?1 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 - mula.xyz ACC,VF11,VF06 NOP ; STALL_LATENCY ?1 - maddaw.xyz ACC,VF12,VF06w NOP ; STALL_LATENCY ?1 - madd.xyz VF10,VF10,VF05 lq.xyz VF18,0(VI06) - add.xyz VF18,VF18,VF10 NOP ; STALL_LATENCY ?3 - NOP sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?3 -EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI01,VI01,0x00000001 - NOP iaddiu VI02,VI02,0x00000001 - NOP ibne VI01,VI00,dir_light_loop_lid - NOP ilw.x VI03,0(VI02) -pt_lights_lid: -; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] - NOP ilw.y VI02,0(VI00) - NOP iaddiu VI03,VI00,0x00000001 - NOP ibeq VI02,VI00,done_lighting_lid ; STALL_LATENCY ?2 - NOP ilw.y VI04,0(VI03) -; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_indexed_pp4_vcl_13] - maxw.z VF09,VF00,VF00w NOP -pt_light_loop_lid: -; _LNOPT_w=[ another ] 35 [43 0] 51 [pt_light_loop_lid] - NOP NOP - NOP NOP - NOP lq.xyz VF10,71(VI00) - NOP lq.xyz VF11,3(VI04) - NOP lq.xyz VF14,72(VI00) - mulax.xyz ACC,VF10,VF11x lq.xyz VF13,73(VI00) ; STALL_LATENCY ?2 - madday.xyz ACC,VF14,VF11y lq.xyz VF14,74(VI00) - NOP xtop VI07 - maddaz.xyz ACC,VF13,VF11z iaddiu VI05,VI07,0x00000005 ; STALL_LATENCY ?1 - maddw.xyz VF14,VF14,VF00w lq.xyz VF17,0(VI05) - sub.xyz VF17,VF14,VF17 ilw.x VI06,0(VI07) ; STALL_LATENCY ?3 - mul.xyz VF16,VF17,VF17 lq.xyz VF12,2(VI04) ; STALL_LATENCY ?3 - adday.z ACC,VF16,VF16y lq.xyz VF11,1(VI04) ; STALL_LATENCY ?3 - maddx.z VF16,VF09,VF16x iadd VI08,VI05,VI06 - NOP iadd VI08,VI08,VI06 - NOP iadd VI06,VI08,VI06 - NOP sqrt Q,VF16z ; STALL_LATENCY ?1 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - addq.y VF16,VF00,Q lq.xyz VF10,0(VI04) - NOP div Q,VF00w,VF16y ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - addw.x VF16,VF00,VF00w lq.xyz VF13,5(VI04) - mulq.xyz VF17,VF17,Q iaddiu 
VI05,VI05,0x00000003 - NOP NOP - mul.xyz VF12,VF12,VF07 ibeq VI05,VI06,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI2 - mul.xyz VF16,VF16,VF13 iaddiu VI07,VI07,0x000000ac -; _LNOPT_w=[ ] 21 [38 0] 39 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__PRO1] - add.xyz VF18,VF08,VF17 lq.xyz VF19,0(VI05) - sub.xyz VF19,VF14,VF19 esadd P,VF18 ; STALL_LATENCY ?3 - mul.xyz VF21,VF19,VF19 lq.xyz VF20,-2(VI05) ; STALL_LATENCY ?3 - adday.z ACC,VF21,VF21y NOP ; STALL_LATENCY ?3 - maddx.z VF21,VF09,VF21x NOP - NOP waitp - addw.x VF21,VF00,VF00w mfp.w VF06,P - NOP sqrt Q,VF21z ; STALL_LATENCY ?1 - NOP ersqrt P,VF06w ; STALL_LATENCY ?1 - addq.y VF21,VF00,Q waitq ; STALL_LATENCY ?4 - NOP div Q,VF00w,VF21y ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - mul.xyz VF16,VF21,VF13 move.xyz VF15,VF16 - mul.xyz VF21,VF17,VF20 NOP - mulq.xyz VF17,VF19,Q iaddiu VI05,VI05,0x00000003 - NOP move.xyz VF19,VF18 - NOP ibeq VI05,VI06,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1 - mulax.w ACC,VF00,VF21x mfp.w VF06,P -; _LNOPT_w=[ ] 32 [38 0] 39 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__PRO2] - add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI05) - NOP NOP - NOP NOP - mulw.xyz VF22,VF19,VF06w NOP - sub.xyz VF19,VF14,VF23 NOP - madday.w ACC,VF00,VF21y esadd P,VF18 - maddz.w VF06,VF00,VF21z NOP - mul.xyz VF20,VF22,VF20 NOP - mul.xyz VF21,VF19,VF19 NOP - mulax.w ACC,VF00,VF20x NOP ; STALL_LATENCY ?2 - adday.z ACC,VF21,VF21y NOP - maddx.z VF21,VF09,VF21x NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z waitp - maxx.w VF05,VF06,VF00x mfp.w VF06,P - mulax.w ACC,VF00,VF15x sqrt Q,VF21z - madday.w ACC,VF00,VF15y NOP - maxx.w VF07,VF07,VF00x NOP - mulw.xyz VF20,VF11,VF05w ersqrt P,VF06w - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 - addq.y VF21,VF00,Q waitq - mul.w VF06,VF06,VF06 div Q,VF00w,VF21y ; STALL_LATENCY ?3 - addw.x VF21,VF00,VF00w NOP - mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI05) - maddz.w VF05,VF00,VF15z NOP - mul.w VF06,VF06,VF06 NOP - mul.xyz VF16,VF21,VF13 
move.xyz VF15,VF16 - mul.xyz VF21,VF17,VF20 NOP - mulq.xyz VF17,VF19,Q iaddiu VI05,VI05,0x00000003 - mul.w VF07,VF06,VF06 move.xyz VF19,VF18 - NOP ibeq VI05,VI06,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0 - mulax.w ACC,VF00,VF21x mfp.w VF06,P -EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ vuta ] 39 [38 34] 39 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] - add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI05) - mul.w VF07,VF07,VF07 NOP - madday.w ACC,VF00,VF21y NOP - mulw.xyz VF22,VF19,VF06w NOP - sub.xyz VF19,VF14,VF23 esadd P,VF18 - maddaw.xyz ACC,VF12,VF07w NOP - maddz.w VF06,VF00,VF21z NOP - mul.xyz VF20,VF22,VF20 NOP - mul.xyz VF21,VF19,VF19 NOP - madd.xyz VF22,VF10,VF05 div Q,VF00w,VF05w - maxx.w VF05,VF06,VF00x NOP - mulax.w ACC,VF00,VF20x NOP - adday.z ACC,VF21,VF21y NOP - maddx.z VF21,VF09,VF21x NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z mfp.w VF06,P - mulq.xyz VF23,VF22,Q lq.xyz VF22,0(VI07) - mulax.w ACC,VF00,VF15x sqrt Q,VF21z - madday.w ACC,VF00,VF15y NOP - maxx.w VF07,VF07,VF00x NOP - mulw.xyz VF20,VF11,VF05w ersqrt P,VF06w - add.xyz VF22,VF22,VF23 NOP - addw.x VF21,VF00,VF00w NOP - mul.w VF06,VF07,VF07 NOP - addq.y VF21,VF00,Q NOP - mula.xyz ACC,VF20,VF06 sqi.xyz VF22,(VI07++) - maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 - mul.w VF06,VF06,VF06 lq.xyz VF20,-2(VI05) - mul.xyz VF16,VF21,VF13 div Q,VF00w,VF21y - NOP NOP - NOP NOP - mul.w VF06,VF06,VF06 NOP - mul.xyz VF21,VF17,VF20 NOP - NOP NOP - NOP NOP - mulq.xyz VF17,VF19,Q iaddiu VI05,VI05,0x00000003 - mul.w VF07,VF06,VF06 move.xyz VF19,VF18 - NOP ibne VI05,VI06,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP - mulax.w ACC,VF00,VF21x mfp.w VF06,P -EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0: -; _LNOPT_w=[ ] 60 [93 0] 93 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0] - add.xyz VF13,VF08,VF17 NOP - mulw.xyz VF19,VF19,VF06w esadd P,VF13 ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - mul.w VF07,VF07,VF07 NOP - 
madday.w ACC,VF00,VF21y NOP - maddz.w VF06,VF00,VF21z NOP - mul.xyz VF20,VF19,VF20 NOP - maddaw.xyz ACC,VF12,VF07w NOP - madd.xyz VF21,VF10,VF05 NOP - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mulax.w ACC,VF00,VF20x mfp.w VF06,P - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - mulw.xyz VF20,VF11,VF05w NOP - mulax.w ACC,VF00,VF15x ersqrt P,VF06w - madday.w ACC,VF00,VF15y NOP - maxx.w VF07,VF07,VF00x NOP - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulq.xyz VF19,VF21,Q lq.xyz VF21,0(VI07) - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 - mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI05) - add.xyz VF21,VF21,VF19 move.xyz VF19,VF13 - mul.w VF07,VF06,VF06 mfp.w VF06,P ; STALL_LATENCY ?1 - mul.xyz VF21,VF17,VF20 sqi.xyz VF21,(VI07++) ; STALL_LATENCY ?1 - mulw.xyz VF13,VF19,VF06w NOP ; STALL_LATENCY ?1 - maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 - mul.w VF07,VF07,VF07 NOP - mulax.w ACC,VF00,VF21x NOP - mul.xyz VF20,VF13,VF20 NOP - madday.w ACC,VF00,VF21y NOP - maddaw.xyz ACC,VF12,VF07w NOP - maddz.w VF06,VF00,VF21z NOP - mulax.w ACC,VF00,VF20x NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?3 - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 - mulw.xyz VF20,VF11,VF05w NOP - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 - madd.xyz VF13,VF10,VF05 NOP - mula.xyz ACC,VF20,VF06 NOP - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 - mulq.xyz VF17,VF13,Q waitq - mulax.w ACC,VF00,VF15x NOP - madday.w ACC,VF00,VF15y NOP - mul.w VF07,VF06,VF06 NOP - maddz.w VF05,VF00,VF15z lq.xyz VF13,0(VI07) - mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 - add.xyz VF11,VF13,VF17 div Q,VF00w,VF05w - maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 - madd.xyz VF10,VF10,VF05 NOP - NOP sqi.xyz VF11,(VI07++) - mulq.xyz VF12,VF10,Q lq.xyz VF10,0(VI07) ; STALL_LATENCY ?2 - add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 - NOP b 
EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sqi.xyz VF10,(VI07++) ; STALL_LATENCY ?2 -EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1: -; _LNOPT_w=[ ] 53 [93 0] 93 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1] - add.xyz VF13,VF08,VF17 NOP - mulw.xyz VF19,VF19,VF06w esadd P,VF13 ; STALL_LATENCY ?3 - mul.xyz VF20,VF19,VF20 move.xyz VF19,VF13 ; STALL_LATENCY ?3 - madday.w ACC,VF00,VF21y NOP - maddz.w VF06,VF00,VF21z NOP - maxx.w VF05,VF06,VF00x waitp ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF20x mfp.w VF06,P - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - mulw.xyz VF20,VF11,VF05w NOP - mulax.w ACC,VF00,VF15x ersqrt P,VF06w - madday.w ACC,VF00,VF15y NOP - maxx.w VF07,VF07,VF00x NOP - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI05) - NOP waitp ; STALL_LATENCY ?1 - mul.w VF07,VF06,VF06 mfp.w VF06,P - mul.xyz VF21,VF17,VF20 NOP - mulw.xyz VF13,VF19,VF06w NOP ; STALL_LATENCY ?2 - maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 - mul.w VF07,VF07,VF07 NOP - mulax.w ACC,VF00,VF21x NOP - mul.xyz VF20,VF13,VF20 NOP - madday.w ACC,VF00,VF21y NOP - maddaw.xyz ACC,VF12,VF07w NOP - maddz.w VF06,VF00,VF21z NOP - mulax.w ACC,VF00,VF20x NOP - madday.w ACC,VF00,VF20y NOP - maddz.w VF07,VF00,VF20z NOP - maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?3 - maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 - mulw.xyz VF20,VF11,VF05w NOP - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 - madd.xyz VF13,VF10,VF05 NOP - mula.xyz ACC,VF20,VF06 NOP - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 - mulq.xyz VF17,VF13,Q waitq - mulax.w ACC,VF00,VF15x NOP - madday.w ACC,VF00,VF15y NOP - mul.w VF07,VF06,VF06 NOP - maddz.w VF05,VF00,VF15z lq.xyz VF13,0(VI07) - mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 - add.xyz VF11,VF13,VF17 div Q,VF00w,VF05w - maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 - 
madd.xyz VF10,VF10,VF05 NOP - NOP sqi.xyz VF11,(VI07++) - mulq.xyz VF12,VF10,Q lq.xyz VF10,0(VI07) ; STALL_LATENCY ?2 - add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 - NOP b EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT - NOP sqi.xyz VF10,(VI07++) ; STALL_LATENCY ?2 -EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI2: -; _LNOPT_w=[ ] 49 [93 0] 93 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI2] - add.xyz VF13,VF08,VF17 lq.xyz VF14,-2(VI05) - mul.xyz VF17,VF17,VF14 esadd P,VF13 ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF17x move.xyz VF16,VF16 ; STALL_LATENCY ?3 - madday.w ACC,VF00,VF17y NOP - NOP waitp ; STALL_LATENCY ?4 - NOP mfp.w VF06,P - NOP ersqrt P,VF06w ; STALL_LATENCY ?3 - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP NOP - NOP move.xyz VF13,VF13 - NOP mfp.w VF06,P - mulw.xyz VF13,VF13,VF06w NOP ; STALL_LATENCY ?3 - maddz.w VF06,VF00,VF17z NOP - mul.xyz VF17,VF13,VF14 NOP ; STALL_LATENCY ?2 - maxx.w VF05,VF06,VF00x NOP - mulax.w ACC,VF00,VF17x NOP ; STALL_LATENCY ?2 - madday.w ACC,VF00,VF17y NOP - maddz.w VF07,VF00,VF17z NOP - mulw.xyz VF11,VF11,VF05w NOP - maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?2 - mula.xyz ACC,VF11,VF06 NOP - mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 - mulax.w ACC,VF00,VF16x NOP - madday.w ACC,VF00,VF16y NOP - mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 - maddz.w VF05,VF00,VF16z NOP - mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 - NOP div Q,VF00w,VF05w - maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 - madd.xyz VF10,VF10,VF05 NOP - mulq.xyz VF12,VF10,Q lq.xyz VF10,0(VI07) ; STALL_LATENCY ?3 - add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 - NOP sqi.xyz VF10,(VI07++) ; STALL_LATENCY ?3 -EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [4 0] 4 
[EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] - NOP isubiu VI02,VI02,0x00000001 - NOP iaddiu VI03,VI03,0x00000001 - NOP ibne VI02,VI00,pt_light_loop_lid - NOP ilw.y VI04,0(VI03) -done_lighting_lid: -; _LNOPT_w=[ normal2 ] 12 [12 0] 12 [done_lighting_lid] - NOP loi 0x44fff000 - NOP lq.w VF06,57(VI00) - addi.xy VF05,VF00,I loi 0x45000000 - NOP xtop VI04 - NOP fcset 0 - NOP mr32.z VF05,VF06 - NOP lq.xyz VF06,76(VI00) - NOP ilw.w VI03,76(VI00) - NOP iaddiu VI05,VI00,0x0000004e - NOP iaddiu VI06,VI04,0x00000005 - NOP ibne VI01,VI00,colors_ready_lid - maxi.w VF07,VF00,I iaddiu VI07,VI04,0x000000ac -; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_indexed_pp4_vcl_19] - NOP ibne VI02,VI00,colors_ready_lid - NOP NOP -; _LNOPT_w=[ normal2 ] 6 [9 0] 10 [__v_vu1_indexed_pp4_vcl_20] - NOP lq.xyz VF08,57(VI00) - NOP lq.xyz VF09,59(VI00) - mul.xyz VF08,VF08,VF09 lq.xyz VF07,58(VI00) ; STALL_LATENCY ?3 - NOP ilw.x VI01,0(VI04) - NOP iaddiu VI02,VI04,0x000000ac - add.xyz VF07,VF07,VF08 iaddiu VI08,VI00,0 ; STALL_LATENCY ?1 -seed_const_colors_lid: -; _LNOPT_w=[ normal2 ] 4 [4 3] 4 [seed_const_colors_lid] - NOP iaddiu VI08,VI08,0x00000001 - NOP NOP - NOP ibne VI08,VI01,seed_const_colors_lid - NOP sqi.xyz VF07,(VI02++) -colors_ready_lid: -; _LNOPT_w=[ vuta1 ] 47 [40 0] 53 [colors_ready_lid] - NOP loi 0x40400000 - maxi.z VF07,VF00,I loi 0x437f0000 - NOP lq.w VF05,60(VI00) - NOP lq.xyz VF09,75(VI00) - NOP ilw.z VI02,0(VI04) - maxi.y VF08,VF00,I loi 0x43000000 - NOP ilw.y VI09,0(VI04) - NOP mtir VI08,VF09x - NOP ior VI02,VI08,VI02 - muli.w VF05,VF05,I loi 0x437f0000 - NOP mfir.x VF09,VI02 - NOP mfir.w VF09,VI05 - NOP iaddiu VI02,VI04,0x00000005 - NOP iadd VI09,VI02,VI09 - NOP iaddiu VI04,VI00,0x0000004b - NOP sq VF09,77(VI00) - NOP xgkick VI04 - NOP ilw.w VI04,0(VI02) - minii.w VF08,VF05,I lqi.w VF14,(VI02++) - maxi.w VF06,VF00,I loi 0x437d0000 - NOP iaddiu VI08,VI00,0x000000ff - NOP iand VI04,VI04,VI08 - addy.w VF12,VF14,VF08y iadd VI01,VI04,VI04 - maxi.w VF05,VF00,I iadd 
VI01,VI01,VI04 - mulz.w VF11,VF14,VF07z iadd VI10,VI01,VI06 - NOP lq.xyz VF09,0(VI10) - add.w VF11,VF11,VF05 NOP ; STALL_LATENCY ?2 - mulax ACC,VF01,VF09x NOP - madday ACC,VF02,VF09y NOP - maddaz ACC,VF03,VF09z iadd VI04,VI04,VI07 - maddw VF13,VF04,VF00w mtir VI11,VF11w - NOP lq.xyz VF09,0(VI04) - NOP iadd VI04,VI11,VI06 - NOP lq.xyz VF12,0(VI04) - NOP div Q,VF00w,VF13w - miniw.xyz VF09,VF09,VF06w NOP - mulax ACC,VF01,VF12x NOP ; STALL_LATENCY ?1 - madday ACC,VF02,VF12y NOP - maddaz ACC,VF03,VF12z NOP - mulq.xyz VF11,VF13,Q waitq ; STALL_LATENCY ?1 - maddw VF16,VF04,VF00w mtir VI01,VF12w - add.xyz VF13,VF11,VF05 lq.xyz VF14,2(VI10) ; STALL_LATENCY ?2 - mul.xyz VF11,VF11,VF06 iadd VI01,VI01,VI07 - NOP lq.xyz VF12,0(VI01) - ftoi0.w VF09,VF08 div Q,VF00w,VF16w - ftoi0.xyz VF09,VF09 ibeq VI02,VI09,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI1 - clipw.xyz VF11xyz,VF07w lq.xyz VF11,2(VI04) -; _LNOPT_w=[ ] 38 [37 0] 38 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__PRO1] - mulq.xyz VF15,VF14,Q ilw.w VI11,0(VI02) - NOP NOP - NOP lqi.w VF14,(VI02++) - NOP sq VF09,1(VI05) - mulq.xyz VF09,VF16,Q fcand VI01,262143 - NOP iand VI11,VI11,VI08 - NOP iadd VI10,VI11,VI11 - mulz.w VF11,VF14,VF07z iadd VI10,VI10,VI11 - mul.xyz VF10,VF09,VF06 iadd VI12,VI10,VI06 - add.xyz VF14,VF09,VF05 iadd VI11,VI11,VI07 - addy.w VF12,VF14,VF08y lq.xyz VF09,0(VI12) - ftoi4.xyz VF16,VF13 iand VI04,VI01,VI03 - add.w VF11,VF11,VF05 ior VI04,VI04,VI00 - clipw.xyz VF10xyz,VF07w iaddiu VI01,VI04,0x00007fff - mulax ACC,VF01,VF09x mfir.w VF16,VI01 - madday ACC,VF02,VF09y mtir VI10,VF12w - maddaz ACC,VF03,VF09z mtir VI04,VF11w - maddw VF13,VF04,VF00w lq.xyz VF10,0(VI11) - mulq.xyz VF16,VF11,Q sq VF16,2(VI05) - NOP iadd VI11,VI04,VI06 - NOP sq.xyz VF15,0(VI05) - NOP div Q,VF00w,VF13w - miniw.xyz VF09,VF10,VF06w iaddiu VI04,VI05,0 - miniw.xyz VF10,VF12,VF06w lq.xyz VF12,0(VI11) - NOP fcand VI01,262143 - ftoi4.xyz VF15,VF14 lq.xyz VF14,2(VI12) - NOP iand VI05,VI01,VI03 - mulax ACC,VF01,VF12x ior VI05,VI05,VI00 - 
mulq.xyz VF11,VF13,Q iaddiu VI05,VI05,0x00007fff - madday ACC,VF02,VF12y mfir.w VF15,VI05 - maddaz ACC,VF03,VF12z iadd VI10,VI10,VI07 - maddw VF16,VF04,VF00w sq.xyz VF16,3(VI04) - add.xyz VF13,VF11,VF05 iaddiu VI05,VI04,0x00000006 - mul.xyz VF11,VF11,VF06 lq.xyz VF12,0(VI10) - NOP sq VF15,5(VI04) - NOP div Q,VF00w,VF16w - ftoi0.xyz VF09,VF09 ibeq VI02,VI09,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI0 - clipw.xyz VF11xyz,VF07w lq.xyz VF11,2(VI11) -EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP: -; _LPOPT_w=[ ldumb ] 38 [38 38] 38 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP] - NOP ilw.w VI11,0(VI02) - NOP lqi.w VF14,(VI02++) - NOP sq VF09,1(VI05) - mulq.xyz VF15,VF14,Q fcand VI01,262143 - mulq.xyz VF09,VF16,Q iand VI11,VI11,VI08 - addy.w VF12,VF14,VF08y iadd VI10,VI11,VI11 - mulz.w VF11,VF14,VF07z iadd VI10,VI10,VI11 - NOP iadd VI12,VI10,VI06 - add.xyz VF14,VF09,VF05 iadd VI11,VI11,VI07 - mul.xyz VF18,VF09,VF06 mtir VI10,VF12w - add.w VF11,VF11,VF05 lq.xyz VF17,0(VI12) - ftoi4.xyz VF16,VF13 iand VI01,VI01,VI03 - ftoi0.xyz VF09,VF10 ior VI01,VI01,VI00 - clipw.xyz VF18xyz,VF07w iaddiu VI13,VI01,0x00007fff - mulax ACC,VF01,VF17x mtir VI01,VF11w - madday ACC,VF02,VF17y mfir.w VF16,VI13 - maddaz ACC,VF03,VF17z lq.xyz VF10,0(VI11) - maddw VF13,VF04,VF00w iadd VI11,VI01,VI06 - NOP sq.xyz VF15,0(VI05) - NOP sq VF16,2(VI05) - miniw.xyz VF09,VF10,VF06w sq VF09,4(VI04) - NOP div Q,VF00w,VF13w - miniw.xyz VF10,VF12,VF06w iaddiu VI04,VI05,0 - NOP lq.xyz VF12,0(VI11) - NOP fcand VI01,262143 - ftoi4.xyz VF15,VF14 lq.xyz VF14,2(VI12) - mulq.xyz VF16,VF11,Q iand VI05,VI01,VI03 - mulax ACC,VF01,VF12x ior VI05,VI05,VI00 - mulq.xyz VF11,VF13,Q iaddiu VI05,VI05,0x00007fff - madday ACC,VF02,VF12y mfir.w VF15,VI05 - maddaz ACC,VF03,VF12z iadd VI10,VI10,VI07 - maddw VF16,VF04,VF00w sq.xyz VF16,3(VI04) - add.xyz VF13,VF11,VF05 iaddiu VI05,VI04,0x00000006 - mul.xyz VF11,VF11,VF06 lq.xyz VF12,0(VI10) - NOP sq VF15,5(VI04) - NOP div Q,VF00w,VF16w - ftoi0.xyz VF09,VF09 ibne 
VI02,VI09,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP - clipw.xyz VF11xyz,VF07w lq.xyz VF11,2(VI11) -EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI0: -; _LNOPT_w=[ ] 25 [21 0] 26 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI0] - mulq.xyz VF07,VF14,Q NOP - NOP NOP - NOP NOP - NOP NOP - mulq.xyz VF09,VF16,Q sq VF09,1(VI05) - NOP NOP - NOP fcand VI01,262143 - NOP iand VI01,VI01,VI03 - ftoi4.xyz VF16,VF13 ior VI01,VI01,VI00 - mul.xyz VF06,VF09,VF06 iaddiu VI01,VI01,0x00007fff - add.xyz VF14,VF09,VF05 mfir.w VF16,VI01 - ftoi0.xyz VF09,VF10 NOP - clipw.xyz VF06xyz,VF07w sq.xyz VF07,0(VI05) ; STALL_LATENCY ?1 - mulq.xyz VF16,VF11,Q sq VF16,2(VI05) - miniw.xyz VF10,VF12,VF06w sq VF09,4(VI04) - NOP iaddiu VI04,VI05,0 - NOP fcand VI01,262143 - NOP iand VI05,VI01,VI03 - ftoi0.xyz VF09,VF10 ior VI05,VI05,VI00 - ftoi4.xyz VF12,VF14 iaddiu VI05,VI05,0x00007fff - NOP mfir.w VF12,VI05 - NOP sq.xyz VF16,3(VI04) - NOP sq VF09,4(VI04) - NOP b EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EXIT_POINT - NOP sq VF12,5(VI04) -EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI1: -; _LNOPT_w=[ ] 23 [21 0] 26 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI1] - mulq.xyz VF07,VF14,Q NOP - NOP NOP - NOP sq VF09,1(VI05) - NOP fcand VI01,262143 - mulq.xyz VF09,VF16,Q iand VI02,VI01,VI03 - NOP ior VI02,VI02,VI00 - ftoi4.xyz VF16,VF13 iaddiu VI02,VI02,0x00007fff - NOP mfir.w VF16,VI02 - add.xyz VF14,VF09,VF05 sq.xyz VF07,0(VI05) - mul.xyz VF09,VF09,VF06 NOP - mulq.xyz VF16,VF11,Q sq VF16,2(VI05) ; STALL_LATENCY ?1 - clipw.xyz VF09xyz,VF07w NOP ; STALL_LATENCY ?1 - NOP NOP - NOP NOP - miniw.xyz VF09,VF12,VF06w iaddiu VI02,VI05,0 - NOP fcand VI01,262143 - NOP iand VI05,VI01,VI03 - ftoi4.xyz VF12,VF14 ior VI05,VI05,VI00 - ftoi0.xyz VF09,VF09 iaddiu VI05,VI05,0x00007fff - NOP mfir.w VF12,VI05 - NOP sq.xyz VF16,3(VI02) - NOP sq VF09,4(VI02) ; STALL_LATENCY ?1 - NOP sq VF12,5(VI02) -EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EXIT_POINT: -; _LNOPT_w=[ ] 4 [2 0] 4 
[EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EXIT_POINT] - NOP iaddiu VI01,VI00,0x0000004d - NOP xgkick VI01 - NOP[E] NOP - NOP NOP -; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_indexed_pp4_vcl_26] - NOP b main_loop_lid - NOP NOP - .align 4 -vsmIndexed_CodeEnd: -; iCount=657 -; register stats: -; 14 VU User integer -; 24 VU User floating point From cc653f4128734cd998a2c09ebd860f86e7bb3138 Mon Sep 17 00:00:00 2001 From: iann Date: Sat, 11 Oct 2025 08:54:19 +0900 Subject: [PATCH 12/13] more clean up --- Makefile | 1 + vu1/indexed_constant_color_vcl.vsm | 154 ++++++ vu1/indexed_no_lights_pvc_vcl.vsm | 146 ++++++ vu1/indexed_vcl.vsm | 744 +++++++++++++++++++++++++++++ 4 files changed, 1045 insertions(+) create mode 100644 vu1/indexed_constant_color_vcl.vsm create mode 100644 vu1/indexed_no_lights_pvc_vcl.vsm create mode 100644 vu1/indexed_vcl.vsm diff --git a/Makefile b/Makefile index e0d2fb7f..b9b6af45 100644 --- a/Makefile +++ b/Makefile @@ -69,6 +69,7 @@ install: all cp -f $(EE_LIB) $(PS2SDK)/ports/lib clean: + rm -rf ./GS_DUMP/hard/* rm -f $(EE_OBJS_LIB) $(EE_OBJS) $(EE_BIN) $(EE_LIB) realclean: clean diff --git a/vu1/indexed_constant_color_vcl.vsm b/vu1/indexed_constant_color_vcl.vsm new file mode 100644 index 00000000..80c1a3ce --- /dev/null +++ b/vu1/indexed_constant_color_vcl.vsm @@ -0,0 +1,154 @@ +; === __LP__ EXPL_vu1_indexed_constant_color_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; === ldumb : optimal=38 clid=0 mlid=1 size=(38) +; === vuta : optimal=38 clid=0 mlid=2 size=(38) +; === dUp : optimal=38 clid=0 mlid=1 size=(38) +; ================================================= +; flowMon::Emit() vcl 1.4beta7 produced this code: + .vu + .align 4 + .global vsmIndexedConstColor_CodeStart + .global vsmIndexedConstColor_CodeEnd +vsmIndexedConstColor_CodeStart: +__v_vu1_indexed_constant_color_pp4_vcl_4: +; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_indexed_constant_color_pp4_vcl_4] + NOP lq VF01,62(VI00) + NOP lq VF02,63(VI00) + NOP lq VF03,64(VI00) + NOP[E] lq VF04,65(VI00) + 
NOP NOP +main_loop_lid: +; _LNOPT_w=[ another ] 55 [49 0] 59 [main_loop_lid] + NOP xtop VI04 + NOP lq.xyz VF07,75(VI00) + NOP ilw.z VI06,0(VI04) + NOP iaddiu VI03,VI00,0x0000004e + NOP mfir.w VF07,VI03 + NOP mtir VI07,VF07x + NOP ior VI06,VI07,VI06 + NOP mfir.x VF07,VI06 + NOP loi 0x437f0000 + maxi.y VF10,VF00,I loi 0x40400000 + maxi.z VF09,VF00,I loi 0x45000000 + NOP sq VF07,77(VI00) + maxi.w VF07,VF00,I loi 0x44fff000 + addi.xy VF05,VF00,I loi 0x437d0000 + NOP lq.w VF06,60(VI00) + NOP lq.w VF05,57(VI00) + NOP ilw.y VI08,0(VI04) + NOP iaddiu VI01,VI00,0x0000004b + NOP iaddiu VI06,VI04,0x00000005 + maxi.w VF08,VF00,I loi 0x437f0000 + muli.w VF06,VF06,I iadd VI08,VI06,VI08 + NOP mr32.z VF05,VF05 + NOP xgkick VI01 + NOP lqi.w VF05,(VI06++) + minii.w VF06,VF06,I ilw.w VI01,-1(VI06) + NOP iaddiu VI07,VI00,0x000000ff + NOP iaddiu VI05,VI04,0x000000ac + NOP iaddiu VI04,VI04,0x00000005 + ftoi0.w VF11,VF06 iand VI10,VI01,VI07 + addy.w VF06,VF05,VF10y iadd VI01,VI10,VI10 + mulz.w VF05,VF05,VF09z iadd VI01,VI01,VI10 + NOP iadd VI01,VI01,VI04 + NOP lq.xyz VF08,0(VI01) + NOP mtir VI09,VF06w + add.w VF05,VF05,VF08 NOP + mulax ACC,VF01,VF08x iadd VI10,VI10,VI05 ; STALL_LATENCY ?1 + madday ACC,VF02,VF08y lq.xyz VF16,0(VI10) + maddaz ACC,VF03,VF08z mtir VI11,VF05w + maddw VF12,VF04,VF00w lq.xyz VF08,2(VI01) + NOP iadd VI01,VI11,VI04 + muli.xyz VF11,VF16,I lq.xyz VF16,0(VI01) + NOP div Q,VF00w,VF12w ; STALL_LATENCY ?1 + mulax ACC,VF01,VF16x NOP ; STALL_LATENCY ?1 + madday ACC,VF02,VF16y NOP + maddaz ACC,VF03,VF16z NOP + maddw VF13,VF04,VF00w lq.xyz VF06,76(VI00) + NOP NOP + mulq.xyz VF16,VF12,Q iadd VI09,VI09,VI05 + minii.xyz VF14,VF11,I lq.xyz VF11,0(VI09) + NOP div Q,VF00w,VF13w + add.xyz VF12,VF16,VF05 NOP ; STALL_LATENCY ?1 + mul.xyz VF16,VF16,VF06 ilw.w VI02,76(VI00) + muli.xyz VF15,VF11,I fcset 0 + ftoi0.xyz VF11,VF14 ibeq VI06,VI08,EXPL_vu1_indexed_constant_color_pp4_vcl_xform_loop_lid__EPI0 + mulq.xyz VF08,VF08,Q lq.xyz VF07,2(VI01) 
+EXPL_vu1_indexed_constant_color_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 38 [38 38] 38 [EXPL_vu1_indexed_constant_color_pp4_vcl_xform_loop_lid__MAIN_LOOP] + clipw.xyz VF16xyz,VF07w lqi.w VF05,(VI06++) + NOP ilw.w VI09,-1(VI06) + minii.xyz VF11,VF15,I sq VF11,1(VI03) + NOP sq.xyz VF08,0(VI03) + addy.w VF06,VF05,VF10y fcand VI01,262143 + mulz.w VF05,VF05,VF09z iand VI11,VI09,VI07 + mulq.xyz VF08,VF13,Q iadd VI09,VI11,VI11 + NOP iadd VI09,VI09,VI11 + NOP mtir VI10,VF06w + add.w VF05,VF05,VF08 iadd VI09,VI09,VI04 + add.xyz VF13,VF08,VF05 iadd VI11,VI11,VI05 + mul.xyz VF15,VF08,VF06 lq.xyz VF08,0(VI09) + NOP iand VI12,VI01,VI02 + NOP mtir VI01,VF05w + ftoi4.xyz VF14,VF12 ior VI12,VI12,VI00 + mulax ACC,VF01,VF08x iaddiu VI12,VI12,0x00007fff + madday ACC,VF02,VF08y mfir.w VF14,VI12 + maddaz ACC,VF03,VF08z lq.xyz VF16,0(VI11) + maddw VF12,VF04,VF00w lq.xyz VF08,2(VI09) + clipw.xyz VF15xyz,VF07w iadd VI09,VI01,VI04 + NOP sq VF14,2(VI03) + muli.xyz VF14,VF16,I lq.xyz VF16,0(VI09) + mulq.xyz VF07,VF07,Q div Q,VF00w,VF12w + ftoi4.xyz VF15,VF13 iadd VI11,VI10,VI05 + ftoi0.xyz VF11,VF11 iaddiu VI10,VI03,0 + minii.xyz VF14,VF14,I fcand VI01,262143 + mulax ACC,VF01,VF16x iand VI03,VI01,VI02 + madday ACC,VF02,VF16y ior VI03,VI03,VI00 + maddaz ACC,VF03,VF16z iaddiu VI03,VI03,0x00007fff + mulq.xyz VF16,VF12,Q mfir.w VF15,VI03 + NOP sq VF11,4(VI10) + NOP lq.xyz VF11,0(VI11) + maddw VF13,VF04,VF00w iaddiu VI03,VI10,0x00000006 + add.xyz VF12,VF16,VF05 sq VF15,5(VI10) + mul.xyz VF16,VF16,VF06 sq.xyz VF07,3(VI10) + muli.xyz VF15,VF11,I lq.xyz VF07,2(VI09) + ftoi0.xyz VF11,VF14 ibne VI06,VI08,EXPL_vu1_indexed_constant_color_pp4_vcl_xform_loop_lid__MAIN_LOOP + mulq.xyz VF08,VF08,Q div Q,VF00w,VF13w +EXPL_vu1_indexed_constant_color_pp4_vcl_xform_loop_lid__EPI0: +; _LNOPT_w=[ ] 28 [21 0] 30 [EXPL_vu1_indexed_constant_color_pp4_vcl_xform_loop_lid__EPI0] + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + clipw.xyz VF16xyz,VF07w sq VF11,1(VI03) + mulq.xyz VF08,VF13,Q sq.xyz 
VF08,0(VI03) + NOP NOP + NOP NOP + minii.xyz VF11,VF15,I fcand VI01,262143 + mul.xyz VF15,VF08,VF06 iand VI04,VI01,VI02 + NOP ior VI04,VI04,VI00 + ftoi4.xyz VF12,VF12 iaddiu VI04,VI04,0x00007fff + NOP mfir.w VF12,VI04 + clipw.xyz VF15xyz,VF07w iaddiu VI04,VI03,0 + add.xyz VF13,VF08,VF05 sq VF12,2(VI03) ; STALL_LATENCY ?2 + NOP fcand VI01,262143 + mulq.xyz VF07,VF07,Q iand VI03,VI01,VI02 + ftoi0.xyz VF11,VF11 ior VI03,VI03,VI00 + ftoi4.xyz VF15,VF13 iaddiu VI03,VI03,0x00007fff + NOP mfir.w VF15,VI03 + NOP sq.xyz VF07,3(VI04) + NOP sq VF11,4(VI04) + NOP iaddiu VI01,VI00,0x0000004d + NOP sq VF15,5(VI04) + NOP xgkick VI01 + NOP[E] NOP + NOP NOP +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_indexed_constant_color_pp4_vcl_9] + NOP b main_loop_lid + NOP NOP + .align 4 +vsmIndexedConstColor_CodeEnd: +; iCount=128 +; register stats: +; 13 VU User integer +; 17 VU User floating point diff --git a/vu1/indexed_no_lights_pvc_vcl.vsm b/vu1/indexed_no_lights_pvc_vcl.vsm new file mode 100644 index 00000000..d6fe2c08 --- /dev/null +++ b/vu1/indexed_no_lights_pvc_vcl.vsm @@ -0,0 +1,146 @@ +; === __LP__ EXPL_vu1_indexed_no_lights_pvc_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; === ldumb : optimal=39 clid=0 mlid=1 size=(39) +; === normal1 : optimal=39 clid=0 mlid=2 size=(39) +; === vuta : optimal=39 clid=0 mlid=1 size=(39) +; === vuta1 : optimal=39 clid=0 mlid=2 size=(39) +; ================================================= +; flowMon::Emit() vcl 1.4beta7 produced this code: + .vu + .align 4 + .global vsmIndexedPVC_CodeStart + .global vsmIndexedPVC_CodeEnd +vsmIndexedPVC_CodeStart: +__v_vu1_indexed_no_lights_pvc_pp4_vcl_4: +; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_indexed_no_lights_pvc_pp4_vcl_4] + NOP lq VF01,62(VI00) + NOP lq VF02,63(VI00) + NOP lq VF03,64(VI00) + NOP[E] lq VF04,65(VI00) + NOP NOP +main_loop_lid: +; _LNOPT_w=[ normal2 ] 50 [48 0] 55 [main_loop_lid] + NOP loi 0x44fff000 + addi.xy VF05,VF00,I loi 0x45000000 + NOP xtop VI04 + NOP iaddiu VI03,VI00,0x0000004e + NOP iaddiu 
VI05,VI04,0x00000005 + NOP lq.xyz VF10,75(VI00) + NOP ilw.z VI06,0(VI04) + NOP ilw.y VI07,0(VI04) + NOP mfir.w VF10,VI03 + NOP mtir VI08,VF10x + NOP ior VI06,VI08,VI06 + NOP mfir.x VF10,VI06 + maxi.w VF07,VF00,I loi 0x437d0000 + NOP iaddiu VI01,VI00,0x0000004b + NOP iadd VI07,VI05,VI07 + NOP sq VF10,77(VI00) + NOP xgkick VI01 + NOP lqi.w VF05,(VI05++) + NOP ilw.w VI01,-1(VI05) + maxi.w VF08,VF00,I loi 0x40800000 + maxi.z VF09,VF00,I iaddiu VI04,VI04,0x00000005 + NOP iaddiu VI06,VI00,0x000000ff + NOP iand VI01,VI01,VI06 + NOP iadd VI01,VI01,VI01 + mulz.w VF05,VF05,VF09z iadd VI01,VI01,VI01 + NOP iadd VI01,VI01,VI04 + NOP lq.xyz VF08,0(VI01) + add.w VF05,VF05,VF08 iaddiu VI09,VI01,0 ; STALL_LATENCY ?1 + mulax ACC,VF01,VF08x lq VF12,3(VI09) ; STALL_LATENCY ?1 + madday ACC,VF02,VF08y loi 0x437f0000 + maddaz ACC,VF03,VF08z mtir VI08,VF05w + maddw VF10,VF04,VF00w lq.xyz VF08,2(VI01) + muli VF12,VF12,I iadd VI01,VI08,VI04 + NOP lq.xyz VF07,0(VI01) + NOP div Q,VF00w,VF10w ; STALL_LATENCY ?1 + max VF12,VF12,VF00 fcset 0 + mulax ACC,VF01,VF07x lq.xyz VF06,76(VI00) + madday ACC,VF02,VF07y lq.w VF06,57(VI00) + maddaz ACC,VF03,VF07z NOP + maddw VF11,VF04,VF00w NOP + mulq.xyz VF13,VF10,Q mr32.z VF05,VF06 ; STALL_LATENCY ?1 + ftoi0 VF12,VF12 div Q,VF00w,VF11w ; STALL_LATENCY ?1 + mulq.xyz VF07,VF08,Q ilw.w VI02,76(VI00) + add.xyz VF08,VF13,VF05 iaddiu VI08,VI01,0 + mul.xyz VF13,VF13,VF06 loi 0x437f0000 + NOP lq VF10,3(VI08) + NOP sq VF12,1(VI03) + NOP sq.xyz VF07,0(VI03) + clipw.xyz VF13xyz,VF07w ibeq VI05,VI07,EXPL_vu1_indexed_no_lights_pvc_pp4_vcl_xform_loop_lid__EPI0 + NOP lq.xyz VF07,2(VI01) +EXPL_vu1_indexed_no_lights_pvc_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 39 [38 38] 39 [EXPL_vu1_indexed_no_lights_pvc_pp4_vcl_xform_loop_lid__MAIN_LOOP] + NOP lqi.w VF05,(VI05++) + mulq.xyz VF12,VF11,Q ilw.w VI08,-1(VI05) + NOP fcand VI01,262143 + NOP iand VI01,VI01,VI02 + mulz.w VF05,VF05,VF09z ior VI01,VI01,VI00 + add.xyz VF11,VF12,VF05 iand VI08,VI08,VI06 + mul.xyz 
VF13,VF12,VF06 iadd VI08,VI08,VI08 + muli VF12,VF10,I iadd VI08,VI08,VI08 + add.w VF05,VF05,VF08 iadd VI08,VI08,VI04 + ftoi4.xyz VF10,VF08 lq.xyz VF08,0(VI08) + clipw.xyz VF13xyz,VF07w iaddiu VI01,VI01,0x00007fff + NOP mfir.w VF10,VI01 + max VF12,VF12,VF00 mtir VI01,VF05w + mulax ACC,VF01,VF08x iaddiu VI09,VI08,0 + madday ACC,VF02,VF08y lq VF13,3(VI09) + maddaz ACC,VF03,VF08z sq VF10,2(VI03) + maddw VF10,VF04,VF00w loi 0x437f0000 + NOP lq.xyz VF08,2(VI08) + muli VF13,VF13,I iadd VI08,VI01,VI04 + NOP lq.xyz VF14,0(VI08) + ftoi0 VF12,VF12 div Q,VF00w,VF10w + ftoi4.xyz VF11,VF11 fcand VI01,262143 + max VF13,VF13,VF00 iand VI01,VI01,VI02 + mulax ACC,VF01,VF14x ior VI01,VI01,VI00 + madday ACC,VF02,VF14y iaddiu VI01,VI01,0x00007fff + maddaz ACC,VF03,VF14z mfir.w VF11,VI01 + mulq.xyz VF14,VF07,Q NOP + mulq.xyz VF15,VF10,Q iaddiu VI01,VI08,0 + mulq.xyz VF07,VF08,Q lq VF10,3(VI01) + maddw VF11,VF04,VF00w sq VF11,5(VI03) + ftoi0 VF13,VF13 loi 0x437f0000 + add.xyz VF08,VF15,VF05 sq VF12,4(VI03) + mul.xyz VF12,VF15,VF06 sq.xyz VF14,3(VI03) + NOP div Q,VF00w,VF11w + NOP iaddiu VI03,VI03,0x00000006 + NOP sq.xyz VF07,0(VI03) + clipw.xyz VF12xyz,VF07w sq VF13,1(VI03) + NOP ibne VI05,VI07,EXPL_vu1_indexed_no_lights_pvc_pp4_vcl_xform_loop_lid__MAIN_LOOP + NOP lq.xyz VF07,2(VI08) +EXPL_vu1_indexed_no_lights_pvc_pp4_vcl_xform_loop_lid__EPI0: +; _LNOPT_w=[ ] 23 [21 0] 26 [EXPL_vu1_indexed_no_lights_pvc_pp4_vcl_xform_loop_lid__EPI0] + NOP NOP + mulq.xyz VF09,VF11,Q NOP + NOP fcand VI01,262143 + add.xyz VF11,VF09,VF05 iand VI01,VI01,VI02 ; STALL_LATENCY ?2 + mul.xyz VF05,VF09,VF06 ior VI01,VI01,VI00 + NOP iaddiu VI01,VI01,0x00007fff + muli VF06,VF10,I mfir.w VF10,VI01 + ftoi4.xyz VF10,VF08 NOP + clipw.xyz VF05xyz,VF07w NOP + max VF08,VF06,VF00 NOP ; STALL_LATENCY ?1 + NOP sq VF10,2(VI03) + NOP fcand VI01,262143 + mulq.xyz VF07,VF07,Q iand VI02,VI01,VI02 + ftoi0 VF10,VF08 ior VI02,VI02,VI00 + ftoi4.xyz VF11,VF11 iaddiu VI02,VI02,0x00007fff + NOP mfir.w VF11,VI02 + NOP sq.xyz VF07,3(VI03) 
+ NOP sq VF10,4(VI03) + NOP iaddiu VI01,VI00,0x0000004d + NOP sq VF11,5(VI03) + NOP xgkick VI01 + NOP[E] NOP + NOP NOP +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_indexed_no_lights_pvc_pp4_vcl_9] + NOP b main_loop_lid + NOP NOP + .align 4 +vsmIndexedPVC_CodeEnd: +; iCount=119 +; register stats: +; 10 VU User integer +; 16 VU User floating point diff --git a/vu1/indexed_vcl.vsm b/vu1/indexed_vcl.vsm new file mode 100644 index 00000000..c61d2ec8 --- /dev/null +++ b/vu1/indexed_vcl.vsm @@ -0,0 +1,744 @@ +; === __LP__ EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: +; === dumb : optimal=18 clid=10 mlid=10 size=(18) +; === ldumb : optimal=18 clid=0 mlid=3 size=(18) +; === normal1 : optimal=18 clid=0 mlid=3 size=(18) +; === hDown : optimal=18 clid=0 mlid=3 size=(18) +; === vuta : optimal=18 clid=0 mlid=3 size=(18) +; === normal : optimal=18 clid=0 mlid=4 size=(18) +; === another : optimal=18 clid=0 mlid=3 size=(18) +; === vuta1 : optimal=18 clid=0 mlid=5 size=(18) +; === normal2 : optimal=18 clid=0 mlid=5 size=(18) +; === dumb2 : optimal=18 clid=5 mlid=5 size=(18) +; === __LP__ EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: +; === normal1 : optimal=34 clid=0 mlid=4 size=(39) +; === hDown : optimal=34 clid=0 mlid=4 size=(39) +; === vuta : optimal=34 clid=0 mlid=3 size=(39) +; === another : optimal=34 clid=0 mlid=4 size=(39) +; === vuta1 : optimal=34 clid=0 mlid=3 size=(39) +; === __LP__ EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; === ldumb : optimal=38 clid=0 mlid=2 size=(38) +; === vuta : optimal=38 clid=0 mlid=2 size=(38) +; ================================================= +; flowMon::Emit() vcl 1.4beta7 produced this code: + .vu + .align 4 + .global vsmIndexed_CodeStart + .global vsmIndexed_CodeEnd +vsmIndexed_CodeStart: +__v_vu1_indexed_pp4_vcl_4: +; _LNOPT_w=[ normal2 ] 5 [4 0] 5 [__v_vu1_indexed_pp4_vcl_4] + NOP lq VF01,62(VI00) + NOP lq VF02,63(VI00) + NOP lq VF03,64(VI00) + NOP[E] lq VF04,65(VI00) + NOP NOP +main_loop_lid: +; 
_LNOPT_w=[ normal2 ] 8 [8 0] 8 [main_loop_lid] + NOP ilw.x VI01,0(VI00) + NOP iaddiu VI02,VI00,0x00000001 + NOP lq.xyz VF05,59(VI00) + NOP lq.xyz VF06,60(VI00) + NOP lq.xyz VF07,61(VI00) + NOP lq.xyz VF08,66(VI00) + NOP ibeq VI01,VI00,pt_lights_lid + NOP ilw.x VI03,0(VI02) +; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_indexed_pp4_vcl_7] + maxw.z VF09,VF00,VF00w NOP +dir_light_loop_lid: +; _LNOPT_w=[ another ] 26 [64 0] 67 [dir_light_loop_lid] + NOP NOP + NOP NOP + NOP lq.xyz VF12,67(VI00) + NOP lq.xyz VF13,3(VI03) + NOP lq.xyz VF14,68(VI00) + NOP lq.xyz VF10,69(VI00) + mulax.xyz ACC,VF12,VF13x xtop VI06 ; STALL_LATENCY ?1 + madday.xyz ACC,VF14,VF13y ilw.x VI05,0(VI06) + maddz.xyz VF13,VF10,VF13z iaddiu VI04,VI06,0x00000005 + add.xyz VF14,VF08,VF13 lq.xyz VF11,2(VI03) ; STALL_LATENCY ?3 + NOP esadd P,VF14 ; STALL_LATENCY ?3 + NOP waitp ; STALL_LATENCY ?9 + NOP mfp.w VF05,P + NOP ersqrt P,VF05w ; STALL_LATENCY ?3 + NOP waitp ; STALL_LATENCY ?16 + NOP mfp.w VF05,P + mulw.xyz VF14,VF14,VF05w lq.xyz VF18,1(VI04) ; STALL_LATENCY ?3 + mul.xyz VF15,VF14,VF18 iadd VI07,VI04,VI05 ; STALL_LATENCY ?3 + NOP iadd VI07,VI07,VI05 + NOP iadd VI05,VI07,VI05 + mul.xyz VF12,VF11,VF07 lq.xyz VF11,1(VI03) + NOP mr32.xyw VF09,VF15 + NOP iaddiu VI04,VI04,0x00000003 + mul.xyz VF18,VF13,VF18 lq.xyz VF10,0(VI03) + NOP ibeq VI04,VI05,EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI2 + addax.w ACC,VF09,VF09x iaddiu VI06,VI06,0x000000ac +; _LNOPT_w=[ ] 13 [17 0] 17 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__PRO1] + maddy.w VF07,VF00,VF09y lq.xyz VF17,1(VI04) + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 + maxx.z VF15,VF18,VF00x NOP ; STALL_LATENCY ?1 + mul.xyz VF18,VF14,VF17 NOP + mul.w VF05,VF05,VF05 NOP + NOP mr32.xyw VF09,VF18 ; STALL_LATENCY ?2 + mul.w VF05,VF05,VF05 NOP + mul.xyz VF18,VF13,VF17 iaddiu VI04,VI04,0x00000003 + mulz.xyz VF17,VF11,VF15z NOP + addax.w ACC,VF09,VF09x ibeq 
VI04,VI05,EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI1 + mul.w VF05,VF05,VF05 NOP +; _LNOPT_w=[ ] 17 [17 0] 18 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__PRO2] + maddy.w VF07,VF00,VF09y NOP + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + mul.w VF06,VF05,VF05 NOP + maxx.w VF05,VF07,VF00x NOP + mula.xyz ACC,VF17,VF06 lq.xyz VF17,1(VI04) + maxx.z VF16,VF18,VF00x NOP + mul.w VF06,VF06,VF06 NOP + mul.w VF05,VF05,VF05 NOP + mul.xyz VF18,VF14,VF17 NOP + maddaw.xyz ACC,VF12,VF06w NOP ; STALL_LATENCY ?1 + mul.w VF05,VF05,VF05 NOP + madd.xyz VF15,VF10,VF05 mr32.xyw VF09,VF18 + mul.xyz VF18,VF13,VF17 iaddiu VI04,VI04,0x00000003 + mulz.xyz VF17,VF11,VF16z NOP + mul.w VF05,VF05,VF05 ibeq VI04,VI05,EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI0 + addax.w ACC,VF09,VF09x NOP +EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 18 [18 18] 18 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP] + maddy.w VF07,VF00,VF09y NOP + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + mul.w VF06,VF05,VF05 NOP + maxx.w VF05,VF07,VF00x NOP + mula.xyz ACC,VF17,VF06 lq.xyz VF17,1(VI04) + maxx.z VF16,VF18,VF00x lq.xyz VF18,0(VI06) + mul.w VF06,VF06,VF06 NOP + mul.w VF05,VF05,VF05 NOP + mul.xyz VF19,VF14,VF17 NOP + add.xyz VF18,VF18,VF15 NOP + maddaw.xyz ACC,VF12,VF06w NOP + mul.w VF05,VF05,VF05 iaddiu VI04,VI04,0x00000003 + madd.xyz VF15,VF10,VF05 mr32.xyw VF09,VF19 + mul.xyz VF18,VF13,VF17 sqi.xyz VF18,(VI06++) + mulz.xyz VF17,VF11,VF16z NOP + mul.w VF05,VF05,VF05 ibne VI04,VI05,EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__MAIN_LOOP + addax.w ACC,VF09,VF09x NOP +EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI0: +; _LNOPT_w=[ ] 25 [38 0] 38 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI0] + maddy.w VF07,VF00,VF09y NOP + NOP NOP + mul.w VF06,VF05,VF05 NOP + maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?1 + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + mul.w VF06,VF06,VF06 NOP + 
mul.w VF05,VF05,VF05 NOP + mula.xyz ACC,VF17,VF06 NOP + maxx.z VF17,VF18,VF00x lq.xyz VF18,0(VI06) + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 + add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?1 + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 + maddaw.xyz ACC,VF12,VF06w sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?1 + madd.xyz VF15,VF10,VF05 lq.xyz VF18,0(VI06) + mul.w VF06,VF05,VF05 NOP + mulz.xyz VF17,VF11,VF17z NOP + add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?1 + mul.w VF06,VF06,VF06 NOP + mula.xyz ACC,VF17,VF06 NOP + maddaw.xyz ACC,VF12,VF06w sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?2 + madd.xyz VF15,VF10,VF05 lq.xyz VF18,0(VI06) + add.xyz VF18,VF18,VF15 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?2 +EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI1: +; _LNOPT_w=[ ] 25 [38 0] 38 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI1] + maddy.w VF07,VF00,VF09y NOP + NOP NOP + NOP NOP + mul.w VF06,VF05,VF05 NOP + maxx.w VF05,VF07,VF00x NOP + adday.z ACC,VF18,VF18y NOP + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 + maddx.z VF18,VF09,VF18x NOP + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?2 + maxx.z VF18,VF18,VF00x NOP + mul.w VF06,VF06,VF06 NOP + mula.xyz ACC,VF17,VF06 NOP + mul.w VF05,VF05,VF05 NOP + mulz.xyz VF17,VF11,VF18z NOP + maddaw.xyz ACC,VF12,VF06w NOP + madd.xyz VF13,VF10,VF05 lq.xyz VF18,0(VI06) + mul.w VF06,VF05,VF05 NOP + mula.xyz ACC,VF17,VF06 NOP + add.xyz VF18,VF18,VF13 NOP ; STALL_LATENCY ?1 + mul.w VF06,VF06,VF06 NOP + maddaw.xyz ACC,VF12,VF06w sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?3 + madd.xyz VF10,VF10,VF05 lq.xyz VF18,0(VI06) + add.xyz VF18,VF18,VF10 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT + NOP sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?2 +EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI2: +; _LNOPT_w=[ ] 16 [38 0] 38 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EPI2] + maddy.w VF07,VF00,VF09y NOP 
+ maxx.w VF05,VF07,VF00x NOP ; STALL_LATENCY ?3 + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?3 + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?3 + adday.z ACC,VF18,VF18y NOP + maddx.z VF18,VF09,VF18x NOP + mul.w VF05,VF05,VF05 NOP ; STALL_LATENCY ?1 + maxx.z VF18,VF18,VF00x NOP ; STALL_LATENCY ?1 + mul.w VF06,VF05,VF05 NOP ; STALL_LATENCY ?1 + mulz.xyz VF11,VF11,VF18z NOP ; STALL_LATENCY ?1 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 + mula.xyz ACC,VF11,VF06 NOP ; STALL_LATENCY ?1 + maddaw.xyz ACC,VF12,VF06w NOP ; STALL_LATENCY ?1 + madd.xyz VF10,VF10,VF05 lq.xyz VF18,0(VI06) + add.xyz VF18,VF18,VF10 NOP ; STALL_LATENCY ?3 + NOP sqi.xyz VF18,(VI06++) ; STALL_LATENCY ?3 +EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_indexed_pp4_vcl_dir_light_vert_loop_lid__EXIT_POINT] + NOP isubiu VI01,VI01,0x00000001 + NOP iaddiu VI02,VI02,0x00000001 + NOP ibne VI01,VI00,dir_light_loop_lid + NOP ilw.x VI03,0(VI02) +pt_lights_lid: +; _LNOPT_w=[ normal2 ] 4 [6 0] 6 [pt_lights_lid] + NOP ilw.y VI02,0(VI00) + NOP iaddiu VI03,VI00,0x00000001 + NOP ibeq VI02,VI00,done_lighting_lid ; STALL_LATENCY ?2 + NOP ilw.y VI04,0(VI03) +; _LNOPT_w=[ normal2 ] 1 [4 0] 1 [__v_vu1_indexed_pp4_vcl_13] + maxw.z VF09,VF00,VF00w NOP +pt_light_loop_lid: +; _LNOPT_w=[ another ] 35 [43 0] 51 [pt_light_loop_lid] + NOP NOP + NOP NOP + NOP lq.xyz VF10,71(VI00) + NOP lq.xyz VF11,3(VI04) + NOP lq.xyz VF14,72(VI00) + mulax.xyz ACC,VF10,VF11x lq.xyz VF13,73(VI00) ; STALL_LATENCY ?2 + madday.xyz ACC,VF14,VF11y lq.xyz VF14,74(VI00) + NOP xtop VI07 + maddaz.xyz ACC,VF13,VF11z iaddiu VI05,VI07,0x00000005 ; STALL_LATENCY ?1 + maddw.xyz VF14,VF14,VF00w lq.xyz VF17,0(VI05) + sub.xyz VF17,VF14,VF17 ilw.x VI06,0(VI07) ; STALL_LATENCY ?3 + mul.xyz VF16,VF17,VF17 lq.xyz VF12,2(VI04) ; STALL_LATENCY ?3 + adday.z ACC,VF16,VF16y lq.xyz VF11,1(VI04) ; STALL_LATENCY ?3 + maddx.z VF16,VF09,VF16x iadd VI08,VI05,VI06 + NOP iadd VI08,VI08,VI06 + NOP iadd VI06,VI08,VI06 + NOP sqrt Q,VF16z 
; STALL_LATENCY ?1 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + addq.y VF16,VF00,Q lq.xyz VF10,0(VI04) + NOP div Q,VF00w,VF16y ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + addw.x VF16,VF00,VF00w lq.xyz VF13,5(VI04) + mulq.xyz VF17,VF17,Q iaddiu VI05,VI05,0x00000003 + NOP NOP + mul.xyz VF12,VF12,VF07 ibeq VI05,VI06,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI2 + mul.xyz VF16,VF16,VF13 iaddiu VI07,VI07,0x000000ac +; _LNOPT_w=[ ] 21 [38 0] 39 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__PRO1] + add.xyz VF18,VF08,VF17 lq.xyz VF19,0(VI05) + sub.xyz VF19,VF14,VF19 esadd P,VF18 ; STALL_LATENCY ?3 + mul.xyz VF21,VF19,VF19 lq.xyz VF20,-2(VI05) ; STALL_LATENCY ?3 + adday.z ACC,VF21,VF21y NOP ; STALL_LATENCY ?3 + maddx.z VF21,VF09,VF21x NOP + NOP waitp + addw.x VF21,VF00,VF00w mfp.w VF06,P + NOP sqrt Q,VF21z ; STALL_LATENCY ?1 + NOP ersqrt P,VF06w ; STALL_LATENCY ?1 + addq.y VF21,VF00,Q waitq ; STALL_LATENCY ?4 + NOP div Q,VF00w,VF21y ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + mul.xyz VF16,VF21,VF13 move.xyz VF15,VF16 + mul.xyz VF21,VF17,VF20 NOP + mulq.xyz VF17,VF19,Q iaddiu VI05,VI05,0x00000003 + NOP move.xyz VF19,VF18 + NOP ibeq VI05,VI06,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1 + mulax.w ACC,VF00,VF21x mfp.w VF06,P +; _LNOPT_w=[ ] 32 [38 0] 39 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__PRO2] + add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI05) + NOP NOP + NOP NOP + mulw.xyz VF22,VF19,VF06w NOP + sub.xyz VF19,VF14,VF23 NOP + madday.w ACC,VF00,VF21y esadd P,VF18 + maddz.w VF06,VF00,VF21z NOP + mul.xyz VF20,VF22,VF20 NOP + mul.xyz VF21,VF19,VF19 NOP + mulax.w ACC,VF00,VF20x NOP ; STALL_LATENCY ?2 + adday.z ACC,VF21,VF21y NOP + maddx.z VF21,VF09,VF21x NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z waitp + maxx.w VF05,VF06,VF00x mfp.w VF06,P + mulax.w ACC,VF00,VF15x sqrt Q,VF21z + madday.w ACC,VF00,VF15y NOP + maxx.w VF07,VF07,VF00x NOP + mulw.xyz VF20,VF11,VF05w ersqrt P,VF06w + 
mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 + addq.y VF21,VF00,Q waitq + mul.w VF06,VF06,VF06 div Q,VF00w,VF21y ; STALL_LATENCY ?3 + addw.x VF21,VF00,VF00w NOP + mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI05) + maddz.w VF05,VF00,VF15z NOP + mul.w VF06,VF06,VF06 NOP + mul.xyz VF16,VF21,VF13 move.xyz VF15,VF16 + mul.xyz VF21,VF17,VF20 NOP + mulq.xyz VF17,VF19,Q iaddiu VI05,VI05,0x00000003 + mul.w VF07,VF06,VF06 move.xyz VF19,VF18 + NOP ibeq VI05,VI06,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0 + mulax.w ACC,VF00,VF21x mfp.w VF06,P +EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ vuta ] 39 [38 34] 39 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP] + add.xyz VF18,VF08,VF17 lq.xyz VF23,0(VI05) + mul.w VF07,VF07,VF07 NOP + madday.w ACC,VF00,VF21y NOP + mulw.xyz VF22,VF19,VF06w NOP + sub.xyz VF19,VF14,VF23 esadd P,VF18 + maddaw.xyz ACC,VF12,VF07w NOP + maddz.w VF06,VF00,VF21z NOP + mul.xyz VF20,VF22,VF20 NOP + mul.xyz VF21,VF19,VF19 NOP + madd.xyz VF22,VF10,VF05 div Q,VF00w,VF05w + maxx.w VF05,VF06,VF00x NOP + mulax.w ACC,VF00,VF20x NOP + adday.z ACC,VF21,VF21y NOP + maddx.z VF21,VF09,VF21x NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z mfp.w VF06,P + mulq.xyz VF23,VF22,Q lq.xyz VF22,0(VI07) + mulax.w ACC,VF00,VF15x sqrt Q,VF21z + madday.w ACC,VF00,VF15y NOP + maxx.w VF07,VF07,VF00x NOP + mulw.xyz VF20,VF11,VF05w ersqrt P,VF06w + add.xyz VF22,VF22,VF23 NOP + addw.x VF21,VF00,VF00w NOP + mul.w VF06,VF07,VF07 NOP + addq.y VF21,VF00,Q NOP + mula.xyz ACC,VF20,VF06 sqi.xyz VF22,(VI07++) + maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 + mul.w VF06,VF06,VF06 lq.xyz VF20,-2(VI05) + mul.xyz VF16,VF21,VF13 div Q,VF00w,VF21y + NOP NOP + NOP NOP + mul.w VF06,VF06,VF06 NOP + mul.xyz VF21,VF17,VF20 NOP + NOP NOP + NOP NOP + mulq.xyz VF17,VF19,Q iaddiu VI05,VI05,0x00000003 + mul.w VF07,VF06,VF06 move.xyz VF19,VF18 + NOP ibne VI05,VI06,EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__MAIN_LOOP + mulax.w ACC,VF00,VF21x mfp.w VF06,P 
+EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0: +; _LNOPT_w=[ ] 60 [93 0] 93 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI0] + add.xyz VF13,VF08,VF17 NOP + mulw.xyz VF19,VF19,VF06w esadd P,VF13 ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + mul.w VF07,VF07,VF07 NOP + madday.w ACC,VF00,VF21y NOP + maddz.w VF06,VF00,VF21z NOP + mul.xyz VF20,VF19,VF20 NOP + maddaw.xyz ACC,VF12,VF07w NOP + madd.xyz VF21,VF10,VF05 NOP + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mulax.w ACC,VF00,VF20x mfp.w VF06,P + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + mulw.xyz VF20,VF11,VF05w NOP + mulax.w ACC,VF00,VF15x ersqrt P,VF06w + madday.w ACC,VF00,VF15y NOP + maxx.w VF07,VF07,VF00x NOP + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulq.xyz VF19,VF21,Q lq.xyz VF21,0(VI07) + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 + mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI05) + add.xyz VF21,VF21,VF19 move.xyz VF19,VF13 + mul.w VF07,VF06,VF06 mfp.w VF06,P ; STALL_LATENCY ?1 + mul.xyz VF21,VF17,VF20 sqi.xyz VF21,(VI07++) ; STALL_LATENCY ?1 + mulw.xyz VF13,VF19,VF06w NOP ; STALL_LATENCY ?1 + maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 + mul.w VF07,VF07,VF07 NOP + mulax.w ACC,VF00,VF21x NOP + mul.xyz VF20,VF13,VF20 NOP + madday.w ACC,VF00,VF21y NOP + maddaw.xyz ACC,VF12,VF07w NOP + maddz.w VF06,VF00,VF21z NOP + mulax.w ACC,VF00,VF20x NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?3 + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 + mulw.xyz VF20,VF11,VF05w NOP + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 + madd.xyz VF13,VF10,VF05 NOP + mula.xyz ACC,VF20,VF06 NOP + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 + mulq.xyz VF17,VF13,Q waitq + mulax.w ACC,VF00,VF15x NOP + madday.w ACC,VF00,VF15y NOP + mul.w VF07,VF06,VF06 NOP + maddz.w VF05,VF00,VF15z lq.xyz VF13,0(VI07) + mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 + add.xyz 
VF11,VF13,VF17 div Q,VF00w,VF05w + maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 + madd.xyz VF10,VF10,VF05 NOP + NOP sqi.xyz VF11,(VI07++) + mulq.xyz VF12,VF10,Q lq.xyz VF10,0(VI07) ; STALL_LATENCY ?2 + add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT + NOP sqi.xyz VF10,(VI07++) ; STALL_LATENCY ?2 +EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1: +; _LNOPT_w=[ ] 53 [93 0] 93 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI1] + add.xyz VF13,VF08,VF17 NOP + mulw.xyz VF19,VF19,VF06w esadd P,VF13 ; STALL_LATENCY ?3 + mul.xyz VF20,VF19,VF20 move.xyz VF19,VF13 ; STALL_LATENCY ?3 + madday.w ACC,VF00,VF21y NOP + maddz.w VF06,VF00,VF21z NOP + maxx.w VF05,VF06,VF00x waitp ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF20x mfp.w VF06,P + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + mulw.xyz VF20,VF11,VF05w NOP + mulax.w ACC,VF00,VF15x ersqrt P,VF06w + madday.w ACC,VF00,VF15y NOP + maxx.w VF07,VF07,VF00x NOP + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mula.xyz ACC,VF20,VF06 lq.xyz VF20,-2(VI05) + NOP waitp ; STALL_LATENCY ?1 + mul.w VF07,VF06,VF06 mfp.w VF06,P + mul.xyz VF21,VF17,VF20 NOP + mulw.xyz VF13,VF19,VF06w NOP ; STALL_LATENCY ?2 + maddz.w VF05,VF00,VF15z move.xyz VF15,VF16 + mul.w VF07,VF07,VF07 NOP + mulax.w ACC,VF00,VF21x NOP + mul.xyz VF20,VF13,VF20 NOP + madday.w ACC,VF00,VF21y NOP + maddaw.xyz ACC,VF12,VF07w NOP + maddz.w VF06,VF00,VF21z NOP + mulax.w ACC,VF00,VF20x NOP + madday.w ACC,VF00,VF20y NOP + maddz.w VF07,VF00,VF20z NOP + maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?3 + maxx.w VF05,VF06,VF00x div Q,VF00w,VF05w + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 + mulw.xyz VF20,VF11,VF05w NOP + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?2 + madd.xyz VF13,VF10,VF05 NOP + mula.xyz ACC,VF20,VF06 NOP + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?1 + mulq.xyz VF17,VF13,Q waitq + mulax.w 
ACC,VF00,VF15x NOP + madday.w ACC,VF00,VF15y NOP + mul.w VF07,VF06,VF06 NOP + maddz.w VF05,VF00,VF15z lq.xyz VF13,0(VI07) + mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 + add.xyz VF11,VF13,VF17 div Q,VF00w,VF05w + maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 + madd.xyz VF10,VF10,VF05 NOP + NOP sqi.xyz VF11,(VI07++) + mulq.xyz VF12,VF10,Q lq.xyz VF10,0(VI07) ; STALL_LATENCY ?2 + add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 + NOP b EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT + NOP sqi.xyz VF10,(VI07++) ; STALL_LATENCY ?2 +EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI2: +; _LNOPT_w=[ ] 49 [93 0] 93 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EPI2] + add.xyz VF13,VF08,VF17 lq.xyz VF14,-2(VI05) + mul.xyz VF17,VF17,VF14 esadd P,VF13 ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF17x move.xyz VF16,VF16 ; STALL_LATENCY ?3 + madday.w ACC,VF00,VF17y NOP + NOP waitp ; STALL_LATENCY ?4 + NOP mfp.w VF06,P + NOP ersqrt P,VF06w ; STALL_LATENCY ?3 + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP NOP + NOP move.xyz VF13,VF13 + NOP mfp.w VF06,P + mulw.xyz VF13,VF13,VF06w NOP ; STALL_LATENCY ?3 + maddz.w VF06,VF00,VF17z NOP + mul.xyz VF17,VF13,VF14 NOP ; STALL_LATENCY ?2 + maxx.w VF05,VF06,VF00x NOP + mulax.w ACC,VF00,VF17x NOP ; STALL_LATENCY ?2 + madday.w ACC,VF00,VF17y NOP + maddz.w VF07,VF00,VF17z NOP + mulw.xyz VF11,VF11,VF05w NOP + maxx.w VF07,VF07,VF00x NOP ; STALL_LATENCY ?2 + mula.xyz ACC,VF11,VF06 NOP + mul.w VF06,VF07,VF07 NOP ; STALL_LATENCY ?2 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mul.w VF06,VF06,VF06 NOP ; STALL_LATENCY ?3 + mulax.w ACC,VF00,VF16x NOP + madday.w ACC,VF00,VF16y NOP + mul.w VF07,VF06,VF06 NOP ; STALL_LATENCY ?1 + maddz.w VF05,VF00,VF16z NOP + mul.w VF07,VF07,VF07 NOP ; STALL_LATENCY ?2 + NOP div Q,VF00w,VF05w + maddaw.xyz ACC,VF12,VF07w NOP ; STALL_LATENCY ?2 + madd.xyz VF10,VF10,VF05 NOP + mulq.xyz VF12,VF10,Q 
lq.xyz VF10,0(VI07) ; STALL_LATENCY ?3 + add.xyz VF10,VF10,VF12 NOP ; STALL_LATENCY ?3 + NOP sqi.xyz VF10,(VI07++) ; STALL_LATENCY ?3 +EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [4 0] 4 [EXPL_vu1_indexed_pp4_vcl_pt_light_vert_loop_lid__EXIT_POINT] + NOP isubiu VI02,VI02,0x00000001 + NOP iaddiu VI03,VI03,0x00000001 + NOP ibne VI02,VI00,pt_light_loop_lid + NOP ilw.y VI04,0(VI03) +done_lighting_lid: +; _LNOPT_w=[ normal2 ] 12 [12 0] 12 [done_lighting_lid] + NOP loi 0x44fff000 + NOP lq.w VF06,57(VI00) + addi.xy VF05,VF00,I loi 0x45000000 + NOP xtop VI04 + NOP fcset 0 + NOP mr32.z VF05,VF06 + NOP lq.xyz VF06,76(VI00) + NOP ilw.w VI03,76(VI00) + NOP iaddiu VI05,VI00,0x0000004e + NOP iaddiu VI06,VI04,0x00000005 + NOP ibne VI01,VI00,colors_ready_lid + maxi.w VF07,VF00,I iaddiu VI07,VI04,0x000000ac +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_indexed_pp4_vcl_19] + NOP ibne VI02,VI00,colors_ready_lid + NOP NOP +; _LNOPT_w=[ normal2 ] 6 [9 0] 10 [__v_vu1_indexed_pp4_vcl_20] + NOP lq.xyz VF08,57(VI00) + NOP lq.xyz VF09,59(VI00) + mul.xyz VF08,VF08,VF09 lq.xyz VF07,58(VI00) ; STALL_LATENCY ?3 + NOP ilw.x VI01,0(VI04) + NOP iaddiu VI02,VI04,0x000000ac + add.xyz VF07,VF07,VF08 iaddiu VI08,VI00,0 ; STALL_LATENCY ?1 +seed_const_colors_lid: +; _LNOPT_w=[ normal2 ] 4 [4 3] 4 [seed_const_colors_lid] + NOP iaddiu VI08,VI08,0x00000001 + NOP NOP + NOP ibne VI08,VI01,seed_const_colors_lid + NOP sqi.xyz VF07,(VI02++) +colors_ready_lid: +; _LNOPT_w=[ vuta1 ] 47 [40 0] 53 [colors_ready_lid] + NOP loi 0x40400000 + maxi.z VF07,VF00,I loi 0x437f0000 + NOP lq.w VF05,60(VI00) + NOP lq.xyz VF09,75(VI00) + NOP ilw.z VI02,0(VI04) + maxi.y VF08,VF00,I loi 0x43000000 + NOP ilw.y VI09,0(VI04) + NOP mtir VI08,VF09x + NOP ior VI02,VI08,VI02 + muli.w VF05,VF05,I loi 0x437f0000 + NOP mfir.x VF09,VI02 + NOP mfir.w VF09,VI05 + NOP iaddiu VI02,VI04,0x00000005 + NOP iadd VI09,VI02,VI09 + NOP iaddiu VI04,VI00,0x0000004b + NOP sq VF09,77(VI00) + NOP xgkick VI04 + NOP ilw.w 
VI04,0(VI02) + minii.w VF08,VF05,I lqi.w VF14,(VI02++) + maxi.w VF06,VF00,I loi 0x437d0000 + NOP iaddiu VI08,VI00,0x000000ff + NOP iand VI04,VI04,VI08 + addy.w VF12,VF14,VF08y iadd VI01,VI04,VI04 + maxi.w VF05,VF00,I iadd VI01,VI01,VI04 + mulz.w VF11,VF14,VF07z iadd VI10,VI01,VI06 + NOP lq.xyz VF09,0(VI10) + add.w VF11,VF11,VF05 NOP ; STALL_LATENCY ?2 + mulax ACC,VF01,VF09x NOP + madday ACC,VF02,VF09y NOP + maddaz ACC,VF03,VF09z iadd VI04,VI04,VI07 + maddw VF13,VF04,VF00w mtir VI11,VF11w + NOP lq.xyz VF09,0(VI04) + NOP iadd VI04,VI11,VI06 + NOP lq.xyz VF12,0(VI04) + NOP div Q,VF00w,VF13w + miniw.xyz VF09,VF09,VF06w NOP + mulax ACC,VF01,VF12x NOP ; STALL_LATENCY ?1 + madday ACC,VF02,VF12y NOP + maddaz ACC,VF03,VF12z NOP + mulq.xyz VF11,VF13,Q waitq ; STALL_LATENCY ?1 + maddw VF16,VF04,VF00w mtir VI01,VF12w + add.xyz VF13,VF11,VF05 lq.xyz VF14,2(VI10) ; STALL_LATENCY ?2 + mul.xyz VF11,VF11,VF06 iadd VI01,VI01,VI07 + NOP lq.xyz VF12,0(VI01) + ftoi0.w VF09,VF08 div Q,VF00w,VF16w + ftoi0.xyz VF09,VF09 ibeq VI02,VI09,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI1 + clipw.xyz VF11xyz,VF07w lq.xyz VF11,2(VI04) +; _LNOPT_w=[ ] 38 [37 0] 38 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__PRO1] + mulq.xyz VF15,VF14,Q ilw.w VI11,0(VI02) + NOP NOP + NOP lqi.w VF14,(VI02++) + NOP sq VF09,1(VI05) + mulq.xyz VF09,VF16,Q fcand VI01,262143 + NOP iand VI11,VI11,VI08 + NOP iadd VI10,VI11,VI11 + mulz.w VF11,VF14,VF07z iadd VI10,VI10,VI11 + mul.xyz VF10,VF09,VF06 iadd VI12,VI10,VI06 + add.xyz VF14,VF09,VF05 iadd VI11,VI11,VI07 + addy.w VF12,VF14,VF08y lq.xyz VF09,0(VI12) + ftoi4.xyz VF16,VF13 iand VI04,VI01,VI03 + add.w VF11,VF11,VF05 ior VI04,VI04,VI00 + clipw.xyz VF10xyz,VF07w iaddiu VI01,VI04,0x00007fff + mulax ACC,VF01,VF09x mfir.w VF16,VI01 + madday ACC,VF02,VF09y mtir VI10,VF12w + maddaz ACC,VF03,VF09z mtir VI04,VF11w + maddw VF13,VF04,VF00w lq.xyz VF10,0(VI11) + mulq.xyz VF16,VF11,Q sq VF16,2(VI05) + NOP iadd VI11,VI04,VI06 + NOP sq.xyz VF15,0(VI05) + NOP div Q,VF00w,VF13w + miniw.xyz 
VF09,VF10,VF06w iaddiu VI04,VI05,0 + miniw.xyz VF10,VF12,VF06w lq.xyz VF12,0(VI11) + NOP fcand VI01,262143 + ftoi4.xyz VF15,VF14 lq.xyz VF14,2(VI12) + NOP iand VI05,VI01,VI03 + mulax ACC,VF01,VF12x ior VI05,VI05,VI00 + mulq.xyz VF11,VF13,Q iaddiu VI05,VI05,0x00007fff + madday ACC,VF02,VF12y mfir.w VF15,VI05 + maddaz ACC,VF03,VF12z iadd VI10,VI10,VI07 + maddw VF16,VF04,VF00w sq.xyz VF16,3(VI04) + add.xyz VF13,VF11,VF05 iaddiu VI05,VI04,0x00000006 + mul.xyz VF11,VF11,VF06 lq.xyz VF12,0(VI10) + NOP sq VF15,5(VI04) + NOP div Q,VF00w,VF16w + ftoi0.xyz VF09,VF09 ibeq VI02,VI09,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI0 + clipw.xyz VF11xyz,VF07w lq.xyz VF11,2(VI11) +EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP: +; _LPOPT_w=[ ldumb ] 38 [38 38] 38 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP] + NOP ilw.w VI11,0(VI02) + NOP lqi.w VF14,(VI02++) + NOP sq VF09,1(VI05) + mulq.xyz VF15,VF14,Q fcand VI01,262143 + mulq.xyz VF09,VF16,Q iand VI11,VI11,VI08 + addy.w VF12,VF14,VF08y iadd VI10,VI11,VI11 + mulz.w VF11,VF14,VF07z iadd VI10,VI10,VI11 + NOP iadd VI12,VI10,VI06 + add.xyz VF14,VF09,VF05 iadd VI11,VI11,VI07 + mul.xyz VF18,VF09,VF06 mtir VI10,VF12w + add.w VF11,VF11,VF05 lq.xyz VF17,0(VI12) + ftoi4.xyz VF16,VF13 iand VI01,VI01,VI03 + ftoi0.xyz VF09,VF10 ior VI01,VI01,VI00 + clipw.xyz VF18xyz,VF07w iaddiu VI13,VI01,0x00007fff + mulax ACC,VF01,VF17x mtir VI01,VF11w + madday ACC,VF02,VF17y mfir.w VF16,VI13 + maddaz ACC,VF03,VF17z lq.xyz VF10,0(VI11) + maddw VF13,VF04,VF00w iadd VI11,VI01,VI06 + NOP sq.xyz VF15,0(VI05) + NOP sq VF16,2(VI05) + miniw.xyz VF09,VF10,VF06w sq VF09,4(VI04) + NOP div Q,VF00w,VF13w + miniw.xyz VF10,VF12,VF06w iaddiu VI04,VI05,0 + NOP lq.xyz VF12,0(VI11) + NOP fcand VI01,262143 + ftoi4.xyz VF15,VF14 lq.xyz VF14,2(VI12) + mulq.xyz VF16,VF11,Q iand VI05,VI01,VI03 + mulax ACC,VF01,VF12x ior VI05,VI05,VI00 + mulq.xyz VF11,VF13,Q iaddiu VI05,VI05,0x00007fff + madday ACC,VF02,VF12y mfir.w VF15,VI05 + maddaz ACC,VF03,VF12z iadd VI10,VI10,VI07 + 
maddw VF16,VF04,VF00w sq.xyz VF16,3(VI04) + add.xyz VF13,VF11,VF05 iaddiu VI05,VI04,0x00000006 + mul.xyz VF11,VF11,VF06 lq.xyz VF12,0(VI10) + NOP sq VF15,5(VI04) + NOP div Q,VF00w,VF16w + ftoi0.xyz VF09,VF09 ibne VI02,VI09,EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__MAIN_LOOP + clipw.xyz VF11xyz,VF07w lq.xyz VF11,2(VI11) +EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI0: +; _LNOPT_w=[ ] 25 [21 0] 26 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI0] + mulq.xyz VF07,VF14,Q NOP + NOP NOP + NOP NOP + NOP NOP + mulq.xyz VF09,VF16,Q sq VF09,1(VI05) + NOP NOP + NOP fcand VI01,262143 + NOP iand VI01,VI01,VI03 + ftoi4.xyz VF16,VF13 ior VI01,VI01,VI00 + mul.xyz VF06,VF09,VF06 iaddiu VI01,VI01,0x00007fff + add.xyz VF14,VF09,VF05 mfir.w VF16,VI01 + ftoi0.xyz VF09,VF10 NOP + clipw.xyz VF06xyz,VF07w sq.xyz VF07,0(VI05) ; STALL_LATENCY ?1 + mulq.xyz VF16,VF11,Q sq VF16,2(VI05) + miniw.xyz VF10,VF12,VF06w sq VF09,4(VI04) + NOP iaddiu VI04,VI05,0 + NOP fcand VI01,262143 + NOP iand VI05,VI01,VI03 + ftoi0.xyz VF09,VF10 ior VI05,VI05,VI00 + ftoi4.xyz VF12,VF14 iaddiu VI05,VI05,0x00007fff + NOP mfir.w VF12,VI05 + NOP sq.xyz VF16,3(VI04) + NOP sq VF09,4(VI04) + NOP b EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EXIT_POINT + NOP sq VF12,5(VI04) +EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI1: +; _LNOPT_w=[ ] 23 [21 0] 26 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EPI1] + mulq.xyz VF07,VF14,Q NOP + NOP NOP + NOP sq VF09,1(VI05) + NOP fcand VI01,262143 + mulq.xyz VF09,VF16,Q iand VI02,VI01,VI03 + NOP ior VI02,VI02,VI00 + ftoi4.xyz VF16,VF13 iaddiu VI02,VI02,0x00007fff + NOP mfir.w VF16,VI02 + add.xyz VF14,VF09,VF05 sq.xyz VF07,0(VI05) + mul.xyz VF09,VF09,VF06 NOP + mulq.xyz VF16,VF11,Q sq VF16,2(VI05) ; STALL_LATENCY ?1 + clipw.xyz VF09xyz,VF07w NOP ; STALL_LATENCY ?1 + NOP NOP + NOP NOP + miniw.xyz VF09,VF12,VF06w iaddiu VI02,VI05,0 + NOP fcand VI01,262143 + NOP iand VI05,VI01,VI03 + ftoi4.xyz VF12,VF14 ior VI05,VI05,VI00 + ftoi0.xyz VF09,VF09 iaddiu VI05,VI05,0x00007fff + NOP mfir.w VF12,VI05 + NOP 
sq.xyz VF16,3(VI02) + NOP sq VF09,4(VI02) ; STALL_LATENCY ?1 + NOP sq VF12,5(VI02) +EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EXIT_POINT: +; _LNOPT_w=[ ] 4 [2 0] 4 [EXPL_vu1_indexed_pp4_vcl_xform_loop_lid__EXIT_POINT] + NOP iaddiu VI01,VI00,0x0000004d + NOP xgkick VI01 + NOP[E] NOP + NOP NOP +; _LNOPT_w=[ normal2 ] 2 [2 0] 2 [__v_vu1_indexed_pp4_vcl_26] + NOP b main_loop_lid + NOP NOP + .align 4 +vsmIndexed_CodeEnd: +; iCount=657 +; register stats: +; 14 VU User integer +; 24 VU User floating point From 89d011b0dc3c779885a7f08256b3f3301a4f7cdb Mon Sep 17 00:00:00 2001 From: iann Date: Mon, 13 Oct 2025 02:28:36 +0900 Subject: [PATCH 13/13] LINES done on EE, workaround for pvc --- src/base_renderer.cpp | 11 +++++++-- src/gmanager.cpp | 55 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 63 insertions(+), 3 deletions(-) diff --git a/src/base_renderer.cpp b/src/base_renderer.cpp index 0dbf52f6..a809bd45 100644 --- a/src/base_renderer.cpp +++ b/src/base_renderer.cpp @@ -396,6 +396,8 @@ void CBaseRenderer::AddVu1RendererContext(CVifSCDmaPacket& packet, GLenum primTy GLenum newPrimType = drawContext.GetPolygonMode(); if (newPrimType == GL_FILL) newPrimType = primType; + if (newPrimType == GL_LINE) + newPrimType = GL_LINES; newPrimType &= 0xff; tGifTag giftag = BuildGiftag(newPrimType); packet += giftag; @@ -420,8 +422,12 @@ tGifTag CBaseRenderer::BuildGiftag(GLenum primType) { CGLContext& glContext = *pGLContext; - - primType &= 0x7; // convert from GL #define to gs prim number + //TODO: JESUS CHRIST + if (primType == GL_LINES) { + primType = 1; + } else { + primType &= 0x7; // convert from GL #define to gs prim number + } CImmDrawContext& drawContext = glContext.GetImmDrawContext(); bool smoothShading = drawContext.GetDoSmoothShading(); bool useTexture = glContext.GetTexManager().GetTexEnabled(); @@ -429,6 +435,7 @@ CBaseRenderer::BuildGiftag(GLenum primType) unsigned int nreg = OutputQuadsPerVert; // bool flip = drawContext.CurFrameMem != 
drawContext.Frame0Mem; // GS::tPrim prim = { .prim_type = primType, .iip = smoothShading, .tme = useTexture, .fge = 0, .abe = alpha, .aa1 = 0, .fst = 0, .ctxt = flip, .fix = 0 }; + mDebugPrint("primType =%d)\n", (int)primType); GS::tPrim prim = { .prim_type = primType, .iip = smoothShading, .tme = useTexture, .fge = 0, .abe = alpha, .aa1 = 0, .fst = 0, .ctxt = 0, .fix = 0 }; tGifTag giftag = { .NLOOP = 0, .EOP = 1, .pad0 = 0, .id = 0, .PRE = 1, .PRIM = *(uint64_t*)&prim, .FLG = 0, .NREG = nreg, .REGS0 = 2, .REGS1 = 1, .REGS2 = 4 }; diff --git a/src/gmanager.cpp b/src/gmanager.cpp index a5052d40..86e288fa 100644 --- a/src/gmanager.cpp +++ b/src/gmanager.cpp @@ -206,6 +206,33 @@ void glDrawArrays(GLenum mode, GLint first, GLsizei count) { GL_FUNC_DEBUG("%s\n", __FUNCTION__); + if (pGLContext->GetImmDrawContext().GetPolygonMode() == GL_LINE && mode == GL_TRIANGLES) { + GLushort maxIndex = (GLushort)(first + count - 1); + if (maxIndex <= 255) { + GLsizei triangleCount = count / 3; + GLsizei lineIndexCount = triangleCount * 6; + static uint8_t* indices_u8_scratch = NULL; + static int scratchCapacity = 0; + if (scratchCapacity < lineIndexCount) { + delete[] indices_u8_scratch; + indices_u8_scratch = new uint8_t[lineIndexCount]; + scratchCapacity = (int)lineIndexCount; + } + uint8_t* p = indices_u8_scratch; + for (GLsizei i = 0; i + 2 < count; i += 3) { + uint8_t a = (uint8_t)(first + i + 0); + uint8_t b = (uint8_t)(first + i + 1); + uint8_t c = (uint8_t)(first + i + 2); + *p++ = a; *p++ = b; + *p++ = b; *p++ = c; + *p++ = c; *p++ = a; + } + CGeomManager& gmanager = pGLContext->GetGeomManager(); + gmanager.IndexedArraysGeomStage(GL_TRIANGLES, (int)lineIndexCount, indices_u8_scratch, (int)(maxIndex + 1)); + return; + } + mode = GL_LINES; + } CGeomManager& gmanager = pGLContext->GetGeomManager(); gmanager.LinearArraysGeomStage(mode, first, count); } @@ -231,6 +258,32 @@ void glDrawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid* indic const int numVertices = 
(int)max + 1; + static GLushort* indices_u16_scratch = NULL; + static int scratch16Capacity = 0; + if (pGLContext->GetImmDrawContext().GetPolygonMode() == GL_LINE && mode == GL_TRIANGLES) + { + int triangleCount = count / 3; + int lineCount = triangleCount * 6; + if (scratch16Capacity < lineCount) { + delete[] indices_u16_scratch; + indices_u16_scratch = new GLushort[lineCount]; + scratch16Capacity = lineCount; + } + GLushort* linesIndexBuffer = indices_u16_scratch; + if (mode == GL_TRIANGLES) { + for (GLsizei i = 0; i + 2 < count; i += 3) { + GLushort a = indices_u16[i+0]; + GLushort b = indices_u16[i+1]; + GLushort c = indices_u16[i+2]; + *linesIndexBuffer++ = a; *linesIndexBuffer++ = b; + *linesIndexBuffer++ = b; *linesIndexBuffer++ = c; + *linesIndexBuffer++ = c; *linesIndexBuffer++ = a; + } + } + indices_u16 = indices_u16_scratch; + count = (GLsizei)(linesIndexBuffer - indices_u16_scratch); + //mode = GL_LINES; //TODO: add a renderer for lines? nah? + } if (max <= 255) { static uint8_t* indices_u8_scratch = NULL; static int scratchCapacity = 0; @@ -246,7 +299,7 @@ void glDrawElements(GLenum mode, GLsizei count, GLenum type, const GLvoid* indic gmanager.IndexedArraysGeomStage(mode, (int)count, indices_u8_scratch, numVertices); } else { CGeomManager& gmanager = pGLContext->GetGeomManager(); - gmanager.IndexedArraysGeomStage(mode, (int)count, (const unsigned char*)indices, numVertices); + gmanager.IndexedArraysGeomStage(mode, (int)count, (const unsigned char*)indices_u16, numVertices); } }