|
11 | 11 | .address_size 64 |
12 | 12 |
|
13 | 13 | // .globl __raygen__main |
14 | | -.const .align 8 .b8 params[24]; |
| 14 | +.const .align 8 .b8 params[40]; |
15 | 15 |
|
16 | 16 | .visible .entry __raygen__main() |
17 | 17 | { |
| 18 | + .reg .pred %p<3>; |
18 | 19 | .reg .f32 %f<10>; |
19 | | - .reg .b32 %r<83>; |
20 | | - .reg .b64 %rd<10>; |
| 20 | + .reg .b32 %r<117>; |
| 21 | + .reg .b64 %rd<19>; |
21 | 22 |
|
22 | 23 |
|
23 | 24 | // begin inline asm |
24 | | - call (%r1), _optix_get_launch_index_x, (); |
| 25 | + call (%r33), _optix_get_launch_index_x, (); |
25 | 26 | // end inline asm |
26 | 27 | // begin inline asm |
27 | | - call (%r2), _optix_get_launch_index_y, (); |
| 28 | + call (%r34), _optix_get_launch_index_y, (); |
28 | 29 | // end inline asm |
29 | 30 | // begin inline asm |
30 | | - call (%r3), _optix_get_launch_index_z, (); |
| 31 | + call (%r35), _optix_get_launch_index_z, (); |
31 | 32 | // end inline asm |
32 | 33 | // begin inline asm |
33 | | - call (%r4), _optix_get_launch_dimension_x, (); |
| 34 | + call (%r36), _optix_get_launch_dimension_x, (); |
34 | 35 | // end inline asm |
35 | 36 | // begin inline asm |
36 | | - call (%r5), _optix_get_launch_dimension_y, (); |
| 37 | + call (%r37), _optix_get_launch_dimension_y, (); |
37 | 38 | // end inline asm |
38 | | - mad.lo.s32 %r77, %r5, %r3, %r2; |
39 | | - mad.lo.s32 %r78, %r77, %r4, %r1; |
40 | | - ld.const.u64 %rd2, [params+8]; |
41 | | - cvta.to.global.u64 %rd3, %rd2; |
42 | | - mul.wide.u32 %rd4, %r78, 32; |
43 | | - add.s64 %rd5, %rd3, %rd4; |
44 | | - ld.global.f32 %f1, [%rd5]; |
45 | | - ld.global.f32 %f2, [%rd5+4]; |
46 | | - ld.global.f32 %f3, [%rd5+8]; |
47 | | - ld.global.f32 %f7, [%rd5+12]; |
48 | | - ld.global.f32 %f4, [%rd5+16]; |
49 | | - ld.global.f32 %f5, [%rd5+20]; |
50 | | - ld.global.f32 %f6, [%rd5+24]; |
51 | | - ld.global.f32 %f8, [%rd5+28]; |
52 | | - ld.const.u64 %rd1, [params]; |
| 39 | + mad.lo.s32 %r109, %r37, %r35, %r34; |
| 40 | + mad.lo.s32 %r110, %r109, %r36, %r33; |
| 41 | + cvt.u64.u32 %rd1, %r110; |
| 42 | + ld.const.u64 %rd5, [params+8]; |
| 43 | + cvta.to.global.u64 %rd6, %rd5; |
| 44 | + mul.wide.u32 %rd7, %r110, 32; |
| 45 | + add.s64 %rd8, %rd6, %rd7; |
| 46 | + ld.global.f32 %f1, [%rd8]; |
| 47 | + ld.global.f32 %f2, [%rd8+4]; |
| 48 | + ld.global.f32 %f3, [%rd8+8]; |
| 49 | + ld.global.f32 %f7, [%rd8+12]; |
| 50 | + ld.global.f32 %f4, [%rd8+16]; |
| 51 | + ld.global.f32 %f5, [%rd8+20]; |
| 52 | + ld.global.f32 %f6, [%rd8+24]; |
| 53 | + ld.global.f32 %f8, [%rd8+28]; |
| 54 | + ld.const.u64 %rd4, [params]; |
53 | 55 | mov.f32 %f9, 0f00000000; |
54 | | - mov.u32 %r42, 1; |
55 | | - mov.u32 %r44, 4; |
56 | | - mov.u32 %r76, 0; |
57 | | - // begin inline asm |
58 | | - call(%r6,%r7,%r8,%r9,%r10,%r11,%r12,%r13,%r14,%r15,%r16,%r17,%r18,%r19,%r20,%r21,%r22,%r23,%r24,%r25,%r26,%r27,%r28,%r29,%r30,%r31,%r32,%r33,%r34,%r35,%r36,%r37),_optix_trace_typed_32,(%r76,%rd1,%f1,%f2,%f3,%f4,%f5,%f6,%f7,%f8,%f9,%r42,%r76,%r76,%r42,%r76,%r44,%r79,%r80,%r81,%r82,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76,%r76); |
59 | | - // end inline asm |
60 | | - ld.const.u64 %rd6, [params+16]; |
61 | | - cvta.to.global.u64 %rd7, %rd6; |
62 | | - mul.wide.u32 %rd8, %r78, 16; |
63 | | - add.s64 %rd9, %rd7, %rd8; |
64 | | - st.global.u32 [%rd9], %r6; |
65 | | - st.global.u32 [%rd9+4], %r7; |
66 | | - st.global.u32 [%rd9+8], %r8; |
67 | | - st.global.u32 [%rd9+12], %r9; |
| 56 | + mov.u32 %r74, 1; |
| 57 | + mov.u32 %r76, 6; |
| 58 | + mov.u32 %r108, 0; |
| 59 | + // begin inline asm |
| 60 | + call(%r38,%r39,%r40,%r41,%r42,%r43,%r44,%r45,%r46,%r47,%r48,%r49,%r50,%r51,%r52,%r53,%r54,%r55,%r56,%r57,%r58,%r59,%r60,%r61,%r62,%r63,%r64,%r65,%r66,%r67,%r68,%r69),_optix_trace_typed_32,(%r108,%rd4,%f1,%f2,%f3,%f4,%f5,%f6,%f7,%f8,%f9,%r74,%r108,%r108,%r74,%r108,%r76,%r111,%r112,%r113,%r114,%r115,%r116,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108,%r108); |
| 61 | + // end inline asm |
| 62 | + ld.const.u64 %rd9, [params+16]; |
| 63 | + cvta.to.global.u64 %rd10, %rd9; |
| 64 | + mul.wide.u32 %rd11, %r110, 16; |
| 65 | + add.s64 %rd12, %rd10, %rd11; |
| 66 | + st.global.u32 [%rd12], %r38; |
| 67 | + st.global.u32 [%rd12+4], %r39; |
| 68 | + st.global.u32 [%rd12+8], %r40; |
| 69 | + st.global.u32 [%rd12+12], %r41; |
| 70 | + ld.const.u64 %rd2, [params+24]; |
| 71 | + setp.eq.s64 %p1, %rd2, 0; |
| 72 | + @%p1 bra $L__BB0_2; |
| 73 | + |
| 74 | + cvta.to.global.u64 %rd13, %rd2; |
| 75 | + shl.b64 %rd14, %rd1, 2; |
| 76 | + add.s64 %rd15, %rd13, %rd14; |
| 77 | + st.global.u32 [%rd15], %r42; |
| 78 | + |
| 79 | +$L__BB0_2: |
| 80 | + ld.const.u64 %rd3, [params+32]; |
| 81 | + setp.eq.s64 %p2, %rd3, 0; |
| 82 | + @%p2 bra $L__BB0_4; |
| 83 | + |
| 84 | + cvta.to.global.u64 %rd16, %rd3; |
| 85 | + shl.b64 %rd17, %rd1, 2; |
| 86 | + add.s64 %rd18, %rd16, %rd17; |
| 87 | + st.global.u32 [%rd18], %r43; |
| 88 | + |
| 89 | +$L__BB0_4: |
68 | 90 | ret; |
69 | 91 |
|
70 | 92 | } |
71 | 93 | // .globl __miss__miss |
72 | 94 | .visible .entry __miss__miss() |
73 | 95 | { |
74 | | - .reg .b32 %r<9>; |
| 96 | + .reg .b32 %r<13>; |
75 | 97 |
|
76 | 98 |
|
77 | 99 | mov.u32 %r8, 0; |
|
92 | 114 | // begin inline asm |
93 | 115 | call _optix_set_payload, (%r7, %r8); |
94 | 116 | // end inline asm |
| 117 | + mov.u32 %r9, 4; |
| 118 | + mov.u32 %r12, -1; |
| 119 | + // begin inline asm |
| 120 | + call _optix_set_payload, (%r9, %r12); |
| 121 | + // end inline asm |
| 122 | + mov.u32 %r11, 5; |
| 123 | + // begin inline asm |
| 124 | + call _optix_set_payload, (%r11, %r12); |
| 125 | + // end inline asm |
95 | 126 | ret; |
96 | 127 |
|
97 | 128 | } |
98 | 129 | // .globl __closesthit__chit |
99 | 130 | .visible .entry __closesthit__chit() |
100 | 131 | { |
101 | 132 | .reg .f32 %f<37>; |
102 | | - .reg .b32 %r<14>; |
| 133 | + .reg .b32 %r<19>; |
103 | 134 | .reg .b64 %rd<3>; |
104 | 135 |
|
105 | 136 |
|
106 | 137 | // begin inline asm |
107 | 138 | call (%f1), _optix_get_ray_tmax, (); |
108 | 139 | // end inline asm |
109 | | - cvt.rzi.ftz.u32.f32 %r13, %f1; |
| 140 | + cvt.rzi.ftz.u32.f32 %r18, %f1; |
110 | 141 | // begin inline asm |
111 | 142 | call (%rd1), _optix_get_gas_traversable_handle, (); |
112 | 143 | // end inline asm |
|
145 | 176 | mul.ftz.f32 %f33, %f24, %f31; |
146 | 177 | neg.ftz.f32 %f34, %f33; |
147 | 178 | mul.ftz.f32 %f35, %f31, %f27; |
148 | | - cvt.rn.f32.u32 %f36, %r13; |
| 179 | + cvt.rn.f32.u32 %f36, %r18; |
149 | 180 | mov.b32 %r6, %f36; |
150 | 181 | mov.u32 %r5, 0; |
151 | 182 | // begin inline asm |
|
166 | 197 | // begin inline asm |
167 | 198 | call _optix_set_payload, (%r11, %r12); |
168 | 199 | // end inline asm |
| 200 | + mov.u32 %r13, 4; |
| 201 | + // begin inline asm |
| 202 | + call _optix_set_payload, (%r13, %r1); |
| 203 | + // end inline asm |
| 204 | + // begin inline asm |
| 205 | + call (%r15), _optix_read_instance_id, (); |
| 206 | + // end inline asm |
| 207 | + mov.u32 %r16, 5; |
| 208 | + // begin inline asm |
| 209 | + call _optix_set_payload, (%r16, %r15); |
| 210 | + // end inline asm |
169 | 211 | ret; |
170 | 212 |
|
171 | 213 | } |
|
0 commit comments