Skip to content

Commit aac24a2

Browse files
author
Jason Sams
committed
Reduce rsForEach overhead.
Change-Id: Iaabef7bb573233ef7c5756077f840ee933ee0c39 Fix spacing; reduce rsForEach overhead by about 50%.
1 parent 384bf04 commit aac24a2

File tree

3 files changed

+97
-48
lines changed

3 files changed

+97
-48
lines changed

libs/rs/driver/rsdBcc.cpp

Lines changed: 15 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -226,6 +226,7 @@ static void wc_xy(void *usr, uint32_t idx) {
226226
RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
227227
uint32_t sig = mtls->sig;
228228

229+
outer_foreach_t fn = dc->mForEachLaunch[sig];
229230
while (1) {
230231
uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
231232
uint32_t yStart = mtls->yStart + slice * mtls->mSliceSize;
@@ -239,16 +240,10 @@ static void wc_xy(void *usr, uint32_t idx) {
239240
//LOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut);
240241
for (p.y = yStart; p.y < yEnd; p.y++) {
241242
uint32_t offset = mtls->dimX * p.y;
242-
uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * offset);
243-
const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * offset);
244-
245-
for (p.x = mtls->xStart; p.x < mtls->xEnd; p.x++) {
246-
p.in = xPtrIn;
247-
p.out = xPtrOut;
248-
dc->mForEachLaunch[sig](&mtls->script->mHal.info.root, &p);
249-
xPtrIn += mtls->eStrideIn;
250-
xPtrOut += mtls->eStrideOut;
251-
}
243+
p.out = mtls->ptrOut + (mtls->eStrideOut * offset);
244+
p.in = mtls->ptrIn + (mtls->eStrideIn * offset);
245+
fn(&mtls->script->mHal.info.root, &p, mtls->xStart, mtls->xEnd,
246+
mtls->eStrideIn, mtls->eStrideOut);
252247
}
253248
}
254249
}
@@ -262,6 +257,7 @@ static void wc_x(void *usr, uint32_t idx) {
262257
RsdHal * dc = (RsdHal *)mtls->rsc->mHal.drv;
263258
uint32_t sig = mtls->sig;
264259

260+
outer_foreach_t fn = dc->mForEachLaunch[sig];
265261
while (1) {
266262
uint32_t slice = (uint32_t)android_atomic_inc(&mtls->mSliceNum);
267263
uint32_t xStart = mtls->xStart + slice * mtls->mSliceSize;
@@ -273,15 +269,10 @@ static void wc_x(void *usr, uint32_t idx) {
273269

274270
//LOGE("usr idx %i, x %i,%i y %i,%i", idx, mtls->xStart, mtls->xEnd, yStart, yEnd);
275271
//LOGE("usr ptr in %p, out %p", mtls->ptrIn, mtls->ptrOut);
276-
uint8_t *xPtrOut = mtls->ptrOut + (mtls->eStrideOut * xStart);
277-
const uint8_t *xPtrIn = mtls->ptrIn + (mtls->eStrideIn * xStart);
278-
for (p.x = xStart; p.x < xEnd; p.x++) {
279-
p.in = xPtrIn;
280-
p.out = xPtrOut;
281-
dc->mForEachLaunch[sig](&mtls->script->mHal.info.root, &p);
282-
xPtrIn += mtls->eStrideIn;
283-
xPtrOut += mtls->eStrideOut;
284-
}
272+
p.out = mtls->ptrOut + (mtls->eStrideOut * xStart);
273+
p.in = mtls->ptrIn + (mtls->eStrideIn * xStart);
274+
fn(&mtls->script->mHal.info.root, &p, mtls->xStart, mtls->xEnd,
275+
mtls->eStrideIn, mtls->eStrideOut);
285276
}
286277
}
287278

@@ -392,22 +383,17 @@ void rsdScriptInvokeForEach(const Context *rsc,
392383
uint32_t sig = mtls.sig;
393384

394385
//LOGE("launch 3");
386+
outer_foreach_t fn = dc->mForEachLaunch[sig];
395387
for (p.ar[0] = mtls.arrayStart; p.ar[0] < mtls.arrayEnd; p.ar[0]++) {
396388
for (p.z = mtls.zStart; p.z < mtls.zEnd; p.z++) {
397389
for (p.y = mtls.yStart; p.y < mtls.yEnd; p.y++) {
398390
uint32_t offset = mtls.dimX * mtls.dimY * mtls.dimZ * p.ar[0] +
399391
mtls.dimX * mtls.dimY * p.z +
400392
mtls.dimX * p.y;
401-
uint8_t *xPtrOut = mtls.ptrOut + (mtls.eStrideOut * offset);
402-
const uint8_t *xPtrIn = mtls.ptrIn + (mtls.eStrideIn * offset);
403-
404-
for (p.x = mtls.xStart; p.x < mtls.xEnd; p.x++) {
405-
p.in = xPtrIn;
406-
p.out = xPtrOut;
407-
dc->mForEachLaunch[sig](&s->mHal.info.root, &p);
408-
xPtrIn += mtls.eStrideIn;
409-
xPtrOut += mtls.eStrideOut;
410-
}
393+
p.out = mtls.ptrOut + (mtls.eStrideOut * offset);
394+
p.in = mtls.ptrIn + (mtls.eStrideIn * offset);
395+
fn(&mtls.script->mHal.info.root, &p, mtls.xStart, mtls.xEnd,
396+
mtls.eStrideIn, mtls.eStrideOut);
411397
}
412398
}
413399
}

libs/rs/driver/rsdCore.cpp

Lines changed: 79 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -292,75 +292,136 @@ void Shutdown(Context *rsc) {
292292
}
293293

294294
static void rsdForEach17(const void *vRoot,
295-
const android::renderscript::RsForEachStubParamStruct *p) {
295+
const android::renderscript::RsForEachStubParamStruct *p,
296+
uint32_t x1, uint32_t x2,
297+
uint32_t instep, uint32_t outstep) {
296298
typedef void (*fe)(const void *, uint32_t);
297299
(*(fe*)vRoot)(p->in, p->y);
298300
}
299301

300302
static void rsdForEach18(const void *vRoot,
301-
const android::renderscript::RsForEachStubParamStruct *p) {
303+
const android::renderscript::RsForEachStubParamStruct *p,
304+
uint32_t x1, uint32_t x2,
305+
uint32_t instep, uint32_t outstep) {
302306
typedef void (*fe)(void *, uint32_t);
303307
(*(fe*)vRoot)(p->out, p->y);
304308
}
305309

306310
static void rsdForEach19(const void *vRoot,
307-
const android::renderscript::RsForEachStubParamStruct *p) {
311+
const android::renderscript::RsForEachStubParamStruct *p,
312+
uint32_t x1, uint32_t x2,
313+
uint32_t instep, uint32_t outstep) {
308314
typedef void (*fe)(const void *, void *, uint32_t);
309315
(*(fe*)vRoot)(p->in, p->out, p->y);
310316
}
311317

312318
static void rsdForEach21(const void *vRoot,
313-
const android::renderscript::RsForEachStubParamStruct *p) {
319+
const android::renderscript::RsForEachStubParamStruct *p,
320+
uint32_t x1, uint32_t x2,
321+
uint32_t instep, uint32_t outstep) {
314322
typedef void (*fe)(const void *, const void *, uint32_t);
315323
(*(fe*)vRoot)(p->in, p->usr, p->y);
316324
}
317325

318326
static void rsdForEach22(const void *vRoot,
319-
const android::renderscript::RsForEachStubParamStruct *p) {
327+
const android::renderscript::RsForEachStubParamStruct *p,
328+
uint32_t x1, uint32_t x2,
329+
uint32_t instep, uint32_t outstep) {
320330
typedef void (*fe)(void *, const void *, uint32_t);
321331
(*(fe*)vRoot)(p->out, p->usr, p->y);
322332
}
323333

324334
static void rsdForEach23(const void *vRoot,
325-
const android::renderscript::RsForEachStubParamStruct *p) {
335+
const android::renderscript::RsForEachStubParamStruct *p,
336+
uint32_t x1, uint32_t x2,
337+
uint32_t instep, uint32_t outstep) {
326338
typedef void (*fe)(const void *, void *, const void *, uint32_t);
327339
(*(fe*)vRoot)(p->in, p->out, p->usr, p->y);
328340
}
329341

330342
static void rsdForEach25(const void *vRoot,
331-
const android::renderscript::RsForEachStubParamStruct *p) {
343+
const android::renderscript::RsForEachStubParamStruct *p,
344+
uint32_t x1, uint32_t x2,
345+
uint32_t instep, uint32_t outstep) {
332346
typedef void (*fe)(const void *, uint32_t, uint32_t);
333-
(*(fe*)vRoot)(p->in, p->x, p->y);
347+
const uint8_t *pin = (const uint8_t *)p->in;
348+
uint32_t y = p->y;
349+
for (uint32_t x = x1; x < x2; x++) {
350+
(*(fe*)vRoot)(pin, x, y);
351+
pin += instep;
352+
}
334353
}
335354

336355
static void rsdForEach26(const void *vRoot,
337-
const android::renderscript::RsForEachStubParamStruct *p) {
356+
const android::renderscript::RsForEachStubParamStruct *p,
357+
uint32_t x1, uint32_t x2,
358+
uint32_t instep, uint32_t outstep) {
338359
typedef void (*fe)(void *, uint32_t, uint32_t);
339-
(*(fe*)vRoot)(p->out, p->x, p->y);
360+
uint8_t *pout = (uint8_t *)p->out;
361+
uint32_t y = p->y;
362+
for (uint32_t x = x1; x < x2; x++) {
363+
(*(fe*)vRoot)(pout, x, y);
364+
pout += outstep;
365+
}
340366
}
341367

342368
static void rsdForEach27(const void *vRoot,
343-
const android::renderscript::RsForEachStubParamStruct *p) {
369+
const android::renderscript::RsForEachStubParamStruct *p,
370+
uint32_t x1, uint32_t x2,
371+
uint32_t instep, uint32_t outstep) {
344372
typedef void (*fe)(const void *, void *, uint32_t, uint32_t);
345-
(*(fe*)vRoot)(p->in, p->out, p->x, p->y);
373+
uint8_t *pout = (uint8_t *)p->out;
374+
const uint8_t *pin = (const uint8_t *)p->in;
375+
uint32_t y = p->y;
376+
for (uint32_t x = x1; x < x2; x++) {
377+
(*(fe*)vRoot)(pin, pout, x, y);
378+
pin += instep;
379+
pout += outstep;
380+
}
346381
}
347382

348383
static void rsdForEach29(const void *vRoot,
349-
const android::renderscript::RsForEachStubParamStruct *p) {
384+
const android::renderscript::RsForEachStubParamStruct *p,
385+
uint32_t x1, uint32_t x2,
386+
uint32_t instep, uint32_t outstep) {
350387
typedef void (*fe)(const void *, const void *, uint32_t, uint32_t);
351-
(*(fe*)vRoot)(p->in, p->usr, p->x, p->y);
388+
const uint8_t *pin = (const uint8_t *)p->in;
389+
const void *usr = p->usr;
390+
const uint32_t y = p->y;
391+
for (uint32_t x = x1; x < x2; x++) {
392+
(*(fe*)vRoot)(pin, usr, x, y);
393+
pin += instep;
394+
}
352395
}
353396

354397
static void rsdForEach30(const void *vRoot,
355-
const android::renderscript::RsForEachStubParamStruct *p) {
398+
const android::renderscript::RsForEachStubParamStruct *p,
399+
uint32_t x1, uint32_t x2,
400+
uint32_t instep, uint32_t outstep) {
356401
typedef void (*fe)(void *, const void *, uint32_t, uint32_t);
357-
(*(fe*)vRoot)(p->out, p->usr, p->x, p->y);
402+
uint8_t *pout = (uint8_t *)p->out;
403+
const void *usr = p->usr;
404+
const uint32_t y = p->y;
405+
for (uint32_t x = x1; x < x2; x++) {
406+
(*(fe*)vRoot)(pout, usr, x, y);
407+
pout += outstep;
408+
}
358409
}
359410

360411
static void rsdForEach31(const void *vRoot,
361-
const android::renderscript::RsForEachStubParamStruct *p) {
412+
const android::renderscript::RsForEachStubParamStruct *p,
413+
uint32_t x1, uint32_t x2,
414+
uint32_t instep, uint32_t outstep) {
362415
typedef void (*fe)(const void *, void *, const void *, uint32_t, uint32_t);
363-
(*(fe*)vRoot)(p->in, p->out, p->usr, p->x, p->y);
416+
uint8_t *pout = (uint8_t *)p->out;
417+
const uint8_t *pin = (const uint8_t *)p->in;
418+
const void *usr = p->usr;
419+
const uint32_t y = p->y;
420+
for (uint32_t x = x1; x < x2; x++) {
421+
(*(fe*)vRoot)(pin, pout, usr, x, y);
422+
pin += instep;
423+
pout += outstep;
424+
}
364425
}
365426

366427

libs/rs/driver/rsdCore.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,9 @@ typedef void (* InvokeFunc_t)(void);
2828
typedef void (*WorkerCallback_t)(void *usr, uint32_t idx);
2929

3030
typedef void (*outer_foreach_t)(const void *,
31-
const android::renderscript::RsForEachStubParamStruct *);
31+
const android::renderscript::RsForEachStubParamStruct *,
32+
uint32_t x1, uint32_t x2,
33+
uint32_t instep, uint32_t outstep);
3234

3335
typedef struct RsdSymbolTableRec {
3436
const char * mName;

0 commit comments

Comments
 (0)