Skip to content

Commit 6dc33a8

Browse files
committed
optimize morph9
This reduces the time from about 133 ms to about 122 ms on my test image on the Memento PyCamera. A similar change to morph did not produce a performance improvement, so I didn't include it.
1 parent 26fe085 commit 6dc33a8

2 files changed

Lines changed: 31 additions & 22 deletions

File tree

shared-bindings/bitmapfilter/__init__.h

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -43,7 +43,7 @@ void shared_module_bitmapfilter_morph9(
4343
displayio_bitmap_t *bitmap,
4444
displayio_bitmap_t *mask,
4545
const int ksize,
46-
const int *krn,
46+
int krn[9 * (2 * ksize + 1) * (2 * ksize + 1)], // Note: modifies krn[]
4747
const mp_float_t m[3],
4848
const mp_float_t b[3],
4949
bool threshold,

shared-module/bitmapfilter/__init__.c

Lines changed: 30 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -181,7 +181,7 @@ void shared_module_bitmapfilter_morph9(
181181
displayio_bitmap_t *bitmap,
182182
displayio_bitmap_t *mask,
183183
const int ksize,
184-
const int *krn,
184+
int krn[9 * (2 * ksize + 1) * (2 * ksize + 1)], // Note: modifies krn[]
185185
const mp_float_t m[3],
186186
const mp_float_t b[3],
187187
bool threshold,
@@ -190,15 +190,24 @@ void shared_module_bitmapfilter_morph9(
190190

191191
int brows = ksize + 1;
192192

193-
const int32_t m_int[3] = {
194-
(int32_t)MICROPY_FLOAT_C_FUN(round)(65536 * m[0]),
195-
(int32_t)MICROPY_FLOAT_C_FUN(round)(65536 * m[1]),
196-
(int32_t)MICROPY_FLOAT_C_FUN(round)(65536 * m[2])
197-
};
193+
int arrsize = (ksize * 2 + 1) * (ksize * 2 + 1) * 9;
194+
195+
for (int i = 0; i < arrsize; i++) {
196+
int source_channel = i % 3;
197+
int target_channel = (i / 3) % 3;
198+
int source_is_green = (source_channel == 1);
199+
int target_is_green = (target_channel == 1);
200+
201+
int scale = (source_is_green == target_is_green) ? 65536
202+
: source_is_green ? 32768 : 131072;
203+
204+
krn[i] = (int)MICROPY_FLOAT_C_FUN(round)(scale * m[target_channel] * krn[i]);
205+
}
206+
198207
const int32_t b_int[3] = {
199-
(int32_t)MICROPY_FLOAT_C_FUN(round)(65536 * COLOR_G6_MAX * b[0]),
200-
(int32_t)MICROPY_FLOAT_C_FUN(round)(2 * 65536 * COLOR_G6_MAX * b[1]),
201-
(int32_t)MICROPY_FLOAT_C_FUN(round)(65536 * COLOR_G6_MAX * b[2])
208+
(int32_t)MICROPY_FLOAT_C_FUN(round)(65536 * COLOR_R5_MAX * b[0]),
209+
(int32_t)MICROPY_FLOAT_C_FUN(round)(65536 * COLOR_G6_MAX * b[1]),
210+
(int32_t)MICROPY_FLOAT_C_FUN(round)(65536 * COLOR_B5_MAX * b[2])
202211
};
203212

204213
check_matching_details(bitmap, bitmap);
@@ -220,7 +229,7 @@ void shared_module_bitmapfilter_morph9(
220229
continue; // Short circuit.
221230

222231
}
223-
int32_t r_acc = 0, g_acc = 0, b_acc = 0, ptr = 0;
232+
int32_t r_acc = b_int[0], g_acc = b_int[1], b_acc = b_int[2], ptr = 0;
224233

225234
if (x >= ksize && x < bitmap->width - ksize && y >= ksize && y < bitmap->height - ksize) {
226235
for (int j = -ksize; j <= ksize; j++) {
@@ -231,13 +240,13 @@ void shared_module_bitmapfilter_morph9(
231240
int g = COLOR_RGB565_TO_G6(pixel);
232241
int b = COLOR_RGB565_TO_B5(pixel);
233242
r_acc += krn[ptr++] * r;
234-
r_acc += (krn[ptr++] * g) / 2;
243+
r_acc += krn[ptr++] * g;
235244
r_acc += krn[ptr++] * b;
236-
g_acc += (krn[ptr++] * r) * 2;
245+
g_acc += krn[ptr++] * r;
237246
g_acc += krn[ptr++] * g;
238-
g_acc += (krn[ptr++] * b) * 2;
247+
g_acc += krn[ptr++] * b;
239248
b_acc += krn[ptr++] * r;
240-
b_acc += (krn[ptr++] * g) / 2;
249+
b_acc += krn[ptr++] * g;
241250
b_acc += krn[ptr++] * b;
242251
}
243252
}
@@ -252,30 +261,30 @@ void shared_module_bitmapfilter_morph9(
252261
int g = COLOR_RGB565_TO_G6(pixel);
253262
int b = COLOR_RGB565_TO_B5(pixel);
254263
r_acc += krn[ptr++] * r;
255-
r_acc += (krn[ptr++] * g) / 2;
264+
r_acc += krn[ptr++] * g;
256265
r_acc += krn[ptr++] * b;
257-
g_acc += (krn[ptr++] * r) * 2;
266+
g_acc += krn[ptr++] * r;
258267
g_acc += krn[ptr++] * g;
259-
g_acc += (krn[ptr++] * b) * 2;
268+
g_acc += krn[ptr++] * b;
260269
b_acc += krn[ptr++] * r;
261-
b_acc += (krn[ptr++] * g) / 2;
270+
b_acc += krn[ptr++] * g;
262271
b_acc += krn[ptr++] * b;
263272
}
264273
}
265274
}
266-
r_acc = (r_acc * m_int[0] + b_int[0]) >> 16;
275+
r_acc >>= 16;
267276
if (r_acc > COLOR_R5_MAX) {
268277
r_acc = COLOR_R5_MAX;
269278
} else if (r_acc < 0) {
270279
r_acc = 0;
271280
}
272-
g_acc = (g_acc * m_int[1] + b_int[1]) >> 16;
281+
g_acc >>= 16;
273282
if (g_acc > COLOR_G6_MAX) {
274283
g_acc = COLOR_G6_MAX;
275284
} else if (g_acc < 0) {
276285
g_acc = 0;
277286
}
278-
b_acc = (b_acc * m_int[2] + b_int[2]) >> 16;
287+
b_acc >>= 16;
279288
if (b_acc > COLOR_B5_MAX) {
280289
b_acc = COLOR_B5_MAX;
281290
} else if (b_acc < 0) {

0 commit comments

Comments
 (0)