Add NEON accelerated RGB to RGBA/BGRA expansion methods
Categories
(Core :: Graphics, enhancement, P3)
Tracking
()
Tracking | Status | |
---|---|---|
firefox73 | --- | fixed |
People
(Reporter: aosmond, Assigned: aosmond)
References
Details
Attachments
(2 files)
In bug 1551088, I added accelerated RGB to RGBA/BGRA expansion methods for SSSE3 and AVX2 (and the impact was observable with telemetry, although this also included integration with already written premultiplication accelerated methods). I should do the same for NEON. Godbolt suggests ~410 cycles per 100 pixels for the current C++ implementation, and ~314 cycles per 400 pixels for my initial NEON implementation, so likely a substantial improvement for image decoding for Android and other ARM targets.
Assignee | ||
Comment 1•5 years ago
|
||
These new methods will be automatically used by ARM targets for image
decoding. Specifically it should reduce the time required to decode GIFs
and opaque PNGs.
Assignee | ||
Comment 2•5 years ago
|
||
Comment 4•5 years ago
|
||
bugherder |
this commit seems to have caused some regression, with the error: cannot convert 'uint16x8_t' to 'uint8x16_t' in initialization
9:16.49 gmake[4]: Entering directory '/usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/ff/gfx/2d'
9:16.50 gfx/2d/SwizzleNEON.o
9:16.50 /usr/bin/armv7a-unknown-linux-gnueabihf-g++ -std=gnu++17 -o SwizzleNEON.o -c -flto -flifetime-dse=1 -I/usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/ff/dist/stl_wrappers -I/usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/ff/dist/system_wrappers -include /usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/config/gcc_hidden.h -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fstack-protector-strong -DNDEBUG=1 -DTRIMMED=1 -DUSE_NEON -DOS_POSIX=1 -DOS_LINUX=1 -DUSE_CAIRO -DMOZ2D_HAS_MOZ_CAIRO -DMOZ_ENABLE_FREETYPE -DSTATIC_EXPORTABLE_JS_API -DMOZ_HAS_MOZGLUE -DMOZILLA_INTERNAL_API -DIMPL_LIBXUL -I/usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/gfx/2d -I/usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/ff/gfx/2d -I/usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/ff/ipc/ipdl/_ipdlheaders -I/usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/ipc/chromium/src -I/usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/ipc/glue -I/usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/gfx/skia -I/usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/gfx/skia/skia -I/usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/ff/dist/include -I/usr/armv7a-unknown-linux-gnueabihf/usr/include/nspr -I/usr/include/nss -I/usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/ff/dist/include/nss -I/usr/armv7a-unknown-linux-gnueabihf/usr/include/pixman-1 -fPIC -DMOZILLA_CLIENT -include /usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/ff/mozilla-config.h -Wall -Wempty-body -Wignored-qualifiers -Woverloaded-virtual -Wpointer-arith -Wsign-compare -Wtype-limits -Wunreachable-code -Wwrite-strings -Wno-invalid-offsetof -Wduplicated-cond -Wimplicit-fallthrough -Wno-error=maybe-uninitialized -Wno-error=deprecated-declarations -Wno-error=array-bounds -Wno-error=coverage-mismatch -Wno-error=free-nonheap-object -Wno-error=multistatement-macros -Wno-error=class-memaccess -Wno-error=deprecated-copy -Wformat -Wformat-security -Wformat-overflow=2 -fno-sized-deallocation -fno-aligned-new -pipe -flifetime-dse=1 -Wno-psabi -Wno-class-memaccess -Wno-int-in-bool-context -Wno-multistatement-macros -Wno-maybe-uninitialized -Wno-deprecated-declarations -mthumb -mno-thumb-interwork -mfpu=neon -mfloat-abi=hard -fno-exceptions -fno-strict-aliasing -fno-rtti -ffunction-sections -fdata-sections -fno-exceptions -fno-math-errno -pthread -pipe -O2 -fomit-frame-pointer -funwind-tables -Wno-error=shadow -I/usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/ff/dist/include/cairo -I/usr/armv7a-unknown-linux-gnueabihf/usr/include/freetype2 -I/usr/armv7a-unknown-linux-gnueabihf/usr/include/uuid -I/usr/armv7a-unknown-linux-gnueabihf/usr/include/freetype2 -MD -MP -MF .deps/SwizzleNEON.o.pp -fdiagnostics-color -mfpu=neon /usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/gfx/2d/SwizzleNEON.cpp
9:16.50 /usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/gfx/2d/SwizzleNEON.cpp: In function 'void mozilla::gfx::UnpackRowRGB24_NEON(const uint8_t*, uint8_t*, int32_t)':
9:16.50 /usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/gfx/2d/SwizzleNEON.cpp:423:51: note: use '-flax-vector-conversions' to permit conversions between vectors with differing element types or numbers of subparts
9:16.50 423 | vst1q_u16(reinterpret_cast<uint16_t*>(dst), px);
9:16.50 | ^
9:16.50 /usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/gfx/2d/SwizzleNEON.cpp:423:49: error: cannot convert 'uint8x16_t' {aka '__vector(16) unsigned char'} to 'uint16x8_t' {aka '__vector(8) short unsigned int'}
9:16.50 423 | vst1q_u16(reinterpret_cast<uint16_t*>(dst), px);
9:16.50 | ^~
9:16.50 | |
9:16.50 | uint8x16_t {aka __vector(16) unsigned char}
9:16.50 In file included from /usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/gfx/2d/SwizzleNEON.cpp:9:
9:16.50 /usr/lib/gcc/armv7a-unknown-linux-gnueabihf/9.2.0/include/arm_neon.h:10984:39: note: initializing argument 2 of 'void vst1q_u16(uint16_t*, uint16x8_t)'
9:16.50 10984 | vst1q_u16 (uint16_t * __a, uint16x8_t __b)
9:16.50 | ^
9:16.50 /usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/gfx/2d/SwizzleNEON.cpp: In instantiation of 'void mozilla::gfx::UnpackRowRGB24_NEON(const uint8_t*, uint8_t*, int32_t) [with bool aSwapRB = false; uint8_t = unsigned char; int32_t = int]':
9:16.50 /usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/gfx/2d/SwizzleNEON.cpp:430:75: required from here
9:16.50 /usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/gfx/2d/SwizzleNEON.cpp:415:30: error: cannot convert 'uint16x8_t' {aka '__vector(8) short unsigned int'} to 'uint8x16_t' {aka '__vector(16) unsigned char'} in initialization
9:16.50 415 | uint8x16_t px = vld1q_u16(reinterpret_cast<const uint16_t*>(src));
9:16.50 | ^
9:16.50 | |
9:16.50 | uint16x8_t {aka __vector(8) short unsigned int}
9:16.50 /usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/gfx/2d/SwizzleNEON.cpp: In instantiation of 'void mozilla::gfx::UnpackRowRGB24_NEON(const uint8_t*, uint8_t*, int32_t) [with bool aSwapRB = true; uint8_t = unsigned char; int32_t = int]':
9:16.50 /usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/gfx/2d/SwizzleNEON.cpp:431:74: required from here
9:16.50 /usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/gfx/2d/SwizzleNEON.cpp:415:30: error: cannot convert 'uint16x8_t' {aka '__vector(8) short unsigned int'} to 'uint8x16_t' {aka '__vector(16) unsigned char'} in initialization
9:16.50 gmake[4]: *** [/usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/config/rules.mk:738: SwizzleNEON.o] Error 1
9:16.50 gmake[4]: Leaving directory '/usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/ff/gfx/2d'
9:16.50 gmake[3]: *** [/usr/armv7a-unknown-linux-gnueabihf/tmp/portage/www-client/firefox-73.0_beta3/work/firefox-73.0/config/recurse.mk:74: gfx/2d/target-objects] Error 2
9:16.50 gmake[3]: *** Waiting for unfinished jobs....
should I open a new bug for that?
this is the compressed build log, I'm on armv7a hardfloat and have neon instructions enabled.
Assignee | ||
Comment 7•5 years ago
|
||
I filed bug 1610814 about this and put up a patch that should hopefully fix it.
Description
•