I am deploying a custom OS using Yocto Scarthgap, but I have problems using the NPU.
I am using this patch to enable the NPU:
From 0caf9cbbda24fb7450ef6ce9fdc9c9fd611b046b Mon Sep 17 00:00:00 2001
From: Alessandro Villegas <eavillegas@arquimea.com>
Date: Tue, 24 Feb 2026 15:42:29 +0100
Subject: [PATCH] Enable spi and npu for imx8mp
---
arch/arm64/boot/dts/freescale/imx8mp.dtsi | 25 +++++++++++++++--------
1 file changed, 17 insertions(+), 8 deletions(-)
diff --git a/arch/arm64/boot/dts/freescale/imx8mp.dtsi b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
index 3025f89c28b0..f27354981f96 100644
--- a/arch/arm64/boot/dts/freescale/imx8mp.dtsi
+++ b/arch/arm64/boot/dts/freescale/imx8mp.dtsi
@@ -1005,20 +1005,29 @@ spba-bus@30800000 {
ranges;
ecspi1: spi@30820000 {
- #address-cells = <1>;
- #size-cells = <0>;
+ /* Properties inherited from the SoC definition */
compatible = "fsl,imx8mp-ecspi", "fsl,imx6ul-ecspi";
reg = <0x30820000 0x10000>;
interrupts = <GIC_SPI 31 IRQ_TYPE_LEVEL_HIGH>;
- clocks = <&clk IMX8MP_CLK_ECSPI1_ROOT>,
- <&clk IMX8MP_CLK_ECSPI1_ROOT>;
+ clocks = <&clk IMX8MP_CLK_ECSPI1_ROOT>, <&clk IMX8MP_CLK_ECSPI1_ROOT>;
clock-names = "ipg", "per";
assigned-clock-rates = <80000000>;
assigned-clocks = <&clk IMX8MP_CLK_ECSPI1>;
assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_800M>;
dmas = <&sdma1 0 7 1>, <&sdma1 1 7 2>;
dma-names = "rx", "tx";
- status = "disabled";
+
+ /* Properties modified/added by your board file */
+ #address-cells = <1>;
+ #size-cells = <0>;
+ status = "okay"; /* Changed from "disabled" to "okay" */
+
+ /* New child node added by your board file */
+ spidev@0 {
+ compatible = "rohm,dh2228fv";
+ reg = <0>;
+ spi-max-frequency = <10000000>;
+ };
};
ecspi2: spi@30830000 {
@@ -2417,7 +2426,7 @@ gpu_3d: gpu3d@38000000 {
assigned-clock-rates = <1000000000>, <1000000000>,
<800000000>, <400000000>;
power-domains = <&pgc_gpu3d>;
- status = "disabled";
+ status = "okay";
};
gpu_2d: gpu2d@38008000 {
@@ -2436,7 +2445,7 @@ gpu_2d: gpu2d@38008000 {
<&clk IMX8MP_SYS_PLL1_800M>;
assigned-clock-rates = <1000000000>, <800000000>, <400000000>;
power-domains = <&pgc_gpu2d>;
- status = "disabled";
+ status = "okay";
};
ml_vipsi: vipsi@38500000 {
@@ -2465,7 +2474,7 @@ mix_gpu_ml: mix_gpu_ml@40000000 {
reg = <0x0 0x40000000 0x0 0xC0000000>, <0x0 0x0 0x0 0x10000000>;
reg-names = "phys_baseaddr", "contiguous_mem";
memory-region=<&gpu_reserved>;
- status = "disabled";
+ status = "okay";
throttle,max_state = <1>;
#cooling-cells = <2>;
--
2.43.0
I am able to see /dev/galcore, and the galcore kernel module is installed:
root@localhost:~# uname -a
Linux localhost.localdomain 6.6.94-0 #1 SMP PREEMPT Tue Jul 29 12:48:48 UTC 2025 aarch64 GNU/Linux
root@localhost:~#
root@localhost:~# ls -l /lib/modules/$(uname -r)/updates/galcore.ko
-rw-r--r-- 1 root root 643856 Mar 9 2018 /lib/modules/6.6.94-0/updates/galcore.ko
root@localhost:~#
root@localhost:~# ls -l /lib/modules/$(uname -r)/updates/galcore.ko
-rw-r--r-- 1 root root 643856 Mar 9 2018 /lib/modules/6.6.94-0/updates/galcore.ko
root@localhost:~# modinfo /lib/modules/$(uname -r)/updates/galcore.ko
filename: /lib/modules/6.6.94-0/updates/galcore.ko
import_ns: VFS_internal_I_am_really_a_filesystem_and_am_NOT_a_driver
license: Dual MIT/GPL
description: Vivante Graphics Driver
import_ns: DMA_BUF
import_ns: DMA_BUF
import_ns: DMA_BUF
import_ns: DMA_BUF
alias: of:N*T*Cfsl,imx6q-gpuC*
alias: of:N*T*Cfsl,imx6q-gpu
alias: of:N*T*Cfsl,imx8-gpu-ssC*
alias: of:N*T*Cfsl,imx8-gpu-ss
depends:
name: galcore
vermagic: 6.6.94-0 SMP preempt mod_unload modversions aarch64
parm: initgpu3DMinClock:int
parm: registerMemBase:Base of bus address of GC core AHB register (ulong)
parm: registerMemSize:Size of bus address range of GC core AHB register (ulong)
parm: registerMemBase2D:Base of bus address of G2D core, if registerMemBase2D is used for a G3D core (ulong)
parm: registerMemSize2D:Size of bus address range of G2D core, if registerMemSize is used for a G3D core (ulong)
parm: registerMemBaseVG:Base of bus address of VG core (ulong)
parm: registerMemSizeVG:Size of bus address range of VG core (ulong)
parm: registerBases:Array of bases of bus address of register of multi-core (array of ullong)
parm: registerSizes:Array of sizes of bus address range of register of multi-core (array of ulong)
parm: register2DBases:Array of bases of bus address of register of multi-2D (array of ullong)
parm: register2DSizes:Array of sizes of bus address range of register of multi-2D (array of ulong)
parm: registerAPB:The offset of APB register to the register base address. (uint)
parm: contiguousSize:Size of reserved system memory (ulong)
parm: contiguousBase:Base address of reserved system memory (ullong)
parm: contiguousSizes:Sizes of reserved system memory array (array of ulong)
parm: contiguousBases:Base addresses of reserved system memory array (array of ullong)
parm: externalSize:Size of external local memory, if it is 0, means there is no external pool (array of ulong)
parm: externalBase:Base address of external memory (array of ullong)
parm: exclusiveSize:Size of exclusive local memory, if it is 0, means there is no exclusive pool (array of ulong)
parm: exclusiveBase:Base address of exclusive memory(GPU access only) (array of ullong)
parm: irqLine:IRQ number of GC core (int)
parm: irqLine2D:IRQ number of G2D core if irqLine is used for a G3D core (int)
parm: irqLineVG:IRQ number of VG core (int)
parm: irqs:Array of IRQ numbers of multi-core (array of int)
parm: irq2Ds:Array of IRQ numbers of multi-2D (array of int)
parm: isrPoll:Bits isr polling for per-core, default 0'1b means disable, 1'1b means auto enable isr polling mode (uint)
parm: platformIDs:Array of platform id of each hardware device (array of uint)
parm: hwDevCounts:Array of hw device count of each platform (array of uint)
parm: devCoreCounts:Array of core count of each hardware device (array of uint)
parm: dev2DCoreCounts:Array of core 2D count of each hardware device (array of uint)
parm: devMemIDs:Array of local memory index of each hardware device (array of uint)
parm: devSysMemIDs:Array of system reserved memory index of each hardware device, each bit represents memory id (array of uint)
parm: devSRAMIDs:Array of SRAM index of each hardware device, each bit represents memory id (array of uint)
parm: chipIDs:Array of chipIDs of multi-chips (array of uint)
parm: fastClear:Disable fast clear if set it to 0, enabled by default (int)
parm: compression:Disable compression if set it to 0, enabled by default (int)
parm: powerManagement:Disable auto power saving if set it to 0, enabled by default (int)
parm: recovery:Recover GPU from stuck (1: Enable, 0: Disable) (uint)
parm: stuckDump:Level of stuck dump content. (uint)
parm: debugLevel:Level of debug. (uint)
parm: showArgs:Display parameters value when driver loaded (int)
parm: userClusterMasks:Array of user defined per-core cluster enable mask (array of uint)
parm: enableNN:How many NN cores will be enabled in one VIP, 0xFF means all enabled, 0 means all disabled, 1 means enable 1 NN core... (uint)
parm: smallBatch:Enable/disable GPU small batch feature, enable by default (int)
parm: allMapInOne:Mapping kernel video memory to user, 0 means mapping every time, otherwise only mapping one time (int)
parm: gpuTimeout:Timeout of operation that needs to wait for the GPU (uint)
parm: softReset:Disable soft reset when insert the driver if set it to 0, enabled by default. (uint)
parm: sRAMLoopMode:Default 0 means SRAM pool must be specified when allocating SRAM memory, 1 means SRAM memory will be looped as default pool. (uint)
parm: sRAMBases:Array of base of bus address of SRAM,INTERNAL, EXTERNAL0, EXTERNAL1..., gcvINVALID_PHYSICAL_ADDRESS means no bus address (array of ullong)
parm: sRAMSizes:Array of size of per-core SRAMs, 0 means no SRAM (array of uint)
parm: extSRAMBases:Shared SRAM physical address bases. (array of ullong)
parm: extSRAMSizes:Shared SRAM sizes. (array of uint)
parm: sRAMRequested:Default 1 means AXI-SRAM is already reserved for GPU, 0 means GPU driver need request the memory region. (uint)
parm: mmu:Disable MMU if set it to 0, enabled by default (int)
parm: mmuPageTablePool:Default 1 means alloc mmu page table in virtual memory(external if PCIE), 0 means auto select memory pool. (uint)
parm: mmuCmdPool:Default 1 means auto select memory pool to allocate mmu initial command, specific the pool type with gcvPOOL_XXXX (uint)
parm: mmuDynamicMap:Default 1 means enable mmu dynamic mapping in virsual memory, 0 means disable dynnamic mapping. (uint)
parm: baseAddress:The pre-flatmapping CPU view base address in MMU page table. It's shared for all core (ulong)
parm: physSize:The pre-flatmapping size in MMU page table. If set it to 0, driver will skip all the pre-flatmapping related work (ulong)
parm: major:major device number for GC device (uint)
parm: type:0 - Char Driver (Default), 1 - Misc Driver (uint)
root@localhost:~#
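However, the NPU is not working: inference takes the same time (about 167 ms) with USE_GPU_INFERENCE=1 and with USE_GPU_INFERENCE=0: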
root@localhost:/usr/bin/tensorflow-lite-2.16.2/examples# USE_GPU_INFERENCE=1 ./label_image \
-m mobilenet_v1_1.0_224_quant.tflite \
-i grace_hopper.bmp \
-l labels.txt \
--external_delegate_path=/usr/lib/libvx_delegate.so
INFO: Loaded model mobilenet_v1_1.0_224_quant.tflite
INFO: resolved reporter
INFO: Vx delegate: allowed_cache_mode set to 0.
INFO: Vx delegate: device num set to 0.
INFO: Vx delegate: allowed_builtin_code set to 0.
INFO: Vx delegate: error_during_init set to 0.
INFO: Vx delegate: error_during_prepare set to 0.
INFO: Vx delegate: error_during_invoke set to 0.
INFO: EXTERNAL delegate created.
INFO: Applied EXTERNAL delegate.
W [query_hardware_caps:89] Unsupported evis version
W [HandleLayoutInfer:332] Op 162: default layout inference pass.
INFO: invoked
INFO: average time: 167.485 ms
INFO: 0.784314: 653 military uniform
INFO: 0.105882: 907 Windsor tie
INFO: 0.0156863: 458 bow tie
INFO: 0.00784314: 466 bulletproof vest
INFO: 0.00392157: 835 suit
root@localhost:/usr/bin/tensorflow-lite-2.16.2/examples# USE_GPU_INFERENCE=0 ./label_image \
-m mobilenet_v1_1.0_224_quant.tflite \
-i grace_hopper.bmp \
-l labels.txt \
--external_delegate_path=/usr/lib/libvx_delegate.so
INFO: Loaded model mobilenet_v1_1.0_224_quant.tflite
INFO: resolved reporter
INFO: Vx delegate: allowed_cache_mode set to 0.
INFO: Vx delegate: device num set to 0.
INFO: Vx delegate: allowed_builtin_code set to 0.
INFO: Vx delegate: error_during_init set to 0.
INFO: Vx delegate: error_during_prepare set to 0.
INFO: Vx delegate: error_during_invoke set to 0.
INFO: EXTERNAL delegate created.
INFO: Applied EXTERNAL delegate.
W [query_hardware_caps:89] Unsupported evis version
W [HandleLayoutInfer:332] Op 162: default layout inference pass.
INFO: invoked
INFO: average time: 167.506 ms
INFO: 0.784314: 653 military uniform
INFO: 0.105882: 907 Windsor tie
INFO: 0.0156863: 458 bow tie
INFO: 0.00784314: 466 bulletproof vest
INFO: 0.00392157: 835 suit
I compared with the reference image, and there it is working.
I am using the same kernel:
root@verdin-imx8mp-08626514:~# uname -a
Linux verdin-imx8mp-08626514 6.6.94-7.4.0-devel #1 SMP PREEMPT Tue Jul 29 12:48:48 UTC 2025 aarch64 GNU/Linux
root@verdin-imx8mp-08626514:~#
root@verdin-imx8mp-08626514:~# ls -l /lib/modules/$(uname -r)/updates/galcore.ko
-rw-r--r-- 1 root root 643872 Mar 9 2018 /lib/modules/6.6.94-7.4.0-devel/updates/galcore.ko
root@verdin-imx8mp-08626514:~#
As far as I can see, the only difference is that I am using 6.6.94-0 while the reference image uses 6.6.94-7.4.0-devel, because I am not using tdx-base.inc:
TDX_BUILD ?= "+build.${TDX_BUILDNBR}"
TDX_MATRIX_BUILD_TIME ?= "${DATETIME}"
TDX_MATRIX_BUILD_TIME[vardepsexclude] = "DATETIME"
TDX_PRERELEASE ?= "${@get_tdx_prerelease(d.getVar('TDX_PURPOSE'), d.getVar('TDX_MATRIX_BUILD_TIME'))}"
TDX_MAJOR ?= "7"
TDX_MINOR ?= "4"
TDX_PATCH ?= "0"
TDX_RELEASE ?= "${TDX_MAJOR}.${TDX_MINOR}.${TDX_PATCH}"
TDX_VERSION ?= "${TDX_RELEASE}${@'' if d.getVar('TDX_PURPOSE') == 'Release' else '-devel'}"
However, that shouldn't be a problem, because there are no additional config files or patches.