Raspberry Pi 5 PMUs return 0 when configured manually from EL1

I am working on a Raspberry Pi 5 (aarch64).

uname -a: Linux rpi5 6.8.0-1036-raspi #40-Ubuntu SMP PREEMPT_DYNAMIC Mon Aug 18 09:50:42 UTC 2025 aarch64 aarch64 aarch64 GNU/Linux

I load a kernel module I wrote. The same module was running on a Google Pixel 8, and the PMU configuration worked just fine. Now when I configure the pi whey just return a constant value of 0.

I have printed a bit of debug information from my kernel module:

[71845.713814] PMCR_EL0: 0x00000000410b3001
[71845.713816] Number of general-purpose counters: 7
[71845.713817] Cycle counter exists: NO
[71845.713818] Supported event IDs 0-31 : 0x000000007fff0f3f
[71845.713820] Supported event IDs 32-63: 0x0000000000f2ae7f
[71845.713821] PMCNTENSET_EL0: 0x000000000000000f
[71845.713822] PMEVCNTR0_EL0 initial value: 0
[71845.713824] PMEVCNTR1_EL0 initial value: 0
[71845.713825] PMEVCNTR2_EL0 initial value: 0
[71845.713826] PMEVCNTR3_EL0 initial value: 0
[71845.713827] PMEVCNTR4_EL0 initial value: 0
[71845.713829] PMEVCNTR5_EL0 initial value: 0
[71845.713830] PMEVCNTR6_EL0 initial value: 0
[71845.713831] PMCCNTR_EL0 initial value: 0


This is the code I run to generate the above:

static void pmu_discover(void *arg) {
    unsigned long flags = 0;
    uint64_t pmcr = 0, pmceid0 = 0, pmceid1 =0, n = 0, i = 0;

    preempt_disable();
    local_irq_save(flags);

    asm volatile("mrs %0, pmcr_el0" : "=r"(pmcr));
    pr_info("PMCR_EL0: 0x%016llx\n", pmcr);

    n = ((pmcr >> 11) & 0x1f) + 1;
    pr_info("Number of general-purpose counters: %llu\n", n);

    pr_info("Cycle counter exists: %s\n", (pmcr & (1<<31)) ? "YES" : "NO");

    asm volatile("mrs %0, pmceid0_el0" : "=r"(pmceid0));
    asm volatile("mrs %0, pmceid1_el0" : "=r"(pmceid1));

    pr_info("Supported event IDs 0-31 : 0x%016llx\n", pmceid0);
    pr_info("Supported event IDs 32-63: 0x%016llx\n", pmceid1);

    uint64_t cntenset = 0;
    asm volatile("mrs %0, pmcntenset_el0" : "=r"(cntenset));
    pr_info("PMCNTENSET_EL0: 0x%016llx\n", cntenset);

    for (i = 0; i < n; i++) {
        uint64_t val = 0;
        switch(i) {
            case 0: asm volatile("mrs %0, pmevcntr0_el0" : "=r"(val)); break;
            case 1: asm volatile("mrs %0, pmevcntr1_el0" : "=r"(val)); break;
            case 2: asm volatile("mrs %0, pmevcntr2_el0" : "=r"(val)); break;
            case 3: asm volatile("mrs %0, pmevcntr3_el0" : "=r"(val)); break;
            default: val = 0; break;
        }
        pr_info("PMEVCNTR%llu_EL0 initial value: %llu\n", i, val);
    }

    uint64_t cc = 0;
    asm volatile("mrs %0, pmccntr_el0" : "=r"(cc));
    pr_info("PMCCNTR_EL0 initial value: %llu\n", cc);

    local_irq_restore(flags);
    preempt_enable();
}


Additionally, This is the code I run to configure the PMUs:

static int config_pfc(void) {

    // filter EL0, EL2, EL3
    uint64_t val = 0;
    uint64_t filter_events = (1 << 30) | (1 << 26);

    // disable PMU counters before selecting the event we want
    val = 0;
    asm volatile("mrs %0, pmcr_el0" : "=r" (val));
    asm volatile("msr pmcr_el0, %0" :: "r" ((uint64_t)0x0));
    asm volatile("isb\n");
    asm volatile("msr pmcntenclr_el0, %0" :: "r" ((uint64_t)0b1111));
    asm volatile("isb\n");


    // select events:
    // 1. L1D cache refills (0x3)
    asm volatile("msr pmevtyper0_el0, %0" :: "r" ((uint64_t)(filter_events | 0x03)));
    asm volatile("isb\n");

    // 2. Instructions retired (0x08)
    asm volatile("msr pmevtyper1_el0, %0" :: "r" ((uint64_t)(filter_events | 0x08)));
    asm volatile("isb\n");

    // 3. Instruction speculatively executed (0x1b)
    asm volatile("msr pmevtyper2_el0, %0" :: "r" ((uint64_t)(filter_events | 0x1b)));
    asm volatile("isb\n");

    // 4. L1D cache refills (0x3)
    asm volatile("msr pmevtyper3_el0, %0" :: "r" ((uint64_t)(filter_events | 0x3)));
    asm volatile("isb\n");

    // enable counting
    val = 0;
    asm volatile("msr pmcntenset_el0, %0" :: "r" ((uint64_t)0b1111));
    asm volatile("isb\n");

    // enable PMU counters and reset the counters (using 3 bits)
    val = 0;
    asm volatile("mrs %0, pmcr_el0" : "=r" (val));
    asm volatile("msr pmcr_el0, %0" :: "r" (val | 0b111));
    asm volatile("isb\n");
    // debug prints (view via 'sudo dmesg')
    for (volatile int i = 0; i < 1000000; i++) { }

     val = 0;
     asm volatile("mrs %0, pmuserenr_el0" : "=r" (val));
     module_debug(KERN_ERR "%-24s 0x%0llx\n", "PMUSERENR_EL0:", val);
     asm volatile("mrs %0, pmcr_el0" : "=r" (val));
     module_debug(KERN_ERR "%-24s 0x%0llx\n", "PMCR_EL0:", val);
     asm volatile("mrs %0, pmselr_el0" : "=r" (val));
     module_debug(KERN_ERR "%-24s 0x%0llx\n", "PMSELR_EL0:", val);
     asm volatile("mrs %0, pmevtyper0_el0" : "=r" (val));
     module_debug(KERN_ERR "%-24s 0x%0llx\n", "PMEVTYPER0_EL0:", val);
     asm volatile("mrs %0, pmcntenset_el0" : "=r" (val));
     module_debug(KERN_ERR "%-24s 0x%0llx\n", "PMCNTENSET_EL0:", val);
     asm volatile("mrs %0, pmevcntr1_el0" : "=r" (val));
     module_debug(KERN_ERR "%-24s 0x%0llx\n", "PMECNTR1_EL0:", val);
     asm volatile("mrs %0, pmevcntr2_el0" : "=r" (val));
     module_debug(KERN_ERR "%-24s 0x%0llx\n", "PMECNTR2_EL0:", val);
     asm volatile("mrs %0, pmevcntr3_el0" : "=r" (val));
     module_debug(KERN_ERR "%-24s 0x%0llx\n", "PMECNTR3_EL0:", val);
     asm volatile("mrs %0, pmccntr_el0" : "=r"(val));
     module_debug(KERN_ERR "%-24s 0x%0llx\n", "PNCCNTR_EL0:", val);

    return 0;
}


And this returnes that all of the pmevcntr{0-3}_el0 are equal to 0 constantly.

I downloaded perf to the pi, and it seem to work properly, and also I wrote a userland code which directly calls pef APIs and it seems to work as well.

Any help would be appriciated, Thanks!