Wednesday, August 31, 2022

[SOLVED] mcp251xfd test driver crashes

Issue

Edit: It works now, https://github.com/minima34/mcp251xfd_test

I wrote my first linux module using SPI to just test mcp2158fd chip writing and reading the OSC register but it bugs my system on run and I can't unload it with modprobe -rf or rmmod -f. When I write reboot and press Enter it hangs at some point and can't reboot. The problem is I work remotely from another town and can't do hw reboot when my colleauges are not there. I dont have the log because its on serial console and I cant go upward to see. It spams some "sdio" message which I supress with dmesg -n 1 command.

Im very interested in this topic. Its very interesting to write drivers. What causes the crash?

This is my code:

#include <linux/bitfield.h>
#include <linux/clk.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/netdevice.h>
#include <linux/of.h>
#include <linux/of_device.h>
#include <linux/pm_runtime.h>

#include <asm/unaligned.h>
// by Dv: compatibility
#include <linux/version.h>
// by Dv: compatibility /

//#include <linux/iopoll.h>

#include "mcp251xfd.h"
#define DEVICE_NAME "mcptest"

#define TID_CAN_CLK  20000000
#define TID_SPI_FREQ 4000000

#define my_read_poll_timeout(op, val, cond, sleep_us, timeout_us, \
                sleep_before_read, args...) \
({ \
    u64 __timeout_us = (timeout_us); \
    unsigned long __sleep_us = (sleep_us); \
    ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \
    might_sleep_if((__sleep_us) != 0); \
    if (sleep_before_read && __sleep_us) \
        usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
    for (;;) { \
        (val) = op(args); \
        if (cond) \
            break; \
        if (__timeout_us && \
            ktime_compare(ktime_get(), __timeout) > 0) { \
            (val) = op(args); \
            break; \
        } \
        if (__sleep_us) \
            usleep_range((__sleep_us >> 2) + 1, __sleep_us); \
    } \
    (cond) ? 0 : -ETIMEDOUT; \
})

#define my_regmap_read_poll_timeout(myspi, addr, val, cond, sleep_us, timeout_us) \
({ \
    int __ret, __tmp; \
    __tmp = my_read_poll_timeout(my_regmap_read, __ret, __ret || (cond), \
            sleep_us, timeout_us, false, (myspi), (addr), &(val)); \
    __ret ?: __tmp; \
})

static const struct mcp251xfd_devtype_data mcp251xfd_devtype_data_mcp2517fd = {
    .quirks = MCP251XFD_QUIRK_MAB_NO_WARN | MCP251XFD_QUIRK_CRC_REG |
        MCP251XFD_QUIRK_CRC_RX | MCP251XFD_QUIRK_CRC_TX |
        MCP251XFD_QUIRK_ECC,
    .model = MCP251XFD_MODEL_MCP2517FD,
};

static const struct mcp251xfd_devtype_data mcp251xfd_devtype_data_mcp2518fd = {
    .quirks = MCP251XFD_QUIRK_CRC_REG | MCP251XFD_QUIRK_CRC_RX |
        MCP251XFD_QUIRK_CRC_TX | MCP251XFD_QUIRK_ECC,
    .model = MCP251XFD_MODEL_MCP2518FD,
};

/* Autodetect model, start with CRC enabled. */
static const struct mcp251xfd_devtype_data mcp251xfd_devtype_data_mcp251xfd = {
    .quirks = MCP251XFD_QUIRK_CRC_REG | MCP251XFD_QUIRK_CRC_RX |
        MCP251XFD_QUIRK_CRC_TX | MCP251XFD_QUIRK_ECC,
    .model = MCP251XFD_MODEL_MCP251XFD,
};

u16 rx_reg;
u32 rx_val;

static int my_regmap_write(struct spi_device *spi, u16 reg, u32 val)
{
    struct spi_transfer xfer[2];

    //uint8_t *reg_bytes[2] = (void *) &reg;
    //uint8_t *val_bytes[4] = (void *) &val;

    reg|=MCP251XFD_SPI_INSTRUCTION_WRITE;

    xfer[0].tx_buf = &reg;
    xfer[0].rx_buf = &rx_reg;
    xfer[0].len = 2;

    xfer[1].tx_buf = &val;
    xfer[1].rx_buf = &rx_val;
    xfer[1].len = 4;

    printk(KERN_NOTICE "%s(%d) writing 0x%04x => 0x%08xx\nrx_reg => 0x%04x", __FUNCTION__, __LINE__, reg, val, rx_reg);

    return spi_sync_transfer(spi, xfer, ARRAY_SIZE(xfer));
}

static int my_regmap_read(struct spi_device *spi, u16 reg, u32 *val)
{
    struct spi_transfer xfer[2];

    reg|=MCP251XFD_SPI_INSTRUCTION_READ;

    xfer[0].tx_buf = &reg;
    xfer[0].rx_buf = &rx_reg;
    xfer[0].len = 2;

    u32 klokvam_nuli = 0x00000000;

    xfer[1].tx_buf = &klokvam_nuli;
    xfer[1].rx_buf = val;
    xfer[1].len = 4;

    printk(KERN_NOTICE "%s(%d) reading 0x%04x => 0x%08xx\nrx_reg => 0x%04x", __FUNCTION__, __LINE__, reg, *val, rx_reg);

    return spi_sync_transfer(spi, xfer, ARRAY_SIZE(xfer));
}

static int vikam_te(struct spi_device *spi)
{
    u32 osc, osc_reference, osc_mask;

    /* Set Power On Defaults for "Clock Output Divisor" and remove
     * "Oscillator Disable" bit.
     */
    printk (KERN_NOTICE "mask=0x%08x; clkdiv10=0x%08x;\n",MCP251XFD_REG_OSC_CLKODIV_MASK,MCP251XFD_REG_OSC_CLKODIV_10);
    osc = FIELD_PREP(MCP251XFD_REG_OSC_CLKODIV_MASK, MCP251XFD_REG_OSC_CLKODIV_10);

    
    osc_reference = MCP251XFD_REG_OSC_OSCRDY;
    osc_mask = MCP251XFD_REG_OSC_OSCRDY;
    //osc_mask = MCP251XFD_REG_OSC_OSCRDY | MCP251XFD_REG_OSC_PLLRDY;

    /* Note:
     *
     * If the controller is in Sleep Mode the following write only
     * removes the "Oscillator Disable" bit and powers it up. All
     * other bits are unaffected.
     */

    my_regmap_write(spi, MCP251XFD_REG_OSC, 0x00000060);

    printk(KERN_NOTICE, "%s(%d) w1 osc=0x%08x\n", __FUNCTION__, __LINE__, osc);
    pr_info("[WR] rx_reg = 0x%04X \nrx_val = 0x%08X \n", rx_reg, rx_val);


    int err = my_regmap_read_poll_timeout(spi, MCP251XFD_REG_OSC, osc, (osc & osc_mask) == osc_reference,
                       MCP251XFD_OSC_STAB_SLEEP_US,
                       MCP251XFD_OSC_STAB_TIMEOUT_US);

    printk(KERN_NOTICE, "%s(%d) r0 osc=0x%08x\n", __FUNCTION__, __LINE__, osc);
    pr_info("[RD] rx_reg = 0x%04X \nrx_val = 0x%08X \n", rx_reg, rx_val);


    return err;
}

static int mcptest_probe(struct spi_device *spi)
{
    int err;

    struct mcp251xfd_priv *priv;
    u32 freq = TID_CAN_CLK;

    printk(KERN_NOTICE "YOU'RE WELCOME!, my_freq_is_%u\n",freq);
    
#if (LINUX_VERSION_CODE >= KERNEL_VERSION(5,9,0))
    if(!spi->irq)
    {
        return dev_err_probe(&spi->dev, -ENXIO, "No IRQ specified (maybe node \"interrupts-extended\" in DT missing)!\n");
    }
#else
    if(!spi->irq)
    {
        dev_err(&spi->dev, "No IRQ specified (maybe node \"interrupts-extended\" in DT missing)!\n");
        return -ENXIO;
    }
#endif

    //struct device_node *np;
    //np = spi->dev.of_node;

    spi_set_drvdata(spi, priv);


    priv->spi = spi;
    //priv->rx_int = rx_int;
    //priv->clk = clk;

    
    //
    // CHECK
    const void *match = of_device_get_match_data(&spi->dev);
    //
    if (match)
        priv->devtype_data = *(struct mcp251xfd_devtype_data *)match;
    else
        priv->devtype_data = *(struct mcp251xfd_devtype_data *)spi_get_device_id(spi)->driver_data;
    //
    // or
    //priv->devtype_data = *(struct mcptest_devtype_data *)
    //      spi_get_device_id(spi)->driver_data;



    priv->spi_max_speed_hz_orig = spi->max_speed_hz;
    
#ifndef TID_SPI_FREQ
    spi->max_speed_hz = min(spi->max_speed_hz, freq / 2 / 1000 * 850);
#else
    spi->max_speed_hz = min(TID_SPI_FREQ, freq / 2 / 1000 * 850);
#endif
    
    spi->bits_per_word = 8;
    
    /////////////////////////////////////////zgrep FORCE_UNLOAD /proc/config.gz
    

dev_err( &spi->dev, "%s():%d spi-> max_speed_hz:%d \n", __FUNCTION__, __LINE__, spi->max_speed_hz);
    err = spi_setup(spi);

dev_err( &spi->dev, "%s():%d spi_setup:%d \n", __FUNCTION__, __LINE__, err);
    if (err)
        goto remove;

    //err = mcptest_regmap_init(priv);
//dev_err( &spi->dev, "%s():%d spi_setup:%d \n", __FUNCTION__, __LINE__, err);
    //if (err)
    //  goto remove;

    //err = mcptest_register(priv);
//dev_err( &spi->dev, "%s():%d spi_setup:%d \n", __FUNCTION__, __LINE__, err);
    //if (err)
    //  goto remove;

    err = vikam_te(spi);

    if(err)
        goto remove;

//
//int spi_sync_transfer(struct spi_device *spi, struct spi_transfer *xfers, unsigned int num_xfers)
//spi – a device with which data will be exchanged.
//xfers – An array of spi_transfers.
//num_xfers – Number of items in the xfer array.

//
//int spi_async(struct spi_device *spi, struct spi_message *message)
//spi – a device with which data will be exchanged.
//message – describes the data transfers, including completion callback.
//

//
//This API is used to write the data and followed by a read. This is synchronous.
//
//int spi_write_then_read(struct spi_device * spi, const void * txbuf, unsigned n_tx, void * rxbuf, unsigned n_rx)
//
//spi – a device with which data will be exchanged.
//txbuf – data to be written.
//n_tx – size of txbuf (in bytes).
//rxbuf – the buffer into which data will be read.
//n_rx – size of rxbuf (in bytes).
//

    return 0;

 remove:
    spi->max_speed_hz = priv->spi_max_speed_hz_orig;

    if(spi)
    {
        spi_unregister_device(spi);    // Unregister the SPI slave
        pr_info("spi device unregistered :) happy hacking :P\nIm waiting the next test :P\n");
    }

    return err;
}

/*
static void __exit etx_spi_exit(void)
{ 
}
module_init(etx_spi_init);
module_exit(etx_spi_exit); // https://embetronicx.com/tutorials/linux/device-drivers/linux-kernel-spi-device-driver-tutorial/
*/

static int mcptest_remove(struct spi_device *spi)
{
    struct mcp251xfd_priv *priv = spi_get_drvdata(spi);

    spi->max_speed_hz = priv->spi_max_speed_hz_orig;

    if(spi)
    {
        spi_unregister_device(spi);    // Unregister the SPI slave
        pr_info("spi device unregistered :) happy hacking :P\nIm waiting the next test :P\n");
    }

    return 0;
}

/*
static const struct dev_pm_ops mcptest_pm_ops = {
    SET_RUNTIME_PM_OPS(mcptest_runtime_suspend,
               mcptest_runtime_resume, NULL)
};
*/

static const struct spi_device_id mcptest_id_table[] = {
    {
        .name = "mcp2517fd",
        .driver_data = (kernel_ulong_t)&mcp251xfd_devtype_data_mcp2517fd,
    }, {
        .name = "mcp2518fd",
        .driver_data = (kernel_ulong_t)&mcp251xfd_devtype_data_mcp2518fd,
    }, {
        .name = "mcp251xfd",
        .driver_data = (kernel_ulong_t)&mcp251xfd_devtype_data_mcp251xfd,
    }, {
        /* sentinel */
    },
};
MODULE_DEVICE_TABLE(spi, mcptest_id_table);

static const struct of_device_id mcptest_of_match[] = {
    {
        .compatible = "microchip,mcp2517fd",
        .data = &mcp251xfd_devtype_data_mcp2517fd,
    }, {
        .compatible = "microchip,mcp2518fd",
        .data = &mcp251xfd_devtype_data_mcp2518fd,
    }, {
        .compatible = "microchip,mcp251xfd",
        .data = &mcp251xfd_devtype_data_mcp251xfd,
    }, {
        /* sentinel */
    },
};
MODULE_DEVICE_TABLE(of, mcptest_of_match);

static struct spi_driver mcptest_driver = {
    .driver = {
        .name = DEVICE_NAME,
        //.pm = &mcptest_pm_ops,
        .of_match_table = mcptest_of_match,
    },
    .probe = mcptest_probe,
    .remove = mcptest_remove,
    .id_table = mcptest_id_table,
};

module_spi_driver(mcptest_driver);

MODULE_AUTHOR("Acho @ T&D Engineering");
MODULE_DESCRIPTION("mcptest test");
MODULE_LICENSE("GPL v2");

Edit: This is the log of the crash:

root@helperboard:/# dmesg -C && insmod mcptest.ko && dmesg && dmesg -n 1
[ 1038.960898] Internal error: Accessing user space memory outside uaccess.h routines: 96000045 [#1] PREEMPT SMP
[ 1038.972067] Modules linked in: mcptest(+) vin_v4l2 ov8858_r2a_4lane vin_io videobuf2_dma_contig gt9xxnew_ts sprdbt_tty mcp251x qcaspi mcp251xfd sprdwl_ng uwe5622_bsp_sdio pvrsrvkm(O) sunxi_gmac tid_irq_test
[ 1038.992839] CPU: 3 PID: 7052 Comm: insmod Tainted: G           O    4.9.170 #13
[ 1039.001060] Hardware name: sun50iw10 (DT)
[ 1039.005572] task: ffffffc03cdb9d00 task.stack: ffffffc03e19c000
[ 1039.012254] PC is at mcptest_probe+0x6c/0x2f0 [mcptest]
[ 1039.018147] LR is at mcptest_probe+0x48/0x2f0 [mcptest]
[ 1039.024026] pc : [<ffffff8000d9b2ec>] lr : [<ffffff8000d9b2c8>] pstate: 80400145
[ 1039.032342] sp : ffffffc03e19f9d0
[ 1039.036069] x29: ffffffc03e19f9d0 x28: ffffffc0364b4d00
[ 1039.042057] x27: 0000000000000001 x26: 0000000000000000
[ 1039.048046] x25: ffffff800920df10 x24: 000000f1e6e21461
[ 1039.054033] x23: 000000000000002f x22: ffffff8009469000
[ 1039.060021] x21: 0000000000000000 x20: ffffff8000d9bd30
[ 1039.066009] x19: ffffffc03d423000 x18: 000000000000000a
[ 1039.071996] x17: 0000007fa74532a8 x16: ffffff80080a9090
[ 1039.077984] x15: 0000000001312d00 x14: ffffff8009418788
[ 1039.083971] x13: ffffffffffffffff x12: 0000000000000020
....
[ 1040.059641] f8e0: ffffffc03f6eba90 0000000005f5e0ff ffffff80094183ce 00000000fffffff4
[ 1040.068389] f900: 0000000000000020 ffffffffffffffff
[ 1040.073841] [<ffffff8000d9b2ec>] mcptest_probe+0x6c/0x2f0 [mcptest]
[ 1040.080849] [<ffffff80085cabd4>] spi_drv_probe+0x88/0xb0
[ 1040.086789] [<ffffff8008539c50>] driver_probe_device+0x1dc/0x41c
[ 1040.093501] [<ffffff8008539f24>] __driver_attach+0x94/0x108
[ 1040.099731] [<ffffff80085379f4>] bus_for_each_dev+0x8c/0xcc
[ 1040.105954] [<ffffff8008539530>] driver_attach+0x30/0x3c
[ 1040.111885] [<ffffff8008538ef4>] bus_add_driver+0xf8/0x248
[ 1040.118012] [<ffffff800853ac80>] driver_register+0x98/0xe4
[ 1040.124139] [<ffffff80085caae8>] __spi_register_driver+0x6c/0x78
[ 1040.130851] [<ffffff8000d9e014>] mcptest_driver_init+0x14/0x30 [mcptest]
[ 1040.138340] [<ffffff8008083a30>] do_one_initcall+0xb4/0x164
[ 1040.144571] [<ffffff800819d3fc>] do_init_module+0x64/0x1c0
[ 1040.150710] [<ffffff80081324b8>] load_module+0x1c0c/0x2060
[ 1040.156842] [<ffffff8008132b98>] SyS_finit_module+0xa8/0xfc
[ 1040.163066] [<ffffff8008083540>] el0_svc_naked+0x34/0x38
[ 1040.169011] Code: 95de6857 14000061 d2800000 f9005260 (f9014013)
[ 1040.175828] ---[ end trace 5ec42185a80c910f ]---

root@helperboard:/# ^C
root@helperboard:/# reboot

[  OK  ] Stopped Create Static Device Nodes in /dev.
[  OK  ] Reached target Shutdown.
[  OK  ] Reached target Final Step.
         Starting Reboot...
[ 1987.095794] systemd-shutdow: 23 output lines suppressed due to ratelimiting
[ 1987.124360] systemd-shutdown[1]: Syncing filesystems and block devices.
[ 1987.152188] systemd-shutdown[1]: Sending SIGTERM to remaining processes...
[ 1987.177346] systemd-journald[1192]: Received SIGTERM from PID 1 (systemd-shutdow).
[ 1987.221361] systemd-shutdown[1]: Sending SIGKILL to remaining processes...
[ 1987.244437] systemd-shutdown[1]: Unmounting file systems.
[ 1987.272053] EXT4-fs (mmcblk0p4): re-mounted. Opts: nobarrier,noauto_da_alloc,data=ordered
[ 1987.290797] systemd-shutdown[1]: All filesystems unmounted.
[ 1987.297107] systemd-shutdown[1]: Deactivating swaps.
[ 1987.302961] systemd-shutdown[1]: All swaps deactivated.
[ 1987.308893] systemd-shutdown[1]: Detaching loop devices.
[ 1987.318052] systemd-shutdown[1]: All loop devices detached.
[ 1987.324325] systemd-shutdown[1]: Detaching DM devices.
[ 2012.905670] WCN_ERR: log_rx_callback open /data/unisoc_cp2log_1.txt error no.-30 retry:1

And hangs here when I do reboot

Solution

I found out the problem, it was args mismatch of my read_poll_timeout macros in their usages.

These are my new macros:

#define tid_poll_timeout(cond, sleep_us, timeout_us, sleep_before_read, op, args...) \
({ \
    ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \
    might_sleep_if(sleep_us); \
    if(sleep_before_read) \
        usleep_range((sleep_us >> 2) + 1, sleep_us); \
    for (;;) { \
        op(args); \
        if (cond) \
            break; \
        if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \
            op(args); \
            break; \
        } \
        if (sleep_us) \
            usleep_range((sleep_us >> 2) + 1, sleep_us); \
    } \
    (cond) ? 0 : -ETIMEDOUT; \
})
#define my_regmap_read_poll_timeout(cond, sleep_us, timeout_us, op, spi, addr, val) \
({ \
    int __ret, __tmp; \
    __tmp = tid_poll_timeout(val || (cond), sleep_us, timeout_us, false, op, spi, addr, &(val)); \
    __ret ?: __tmp; \
})

And this where its used:

int err = my_regmap_read_poll_timeout((osc & osc_mask) == osc_reference, MCP251XFD_OSC_STAB_SLEEP_US, MCP251XFD_OSC_STAB_TIMEOUT_US, my_regmap_read, spi, MCP251XFD_REG_OSC, osc);

And this is my new code if somebody is interested for testing the spi connection to MCP251xFD family

https://github.com/minima34/mcp251xfd_test

git clone https://github.com/minima34/mcp251xfd_test.git

Another thing I edited was that im not using the priv structure, so I commented out these lines related to the structure:

//spi_set_drvdata(spi, priv);
    //priv->spi = spi;
    //priv->rx_int = rx_int;
    //priv->clk = clk;
    //
    //
    // CHECK
    //const void *match = of_device_get_match_data(&spi->dev);
    //
    //if (match)
    //  priv->devtype_data = *(struct mcp251xfd_devtype_data *)match;
    //else
    //  priv->devtype_data = *(struct mcp251xfd_devtype_data *)spi_get_device_id(spi)->driver_data;
    //
    // or
    //priv->devtype_data = *(struct mcptest_devtype_data *)
    //      spi_get_device_id(spi)->driver_data;
    //
    //priv->spi_max_speed_hz_orig = spi->max_speed_hz;

This is it, driver works now and I found out that my spi connection works. yea, happy smile :)

I write 0x00000060 and read 0x00000060

so.. => everything [OK]

Thanks family.



Answered By - Ангел Димитров
Answer Checked By - David Goodson (WPSolving Volunteer)