Issue
I wrote a very basic block device driver by following the steps at https://linux-kernel-labs.github.io/refs/heads/master/labs/block_device_drivers.html. The module_init function, my_block_init, and the helper it calls, create_block_device, are provided below:
static int create_block_device(struct my_block_dev *dev) {
int err;
dev->size = NR_SECTORS * KERNEL_SECTOR_SIZE;
dev->data = vmalloc(dev->size);
if (dev->data == NULL) {
printk(KERN_ERR "vmalloc: out of memory\n");
err = -ENOMEM;
goto out_vmalloc;
}
/* Initialize tag set. */
dev->tag_set.ops = &my_queue_ops;
dev->tag_set.nr_hw_queues = 1;
dev->tag_set.queue_depth = 128;
dev->tag_set.numa_node = NUMA_NO_NODE;
dev->tag_set.cmd_size = 0;
dev->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
err = blk_mq_alloc_tag_set(&dev->tag_set);
if (err) {
printk(KERN_ERR "blk_mq_alloc_tag_set: can't allocate tag set\n");
goto out_alloc_tag_set;
}
/* Allocate queue. */
dev->queue = blk_mq_init_queue(&dev->tag_set);
if (IS_ERR(dev->queue)) {
printk(KERN_ERR "blk_mq_init_queue: out of memory\n");
err = -ENOMEM;
goto out_blk_init;
}
blk_queue_logical_block_size(dev->queue, KERNEL_SECTOR_SIZE);
dev->queue->queuedata = dev;
/* initialize the gendisk structure */
dev->gd = blk_alloc_disk(NUMA_NO_NODE);
if (!dev->gd) {
printk(KERN_ERR "alloc_disk: failure\n");
err = -ENOMEM;
goto out_alloc_disk;
}
dev->gd->major = MY_BLOCK_MAJOR;
dev->gd->minors = 1;
dev->gd->first_minor = 0;
dev->gd->fops = &my_block_ops;
dev->gd->queue = dev->queue;
dev->gd->private_data = dev;
snprintf(dev->gd->disk_name, DISK_NAME_LEN, "myblock");
set_capacity(dev->gd, NR_SECTORS);
if (add_disk(dev->gd)) {
err = -ENOMEM;
goto out_alloc_disk;
}
return 0;
out_alloc_disk:
blk_put_queue(dev->queue);
out_blk_init:
blk_mq_free_tag_set(&dev->tag_set);
out_alloc_tag_set:
vfree(dev->data);
out_vmalloc:
return err;
}
/*
 * my_block_init() - module entry point.
 *
 * Registers the block major MY_BLOCK_MAJOR under MY_BLKDEV_NAME and then
 * creates the block device itself.
 *
 * Return: 0 on success, a negative errno on failure. If device creation
 * fails, the major is unregistered again so a failed load leaves nothing
 * behind.
 */
static int my_block_init(void)
{
	int status;

	status = register_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME);
	if (status < 0) {
		printk(KERN_ERR "unable to register mybdev block device\n");
		return status;
	}
	printk(KERN_INFO
	       "Block device with major(%d) and name(%s) successfully created\n",
	       MY_BLOCK_MAJOR, MY_BLKDEV_NAME);

	status = create_block_device(&dev);
	if (status < 0) {
		printk(KERN_ERR "unable to create block device\n");
		/* Undo register_blkdev(); otherwise the major stays claimed. */
		unregister_blkdev(MY_BLOCK_MAJOR, MY_BLKDEV_NAME);
		return status;
	}
	return 0;
}
After I compiled the module and ran insmod on the resulting .ko file, the insmod process was killed and exited.
I then checked the logs with dmesg | tail -100, and it looks like there is a NULL pointer dereference when calling the add_disk function:
[ 108.621987] Block device with major(240) and name(mybdev) successfully created
[ 108.624629] BUG: kernel NULL pointer dereference, address: 0000000000000264
[ 108.624637] #PF: supervisor read access in kernel mode
[ 108.624639] #PF: error_code(0x0000) - not-present page
[ 108.624641] PGD 0 P4D 0
[ 108.624643] Oops: 0000 [#1] PREEMPT SMP PTI
[ 108.624646] CPU: 0 PID: 2767 Comm: insmod Tainted: G OE 6.2.0-33-generic #33~22.04.1-Ubuntu
[ 108.624648] Hardware name: VMware, Inc. VMware Virtual Platform/440BX Desktop Reference Platform, BIOS 6.00 11/12/2020
[ 108.624651] RIP: 0010:kobject_get+0xe/0x90
[ 108.624658] Code: c2 a8 68 05 b9 eb d2 0f 1f 44 00 00 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 90 55 48 89 e5 41 54 49 89 fc 48 85 ff 74 22 <f6> 47 3c 01 74 2f 49 8d 7c 24 38 b8 01 00 00 00 f0 41 0f c1 44 24
[ 108.624659] RSP: 0018:ffffbef08649fa68 EFLAGS: 00010206
[ 108.624661] RAX: ffffffffb8f920c4 RBX: 0000000000000228 RCX: 0000000000000000
[ 108.624662] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000228
[ 108.624663] RBP: ffffbef08649fa70 R08: 0000000000000000 R09: 0000000000000000
[ 108.624663] R10: 0000000000000000 R11: 0000000000000000 R12: 0000000000000228
[ 108.624664] R13: 0000000000000000 R14: ffff967146f53400 R15: ffff967146f53410
[ 108.624665] FS: 00007f80b1aaa000(0000) GS:ffff967179e00000(0000) knlGS:0000000000000000
[ 108.624666] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 108.624667] CR2: 0000000000000264 CR3: 0000000056e9c003 CR4: 0000000000370ef0
[ 108.624670] Call Trace:
[ 108.624672] <TASK>
[ 108.624676] ? show_regs+0x72/0x90
[ 108.624681] ? __die+0x25/0x80
[ 108.624682] ? page_fault_oops+0x79/0x190
[ 108.624685] ? mod_memcg_lruvec_state+0x2b/0x60
[ 108.624689] ? mod_objcg_state+0x1ad/0x2e0
[ 108.624692] ? do_user_addr_fault+0x30c/0x640
[ 108.624693] ? exc_page_fault+0x81/0x1b0
[ 108.624698] ? asm_exc_page_fault+0x27/0x30
[ 108.624702] ? kobject_get+0xe/0x90
[ 108.624704] kobject_add_internal+0x35/0x310
[ 108.624706] kobject_add+0x7a/0xf0
[ 108.624709] elv_register_queue+0x3a/0xa0
[ 108.624712] blk_register_queue+0xf2/0x220
[ 108.624715] device_add_disk+0x249/0x400
[ 108.624722] ? __pfx_init_module+0x10/0x10 [mybdev]
[ 108.624726] my_block_init+0x193/0xec0 [mybdev]
[ 108.624729] do_one_initcall+0x46/0x240
[ 108.624733] ? kmalloc_trace+0x2a/0xb0
[ 108.624736] do_init_module+0x52/0x240
[ 108.624739] load_module+0xb96/0xd60
[ 108.624741] ? kernel_read_file+0x25c/0x2b0
[ 108.624746] __do_sys_finit_module+0xcc/0x150
[ 108.624748] ? __do_sys_finit_module+0xcc/0x150
[ 108.624750] __x64_sys_finit_module+0x18/0x30
[ 108.624752] do_syscall_64+0x59/0x90
[ 108.624755] ? ksys_mmap_pgoff+0x123/0x270
[ 108.624759] ? exit_to_user_mode_prepare+0x3b/0xd0
[ 108.624761] ? syscall_exit_to_user_mode+0x38/0x60
[ 108.624762] ? do_syscall_64+0x69/0x90
[ 108.624764] ? syscall_exit_to_user_mode+0x38/0x60
[ 108.624766] ? do_syscall_64+0x69/0x90
[ 108.624767] ? do_syscall_64+0x69/0x90
[ 108.624769] ? do_syscall_64+0x69/0x90
[ 108.624771] entry_SYSCALL_64_after_hwframe+0x72/0xdc
[ 108.624772] RIP: 0033:0x7f80b131ea3d
And the exception happens inside elv_register_queue, whose source code is shown below:
/*
 * Register the queue's elevator (I/O scheduler) kobject under the name
 * "iosched", with the owning disk's queue kobject as parent, and create a
 * sysfs file for each of the elevator type's attributes.
 *
 * q:      request queue whose elevator (q->elevator) is registered.
 * uevent: when true, emit a KOBJ_ADD uevent for the elevator kobject.
 *
 * Returns the result of kobject_add(): 0 on success, otherwise its error.
 * Must be called with q->sysfs_lock held (asserted via lockdep).
 */
int elv_register_queue(struct request_queue *q, bool uevent)
{
struct elevator_queue *e = q->elevator;
int error;
lockdep_assert_held(&q->sysfs_lock);
/*
 * NOTE(review): the parent argument is computed from q->disk. In the oops
 * above, kobject_get() was handed the small address 0x228, which looks like
 * a member offset applied to a NULL q->disk -- confirm against the gendisk
 * layout of the running kernel.
 */
error = kobject_add(&e->kobj, &q->disk->queue_kobj, "iosched");
if (!error) {
struct elv_fs_entry *attr = e->type->elevator_attrs;
if (attr) {
/* The attribute array ends at the first entry with a NULL name. */
while (attr->attr.name) {
/* Stop at the first attribute that fails; error is not updated. */
if (sysfs_create_file(&e->kobj, &attr->attr))
break;
attr++;
}
}
if (uevent)
kobject_uevent(&e->kobj, KOBJ_ADD);
/* Mark the elevator as registered. */
set_bit(ELEVATOR_FLAG_REGISTERED, &e->flags);
}
return error;
}
After looking through several sources, I couldn't find out what was uninitialized and causing the exception. Is anyone familiar with this part of the kernel, or is there a better way to approach this problem?
Kernel Version: v6.2.0
Solution
I got around this issue by reading the source code of block/genhd.c, where I discovered that the call to blk_alloc_disk already allocates a request_queue for the gendisk it returns. I therefore removed my own initialization of a request_queue and used the one belonging to the disk, and the code finally worked.
/*
 * Allocate the gendisk and reuse the request_queue that comes with it,
 * instead of creating a separate queue and assigning it to gd->queue.
 *
 * NOTE(review): dev->gd is dereferenced on the next line before any NULL
 * check is visible in this fragment -- confirm the check comes first in the
 * real code.
 * NOTE(review): for a blk-mq driver that has set up dev->tag_set,
 * blk_mq_alloc_disk(&dev->tag_set, dev) is presumably the matching API on
 * v6.2 -- verify that the queue obtained here is actually bound to the tag
 * set.
 */
dev->gd = blk_alloc_disk(NUMA_NO_NODE);
dev->queue = dev->gd->queue; // use the queue allocated during blk_alloc_disk
Answered By - Hantong Liu Answer Checked By - Katrina (WPSolving Volunteer)