KERNEL MODULE PROGRAMMING


This is a quick guide to Kernel Module Programming, written mostly as a reminder for myself. There are more comprehensive books that you should go and read.


A driver example

#include <linux/kernel.h>
#include <linux/module.h>

#if CONFIG_MODVERSIONS == 1
#  define MODVERSIONS
#  include <linux/modversions.h>
#endif

#include <linux/errno.h>       // error codes (ENOTTY, ...)
#include <linux/fs.h>
#include <linux/wrapper.h>
#include <asm/uaccess.h>       // get_user put_user

/* Major number of the foo driver. It is passed to the registration call in
 * init_module(); the initial value 0 asks the kernel to pick a major. */
int foo_major = 0;                   // Note [2]


/*
 * Open entry point of the foo driver.
 *
 * Placeholder for per-open initialization. Increments the module usage
 * count so the module cannot be removed while the device is open
 * (Note [6]). Always returns 0.
 */
int foo_open(struct inode * inode, struct file * file)
{
  // The major is the high part of the device number, so it is obtained
  // with a RIGHT shift; the minor is the low byte.       Note [3]
  // major = inode->i_rdev >> 8;     // or MAJOR(inode->i_rdev)
  // minor = inode->i_rdev & 0xff;   // or MINOR(inode->i_rdev)

  /* initialize ... */
  MOD_INC_USE_COUNT;                 // Note [6]
  return 0;
}

/*
 * Release entry point of the foo driver.
 *
 * Placeholder for per-close teardown; drops the usage count that
 * foo_open() took (Note [6]). Always returns 0.
 */
int foo_close(struct inode *inode, struct file *file)
{
  /* clear ... */

  MOD_DEC_USE_COUNT;                 /* Note [6] */

  return 0;
}

/*
 * Read entry point: copies up to len bytes from the driver's `data`
 * buffer to the user buffer buf, one byte at a time with put_user().
 *
 * `data` is not declared in this sketch; it presumably is the driver's
 * own storage, set up elsewhere.
 *
 * Returns the number of bytes copied, or the put_user() error code when
 * not even the first byte could be copied. Returning 0 in that case
 * would be read as end-of-file by the caller (Note [7]).
 */
ssize_t foo_read(struct file * file, char * buf, size_t len, loff_t * offset)
{                           // Note [7] [10]
  size_t n;       /* bytes copied so far */
  int err = 0;    /* result of the last put_user() */

  /* sanity checks ... */
  for (n = 0; n < len; ++n) {        // Note [8]
    /* buf + n instead of buf++: no side effect inside a macro argument */
    err = put_user(data[n], buf + n);
    if (err < 0)
      break;
  }
  if (n == 0 && err < 0)
    return err;
  return n;
}

ssize_t foo_write(struct file * file, const char * buf, size_t len, loof_t * offset)
{
  /* sanity checks ... */
  for (n=0; n<len; ++n)              // Note [8]
    if ( get_user( *(data+n), buf++) < 0)      
      break;
  return n;
}

int foo_ioctl(struct inode * inode, struct file * file, unsigned int cmd, unsigned long arg)
{
  /* sanity checks ... */
  switch (cmd) {
    case FOO_IOCSXXX:                  // Note [9]
      // set XXX = *arg
      break;
    case FOO_IOCGXXX:
      // copy XXX into *arg
      break;
    ...
  }
  return 0;
}

struct file_operations foo_fops {    // Note [4]
  NULL,        /* lseek */
  foo_read,
  foo_write,
  NULL,        /* readdir */
  NULL,        /* select */
  foo_ioctl,
  NULL,        /* mmap */
  foo_open,
  NULL,        /* flush */
  foo_close,   /* release */
  NULL,
  /* all the others are NULL */
};

int init_module()
{
  foo_major = module_register_chrdev( foo_major, "foo", &foo_fops);
          // Note [2] [4] [5]
  // allocate and initialize driver's data structures
  return 0;
}

/*
 * Module exit point, run at rmmod time: gives back the major number
 * that init_module() registered under the name "foo".
 */
void cleanup_module(void)
{
  module_unregister_chrdev(foo_major, "foo");
}


Notes.

  1. The compile flags for the module are -DMODULE -D__KERNEL__ -DLINUX, and it is good practice to also add -Wall. If the module code is spread over several files, all but one of them should #define __NO_VERSION__ before including <linux/module.h> and <linux/version.h>. Combine the objects together with
    ld -m elf_i386 -r -o module objects
  2. Device drivers are used to allow processes to communicate with hardware. Each driver has a major number (see /proc/devices), which coincides with the major number of the special file created with mknod; the special files are put in /dev (by convention). The major is passed to module_register_chrdev; if it is 0, the function assigns a major to the driver and returns it. The minor number distinguishes among the individual physical devices managed by the driver.
  3. Devices are of two types: character and block devices (there are also net and scsi). Char devices are simpler: they input one byte at a time. Block devices do i/o one block at a time and the driver must buffer it.
  4. The kernel learns the driver's routines through the struct file_operations pointer passed as an argument at registration.
  5. If you insmod under X, you need a console X-term (xterm -C) to catch the printk of the driver. It is better to insmod not under X in any case to avoid delaying the printing. Furthermore, both insmod and rmmod can hang the system; run sync before each of them to make sure you do not lose any data.
  6. A device keeps a usage count in the variable mod_use_count, but you should use the two macros MOD_INC_USE_COUNT and MOD_DEC_USE_COUNT to change it. The driver can be removed (rmmod) only when the usage count is zero.
  7. read should return 0 at EOF
  8. Instead of put/get_user the driver could use copy_to/from_user( to, from, len ), whose arguments are all unsigned long, the first two being addresses. These verify that the addresses are accessible by calling access_ok( int type, ulong addr, ulong len), where type can be either VERIFY_READ or VERIFY_WRITE. Read and write are from the kernel's point of view.
  9. ioctl numbers are defined in a header file (which must be included by user programs as well):
        #define FOO_MAJOR  254

        // The direction in the macro name is seen from the user program:
        // _IOW = user writes (the kernel reads the data),
        // _IOR = user reads  (the kernel writes the data).
        // _IO takes no data type: only the magic number and the command nr.
        #define FOO_IOCSxxx  _IOW( FOO_MAJOR, 0, char *)   // kernel read
        #define FOO_IOCGxxx  _IOR( FOO_MAJOR, 1, char *)   // kernel write
        #define FOO_IOCyyy   _IO(  FOO_MAJOR, 2)           // no data transfer
        #define FOO_IOCXzzz  _IOWR(FOO_MAJOR, 3, int *)    // read-write
      
    There is a convention in ioctl letters: S set (with pointer), T tell (with value), G get, Q query (response is the return value), X exchange (S and G), H shift (T and Q).
  10. Version 2.4.x added an offset argument to read and write. Need to say more.


Using the /proc filesystem

#include <linux/kernel.h>
#include <linux/module.h>

#if CONFIG_MODVERSIONS == 1
#  define MODVERSIONS
#  include <linux/modversions.h>
#endif

#include <linux/proc_fs.h>

#include <linux/.h>

/* Storage behind the /proc entry: bar_info() hands its address back to the
 * caller. The size is left unspecified in this sketch. */
static char bar_buf[ ... ];

/*
 * Open entry point for the /proc file.
 *
 * Placeholder for per-open work; increments the module usage count so
 * the module stays loaded while the file is open. Always returns 0.
 */
int bar_open(struct inode * inode, struct file * file)
{
  // ...
  MOD_INC_USE_COUNT;
  return 0;
}

/*
 * Release entry point for the /proc file.
 *
 * Placeholder for per-close work; drops the usage count taken by
 * bar_open(). Always returns 0.
 */
int bar_close(struct inode *inode, struct file *file)
{
  /* ... */

  MOD_DEC_USE_COUNT;

  return 0;
}

/*
 * Read entry point for the /proc file (skeleton).
 *
 * Walks over the len bytes requested by the caller; the per-byte work is
 * left as a placeholder. Returns the number of bytes processed.
 */
static ssize_t bar_read( struct file * file, char * buf, size_t len, loff_t * offset)
{
  size_t n;   /* bytes processed; one counter for both the loop and the result */

  /* The braces are required: without them `return n;` would become the
   * body of the loop. */
  for (n = 0; n < len; ++n) {
    // ...
  }
  return n;
}

/*
 * Write entry point for the /proc file: stores user data in bar_buf.
 *
 * Copies at most sizeof bar_buf bytes, starting at the beginning of
 * bar_buf, one byte at a time with get_user(). The length comes from
 * user space, so it must be bounded by the buffer size; a longer request
 * results in a short write.
 *
 * Returns the number of bytes stored, or the get_user() error code when
 * not even the first byte could be fetched.
 */
static ssize_t bar_write( struct file * file, const char * buf, size_t len, loff_t * offset)
{
  size_t n;       /* bytes stored so far */
  int err = 0;    /* result of the last get_user() */

  for (n = 0; n < len && n < sizeof bar_buf; ++n) {
    /* buf + n instead of buf++: no side effect inside a macro argument */
    err = get_user( bar_buf[n], buf + n );
    if (err < 0)
      break;
  }
  if (n == 0 && err < 0)
    return err;
  return n;
}

/*
 * Permission hook for the /proc file.
 *
 * Grants the operation (returns 0) when op is 4, or when op is 2 and the
 * extra condition left open in this sketch holds; anything else is
 * refused with -EACCES.
 * NOTE(review): 4 and 2 presumably are the kernel's read and write
 * permission masks -- confirm against <linux/fs.h>.
 */
static int bar_permission( struct inode * inode, int op )
{
  if ( op == 4 )
    return 0;

  if ( op == 2 && ... )
    return 0;

  return -EACCES;
}


struct inode_operations bar_iops {
  & bar_fops;
  NULL,         // create
  NULL,         // lookup
  NULL,         // link
  NULL,         // unlink
  NULL,         // symlink
  NULL,         // mkdir
  NULL,         // rmdir
  NULL,         // mknod
  NULL,         // rename
  NULL,         // readlink
  NULL,         // follow_link
  NULL,         // read_page
  NULL,         // write_page
  NULL,         // bmap
  NULL,         // truncate
  bar_permission
};

/*
 * get_info callback of the /proc entry (it is the function installed in
 * bar_file).
 *
 * Fills bar_buf (left as a placeholder here), points *ubuf at it and
 * returns the number of bytes made available.
 * NOTE(review): `n` is not declared in this sketch; the elided
 * initialization is expected to compute it.
 */
int bar_info( char * kbuf, char ** ubuf, loff_t offset, int len, int zero)
{
  // initialize bar_buf[]
  * ubuf = bar_buf;     // hand our own buffer back to the caller
  return n;             // nr. of bytes
}

struct proc_dir_entry bar_file {
  0,                  // low_inode
  3,                  // name length
  "bar",              // name string
  S_IFREG | S_IRUGO,
  1,                  // links nr.
  0, 0,               // uid, gid
  80,                 // ls size
  bar_iops,           // struct file_operations
  bar_info,           // get_info function
  NULL                // inode fill
};

/*
 * Module entry point, run at insmod time: registers the "bar" entry
 * under the /proc root and passes the registration result on to the
 * caller.
 */
int init_module(void)
{
  int status = proc_register( &proc_root, &bar_file );

  return status;
}

/*
 * Module exit point, run at rmmod time: removes the "bar" entry from the
 * /proc root. The entry is identified by its inode number, which
 * struct proc_dir_entry stores in the member low_ino.
 */
void cleanup_module(void)
{
  proc_unregister( &proc_root, bar_file.low_ino );
}