/* -*-C-*-
*******************************************************************************
*
* File:         mc4020.c
* Description:  Measurement Computing PCI-DAS4020/12 driver
*
*******************************************************************************
*/
/*
 * Copyright 2001,2004 Free Software Foundation, Inc.
 * 
 * This file is part of GNU Radio
 * 
 * GNU Radio is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2, or (at your option)
 * any later version.
 * 
 * GNU Radio is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with GNU Radio; see the file COPYING.  If not, write to
 * the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
 * Boston, MA 02111-1307, USA.
 */

#include "driver.h"
#include <linux/init.h>
#include <linux/types.h>
#include <linux/fs.h>
#include <linux/devfs_fs_kernel.h>
#include <linux/mm.h>
#include <linux/errno.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/tty.h>
#include <linux/serial.h>
#include <linux/interrupt.h>
#include <linux/string.h>
#include <linux/fcntl.h>
#include <linux/ptrace.h>
#include <linux/delay.h>
#include <linux/major.h>
#include <linux/slab.h>
#include <linux/poll.h>

#include <asm/system.h>
#include <asm/io.h>
#include <asm/segment.h>
#include <asm/bitops.h>
#include <asm/page.h>
#include <asm/pgtable.h>

#include <linux/kernel.h>
#include <linux/pci.h>

#include "mc4020.h"
#include "mc4020_util.h"

/* Module metadata: author, one-line description and license tag. */
MODULE_AUTHOR("Eric Blossom <eb@comsec.com>");
MODULE_DESCRIPTION("PCI-DAS4020/12 driver");
MODULE_LICENSE("GPL");

/*
 * ------------------------------------------------------------------------
 * Notes on concurrency control...
 *
 * We use a semaphore to serialize access to the bulk of the data
 * structures.  For the following resources, used by the interrupt
 * handler, we use a spinlock: plx_dma_intcsr, fill_index, state.
 * ------------------------------------------------------------------------
 */

#define	MAX_LATENCY	20	/* ms */


// define these to either 0 or 1
#define DEBUG 	      1
#define	VERBOSE_DEBUG 0

typedef int bool;


/*
 * Forward declarations.
 *
 * The first group are the entry points installed in mc4020_fops
 * (the poll handler is currently compiled out).
 */
static int mc4020_open (struct inode *inode, struct file *file);
static int mc4020_release (struct inode *inode, struct file *file);
static int mc4020_ioctl (struct inode *, struct file *file,
			 unsigned int cmd, unsigned long arg);
static int mc4020_mmap (struct file *file,
			struct vm_area_struct *vma);
#if 0
static unsigned int mc4020_poll (struct file *file, poll_table *wait);
#endif

/* interrupt handler */
static void mc4020_interrupt (int irq, void *dev_id, struct pt_regs *regs);

/* stops acquisition: clears CR0/CR1 and the DMA1 CSR */
static int mc4020_stop_rx (struct mc4020 *mc);

/* helpers that are used before their definitions appear */
static unsigned long compute_divisor (unsigned long desired_freq);
static bool ensure_buffer_exists (struct mc4020 *mc);


/* file operations which this driver handles */
/*
 * Dispatch table handed to the VFS: open, release, ioctl and mmap are
 * implemented; poll is not wired up (its entry is commented out).
 */
static struct file_operations mc4020_fops = {
  .owner   = 	THIS_MODULE,
  .open    =	mc4020_open,
  .release =	mc4020_release,
  .ioctl   =	mc4020_ioctl,
  .mmap    =	mc4020_mmap,
//.poll	   =	mc4020_poll,
};


/* default config */
/*
 * Default configuration: channel 0 only enabled, the 5V range flag set
 * for all four channels, analog source = BNC, scan_rate = 20,000,000.
 */
static struct mc4020_config default_config = {
  .bitmask   =	(MCC_CH0_EN
		 | MCC_CH0_5V | MCC_CH1_5V | MCC_CH2_5V | MCC_CH3_5V
		 | MCC_ASRC_BNC),
  .scan_rate =	20000000,
};

/* ---------------------------------------------------------------- */

/* Board number: the low four bits of the device minor number. */
inline static int
minor2board (int minor)
{
  const int	board_mask = 0xf;

  return minor & board_mask;
}

/* Sub-device selector: bit 4 of the device minor number (0 or 1). */
inline static int
minor2subdev (int minor)
{
  int	above_board_bits = minor >> 4;

  return above_board_bits & 0x1;
}
     
#if (DEBUG)

#if 0
/*
 * Debug aid (compiled out by the surrounding #if 0): after a 1 ms
 * delay, deliberately write through a null pointer.
 */
static void
cause_oops (void)
{
  mdelay (1);
  *(int *) 0 = 0;
}
#endif

/*
 * Back end of the ASSERT() macro.  When COND is false, log the failed
 * expression text STR together with FILE and LINE at KERN_ERR level.
 * Execution always continues; nothing is halted.
 */
static void
__assert (int cond, char *str, char *file, int line)
{
  if (cond)
    return;			// assertion holds, nothing to report

  printk (KERN_ERR "assert failed: %s:%d: %s\n", file, line, str);
  // cause_oops ();
}

#define	ASSERT(x) __assert (x, #x, __FILE__, __LINE__)
#else
#define ASSERT(x) (void) 0
#endif


/* Larger of two ints (returns B when they are equal). */
inline static int
imax (int a, int b)
{
  if (a > b)
    return a;

  return b;
}

/* Smaller of two ints (returns B when they are equal). */
inline static int
imin (int a, int b)
{
  if (a < b)
    return a;

  return b;
}

/*
 * ----------------------------------------------------------------
 * routines for keeping track of all installed boards.
 * note that the list can vary on the fly in a hot swapping 
 * environment.
 * ----------------------------------------------------------------
 */

#define	NR_BOARDS	4	// a non-GNU like fixed limit

// Table of installed boards.  A null entry marks a free slot.
// Indexed by board number, i.e. minor2board (minor), not by the raw
// minor number (which also carries the sub-device bit).
struct mc4020	*all_boards[NR_BOARDS];

/*
 * Claim the first free slot in all_boards[] for MC.
 * Returns the slot number, or -1 if all NR_BOARDS slots are in use.
 */
static int
alloc_minor (struct mc4020 *mc)
{
  int	slot = 0;

  while (slot < NR_BOARDS){
    if (all_boards[slot] == 0){
      all_boards[slot] = mc;
      return slot;
    }
    slot++;
  }

  return -1;			// table is full
}

/*
 * Release the all_boards[] slot that belongs to MINOR.
 *
 * The slot is selected by the board number extracted from MINOR, the
 * same way find_board_by_minor() looks it up.  Indexing with the raw
 * minor would write outside the table whenever the sub-device bit is
 * set (e.g. minor 0x10 -> index 16 in a 4 entry array) and would leave
 * the real slot occupied.
 *
 * Minors whose board number is out of range are silently ignored.
 */
static void
free_minor (int minor)
{
  int	boardnum = minor2board (minor);

  if (!(0 <= boardnum && boardnum < NR_BOARDS))
    return;
  
  all_boards[boardnum] = 0;
}

/*
 * Look up the board for a device minor number.
 * Returns the table entry for the minor's board number (which may be
 * null if that slot is free), or null if the board number is out of
 * range.
 */
static struct mc4020 *
find_board_by_minor (int minor)
{
  int	slot = minor2board (minor);

  if (slot < 0 || slot >= NR_BOARDS)
    return 0;

  return all_boards[slot];
}

/*
 * ----------------------------------------------------------------
 * 			  EEPROM Access
 * ----------------------------------------------------------------
 */

// max clock rate is 250 kHz, we use 4 EE_UDELAYs per bit.

#define	EE_UDELAY	1

/*
 * Drive the EEPROM clock line: read-modify-write of the PLX control
 * register, setting PLX_EECK when STATE is nonzero and clearing it
 * otherwise.
 */
static void
set_ee_clk (struct mc4020 *mc, bool state)
{
  long	cntrl = read_plx_cntrl (mc);

  write_plx_cntrl (mc, state ? (cntrl | PLX_EECK) : (cntrl & ~PLX_EECK));
}

/*
 * Drive the EEPROM chip select: read-modify-write of the PLX control
 * register, setting PLX_EECS when STATE is nonzero and clearing it
 * otherwise.
 */
static void
set_ee_cs (struct mc4020 *mc, bool state)
{
  long	cntrl = read_plx_cntrl (mc);

  write_plx_cntrl (mc, state ? (cntrl | PLX_EECS) : (cntrl & ~PLX_EECS));
}

/*
 * Drive the EEPROM write-data line: read-modify-write of the PLX
 * control register, setting PLX_EEWD when STATE is nonzero and
 * clearing it otherwise.
 */
static void
set_ee_data (struct mc4020 *mc, bool state)
{
  long	cntrl = read_plx_cntrl (mc);

  write_plx_cntrl (mc, state ? (cntrl | PLX_EEWD) : (cntrl & ~PLX_EEWD));
}

/*
 * Sample the EEPROM read-data line.
 * Returns 1 if PLX_EERD is set in the PLX control register, else 0.
 */
static int
get_ee_data (struct mc4020 *mc)
{
  long	cntrl = read_plx_cntrl (mc);

  if (cntrl & PLX_EERD)
    return 1;

  return 0;
}

/*
 * Clock one bit out to the EEPROM: present BIT on the data line, then
 * pulse the clock high and back low.  Every step is followed by an
 * EE_UDELAY microsecond pause (four pauses per bit in total).
 */
static void 
write_ee_bit (struct mc4020 *mc, bool bit)
{
  int	clk_level;

  udelay (EE_UDELAY);

  set_ee_data (mc, bit);
  udelay (EE_UDELAY);

  // clock high, then clock low, pausing after each edge
  for (clk_level = 1; clk_level >= 0; clk_level--){
    set_ee_clk (mc, clk_level);
    udelay (EE_UDELAY);
  }
}

/*
 * Clock one bit in from the EEPROM: raise the clock, sample the data
 * line while the clock is high, then lower the clock again.
 * Returns the sampled bit (0 or 1).
 */
static int
read_ee_bit (struct mc4020 *mc)
{
  int	sampled;

  udelay (EE_UDELAY);
  set_ee_clk (mc, 1);

  udelay (EE_UDELAY);
  sampled = get_ee_data (mc);	// data is read with the clock high

  set_ee_clk (mc, 0);
  udelay (EE_UDELAY);

  return sampled;
}

/*
 * Read a 16 bit word from the EEPROM, most significant bit first.
 */
static int
read_ee_16 (struct mc4020 *mc)
{
  int	word = 0;
  int	bits_left = 16;

  while (bits_left-- > 0){
    word <<= 1;			// make room for the next (lower) bit
    word |= read_ee_bit (mc);
  }

  return word;
}

/*
 * Begin an EEPROM command: assert chip select, then shift out a start
 * bit, the two bit OPCODE (bit 1 first) and the low eight bits of ADDR
 * (bit 7 first).  Chip select is left asserted for the caller.
 */
static void
write_ee_cmd (struct mc4020 *mc, int opcode, int addr)
{
  int		mask;
  
  set_ee_clk (mc, 0);		/* ensure clock starts low */
  set_ee_cs (mc, 1);		/* enable */

  write_ee_bit (mc, 1);		/* start */
  write_ee_bit (mc, opcode & 0x2);
  write_ee_bit (mc, opcode & 0x1);

  /* address bits 7 down to 0; only zero/nonzero matters to the callee */
  for (mask = 0x80; mask != 0; mask >>= 1)
    write_ee_bit (mc, addr & mask);
}

/*
 * Read N consecutive 16 bit words from the EEPROM, starting at word
 * address ADDR, into BUF.  Issues a command with opcode 2 (presumably
 * the 93CS56 READ opcode -- confirm against the data sheet), reads the
 * words back to back, then drops chip select.
 */
static void
eeprom_read (struct mc4020 *mc, int addr, u16 *buf, int n)
{
  int	i;

  write_ee_cmd (mc, 2, addr);

  // N.B., the "dummy bit" that the 93CS56 docs talk about is
  // actually sent by the device while we're transmitting the 
  // last address bit.  Hence, we don't ever see it.

  for (i = 0; i < n; i++)
    buf[i] = read_ee_16 (mc);

  set_ee_cs (mc, 0);		/* deselect: transfer complete */
}

/*
 * --------------------------------------------------------------------
 *			I2C i/o routines
 * --------------------------------------------------------------------
 */

// get scope and measure actual waveforms.
//
// [ Problem is that the pins on the AD5315's are really, really
//   really tiny.  It's some kind of a micro smt package with
//   0.5 mm lead pitch.  Not a chance in hell that I could
//   attach a wire or probe the thing... ]

// I suspect that the board has pull ups that are too big, hence the
// low to high transition is much longer than the bus is actually
// capable of running.  
//
// The AD5315's will run with fSCL at 400 kHz.
//	SCL high = 0.6 us, SCL low = 1.3 us
//
// experiments revealed that the I2C register stopped working
// with UDELAY_HIGH = 300, but did work with UDELAY_HIGH = 400.
// In the name of margin, I'm calling it 500.
//
// Found out that 500 wasn't long enough on some boards.  Now set to 1000.

#define	I2C_UDELAY_LOW	   10
#define	I2C_UDELAY_HIGH	 1000


/*
 * Set the I2C clock line.  The control bit is active low: the
 * I2CBUS_SCL bit is set to pull the line low and cleared to let it go
 * high.  A short pause follows a low, a long one follows a high.
 */
static void
set_i2c_scl (struct mc4020 *mc, bool state)
{
  long	cntrl = read_plx_cntrl (mc);

  if (state){
    // line released: wait out the slow rise
    write_plx_cntrl (mc, cntrl & ~I2CBUS_SCL);
    udelay (I2C_UDELAY_HIGH);
    return;
  }

  // line driven low
  write_plx_cntrl (mc, cntrl | I2CBUS_SCL);
  udelay (I2C_UDELAY_LOW);
}

/*
 * Set the I2C data line.  The control bit is active low: the
 * I2CBUS_SDA bit is set to pull the line low and cleared to let it go
 * high.  A short pause follows a low, a long one follows a high.
 */
static void
set_i2c_sda (struct mc4020 *mc, bool state)
{
  long	cntrl = read_plx_cntrl (mc);

  if (state){
    // line released: wait out the slow rise
    write_plx_cntrl (mc, cntrl & ~I2CBUS_SDA);
    udelay (I2C_UDELAY_HIGH);
    return;
  }

  // line driven low
  write_plx_cntrl (mc, cntrl | I2CBUS_SDA);
  udelay (I2C_UDELAY_LOW);
}

// i2c_stop: 
//	entry: SCL = X, SDA = X
//	exit:  SCL = 1, SDA = 1

/* Generate an I2C STOP condition from any bus state. */
static void
i2c_stop (struct mc4020 *mc)
{
  // first bring both lines low, clock before data
  set_i2c_scl (mc, 0);
  set_i2c_sda (mc, 0);

  // then raise the clock and, with it high, let the data line rise
  set_i2c_scl (mc, 1);
  set_i2c_sda (mc, 1);
}

// i2c_start:
//	entry: SCL = 1, SDA = 1
//	exit:  SCL = 0, SDA = 0

/* Generate an I2C START condition; both lines end up low. */
static void
i2c_start (struct mc4020 *mc)
{
  // make sure both lines are high (bus idle)
  set_i2c_sda (mc, 1);
  set_i2c_scl (mc, 1);

  // data falls while the clock is still high, then the clock falls
  set_i2c_sda (mc, 0);
  set_i2c_scl (mc, 0);
}

// i2c_write_bit:
//	entry: SCL = 0, SDA = X
//	exit:  SCL = 0, SDA = X

/*
 * Send one bit: put BIT on the data line, then pulse the clock high
 * and back low.  The clock is expected to be low on entry.
 */
static void
i2c_write_bit (struct mc4020 *mc, bool bit)
{
  set_i2c_sda (mc, bit);	// data is set up while the clock is low

  set_i2c_scl (mc, 1);		// clock pulse
  set_i2c_scl (mc, 0);
}

// i2c_write_byte:
//	entry: SCL = 0, SDA = X
//	exit:  SCL = 0, SDA = 1

/*
 * Send the eight bits of T, most significant first, followed by a
 * ninth clock for the acknowledge slot.  The acknowledge itself is
 * never sampled.  On exit the clock is low and the data line released.
 */
static void
i2c_write_byte (struct mc4020 *mc, char t)
{
  int	mask;

  // bits 7 down to 0; the callee only tests for zero/nonzero
  for (mask = 0x80; mask != 0; mask >>= 1)
    i2c_write_bit (mc, t & mask);

  // Ninth clock: the slave would drive ACK here.  We release SDA and
  // clock the slot, but have no way of reading the line back.

  set_i2c_sda (mc, 1); 		// tristate SDA
  set_i2c_scl (mc, 1);
  set_i2c_scl (mc, 0);
}

// i2c_write: the high level entry point...
//	entry: SCL = 1, SDA = 1
//	exit:  SCL = 1, SDA = 1

/*
 * Complete I2C write transaction: START, the address byte for device
 * ADDR, N data bytes from BUF, STOP.
 */
static void
i2c_write (struct mc4020 *mc, int addr, char *buf, int n)
{
  int	sent;

  i2c_start (mc);

  /* 7 bit address in the upper bits; low (R/W) bit 0 selects a write */
  i2c_write_byte (mc, (addr << 1) | 0);

  for (sent = 0; sent < n; sent++)
    i2c_write_byte (mc, buf[sent]);

  i2c_stop (mc);
}

// i2c_write_register
//	write the 8 bit control register

/*
 * Write the low eight bits of VALUE to the I2C control register at
 * I2C_REGISTER_ADDR.  The last value written is cached in
 * mc->i2c_reg_shadow, and the (slow) bus transaction is skipped when
 * the new value matches the cache.
 */
static void 
i2c_write_register (struct mc4020 *mc, int value)
{
  char	v = value;
  int	low8 = v & 0xff;

  if (low8 != mc->i2c_reg_shadow){
    mc->i2c_reg_shadow = low8;
    i2c_write (mc, I2C_REGISTER_ADDR, &v, 1);
  }
}

// i2c_write_caldac
//	INDEX specifies which caldac 0 .. 7 to write
//	VALUE is the 10 bit value to write

/*
 * Load calibration DAC number INDEX with VALUE.
 *
 * Bit 2 of INDEX picks the DAC chip (I2C_CAL_DAC0_ADDR when clear,
 * I2C_CAL_DAC1_ADDR when set); the low two bits pick the channel
 * within that chip.  The most recent value per DAC is cached in
 * mc->caldac_shadow[], and nothing is sent if VALUE matches the cache.
 *
 * INDEX is not range checked here; the shadow array is initialised
 * for entries 0 .. 7 by init_i2c().
 */
static void
i2c_write_caldac (struct mc4020 *mc, int index, int value)
{
  char	msg[3];
  int	chip_addr;

  if (mc->caldac_shadow[index] == value)
    return;			// already loaded with this value

  mc->caldac_shadow[index] = value;

  if (index & 0x4)
    chip_addr = I2C_CAL_DAC1_ADDR;
  else
    chip_addr = I2C_CAL_DAC0_ADDR;

  msg[0] = 1 << (index & 0x3);		// "pointer byte": one-hot channel select
  msg[1] = 0x20 | ((value >> 8) & 0xf);	// constant control bits plus value bits 11..8
  msg[2] = value & 0xff;		// value bits 7..0

  i2c_write (mc, chip_addr, msg, 3);
}

/*
 * One time I2C setup: issue a STOP to put the bus in a known state,
 * then set every shadow value to ~0 so that it does not look like a
 * previously written value.
 */
inline static void
init_i2c (struct mc4020 *mc)
{
  int	dac = 0;

  i2c_stop (mc);

  // invalidate the write caches
  mc->i2c_reg_shadow = ~0;
  while (dac < 8){
    mc->caldac_shadow[dac] = ~0;
    dac++;
  }
}

/*
 * --------------------------------------------------------------------
 *			misc routines
 * --------------------------------------------------------------------
 */
/*
 * Stop data acquisition: zero DAQ control registers 0 and 1, then zero
 * the PLX DMA channel 1 command/status register.  Always returns 0.
 */
static int 
mc4020_stop_rx (struct mc4020 *mc)
{
  // acquisition side
  mc_write_cr0 (mc, 0);
  mc_write_cr1 (mc, 0);

  // DMA side
  write_plx_dma1_csr (mc, 0);

  return 0;
}

/*
 * Poll the hardware status register until HWS_DAQ_OVERRUN reads back
 * as clear.  Gives up quietly after 256 reads spaced 100 us apart.
 */
static void
clear_daq_overrun (struct mc4020 *mc)
{
  int		tries_left = 256;

  while (tries_left-- > 0){
    unsigned long status = mc_read_hw_status (mc);

    if (!(status & HWS_DAQ_OVERRUN))
      return;			// overrun flag is clear

    udelay (100);
  }
}

/*
 * ------------------------------------------------------------------------
 * Buffer management conventions
 *
 *	   u
 *        |--------------------------------------------------------|
 *         f                                                 e
 *
 *	   u.....
 *        |--------------------------------------------------------|
 *                   f                                       e
 *
 *  empty_index moves in steps of chunksize.  (empty_index % chunksize) == 0
 *  fill_index moves in steps of chunksize.   (fill_index % chunksize) == 0
 *  fill_index points to the next page to be written.
 *  user_index is the index of the first page the user is working with
 *  user_num is the number of pages the user is working with
 *  user_num < bufsize && (((user_index + user_num) % chunksize) == 0)
 *  (user_index - empty_index) >= chunksize
 *  # of pages w/ good data is (fill_index - user_index)
 *  page[empty_index - 1] has the END_OF_CHAIN bit set for it.
 *
 *  if (user_index - empty_index) >= 2 * chunksize	// got some free pages
 *	&& (empty_index - fill_index) > chunksize then	// DMA is not on top of us
 *    it's OK to append the empty pages to the current DMA transfer
 *
 * Basic DMA chain strategy...
 *
 *  We build a chain or list of regions into which to transfer
 *  data, and how much to transfer.  In addition, we can specify
 *  a couple of magic bits in the descriptor for each chain entry.
 *  We use those bits to cause interrupts.
 *
 *  There are two interrupts we can distinguish using chaining mode DMA.
 *
 *    (1) END_OF_CHAIN
 *    (2) TERMINAL_COUNT (all bytes transferred in a given chain entry)
 *
 *  We use EOC to ensure that we don't overwrite the portion of the
 *  buffer that the user is working with.  At any given time only
 *  one entry has EOC set.
 *
 *  We use TERMINAL_COUNT every nth chain entry, to generate a periodic
 *  interrupt so we can observe the progress of the DMA transfer.
 *  We reflect the progress in fill_index.
 *
 *  In a world where the user application is on the average consuming
 *  data as quickly as we can produce it, EOC will never be hit, and
 *  we will append pages returned from the user to the tail of the
 *  current chain on the fly.
 *
 *  If the user is not keeping up, we keep hitting EOC, which
 *  terminates the transfer.  We will fire the transfer back up when
 *  the user returns some pages to us.  Note that in this condition,
 *  the stream of data has holes in it.  Not very good if you're
 *  trying to build a software radio...
 *
 *  The fifo on the card holds 64K samples, which at 20M samples / sec,
 *  single channel, fills in about 3 ms.  You really want to keep the
 *  DMA streaming.
 *
 *  Also, the data in the buffer is straight binary.  I.e., [0,4095].
 *  You're probably going to want to subtract 0x0800 from each sample
 *  to produce two's complement data.
 * ------------------------------------------------------------------------
 */

/*
 * Ring buffer index addition: A + B, reduced by one buffer length if
 * the sum reaches mc->bufsize.  Only a single wrap is handled, so the
 * result is in range only when A + B < 2 * bufsize (checked by the
 * ASSERT).
 */
inline static unsigned
index_add (struct mc4020 *mc, unsigned a, unsigned b)
{
  unsigned s;

  s = a + b;
  if (!(s < mc->bufsize))
    s -= mc->bufsize;		// wrapped past the end of the ring

  ASSERT (s < mc->bufsize);
  return s;
}

/*
 * Ring buffer index subtraction: A - B, with one buffer length added
 * back if the difference is negative.  Only a single wrap is handled
 * (checked by the ASSERT).
 */
inline static unsigned
index_sub (struct mc4020 *mc, unsigned a, unsigned b)
{
  int s;

  s = a - b;
  if (s < 0)
    s = s + mc->bufsize;	// wrapped below the start of the ring

  ASSERT (s >= 0 && s < mc->bufsize);
  return s;
}

/*
 * Sanity check the ring buffer bookkeeping.  Violations are only
 * reported through ASSERT; nothing is corrected.
 *
 * The alignment checks use (x & (chunksize - 1)), which is equivalent
 * to (x % chunksize) only when chunksize is a power of two.
 */
static void
check_constraints (struct mc4020 *mc)
{
  int	chunksize = mc->chunksize;
  
  // every index, and the user's page count, lies inside the buffer
  ASSERT (mc->empty_index < mc->bufsize);
  ASSERT (mc->fill_index < mc->bufsize);
  ASSERT (mc->user_index < mc->bufsize);
  ASSERT (mc->user_num < mc->bufsize);

  // empty_index, fill_index and the end of the user's region all
  // fall on chunk boundaries
  ASSERT ((mc->empty_index & (chunksize - 1)) == 0);
  ASSERT ((mc->fill_index & (chunksize - 1)) == 0);
  ASSERT (((mc->user_index + mc->user_num) & (chunksize - 1)) == 0);
}

/*
 * return the log base 2 of x, rounding down.
 *
 *   log2 (0)	  ->  -1
 *   log2 (1)	  ->   0
 *   log2 (2)     ->   1
 *   log2 (3)     ->   1
 *   log2 (4)     ->   2
 */

static int
log2 (unsigned long x)
{
  int	exponent = -1;		// answer for x == 0

  // one step per significant bit: the position of the highest set
  // bit is the floor of the base 2 logarithm
  for (; x != 0; x >>= 1)
    exponent++;

  return exponent;
}

/*
 * compute_pages_per_irq:
 *	returns integer power of 2 that is the number of pages
 *	that we should transfer before interrupting.
 */
/*
 * THROUGHPUT is in bytes per second, MAX_LATENCY_IN_MS is the longest
 * acceptable time between interrupts, TOTAL_PAGES is the size of the
 * buffer in pages.
 *
 * The result is the largest power of two number of pages that fits in
 * the latency budget, reduced until three such chunks fit in the
 * buffer.  It is never less than 1: previously a buffer of fewer than
 * three pages drove the result to 0, which is not a usable chunk size
 * (callers subtract one from it to build alignment masks and use it as
 * an interrupt interval).
 */
static int
compute_pages_per_irq (unsigned long throughput,
		       int max_latency_in_ms, int total_pages)
{
  unsigned long	     t;

  t = (throughput / 1000) * max_latency_in_ms; 	// bytes
  t /= PAGE_SIZE;				// pages
  
  if (t == 0)
    return 1;
  
  t = (1L << log2 (t));		// round down to power of two

  // want at least three chunks in the buffer, but never go below one
  // page.  (the cast spells out the conversion that the comparison
  // performed implicitly.)
  while (t > 1 && t * 3 > (unsigned long) total_pages)
    t >>= 1;

  ASSERT (t != 0);
  return t;
}


static void
set_dma_chain_terminal_count_flags (struct mc4020 *mc, int chunksize)
{
  struct plx_dma_entry	*de;
  int			npages;
  int			i, pg;
  
  de = &mc->buffer->dma_entry[0];
  npages = mc->buffer->npages;

  // setup dma chain such that every chunksize page
  // has the interrupt on terminal count bit set.
  
  pg = 0;
  for (i = 0; i < npages; i++){
    de[i].next_and_flags &= ~(PLX_DMA_DESC_EOC | PLX_DMA_DESC_TC_IE);
    if (pg == chunksize - 1){	// interrupt on this page
      de[i].next_and_flags |= PLX_DMA_DESC_TC_IE;
      pg = 0;
    }
    else
      pg++;
  }
}

/*
 * Build the value for the PLX descriptor pointer register that refers
 * to chain entry INDEX: the bus address of the entry combined with the
 * PLX_DMA_DESC_IS_PCI and PLX_DMA_DESC_TO_HOST flags.
 */
static unsigned long
get_dma_descriptor (struct mc4020 *mc, int index)
{
  struct plx_dma_entry	*entry = &mc->buffer->dma_entry[index];

  return virt_to_bus (entry) | PLX_DMA_DESC_IS_PCI | PLX_DMA_DESC_TO_HOST;
}

inline static void
clear_end_of_chain_bit (struct mc4020 *mc, int index)
{
  mc->buffer->dma_entry[index].next_and_flags &= ~PLX_DMA_DESC_EOC;
}

inline static void
set_end_of_chain_bit (struct mc4020 *mc, int index)
{
  mc->buffer->dma_entry[index].next_and_flags |= PLX_DMA_DESC_EOC;
}

/*
 * Fire off continuous DMA data acquisition
 */
/*
 * kick_off_dma:
 *	(re)start acquisition, with chained DMA beginning at chain
 *	entry INDEX.  Sets mc->state to ST_DMA_RUNNING.
 *
 *	The order of the register writes below matters; comments in the
 *	body refer to PLX errata for two of the sequences.
 *
 *	Both callers in this file invoke this with mc->lock held and
 *	interrupts disabled; note that it busy-waits for 1 ms (plus
 *	whatever clear_daq_overrun needs).
 */
static void
kick_off_dma (struct mc4020 *mc, unsigned int index)
{
  unsigned long		t;
  unsigned long		cr0, cr1;

  
  clear_daq_overrun (mc);

  // remember only the channel selection bits of the current CR1 value
  cr1 = mc_read_shadow (mc, MC4020_DAQ_CR1_OFF);
  cr1 &= (CR1_CHANMODE_MASK | CR1_UCHAN_MASK | CR1_LCHAN_MASK);
  
  // zero both control registers before reprogramming
  mc_write_cr0 (mc, 0);
  mc_write_cr1 (mc, 0);

  mc_write_buffer_ptr_clear (mc);

  // count must be set (to something), but we don't enable it
  mc_write_daq_count (mc, 40);

  // restore channel selection
  mc_write_cr1 (mc, cr1);

  cr0 = (CR0_TRIG1_SRC_SOFTWARE
	 | CR0_AGATE_SRC_SOFTWARE
	 | CR0_AGATE_TYPE_LEVEL_SENS
	 | CR0_TRIG2_EDGE_FALLING);

  // note: CR0_DAQ_ENB is not set yet; that happens further down
  mc_write_cr0 (mc, cr0);

  // Program PLX DMA registers...

  write_plx_dma1_csr (mc, 0);  			// disable DMA
  write_plx_dma_threshold (mc, 0x04000000);
  // write_plx_dma_arb (mc, 0x10000000);	// enable "Read Ahead" mode

  write_plx_dma1_mode (mc, (PLX_DMA_MODE_WIDTH32
			    | PLX_DMA_MODE_BURST
			    | PLX_DMA_MODE_READY
			    | PLX_DMA_MODE_BTERM
			    | PLX_DMA_MODE_CHAIN
			    | PLX_DMA_MODE_DONE_IE
			    | PLX_DMA_MODE_ADDR_HOLD
			    | PLX_DMA_MODE_DEMAND
			 // | PLX_WR_AND_INVL
			    | PLX_DMA_MODE_INTR_PCI));

  // chaining mode: addresses and size come from the descriptors, so
  // the direct registers are zeroed and only the descriptor pointer
  // is loaded
  write_plx_dma1_pci_addr (mc, 0);
  write_plx_dma1_lcl_addr (mc, 0);
  write_plx_dma1_size (mc, 0);

  write_plx_dma1_descriptor (mc, get_dma_descriptor (mc, index));
  mc->state = ST_DMA_RUNNING;

  mc_write_intr_enable (mc, IE_OVERRUN);


  // enable PCI interrupt, plus DMA1 local interrupt.  See plx errata
  t = read_plx_intcsr (mc);
  t |= PLX_PCI_IE | PLX_PCI_LOCAL_IE | PLX_LCL_DMA1_IE;
  write_plx_intcsr (mc, t);


  // start plx dma (must be done this way, see errata)
  write_plx_dma1_csr (mc, PLX_DMA_CSR_ENABLE);
  write_plx_dma1_csr (mc, PLX_DMA_CSR_ENABLE | PLX_DMA_CSR_START);


  // reset the bookkeeping fields (not otherwise used in this function)
  mc->last_hw_status = 0;
  mc->last_plx_intcsr = 0;
  mc->expected_ints = HWS_DAQ_OVERRUN;


  // now enable acquisition ...
  cr0 |= CR0_DAQ_ENB;
  mc_write_cr0 (mc, cr0);

  udelay (1000);

  // ... and set the software gate bit
  cr1 |= CR1_SFT_GATE;
  mc_write_cr1 (mc, cr1);

  // Clear any mc4020 interrupts -- MAS
  (void) mc_read_hw_status (mc);

  mc_write_soft_start (mc);
}

/*
 * Start acquisition.
 *
 * Only legal from ST_IDLE.  Makes sure the DMA buffer exists, picks
 * the chunk size (pages per interrupt) from the configured throughput,
 * resets the ring indices and starts DMA at page 0.
 *
 * Returns 0 on success, -ERESTARTSYS if interrupted while waiting for
 * the semaphore, -EINVAL if not idle, -ENOBUFS if no buffer could be
 * obtained.
 */
static int
do_start_ioctl (struct mc4020 *mc)
{
  int	result = 0;

  if (down_interruptible (&mc->sem))
    return -ERESTARTSYS;

  if (mc->state != ST_IDLE){
    result = -EINVAL;
    goto out;
  }

  if (!ensure_buffer_exists (mc)){
    result = -ENOBUFS;
    goto out;
  }

  mc->chunksize = compute_pages_per_irq (mc->throughput, MAX_LATENCY, mc->bufsize);
  set_dma_chain_terminal_count_flags (mc, mc->chunksize);

  spin_lock_irq (&mc->lock);

  // DMA fills from page 0; the chain ends two chunks short of a full
  // lap, and the entry just before empty_index carries end-of-chain.
  mc->fill_index = 0;
  mc->empty_index = index_sub (mc, mc->fill_index, 2 * mc->chunksize);
  set_end_of_chain_bit (mc, index_sub (mc, mc->empty_index, 1));

  // the user owns no pages yet
  mc->user_index = 0;
  mc->user_num = 0;

  check_constraints (mc);

  kick_off_dma (mc, mc->fill_index);

  spin_unlock_irq (&mc->lock);

 out:
  up (&mc->sem);
  return result;
}

/*
 * Stop acquisition and return to ST_IDLE.
 *
 * Only legal while DMA is running or stopped at end of chain.
 * Returns 0 on success, -ERESTARTSYS if interrupted while waiting for
 * the semaphore, -EINVAL in any other state.
 */
static int
do_stop_ioctl (struct mc4020 *mc)
{
  int	result = -EINVAL;

  if (down_interruptible (&mc->sem))
    return -ERESTARTSYS;

  spin_lock_irq (&mc->lock);

  if (mc->state == ST_DMA_RUNNING || mc->state == ST_DMA_STOPPED){
    mc4020_stop_rx (mc);
    mc->state = ST_IDLE;
    result = 0;
  }

  spin_unlock_irq (&mc->lock);
  up (&mc->sem);
  return result;
}

/*
 * do_setget_ioctl:
 *	In one call: (1) take back pages the user has finished with,
 *	(2) give newly free pages to the DMA chain (restarting DMA if it
 *	had stopped at end of chain), and (3) report the pages that now
 *	hold data for the user.
 *
 *	On entry, status->index / status->num describe the pages being
 *	returned (num == 0: none).  On successful exit they describe the
 *	region the user now owns, and status->lost is 1 if the state was
 *	ST_DMA_STOPPED when sampled, else 0.
 *
 *	If WAIT is nonzero and no new pages are available, sleeps on
 *	mc->wq until fill_index moves past the user's region or the
 *	state becomes ST_DMA_STOPPED.
 *
 *	Returns 0 on success, -ERESTARTSYS if interrupted while waiting
 *	for the semaphore, -EINVAL if the state is neither RUNNING nor
 *	STOPPED, or if the returned region does not start at user_index
 *	or is larger than user_num.
 */
static int
do_setget_ioctl (struct mc4020 *mc, struct mc4020_status *status, bool wait)
{
  int 		result = 0;
  unsigned int	npages_avail;

  if (down_interruptible (&mc->sem))
    return -ERESTARTSYS;
  
  spin_lock_irq (&mc->lock);

  if (!(mc->state == ST_DMA_RUNNING || mc->state == ST_DMA_STOPPED)){
    result = -EINVAL;		// invalid state
    goto done;
  }

  if (status->num != 0){	// user is returning pages to us
    unsigned int	npages_freed;

    // caller's index of the beginning of his region must agree with ours
    // and he can't return more pages than we think he owns.
    if (status->index != mc->user_index || status->num > mc->user_num){
      result = -EINVAL;
      goto done;
    }

    // account for freed pages
    npages_freed = status->num;
    mc->user_index = index_add (mc, mc->user_index, npages_freed);
    mc->user_num -= npages_freed;
  }

  // if we're in the STOPPED state, then we've lost some pages
  
  status->lost = mc->state == ST_DMA_STOPPED ? 1 : 0;

  // now see how many empty pages we may add to the DMA queue

  npages_avail =
    index_sub (mc,
	       mc->user_index & ~(mc->chunksize - 1),   // round down
	       mc->empty_index);

  if (VERBOSE_DEBUG)
    printk (KERN_DEBUG "mc4020: index = %d, freed = %d, avail = %d, chunksize = %d, %s\n",
	    status->index, status->num, npages_avail, mc->chunksize,
	    mc->state == ST_DMA_STOPPED ? "STOPPED" : "RUNNING");
  
  if (npages_avail > mc->chunksize){
    // always leave at least chunksize pages available
    // so we don't have any ambiguity with our indices.
    npages_avail -= mc->chunksize;

    ASSERT ((npages_avail & (mc->chunksize - 1)) == 0);
    
    // if DMA is running, and it's not breathing down our neck,
    // add these pages to the current transfer

    if (mc->state == ST_DMA_RUNNING){
      if (index_sub (mc, mc->empty_index, mc->fill_index) > mc->chunksize){
	// DMA hasn't started on the chain entry that contains
	// the END_OF_CHAIN flag, so we're good to go...

	// move the end-of-chain marker forward by npages_avail
	clear_end_of_chain_bit (mc, index_sub (mc, mc->empty_index, 1));
	mc->empty_index = index_add (mc, mc->empty_index, npages_avail);
	set_end_of_chain_bit (mc, index_sub (mc, mc->empty_index, 1));

	if (VERBOSE_DEBUG)
	  printk (KERN_DEBUG "mc4020: <append>\n");
      }
      else {
	// We're running out of time, and the DMA will soon hit the
	// end of chain flag and enter the ST_DMA_STOPPED state.
	// We'll pick back up next time the user invokes this ioctl.
	if (VERBOSE_DEBUG)
	  printk (KERN_DEBUG "mc4020: <nada>\n");
      }
    }

    else {      // mc->state == ST_DMA_STOPPED

      // If we're missing interrupts, the next assert fails.
      ASSERT (mc->fill_index == mc->empty_index);
      mc->fill_index = mc->empty_index;			// make it true

      // Patch up the DMA chain and restart

      clear_end_of_chain_bit (mc, index_sub (mc, mc->empty_index, 1));
      mc->empty_index = index_add (mc, mc->empty_index, npages_avail);
      set_end_of_chain_bit (mc, index_sub (mc, mc->empty_index, 1));

      kick_off_dma (mc, mc->fill_index);
      if (VERBOSE_DEBUG)
	printk (KERN_DEBUG "mc4020: <start>\n");
    }
  }
  
  // if additional pages are available, give them to the user
  
  // (user_index + user_num may exceed bufsize here; index_sub adds
  // bufsize back at most once.)
  npages_avail =
    index_sub (mc, mc->fill_index, mc->user_index + mc->user_num);

  // if there are no pages, and the user has asked us to wait,
  // go ahead and wait for more pages

  if (npages_avail == 0 && wait){
    unsigned int	end_of_user = mc->user_index + mc->user_num;
    
    spin_unlock_irq (&mc->lock);	// release spin lock before sleeping

    // NOTE(review): the return value is discarded, so a sleep that is
    // cut short by a signal is not reported; the call then returns 0
    // with whatever npages_avail the last condition check produced.
    // The condition reads mc->state and mc->fill_index without holding
    // mc->lock.  If the wake up is due to ST_DMA_STOPPED, the
    // assignment to npages_avail is short-circuited and it stays 0.
    wait_event_interruptible
      (mc->wq, (mc->state == ST_DMA_STOPPED
		|| (npages_avail =
		    index_sub (mc, mc->fill_index, end_of_user)) != 0));

    spin_lock_irq (&mc->lock);		// reacquire
  }

  if (npages_avail != 0)
    mc->user_num += npages_avail;

  // fill in return status struct for user

  status->index = mc->user_index;
  status->num = mc->user_num;

  if (VERBOSE_DEBUG)
    printk (KERN_DEBUG "mc4020: index = %d, num = %d\n", mc->user_index, mc->user_num);

 done:
  // common exit, also taken on the -EINVAL paths.  The constraint
  // check runs after the spinlock is dropped but still under mc->sem.
  spin_unlock_irq (&mc->lock);
  check_constraints (mc);
  up (&mc->sem);
  return result;
}

//
// for a given ADC and range, load the appropriate gain and offset
// calibration values into the respective calibration DACs
//
/*
 * The calibration table holds four entries per ADC, in the order
 * 5V gain, 5V offset, other-range gain, other-range offset.  Each ADC
 * has two calibration DACs: 2 * which_adc for gain and the next one
 * for offset.
 */
static void
load_caldac (struct mc4020 *mc, int which_adc, bool range_5v)
{
  int	gain_slot = 4 * which_adc + (range_5v ? 0 : 2);
  int	gain_dac = which_adc * 2;

  i2c_write_caldac (mc, gain_dac,
		    mc->caldata.cal_data[gain_slot]);		// gain

  i2c_write_caldac (mc, gain_dac + 1,
		    mc->caldata.cal_data[gain_slot + 1]);	// offset
}

// compute divisor for time base
//
// freq = BASE_CLK_FREQ / (divisor + 2)
// divisor = BASE_CLK_FREQ / freq - 2

/*
 * Divisor register value for DESIRED_FREQ (integer division, so the
 * resulting rate may differ from the one requested).
 * NOTE(review): DESIRED_FREQ must be non-zero, and no larger than
 * BASE_CLK_FREQ / 2 or the subtraction wraps; neither is checked here.
 */
static unsigned long
compute_divisor (unsigned long desired_freq)
{
  unsigned long	clocks_per_sample = BASE_CLK_FREQ / desired_freq;

  return clocks_per_sample - 2;
}

/* Inverse of compute_divisor: the rate that DIVISOR actually yields. */
static unsigned long
compute_freq (unsigned long divisor)
{
  unsigned long	clocks_per_sample = divisor + 2;

  return BASE_CLK_FREQ / clocks_per_sample;
}

/* Config bitmask enable flag for each channel, indexed by channel number. */
static const unsigned long chan_enables[4] = {
  MCC_CH0_EN, MCC_CH1_EN, MCC_CH2_EN, MCC_CH3_EN,
};

/*
 * Lowest numbered channel (0 .. 3) whose enable flag is set in
 * BITMASK, or -1 if none is enabled.
 */
static int
find_first_chan (unsigned long bitmask)
{
  int	chan = 0;

  while (chan < 4){
    if (bitmask & chan_enables[chan])
      return chan;
    chan++;
  }

  return -1;
}

/*
 * Second lowest numbered channel whose enable flag is set in BITMASK,
 * or -1 if fewer than two channels are enabled.
 */
static int
find_second_chan (unsigned long bitmask)
{
  int	chan = find_first_chan (bitmask);

  // resume the scan just past the first enabled channel
  while (++chan < 4)
    if (bitmask & chan_enables[chan])
      return chan;

  return -1;
}

/*
 * set_config:
 *	validate the proposed configuration C and, if it is acceptable,
 *	program it into the board and record it in MC.
 *
 *	Returns 1 on success, 0 if the configuration is rejected.
 *
 *	All validation is done before anything is written to the
 *	hardware or to MC, so a rejected configuration leaves both
 *	untouched.  In particular:
 *	  - with the internal clock, a scan_rate of 0 is rejected (it
 *	    would otherwise reach a division by zero in
 *	    compute_divisor whenever MIN_SCAN_RATE is 0);
 *	  - with an external clock, scan_rate is a divisor and must be
 *	    in [2, 2^24).
 *
 *	On success this writes the I2C control register, the hardware
 *	configuration register, the calibration DACs, the sample
 *	interval and DAQ control register 1, and updates mc->config,
 *	mc->nchannels and mc->throughput.
 */
static bool
set_config (struct mc4020 *mc, struct mc4020_config *c)
{
  int		nchan;
  unsigned long bitmask;
  unsigned long scan_rate;
  unsigned long max_scan_rate = 0;
  unsigned long	actual_scan_rate;
  int		ireg;
  int		ii;
  unsigned long	r;
  int		hw_conf;
  bool		internal_clock;


  bitmask = c->bitmask;
  scan_rate = c->scan_rate;

  // check proposed config for sanity...

  // FIXME if we ever set MIN_SCAN_RATE > 0, we'll need to handle external clock case
  if (scan_rate < MIN_SCAN_RATE){
    if (DEBUG)
      printk (KERN_WARNING "set_config: scan rate too low (%ld)\n", scan_rate);
    return 0;
  }

  // count # of channels selected
  nchan = 0;
  if (bitmask & MCC_CH0_EN) nchan++;
  if (bitmask & MCC_CH1_EN) nchan++;
  if (bitmask & MCC_CH2_EN) nchan++;
  if (bitmask & MCC_CH3_EN) nchan++;


  switch (nchan){		// legal values are 1, 2 and 4
  case 1:    max_scan_rate = MAX_SCAN_RATE_1;    break;
  case 2:    max_scan_rate = MAX_SCAN_RATE_2;    break;
  case 4:    max_scan_rate = MAX_SCAN_RATE_4;    break;

  default:
    if (DEBUG)
      printk (KERN_WARNING "set_config: invalid # of channels (%d)\n", nchan);
    return 0;
  }

  if (scan_rate > max_scan_rate){
    if (DEBUG)
      printk (KERN_WARNING "set_config: scan rate too high (%ld)\n", scan_rate);
    return 0;
  }

  // check clock source

  switch (bitmask & MCC_CLK_MASK){
  case MCC_CLK_INTERNAL:	hw_conf = HWC_WCLK_INTERNAL_40MHZ;	break;
  case MCC_CLK_EXT_BNC:		hw_conf = HWC_WCLK_EXT_CLK_BNC;		break;
  case MCC_CLK_AD_START_TRIG_IN:hw_conf = HWC_WCLK_AD_START_TRIG_IN;	break;
  default:
    if (DEBUG)
      printk (KERN_WARNING "set_config: invalid MCC_CLK setting (0x%08lx)\n",
	      bitmask & MCC_CLK_MASK);
    return 0;
  }

  internal_clock = (bitmask & MCC_CLK_MASK) == MCC_CLK_INTERNAL;

  // work out the i2c register value (written below, once everything
  // has been validated)

  ireg = 0;
  if (bitmask & MCC_EXT_BNC_THRESH_ZERO) ireg |= IREG_THRESH_ZERO;

  if (bitmask & MCC_CH0_5V) ireg |= IREG_ATTEN_CH0;
  if (bitmask & MCC_CH1_5V) ireg |= IREG_ATTEN_CH1;
  if (bitmask & MCC_CH2_5V) ireg |= IREG_ATTEN_CH2;
  if (bitmask & MCC_CH3_5V) ireg |= IREG_ATTEN_CH3;

  switch (bitmask & MCC_ASRC_MASK){
  case MCC_ASRC_BNC:    	ireg |= IREG_ASRC_BNC; 		break;
  case MCC_ASRC_CAL_AGND:	ireg |= IREG_ASRC_CAL_AGND;	break;
  case MCC_ASRC_CAL_0_625:	ireg |= IREG_ASRC_CAL_0_625;	break;
  case MCC_ASRC_CAL_4_375:	ireg |= IREG_ASRC_CAL_4_375;	break;
  case MCC_ASRC_CAL_HDR:	ireg |= IREG_ASRC_CAL_HDR;	break;
  default:
    if (DEBUG)
      printk (KERN_WARNING "set_config: invalid MCC_ASRC (0x%08lx)\n",
	      bitmask & MCC_ASRC_MASK);
    return 0;
  }

  //
  // check scan_rate against the clock source
  //
  // If we're using the internal clock, then scan_rate is the sample
  // frequency, and it must be non-zero because it is used as a
  // divisor.  If we're using an external clock, then we don't know
  // the external clock frequency.  Hence, scan_rate is interpreted as
  // the value to jam into the divisor register.  The minimum allowed
  // divisor is two.
  //

  if (internal_clock){
    if (scan_rate == 0){
      if (DEBUG)
	printk (KERN_WARNING "set_config: scan rate is zero with internal clock\n");
      return 0;
    }
  }
  else {			// treat scan_rate as external clock divisor
    if (scan_rate < 2){
      printk (KERN_WARNING "set_config: divisor (scan_rate) is < 2 with external clock (%ld)\n",
	      scan_rate);
      return 0;
    }
    if (scan_rate >= (1L << 24)){
      printk (KERN_WARNING "set_config: divisor (scan_rate) is >= 2^24 with external clock (%ld)\n",
	      scan_rate);
      return 0;
    }
  }
  
  // config is OK...  from here on we modify the board and mc

  i2c_write_register (mc, ireg);
  mc_write_hw_conf (mc, hw_conf);

  mc->config = *c;
  mc->nchannels = nchan;

  // load appropriate cal constants into cal dacs

  load_caldac (mc, 0, bitmask & MCC_CH0_5V);
  load_caldac (mc, 1, bitmask & MCC_CH1_5V);
  load_caldac (mc, 2, bitmask & MCC_CH2_5V);
  load_caldac (mc, 3, bitmask & MCC_CH3_5V);

  // program scan_rate divisor

  if (internal_clock){
    unsigned long divisor = compute_divisor (scan_rate);
    actual_scan_rate = compute_freq (divisor);
    mc_write_sample_interval (mc, divisor);
  }
  else {			// external clock
    actual_scan_rate = 20000000;	// call it 20 MHz
    mc_write_sample_interval (mc, scan_rate - 2);
  }

  // compute throughput (bytes/second)

  mc->throughput = actual_scan_rate * nchan * sizeof (u16);

  // setup selected channels

  r = mc_read_shadow (mc, MC4020_DAQ_CR1_OFF);
  r &= ~(CR1_CHANMODE_MASK | CR1_UCHAN_MASK | CR1_LCHAN_MASK | CR1_ATRIG_MD_MASK);
  r |= CR1_ATRIG_MD_INACTIVE;	// no analog trigger

  switch (nchan){
  case 1:
    // single channel: lower and upper channel are the same
    r |= CR1_CHANMODE_1;
    ii = find_first_chan (bitmask);
    r |= CR1_LCHAN (ii);
    r |= CR1_UCHAN (ii);
    break;
      
  case 2:
    r |= CR1_CHANMODE_2;
    r |= CR1_LCHAN (find_first_chan (bitmask));
    r |= CR1_UCHAN (find_second_chan (bitmask));
    break;

  case 4:
    r |= CR1_CHANMODE_4;
    break;
  }
  mc_write (mc, MC4020_DAQ_CR1_OFF, r);


  return 1;
}

#if (DEBUG) && 0
/*
 * Debug aid (compiled out by the surrounding "#if (DEBUG) && 0"):
 * fill every page of BUF with an incrementing counter, one unsigned
 * long per slot, continuing the count from page to page.
 */
static void
fill_buffer_with_test_data (struct mc4020_buf *buf)
{
  int		i, j;
  unsigned long	*pg;
  unsigned long	*t;
  unsigned long	count = 0;

  pg = buf->dma_page;
  for (i = 0; i < buf->npages; i++){
    t = (unsigned long *) pg[i];	// treat the page as an array of longs
    for (j = 0; j < PAGE_SIZE/sizeof (unsigned long); j++){
      t[j] = count++;
    }
  }
}
#endif

/*
 * Local bus address of the data FIFO: a fixed base of 0x3000 plus the
 * FIFO's offset within BAR3.  MC is not used.
 */
static unsigned long
get_fifo_local_addr (struct mc4020 *mc)
{
  const unsigned long	local_base = 0x00003000L;

  return local_base + MC4020_BAR3_FIFO_OFF;
}

/*
 * Allocate one zero filled page with the given allocation flags and
 * mark it reserved.  Returns the page's kernel address, or NULL if the
 * allocation failed.  Release with mc4020_free_reserved_page().
 */
void *
mc4020_get_zeroed_reserved_page (unsigned int gfp_mask)
{
  void *page = (void *) get_zeroed_page (gfp_mask);

  if (page == NULL)
    return NULL;

  SetPageReserved (virt_to_page (page));
  return page;
}

/*
 * Tally of the reference counts observed on pages as they are freed.
 * Reset by zerorefcntcnt, updated by chkrefcnts, reported by sumrefcntcnt.
 */
static struct offcnt {
  unsigned long	zero;	// pages seen with page_count == 0
  unsigned long	one;	// pages seen with page_count == 1
  unsigned long	two;	// pages seen with page_count == 2
} offcnts;

/*
 * Reset all of the reference-count tallies in offcnts to zero.
 */
static void
zerorefcntcnt(void)
{
  offcnts.zero = 0;
  offcnts.one = 0;
  offcnts.two = 0;
}

/*
 * Record the current reference count of pg in the offcnts tallies.
 * Counts of 0, 1 and 2 bump the matching tally; anything else is logged.
 */
static void
chkrefcnts(struct page *pg)
{
  int refs = page_count(pg);

  if (refs == 0)
    offcnts.zero++;
  else if (refs == 1)
    offcnts.one++;
  else if (refs == 2)
    offcnts.two++;
  else
    printk (KERN_WARNING "mc4020: page count > 2\n");
}

/*
 * When VERBOSE_DEBUG is enabled, log how many pages were seen with a
 * reference count of zero and of two.  `where' names the caller in the
 * log message.
 */
static void
sumrefcntcnt(char *where)
{
  if (!VERBOSE_DEBUG)
    return;

  printk (KERN_WARNING "mc4020: %ld zero ref counts in %s\n", offcnts.zero, where);
  printk (KERN_WARNING "mc4020: %ld two ref counts in %s\n", offcnts.two, where);
}

/*
 * Undo mc4020_get_zeroed_reserved_page: clear the PageReserved flag,
 * record the page's reference count in the offcnts tallies, and free
 * the page.  A NULL ptr is ignored.
 */
static void
mc4020_free_reserved_page (void *ptr)
{
  struct page *pg;

  if (ptr == NULL)
    return;

  pg = virt_to_page(ptr);
  ClearPageReserved (pg);
  chkrefcnts(pg);
  free_page ((unsigned long) ptr);
}

/*
 * Release everything owned by buf: the DMA descriptor vector, every
 * reserved page, the page-pointer vector, and buf itself.
 *
 * Safe to call on a partially constructed buffer (as alloc_buffer's
 * cleanup path does): a NULL buf, a NULL dma_entry, a NULL dma_page
 * vector and NULL slots inside the vector are all tolerated.
 */
static void
free_buffer (struct mc4020_buf *buf)
{
  void			**pg;
  int			i;

  if (buf == 0)
    return;

  pg = buf->dma_page;

  // The page vector may never have been allocated (or may be empty),
  // so only peek at its first slot when there is one to look at.
  if (VERBOSE_DEBUG && pg != 0 && buf->npages > 0)
    printk (KERN_WARNING "mc4020: free_buffer called; buf->dma_page[0] = 0x%lx\n",
	    (long)pg[0]);

  if (buf->dma_entry)
    kfree (buf->dma_entry);

  if (pg != 0){
    zerorefcntcnt();
    for (i = 0; i < buf->npages; i++)
      mc4020_free_reserved_page (pg[i]);	// NULL slots are skipped

    sumrefcntcnt("free_buffer");
    kfree (pg);
  }
  
  kfree (buf);
}

/*
 * Allocate a receive buffer of npages pages together with a circular
 * chain of PLX DMA descriptors, one per page.
 *
 * npages           number of pages in the ring; must be at least 1
 * fifo_local_addr  value stored in each descriptor's local_addr field
 *
 * Each descriptor transfers PAGE_SIZE bytes into its page and points at
 * the next descriptor; the last one points back at the first.  The End
 * of Chain and Terminal Count bits are not set here.
 *
 * Returns the new buffer, or 0 on failure (in which case everything
 * that was allocated has been released again).
 */
struct mc4020_buf *
alloc_buffer (unsigned long npages, unsigned long fifo_local_addr)
{
  unsigned long		i;
  struct mc4020_buf	*buf;
  struct plx_dma_entry	*de;
  void			**pg;
  int			magic_chain_toggle = 0;  

  // An empty ring is meaningless, and "npages - 1" below would wrap
  // around and walk off the end of the descriptor vector.
  if (npages == 0)
    return 0;

  buf = kmalloc (sizeof (struct mc4020_buf), GFP_KERNEL);
  if (buf == 0)
    return 0;

  memset (buf, 0, sizeof (*buf));
  buf->npages = npages;
  
  // allocate the vector of dma chain entries.
  // for use in DMA chains, low 4 bits must be zero.  This shouldn't be a problem.
  buf->dma_entry = kmalloc (sizeof (struct plx_dma_entry) * npages, GFP_KERNEL);
  if (buf->dma_entry == 0 || ((unsigned long) buf->dma_entry & 0xf) != 0){
    printk (KERN_WARNING "mc4020: failed to alloc dma_entry (%p)\n", buf->dma_entry);
    goto cleanup;
  }
  memset (buf->dma_entry, 0, sizeof (struct plx_dma_entry) * npages);

  // allocate a vector to remember the pages that we're about to allocate
  buf->dma_page = kmalloc (sizeof (void *) * npages, GFP_KERNEL);
  if (buf->dma_page == 0){
    printk (KERN_WARNING "mc4020: failed to alloc dma_page vector\n");
    goto cleanup;
  }
  memset (buf->dma_page, 0, sizeof (void *) * npages);

  // allocate the pages and remember them in the vector
  for (i = 0; i < npages; i++){
    buf->dma_page[i] = mc4020_get_zeroed_reserved_page (GFP_KERNEL);
    if (buf->dma_page[i] == 0){
      printk (KERN_WARNING "mc4020: failed to alloc dma_page\n");
      goto cleanup;
    }
  }
  
  // Now that we've got the pages allocated, make a first pass
  // over the entries and fill in the bus addresses and length.

  de = buf->dma_entry;
  pg = buf->dma_page;
  
  for (i = 0; i < npages; i++){
    de[i].pci_addr = virt_to_bus ((void *) pg[i]);
    de[i].local_addr = fifo_local_addr + magic_chain_toggle;
    de[i].length = PAGE_SIZE;

    // magic_chain_toggle ^= 0x08;
  }

  // now link them together: each entry points at its successor...

  for (i = 0; i < npages - 1; i++){
    de[i].next_and_flags = virt_to_bus (&de[i+1]); // NB, low 4 bits are zero
    de[i].next_and_flags |= (PLX_DMA_DESC_IS_PCI | PLX_DMA_DESC_TO_HOST);
  }

  // ...and the last entry wraps around to the first, closing the ring

  de[npages - 1].next_and_flags = virt_to_bus (&de[0]);
  de[npages - 1].next_and_flags |= (PLX_DMA_DESC_IS_PCI | PLX_DMA_DESC_TO_HOST);

  // note that we haven't set the End of Chain or Terminal Count bits
  // in any of flag fields.  Somebody else does that...

  if (VERBOSE_DEBUG)
    printk (KERN_WARNING "mc4020: alloc_buffer allocated %lu pages; buf->dma_page[0] = 0x%lx\n",
	    npages, (long)buf->dma_page[0]);

  return buf;
  
 cleanup:
  free_buffer (buf);	// copes with whatever subset got allocated
  return 0;
}


/*
 * Allocate the receive buffer for mc, sized to hold nbytes (rounded
 * down to a whole number of pages), and reset the buffer bookkeeping.
 *
 * Returns 1 on success and 0 if the buffer could not be allocated.
 * mc->buffer is overwritten in either case.
 */
static bool
mc4020_init_rxbuf (struct mc4020 *mc, unsigned long nbytes)
{
  unsigned long n_pages = nbytes / PAGE_SIZE;

  mc->buffer = alloc_buffer (n_pages, get_fifo_local_addr (mc));
  if (mc->buffer == 0)
    return 0;

  mc->bufsize = n_pages;	// buffer size is tracked in pages
  mc->empty_index = 0;
  mc->user_index = 0;
  mc->user_num = 0;

  return 1;
}

/*
 * Make sure mc has a receive buffer, allocating one of MCBUF_DEFAULT
 * bytes if none exists yet.  Returns non-zero if a buffer is available.
 */
static bool
ensure_buffer_exists (struct mc4020 *mc)
{
  if (mc->buffer != 0)
    return 1;

  return mc4020_init_rxbuf (mc, MCBUF_DEFAULT) != 0;
}

/*
 * support for mmap
 */

/*
 * nopage handler for mappings of the receive buffer.
 *
 * Translates the faulting address into a byte offset within the buffer
 * and returns the struct page backing that offset.  Returns
 * NOPAGE_SIGBUS if the buffer no longer exists or the offset lies
 * beyond its end.
 *
 * NOTE(review): the warning text below says "before get_page", but this
 * function does not call get_page.  Confirm whether a reference is meant
 * to be taken here for the target kernel version.
 */
static struct page *
mc4020_vma_nopage (struct vm_area_struct *vma,
		   unsigned long address, int write_access)
{
  struct mc4020	*mc = vma->vm_private_data;
  struct page	*pg;
  unsigned long	byte_off;

  // fd was closed and buffer deleted.
  if (mc->buffer == 0)
    return NOPAGE_SIGBUS;

  byte_off = vma->vm_pgoff * PAGE_SIZE + (address - vma->vm_start);
  if (byte_off >= mc->bufsize * PAGE_SIZE)
    return NOPAGE_SIGBUS;

  pg = virt_to_page (mc->buffer->dma_page[byte_off >> PAGE_SHIFT]);
  if (page_count(pg) != 1)
    printk (KERN_WARNING "mc4020: vma_nopage ref count %d before get_page\n",
	    page_count(pg));

  return pg;
}


/* VM operations installed on mappings created by mc4020_mmap. */
static struct vm_operations_struct mc4020_vm_ops = {
  .nopage = mc4020_vma_nopage,
};


/*
 * shared interrupt handler...
 */
static void 
mc4020_interrupt (int irq, void *dev_id, struct pt_regs *regs)
{
  struct mc4020 *mc = dev_id;
  long	plx_intcsr;
  int	hw_status;

  // interrupts are already disabled, so a plain spin_lock is enough
  // (no need for spin_lock_irq)
  spin_lock (&mc->lock);

  plx_intcsr = read_plx_intcsr (mc);
  mc->last_plx_intcsr = plx_intcsr;

  if (plx_intcsr & PLX_LCL_DMA1_INT){
    long dma_csr = read_plx_dma1_csr (mc);

    // This is either an END_OF_CHAIN or a TERMINAL_COUNT interrupt.
    // Every END_OF_CHAIN is arranged to set TERMINAL_COUNT as well,
    // so fill_index advances by one chunk in both cases.
    mc->fill_index = index_add (mc, mc->fill_index, mc->chunksize);

    if (dma_csr & PLX_DMA_CSR_DONE)	// end of chain detected.
      mc->state = ST_DMA_STOPPED;

    // acknowledge the DMA interrupt
    dma_csr |= PLX_DMA_CSR_CLR_INTR;
    write_plx_dma1_csr (mc, dma_csr);

    wake_up_interruptible (&mc->wq);	// wake up anybody waiting for pages
  }

  hw_status = mc_read_hw_status (mc);

  // An overrun really shouldn't happen, but in practice it shows up now
  // and then, depending on the other PCI devices in the system.  It
  // means the 4020 could not get bus cycles in time.  The suspicion is
  // that the 4020 has too little buffering and that certain other
  // peripherals and/or PCI bus arbitrators aggravate the problem.
  //
  // The only recovery appears to be closing and reopening the device.
  if (hw_status & HWS_DAQ_OVERRUN)
    printk (KERN_WARNING "mc4020: overrun\n");

  // Nothing further is done with hw_status, whether or not it matches
  // mc->expected_ints.

  spin_unlock (&mc->lock);
}

/*
 * --------------------------------------------------------------------
 *		    character driver entry points
 * --------------------------------------------------------------------
 */

/*
 * open entry point.
 *
 * Non-ADC subdevices open unconditionally.  The ADC subdevice is
 * read-only and single-accessor: opening it for writing yields -EINVAL
 * and opening it while it is not in ST_CLOSED yields -EBUSY.  A
 * successful open moves the board to ST_IDLE, clears the buffer
 * bookkeeping and applies default_config.
 */
static int 
mc4020_open (struct inode *inode, struct file *file)
{
  unsigned int	minor = MINOR (inode->i_rdev);
  struct mc4020 *mc = find_board_by_minor (minor);
  int		result = 0;

  if (mc == 0)
    return -ENODEV;

  if (minor2subdev (minor) != SUBDEV_ADC)
    return 0;
  
  if (file->f_mode & FMODE_WRITE)
    return -EINVAL;		// can't be opened for writing

  if (down_interruptible (&mc->sem))
    return -ERESTARTSYS;

  if (mc->state == ST_CLOSED){
    mc->state = ST_IDLE;
    mc->bufsize = 0;
    mc->empty_index = 0;
    mc->user_index = 0;
    mc->user_num = 0;
    mc->buffer = 0;

    set_config (mc, &default_config);
  }
  else
    result = -EBUSY;		// enforce a "single accessor"

  up (&mc->sem);
  return result;
}


/*
 * release entry point.
 *
 * For the ADC subdevice: stops reception, frees the receive buffer and
 * returns the board to ST_CLOSED so that it can be opened again.
 * Non-ADC subdevices need no teardown.  Always returns 0.
 */
static int 
mc4020_release (struct inode *inode, struct file *file)
{
  unsigned int minor = MINOR (inode->i_rdev);
  struct mc4020 *mc;

  mc = find_board_by_minor (minor);
  if (mc == 0)
    return 0;

  if (minor2subdev (minor) != SUBDEV_ADC)
    return 0;
  
  // Take the semaphore uninterruptibly.  If we bailed out here on a
  // signal the teardown below would be skipped, leaving the board stuck
  // outside ST_CLOSED (every later open gets -EBUSY) with its buffer
  // still allocated.
  down (&mc->sem);

  if (file->f_mode & FMODE_READ){
    if (VERBOSE_DEBUG)
      printk (KERN_WARNING "mc4020: release of readable file (good)\n");

    mc4020_stop_rx (mc);

    if (mc->buffer != 0){
      free_buffer (mc->buffer);
      mc->buffer = 0;
    }
  }

  else {
    printk (KERN_WARNING "mc4020: can't happen: release of unreadable file\n");
    // can't happen, we can only be opened for reading...
  }

  mc->state = ST_CLOSED;

  up (&mc->sem);
  return 0;
}

/*
 * ioctl entry point.
 *
 * The buffer, start/stop, status, config and calibration commands are
 * only accepted on the ADC subdevice (-EINVAL otherwise).  The DAC
 * commands are not restricted by subdevice.  Unknown commands return
 * -EINVAL.
 */
static int 
mc4020_ioctl (struct inode *inode, struct file *file,
	      unsigned int cmd, unsigned long arg)
{
  unsigned int 		minor = MINOR (inode->i_rdev);
  bool			subdev_is_adc = (minor2subdev (minor) == SUBDEV_ADC);
  struct mc4020 	*mc = find_board_by_minor (minor);
  struct mc4020_status  status;
  struct mc4020_config  config;
  int			result;
  int			dac_range;

  if (mc == 0)
    return -ENODEV;

  switch (cmd){

  case GIOCSETBUFSIZE:
    if (!subdev_is_adc)
      return -EINVAL;

    if (!(file->f_mode & FMODE_READ))
      return -EBADF;

    if (arg < MCBUF_MINIMUM || (arg & (MCBUF_MULTIPLE - 1)) != 0)
      return -EINVAL;

    if (down_interruptible (&mc->sem))
      return -ERESTARTSYS;

    if (mc->buffer != 0)
      result = -EINVAL;		// buffer already allocated (by mmap for
				//   example); size can't be changed
    else if (!mc4020_init_rxbuf (mc, arg))
      result = -ENOBUFS;
    else
      result = 0;

    up (&mc->sem);
    return result;

  case GIOCSTART:
    if (!subdev_is_adc)
      return -EINVAL;

    return do_start_ioctl (mc);

  case GIOCSTOP:
    if (!subdev_is_adc)
      return -EINVAL;

    return do_stop_ioctl (mc);

  case GIOCSETGETSTATUS:
  case GIOCSETGETSTATUS_NOWAIT:
    if (!subdev_is_adc)
      return -EINVAL;

    if (!(file->f_mode & FMODE_READ))
      return -EBADF;

    if (copy_from_user (&status, (struct mc4020_status *) arg, sizeof (status)))
      return -EFAULT;

    // third argument is non-zero for the plain (non-NOWAIT) command
    result = do_setget_ioctl (mc, &status, cmd == GIOCSETGETSTATUS);

    if (copy_to_user ((struct mc4020_status *) arg, &status, sizeof (status)))
      return -EFAULT;

    return result;

  case GIOCSETCONFIG:
    if (!subdev_is_adc)
      return -EINVAL;

    if (copy_from_user (&config, (struct mc4020_config *) arg, sizeof (config)))
      return -EFAULT;

    if (down_interruptible (&mc->sem))
      return -ERESTARTSYS;

    result = set_config (mc, &config);
    up (&mc->sem);

    // set_config reports success with a non-zero value
    return result ? 0 : -EINVAL;

  case GIOCGETCALDATA:
    if (!subdev_is_adc)
      return -EINVAL;

    if (copy_to_user ((struct mc4020_caldata *) arg, &mc->caldata, sizeof (mc->caldata)))
      return -EFAULT;

    return 0;

    //
    // auxiliary subdevice ioctls
    //

  case GIOCENABLEDACS:
    if (arg)	// enable
      mc->dac_cr1_shadow |= DAC_CR1_DAC_OE;
    else
      mc->dac_cr1_shadow &= ~DAC_CR1_DAC_OE;

    mc_write_DAC_cr1 (mc, mc->dac_cr1_shadow);
    return 0;

  case GIOCSETDAC0RANGE:
    if (arg == MCDAC_RANGE_5V)
      mc->dac_cr1_shadow |= DAC_CR1_DAC0_RANGE_BIP5V;
    else if (arg == MCDAC_RANGE_10V)
      mc->dac_cr1_shadow &= ~DAC_CR1_DAC0_RANGE_BIP5V;
    else
      return -EINVAL;

    mc_write_DAC_cr1 (mc, mc->dac_cr1_shadow);
    return 0;

  case GIOCSETDAC1RANGE:
    if (arg == MCDAC_RANGE_5V)
      mc->dac_cr1_shadow |= DAC_CR1_DAC1_RANGE_BIP5V;
    else if (arg == MCDAC_RANGE_10V)
      mc->dac_cr1_shadow &= ~DAC_CR1_DAC1_RANGE_BIP5V;
    else
      return -EINVAL;

    mc_write_DAC_cr1 (mc, mc->dac_cr1_shadow);
    return 0;

  case GIOCGETDAC0RANGE:
    if ((mc->dac_cr1_shadow & DAC_CR1_DAC0_RANGE_BIP5V) != 0)
      dac_range = MCDAC_RANGE_5V;
    else
      dac_range = MCDAC_RANGE_10V;

    if (copy_to_user ((int *) arg, &dac_range, sizeof (dac_range)))
      return -EFAULT;

    return 0;

  case GIOCGETDAC1RANGE:
    if ((mc->dac_cr1_shadow & DAC_CR1_DAC1_RANGE_BIP5V) != 0)
      dac_range = MCDAC_RANGE_5V;
    else
      dac_range = MCDAC_RANGE_10V;

    if (copy_to_user ((int *) arg, &dac_range, sizeof (dac_range)))
      return -EFAULT;

    return 0;

  case GIOCWRITEDAC0:
    mc_write_DAC0_data (mc, arg);
    return 0;

  case GIOCWRITEDAC1:
    mc_write_DAC1_data (mc, arg);
    return 0;

  default:
    break;

  } // end switch

  return -EINVAL;
}

/*
 * mmap entry point for the ADC subdevice.
 *
 * If no receive buffer exists yet, one of the default size is allocated
 * first.  The caller must then map the whole buffer starting at offset
 * zero; any other request fails with -EINVAL.  Pages are supplied
 * lazily through mc4020_vm_ops.
 *
 * Note that a default buffer allocated here stays allocated even if the
 * size check subsequently fails.
 */
static int 
mc4020_mmap (struct file *file, struct vm_area_struct *vma)
{
  struct mc4020	*mc;
  unsigned int minor = MINOR (file->f_dentry->d_inode->i_rdev);
  unsigned long size = (unsigned long) vma->vm_end - vma->vm_start;
  int		result = 0;

  mc = find_board_by_minor (minor);
  if (mc == 0)
    return -ENODEV;

  if (minor2subdev (minor) != SUBDEV_ADC)
    return -EINVAL;

  if (down_interruptible (&mc->sem))
    return -ERESTARTSYS;
  
  // The buffer has to exist before the size can be validated:
  // mc->bufsize is zero until a buffer has been allocated, so checking
  // first would reject every request made before GIOCSETBUFSIZE.
  if (!ensure_buffer_exists (mc)){
    result = -ENOBUFS;
    goto done_up;
  }

  if (vma->vm_pgoff != 0 || size != mc->bufsize * PAGE_SIZE){
    // they must map the entire buffer
    result = -EINVAL;
    goto done_up;
  }

  vma->vm_ops = &mc4020_vm_ops;
  vma->vm_flags |= VM_RESERVED;
  vma->vm_private_data = mc;	// read back by mc4020_vma_nopage
  result = 0;

 done_up:
  up (&mc->sem);
  return result;
}

/*
 * --------------------------------------------------------------------
 * 		initialization and finalization code
 * --------------------------------------------------------------------
 */

/* forward declarations for the module and PCI entry points defined below */
static int mc4020_init_module (void); 

static int mc4020_init_one (struct pci_dev *pdev,
			    const struct pci_device_id *ent);
static void mc4020_remove_one (struct pci_dev *pdev);


/* PCI ids this driver binds to; any subsystem vendor/device is accepted */
static struct pci_device_id mc4020_pci_tbl[] __devinitdata = {
  { PCI_VENDOR_ID_COMPUTER_BOARDS, PCI_DEVICE_ID_PCI_DAS_4020_12, PCI_ANY_ID, PCI_ANY_ID },
  { }				/* terminating entry */
};

MODULE_DEVICE_TABLE (pci, mc4020_pci_tbl);

/* driver descriptor handed to the PCI core by mc4020_init_module */
static struct pci_driver mc4020_driver = {
  .name      =	"mc4020",
  .id_table  =	mc4020_pci_tbl,
  .probe     =	mc4020_init_one,	/* called for each matching device */
  .remove    =	mc4020_remove_one,	/* called on unload or device removal */
};


#if (DEBUG)
/*
 * Print the 32-bit registers at byte offsets start..end (inclusive,
 * stepping by 4) relative to vaddr, one "offset: value" line each.
 */
static void
dump_range32 (unsigned long vaddr, int start, int end)
{
  int		off = start;

  while (off <= end){
    printk (KERN_INFO "%04x: %08x\n",
	    off, readl (vaddr + off));
    off += 4;
  }
}

/*
 * Print selected ranges of the PLX register window of mc.
 */
static void
dump_plx_regs (struct mc4020 *mc)
{
  static const struct {
    int	first;
    int	last;
  } ranges[] = {
    { 0x00, 0x2c },
    { 0x68, 0x74 },
    { 0xf0, 0xf8 },
    { 0x80, 0xb0 },
  };
  unsigned long	base = mc->plx_vaddr;
  int		k;

  for (k = 0; k < (int) (sizeof (ranges) / sizeof (ranges[0])); k++)
    dump_range32 (base, ranges[k].first, ranges[k].last);
}

/*
 * Print the board's status, ADC pointer, transfer count and pre/post
 * registers.
 */
static void
dump_regs (struct mc4020 *mc)
{
  printk (KERN_INFO "hw_status:   %04x\n", mc_read_hw_status (mc));
  printk (KERN_INFO "adc_rd_ptr:  %04x\n", mc_read_adc_read_ptr (mc));
  printk (KERN_INFO "adc_wr_ptr:  %04x\n", mc_read_adc_write_ptr (mc));
  printk (KERN_INFO "xfer_count:  %04x\n", mc_read_user_xfer_count (mc));
  printk (KERN_INFO "pre_post:    %04x\n", mc_read_pre_post_reg (mc));
}
#endif

/*
 * low level init routines
 */

/*
 * Init plx registers.
 *
 * At this point, I'm assuming that it has reasonable defaults.
 * We shall see...
 */
static void
init_plx (struct mc4020 *mc)
{
  long		intcsr;
  
#if 0 && (DEBUG)
  printk (KERN_INFO "mc4020: plx regs before init\n");
  dump_plx_regs (mc);
#endif

  // enable relevant PCI interrupts: read-modify-write of the PLX
  // interrupt control/status register, setting only PLX_PCI_IE

  intcsr = read_plx_intcsr (mc);
  intcsr |= PLX_PCI_IE;
  write_plx_intcsr (mc, intcsr);
}

/*
 * Write "safe" values into all regs.
 */
static void
init_regs (struct mc4020 *mc)
{
  // control register 0: DMA disabled, software trigger and gate sources
  mc_write_cr0 (mc, (CR0_DMA_DSBL 			// disable everything
		     | CR0_TRIG1_EDGE_RISING
		     | CR0_TRIG1_SRC_SOFTWARE
		     | CR0_AGATE_POL_HIGH
		     | CR0_AGATE_TYPE_LEVEL_SENS
		     | CR0_AGATE_SRC_SOFTWARE));

  mc_write_intr_enable (mc, 0);		// zero the interrupt enable register
  mc_write_hw_conf (mc, HWC_WCLK_INTERNAL_40MHZ);
  mc_write_fifo_size (mc, FSIZE_MAX);
  mc_write_daq_atrig_lo (mc, 0);
  mc_write_daq_atrig_hi (mc, 0);
  mc_write_cr1 (mc, CR1_CHANMODE_1 | CR1_UCHAN_0 | CR1_LCHAN_0 | CR1_SFT_GATE);
  mc_write_sample_interval (mc, 38); 	// gives "divide by 40" == 1 MHz


  mc_write_daq_count (mc, 26);		// must be programmed, but we don't 
					//   really use it.  26 is just an
					//   even number that we may notice
  mc_write_buffer_ptr_clear (mc);
  mc->dac_cr1_shadow = 0;		// keep the shadow in step with the register
  mc_write_DAC_cr1 (mc, 0);		// outputs disabled
  mc_write_DAC0_data (mc, 0x800);	// zero value
  mc_write_DAC1_data (mc, 0x800);	// zero value

#if 0 && (DEBUG)
  printk (KERN_INFO "mc4020: regs after init\n");
  dump_regs (mc);
#endif
}

static void
init_caldata (struct mc4020 *mc)
{
  // fetch calibration data out of eeprom
  eeprom_read (mc, 0x30, (u16 *) &mc->caldata.cal_4_375, sizeof (float) / sizeof (u16));
  eeprom_read (mc, 0x32, (u16 *) &mc->caldata.cal_0_625, sizeof (float) / sizeof (u16));
  eeprom_read (mc, 0x4e, (u16 *) &mc->caldata.cal_data[0],
	       sizeof (mc->caldata.cal_data) / sizeof (u16));
}

/*
 * mc4020_init_one -- initialize one device, return 0 if successful
 */
/*
 * PCI probe routine: bring up one board.
 *
 * Enables the device, reserves and maps BARs 0 (plx), 2 (registers) and
 * 3 (fifo), allocates and initializes the per-board state, programs the
 * hardware to a quiescent state, and installs the shared interrupt
 * handler.
 *
 * Returns 0 on success and -ENODEV on any failure, in which case
 * everything acquired so far has been released again.
 */
static int __devinit
mc4020_init_one (struct pci_dev *pdev,
		const struct pci_device_id *ent)
{
  unsigned long plx_vaddr;
  unsigned long reg_vaddr;
  unsigned long fifo_vaddr;
  struct mc4020 *mc;
	
  // The device has to be enabled before its resources or irq are used;
  // until then the BARs and the irq number are not guaranteed valid.

  if (pci_enable_device (pdev)) {
    printk (KERN_ERR "mc4020: cannot enable PCI device\n");
    goto err_out_0;
  }

  // reserve the memory regions...

  if (!request_mem_region (pci_resource_start (pdev, 0),
			   pci_resource_len (pdev, 0), "mc4020")) {
    printk (KERN_ERR "mc4020: cannot reserve plx region\n");
    goto err_out_0;
  }
  if (!request_mem_region (pci_resource_start (pdev, 2),
			   pci_resource_len (pdev, 2), "mc4020")) {
    printk (KERN_ERR "mc4020: cannot reserve reg region\n");
    goto err_out_1;
  }
  if (!request_mem_region (pci_resource_start (pdev, 3),
			   pci_resource_len (pdev, 3), "mc4020")) {
    printk (KERN_ERR "mc4020: cannot reserve fifo region\n");
    goto err_out_2;
  }

  // map them into our virtual address space

  plx_vaddr = (unsigned long) ioremap_nocache (pci_resource_start (pdev, 0),
					       pci_resource_len (pdev, 0));
  if (!plx_vaddr) {
    printk (KERN_ERR "mc4020: cannot remap plx region %lx @ %lx\n",
	    pci_resource_len(pdev, 0), pci_resource_start (pdev, 0));
    goto err_out_3;
  }

  reg_vaddr = (unsigned long) ioremap_nocache (pci_resource_start (pdev, 2),
					       pci_resource_len (pdev, 2));
  if (!reg_vaddr) {
    printk (KERN_ERR "mc4020: cannot remap reg region %lx @ %lx\n",
	    pci_resource_len (pdev, 2), pci_resource_start (pdev, 2));
    goto err_out_4;
  }

  fifo_vaddr = (unsigned long) ioremap_nocache (pci_resource_start (pdev, 3),
						pci_resource_len (pdev, 3));
  if (!fifo_vaddr) {
    printk (KERN_ERR "mc4020: cannot remap fifo region %lx @ %lx\n",
	    pci_resource_len (pdev, 3), pci_resource_start (pdev, 3));
    goto err_out_5;
  }

  // allocate board structure...

  mc = (struct mc4020 *) kmalloc (sizeof (struct mc4020), GFP_KERNEL);
  if (mc == 0)
    goto err_out_6;

  memset (mc, 0, sizeof (struct mc4020));

  pdev->driver_data = mc;
  mc->plx_vaddr = plx_vaddr;
  mc->reg_vaddr = reg_vaddr;
  mc->fifo_vaddr = fifo_vaddr;
  mc->pdev = pdev;

  // Everything the open path and the interrupt handler touch must be
  // valid before the board becomes reachable through a minor number or
  // the (shared) interrupt line.

  spin_lock_init (&mc->lock);
  sema_init (&mc->sem, 1);		// binary semaphore
  init_waitqueue_head (&mc->wq);
  mc->state = ST_CLOSED;

  mc->minor = alloc_minor (mc);
  if (mc->minor < 0){
    printk (KERN_WARNING "mc4020: failed to alloc minor number\n");
    goto err_out_7;
  }


  printk (KERN_INFO "mc4020: found board at %lx, irq = %d\n",
	  pci_resource_start (pdev, 0), pdev->irq);

#if 0 && (DEBUG)
  printk (KERN_INFO "mc4020: plx_vaddr = %lx, reg_vaddr = %lx, fifo_vaddr = %lx\n",
	  plx_vaddr, reg_vaddr, fifo_vaddr);
#endif

  init_plx (mc);
  init_regs (mc);
  init_caldata (mc);
  init_i2c (mc);

  // we request a shared interrupt and use mc as the dev_id
  // perhaps we ought to be doing this at open time, not load time?

  if (request_irq (pdev->irq, (void *) &mc4020_interrupt, 
		   SA_SHIRQ, "mc4020", mc)) {
    printk (KERN_ERR "mc4020: request_irq failed (irq = %d)!\n", pdev->irq);
    goto err_out_8;
  }

  // The PCI latency timer trades off bus bandwidth for latency.
  // In effect, it sets the time limit until we need to rearbitrate
  // for access to the bus, and hence, our maximum burst size.
  // Bigger values increase bus throughput, but increase latency to 
  // access the bus by others.  We're an admitted bus hog.
  // Max out our bandwidth at the expense of everybody else...

  pci_write_config_byte (pdev, PCI_LATENCY_TIMER, 0xff);

  pci_set_master(pdev);
  return 0;


  // error unwinding: each label releases what was acquired just before
  // the corresponding failure point, then falls through to the rest

 err_out_8:
  free_minor (mc->minor);
 err_out_7:
  pdev->driver_data = 0;
  kfree (mc);
 err_out_6:
  iounmap ((void *) fifo_vaddr);
 err_out_5:
  iounmap ((void *) reg_vaddr);
 err_out_4:
  iounmap ((void *) plx_vaddr);
 err_out_3:
  release_mem_region (pci_resource_start (pdev, 3), pci_resource_len (pdev, 3));
 err_out_2:
  release_mem_region (pci_resource_start (pdev, 2), pci_resource_len (pdev, 2));
 err_out_1:
  release_mem_region (pci_resource_start (pdev, 0), pci_resource_len (pdev, 0));
 err_out_0:
  return -ENODEV;
}

/*
 * called for each device when module is unloaded, or when
 * a given device is unplugged (if HOTPLUG is defined).
 */
static void 
mc4020_remove_one (struct pci_dev *pdev)
{
  struct mc4020 *mc = pdev->driver_data;
  
  printk (KERN_INFO "mc4020_remove_one: pdev = %p\n", pdev);


  // quiesce the board before the handler and mappings go away
  mc_write_intr_enable (mc, 0);
  mc_write_cr0 (mc, CR0_DMA_DSBL);


  // release resources in the reverse of the order mc4020_init_one
  // acquired them
  free_irq (pdev->irq, mc);
  free_minor (mc->minor);
  iounmap ((void *) mc->fifo_vaddr);
  iounmap ((void *) mc->reg_vaddr);
  iounmap ((void *) mc->plx_vaddr);
  release_mem_region (pci_resource_start (pdev, 3), pci_resource_len (pdev, 3));
  release_mem_region (pci_resource_start (pdev, 2), pci_resource_len (pdev, 2));
  release_mem_region (pci_resource_start (pdev, 0), pci_resource_len (pdev, 0));

  pdev->driver_data = 0;
  kfree (mc);
}


/*
 * Module entry point: register the character device, then the PCI
 * driver.
 *
 * Returns 0 on success or a negative errno.  On failure nothing is left
 * registered, so no stale file_operations pointer can outlive the
 * module.
 */
static int __init
mc4020_init_module (void) 
{
  int	rc;
  
  printk (KERN_INFO "mc4020_init_module: version: %s\n",
	  CONFIG_MC4020_VERSION);

  rc = devfs_register_chrdev (MC4020_MAJOR, "mc4020", &mc4020_fops);
  if (rc < 0){
    printk (KERN_ERR "mc4020: failed to register character device\n");
    return rc;		// without the chrdev the driver is unusable
  }

  rc = pci_module_init (&mc4020_driver);
  if (rc < 0)
    // the module is about to be unloaded again; don't leave the
    // character device pointing at it
    devfs_unregister_chrdev (MC4020_MAJOR, "mc4020");

  return rc;
}

/*
 * Module exit point: unregister the PCI driver, then the character
 * device.  A failure to unregister the character device is logged.
 */
static void __exit
mc4020_cleanup_module(void) 
{
  int	rc;
  
  printk (KERN_INFO "mc4020: cleanup_module\n");
  pci_unregister_driver (&mc4020_driver);

  /* Next, unregister ourselves with the character device driver handler */
  rc = devfs_unregister_chrdev(MC4020_MAJOR, "mc4020");
  if (rc != 0)
    printk (KERN_ERR "mc4020: failed to un-register character device, errno=%d\n", -rc);
}

/* hook the load/unload routines above into the kernel's module machinery */
module_init(mc4020_init_module);
module_exit(mc4020_cleanup_module);
