Orinoco EIO & SMP fix

Jean Tourrilhes jt at bougret.hpl.hp.com
Wed Dec 5 09:40:54 EST 2001


	Hi David,

	Now that Linux-IrDA is under control, I spent a bit of
"quality time" with the Orinoco driver.
	Included in this patch :
	o Fix for bitrate setting/reading. I already sent you that
earlier.
	o Fix a SMP bug where the Tx queue would remain locked.
	o Fix a IRQ handler bug where the Tx queue would be released
too early (that's the cause of Tx EIO errors).

	I've tested the patch at lenght on an Orinoco card (6.16), a
3Com card (2.20) and a Linsys card (0.08) on my SMP box. One of my
favorite test is to start on *both* side something like this :
		> netperf -H XXX -t UDP_STREAM -l 20 -- -m 256

	I can still the card going crazy from time to time (like
SWSUPPORT0 wiped out), but I could no see the reason for it. Probably
some firmware bug. Anyway, the watchdog fixes it.
	In other words, I'm pretty happy of the state of the driver
after the patch. Ship that to Marco ;-)

	Have fun...

	Jean

P.S. : Don't forget to update the Pcmcia package.
P.S.2 : PrismII still need a bit of tcpdump to get is started.
P.S.3 : While doing Ad-Hoc between a Symbol card and a non-Symbol
card, don't forget to put [IWPRIV="set_preamble 0"] for the Symbol
card in you wireless.opts. I'll add that to the release notes.
-------------- next part --------------
diff -u -p -r linux/drivers/net/wireless.08c/hermes.h linux/drivers/net/wireless/hermes.h
--- linux/drivers/net/wireless.08c/hermes.h	Wed Nov 28 16:47:15 2001
+++ linux/drivers/net/wireless/hermes.h	Wed Nov 28 18:05:15 2001
@@ -192,10 +192,11 @@ struct hermes_tx_descriptor {
 #define HERMES_TXCTRL_802_11		(0x0008)	/* We supply 802.11 header */
 #define HERMES_TXCTRL_ALT_RTRY		(0x0020)
 
-/* Inquiry constants and data types */
+/* Inquiry command & Information frame : constants and data types */
 
 #define HERMES_INQ_TALLIES			0xF100
 #define HERMES_INQ_SCAN				0xF101
+#define HERMES_INF_LINK_STATUS			0xF200
 
 struct hermes_tallies_frame {
 	u16 TxUnicastFrames;
@@ -222,6 +223,27 @@ struct hermes_tallies_frame {
 	/* Those last are probably not available in very old firmwares */
 	u16 RxDiscards_WEPICVError;
 	u16 RxDiscards_WEPExcluded;
+} __attribute__ ((packed));
+
+/* Grabbed from wlan-ng - Thanks Mark... - Jean II
+ * This is the result of a scan inquiry command */
+/* Structure describing info about an Access Point */
+struct dldwd_scan_apinfo {
+  uint16_t	channel;		/* Channel where the AP sits */
+  uint16_t	noise;			/* Noise level */
+  uint16_t	level;			/* Signal level */
+  uint8_t	bssid[ETH_ALEN];	/* MAC address of the Access Point */
+  uint16_t	beacon_interv;		/* Beacon interval ? */
+  uint16_t	capabilities;		/* Capabilities ? */
+  uint8_t	essid[32];		/* ESSID of the network */
+  uint8_t	rates[10];		/* Bit rate supported */
+  uint16_t	proberesp_rate;		/* ???? */
+} __attribute__ ((packed));
+/* Container */
+struct dldwd_scan_frame {
+  uint16_t	rsvd;			/* ??? */
+  uint16_t	scanreason;		/* ??? */
+  struct dldwd_scan_apinfo	aps[35];	/* Scan result */
 } __attribute__ ((packed));
 
 #ifdef __KERNEL__
diff -u -p -r linux/drivers/net/wireless.08c/orinoco.c linux/drivers/net/wireless/orinoco.c
--- linux/drivers/net/wireless.08c/orinoco.c	Wed Nov 28 17:03:46 2001
+++ linux/drivers/net/wireless/orinoco.c	Tue Dec  4 14:25:25 2001
@@ -376,6 +376,36 @@ typedef struct orinoco_commsqual {
 	u16 qual, signal, noise;
 } __attribute__ ((packed)) orinoco_commsqual_t;
 
+#if 0
+/* Let's keep that just in case. I've seen a PrismII firmware (WMP11) that
+ * has vendor = 1. If this firmware is released into Pcmcia cards,
+ * this will come handy... - Jean II
+ */
+
+/* MAC address of the various cards we know... */
+typedef const char prefix[3];
+
+const prefix lucent_mac[] = {
+  { 0x00, 0x60, 0x1D },		/* Lucent Wavelan IEEE */
+  { 0x00, 0x02, 0x2D }		/* Lucent/Agere Orinoco */
+  /* Melco ??? */
+};
+const prefix symbol_mac[] = {
+  { 0x00, 0x02, 0xB3 },		/* Intel */
+  { 0x00, 0x50, 0xDA },		/* 3Com AirConnect */
+  { 0x00, 0xA0, 0xF8 }		/* Symbol ??? */
+  /* Need Symbol + Ericsson MAC address here */
+};
+const prefix intersil_mac[] = {
+  { 0x00, 0x40, 0x05 },		/* D-Link */
+  { 0x00, 0x90, 0xD1 },		/* Addtron */
+  { 0x00, 0x04, 0x5A },		/* LinkSys WPC11 */
+  { 0x00, 0x06, 0x25 },		/* LinkSys WMP11 (PCI card) */
+  { 0x00, 0x04, 0xE2 },		/* SMC */
+  /* Many more to go */
+};
+#endif
+
 /*
  * Function prototypes
  */
@@ -1093,13 +1123,20 @@ void orinoco_interrupt(int irq, void * d
 
 	DEBUG(3, "%s: orinoco_interrupt()\n", priv->ndev.name);
 
+	/* Jiffies is not going to change within the loop, so save a few
+	 * instructions - Jean II */
+	if (jiffies != old_time)
+		timecount = 20;
+	else
+		timecount--;
+
 	while (1) {
-		if (jiffies != old_time)
-			timecount = 0;
-		if ( (++timecount > 50) || (! count--) ) {
+		/* The loop may execute up to 10 times, or the handler may
+		 * be called 20 times within a jiffie, before we complain... */
+		if ( (! count--) || (! timecount) ) {
 			printk(KERN_CRIT "%s: IRQ handler is looping too \
-much! Shutting down.\n",
-			       dev->name);
+much! (timecount = %d - count = %d) Shutting down.\n",
+			       dev->name, timecount, count);
 			/* Perform an emergency shutdown */
 			clear_bit(ORINOCO_STATE_DOIRQ, &priv->state);
 			hermes_set_irqmask(hw, 0);
@@ -1110,18 +1147,48 @@ much! Shutting down.\n",
 		DEBUG(3, "__orinoco_interrupt(): count=%d EVSTAT=0x%04x inten=0x%04x\n",
 		      count, evstat, hw->inten);
 
+		/* Mask unwanted events */
 		events = evstat & hw->inten;
 
 		if (! events) {
-			if (netif_queue_stopped(dev)) {
+			/* If we are waiting for Tx done and if at the first
+			 * iteration of the loop there is no event... */
+			if ((count == (IRQ_LOOP_MAX - 1)) &&
+			    (netif_queue_stopped(dev))) {
 				/* There seems to be a firmware bug which
 				   sometimes causes the card to give an
-				   interrupt with no event set, when there
-				   sould be a Tx completed event. */
-				DEBUG(3, "%s: Interrupt with no event (ALLOCFID=0x%04x)\n",
+				   interrupt with no event set. */
+				/* This usually happens when we handle
+				 * multiple Rx packets on one IRQ - Jean II */ 
+				DEBUG(1, "%s: Interrupt with no event (ALLOCFID=0x%04x)\n",
 				      dev->name, (int)hermes_read_regn(hw, ALLOCFID));
+
+#if 0
+				/* Fake those events only as needed.
+				 * We don't want to EVACK "phantom" events,
+				 * the firmware might be confused by that.
+				 * Jean II */
+				if (hermes_read_regn(hw, TXCOMPLFID)
+				    == priv->txfid)
+					events |= HERMES_EV_TX;
+				if (hermes_read_regn(hw, ALLOCFID)
+				    == priv->txfid)
+					events |= HERMES_EV_ALLOC;
+				/* Shall we continue ? */
+				if (! events)
+					break;
+#else
+				/* It's safe to "fake" those events, as
+				 * the handler will "validate" them using
+				 * the FIDs, and they won't be "acked".
+				 * Just be careful about infinite loops ;-)
+				 * Jean II */
 				events = HERMES_EV_TX | HERMES_EV_ALLOC;
-			} else /* Nothing's happening, we're done */
+#endif
+			} else
+				/* Nothing's happening, we're done.
+				   This is the standard exit point of this
+				   loop (after a few iterations) */
 				break;
 		}
 
@@ -1148,7 +1215,12 @@ much! Shutting down.\n",
 		if (events & HERMES_EV_ALLOC)
 			__orinoco_ev_alloc(priv, hw);
 		
-		hermes_write_regn(hw, EVACK, events);
+		/* We ack 'evstat' instead of 'events' to make sure we
+		 * don't ack 'fake' events. Mask it to make sure we
+		 * don't ack events we don't know about, otherwise
+		 * bad things will happen... Jean II */
+		//hermes_write_regn(hw, EVACK, events);
+		hermes_write_regn(hw, EVACK, evstat & hw->inten);
 	}
 
 	clear_bit(ORINOCO_STATE_INIRQ, &priv->state);
@@ -1182,9 +1254,10 @@ static void __orinoco_ev_info(struct ori
 	} __attribute__ ((packed)) info;
 	int err;
 
-	/* This is an answer to an INQUIRE command that we did earlier.
+	/* This is an answer to an INQUIRE command that we did earlier,
+	 * or an information "event" generated by the card
 	 * The controller return to us a pseudo frame containing
-	 * the information we have asked - Jean II */
+	 * the information in question - Jean II */
 	infofid = hermes_read_regn(hw, INFOFID);
 	DEBUG(3, "%s: __dldwd_ev_info(): INFOFID=0x%04x\n", dev->name,
 	      infofid);
@@ -1236,9 +1309,18 @@ static void __orinoco_ev_info(struct ori
 #endif /* WIRELESS_EXT > 11 */
 	}
 	break;
+	case HERMES_INF_LINK_STATUS:
+		/* Link status information frame.
+		 * The link status has changed, and the card tell us about
+		 * it. Most likely, the card has just connected to the cell
+		 * or created an Ad-Hoc cell.
+		 * We may try to plug that into the WE later on...
+		 * Jean II */
+		/* Payload : 1 word - see prism2sta_inf_linkstatus() */
+		break;
 	default:
-		DEBUG(1, "%s: Unknown information frame received.\n",
-		      priv->ndev.name);
+		DEBUG(1, "%s: Unknown information frame received (0x%X).\n",
+		      priv->ndev.name, le16_to_cpu(info.type));
 		/* We don't actually do anything about it */
 		break;
 	}
@@ -1414,25 +1496,58 @@ static void __orinoco_ev_tx(struct orino
 {
 	struct net_device *dev = &priv->ndev;
 	struct net_device_stats *stats = &priv->stats;
+	u16 txcomplfid;
 
-	DEBUG(3, "%s: Transmit completed\n", dev->name);
+	txcomplfid = hermes_read_regn(hw, TXCOMPLFID);
 
-	stats->tx_packets++;
-	netif_wake_queue(dev);
+	/* Because we may fake the event in the interrupt handler, we need
+	 * to "validate" it. Otherwise, we would overwrite the packet beeing
+	 * transmitted, which cause the famous EIO error - Jean II */
+	if (txcomplfid == priv->txfid) {
+		DEBUG(3, "%s: Transmit completed (TXCOMPLFID=0x%04x)\n",
+		      dev->name, txcomplfid);
+
+		/* Clear up the TXCOMPLFID register, because the card doesn't
+		 * do it. Otherwise, it will always read the last value, which
+		 * will defeat the purpose of the test above... - Jean II */
+		hermes_write_regn(hw, TXCOMPLFID, 0xAAAA);
+
+		/* Packet was transmitted */
+		stats->tx_packets++;
+		netif_wake_queue(dev);
+	}
 }
 
 static void __orinoco_ev_alloc(struct orinoco_private *priv, hermes_t *hw)
 {
+	struct net_device *dev = &priv->ndev;
+	struct net_device_stats *stats = &priv->stats;
 	u16 allocfid;
 
-	allocfid = hermes_read_regn(hw, ALLOCFID);
-	DEBUG(3, "%s: Allocation complete FID=0x%04x\n", priv->ndev.name, allocfid);
+	/* In theory, this should *not* happen in normal operation, because
+	 * hermes_allocate() run with irq off and wait for the command
+	 * completion, and we don't do any other allocate.
+	 * However, the Orinoco firmware seems to *never* send TXCOMPL events
+	 * but instead always send ALLOC events. It's not random, but
+	 * systematic, so handle this properly. - Jean II */
 
-	/* For some reason we don't seem to get transmit completed events properly */
-	if (allocfid == priv->txfid)
-		__orinoco_ev_tx(priv, hw);
+	allocfid = hermes_read_regn(hw, ALLOCFID);
 
-/* 	hermes_write_regn(hw, ALLOCFID, 0); */
+	/* Same as txcomplfid, "validate" the event - Jean II */
+	if (allocfid == priv->txfid) {
+		DEBUG(3, "%s: Allocation complete ALLOCFID=0x%04x\n",
+		      priv->ndev.name, allocfid);
+
+		/* Clear up the ALLOCFID register, because the card doesn't
+		 * do it. Otherwise, it will always read the last value, which
+		 * will defeat the purpose of the test above... - Jean II */
+		hermes_write_regn(hw, ALLOCFID, 0xAAAA);
+
+		/* Process like a Tx-Done event. Can't call __orinoco_ev_tx()
+		 * because of txcomplfid check. - Jean II */
+		stats->tx_packets++;
+		netif_wake_queue(dev);
+	}
 }
 
 static void determine_firmware(struct net_device *dev)
@@ -1940,19 +2055,27 @@ orinoco_xmit(struct sk_buff *skb, struct
 		goto fail;
 	}
 
+	/* Stop the queue before calling the send, because the irq handler
+	 * may run immediately after the command, especially on slow or
+	 * loaded MP systems - Jean II */
+	netif_stop_queue(dev);
+	dev->trans_start = jiffies;
+
 	/* Finally, we actually initiate the send */
 	err = hermes_docmd_wait(hw, HERMES_CMD_TX | HERMES_CMD_RECL, txfid, &resp);
 	if (err) {
 		printk(KERN_ERR "%s: Error %d transmitting packet\n", dev->name, err);
 		stats->tx_errors++;
+		/* Don't release the Tx queue to force a Tx Timeout and
+		 * force a reset of the card.
+		 * Most likely, SWSUPPORT0 has been wiped out or the
+		 * firmware has gone crazy, so a reset is needed - Jean II */
+		/* netif_start_queue(dev); */
 		goto fail;
 	}
 
-	dev->trans_start = jiffies;
 	stats->tx_bytes += data_off + data_len;
 
-	netif_stop_queue(dev);
-
 	orinoco_unlock(priv);
 
 	dev_kfree_skb(skb);
@@ -2519,9 +2642,16 @@ static int orinoco_ioctl_setrate(struct 
 	int bitrate; /* 100s of kilobits */
 	int i;
 	
-	bitrate = rrq->value / 100000;
-	if (rrq->value % 100000)
-		return -EINVAL;
+	/* As the user space doesn't know our highest rate, it uses
+	 * -1 to ask us to set the highest rate - Jean II */
+	if(rrq->value == -1)
+		bitrate = 110;
+	else {
+		bitrate = rrq->value / 100000;
+printk(KERN_DEBUG "value = %d - bitrate = %d\n", rrq->value, bitrate);
+		if (rrq->value % 100000)
+			return -EINVAL;
+	}
 
 	if ( (bitrate != 10) && (bitrate != 20) &&
 	     (bitrate != 55) && (bitrate != 110) )
@@ -2560,6 +2690,13 @@ static int orinoco_ioctl_getrate(struct 
 	if ( (ratemode < 0) || (ratemode > BITRATE_TABLE_SIZE) )
 		BUG();
 
+	/* The firmware will tell us the current rate, but won't tell
+	 * us if it's automatic or not, so we need to remember what we
+	 * set - Jean II */
+	rrq->fixed = ! bitrate_table[ratemode].automatic;
+	rrq->value = bitrate_table[ratemode].bitrate * 100000;
+	rrq->disabled = 0;
+
 	if (netif_running(dev)) {
 		/* If the device is actually operation we try to find more,
 		   about the current mode */
@@ -2570,14 +2707,15 @@ static int orinoco_ioctl_getrate(struct 
 		
 		switch (priv->firmware_type) {
 		case FIRMWARE_TYPE_AGERE: /* Lucent style rate */
-			for (i = 0; i < BITRATE_TABLE_SIZE; i++)
-				if (bitrate_table[i].agere_txratectrl == val) {
-					ratemode = i;
-					break;
-				}
-			if (i >= BITRATE_TABLE_SIZE)
-				printk(KERN_INFO "%s: Unable to determine current bitrate (0x%04hx)\n",
-				       dev->name, val);
+			/* Note : in Lucent firmware, the return value of 
+			 * HERMES_RID_CURRENTTXRATE is the bitrate in Mb/s,
+			 * and therefore is totally different from the
+			 * encoding of HERMES_RID_CNFTXRATECONTROL.
+			 * Don't forget that 6Mb/s is really 5.5Mb/s */
+			if(val == 6)
+				rrq->value = 5500000;
+			else
+				rrq->value = val * 1000000;
 			break;
 		case FIRMWARE_TYPE_INTERSIL: /* Intersil style rate */
 		case FIRMWARE_TYPE_SYMBOL: /* Symbol style rate */
@@ -2589,15 +2727,12 @@ static int orinoco_ioctl_getrate(struct 
 			if (i >= BITRATE_TABLE_SIZE)
 				printk(KERN_INFO "%s: Unable to determine current bitrate (0x%04hx)\n",
 				       dev->name, val);
+			rrq->value = bitrate_table[ratemode].bitrate * 100000;
 			break;
 		default:
 			BUG();
 		}
 	}
-
-	rrq->value = bitrate_table[ratemode].bitrate * 100000;
-	rrq->fixed = ! bitrate_table[ratemode].automatic;
-	rrq->disabled = 0;
 
  out:
 	orinoco_unlock(priv);


More information about the wireless mailing list