Re: [PATCH] pkt_sched: Destroy gen estimators under rtnl_lock().

Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]
From: David Miller
Date: Sunday, August 17, 2008 - 3:57 pm

From: Jarek Poplawski <jarkao2@gmail.com>
Date: Thu, 14 Aug 2008 08:52:50 +0000


Here is how I propose to plug this hole.  The issue is that there is
this potential gap where both bits are clear while there is a context
that can reschedule the qdisc.

And I'm pretty sure the most desirable fix is to get rid of that gap
if it can easily be done.

It seems like it can be.  What we do in the patch below is something
like this:

1) The one code path that can leave both bits clear then reschedule
   is changed to only leave that state visible while holding the
   root qdisc's lock.

   All other code paths will not reschedule once they clear the bits.

2) dev_deactivate() unconditionally takes the root qdisc spinlock
   and just waits for both bits to clear.  This is now entirely
   sufficient to ensure that no pending runs of the qdisc remain.

Signed-off-by: David S. Miller <davem@davemloft.net>

diff --git a/net/core/dev.c b/net/core/dev.c
index 600bb23..896393d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1338,20 +1338,27 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
 	rcu_read_unlock();
 }
 
+/* We own the __QDISC_STATE_SCHED state bit, and know that
+ * we are the only entity which can schedule this qdisc on
+ * the output queue.
+ */
+static void __qdisc_schedule(struct Qdisc *q)
+{
+	struct softnet_data *sd;
+	unsigned long flags;
+
+	local_irq_save(flags);
+	sd = &__get_cpu_var(softnet_data);
+	q->next_sched = sd->output_queue;
+	sd->output_queue = q;
+	raise_softirq_irqoff(NET_TX_SOFTIRQ);
+	local_irq_restore(flags);
+}
 
 void __netif_schedule(struct Qdisc *q)
 {
-	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state)) {
-		struct softnet_data *sd;
-		unsigned long flags;
-
-		local_irq_save(flags);
-		sd = &__get_cpu_var(softnet_data);
-		q->next_sched = sd->output_queue;
-		sd->output_queue = q;
-		raise_softirq_irqoff(NET_TX_SOFTIRQ);
-		local_irq_restore(flags);
-	}
+	if (!test_and_set_bit(__QDISC_STATE_SCHED, &q->state))
+		__qdisc_schedule(q);
 }
 EXPORT_SYMBOL(__netif_schedule);
 
@@ -1974,15 +1981,13 @@ static void net_tx_action(struct softirq_action *h)
 
 			head = head->next_sched;
 
-			smp_mb__before_clear_bit();
-			clear_bit(__QDISC_STATE_SCHED, &q->state);
-
 			root_lock = qdisc_lock(q);
 			if (spin_trylock(root_lock)) {
+				clear_bit(__QDISC_STATE_SCHED, &q->state);
 				qdisc_run(q);
 				spin_unlock(root_lock);
 			} else {
-				__netif_schedule(q);
+				__qdisc_schedule(q);
 			}
 		}
 	}
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 4685746..d6ac170 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -647,7 +647,7 @@ static void dev_deactivate_queue(struct net_device *dev,
 	}
 }
 
-static bool some_qdisc_is_busy(struct net_device *dev, int lock)
+static bool some_qdisc_is_busy(struct net_device *dev)
 {
 	unsigned int i;
 
@@ -661,14 +661,12 @@ static bool some_qdisc_is_busy(struct net_device *dev, int lock)
 		q = dev_queue->qdisc_sleeping;
 		root_lock = qdisc_lock(q);
 
-		if (lock)
-			spin_lock_bh(root_lock);
+		spin_lock_bh(root_lock);
 
 		val = (test_bit(__QDISC_STATE_RUNNING, &q->state) ||
 		       test_bit(__QDISC_STATE_SCHED, &q->state));
 
-		if (lock)
-			spin_unlock_bh(root_lock);
+		spin_unlock_bh(root_lock);
 
 		if (val)
 			return true;
@@ -689,25 +687,8 @@ void dev_deactivate(struct net_device *dev)
 	synchronize_rcu();
 
 	/* Wait for outstanding qdisc_run calls. */
-	do {
-		while (some_qdisc_is_busy(dev, 0))
-			yield();
-
-		/*
-		 * Double-check inside queue lock to ensure that all effects
-		 * of the queue run are visible when we return.
-		 */
-		running = some_qdisc_is_busy(dev, 1);
-
-		/*
-		 * The running flag should never be set at this point because
-		 * we've already set dev->qdisc to noop_qdisc *inside* the same
-		 * pair of spin locks.  That is, if any qdisc_run starts after
-		 * our initial test it should see the noop_qdisc and then
-		 * clear the RUNNING bit before dropping the queue lock.  So
-		 * if it is set here then we've found a bug.
-		 */
-	} while (WARN_ON_ONCE(running));
+	while (some_qdisc_is_busy(dev))
+		yield();
 }
 
 static void dev_init_scheduler_queue(struct net_device *dev,
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Previous message: [thread] [date] [author]
Next message: [thread] [date] [author]

Messages in current thread:
[PATCH] pkt_sched: Destroy gen estimators under rtnl_lock()., Jarek Poplawski, (Mon Aug 11, 1:53 pm)
Re: [PATCH] pkt_sched: Destroy gen estimators under rtnl_l ..., David Miller, (Sun Aug 17, 3:57 pm)
Re: [PATCH] pkt_sched: Destroy gen estimators under rtnl_l ..., Denys Fedoryshchenko, (Sun Aug 17, 11:08 pm)
[PATCH] pkt_sched: Fix qdisc list locking, Jarek Poplawski, (Fri Aug 22, 1:41 am)
[PATCH take 2] pkt_sched: Fix qdisc list locking, Jarek Poplawski, (Fri Aug 22, 2:27 am)
Re: [PATCH] pkt_sched: Fix qdisc list locking, Herbert Xu, (Fri Aug 22, 3:14 am)
Re: [PATCH take 2] pkt_sched: Fix qdisc list locking, Herbert Xu, (Fri Aug 22, 3:15 am)
Re: [PATCH take 2] pkt_sched: Fix qdisc list locking, David Miller, (Fri Aug 22, 3:23 am)
Re: [PATCH take 2] pkt_sched: Fix qdisc list locking, David Miller, (Fri Aug 22, 3:28 am)
Re: [PATCH] pkt_sched: Destroy gen estimators under rtnl_l ..., Stephen Hemminger, (Sun Aug 24, 4:26 pm)
Re: [PATCH] pkt_sched: Destroy gen estimators under rtnl_l ..., Stephen Hemminger, (Sun Aug 24, 5:29 pm)
Re: [PATCH] pkt_sched: Destroy gen estimators under rtnl_l ..., Stephen Hemminger, (Tue Aug 26, 5:24 am)
Re: [PATCH] pkt_sched: Destroy gen estimators under rtnl_l ..., Stephen Hemminger, (Tue Aug 26, 5:50 am)
RE: [PATCH take 2] pkt_sched: Fix qdisc_watchdog() vs. dev ..., Duyck, Alexander H, (Mon Sep 15, 4:44 pm)