Hi!
This version should work, at least it did not fail in my testing. It
does one test 5 second after bootup, then periodically once per 500
seconds. Different parameters are trivial to tweak.
Signed-off-by: Pavel Machek <pavel@suse.cz>
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 29cf145..5bbdb70 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -78,7 +78,7 @@ static inline int is_intr(u8 rtc_intr)
/*----------------------------------------------------------------*/
-static int cmos_read_time(struct device *dev, struct rtc_time *t)
+int cmos_read_time(struct device *dev, struct rtc_time *t)
{
/* REVISIT: if the clock has a "century" register, use
* that instead of the heuristic in get_rtc_time().
@@ -170,7 +170,7 @@ static int cmos_read_alarm(struct device
return 0;
}
-static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
+int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
{
struct cmos_rtc *cmos = dev_get_drvdata(dev);
unsigned char mon, mday, hrs, min, sec;
@@ -394,6 +394,33 @@ static const struct rtc_class_ops cmos_r
/*----------------------------------------------------------------*/
static struct cmos_rtc cmos_rtc;
+static struct device *pc_rtc_device;
+
+int set_alarm(int length)
+{
+ ssize_t retval;
+ unsigned long now, alarm;
+ struct rtc_wkalrm alm;
+
+ retval = cmos_read_time(pc_rtc_device, &alm.time);
+ if (retval < 0) {
+ printk("Auto sleep: can't get time?\n");
+ return retval;
+ }
+ rtc_tm_to_time(&alm.time, &now);
+ printk("Auto sleep: Now %ld\n", now);
+
+ alarm = now+length;
+ rtc_time_to_tm(alarm, &alm.time);
+
+ retval = cmos_set_alarm(pc_rtc_device, &alm);
+ if (retval < 0) {
+ printk("Auto sleep: can't set alarm.\n");
+ return retval;
+ }
+ printk("Auto sleep: Alarm set\n");
+ return 0;
+}
static irqreturn_t cmos_interrupt(int irq, void *p)
{
@@ -431,6 +458,8 @@ cmos_do_probe(struct device *dev, struct
if (cmos_rtc.dev)
...Not needed, but I do not know where the nohz problem was. Waiting 5 seconds means userspace comes up and problem is somehow worked around. The nohz problem was of the "you need to press keys to get forward progress" -- the kind I tried to debug few times already :-(. Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html --
the x86.git qa mix produced this spontaneous reboot failure: [ 18.387888] Calling initcall 0xC0135EA8: test_sleep+0x0/0x1c() [ 18.394189] Auto sleep: Now 1201724894 [ 18.395892] BUG: unable to handle kernel NULL pointer dereference at 00000160 [ 18.405792] IP: [<c0501b1b>] cmos_set_alarm+0xb/0x201 [ 18.411891] *pde = 00000000 [ spontaneous reboot ] config attached. Do you need any more info than this? Should i try to debug this? Ingo
Is it reproducible, or it just happened randomly once during shutdown?
Can you try this? I forgot that you can rmmod rtc-cmos, too...
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c.
Pavel
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 5bbdb70..15050cd 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -402,6 +402,8 @@ int set_alarm(int length)
unsigned long now, alarm;
struct rtc_wkalrm alm;
+ if (!pc_rtc_device)
+ return -EFAULT;
retval = cmos_read_time(pc_rtc_device, &alm.time);
if (retval < 0) {
printk("Auto sleep: can't get time?\n");
@@ -590,6 +592,7 @@ static void __exit cmos_do_remove(struct
struct cmos_rtc *cmos = dev_get_drvdata(dev);
struct resource *ports;
+ pc_rtc_device = NULL;
cmos_do_shutdown();
if (is_valid_irq(cmos->irq))
diff --git a/kernel/power/sleepy.c b/kernel/power/sleepy.c
index b8b2de3..222d22d 100644
--- a/kernel/power/sleepy.c
+++ b/kernel/power/sleepy.c
@@ -31,7 +31,8 @@ int ksleepyd(void *data)
{
msleep(5000);
while (1) {
- set_alarm(5);
+ if (set_alarm(5))
+ return -EFAULT;
pm_suspend(PM_SUSPEND_MEM);
msleep(500000);
}
--
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html
--
could you send me a clean patch against mainline please? The above chunk didnt apply because there's no ksleepyd (only test_sleep()). Thanks, Ingo --
Ok, here's full patch. Good luck,
Pavel
diff --git a/drivers/rtc/rtc-cmos.c b/drivers/rtc/rtc-cmos.c
index 29cf145..15050cd 100644
--- a/drivers/rtc/rtc-cmos.c
+++ b/drivers/rtc/rtc-cmos.c
@@ -78,7 +78,7 @@ static inline int is_intr(u8 rtc_intr)
/*----------------------------------------------------------------*/
-static int cmos_read_time(struct device *dev, struct rtc_time *t)
+int cmos_read_time(struct device *dev, struct rtc_time *t)
{
/* REVISIT: if the clock has a "century" register, use
* that instead of the heuristic in get_rtc_time().
@@ -170,7 +170,7 @@ static int cmos_read_alarm(struct device
return 0;
}
-static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
+int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
{
struct cmos_rtc *cmos = dev_get_drvdata(dev);
unsigned char mon, mday, hrs, min, sec;
@@ -394,6 +394,35 @@ static const struct rtc_class_ops cmos_r
/*----------------------------------------------------------------*/
static struct cmos_rtc cmos_rtc;
+static struct device *pc_rtc_device;
+
+int set_alarm(int length)
+{
+ ssize_t retval;
+ unsigned long now, alarm;
+ struct rtc_wkalrm alm;
+
+ if (!pc_rtc_device)
+ return -EFAULT;
+ retval = cmos_read_time(pc_rtc_device, &alm.time);
+ if (retval < 0) {
+ printk("Auto sleep: can't get time?\n");
+ return retval;
+ }
+ rtc_tm_to_time(&alm.time, &now);
+ printk("Auto sleep: Now %ld\n", now);
+
+ alarm = now+length;
+ rtc_time_to_tm(alarm, &alm.time);
+
+ retval = cmos_set_alarm(pc_rtc_device, &alm);
+ if (retval < 0) {
+ printk("Auto sleep: can't set alarm.\n");
+ return retval;
+ }
+ printk("Auto sleep: Alarm set\n");
+ return 0;
+}
static irqreturn_t cmos_interrupt(int irq, void *p)
{
@@ -431,6 +460,8 @@ cmos_do_probe(struct device *dev, struct
if (cmos_rtc.dev)
return -EBUSY;
+ pc_rtc_device = dev;
+
if (!ports)
return -ENODEV;
@@ -546,7 +577,7 @@ cleanup0:
static void ...thanks. Find below it rebased against latest -git. (there was a trivial
merge reject in power/Kconfig)
Ingo
---
drivers/rtc/rtc-cmos.c | 38 ++++++++++++++++++++++++++++++++++---
kernel/power/Kconfig | 10 +++++++--
kernel/power/Makefile | 2 -
kernel/power/sleepy.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 94 insertions(+), 6 deletions(-)
Index: linux/drivers/rtc/rtc-cmos.c
===================================================================
--- linux.orig/drivers/rtc/rtc-cmos.c
+++ linux/drivers/rtc/rtc-cmos.c
@@ -78,7 +78,7 @@ static inline int is_intr(u8 rtc_intr)
/*----------------------------------------------------------------*/
-static int cmos_read_time(struct device *dev, struct rtc_time *t)
+int cmos_read_time(struct device *dev, struct rtc_time *t)
{
/* REVISIT: if the clock has a "century" register, use
* that instead of the heuristic in get_rtc_time().
@@ -170,7 +170,7 @@ static int cmos_read_alarm(struct device
return 0;
}
-static int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
+int cmos_set_alarm(struct device *dev, struct rtc_wkalrm *t)
{
struct cmos_rtc *cmos = dev_get_drvdata(dev);
unsigned char mon, mday, hrs, min, sec;
@@ -394,6 +394,35 @@ static const struct rtc_class_ops cmos_r
/*----------------------------------------------------------------*/
static struct cmos_rtc cmos_rtc;
+static struct device *pc_rtc_device;
+
+int set_alarm(int length)
+{
+ ssize_t retval;
+ unsigned long now, alarm;
+ struct rtc_wkalrm alm;
+
+ if (!pc_rtc_device)
+ return -EFAULT;
+ retval = cmos_read_time(pc_rtc_device, &alm.time);
+ if (retval < 0) {
+ printk("Auto sleep: can't get time?\n");
+ return retval;
+ }
+ rtc_tm_to_time(&alm.time, &now);
+ printk("Auto sleep: Now %ld\n", now);
+
+ alarm = now+length;
+ rtc_time_to_tm(alarm, &alm.time);
+
+ retval = cmos_set_alarm(pc_rtc_device, &alm);
+ if (retval < 0) {
+ printk("Auto sleep: can't set ...got this link failure: kernel/built-in.o: In function `ksleepyd': : undefined reference to `set_alarm' with the attached config. Perhaps make it dependent on whatever RTC infrastructure it relies on? (even if this limits its utility) Ingo
You should be using the standard RTC library calls, exported from drivers/rtc/interface.c ... and making sure this mechanism will work with any wakeup-capable RTC. Otherwise you'll end being needlessly x86-specific, or reinventing those calls. Plus, the way you're doing it now is violating the locking protocol used by that driver. - Dave --
Yep, you are right, but that is the easy issue to fix. There's hard issue: I need struct rtc_device *rtc for the rtc that can be used for system resume, and I'd like to get it without violating too many layers. How to do that? Ideally, I need set_alarm(int) ...that will magically pick the right rtc device to talk to, and set alarm on it. I don't see how to implement it with current code. Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html --
i'd really love to have a /dev/rtc device compatibility APIs, both inside and outside the kernel. I really dont know why the new RTC code does not do it - why does it put up artificial anti-adoption barriers to make it harder to migrate to the new code? Ingo --
Unfortunately the /dev/rtc code became a legacy API for good reasons. Like not recognizing that all the world's not a PC, with a single RTC that clones a long-obsolete chip from Motorola ... and not having been specified in a hardware-neutral manner. Oh, and of course not all systems actually used the same RTC driver anyway; it's not like there What "anti-adoption" barriers would those be? Pavel's immediate problem was easy to solve, though he didn't know it. From the perspective of almost anyone not using a PC, all the adoption barriers were on the side of the /dev/rtc code. - Dave --
i dont get it - please give me specific technological reasons why on my PC /dev/rtc couldnt be mapped to /dev/rtc0 - without requiring any user-space changes. The APIs seem mostly covered, or at least mappable. Why should the transition to a new driver require user-level changes? (beyond the obvious extensions, but those should show up as extensions.) In fact i detest the old RTC code with a vengence, so dont understand this as some invitation to flame or something - i simply want YOUR new code to be utilized more! I just dont see the specific technological reasons of why there is no .config switch to switch the legacy /dev/rtc over to the new RTC driver and be done with it. I'd enable it in a heartbeat and would encourage distros to do so. Are there missing APIs? Is the ioctl API totally different? It's impossible to wrap it? I'm not really interested in "this isnt a PC" arguments. The incompatibility is such an obvious migration barrier to me - do you really not see it? Ingo --
So far as I'm aware, the only issue visible to userspace relate to the legacy driver's use of "/dev/rtc" not "/dev/rtc0" ... which has previously been "solved" by symlinking "rtc" -> "rtc0", possibly with assistance from udev. (Related: the major/minor number of /dev/rtc.) Is that your understanding too? The "why" is that nobody has been sufficiently bothered by the need Good to know! :) But so you're clear ... not "my" code, mostly. Alessandro Zummo started this framework, based in part on Russell King's framework for RTCs that were integrated into ARM based SOCs. I contributed a bunch, including Initially: because that idea hadn't been suggested. And because that sort of code migration on PC hardware needs to be done slowly enough that the migration issues have a real chance to surface. Issues like: - Change to the ACPI suspend/resume interactions broke RTC wakeup for a couple releases in the new RTC framework; now fixed. - HPET stuff. I think the recently merged HPET update may imply that rtc-cmos needs an HPET hook, but I've not looked at details. - Minor bugfixes, which have been resolved over time. - Your desire to keep using old /dev/rtc nodes (no symlink, so new kernels and old non-udev fileystems can mix). New issue, no patch. See above. Once the HPET thing is resolved, I think distros can Let's just say that all my PCs run the new code just fine, and not all of them have /dev/rtc symlinked to /dev/rtc0 ... but I can very easily imagine it look bit different from a distro perpective. - Dave --
Which is why I was puzzled that you didn't start out doing it
the "right" way ... even just hard-wiring the dubious assumption
Well, "rtc which (a) has an alarm, (b) may be used for system wakeup".
Assuming there *is* such an RTC ... fortunately, there will usually be
Easy enough. Given an RTC device node, you can tell whether it has
no chance at all to meet those requirements:
static int has_wakealarm(struct device *dev, void *name_ptr)
{
struct rtc_device *rtc = to_rtc_device(dev);
/* (a) has an alarm */
if (!rtc->op->set_alarm)
return 0;
/* (b) may be used for system wakeup */
if (device_may_wakeup(dev->parent))
return 0;
*(char **)name_ptr = rtc->name;
return 1;
}
Then there's a new class interface you may not have known about,
since it's been merged barely over a week now. You can use it
to find the name of the rtc to pass to rtc_open():
static char *find_wake_rtc(void)
{
char *pony = NULL;
dev = class_find_device(rtc_class, &rtc, has_wakealarm);
return pony;
}
On most PCs that will return the name "rtc0" ... and it'll do
the same on most of the other systems I have. On both of the
two-RTC systems I have that will return "rtc1". On systems
with no wakealarm-enabled RTC, that will return NULL.
Voila!
- Dave
--
because this was mostly about an quick & easy hack to see whether it makes sense at all to automate the testing of suspend/resume. (People who want to use RTC functionality might not have the desire to get into extending it straight away - they just want something rather obvious done and that's it.) Ingo --
I think you should have written "quick and dirty". ;) It would have been easier to just use the public interface and hard-wire "rtc0". But going directly to the hardware was dirtier, and more in the spirit of "hack that obviously shouldn't go upstream until it gets done properly". - Dave --
Yes, it was "quick and dirty". And I do not think it is going upstream in this form...? Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html --
which would be a pity - this thing _almost_ started doing suspend and resume cycles on my testsystems, all by itself :-) Ingo --
...which is good reason for me to create cleaned-up patch, right? wakeup in .c now works, so I actually have time to do it now. Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html --
OK, here's a version that's cleaner and suspends. Resuming ... another story, it's currently broken on this ARM board (no relationship to this testing code). Note to Pavel: yes, the version with the pony *is* better. - Dave ============ CUT HERE Updated verion of Pavel's "sleepy" test. This version doesn't depend on x86-specific RTC support, or suspend-to-RAM capability, and describes itself a bit better. Partially tested on an ARM board ... for some reason system sleep states are currently broken on that system (new behavior since 2.6.24), so this won't yet complete. Note a generic glitch: the message strings aren't in the same section as the function using them, so this has a bigger runtime footprint than it should. --- kernel/power/Kconfig | 14 +++++ kernel/power/Makefile | 2 kernel/power/sleepy.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 126 insertions(+), 3 deletions(-) --- sam9.orig/kernel/power/Kconfig 2008-02-02 03:16:30.000000000 -0800 +++ sam9/kernel/power/Kconfig 2008-02-02 12:47:20.000000000 -0800 @@ -1,3 +1,6 @@ +# +# Note that power management involves more than just system sleep states... +# config PM bool "Power Management support" depends on !IA64_HP_SIM @@ -74,8 +77,8 @@ config PM_TRACE_RTC RTC across reboots, so that you can debug a machine that just hangs during suspend (or more commonly, during resume). - To use this debugging feature you should attempt to suspend the machine, - then reboot it, then run + To use this debugging feature you should attempt to suspend the + machine, then reboot it, then run dmesg -s 1000000 | grep 'hash matches' @@ -104,6 +107,13 @@ config SUSPEND powered and thus its contents are preserved, such as the suspend-to-RAM state (e.g. the ACPI S3 state). +config PM_WAKEALARM_TEST + bool "Test suspend/resume and wakealarm during bootup" + depends on SUSPEND && PM_DEBUG && RTC_LIB + ---help--- + This option will suspend your ...
yay! Threw this into my setup. It built fine with the new option disabled and enabled as well. Unfortunately it said this: [ 23.509562] Calling initcall 0xc0c49e00: be_sleepy+0x0/0x170() [ 23.515837] PM: no wakelarm-capable RTC [ 23.517562] initcall 0xc0c49e00: be_sleepy+0x0/0x170() returned 0. (oh, btw., a small typo: s/wakelarm/wakealarm/) is "wakealarm" something generally available on PC RTCs? I'll try to look into the BIOS setup, maybe it's just disabled ... Ingo --
ok, there was indeed a "RTC Alarm: disabled" option in the BIOS. But no matter how many of those resume-alarm options i enabled, the kernel thinks there's no time capability: [ 23.541565] Calling initcall 0xc0c49e00: be_sleepy+0x0/0x170() [ 23.547840] PM: no wakelarm-capable RTC [ 23.549566] initcall 0xc0c49e00: be_sleepy+0x0/0x170() returned 0. i could set date/time of wakeup alarm in the BIOS, so i suspect the chip itself is capable of it. Bootlog and kernel config attached. (maybe i misconfigured something?) Ingo
i didnt have all RTC drivers enabled (it was a randconfig .config i started out with) - but this did not appear to cure the problem. New bootlog and new config attached. Ingo
i disabled all hpet items in the .config on the theory that they might interfere - but that didnt change the end result: [ 23.893598] Calling initcall 0xc0c518b0: be_sleepy+0x0/0x170() [ 23.901601] PM: no wakelarm-capable RTC [ 23.905599] initcall 0xc0c518b0: be_sleepy+0x0/0x170() returned 0. [ 23.910879] initcall 0xc0c518b0 ran for 3 msecs: be_sleepy+0x0/0x170() so close, yet so far away :-) i also disabled ACPI in the .config, which caused this: [ 13.300838] Calling initcall 0xc0c32370: cmos_init+0x0/0x20() [ 13.307063] initcall 0xc0c32370: cmos_init+0x0/0x20() returned -19. [ 13.312840] initcall 0xc0c32370 ran for 0 msecs: cmos_init+0x0/0x20() is that expected? btw., small observation: from looking at the bootlog it's not apparent to me what kind of "RTC hardware" there is on this box. It's standard PC stuff, so i suspect what covers it is: CONFIG_RTC_DRV_CMOS=y, which says: [ 14.900938] Calling initcall 0xc0c6f9c0: cmos_init+0x0/0x10() [ 14.909178] pnp: the driver 'rtc_cmos' has been registered [ 14.912945] rtc_cmos 00:04: disabling not supported [ 14.916940] rtc_cmos: probe of 00:04 failed with error -16 [ 14.920959] initcall 0xc0c6f9c0: cmos_init+0x0/0x10() returned 0. [ 14.926220] initcall 0xc0c6f9c0 ran for 11 msecs: cmos_init+0x0/0x10() it might make sense to emit something like: PC-style RTC detected. so that people know that your cool new RTC code is in action. Also, it's not apparent what the effects of that failure message is. When we fail something then users generally want to know: was it fatal, is it a warning, and what the limitations of that error condition are. Ingo --
good news: the suspend+resume self-test works perfectly now! :-) [ 24.018541] Calling initcall 0xc0c70a40: be_sleepy+0x0/0x170() [ 24.023780] PM: wakealarm test with Suspend-to-RAM [ 24.027503] PM: Syncing filesystems ... done. [ 24.032177] PM: Preparing system for mem sleep [ .........] [ 30.317160] initcall 0xc0c70a40: be_sleepy+0x0/0x170() returned 0. [ 30.321593] initcall 0xc0c70a40 ran for 6289 msecs: be_sleepy+0x0/0x170() that's a successful suspend+resume cycle, all automatic! now, this was _totally_ non-obvious to set up. (config attached) I had the (surely incorrect) impression that the new RTC code does everything it can to keep itself not used ;-) It did not complain that it's not the default RTC driver, and it did not complain that it's configured in a way that makes it functionally inferior. Please dont be that shy and give us a really prominent option that does something like: CONFIG_USE_THE_COOL_NEW_RTC_CODE=y with all its bells and whistles, and punch out the old drivers/char/rtc.c RTC driver if this option is enabled. Believe me, people _want_ to run your code on all the PC distros too. (and as a side-effect all the non-PC platforms are helped too) Small feature request: could you please measure the duration suspend+resume cycle and print out the result to the kernel console like this: INFO: suspend+resume test succeeded and took 1.289 seconds. such a message would be useful in flagging suspend+resume delay regressions. (and to automate the bisection of such regressions, etc.) (Please keep the millioseconds portion too - that way i can follow the finer details of suspend+resume performance over time as well.) For example recently someone introduced a 5-10 seconds delay into the suspend+resume cycle (yeah, that bloke was me), so it would also be nice to add some hard limit on how much it takes: WARNING: suspend+resume test took more than 5 seconds. and print a stack trace via WARN_ON(1), so that ...
randconfig testing found the following build failure: kernel/built-in.o: In function `be_sleepy': sleepy.c:(.init.text+0x1952): undefined reference to `rtc_class' sleepy.c:(.init.text+0x1963): undefined reference to `rtc_class_open' config attached. Ingo
should depend on RTC_LIB=y ... which will prevent that build error, but would still allow the RTC to live in a module. Updated patch appended ... it should address most config issues. - Dave ====== CUT HERE Updated verion of Pavel's "sleepy" test. This version doesn't depend on x86-specific RTC support, or suspend-to-RAM capability, and describes itself a bit better. Partially tested on an ARM board ... for some reason system sleep states are currently broken on that system (new behavior since 2.6.24), so this won't yet complete. Note a generic glitch: the message strings aren't in the same section as the function using them, so this has a bigger runtime footprint than it should ... --- drivers/char/Kconfig | 5 +- drivers/rtc/Kconfig | 1 kernel/power/Kconfig | 17 ++++++- kernel/power/Makefile | 2 kernel/power/sleepy.c | 113 ++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 134 insertions(+), 4 deletions(-) --- g26.orig/drivers/char/Kconfig 2008-02-02 22:42:15.000000000 -0800 +++ g26/drivers/char/Kconfig 2008-02-02 22:51:53.000000000 -0800 @@ -715,9 +715,12 @@ config NVRAM To compile this driver as a module, choose M here: the module will be called nvram. +comment "You are using the RTC framework, not the legacy CMOS RTC driver" + depends on RTC_DRV_CMOS + config RTC tristate "Enhanced Real Time Clock Support" - depends on !PPC && !PARISC && !IA64 && !M68K && !SPARC && !FRV && !ARM && !SUPERH && !S390 + depends on !PPC && !PARISC && !IA64 && !M68K && !SPARC && !FRV && !ARM && !SUPERH && !S390 && !RTC_DRV_CMOS ---help--- If you say Y here and create a character special file /dev/rtc with major number 10 and minor number 135 using mknod ("man mknod"), you --- g26.orig/drivers/rtc/Kconfig 2008-02-02 22:37:28.000000000 -0800 +++ g26/drivers/rtc/Kconfig 2008-02-02 22:38:23.000000000 -0800 @@ -281,6 +281,7 @@ comment "Platform RTC drivers" config RTC_DRV_CMOS tristate "PC-style ...
I guess it also should depend on CONFIG_RTC_DRV_CMOS (not being a module) -- "Premature optimization is the root of all evil." - Donald Knuth --
No -- we need a *generic* test, not one that's needlessly coupled
to PC hardware. Coping with the legacy RTC driver headache is a
different issue. And ISTR that initramfs/initrd should let modules
get loaded before late_initcall... yes?
See the appended; it includes more of Ingo's suggestions.
Since this is increasingly unrelated to the "sleepy linux" concept
(a version of what systems like OLPC, N700, and N800 are doing), I
got rid of the "sleepy.c" file.
- Dave
============ CUT HERE
Boot-time test for system suspend states (STR or standby). The generic
RTC framework triggers wakeup alarms, used to exit those states.
- Measures some aspects of suspend time; uses "jiffies". This
should probably use a clocksource instead, since those often
work properly even while IRQs are disabled.
- Includes a command line parameter, which needs work yet ... it
currently turns this test off, but it should also let the target
state be specified (and maybe even default to "no test").
Lightly tested on an ARM system, which reported that suspending devices
took 7 msec and resuming them took 132 msec:
* The PCMCIA stack misbehaved a bit. It didn't finish enumerating
the card before it suspended, so the wakeup event came from the
CF card IRQ not from the RTC!
* The MMC stack misbehaved more seriously. It wants to remove devices
during the suspend sequence (quite needlessly, on this hardware),
which now makes Linux unhappy.
Workaround in both cases was to take the memory card out before booting.
Also includes some Kconfig tweaks to help reduce configuration bugs on
x86, by avoiding the legacy RTC driver when the generic RTC framework
is enabled ... those should become a separate patch.
---
drivers/char/Kconfig | 5 +
drivers/rtc/Kconfig | 1
kernel/power/Kconfig | 10 +++
kernel/power/main.c | 163 +++++++++++++++++++++++++++++++++++++++++++++++++++
4 files changed, 178 insertions(+), 1 deletion(-)
--- ...The changes look good to me. Well, it would be nice to have this feature in as soon as reasonably possible, so that people can include suspend tests in the automated testing. Thanks, --
They feel unfinished to me though. :) Like using "jiffies" instead of a clocksource, which makes trouble since the timing covers periods with IRQs disabled. And the test Except ... "rtcwake" (from util-linux-ng) already supports such testing, albeit from userspace. But not the timing tests. What was the rationale for wanting this done in-kernel? (Other than to know it can work portably.) - Dave --
Ingo wants it for automated testing not involving user space modifications. Thanks, Rafael --
Well, I'd say that timing has bigger problem, right? It is set alarm suspend system | poweroff alarm expires system resumes Ingo has to answer this one... Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html --
There's no "poweroff" step when entering STR or STANDBY!
But more specifically, I avoided that issue by comparing times between
(a) start and end of the "suspend devices" steps;
(b) start and end of the "resume devices" steps.
Example output, with the relevant lines highlighted by "*":
PM: test RTC wakeup from 'mem' suspend
PM: Syncing filesystems ... done.
PM: Preparing system for mem sleep
Freezing user space processes ... (elapsed 0.00 seconds) done.
Freezing remaining freezable tasks ... (elapsed 0.00 seconds) done.
PM: Entering mem sleep
Suspending console(s)
* PM: suspend devices took 0.000 seconds
GPIO-A may wake for 00080000
GPIO-C may wake for 00000008
GPIO-D may wake for 00000020
AT91: PM - wake mask 00000036, pm state 3
AT91: PM - no slow clock mode yet ...
AT91: PM - wakeup 00000002
* PM: resume devices took 0.132 seconds
PM: Finishing wakeup.
Restarting tasks ... done.
The underlying clocksource has resolution of 32 KiHz, while HZ=128;
the "suspend" more typically reports 7 msec. And there should be a
few more wakeup GPIOs, except I seem to not have enabled gpio_keys.
That "wakeup 00000002" means the heavily-overloaded "system" IRQ
woke the system ... the RTC is on that IRQ line.
- Dave
--
Aha, that should work, yes. Sorry for noise. Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html --
What was the decision here? Ingo, did you merge this for 2.6.26, or just for your test farm? -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html --
it's working fine here, i've had it in my test-setup for a long time,
and it triggers every now and then.
We should merge the patch below. My only gripe that should be solved in
an add-on patch is that right now it triggers with an about 1:100 chance
in randconfig tests. The reason is the insane amount of user-selected
options to get this vital functionality done. Could we _PLEASE_ get a
prominent "new, cool RTC driver stuff" config option that just select's
this cannot really go via x86.git.
Linus, could you please apply the debug patch below? It's really useful
and it is catching suspend/resume bugs as well.
Ingo
------------>
Subject: suspend/resume self-test
From: David Brownell <david-b@pacbell.net>
Boot-time test for system suspend states (STR or standby). The generic
RTC framework triggers wakeup alarms, used to exit those states.
- Measures some aspects of suspend time; uses "jiffies". This
should probably use a clocksource instead, since those often
work properly even while IRQs are disabled.
- Includes a command line parameter, which needs work yet ... it
currently turns this test off, but it should also let the target
state be specified (and maybe even default to "no test").
Lightly tested on an ARM system, which reported that suspending devices
took 7 msec and resuming them took 132 msec:
* The PCMCIA stack misbehaved a bit. It didn't finish enumerating
the card before it suspended, so the wakeup event came from the
CF card IRQ not from the RTC!
* The MMC stack misbehaved more seriously. It wants to remove devices
during the suspend sequence (quite needlessly, on this hardware),
which now makes Linux unhappy.
Workaround in both cases was to take the memory card out before booting.
Also includes some Kconfig tweaks to help reduce configuration bugs on
x86, by avoiding the legacy RTC driver when the generic RTC framework
is enabled ... those should become a separate ...-- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html --
I think "no test" should be the default; STR working sanely on x86 is unfortunately too much a surprise. Someone more active That's already been split out in a separate patch, now in MM. Though it may deserve another tweak, forcing off *all* legacy RTC drivers if the new framework is enabled. --
Well, out of 4 boxes I have, one doesn't resume from suspend, but this is the Yes, the legacy drivers shoule depend on !(new framework). Thanks, Rafael --
All i'm asking for is to make the self-test easily accessible. Not for
it to blow up in the face of users who do not ask for it.
And, at least to me, there seems to be a rather apparent correlation
between "suspend/resume regressions caught as early as possible" and the
future, desired state of: "STR working sanely on x86" ;-)
You really seem to treat S2R suckiness as a fact of life, but it isnt.
Yes, it's a hard field for a number of reasons, but we could be doing _a
lot_ better. One of them would be this "notice s2r breakage when i
create or add the patch that breaks it" angle.
Ingo
--
I'm all for that, but also I don't want to see it blow up regularly in the face of people who just enable all the selftest options. The Thing is, this will catch not just regressions ... but cases where STR never worked in the first place. Video problems, etc. Also various system startup races, as in the PCMCIA and MMC/SD/SDIO Until it starts working on a given platform, it *IS* a fact of life. Once it works, then it's fair to enter "no regressions" mode. Despite all the recent improvements, I think it's unwise to pretend that STR Right, and the best way to ensure that it's only *regressions* that break things is to expect someone to have configured the kernel command line appropriately (in grub or whatever). Another way to achieve that is to include the test code based on one config option, and change the test *mode* based on another one. That way a distro could include that in standard kernels with "no test" mode as the default, but it would be easy to enable only for oneshot tests or field troubleshooting ... while developers could turn on the more dangerous "always test STR" (or standby, or hibernate) mode, if they were helping to find and fix problems surfaced by such tests. - Dave --
David is right here. At minimum, s2ram needs acpi_sleep=... options to tell it how to set up the video. That is not issue for you, but it means we should not be doing it by default. Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html --
nowhere did i suggest that it should be default-enabled ... i just suggest the obvious: please make it a trivial, 2 seconds flip of a switch, instead of a 60 minutes hunting which i had to do to enable the S2RAM self-test: which also meant i had to write a few patches to print out what the code does and why it does not work, to finally enable this critical test infrastructure. damn, with that kind of attitude towards people who _want to help_, no wonder s2ram still sucks ;-) Ingo --
The patch you asked to be merged *DID* have it be default-enabled! So you did more than just "suggest"... if that option is enabled in Kconfig, this test is always forced on and it will cause failures on systems where STR has never worked. The patch comments even pointed out that flaw. --
i think you dont understand what "default enabled" means in this context. Default enabled means the Kconfig entry has a "default y" line - which it did not have in the patch i sent. if a tester enables the .config option then it is very much expected to be triggered, just like the other .config debug options. Of course, the tester specifically enabled it, so that's what we want to happen. Not some other "yes_i_really_meant_it" boot option ... There can also be _another_ .config option that turns off the 'run by default' property - but if someone selects the _default off_ feature and enables it, just like i did, then the feature is very much expected to run. you really seem to have deep misunderstandings about how testers use Linux, what it takes to build up a proper debug infrastructure and how to utilize all these things to make Linux better. You come from the embedded world where everything is configured together specifically, where people know what they are using and were every feature is well accounted for. But you seem to have little insight into how distributions utilize kernel features and what it takes to accept the help of testers who dont know the field that well but still want to help out - as long as we let them. These are well-established practices we use in all other debug features that help Linux today, and what you are asking for is weird, unusual and wrong. Please take a look at how all the other debug stuff works. Rule #1: testers dont have to beg the kernel for 1 hour and have to write 2 patches to let the test code be run finally ... ;-) This isnt even about any nuances where reasonable people might disagree, this is really basic testing-101 stuff. Ingo --
a simple .config flag is perfectly fine for that, as long as it's default disabled and properly demarked. We have literally _dozens_ of "dangerous" test options and _nobody_ complains about them being dangerous ... They do their primary job of triggering bugs sooner, faster and harder, resulting in bugs getting fixed sooner, faster and no distro would enable this option, it just adds a needless 5-6 seconds delay to the bootup, and a needless "s2ram blows up sooner than it should" risk. _I_ want to enable this option, and want to see it trigger more often than just once out of a hundred randconfig setups. really, you are making rookie mistakes in this area and you are doing injustice to the code you wrote and maintain :-) As i said it before, externally it looks like as if you intentionally avoided your code from being used, from people who _want_ to use your code. _I_ had to fight for almost an hour (!) until i figured out the zillions of .config variants that were finally able to get my test-system to boot-time suspend and resume all by itself. It's totally non-obvious. As far as the general Linux community goes, it's almost as if your code did not even exist, so well hidden and obscured it is. Ingo --
In fact, it should go through the ACPI tree. Thanks, --
Good. What's next here? Will you just merge it to -x86, meaning it is planned for 2.6.26? Pavel -- (english) http://www.livejournal.com/~pavelmachek (cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html --
Because CONFIG_RTC_DRV_CMOS was not configured, though Then later you had that enabled, but you also had the So that was the driver which succesfully bound to the RTC (since it's probed first) and prevented the more generic code from kicking in. Nobody has yet submitted a patch to help arbitrate between the legacy and "newfangled" RTC drivers. The general "how to Kconfig" policy has been to expect a lot (too much?) from users, but this type of annoyance is too common. Maybe we need something to more actively avoid the "two drivers" on PC hardware ... like the appended. (The "comment" is there as a reminder to folk who still look to that menu for RTC stuff.) - Dave --- g26.orig/drivers/char/Kconfig +++ g26/drivers/char/Kconfig @@ -715,9 +715,12 @@ config NVRAM To compile this driver as a module, choose M here: the module will be called nvram. +comment "You are using the RTC framework, not the legacy CMOS RTC driver" + depends on RTC_DRV_CMOS + config RTC tristate "Enhanced Real Time Clock Support" - depends on !PPC && !PARISC && !IA64 && !M68K && !SPARC && !FRV && !ARM && !SUPERH && !S390 + depends on !PPC && !PARISC && !IA64 && !M68K && !SPARC && !FRV && !ARM && !SUPERH && !S390 && !RTC_DRV_CMOS ---help--- If you say Y here and create a character special file /dev/rtc with major number 10 and minor number 135 using mknod ("man mknod"), you --- g26.orig/drivers/rtc/Kconfig +++ g26/drivers/rtc/Kconfig @@ -281,6 +281,7 @@ comment "Platform RTC drivers" config RTC_DRV_CMOS tristate "PC-style 'CMOS'" depends on X86 || ALPHA || ARM || M32R || ATARI || PPC || MIPS + default y if X86 help Say "yes" here to get direct support for the real time clock found in every PC or ACPI-based system, and some other boards. --
This looks like a prime candidate for a HAVE_RTC config variable So you do: config RTC tristate "Enhanced Real Time Clock Support" + depends on HAVE_RTC && !RTC_DEV_CMOS config HAVE_RTC bool (No "depends on" allowed!) And then the architectures that do support the RTC Framework do an: (arch/x86/Kconfig as an example): config X86 + select HAVE_RTC So when PPC suddenly supports it they add this single select statement and no more Kconfig magic needed. See Documentation/kbuild/kconfig-language.txt for a bit more elaborate description. Sam --
In drivers/rtc/Kconfig there's *currently* a list of about three dozen (!) different RTC drivers ... some of which support quite a few different variants. I'd be glad to see someone clean up the config mess with respect to systems using MC146818 compatible chips, but that shouldn't begin by assuming the numerous non-MC146818 chips are somehow not real RTCs. ;) - Dave --
