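OK. How about adding BS and FF to the set of control characters we accept
as text, and treating the content as binary only when the non-printable
characters exceed roughly 1% (1/128 in the patch below) of the printable ones?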
diff --git a/convert.c b/convert.c
index b6b7c66..b0c7641 100644
--- a/convert.c
+++ b/convert.c
@@ -34,11 +34,22 @@ static void gather_stats(const char *buf, unsigned long size, struct text_stat *
stats->lf++;
continue;
}
- if ((c < 32) && (c != '\t' && c != '\033')) {
+ if (c == 127)
+ /* DEL */
stats->nonprintable++;
- continue;
+ else if (c < 32) {
+ switch (c) {
+ /* BS, HT, ESC and FF */
+ case '\b': case '\t': case '\033': case '\014':
+ stats->printable++;
+ break;
+ default:
+ stats->nonprintable++;
+ }
+
}
- stats->printable++;
+ else
+ stats->printable++;
}
}
@@ -48,7 +59,7 @@ static void gather_stats(const char *buf, unsigned long size, struct text_stat *
static int is_binary(unsigned long size, struct text_stat *stats)
{
- if (stats->nonprintable)
+ if ((stats->printable >> 7) < stats->nonprintable)
return 1;
/*
* Other heuristics? Average line length might be relevant,
-
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html