The --color option messes up UTF8 output. Compare the following: echo "útf8" | LANG=C grep --color=always -E '[ -~]' echo "útf8" | LANG=C grep --color=always -E '[^ -~]' Note those regular expressions match ascii and non ascii data respectively. The second one in particular is useful for highlighting UTF8 characters. It was pointed out to me later that '+' can be used to highlight all adjacent non ASCII bytes. I.E the following patch is useful only to minimize the number of colour codes output, and the non printable ASCII matching can be done like this for UTF-8: echo "útf8" | LANG=C grep --color=always '[^ -~]\+' --- grep.c.orig 2005-12-09 17:37:35.000000000 +0000 +++ grep.c 2005-12-12 14:36:42.000000000 +0000 @@ -130,8 +130,8 @@ /* The color strings used for matched text. The user can overwrite them using the deprecated environment variable GREP_COLOR or the new GREP_COLORS. */ -static const char *selected_match_color = "01;31"; /* bold red */ -static const char *context_match_color = "01;31"; /* bold red */ +static const char *selected_match_color = "1;31"; /* bold red */ +static const char *context_match_color = "1;31"; /* bold red */ /* Other colors. Defaults look damn good. */ static const char *filename_color = "35"; /* magenta */ @@ -788,6 +788,19 @@ const char *mid = NULL; char *buf; /* XXX */ const char *ibeg; /* XXX */ + int match_color_end_pending = 0; + +#define MATCH_COLOR_START() \ + do { if (!match_color_end_pending) { \ + PR_SGR_START_IF(match_color); \ + match_color_end_pending = 1; }\ + } while (0) + +#define MATCH_COLOR_END() \ + do { if (match_color_end_pending) { \ + PR_SGR_END_IF(match_color); \ + match_color_end_pending = 0; }\ + } while (0) if (match_icase) /* XXX - None of the -i stuff should be here. */ { @@ -839,17 +852,21 @@ cur = mid; mid = NULL; } - fwrite (cur, sizeof (char), b - cur, stdout); + if (b - cur) { + MATCH_COLOR_END(); + fwrite (cur, sizeof (char), b - cur, stdout); + } } - PR_SGR_START_IF(match_color); + MATCH_COLOR_START(); fwrite (b, sizeof (char), match_size, stdout); - PR_SGR_END_IF(match_color); - if (only_matching) + if (only_matching) { fputs("\n", stdout); + } } cur = b + match_size; } + MATCH_COLOR_END(); if (buf) free(buf); /* XXX */