From 006dfe4cd4dfc2c31ee4a21b6754dbbeb99ef407 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1draig=20Brady?= Date: Tue, 14 Oct 2025 21:06:03 +0100 Subject: [PATCH 2/6] numfmt: support reading numbers with grouping characters This does not validate grouping character placement, and currently just ignores grouping characters. * src/numfmt.c (simple_strtod_int): Skip grouping chars that are part of a number. * tests/misc/numfmt.pl: Add test cases. * NEWS: Mention the improvement. --- NEWS | 3 ++- src/numfmt.c | 14 +++++++++++++- tests/misc/numfmt.pl | 16 ++++++++++++++-- 3 files changed, 29 insertions(+), 4 deletions(-) diff --git a/NEWS b/NEWS index 40d443942..a07fe298c 100644 --- a/NEWS +++ b/NEWS @@ -35,7 +35,8 @@ GNU coreutils NEWS -*- outline -*- ** Improvements - numfmt now parses numbers with a non-breaking space character before a unit. + numfmt now parses numbers with a non-breaking space character before a unit, + and numbers containing grouping characters from the current locale. wc -l now operates 10% faster on hosts that support AVX512 instructions. diff --git a/src/numfmt.c b/src/numfmt.c index fbf104b51..1a744770f 100644 --- a/src/numfmt.c +++ b/src/numfmt.c @@ -207,6 +207,8 @@ static bool debug; /* will be set according to the current locale. */ static char const *decimal_point; static int decimal_point_length; +static char const *thousands_sep; +static int thousands_sep_length; /* debugging for developers. Enables devmsg(). */ static bool dev_debug = false; @@ -520,6 +522,11 @@ simple_strtod_int (char const *input_str, val += digit; ++(*endptr); + + if (thousands_sep_length > 0 + && STREQ_LEN (*endptr, thousands_sep, thousands_sep_length) + && c_isdigit ((*endptr)[thousands_sep_length])) + (*endptr) += thousands_sep_length; } if (! found_digit && ! 
STREQ_LEN (*endptr, decimal_point, decimal_point_length)) @@ -1474,6 +1481,11 @@ main (int argc, char **argv) decimal_point = "."; decimal_point_length = strlen (decimal_point); + thousands_sep = nl_langinfo (THOUSEP); + if (thousands_sep == nullptr) + thousands_sep = ""; + thousands_sep_length = strlen (thousands_sep); + atexit (close_stdout); while (true) @@ -1602,7 +1614,7 @@ main (int argc, char **argv) { if (scale_to != scale_none) error (EXIT_FAILURE, 0, _("grouping cannot be combined with --to")); - if (debug && (strlen (nl_langinfo (THOUSEP)) == 0)) + if (debug && thousands_sep_length == 0) error (0, 0, _("grouping has no effect in this locale")); } diff --git a/tests/misc/numfmt.pl b/tests/misc/numfmt.pl index 85c888cd8..1d3c4202c 100755 --- a/tests/misc/numfmt.pl +++ b/tests/misc/numfmt.pl @@ -1050,8 +1050,20 @@ my @Locale_Tests = {OUT=>"7${lg}000${lg}000"}, {ENV=>"LC_ALL=$locale"}], - # Input with locale'd decimal-point - ['lcl-stdtod-1', '--from=si 12,2K', {OUT=>"12200"}, + # Input with locale's grouping + ['lcl-strtod-1', "--from=si 1${lg}234K", {OUT=>"1234000"}, + {ENV=>"LC_ALL=$locale"}], + + # Input with locale's grouping. Note position not validated. + ['lcl-strtod-2', "--from=si 12${lg}34K", {OUT=>"1234000"}, + {ENV=>"LC_ALL=$locale"}], + + # Input with locale's decimal-point + ['lcl-strtod-3', '--from=si 12,2K', {OUT=>"12200"}, + {ENV=>"LC_ALL=$locale"}], + + # Input with locale's grouping and decimal-point + ['lcl-strtod-4', "--from=si 1${lg}23,4K", {OUT=>"123400"}, {ENV=>"LC_ALL=$locale"}], ['lcl-dbl-to-human-1', '--to=si 1100', {OUT=>"1,1k"}, -- 2.51.0