changeset 14575:df394b4db95d

maint.mk: prohibit doubled words Detect them also when they're separated by a newline. There are 3 ways to customize it: - disable the test on a per file basis, as usual with rules using $(VC_LIST_EXCEPT) - replace the default doubled-word-selecting regexp (affects all files) - ignore a particular file-vs-doubled-word match I nearly used that last one to ignore the "is is" match in coreutils' NEWS file, since the text was "ls -is is ..." To do that, I would have added this line to cfg.mk: ignore_doubled_word_match_RE_ = ^NEWS:[0-9]+:is[ ]is$ but it would have ignored any "is is" match in NEWS. Low probability, but still... Instead, I changed the text, slightly: - ls -is is now consistent with ls -lis in ignoring values returned + "ls -is" is now consistent with ls -lis in ignoring values returned * top/maint.mk (prohibit_doubled_word_RE_): Provide default. (prohibit_doubled_word_): Define. (sc_prohibit_doubled_word): New rule. (sc_prohibit_the_the): Remove. Subsumed by the above.
author Jim Meyering <meyering@redhat.com>
date Sun, 10 Apr 2011 10:26:46 +0200
parents f16721915551
children edc8739b000c
files ChangeLog top/maint.mk
diffstat 2 files changed, 42 insertions(+), 4 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,26 @@
+2011-04-10  Jim Meyering  <meyering@redhat.com>
+
+	maint.mk: prohibit doubled words
+	Detect them also when they're separated by a newline.
+	There are 3 ways to customize it:
+	  - disable the test on a per file basis, as usual with rules using
+	    $(VC_LIST_EXCEPT)
+	  - replace the default doubled-word-selecting regexp (affects all files)
+	  - ignore a particular file-vs-doubled-word match
+	I nearly used that last one to ignore the "is is" match in
+	coreutils' NEWS file, since the text was "ls -is is ..."
+	To do that, I would have added this line to cfg.mk:
+	  ignore_doubled_word_match_RE_ = ^NEWS:[0-9]+:is[ ]is$
+	but it would have ignored any "is is" match in NEWS.
+	Low probability, but still...
+	Instead, I changed the text, slightly:
+	  -  ls -is is now consistent with ls -lis in ignoring values returned
+	  +  "ls -is" is now consistent with ls -lis in ignoring values returned
+	* top/maint.mk (prohibit_doubled_word_RE_): Provide default.
+	(prohibit_doubled_word_): Define.
+	(sc_prohibit_doubled_word): New rule.
+	(sc_prohibit_the_the): Remove.  Subsumed by the above.
+
 2011-04-10  Jim Meyering  <meyering@redhat.com>
 
 	maint: fix doubled-word typo in comment
--- a/top/maint.mk
+++ b/top/maint.mk
@@ -841,10 +841,25 @@
 	halt='do not define S_IS* macros; include <sys/stat.h>'		\
 	  $(_sc_search_regexp)
 
-sc_prohibit_the_the:
-	@prohibit='\<the[ ]the\>'					\
-	halt='avoid double "the"'					\
-	  $(_sc_search_regexp)
+prohibit_doubled_word_RE_ ?= \
+  /\b(then?|[iao]n|i[fst]|but|f?or|at|and|[dt]o)\s+\1\b/gims
+prohibit_doubled_word_ =						\
+    -e 'while ($(prohibit_doubled_word_RE_))'				\
+    -e '  {'								\
+    -e '    $$n = ($$` =~ tr/\n/\n/ + 1);'				\
+    -e '    ($$v = $$&) =~ s/\n/\\n/g;'					\
+    -e '    print "$$ARGV:$$n:$$v\n";'					\
+    -e '  }'
+
+# Define this to an extended regular expression (it is passed to grep -E)
+# that matches any FILE:LINE:MATCH output lines you want to ignore.
+# The default is to ignore no matches.
+ignore_doubled_word_match_RE_ ?= ^$$
+
+sc_prohibit_doubled_word:
+	@perl -n -0777 $(prohibit_doubled_word_) $$($(VC_LIST_EXCEPT))	\
+	  | grep -vE '$(ignore_doubled_word_match_RE_)'			\
+	  | grep . && { echo '$(ME): doubled words' 1>&2; exit 1; } || :
 
 sc_prohibit_can_not:
 	@prohibit='\<can[ ]not\>'					\