Mercurial > hg > octave-lyh
comparison scripts/strings/validatestring.m @ 13707:35f7cbf09c77
validatestring.m: Overhaul code for Matlab compatibility and performance.
* validatestring.m: Correctly handle substring matches and generate an error
if expansion is ambiguous. Rewrite docstring and add examples. Add new tests.
author | Rik <octave@nomad.inbox5.com> |
---|---|
date | Sat, 15 Oct 2011 19:05:00 -0700 |
parents | cefd568ea073 |
children | ee28bf4c3c29 |
comparison
equal
deleted
inserted
replaced
13706:5cd1193ac1c4 | 13707:35f7cbf09c77 |
---|---|
19 ## -*- texinfo -*- | 19 ## -*- texinfo -*- |
20 ## @deftypefn {Function File} {@var{validstr} =} validatestring (@var{str}, @var{strarray}) | 20 ## @deftypefn {Function File} {@var{validstr} =} validatestring (@var{str}, @var{strarray}) |
21 ## @deftypefnx {Function File} {@var{validstr} =} validatestring (@var{str}, @var{strarray}, @var{funcname}) | 21 ## @deftypefnx {Function File} {@var{validstr} =} validatestring (@var{str}, @var{strarray}, @var{funcname}) |
22 ## @deftypefnx {Function File} {@var{validstr} =} validatestring (@var{str}, @var{strarray}, @var{funcname}, @var{varname}) | 22 ## @deftypefnx {Function File} {@var{validstr} =} validatestring (@var{str}, @var{strarray}, @var{funcname}, @var{varname}) |
23 ## @deftypefnx {Function File} {@var{validstr} =} validatestring (@dots{}, @var{position}) | 23 ## @deftypefnx {Function File} {@var{validstr} =} validatestring (@dots{}, @var{position}) |
24 ## Verify that @var{str} is a string or substring of an element of | 24 ## Verify that @var{str} is an element, or substring of an element, in |
25 ## @var{strarray}. | 25 ## @var{strarray}. |
26 ## | 26 ## |
27 ## @var{str} is a character string to be tested, and @var{strarray} is a | 27 ## When @var{str} is a character string to be tested, and @var{strarray} is a |
28 ## cellstr of valid values. @var{validstr} will be the validated form | 28 ## cellstr of valid values, then @var{validstr} will be the validated form |
29 ## of @var{str} where validation is defined as @var{str} being a member | 29 ## of @var{str} where validation is defined as @var{str} being a member |
30 ## or substring of @var{validstr}. If @var{str} is a substring of | 30 ## or substring of @var{validstr}. This is useful for both verifying |
31 ## @var{validstr} and there are multiple matches, the shortest match | 31 ## and expanding short options, such as "r", to their longer forms, such as |
32 ## will be returned if all matches are substrings of each other, and an | 32 ## "red". If @var{str} is a substring of @var{validstr}, and there are |
33 ## error will be raised if the matches are not substrings of each other. | 33 ## multiple matches, the shortest match will be returned if all matches are |
34 ## substrings of each other. Otherwise, an error will be raised because the | |
35 ## expansion of @var{str} is ambiguous. All comparisons are case insensitive. | |
34 ## | 36 ## |
35 ## All comparisons are case insensitive. | 37 ## The additional inputs @var{funcname}, @var{varname}, and @var{position} |
38 ## are optional and will make any generated validation error message more | |
39 ## specific. | |
40 ## | |
41 ## Examples: | |
42 ## | |
43 ## @example | |
44 ## @group | |
45 ## validatestring ("r", {"red", "green", "blue"}) | |
46 ## @result{} "red" | |
47 ## | |
48 ## validatestring ("b", {"red", "green", "blue", "black"}) | |
49 ## @result{} error: validatestring: multiple unique matches were found for 'b': | |
50 ## blue, black | |
51 ## @end group | |
52 ## @end example | |
53 ## | |
54 ## | |
36 ## @seealso{strcmp, strcmpi} | 55 ## @seealso{strcmp, strcmpi} |
37 ## @end deftypefn | 56 ## @end deftypefn |
38 | 57 |
39 ## Author: Bill Denney <bill@denney.ws> | 58 ## Author: Bill Denney <bill@denney.ws> |
40 | 59 |
42 | 61 |
43 if (nargin < 2 || nargin > 5) | 62 if (nargin < 2 || nargin > 5) |
44 print_usage (); | 63 print_usage (); |
45 endif | 64 endif |
46 | 65 |
47 ## set the defaults | |
48 funcname = ""; | |
49 varname = ""; | |
50 position = 0; | 66 position = 0; |
51 ## set the actual values | 67 ## Process input arguments |
52 if (! isempty (varargin)) | 68 if (! isempty (varargin) && isnumeric (varargin{end})) |
53 if (isnumeric (varargin{end})) | 69 position = varargin{end}; |
54 position = varargin{end}; | 70 varargin(end) = []; |
55 varargin(end) = []; | |
56 endif | |
57 endif | 71 endif |
58 funcnameset = false; | 72 |
59 varnameset = false; | 73 funcname = varname = ""; |
60 for i = 1:numel (varargin) | 74 char_idx = cellfun ("isclass", varargin, "char"); |
61 if (ischar (varargin{i})) | 75 n_chararg = sum (char_idx); |
62 if (varnameset) | 76 if (n_chararg > 2) |
63 error ("validatestring: invalid number of character inputs: %d", | 77 error ("validatestring: invalid number of character inputs (3)"); |
64 numel (varargin)); | 78 elseif (n_chararg == 2) |
65 elseif (funcnameset) | 79 [funcname, varname] = deal (varargin{char_idx}); |
66 varname = varargin{i}; | 80 elseif (n_chararg == 1) |
67 varnameset = true; | 81 funcname = varargin{char_idx}; |
68 else | 82 endif |
69 funcname = varargin{i}; | |
70 funcnameset = true; | |
71 endif | |
72 endif | |
73 endfor | |
74 | 83 |
75 ## Check the inputs | 84 ## Check the inputs |
76 if (! ischar (str)) | 85 if (! ischar (str)) |
77 error ("validatestring: STR must be a character string"); | 86 error ("validatestring: STR must be a character string"); |
78 elseif (rows (str) != 1) | 87 elseif (! isrow (str)) |
79 error ("validatestring: STR must have only one row"); | 88 error ("validatestring: STR must be a single row vector"); |
80 elseif (! iscellstr (strarray)) | 89 elseif (! iscellstr (strarray)) |
81 error ("validatestring: STRARRAY must be a cellstr"); | 90 error ("validatestring: STRARRAY must be a cellstr"); |
82 elseif (! ischar (funcname)) | 91 elseif (! isempty (funcname) && ! isrow (funcname)) |
83 error ("validatestring: FUNCNAME must be a character string"); | 92 error ("validatestring: FUNCNAME must be a single row vector"); |
84 elseif (! isempty (funcname) && (rows (funcname) != 1)) | 93 elseif (! isempty (varname) && ! isrow (varname)) |
85 error ("validatestring: FUNCNAME must be exactly one row"); | 94 error ("validatestring: VARNAME must be a single row vector"); |
86 elseif (! ischar (varname)) | |
87 error ("validatestring: VARNAME must be a character string"); | |
88 elseif (! isempty (varname) && (rows (varname) != 1)) | |
89 error ("validatestring: VARNAME must be exactly one row"); | |
90 elseif (position < 0) | 95 elseif (position < 0) |
91 error ("validatestring: POSITION must be >= 0"); | 96 error ("validatestring: POSITION must be >= 0"); |
92 endif | 97 endif |
93 | 98 |
94 ## make the part of the error that will use funcname, varname, and | 99 ## Make static part of error string that uses funcname, varname, and position |
95 ## position | |
96 errstr = ""; | 100 errstr = ""; |
97 if (! isempty (funcname)) | 101 if (! isempty (funcname)) |
98 errstr = sprintf ("Function: %s ", funcname); | 102 errstr = sprintf ("Function: %s ", funcname); |
99 endif | 103 endif |
100 if (! isempty (varname)) | 104 if (! isempty (varname)) |
107 errstr(end:end+1) = ":\n"; | 111 errstr(end:end+1) = ":\n"; |
108 endif | 112 endif |
109 | 113 |
110 matches = strncmpi (str, strarray(:), numel (str)); | 114 matches = strncmpi (str, strarray(:), numel (str)); |
111 nmatches = sum (matches); | 115 nmatches = sum (matches); |
112 if (nmatches == 1) | 116 if (nmatches == 0) |
117 error ("validatestring: %s'%s' does not match any of\n%s", errstr, str, | |
118 sprintf ("%s, ", strarray{:})(1:end-2)); | |
119 elseif (nmatches == 1) | |
113 str = strarray{matches}; | 120 str = strarray{matches}; |
114 elseif (nmatches == 0) | |
115 error ("validatestring: %s%s does not match any of\n%s", errstr, str, | |
116 sprintf ("%s, ", strarray{:})(1:end-1)); | |
117 else | 121 else |
118 ## are the matches a substring of each other, if so, choose the | 122 ## Are the matches substrings of each other? |
119 ## shortest. If not, raise an error. | 123 ## If true, choose the shortest. If not, raise an error. |
120 match_idx = find (matches); | 124 match_idx = find (matches); |
121 match_l = cellfun ("length", strarray(match_idx)); | 125 match_len = cellfun ("length", strarray(match_idx)); |
122 longest_idx = find (match_l == max (match_l), 1); | 126 [min_len, min_idx] = min (match_len); |
123 shortest_idx = find (match_l == min (match_l), 1); | 127 short_str = strarray{match_idx(min_idx)}; |
124 longest = strarray(match_idx)(longest_idx); | 128 submatch = strncmpi (short_str, strarray(match_idx), min_len); |
125 for i = 1:numel(match_idx) | 129 if (all (submatch)) |
126 currentmatch = strarray(match_idx(i)); | 130 str = short_str; |
127 if (! strncmpi (longest, currentmatch, length(currentmatch))) | 131 else |
128 error ("validatestring: %smultiple unique matches were found for %s:\n%s", | 132 error ("validatestring: %smultiple unique matches were found for '%s':\n%s", |
129 errstr, sprintf ("%s, ", strarray(match_idx))(1:end-2)); | 133 errstr, str, sprintf ("%s, ", strarray{match_idx})(1:end-2)); |
130 endif | 134 endif |
131 endfor | |
132 str = strarray{shortest_idx}; | |
133 endif | 135 endif |
134 | 136 |
135 endfunction | 137 endfunction |
136 | 138 |
137 ## Tests | 139 |
138 %!shared strarray | 140 %!shared strarray |
139 %! strarray = {"octave" "Oct" "octopus" "octaves"}; | 141 %! strarray = {"octave" "Oct" "octopus" "octaves"}; |
140 %!assert (validatestring ("octave", strarray), "octave") | 142 %!assert (validatestring ("octave", strarray), "octave") |
141 %!assert (validatestring ("oct", strarray), "Oct") | 143 %!assert (validatestring ("oct", strarray), "Oct") |
142 %!assert (validatestring ("octave", strarray), "octave") | 144 %!assert (validatestring ("octa", strarray), "octave") |
143 %!assert (validatestring ("octav", strarray), "octave") | 145 %! strarray = {"abc1" "def" "abc2"}; |
146 %!assert (validatestring ("d", strarray), "def") | |
147 %!error <'xyz' does not match any> validatestring ("xyz", strarray) | |
148 %!error <Function: DUMMY_TEST> validatestring ("xyz", strarray, "DUMMY_TEST") | |
149 %!error <Function: DUMMY_TEST Variable: DUMMY_VAR:> validatestring ("xyz", strarray, "DUMMY_TEST", "DUMMY_VAR") | |
150 %!error <Function: DUMMY_TEST Variable: DUMMY_VAR Argument position 5> validatestring ("xyz", strarray, "DUMMY_TEST", "DUMMY_VAR", 5) | |
151 %!error <multiple unique matches were found for 'abc'> validatestring ("abc", strarray) | |
152 | |
153 %% Test input validation | |
154 %!error validatestring ("xyz") | |
155 %!error validatestring ("xyz", {"xyz"}, "3", "4", 5, 6) | |
156 %!error <invalid number of character inputs> validatestring ("xyz", {"xyz"}, "3", "4", "5") | |
157 %!error <STR must be a character string> validatestring (1, {"xyz"}, "3", "4", 5) | |
158 %!error <STR must be a single row vector> validatestring ("xyz".', {"xyz"}, "3", "4", 5) | |
159 %!error <STRARRAY must be a cellstr> validatestring ("xyz", "xyz", "3", "4", 5) | |
160 %!error <FUNCNAME must be a single row vector> validatestring ("xyz", {"xyz"}, "33".', "4", 5) | |
161 %!error <VARNAME must be a single row vector> validatestring ("xyz", {"xyz"}, "3", "44".', 5) | |
162 %!error <POSITION must be> validatestring ("xyz", {"xyz"}, "3", "4", -5) | |
163 |