comparison scripts/strings/validatestring.m @ 13707:35f7cbf09c77

validatestring.m: Overhaul code for Matlab compatibility and performance. * validatestring.m: Correctly handle substring matches and generate an error if expansion is ambiguous. Rewrite docstring and add examples. Add new tests.
author Rik <octave@nomad.inbox5.com>
date Sat, 15 Oct 2011 19:05:00 -0700
parents cefd568ea073
children ee28bf4c3c29
comparison
equal deleted inserted replaced
13706:5cd1193ac1c4 13707:35f7cbf09c77
19 ## -*- texinfo -*- 19 ## -*- texinfo -*-
20 ## @deftypefn {Function File} {@var{validstr} =} validatestring (@var{str}, @var{strarray}) 20 ## @deftypefn {Function File} {@var{validstr} =} validatestring (@var{str}, @var{strarray})
21 ## @deftypefnx {Function File} {@var{validstr} =} validatestring (@var{str}, @var{strarray}, @var{funcname}) 21 ## @deftypefnx {Function File} {@var{validstr} =} validatestring (@var{str}, @var{strarray}, @var{funcname})
22 ## @deftypefnx {Function File} {@var{validstr} =} validatestring (@var{str}, @var{strarray}, @var{funcname}, @var{varname}) 22 ## @deftypefnx {Function File} {@var{validstr} =} validatestring (@var{str}, @var{strarray}, @var{funcname}, @var{varname})
23 ## @deftypefnx {Function File} {@var{validstr} =} validatestring (@dots{}, @var{position}) 23 ## @deftypefnx {Function File} {@var{validstr} =} validatestring (@dots{}, @var{position})
24 ## Verify that @var{str} is a string or substring of an element of 24 ## Verify that @var{str} is an element, or substring of an element, in
25 ## @var{strarray}. 25 ## @var{strarray}.
26 ## 26 ##
27 ## @var{str} is a character string to be tested, and @var{strarray} is a 27 ## When @var{str} is a character string to be tested, and @var{strarray} is a
28 ## cellstr of valid values. @var{validstr} will be the validated form 28 ## cellstr of valid values, then @var{validstr} will be the validated form
29 ## of @var{str} where validation is defined as @var{str} being a member 29 ## of @var{str} where validation is defined as @var{str} being a member
30 ## or substring of @var{validstr}. If @var{str} is a substring of 30 ## or substring of @var{validstr}. This is useful for both verifying
31 ## @var{validstr} and there are multiple matches, the shortest match 31 ## and expanding short options, such as "r", to their longer forms, such as
32 ## will be returned if all matches are substrings of each other, and an 32 ## "red". If @var{str} is a substring of @var{validstr}, and there are
33 ## error will be raised if the matches are not substrings of each other. 33 ## multiple matches, the shortest match will be returned if all matches are
34 ## substrings of each other. Otherwise, an error will be raised because the
35 ## expansion of @var{str} is ambiguous. All comparisons are case insensitive.
34 ## 36 ##
35 ## All comparisons are case insensitive. 37 ## The additional inputs @var{funcname}, @var{varname}, and @var{position}
38 ## are optional and will make any generated validation error message more
39 ## specific.
40 ##
41 ## Examples:
42 ##
43 ## @example
44 ## @group
45 ## validatestring ("r", {"red", "green", "blue"})
46 ## @result{} "red"
47 ##
48 ## validatestring ("b", {"red", "green", "blue", "black"})
49 ## @result{} error: validatestring: multiple unique matches were found for 'b':
50 ## blue, black
51 ## @end group
52 ## @end example
53 ##
54 ##
36 ## @seealso{strcmp, strcmpi} 55 ## @seealso{strcmp, strcmpi}
37 ## @end deftypefn 56 ## @end deftypefn
38 57
39 ## Author: Bill Denney <bill@denney.ws> 58 ## Author: Bill Denney <bill@denney.ws>
40 59
42 61
43 if (nargin < 2 || nargin > 5) 62 if (nargin < 2 || nargin > 5)
44 print_usage (); 63 print_usage ();
45 endif 64 endif
46 65
47 ## set the defaults
48 funcname = "";
49 varname = "";
50 position = 0; 66 position = 0;
51 ## set the actual values 67 ## Process input arguments
52 if (! isempty (varargin)) 68 if (! isempty (varargin) && isnumeric (varargin{end}))
53 if (isnumeric (varargin{end})) 69 position = varargin{end};
54 position = varargin{end}; 70 varargin(end) = [];
55 varargin(end) = [];
56 endif
57 endif 71 endif
58 funcnameset = false; 72
59 varnameset = false; 73 funcname = varname = "";
60 for i = 1:numel (varargin) 74 char_idx = cellfun ("isclass", varargin, "char");
61 if (ischar (varargin{i})) 75 n_chararg = sum (char_idx);
62 if (varnameset) 76 if (n_chararg > 2)
63 error ("validatestring: invalid number of character inputs: %d", 77 error ("validatestring: invalid number of character inputs (3)");
64 numel (varargin)); 78 elseif (n_chararg == 2)
65 elseif (funcnameset) 79 [funcname, varname] = deal (varargin{char_idx});
66 varname = varargin{i}; 80 elseif (n_chararg == 1)
67 varnameset = true; 81 funcname = varargin{char_idx};
68 else 82 endif
69 funcname = varargin{i};
70 funcnameset = true;
71 endif
72 endif
73 endfor
74 83
75 ## Check the inputs 84 ## Check the inputs
76 if (! ischar (str)) 85 if (! ischar (str))
77 error ("validatestring: STR must be a character string"); 86 error ("validatestring: STR must be a character string");
78 elseif (rows (str) != 1) 87 elseif (! isrow (str))
79 error ("validatestring: STR must have only one row"); 88 error ("validatestring: STR must be a single row vector");
80 elseif (! iscellstr (strarray)) 89 elseif (! iscellstr (strarray))
81 error ("validatestring: STRARRAY must be a cellstr"); 90 error ("validatestring: STRARRAY must be a cellstr");
82 elseif (! ischar (funcname)) 91 elseif (! isempty (funcname) && ! isrow (funcname))
83 error ("validatestring: FUNCNAME must be a character string"); 92 error ("validatestring: FUNCNAME must be a single row vector");
84 elseif (! isempty (funcname) && (rows (funcname) != 1)) 93 elseif (! isempty (varname) && ! isrow (varname))
85 error ("validatestring: FUNCNAME must be exactly one row"); 94 error ("validatestring: VARNAME must be a single row vector");
86 elseif (! ischar (varname))
87 error ("validatestring: VARNAME must be a character string");
88 elseif (! isempty (varname) && (rows (varname) != 1))
89 error ("validatestring: VARNAME must be exactly one row");
90 elseif (position < 0) 95 elseif (position < 0)
91 error ("validatestring: POSITION must be >= 0"); 96 error ("validatestring: POSITION must be >= 0");
92 endif 97 endif
93 98
94 ## make the part of the error that will use funcname, varname, and 99 ## Make static part of error string that uses funcname, varname, and position
95 ## position
96 errstr = ""; 100 errstr = "";
97 if (! isempty (funcname)) 101 if (! isempty (funcname))
98 errstr = sprintf ("Function: %s ", funcname); 102 errstr = sprintf ("Function: %s ", funcname);
99 endif 103 endif
100 if (! isempty (varname)) 104 if (! isempty (varname))
107 errstr(end:end+1) = ":\n"; 111 errstr(end:end+1) = ":\n";
108 endif 112 endif
109 113
110 matches = strncmpi (str, strarray(:), numel (str)); 114 matches = strncmpi (str, strarray(:), numel (str));
111 nmatches = sum (matches); 115 nmatches = sum (matches);
112 if (nmatches == 1) 116 if (nmatches == 0)
117 error ("validatestring: %s'%s' does not match any of\n%s", errstr, str,
118 sprintf ("%s, ", strarray{:})(1:end-2));
119 elseif (nmatches == 1)
113 str = strarray{matches}; 120 str = strarray{matches};
114 elseif (nmatches == 0)
115 error ("validatestring: %s%s does not match any of\n%s", errstr, str,
116 sprintf ("%s, ", strarray{:})(1:end-1));
117 else 121 else
118 ## are the matches a substring of each other, if so, choose the 122 ## Are the matches substrings of each other?
119 ## shortest. If not, raise an error. 123 ## If true, choose the shortest. If not, raise an error.
120 match_idx = find (matches); 124 match_idx = find (matches);
121 match_l = cellfun ("length", strarray(match_idx)); 125 match_len = cellfun ("length", strarray(match_idx));
122 longest_idx = find (match_l == max (match_l), 1); 126 [min_len, min_idx] = min (match_len);
123 shortest_idx = find (match_l == min (match_l), 1); 127 short_str = strarray{match_idx(min_idx)};
124 longest = strarray(match_idx)(longest_idx); 128 submatch = strncmpi (short_str, strarray(match_idx), min_len);
125 for i = 1:numel(match_idx) 129 if (all (submatch))
126 currentmatch = strarray(match_idx(i)); 130 str = short_str;
127 if (! strncmpi (longest, currentmatch, length(currentmatch))) 131 else
128 error ("validatestring: %smultiple unique matches were found for %s:\n%s", 132 error ("validatestring: %smultiple unique matches were found for '%s':\n%s",
129 errstr, sprintf ("%s, ", strarray(match_idx))(1:end-2)); 133 errstr, str, sprintf ("%s, ", strarray{match_idx})(1:end-2));
130 endif 134 endif
131 endfor
132 str = strarray{shortest_idx};
133 endif 135 endif
134 136
135 endfunction 137 endfunction
136 138
137 ## Tests 139
138 %!shared strarray 140 %!shared strarray
139 %! strarray = {"octave" "Oct" "octopus" "octaves"}; 141 %! strarray = {"octave" "Oct" "octopus" "octaves"};
140 %!assert (validatestring ("octave", strarray), "octave") 142 %!assert (validatestring ("octave", strarray), "octave")
141 %!assert (validatestring ("oct", strarray), "Oct") 143 %!assert (validatestring ("oct", strarray), "Oct")
142 %!assert (validatestring ("octave", strarray), "octave") 144 %!assert (validatestring ("octa", strarray), "octave")
143 %!assert (validatestring ("octav", strarray), "octave") 145 %! strarray = {"abc1" "def" "abc2"};
146 %!assert (validatestring ("d", strarray), "def")
147 %!error <'xyz' does not match any> validatestring ("xyz", strarray)
148 %!error <Function: DUMMY_TEST> validatestring ("xyz", strarray, "DUMMY_TEST")
149 %!error <Function: DUMMY_TEST Variable: DUMMY_VAR:> validatestring ("xyz", strarray, "DUMMY_TEST", "DUMMY_VAR")
150 %!error <Function: DUMMY_TEST Variable: DUMMY_VAR Argument position 5> validatestring ("xyz", strarray, "DUMMY_TEST", "DUMMY_VAR", 5)
151 %!error <multiple unique matches were found for 'abc'> validatestring ("abc", strarray)
152
153 %% Test input validation
154 %!error validatestring ("xyz")
155 %!error validatestring ("xyz", {"xyz"}, "3", "4", 5, 6)
156 %!error <invalid number of character inputs> validatestring ("xyz", {"xyz"}, "3", "4", "5")
157 %!error <STR must be a character string> validatestring (1, {"xyz"}, "3", "4", 5)
158 %!error <STR must be a single row vector> validatestring ("xyz".', {"xyz"}, "3", "4", 5)
159 %!error <STRARRAY must be a cellstr> validatestring ("xyz", "xyz", "3", "4", 5)
160 %!error <FUNCNAME must be a single row vector> validatestring ("xyz", {"xyz"}, "33".', "4", 5)
161 %!error <VARNAME must be a single row vector> validatestring ("xyz", {"xyz"}, "3", "44".', 5)
162 %!error <POSITION must be> validatestring ("xyz", {"xyz"}, "3", "4", -5)
163