glob.m 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472
  1. %% Expand wildcards for files and directory names
  2. %
  3. % Pattern matching of file and directory names, based on wildcard
  4. % characters. This function is similar to wildcard expansion performed by
  5. % the Unix shell and Python glob.glob function, but it can handle more
  6. % types of wildcards.
  7. %
  8. % [LIST, ISDIR] = glob(FILESPEC)
  9. % returns cell array LIST with files or directories that match the
  10. % path specified by string FILESPEC. Wildcards may be used for
  11. % basenames and for the directory parts. If FILESPEC contains
  12. % directory parts, then these will be included in LIST.
  13. % ISDIR is a boolean, the same size as LIST that is true for
  14. % directories in LIST.
  15. %
  16. % Following wildcards can be used:
  17. % * match zero or more characters
  18. % ? match any single character
  19. % [ab12] match one of the specified characters
  20. % [^ab12] match none of the specified characters
  21. % [a-z] match one character in range of characters
  22. % {a,b,c} matches any one of strings a, b or c
  23. %
  24. % all above wildcards do not match a file separator.
  25. %
  26. % ** match zero or more characters including file separators.
  27. % This can be used to match zero or more directory parts
  28. % and will recursively list matching names.
  29. %
  30. % The differences between GLOB and DIR:
  31. % * GLOB supports wildcards for directories.
  32. % * GLOB returns the directory part of FILESPEC.
  33. % * GLOB returns a cell array of matching names.
  34. % * GLOB does not return hidden files and directories that start
  35. % with '.' unless explicitly specified in FILESPEC.
  36. % * GLOB does not return '.' and '..' unless explicitly specified
  37. % in FILESPEC.
  38. % * GLOB adds a trailing file separator to directory names.
  39. % * GLOB does not return the contents of a directory when
  40. % a directory is specified. To return contents of a directory,
  41. % add a trailing '/*'.
  42. % * GLOB returns only directory names when a trailing file
  43. % separator is specified.
  44. % * On Windows GLOB is not case sensitive, but it returns
  45. % matching names exactly in the case as they are defined on
  46. % the filesystem. Case of host and sharename of a UNC path and
  47. % case of drive letters will be returned as specified in
  48. % FILESPEC.
  49. %
  50. % glob(FILESPEC, '-ignorecase')
  51. % By default GLOB is case sensitive on Unix. With option '-ignorecase'
  52. % FILESPEC matching is not case sensitive. On Windows, GLOB always
  53. % ignores the case. This option can be abbreviated to '-i'.
  54. %
  55. % Examples:
  56. % glob *.m list all .m files in current directory.
  57. %
  58. % glob baz/* list all files and directories in subdirectory 'baz'.
  59. %
  60. % glob b*/*.m list all .m files in subdirectory names starting
  61. % with 'b'. The list will include the names of the
  62. % matching subdirectories.
  63. %
  64. % glob ?z*.m list all .m files where the second character
  65. % is 'z'.
  66. %
  67. % glob baz.[ch] matches baz.c and baz.h
  68. %
  69. % glob test.[^ch] matches test.a but not test.c or test.h
  70. %
  71. % glob demo.[a-c] matches demo.a, demo.b, and demo.c
  72. %
  73. % glob test.{foo,bar,baz} matches test.foo, test.bar, and test.baz
  74. %
  75. % glob .* list all hidden files in current directory,
  76. % excluding '.' and '..'
  77. %
  78. % glob */ list all subdirectories.
  79. %
  80. % glob ** recursively list all files and directories,
  81. % starting in current directory (current directory
  82. % name, hidden files and hidden directories are
  83. % excluded).
  84. %
  85. % glob **.m list all m-files anywhere in directory tree,
  86. % including m-files in current directory. This
  87. % is equivalent with '**/*.m'.
  88. %
  89. % glob foo/**/ recursively list all directories, starting in
  90. % directory 'foo'.
  91. %
  92. % glob **/.svn/ list all .svn directories in directory tree.
  93. %
  94. % glob **/.*/** recursively list all files in hidden directories
  95. % only.
  96. %
  97. % [r,d]=glob('**')
  98. % r(~d) get all files in directory tree.
  99. %
  100. % Known limitation:
  101. % When using '**', symbolic linked directories or junctions may cause
  102. % an infinite loop.
  103. %
  104. % See also dir
  105. %% Last modified
  106. % $Date: 2013-02-02 18:41:41 +0100 (Sat, 02 Feb 2013) $
  107. % $Author: biggelar $
  108. % $Rev: 12966 $
  109. %% History
  110. % 2013-02-02 biggelar submitted to Matlab Central
  111. % 2013-01-11 biggelar add {} wildcards
  112. % 2013-01-02 biggelar Created
  113. %% Copyright (c) 2013, Peter van den Biggelaar
  114. % All rights reserved.
  115. %
  116. % Redistribution and use in source and binary forms, with or without
  117. % modification, are permitted provided that the following conditions are
  118. % met:
  119. %
  120. % * Redistributions of source code must retain the above copyright
  121. % notice, this list of conditions and the following disclaimer.
  122. % * Redistributions in binary form must reproduce the above copyright
  123. % notice, this list of conditions and the following disclaimer in
  124. % the documentation and/or other materials provided with the distribution
  125. %
  126. % THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  127. % AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  128. % IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  129. % ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
  130. % LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  131. % CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  132. % SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  133. % INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  134. % CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  135. % ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  136. % POSSIBILITY OF SUCH DAMAGE.
  137. % ------------------------------------------------------------------------
  138. function [LIST, ISDIR] = glob(FILESPEC, ignorecase)
  139. %% check FILESPEC input
  140. if ischar(FILESPEC)
  141. if isempty(FILESPEC)
  142. % return when FILESPEC is empty
  143. LIST = cell(0);
  144. ISDIR = false(0);
  145. return
  146. elseif size(FILESPEC,1)>1
  147. error('glob:invalidInput', 'FILESPEC must be a single string.')
  148. end
  149. else
  150. error('glob:invalidInput', 'FILESPEC must be a string.')
  151. end
  152. %% check ignorecase option
  153. if nargin==2
  154. if ischar(ignorecase)
  155. % ignore case when option is specified; must be at least 2 characters long
  156. if strncmp(ignorecase, '-ignorecase', max(numel(ignorecase),2));
  157. ignorecase = true;
  158. else
  159. error('glob:invalidOption', 'Invalid option.')
  160. end
  161. else
  162. error('glob:invalidOption', 'Invalid option.')
  163. end
  164. else
  165. % Windows is not case sensitive
  166. % Unix is case sensitive
  167. ignorecase = ispc;
  168. end
  169. %% define function handle to regular expression function for the specified case sensitivity
  170. if ignorecase
  171. regexp_fhandle = @regexpi;
  172. else
  173. regexp_fhandle = @regexp;
  174. end
  175. %% only use forward slashes as file separator to prevent escaping backslashes in regular expressions
  176. filespec = strrep(FILESPEC, '\', '/');
  177. %% split pathroot part from FILESPEC
  178. if strncmp(filespec, '//',2)
  179. if ispc
  180. % FILESPEC specifies a UNC path
  181. % It is not allowed to get a directory listing of share names of a
  182. % host with the DIR command.
  183. % pathroot will contains e.g. //host/share/
  184. pathroot = regexprep(filespec, '(^//+[^/]+/[^/]+/)(.*)', '$1');
  185. filespec = regexprep(filespec, '(^//+[^/]+/[^/]+/)(.*)', '$2');
  186. else
  187. % for Unix, multiple leading file separators are equivalent with a single file separator
  188. filespec = regexprep(filespec, '^/*', '/');
  189. end
  190. elseif strncmp(filespec, '/', 1)
  191. % FILESPEC specifies a absolute path
  192. pathroot = '/';
  193. filespec(1) = [];
  194. elseif ispc && numel(filespec)>=2 && filespec(2)==':'
  195. % FILESPEC specifies a absolute path starting with a drive letter
  196. % check for a fileseparator after ':'. e.g. 'C:\'
  197. if numel(filespec)<3 || filespec(3)~='/'
  198. error('glob:invalidInput','Drive letter must be followed by '':\''.')
  199. end
  200. pathroot = filespec(1:3);
  201. filespec(1:3) = [];
  202. else
  203. % FILESPEC specifies a relative path
  204. pathroot = './';
  205. end
  206. %% replace multiple file separators by a single file separator
  207. filespec = regexprep(filespec, '/+', '/');
  208. %% replace 'a**' with 'a*/**', where 'a' can be any character but not '/'
  209. filespec = regexprep(filespec, '([^/])(\.\*\.\*)', '$1\*/$2');
  210. %% replace '**a' with '**/*a', where a can be any character but not '/'
  211. filespec = regexprep(filespec, '(\.\*\.\*)([^/])', '$1/\*$2');
  212. %% split filespec into chunks at file separator
  213. chunks = strread(filespec, '%s', 'delimiter', '/'); %#ok<FPARK>
  214. %% add empty chunk at the end when filespec ends with a file separator
  215. if ~isempty(filespec) && filespec(end)=='/'
  216. chunks{end+1} = '';
  217. end
  218. %% translate chunks to regular expressions
  219. for i=1:numel(chunks)
  220. chunks{i} = glob2regexp(chunks{i});
  221. end
  222. %% determine file list using LS_REGEXP
  223. % this function requires that PATHROOT does not to contain any wildcards
  224. if ~isempty(chunks)
  225. list = ls_regexp(regexp_fhandle, pathroot, chunks{1:end});
  226. else
  227. list = {pathroot};
  228. end
  229. if strcmp(pathroot, './')
  230. % remove relative pathroot from result
  231. list = regexprep(list, '^\./', '');
  232. end
  233. if nargout==2
  234. % determine directories by checking for '/' at the end
  235. I = regexp(list', '/$');
  236. ISDIR = ~cellfun('isempty', I);
  237. end
  238. %% convert to standard file separators for PC
  239. if ispc
  240. list = strrep(list, '/', '\');
  241. end
  242. %% return output
  243. if nargout==0
  244. if ~isempty(list)
  245. % display list
  246. disp(char(list))
  247. else
  248. disp(['''' FILESPEC ''' not found.']);
  249. end
  250. else
  251. LIST = list';
  252. end
  253. % ------------------------------------------------------------------------
  254. function regexp_str = glob2regexp(glob_str)
  255. %% translate glob_str to regular expression string
  256. % initialize
  257. regexp_str = '';
  258. in_curlies = 0; % is > 0 within curly braces
  259. % handle characters in glob_str one-by-one
  260. for c = glob_str
  261. if any(c=='.()|+^$@%')
  262. % escape simple special characters
  263. regexp_str = [regexp_str '\' c]; %#ok<AGROW>
  264. elseif c=='*'
  265. % '*' should not match '/'
  266. regexp_str = [regexp_str '[^/]*']; %#ok<AGROW>
  267. elseif c=='?'
  268. % '?' should not match '/'
  269. regexp_str = [regexp_str '[^/]']; %#ok<AGROW>
  270. elseif c=='{'
  271. regexp_str = [regexp_str '(']; %#ok<AGROW>
  272. in_curlies = in_curlies+1;
  273. elseif c=='}' && in_curlies
  274. regexp_str = [regexp_str ')']; %#ok<AGROW>
  275. in_curlies = in_curlies-1;
  276. elseif c==',' && in_curlies
  277. regexp_str = [regexp_str '|']; %#ok<AGROW>
  278. else
  279. regexp_str = [regexp_str c]; %#ok<AGROW>
  280. end
  281. end
  282. % replace original '**' (that has now become '[^/]*[^/]*') with '.*.*'
  283. regexp_str = strrep(regexp_str, '[^/]*[^/]*', '.*.*');
  284. % ------------------------------------------------------------------------
  285. function L = ls_regexp(regexp_fhandle, path, varargin)
  286. % List files that match PATH/r1/r2/r3/... where PATH is a string without
  287. % any wildcards and r1..rn are regular expresions that contain the parts of
  288. % a filespec between the file separators.
  289. % L is a cell array with matching file or directory names.
  290. % REGEXP_FHANDLE contain a file handle to REGEXP or REGEXPI depending
  291. % on specified case sensitivity.
  292. % if first regular expressions contains '**', examine complete file tree
  293. if nargin>=3 && any(regexp(varargin{1}, '\.\*\.\*'))
  294. L = ls_regexp_tree(regexp_fhandle, path, varargin{:});
  295. else
  296. % get contents of path
  297. list = dir(path);
  298. if nargin>=3
  299. if strcmp(varargin{1},'\.') || strcmp(varargin{1},'\.\.')
  300. % keep explicitly specified '.' or '..' in first regular expression
  301. if ispc && ~any(strcmp({list.name}, '.'))
  302. % fix strange windows behaviour: root of a volume has no '.' and '..'
  303. list(end+1).name = '.';
  304. list(end).isdir = true;
  305. list(end+1).name = '..';
  306. list(end).isdir = true;
  307. end
  308. else
  309. % remove '.' and '..'
  310. list(strcmp({list.name},'.')) = [];
  311. list(strcmp({list.name},'..')) = [];
  312. % remove files starting with '.' specified in first regular expression
  313. if ~strncmp(varargin{1},'\.',2)
  314. % remove files starting with '.' from list
  315. list(strncmp({list.name},'.',1)) = [];
  316. end
  317. end
  318. end
  319. % define shortcuts
  320. list_isdir = [list.isdir];
  321. list_name = {list.name};
  322. L = {}; % initialize
  323. if nargin==2 % no regular expressions
  324. %% return filename
  325. if ~isempty(list_name)
  326. % add a trailing slash to directories
  327. trailing_fsep = repmat({''}, size(list_name));
  328. trailing_fsep(list_isdir) = {'/'};
  329. L = strcat(path, list_name, trailing_fsep);
  330. end
  331. elseif nargin==3 % last regular expression
  332. %% return list_name matching regular expression
  333. I = regexp_fhandle(list_name, ['^' varargin{1} '$']);
  334. I = ~cellfun('isempty', I);
  335. list_name = list_name(I);
  336. list_isdir = list_isdir(I);
  337. if ~isempty(list_name)
  338. % add a trailing slash to directories
  339. trailing_fsep = repmat({''}, size(list_name));
  340. trailing_fsep(list_isdir) = {'/'};
  341. L = strcat(path, list_name, trailing_fsep);
  342. end
  343. elseif nargin==4 && isempty(varargin{2})
  344. %% only return directories when last regexp is empty
  345. % return list_name matching regular expression and that are directories
  346. I = regexp_fhandle(list_name, ['^' varargin{1} '$']);
  347. I = ~cellfun('isempty', I);
  348. % only return directories
  349. list_name = list_name(I);
  350. list_isdir = list_isdir(I);
  351. if any(list_isdir)
  352. % add a trailing file separator
  353. L = strcat(path, list_name(list_isdir), '/');
  354. end
  355. else
  356. %% traverse for list_name matching regular expression
  357. I = regexp_fhandle(list_name, ['^' varargin{1} '$']);
  358. I = ~cellfun('isempty', I);
  359. for name = list_name(I)
  360. L = [L ls_regexp(regexp_fhandle, [path char(name) '/'], varargin{2:end})]; %#ok<AGROW>
  361. end
  362. end
  363. end
  364. % ------------------------------------------------------------------------
  365. function L = ls_regexp_tree(regexp_fhandle, path, varargin)
  366. % use this function when first argument of varargin contains '**'
  367. % build list of complete directory tree
  368. % if any regexp starts with '\.', keep hidden files and directories
  369. I = regexp(varargin, '^\\\.');
  370. I = ~cellfun('isempty', I);
  371. keep_hidden = any(I);
  372. list = dir_recur(path, keep_hidden);
  373. L = {list.name};
  374. % make one regular expression of all individual regexps
  375. expression = [regexptranslate('escape',path) sprintf('%s/', varargin{1:end-1}) varargin{end}];
  376. % note that /**/ must also match zero directories
  377. % replace '/**/' with (/**/|/)
  378. expression = regexprep(expression, '/\.\*\.\*/', '(/\.\*\.\*/|/)');
  379. % return matching names
  380. if ~isempty(varargin{end})
  381. % determing matching names ignoring trailing '/'
  382. L_no_trailing_fsep = regexprep(L, '/$', '');
  383. I = regexp_fhandle(L_no_trailing_fsep, ['^' expression '$']);
  384. else
  385. % determing matching names including trailing '/'
  386. I = regexp_fhandle(L, ['^' expression '$']);
  387. end
  388. I = cellfun('isempty', I);
  389. L(I) = [];
  390. % ------------------------------------------------------------------------
  391. function d = dir_recur(startdir,keep_hidden)
  392. %% determine recursive directory contents
  393. % get directory contents
  394. d = dir(startdir);
  395. % remove hidden files
  396. if keep_hidden
  397. % only remove '.' and '..'
  398. d(strcmp({d.name},'.')) = [];
  399. d(strcmp({d.name},'..')) = [];
  400. else
  401. % remove all hidden files and directories
  402. d(strncmp({d.name},'.',1)) = [];
  403. end
  404. if ~isempty(d)
  405. % add trailing fileseparator to directories
  406. trailing_fsep = repmat({''}, size(d));
  407. trailing_fsep([d.isdir]) = {'/'};
  408. % prefix startdir to name and postfix fileseparator for directories
  409. dname = strcat(startdir, {d.name}, trailing_fsep');
  410. [d(:).name] = deal(dname{:});
  411. % recurse into subdirectories
  412. for subd = {d([d.isdir]).name}
  413. d = [d; dir_recur(char(subd), keep_hidden)]; %#ok<AGROW>
  414. end
  415. end