Mercurial > hg > octave-image
changeset 563:d2a6dc5e921e
tiff_tag_read: deal with multiple tags and IFDs beyond the first, and improve documentation
author | carandraug |
---|---|
date | Wed, 18 Apr 2012 20:56:52 +0000 |
parents | 36627778a40e |
children | 7db61a0a2632 |
files | NEWS inst/tiff_tag_read.m |
diffstat | 2 files changed, 224 insertions(+), 109 deletions(-) [+] |
line wrap: on
line diff
--- a/NEWS +++ b/NEWS @@ -63,4 +63,9 @@ ** `isrgb' now returns false for logical matrix. + ** `tiff_tag_read' had several bug fixes and can now check IFDs beyond + the first. It can also accept multiple tag values and IFDs simultaneously + and return a matrix of the values found. Its documentation has been + expanded (as well as an explanation of TIFF structure on the source) + ** Package is no longer automatically loaded.
--- a/inst/tiff_tag_read.m +++ b/inst/tiff_tag_read.m @@ -1,4 +1,4 @@ -## Copyright (C) 2010 Carnë Draug <carandraug+dev@gmail.com> +## Copyright (C) 2010-2012 Carnë Draug <carandraug+dev@gmail.com> ## ## This program is free software; you can redistribute it and/or modify it under ## the terms of the GNU General Public License as published by the Free Software @@ -14,15 +14,63 @@ ## this program; if not, see <http://www.gnu.org/licenses/>. ## -*- texinfo -*- -## @deftypefn {Function File} [@var{value}, @var{offset}] = tiff_tag_read (@var{file}, @var{tag}, @var{ifd}) -## Reads the values of TIFF file tags. +## @deftypefn {Function File} {[@var{value}, @var{offset}] =} tiff_tag_read (@var{file}, @var{tag}) +## @deftypefnx {Function File} {[@var{value}, @var{offset}] =} tiff_tag_read (@var{file}, @var{tag}, @var{ifd}) +## @deftypefnx {Function File} {[@var{value}, @var{offset}] =} tiff_tag_read (@var{file}, @var{tag}, "all") +## Read value of @var{tag}s from TIFF files. +## +## @var{file} must be a TIFF file and @var{tag} should be a tag ID. To check +## multiple tags, @var{tag} can be a vector. If @var{ifd} is supplied, only +## those IFDs (Image File Directory) will be read. As with @var{tag}, multiple +## IFDs can be checked by using a vector or with the string `all'. By default, +## only the first IFD is read. +## +## @var{value} and @var{offset} will be a matrix with a number of rows and +## columns equal to the number of @var{tag}s and @var{ifd}s requested. The index +## relate to the same order as the input. @var{offset} has the same structure as +## @var{value} and when equal to 1 its matching value on @var{value} will be an +## offset to a position in the file. +## +## @var{tag}s that can't be found will have a value of 0 and the corresponding +## @var{offset} will be 2. +## +## If an error occurs when reading @var{file} (such as lack of permissions of file +## is not a TIFF file), @var{offset} is set to -1 and @var{value} contains the +## error message. 
## -## @var{file} is a TIFF file and @var{tag} is the tag number to read. If -## @var{ifd} is given, only the tag value from that IFD (Image File Directory) -## will be read. By default, reads only the first IFD. +## See the following examples: +## @example +## @group +## ## read value of tag 258 on IFD 1 (`off' will be 1 if `val' is an offset or 2 if not found) +## [val, off] = tiff_tag_read (filepath, 258); +## @end group +## @end example ## -## @var{value} is the read value from @var{tag}. @var{offset} will be @code{1} -## if @var{value} is a file offset. +## @example +## @group +## ## read value 258, 262, 254 o IFD 1 (`val' and `off' will be a 1x3 matrix) +## [val, off] = tiff_tag_read (filepath, [258 262 254]); +## if (off(1) == -1), error ("something happpened: %s", val); endif +## off(2,1) # will be 1 if val(2,1) is an offset to a file position or 2 if tag was not found +## val(2,1) # value of tag 262 on IFD 1 +## @end group +## @end example +## +## @example +## @group +## ## read value 258, 262, 254 on the first 10 IFDs 1 (`val' and `off' will be a 1x10 matrix) +## [val, off] = tiff_tag_read (filepath, [258 262 254], 1:10); +## val(2,5) # value of tag 262 on IFD 5 +## @end group +## @end example +## +## @example +## @group +## ## read value 258, 262, 254 o IFD 1 (`val' and `off' will be a 1x3 matrix) +## [val, off] = tiff_tag_read (filepath, [258 262 254], "all"); +## val(2,end) # value of tag 262 on the last IFD +## @end group +## @end example ## ## @seealso{imread, imfinfo, readexif} ## @end deftypefn @@ -36,6 +84,26 @@ ## and the function tiff_read by F. Nedelec, EMBL (www.cytosim.org) ## * http://www.cytosim.org/misc/index.html ## +## Explanation of the TIFF file structure: +## +## The idea of multi-page images meeds to be understood first. These allow one file +## to have multiple images. 
This may sound strange but consider situations such as +## an MRI scan (one file can then contain one scan which is multiple images across +## one of the axes) or time-lapse experiment (where one file is not unlike a movie). +## TIFF files support this by being like a container of single images, called IFD +## (Image File Directory). For each page there will be a single IFD. One can see a +## TIFF as an archive file of multiple image files that many times have a single file. +## +## Each TIFF file starts with a small header that identifies the file as TIFF. The +## header ends with the position on the file for the first IFD. Each IFD has multiple +## entries that hold information about the image of that IFD including where on the +## file is the actual image. Each IFD entry is identified by a tag. Each tag has a +## unique meaning; for example, the IFD entry with tag 259 will say the compression +## type (if any), of the image in that IFD. +## +## A TIFF file will always have at least one IFD and each IFD will always have at +## least one IFD entry. +## ## * On the TIFF image file header: ## bytes 00-01 --> byte order used within the file: "II" for little endian ## and "MM" for big endian byte ordering. @@ -47,14 +115,13 @@ ## ## * On a TIFF's IFD structure: ## bytes 00-01 --> number of entries (or tags or fields or directories) -## bytes 02-13 --> the entry (the tag is repeated the number of times -## specified at the start of the IFD, but always takes -## 12 bytes of size) -## bytes XX-XX --> file offset for next IFD (last 4 bytes of the IFD) +## bytes 02-13 --> the IFD entry #0 +## bytes 14+=11 -> the IFD entry #N. 
Each will have exactly 12 bytes (the +## number of IFD entries was specified at the start of the IFD) +## bytes XX-XX --> file offset for next IFD (last 4 bytes of the IFD) or 0 +## if it's the last IFD ## -## Note: there must be always one IFD and each IFD must have at least one entry -## -## * On an IFD entry (or TIFF's field) structure: +## * On an IFD entry structure: ## bytes 00-01 --> tag that identifies the entry ## bytes 02-03 --> entry type ## 1 --> BYTE (uint8) @@ -69,128 +136,171 @@ ## 10 --> FLOAT (single IEEE precision) ## 11 --> DOUBLE (double IEEE precision) ## bytes 04-07 --> number of values (count) -## bytes 08-11 --> file offset to the value or value (only if it fits in 4 bytes) +## bytes 08-11 --> file offset (from the beggining of file) or value (only if +## it fits in 4 bytes). It is possible that the offset is for +## a structure and not a value so we return the offset ## ## Note: file offset of the value may point anywhere in the file, even after the image. +## +## Tags numbered >= 32768 are private tags +## Tags numbered on the 65000--65535 range are reusable tags -function [value, offset] = tiff_tag_read (file, tag, ifd) +function [val, off] = tiff_tag_read (file, tag, ifd = 1) - [FID, msg] = fopen (file, "r", "native"); - if (msg != 0) - error ("Unable to fopen '%s': %s.", file, msg); + if (nargin < 2 || nargin > 3) + print_usage; + elseif (!isnumeric (tag) || !isvector (tag)) + error ("`tag' must be either a numeric scalar or vector with tags -- identifying number of a field"); + elseif (!(ischar (ifd) && strcmpi (ifd, "all")) && !(isnumeric (ifd) && isvector (ifd) && all (ifd == fix (ifd)) && all (ifd > 0))) + error ("`ifd' must be either the string `all' or numeric scalar or vector of positive integers with the IFD index"); endif - # Read byte order - byte_order = fread(FID, 2, "char=>char"); - if ( strcmp(byte_order', "II") ) + [FID, msg] = fopen (file, "r", "native"); + if (FID == -1) + [val, off] = bad_exit (FID, sprintf ("Unable to 
fopen '%s': %s.", file, msg)); + return + endif + + ## read byte order + byte_order = fread (FID, 2, "char=>char")'; # if we are retrieving a char, we need to transpose to get the string + if (strcmp (byte_order, "II")) arch = "ieee-le"; # IEEE little endian format - elseif ( strcmp(byte_order',"MM") ) + elseif (strcmp (byte_order,"MM")) arch = "ieee-be"; # IEEE big endian format else - error("First 2 bytes of header returned '%s'. TIFF file expects either 'II' or 'MM'.", byte_order'); + [val, off] = bad_exit (FID, sprintf ("First 2 bytes of '%s' returned '%s'. For TIFF should either be 'II' or 'MM'. Are you sure it's a TIFF.", file, byte_order)); + return endif - # Read number 42 - nTIFF = fread(FID, 1, "uint16", arch); + ## read number 42 + nTIFF = fread (FID, 1, "uint16", arch); if (nTIFF != 42) - error("This is not a TIFF file (missing 42 on header at offset 2. Instead got '%g').", tiff_id); + [val, off] = bad_exit (FID, sprintf ("'%s' is not a TIFF (missing value 42 on header at offset 2. 
Instead got '%g').", file, tiff_id)); + return + endif + + if (ischar (ifd) && strcmpi (ifd, "all")) + all_ifd = true; + else + all_ifd = false; endif - # Read offset and move for the first IFD - offset_IFD = fread(FID, 1, "uint32", arch); - status = fseek(FID, offset_IFD, "bof"); - if (status != 0) - error("Error on fseek when moving to first IFD."); + ## default values for val and off + def_val = 0; + def_off = 2; + + ## start output values with default values + if (ischar (ifd) && strcmpi (ifd, "all")) + val = def_val * ones (numel (tag), 1); + off = def_off * ones (numel (tag), 1); + else + val = def_val * ones (numel (tag), numel (ifd)); + off = def_off * ones (numel (tag), numel (ifd)); endif - # Read number of entries (nTag) and look for the desired tag ID - nTag = fread(FID, 1, "uint16", arch); - iTag = 0; # Tag index - while (1) # Control is made inside the loop - iTag++; - cTag = fread(FID, 1, "uint16", arch); # Tag ID - if (cTag == tag) # Tag ID was found - value = read_value (FID, arch, tag); # Read tag value - break - elseif (iTag == nTag || cTag > tag) # All tags have been read (tags are in ascendent order) - error ("Unable to find tag %g.", tag) + ## read offset for the first IFD and move into it + offset_IFD = fread (FID, 1, "uint32", arch); + + cIFD = 1; # current IFD + while (offset_IFD != 0 && (all_ifd || any (ifd >= cIFD))) + status = fseek (FID, offset_IFD, "bof"); + if (status != 0) + [val, off] = bad_exit (FID, sprintf ("error on fseek when moving to IFD #%g", cIFD)); + return + endif + + ## if checking on all IFD, add one column to the output + if (all_ifd) + val(:, end+1) = def_val; + off(:, end+1) = def_off; endif - status = fseek(FID, 10, "cof"); # Move to the next tag - if (status != 0) - error("Error on fseek when moving to tag %g of %g. 
Last tag read had value of %g", rTag, nTag, tag); - endif + + ## read number of entries (nTag) and look for the desired tag ID + nTag = fread (FID, 1, "uint16", arch); # number of tags in the IFD + cTag = 1; # current tag + while (nTag >= cTag) + tagID = fread (FID, 1, "uint16", arch); # current tag ID + if (any(tagID == tag)) # found one + ## column number of this IFD in the output matrix: + ## we don't know at start the number of IFD so if all IFD have been requested + ## we can't find them in `ifd', we need to set the index for output manually + if (all_ifd) + iCol = cIFD; + else + iCol = (ifd == cIFD); + endif + [val(tagID == tag, iCol), ... + off(tagID == tag, iCol) ] = read_value (FID, arch); # read tag value + elseif (all (tag < tagID)) + ## tags are in numeric order so if they wanted tags are all below current tag ID + ## we can jump over to the next IFD + skip_bytes = 10 + (12 * (nTag - cTag)); + status = fseek (FID, skip_bytes, "cof"); # Move to the next IFD + break + else + status = fseek (FID, 10, "cof"); # Move to the next tag + if (status != 0) + [val, off] = bad_exit (FID, sprintf ("error on fseek when moving out of tag #%g (tagID %g) on IFD %g.", cTag, tagID, cIFD)); + return + endif + endif + cTag++; + endwhile + + offset_IFD = fread (FID, 1, "uint32", arch); + cIFD++; endwhile - fclose (FID); - endfunction -##### -function [value, offset] = read_value (FID, arch, tag) - - tiff_type = fread(FID, 1, "uint16", arch); - count = fread(FID, 1, "uint32", arch); +function [val, off] = read_value (FID, arch) - switch (tiff_type) - case 1 # BYTE = 8-bit unsigned integer - nBytes = 1; - precision = "uint8"; - case 2 # ASCII = 8-bit byte that contains a 7-bit ASCII code; the last byte must be NUL (binary zero) - nBytes = 1; - precision = "uchar"; - case 3 # SHORT = 16-bit (2-byte) unsigned integer - nBytes = 2; - precision = "uint16"; - case 4 # LONG = 32-bit (4-byte) unsigned integer - nBytes = 4; - precision = "uint32"; - case 5 # RATIONAL = Two LONGs: the 
first represents the numerator of a fraction; the second, the denominator - nBytes = 8; - precision = "uint32"; - case 6 # SBYTE = An 8-bit signed (twos-complement) integer - nBytes = 1; - precision = "int8"; - case 7 # UNDEFINED = An 8-bit byte that may contain anything, depending on the definition of the field - nBytes = 1; - precision = "uchar"; - case 8 # SSHORT = A 16-bit (2-byte) signed (twos-complement) integer - nBytes = 2; - precision = "int16"; - case 9 # SLONG = A 32-bit (4-byte) signed (twos-complement) integer - nBytes = 4; - precision = "int32"; - case 10 # SRATIONAL = Two SLONG’s: the first represents the numerator of a fraction, the second the denominator - nBytes = 8; - precision = "int32"; - case 11 # FLOAT = Single precision (4-byte) IEEE format - nBytes = 4; - precision = "float32"; - case 12 # DOUBLE = Double precision (8-byte) IEEE format - nBytes = 8; - precision = "float64"; - otherwise # Warning (from TIFF file specifications): It is possible that other TIFF field types will be added in the future - error("TIFF type %i not supported", tiff_type) + position = ftell (FID); + field_type = fread (FID, 1, "uint16", arch); + count = fread (FID, 1, "uint32", arch); + switch (field_type) + case 1, nBytes = 1; precision = "uint8"; # BYTE = 8-bit unsigned integer + case 2, nBytes = 1; precision = "uchar"; # ASCII = 8-bit byte that contains a 7-bit ASCII code; the last byte must be NUL (binary zero) + case 3, nBytes = 2; precision = "uint16"; # SHORT = 16-bit (2-byte) unsigned integer + case 4, nBytes = 4; precision = "uint32"; # LONG = 32-bit (4-byte) unsigned integer + case 5, nBytes = 8; precision = "uint32"; # RATIONAL = Two LONGs: the first represents the numerator of a fraction; the second, the denominator + case 6, nBytes = 1; precision = "int8"; # SBYTE = An 8-bit signed (twos-complement) integer + case 7, nBytes = 1; precision = "uchar"; # UNDEFINED = An 8-bit byte that may contain anything, depending on the definition of the field + case 8, 
nBytes = 2; precision = "int16"; # SSHORT = A 16-bit (2-byte) signed (twos-complement) integer + case 9, nBytes = 4; precision = "int32"; # SLONG = A 32-bit (4-byte) signed (twos-complement) integer + case 10, nBytes = 8; precision = "int32"; # SRATIONAL = Two SLONG’s: the first represents the numerator of a fraction, the second the denominator + case 11, nBytes = 4; precision = "float32"; # FLOAT = Single precision (4-byte) IEEE format + case 12, nBytes = 8; precision = "float64"; # DOUBLE = Double precision (8-byte) IEEE format + otherwise + ## From the TIFF file specification (page 16, section 2: TIFF structure): + ## "Warning: It is possible that other TIFF field types will be added in the + ## future. Readers should skip over fields containing an unexpected field type." + ## + ## However, we only get to this point of the code if we are in the tag requested + ## by the use so it makes sense to error if we don't supported it yet. + error ("TIFF type %i not supported", field_type); endswitch - if ( (nBytes*count) > 4 ) # If it doesn't fit in 4 bytes, it's an offset - offset = 1; - value = fread(FID, 1, "uint32", arch); - ## The file offset must be an even number - if ( rem(value,2) != 0 ) - error("Couldn't find correct value offset for tag %g", tag); + if ((nBytes*count) > 4) + off = true; + val = fread (FID, 1, "uint32", arch); + if (rem (val, 2) != 0) # file offset must be an even number + warning ("Found an offset with an odd value %g (offsets should always be even numbers.", val); endif else - offset = 0; - ## read here + off = false; switch precision - case { 5, 10 } - value = fread(FID, 2*count, precision, arch); - otherwise - value = fread(FID, count, precision, arch); + case {5, 10} val = fread (FID, 2*count, precision, arch); val = val(1)/val(2); # the first represents the numerator of a fraction; the second, the denominator + case {2} val = fread (FID, count, [precision "=>char"], arch)'; # if we are retrieving a char, we need to transpose to get the 
string + otherwise val = fread (FID, count, precision, arch); endswitch - if (precision == 2) - value = char(value'); - endif + ## adjust position to end of IFD entry (not all take up 4 Bytes) + fseek (FID, 4 - (nBytes*count), "cof"); endif +endfunction +function [val, off] = bad_exit (FID, msg) + off = -1; + val = sprintf (msg); + fclose (FID); endfunction