changeset 563:d2a6dc5e921e

tiff_tag_read: deal with multiple tag and IFD beyond the first and improved documentation
author carandraug
date Wed, 18 Apr 2012 20:56:52 +0000
parents 36627778a40e
children 7db61a0a2632
files NEWS inst/tiff_tag_read.m
diffstat 2 files changed, 224 insertions(+), 109 deletions(-) [+]
line wrap: on
line diff
--- a/NEWS
+++ b/NEWS
@@ -63,4 +63,9 @@
 
  ** `isrgb' now returns false for logical matrix.
 
+ ** `tiff_tag_read' had several bug fixes and can now check IFDs beyond
+    the first. It can also accept multiple tag values and IFDs simultaneously
+    and return a matrix of the values found. Its documentation has been
+    expanded (as well as an explanation of TIFF structure on the source).
+
  ** Package is no longer automatically loaded.
--- a/inst/tiff_tag_read.m
+++ b/inst/tiff_tag_read.m
@@ -1,4 +1,4 @@
-## Copyright (C) 2010 Carnë Draug <carandraug+dev@gmail.com>
+## Copyright (C) 2010-2012 Carnë Draug <carandraug+dev@gmail.com>
 ##
 ## This program is free software; you can redistribute it and/or modify it under
 ## the terms of the GNU General Public License as published by the Free Software
@@ -14,15 +14,63 @@
 ## this program; if not, see <http://www.gnu.org/licenses/>.
 
 ## -*- texinfo -*-
-## @deftypefn {Function File} [@var{value}, @var{offset}] = tiff_tag_read (@var{file}, @var{tag}, @var{ifd})
-## Reads the values of TIFF file tags.
+## @deftypefn {Function File} {[@var{value}, @var{offset}] =} tiff_tag_read (@var{file}, @var{tag})
+## @deftypefnx {Function File} {[@var{value}, @var{offset}] =} tiff_tag_read (@var{file}, @var{tag}, @var{ifd})
+## @deftypefnx {Function File} {[@var{value}, @var{offset}] =} tiff_tag_read (@var{file}, @var{tag}, "all")
+## Read value of @var{tag}s from TIFF files.
+##
+## @var{file} must be a TIFF file and @var{tag} should be a tag ID. To check
+## multiple tags, @var{tag} can be a vector. If @var{ifd} is supplied, only
+## those IFDs (Image File Directory) will be read. As with @var{tag}, multiple
+## IFDs can be checked by using a vector or with the string `all'. By default,
+## only the first IFD is read.
+##
+## @var{value} and @var{offset} will be matrices with a number of rows and
+## columns equal to the number of @var{tag}s and @var{ifd}s requested. The indices
+## follow the same order as the input. @var{offset} has the same structure as
+## @var{value} and when equal to 1 its matching value on @var{value} will be an
+## offset to a position in the file.
+##
+## @var{tag}s that can't be found will have a value of 0 and the corresponding
+## @var{offset} will be 2.
+##
+## If an error occurs when reading @var{file} (such as lack of permissions or file
+## is not a TIFF file), @var{offset} is set to -1 and @var{value} contains the
+## error message.
 ##
-## @var{file} is a TIFF file and @var{tag} is the tag number to read. If
-## @var{ifd} is given, only the tag value from that IFD (Image File Directory)
-## will be read. By default, reads only the first IFD.
+## See the following examples:
+## @example
+## @group
+## ## read value of tag 258 on IFD 1 (`off' will be 1 if `val' is an offset or 2 if not found)
+## [val, off] = tiff_tag_read (filepath, 258);
+## @end group
+## @end example
 ##
-## @var{value} is the read value from @var{tag}. @var{offset} will be @code{1}
-## if @var{value} is a file offset. 
+## @example
+## @group
+## ## read value 258, 262, 254 on IFD 1 (`val' and `off' will be a 3x1 matrix)
+## [val, off] = tiff_tag_read (filepath, [258 262 254]);
+## if (off(1) == -1), error ("something happened: %s", val); endif
+## off(2,1)   # will be 1 if val(2,1) is an offset to a file position or 2 if tag was not found
+## val(2,1)   # value of tag 262 on IFD 1
+## @end group
+## @end example
+##
+## @example
+## @group
+## ## read value 258, 262, 254 on the first 10 IFDs (`val' and `off' will be a 3x10 matrix)
+## [val, off] = tiff_tag_read (filepath, [258 262 254], 1:10);
+## val(2,5)   # value of tag 262 on IFD 5
+## @end group
+## @end example
+##
+## @example
+## @group
+## ## read value 258, 262, 254 on all IFDs (`val' and `off' will have 3 rows, with columns indexed by IFD)
+## [val, off] = tiff_tag_read (filepath, [258 262 254], "all");
+## val(2,end)   # value of tag 262 on the last IFD
+## @end group
+## @end example
 ##
 ## @seealso{imread, imfinfo, readexif}
 ## @end deftypefn
@@ -36,6 +84,26 @@
 ## and the function tiff_read by F. Nedelec, EMBL (www.cytosim.org)
 ##  * http://www.cytosim.org/misc/index.html
 ##
+## Explanation of the TIFF file structure:
+##
+## The idea of multi-page images needs to be understood first. These allow one file
+## to have multiple images. This may sound strange but consider situations such as
+## an MRI scan (one file can then contain one scan which is multiple images across
+## one of the axis) or time-lapse experiment (where one file is not unlike a movie).
+## TIFF files support this by being like a container of single images, called IFD
+## (Image File Directory). For each page there will be a single IFD. One can see a
+## TIFF as an archive of multiple image files that many times holds a single file.
+##
+## Each TIFF file starts with a small header that identifies the file as TIFF. The
+## header ends with the position on the file for the first IFD. Each IFD has multiple
+## entries that hold information about the image of that IFD including where on the
+## file is the actual image. Each IFD entry is identified by a tag. Each tag has a
+## unique meaning; for example, the IFD entry with tag 259 will say the compression
+## type (if any), of the image in that IFD.
+##
+## A TIFF file will always have at least one IFD and each IFD will always have at
+## least one IFD entry.
+##
 ## * On the TIFF image file header:
 ##     bytes 00-01 --> byte order used within the file: "II" for little endian
 ##                     and "MM" for big endian byte ordering.
@@ -47,14 +115,13 @@
 ##
 ## * On a TIFF's IFD structure:
 ##     bytes 00-01 --> number of entries (or tags or fields or directories)
-##     bytes 02-13 --> the entry (the tag is repeated the number of times
-##                     specified at the start of the IFD, but always takes
-##                     12 bytes of size)
-##     bytes XX-XX --> file offset for next IFD (last 4 bytes of the IFD)
+##     bytes 02-13 --> the IFD entry #0
+##     bytes 14-25 --> the IFD entry #1, and so on. Each will have exactly 12 bytes (the
+##                     number of IFD entries was specified at the start of the IFD)
+##     bytes XX-XX --> file offset for next IFD (last 4 bytes of the IFD) or 0
+##                     if it's the last IFD
 ##
-##   Note: there must be always one IFD and each IFD must have at least one entry
-##
-## * On an IFD entry (or TIFF's field) structure:
+## * On an IFD entry structure:
 ##     bytes 00-01 --> tag that identifies the entry
 ##     bytes 02-03 --> entry type
 ##                      1  --> BYTE (uint8)
@@ -69,128 +136,171 @@
 ##                      10 --> FLOAT (single IEEE precision)
 ##                      11 --> DOUBLE (double IEEE precision)
 ##     bytes 04-07 --> number of values (count)
-##     bytes 08-11 --> file offset to the value or value (only if it fits in 4 bytes)
+##     bytes 08-11 --> file offset (from the beginning of file) or value (only if
+##                     it fits in 4 bytes). It is possible that the offset is for
+##                     a structure and not a value so we return the offset
 ##
 ##   Note: file offset of the value may point anywhere in the file, even after the image.
+##
+## Tags numbered >= 32768 are private tags
+## Tags numbered on the 65000--65535 range are reusable tags
 
-function [value, offset] = tiff_tag_read (file, tag, ifd)
+function [val, off] = tiff_tag_read (file, tag, ifd = 1)
+  ## Walk the IFD chain of `file' and fill one row per requested tag and one column per
+  ## requested IFD. On a read failure `off' is -1 and `val' holds the error message.
 
-  [FID, msg] = fopen (file, "r", "native");
-  if (msg != 0)
-    error ("Unable to fopen '%s': %s.", file, msg);
+  if (nargin < 2 || nargin > 3)
+    print_usage;
+  elseif (!isnumeric (tag) || !isvector (tag))
+    error ("`tag' must be either a numeric scalar or vector with tags -- identifying number of a field");
+  elseif (!(ischar (ifd) && strcmpi (ifd, "all")) && !(isnumeric (ifd) && isvector (ifd) && all (ifd == fix (ifd)) && all (ifd > 0)))
+    error ("`ifd' must be either the string `all' or numeric scalar or vector of positive integers with the IFD index");
   endif
 
-  # Read byte order
-  byte_order = fread(FID, 2, "char=>char");
-  if ( strcmp(byte_order', "II") )
+  [FID, msg] = fopen (file, "r", "native");
+  if (FID == -1)
+    val = sprintf ("Unable to fopen '%s': %s.", file, msg); off = -1;   # no stream was opened, so bad_exit (which calls fclose) is not used here
+    return
+  endif
+
+  ## read byte order
+  byte_order = fread (FID, 2, "char=>char")';     # if we are retrieving a char, we need to transpose to get the string
+  if     (strcmp (byte_order, "II"))
     arch = "ieee-le";                             # IEEE little endian format
-  elseif ( strcmp(byte_order',"MM") )
+  elseif (strcmp (byte_order,"MM"))
     arch = "ieee-be";                             # IEEE big endian format
   else
-    error("First 2 bytes of header returned '%s'. TIFF file expects either 'II' or 'MM'.", byte_order');
+    [val, off] = bad_exit (FID, sprintf ("First 2 bytes of '%s' returned '%s'. For TIFF should either be 'II' or 'MM'. Are you sure it's a TIFF.", file, byte_order));
+    return
   endif
 
-  # Read number 42
-  nTIFF = fread(FID, 1, "uint16", arch);
+  ## read number 42
+  nTIFF = fread (FID, 1, "uint16", arch);
   if (nTIFF != 42)
-    error("This is not a TIFF file (missing 42 on header at offset 2. Instead got '%g').", tiff_id);
+    [val, off] = bad_exit (FID, sprintf ("'%s' is not a TIFF (missing value 42 on header at offset 2. Instead got '%g').", file, nTIFF));
+    return
   endif
+
+  ## `ifd' is either the string "all" (scan every IFD of the file) or a vector of IFD indices
+  all_ifd = ischar (ifd) && strcmpi (ifd, "all");
 
-  # Read offset and move for the first IFD
-  offset_IFD = fread(FID, 1, "uint32", arch);
-  status = fseek(FID, offset_IFD, "bof");
-  if (status != 0)
-      error("Error on fseek when moving to first IFD.");
+  ## default values for val and off
+  def_val = 0;
+  def_off = 2;
+
+  ## start output values with default values ("all" starts with no columns: one is appended per IFD found)
+  if (all_ifd)
+    val = def_val * ones (numel (tag), 0);
+    off = def_off * ones (numel (tag), 0);
+  else
+    val = def_val * ones (numel (tag), numel (ifd));
+    off = def_off * ones (numel (tag), numel (ifd));
   endif
 
-  # Read number of entries (nTag) and look for the desired tag ID
-  nTag = fread(FID, 1, "uint16", arch);
-  iTag = 0;                                       # Tag index
-  while (1)                                       # Control is made inside the loop
-    iTag++;
-    cTag = fread(FID, 1, "uint16", arch);         # Tag ID
-    if (cTag == tag)                              # Tag ID was found
-      value = read_value (FID, arch, tag);        # Read tag value
-      break
-    elseif (iTag == nTag || cTag > tag)           # All tags have been read (tags are in ascendent order)
-      error ("Unable to find tag %g.", tag)
+  ## read offset for the first IFD and move into it
+  offset_IFD = fread (FID, 1, "uint32", arch);
+
+  cIFD = 1;   # current IFD
+  while (offset_IFD != 0 && (all_ifd || any (ifd >= cIFD)))
+    status = fseek (FID, offset_IFD, "bof");
+    if (status != 0)
+      [val, off] = bad_exit (FID, sprintf ("error on fseek when moving to IFD #%g", cIFD));
+      return
+    endif
+
+    ## if checking on all IFD, add one column to the output (column number == IFD number)
+    if (all_ifd)
+      val(:, end+1) = def_val;
+      off(:, end+1) = def_off;
     endif
-    status = fseek(FID, 10, "cof");               # Move to the next tag
-    if (status != 0)
-      error("Error on fseek when moving to tag %g of %g. Last tag read had value of %g", rTag, nTag, tag);
-    endif
+
+    ## read number of entries (nTag) and look for the desired tag ID
+    nTag = fread (FID, 1, "uint16", arch);          # number of tags in the IFD
+    cTag = 1;                                       # current tag
+    while (nTag >= cTag)
+      tagID = fread (FID, 1, "uint16", arch);       # current tag ID
+      if (any(tagID == tag))                        # found one
+        ## column number of this IFD in the output matrix:
+        ## we don't know at start the number of IFD so if all IFD have been requested
+        ## we can't find them in `ifd', we need to set the index for output manually
+        if (all_ifd)
+          iCol = cIFD;
+        else
+          iCol = (ifd == cIFD);
+        endif
+        [val(tagID == tag, iCol), ...
+         off(tagID == tag, iCol) ] = read_value (FID, arch); # read tag value
+      elseif (all (tag < tagID))
+        ## tags are in numeric order so if the wanted tags are all below the current tag ID
+        ## we can jump over to the next IFD
+        skip_bytes = 10 + (12 * (nTag - cTag));     # rest of this entry plus all the entries not yet visited
+        status = fseek (FID, skip_bytes, "cof");    # Move to the next IFD
+        if (status != 0), [val, off] = bad_exit (FID, sprintf ("error on fseek when skipping the remaining tags on IFD %g.", cIFD)); return; endif
+        break
+      else
+        status = fseek (FID, 10, "cof");            # Move to the next tag
+        if (status != 0)
+          [val, off] = bad_exit (FID, sprintf ("error on fseek when moving out of tag #%g (tagID %g) on IFD %g.", cTag, tagID, cIFD));
+          return
+        endif
+      endif
+      cTag++;
+    endwhile
+
+    offset_IFD = fread (FID, 1, "uint32", arch);
+    cIFD++;
   endwhile
-
   fclose (FID);
-
 endfunction
 
-#####
-function [value, offset] = read_value (FID, arch, tag)
-
-  tiff_type = fread(FID, 1, "uint16", arch);
-  count     = fread(FID, 1, "uint32", arch);
+function [val, off] = read_value (FID, arch)
 
-  switch (tiff_type)
-    case 1      # BYTE      = 8-bit unsigned integer
-      nBytes    = 1;
-      precision = "uint8";
-    case 2      # ASCII     = 8-bit byte that contains a 7-bit ASCII code; the last byte must be NUL (binary zero)
-      nBytes    = 1;
-      precision = "uchar";
-    case 3      # SHORT     = 16-bit (2-byte) unsigned integer
-      nBytes    = 2;
-      precision = "uint16";
-    case 4      # LONG      = 32-bit (4-byte) unsigned integer
-      nBytes    = 4;
-      precision = "uint32";
-    case 5      # RATIONAL  = Two LONGs: the first represents the numerator of a fraction; the second, the denominator
-      nBytes    = 8;
-      precision = "uint32";
-    case 6      # SBYTE     = An 8-bit signed (twos-complement) integer
-      nBytes    = 1;
-      precision = "int8";
-    case 7      # UNDEFINED = An 8-bit byte that may contain anything, depending on the definition of the field
-      nBytes    = 1;
-      precision = "uchar";
-    case 8      # SSHORT    = A 16-bit (2-byte) signed (twos-complement) integer
-      nBytes    = 2;
-      precision = "int16";
-    case 9      # SLONG     = A 32-bit (4-byte) signed (twos-complement) integer
-      nBytes    = 4;
-      precision = "int32";
-    case 10     # SRATIONAL = Two SLONG’s: the first represents the numerator of a fraction, the second the denominator
-      nBytes    = 8;
-      precision = "int32";
-    case 11     # FLOAT     = Single precision (4-byte) IEEE format
-      nBytes    = 4;
-      precision = "float32";
-    case 12     # DOUBLE    = Double precision (8-byte) IEEE format
-      nBytes   = 8;
-      precision = "float64";
-    otherwise # Warning (from TIFF file specifications): It is possible that other TIFF field types will be added in the future
-      error("TIFF type %i not supported", tiff_type)
+  ## FID must sit right after the 2-byte tag ID of an IFD entry; the other 10 bytes of the entry (type, count, value) are consumed here
+  field_type = fread (FID, 1, "uint16", arch);
+  count      = fread (FID, 1, "uint32", arch);
+  switch (field_type)
+    case  1,  nBytes = 1; precision = "uint8";    # BYTE      = 8-bit unsigned integer
+    case  2,  nBytes = 1; precision = "uchar";    # ASCII     = 8-bit byte that contains a 7-bit ASCII code; the last byte must be NUL (binary zero)
+    case  3,  nBytes = 2; precision = "uint16";   # SHORT     = 16-bit (2-byte) unsigned integer
+    case  4,  nBytes = 4; precision = "uint32";   # LONG      = 32-bit (4-byte) unsigned integer
+    case  5,  nBytes = 8; precision = "uint32";   # RATIONAL  = Two LONGs: the first represents the numerator of a fraction; the second, the denominator
+    case  6,  nBytes = 1; precision = "int8";     # SBYTE     = An 8-bit signed (twos-complement) integer
+    case  7,  nBytes = 1; precision = "uchar";    # UNDEFINED = An 8-bit byte that may contain anything, depending on the definition of the field
+    case  8,  nBytes = 2; precision = "int16";    # SSHORT    = A 16-bit (2-byte) signed (twos-complement) integer
+    case  9,  nBytes = 4; precision = "int32";    # SLONG     = A 32-bit (4-byte) signed (twos-complement) integer
+    case 10,  nBytes = 8; precision = "int32";    # SRATIONAL = Two SLONG’s: the first represents the numerator of a fraction, the second the denominator
+    case 11,  nBytes = 4; precision = "float32";  # FLOAT     = Single precision (4-byte) IEEE format
+    case 12,  nBytes = 8; precision = "float64";  # DOUBLE    = Double precision (8-byte) IEEE format
+    otherwise
+      ## From the TIFF file specification (page 16, section 2: TIFF structure):
+      ## "Warning: It is possible that other TIFF field types will be added in the
+      ## future. Readers should skip over fields containing an unexpected field type."
+      ##
+      ## However, we only get to this point of the code if we are in the tag requested
+      ## by the user so it makes sense to error if we don't support it yet.
+      error ("TIFF type %i not supported", field_type);
   endswitch
 
-  if ( (nBytes*count) > 4 )    # If it doesn't fit in 4 bytes, it's an offset
-    offset = 1;
-    value = fread(FID, 1, "uint32", arch);
-    ## The file offset must be an even number
-    if ( rem(value,2) != 0 )
-      error("Couldn't find correct value offset for tag %g", tag);
+  if ((nBytes*count) > 4)   # data does not fit in the 4-byte value field, so that field holds a file offset
+    off = true;
+    val = fread (FID, 1, "uint32", arch);
+    if (rem (val, 2) != 0)  # file offset must be an even number
+      warning ("Found an offset with an odd value %g (offsets should always be even numbers).", val);
     endif
   else
-    offset = 0;
-    ## read here
+    off = false;
-    switch precision
-      case { 5, 10 }
-        value = fread(FID, 2*count, precision, arch);
-      otherwise
-        value = fread(FID, count, precision, arch);
+    switch (field_type)   # dispatch on the numeric type; `precision' is a string and would never match a numeric case label
+      ## RATIONAL and SRATIONAL (types 5 and 10) need 8 bytes so they always take the offset branch above
+      case 2,         val = fread (FID, count, [precision "=>char"], arch)';                    # if we are retrieving a char, we need to transpose to get the string
+      otherwise       val = fread (FID, count, precision, arch);
     endswitch
-    if (precision == 2)
-        value = char(value');
-    endif
+    ## adjust position to end of IFD entry (not all take up 4 Bytes)
+    fseek (FID, 4 - (nBytes*count), "cof");
   endif
+endfunction
 
+function [val, off] = bad_exit (FID, msg)   # error return: off is -1 and val is the message; FID is closed when it is a stream
+  off = -1;
+  val = msg;                                # callers build msg with sprintf already; formatting it a second time would mangle any `%' or `\' in it
+  if (FID != -1), fclose (FID); endif       # FID is -1 after a failed fopen, and then there is no stream to close
+endfunction