changeset 12954:a451ae847adb

Collect hierarchical data profile. * profiler.h: Retructure whole data collection. * profiler.cc: Ditto for implementation.
author Daniel Kraft <d@domob.eu>
date Thu, 11 Aug 2011 17:55:23 +0200
parents 1be5f06d9fa7
children 05941540287c
files src/profiler.cc src/profiler.h
diffstat 2 files changed, 273 insertions(+), 119 deletions(-) [+]
line wrap: on
line diff
--- a/src/profiler.cc
+++ b/src/profiler.cc
@@ -24,6 +24,7 @@
 #include <config.h>
 #endif
 
+#include <cstddef>
 #include <iostream>
 
 #include "defun.h"
@@ -56,11 +57,8 @@
     parents (), children ()
 {}
 
-// With the help of a mapping name -> index, convert a function_set list
-// to an Octave array of indices.
 octave_value
-profile_data_accumulator::stats::function_set_value (const function_set& list,
-                                                     const fcn_index_map& idx)
+profile_data_accumulator::stats::function_set_value (const function_set& list)
 {
   const octave_idx_type n = list.size ();
 
@@ -68,9 +66,7 @@
   octave_idx_type i = 0;
   for (function_set::const_iterator p = list.begin (); p != list.end (); ++p)
     {
-      fcn_index_map::const_iterator q = idx.find (*p);
-      assert (q != idx.end ());
-      retval (i) = q->second;
+      retval(i) = *p;
       ++i;
     }
   assert (i == n);
@@ -78,19 +74,144 @@
   return retval;
 }
 
+profile_data_accumulator::tree_node::tree_node (tree_node* p, octave_idx_type f)
+  : parent (p), fcn_id (f), children (), time (0.0), calls (0)
+{}
+
+profile_data_accumulator::tree_node::~tree_node ()
+{
+  for (child_map::iterator i = children.begin (); i != children.end (); ++i)
+    delete i->second;
+}
+
+profile_data_accumulator::tree_node*
+profile_data_accumulator::tree_node::enter (octave_idx_type fcn)
+{
+  tree_node* retval;
+
+  child_map::iterator pos = children.find (fcn);
+  if (pos == children.end ())
+    {
+      retval = new tree_node (this, fcn);
+      children[fcn] = retval;
+    }
+  else
+    retval = pos->second;
+
+  ++retval->calls;
+  return retval;
+}
+
+profile_data_accumulator::tree_node*
+profile_data_accumulator::tree_node::exit (octave_idx_type fcn)
+{
+  assert (parent);
+  assert (fcn_id == fcn);
+
+  return parent;
+}
+
+void
+profile_data_accumulator::tree_node::build_flat (flat_profile& data) const
+{
+  // If this is not the top-level node, update profile entry for this function.
+  if (fcn_id != 0)
+    {
+      stats& entry = data[fcn_id - 1];
+      
+      entry.time += time;
+      entry.calls += calls;
+
+      assert (parent);
+      if (parent->fcn_id != 0)
+        {
+          entry.parents.insert (parent->fcn_id);
+          data[parent->fcn_id - 1].children.insert (fcn_id);
+        }
+
+      if (!entry.recursive)
+        for (const tree_node* i = parent; i; i = i->parent)
+          if (i->fcn_id == fcn_id)
+            {
+              entry.recursive = true;
+              break;
+            }
+    }
+
+  // Recurse on children.
+  for (child_map::const_iterator i = children.begin ();
+       i != children.end (); ++i)
+    i->second->build_flat (data);
+}
+
+octave_value
+profile_data_accumulator::tree_node::get_hierarchical (void) const
+{
+  /* Note that we don't generate the entry just for this node, but rather
+     a struct-array with entries for all children.  This way, the top-node
+     (for which we don't want a real entry) generates already the final
+     hierarchical profile data.  */
+
+  const octave_idx_type n = children.size ();
+
+  Cell rv_indices (n, 1);
+  Cell rv_times (n, 1);
+  Cell rv_calls (n, 1);
+  Cell rv_children (n, 1);
+
+  octave_idx_type i = 0;
+  for (child_map::const_iterator p = children.begin ();
+       p != children.end (); ++p)
+    {
+      const tree_node& entry = *p->second;
+
+      rv_indices(i) = octave_value (p->first);
+      rv_times(i) = octave_value (entry.time);
+      rv_calls(i) = octave_value (entry.calls);
+      rv_children(i) = entry.get_hierarchical ();
+
+      ++i;
+    }
+  assert (i == n);
+
+  Octave_map retval;
+
+  retval.assign ("Index", rv_indices);
+  retval.assign ("SelfTime", rv_times);
+  retval.assign ("NumCalls", rv_calls);
+  retval.assign ("Children", rv_children);
+
+  return retval;
+}
+
 profile_data_accumulator::profile_data_accumulator ()
-  : enabled (false), call_stack (), data (), last_time (-1.0)
+  : known_functions (), fcn_index (),
+    enabled (false), call_tree (NULL), last_time (-1.0)
 {}
 
+profile_data_accumulator::~profile_data_accumulator ()
+{
+  if (call_tree)
+    delete call_tree;
+}
+
 void
 profile_data_accumulator::set_active (bool value)
 {
-  // If we enable, clear the call-stack.  This ensures we freshly start
-  // with collecting times now.
   if (value)
     {
-      while (!call_stack.empty ())
-        call_stack.pop_back ();
+      // Create a call-tree top-node if there isn't yet one.
+      if (!call_tree)
+        call_tree = new tree_node (NULL, 0);
+
+      // Let the top-node be the active one.  This ensures we have a clean
+      // fresh start collecting times.
+      active_fcn = call_tree;
+    }
+  else
+    {
+      // Make sure we start with fresh timing if we're re-enabled later.
+      last_time = -1.0;
     }
 
   enabled = value;
@@ -101,39 +222,34 @@
 {
   // The enter class will check and only call us if the profiler is active.
   assert (is_active ());
+  assert (call_tree);
 
   // If there is already an active function, add to its time before
   // pushing the new one.
-  if (!call_stack.empty ())
+  if (active_fcn != call_tree)
     add_current_time ();
 
-  // Update non-timing related data for the function entered.
-  stats& entry = data[fcn];
-  ++entry.calls;
-  if (!call_stack.empty ())
+  // Map the function's name to its index.
+  octave_idx_type fcn_idx;
+  fcn_index_map::iterator pos = fcn_index.find (fcn);
+  if (pos == fcn_index.end ())
     {
-      const std::string parent_name = call_stack.back ();
-      entry.parents.insert (parent_name);
-      data[parent_name].children.insert (fcn);
+      known_functions.push_back (fcn);
+      fcn_idx = known_functions.size ();
+      fcn_index[fcn] = fcn_idx;
     }
-  if (!entry.recursive)
-    for (call_stack_type::iterator i = call_stack.begin ();
-         i != call_stack.end (); ++i)
-      if (*i == fcn)
-        {
-          entry.recursive = true;
-          break;
-        }
+  else
+    fcn_idx = pos->second;
 
-  call_stack.push_back (fcn);
+  active_fcn = active_fcn->enter (fcn_idx);
   last_time = query_time ();
 }
 
 void
 profile_data_accumulator::exit_function (const std::string& fcn)
 {
-  assert (!call_stack.empty ());
-  assert (fcn == call_stack.back ());
+  assert (call_tree);
+  assert (active_fcn != call_tree);
 
   // Usually, if we are disabled this function is not even called.  But the
   // call disabling the profiler is an exception.  So also check here
@@ -141,7 +257,9 @@
   if (is_active ())
     add_current_time ();
 
-  call_stack.pop_back ();
+  fcn_index_map::iterator pos = fcn_index.find (fcn);
+  assert (pos != fcn_index.end ());
+  active_fcn = active_fcn->exit (pos->second);
 
   // If this was an "inner call", we resume executing the parent function
   // up the stack.  So note the start-time for this!
@@ -157,26 +275,26 @@
       return;
     }
 
-  data.clear ();
+  known_functions.clear ();
+  fcn_index.clear ();
+
+  if (call_tree)
+    {
+      delete call_tree;
+      call_tree = NULL;
+    }
+
   last_time = -1.0;
 }
 
 octave_value
-profile_data_accumulator::get_data (void) const
+profile_data_accumulator::get_flat (void) const
 {
-  const octave_idx_type n = data.size ();
+  const octave_idx_type n = known_functions.size ();
 
-  // For the parent/child data, we need to map function key-names
-  // to the indices they correspond to in the output array.  Find them out
-  // in a preparation step.
-  fcn_index_map fcn_indices;
-  octave_idx_type i = 0;
-  for (stats_map::const_iterator p = data.begin (); p != data.end (); ++p)
-    {
-      fcn_indices[p->first] = i + 1;
-      ++i;
-    }
-  assert (i == n);
+  flat_profile flat (n);
+  assert (call_tree);
+  call_tree->build_flat (flat);
 
   Cell rv_names (n, 1);
   Cell rv_times (n, 1);
@@ -185,21 +303,15 @@
   Cell rv_parents (n, 1);
   Cell rv_children (n, 1);
 
-  i = 0;
-  for (stats_map::const_iterator p = data.begin (); p != data.end (); ++p)
+  for (octave_idx_type i = 0; i != n; ++i)
     {
-      const stats& entry = p->second;
-
-      rv_names (i) = octave_value (p->first);
-      rv_times (i) = octave_value (entry.time);
-      rv_calls (i) = octave_value (entry.calls);
-      rv_recursive (i) = octave_value (entry.recursive);
-      rv_parents (i) = stats::function_set_value (entry.parents, fcn_indices);
-      rv_children (i) = stats::function_set_value (entry.children, fcn_indices);
-
-      ++i;
+      rv_names(i) = octave_value (known_functions[i]);
+      rv_times(i) = octave_value (flat[i].time);
+      rv_calls(i) = octave_value (flat[i].calls);
+      rv_recursive(i) = octave_value (flat[i].recursive);
+      rv_parents(i) = stats::function_set_value (flat[i].parents);
+      rv_children(i) = stats::function_set_value (flat[i].children);
     }
-  assert (i == n);
 
   Octave_map retval;
 
@@ -213,6 +325,13 @@
   return retval;
 }
 
+octave_value
+profile_data_accumulator::get_hierarchical (void) const
+{
+  assert (call_tree);
+  return call_tree->get_hierarchical ();
+}
+
 double
 profile_data_accumulator::query_time (void) const
 {
@@ -226,14 +345,8 @@
   const double t = query_time ();
   assert (last_time >= 0.0 && last_time <= t);
 
-  assert (!call_stack.empty ());
-  const std::string name = call_stack.back ();
-
-  // The entry for this function should already be created; namely
-  // when entering the function via the non-timing data collection!
-  stats_map::iterator pos = data.find (name);
-  assert (pos != data.end ());
-  pos->second.time += t - last_time;
+  assert (call_tree && active_fcn != call_tree);
+  active_fcn->add_time (t - last_time);
 }
 
 profile_data_accumulator profiler;
@@ -283,7 +396,7 @@
 }
 
 // Query the timings collected by the profiler.
-DEFUN (__profiler_data, args, ,
+DEFUN (__profiler_data, args, nargout,
   "-*- texinfo -*-\n\
 @deftypefn {Function File} __profiler_data ()\n\
 Undocumented internal function.\n\
@@ -295,7 +408,9 @@
   if (nargin > 0)
     warning ("profiler_data: ignoring extra arguments");
 
-  retval(0) = profiler.get_data ();
+  retval(0) = profiler.get_flat ();
+  if (nargout > 1)
+    retval(1) = profiler.get_hierarchical ();
 
   return retval;
 }
--- a/src/profiler.h
+++ b/src/profiler.h
@@ -40,72 +40,113 @@
   // functions in a manner protected from stack unwinding.
   class enter
   {
-    private:
-
-      profile_data_accumulator& acc;
+  private:
 
-      std::string fcn;
+    profile_data_accumulator& acc;
+    std::string fcn;
 
-    public:
-
-      enter (profile_data_accumulator&, const std::string&);
+  public:
 
-      virtual ~enter (void);
-
-    private:
+    enter (profile_data_accumulator&, const std::string&);
+    virtual ~enter (void);
 
-      // No copying!
+  private:
 
-      enter (const enter&);
-
-      enter& operator = (const enter&);
+    // No copying!
+    enter (const enter&);
+    enter& operator = (const enter&);
   };
 
   profile_data_accumulator (void);
+  virtual ~profile_data_accumulator ();
 
   bool is_active (void) const { return enabled; }
-
   void set_active (bool);
 
   void reset (void);
 
-  octave_value get_data (void) const;
+  octave_value get_flat (void) const;
+  octave_value get_hierarchical (void) const;
 
 private:
 
-  typedef std::set<std::string> function_set;
+  // One entry in the flat profile (i.e., a collection of data for a single
+  // function).  This is filled in when building the flat profile from the
+  // hierarchical call tree.
+  struct stats
+  {
+    stats ();
+
+    double time;
+    unsigned calls;
+
+    bool recursive;
+
+    typedef std::set<octave_idx_type> function_set;
+    function_set parents;
+    function_set children;
+
+    // Convert a function_set list to an Octave array of indices.
+    static octave_value function_set_value (const function_set&);
+  };
+
+  typedef std::vector<stats> flat_profile;
+  
+  // Store data for one node in the call-tree of the hierarchical profiler
+  // data we collect.
+  class tree_node
+  {
+  public:
+
+    tree_node (tree_node*, octave_idx_type);
+    virtual ~tree_node ();
+
+    void add_time (double dt) { time += dt; }
+
+    // Enter a child function.  It is created in the list of children if it
+    // wasn't already there.  The now-active child node is returned.
+    tree_node* enter (octave_idx_type);
+
+    // Exit function.  As a sanity-check, it is verified that the currently
+    // active function actually is the one handed in here.  Returned is the
+    // then-active node, which is our parent.
+    tree_node* exit (octave_idx_type);
+
+    void build_flat (flat_profile&) const;
+    octave_value get_hierarchical (void) const;
+
+  private:
+
+    tree_node* parent;
+    octave_idx_type fcn_id;
+
+    typedef std::map<octave_idx_type, tree_node*> child_map;
+    child_map children;
+
+    // This is only time spent *directly* on this level, excluding children!
+    double time;
+
+    unsigned calls;
+
+    // No copying!
+    tree_node (const tree_node&);
+    tree_node& operator = (const tree_node&);
+  };
+
+  // Each function we see in the profiler is given a unique index (which
+  // simply counts starting from 1).  We thus have to map profiler-names to
+  // those indices.  For all other stuff, we identify functions by their index.
+
+  typedef std::vector<std::string> function_set;
   typedef std::map<std::string, octave_idx_type> fcn_index_map;
 
-  // Store some statistics data collected for a function.
-  class stats
-  {
-    private:
-
-      double time;
-      unsigned calls;
-
-      bool recursive;
-
-      function_set parents;
-      function_set children;
-
-    public:
-
-      stats ();
-
-      static octave_value
-      function_set_value (const function_set&, const fcn_index_map&);
-
-      friend class profile_data_accumulator;
-  };
+  function_set known_functions;
+  fcn_index_map fcn_index;
 
   bool enabled;
 
-  typedef std::vector<std::string> call_stack_type;
-  call_stack_type call_stack;
-
-  typedef std::map<std::string, stats> stats_map;
-  stats_map data;
+  tree_node* call_tree;
+  tree_node* active_fcn;
 
   // Store last timestamp we had, when the currently active function was called.
   double last_time;
@@ -117,19 +158,17 @@
 
   // Query a timestamp, used for timing calls (obviously).
   // This is not static because in the future, maybe we want a flag
-  // in the profiler or something to choose between cputime, wall-time
+  // in the profiler or something to choose between cputime, wall-time,
   // user-time, system-time, ...
   double query_time () const;
 
-  // Add the time elapsed since last_time to the function on the top
-  // of our call-stack.  This is called from two different positions,
-  // thus it is useful to have it as a seperate function.
+  // Add the time elapsed since last_time to the function we're currently in.
+  // This is called from two different positions, thus it is useful to have
+  // it as a seperate function.
   void add_current_time (void);
 
   // No copying!
-
   profile_data_accumulator (const profile_data_accumulator&);
-
   profile_data_accumulator& operator = (const profile_data_accumulator&);
 };