changeset 11704:d8024cba4b63

Speed up approximate search for matching ChangeLog entries.
author Bruno Haible <bruno@clisp.org>
date Thu, 02 Jul 2009 23:33:11 +0200
parents 89b12821209e
children d93d5b006ffb
files ChangeLog lib/git-merge-changelog.c
diffstat 2 files changed, 24 insertions(+), 8 deletions(-) [+]
line wrap: on
line diff
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,10 @@
+2009-07-02  Bruno Haible  <bruno@clisp.org>
+
+	Speed up approximate search for matching ChangeLog entries.
+	* lib/git-merge-changelog.c (entry_fstrcmp): Add a lower_bound
+	argument. Call fstrcmp_bounded instead of fstrcmp.
+	(compute_mapping, try_split_merged_entry, main): Update callers.
+
 2009-07-02  Bruno Haible  <bruno@clisp.org>
 
 	* lib/git-merge-changelog.c (main): Add comment about git cherry-pick.
--- a/lib/git-merge-changelog.c
+++ b/lib/git-merge-changelog.c
@@ -211,9 +211,12 @@
 
 /* Perform a fuzzy comparison of two ChangeLog entries.
    Return a similarity measure of the two entries, a value between 0 and 1.
-   0 stands for very distinct, 1 for identical.  */
+   0 stands for very distinct, 1 for identical.
+   If the result is < LOWER_BOUND, an arbitrary other value < LOWER_BOUND can
+   be returned.  */
 static double
-entry_fstrcmp (const struct entry *entry1, const struct entry *entry2)
+entry_fstrcmp (const struct entry *entry1, const struct entry *entry2,
+	       double lower_bound)
 {
   /* fstrcmp works only on NUL terminated strings.  */
   char *memory;
@@ -233,7 +236,8 @@
     p += entry2->length;
     *p++ = '\0';
   }
-  similarity = fstrcmp (memory, memory + entry1->length + 1);
+  similarity =
+    fstrcmp_bounded (memory, memory + entry1->length + 1, lower_bound);
   freea (memory);
   return similarity;
 }
@@ -410,7 +414,8 @@
 	for (j = n2 - 1; j >= 0; j--)
 	  if (index_mapping_reverse[j] < 0)
 	    {
-	      double similarity = entry_fstrcmp (entry_i, file2->entries[j]);
+	      double similarity =
+		entry_fstrcmp (entry_i, file2->entries[j], best_j_similarity);
 	      if (similarity > best_j_similarity)
 		{
 		  best_j = j;
@@ -429,7 +434,8 @@
 	      if (index_mapping[ii] < 0)
 		{
 		  double similarity =
-		    entry_fstrcmp (file1->entries[ii], entry_j);
+		    entry_fstrcmp (file1->entries[ii], entry_j,
+				   best_i_similarity);
 		  if (similarity > best_i_similarity)
 		    {
 		      best_i = i;
@@ -729,7 +735,8 @@
 
       new_body.string = new_entry->string + split_offset;
       new_body.length = new_entry->length - split_offset;
-      similarity = entry_fstrcmp (&old_body, &new_body);
+      similarity =
+	entry_fstrcmp (&old_body, &new_body, best_similarity);
       if (similarity > best_similarity)
 	{
 	  best_split_offset = split_offset;
@@ -1219,7 +1226,8 @@
 			    size_t i;
 			    for (i = edit->i1 + 1; i <= edit->i2; i++)
 			      if (entry_fstrcmp (ancestor_file.entries[i],
-						 modified_file.entries[i + edit->j2 - edit->i2])
+						 modified_file.entries[i + edit->j2 - edit->i2],
+						 FSTRCMP_THRESHOLD)
 				  < FSTRCMP_THRESHOLD)
 				{
 				  simple_merged = false;
@@ -1300,7 +1308,8 @@
 			simple = true;
 			for (i = edit->i1; i <= edit->i2; i++)
 			  if (entry_fstrcmp (ancestor_file.entries[i],
-					     modified_file.entries[i + edit->j2 - edit->i2])
+					     modified_file.entries[i + edit->j2 - edit->i2],
+					     FSTRCMP_THRESHOLD)
 			      < FSTRCMP_THRESHOLD)
 			    {
 			      simple = false;