bug-gnu-utils
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: msgmerge speedup: fstrcmp and diffseq improvements


From: Ralf Wildenhues
Subject: Re: msgmerge speedup: fstrcmp and diffseq improvements
Date: Sun, 14 Sep 2008 10:16:58 +0200
User-agent: Mutt/1.5.18 (2008-05-17)

This is the gettext part of the patches.

It seems like fuzzy_search_goal_function could, for
definitions_search_fuzzy, also benefit from an upper bound.

The algorithmic ideas I outlined in (4) would benefit from an explicit
fstrcmp_bound function which would only compute the bounds.  Anyway,
I'll leave that to someone interested in implementing it.

FYI, the changes to msgl-fsearch.c and msgmerge.c are untested, except
for a 'make all check' in gettext.

Cheers,
Ralf

gettext-tools/src/ChangeLog:
2008-09-14  Ralf Wildenhues  <address@hidden>

        * message.c (fuzzy_search_goal_function): New argument
        'lower_bound'.  Rewrite to use fstrcmp_if_higher, passing it
        a lower acceptable bound for the similarity.
        (message_list_search_fuzzy_inner): Adjust caller.
        * message.h (fuzzy_search_goal_function): Adjust declaration.
        * msgl-fsearch.c (message_fuzzy_index_search): Adjust callers.
        * msgmerge.c (definitions_search_fuzzy): Likewise.

Index: gettext-tools/src/message.c
===================================================================
RCS file: /cvsroot/gettext/gettext/gettext-tools/src/message.c,v
retrieving revision 1.32
diff -u -r1.32 message.c
--- gettext-tools/src/message.c 7 Oct 2007 19:35:27 -0000       1.32
+++ gettext-tools/src/message.c 14 Sep 2008 08:10:41 -0000
@@ -1,5 +1,5 @@
 /* GNU gettext - internationalization aids
-   Copyright (C) 1995-1998, 2000-2007 Free Software Foundation, Inc.
+   Copyright (C) 1995-1998, 2000-2008 Free Software Foundation, Inc.
 
    This file was written by Peter Miller <address@hidden>
 
@@ -531,7 +531,8 @@
 
 double
 fuzzy_search_goal_function (const message_ty *mp,
-                           const char *msgctxt, const char *msgid)
+                           const char *msgctxt, const char *msgid,
+                           double lower_bound)
 {
   /* The use of 'volatile' guarantees that excess precision bits are dropped
      before the addition and before the following comparison at the caller's
@@ -539,13 +540,24 @@
      compliant by default, to avoid that msgmerge results become platform and
      compiler option dependent.  'volatile' is a portable alternative to gcc's
      -ffloat-store option.  */
-  volatile double weight = fstrcmp (msgid, mp->msgid);
+  volatile double weight;
+  const double ctxt_advantage = 0.00001;
+
+  /* Subtract the small advantage possibly given below.  */
+  if (lower_bound > ctxt_advantage)
+    lower_bound -= ctxt_advantage;
+  else
+    lower_bound = 0.;
+
+  weight = fstrcmp_if_higher (msgid, mp->msgid, lower_bound);
+
   /* A translation for a context is a good proposal also for another.  But
      give mp a small advantage if mp is valid regardless of any context or
      has the same context as the one being looked up.  */
-  if (mp->msgctxt == NULL
-      || (msgctxt != NULL && strcmp (msgctxt, mp->msgctxt) == 0))
-    weight += 0.00001;
+  if (weight > 0.)
+    if (mp->msgctxt == NULL
+       || (msgctxt != NULL && strcmp (msgctxt, mp->msgctxt) == 0))
+      weight += ctxt_advantage;
   return weight;
 }
 
@@ -567,7 +579,8 @@
 
       if (mp->msgstr != NULL && mp->msgstr[0] != '\0')
        {
-         double weight = fuzzy_search_goal_function (mp, msgctxt, msgid);
+         double weight = fuzzy_search_goal_function (mp, msgctxt, msgid,
+                                                     *best_weight_p);
          if (weight > *best_weight_p)
            {
              *best_weight_p = weight;
Index: gettext-tools/src/message.h
===================================================================
RCS file: /cvsroot/gettext/gettext/gettext-tools/src/message.h,v
retrieving revision 1.27
diff -u -r1.27 message.h
--- gettext-tools/src/message.h 7 Oct 2007 19:35:27 -0000       1.27
+++ gettext-tools/src/message.h 14 Sep 2008 08:10:41 -0000
@@ -1,5 +1,5 @@
 /* GNU gettext - internationalization aids
-   Copyright (C) 1995-1998, 2000-2007 Free Software Foundation, Inc.
+   Copyright (C) 1995-1998, 2000-2008 Free Software Foundation, Inc.
 
    This file was written by Peter Miller <address@hidden>
 
@@ -317,10 +317,12 @@
 
 
 /* The goal function used in fuzzy search.
-   Higher values indicate a closer match.  */
+   Higher values indicate a closer match.
+   The result is zero for matches known to be worse than LOWER_BOUND.  */
 extern double
        fuzzy_search_goal_function (const message_ty *mp,
-                                  const char *msgctxt, const char *msgid);
+                                  const char *msgctxt, const char *msgid,
+                                  double lower_bound);
 
 /* The threshold for fuzzy-searching.
    A message is considered only if  fstrcmp (msg, given) > FUZZY_THRESHOLD.  */
Index: gettext-tools/src/msgl-fsearch.c
===================================================================
RCS file: /cvsroot/gettext/gettext/gettext-tools/src/msgl-fsearch.c,v
retrieving revision 1.3
diff -u -r1.3 msgl-fsearch.c
--- gettext-tools/src/msgl-fsearch.c    7 Oct 2007 19:35:29 -0000       1.3
+++ gettext-tools/src/msgl-fsearch.c    14 Sep 2008 08:10:41 -0000
@@ -1,5 +1,5 @@
 /* Fast fuzzy searching among messages.
-   Copyright (C) 2006 Free Software Foundation, Inc.
+   Copyright (C) 2006, 2008 Free Software Foundation, Inc.
    Written by Bruno Haible <address@hidden>, 2006.
 
    This program is free software: you can redistribute it and/or modify
@@ -553,7 +553,8 @@
                      {
                        message_ty *mp = findex->messages[ptr->index];
                        double weight =
-                         fuzzy_search_goal_function (mp, msgctxt, msgid);
+                         fuzzy_search_goal_function (mp, msgctxt, msgid,
+                                                     best_weight);
 
                        if (weight > best_weight)
                          {
@@ -598,7 +599,8 @@
        for (j = 0; j < mlp->nitems; j++)
          {
            message_ty *mp = mlp->item[j];
-           double weight = fuzzy_search_goal_function (mp, msgctxt, msgid);
+           double weight = fuzzy_search_goal_function (mp, msgctxt, msgid,
+                                                       best_weight);
 
            if (weight > best_weight)
              {
Index: gettext-tools/src/msgmerge.c
===================================================================
RCS file: /cvsroot/gettext/gettext/gettext-tools/src/msgmerge.c,v
retrieving revision 1.60
diff -u -r1.60 msgmerge.c
--- gettext-tools/src/msgmerge.c        24 Aug 2008 01:01:23 -0000      1.60
+++ gettext-tools/src/msgmerge.c        14 Sep 2008 08:10:42 -0000
@@ -776,8 +776,8 @@
       /* Choose the best among mp1, mp2.  */
       if (mp1 == NULL
          || (mp2 != NULL
-             && (fuzzy_search_goal_function (mp2, msgctxt, msgid)
-                 > fuzzy_search_goal_function (mp1, msgctxt, msgid))))
+             && (fuzzy_search_goal_function (mp2, msgctxt, msgid, 0.)
+                 > fuzzy_search_goal_function (mp1, msgctxt, msgid, 0.))))
        mp1 = mp2;
     }
 




reply via email to

[Prev in Thread] Current Thread [Next in Thread]