maposmatic-dev
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Maposmatic-dev] [PATCH 6/8] Completely rework the Nominatim search


From: Thomas Petazzoni
Subject: [Maposmatic-dev] [PATCH 6/8] Completely rework the Nominatim search
Date: Sat, 7 Aug 2010 11:18:51 +0200

On the Nominatim proxy side:

 * Instead of doing complex filtering/sorting on Nominatim results, we
   simply keep the results whose type is in a fixed list of "places".
   Results are kept sorted in Nominatim order.

 * Nominatim results are enriched with a validity field, and reason
   fields for non-validity (like "no admin boundary" or "area too
   large").

 * Nominatim results are also enriched with fields that indicate
   whether previous/next entries are available through Nominatim (when
   the search returns more than 10 entries).

 * Simplify the query_nominatim() view arguments. It now takes all
   arguments through GET. 'q' for the query, 'exclude' for the set of
   places to exclude.

The Nominatim proxy code has also been split into several subfunctions
for easier readability.

The JavaScript code is updated accordingly:

 * Display the Nominatim icon as the list bullet. The bullet is not
   visible yet due to a CSS issue.

 * Use the new valid/reason/reason_text fields of result entries to
   determine whether an entry is valid or not, and show why it isn't
   valid.

 * Show prev/next buttons that trigger another Ajax query to get the
   next/prev results.
---
 www/maposmatic/nominatim.py     |  316 +++++++++++++++++++++++++--------------
 www/maposmatic/views.py         |   18 +--
 www/media/map_rendering_form.js |   67 +++++++--
 www/urls.py                     |    2 +-
 4 files changed, 261 insertions(+), 142 deletions(-)

diff --git a/www/maposmatic/nominatim.py b/www/maposmatic/nominatim.py
index 9a3a932..7105706 100644
--- a/www/maposmatic/nominatim.py
+++ b/www/maposmatic/nominatim.py
@@ -39,9 +39,10 @@ import psycopg2
 from urllib import urlencode
 import urllib2
 from xml.etree.ElementTree import parse as XMLTree
-
+from django.utils.translation import ugettext
 
 NOMINATIM_BASE_URL = "http://nominatim.openstreetmap.org/search/";
+NOMINATIM_MAX_RESULTS_PER_RESPONSE = 10
 
 def reverse_geo(lat, lon):
     """
@@ -62,7 +63,7 @@ def reverse_geo(lat, lon):
         result.append(attribs)
     return result
 
-def query(query_text, with_polygons = False):
+def query(query_text, exclude, with_polygons = False):
     """
     Query the nominatim service for the given city query and return a
     (python) list of entries for the given squery (eg. "Paris"). Each
@@ -76,16 +77,23 @@ def query(query_text, with_polygons = False):
       - key "id": ID of the OSM database entry
       - key "admin_level": The value stored in the OSM table for admin_level
     """
-    entries = _fetch_entries(query_text, with_polygons)
-    return _canonicalize_data(_retrieve_missing_data_from_GIS(entries))
-
-
-def _fetch_entries(query_text, with_polygons):
+    xml = _fetch_xml(query_text, exclude, with_polygons)
+    (hasprev, prevexcludes, hasnext, nextexcludes) = _compute_prev_next_excludes(xml)
+    entries = _extract_entries(xml)
+    entries = _prepare_and_filter_entries(entries)
+    result = {
+        'hasprev'     : hasprev,
+        'prevexcludes': prevexcludes,
+        'hasnext'     : hasnext,
+        'nextexcludes': nextexcludes,
+        'entries'     : entries
+        }
+    return _canonicalize_data(result)
+
+def _fetch_xml(query_text, exclude, with_polygons):
     """
     Query the nominatim service for the given city query and return a
-    (python) list of entries for the given squery (eg. "Paris"). Each
-    entry is a dictionary key -> value (value is always a
-    string).
+    XMLTree object.
     """
     # For some reason, the "xml" nominatim output is ALWAYS used, even
     # though we will later (in views.py) transform this into
@@ -94,14 +102,25 @@ def _fetch_entries(query_text, with_polygons):
     # json output)
     query_tags = dict(q=query_text.encode("UTF-8"),
                       format='xml', addressdetails=1)
+
     if with_polygons:
         query_tags['polygon']=1
 
+    if exclude != '':
+        query_tags['exclude_place_ids'] = exclude
+
     qdata = urlencode(query_tags)
     f = urllib2.urlopen(url="%s?%s" % (NOMINATIM_BASE_URL, qdata))
+    return XMLTree(f)
 
+def _extract_entries(xml):
+    """
+    Given a XMLTree object of a Nominatim result, return a (python)
+    list of entries for the given squery (eg. "Paris"). Each entry is
+    a dictionary key -> value (value is always a string).
+    """
     result = []
-    for place in XMLTree(f).getroot().getchildren():
+    for place in xml.getroot().getchildren():
         attribs = dict(place.attrib)
         for elt in place.getchildren():
             attribs[elt.tag] = elt.text
@@ -109,6 +128,50 @@ def _fetch_entries(query_text, with_polygons):
 
     return result
 
+def _compute_prev_next_excludes(xml):
+    """
+    Given a XML response from Nominatim, determines the set of
+    "exclude_place_ids" that should be used to get the next set of
+    entries and the previous set of entries. We also determine
+    booleans saying whether there are or not previous or next entries
+    available. This allows the website to show previous/next buttons
+    in the administrative boundary search box.
+
+    Args:
+         xml (XMLTree): the XML tree of the Nominatim response
+
+    Returns a (hasprev, prevexcludes, hasnext, nextexcludes) tuple,
+    where:
+         hasprev (boolean): Whether there are or not previous entries
+         prevexcludes (string): String to pass as exclude_place_ids to
+         get the previous entries
+         hasnext (boolean): Whether there are or not next entries
+         nextexcludes (string): String to pass as exclude_place_ids to
+         get the next entries
+    """
+    excludes = xml.getroot().get("exclude_place_ids", None)
+
+    # If the current number of entries is 10, we have other entries
+    if (len(xml.getroot().getchildren()) == NOMINATIM_MAX_RESULTS_PER_RESPONSE):
+        nextexcludes = excludes
+        hasnext = True
+    else:
+        nextexcludes = ""
+        hasnext = False
+
+    # Compute the exclude list to get the previous list
+    prevexcludes = ""
+    hasprev = False
+    if excludes is not None:
+        excludes_list = excludes.split(',')
+        hasprev = len(excludes_list) > NOMINATIM_MAX_RESULTS_PER_RESPONSE
+        prevexcludes_count = (len(excludes_list) / NOMINATIM_MAX_RESULTS_PER_RESPONSE) * \
+            NOMINATIM_MAX_RESULTS_PER_RESPONSE - 2 * NOMINATIM_MAX_RESULTS_PER_RESPONSE
+        if prevexcludes_count >= 0:
+            prevexcludes = ','.join(excludes_list[:prevexcludes_count])
+
+
+    return (hasprev, prevexcludes, hasnext, nextexcludes)
 
 def _canonicalize_data(data):
     """
@@ -132,18 +195,125 @@ def _canonicalize_data(data):
             pass
     return data
 
+def _get_admin_boundary_info_from_GIS(cursor, osm_id):
+    """
+    Lookup additional data for the administrative boundary of given
+    relation osm_id.
+
+    Args:
+          osm_id (int) : the OSM id of the relation to lookup
 
-def _retrieve_missing_data_from_GIS(entries):
+    Returns a tuple (osm_id, admin_level, table_name, valid,
+    reason, reason_text)
+    """
+    # Nominatim returns a field "osm_id" for each result
+    # entry. Depending on the type of the entry, it can point to
+    # various database entries. For admin boundaries, osm_id is
+    # supposed to point to either the 'polygon' or the 'line'
+    # table. Usually, the database entry ID in the table is derived by
+    # the "relation" items by osm2pgsql, which assigns to that ID the
+    # opposite of osm_id... But we still consider that it could be the
+    # real osm_id (not its opposite). Let's have fun...
+    for table_name in ("polygon", "line"):
+        # Lookup the polygon/line table for both osm_id and
+        # the opposite of osm_id
+        cursor.execute("""select osm_id, admin_level,
+                          st_astext(st_envelope(st_transform(way,
+                          4002))) AS bbox
+                          from planet_osm_%s
+                          where osm_id = -%s"""
+                       % (table_name,osm_id))
+        result = tuple(set(cursor.fetchall()))
+
+        if len(result) == 0:
+            continue
+
+        osm_id, admin_level, bboxtxt = result[0]
+        bbox = coords.BoundingBox.parse_wkt(bboxtxt)
+        (metric_size_lat, metric_size_lon) = bbox.spheric_sizes()
+        if (metric_size_lat > www.settings.BBOX_MAXIMUM_LENGTH_IN_METERS
+            or metric_size_lon > www.settings.BBOX_MAXIMUM_LENGTH_IN_METERS):
+            valid = False
+            reason = "area-too-big"
+            reason_text = ugettext("Administrative area too big for rendering")
+        else:
+            valid = True
+            reason = ""
+            reason_text = ""
+
+        return (osm_id, admin_level, table_name,
+                valid, reason, reason_text)
+
+    # Not found
+    return None
+
+def _prepare_entry(cursor, entry):
+    """
+    Prepare an entry by adding additional information to it, in the
+    form of an ocitysmap_params dictionary.
+
+    Args:
+           cursor: database connection cursor
+           entry:  the entry to enrich
+
+    Returns nothing, but adds an ocitysmap_params dictionary to the
+    entry. It will contain entries 'valid', 'reason', 'reason_text'
+    when the entry is invalid, or 'table', 'id', 'valid', 'reason',
+    'reason_text' when the entry is valid. Meaning of those values:
+
+           valid (boolean): tells whether the entry is valid for
+           rendering or not
+
+           reason (string): non human readable short string that
+           describes why the entry is invalid. To be used for
+           Javascript comparison. Empty for valid entries.
+
+           reason_text (string): human readable and translated
+           explanation of why the entry is invalid. Empty for valid
+           entries.
+
+           table (string): "line" or "polygon", tells in which table
+           the administrative boundary has been found. Only present
+           for valid entries.
+
+           id (string): the OSM id. Only present for valid entries.
+
+           admin_level (string): the administrative boundary
+           level. Only present for valid entries.
+    """
+    # Try to lookup in the OSM DB, when needed and when it
+    # makes sense (ie. the data is coming from a relation)
+    if (entry.get("class") == "boundary" and
+        entry.get("type") == "administrative" and
+        entry.get('osm_type') == "relation"):
+        details = _get_admin_boundary_info_from_GIS(cursor, entry["osm_id"])
+
+        if details is None:
+            entry["ocitysmap_params"] \
+                = dict(valid=False,
+                       reason="no-admin",
+                       reason_text=ugettext("No administrative boundary"))
+        else:
+            (osm_id, admin_level, table_name,
+             valid, reason, reason_text) = details
+            entry["ocitysmap_params"] \
+                = dict(table=table_name, id=osm_id,
+                       admin_level=admin_level,
+                       valid=valid,
+                       reason=reason,
+                       reason_text=reason_text)
+    else:
+        entry["ocitysmap_params"] \
+            = dict(valid=False,
+                   reason="no-admin",
+                   reason_text=ugettext("No administrative boundary"))
+
+def _prepare_and_filter_entries(entries):
     """
     Try to retrieve additional OSM information for the given nominatim
     entries. Among the information, we try to determine the real ID in
     an OSM table for each of these entries. All these additional data
-    are stored in the "ocitysmap_params" key of the entry, which maps
-    to a dictionary containing:
-      - key "table": when "line" -> refers to table "planet_osm_line";
-        when "polygon" -> "planet_osm_polygon"
-      - key "id": ID of the OSM database entry
-      - key "admin_level": The value stored in the OSM table for admin_level
+    are stored in the "ocitysmap_params" key of the entry.
     """
     if not www.settings.has_gis_database():
         return entries
@@ -159,113 +329,31 @@ def _retrieve_missing_data_from_GIS(entries):
                                  str(e)[:-1])
         return entries
 
-    # Nominatim returns a field "osm_id" for each result
-    # entry. Depending on the type of the entry, it can point to
-    # various database entries. For admin boundaries, osm_id is
-    # supposed to point to either the 'polygon' or the 'line'
-    # table. Usually, the database entry ID in the table is derived by
-    # the "relation" items by osm2pgsql, which assigns to that ID the
-    # opposite of osm_id... But we still consider that it could be the
-    # real osm_id (not its opposite). Let's have fun...
-
-    # Will sort the entries so that the admin boundaries appear first,
-    # then cities, towns, etc. Second order: larger cities
-    # (ie. greater way_area) are listed first
-    unsorted_entries = []
-    admin_boundary_names = set()
-    PLACE_RANKS = { 'city': 20, 'town': 30, 'municipality': 40,
-                    'village': 50, 'hamlet': 60, 'suburb': 70,
-                    'island': 80, 'islet': 90, 'locality': 100 }
-    ADMIN_LEVEL_RANKS = { '8': 0, '7': 1, '6': 2, '5':3 } # level 8 is best !
+    place_tags = [ 'city', 'town', 'municipality',
+                   'village', 'hamlet', 'suburb',
+                   'island', 'islet', 'locality',
+                   'administrative' ]
+    filtered_results = []
     try:
         cursor = conn.cursor()
         for entry in entries:
-            # Should we try to lookup the id in the OSM DB ?
-            lookup_OSM = False
-
-            # Highest rank = last in the output
-            entry_rank = (1000,0) # tuple (sort rank, -area)
-
-            # Try to determine the order in which this entry should appear
-            if entry.get("class") == "boundary":
-                if entry.get("type") == "administrative":
-                    entry_rank = (10,0)
-                    admin_boundary_names.add(entry.get("display_name", 42))
-                    lookup_OSM = True
-                else:
-                    # Just don't try to lookup any additional
-                    # information from OSM when the nominatim entry is
-                    # not an administrative boundary
-                    continue
-            elif entry.get("class") == "place":
-                try:
-                    entry_rank = (PLACE_RANKS[entry.get("type")],0)
-                except KeyError:
-                    # Will ignore all the other place tags
-                    continue
-            else:
-                # We ignore all the other classes
+
+            # Ignore uninteresting tags
+            if not entry.get("type") in place_tags:
                 continue
 
-            # Try to lookup in the OSM DB, when needed and when it
-            # makes sense (ie. the data is coming from a relation)
-            if lookup_OSM and (entry.get('osm_type') == "relation"):
-                for table_name in ("polygon", "line"):
-                    # Lookup the polygon/line table for both osm_id and
-                    # the opposite of osm_id
-                    cursor.execute("""select osm_id, admin_level, way_area,
-                                      st_astext(st_envelope(st_transform(way,
-                                      4002))) AS bbox
-                                      from planet_osm_%s
-                                      where osm_id = -%s""" \
-                                       % (table_name,entry["osm_id"]))
-                    result = tuple(set(cursor.fetchall()))
-                    if len(result) == 1:
-                        osm_id, admin_level, way_area, bboxtxt = result[0]
-
-                        bbox = coords.BoundingBox.parse_wkt(bboxtxt)
-
-                        # Convert the floats to string, since it has
-                        # to be rendered correctly by the JSON encoder
-                        minx = str(bbox.get_top_left()[1])
-                        miny = str(bbox.get_bottom_right()[0])
-                        maxy = str(bbox.get_top_left()[0])
-                        maxx = str(bbox.get_bottom_right()[1])
-
-                        entry["ocitysmap_params"] \
-                            = dict(table=table_name, id=osm_id,
-                                   admin_level=admin_level,
-                                   way_area=way_area, minx=minx, miny=miny,
-                                   maxx=maxx, maxy=maxy)
-                        # Make these first in list, priviledging level 8
-                        entry_rank = (ADMIN_LEVEL_RANKS.get(admin_level,9),
-                                      -way_area)
-                        break
-
-            # Register this entry for the results
-            unsorted_entries.append((entry_rank, entry))
+            # Our entry will be part of the result
+            filtered_results.append(entry)
+
+            # Enrich the entry with more info
+            _prepare_entry(cursor, entry)
 
         # Some cleanup
         cursor.close()
     finally:
         conn.close()
 
-    # Sort the entries according to their rank
-    sorted_entries = [entry for rank,entry in sorted(unsorted_entries,
-                                                     key=lambda kv: kv[0])]
-
-    # Remove those non-admin-boundaries having the same name as an
-    # admin boundary
-    retval = []
-    for e in sorted_entries:
-        if e.get("class") != "boundary" or e.get("type") != "administrative":
-            if e.get("display_name") in admin_boundary_names:
-                continue
-        retval.append(e)
-
-    return retval
-
-
+    return filtered_results
 
 if __name__ == "__main__":
     import pprint, sys
diff --git a/www/maposmatic/views.py b/www/maposmatic/views.py
index b886ef4..5cdd143 100644
--- a/www/maposmatic/views.py
+++ b/www/maposmatic/views.py
@@ -177,24 +177,18 @@ def all_maps(request):
                                 'pages': helpers.get_pages_list(maps, paginator) },
                               context_instance=MapOSMaticRequestContext(request))
 
-def query_nominatim(request, format, squery):
+def query_nominatim(request):
     """Nominatim query gateway."""
-
-    format = format or request.GET.get('format', 'json')
-    if format not in ['json']:
-        return HttpResponseBadRequest("ERROR: Invalid format")
-
-    squery = squery or request.GET.get('q', '')
+    exclude = request.GET.get('exclude', '')
+    squery = request.GET.get('q', '')
 
     try:
-        contents = nominatim.query(squery, with_polygons=False)
+        contents = nominatim.query(squery, exclude, with_polygons=False)
     except:
         contents = []
 
-    if format == 'json':
-        return HttpResponse(content=json_encode(contents),
-                            mimetype='text/json')
-    # Support other formats here.
+    return HttpResponse(content=json_encode(contents),
+                        mimetype='text/json')
 
 def nominatim_reverse(request, lat, lon):
     """Nominatim reverse geocoding query gateway."""
diff --git a/www/media/map_rendering_form.js b/www/media/map_rendering_form.js
index 634faae..f76e97e 100644
--- a/www/media/map_rendering_form.js
+++ b/www/media/map_rendering_form.js
@@ -333,18 +333,28 @@ function suggest(input, results, osm_id, options) {
   // Disable form validation via the Enter key
   $input.keypress(function(e) { if (e.keyCode == 13) return false; });
 
-  function appendValidResult(item) {
+  function appendValidResult(item)
+  {
     var id = 'rad_' + item.country_code + '_' + item.ocitysmap_params['id'];
-    $results.append('<li class="suggestok" id="' + id + '">'
-       + item.display_name + '</li>');
+    $results.append('<li style="list-style-type: disc; list-style-image: url('
+                    + item.icon + ');" class="suggestok" id="' + id + '">'
+                    + item.display_name + '</li>');
 
     var e = $('#' + id)
     e.bind('click', function(e) { setResult($(this)); });
     e.bind('mouseover', function(e) { setSelectedResultTo($(this)); });
   }
 
+  function appendInvalidResult(item)
+  {
+    $results.append('<li style="list-style-type: disc; list-style-image: url('
+                    + item.icon + ');" class="suggestoff">'
+                    + item.display_name + ' (' + item.ocitysmap_params["reason_text"] + ')</li>');
+  }
+
   /* Empty and close the suggestion box. */
-  function closeSuggest(hide) {
+  function closeSuggest(hide)
+  {
     $results.empty();
 
     if (hide)
@@ -355,38 +365,65 @@ function suggest(input, results, osm_id, options) {
     shown = !hide;
   }
 
+  function bindDoQuery(excludes)
+  {
+    return (function(e) {
+      closeSuggest(true);
+      doQuery(excludes);
+    });
+  }
+
   /* Handle the JSON result. */
-  function handleNominatimResults(data, textResult) {
+  function handleNominatimResults(data, textResult)
+  {
     var unusable_token = false;
+    var entries = data.entries
     $(input).css('cursor', 'text');
     closeSuggest(false);
 
-    if (!data.length) {
+    if (!entries.length) {
       $results.append('<li class="info">' + $('#noresultsinfo').html() + '</li>');
       return;
     }
 
-    $.each(data, function(i, item) {
-      if (typeof item.ocitysmap_params != 'undefined') {
+    $.each(entries, function(i, item) {
+      if (item.ocitysmap_params["valid"] == 1) {
         appendValidResult(item);
-      } else {
-        $results.append('<li class="suggestoff">'
-          + item.display_name + '</li>');
+      }
+      else {
+        appendInvalidResult(item);
         unusable_token = true;
       }
     });
 
+    if (data.hasprev != "" || data.hasnext != "")
+    {
+      $results.append('<li class="info">');
+      if (data.hasprev != "") {
+        $results.append('<input type="submit" id="suggestprev" value="Previous"/>');
+        $("#suggestprev").bind('click', bindDoQuery(data.prevexcludes));
+      }
+
+      if (data.hasnext != "") {
+        $results.append('<input type="submit" id="suggestnext" value="Next"/>');
+        $("#suggestnext").bind('click', bindDoQuery(data.nextexcludes));
+      }
+      $results.append('</li>');
+    }
+
     if (unusable_token)
       $results.append('<li class="info">' + $('#noadminlimitinfo').html() + '</li>');
   }
 
-  function doQuery() {
+  function doQuery(excludes) {
     if (!$input.val().length) {
       closeSuggest(true);
       return;
     }
     $(input).css('cursor', 'wait');
-    $.getJSON("/apis/nominatim/", { q: $input.val() }, handleNominatimResults);
+      $.getJSON("/apis/nominatim/",
+                { q: $input.val(), exclude: excludes },
+                handleNominatimResults);
   }
 
   function processKey(e) {
@@ -407,12 +444,12 @@ function suggest(input, results, osm_id, options) {
         break;
       case 38:  // UP
         if (!shown)
-          doQuery();
+          doQuery('');
         prevResult();
         break;
       case 40:  // DOWN
         if (!shown)
-          doQuery();
+          doQuery('');
         nextResult();
         break;
       default:
diff --git a/www/urls.py b/www/urls.py
index d5da562..725d20a 100644
--- a/www/urls.py
+++ b/www/urls.py
@@ -62,7 +62,7 @@ urlpatterns = patterns('',
     url(r'^cancel/$', maposmatic.views.cancel,
         name='cancel'),
 
-    (r'^apis/nominatim/([^/]*/)?(.*)$', maposmatic.views.query_nominatim),
+    (r'^apis/nominatim/$', maposmatic.views.query_nominatim),
 
     (r'^apis/reversegeo/([^/]*)/([^/]*)/$', maposmatic.views.nominatim_reverse),
 
-- 
1.7.0.4




reply via email to

[Prev in Thread] Current Thread [Next in Thread]