emacs-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: Linking Emacs with libxml2


From: Lars Magne Ingebrigtsen
Subject: Re: Linking Emacs with libxml2
Date: Wed, 08 Sep 2010 18:15:25 +0200
User-agent: Gnus/5.110011 (No Gnus v0.11) Emacs/24.0.50 (gnu/linux)

I did it the hard way:

=== modified file 'ChangeLog'
--- ChangeLog   2010-09-04 07:30:14 +0000
+++ ChangeLog   2010-09-08 16:12:36 +0000
@@ -1,3 +1,7 @@
+2010-09-08  Lars Magne Ingebrigtsen  <address@hidden>
+
+       * configure.in: Check for libxml2/htmlReadMemory().
+
 2010-09-04  Eli Zaretskii  <address@hidden>
 
        * config.bat: Produce lisp/gnus/_dir-locals.el from

=== modified file 'configure'
--- configure   2010-08-23 12:54:09 +0000
+++ configure   2010-09-08 15:55:18 +0000
@@ -660,6 +660,8 @@
 LIBS_MAIL
 liblockfile
 ALLOCA
+LIBXML2_CFLAGS
+LIBXML2_LIBS
 LIBXSM
 LIBGPM
 LIBGIF
@@ -11070,6 +11072,74 @@
 fi
 
 
+### Use libxml2 (-lxml2) if available
+HAVE_LIBXML2=no
+LIBXML2_LIBS=
+if test -n xml2-config; then
+  LIBXML2_CFLAGS="`xml2-config --cflags`"
+  SAVE_CFLAGS="$CFLAGS"
+  CFLAGS="$LIBXML2_CFLAGS $CFLAGS"
+  ac_fn_c_check_header_mongrel "$LINENO" "libxml/xmlexports.h" 
"ac_cv_header_libxml_xmlexports_h" "$ac_includes_default"
+if test "x$ac_cv_header_libxml_xmlexports_h" = x""yes; then :
+  { $as_echo "$as_me:${as_lineno-$LINENO}: checking for htmlReadMemory in 
-lxml2" >&5
+$as_echo_n "checking for htmlReadMemory in -lxml2... " >&6; }
+if test "${ac_cv_lib_xml2_htmlReadMemory+set}" = set; then :
+  $as_echo_n "(cached) " >&6
+else
+  ac_check_lib_save_LIBS=$LIBS
+LIBS="-lxml2 -lxml2 $LIBS"
+cat confdefs.h - <<_ACEOF >conftest.$ac_ext
+/* end confdefs.h.  */
+
+/* Override any GCC internal prototype to avoid an error.
+   Use char because int might match the return type of a GCC
+   builtin and then its argument prototype would still apply.  */
+#ifdef __cplusplus
+extern "C"
+#endif
+char htmlReadMemory ();
+int
+main ()
+{
+return htmlReadMemory ();
+  ;
+  return 0;
+}
+_ACEOF
+if ac_fn_c_try_link "$LINENO"; then :
+  ac_cv_lib_xml2_htmlReadMemory=yes
+else
+  ac_cv_lib_xml2_htmlReadMemory=no
+fi
+rm -f core conftest.err conftest.$ac_objext \
+    conftest$ac_exeext conftest.$ac_ext
+LIBS=$ac_check_lib_save_LIBS
+fi
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: 
$ac_cv_lib_xml2_htmlReadMemory" >&5
+$as_echo "$ac_cv_lib_xml2_htmlReadMemory" >&6; }
+if test "x$ac_cv_lib_xml2_htmlReadMemory" = x""yes; then :
+  HAVE_LIBXML2=yes
+fi
+
+fi
+
+
+
+  if test "${HAVE_LIBXML2}" = "yes"; then
+
+$as_echo "#define HAVE_LIBXML2 1" >>confdefs.h
+
+    LIBXML2_LIBS="-lxml2"
+    case "$LIBS" in
+      *-lxml2*) ;;
+      *)      LIBS="$LIBXML2_LIBS $LIBS" ;;
+    esac
+  fi
+  CFLAGS="$SAVE_CFLAGS"
+fi
+
+
+
 # If netdb.h doesn't declare h_errno, we must declare it by hand.
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether netdb declares 
h_errno" >&5
 $as_echo_n "checking whether netdb declares h_errno... " >&6; }

=== modified file 'configure.in'
--- configure.in        2010-08-23 12:54:09 +0000
+++ configure.in        2010-09-08 15:55:38 +0000
@@ -2535,6 +2535,29 @@
 fi
 AC_SUBST(LIBXSM)
 
+### Use libxml2 (-lxml2) if available
+HAVE_LIBXML2=no
+LIBXML2_LIBS=
+if test -n xml2-config; then
+  LIBXML2_CFLAGS="`xml2-config --cflags`"
+  SAVE_CFLAGS="$CFLAGS"
+  CFLAGS="$LIBXML2_CFLAGS $CFLAGS"
+  AC_CHECK_HEADER(libxml/xmlversion.h,
+    [AC_CHECK_LIB(xml2, htmlReadMemory, HAVE_LIBXML2=yes, , -lxml2)])
+
+  if test "${HAVE_LIBXML2}" = "yes"; then
+    AC_DEFINE(HAVE_LIBXML2, 1, [Define to 1 if you have the libxml2 library 
(-lxml2).])
+    LIBXML2_LIBS="-lxml2"
+    case "$LIBS" in
+      *-lxml2*) ;;
+      *)      LIBS="$LIBXML2_LIBS $LIBS" ;;
+    esac
+  fi
+  CFLAGS="$SAVE_CFLAGS"
+fi
+AC_SUBST(LIBXML2_LIBS)
+AC_SUBST(LIBXML2_CFLAGS)
+
 # If netdb.h doesn't declare h_errno, we must declare it by hand.
 AC_CACHE_CHECK(whether netdb declares h_errno,
               emacs_cv_netdb_declares_h_errno,

=== modified file 'src/ChangeLog'
--- src/ChangeLog       2010-09-05 02:06:39 +0000
+++ src/ChangeLog       2010-09-08 16:12:09 +0000
@@ -1,3 +1,9 @@
+2010-09-08  Lars Magne Ingebrigtsen  <address@hidden>
+
+       * xml.c: New file.
+       (Fhtml_parse_buffer): New function to interface to the libxml2
+       html parsing function.
+
 2010-09-05  Juanma Barranquero  <address@hidden>
 
        * biditype.h: Regenerate.

=== modified file 'src/Makefile.in'
--- src/Makefile.in     2010-08-17 21:19:11 +0000
+++ src/Makefile.in     2010-09-08 15:52:01 +0000
@@ -226,6 +226,9 @@
 IMAGEMAGICK_LIBS= @IMAGEMAGICK_LIBS@
 IMAGEMAGICK_CFLAGS= @IMAGEMAGICK_CFLAGS@
 
+LIBXML2_LIBS = @LIBXML2_LIBS@
+LIBXML2_CFLAGS = @LIBXML2_CFLAGS@
+
 
 ## widget.o if USE_X_TOOLKIT, otherwise empty.
 address@hidden@
@@ -320,7 +323,8 @@
 ## FIXME? MYCPPFLAGS only referenced in etc/DEBUG.
 ALL_CFLAGS=-Demacs -DHAVE_CONFIG_H $(MYCPPFLAGS) -I. -I${srcdir} \
   ${C_SWITCH_MACHINE} ${C_SWITCH_SYSTEM} ${C_SWITCH_X_SITE} \
-  ${C_SWITCH_X_SYSTEM} ${CFLAGS_SOUND} ${RSVG_CFLAGS} ${IMAGEMAGICK_CFLAGS} 
${DBUS_CFLAGS} \
+  ${C_SWITCH_X_SYSTEM} ${CFLAGS_SOUND} ${RSVG_CFLAGS} ${IMAGEMAGICK_CFLAGS} \
+  ${LIBXML2_CFLAGS} ${DBUS_CFLAGS} \
   ${GCONF_CFLAGS} ${FREETYPE_CFLAGS} ${FONTCONFIG_CFLAGS} \
   ${LIBOTF_CFLAGS} ${M17N_FLT_CFLAGS} ${DEPFLAGS} ${PROFILING_CFLAGS} \
   ${C_WARNINGS_SWITCH} ${CFLAGS}
@@ -349,7 +353,7 @@
        syntax.o $(UNEXEC_OBJ) bytecode.o \
        process.o callproc.o \
        region-cache.o sound.o atimer.o \
-       doprnt.o strftime.o intervals.o textprop.o composite.o md5.o \
+       doprnt.o strftime.o intervals.o textprop.o composite.o md5.o xml.o \
        $(MSDOS_OBJ) $(MSDOS_X_OBJ) $(NS_OBJ) $(CYGWIN_OBJ) $(FONT_OBJ)
 
 ## Object files used on some machine or other.
@@ -595,7 +599,8 @@
 ## duplicated symbols.  If the standard libraries were compiled
 ## with GCC, we might need LIB_GCC again after them.
 LIBES = $(LIBS) $(LIBX_BASE) $(LIBX_OTHER) $(LIBSOUND) \
-   $(RSVG_LIBS) ${IMAGEMAGICK_LIBS}  $(DBUS_LIBS) $(LIBGPM) $(LIBRESOLV) 
$(LIBS_SYSTEM) \
+   $(RSVG_LIBS) ${IMAGEMAGICK_LIBS} $(DBUS_LIBS) \
+   ${LIBXML2_LIBS} $(LIBGPM) $(LIBRESOLV) $(LIBS_SYSTEM) \
    $(LIBS_TERMCAP) $(GETLOADAVG_LIBS) ${GCONF_LIBS} ${LIBSELINUX_LIBS} \
    $(FREETYPE_LIBS) $(FONTCONFIG_LIBS) $(LIBOTF_LIBS) $(M17N_FLT_LIBS) \
    $(LIB_GCC) $(LIB_MATH) $(LIB_STANDARD) $(LIB_GCC)

=== modified file 'src/config.in'
--- src/config.in       2010-08-17 21:19:11 +0000
+++ src/config.in       2010-09-08 15:37:34 +0000
@@ -813,6 +813,9 @@
 /* Define to 1 if you have the SM library (-lSM). */
 #undef HAVE_X_SM
 
+/* Define to 1 if you have the libxml2 library (-lxml2). */
+#undef HAVE_LIBXML2
+
 /* Define to 1 if you want to use the X window system. */
 #undef HAVE_X_WINDOWS
 

=== modified file 'src/emacs.c'
--- src/emacs.c 2010-08-22 21:15:20 +0000
+++ src/emacs.c 2010-09-08 13:39:17 +0000
@@ -1543,6 +1543,7 @@
       syms_of_xselect ();
 #endif
 #endif /* HAVE_X_WINDOWS */
+      syms_of_xml ();
 
       syms_of_menu ();
 

=== modified file 'src/lisp.h'
--- src/lisp.h  2010-08-09 19:25:41 +0000
+++ src/lisp.h  2010-09-08 13:40:50 +0000
@@ -3559,6 +3559,9 @@
 /* Defined in xsmfns.c */
 extern void syms_of_xsmfns (void);
 
+/* Defined in xml.c */
+extern void syms_of_xml (void);
+
 /* Defined in xselect.c */
 EXFUN (Fx_send_client_event, 6);
 extern void syms_of_xselect (void);

=== added file 'src/xml.c'
--- src/xml.c   1970-01-01 00:00:00 +0000
+++ src/xml.c   2010-09-08 16:10:36 +0000
@@ -0,0 +1,131 @@
+/* Interface to libxml2.
+   Copyright (C) 2010 Free Software Foundation, Inc.
+
+This file is part of GNU Emacs.
+
+GNU Emacs is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
+
+#include <config.h>
+
+#ifdef HAVE_LIBXML2
+
+#include <sys/param.h>
+#include <stdio.h>
+#include <setjmp.h>
+#include <libxml/tree.h>
+#include <libxml/parser.h>
+#include <libxml/HTMLparser.h>
+
+#include "lisp.h"
+#include "systime.h"
+#include "sysselect.h"
+#include "frame.h"
+#include "buffer.h"
+
+Lisp_Object make_dom (xmlNode *node)
+{
+  Lisp_Object result = Qnil;
+  xmlNode *child;
+  xmlAttr *property;
+
+  if (node != NULL) {
+    result = Fcons (Fintern (build_string (node->name),
+                            Vobarray),
+                   Qnil);
+    property = node->properties;
+    while (property != NULL) {
+      if (property->children &&
+          property->children->content) {
+       char *pname = xmalloc(strlen(property->name) + 2);
+       *pname = ':';
+       strcpy(pname + 1, property->name);
+       result = Fcons (Fcons (Fintern (build_string (pname), Vobarray),
+                              build_string(property->children->content)),
+                       result);
+       xfree (pname);
+      }
+      property = property->next;
+    }
+    child = node->children;
+    while (child != NULL) {
+      result = Fcons (make_dom (child), result);
+      child = child->next;
+    }
+    if (node->content)
+      result = Fcons (Fcons (Fintern (build_string ("text"), Vobarray),
+                            build_string(node->content)),
+                     result);
+  }
+  return Fnreverse(result);
+}
+
+DEFUN ("html-parse-buffer", Fhtml_parse_buffer, Shtml_parse_buffer,
+       0, 1, 0,
+       doc: /* Parse the buffer as an HTML document and return the parse 
tree.*/)
+  (Lisp_Object object)
+{
+  xmlDoc *doc;
+  struct buffer *buffer;
+  xmlNode *node;
+  unsigned char *string, *s;
+  Lisp_Object result;
+  int ibeg, iend;
+
+  LIBXML_TEST_VERSION
+       
+  if (NILP (object))
+    buffer = current_buffer;
+  else {
+    CHECK_BUFFER (object);
+    buffer = XBUFFER (object);
+  }
+
+  ibeg = CHAR_TO_BYTE (XFASTINT (Fpoint_min ()));
+  iend = CHAR_TO_BYTE (XFASTINT (Fpoint_max ()));
+  move_gap_both (XFASTINT (Fpoint_min ()), ibeg);
+  
+  string = (unsigned char *) xmalloc (iend - ibeg + 1);
+  s = string;
+  
+  while (ibeg < iend) {
+    *s++ = *(BYTE_POS_ADDR (ibeg));
+    ibeg++;
+  }
+  *s = 0;
+  
+  doc = htmlReadMemory (string, strlen(string), "", "utf-8", 0);
+
+  if (doc == NULL)
+    return Qnil;
+
+  node = xmlDocGetRootElement (doc);
+  result = make_dom (node);
+  
+  xmlFreeDoc(doc);
+  xmlCleanupParser();
+      
+  return result;
+}
+
+
+/***********************************************************************
+                           Initialization
+ ***********************************************************************/
+void
+syms_of_xml (void)
+{
+  defsubr (&Shtml_parse_buffer);
+}
+
+#endif /* HAVE_LIBXML2 */

This compiles and works for me, but I'm not really an Emacs internals
expert.  Ahem.

Or an autoconf one, for that matter.  ./configure finds the stuff it's
looking for, but I get this warning:

-------
address@hidden ~/src/emacs/trunk]$ ./configure  | grep xml
checking libxml/xmlversion.h usability... yes
checking libxml/xmlversion.h presence... no
configure: WARNING: libxml/xmlversion.h: accepted by the compiler, rejected by 
the preprocessor!
configure: WARNING: libxml/xmlversion.h: proceeding with the compiler's result
checking for libxml/xmlversion.h... yes
checking for htmlReadMemory in -lxml2... yes
-------

I'm not sure what that means...

-- 
(domestic pets only, the antidote for overdose, milk.)
  address@hidden * Lars Magne Ingebrigtsen

reply via email to

[Prev in Thread] Current Thread [Next in Thread]