emacs-diffs
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Emacs-diffs] Changes to emacs/lisp/gnus/nnweb.el


From: ShengHuo ZHU
Subject: [Emacs-diffs] Changes to emacs/lisp/gnus/nnweb.el
Date: Thu, 07 Feb 2002 10:28:36 -0500

Index: emacs/lisp/gnus/nnweb.el
diff -c emacs/lisp/gnus/nnweb.el:1.8 emacs/lisp/gnus/nnweb.el:1.9
*** emacs/lisp/gnus/nnweb.el:1.8        Wed Nov  8 16:04:28 2000
--- emacs/lisp/gnus/nnweb.el    Tue Oct 30 23:16:51 2001
***************
*** 1,5 ****
  ;;; nnweb.el --- retrieving articles via web search engines
! ;; Copyright (C) 1996, 1997, 1998, 1999, 2000
  ;;        Free Software Foundation, Inc.
  
  ;; Author: Lars Magne Ingebrigtsen <address@hidden>
--- 1,5 ----
  ;;; nnweb.el --- retrieving articles via web search engines
! ;; Copyright (C) 1996, 1997, 1998, 1999, 2000, 2001
  ;;        Free Software Foundation, Inc.
  
  ;; Author: Lars Magne Ingebrigtsen <address@hidden>
***************
*** 55,79 ****
  (defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/")
    "Where nnweb will save its files.")
  
! (defvoo nnweb-type 'dejanews
    "What search engine type is being used.
! Valid types include `dejanews', `dejanewsold', `reference',
  and `altavista'.")
  
  (defvar nnweb-type-definition
!   '((dejanews
       (article . ignore)
!      (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text";)
!      (map . nnweb-dejanews-create-mapping)
!      (search . nnweb-dejanews-search)
!      (address . "http://www.deja.com/=dnc/qs.xp";)
!      (identifier . nnweb-dejanews-identity))
!     (dejanewsold
       (article . ignore)
!      (map . nnweb-dejanews-create-mapping)
!      (search . nnweb-dejanewsold-search)
!      (address . "http://www.deja.com/dnquery.xp";)
!      (identifier . nnweb-dejanews-identity))
      (reference
       (article . nnweb-reference-wash-article)
       (map . nnweb-reference-create-mapping)
--- 55,102 ----
  (defvoo nnweb-directory (nnheader-concat gnus-directory "nnweb/")
    "Where nnweb will save its files.")
  
! (defvoo nnweb-type 'google
    "What search engine type is being used.
! Valid types include `google', `dejanews', `dejanewsold', `reference',
  and `altavista'.")
  
  (defvar nnweb-type-definition
!   '(
!     (google
!      ;;(article . nnweb-google-wash-article)
!      ;;(id . "http://groups.google.com/groups?as_umsgid=%s";)
       (article . ignore)
!      (id . "http://groups.google.com/groups?selm=%s&output=gplain";)
!      ;;(reference . nnweb-google-reference)
!      (reference . identity)
!      (map . nnweb-google-create-mapping)
!      (search . nnweb-google-search)
!      (address . "http://groups.google.com/groups";)
!      (identifier . nnweb-google-identity))
!     (dejanews ;; alias of google
!      ;;(article . nnweb-google-wash-article)
!      ;;(id . "http://groups.google.com/groups?as_umsgid=%s";)
       (article . ignore)
!      (id . "http://groups.google.com/groups?selm=%s&output=gplain";)
!      ;;(reference . nnweb-google-reference)
!      (reference . identity)
!      (map . nnweb-google-create-mapping)
!      (search . nnweb-google-search)
!      (address . "http://groups.google.com/groups";)
!      (identifier . nnweb-google-identity))
! ;;;     (dejanews
! ;;;      (article . ignore)
! ;;;      (id . "http://search.dejanews.com/msgid.xp?MID=%s&fmt=text";)
! ;;;      (map . nnweb-dejanews-create-mapping)
! ;;;      (search . nnweb-dejanews-search)
! ;;;      (address . "http://www.deja.com/=dnc/qs.xp";)
! ;;;      (identifier . nnweb-dejanews-identity))
! ;;;     (dejanewsold
! ;;;      (article . ignore)
! ;;;      (map . nnweb-dejanews-create-mapping)
! ;;;      (search . nnweb-dejanewsold-search)
! ;;;      (address . "http://www.deja.com/dnquery.xp";)
! ;;;      (identifier . nnweb-dejanews-identity))
      (reference
       (article . nnweb-reference-wash-article)
       (map . nnweb-reference-create-mapping)
***************
*** 124,129 ****
--- 147,154 ----
  
  (deffoo nnweb-request-scan (&optional group server)
    (nnweb-possibly-change-server group server)
+   (if nnweb-ephemeral-p
+       (setq nnweb-hashtb (gnus-make-hashtable 4095)))
    (funcall (nnweb-definition 'map))
    (unless nnweb-ephemeral-p
      (nnweb-write-active)
***************
*** 134,142 ****
    (when (and group
             (not (equal group nnweb-group))
             (not nnweb-ephemeral-p))
      (let ((info (assoc group nnweb-group-alist)))
        (when info
-       (setq nnweb-group group)
        (setq nnweb-type (nth 2 info))
        (setq nnweb-search (nth 3 info))
        (unless dont-check
--- 159,168 ----
    (when (and group
             (not (equal group nnweb-group))
             (not nnweb-ephemeral-p))
+     (setq nnweb-group group
+         nnweb-articles nil)
      (let ((info (assoc group nnweb-group-alist)))
        (when info
        (setq nnweb-type (nth 2 info))
        (setq nnweb-search (nth 3 info))
        (unless dont-check
***************
*** 175,191 ****
                (and (stringp article)
                     (nnweb-definition 'id t)
                     (let ((fetch (nnweb-definition 'id))
!                          art)
                       (when (string-match "^<\\(.*\\)>$" article)
                         (setq art (match-string 1 article)))
!                      (and fetch
!                           art
!                           (mm-with-unibyte-current-buffer
!                             (nnweb-fetch-url
!                              (format fetch article)))))))
        (unless nnheader-callback-function
!         (funcall (nnweb-definition 'article))
!         (nnweb-decode-entities))
        (nnheader-report 'nnweb "Fetched article %s" article)
        (cons group (and (numberp article) article))))))
  
--- 201,219 ----
                (and (stringp article)
                     (nnweb-definition 'id t)
                     (let ((fetch (nnweb-definition 'id))
!                          art active)
                       (when (string-match "^<\\(.*\\)>$" article)
                         (setq art (match-string 1 article)))
!                      (when (and fetch art)
!                        (setq url (format fetch art))
!                        (mm-with-unibyte-current-buffer
!                          (nnweb-fetch-url url))
!                        (if (nnweb-definition 'reference t)
!                            (setq article
!                                  (funcall (nnweb-definition
!                                            'reference) article)))))))
        (unless nnheader-callback-function
!         (funcall (nnweb-definition 'article)))
        (nnheader-report 'nnweb "Fetched article %s" article)
        (cons group (and (numberp article) article))))))
  
***************
*** 290,299 ****
        (nnweb-open-server server)))
    (unless nnweb-group-alist
      (nnweb-read-active))
    (when group
      (when (and (not nnweb-ephemeral-p)
!              (not (equal group nnweb-group)))
!       (setq nnweb-hashtb (gnus-make-hashtable 4095))
        (nnweb-request-group group nil t))))
  
  (defun nnweb-init (server)
--- 318,328 ----
        (nnweb-open-server server)))
    (unless nnweb-group-alist
      (nnweb-read-active))
+   (unless nnweb-hashtb
+     (setq nnweb-hashtb (gnus-make-hashtable 4095)))
    (when group
      (when (and (not nnweb-ephemeral-p)
!              (equal group nnweb-group))
        (nnweb-request-group group nil t))))
  
  (defun nnweb-init (server)
***************
*** 393,399 ****
                                     (car (rassq (string-to-number
                                                  (match-string 2 date))
                                                 parse-time-months))
!                                    (match-string 3 date) 
                                     (match-string 1 date)))
                (setq date "Jan 1 00:00:00 0000"))
              (incf i)
--- 422,428 ----
                                     (car (rassq (string-to-number
                                                  (match-string 2 date))
                                                 parse-time-months))
!                                    (match-string 3 date)
                                     (match-string 1 date)))
                (setq date "Jan 1 00:00:00 0000"))
              (incf i)
***************
*** 559,564 ****
--- 588,594 ----
        (while (search-forward "," nil t)
          (replace-match " " t t)))
        (widen)
+       (nnweb-decode-entities)
        (set-marker body nil))))
  
  (defun nnweb-reference-search (search)
***************
*** 663,669 ****
        (while (re-search-forward "<address@hidden([^\"]+\\)\">[0-9]+</A>" nil 
t)
        (replace-match "&lt;\\1&gt; " t)))
      (widen)
!     (nnweb-remove-markup)))
  
  (defun nnweb-altavista-search (search &optional part)
    (url-insert-file-contents
--- 693,700 ----
        (while (re-search-forward "<address@hidden([^\"]+\\)\">[0-9]+</A>" nil 
t)
        (replace-match "&lt;\\1&gt; " t)))
      (widen)
!     (nnweb-remove-markup)
!     (nnweb-decode-entities)))
  
  (defun nnweb-altavista-search (search &optional part)
    (url-insert-file-contents
***************
*** 683,695 ****
    t)
  
  ;;;
  ;;; General web/w3 interface utility functions
  ;;;
  
  (defun nnweb-insert-html (parse)
    "Insert HTML based on a w3 parse tree."
    (if (stringp parse)
!       (insert parse)
      (insert "<" (symbol-name (car parse)) " ")
      (insert (mapconcat
             (lambda (param)
--- 714,860 ----
    t)
  
  ;;;
+ ;;; Deja bought by google.com
+ ;;;
+ 
+ (defun nnweb-google-wash-article ()
+   (let ((case-fold-search t) url)
+     (goto-char (point-min))
+     (re-search-forward "^<pre>" nil t)
+     (narrow-to-region (point-min) (point))
+     (search-backward "<table " nil t 2)
+     (delete-region (point-min) (point))
+     (if (re-search-forward "Search Result [0-9]+" nil t)
+       (replace-match ""))
+     (if (re-search-forward "View complete thread ([0-9]+ articles?)" nil t)
+       (replace-match ""))
+     (goto-char (point-min))
+     (while (search-forward "<br>" nil t)
+       (replace-match "\n"))
+     (nnweb-remove-markup)
+     (goto-char (point-min))
+     (while (re-search-forward "^[ \t]*\n" nil t)
+       (replace-match ""))
+     (goto-char (point-max))
+     (insert "\n")
+     (widen)
+     (narrow-to-region (point) (point-max))
+     (search-forward "</pre>" nil t)
+     (delete-region (point) (point-max))
+     (nnweb-remove-markup)
+     (widen)))
+ 
+ (defun nnweb-google-parse-1 (&optional Message-ID)
+   (let ((i 0)
+       (case-fold-search t)
+       (active (cadr (assoc nnweb-group nnweb-group-alist)))
+       Subject Score Date Newsgroups From
+       map url mid)
+     (unless active
+       (push (list nnweb-group (setq active (cons 1 0))
+                 nnweb-type nnweb-search)
+           nnweb-group-alist))
+     ;; Go through all the article hits on this page.
+     (goto-char (point-min))
+     (while (re-search-forward
+           "a href=/groups\\(\\?[^ \">]*selm=\\([^ &\">]+\\)\\)" nil t)
+       (setq mid (match-string 2)
+           url (format 
+                "http://groups.google.com/groups?selm=%s&output=gplain"; mid))
+       (narrow-to-region (search-forward ">" nil t)
+                       (search-forward "</a>" nil t))
+       (nnweb-remove-markup)
+       (nnweb-decode-entities)
+       (setq Subject (buffer-string))
+       (goto-char (point-max))
+       (widen)
+       (forward-line 1)
+       (when (looking-at "<br><font[^>]+>")
+       (goto-char (match-end 0)))
+       (if (not (looking-at "<a[^>]+>"))
+         (skip-chars-forward " \t")
+       (narrow-to-region (point)
+                         (search-forward "</a>" nil t))
+       (nnweb-remove-markup)
+       (nnweb-decode-entities)
+       (setq Newsgroups (buffer-string))
+       (goto-char (point-max))
+       (widen)
+       (skip-chars-forward "- \t"))
+       (when (looking-at
+            "\\([0-9]+[/ ][A-Za-z]+[/ ][0-9]+\\)[ \t]*by[ \t]*\\([^<]*\\) - 
<a")
+       (setq From (match-string 2)
+             Date (match-string 1)))
+       (forward-line 1)
+       (incf i)
+       (unless (nnweb-get-hashtb url)
+       (push
+        (list
+         (incf (cdr active))
+         (make-full-mail-header
+          (cdr active) (if Newsgroups
+                           (concat  "(" Newsgroups ") " Subject)
+                         Subject)
+          From Date (or Message-ID mid)
+          nil 0 0 url))
+        map)
+       (nnweb-set-hashtb (cadar map) (car map))))
+     map))
+ 
+ (defun nnweb-google-reference (id)
+   (let ((map (nnweb-google-parse-1 id)) header)
+     (setq nnweb-articles
+         (nconc nnweb-articles map))
+     (when (setq header (cadar map))
+       (mm-with-unibyte-current-buffer
+       (nnweb-fetch-url (mail-header-xref header)))
+       (caar map))))
+ 
+ (defun nnweb-google-create-mapping ()
+   "Perform the search and create an number-to-url alist."
+   (save-excursion
+     (set-buffer nnweb-buffer)
+     (erase-buffer)
+     (when (funcall (nnweb-definition 'search) nnweb-search)
+       (let ((more t))
+         (while more
+           (setq nnweb-articles
+                 (nconc nnweb-articles (nnweb-google-parse-1)))
+           ;; FIXME: There is more.
+           (setq more nil))
+         ;; Return the articles in the right order.
+         (setq nnweb-articles
+               (sort nnweb-articles 'car-less-than-car))))))
+ 
+ (defun nnweb-google-search (search)
+   (nnweb-insert
+    (concat
+     (nnweb-definition 'address)
+     "?"
+     (nnweb-encode-www-form-urlencoded
+      `(("q" . ,search)
+        ("num". "100")
+        ("hq" . "")
+        ("hl" . "")
+        ("lr" . "")
+        ("safe" . "off")
+        ("sites" . "groups")))))
+   t)
+ 
+ (defun nnweb-google-identity (url)
+   "Return an unique identifier based on URL."
+   (if (string-match "selm=\\([^ &>]+\\)" url)
+       (match-string 1 url)
+     url))
+ 
+ ;;;
  ;;; General web/w3 interface utility functions
  ;;;
  
  (defun nnweb-insert-html (parse)
    "Insert HTML based on a w3 parse tree."
    (if (stringp parse)
!       (insert (nnheader-string-as-multibyte parse))
      (insert "<" (symbol-name (car parse)) " ")
      (insert (mapconcat
             (lambda (param)
***************
*** 729,735 ****
    (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t)
      (let ((elem (if (eq (aref (match-string 1) 0) ?\#)
                        (let ((c
!                              (string-to-number (substring 
                                                  (match-string 1) 1))))
                          (if (mm-char-or-char-int-p c) c 32))
                      (or (cdr (assq (intern (match-string 1))
--- 894,900 ----
    (while (re-search-forward "&\\(#[0-9]+\\|[a-z]+\\);" nil t)
      (let ((elem (if (eq (aref (match-string 1) 0) ?\#)
                        (let ((c
!                              (string-to-number (substring
                                                  (match-string 1) 1))))
                          (if (mm-char-or-char-int-p c) c 32))
                      (or (cdr (assq (intern (match-string 1))
***************
*** 739,747 ****
        (setq elem (char-to-string elem)))
        (replace-match elem t t))))
  
! (defun nnweb-decode-entities-string (str)
    (with-temp-buffer
!     (insert str)
      (nnweb-decode-entities)
      (buffer-substring (point-min) (point-max))))
  
--- 904,912 ----
        (setq elem (char-to-string elem)))
        (replace-match elem t t))))
  
! (defun nnweb-decode-entities-string (string)
    (with-temp-buffer
!     (insert string)
      (nnweb-decode-entities)
      (buffer-substring (point-min) (point-max))))
  
***************
*** 760,771 ****
    "Insert the contents from an URL in the current buffer.
  If FOLLOW-REFRESH is non-nil, redirect refresh url in META."
    (let ((name buffer-file-name))
!     (if follow-refresh
        (save-restriction
          (narrow-to-region (point) (point))
          (url-insert-file-contents url)
          (goto-char (point-min))
!         (when (re-search-forward 
                 "<meta[ \t\r\n]*http-equiv=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" 
nil t)
            (let ((url (match-string 1)))
              (delete-region (point-min) (point-max))
--- 925,936 ----
    "Insert the contents from an URL in the current buffer.
  If FOLLOW-REFRESH is non-nil, redirect refresh url in META."
    (let ((name buffer-file-name))
!     (if follow-refresh 
        (save-restriction
          (narrow-to-region (point) (point))
          (url-insert-file-contents url)
          (goto-char (point-min))
!         (when (re-search-forward
                 "<meta[ \t\r\n]*http-equiv=\"Refresh\"[^>]*URL=\\([^\"]+\\)\"" 
nil t)
            (let ((url (match-string 1)))
              (delete-region (point-min) (point-max))
***************
*** 821,826 ****
--- 986,996 ----
        (when (and (consp element)
                 (listp (cdr element)))
        (nnweb-text-1 element)))))
+ 
+ (defun nnweb-replace-in-string (string match newtext)
+   (while (string-match match string)
+     (setq string (replace-match newtext t t string)))
+   string)
  
  (provide 'nnweb)
  



reply via email to

[Prev in Thread] Current Thread [Next in Thread]