chicken-users
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Chicken-users] Save the Gazette!


From: Andy Bennett
Subject: Re: [Chicken-users] Save the Gazette!
Date: Thu, 17 Feb 2011 16:32:44 +0000
User-agent: Mozilla-Thunderbird 2.0.0.22 (X11/20090707)

Hi,

> Super special bonus point:
> 
> 5) Write a script that, given a date range, parses the mailing list
> archive into wiki markup for a list of links to the posts in the
> archive, along with links to the user's pages as per (4), grouped by thread.

The attached script will process the web-based list archives
(http://lists.nongnu.org/archive/html/chicken-users/ ) for the last 8
days worth of messages.

It will group them by thread, sort them with the most active threads at
the top and then provide links to the individual messages, grouped by
author.

It does not link to the users' pages.


A sample output is below:

-----
* Using a macro definition within a explicit renaming macro.    (4 messages)
** Peter Bex
***
[[http://lists.nongnu.org/archive/html/chicken-users/2011-02/msg00042.html|2011/02/11]]

** Patrick Li
***
[[http://lists.nongnu.org/archive/html/chicken-users/2011-02/msg00041.html|2011/02/11]]
[[http://lists.nongnu.org/archive/html/chicken-users/2011-02/msg00049.html|2011/02/12]]

** Felix
***
[[http://lists.nongnu.org/archive/html/chicken-users/2011-02/msg00047.html|2011/02/12]]

-----




Regards,
@ndy

-- 
address@hidden
http://www.ashurst.eu.org/
0x7EBA75FF

(use http-client html-parser sxpath srfi-69 srfi-19)

(define base-url "http://lists.nongnu.org/archive/html/chicken-users/";)

; Fetch the mail archive's index
(define tree (with-input-from-request base-url #f html->sxml))
(define month-list ((sxpath `(// li)) tree))

; The last week might span both this month and last month
;(define interesting-months `(,(car month-list) ,(cadr month-list)))
(define interesting-months `(,(car month-list)
                              ,(cadr month-list)
                              ))

; Fetch the thread listings for the interesting-months
(define (get-thread-path snippet)
  (cadadr ((sxpath `(// a @ href)) snippet)))

; Filter the HTML so that we only have the thread listings. Then convert it to 
SXML
(define thread-trees (map (lambda (thread-path)
                            (with-input-from-string (;apply
                                                     string-intersperse
                                                     (with-input-from-request 
(string-append base-url thread-path) #f
                                                                              
(lambda () (let ((flag #f))
                                                                                
           (map
                                                                                
             (lambda (line)

                                                                                
               (cond ((string-match "<hr [^>]*>" line)
                                                                                
                      (begin
                                                                                
                        (set! flag (not flag))
                                                                                
                        line))
                                                                                
                     (flag line)
                                                                                
                     (else ""))
                                                                                
               ;(string-substitute "<input [^>]*>" "" line)
                                                                                
               ;(string-substitute "(<input [^>]*>)|(<p>)" "" line)
                                                                                
               )
                                                                                
             (read-lines))
                                                                                
           )
                                                                                
)))
                              html->sxml))
                          (map get-thread-path interesting-months)))


; Canonicalise the subject for each message and generate a list of (subject, 
author, date, link)s
(define messages ((sxpath `(// li)) thread-trees))

(define message-list
  (map (lambda (message)
         (if (> (length message) 4)
           (let ((link (car ((sxpath `(// a @ href *text*)) (second message))))
                 (subject (string-substitute "[ \t]+" " " (string-substitute 
(string-append "(Re: )?" (regexp-escape "[Chicken-users] ") "(Re: )?") "" (car 
((sxpath `(// *text*)) (second message)))) 'all ))
                 (author (second (fourth message)))
                 (date (second (sixth message))))
             `(,subject ,author ,date ,link)
             )
           '()
           ))
       messages))

; Filter out everything except messages from the last week
(define (sent-this-week? date-string)
  (let ((date (scan-date date-string "~Y~m~d"))
        (last-week (date-subtract-duration (current-date) (make-duration #:days 
8))))
    (if (>= (date-compare date last-week) 0)
      #t
      #f)))

(define filtered-message-list (map (lambda (message)
                                     (if (= (length message) 4)
                                       (if (sent-this-week? (third message))
                                         message
                                         '())
                                       '()))
                                   message-list))

; Regroup the filtered-message-list by subject then author
(define message-hash
  (fold
    (lambda (msg summary)
      (if (not (eq? msg '()))
        (begin
          (hash-table-update! (second summary) (first msg)
                              (lambda (identity)
                                (hash-table-update! identity (car (cdr msg))
                                                    (lambda (identity) (append 
identity `(,(cddr msg))))
                                                    (lambda () '())
                                                    ) identity)
                              (lambda () (make-hash-table))
                              )
          (hash-table-update! (first summary) (first msg)
                              (lambda (identity) (+ identity 1))
                              (lambda () 0))
          ))
      summary )
    `(,(make-hash-table) ,(make-hash-table))
    filtered-message-list))


; Render a wikified list of threads and links to messages
(define url-regex (regexp "(....)/(..)/.."))
(map
  (lambda (subj) (
                  (lambda (subj value)
                    (format #t "* ~A    (~A messages)" subj (hash-table-ref 
(first message-hash) subj))
                    (newline)
                    (hash-table-walk value (lambda (key value)
                                             (format #t "** ~A" key )
                                             (newline)
                                             (format #t "*** ")
                                             (map (lambda (value)
                                                    (format #t "[[~A~A/~A|~A]] 
" base-url (string-substitute url-regex "\\1-\\2" (first value)) (second value) 
(first value))) value)
                                             (newline)
                                             ))
                    (newline))
                  subj (hash-table-ref (second message-hash) subj)))
  (sort (hash-table-keys (first message-hash)) (lambda (a b) (> (hash-table-ref 
(first message-hash) a) (hash-table-ref (first message-hash) b)))))

Attachment: signature.asc
Description: OpenPGP digital signature


reply via email to

[Prev in Thread] Current Thread [Next in Thread]