[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
(gumm personal ...) spider 0.90
From: |
Thien-Thi Nguyen |
Subject: |
(gumm personal ...) spider 0.90 |
Date: |
Fri, 14 Dec 2001 00:45:28 -0800 |
hello,
see below for a sneak peek at the spider that collects info for (gumm
personal ...). for some hint on what this means, see the protocol:
http://www.glug.org/gumm/personal/protocol.html
note that the protocol is not yet linked from the index in that
directory; that will happen at the Right Moment and Not Sooner... on
the other hand, the registration cgi is already in place for the
adventurous -- that code follows the spider.
feedback requested on the protocol, the spider, the cgi, and/or gumm!
(please remove guile-sources from non-source postings.)
happy hacking,
thi
________________________________________________________________
#!/bin/sh
# -*- scheme -*- time-stamp: <2001-12-14 00:19:39 ttn>
exec guile -s $0 "$@"
!#
;;; (gumm personal ...) spider version 0.90
;;; Copyright (C) 2001 Thien-Thi Nguyen
;;; This program is provided under the terms of the GNU GPL, version 2.
;;; See http://www.fsf.org/copyleft/gpl.html for details.
(debug-enable 'debug 'backtrace)
(define registration-queue-url "http://giblet/bin/gumm.cgi?get=1")
(define *db* "ttn")
(define *table* "gumm_personal_spider_food")
(define *defs* '((url text "UNIQUE")
(lastcheck timestamp)
(unr_count int4)
(config text)))
;;;;;; zonkme-when-installed: ttn-pers-scheme-0.28
(set! %load-path (cons "/home/ttn/build/ttn-pers-scheme" %load-path))
(use-modules ((ttn pgtable) :select (pgtable-manager where-clausifier)))
(define (where . args)
(where-clausifier (apply string-append args)))
(define (M)
(or (object-property M 'pgtable-manager)
(let ((m (pgtable-manager *db* *table* *defs*)))
(set-object-property! M 'pgtable-manager m)
m)))
(define (Msel . args) (apply ((M) 'select) args))
(define (Mupd . args) (apply ((M) 'update-col) args))
(define (Mins alist) (((M) 'insert-alist) alist))
(define (Mdel . args)
(((M) 'delete-rows) (apply where args)))
(use-modules ((ice-9 common-list) :select (every)))
(define *required-keys* '(trigger version release author tarball))
(define (url->raw-config url)
(let ((raw-config (false-if-exception
(with-input-from-string (www:get url)
(lambda () (read))))))
(and (list? raw-config)
(every pair? raw-config)
(every (lambda (key)
(assq key raw-config))
*required-keys*)
raw-config)))
(define (w/o-trigger&->string trigger config)
(object->string (delete (cons 'trigger trigger) config)))
(define (url<-config config)
(assq-ref config 'trigger))
(define (elaborated-config . selection)
(map (lambda (top-level) ; promote config
(let ((config (assq-ref top-level 'config)))
(append (delete (assq 'config top-level) top-level)
(with-input-from-string config
(lambda () (read))))))
(((M) 'tuples-result->alists) (apply Msel selection))))
(define (elaborated-config-1-url url)
(car (elaborated-config "*" (where "url = '" url "'"))))
(use-modules ((ttn echo) :select (echo)))
;;;---------------------------------------------------------------------------
;;; list
(define (list-one alist)
(echo '------------)
(for-each (lambda (pair)
(format #t "~A -- ~A~%" (car pair) (cdr pair)))
(let ((len (lambda (x)
(string-length
(symbol->string (car x))))))
(sort alist ;;; cheesy
(lambda (a b)
(< (len a) (len b)))))))
(define (list!)
(for-each list-one (elaborated-config "*")))
;;;---------------------------------------------------------------------------
;;; add
(use-modules ((www main) :select (www:get)))
(define (read/clear-registration-queue!)
(with-input-from-string (www:get registration-queue-url)
(lambda ()
(let loop ((ent (read)) (acc '()))
(if (eof-object? ent)
(reverse acc)
(loop (read) (cons (cadr ent) acc))))))) ; ugh
(use-modules ((ttn echo) :select (echo echo-n)))
(define (add-one! url config)
(Mins `((url . ,url)
(unr_count . 0)
(lastcheck . ,(current-time))
(config . ,(w/o-trigger&->string url config)))))
(use-modules (database postgres))
(define (add-one-maybe! url)
(echo-n "adding:" url "... ")
(let ((rv (cond ((url->raw-config url)
=> (lambda (raw)
;; integrated redundancy elimination / final check
(and (equal? url (url<-config raw))
(pg-result-status (add-one! url raw)))))
(else #f))))
(echo (or rv "error"))
(eq? 'PGRES_COMMAND_OK rv)))
(use-modules ((ice-9 common-list) :select (count-if))
((ttn echo) :select (echo)))
(define (add!)
(let* ((candidates (read/clear-registration-queue!))
(new (count-if add-one-maybe! candidates)))
(echo "new:" (length candidates) "candidates" new "added")))
;;;---------------------------------------------------------------------------
;;; collect
(use-modules ((ttn echo) :select (echo echow)))
(define (collect-current-url url) ; current means already in db
(let ((update! (lambda (url cols vals)
(Mupd cols vals (string-append "url = '" url "'")))))
(cond ((url->raw-config url)
=> (lambda (raw)
;; on trigger change, discard old trigger
(let ((nurl (url<-config raw)))
(or (equal? url nurl)
(begin
(update! url
'(url unr_count)
`(,nurl 0))
(set! url nurl))))
(update! url
'(lastcheck config)
`(,(current-time) ,(w/o-trigger&->string url raw)))
url))
(else
(update! url '(unr_count) (list (sql-pre "unr_count + 1")))
#f))))
(use-modules ((ttn echo) :select (echo)))
(define (note-changes keys before-lookup after-lookup)
(echo (after-lookup 'url))
(for-each (lambda (key)
(let ((b (before-lookup key))
(a (after-lookup key)))
(or (equal? b a)
(echo "changed:" key "\n" b "\n" a))))
keys))
(use-modules ((ice-9 common-list) :select (pick union)))
(define (collect!)
(let* ((all (elaborated-config "*"))
(make-lookup (lambda (config)
(lambda (key)
(assq-ref config key))))
(need-to-look? (lambda (config)
(let* ((lookup (make-lookup config))
(last (lookup 'lastcheck))
(freq (or (lookup 'spider-frequency) 1))
(next (+ last (* 60 60 freq))))
(<= next (current-time)))))
(needy (pick need-to-look? all))
(unreachable 0))
(for-each (lambda (needy-one)
(let ((lookup (make-lookup needy-one)))
(cond ((collect-current-url (lookup 'url))
=> (lambda (url) ;;; may be different
(let ((after (elaborated-config-1-url url)))
(note-changes (union (map car needy-one)
(map car after))
lookup
(make-lookup after)))))
(else
(let ((toomuch (or (lookup 'unreachable-delist) 24)))
(and (<= toomuch (1+ (lookup 'unr_count)))
(echo "toomuch:" toomuch url "..."
(Mdel "url = '" url "'"))))
(set! unreachable (1+ unreachable))))))
needy)
(echo "collect:"
(length all) "total"
(length needy) "attempts"
unreachable "unreachable")))
;;;---------------------------------------------------------------------------
;;; dispatch
(define (do-command command)
(case command
((list) (list!))
((add) (add!))
((collect) (collect!))
(else (error "bad command:" command))))
(define (command)
(string->symbol (cadr (command-line))))
(do-command (command))
;;; .spider ends here
__________________________________________________________
#!/bin/sh
# -*- scheme -*- time-stamp: <2001-12-06 21:40:39 ttn>
PATH=/home/ttn/local/bin:/usr/local/bin:$PATH
exec guile -s $0 "$@"
!#
;;; gumm.cgi version 0.90
;;; Copyright (C) 2001 Thien-Thi Nguyen
;;; This program is provided under the terms of the GNU GPL, version 2.
;;; See http://www.fsf.org/copyleft/gpl.html for details.
(define queue "var/gumm/registration-queue")
(use-modules (www cgi) (ttn echo))
(echo "Content-type: text/plain")
(echo)
(cgi:init)
(cond ((cgi:value "get")
(flush-all-ports)
(system (string-append "cat " queue))
(with-output-to-file queue
(lambda () (echo ";;; last get/clear:"
(strftime "%c" (gmtime (current-time))))))
(exit #t)))
(define url (cgi:value "register"))
(cond ((not url)
(echo "sorry")
(exit #f)))
(echo-n "Registration in progress... ")
(define start (current-time))
(use-modules (www url))
(define parsed (url:parse url))
(define (bail-if v)
(cond (v (echo "sorry, invalid url")
(exit #f))))
;;(echow parsed)
(bail-if (not (eq? 'http (url:scheme parsed))))
(define host (url:host parsed))
(define port (url:port parsed))
(define path (url:path parsed))
;;(echo 'port port 'host host 'path path)
(bail-if (or (not host)
(not path)
(string-index path #\space)
(string-index path #\newline)))
(use-modules (www http))
(define cnxn (false-if-exception (http:open host port)))
;;(echo 'cnxn cnxn)
(bail-if (not cnxn))
(let ((p (open-file queue "a")))
(with-output-to-port p
(lambda ()
(echow (list (current-time) url)))))
(let ((finish (current-time)))
(echo "done," (- finish start) "seconds.")
(echo))
(echo "OK, looks like:")
(echo)
(echo " " url)
(echo)
(echo "has been added to the GUMM registration queue.")
(echo "Wait a couple hours to see if the spider is happy.")
(exit #t)
;;; gumm.cgi ends here
[Prev in Thread] |
Current Thread |
[Next in Thread] |
- (gumm personal ...) spider 0.90,
Thien-Thi Nguyen <=