(module database mzscheme
(require (lib "spgsql.ss" "spgsql")
(lib "class.ss")
(lib "process.ss")
(prefix db: "configure.scm"))
(provide login-valid?
update-ranking
import-document
export-document
process-search-query)
(define (get-connection)
(or (current-connection)
(let [(connection (connect db:host db:port db:database
db:user db:password))]
(current-connection connection)
(send connection use-type-conversions #t)
connection)))
(define (login-valid? username password)
(positive?
(send (get-connection) query-value
(sql-format "select count(*) from dms_user"
"where username = ~a and password = ~a"
`(string ,username)
`(string ,password)))))
(define (import-document user mime-type file-contents description)
(let* [(mime-type (if (string=? mime-type "none")
(guess-mime-type file-contents) mime-type))
(sql-dir (build-path db:web-root "servlets/dms/sql"))
(tmp-file-relative
(build-path "tmp" user
(string-append "tmp" "."
(get-file-extension mime-type))))]
(current-directory sql-dir)
(with-output-to-file tmp-file-relative
(lambda () (display file-contents)) 'truncate/replace)
(let [(oid (send (get-connection) query-value
(sql-format "SELECT lo_import(~a) ;"
`(string ,(build-path sql-dir
tmp-file-relative)))))]
(system (format "rm -f tmp/~a/words.txt" user))
(system (format "~a ~a > tmp/~a/words.txt"
(get-text-filter mime-type) tmp-file-relative user))
(with-output-to-file (format "tmp/~a/words.txt" user)
(lambda () (display description)) 'append)
(system (format "words.csh tmp/~a/words.txt ~a" user oid))
(delete-file tmp-file-relative)
(send (get-connection) exec
(sql-format "INSERT INTO document VALUES (~a, now(), ~a, ~a) ;"
`(integer ,oid)
`(string ,description)
`(string ,mime-type))))))
(define (guess-mime-type file-contents)
(let [(end (min 255 (string-length file-contents)))]
(cond ((regexp-match "^GIF" file-contents 0 end) "image/gif")
((regexp-match "%PDF" file-contents 0 end) "application/pdf")
((regexp-match "%!PS" file-contents 0 end) "application/postscript")
((or (regexp-match "<HTML>" file-contents 0 end)
(regexp-match "<html>" file-contents 0 end)) "text/html")
(else "none"))))
(define (export-document user oid)
(let* [(mime-type (send (get-connection) query-value
(sql-format "SELECT type FROM document WHERE id = ~a ;"
`(integer ,oid))))
(file-name (string-append oid "." (get-file-extension mime-type)))]
(current-directory (build-path db:web-root "htdocs/users" user))
(if (file-exists? file-name) (delete-file file-name))
(send (get-connection) exec
(sql-format "SELECT lo_export(~a, '/tmp/~a') ;"
`(integer ,oid) `(sql ,file-name)))
(system (format "cp /tmp/~a ~a" file-name file-name))
(format "/users/~a/~a" user file-name)))
(define (get-file-extension mime-type)
(case (string->symbol mime-type)
[(text/html) "html"]
[(text/plain) "txt"]
[(application/vnd.ms-excel) "xls"]
[(application/x-tex) "tex"]
[(application/pdf) "pdf"]
[(application/postscript) "ps"]
[(application/vnd.ms-powerpoint) "ppt"]
[(application/rtf) "rtf"]
[(application/msword) "doc"]
[(image/gif) "gif"]
[(image/jpeg) "jpg"]
[else "none"]))
(define (get-text-filter mime-type)
(case (string->symbol mime-type)
[(text/html) "/sw/bin/lynx -dump"]
[(text/plain) "/bin/cat"]
[(application/vnd.ms-excel) "/bin/echo"]
[(application/x-tex) "/sw/bin/detex"]
[(application/pdf) "/bin/echo"]
[(application/postscript) "/bin/echo"]
[(application/vnd.ms-powerpoint) "/bin/echo"]
[(application/rtf) "/bin/echo"]
[(application/msword) "/sw/bin/antiword"]
[(image/gif) "/bin/echo"]
[(image/jpeg) "/bin/echo"]
[else "/bin/echo"]))
(define (process-search-query keywords begin end format-function)
(let [(keyword-ids (extract-term-ids (get-connection) keywords))]
(if (null? keyword-ids)
(list)
(send (get-connection) map
(format-search-query keyword-ids begin end)
format-function))))
(define (extract-term-ids connection keywords)
(send connection query-list
(format-extract-term-ids keywords)))
(define (format-search-query term-ids begin end)
(do [(i 1 (add1 i)) (keys term-ids (cdr keys))
(from-string "FROM") (where-string "WHERE")]
[(null? keys)
(sql-format "SELECT id, date_trunc ( 'second', mark ), description,"
"trunc (score_document ( CAST ( id AS integer) ), 3 )"
from-string "document" where-string
"AND occurs_~a.doc = id AND mark > ~a AND mark < ~a"
"ORDER BY score_document ( CAST ( id AS integer) ) DESC;"
`(sql ,(- i 1)) `(string ,begin) `(string ,end))]
(set! from-string
(sql-format from-string "occurs AS occurs_~a," `(sql ,i)))
(cond ((= (length keys) 1)
(set! where-string
(sql-format where-string
"occurs_~a.term = ~a" `(sql ,i) `(sql ,(car keys)))))
(else
(set! where-string
(sql-format where-string
"occurs_~a.term = ~a AND"
"occurs_~a.doc = occurs_~a.doc AND"
`(sql ,i) `(sql ,(car keys))
`(sql ,i) `(sql ,(+ i 1))))))))
(define (format-extract-term-ids keywords)
(do [(keys keywords (cdr keys)) (where-string "")]
[(null? keys)
(sql-format "SELECT id FROM dictionary WHERE" where-string ";")]
(if (= (length keys) 1)
(set! where-string
(sql-format where-string "term = ~a" `(string ,(car keys))))
(set! where-string
(sql-format where-string "term = ~a OR" `(string ,(car keys)))))))
(define (update-ranking oids)
(send (get-connection) exec
(sql-format "DELETE FROM occurs WHERE doc = 0 ;"))
(send (get-connection) exec
(format-ranking-query oids)))
(define (format-ranking-query oids)
(do [(i 1 (add1 i)) (ids oids (cdr ids)) (where-string "")]
[(null? ids)
(sql-format "SELECT insert_occurs ( 0, sub.term, sum ( sub.count ) )"
"FROM (SELECT * FROM occurs WHERE" where-string ") AS sub"
"GROUP BY sub.term ;" )]
(if (= (length ids) 1)
(set! where-string
(sql-format where-string "doc = ~a" `(sql ,(car ids))))
(set! where-string
(sql-format where-string "doc = ~a OR" `(sql ,(car ids)))))))
)