(module database mzscheme (require (lib "spgsql.ss" "spgsql") (lib "class.ss") (lib "process.ss") (prefix db: "configure.scm")) (provide login-valid? update-ranking import-document export-document process-search-query) ;; get-connection : -> connection ;; Get a stored connection or create a new one. (define (get-connection) (or (current-connection) (let [(connection (connect db:host db:port db:database db:user db:password))] (current-connection connection) (send connection use-type-conversions #t) connection))) ;; login-valid? : string string -> boolean ;; Returns true if the username and password are valid. (define (login-valid? username password) (positive? (send (get-connection) query-value (sql-format "select count(*) from dms_user" "where username = ~a and password = ~a" `(string ,username) `(string ,password))))) ;; import-document : string string string string -> boolean ;; The functions 'import-document' and 'export-document' are messy ;; for several reasons. First off, much of the work is done in the ;; directory './sql/' (specified relative to the directory in which ;; this file resides) by the shell script 'words.csh' which calls a ;; perl script 'terms.pl'. Second, some of the work is being done ;; by the PostgreSQL user 'postgres' while other work is being done ;; by the PLT web server started by 'tld' and so permissioning is ;; complicated. Rather than sort out the permissioning I just ;; hacked around it. To do it right, I should probably let ;; 'postgres' own everything or set up a group including 'tld' and ;; 'postgres' with both read and write permission. (define (import-document user mime-type file-contents description) (let* [(mime-type (if (string=? mime-type "none") (guess-mime-type file-contents) mime-type)) (sql-dir (build-path db:web-root "servlets/dms/sql")) (tmp-file-relative (build-path "tmp" user (string-append "tmp" "." (get-file-extension mime-type))))] ;; Connect to the directory containing shell scripts. (current-directory sql-dir) ;; Store the file in the local directory assigned to the user. (with-output-to-file tmp-file-relative (lambda () (display file-contents)) 'truncate/replace) ;; Use an absolute path for the PostgreSQL import. (let [(oid (send (get-connection) query-value (sql-format "SELECT lo_import(~a) ;" `(string ,(build-path sql-dir tmp-file-relative)))))] ;; Delete any existing temporary file. (system (format "rm -f tmp/~a/words.txt" user)) ;; Apply the appropriate text filter. (system (format "~a ~a > tmp/~a/words.txt" (get-text-filter mime-type) tmp-file-relative user)) ;; Append the supplied description to the word file. (with-output-to-file (format "tmp/~a/words.txt" user) (lambda () (display description)) 'append) ;; Run the shell script that processes the word file. ;; Note 'system' is synchronous and 'process' is asynchronous. (system (format "words.csh tmp/~a/words.txt ~a" user oid)) ;; Delete the uploaded file. (delete-file tmp-file-relative) ;; Create the requisite record in the 'document' table. (send (get-connection) exec (sql-format "INSERT INTO document VALUES (~a, now(), ~a, ~a) ;" `(integer ,oid) `(string ,description) `(string ,mime-type)))))) ;; guess-mime-type: string -> string ;; Search the first part of a file for clues as to its mime-type. (define (guess-mime-type file-contents) (let [(end (min 255 (string-length file-contents)))] (cond ((regexp-match "^GIF" file-contents 0 end) "image/gif") ((regexp-match "%PDF" file-contents 0 end) "application/pdf") ((regexp-match "%!PS" file-contents 0 end) "application/postscript") ((or (regexp-match "<HTML>" file-contents 0 end) (regexp-match "<html>" file-contents 0 end)) "text/html") (else "none")))) ;; export-document : string string -> string (define (export-document user oid) (let* [(mime-type (send (get-connection) query-value (sql-format "SELECT type FROM document WHERE id = ~a ;" `(integer ,oid)))) (file-name (string-append oid "." (get-file-extension mime-type)))] ;; Connect to the user's directory on the web server. (current-directory (build-path db:web-root "htdocs/users" user)) ;; Delete any previous temporary files with same name. (if (file-exists? file-name) (delete-file file-name)) ;; Export to /tmp/ to avoid problems with permissions. (send (get-connection) exec (sql-format "SELECT lo_export(~a, '/tmp/~a') ;" `(integer ,oid) `(sql ,file-name))) ;; Note 'system' is synchronous and 'process' is asynchronous. (system (format "cp /tmp/~a ~a" file-name file-name)) ;; Provide URL relative to web-server root directory. (format "/users/~a/~a" user file-name))) ;; get-file-extension: string -> string ;; Returns the default extension for a subset of MIME types. (define (get-file-extension mime-type) (case (string->symbol mime-type) [(text/html) "html"] [(text/plain) "txt"] [(application/vnd.ms-excel) "xls"] [(application/x-tex) "tex"] [(application/pdf) "pdf"] [(application/postscript) "ps"] [(application/vnd.ms-powerpoint) "ppt"] [(application/rtf) "rtf"] [(application/msword) "doc"] [(image/gif) "gif"] [(image/jpeg) "jpg"] [else "none"])) ;; get-text-filter: string -> string ;; Returns a text filter for a subset of MIME types. (define (get-text-filter mime-type) (case (string->symbol mime-type) [(text/html) "/sw/bin/lynx -dump"] [(text/plain) "/bin/cat"] [(application/vnd.ms-excel) "/bin/echo"] [(application/x-tex) "/sw/bin/detex"] [(application/pdf) "/bin/echo"] [(application/postscript) "/bin/echo"] [(application/vnd.ms-powerpoint) "/bin/echo"] [(application/rtf) "/bin/echo"] [(application/msword) "/sw/bin/antiword"] [(image/gif) "/bin/echo"] [(image/jpeg) "/bin/echo"] [else "/bin/echo"])) ;; process-search-query: (listof symbol) date date -> xexpr (define (process-search-query keywords begin end format-function) (let [(keyword-ids (extract-term-ids (get-connection) keywords))] (if (null? keyword-ids) (list) (send (get-connection) map (format-search-query keyword-ids begin end) format-function)))) ;; extract-term-ids: db-connection (listof string) -> (listof integer) (define (extract-term-ids connection keywords) (send connection query-list (format-extract-term-ids keywords))) ;; format-search-query: (listof integer) date date -> string ;; Generates a string for an SQL query that finds all documents that contain ;; at least one occurence of each term specified by a set of term identifiers: ;; SELECT id, mark, description ;; FROM occurs AS occurs_1, occurs AS occurs_2, ..., ;; occurs AS occurs_n, document ;; WHERE occurs_1.doc = occurs_2.doc AND occurs_1.term = term_1 AND ;; occurs_2.doc = occurs_3.doc AND occurs_2.term = term_2 AND ... ;; occurs_n-1.doc = occurs_n.doc AND occurs_n-1.term = term_n-1 AND ;; occurs_n.term = term_n AND occurs_n.doc = id AND ;; mark > begin AND mark < end ;; ORDER BY score_document ( CAST ( id AS integer) ) DESC; (define (format-search-query term-ids begin end) (do [(i 1 (add1 i)) (keys term-ids (cdr keys)) (from-string "FROM") (where-string "WHERE")] [(null? keys) (sql-format "SELECT id, date_trunc ( 'second', mark ), description," "trunc (score_document ( CAST ( id AS integer) ), 3 )" from-string "document" where-string "AND occurs_~a.doc = id AND mark > ~a AND mark < ~a" "ORDER BY score_document ( CAST ( id AS integer) ) DESC;" `(sql ,(- i 1)) `(string ,begin) `(string ,end))] (set! from-string (sql-format from-string "occurs AS occurs_~a," `(sql ,i))) (cond ((= (length keys) 1) (set! where-string (sql-format where-string "occurs_~a.term = ~a" `(sql ,i) `(sql ,(car keys))))) (else (set! where-string (sql-format where-string "occurs_~a.term = ~a AND" "occurs_~a.doc = occurs_~a.doc AND" `(sql ,i) `(sql ,(car keys)) `(sql ,i) `(sql ,(+ i 1)))))))) ;; format-extract-term-ids: (listof symbol) -> string ;; Generates a string for a SQL query of the form: ;; SELECT id FROM dictionary WHERE term = 'key_1' OR ... OR term = 'key_n' ; (define (format-extract-term-ids keywords) (do [(keys keywords (cdr keys)) (where-string "")] [(null? keys) (sql-format "SELECT id FROM dictionary WHERE" where-string ";")] (if (= (length keys) 1) (set! where-string (sql-format where-string "term = ~a" `(string ,(car keys)))) (set! where-string (sql-format where-string "term = ~a OR" `(string ,(car keys))))))) ;; update-ranking: (listof integer) -> boolean ;; Ranking documents returned by a query is accomplished using a faux ;; document with id = 0 which is generated from one or more documents: ;; Here's how you'd update the scoring function: ;; DELETE FROM occurs WHERE doc = 0 ; ;; SELECT insert_occurs ( 0, sub.term, sum ( sub.count ) ) FROM ;; (SELECT * FROM occurs WHERE doc = 1 OR doc = 2) AS sub ;; GROUP BY sub.term ; ;; where 1 and 2 are object ids. Following these invocations the ;; next SQL query will list the document sorted by the scoring function: ;; SELECT id, score_document ( CAST ( id AS integer) ) ;; FROM document ;; ORDER BY score_document ( CAST ( id AS integer) ) DESC; (define (update-ranking oids) (send (get-connection) exec (sql-format "DELETE FROM occurs WHERE doc = 0 ;")) (send (get-connection) exec (format-ranking-query oids))) ;; format-ranking-query: (listof integer) -> string (define (format-ranking-query oids) (do [(i 1 (add1 i)) (ids oids (cdr ids)) (where-string "")] [(null? ids) (sql-format "SELECT insert_occurs ( 0, sub.term, sum ( sub.count ) )" "FROM (SELECT * FROM occurs WHERE" where-string ") AS sub" "GROUP BY sub.term ;" )] (if (= (length ids) 1) (set! where-string (sql-format where-string "doc = ~a" `(sql ,(car ids)))) (set! where-string (sql-format where-string "doc = ~a OR" `(sql ,(car ids))))))) )