(module database mzscheme
  
  (require (lib "spgsql.ss" "spgsql")
           (lib "class.ss")
           (lib "process.ss")
           (prefix db: "configure.scm"))
  

  (provide login-valid?
           update-ranking 
           import-document
           export-document
           process-search-query)
  

  ;; get-connection : -> connection
  ;; Get a stored connection or create a new one.

  (define (get-connection)
    (or (current-connection)
        (let [(connection (connect db:host db:port db:database
                                   db:user db:password))]
          (current-connection connection)
          (send connection use-type-conversions #t)
          connection)))


  ;; login-valid? : string string -> boolean
  ;; Returns true if the username and password are valid.

  (define (login-valid? username password)
    (positive?
     (send (get-connection) query-value
           (sql-format "select count(*) from dms_user"
                       "where username = ~a and password = ~a"
                       `(string ,username)
                       `(string ,password)))))


  ;; import-document : string string string string -> boolean

  ;; The functions 'import-document' and 'export-document' are messy
  ;; for several reasons.  First off, much of the work is done in the
  ;; directory './sql/' (specified relative to the directory in which
  ;; this file resides) by the shell script 'words.csh' which calls a
  ;; perl script 'terms.pl'.  Second, some of the work is being done
  ;; by the PostgreSQL user 'postgres' while other work is being done
  ;; by the PLT web server started by 'tld' and so permissioning is
  ;; complicated.  Rather than sort out the permissioning I just
  ;; hacked around it.  To do it right, I should probably let
  ;; 'postgres' own everything or set up a group including 'tld' and
  ;; 'postgres' with both read and write permission.

  (define (import-document user mime-type file-contents description)
    (let* [(mime-type (if (string=? mime-type "none")
                          (guess-mime-type file-contents) mime-type))
           (sql-dir (build-path db:web-root "servlets/dms/sql"))
           (tmp-file-relative 
            (build-path "tmp" user 
                        (string-append "tmp" "." 
                                       (get-file-extension mime-type))))]
      ;; Connect to the directory containing shell scripts.
      (current-directory sql-dir)
      ;; Store the file in the local directory assigned to the user.
      (with-output-to-file tmp-file-relative
        (lambda () (display file-contents)) 'truncate/replace)
      ;; Use an absolute path for the PostgreSQL import.
      (let [(oid (send (get-connection) query-value
                       (sql-format "SELECT lo_import(~a) ;" 
                                   `(string ,(build-path sql-dir 
                                                         tmp-file-relative)))))]
        ;; Delete any existing temporary file.
        (system (format "rm -f tmp/~a/words.txt" user))
        ;; Apply the appropriate text filter.
        (system (format "~a ~a > tmp/~a/words.txt"
                        (get-text-filter mime-type) tmp-file-relative user))
        ;; Append the supplied description to the word file.
        (with-output-to-file (format "tmp/~a/words.txt" user)
          (lambda () (display description)) 'append)
        ;; Run the shell script that processes the word file.
        ;; Note 'system' is synchronous and 'process' is asynchronous.
        (system (format "words.csh tmp/~a/words.txt ~a" user oid))
        ;; Delete the uploaded file.
        (delete-file tmp-file-relative)
        ;; Create the requisite record in the 'document' table.
        (send (get-connection) exec
              (sql-format "INSERT INTO document VALUES (~a, now(), ~a, ~a) ;"
                          `(integer ,oid) 
                          `(string ,description)
                          `(string ,mime-type))))))


  ;; guess-mime-type: string -> string
  ;; Search the first part of a file for clues as to its mime-type.

  (define (guess-mime-type file-contents)
    (let [(end (min 255 (string-length file-contents)))]
      (cond ((regexp-match "^GIF" file-contents 0 end) "image/gif")
            ((regexp-match "%PDF" file-contents 0 end) "application/pdf")
            ((regexp-match "%!PS" file-contents 0 end) "application/postscript")
            ((or (regexp-match "<HTML>" file-contents 0 end)
                 (regexp-match "<html>" file-contents 0 end)) "text/html")
            (else "none"))))


  ;; export-document : string string -> string

  (define (export-document user oid)
    (let* [(mime-type (send (get-connection) query-value
                            (sql-format "SELECT type FROM document WHERE id = ~a ;" 
                                        `(integer ,oid))))
           (file-name (string-append oid "." (get-file-extension mime-type)))]
      ;; Connect to the user's directory on the web server.
      (current-directory (build-path db:web-root "htdocs/users" user))
      ;; Delete any previous temporary files with same name.
      (if (file-exists? file-name) (delete-file file-name))
      ;; Export to /tmp/ to avoid problems with permissions.
      (send (get-connection) exec
            (sql-format "SELECT lo_export(~a, '/tmp/~a') ;"
                        `(integer ,oid) `(sql ,file-name)))
      ;; Note 'system' is synchronous and 'process' is asynchronous.
      (system (format "cp /tmp/~a  ~a" file-name file-name))
      ;; Provide URL relative to web-server root directory.
      (format "/users/~a/~a" user file-name)))


  ;; get-file-extension: string -> string
  ;; Returns the default extension for a subset of MIME types.

  (define (get-file-extension mime-type)
    (case (string->symbol mime-type)
      [(text/html) "html"]
      [(text/plain) "txt"]
      [(application/vnd.ms-excel) "xls"]
      [(application/x-tex) "tex"]
      [(application/pdf) "pdf"]
      [(application/postscript) "ps"]
      [(application/vnd.ms-powerpoint) "ppt"]
      [(application/rtf) "rtf"]
      [(application/msword) "doc"]
      [(image/gif) "gif"]
      [(image/jpeg) "jpg"]
      [else "none"]))


  ;; get-text-filter: string -> string
  ;; Returns a text filter for a subset of MIME types.

  (define (get-text-filter mime-type)
    (case (string->symbol mime-type)
      [(text/html) "/sw/bin/lynx -dump"]
      [(text/plain) "/bin/cat"]
      [(application/vnd.ms-excel) "/bin/echo"]
      [(application/x-tex) "/sw/bin/detex"]
      [(application/pdf) "/bin/echo"]
      [(application/postscript) "/bin/echo"]
      [(application/vnd.ms-powerpoint) "/bin/echo"]
      [(application/rtf) "/bin/echo"]
      [(application/msword) "/sw/bin/antiword"]
      [(image/gif) "/bin/echo"]
      [(image/jpeg) "/bin/echo"]
      [else "/bin/echo"]))


  ;; process-search-query: (listof symbol) date date -> xexpr

  (define (process-search-query keywords begin end format-function)
    (let [(keyword-ids (extract-term-ids (get-connection) keywords))]
      (if (null? keyword-ids)
          (list)
          (send (get-connection) map
                (format-search-query keyword-ids begin end)
                format-function))))


  ;; extract-term-ids: db-connection (listof string) -> (listof integer)

  (define (extract-term-ids connection keywords)
    (send connection query-list
          (format-extract-term-ids keywords)))


  ;; format-search-query: (listof integer) date date -> string

  ;; Generates a string for an SQL query that finds all documents that contain
  ;; at least one occurence of each term specified by a set of term identifiers:
  ;; SELECT id, mark, description
  ;;   FROM occurs AS occurs_1, occurs AS occurs_2, ...,
  ;;        occurs AS occurs_n, document
  ;;   WHERE occurs_1.doc = occurs_2.doc AND occurs_1.term = term_1 AND
  ;;         occurs_2.doc = occurs_3.doc AND occurs_2.term = term_2 AND ...
  ;;         occurs_n-1.doc = occurs_n.doc AND occurs_n-1.term = term_n-1 AND
  ;;         occurs_n.term = term_n AND occurs_n.doc = id AND
  ;;         mark > begin AND mark < end
  ;;   ORDER BY score_document ( CAST ( id AS integer) ) DESC;

  (define (format-search-query term-ids begin end)
    (do [(i 1 (add1 i)) (keys term-ids (cdr keys))
         (from-string "FROM") (where-string "WHERE")]
        [(null? keys) 
         (sql-format "SELECT id, date_trunc ( 'second', mark ), description," 
                     "trunc (score_document ( CAST ( id AS integer) ), 3 )"
                     from-string "document" where-string 
                     "AND occurs_~a.doc = id AND mark > ~a AND mark < ~a"
                     "ORDER BY score_document ( CAST ( id AS integer) ) DESC;"
                     `(sql ,(- i 1)) `(string ,begin) `(string ,end))]
      (set! from-string
            (sql-format from-string "occurs AS occurs_~a," `(sql ,i)))
      (cond ((= (length keys) 1)
             (set! where-string
                   (sql-format where-string
                               "occurs_~a.term = ~a" `(sql ,i) `(sql ,(car keys)))))
            (else
             (set! where-string
                   (sql-format where-string
                               "occurs_~a.term = ~a AND" 
                               "occurs_~a.doc = occurs_~a.doc AND" 
                               `(sql ,i) `(sql ,(car keys)) 
                               `(sql ,i) `(sql ,(+ i 1))))))))


  ;; format-extract-term-ids: (listof symbol) -> string

  ;; Generates a string for a SQL query of the form:
  ;; SELECT id FROM dictionary WHERE term = 'key_1' OR ... OR term = 'key_n' ;

  (define (format-extract-term-ids keywords)
    (do [(keys keywords (cdr keys)) (where-string "")]
        [(null? keys) 
         (sql-format "SELECT id FROM dictionary WHERE" where-string ";")]
      (if (= (length keys) 1)
          (set! where-string
                (sql-format where-string "term = ~a" `(string ,(car keys))))
          (set! where-string
                (sql-format where-string "term = ~a OR" `(string ,(car keys)))))))


  ;; update-ranking: (listof integer) -> boolean

  ;; Ranking documents returned by a query is accomplished using a faux
  ;; document with id = 0 which is generated from one or more documents:
  ;; Here's how you'd update the scoring function:
  ;; DELETE FROM occurs WHERE doc = 0 ;
  ;; SELECT insert_occurs ( 0, sub.term, sum ( sub.count ) ) FROM
  ;;        (SELECT * FROM occurs WHERE doc = 1 OR doc = 2) AS sub
  ;;        GROUP BY sub.term ;
  ;; where 1 and 2 are object ids.  Following these invocations the
  ;; next SQL query will list the document sorted by the scoring function:
  ;; SELECT id, score_document ( CAST ( id AS integer) )
  ;;        FROM document
  ;;        ORDER BY score_document ( CAST ( id AS integer) ) DESC;

  (define (update-ranking oids)
    (send (get-connection) exec
          (sql-format "DELETE FROM occurs WHERE doc = 0 ;"))
    (send (get-connection) exec
          (format-ranking-query oids)))

  ;; format-ranking-query: (listof integer) -> string

  (define (format-ranking-query oids)
    (do [(i 1 (add1 i)) (ids oids (cdr ids)) (where-string "")]
        [(null? ids) 
         (sql-format "SELECT insert_occurs ( 0, sub.term, sum ( sub.count ) )"
                     "FROM (SELECT * FROM occurs WHERE"  where-string ") AS sub" 
                     "GROUP BY sub.term ;" )]
      (if (= (length ids) 1)
          (set! where-string
                (sql-format where-string "doc = ~a" `(sql ,(car ids))))
          (set! where-string
                (sql-format where-string "doc = ~a OR" `(sql ,(car ids)))))))

  )