Actions

Programming language shootout: fasta

From Gambit wiki

Revision as of 00:53, 30 June 2008 by Bjlucier (talk | contribs)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

This is a Gambit implementation of the fasta benchmark of the Computer Language Benchmarks Game.

The program

#!gsi-script

;; The Computer Language Benchmarks Game
;; http://shootout.alioth.debian.org/
;;
;; Derived by Bradley Lucier from the Ikarus variant
;; derived by Michael D. Adams from the Chicken variant by Anthony Borla

(declare (standard-bindings)(extended-bindings)(block)(not safe))

(define-macro (unless test . body)
  `(if (not ,test)
       (begin
	 ,@body)))

(define *alu*
  (string-append
   "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"
   "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"
   "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"
   "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"
   "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"
   "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"
   "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"))

(define *iub*
  (list
   '(#\a . 0.27) '(#\c . 0.12) '(#\g . 0.12) '(#\t . 0.27) '(#\B . 0.02)
   '(#\D . 0.02) '(#\H . 0.02) '(#\K . 0.02) '(#\M . 0.02) '(#\N . 0.02)
   '(#\R . 0.02) '(#\S . 0.02) '(#\V . 0.02) '(#\W . 0.02) '(#\Y . 0.02)))

(define *homosapien*
  (list
   '(#\a . 0.3029549426680) '(#\c . 0.1979883004921)
   '(#\g . 0.1975473066391) '(#\t . 0.3015094502008)))

;; -------------

(define *line-size* 60)

;; -------------------------------

(define (make-random seed)
  (let ((ia 3877.0)
	(ic 29573.)
	(im 139968.)
	(last (f64vector (exact->inexact seed)))
	(result (f64vector 0.)))
    (lambda ()
      (let* ((advance (fl+ ic
			(fl* (f64vector-ref last 0)
			     ia)))
	     (next (fl- advance
			(fl* im
			     (fltruncate (fl/ advance im))))))
	(f64vector-set! last 0 next)
	(f64vector-set! result 0 (fl/ next im))))))

;; -------------------------------

(define (make-cumulative-table frequency-table)
  (let ((cumulative 0.0))
    (map
     (lambda (x)
       (set! cumulative (fl+ cumulative (cdr x)))
       (cons (car x) cumulative))
       frequency-table)))

;; -------------

(define random-next (make-random 42))
(define *segmarker* ">")

;; -------------

(define (select-random cumulative-table)
  (select-over-threshold (random-next) cumulative-table))

(define (select-over-threshold rvalue table)
  (if (fl<= (f64vector-ref rvalue 0) (cdar table))
      (caar table)
      (select-over-threshold rvalue (cdr table))))

;; -------------

(define (repeat-fasta id desc n sequence line-length)
  (let ((seqlen (string-length sequence))
        (out (current-output-port))
	(buffer (make-string line-length)))
    (display (string-append *segmarker* id " " desc "\n") out)
    (let loop-o ((n n) (k 0))
      (unless (fx<= n 0)
              (let ((m (fxmin n line-length)))
                (let loop-i ((i 0) (k k))
                  (if (fx>= i m)
                      (begin
			(write-substring buffer 0 m)
                        (write-char #\newline out)
                        (loop-o (fx- n line-length) k))
                      (let ([k (if (fx= k seqlen) 0 k)])
			(string-set! buffer i (string-ref sequence k))
                        (loop-i (fx+ 1 i) (fx+ 1 k))))))))
    (force-output out)))

;; -------------

(define (random-fasta id desc n cumulative-table line-length)
  (let ((out (current-output-port))
	(buffer (make-string line-length)))
    (display (string-append *segmarker* id " " desc "\n") out)
    (let loop-o ((n n))
      (unless (<= n 0)
              (let ((m (min n line-length)))
                (let loop-i ((i 0))
                  (unless (>= i m)
			  (string-set! buffer i (select-random cumulative-table))
                          (loop-i (fx+ 1 i))))
		(write-substring buffer 0 m)
                (write-char #\newline out)
                (loop-o (fx- n line-length)))))
    (force-output out)))

;; -------------------------------

(define (main . args)
  (let ((n (string->number (car args))))
    (repeat-fasta "ONE" "Homo sapiens alu" (* n 2) *alu* *line-size*)
    (random-fasta "TWO" "IUB ambiguity codes" (* n 3)
                  (make-cumulative-table *iub*) *line-size*)
    (random-fasta "THREE" "Homo sapiens frequency" (* n 5)
                  (make-cumulative-table *homosapien*) *line-size*) ))

;; -------------------------------

Compiling

gsc fasta

Running

gsi fasta 25000000 > /dev/null

Building the input for k-nucleotide

gsi fasta 1000000 > knucleotide-input1000000.txt

The file knucleotide-input1000000.txt is used by k-nucleotide.