Actions

Programming language shootout: fasta

From Gambit wiki

The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.

This is a Gambit implementation of the fasta benchmark of the Computer Language Benchmarks Game.

The program

#!gsi-script

;; The Computer Language Benchmarks Game
;; http://shootout.alioth.debian.org/
;;
;; Derived by Bradley Lucier from the Ikarus variant
;; derived by Michael D. Adams from the Chicken variant by Anthony Borla

(declare (standard-bindings)(extended-bindings)(block)(not safe))

(define-macro (unless test . body)
  `(if (not ,test)
       (begin
	 ,@body)))

(define *alu*
  (string-append
   "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG"
   "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA"
   "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT"
   "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA"
   "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG"
   "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC"
   "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA"))

(define *iub*
  (list
   '(#\a . 0.27) '(#\c . 0.12) '(#\g . 0.12) '(#\t . 0.27) '(#\B . 0.02)
   '(#\D . 0.02) '(#\H . 0.02) '(#\K . 0.02) '(#\M . 0.02) '(#\N . 0.02)
   '(#\R . 0.02) '(#\S . 0.02) '(#\V . 0.02) '(#\W . 0.02) '(#\Y . 0.02)))

(define *homosapien*
  (list
   '(#\a . 0.3029549426680) '(#\c . 0.1979883004921)
   '(#\g . 0.1975473066391) '(#\t . 0.3015094502008)))

;; -------------

(define *line-size* 60)

;; -------------------------------

(define (make-random seed)
  (let ((ia 3877.0)
	(ic 29573.)
	(im 139968.)
	(last (f64vector (exact->inexact seed)))
	(result (f64vector 0.)))
    (lambda ()
      (let* ((advance (fl+ ic
			(fl* (f64vector-ref last 0)
			     ia)))
	     (next (fl- advance
			(fl* im
			     (fltruncate (fl/ advance im))))))
	(f64vector-set! last 0 next)
	(f64vector-set! result 0 (fl/ next im))))))

;; -------------------------------

(define (make-cumulative-table frequency-table)
  (let ((cumulative 0.0))
    (map
     (lambda (x)
       (set! cumulative (fl+ cumulative (cdr x)))
       (cons (car x) cumulative))
       frequency-table)))

;; -------------

(define random-next (make-random 42))
(define *segmarker* ">")

;; -------------

(define (select-random cumulative-table)
  (select-over-threshold (random-next) cumulative-table))

(define (select-over-threshold rvalue table)
  (if (fl<= (f64vector-ref rvalue 0) (cdar table))
      (caar table)
      (select-over-threshold rvalue (cdr table))))

;; -------------

(define (repeat-fasta id desc n sequence line-length)
  (let ((seqlen (string-length sequence))
        (out (current-output-port))
	(buffer (make-string line-length)))
    (display (string-append *segmarker* id " " desc "\n") out)
    (let loop-o ((n n) (k 0))
      (unless (fx<= n 0)
              (let ((m (fxmin n line-length)))
                (let loop-i ((i 0) (k k))
                  (if (fx>= i m)
                      (begin
			(write-substring buffer 0 m)
                        (write-char #\newline out)
                        (loop-o (fx- n line-length) k))
                      (let ([k (if (fx= k seqlen) 0 k)])
			(string-set! buffer i (string-ref sequence k))
                        (loop-i (fx+ 1 i) (fx+ 1 k))))))))
    (force-output out)))

;; -------------

(define (random-fasta id desc n cumulative-table line-length)
  (let ((out (current-output-port))
	(buffer (make-string line-length)))
    (display (string-append *segmarker* id " " desc "\n") out)
    (let loop-o ((n n))
      (unless (<= n 0)
              (let ((m (min n line-length)))
                (let loop-i ((i 0))
                  (unless (>= i m)
			  (string-set! buffer i (select-random cumulative-table))
                          (loop-i (fx+ 1 i))))
		(write-substring buffer 0 m)
                (write-char #\newline out)
                (loop-o (fx- n line-length)))))
    (force-output out)))

;; -------------------------------

(define (main . args)
  (let ((n (string->number (car args))))
    (repeat-fasta "ONE" "Homo sapiens alu" (* n 2) *alu* *line-size*)
    (random-fasta "TWO" "IUB ambiguity codes" (* n 3)
                  (make-cumulative-table *iub*) *line-size*)
    (random-fasta "THREE" "Homo sapiens frequency" (* n 5)
                  (make-cumulative-table *homosapien*) *line-size*) ))

;; -------------------------------

Compiling

gsc fasta

Running

gsi fasta 25000000 > /dev/null

Building the input for k-nucleotide

gsi fasta 1000000 > knucleotide-input1000000.txt

The file knucleotide-input1000000.txt is used by k-nucleotide.