Programming language shootout: fasta
From Gambit wiki
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.
This is a Gambit implementation of the fasta benchmark of the Computer Language Benchmarks Game.
The program
#!gsi-script ;; The Computer Language Benchmarks Game ;; http://shootout.alioth.debian.org/ ;; ;; Derived by Bradley Lucier from the Ikarus variant ;; derived by Michael D. Adams from the Chicken variant by Anthony Borla (declare (standard-bindings)(extended-bindings)(block)(not safe)) (define-macro (unless test . body) `(if (not ,test) (begin ,@body))) (define *alu* (string-append "GGCCGGGCGCGGTGGCTCACGCCTGTAATCCCAGCACTTTGG" "GAGGCCGAGGCGGGCGGATCACCTGAGGTCAGGAGTTCGAGA" "CCAGCCTGGCCAACATGGTGAAACCCCGTCTCTACTAAAAAT" "ACAAAAATTAGCCGGGCGTGGTGGCGCGCGCCTGTAATCCCA" "GCTACTCGGGAGGCTGAGGCAGGAGAATCGCTTGAACCCGGG" "AGGCGGAGGTTGCAGTGAGCCGAGATCGCGCCACTGCACTCC" "AGCCTGGGCGACAGAGCGAGACTCCGTCTCAAAAA")) (define *iub* (list '(#\a . 0.27) '(#\c . 0.12) '(#\g . 0.12) '(#\t . 0.27) '(#\B . 0.02) '(#\D . 0.02) '(#\H . 0.02) '(#\K . 0.02) '(#\M . 0.02) '(#\N . 0.02) '(#\R . 0.02) '(#\S . 0.02) '(#\V . 0.02) '(#\W . 0.02) '(#\Y . 0.02))) (define *homosapien* (list '(#\a . 0.3029549426680) '(#\c . 0.1979883004921) '(#\g . 0.1975473066391) '(#\t . 0.3015094502008))) ;; ------------- (define *line-size* 60) ;; ------------------------------- (define (make-random seed) (let ((ia 3877.0) (ic 29573.) (im 139968.) (last (f64vector (exact->inexact seed))) (result (f64vector 0.))) (lambda () (let* ((advance (fl+ ic (fl* (f64vector-ref last 0) ia))) (next (fl- advance (fl* im (fltruncate (fl/ advance im)))))) (f64vector-set! last 0 next) (f64vector-set! result 0 (fl/ next im)))))) ;; ------------------------------- (define (make-cumulative-table frequency-table) (let ((cumulative 0.0)) (map (lambda (x) (set! cumulative (fl+ cumulative (cdr x))) (cons (car x) cumulative)) frequency-table))) ;; ------------- (define random-next (make-random 42)) (define *segmarker* ">") ;; ------------- (define (select-random cumulative-table) (select-over-threshold (random-next) cumulative-table)) (define (select-over-threshold rvalue table) (if (fl<= (f64vector-ref rvalue 0) (cdar table)) (caar table) (select-over-threshold rvalue (cdr table)))) ;; ------------- (define (repeat-fasta id desc n sequence line-length) (let ((seqlen (string-length sequence)) (out (current-output-port)) (buffer (make-string line-length))) (display (string-append *segmarker* id " " desc "\n") out) (let loop-o ((n n) (k 0)) (unless (fx<= n 0) (let ((m (fxmin n line-length))) (let loop-i ((i 0) (k k)) (if (fx>= i m) (begin (write-substring buffer 0 m) (write-char #\newline out) (loop-o (fx- n line-length) k)) (let ([k (if (fx= k seqlen) 0 k)]) (string-set! buffer i (string-ref sequence k)) (loop-i (fx+ 1 i) (fx+ 1 k)))))))) (force-output out))) ;; ------------- (define (random-fasta id desc n cumulative-table line-length) (let ((out (current-output-port)) (buffer (make-string line-length))) (display (string-append *segmarker* id " " desc "\n") out) (let loop-o ((n n)) (unless (<= n 0) (let ((m (min n line-length))) (let loop-i ((i 0)) (unless (>= i m) (string-set! buffer i (select-random cumulative-table)) (loop-i (fx+ 1 i)))) (write-substring buffer 0 m) (write-char #\newline out) (loop-o (fx- n line-length))))) (force-output out))) ;; ------------------------------- (define (main . args) (let ((n (string->number (car args)))) (repeat-fasta "ONE" "Homo sapiens alu" (* n 2) *alu* *line-size*) (random-fasta "TWO" "IUB ambiguity codes" (* n 3) (make-cumulative-table *iub*) *line-size*) (random-fasta "THREE" "Homo sapiens frequency" (* n 5) (make-cumulative-table *homosapien*) *line-size*) )) ;; -------------------------------
Compiling
gsc fasta
Running
gsi fasta 25000000 > /dev/null
Building the input for k-nucleotide
gsi fasta 1000000 > knucleotide-input1000000.txt
The file knucleotide-input1000000.txt is used by k-nucleotide.