Programming language shootout: k-nucleotide
From Gambit wiki
This is a Gambit implementation of the k-nucleotide benchmark of the Computer Language Benchmarks Game.
This implementation is quite slow; nearly all the time is spent in all-counts.
The program
;; The Computer Language Benchmarks Game ;; http://shootout.alioth.debian.org/ ;; Derived by Bradley Lucier from the Ikarus variant ;; derived by Michael D. Adams from the MzScheme variant (declare (standard-bindings)(extended-bindings)(block)(fixnum)(not safe)) (define (substring-copy! dest dest-start source source-start source-end) (declare (not interrupts-enabled)) (do ((i source-start (+ i 1)) (j dest-start (+ j 1))) ((= i source-end)) (string-set! dest j (string-ref source i)))) (define (strings->string strings) (let* ((total-length (do ((i 0 (+ i (string-length (car strings)))) (strings strings (cdr strings))) ((null? strings) i))) (result (make-string total-length))) (let loop ((strings strings) (i 0)) (if (null? strings) result (let* ((string (car strings)) (n (string-length string))) (substring-copy! result i string 0 n) (loop (cdr strings) (+ i n))))))) (define (insertion-sort lst <) (define (insert item lst) (if (or (null? lst) (< item (car lst))) (cons item lst) (cons (car lst) (insert item (cdr lst))))) (if (null? lst) lst (insert (car lst) (insertion-sort (cdr lst) <)))) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;; Boiler-plate for formatting floating point values (define (roundto digits n) (declare (generic)) (let* ((e (expt 10 digits)) (num (round (abs (* e (inexact->exact n))))) (str (number->string (remainder num e)))) (string-append (if (negative? n) "-" "") (number->string (quotient num e)) "." (make-string (- digits (string-length str)) #\0) str))) (define (upcase! str) (let ((n (string-length str))) (do ((i 0 (+ i 1))) ((= i n) str) (string-set! str i (char-upcase (string-ref str i)))))) (define (all-counts len dna) (let ((table (make-table test: string=? size: (let () (declare (generic)) (min (expt 4 len) (string-length dna)))))) (let loop ((s (- (string-length dna) len)) (seq (make-string len))) (if (< s 0) table (begin (substring-copy! seq 0 dna s (+ s len)) (let* ((cnt (table-ref table seq 0))) (table-set! table seq (+ 1 cnt)) (loop (- s 1) (if (zero? cnt) (make-string len) seq)))))))) (define (write-freqs table) (declare (generic)) (let* ((keys+values (table->list table)) (total (apply + (map cdr keys+values)))) (for-each (lambda (a) (display (car a)) (display " ") (display (roundto 3 (* 100 (/ (cdr a) total)))) (newline)) (insertion-sort keys+values (lambda (x y) (< (cdr y) (cdr x))))))) (define (write-one-freq table key) (let ((cnt (table-ref table key 0))) (display cnt) (display "\t") (display key) (display "\n"))) (define dna (let ((port (current-input-port)) (start ">THREE Homo sapiens frequency")) (let skip ((x (read-line port))) (if (not (string=? x start)) (skip (read-line port)) (let loop ((x (read-line port)) (s '())) (if (eof-object? x) (strings->string (reverse s)) (loop (read-line port) (cons (upcase! x) s)))))))) ;; 1-nucleotide counts: (write-freqs (all-counts 1 dna)) (newline) ;; 2-nucleotide counts: (write-freqs (all-counts 2 dna)) (newline) ;; Specific sequences: (for-each (lambda (seq) (write-one-freq (all-counts (string-length seq) dna) seq)) '("GGT" "GGTA" "GGTATT" "GGTATTTTAATT" "GGTATTTTAATTTATAGT"))
Compiling
gsc k-nucleotide
Running
gsi k-nucleotide < knucleotide-input1000000.txt
The file knucleotide-input1000000.txt is generated by fasta.