-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathutil.rkt
110 lines (103 loc) · 4.27 KB
/
util.rkt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
#lang racket/base
(require "word.rkt"
racket/list
racket/sequence
srfi/13)
(provide get-all-source-files
string-w/o-word
string-truncated-from-word
list-random-ref
percent-of-words-from-file)
; get-all-source-files : path-string -> list-of-path
; Walks `dir` (including nested subdirectories, via in-directory) and
; returns the paths of the existing files whose names end in ".rkt" or ".ss".
(define (get-all-source-files dir)
  ;; srfi/13 argument order: (string-suffix? suffix string)
  (define (source-file? p)
    (and (file-exists? p)
         (let ([name (path->string p)])
           (or (string-suffix? ".rkt" name)
               (string-suffix? ".ss" name)))))
  (for/list ([p (in-directory dir)]
             #:when (source-file? p))
    p))
; string-w/o-word : string word -> string
; Returns `s` with the characters occupied by `w` cut out: everything
; before the word's position joined to everything after the word's end.
(define (string-w/o-word s w)
  (let* ([start (word-pos w)]
         [end (+ start (string-length (word-str w)))])
    (string-append (substring s 0 start)
                   (substring s end))))
; Tests for string-w/o-word.
(module+ test
  (require rackunit)
  ;; Shared fixtures: the test submodule for string-truncated-from-word
  ;; re-binds both of these with set!, so the names must stay as they are.
  (define test-string "(this is) a \"test\"")
  (define words (string->words test-string))
  ;; Removing the third word ("a") keeps the space on each side of it,
  ;; so the result contains two consecutive spaces.
  (check-equal? (string-w/o-word test-string (third words))
                "(this is)  \"test\"")
  (check-equal? (string-w/o-word test-string (first words))
                "( is) a \"test\"")
  ;; Removing the quoted word leaves the surrounding quotes in place.
  (check-equal? (string-w/o-word test-string (fourth words))
                "(this is) a \"\""))
; string-truncated-from-word : string word -> string
; Returns the prefix of `s` that stops just before `w` begins.
(define (string-truncated-from-word s w)
  (let ([cut (word-pos w)])
    (substring s 0 cut)))
; Tests for string-truncated-from-word.
(module+ test
  ;; Re-bind the fixtures defined by the string-w/o-word tests to a
  ;; longer sentence.
  (set! test-string "(this is) a \"test\" you know?")
  (set! words (string->words test-string))
  ;; Each selector picks a word; the matching string is the text that
  ;; must remain in front of that word.
  (for ([select (in-list (list third first fourth sixth))]
        [expected (in-list (list "(this is) "
                                 "("
                                 "(this is) a \""
                                 "(this is) a \"test\" you "))])
    (check-equal? (string-truncated-from-word test-string (select words))
                  expected)))
; list-random-ref : non-empty-list -> any
; Returns one element of `l`, at an index picked with `random`.
; Raises exn:fail:contract when `l` is not a non-empty list; checking
; here keeps the error from being reported against `random` (which
; rejects 0) or `length`.
(define (list-random-ref l)
  (unless (and (pair? l) (list? l))
    (raise-argument-error 'list-random-ref "(and/c list? pair?)" l))
  (list-ref l (random (length l))))
; percent-of-words-from-file : real[0, 1] string -> list-of-word
; Returns a random sample of the code symbols found in `file-string`.
;   percent     - fraction of the words to keep, from 0 to 1 inclusive
;   file-string - source text, parsed with string->word-symbols
; The sample holds ceiling(percent * total) words, in shuffled order.
; Raises exn:fail:contract when `percent` is not a real number in [0, 1].
(define (percent-of-words-from-file percent file-string)
  ;; Reject out-of-range values up front; otherwise they surface as an
  ;; index error from list-tail below.
  (unless (and (real? percent) (<= 0 percent 1))
    (raise-argument-error 'percent-of-words-from-file
                          "(real-in 0 1)"
                          percent))
  (define word-list (string->word-symbols file-string))
  (define total-words (length word-list))
  ;; Round up, then convert to an exact integer usable as a list index.
  (define word-count (inexact->exact (ceiling (* total-words percent))))
  ;; Keep the last word-count elements of the shuffled list.
  (list-tail (shuffle word-list) (- total-words word-count)))
; Tests for percent-of-words-from-file.
(module+ test
  (define percent-str "#lang racket (define \"hi there\" 5 8 + < be) (hi there)")
  ;; Number of words sampled from percent-str at the given fraction.
  (define (sample-size pct)
    (length (percent-of-words-from-file pct percent-str)))
  ;; All of the words, then half of them.
  (check-equal? (sample-size 1) 6)
  (check-equal? (sample-size .5) 3))
; string->word-symbols : string -> list-of-word
; Reads `s` as Racket source and returns one word per symbol found in
; it, in source order. Each word pairs the symbol's text with its
; 0-based offset in `s`.
; The main purpose of this function is to ignore strings in the code;
; this prevents the output from being tainted by non-code symbols.
; Numbers and other non-symbol literals are skipped as well, and only
; proper lists are descended into.
; Returns the empty list when `s` contains nothing to read.
(define (string->word-symbols s)
  (define code-stx
    (parameterize ([read-accept-reader #t]
                   [read-accept-lang #t]
                   ;; Needed so syntax-position is reported in characters.
                   ;; NOTE(review): with line counting enabled a CR-LF pair
                   ;; counts as a single position, so offsets may drift from
                   ;; string indices in CRLF text -- confirm against callers.
                   [port-count-lines-enabled #t])
      (read-syntax "name" (open-input-string s))))
  (define top-level-forms
    (cond
      ;; Empty, whitespace-only or comment-only input: nothing was read.
      [(eof-object? code-stx) empty]
      ;; "#lang racket ..." reads as (module name lang (#%module-begin form ...));
      ;; take the fourth element and drop its leading #%module-begin.
      [(string-prefix? "#lang racket" s)
       (rest (syntax-e (fourth (syntax-e code-stx))))]
      ;; Otherwise only the first datum of `s` was read; walk just that.
      [else (list code-stx)]))
  ;; Walk a work list of syntax objects, splicing the elements of each
  ;; list form onto the front so that source order is preserved.
  (let loop ([pending top-level-forms])
    (if (empty? pending)
        empty
        (let* ([stx (first pending)]
               [datum (syntax-e stx)]
               [remaining (rest pending)])
          (cond
            [(symbol? datum)
             ;; syntax-position is 1-based; convert to a 0-based offset.
             (cons (word (symbol->string datum)
                         (sub1 (syntax-position stx)))
                   (loop remaining))]
            [(list? datum)
             (loop (append datum remaining))]
            [else
             (loop remaining)])))))
; Tests for string->word-symbols.
(module+ test
  ;; A non-ASCII identifier: the expected offsets count characters.
  (define lam-str "#lang racket (λ (a) b)")
  (define lam-results (string->word-symbols lam-str))
  (check-equal? lam-results
                (list (word "λ" 14)
                      (word "a" 17)
                      (word "b" 20)))
  ;; The string literal and the numbers must not appear in the output.
  (define str "#lang racket (define \"hi there\" 5 8 + < be) (hi there)")
  (define results (string->word-symbols str))
  (check-equal? results
                (list (word "define" 14)
                      (word "+" 36)
                      (word "<" 38)
                      (word "be" 40)
                      (word "hi" 45)
                      (word "there" 48))))