-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathregex.nim
35 lines (29 loc) · 844 Bytes
/
regex.nim
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import re
proc main() =
  ## Reads all of stdin, strips it, reports pattern counts, applies a fixed
  ## list of regex substitutions and prints three lengths.
  ##
  ## Output, in order: one line per variant pattern ("<pattern> <count>"),
  ## an empty line, the length of the raw input, the length after stripping,
  ## and the length after all substitutions.
  const
    # Alternations whose matches are counted in the stripped text.
    variantPatterns = [
      "agggtaaa|tttaccct",
      "[cgt]gggtaaa|tttaccc[acg]",
      "a[act]ggtaaa|tttacc[agt]t",
      "ag[act]gtaaa|tttac[agt]ct",
      "agg[act]taaa|ttta[agt]cct",
      "aggg[acg]aaa|ttt[cgt]ccct",
      "agggt[cgt]aa|tt[acg]accct",
      "agggta[cgt]a|t[acg]taccct",
      "agggtaa[cgt]|[acg]ttaccct"
    ]
    # (pattern, replacement) pairs. Order matters: every rule runs on the
    # text produced by the rules before it.
    substitutions = {
      "tHa[Nt]": "<4>",
      "aND|caN|Ha[DS]|WaS": "<3>",
      "a[NSt]|BY": "<2>",
      "<[^>]*>": "|",
      "\\|[^|][^|]*\\|": "-"
    }

  var text = string(stdin.readAll())
  let rawLen = text.len

  # Remove everything from a '>' up to and including the end of its line
  # (presumably sequence header lines -- confirm against the input format),
  # plus every remaining newline.
  text = text.replace(re">.*\n|\n", "")
  let strippedLen = text.len

  for pattern in variantPatterns:
    let hits = text.findAll(re(pattern))
    echo pattern, " ", hits.len

  for rule in substitutions:
    text = text.replace(re(rule[0]), rule[1])

  echo ""
  echo rawLen
  echo strippedLen
  echo text.len
# Entry point: invoke main at module top level so the script runs when
# this file is executed.
main()