-
Notifications
You must be signed in to change notification settings - Fork 1
/
PwC_Bash_code.txt
92 lines (55 loc) · 3.34 KB
/
PwC_Bash_code.txt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
### "Paying with Change" Article Bash Code ###
# Marshall A. Taylor #
# Dustin S. Stoltz #
##############################
## PwC_Bash_Code.txt: "Paying with Change" Article Bash Code
## Note: Used Stanford NER tagger, version 3.6.0. You will need to download the tagger and
## nest it within your file path. You will then need to download the "Data.zip" folder,
## unzip it, and then nest it within the NER tagger folder. You can also bypass the tagging
## step entirely and simply unzip the "tagged_docs" folder, which contains the tags
## for each of the 280 documents.
## Author: Marshall A. Taylor
##############################
############################
## COMMANDS BEGIN HERE ##
############################
# Step 1: append a " unique_identifier" marker line to every .txt document in
# each of the six Data folders. `tee -a` appends its stdin to every file the
# glob matches (and echoes it to the terminal). The marker is the string the
# csplit step later searches for to separate the documents again.
# NOTE: the closing quotes on the paths must be plain ASCII double quotes;
# typographic quotes leave the string unterminated in the shell.
# NOTE(review): this step is not idempotent -- running it twice appends a
# second marker to every document.
$ cd "/file/path/to/stanford-ner-2015-12-09/Data/Data1/"
$ echo " unique_identifier" | tee -a *.txt
$ cd "/file/path/to/stanford-ner-2015-12-09/Data/Data2/"
$ echo " unique_identifier" | tee -a *.txt
$ cd "/file/path/to/stanford-ner-2015-12-09/Data/Data3/"
$ echo " unique_identifier" | tee -a *.txt
$ cd "/file/path/to/stanford-ner-2015-12-09/Data/Data4/"
$ echo " unique_identifier" | tee -a *.txt
$ cd "/file/path/to/stanford-ner-2015-12-09/Data/Data5/"
$ echo " unique_identifier" | tee -a *.txt
$ cd "/file/path/to/stanford-ner-2015-12-09/Data/Data6/"
$ echo " unique_identifier" | tee -a *.txt
# Step 2: from the NER tagger folder, concatenate each Data folder's documents
# into a single input file (text1.txt ... text6.txt) for the tagger.
# ">" is used rather than ">>" so that re-running this step overwrites the
# combined file instead of appending a second copy of every document, which
# would throw off the fixed document counts used when splitting later.
$ cd "/file/path/to/stanford-ner-2015-12-09/"
$ cat Data/Data1/*.txt > text1.txt
$ cat Data/Data2/*.txt > text2.txt
$ cat Data/Data3/*.txt > text3.txt
$ cat Data/Data4/*.txt > text4.txt
$ cat Data/Data5/*.txt > text5.txt
$ cat Data/Data6/*.txt > text6.txt
# Step 3: run the Stanford NER CRF classifier (3-class English model) over each
# combined text file, writing slash-tagged output (token/TAG) to tagged_textN.txt.
# - The classpath is quoted so the shell hands the "lib/*" wildcard to Java
#   untouched instead of attempting to glob-expand it.
# - "-outputFormat" needs its leading dash to be read as a flag.
# - Lines beginning with ">" are the shell's continuation prompt after the
#   trailing backslash; they are part of the preceding command.
$ java -mx600m -cp "stanford-ner.jar:lib/*" edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier \
> classifiers/english.all.3class.distsim.crf.ser.gz -outputFormat slashTags -textFile text1.txt > tagged_text1.txt
$ java -mx600m -cp "stanford-ner.jar:lib/*" edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier \
> classifiers/english.all.3class.distsim.crf.ser.gz -outputFormat slashTags -textFile text2.txt > tagged_text2.txt
$ java -mx600m -cp "stanford-ner.jar:lib/*" edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier \
> classifiers/english.all.3class.distsim.crf.ser.gz -outputFormat slashTags -textFile text3.txt > tagged_text3.txt
$ java -mx600m -cp "stanford-ner.jar:lib/*" edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier \
> classifiers/english.all.3class.distsim.crf.ser.gz -outputFormat slashTags -textFile text4.txt > tagged_text4.txt
$ java -mx600m -cp "stanford-ner.jar:lib/*" edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier \
> classifiers/english.all.3class.distsim.crf.ser.gz -outputFormat slashTags -textFile text5.txt > tagged_text5.txt
$ java -mx600m -cp "stanford-ner.jar:lib/*" edu.stanford.nlp.ie.crf.CRFClassifier -loadClassifier \
> classifiers/english.all.3class.distsim.crf.ser.gz -outputFormat slashTags -textFile text6.txt > tagged_text6.txt
# Step 4: split each tagged file back into one file per document.
# csplit cuts the input at every line matching the regex between the outer
# slashes (the tagged marker "unique_identifier/O"); "{N}" repeats the pattern
# N more times, and "-f" sets the output file-name prefix (tagsa00, tagsa01, ...).
# The quotes must be plain ASCII double quotes: the shell strips them, whereas
# typographic quotes would be passed through as part of the regex.
$ csplit -f tagsa tagged_text1.txt /"unique_identifier/O"/ {49}
$ csplit -f tagsb tagged_text2.txt /"unique_identifier/O"/ {49}
$ csplit -f tagsc tagged_text3.txt /"unique_identifier/O"/ {49}
$ csplit -f tagsd tagged_text4.txt /"unique_identifier/O"/ {49}
$ csplit -f tagse tagged_text5.txt /"unique_identifier/O"/ {49}
$ csplit -f tagsf tagged_text6.txt /"unique_identifier/O"/ {29}
# Remove the last piece of each split -- presumably it holds only the trailing
# marker line and no document text (verify before deleting).
$ rm tagsa50 tagsb50 tagsc50 tagsd50 tagse50 tagsf30
# Collect the per-document tag files in one folder.
$ mkdir tagged_docs
$ mv tags* tagged_docs/