-
Notifications
You must be signed in to change notification settings - Fork 8
/
Copy pathMain.cpp
163 lines (134 loc) · 5.35 KB
/
Main.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
/////////////////////////////////////////////////////////////////////////////
//Title: Main.cpp (for program CSSR)
//Author: Kristina Klinkner
//Date: July 23, 2003
//Description: Creates separate causal states for each history of data
// with a singular probability distribution. History length
// increases incrementally until cutoff point is reached. Then
// removes transient states, determinizes remaining states, and
// calculates various metrics for the resulting state machine.
// Outputs a file of states, a file of state sequences, a dot
// file, and an information file with the metrics.
//
/////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2002 Kristina Klinkner
// This file is part of CSSR
//
// CSSR is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// (at your option) any later version.
//
// CSSR is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with CSSR; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
//////////////////////////////////////////////////////////////////////////////
#include "Main.h"
///////////////////////////////////////////////////////////////////////////////
// Entry point for CSSR. Reads the alphabet and data files into a parse
// tree, builds states for histories of length 1..max_length, prunes and
// determinizes those states, then computes the machine's metrics and writes
// the output files.
//
// Command line (positions as recorded by the original author):
//   argv[1] is Alphabet File,
//   argv[2] is Datafile,
//   argv[3] is maximum length of string
//   argv[4] is the (optional) flag for changing the significance level
//   argv[5] is multiline version, if used
//   argv[6] is use of chi-squared test (optional)
// The optional arguments are interpreted by FiveArgs/SixArgs/SevenArgs,
// which are defined outside this function.
//
// Returns 0 after all output has been written, so that shells and scripts
// see a successful exit status; returns 1 if fewer than three positional
// arguments were supplied.
int main(int argc, char *argv[])
{
  bool isMulti = false;
  bool isSigLevel = false;
  bool isChi = false;
  double sigLevel = SIGLEVEL;

  //read in info from command line
  //argc == 4 means no optional flags; 5 to 7 carry optional flags;
  //anything else is reported through PrintError()
  if (argc != 4)
  {
    if (argc == 5)
      FiveArgs(argv, isMulti, isSigLevel, sigLevel, isChi);
    else if (argc == 6)
      SixArgs(argv, isMulti, isSigLevel, sigLevel, isChi);
    else if (argc == 7)
      SevenArgs(argv, isMulti, isSigLevel, sigLevel, isChi);
    else
      PrintError();
  }

  //argv[1..3] are read unconditionally below, so stop here in case
  //PrintError() returned instead of terminating the program
  if (argc < 4)
    return 1;

  PrintCopyrightInfo();

  //set arguments
  //NOTE(review): atoi yields 0 when argv[3] is not a number; no range
  //check is applied to max_length here
  int max_length = atoi(argv[3]);
  char* data_file = argv[2];
  char* alpha_file = argv[1];

  //if no significance level is set, use default
  //(should be set already, just to be careful)
  if (!isSigLevel)
    sigLevel = SIGLEVEL;
  else
    cout << "Significance level set to " << sigLevel <<".\n";

  //create parse tree to store all strings in data
  ParseTree parsetree(max_length);

  if (isMulti)
  {
    //multi-line input: read in data and enter tree one line at a time
    parsetree.ReadProcessMultiLine(alpha_file, data_file);
    cout << "Multi-line option is set.\n"
         << "Max line length is "<< MAX_LINE_SIZE
         << "\n";
  }
  else
  {
    //otherwise do data read first, then enter in tree
    parsetree.ReadInput(alpha_file, data_file);
    parsetree.FillTree();
  }

  //make hash table of alpha symbols and indices
  //NOTE(review): the returned pointer is never freed in main; confirm
  //whether ParseTree keeps ownership of the table
  HashTable2 *alphaHash = parsetree.MakeAlphaHash();

  //create array of states
  AllStates allstates(parsetree.getAlphaSize(), sigLevel, isChi);

  //calculate frequency of occurrence of symbols
  allstates.InitialFrequencies(parsetree);

  //check all possible strings up to max
  //length and compare distributions
  for (int k = 1; k <= max_length; k++)
    allstates.CalcNewDist(k, parsetree);

  //remove shorter strings (the returned flag is not needed here)
  allstates.DestroyShortHists(max_length, parsetree);

  //remove all non-recurring states
  allstates.CheckConnComponents(parsetree);

  //check futures longer than 1,
  //by using determinism of states
  allstates.Determinize(parsetree);

  //remove all non-recurring states (again, since there may be new ones)
  allstates.CheckConnComponents(parsetree);

  //store transitions from state to state
  allstates.StoreTransitions(parsetree.getMaxLength(), parsetree.getAlpha());

  //calculate distribution/frequency of states
  allstates.GetStateDistsMulti(parsetree, data_file, alphaHash, isMulti);

  //calculate information values
  //the machine lives exactly as long as this scope, so it is an automatic
  //object; it is destroyed before allstates and parsetree, as before
  Machine machine(&allstates);
  machine.CalcRelEnt(parsetree, alphaHash, isMulti);
  machine.CalcRelEntRate(parsetree, alphaHash, isMulti);
  machine.CalcCmu();
  machine.CalcEntRate();
  machine.CalcVariation(parsetree, alphaHash, isMulti);

  //print out states
  allstates.PrintOut(data_file, parsetree.getAlpha());

  //print out machine and calculations
  //NOTE(review): data_file is passed as both the first and third argument;
  //confirm against Machine::PrintOut that this is intended
  machine.PrintOut(data_file, alpha_file, data_file, max_length, sigLevel, isMulti, isChi, parsetree.getAlphaSize());
  machine.PrintDot(data_file, parsetree.getAlpha());

  //0 is the conventional "success" exit status
  return 0;
}