-
Notifications
You must be signed in to change notification settings - Fork 0
/
gitalyse.py
234 lines (193 loc) · 9.14 KB
/
gitalyse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
#
# Original work Copyright (c) 2017 Annemarie Mattmann
#
# Licensed under the MIT license. See the LICENSE file for details.
#
import argparse
# Keywords that have to precede the page number in a commit message.
keywords = ["S.", "Seite"]

# Command line interface; the formatter class appends the defaults to the help texts.
parser = argparse.ArgumentParser(
    description=(
        "Progress tracker based on git commit messages. "
        "It analyses all git log messages from a given file that contain a keyword "
        "followed by a number (with a space in between). "
        "The default use case is to track writing progress in pages with the number "
        "representing the current page at the time of the commit "
        "(example: {} 19). "
        "Currently used keywords: {}"
    ).format(keywords[0], keywords),
    formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.add_argument(
    "filename",
    type=str,
    help="name of the file to be analysed",
)
parser.add_argument(
    "-l",
    dest="log",
    action="store_true",
    help='list all commits and exit, this is similar to "git log --oneline" of the given file',
)
parser.add_argument(
    "-m",
    dest="missing",
    action="store_true",
    help=(
        "list missing commit messages and exit, messages can be missing "
        "(i.e. non-detected) because of missing or wrong keywords or wrong "
        "structure in commit messages"
    ),
)
parser.add_argument(
    "-c",
    default="0000000",
    metavar="COMMIT HASH",
    help="the commit in history to compare the latest one with, default is the earliest commit",
)
parser.add_argument(
    "-t",
    type=int,
    default=25,
    metavar="POS NUMBER",
    help="the threshold that marks a remarkable change for verbose/recent mode",
)
parser.add_argument(
    "-n",
    type=int,
    default=3,
    metavar="POS NUMBER",
    help=(
        "the number of latest commits you want to analyse in more detail and "
        "the number of commits that are ignored when searching for a recent "
        "remarkable change"
    ),
)
parser.add_argument(
    "-r",
    dest="recent",
    action="store_true",
    help=(
        "list most recent changes made since the last remarkable backward "
        "change which is assumed to be a new iteration of work"
    ),
)
parser.add_argument(
    "-v",
    dest="verbose",
    action="store_true",
    help="list and visualise significant changes",
)

# TODO add tags (e.g. for recent)
# TODO allow multiple word keywords
# TODO median, mode (np)
# TODO remarkable date, page change
# TODO parse date differently and plot it
# TODO average plot
# TODO add empty list checks

# Parsed once at start-up; every later section reads its options from args.
args = parser.parse_args()
import subprocess
import time
import dateutil.parser
# the file whose history is analysed
filename = args.filename

# list all commits and exit
if args.log:
    # "--" tells git that everything after it is a path, so a file name can never be
    # taken for a revision or an option
    subprocess.run(["git", "log", "--oneline", "--", filename])
    # quit() is a helper of the interactive interpreter; raising SystemExit ends the
    # script the same way (exit status 0) without depending on it
    raise SystemExit(0)
# (pre)process helper functions #
# get the difference in days between two points in time given in iso format
def timediff(isotime1, isotime2):
    """Return the ``days`` component of ``isotime1 - isotime2``.

    Both arguments are timestamp strings; they are parsed with
    dateutil.parser.parse and subtracted in the order given.
    """
    first, second = (dateutil.parser.parse(stamp) for stamp in (isotime1, isotime2))
    return (first - second).days
# the commit hash given via -c that the latest commit is compared with; despite its name this
# is the hash string, getCommitIndex() turns it into a position in the list of detected entries
commitIndex = args.c
def getCommitIndex():
    """Return the position in loglistDetected of the commit named by commitIndex.

    Only entry[0] (the hash) is compared, so a hash that is merely mentioned in a
    commit message never matches. The stored hashes are abbreviated, therefore the
    requested hash also matches when one of the two is a prefix of the other
    (e.g. a full 40 character hash); prefixes shorter than four characters are
    only accepted as an exact match. The comparison ignores letter case.

    If no entry matches, a warning is printed and the index of the last entry is
    returned, which is the same position the default setting uses.
    """
    wanted = commitIndex.lower()
    for position, entry in enumerate(loglistDetected):
        known = entry[0].lower()
        if wanted == known:
            return position
        if len(wanted) >= 4 and (known.startswith(wanted) or wanted.startswith(known)):
            return position
    print("Warning: The given commit hash was not part of a detected git log entry, using default.")
    return len(loglistDetected)-1
# preprocess #

# read hash, author date and subject of every commit of the file with ONE git call instead
# of one extra call per commit; %x09 puts a tab between the fields and "--" makes git treat
# filename strictly as a path (never as a revision or an option)
log = subprocess.run(
    ["git", "log", "--pretty=tformat:%h%x09%aI%x09%s", "--", filename],
    stdout=subprocess.PIPE,
).stdout.decode("utf-8")
loglist = log.split("\n")

# parse the loglist
# detected entries are [hash, message, date, pages], failed entries are [hash, message, date]
loglistDetected = []
loglistFailed = []
for entry in loglist:
    if not entry:
        continue
    # split at the first two tabs only, the message itself may contain further tabs
    fields = entry.split("\t", 2)
    if len(fields) != 3:
        continue  # not a line produced by the format requested above
    entrylist = [fields[0], fields[2], fields[1]]

    # look for a keyword and check whether the token right after it is a number
    pages = None
    keywordSeen = False
    for messagepart in entrylist[1].split():
        if keywordSeen:
            # only the token directly behind the keyword counts, anything else means the
            # structure is unclear and is rejected to prevent false positives;
            # isdecimal() accepts exactly the characters int() can convert
            if messagepart.isdecimal():
                pages = int(messagepart)
            break
        if any(key in messagepart for key in keywords):
            keywordSeen = True

    # every commit ends up in exactly one list, including messages without a keyword
    # and messages that end directly after the keyword
    if pages is None:
        loglistFailed.append(entrylist)
    else:
        entrylist.append(pages)  # only analyse those that have a number of pages
        loglistDetected.append(entrylist)
# pretty-print the fail list (on request via -m, or when nothing at all was detected) and exit
if args.missing or not loglistDetected:
    for entry in loglistFailed:
        # only entries that hold hash, message and date are printed
        if len(entry) == 3:
            print(entry[0] + "\t" + entry[2] + "\t" + entry[1])
    print(str(len(loglistFailed)) + " entries are not detected.")
    if not loglistDetected:
        print("Warning: Keyword detection failed on all entries!")
    # quit() is a helper of the interactive interpreter; raising SystemExit ends the
    # script the same way (exit status 0) without depending on it
    raise SystemExit(0)
# build the differences in time and in pages for each pair of consecutive detected entries;
# element i of both lists belongs to the pair (entry i, entry i+1)
daydifflist = [
    timediff(current[2], following[2])
    for current, following in zip(loglistDetected, loglistDetected[1:])
]
pagedifflist = [
    current[3] - following[3]
    for current, following in zip(loglistDetected, loglistDetected[1:])
]
# print helper functions #
def prettyprintEntry(entry, color=True):
    """Print one log entry as hash, date and message separated by tabs.

    entry is read as [hash, message, date, ...]. With color enabled the hash is
    wrapped in an ANSI escape sequence (magenta on black) that is reset right
    after it.
    """
    commit_hash = entry[0]
    if color:
        commit_hash = '\x1b[0;35;40m' + commit_hash + '\x1b[0m'
    print("\t".join([commit_hash, entry[2], entry[1]]))
def printChange(entry1, entry2):
    """Print both entries and the progress made between them.

    Entries are read as [hash, message, date, pages]. The output states the
    number of days from entry2 to entry1, the page difference and the resulting
    pages-per-day average.
    """
    # one day is added so that two commits of the same day do not divide by zero
    span_days = 1 + timediff(entry1[2], entry2[2])
    page_delta = entry1[3] - entry2[3]

    print("Between: ")
    for entry in (entry1, entry2):
        prettyprintEntry(entry)
    print(str(span_days) + " days total passed while writing " + str(page_delta) + " pages")
    print("That is an average of " + str(page_delta / span_days) + " pages per day")
def printCurrentChange(loglist, numCommits = 3):
    """Print the progress made over the first numCommits entries of loglist.

    loglist    -- detected entries, each read as [hash, message, date, pages]
    numCommits -- size of the window, counted from the start of the list

    The window is listed entry by entry, followed by the days passed, the page
    difference between its first and last entry, the pages-per-day average and a
    short comment on that average. Nothing is printed when numCommits is not
    positive or when loglist holds fewer than numCommits entries.
    """
    # the window uses the indices 0..numCommits-1, so a list of exactly numCommits
    # entries is long enough; a non-positive size would index from the end of the list
    if numCommits < 1 or len(loglist) < numCommits:
        return

    entry1 = loglist[0]
    entry2 = loglist[numCommits-1]
    days = timediff(entry1[2], entry2[2]) + 1 # +1 to avoid division by 0
    pages = entry1[3] - entry2[3]
    average = pages/days

    print("For the last " + str(numCommits) + " commits: ")
    for entry in loglist[:numCommits]:
        prettyprintEntry(entry)
    print(str(days) + " days total passed while writing " + str(pages) + " pages")
    print("That is an average of " + str(average) + " pages per day")
    print(" - great, keep going! :D" if average >= 2 else " - just a little bit more! :)" if average >= 1.5 else " - don't worry, just do it!")
# process #

threshold = args.t
numCommits = args.n

# the analysed range starts at the first detected entry and ends at the commit given
# via -c (or at the last detected entry when -c was left at its default)
index1 = 0
index2 = getCommitIndex() if commitIndex != "0000000" else len(loglistDetected)-1
recentIndex = -1 # -1 means that no remarkable backward change was found
splitlist = loglistDetected[index1:index2+1] # in the default case this is the whole list
# number of consecutive pairs inside splitlist; pair i consists of the entries i and i+1
pairsInRange = len(splitlist) - 1

# print analysis
print("Detected " + str(len(loglistDetected)) + " commits, failed to detect " + str(len(loglistFailed)) + " commits.")

# daydifflist/pagedifflist hold one element per pair, so every index is visited
# (stopping one short would silently skip the last pair)
for i in range(len(daydifflist)):
    # print remarkable changes, but only for pairs that lie completely inside splitlist
    # (otherwise splitlist[i+1] does not exist once -c shortens the range)
    if args.verbose and i < pairsInRange:
        if daydifflist[i] > 14:
            print("A long break (more than two weeks) was between: ")
            prettyprintEntry(splitlist[i])
            prettyprintEntry(splitlist[i+1])
        if pagedifflist[i] > threshold or pagedifflist[i] < -threshold:
            print("A remarkable change in page difference happened between: ")
            prettyprintEntry(splitlist[i])
            prettyprintEntry(splitlist[i+1])
    # get recent index: the first remarkable backward change behind the ignored commits
    if args.recent and recentIndex == -1 and i > numCommits and pagedifflist[i] < -threshold:
        recentIndex = i

# overall average
if args.verbose or index2 < len(loglistDetected)-1:
    printChange(loglistDetected[index1], loglistDetected[index2])

# recent average (with recentIndex == -1 this compares against the last detected entry)
if args.recent:
    printChange(loglistDetected[index1], loglistDetected[recentIndex])

# current average over a given number of commits (default: 3)
printCurrentChange(loglistDetected, numCommits)
# plotting helper functions
def plotAnalysis(daydifflist, pagedifflist):
    """Create two "Writing Curve" figures from per-pair day and page differences.

    The first figure plots the page differences against their position in the
    list. The second figure plots the accumulated pages against the accumulated
    days, both summed up starting from the END of the given lists.
    The figures are only created here, showing them is left to the caller.
    """
    def running_totals(values):
        # cumulative sums, walking through the values from the last to the first
        totals = []
        current = 0
        for value in reversed(values):
            current += value
            totals.append(current)
        return totals

    days_accumulated = running_totals(daydifflist)
    pages_accumulated = running_totals(pagedifflist)

    # figure 1: page difference per commit pair
    plt.figure()
    plt.plot(range(len(daydifflist)), list(pagedifflist))
    plt.title("Writing Curve")
    plt.xlabel("n-th commit in history")
    plt.ylabel("Pages")

    # figure 2: accumulated pages over accumulated days
    plt.figure()
    plt.plot(days_accumulated, pages_accumulated)
    plt.title("Writing Curve")
    plt.xlabel("Day Number")
    plt.ylabel("Page Number")
# plot #

import matplotlib.pyplot as plt
#export NO_AT_BRIDGE=1 in shell if error annoys

# The slices are passed on directly instead of overwriting daydifflist/pagedifflist, so the
# -r plot is always cut from the complete lists, no matter whether -v was given as well.
if args.verbose:
    # both lists have to be cut to the same range, plotAnalysis plots one against the other
    plotAnalysis(daydifflist[index1:index2], pagedifflist[index1:index2])
if args.recent:
    # recentIndex == -1 means "no remarkable backward change found"; used as a slice end
    # it would drop the last pair, so the whole list is plotted in that case
    recentEnd = recentIndex if recentIndex != -1 else len(daydifflist)
    plotAnalysis(daydifflist[index1:recentEnd], pagedifflist[index1:recentEnd])
plt.show()