-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmusic21_corpus.py
208 lines (167 loc) · 7.93 KB
/
music21_corpus.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
"""Exploratory work on melodies in the Ceol Rince na
hÉireann corpus, converting each melody from MIDI to feature sequences via the music21 library,
and finding the root note of each melody via the methods below.
Root detection uses:
1: music21's implementation of the Krumhansl key-finding algorithm,
with Krumhansl-Schmuckler, Aarden-Essen, Bellman-Budge and Temperley-Kostka-Payne weightings,
and Craig Sapp's keycor Simple Weights.
2: Key as assigned by the transcriber.
3: Key assigned per Breathnach's method, taking the final note of the piece as the root.
This program extracts and estimates the key for every melody in a corpus per the above methods.
It counts the key results obtained for each tune by each method, designates the majority verdict
as the root note of the piece, and returns a simple confidence score indicating the degree of
consensus among detection methods.
"""
import music21
import os
import pandas as pd
import traceback
from utils import write_to_csv
class Music21Corpus:
    """Read a directory of MIDI melodies and estimate the key and root of each.

    Typical call order: ``read_midi_files_to_streams`` ->
    ``combine_melodies_and_titles`` -> ``run_key_detection_algs`` ->
    ``extract_transcriber_assigned_key`` -> ``extract_roots_from_keys`` ->
    ``extract_final_note`` -> ``count_freq_of_root_occurrences`` ->
    ``find_most_frequent_root``.
    """

    # Columns written by the numeric feature-sequence export, in output order.
    _FEATURE_COLUMNS = ("MIDI_note", "onset", "duration", "velocity")

    # Columns of ``self.keys`` holding one key estimate per detection method.
    # The labels (including their spelling) are runtime column names.
    _KEY_COLUMNS = (
        'Krumhansl-Shmuckler',
        'simple weights',
        'Aarden Essen',
        'Bellman Budge',
        'Temperly Kostka Payne',
        'as transcribed',
    )

    # Columns that are labels or derived results rather than per-method
    # estimates; they must not take part in majority votes.
    _NON_VOTE_COLUMNS = ('title', 'key', 'root', 'certainty')

    def __init__(self, inpath):
        """Index the MIDI corpus directory ``inpath`` without reading any file.

        Args:
            inpath: path of the directory holding the ``.mid`` files.
        """
        # MIDI corpus directory:
        self.inpath = inpath
        self.filepaths = os.listdir(self.inpath)
        # Melody titles taken from the file names. The same filter is used
        # when the files are read, so titles and melodies stay aligned.
        self.melody_titles = [
            os.path.splitext(name)[0]
            for name in self.filepaths
            if self._is_midi_file(name)
        ]
        # List of all melodies in music21 stream format:
        self.melodies = []
        # dict of all tune titles (keys) and melody streams (values)
        self.corpus = {}
        # Pandas DF of key values as assigned and / or detected by methods below:
        self.keys = pd.DataFrame()
        # Pandas DF of root values:
        self.roots = pd.DataFrame()
        # Pandas DF of frequency of occurrence of root values:
        self.root_freq = pd.DataFrame()

    @staticmethod
    def _is_midi_file(name):
        """Return True for visible ``.mid`` file names (hidden dot-files are skipped)."""
        return name.endswith('.mid') and not name.startswith('.')

    def _vote_columns(self, frame):
        """Return ``frame`` restricted to its per-method estimate columns."""
        excluded = [col for col in self._NON_VOTE_COLUMNS if col in frame.columns]
        return frame.drop(columns=excluded)

    def _row_mode(self, frame):
        """Return the most frequent per-method value of every row of ``frame``.

        Ties resolve to the first of the sorted modal values, as returned by
        ``DataFrame.mode``.
        """
        modes = self._vote_columns(frame).mode(axis=1, numeric_only=False)
        if modes.empty:
            # No rows or no estimate columns: nothing to vote on.
            return pd.Series(index=frame.index, dtype='object')
        return modes[0]

    def read_midi_files_to_streams(self):
        """Parse every MIDI file of the corpus into a flattened music21 stream.

        A file that cannot be parsed is reported and skipped. ``melody_titles``
        is rebuilt to list only the files that were read successfully, so it
        always lines up one-to-one with ``melodies``.

        Returns:
            The list of melody streams (also stored in ``self.melodies``).
        """
        self.melodies = []
        self.melody_titles = []
        for name in self.filepaths:
            if not self._is_midi_file(name):
                continue
            mf = music21.midi.MidiFile()
            try:
                mf.open((self.inpath + "/" + name).encode())
                try:
                    mf.read()
                finally:
                    # Release the file handle even when parsing fails.
                    mf.close()
                stream = music21.midi.translate.midiFileToStream(mf, quantizePost=False).flat
            except Exception as exc:
                # Best effort: report the unreadable file and carry on.
                print(traceback.format_exc())
                print(exc)
                continue
            self.melodies.append(stream)
            self.melody_titles.append(os.path.splitext(name)[0])
        return self.melodies

    def combine_melodies_and_titles(self):
        """Map each melody title to its stream and return the mapping."""
        self.corpus = dict(zip(self.melody_titles, self.melodies))
        return self.corpus

    def reformat_to_numeric_feat_seq(self, output_dir):
        """Write one CSV of numeric note features per tune into ``output_dir``.

        Each row holds the MIDI note number, onset, duration (both rounded to
        two decimals) and velocity of one note event. A chord is represented
        by the MIDI number of its root.

        Args:
            output_dir: existing directory that receives ``<title>.csv`` files.
        """
        for title, tune in self.corpus.items():
            rows = []
            for note in tune.recurse().notes:
                if note.isNote:
                    midi_note = int(note.pitch.ps)
                elif note.isChord:
                    midi_note = int(note.root().ps)
                else:
                    # Neither a note nor a chord: no pitch to report.
                    midi_note = None
                rows.append({
                    "MIDI_note": midi_note,
                    "onset": round(float(note.offset), 2),
                    "duration": round(float(note.duration.quarterLength), 2),
                    "velocity": note.volume.velocity,
                })
            tune_df = pd.DataFrame(rows, columns=list(self._FEATURE_COLUMNS))
            # write output file
            tune_df.to_csv((output_dir + "/" + title + ".csv"))

    def run_key_detection_algs(self):
        """Estimate the key of every tune with five key-profile weightings.

        Fills ``self.keys`` with a ``title`` column plus one column of
        music21 key objects per weighting.

        Returns:
            The ``self.keys`` DataFrame.
        """
        discrete = music21.analysis.discrete
        # Every weighting is named explicitly so that no column depends on
        # which analyser ``Stream.analyze('key')`` maps to by default.
        analysers = (
            ('Krumhansl-Shmuckler', discrete.KrumhanslSchmuckler),
            ('simple weights', discrete.SimpleWeights),
            ('Aarden Essen', discrete.AardenEssen),
            ('Bellman Budge', discrete.BellmanBudge),
            ('Temperly Kostka Payne', discrete.TemperleyKostkaPayne),
        )
        # Explicit list of titles: assigning the dict itself is ambiguous.
        self.keys['title'] = list(self.corpus)
        self.keys.reset_index(inplace=True, drop=True)
        tunes = list(self.corpus.values())
        for column, analyser in analysers:
            self.keys[column] = [analyser(tune).getSolution(tune) for tune in tunes]
        return self.keys

    def extract_transcriber_assigned_key(self):
        """Add the key stored in each MIDI file as the ``as transcribed`` column.

        Returns:
            The ``self.keys`` DataFrame.
        """
        # NOTE(review): assumes the key object is always the second element
        # of the flattened stream -- confirm against the corpus files.
        self.keys['as transcribed'] = [tune[1] for tune in self.corpus.values()]
        return self.keys

    # this method no longer necessary?
    def reformat_music21_objs_to_str(self):
        """Convert every cell of ``self.keys`` to pandas string dtype."""
        self.keys = self.keys.astype('string')
        return self.keys

    # this also method no longer necessary?
    def find_most_frequent_key(self):
        """Store the majority key verdict of each tune in the ``key`` column.

        Only per-method columns take part in the vote; ``title`` and any
        previously computed ``key`` column are ignored.
        """
        self.keys['key'] = self._row_mode(self.keys)
        return self.keys

    def extract_roots_from_keys(self):
        """Build ``self.roots``: the upper-cased tonic name of every key estimate.

        Must run while ``self.keys`` still holds music21 key objects, i.e.
        before ``reformat_music21_objs_to_str``.

        Returns:
            The ``self.roots`` DataFrame.
        """
        self.roots = self.keys.copy(deep=True)
        for res in self._KEY_COLUMNS:
            # tonic.name is the pitch name: note letter plus accidental, if any.
            self.roots[res] = self.roots[res].apply(lambda x: x.tonic.name.upper())
        return self.roots

    def extract_final_note(self):
        """Add the final note of each tune as the ``final_note`` column of ``self.roots``.

        A final chord is represented by its root. A tune without any note, or
        whose last element is neither note nor chord, yields an empty string.

        Returns:
            The ``self.roots`` DataFrame.
        """
        final_notes = []
        for tune in self.corpus.values():
            try:
                last_note = tune.recurse().notes[-1]
            except IndexError:
                # Empty tune: there is no final note to report.
                final_notes.append('')
                continue
            print(last_note)
            if last_note.isNote:
                final_notes.append(last_note.name.upper())
            elif last_note.isChord:
                final_notes.append(last_note.root().name.upper())
            else:
                final_notes.append('')
        self.roots['final_note'] = final_notes
        return self.roots

    # this has been refactored and moved to MusicDataCorpus
    def count_freq_of_root_occurrences(self):
        """Count, per tune, how often each root name occurs across methods.

        Stores the normalised counts in ``self.root_freq`` and the share of
        the most frequent root, rounded to two decimals, in the ``certainty``
        column of ``self.roots``.

        Returns:
            The ``self.root_freq`` DataFrame, with ``title`` as first column.
        """
        # Title and derived columns are left out so they are not counted.
        votes = self._vote_columns(self.roots)
        self.root_freq = votes.apply(pd.Series.value_counts, axis=1, normalize=True)
        certainty_vals = self.root_freq.max(axis=1).to_numpy()
        self.roots['certainty'] = certainty_vals.round(decimals=2)
        self.root_freq.insert(0, 'title', self.roots['title'])
        return self.root_freq

    # this has been moved to MusicDataCorpus
    def find_most_frequent_root(self):
        """Store the majority root verdict of each tune in the ``root`` column.

        Only per-method columns take part in the vote; ``title``,
        ``certainty`` and any previously computed ``root`` are ignored.
        """
        self.roots['root'] = self._row_mode(self.roots)
        return self.roots
# Driver script: load the CRE MIDI corpus, run every key / root detection
# step in order, print intermediate results and write all outputs to disk.
corpus = Music21Corpus("/Users/dannydiamond/NUIG/Polifonia/CRE/CRE_MIDI")
print(corpus.filepaths)
print('\n\n')
print(corpus.melody_titles)

# Parse the MIDI files and show the contents of every melody stream.
corpus.read_midi_files_to_streams()
for tune_title, tune_stream in corpus.combine_melodies_and_titles().items():
    print(tune_title, list(tune_stream))

# Key estimates first; roots must be extracted while the key table still
# holds music21 objects, i.e. before it is converted to strings.
corpus.run_key_detection_algs()
corpus.extract_transcriber_assigned_key()
print(corpus.extract_roots_from_keys().head())
corpus.extract_final_note()
print(corpus.reformat_music21_objs_to_str().head())
print(corpus.find_most_frequent_key().head())

# Consensus statistics across detection methods.
root_frequencies = corpus.count_freq_of_root_occurrences()
corpus.find_most_frequent_root()
print(root_frequencies.head())

# Persist the three result tables.
roots_outpath = '/Users/dannydiamond/NUIG/Polifonia/CRE/root_detection'
for table, file_stem in (
    (corpus.roots, 'music21_roots'),
    (corpus.root_freq, 'music21_freqs'),
    (corpus.keys, 'music21_keys'),
):
    write_to_csv(table, roots_outpath, file_stem)

# Finally export every melody as a numeric feature sequence.
numeric_corpus_outpath = '/Users/dannydiamond/NUIG/Polifonia/CRE/CRE_primary_feat_seq'
corpus.reformat_to_numeric_feat_seq(numeric_corpus_outpath)