# simplenote2joplin.py
# Convert a Simple Note JSON export into ENEX (Evernote export) XML,
# suitable for import into Joplin.
from __future__ import print_function
import argparse
import json
import datetime
import logging
import sys
import re
def eprint(*args, **kwargs):
    """
    Print to stderr

    Placeholder until implementing proper logging.

    Parameters
    ----------
    args
        Positional values forwarded unchanged to ``print``.
    kwargs
        Keyword arguments forwarded to ``print`` (``sep``, ``end``, ``flush``...).
        ``file`` defaults to ``sys.stderr`` but may be overridden by the caller.
    """
    # Use setdefault instead of hard-coding file=: passing an explicit file=
    # would otherwise raise "got multiple values for keyword argument 'file'".
    kwargs.setdefault('file', sys.stderr)
    print(*args, **kwargs)
# Constants for default or hard-coded parameters
MAX_TITLE_LEN = 250
FIX_AMPERSAND_IN_TITLE = True


class SimpleNoteToEnex:
    """
    Class to convert notes from Simple Note (in JSON format) to ENEX (Evernote Export) xml-based format.

    Simple Note (www.simplenote.com) is a popular note taking application. It exports notes in JSON format.
    ENEX is the EverNote xml-based import/export format for notes, used by many note-taking applications.
    See: https://evernote.com/blog/how-evernotes-xml-export-format-works/
    Joplin is another (free and open source) multi platform note taking app that can import notes in ENEX format.

    Attributes
    ----------
    author : str
        author of the note.
    add_note_title : bool
        True if title of the note must be inferred from first line of the (Simple Note) note
    sn_title_separator : str
        Separates first line from the rest in SN Notes - used to identify title. Hard-coded to '\\r\\n'
    max_title_len : int
        Maximum size of title - applies only if add_note_title is True
    fix_ampersand_title : bool
        True if XML special characters in the title must be escaped (Joplin chokes on a raw '&' in a title)
    json_file : str
        The JSON file containing one or multiple Simple Note notes
    export_time : str
        Time (UTC, formatted yyyymmddThhmmssZ) at which the converter instance was created.
        Used as export-date of the generated ENEX document.
    max_notes : int
        Maximum number of notes to be converted. Used mainly for tests and debugging (e.g. find offending note)
    filter_tags : bool
        True if a tag_filter will be used. False otherwise
    tag_filter : list of str
        Tags used to filter subset of notes to be converted (e.g. ['recipes', 'cooking']).
        Always defined; empty when no filter was requested.
    match_tagged : bool
        if True, convert only notes with tags
    match_untagged : bool
        if True, convert only notes without tags
    invert_match : bool
        if True, invert the result of matching tag_filter OR match_untagged OR match_tagged
    line_sep : str
        Line separator to prettify the generated XML. Hard coded to '\\r\\n'
        TODO: verify if conversion also works with blank (empty string) separator
    verbose_level : int
        Verbosity of the diagnostic output sent to stderr (0 means silent)
        TODO: implement proper logging

    Methods
    -------
    convert_to_enex(sn_note)
        Convert an individual Simple Note note in JSON format to ENEX XML
    process_file()
        Process all notes in Simple Note export file. Call convert_to_enex for notes allowed by the filters
    match_note_logical_or(sn_note)
        Determine if a given note should be converted based on tag filter or match tagged/untagged flags
    """

    # Simple Note timestamp, e.g. 2019-03-21T16:26:02.123Z (fractional seconds optional).
    _SN_DATE_RE = re.compile(r"\A(\d{4})-(\d{2})-(\d{2})T(\d{2}):(\d{2}):(\d{2})(?:\.\d+)?Z\Z")

    def __init__(self, json_file, author=None, create_title=False, title_size=MAX_TITLE_LEN, verbose_level=0,
                 max_notes=None, tag_filter='', invert_match=False, match_tagged=False, match_untagged=False):
        """
        Parameters
        ----------
        json_file : str
            Path of the Simple Note JSON export to convert
        author : str, optional
            Author written into every converted note
        create_title : bool
            Derive each note title from the first line of its content
        title_size : int
            Maximum title length; None or a negative value falls back to MAX_TITLE_LEN
        verbose_level : int
            Verbosity of the output sent to stderr
        max_notes : int, optional
            Maximum number of notes to convert; None means no limit
        tag_filter : str or sequence of str
            Comma-separated tags (or an already split sequence of tags) to match
        invert_match : bool
            Invert the combined result of all filters
        match_tagged : bool
            Convert notes that have tags
        match_untagged : bool
            Convert notes that have no tags
        """
        self.author = author
        self.add_note_title = create_title
        # Max length -- in case there is no \r\n to delimit the note's first line
        if title_size is None or title_size < 0:
            self.max_title_len = MAX_TITLE_LEN
        else:
            self.max_title_len = title_size
        self.sn_title_separator = '\r\n'
        # True if we want to address issue #03 (joplin chokes importing note with ampersand / & in note title)
        # At the moment hardcoded.
        # TODO: Consider using a CLI switch if this fix for joplin impacts import in other note apps.
        self.fix_ampersand_title = FIX_AMPERSAND_IN_TITLE
        self.json_file = json_file
        # ENEX dates are yyyymmddThhmmssZ where the trailing Z denotes UTC, so take the time in UTC.
        self.export_time = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%dT%H%M%SZ")
        self.verbose_level = verbose_level
        self.match_tagged = match_tagged
        self.match_untagged = match_untagged
        self.invert_match = invert_match
        self.max_notes = sys.maxsize if max_notes is None else max_notes
        # tag_filter is always defined so that later attribute access can never fail.
        self.filter_tags = bool(tag_filter)
        self.tag_filter = self._parse_tag_filter(tag_filter)
        self.line_sep = '\r\n'
        if self.verbose_level > 1:
            eprint(f"match_untagged {self.match_untagged}")
            eprint(f"match_tagged {self.match_tagged}")
            eprint(f"filter_tags {self.filter_tags}")
            eprint(f"invert_match {self.invert_match}")

    @staticmethod
    def _parse_tag_filter(tag_filter):
        """Return the tag filter as a list of non-empty tags, stripped of surrounding whitespace."""
        if not tag_filter:
            return []
        if isinstance(tag_filter, str):
            tag_filter = tag_filter.split(',')
        return [tag.strip() for tag in tag_filter if tag.strip()]

    @staticmethod
    def _xml_escape(text):
        """Escape the characters that are not allowed verbatim in XML character data."""
        # '&' must be replaced first, otherwise the entities produced below would be escaped again.
        return text.replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;')

    @classmethod
    def _to_enex_timestamp(cls, sn_date):
        """Convert a Simple Note date (YYYY-MM-DDThh:mm:ss.xxxZ) to the ENEX form yyyymmddThhmmssZ."""
        sn_date = sn_date or ''
        match = cls._SN_DATE_RE.match(sn_date)
        if match:
            return '{}{}{}T{}{}{}Z'.format(*match.groups())
        # Unknown layout: fall back to simply stripping the punctuation.
        return sn_date.replace('-', '').replace(':', '').replace('.', '')

    def cleanup_content(self, json_content, pattern="\\r\\n", remove_whitespace=True, replace_br=True):
        """
        Clean up json_content from simplenotes (or other note provider):
        - remove leading and trailing whitespace - driven by remove_whitespace Boolean
        - remove one leading and one trailing occurrence of pattern (default '\\r\\n' -> empty line)

        Parameters
        ----------
        json_content : str
            Text of the note
        pattern : str
            Regular expression (not a literal string) removed at the start and at the end of the text
        remove_whitespace : bool
            Strip leading and trailing whitespace before removing pattern
        replace_br : bool
            Currently unused; kept for interface compatibility

        Returns
        -------
        str
            "clean" string
        """
        cleaned = json_content
        if remove_whitespace:
            cleaned = cleaned.strip()
        # The non-capturing group keeps the anchors bound to the whole pattern,
        # even when the pattern contains an alternation.
        cleaned = re.sub(r"\A(?:" + pattern + ")", "", cleaned)
        cleaned = re.sub("(?:" + pattern + r")\Z", "", cleaned)
        return cleaned

    def embed_extra_codes(self, json_content):
        """
        Modify json_content from simplenotes (or other note provider):
        Attempt to fix the disappearance of empty lines when joplin imports the enex file
        (issue #002 in github)
        The current fix is:
        changing every occurrence of '\\r\\n\\r\\n' (an empty line in the original simple note) into '\\r\\n<br/>'

        Returns
        -------
        str
            string with embedded html or xml extra codes
        """
        html_break = '<br/>'
        # Plain substring replacement: line_sep is a literal separator, not a regular expression.
        return json_content.replace(self.line_sep + self.line_sep, self.line_sep + html_break)

    def convert_to_enex(self, sn_note):
        """
        Convert individual Simple Note from JSON to ENEX format, adding some fields like author if requested

        Parameters
        ----------
        sn_note : dict
            Simple Note note converted from JSON into dict

        Returns
        -------
        str
            full text (XML format) of Simple Note note converted to XML
        """
        # Simple Note export formats date as YYYY-MM-DDThh:mm:ss.xxxZ, ENEX uses yyyymmddThhmmssZ
        enex_created = self._to_enex_timestamp(sn_note.get('creationDate'))
        enex_updated = self._to_enex_timestamp(sn_note.get('lastModified'))
        enex_content = self.cleanup_content(sn_note.get('content') or '', self.line_sep, True)
        enex_author = self._xml_escape(self.author or '')

        if self.add_note_title:
            # Simple Note JSON export format does not have explicit field to contain the note title.
            # Assume title is first line of Simple Note content, delimited by first "\r\n"
            enex_title = enex_content.split(self.sn_title_separator, 1)[0][:self.max_title_len]
            # fix for Joplin specific issue - chokes importing a note with an ampersand in the title.
            # Escape after truncating so that an entity is never cut in half.
            if self.fix_ampersand_title:
                enex_title = self._xml_escape(enex_title)
        else:
            enex_title = ''

        enex_content = self.embed_extra_codes(enex_content)

        # A missing, null or empty 'tags' entry all mean "no tags".
        enex_tags = ''.join(
            '<tag>' + self._xml_escape(tag) + '</tag>' + self.line_sep
            for tag in (sn_note.get('tags') or [])
        )

        # TODO: verify if the CDATA and DOCTYPE are needed for plain Markdown Notes.
        note_lines = [
            '',
            '<note>',
            f'<title>{enex_title}</title>',
            '<content>',
            '<![CDATA[<?xml version="1.0" encoding="UTF-8" standalone="no"?>',
            '<!DOCTYPE en-note SYSTEM "http://xml.evernote.com/pub/enml2.dtd">',
            f'<en-note><div>{enex_content}</div></en-note>',
            ']]>',
            '</content>',
            f'<created>{enex_created}</created>',
            f'<updated>{enex_updated}</updated>',
            '<note-attributes>',
            f'<author>{enex_author}</author>',
            '</note-attributes>',
            enex_tags,
            '</note>',
            '',
        ]
        return '\n'.join(note_lines) + self.line_sep

    def process_file(self):
        """
        Process JSON file (self.json_file) with Simple Note notes in JSON format

        Only the notes under 'activeNotes' are converted; 'trashedNotes' are ignored.

        Returns
        -------
        str
            full text of json_file converted to ENEX format (XML)
        """
        # The XML declaration must be the very first thing in the document: nothing may precede it.
        enex_file_header = (
            '<?xml version="1.0" encoding="UTF-8"?>\n'
            '<!DOCTYPE en-export SYSTEM "http://xml.evernote.com/pub/evernote-export3.dtd">\n'
            f'<en-export export-date="{self.export_time}">\n'
        )
        enex_file_footer = '</en-export>'

        # JSON is UTF-8 by specification; do not depend on the platform default encoding.
        with open(self.json_file, encoding='utf-8') as jfp:
            simplenotes = json.load(jfp)

        active_notes = simplenotes.get('activeNotes') or []
        num_active_notes = len(active_notes)
        if self.verbose_level >= 1:
            eprint(f"Processing file: {self.json_file} ")
            eprint("Notes author: ", self.author)
            eprint(f"Active notes: {num_active_notes}")
            if 'trashedNotes' in simplenotes:
                eprint(f"Trashed notes: {len(simplenotes['trashedNotes'])} -- will not be converted to ENEX")

        converted = []
        number_to_be_converted = min(self.max_notes, num_active_notes)
        for sn_note in active_notes:
            if len(converted) >= number_to_be_converted:
                break
            if self.match_note_logical_or(sn_note):
                converted.append(self.convert_to_enex(sn_note))

        if self.verbose_level > 0:
            eprint(f"Converted {len(converted)} notes")
        return enex_file_header + self.line_sep + ''.join(converted) + self.line_sep + enex_file_footer

    def match_note_logical_or(self, sn_note):
        """
        Function to implement the logic to filter by tags or tag presence/absence when deciding
        if a given note is to be converted.

        The logic to filter by tags is
        - tag filtering is case sensitive (following the behavior of "tag:" search in Simple Note)
        - match if at least one tag in the filter is present among the tags of the note.
        - example: if the filter (self.tag_filter) is ['linux', 'windows'] and the tags in the note contain
          ['Mac','linux'], the note will be converted.
        - a note whose 'tags' entry is missing, null or empty counts as untagged.

        The chosen logic for different tag filters is:
        - If no filters are selected (default) - return True (by default, all notes are converted)
        - Otherwise return the logical OR of all applied filters, XORing the result of the OR with self.invert_match

        This allows some valid but strange behavior combinations depending of selected filter flags.
        For example:
        - combining --match-tagged and --match-untagged converts all notes (equivalent to specifying no match/filter)
        - combining --tag-filter 'linux,windows' --match-untagged converts all untagged notes and all tagged
          notes with tags containing 'linux' and/or 'windows'
        - combining --tag-filter 'foo' and --match-tagged converts all tagged notes regardless of the tag
        - combining --match-tagged and --invert-match is the same as --match-untagged
        When in doubt, it is simpler to use a single filter expression (--tag-filter, --match-tagged,
        --match-untagged) possibly complemented with --invert-match

        Parameters
        ----------
        sn_note : dict
            Simple Note note being evaluated - dict from original individual note formatted as json.
            May contain as dict element an array of tags (sn_note['tags'])

        Returns
        -------
        bool
            Result of applying all active filters, XORed with invert_match
        """
        # if no filter or matching command is applied
        if not (self.filter_tags or self.match_tagged or self.match_untagged):
            return True  # regardless of value of invert_match

        note_tags = sn_note.get('tags') or []
        has_tags = bool(note_tags)
        convert_this_note = (
            # Match if filter and note share at least one tag.
            (self.filter_tags and not set(self.tag_filter).isdisjoint(note_tags))
            or (self.match_untagged and not has_tags)
            or (self.match_tagged and has_tags)
        )
        # XOR result with invert_match
        return bool(convert_this_note) != bool(self.invert_match)
def main(args):
    """
    Convert the Simple Note export described by the parsed command line and print the ENEX text to stdout.

    Parameters
    ----------
    args
        Parsed command line arguments; the attributes read are json_file, author, create_title,
        title_size, verbose_level, num_notes, tag_filter, invert_match, match_tagged and match_untagged.
    """
    # TODO: explicit argument for output file
    converter = SimpleNoteToEnex(
        json_file=args.json_file,
        author=args.author,
        create_title=args.create_title,
        title_size=args.title_size,
        verbose_level=args.verbose_level,
        max_notes=args.num_notes,
        tag_filter=args.tag_filter,
        invert_match=args.invert_match,
        match_tagged=args.match_tagged,
        match_untagged=args.match_untagged,
    )
    print(converter.process_file())
if __name__ == '__main__':
    # Command line definition. argparse derives each destination attribute from the
    # long option name (--json-file -> json_file), options are optional unless stated
    # otherwise, and string defaults are passed through `type` when parsed.
    parser = argparse.ArgumentParser()

    # Input and note metadata
    parser.add_argument('--json-file', required=True,
                        help='Simple Note export file (json) to be converted to ENEX')
    parser.add_argument('--author',
                        help='Specify an author for all converted notes')
    parser.add_argument('--create-title', action='store_true',
                        help='Attempt to create a title for each ENEX note from first line of "Simple Note" notes')
    parser.add_argument('--title-size', type=int, default=f'{MAX_TITLE_LEN}',
                        help=f'Maximum size in characters of title - default {MAX_TITLE_LEN}')

    # Note selection
    parser.add_argument('--tag-filter',
                        help='Comma-separated list of tags. Will convert notes matching any tag in list')
    parser.add_argument('--match-tagged', action='store_true',
                        help='Convert tagged notes')
    parser.add_argument('--match-untagged', action='store_true',
                        help='Convert untagged notes')
    parser.add_argument('--invert-match', action='store_true',
                        help='Invert match after combining (OR) all other filters/matching conditions')

    # Diagnostics
    parser.add_argument('--verbose-level', type=int, default='0',
                        help="Verbose output level. Output to stderr. Default 0 - no output")
    parser.add_argument('--number', dest='num_notes', type=int,
                        help='Number of notes to convert (Optional, default is convert all notes)')

    args = parser.parse_args()
    main(args)