-
Notifications
You must be signed in to change notification settings - Fork 0
/
del2sla.py
147 lines (125 loc) · 4.05 KB
/
del2sla.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#
# Import XML from Del.icio.us into Slasti
#
# Copyright (C) 2011 Pete Zaitcev
# See file COPYING for licensing information (expect GPL 2).
#
# requires:
# ElementTree as built into Python 2.7 (xml.etree)
#
import sys
import time
import calendar
from xml.etree import ElementTree
# N.B. This includes app-level generics such as AppError. Any better ideas?
import slasti
from slasti import AppError
# Program name; prefixed to the usage line and to error messages on stderr.
TAG = "del2sla"
def Usage():
    """Write the command-line synopsis to stderr and exit with status 2."""
    synopsis = "Usage: "+TAG+" target_dir bookmarks.xml"
    # Explicit write instead of the print statement; the output is the same
    # line followed by a newline.
    sys.stderr.write(synopsis + "\n")
    sys.exit(2)
# We are not aware of any specification, so it is unclear if tags are split
# by space or whitespace. We assume space, to be locale-independent.
# But this means that we include tabs and foreign whitespace into tags.
def split_tags(tagstr):
    """Split a space-separated tag string into a list of non-empty tags.

    Only the ASCII space character separates tags; runs of spaces and
    leading/trailing spaces produce no empty entries. Any other whitespace
    (tabs, for instance) stays inside the tag it appears in.

    Returns a new list, empty when tagstr holds no tags.
    """
    # Split on ' ' explicitly: a bare split() would also break on tabs and
    # other whitespace, which this importer deliberately keeps in tags.
    return [tag for tag in tagstr.split(' ') if tag]
def verify_tags(tagstr):
    """Return 1 if the raw tag string is acceptable, 0 otherwise.

    A tag string is rejected when it contains a slash or a newline.
    """
    for forbidden in ("/", "\n"):
        if forbidden in tagstr:
            return 0
    return 1
def verify_attr(attrstr):
    """Return 1 if the attribute value contains no newline, 0 otherwise."""
    return 0 if "\n" in attrstr else 1
# XXX The add1 has a big problem with DB consistency in case of errors:
# if any mark problem causes us to abort, the user cannot re-run with
# minimal fixes to the set: the database will receive a bunch of dups.
# We need to either detect dups or somehow roll back everything we added.
def do(dirname, xmlname):
    """Import the <post> elements of a Del.icio.us XML export into a tag base.

    Opens the tag base at dirname, parses xmlname, and calls base.add1()
    once for every usable <post> child of the <posts> root.

    A post is silently skipped when it lacks a 'description', 'href', 'tag'
    or 'time' attribute, or when its tag string contains no tags. A post
    whose time cannot be converted by calendar.timegm() is skipped after
    printing the error to stdout.

    Raises AppError when the XML file cannot be read or parsed, when the
    root element is not <posts>, when a title, URL or note fails
    verify_attr(), when the tag string fails verify_tags(), or when the
    'time' attribute does not match the format YYYY-MM-DDTHH:MM:SSZ.
    """
    base = slasti.tagbase.TagBase(dirname)
    base.open()
    # Close the base on every exit path, including the AppError aborts below.
    # NOTE(review): assumes TagBase.close() is safe after a partial import --
    # confirm against slasti.tagbase.
    try:
        try:
            # Verify XML has UTF-8 encoding perhaps?
            etree = ElementTree.parse(xmlname)
        except IOError as e:
            raise AppError(str(e))
        except ElementTree.ParseError as e:
            raise AppError(xmlname+": "+str(e))

        etroot = etree.getroot()
        # Test identity, not truth: an Element without children is falsy.
        if etroot is None:
            raise AppError(xmlname+": No root element")
        if etroot.tag != 'posts':
            raise AppError(xmlname+": root is not `posts'")

        for et in etroot:
            if et.tag != 'post':
                continue
            attrs = et.attrib

            title = attrs.get('description')
            if title is None:
                continue
            url = attrs.get('href')
            if url is None:
                continue
            # 'tag' is a string of space-separated tags
            tagstr = attrs.get('tag')
            if tagstr is None:
                continue
            # Not sure what to do with the 'hash' (MD5 digest of the URL)
            # and 'meta' attributes, so they are ignored.
            note = attrs.get('extended')
            if note is None:
                note = ""

            if not verify_attr(title):
                raise AppError("Invalid title: `"+title+"'")
            if not verify_attr(url):
                raise AppError("Invalid URL: `"+url+"'")
            if not verify_attr(note):
                raise AppError("Invalid note: `"+note+"'")
            if not verify_tags(tagstr):
                raise AppError("Invalid tags: `"+tagstr+"'")

            tags = split_tags(tagstr)
            if not tags:
                continue

            # Example: time="2010-12-10T08:04:46Z"
            timestr = attrs.get('time')
            if timestr is None:
                # We could create fake dates, but that would be just wrong.
                continue
            try:
                # strptime either returns a struct_time or raises, so no
                # None check is needed on the result.
                timeval = time.strptime(timestr, "%Y-%m-%dT%H:%M:%SZ")
            except ValueError as e:
                # We bug out on this because this case may be worth
                # diagnosing. The error message has both the format and the
                # unparsed date string.
                raise AppError(str(e))

            try:
                timeint = calendar.timegm(timeval)
            except (ValueError, OverflowError) as e:
                # XXX A user supplied Year 1900 or something like that.
                sys.stdout.write(str(e) + "\n")
                continue

            base.add1(timeint, title, url, note, tags)
    finally:
        base.close()
def main(args):
    """Run the import for the given command-line arguments.

    args is the argument list without the program name and must hold
    exactly two items: the target directory and the XML file name. Any
    other count goes to Usage(), which exits. An AppError raised by do()
    is reported on stderr, prefixed with TAG, and the process exits with
    status 1.
    """
    if len(args) != 2:
        Usage()
    dirname, xmlname = args

    try:
        do(dirname, xmlname)
    except AppError as e:
        # Same text the print statement produced: prefix, space, message.
        sys.stderr.write(" ".join((TAG+":", str(e))) + "\n")
        sys.exit(1)
# http://utcc.utoronto.ca/~cks/space/blog/python/ImportableMain
# Run the importer only when executed as a script, so that the module
# stays importable without side effects.
if __name__ == "__main__":
    main(sys.argv[1:])