Ticket #305: scanfiles.py

File scanfiles.py, 11.8 KB (added by scanner@…, 19 years ago)

program that scans a 'music root' for files, extracts id3 tags, and fills in the db from that

Line 
1#!/usr/bin/env python
2#
3
4import os
5import os.path
6import datetime
7import string
8import optparse
9import exceptions
10
11import eyeD3
12import eyeD3.tag
13import eyeD3.frames
14import eyeD3.utils
15
# We are not running inside the django framework, so we need to tell it where
# to find our django settings module. The environment variable is set before
# the model import below; presumably that import needs the settings to be
# known already -- confirm before reordering these two statements.
#
os.environ["DJANGO_SETTINGS_MODULE"] = "mediaserv.settings.main"

from django.models.music import *
22
23"""This program is intended to be invoked periodically on the system hosting
24the mediaserv app.
25
26It will load up the mediaserv models.
27
28It will then iterate through the defined MusicRoot's. For each MusicRoot it
29will scan all the files in it and all of its sub-directories.
30
31For every file that is an mp3 it will attempt to parse the ID3 tags for that
32file and based on those tags it will create Artist, ArtistName, Album, and
33Track records in our database.
34
35For every track that we encounter we will update the 'last_scanned' field.
36
37After we finish scanning a MusicRoot we ask for all of the tracks whose
38last_scanned field is before its MusicRoot's last_scan_started date. We will
39then remove those tracks from the system.
40"""
41
42############################################################################
43#
def is_simple_charset(value):
    """Report whether every character of `value` is printable ASCII.

    This is a cheap heuristic for telling a plain ASCII string apart from
    anything fancier (UTF-8, SJIS, other 8-bit data).

    Returns True when each character is a member of string.printable (so
    an empty string counts as simple), and False as soon as one character
    is not.
    """
    return all(ch in string.printable for ch in value)
55
56############################################################################
57#
def create_artist(id3_artist):
    """Create and return a new Artist for the given id3 artist name.

    A fresh Artist is saved first, then the name (wrapped in single quotes)
    is attached to it through add_artistname() with preference 0.

    The result of is_simple_charset() on the raw name is passed along as
    `simple_char_set`, so names that are SJIS or contain 8-bit characters
    get flagged as not having a simple character set.
    """
    new_artist = artists.Artist(date_added = datetime.datetime.now())
    new_artist.save()

    quoted_name = "'%s'" % id3_artist
    plain_ascii = is_simple_charset(id3_artist)
    new_artist.add_artistname(quoted_name, preference = 0,
                              simple_char_set = plain_ascii)
    return new_artist
71
72############################################################################
73#
def create_album(id3_album):
    """Create, save and return a new Album named after the id3 album tag.

    The name is stored wrapped in single quotes, which is the same form the
    exact-name album lookup in add_file() searches for.
    """
    quoted_name = "'%s'" % id3_album
    new_album = albums.Album(name = quoted_name,
                             date_added = datetime.datetime.now())
    new_album.save()
    return new_album
81
82############################################################################
83#
def add_file(filename, af, music_root, verbosity = "quiet"):
    """Add a Track record for an audio/mpeg file that is not yet in our db.

    The caller has already established that the file is an mp3 and that no
    track exists for it. We query the file for its info and create the
    requisite objects (track, and where needed artist and album) in the db.

    Arguments:
      filename   -- path of the file; stored on the track as its filename.
      af         -- the parsed audio file object; must provide getTag(),
                    getPlayTime() and getBitRate().
      music_root -- the MusicRoot being scanned; its id is stored on the
                    track.
      verbosity  -- 'terse' and 'verbose' each print a line about the track
                    that was added.

    Returns None.
    """

    tag = af.getTag()

    playtime = af.getPlayTime()
    (vbr, bitrate) = af.getBitRate()
    # Normalise the vbr flag to a real boolean (anything other than 0 counts
    # as variable bit rate).
    #
    vbr = (vbr != 0)

    # Let us pull out all the id3 tags we have, filling in default info
    #
    if tag is None:
        # The file has no id3 info. We create a title based on the file name
        # and everything else is empty.
        #
        track = tracks.Track(title = "'%s'" % os.path.basename(filename),
                             filename = filename,
                             last_scanned = datetime.datetime.now(),
                             play_time = playtime, bit_rate = bitrate,
                             vbr = vbr, musicroot_id = music_root.id)
        track.save()
        return

    # Get the title, if it does not exist, use the file name.
    #
    title = tag.getTitle()
    if not title:
        title = filename

    # At this point we have the information to create the basic track object.
    # The rest of the fields are optional.
    #
    track = tracks.Track(title = "'%s'" % title, filename = filename,
                         last_scanned = datetime.datetime.now(),
                         play_time = playtime, bit_rate = bitrate, vbr = vbr,
                         musicroot_id = music_root.id)

    # Get the artist tag. See if we have an artist with this exact name.
    # If we do not, then create a new artist.
    #
    id3_artist = tag.getArtist()
    if id3_artist:
        try:
            artist = artistnames.get_object(name__exact = \
                                            "'%s'" % id3_artist).get_artist()
        except artistnames.ArtistNameDoesNotExist:
            artist = create_artist(id3_artist)
        track.artist_id = artist.id

    # See if we can get the track number & disc number (note: getTrackNum() &
    # getDiscNum() return a tuple (track num, total tracks) (disc num, total
    # discs) so we only want element 0 of the tuple.)
    #
    id3_tracknum = tag.getTrackNum()[0]
    if id3_tracknum:
        track.track_number = id3_tracknum
    id3_discnum = tag.getDiscNum()[0]
    if id3_discnum:
        track.disc_number = id3_discnum

    # The album is like the artist. We see if we can find an album that exists
    # with the exact name. If we can then we use it. If we can not then we
    # create a new album and use that.
    #
    id3_album = tag.getAlbum()
    if id3_album:
        try:
            album = albums.get_object(name__exact = "'%s'" % id3_album)
        except albums.AlbumDoesNotExist:
            album = create_album(id3_album)
        track.album_id = album.id

    # Going to skip genre for now. Just really do not care much about it.
    #
    track.save()

    # Depending on the verbosity print out info on the track we just scanned.
    # (This used to test an undefined name `verbose`, which raised a
    # NameError for every verbosity other than 'terse'.)
    #
    if verbosity == "terse":
        print("Added track: %s" % track)
    elif verbosity == "verbose":
        print("Added track: %s .. (and other info)" % track)

    # NOTE(review): this line is printed at every verbosity level, including
    # 'quiet'. It looks like leftover debug output -- confirm before removing.
    #
    print("Track %s, last scanned: %s" % (track, track.last_scanned))
    return
176
177############################################################################
178#
179#
def scan_file(filename, music_root, verbosity = "quiet"):
    """Scan one file and make sure our database knows about it.

    This function is given a file name as an actual absolute file path.
    Files that are not mp3 files are skipped. For mp3 files we use the eyeD3
    library to parse the file; if no track exists for this exact file name,
    add_file() creates the track (and any artist / album it needs).

    If a track already exists for the file, its 'last_scanned' field is
    refreshed so that run() does not treat the track as missing and delete
    it. Comparing and updating the other fields from the id3 tags is not
    implemented yet.

    Arguments:
      filename   -- absolute path of the file to scan.
      music_root -- the MusicRoot being scanned; handed on to add_file().
      verbosity  -- 'verbose' prints progress for every file.

    Returns None.
    """

    # If the file is not an mp3 file we just return.
    #
    if not eyeD3.tag.isMp3File(filename):
        if verbosity == "verbose":
            print("Skipping file %s (not an audio/mpeg file)" % filename)
        return

    if verbosity == "verbose":
        print(" Scanning file %s" % filename)

    try:
        af = eyeD3.tag.Mp3AudioFile(filename)
    except Exception:
        # A file we can not parse is reported and skipped so that one bad
        # file does not abort the whole scan.
        #
        print("Unable to parse file: %s" % filename)
        return
    tag = af.getTag()
    if tag is None:
        if verbosity == "verbose":
            print("File %s had no id3 tag information. Filling in defaults" % \
                  filename)

    # First see if a track already exists that refers to this exact same
    # file. This is because files are the actual item that identifies a
    # track. If the file already exists then we already have this track in our
    # db. We just need to make sure that all the fields we have in the db match
    # the ones in this file.
    #
    try:
        track = tracks.get_object(filename__exact = filename)
    except tracks.TrackDoesNotExist:
        add_file(filename, af, music_root, verbosity)
        return

    # We encountered this track during the scan, so record that. Without this
    # its last_scanned value stays older than the MusicRoot's
    # last_scan_started and run() would delete the track as missing.
    #
    track.last_scanned = datetime.datetime.now()
    track.save()

    # This track already existed in our db. Check to see if any of its id3 tags
    # differ from what we already have in the db. If they do, update the db.
    # (Not implemented yet; for now we only say that we are skipping it.)
    #
    print("We would normually update track %s, but we are skipping it for " \
          "now" % os.path.basename(filename))
    #compare_update_file(filename, af, music_root, verbosity = verbosity)
    return
230
231############################################################################
232#
233#
def run(verbosity = "quiet"):
    """This is the function that actually does the work of scanning all of our
    MusicRoots for .mp3 files.

    For each MusicRoot we record when the scan started, walk its directory
    tree handing every file to scan_file(), then delete the tracks of that
    MusicRoot that were not seen during the walk, and finally record when
    the scan finished.

    It expects a single argument: a string that indicates the verbosity
    level. This may be either 'verbose', 'terse', or 'quiet'. If not
    specified it will default to 'quiet'.

    Returns None.
    """

    # Get the list of defined MusicRoots. These had better point to real
    # directories!
    #
    music_roots = musicroots.get_list()
    for music_root in music_roots:

        # We first mark that we actually started to scan this music root.
        #
        music_root.last_scan_started = datetime.datetime.now()
        music_root.save()

        if verbosity == "verbose":
            print("Started scanning MusicRoot %s at %s" % \
                  (music_root.directory, music_root.last_scan_started))

        # Then the magic walk happens
        #
        for root, dirs, files in os.walk(music_root.directory):
            if verbosity == "verbose":
                print("Scanning directory: %s" % root)

            for f in files:
                check_file = os.path.join(root, f)
                scan_file(check_file, music_root, verbosity = verbosity)

        # Okay. Our magic walk happened. Now we need to delete any tracks that
        # had been a part of this music root but were not scanned in this run
        #
        missing_tracks = tracks.get_list(last_scanned__lt = \
                                         music_root.last_scan_started)

        print("\n\n** Music root last scanned: %s" % \
              music_root.last_scan_started)
        for track in missing_tracks:
            # The query above is not limited to this music root. Tracks that
            # belong to a different MusicRoot may simply not have been
            # rescanned yet in this run, so they must not be deleted here.
            #
            if track.musicroot_id != music_root.id:
                continue

            # If it is a member of any playlists remove it..
            #
            print("Track %s last scanned: %s" % (track, track.last_scanned))
            if verbosity == "verbose" or verbosity == "terse":
                print("Track %s not found in scan. Deleting from MusicRoot " \
                      "%s" % (track, music_root))
            track.set_playlists([])
            track.delete()

        # Done scanning a music root. Indicate when we finished scanning it.
        #
        music_root.last_scan_finished = datetime.datetime.now()
        music_root.save()

    return
293
294############################################################################
295#
296#
def setup_option_parser():
    """Build and return the optparse parser for this script's command line.

    Nothing is parsed here; the caller invokes parse_args() on the returned
    parser. The only option defined is -v / --verbosity, a choice between
    'verbose', 'terse' and 'quiet' that defaults to 'terse'.
    """
    verbosity_levels = ["verbose", "terse", "quiet"]
    verbosity_help = """Controls how talkative the script is about what
                      it is doing. In 'verbose' mode it will tell you
                      every track it finds. In 'terse' mode it will only tell
                      you about tracks that are changed, added or removed.
                      In 'quiet' mode it will say nothing. DEFAULT:
                      '%default'"""

    option_parser = optparse.OptionParser(usage = "%prog [options]",
                                          version = "%prog 1.0")
    option_parser.add_option("-v", "--verbosity",
                             dest = "verbosity",
                             type = "choice",
                             choices = verbosity_levels,
                             default = "terse",
                             help = verbosity_help)
    return option_parser
315
316############################################################################
317#
def main():
    """Command line entry point: parse the options, then run the scan.

    This is what gets invoked when the file is executed as a program. Code
    that imports this file as a module should call run() directly with the
    verbosity it wants instead of calling main().
    """
    options, _positional = setup_option_parser().parse_args()
    run(options.verbosity)
330
###########
#
# The work starts here: main() is only called when this file is executed as
# a program, not when it is imported as a module.
#

if __name__ == "__main__":
    main()
338
339#
340#
341#
342###########
Back to Top