Retrieving The ID3 Info
Now that we've established our files are missing ID3 info and there is no reliable means of determining it from the file path, we must look to the web for our data. We'll use the MusicBrainz XML Web Service to search for and fill in our missing data. You can find out more about their web service at http://musicbrainz.org/doc/XML_Web_Service/Version_2. To make the calls to the service, we will use the urllib and the urllib2 packages.
Code:
Code:
Code:
Now that we've established our files are missing ID3 info and there is no reliable means of determining it from the file path, we must look to the web for our data. We'll use the MusicBrainz XML Web Service to search for and fill in our missing data. You can find out more about their web service at http://musicbrainz.org/doc/XML_Web_Service/Version_2. To make the calls to the service, we will use the urllib and the urllib2 packages.
Code:
#!/usr/bin/python import sys, os, fnmatch, shutil, argparse, re, urllib, urllib2 from mutagen.easyid3 import EasyID3 from mutagen.id3 import ID3, TIT2 def getTrackNumber(track): if track.find("/") > -1: return getTrackNumber(track[0:track.find("/")]) elif track.isdigit(): return track.zfill(2) def getID3FromFilename(file, id3info): filename = os.path.basename(file) m = re.match(r"(?P<trackNumber>\d{2})\. (?P<artist>((?! -).)+) - (?P<title>[^\.]+)\.mp3", filename) id3info["tracknumber"] = m.group('trackNumber') id3info["artist"] = m.group('artist') id3info["title"] = m.group('title') id3info["album"] = file.split('/')[-2] id3info.save() def getID3FromWeb(file, id3info): # Get the Artist and Title from id3info to make the query _artist = id3info["artist"][0] _title = id3info["title"][0] # Query the MusicBrainz web service try: query = { 'query' : 'artist:' + _artist + ' AND recording:' + _title } response = urllib2.urlopen('http://musicbrainz.org/ws/2/recording?' + urllib.urlencode(query)) print response.read() except: print sys.exc_info()[0] def move(output, file, dirMatch): # Check if there are ID3 tags to begin with, if not, it will complain try: tag = ID3(file) except: tag = ID3() tag.add(TIT2(encoding=3, text=["Title"])) tag.save(file) id3info = EasyID3(file) try: _trackNumber = getTrackNumber(id3info["tracknumber"][0]) _artist = id3info["artist"][0] _title = id3info["title"][0] _album = id3info["album"][0] except KeyError: if dirMatch: getID3FromFilename(file, id3info) return else: getID3FromWeb(file, id3info) return outputDir = output + "/" + _artist + "/" + _album + "/" outputFile = _trackNumber + ". 
" + _artist + " - " + _title + ".mp3" if not os.path.exists(outputDir): os.makedirs(outputDir) shutil.move(file, outputDir + outputFile) def main(): parser = argparse.ArgumentParser() parser.add_argument('-d', '--directory', nargs=1, required=True, help='') parser.add_argument('-o', '--output', nargs=1, required=True, help='') args = parser.parse_args() directory = os.path.abspath(args.directory[0]) output = os.path.abspath(args.output[0]) dirMatch = (directory == output) for root, subFolders, filenames in os.walk(directory): for filename in fnmatch.filter(filenames, '*.mp3'): move(output, os.path.join(root, filename), dirMatch) main()Result: Note: Due to the large amount of data returned from the XML web service, I will link to each file's results rather than display pages and pages of text.
$ ./mp3-tagger.py -d /UntamedMusic/ -o /Music/
http://musicbrainz.org/ws/2/recording?query=artist%3AThrice+AND+recording%3AMoving+Mountains
http://musicbrainz.org/ws/2/recording?query=artist%3AThrice+AND+recording%3AChild+Of+Dust
http://musicbrainz.org/ws/2/recording?query=artist%3AThrice+AND+recording%3AThe+Earth+Isnt+Humming
http://musicbrainz.org/ws/2/recording?query=artist%3AThrice+AND+recording%3ADigging+My+Own+Grave
http://musicbrainz.org/ws/2/recording?query=artist%3AThrice+AND+recording%3AThe+Lion+And+The+Wolf
http://musicbrainz.org/ws/2/recording?query=artist%3AThrice+AND+recording%3ACome+All+You+Weary

Next, we will be using the xml.dom.minidom package to parse the XML returned from the MusicBrainz service. You can learn more about the xml.dom.minidom package at http://docs.python.org/library/xml.dom.minidom.html and about XML in general at http://www.w3schools.com/xml/xml_whatis.asp.
Code:
#!/usr/bin/python import sys, os, fnmatch, shutil, argparse, re, urllib, urllib2, xml.dom.minidom from mutagen.easyid3 import EasyID3 from mutagen.id3 import ID3, TIT2 def getTrackNumber(track): if track.find("/") > -1: return getTrackNumber(track[0:track.find("/")]) elif track.isdigit(): return track.zfill(2) def getID3FromFilename(file, id3info): filename = os.path.basename(file) m = re.match(r"(?P<trackNumber>\d{2})\. (?P<artist>((?! -).)+) - (?P<title>[^\.]+)\.mp3", filename) id3info["tracknumber"] = m.group('trackNumber') id3info["artist"] = m.group('artist') id3info["title"] = m.group('title') id3info["album"] = file.split('/')[-2] id3info.save() def getID3FromWeb(file, id3info): # Get the Artist and Title from id3info to make the query _artist = id3info["artist"][0] _title = id3info["title"][0] # Query the MusicBrainz web service try: query = { 'query' : 'artist:' + _artist + ' AND recording:' + _title } response = urllib2.urlopen('http://musicbrainz.org/ws/2/recording?' + urllib.urlencode(query)) x = xml.dom.minidom.parse(response) recordingList = x.getElementsByTagNameNS('http://musicbrainz.org/ns/mmd-2.0#', 'recording-list')[0] recording = recordingList.getElementsByTagName('recording')[0] if recording.nodeType == 1 and recording.attributes.get('ext:score').value == '100': id3info["tracknumber"] = getTrackNumber(str(int(recording.getElementsByTagName('track-list')[0].attributes.get('offset').value)+1)) id3info["artist"] = recording.getElementsByTagName('name')[0].firstChild.nodeValue id3info["title"] = recording.getElementsByTagName('title')[0].firstChild.nodeValue id3info["album"] = recording.getElementsByTagName('release')[0].getElementsByTagName('title')[0].firstChild.nodeValue print id3info except: print sys.exc_info()[0] def move(output, file, dirMatch): # Check if there are ID3 tags to begin with, if not, it will complain try: tag = ID3(file) except: tag = ID3() tag.add(TIT2(encoding=3, text=["Title"])) tag.save(file) id3info = EasyID3(file) try: 
_trackNumber = getTrackNumber(id3info["tracknumber"][0]) _artist = id3info["artist"][0] _title = id3info["title"][0] _album = id3info["album"][0] except KeyError: if dirMatch: getID3FromFilename(file, id3info) return else: getID3FromWeb(file, id3info) return return outputDir = output + "/" + _artist + "/" + _album + "/" outputFile = _trackNumber + ". " + _artist + " - " + _title + ".mp3" if not os.path.exists(outputDir): os.makedirs(outputDir) shutil.move(file, outputDir + outputFile) def main(): parser = argparse.ArgumentParser() parser.add_argument('-d', '--directory', nargs=1, required=True, help='') parser.add_argument('-o', '--output', nargs=1, required=True, help='') args = parser.parse_args() directory = os.path.abspath(args.directory[0]) output = os.path.abspath(args.output[0]) dirMatch = (directory == output) for root, subFolders, filenames in os.walk(directory): for filename in fnmatch.filter(filenames, '*.mp3'): move(output, os.path.join(root, filename), dirMatch) main()Result:
$ ./mp3-tagger.py -d /UntamedMusic/ -o /Music/
{'album': [u'The Alchemy Index, Volumes III & IV'], 'tracknumber': [u'01'], 'title': [u'Moving Mountains'], 'artist': [u'Thrice']}
{'album': [u'The Alchemy Index, Volumes III & IV'], 'tracknumber': [u'06'], 'title': [u'Child of Dust'], 'artist': [u'Thrice']}
{'album': [u'The Alchemy Index, Volumes III & IV'], 'tracknumber': [u'03'], 'title': [u"The Earth Isn't Humming"], 'artist': [u'Thrice']}
{'album': [u'The Alchemy Index, Volumes III & IV'], 'tracknumber': [u'02'], 'title': [u'Digging My Own Grave'], 'artist': [u'Thrice']}
{'album': [u'The Alchemy Index, Volumes III & IV'], 'tracknumber': [u'04'], 'title': [u'The Lion and the Wolf'], 'artist': [u'Thrice']}
{'album': [u'Come All You Weary'], 'tracknumber': [u'01'], 'title': [u'Come All You Weary'], 'artist': [u'Thrice']}

Looks like we've run into a problem. One of our tracks appears on multiple albums, and the first option isn't the one we wanted. We could solve this in a number of ways.
- Try to extract more information from the file path or ID3 data.
This would require that such information is available and consistent. Because our music collection could potentially be in any file structure, we cannot rely on extracting more data from the file path. We also cannot count on any more ID3 data, as the whole purpose of using the web service is to fill in data we do not have.
- Use the files in the same directory to determine ID3 data. For example, 5 out of the 6 tracks on the album I am using come from the same album, thus the 6th track must come from that album as well.
Again, this requires some form of file structure. If a user has all of their mp3 files in a single folder, this method would not work.
- Leave the file untouched, leaving the user to manually fix any mp3 files that cannot reliably be tagged and moved.
We want to automate as much as possible, so this option seems counter to our goals.
- Present the options to the user and have him or her select between them.
This is the one I am most fond of. The user can look at the file path and determine which is the most appropriate selection for the file. This should prevent any errors and only needs user interaction if files are missing ID3 info and appear on multiple albums.
Code:
#!/usr/bin/python import sys, os, fnmatch, shutil, argparse, re, urllib, urllib2, xml.dom.minidom from mutagen.easyid3 import EasyID3 from mutagen.id3 import ID3, TIT2 def getTrackNumber(track): if track.find("/") > -1: return getTrackNumber(track[0:track.find("/")]) elif track.isdigit(): return track.zfill(2) def getID3FromFilename(file, id3info): filename = os.path.basename(file) m = re.match(r"(?P<trackNumber>\d{2})\. (?P<artist>((?! -).)+) - (?P<title>[^\.]+)\.mp3", filename) id3info["tracknumber"] = m.group('trackNumber') id3info["artist"] = m.group('artist') id3info["title"] = m.group('title') id3info["album"] = file.split('/')[-2] id3info.save() def getID3FromWeb(file, id3info): # Get the Artist and Title from id3info to make the query _artist = id3info["artist"][0] _title = id3info["title"][0] # Query the MusicBrainz web service try: query = { 'query' : 'artist:' + _artist + ' AND recording:' + _title } response = urllib2.urlopen('http://musicbrainz.org/ws/2/recording?' + urllib.urlencode(query)) x = xml.dom.minidom.parse(response) recordingList = x.getElementsByTagNameNS('http://musicbrainz.org/ns/mmd-2.0#', 'recording-list')[0] recording = recordingList.getElementsByTagName('recording')[0] if recording.nodeType == 1 and recording.attributes.get('ext:score').value == '100': id3info["tracknumber"] = getTrackNumber(str(int(recording.getElementsByTagName('track-list')[0].attributes.get('offset').value)+1)) id3info["artist"] = recording.getElementsByTagName('name')[0].firstChild.nodeValue id3info["title"] = recording.getElementsByTagName('title')[0].firstChild.nodeValue # Check for multiple releases containing the title releaseList = recording.getElementsByTagName('release-list')[0] releases = releaseList.getElementsByTagName('release') releaseOpts = [] for release in releases: releaseTitle = release.getElementsByTagName('title')[0].firstChild.nodeValue # If the release is not already in the list of options, add it if releaseTitle not in releaseOpts: 
releaseOpts.append(releaseTitle) # If more than one release option exists, ask the user to pick one if len(releaseOpts) > 1: # Ask the user which option they'd prefer print file + " has multiple options:" index = 0 print "0) Ignore file and discard edits." for opt in releaseOpts: index += 1 print str(index) + ") " + opt choice = input("Choice: ") if choice == 0: print "You chose to ignore the file." else: id3info["album"] = releaseOpts[choice - 1] id3info["tracknumber"] = getTrackNumber(str(int(releases[choice - 1].getElementsByTagName('track-list')[0].attributes.get('offset').value)+1)) else: id3info["album"] = releaseOpts[0] print id3info except: print sys.exc_info()[0] def move(output, file, dirMatch): # Check if there are ID3 tags to begin with, if not, it will complain try: tag = ID3(file) except: tag = ID3() tag.add(TIT2(encoding=3, text=["Title"])) tag.save(file) id3info = EasyID3(file) try: _trackNumber = getTrackNumber(id3info["tracknumber"][0]) _artist = id3info["artist"][0] _title = id3info["title"][0] _album = id3info["album"][0] except KeyError: if dirMatch: getID3FromFilename(file, id3info) return else: getID3FromWeb(file, id3info) return return outputDir = output + "/" + _artist + "/" + _album + "/" outputFile = _trackNumber + ". " + _artist + " - " + _title + ".mp3" if not os.path.exists(outputDir): os.makedirs(outputDir) shutil.move(file, outputDir + outputFile) def main(): parser = argparse.ArgumentParser() parser.add_argument('-d', '--directory', nargs=1, required=True, help='') parser.add_argument('-o', '--output', nargs=1, required=True, help='') args = parser.parse_args() directory = os.path.abspath(args.directory[0]) output = os.path.abspath(args.output[0]) dirMatch = (directory == output) for root, subFolders, filenames in os.walk(directory): for filename in fnmatch.filter(filenames, '*.mp3'): move(output, os.path.join(root, filename), dirMatch) main()Result:
$ ./mp3-tagger.py -d /UntamedMusic/ -o /Music/
{'album': [u'The Alchemy Index, Volumes III & IV'], 'tracknumber': [u'01'], 'title': [u'Moving Mountains'], 'artist': [u'Thrice']}
{'album': [u'The Alchemy Index, Volumes III & IV'], 'tracknumber': [u'06'], 'title': [u'Child of Dust'], 'artist': [u'Thrice']}
{'album': [u'The Alchemy Index, Volumes III & IV'], 'tracknumber': [u'03'], 'title': [u"The Earth Isn't Humming"], 'artist': [u'Thrice']}
{'album': [u'The Alchemy Index, Volumes III & IV'], 'tracknumber': [u'02'], 'title': [u'Digging My Own Grave'], 'artist': [u'Thrice']}
{'album': [u'The Alchemy Index, Volumes III & IV'], 'tracknumber': [u'04'], 'title': [u'The Lion and the Wolf'], 'artist': [u'Thrice']}
/UntamedMusic/The Alchemy Index Vol. 4 - Earth/Thrice - Come All You Weary.mp3 has multiple options:
0) Ignore file and discard edits.
1) Come All You Weary
2) The Alchemy Index, Volumes III & IV
Choice: 2
{'album': [u'The Alchemy Index, Volumes III & IV'], 'tracknumber': ['05'], 'title': [u'Come All You Weary'], 'artist': [u'Thrice']}

Now our ID3 data looks exactly how we want it to, so we can move on to the final step of saving the data and moving the file.