Saving The Artwork
Now we want to embed the image data into our mp3 files, so that each file carries its album art and we don't have extra image files cluttering our directories. (With hard drives as cheap as they are, I would rather have slightly larger mp3 files than directories full of extra image files.) To do this, we first need to retrieve the image data from the web and determine its MIME type.
Code:
Now we want to embed the image data into our mp3 files so each file has the album art and we won't have any extra image files to worry about in our directories (with hard drives being as cheap as they are, I would rather my mp3 files be slightly larger in size than my directories be cluttered with extra image files). To do this, we need to first retrieve the image data from the web and determine its mime-type.
Code:
#!/usr/bin/python
import sys, os, fnmatch, shutil, argparse, re, urllib, urllib2, xml.dom.minidom, glob
from mutagen.easyid3 import EasyID3
from mutagen.mp3 import MP3
def getTrackNumber(track):
    """Return the track number held in *track* as a zero-padded string.

    Everything from the first "/" onwards is ignored, so "3/12" is treated
    like "3". The remaining text is padded to two characters with zfill.
    Returns None when that text is not made up solely of digits.
    """
    number = track.split("/", 1)[0]
    if not number.isdigit():
        return None
    return number.zfill(2)
def getID3FromFilename(file, id3info):
    """Fill *id3info* from a path shaped like ".../<album>/NN. Artist - Title.mp3".

    The track number, artist and title are parsed from the file name and the
    album is taken from the name of the directory containing the file. The
    tags are saved through id3info.save() and id3info is returned.

    If the file name does not follow the expected pattern a message is
    printed and id3info is returned unchanged and unsaved.
    """
    filename = os.path.basename(file)
    # The title may itself contain dots ("Mr. Song"), so it runs up to the
    # final ".mp3" rather than stopping at the first dot.
    m = re.match(r"(?P<trackNumber>\d{2})\. (?P<artist>((?! -).)+) - (?P<title>.+)\.mp3$", filename)
    if m is None:
        print("Could not read tags from filename: " + filename)
        return id3info
    id3info["tracknumber"] = m.group('trackNumber')
    id3info["artist"] = m.group('artist')
    id3info["title"] = m.group('title')
    # Name of the containing directory, independent of the path separator.
    id3info["album"] = os.path.basename(os.path.dirname(file))
    id3info.save()
    return id3info
def getID3FromWeb(file, id3info):
    """Look up tags for *file* on MusicBrainz and store them in *id3info*.

    The artist and title already present in id3info are used to search for a
    recording. Only a first result whose score is '100' is accepted; its
    track number, artist, title and album are then written to id3info and
    saved. When the recording appears on releases with different titles the
    user is asked to pick one, or to ignore the file, in which case nothing
    is written to id3info.

    Lookup problems (network errors, unexpected XML, an invalid choice) are
    reported on stdout and id3info is still returned. Raises KeyError if
    id3info has no "artist" or "title" entry.
    """
    def _utf8(value):
        # urllib.urlencode() calls str() on each value, which fails for
        # non-ASCII unicode text, so hand it UTF-8 bytes instead.
        if isinstance(value, unicode):
            return value.encode('utf-8')
        return value

    def _text(node, tag):
        # Text of the first <tag> element found below node.
        return node.getElementsByTagName(tag)[0].firstChild.nodeValue

    def _trackNumber(node):
        # The tag value is the track-list 'offset' attribute plus one.
        trackList = node.getElementsByTagName('track-list')[0]
        offset = int(trackList.attributes.get('offset').value)
        return getTrackNumber(str(offset + 1))

    # Get the Artist and Title from id3info to make the query
    _artist = id3info["artist"][0]
    _title = id3info["title"][0]
    # Query the MusicBrainz web service
    try:
        query = { 'query' : _utf8('artist:' + _artist + ' AND recording:' + _title) }
        response = urllib2.urlopen('http://musicbrainz.org/ws/2/recording?' + urllib.urlencode(query))
        try:
            x = xml.dom.minidom.parse(response)
        finally:
            response.close()
        recordingList = x.getElementsByTagNameNS('http://musicbrainz.org/ns/mmd-2.0#', 'recording-list')[0]
        recording = recordingList.getElementsByTagName('recording')[0]
        if recording.nodeType != 1 or recording.attributes.get('ext:score').value != '100':
            return id3info
        # Collect the edits first so that "ignore" really discards them.
        edits = {
            "tracknumber" : _trackNumber(recording),
            "artist" : _text(recording, 'name'),
            "title" : _text(recording, 'title'),
        }
        # Check for multiple releases containing the title
        releaseList = recording.getElementsByTagName('release-list')[0]
        releaseOpts = []
        releaseNodes = []
        for release in releaseList.getElementsByTagName('release'):
            releaseTitle = _text(release, 'title')
            if releaseTitle not in releaseOpts:
                releaseOpts.append(releaseTitle)
                # Kept in step with releaseOpts so a menu choice maps to the
                # release that actually carries the chosen title.
                releaseNodes.append(release)
        if len(releaseOpts) > 1:
            # Ask the user which option they'd prefer
            print(file + " has multiple options:")
            print("0) Ignore file and discard edits.")
            for index, opt in enumerate(releaseOpts, 1):
                print(str(index) + ") " + opt)
            # raw_input() + int(): Python 2's input() would evaluate whatever
            # the user types as Python code.
            choice = int(raw_input("Choice: "))
            if choice == 0:
                print("You chose to ignore the file.")
                return id3info
            if not 1 <= choice <= len(releaseOpts):
                raise ValueError("choice out of range: " + str(choice))
            edits["album"] = releaseOpts[choice - 1]
            edits["tracknumber"] = _trackNumber(releaseNodes[choice - 1])
        else:
            edits["album"] = releaseOpts[0]
        for key in edits:
            id3info[key] = edits[key]
        id3info.save()
    except Exception as err:
        # Best effort: report the failure and carry on with the next file.
        # KeyboardInterrupt is deliberately not caught, so Ctrl-C still works.
        print(file + ": lookup failed: " + repr(err))
    return id3info
def getAlbumArtwork(artist, album):
    """Fetch the 'extralarge' cover image for *album* by *artist*.

    Queries the audioscrobbler web service (method album.getinfo), takes the
    URL from the first <image size="extralarge"> element that has content,
    and downloads it.

    Returns a dict with 'mime' (the result of getMimeType() for the image
    URL) and 'data' (the downloaded bytes).

    Raises ValueError when the response holds no extralarge image URL.
    Errors raised by urllib2 or the XML parser propagate to the caller.
    """
    def _utf8(value):
        # urllib.urlencode() calls str() on each value, which fails for
        # non-ASCII unicode text, so hand it UTF-8 bytes instead.
        if isinstance(value, unicode):
            return value.encode('utf-8')
        return value

    query = {
        'method' : 'album.getinfo',
        'api_key' : '<Your API Key>',
        'artist' : _utf8(artist),
        'album' : _utf8(album)
    }
    response = urllib2.urlopen('http://ws.audioscrobbler.com/2.0/?' + urllib.urlencode(query))
    try:
        x = xml.dom.minidom.parse(response)
    finally:
        response.close()
    imageUrl = ""
    for image in x.getElementsByTagName('image'):
        # An <image> element without text has no firstChild; skip it instead
        # of failing on None.
        if image.getAttribute('size') == 'extralarge' and image.firstChild is not None:
            imageUrl = image.firstChild.nodeValue.strip()
            break
    if not imageUrl:
        # urlopen("") would also raise ValueError, but with a message that
        # says nothing about which album is affected.
        raise ValueError("no extralarge artwork found for %r - %r" % (artist, album))
    imageResponse = urllib2.urlopen(imageUrl)
    try:
        data = imageResponse.read()
    finally:
        imageResponse.close()
    artwork = {
        'mime' : getMimeType(imageUrl),
        'data' : data
    }
    return artwork
def getMimeType(imageUrl):
    """Return the MIME type implied by the file extension of *imageUrl*.

    The extension is compared case-insensitively, and any query string or
    fragment after the path is ignored. Returns "" when the URL has no
    extension or the extension is not one of jpg, jpeg, png or gif.
    """
    mimeTypes = {
        'jpg' : 'image/jpeg',
        'jpeg' : 'image/jpeg',
        'png' : 'image/png',
        'gif' : 'image/gif'
    }
    # Drop "?query" and "#fragment" so "cover.png?size=300" still ends in ".png".
    path = imageUrl.split('?', 1)[0].split('#', 1)[0]
    ext = os.path.splitext(os.path.basename(path))[1][1:].strip().lower()
    # .get() keeps the "" fallback for unknown extensions; a plain lookup
    # would raise KeyError for them.
    return mimeTypes.get(ext, "")
def move(output, file, dirMatch):
    """Make sure *file* is tagged, then move it below *output*.

    The destination is <output>/<artist>/<album>/<NN>. <artist> - <title>.mp3,
    built from the file's ID3 tags. When a required tag is missing, the tags
    are filled in with getID3FromFilename() if dirMatch is true, otherwise
    with getID3FromWeb(). A file whose tags are still incomplete afterwards,
    or whose track number is not numeric, is reported on stdout and left
    where it is.
    """
    # Imported here because this script's import block does not bring ID3
    # into scope.
    from mutagen.id3 import ID3

    def _readTags(tags):
        # Raises KeyError when any of the four required fields is missing.
        return (
            getTrackNumber(tags["tracknumber"][0]),
            tags["artist"][0],
            tags["title"][0],
            tags["album"][0],
        )

    def _pathSafe(name):
        # A path separator inside a tag value (e.g. "AC/DC") would otherwise
        # add an unintended directory level to the destination.
        return name.replace(os.sep, "-").replace("/", "-")

    audio = MP3(file, ID3=ID3)
    # Add ID3 tag if none exist
    if audio.tags is None:
        audio.add_tags()
    audio.save()
    id3info = EasyID3(file)
    try:
        fields = _readTags(id3info)
    except KeyError:
        try:
            if dirMatch:
                id3info = getID3FromFilename(file, id3info)
            else:
                id3info = getID3FromWeb(file, id3info)
            fields = _readTags(id3info)
        except KeyError:
            print("Skipping " + file + ": required ID3 tags are missing")
            return
    _trackNumber, _artist, _title, _album = fields
    if _trackNumber is None:
        # getTrackNumber() returns None for non-numeric values.
        print("Skipping " + file + ": track number is not numeric")
        return
    outputDir = os.path.join(output, _pathSafe(_artist), _pathSafe(_album))
    outputFile = _trackNumber + ". " + _pathSafe(_artist) + " - " + _pathSafe(_title) + ".mp3"
    outputPath = os.path.join(outputDir, outputFile)
    print("Saving file  " + outputPath)
    if not os.path.exists(outputDir):
        os.makedirs(outputDir)
    shutil.move(file, outputPath)
def main():
    """Move every mp3 under --directory into --output, then print artwork info.

    Each mp3 file found below the source directory is passed to move().
    Afterwards every <output>/<artist>/<album> directory is looked up with
    getAlbumArtwork(), using the directory names as artist and album, and
    the result is printed.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--directory', nargs=1, required=True, help='')
    parser.add_argument('-o', '--output', nargs=1, required=True, help='')
    args = parser.parse_args()
    directory = os.path.abspath(args.directory[0])
    output = os.path.abspath(args.output[0])
    dirMatch = (directory == output)
    # Collect the files before moving anything: move() relocates files, and
    # when the output lies inside the walked tree os.walk could otherwise
    # come across the same file again at its new location.
    mp3Files = []
    for root, subFolders, filenames in os.walk(directory):
        for filename in fnmatch.filter(filenames, '*.mp3'):
            mp3Files.append(os.path.join(root, filename))
    for mp3File in mp3Files:
        move(output, mp3File, dirMatch)
    # Build a list of all the album directories
    dirList = [d for d in glob.glob(os.path.join(output, '*', '*')) if os.path.isdir(d)]
    for albumDir in dirList:
        # <output>/<artist>/<album>: split with os.path so the separator
        # is not hard-coded.
        artistDir, album = os.path.split(albumDir)
        print(getAlbumArtwork(os.path.basename(artistDir), album))
# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()
Result:
$ ./mp3-tagger.py -d /UntamedMusic/ -o /Music/
{'data': '<File Data>', 'mime' : 'image/jpeg'}
{'data': '<File Data>', 'mime' : 'image/jpeg'}
Things are looking good. We are getting data back for the images, and the MIME types are correct as well. Now we just need to save the artwork to the files.
Code:
#!/usr/bin/python
import sys, os, fnmatch, shutil, argparse, re, urllib, urllib2, xml.dom.minidom, glob
from mutagen.easyid3 import EasyID3
from mutagen.mp3 import MP3
from mutagen.id3 import ID3, APIC
def getTrackNumber(track):
    """Return the track number held in *track* as a zero-padded string.

    Everything from the first "/" onwards is ignored, so "3/12" is treated
    like "3". The remaining text is padded to two characters with zfill.
    Returns None when that text is not made up solely of digits.
    """
    number = track.split("/", 1)[0]
    if not number.isdigit():
        return None
    return number.zfill(2)
def getID3FromFilename(file, id3info):
    """Fill *id3info* from a path shaped like ".../<album>/NN. Artist - Title.mp3".

    The track number, artist and title are parsed from the file name and the
    album is taken from the name of the directory containing the file. The
    tags are saved through id3info.save() and id3info is returned.

    If the file name does not follow the expected pattern a message is
    printed and id3info is returned unchanged and unsaved.
    """
    filename = os.path.basename(file)
    # The title may itself contain dots ("Mr. Song"), so it runs up to the
    # final ".mp3" rather than stopping at the first dot.
    m = re.match(r"(?P<trackNumber>\d{2})\. (?P<artist>((?! -).)+) - (?P<title>.+)\.mp3$", filename)
    if m is None:
        print("Could not read tags from filename: " + filename)
        return id3info
    id3info["tracknumber"] = m.group('trackNumber')
    id3info["artist"] = m.group('artist')
    id3info["title"] = m.group('title')
    # Name of the containing directory, independent of the path separator.
    id3info["album"] = os.path.basename(os.path.dirname(file))
    id3info.save()
    return id3info
def getID3FromWeb(file, id3info):
    """Look up tags for *file* on MusicBrainz and store them in *id3info*.

    The artist and title already present in id3info are used to search for a
    recording. Only a first result whose score is '100' is accepted; its
    track number, artist, title and album are then written to id3info and
    saved. When the recording appears on releases with different titles the
    user is asked to pick one, or to ignore the file, in which case nothing
    is written to id3info.

    Lookup problems (network errors, unexpected XML, an invalid choice) are
    reported on stdout and id3info is still returned. Raises KeyError if
    id3info has no "artist" or "title" entry.
    """
    def _utf8(value):
        # urllib.urlencode() calls str() on each value, which fails for
        # non-ASCII unicode text, so hand it UTF-8 bytes instead.
        if isinstance(value, unicode):
            return value.encode('utf-8')
        return value

    def _text(node, tag):
        # Text of the first <tag> element found below node.
        return node.getElementsByTagName(tag)[0].firstChild.nodeValue

    def _trackNumber(node):
        # The tag value is the track-list 'offset' attribute plus one.
        trackList = node.getElementsByTagName('track-list')[0]
        offset = int(trackList.attributes.get('offset').value)
        return getTrackNumber(str(offset + 1))

    # Get the Artist and Title from id3info to make the query
    _artist = id3info["artist"][0]
    _title = id3info["title"][0]
    # Query the MusicBrainz web service
    try:
        query = { 'query' : _utf8('artist:' + _artist + ' AND recording:' + _title) }
        response = urllib2.urlopen('http://musicbrainz.org/ws/2/recording?' + urllib.urlencode(query))
        try:
            x = xml.dom.minidom.parse(response)
        finally:
            response.close()
        recordingList = x.getElementsByTagNameNS('http://musicbrainz.org/ns/mmd-2.0#', 'recording-list')[0]
        recording = recordingList.getElementsByTagName('recording')[0]
        if recording.nodeType != 1 or recording.attributes.get('ext:score').value != '100':
            return id3info
        # Collect the edits first so that "ignore" really discards them.
        edits = {
            "tracknumber" : _trackNumber(recording),
            "artist" : _text(recording, 'name'),
            "title" : _text(recording, 'title'),
        }
        # Check for multiple releases containing the title
        releaseList = recording.getElementsByTagName('release-list')[0]
        releaseOpts = []
        releaseNodes = []
        for release in releaseList.getElementsByTagName('release'):
            releaseTitle = _text(release, 'title')
            if releaseTitle not in releaseOpts:
                releaseOpts.append(releaseTitle)
                # Kept in step with releaseOpts so a menu choice maps to the
                # release that actually carries the chosen title.
                releaseNodes.append(release)
        if len(releaseOpts) > 1:
            # Ask the user which option they'd prefer
            print(file + " has multiple options:")
            print("0) Ignore file and discard edits.")
            for index, opt in enumerate(releaseOpts, 1):
                print(str(index) + ") " + opt)
            # raw_input() + int(): Python 2's input() would evaluate whatever
            # the user types as Python code.
            choice = int(raw_input("Choice: "))
            if choice == 0:
                print("You chose to ignore the file.")
                return id3info
            if not 1 <= choice <= len(releaseOpts):
                raise ValueError("choice out of range: " + str(choice))
            edits["album"] = releaseOpts[choice - 1]
            edits["tracknumber"] = _trackNumber(releaseNodes[choice - 1])
        else:
            edits["album"] = releaseOpts[0]
        for key in edits:
            id3info[key] = edits[key]
        id3info.save()
    except Exception as err:
        # Best effort: report the failure and carry on with the next file.
        # KeyboardInterrupt is deliberately not caught, so Ctrl-C still works.
        print(file + ": lookup failed: " + repr(err))
    return id3info
def getAlbumArtwork(artist, album):
    """Fetch the 'extralarge' cover image for *album* by *artist*.

    Queries the audioscrobbler web service (method album.getinfo), takes the
    URL from the first <image size="extralarge"> element that has content,
    and downloads it.

    Returns a dict with 'mime' (the result of getMimeType() for the image
    URL) and 'data' (the downloaded bytes).

    Raises ValueError when the response holds no extralarge image URL.
    Errors raised by urllib2 or the XML parser propagate to the caller.
    """
    def _utf8(value):
        # urllib.urlencode() calls str() on each value, which fails for
        # non-ASCII unicode text, so hand it UTF-8 bytes instead.
        if isinstance(value, unicode):
            return value.encode('utf-8')
        return value

    query = {
        'method' : 'album.getinfo',
        'api_key' : '<Your API Key>',
        'artist' : _utf8(artist),
        'album' : _utf8(album)
    }
    response = urllib2.urlopen('http://ws.audioscrobbler.com/2.0/?' + urllib.urlencode(query))
    try:
        x = xml.dom.minidom.parse(response)
    finally:
        response.close()
    imageUrl = ""
    for image in x.getElementsByTagName('image'):
        # An <image> element without text has no firstChild; skip it instead
        # of failing on None.
        if image.getAttribute('size') == 'extralarge' and image.firstChild is not None:
            imageUrl = image.firstChild.nodeValue.strip()
            break
    if not imageUrl:
        # urlopen("") would also raise ValueError, but with a message that
        # says nothing about which album is affected.
        raise ValueError("no extralarge artwork found for %r - %r" % (artist, album))
    imageResponse = urllib2.urlopen(imageUrl)
    try:
        data = imageResponse.read()
    finally:
        imageResponse.close()
    artwork = {
        'mime' : getMimeType(imageUrl),
        'data' : data
    }
    return artwork
def getMimeType(imageUrl):
    """Return the MIME type implied by the file extension of *imageUrl*.

    The extension is compared case-insensitively, and any query string or
    fragment after the path is ignored. Returns "" when the URL has no
    extension or the extension is not one of jpg, jpeg, png or gif.
    """
    mimeTypes = {
        'jpg' : 'image/jpeg',
        'jpeg' : 'image/jpeg',
        'png' : 'image/png',
        'gif' : 'image/gif'
    }
    # Drop "?query" and "#fragment" so "cover.png?size=300" still ends in ".png".
    path = imageUrl.split('?', 1)[0].split('#', 1)[0]
    ext = os.path.splitext(os.path.basename(path))[1][1:].strip().lower()
    # .get() keeps the "" fallback for unknown extensions; a plain lookup
    # would raise KeyError for them.
    return mimeTypes.get(ext, "")
def saveAlbumArtwork(albumDir):
    """Embed the album's cover art in every mp3 file found under *albumDir*.

    The artist and album are read from the tags of the first mp3 file found.
    The artwork is fetched once with getAlbumArtwork() and added to each
    file as an APIC (cover image) frame. If the artwork cannot be fetched,
    or comes back without a mime type or data, the problem is reported on
    stdout and the album's files are left as they are.
    """
    artwork = None
    for root, subFolders, filenames in os.walk(albumDir):
        for filename in fnmatch.filter(filenames, '*.mp3'):
            path = os.path.join(root, filename)
            if artwork is None:
                # Determine the artist and album from the first mp3 file in the dir
                id3info = EasyID3(path)
                try:
                    album = id3info["album"][0]
                    artist = id3info["artist"][0]
                    artwork = getAlbumArtwork(artist, album)
                except Exception as err:
                    # Per-album boundary: one album without usable artwork
                    # should not abort the run for all the others.
                    print("No artwork saved for " + albumDir + ": " + repr(err))
                    return
                if not artwork['mime'] or not artwork['data']:
                    # getMimeType() can return "" and a download can be empty;
                    # embedding either would write a useless picture frame.
                    print("No artwork saved for " + albumDir + ": unusable image")
                    return
            # Save the artwork to the file
            audio = MP3(path)
            if audio.tags is None:
                audio.add_tags()
            audio.tags.add(
                APIC(
                    encoding=3, # 3 is for utf-8
                    mime=artwork['mime'],
                    type=3, # 3 is for the cover image
                    desc=u'Cover',
                    data=artwork['data']
                )
            )
            audio.save()
def move(output, file, dirMatch):
    """Make sure *file* is tagged, then move it below *output*.

    The destination is <output>/<artist>/<album>/<NN>. <artist> - <title>.mp3,
    built from the file's ID3 tags. When a required tag is missing, the tags
    are filled in with getID3FromFilename() if dirMatch is true, otherwise
    with getID3FromWeb(). A file whose tags are still incomplete afterwards,
    or whose track number is not numeric, is reported on stdout and left
    where it is.
    """
    def _readTags(tags):
        # Raises KeyError when any of the four required fields is missing.
        return (
            getTrackNumber(tags["tracknumber"][0]),
            tags["artist"][0],
            tags["title"][0],
            tags["album"][0],
        )

    def _pathSafe(name):
        # A path separator inside a tag value (e.g. "AC/DC") would otherwise
        # add an unintended directory level to the destination.
        return name.replace(os.sep, "-").replace("/", "-")

    audio = MP3(file, ID3=ID3)
    # Add ID3 tag if none exist
    if audio.tags is None:
        audio.add_tags()
    audio.save()
    id3info = EasyID3(file)
    try:
        fields = _readTags(id3info)
    except KeyError:
        try:
            if dirMatch:
                id3info = getID3FromFilename(file, id3info)
            else:
                id3info = getID3FromWeb(file, id3info)
            fields = _readTags(id3info)
        except KeyError:
            print("Skipping " + file + ": required ID3 tags are missing")
            return
    _trackNumber, _artist, _title, _album = fields
    if _trackNumber is None:
        # getTrackNumber() returns None for non-numeric values.
        print("Skipping " + file + ": track number is not numeric")
        return
    outputDir = os.path.join(output, _pathSafe(_artist), _pathSafe(_album))
    outputFile = _trackNumber + ". " + _pathSafe(_artist) + " - " + _pathSafe(_title) + ".mp3"
    outputPath = os.path.join(outputDir, outputFile)
    print("Saving file  " + outputPath)
    if not os.path.exists(outputDir):
        os.makedirs(outputDir)
    shutil.move(file, outputPath)
def main():
    """Move every mp3 under --directory into --output, then embed album art.

    Each mp3 file found below the source directory is passed to move().
    Afterwards saveAlbumArtwork() is called for every directory matching
    <output>/*/*.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--directory', nargs=1, required=True, help='')
    parser.add_argument('-o', '--output', nargs=1, required=True, help='')
    args = parser.parse_args()
    directory = os.path.abspath(args.directory[0])
    output = os.path.abspath(args.output[0])
    dirMatch = (directory == output)
    # Collect the files before moving anything: move() relocates files, and
    # when the output lies inside the walked tree os.walk could otherwise
    # come across the same file again at its new location.
    mp3Files = []
    for root, subFolders, filenames in os.walk(directory):
        for filename in fnmatch.filter(filenames, '*.mp3'):
            mp3Files.append(os.path.join(root, filename))
    for mp3File in mp3Files:
        move(output, mp3File, dirMatch)
    # Build a list of all the album directories
    dirList = [d for d in glob.glob(os.path.join(output, '*', '*')) if os.path.isdir(d)]
    for albumDir in dirList:
        saveAlbumArtwork(albumDir)
# Run only when executed as a script, not when imported as a module.
if __name__ == "__main__":
    main()