Download Audiobook Cover Images with Python: A CLI Tool
If you’ve ever wanted to automatically download and save cover images for your audiobooks, this Python script is for you. This command-line interface (CLI) tool extracts metadata from an MP3 file and uses it to find and save the corresponding book cover image. In this blog post, we’ll walk through what the code does, how to use it, and what you need to run it.
Table of Contents
Overview
This Python script performs the following tasks:
- Extract Metadata: Reads the title and author from the MP3 file’s metadata using the `mutagen` library.
- Search for Book Cover: Constructs a query to search for the book cover image using Google Images.
- Download Image: Downloads the cover image and saves it to the same directory as the MP3 file.
- CLI Interface: Uses `argparse` to allow users to specify the path to the MP3 file directly from the command line.
Code Breakdown
1. Importing Libraries
import argparse
from mutagen.mp3 import MP3
from mutagen.id3 import ID3, TIT2, TPE1
import requests
from bs4 import BeautifulSoup
import os
import re
- `argparse`: For handling command-line arguments.
- `mutagen`: For reading MP3 metadata.
- `requests`: For making HTTP requests.
- `BeautifulSoup`: For parsing HTML and extracting image URLs.
- `os`: For file and directory operations.
- `re`: For regular expressions to sanitize filenames.
2. Extract Metadata
def extract_metadata(file_path):
    """Read the title and author from an MP3 file's ID3 tags.

    Args:
        file_path: Path to the MP3 file.

    Returns:
        A ``(title, author)`` tuple taken from the ``TIT2`` and ``TPE1``
        frames. Either element is ``None`` when the frame is missing or
        holds no text.
    """
    audio = MP3(file_path, ID3=ID3)

    def first_text(frame_id):
        frame = audio.get(frame_id)
        # A frame can be present with an empty text list; indexing [0] on it
        # would raise IndexError, so treat that case like a missing frame.
        if not frame or not frame.text:
            return None
        return frame.text[0]

    return first_text('TIT2'), first_text('TPE1')
This function extracts the title and author from the MP3 file’s metadata. The `ID3` class is used to read the ID3v2 metadata: the title comes from the `TIT2` frame and the author from the `TPE1` frame.
3. Search for Book Cover
def search_book_cover(title, author):
    """Search Google Images for a book cover and return the first image URL.

    Args:
        title: Book title used in the search query.
        author: Book author; left out of the query when empty or ``None``.

    Returns:
        The ``src`` of the first ``<img>`` whose URL starts with ``http``,
        or ``None`` when the request fails or no such image is present.
    """
    # Skip missing parts so a None author does not become the word "None".
    query = " ".join(str(part) for part in (title, author, "book cover") if part)
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        # Passing the query via ``params`` lets requests percent-encode it, so
        # characters such as '&', '#' or '+' in a title cannot corrupt the URL.
        # The timeout keeps the CLI from hanging on an unresponsive server.
        response = requests.get(
            "https://www.google.com/search",
            params={"tbm": "isch", "q": query},
            headers=headers,
            timeout=10,
        )
    except requests.RequestException as error:
        print(f"Cover search failed: {error}")
        return None
    soup = BeautifulSoup(response.text, "html.parser")
    for img in soup.find_all("img"):
        img_url = img.get("src")
        if img_url and img_url.startswith("http"):
            return img_url
    return None
This function constructs a search query for Google Images based on the book’s title and author, then parses the HTML to find and return the URL of the first image.
4. Download the Image
def download_image(url, save_path):
    """Download the image at *url* and write it to *save_path*.

    Args:
        url: Address of the image to fetch.
        save_path: Destination file path; it is opened in ``'wb'`` mode, so
            an existing file is overwritten.

    Prints a status message on success and on failure; returns nothing.
    """
    print(f"Downloading image from URL: {url}")
    try:
        # A timeout keeps the CLI from hanging on an unresponsive host.
        response = requests.get(url, timeout=10)
    except requests.RequestException as error:
        # Report connection problems the same way as a bad status code
        # instead of ending the program with a traceback.
        print(f"Failed to download image: {error}")
        return
    if response.status_code != 200:
        print("Failed to download image, status code:", response.status_code)
        return
    with open(save_path, 'wb') as file:
        file.write(response.content)
    print(f"Cover image saved to {save_path}")
This function downloads the image from the given URL and saves it to the specified path.
5. Sanitize Filename
def sanitize_filename(filename):
    """Remove characters that are not allowed in file names.

    Strips both path separators (forward slash and backslash) and the
    characters ``: * ? " < > |``.

    Args:
        filename: Proposed file name (without directory).

    Returns:
        The name with every offending character deleted.
    """
    # The doubled backslash puts a literal backslash in the character class;
    # a single one would merely escape the following slash and leave
    # backslashes in the result.
    return re.sub(r'[\\/:*?"<>|]', '', filename)
This function removes invalid characters from the filename to ensure compatibility with different filesystems.
6. Main Function
def main():
    """Parse the command line and save a cover image next to the given MP3.

    Reads the ``mp3_path`` argument, extracts title and author from the
    file, searches for a cover image and stores it as ``<title>.jpg`` in the
    MP3 file's directory. Progress and failures are reported with ``print``.
    """
    parser = argparse.ArgumentParser(description="Download audiobook cover image.")
    parser.add_argument('mp3_path', type=str, help="Path to the MP3 file")
    args = parser.parse_args()
    file_path = args.mp3_path

    if not os.path.isfile(file_path):
        print(f"File not found: {file_path}")
        return

    print(f"Extracting metadata from: {file_path}")
    title, author = extract_metadata(file_path)
    print(f"Extracted Title: {title}, Author: {author}")
    if not title:
        print("Metadata extraction failed")
        return

    # A title made only of stripped characters would give the hidden file
    # ".jpg", so fall back to a fixed name.
    sanitized_title = sanitize_filename(title).strip() or "cover"
    # dirname() is "" for a bare file name such as "book.mp3"; join() then
    # yields a path relative to the current directory. No directory is
    # created here: the MP3 was just found in this directory, and calling
    # os.makedirs("") would raise FileNotFoundError.
    image_directory = os.path.dirname(file_path)
    image_save_path = os.path.join(image_directory, f"{sanitized_title}.jpg")

    cover_url = search_book_cover(title, author)
    if not cover_url:
        print("Cover image not found")
        return
    download_image(cover_url, image_save_path)


if __name__ == "__main__":
    main()
The `main` function uses `argparse` to get the MP3 file path from the command line. It then performs metadata extraction, searches for the cover image, and downloads it to the same directory as the MP3 file.
How to Use the Script
- Save the Script: Save the code to a file named `download_cover.py`.
- Install Required Packages: Make sure you have the following Python packages installed:
  - mutagen
  - requests
  - beautifulsoup4
  You can install them using pip:
  pip install mutagen requests beautifulsoup4
- Run the Script: Open your terminal and run the script by providing the path to the MP3 file:
  python download_cover.py /path/to/your/file.mp3
The script will extract the title and author from the MP3 file, search for the book cover, and save the cover image in the same directory as the MP3 file.
Requirements
- Python 3.x: Ensure you have Python 3 installed.
- Libraries: `mutagen`, `requests`, `beautifulsoup4`.
Complete Code
Here is the complete code for the script:
import argparse
from mutagen.mp3 import MP3
from mutagen.id3 import ID3, TIT2, TPE1
import requests
from bs4 import BeautifulSoup
import os
import re
def extract_metadata(file_path):
    """Read the title and author from an MP3 file's ID3 tags.

    Args:
        file_path: Path to the MP3 file.

    Returns:
        A ``(title, author)`` tuple taken from the ``TIT2`` and ``TPE1``
        frames. Either element is ``None`` when the frame is missing or
        holds no text.
    """
    audio = MP3(file_path, ID3=ID3)

    def first_text(frame_id):
        frame = audio.get(frame_id)
        # A frame can be present with an empty text list; indexing [0] on it
        # would raise IndexError, so treat that case like a missing frame.
        if not frame or not frame.text:
            return None
        return frame.text[0]

    return first_text('TIT2'), first_text('TPE1')
def search_book_cover(title, author):
    """Search Google Images for a book cover and return the first image URL.

    Args:
        title: Book title used in the search query.
        author: Book author; left out of the query when empty or ``None``.

    Returns:
        The ``src`` of the first ``<img>`` whose URL starts with ``http``,
        or ``None`` when the request fails or no such image is present.
    """
    # Skip missing parts so a None author does not become the word "None".
    query = " ".join(str(part) for part in (title, author, "book cover") if part)
    headers = {"User-Agent": "Mozilla/5.0"}
    try:
        # Passing the query via ``params`` lets requests percent-encode it, so
        # characters such as '&', '#' or '+' in a title cannot corrupt the URL.
        # The timeout keeps the CLI from hanging on an unresponsive server.
        response = requests.get(
            "https://www.google.com/search",
            params={"tbm": "isch", "q": query},
            headers=headers,
            timeout=10,
        )
    except requests.RequestException as error:
        print(f"Cover search failed: {error}")
        return None
    soup = BeautifulSoup(response.text, "html.parser")
    for img in soup.find_all("img"):
        img_url = img.get("src")
        if img_url and img_url.startswith("http"):
            return img_url
    return None
def download_image(url, save_path):
    """Download the image at *url* and write it to *save_path*.

    Args:
        url: Address of the image to fetch.
        save_path: Destination file path; it is opened in ``'wb'`` mode, so
            an existing file is overwritten.

    Prints a status message on success and on failure; returns nothing.
    """
    print(f"Downloading image from URL: {url}")
    try:
        # A timeout keeps the CLI from hanging on an unresponsive host.
        response = requests.get(url, timeout=10)
    except requests.RequestException as error:
        # Report connection problems the same way as a bad status code
        # instead of ending the program with a traceback.
        print(f"Failed to download image: {error}")
        return
    if response.status_code != 200:
        print("Failed to download image, status code:", response.status_code)
        return
    with open(save_path, 'wb') as file:
        file.write(response.content)
    print(f"Cover image saved to {save_path}")
def sanitize_filename(filename):
    """Remove characters that are not allowed in file names.

    Strips both path separators (forward slash and backslash) and the
    characters ``: * ? " < > |``.

    Args:
        filename: Proposed file name (without directory).

    Returns:
        The name with every offending character deleted.
    """
    # The doubled backslash puts a literal backslash in the character class;
    # a single one would merely escape the following slash and leave
    # backslashes in the result.
    return re.sub(r'[\\/:*?"<>|]', '', filename)
def main():
    """Parse the command line and save a cover image next to the given MP3.

    Reads the ``mp3_path`` argument, extracts title and author from the
    file, searches for a cover image and stores it as ``<title>.jpg`` in the
    MP3 file's directory. Progress and failures are reported with ``print``.
    """
    parser = argparse.ArgumentParser(description="Download audiobook cover image.")
    parser.add_argument('mp3_path', type=str, help="Path to the MP3 file")
    args = parser.parse_args()
    file_path = args.mp3_path

    if not os.path.isfile(file_path):
        print(f"File not found: {file_path}")
        return

    print(f"Extracting metadata from: {file_path}")
    title, author = extract_metadata(file_path)
    print(f"Extracted Title: {title}, Author: {author}")
    if not title:
        print("Metadata extraction failed")
        return

    # A title made only of stripped characters would give the hidden file
    # ".jpg", so fall back to a fixed name.
    sanitized_title = sanitize_filename(title).strip() or "cover"
    # dirname() is "" for a bare file name such as "book.mp3"; join() then
    # yields a path relative to the current directory. No directory is
    # created here: the MP3 was just found in this directory, and calling
    # os.makedirs("") would raise FileNotFoundError.
    image_directory = os.path.dirname(file_path)
    image_save_path = os.path.join(image_directory, f"{sanitized_title}.jpg")

    cover_url = search_book_cover(title, author)
    if not cover_url:
        print("Cover image not found")
        return
    download_image(cover_url, image_save_path)


if __name__ == "__main__":
    main()
Conclusion
This Python CLI tool is a handy way to automatically download and save cover images for your audiobooks. By extracting metadata from MP3 files and leveraging web search, you can easily organize your audiobook library with visually appealing cover images.
Feel free to customize and expand the script to better suit your needs!
Software