-
Notifications
You must be signed in to change notification settings - Fork 23
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Added script and vagrant related files.
- Added grub.py, the main script - Added content to the README.md file including instructions - Added Vagrantfile and bootstrap.sh for a quick jumpstart on development and usage - Added .gitignore
- Loading branch information
Showing
5 changed files
with
236 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
# App | ||
*.pyc | ||
.vagrant/ | ||
downloads/ | ||
|
||
# OS or program files | ||
.DS_Store | ||
Thumbs.db | ||
.Trashes |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,83 @@ | ||
# slideshare-to-pdf | ||
A python script to help you back up your SlideShare presentation to PDF. — http://grub.cballenar.me/ | ||
# SlideShare to PDF | ||
|
||
A python script to help you back up your SlideShare presentations to PDF. | ||
|
||
|
||
## Requirements | ||
|
||
This script has been tested with Vagrant on an **Ubuntu Trusty 64** VM (Vagrantfile included) and requires the following packages: | ||
|
||
- [ImageMagick](http://www.imagemagick.org/script/index.php) | ||
- [BeautifulSoup](https://www.crummy.com/software/BeautifulSoup/) | ||
- [LXML](http://lxml.de/) | ||
- [Requests](https://requests.readthedocs.io/) | ||
|
||
They can be installed by running: | ||
|
||
```` | ||
apt-get update | ||
apt-get install -y imagemagick python-bs4 python-lxml python-requests | ||
```` | ||
|
||
|
||
## Usage | ||
|
||
### Just run it | ||
|
||
Simply running the script will prompt you to input the SlideShare URL you'd like to download. By default, this file will be saved in the `downloads` directory created in the root of the script. | ||
|
||
```` | ||
./grub.py | ||
Input the SlideShare URL you want to convert: [SLIDE URL] | ||
Reading SlideShare page... | ||
Downloading slide 1... | ||
Downloading slide 2... | ||
[...] | ||
Converting to PDF... | ||
Your file has been successfully created at downloads/[SLIDE NAME].pdf | ||
```` | ||
|
||
|
||
### Run it with Arguments | ||
|
||
#### Input | ||
|
||
Specify the SlideShare URL you'd like to download with `-i`. | ||
|
||
```` | ||
./grub.py -i [SLIDESHARE URL] | ||
```` | ||
|
||
|
||
#### Output | ||
|
||
You can specify where to save your PDF with `-o`. The script will accept a directory or a file path. If only the directory path is specified, the name of the slide will be used. | ||
|
||
```` | ||
./grub.py -o [FOLDER OR FILE PATH] | ||
# save in directory | ||
./grub.py -i [...] -o /home/user/documents/ | ||
# save to file | ||
./grub.py -i [...] -o /home/user/documents/my-slide.pdf | ||
```` | ||
|
||
|
||
#### Quiet | ||
Don't print status messages to stdout. | ||
|
||
```` | ||
./grub.py -q | ||
```` | ||
|
||
|
||
#### Help | ||
Show help message and exit. | ||
|
||
```` | ||
./grub.py -h | ||
```` | ||
|
||
|
||
## Development | ||
This repository includes a Vagrantfile. If you'd like to collaborate, this should help jumpstart the development process. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
# -*- mode: ruby -*- | ||
# vi: set ft=ruby : | ||
# Development VM definition for the SlideShare-to-PDF script. | ||
# The argument 2 selects version 2 of the Vagrant configuration format. | ||
Vagrant.configure(2) do |config| | ||
# Base box the VM is built from. | ||
config.vm.box = "ubuntu/trusty64" | ||
# Provision the VM by running bootstrap.sh with the shell provisioner. | ||
config.vm.provision :shell, path: "bootstrap.sh" | ||
end |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,4 @@ | ||
#!/usr/bin/env bash
#
# Provisioning script run by Vagrant: installs every system package that
# grub.py needs (ImageMagick for the PDF conversion, plus the BeautifulSoup,
# lxml and Requests Python libraries the script imports).

# Stop at the first failing command so a broken `apt-get update` does not
# go unnoticed and leave a half-provisioned machine.
set -euo pipefail

apt-get update
apt-get install -y imagemagick python-bs4 python-lxml python-requests
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
#!/usr/bin/env python | ||
|
||
import os | ||
import re | ||
import sys | ||
import lxml | ||
import errno | ||
import socket | ||
import shutil | ||
import urllib | ||
import argparse | ||
import requests | ||
import tempfile | ||
import subprocess | ||
from bs4 import BeautifulSoup | ||
|
||
# Default output directory and the extension every output file must carry.
DEFAULT_OUTPUT_DIR = 'downloads/'
OUTPUT_FORMAT = '.pdf'

# Seconds to wait on any network operation before giving up.
NETWORK_TIMEOUT = 20

# <img> attributes that may hold a slide's image URL, best resolution first.
SLIDE_URL_ATTRIBUTES = ('data-full', 'data-normal')

# Skips the scheme and host ("http://host/") and then captures the author and
# the presentation name: ".../<author>/<presentation>".
SLIDESHARE_URL = re.compile(r'(?:[^/]*/){3}([A-Za-z0-9_.-]*)/([A-Za-z0-9_.-]*)')

# Python 2 / Python 3 compatibility shims.
try:
    from urllib import urlretrieve
except ImportError:
    from urllib.request import urlretrieve

try:
    read_input = raw_input
except NameError:
    read_input = input


def parse_args(argv=None):
    """Parse the command line.

    argv -- list of arguments to parse; None means sys.argv[1:].

    Returns the argparse namespace with `verbose`, `input` and `output`.
    """
    parser = argparse.ArgumentParser(description='A python script to help you back up your SlideShare presentations to PDF.')
    parser.add_argument('-q', '--quiet', dest='verbose', action='store_false', default=True, help='Don\'t print status messages to stdout.')
    parser.add_argument('-i', '--input', help='SlideShare URL to be processed, e.g.: "http://www.slideshare.net/korlayashwanth/download-disabled-slide-share-ppts-by-authors"')
    parser.add_argument('-o', '--output', help='Path where to save the file. It can be a folder or especific file. e.g.: "\\Users\\user\\Desktop\\my-slides.pdf" OR "\\Users\\user\\Desktop\\". Default: "./downloads/slide-name.pdf".')
    return parser.parse_args(argv)


def default_output_name(url):
    """Build the '<presentation>-by-<author>.pdf' file name from a URL.

    Raises ValueError when the URL does not contain both an author and a
    presentation name, instead of failing later with an AttributeError.
    """
    match = SLIDESHARE_URL.search(url)
    if match is None or not match.group(1) or not match.group(2):
        raise ValueError('Could not build a file name from {}. Expected a URL like "http://www.slideshare.net/author/presentation".'.format(url))
    return '{}-by-{}{}'.format(match.group(2), match.group(1), OUTPUT_FORMAT)


def resolve_output(url, output=None):
    """Return the (directory, file name) pair the PDF will be written to.

    url    -- SlideShare URL, used to derive a name when none is given.
    output -- value of the -o option: a directory, a file path, or None.

    A path ending in a separator is treated as a directory. A file name
    without the '.pdf' extension gets it appended.
    """
    directory, filename = DEFAULT_OUTPUT_DIR, ''
    if output:
        directory, filename = os.path.split(output)

    if not filename:
        filename = default_output_name(url)
    elif not filename.endswith(OUTPUT_FORMAT):
        filename += OUTPUT_FORMAT
    return directory, filename


def ensure_directory(path):
    """Create `path` (and its parents) unless it already exists.

    An empty path means the current directory and is left alone.
    Raises OSError when the directory cannot be created.
    """
    if not path:
        return
    try:
        os.makedirs(path)
    except OSError:
        # An already existing directory is fine; anything else is a real error.
        if not os.path.isdir(path):
            raise


def find_slide_urls(html):
    """Return the image URL of every slide found in the page's HTML.

    The resolution attribute is chosen from the first slide (full resolution
    if present, normal otherwise). An empty list is returned when the page
    has no slides or a slide lacks the chosen attribute.
    """
    soup = BeautifulSoup(html, 'lxml')
    images = soup.find_all('img', attrs={'class': 'slide_image'})
    if not images:
        return []

    for attribute in SLIDE_URL_ATTRIBUTES:
        if images[0].has_attr(attribute):
            urls = [image.get(attribute) for image in images]
            return urls if all(urls) else []
    return []


def download_slides(slide_urls, directory, verbose=True):
    """Download every slide into `directory` and return the local paths.

    Files are named slide-1.jpg, slide-2.jpg, ... in presentation order.
    Download errors propagate to the caller.
    """
    local_slides = []
    for number, remote_slide in enumerate(slide_urls, start=1):
        local_slide = os.path.join(directory, 'slide-{}.jpg'.format(number))
        if verbose:
            print('Downloading slide {}...'.format(number))
        try:
            urlretrieve(remote_slide, filename=local_slide)
        except (EnvironmentError, ValueError) as e:
            raise RuntimeError('Could not download slide-{}. {}'.format(number, e))
        local_slides.append(local_slide)
    return local_slides


def convert_command(slides, output_path):
    """Return the ImageMagick argument list that merges `slides` into a PDF.

    An argument list (rather than a shell string) keeps paths containing
    spaces or shell metacharacters intact.
    """
    return ['convert'] + list(slides) + ['-quality', '100', output_path]


def main():
    """Download a SlideShare presentation and save it as a PDF.

    Terminates through sys.exit with an error message (exit status 1) as
    soon as a step fails.
    """
    args = parse_args()
    url = args.input or read_input('Input the SlideShare URL you want to convert: ')

    # work out where the PDF goes and make sure the directory exists
    try:
        output_dir, output_file = resolve_output(url, args.output)
    except ValueError as e:
        sys.exit(str(e))
    try:
        ensure_directory(output_dir)
    except OSError as e:
        sys.exit('Could not create directory {}. {}'.format(output_dir, e))
    output_path = os.path.join(output_dir, output_file)

    # grab slideshare html
    if args.verbose:
        print('Reading SlideShare page...')
    try:
        # requests never times out unless told to
        response = requests.get(url, timeout=NETWORK_TIMEOUT)
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        sys.exit('Could not download {}. {}'.format(url, e))

    slide_urls = find_slide_urls(response.text)
    if not slide_urls:
        sys.exit('Could not find slides. Terminating...')

    # urlretrieve has no timeout parameter; it honours the socket default
    socket.setdefaulttimeout(NETWORK_TIMEOUT)

    # The temporary directory is created only once there is something to
    # download, and the finally clause removes it on every exit path.
    dir_tmp = tempfile.mkdtemp()
    try:
        try:
            downloaded_slides = download_slides(slide_urls, dir_tmp, args.verbose)
        except RuntimeError as e:
            sys.exit(str(e))

        # combine images into pdf
        if args.verbose:
            print('Converting to PDF...')
        try:
            status = subprocess.call(convert_command(downloaded_slides, output_path))
        except OSError as e:
            # raised when the convert executable cannot be started
            sys.exit('Could not convert slides to PDF. {}'.format(e))
        if status != 0:
            sys.exit('Could not convert slides to PDF. convert exited with status {}'.format(status))
    finally:
        shutil.rmtree(dir_tmp)

    # check if file was created
    if not os.path.isfile(output_path):
        sys.exit('Your file could not be created.')
    if args.verbose:
        print('Your file has been successfully created at {}'.format(output_path))


if __name__ == '__main__':
    main()