Archiving ComiXology

by Ook

ComiXology is a neat site where you can buy comics online and read them.

However, I'm not always in a position where I can browse a website, so I like to archive the things I buy.

ComiXology doesn't really permit you to do this, so I felt I should see what I could do to permit myself - after all, I can see the images displayed on my screen, so they're in the clear somewhere.

My first attempt to pull things off a website is the same as anyone else's: look at the source (unhelpful), then look at the network traffic (just as unhelpful, but interesting).  The images don't exist as-is on the pipe anywhere - however, they do exist, as image scrambles with no simple pattern.

Next, I decided to look at the Document Object Model (DOM) to see if the page script itself was assembling the images in some coherent way - it was!

The pages were composed of a seemingly random number of canvas tags.

So, the simplest attack would have been to use the toDataURL method on - oho!  toDataURL was set to null!

This was simple enough to restore (recently I'd looked into sandboxing localStorage away from a potential attacker via the same means, only to find it functionally impossible).

Now that I had a data URL, I saw some very interesting stuff: the canvas images each contained a subset of the comic page in random staggers.  To get the full image, I'd need to copy the canvas data to a new canvas.  A really neat perk, though, was that they were creating the canvas at full resolution, then using CSS transforms to scale it down.  I could get full resolution images!

Teaching a script how to click the "Next" button and wait for the DOM changes that would signal the next page had loaded was easy enough.

Later I worked out that I could just trawl the page selector at the bottom - both to get the full set of pages and to choose each one.

I was able to use Chrome's FileSystem API to then save the composited images individually, but getting them back out was painful.

Even with Eli Grey's useful FileSaver.js, I'd get a bunch of JPG or PNG files - that's neat, but there had to be something that would be more "click a link, get a file."

Using Stuk's JSZip library, I found I could create a ZIP file in memory within the browser - I could just create a CBZ file!

I have a friend who's really into comics as well, and figured he might want to be able to archive his stuff.

So I built a small UI to let him select the quality of the downloaded CBZ (especially for longer comics; full resolution PNGs were averaging five megabytes a page, and a particular 165 page comic was crashing Chrome when attempting to build a CBZ file of almost a gig in size).

The finished, commented code is too lengthy to print here, but is available on the 2600 Code Repository (www.2600.com/code).

I share this with a warning: they put quite a bit of work into preventing theft - encrypting the image data, shuffling it, splitting it between canvases, obfuscating their code, etc.

I didn't do any kind of analysis to see if they were embedding compression-resistant steganographic watermarks in the images to concretely identify me as the purchaser should my archives get out into the wild - but if I were the programmer on the project, it's something I'd have recommended to enable suit should my copyright be threatened by unchecked file sharing.

Don't help others steal things - but if you do, analyze the images to make sure it's not traceable to you as well.

// Load JSZip and FileSaver on demand, each only when its global is missing.
// `typeof` is required here: referencing an undeclared global directly (as in
// `!JSZip`) throws a ReferenceError instead of evaluating to true, which would
// abort the script before either library was requested.
// The tag name is split ('<scr' + 'ipt>') exactly as in the original.
if (typeof JSZip === 'undefined') {
	$('<scr' + 'ipt>').attr('src', 'https://stuk.github.io/jszip/dist/jszip.js').appendTo(document.body);
}
if (typeof saveAs === 'undefined') {
	$('<scr' + 'ipt>').attr('src', 'https://stuk.github.io/jszip/vendor/FileSaver.js').appendTo(document.body);
}
(function () {
	"use strict";
	var cxt = {};
	// Dismiss the "continue" prompt (if it is showing) by clicking its cancel button.
	$('.continue-modal .button-cancel:visible').click();
	// Open the page list: its thumbnails give us every page, and clicking one selects that page.
	$('#browse-btn').click();
	cxt.thumbs = Array.prototype.slice.call($('.thumbnails-list figure'));
	// Image-quality configuration: whatever was saved under 'cxtConfig' in localStorage,
	// otherwise defaults that are reasonable for reading on a phone or tablet.
	cxt.config = (function () {
		var saved = JSON.parse(localStorage.getItem('cxtConfig') || 'null');
		if (saved) {
			return saved;
		}
		return {
			maxHeight: 1440,
			fileType: 'jpg',
			quality: 0.85
		};
	}());
	// Updates one configuration option and persists the whole configuration to
	// localStorage under 'cxtConfig'. Nothing is written when the option already
	// holds exactly (===) the given value.
	cxt.setConfig = function (name, value) {
		if (cxt.config[name] === value) {
			return;
		}
		cxt.config[name] = value;
		localStorage.setItem('cxtConfig', JSON.stringify(cxt.config));
	};
	// Lookup from the configured file type ('png' / 'jpg') to its MIME type
	cxt.mime = {};
	cxt.mime.png = 'image/png';
	cxt.mime.jpg = 'image/jpeg';
	// Cached FileSystem handle; null until one has been obtained
	cxt.fileSystem = null;
	
	
	// Returns the figcaption text of every entry in cxt.thumbs, in thumbnail order
	cxt.getPages = function () {
		var captions = [],
			i;
		for (i = 0; i < cxt.thumbs.length; i += 1) {
			captions.push($(cxt.thumbs[i]).find('figcaption').text());
		}
		return captions;
	};
	
	// Builds the storage filename for a page: "<sanitized title> - <page>.<file type>".
	// Every title character other than letters, digits, '_', ' ' and '.' becomes '_'.
	cxt.getFilename = function (pageNumber) {
		var safeTitle = cxt.title.replace(/[^A-Za-z0-9_ \.]/g, '_'),
			extension = cxt.config.fileType;
		return safeTitle + ' - ' + pageNumber + '.' + extension;
	};
	// Comic title: the document title with the first ' - comiXology' removed
	var siteSuffix = ' - comiXology';
	cxt.title = document.title.replace(siteSuffix, '');
	
	// Borrow toDataURL from a canvas created in a throwaway iframe's document
	// (the article above notes the page's own toDataURL was set to null).
	// The iframe is removed again as soon as the function has been captured.
	cxt.toDataURL = (function () {
		var frame = $('<iframe>').appendTo(document.body),
			frameDoc = frame[0].contentDocument,
			pristine = $('<canvas>', frameDoc)[0].toDataURL;
		frame.remove();
		return pristine;
	}());
	
	// Sets the current page, and resolves when the page has loaded.
	//   pageNumber - 1-based page number; page N is cxt.thumbs[N - 1]
	// Returns a promise that resolves (no arguments) once '#reader .loading' is no
	// longer visible, or is rejected with an Error when there is no thumbnail for
	// pageNumber.
	cxt.setPage = function (pageNumber) {
		// setTimeout takes milliseconds; the previous delay of 0.125 made this a
		// near-continuous poll rather than one every eighth of a second.
		var POLL_INTERVAL_MS = 125,
			def = new $.Deferred(),
			doResolve = function () {
				// Both animations below end up here; once one of them has settled
				// the promise, the other has nothing left to wait for.
				if (def.state() !== 'pending') {
					return;
				}
				if ($('#reader .loading').is(':visible')) {
					setTimeout(doResolve, POLL_INTERVAL_MS);
					return;
				}
				def.resolve();
			},
			options = {
				queue: true,
				done: doResolve,
				fail: doResolve
			};
		if (!cxt.thumbs[pageNumber - 1]) {
			def.reject(new Error("Page " + pageNumber + " does not exist; range is 1 to " + cxt.thumbs.length));
			return def.promise();
		}
		$(cxt.thumbs[pageNumber - 1]).click();
		// The queued no-op animations call doResolve when they complete or fail,
		// i.e. after anything already queued on the two views has run.
		setTimeout(function () {
			$('div.view').eq(1).animate({ opacity: 1 }, options);
			$('div.view').eq(0).animate({ opacity: 1 }, options);
		}, 10);
		return def.promise();
	};
	
	// Obtains Chrome's persistent sandboxed FileSystem for image storage.
	// The handle is cached in cxt.fileSystem, so quota is only requested while no
	// handle has been cached yet. Returns a promise resolved with the FileSystem,
	// or rejected with whatever error the quota / file system request reports.
	cxt.getFS = function () {
		var pending = new $.Deferred(),
			// Yes, 2 GB.  It could happen.
			twoGigabytes = 2 * 1024 * 1024 * 1024;
		function onError(error) {
			pending.reject(error);
		}
		function onFileSystem(fs) {
			cxt.fileSystem = fs;
			pending.resolve(fs);
		}
		function onQuota(grantedBytes) {
			webkitRequestFileSystem(PERSISTENT, grantedBytes, onFileSystem, onError);
		}
		if (cxt.fileSystem === null) {
			webkitStorageInfo.requestQuota(PERSISTENT, twoGigabytes, onQuota, onError);
		} else {
			pending.resolve(cxt.fileSystem);
		}
		return pending.promise();
	};
	
	
	// Fetches a FileEntry by name from the sandbox root.
	//   name   - file name inside the FileSystem root
	//   create - passed straight through as the `create` flag of root.getFile
	// Returns a promise resolved with the entry, or rejected with the error from
	// either cxt.getFS or root.getFile.
	cxt.getFile = function (name, create) {
		var result = new $.Deferred(),
			forwardError = function (error) {
				result.reject(error);
			};
		cxt.getFS()
			.done(function (fs) {
				var flags = { create: create };
				fs.root.getFile(name, flags, function (entry) {
					result.resolve(entry);
				}, forwardError);
			})
			.fail(forwardError);
		return result.promise();
	};
	
	// Convert a Data URI to a binary Blob for storage in the sandboxed FileSystem.
	//   dataURI - string of the form "data:<mime>[;params][;base64],<payload>"
	// Returns a Blob holding the payload bytes; its type is the MIME type from the
	// header with any ';' parameters dropped.
	cxt.toBlob = function toBlob(dataURI) {
		var parts = dataURI.split(','),
			header = parts[0],
			mimeString = header.split(':')[1].split(';')[0],
			byteString,
			bytes,
			i;
		if (header.indexOf('base64') >= 0) {
			byteString = atob(parts[1]);
		} else {
			// Percent-decode one byte per escape. decodeURIComponent would merge
			// multi-byte UTF-8 escapes (e.g. %C3%A9) into a single code point,
			// which then lost bytes when written into the Uint8Array below.
			byteString = parts.slice(1).join(',').replace(/%([0-9A-Fa-f]{2})/g, function (match, hex) {
				return String.fromCharCode(parseInt(hex, 16));
			});
		}
		bytes = new Uint8Array(byteString.length);
		for (i = 0; i < byteString.length; i += 1) {
			bytes[i] = byteString.charCodeAt(i);
		}
		return new Blob(
			[bytes],
			{type: mimeString}
		);
	};
	
	// Go to a page and save its page image to the sandboxed FileSystem; present a thumbnail from the FileSystem URL as proof.
	//   pageNumber - 1-based page number; page N is cxt.thumbs[N - 1]
	// Returns a promise resolved with (fileName, fileURL, fileEntry) once the stored
	// image has loaded back from its FileSystem URL. It is rejected when the page
	// cannot be selected, no page canvas is found, storing fails, or the stored
	// image fails to load (previously several of these left the promise pending forever).
	cxt.getImage = function (pageNumber) {
		var def = cxt.progressMeter(new $.Deferred(), "Fetching image " + pageNumber),
			fileName = cxt.getFilename(pageNumber),
			fail = function (error) {
				def.reject(error);
			};
		// NOTE(review): cxt.setPage clicks this same thumbnail; this click is kept
		// from the original and looks redundant - confirm before removing.
		$(cxt.thumbs[pageNumber - 1]).click();
		cxt.setPage(pageNumber).done(function () {
			var fc, ctx, w, h, dw, dh, format, ttl,
				canv = $('canvas.no-select'),
				prog = 1;
			if (!canv.length) {
				fail(new Error("No page canvas found for page " + pageNumber));
				return;
			}
			// Source size comes from the first canvas' width/height attributes
			w = canv[0].attributes.width.value;
			h = canv[0].attributes.height.value;
			// One progress step for the page load plus one per canvas
			ttl = canv.length + 1;
			def.notify(prog, ttl);
			dw = w;
			dh = h;
			// Scale down (keeping aspect ratio) when taller than the configured
			// maximum; a falsy maxHeight means "no limit".
			if (dh > (cxt.config.maxHeight || Infinity)) {
				dh = cxt.config.maxHeight;
				dw = Math.round(dh * w / h);
			}
			format = cxt.mime[cxt.config.fileType] || 'image/png';
			fc = $('<canvas>')
					.attr('width', dw)
					.attr('height', dh)[0];
			ctx = fc.getContext('2d');

			// Each canvas carries part of the page; draw them all onto one canvas.
			canv.each(function () {
				ctx.drawImage(this, 0, 0, w, h, 0, 0, dw, dh);
				prog += 1;
				def.notify(prog, ttl);
			});
			cxt.store(fileName, cxt.toDataURL.call(fc, format, cxt.config.quality)).done(function (fileEntry) {
				var img = $('<div>').append(
						$('<img>')
							.attr({ src: fileEntry.toURL() })
							.css({ height: 144, display: 'block' }),
						$('<div>')
							.css({ font: '7pt cursive, sans-serif', color: '#EEE' })
							.text("Saved " + cxt.title + ", Page " + pageNumber)
					)
						.css({
							position: 'absolute',
							top: 0,
							right: 0,
							zIndex: 1000,
							boxShadow: '-5px 5px 10px -5px black',
							height: 144,
							opacity: 0,
							backgroundColor: '#333'

						})
						.appendTo(document.body);
				img.find('img').on('load', function () {
					//The image doesn't exist, far as we're concerned, until it can be displayed.
					def.resolve(fileName, fileEntry.toURL(), fileEntry);
					img.animate({ opacity: 1 }, { duration: 250, complete: function () {
						setTimeout(function () {
							img.fadeOut('slow', function () {
								img.remove();
							});
						}, 1500);
					}});
				}).on('error', function () {
					// A stored image that cannot be displayed counts as a failed page.
					img.remove();
					fail(new Error("Stored image for page " + pageNumber + " could not be loaded"));
				});
			}).fail(fail);
		}).fail(fail);
		return def.promise();
	};
	// Clear the sandboxed FileSystem: removes every file in the root directory.
	// Directories are left alone, and a file that fails to delete is skipped
	// (best effort, as before). Returns a promise that resolves once every removal
	// has finished - previously it resolved before any removal had completed - or
	// is rejected when the FileSystem or the directory listing cannot be obtained.
	// Progress is notified as (entries handled, total entries).
	cxt.clearFiles = function () {
		var def = cxt.progressMeter(new $.Deferred(), "Clearing files"),
			fail = function (e) {
				def.reject(e);
			};
		// Removes every collected entry, then resolves `def`.
		function removeAll(entries) {
			var finished = 0,
				removals = entries.map(function (entry) {
					var eDef = new $.Deferred(),
						settle = function () {
							finished += 1;
							def.notify(finished, entries.length);
							eDef.resolve();
						};
					if (entry.isDirectory) {
						settle();
					} else {
						// Same callback for success and failure: removal is best effort.
						entry.remove(settle, settle);
					}
					return eDef.promise();
				});
			$.when.apply($, removals).done(function () {
				def.resolve();
			}).fail(fail);
		}
		// readEntries may hand back the listing in batches; keep reading until an
		// empty batch signals the end, so large comics are cleared completely.
		function readBatch(reader, collected) {
			reader.readEntries(function (batch) {
				if (!batch.length) {
					removeAll(collected);
					return;
				}
				readBatch(reader, collected.concat([].slice.call(batch)));
			}, fail);
		}
		cxt.getFS().done(function (fs) {
			readBatch(fs.root.createReader(), []);
		}).fail(fail);
		return def.promise();
	};
	// Store a file in the sandbox.
	//   fileName - name of the file in the FileSystem root
	//   data     - Data URI; converted to a Blob with cxt.toBlob
	// Any existing file of that name is deleted first (presumably so stale bytes
	// of a longer, older file cannot survive the write - confirm). Returns a
	// promise resolved with the FileEntry once the write has finished, or rejected
	// with the error from creating the entry, creating the writer, or writing.
	cxt.store = function (fileName, data) {
		var def = new $.Deferred(),
			fail = function (error) {
				def.reject(error);
			};
		// Deletes the existing file, if any, then calls `next` whatever the outcome.
		// (The original called cxt.removeFile, which is not defined in this file.)
		function removeExisting(next) {
			cxt.getFile(fileName, false)
				.done(function (entry) {
					entry.remove(next, next);
				})
				.fail(next);
		}
		function write() {
			cxt.getFile(fileName, true).done(function (fileEntry) {
				fileEntry.createWriter(function (writer) {
					// Resolve only once the data is written, so callers never read
					// a partially written file; a prior error has already rejected.
					writer.onwriteend = function () {
						def.resolve(fileEntry);
					};
					writer.onerror = fail;
					writer.write(cxt.toBlob(data));
				}, fail);
			}).fail(fail);
		}
		removeExisting(write);
		return def.promise();
	};
	// Download a list of pages using the provided list of page numbers.
	//   pageList - array of page numbers; it is copied, not modified
	// Pages are fetched one at a time with cxt.getImage. Returns a promise that
	// resolves with the array of stored file names when every page succeeded, or
	// is rejected with (Error, failedPages) when at least one page failed - the
	// remaining pages are still attempted first. Progress is notified as
	// (pages attempted, total pages).
	cxt.downloadPages = function (pageList) {
		var def = cxt.progressMeter(new $.Deferred(), "Downloading pages"),
			queue = [].slice.call(pageList),
			total = queue.length,
			files = [],
			err = [];
		function downloadOne() {
			var page;
			if (!queue.length) {
				if (err.length) {
					def.reject(new Error("Some pages failed to download"), err);
				} else {
					def.resolve(files);
				}
				return;
			}
			page = queue.shift();
			cxt.getImage(page)
				.done(function (fileName) {
					files.push(fileName);
				})
				.fail(function () {
					err.push(page);
				})
				.always(function () {
					def.notify(total - queue.length, total);
					downloadOne();
				});
		}
		// The first download starts after a short delay, as in the original.
		setTimeout(downloadOne, 125);
		return def.promise();
	};
	// Download all of the comic's pages, create a CBZ file, and tell the browser to save it.
	// Returns the Deferred itself (not a promise, as before). It resolves after
	// saveAs has been called, and is rejected when the page downloads fail
	// (previously it then stayed pending forever), when a stored page cannot be
	// read back, or when building the archive fails.
	cxt.downloadCBZ = function () {
		var zip = new JSZip(),
			def = new $.Deferred(),
			cbzName = cxt.title.replace(/[^A-Za-z0-9_ \.]/g, '_') + '.cbz',
			fail = function (error) {
				def.reject(error);
			},
			save = function (content) {
				saveAs(content, cbzName);
				def.resolve();
			};
		cxt.downloadPages(cxt.getPages()).done(function (pages) {
			var p = pages.slice(),
				added = 0,
				ttl = p.length,
				addOne = function () {
					if (def.state() !== 'pending') {
						return;
					}
					if (p.length > 0) {
						cxt.readFile(p[0])
							.done(function (data) {
								zip.file(p[0], data, {
									binary: true,
									base64: false
								});
								p.shift();
								added += 1;
								def.notify(added, ttl);
								// Yield between files rather than adding them all in one go
								setTimeout(addOne, 10);
							}).fail(fail);
						return;
					}
					// JSZip 3.x replaced the synchronous generate() with generateAsync();
					// use whichever the loaded version provides.
					if (typeof zip.generateAsync === 'function') {
						zip.generateAsync({ type: 'blob' }).then(save, fail);
					} else {
						save(zip.generate({ type: 'blob' }));
					}
				};
			cxt.progressMeter(def, "Creating CBZ file");
			addOne();
		}).fail(fail);
		return def;
	};
	// Reads a sandbox file back as a binary string.
	// Returns the Deferred itself: resolved with the FileReader result once
	// reading ends, rejected with the error/event from looking up the entry,
	// opening the file, or reading it.
	cxt.readFile = function (fileName) {
		var result = new $.Deferred();
		function giveUp(e) {
			result.reject(e);
		}
		function readContents(file) {
			var reader = new FileReader();
			reader.onerror = giveUp;
			reader.onloadend = function (e) {
				result.resolve(e.target.result);
			};
			reader.readAsBinaryString(file);
		}
		cxt.getFile(fileName)
			.done(function (fileEntry) {
				fileEntry.file(readContents, giveUp);
			})
			.fail(giveUp);
		return result;
	};
	// ----- UI Stuff -----
	// Serializes a rules object into CSS text.
	//   css - { selector: { camelCaseProperty: value, ... }, ... }. A key that
	//         starts with '@' holds a nested rules object of the same shape,
	//         which is emitted inside that at-rule, indented by one tab.
	// Property names are converted from camelCase to dashed form: every capital
	// letter becomes '-' followed by its lowercase.
	cxt.createStylesheet = function (css) {
		var dashify = function (camel) {
				return camel.replace(/[A-Z]/g, function (capital) {
					return '-' + capital.toLowerCase();
				});
			},
			declarationsOf = function (props) {
				var lines = [];
				Object.keys(props).forEach(function (prop) {
					lines.push(dashify(prop) + ': ' + props[prop]);
				});
				return lines.join(';\n\t');
			},
			render = function (sheet) {
				var chunks = [];
				Object.keys(sheet).forEach(function (selector) {
					var body = sheet[selector];
					if (selector.charAt(0) === '@') {
						chunks.push(selector + ' {\n\t' + render(body).split('\n').join('\n\t') + '\n}');
					} else {
						chunks.push(selector + ' {\n\t' + declarationsOf(body) + ';\n}');
					}
				});
				return chunks.join('\n');
			};
		return render(css);
	};
	// Renders the rules with cxt.createStylesheet and appends them to the body in a <style> element
	cxt.applyRules = function (rules) {
		var cssText = cxt.createStylesheet(rules),
			styleTag = $('<style>');
		styleTag.text(cssText);
		styleTag.appendTo(document.body);
	};
	// Just some pretty for our progressbars.
	// Selectors are emitted verbatim by cxt.createStylesheet (only property names
	// are dashed), so the keyframes at-rule must already be spelled
	// "@-webkit-keyframes"; the previous "@WebkitKeyframes" was not valid CSS and
	// the "unknown progress" stripes never moved. The unprefixed @keyframes and
	// animation properties are emitted alongside the -webkit- ones.
	cxt.applyRules({
		"@-webkit-keyframes cxtProgressUnknown": {
			"from": {
				backgroundPosition: "0 0"
			},
			"to": {
				backgroundPosition: "-14px 0"
			}
		},
		"@keyframes cxtProgressUnknown": {
			"from": {
				backgroundPosition: "0 0"
			},
			"to": {
				backgroundPosition: "-14px 0"
			}
		},
		".cxtProgressMeter.unknown": {
			backgroundImage: "linear-gradient(" + [
				"-45deg",
				"#f7f7f7",
				"#f7f7f7 5px",
				"#d6d6d6 5px",
				"#d6d6d6 10px",
				"#f7f7f7 10px",
				"#f7f7f7 15px",
				"#d6d6d6 15px",
				"#d6d6d6 20px"
			].join(', ') + ")",
			backgroundSize: "14px 14px",
			WebkitAnimationName: "cxtProgressUnknown",
			WebkitAnimationDuration: "1s",
			WebkitAnimationIterationCount: "infinite",
			WebkitAnimationTimingFunction: "linear",
			animationName: "cxtProgressUnknown",
			animationDuration: "1s",
			animationIterationCount: "infinite",
			animationTimingFunction: "linear"
		},
		".cxtProgressMeter": {
			position: 'absolute',
			zIndex: 1000,
			top: ($(document).height() / 2 - 24) + "px",
			left: ($(document).width() / 4) + "px",
			height: "16px",
			margin: "4px 0px",
			width: ($(document).width() / 2) + "px",
			backgroundColor: '#e6e6e6',
			boxShadow: 'black 0px 5px 40px -14px inset',
			opacity: 0.75,
			borderRadius: "8px"
		},
		".cxtProgressMeter div.bar": {
			width: 0,
			height: '100%',
			backgroundColor: 'green',
			borderRadius: "8px"
		},
		".cxtProgressMeter div.message": {
			width: "100%",
			textAlign: "center",
			color: "black",
			textShadow: "0px 0px 6px white",
			font: "10px \"Arial\", sans-serif",
			lineHeight: '16px',
			position: 'absolute',
			top: 0,
			left: 0
		},
		// The second selector covers an 'error' class placed on the bar itself
		// rather than on the meter, which the first selector alone never matched.
		".cxtProgressMeter.error div, .cxtProgressMeter div.bar.error": {
			backgroundColor: 'red'
		}
	});
	// Attaches an on-screen progress bar with a message to a Deferred.
	//   def     - Deferred to observe; it is returned unchanged
	//   message - markup shown on the bar (inserted with .html())
	// Progress notifications may be (done, total), a fraction in (0, 1], or a
	// percentage in (1, 100]; anything else leaves the bar as it is. When the
	// Deferred settles the bar is filled, then faded out and removed a second later.
	cxt.progressMeter = function (def, message) {
		var bar = $('<div>').addClass('bar'),
			msg = $('<div>').addClass('message').html(message),
			meter = $('<div>').addClass('cxtProgressMeter unknown').append(bar, msg).appendTo(document.body);
		// Stacks all visible meters vertically, centred as a group on the document
		function positionMeters() {
			var meters = $('.cxtProgressMeter'),
				ttlHeight = 0,
				newTop;
			meters.each(function () {
				ttlHeight += $(this).outerHeight(true);
			});
			newTop = $(document).height() / 2 - ttlHeight / 2;
			meters.each(function () {
				$(this).css({ top: newTop });
				newTop += $(this).outerHeight(true);
			});
		}
		// Turns a progress notification into a 0..1 fraction, or -1 when it cannot be interpreted
		function fraction(pct, ttl) {
			if (isNaN(pct)) {
				return -1;
			}
			if (!isNaN(ttl)) {
				return pct / ttl;
			}
			if (pct > 0 && pct <= 1) {
				return pct;
			}
			if (pct > 0 && pct <= 100) {
				return pct / 100;
			}
			return -1;
		}
		positionMeters();
		def.progress(function (pct, ttl) {
			var pro = fraction(pct, ttl);
			if (pro > 0) {
				meter.removeClass('unknown');
				bar.width((pro * 100) + '%');
			}
		});
		def.done(function () {
			bar.addClass('complete');
		}).fail(function () {
			bar.addClass('error');
		}).always(function () {
			bar.width('100%');
			setTimeout(function () {
				meter.fadeOut('slow', function () {
					meter.remove();
					positionMeters();
				});
			}, 1000);
		});
		return def;
	};
	// Present the UI (uses ComiXology's own dialog).
	// Builds a modal with three options - Size (maximum image height in px; 0 means
	// no limit), Type (jpg/png) and Quality (1-100 %) - each stored through
	// cxt.setConfig as soon as a valid value is entered. "Cancel" closes the dialog;
	// "Download" closes it, clears the sandbox, builds the CBZ, and clears the
	// sandbox again afterwards.
	cxt.ui = function () {
		// Left-hand caption cell shared by the three option rows
		function caption(text) {
			return $('<div>').text(text).css({ width: '50%', display: 'inline-block' });
		}
		var dlg = $('<aside>')
			.addClass('modal vertical-center horizontal-center')
			.css({
				display: 'block',
				overflow: 'hidden'
			})
			.append(
				$('<header>').addClass('modal-header').append(
					$('<h2>').addClass('center-text').text('Setup CBZ Download')
				),
				$('<section>').addClass('modal-content').append(
					$('<div>').append(
						$('<label>').attr('for', 'cxtConfigMaxHeight').append(
							caption("Size"),
							$('<input>')
								.css({ width: "4em" })
								.attr('id', 'cxtConfigMaxHeight')
								.val(cxt.config.maxHeight)
								.on('change', function (e) {
									var v = Math.floor(parseFloat($(e.target).val()));
									// A negative height would produce an unusable canvas size; ignore it.
									if (isNaN(v) || v < 0) { return; }
									cxt.setConfig('maxHeight', v);
								}),
							"px"
						)
					),
					$('<div>').append(
						$('<label>').attr('for', 'cxtConfigFileType').append(
							caption("Type"),
							$('<select>').attr('id', 'cxtConfigFileType')
								.append(
									$('<option>').text('jpg'),
									$('<option>').text('png')
								)
								.val(cxt.config.fileType)
								.on('change', function (e) {
									cxt.setConfig('fileType', $(e.target).val());
								})
						)
					),
					$('<div>').append(
						$('<label>').attr('for', 'cxtConfigQuality').append(
							caption("Quality"),
							$('<input>').attr('id', 'cxtConfigQuality')
								.css({ width: '2em' })
								// Rounded so float noise (0.57 * 100 = 56.99999999999999) is not shown
								.val(Math.round(cxt.config.quality * 100))
								.on('change', function (e) {
									var v = Math.floor(parseFloat($(e.target).val()));
									if (isNaN(v)) { return; }
									if (v > 100 || v <= 0) { return; }
									cxt.setConfig('quality', v / 100);
								}),
							"%"
						)
					)
				),
				$('<section>').addClass('modal-actions right-text').append(
					$('<button>').addClass('button-cancel').text('Cancel')
						.on('click', function () {
							dlg.remove();
						}),
					$('<button>').addClass('button-action').text('Download')
						.on('click', function () {
							dlg.fadeOut('slow', function () {
								dlg.remove();
								cxt.clearFiles()
									.done(function () {
										// Clean the sandbox up whether or not the CBZ was
										// built, so a failed run leaves no page images behind.
										cxt.downloadCBZ().always(function () {
											cxt.clearFiles();
										});
									});
							});

						})
				)
			)
			.appendTo(document.body);
	};
	// Shows the UI once the "continue" prompt's cancel button is no longer visible,
	// re-checking every 500 ms until then.
	function start() {
		var promptOpen = $('.continue-modal .button-cancel:visible').length > 0;
		if (!promptOpen) {
			cxt.ui();
			return;
		}
		setTimeout(start, 500);
	}
	start();
}());

Code: comixology-cxt.js

Return to $2600 Index