Last active
last month (January 22, 2025 20:33)
Save a website as a PDF, syncing Safari's cookies into the web view
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/swift | |
@preconcurrency import WebKit | |
@preconcurrency import Foundation | |
@preconcurrency import Darwin | |
// Point stderr at /dev/null: anything written to stderr after this line is discarded.
_ = freopen("/dev/null", "w", stderr)
/// One cookie, as produced by `BinaryCookiesReader` or by
/// `SaveWebView.loadCookiesFromFile(path:)`.
struct Cookie {
    /// Domain the cookie is scoped to.
    var domain: String
    /// URL path the cookie is scoped to.
    var path: String
    /// Whether the cookie is flagged as secure.
    var secure: Bool
    /// Expiry date, or `nil` when none is available.
    var expires: Date?
    /// Cookie name.
    var name: String
    /// Cookie value.
    var value: String
}
/// Bit masks tested against the `flags` field of a binary cookie record.
struct CookieFlags {
    /// Mask for the "secure" bit.
    static let secure = 0x01
    /// Mask for the "HTTP only" bit.
    static let httpOnly = 0x04
}
extension Data {
    /// Returns up to `length` bytes starting at index `location`.
    ///
    /// A request that runs past the end of the data is clamped, so only the
    /// bytes that actually exist are returned. An empty `Data` is returned when
    /// `length` is not positive or `location` lies outside the data.
    ///
    /// - Parameters:
    ///   - location: Index of the first byte to read.
    ///   - length: Maximum number of bytes to read.
    /// - Returns: A new `Data` holding the bytes that were read.
    func read(location: Int, length: Int) -> Data {
        guard length > 0, location >= startIndex, location < endIndex else {
            return Data()
        }
        // Compare against the remaining byte count instead of computing
        // `location + length` up front, which could overflow for a huge `length`.
        let available = endIndex - location
        let upperBound = length < available ? location + length : endIndex
        return subdata(in: location..<upperBound)
    }

    /// Decodes the bytes as text.
    ///
    /// - Parameter encoding: Encoding used to interpret the bytes.
    /// - Returns: The decoded string, or `nil` if the bytes are not valid in `encoding`.
    func string(encoding: String.Encoding) -> String? {
        return String(data: self, encoding: encoding)
    }

    /// Returns the bytes from `start` up to (but not including) the next zero byte.
    ///
    /// When no zero byte follows `start`, the remainder of the data is returned.
    /// When `start` lies outside the data, an empty `Data` is returned.
    ///
    /// - Parameter start: Index of the first byte to include.
    /// - Returns: A new `Data` holding the bytes that were read.
    func readUntilZero(from start: Int) -> Data {
        guard start >= startIndex, start <= endIndex else {
            return Data()
        }
        let tail = self[start...]
        guard let zeroIndex = tail.firstIndex(of: 0) else {
            // No terminator: hand back everything that is left.
            return Data(tail)
        }
        return Data(tail[..<zeroIndex])
    }
}
/// Reads cookies out of a `Cookies.binarycookies` file.
///
/// All sizes, counts and offsets come from the file itself, so each one is
/// checked against the amount of data actually available before it is used;
/// a truncated or corrupt file yields fewer cookies rather than a crash.
class BinaryCookiesReader {
    /// Location of Safari's cookie store, relative to the user's home directory.
    private static let safariCookiesSubpath = "/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies"

    /// Seconds between 1970-01-01 and 2001-01-01; added to the stored expiry
    /// value to obtain a Unix timestamp.
    private static let referenceDateOffset: TimeInterval = 978_307_200

    /// Number of leading bytes of a cookie record that the parser reads
    /// (everything up to and including the 8-byte expiry value).
    private static let cookieHeaderLength = 48

    /// Returns every cookie in Safari's cookie store.
    ///
    /// - Returns: The parsed cookies, or an empty array if the file cannot be read.
    static func getCookies() -> [Cookie] {
        return readCookies(path: NSHomeDirectory().appending(safariCookiesSubpath))
    }

    /// Returns the cookies in Safari's cookie store whose domain contains `domain`.
    ///
    /// - Parameter domain: Substring to look for in each cookie's domain.
    /// - Returns: The matching cookies.
    static func getCookies(like domain: String) -> [Cookie] {
        return getCookies().filter { $0.domain.contains(domain) }
    }

    /// Parses the binary cookie file at `path`.
    ///
    /// - Parameter path: File system path of the cookie file.
    /// - Returns: The parsed cookies, or an empty array if the file cannot be read.
    static func readCookies(path: String) -> [Cookie] {
        guard let data = try? Data(contentsOf: URL(fileURLWithPath: path)) else {
            return []
        }
        return readCookies(data: data)
    }

    /// Parses the contents of a binary cookie file.
    ///
    /// The data must start with the 4-byte signature `cook`, followed by a
    /// big-endian page count, one big-endian size per page, and then the pages.
    ///
    /// - Parameter data: Raw file contents.
    /// - Returns: The cookies of all pages that could be parsed.
    static func readCookies(data: Data) -> [Cookie] {
        var cookieList: [Cookie] = []

        let signature = data.read(location: 0, length: 4)
        if String(data: signature, encoding: .utf8) != "cook" {
            debugPrint("Not a Cookies.binarycookie file")
            return cookieList
        }
        var cursor = 4

        guard data.count >= cursor + 4 else {
            return cookieList
        }
        let pageCount = bytesToInt(data: data.read(location: cursor, length: 4), isBe: true)
        cursor += 4

        // Every page needs a 4-byte entry in the size table; a count that cannot
        // fit in the remaining data means the header is corrupt.
        guard pageCount > 0, pageCount <= (data.count - cursor) / 4 else {
            return cookieList
        }

        var pageSizes: [Int] = []
        pageSizes.reserveCapacity(pageCount)
        for _ in 0..<pageCount {
            pageSizes.append(bytesToInt(data: data.read(location: cursor, length: 4), isBe: true))
            cursor += 4
        }

        for size in pageSizes {
            // A page that would extend past the end of the data means the file
            // is truncated; keep what has been parsed so far.
            guard size <= data.count - cursor else {
                break
            }
            let page = data.read(location: cursor, length: size)
            cookieList.append(contentsOf: handleCookieData(data: page))
            cursor += size
        }
        return cookieList
    }

    /// Parses a single page of cookie records.
    ///
    /// A page starts with the marker `0x00000100`, followed by a little-endian
    /// cookie count and one little-endian record offset per cookie.
    ///
    /// - Parameter data: The bytes of one page.
    /// - Returns: The cookies whose records could be parsed.
    static func handleCookieData(data: Data) -> [Cookie] {
        var cookieList: [Cookie] = []

        let pageHeader = bytesToInt(data: data, isBe: true)
        if pageHeader != 0x00000100 {
            debugPrint("page header is error, not 0x00000100!")
            return cookieList
        }
        guard data.count >= 8 else {
            return cookieList
        }
        let cookieCount = bytesToInt(data: data.read(location: 4, length: 4), isBe: false)
        var cursor = 8

        // The offset table needs 4 bytes per cookie; reject counts that cannot fit.
        guard cookieCount > 0, cookieCount <= (data.count - cursor) / 4 else {
            return cookieList
        }

        cookieList.reserveCapacity(cookieCount)
        for _ in 0..<cookieCount {
            let offset = bytesToInt(data: data.read(location: cursor, length: 4), isBe: false)
            cursor += 4
            if let cookie = parseCookie(in: data, at: offset) {
                cookieList.append(cookie)
            }
        }
        return cookieList
    }

    /// Parses the cookie record that starts at `offset` within `page`.
    ///
    /// - Returns: The cookie, or `nil` when the record header does not fit in the page.
    private static func parseCookie(in page: Data, at offset: Int) -> Cookie? {
        guard offset <= page.count - cookieHeaderLength else {
            return nil
        }
        let record = page.read(location: offset, length: page.count - offset)

        // Reads the little-endian 32-bit field at `position` within the record.
        func field(at position: Int) -> Int {
            return bytesToInt(data: record.read(location: position, length: 4), isBe: false)
        }

        // Reads the zero-terminated UTF-8 string at `position` within the record.
        func text(at position: Int) -> String {
            guard position <= record.count else {
                return ""
            }
            return String(data: record.readUntilZero(from: position), encoding: .utf8) ?? ""
        }

        // Record layout as consumed here: bytes 0-7 and 12-15 are skipped,
        // flags at 8, string offsets at 16/20/24/28, bytes 32-39 are skipped,
        // expiry (8-byte floating-point value) at 40.
        let flags = field(at: 8)
        let domainOffset = field(at: 16)
        let nameOffset = field(at: 20)
        let pathOffset = field(at: 24)
        let valueOffset = field(at: 28)

        // Assemble the expiry bit pattern byte by byte (little-endian, like the
        // other record fields) instead of copying memory of unchecked length.
        let expiryBytes = record.read(location: 40, length: 8)
        var expiryBits: UInt64 = 0
        for byte in expiryBytes.reversed() {
            expiryBits = (expiryBits << 8) | UInt64(byte)
        }
        let secondsSince2001 = Double(bitPattern: expiryBits)

        // NaN or infinity cannot be turned into a date; whole seconds are kept
        // and any fractional part is dropped.
        var expires: Date? = nil
        if secondsSince2001.isFinite {
            expires = Date(timeIntervalSince1970: secondsSince2001.rounded(.towardZero) + referenceDateOffset)
        }

        return Cookie(
            domain: text(at: domainOffset),
            path: text(at: pathOffset),
            secure: (flags & CookieFlags.secure) != 0,
            expires: expires,
            name: text(at: nameOffset),
            value: text(at: valueOffset)
        )
    }

    /// Interprets the first four bytes of `data` as an unsigned 32-bit integer.
    ///
    /// - Parameters:
    ///   - data: Source bytes; only the first four are used.
    ///   - isBe: `true` for big-endian byte order, `false` for little-endian.
    /// - Returns: The decoded value, or `0` when fewer than four bytes are available.
    static func bytesToInt(data: Data, isBe: Bool) -> Int {
        guard data.count >= 4 else {
            return 0
        }
        let head = data.prefix(4)
        // Order the bytes most-significant first, then fold them together.
        let ordered: [UInt8] = isBe ? Array(head) : Array(head.reversed())
        var value = 0
        for byte in ordered {
            value = (value << 8) | Int(byte)
        }
        return value
    }
}
/// A `WKWebView` that acts as its own navigation delegate so a page can be
/// loaded (optionally after copying cookies into the web view's cookie store)
/// and then rendered to a PDF file.
class SaveWebView: WKWebView, WKNavigationDelegate {
    /// Height of the loaded document. Starts as the frame height and is updated
    /// from `document.documentElement.scrollHeight` when a navigation finishes.
    var contentHeight: CGFloat

    /// Callback for the load in progress; called once and then cleared.
    private var loadCompletion: ((Error?) -> Void)?

    override init(frame: CGRect, configuration: WKWebViewConfiguration) {
        // Adjust the configuration before handing it to WKWebView: the web view
        // keeps its own copy, so changes made after `super.init` are ignored.
        configuration.websiteDataStore = .default()
        configuration.processPool = WKProcessPool()
        self.contentHeight = frame.height
        super.init(frame: frame, configuration: configuration)
        self.navigationDelegate = self
    }

    required init?(coder: NSCoder) {
        // Initialize for storyboard/xib usage
        self.contentHeight = 0
        super.init(coder: coder)
        self.navigationDelegate = self
    }

    /// Starts loading `url`.
    ///
    /// - Parameters:
    ///   - url: Page to load.
    ///   - completion: Called with `nil` once the navigation has finished, or
    ///     with the error if it failed.
    func load(url: URL, completion: @escaping (Error?) -> Void) {
        loadCompletion = completion
        load(URLRequest(url: url))
    }

    /// Reads cookies from a tab-separated text file (the layout used by
    /// Netscape-style `cookies.txt` files).
    ///
    /// Empty lines and lines starting with `#` are skipped. Every other line
    /// needs at least seven fields; fields 0, 2, 3, 4, 5 and 6 are used as
    /// domain, path, secure flag (`TRUE`), expiry (Unix timestamp), name and value.
    ///
    /// - Parameter path: File system path of the cookie file.
    /// - Returns: The parsed cookies, or an empty array if the file cannot be read.
    func loadCookiesFromFile(path: String) -> [Cookie] {
        guard let content = try? String(contentsOfFile: path, encoding: .utf8) else {
            print("Could not read cookie file")
            return []
        }

        var cookies: [Cookie] = []
        for line in content.components(separatedBy: .newlines) {
            // Skip comments and empty lines
            if line.isEmpty || line.starts(with: "#") {
                continue
            }
            let parts = line.components(separatedBy: "\t")
            guard parts.count >= 7 else {
                print("Invalid cookie format: \(line)")
                continue
            }
            cookies.append(Cookie(
                domain: parts[0],
                path: parts[2],
                secure: parts[3] == "TRUE",
                // A non-numeric expiry field leaves the cookie without an expiry date.
                expires: Double(parts[4]).map { Date(timeIntervalSince1970: $0) },
                name: parts[5],
                value: parts[6]
            ))
        }
        return cookies
    }

    /// Copies `cookies` into the web view's cookie store.
    ///
    /// Cookies that `HTTPCookie` rejects are skipped.
    ///
    /// - Parameters:
    ///   - cookies: Cookies to install.
    ///   - completion: Called on the main queue once every cookie has been stored.
    func syncCookiesToWebView(cookies: [Cookie], completion: @escaping () -> Void) {
        let group = DispatchGroup()
        let cookieStore = configuration.websiteDataStore.httpCookieStore

        for cookieData in cookies {
            var properties: [HTTPCookiePropertyKey: Any] = [
                .domain: cookieData.domain,
                .path: cookieData.path,
                .name: cookieData.name,
                .value: cookieData.value
            ]
            // Only add `.secure` for secure cookies: the key is documented as
            // marking the cookie secure whenever a value is provided, so it must
            // be absent for non-secure cookies.
            if cookieData.secure {
                properties[.secure] = "TRUE"
            }
            if let expires = cookieData.expires {
                properties[.expires] = expires
            }
            guard let cookie = HTTPCookie(properties: properties) else {
                continue
            }
            group.enter()
            cookieStore.setCookie(cookie) {
                group.leave()
            }
        }

        group.notify(queue: .main) {
            completion()
        }
    }

    /// Installs the cookies from the file at `cookieFilePath`, then loads `url`.
    ///
    /// - Parameters:
    ///   - url: Page to load.
    ///   - cookieFilePath: Path of a cookie text file, see `loadCookiesFromFile(path:)`.
    ///   - completion: Called when the load finishes or fails.
    func loadUrlWithFileCookies(url: URL, cookieFilePath: String, completion: @escaping (Error?) -> Void) {
        let cookies = loadCookiesFromFile(path: cookieFilePath)
        syncCookiesToWebView(cookies: cookies) {
            self.load(url: url, completion: completion)
        }
    }

    /// Installs the Safari cookies whose domain contains the URL's host, then loads `url`.
    ///
    /// A URL without a host is loaded without any cookies.
    ///
    /// - Parameters:
    ///   - url: Page to load.
    ///   - completion: Called when the load finishes or fails.
    func loadUrlWithSyncedCookies(url: URL, completion: @escaping (Error?) -> Void) {
        let cookies = url.host.map { BinaryCookiesReader.getCookies(like: $0) } ?? []
        syncCookiesToWebView(cookies: cookies) {
            self.load(url: url, completion: completion)
        }
    }

    /// Scrolls the page down one viewport at a time until the bottom is reached.
    ///
    /// The scrolling runs asynchronously inside the page; this method returns
    /// immediately.
    ///
    /// - Parameter timeout: Pause between two scroll steps, in milliseconds.
    func scrollPage(_ timeout: Int) {
        let javascript = """
        (async function () {
        const viewportHeight = window.innerHeight;
        const totalHeight = document.documentElement.scrollHeight;
        while (window.scrollY < totalHeight - viewportHeight) {
        window.scrollBy({
        top: viewportHeight,
        behavior: 'smooth'
        });
        await new Promise(resolve => setTimeout(resolve, \(timeout)));
        }
        })();
        true;
        """
        evaluateJavaScript(javascript) { (_, error) in
            if let error = error {
                print("Error preloading images: \(error.localizedDescription)")
            }
        }
    }

    /// Save a copy of the web view's contents as a PDF file.
    ///
    /// Despite its name, this method writes a PDF, not a webarchive. It blocks
    /// (spinning the main run loop) until the file has been written, and
    /// terminates the process with status 1 if creating or writing the PDF
    /// fails. An existing file at `savePath` is not overwritten.
    ///
    /// - Parameters:
    ///   - savePath: Destination file URL.
    ///   - contentHeight: Height of the area to capture; the width is fixed at 900.
    func saveAsWebArchive(savePath: URL, contentHeight: CGFloat) {
        var isSaving = true

        // Capture the whole document, not just the visible frame.
        let config = WKPDFConfiguration()
        config.rect = CGRect(x: 0, y: 0, width: 900, height: contentHeight)

        createPDF(configuration: config) { result in
            do {
                let data = try result.get()
                try data.write(to: savePath, options: [.withoutOverwriting])
                isSaving = false
            } catch {
                // Reported with print like every other error in this class;
                // the script redirects stderr to /dev/null.
                print("Unable to save PDF file: \(error.localizedDescription)")
                exit(1)
            }
        }

        while isSaving {
            RunLoop.main.run(until: Date(timeIntervalSinceNow: 0.1))
        }
    }

    // WKNavigationDelegate Methods

    /// Records the document height, then reports the successful load.
    func webView(_ webView: WKWebView, didFinish navigation: WKNavigation!) {
        evaluateJavaScript("document.documentElement.scrollHeight") { (result, error) in
            if let error = error {
                print("Error getting scrollHeight: \(error.localizedDescription)")
            } else if let height = result as? NSNumber {
                // A missing or non-numeric result keeps the previous height.
                self.contentHeight = CGFloat(height.doubleValue)
            }
            // Deliver the completion through the main queue rather than from
            // inside this callback.
            DispatchQueue.main.async {
                self.loadCompletion?(nil)
                self.loadCompletion = nil
            }
        }
    }

    /// Reports a navigation that failed after it had started.
    func webView(_ webView: WKWebView, didFail navigation: WKNavigation!, withError error: Error) {
        print("Navigation failed: \(error.localizedDescription)")
        loadCompletion?(error)
        loadCompletion = nil
    }

    /// Reports a navigation that failed before any content was committed.
    func webView(_ webView: WKWebView, didFailProvisionalNavigation navigation: WKNavigation!, withError error: Error) {
        print("didFailProvisionalNavigation")
        loadCompletion?(error)
        loadCompletion = nil
    }
}
// Entry point: load <URL> with Safari's cookies, scroll through the page so
// lazily loaded content appears, then save the page as a PDF at <OUTPUT_PATH>.
//
// Diagnostics are printed to stdout because stderr has been redirected to
// /dev/null at the top of the script.
guard CommandLine.arguments.count == 3 else {
    print("Usage: \(CommandLine.arguments[0]) <URL> <OUTPUT_PATH>")
    exit(1)
}
guard let url = URL(string: CommandLine.arguments[1]) else {
    print("Invalid URL: \(CommandLine.arguments[1])")
    exit(1)
}
let savePath = URL(fileURLWithPath: CommandLine.arguments[2])

var keepRunning = true
// Status reported to the shell; becomes non-zero when the page fails to load.
var exitStatus: Int32 = 0
let frameHeight: Int = 1200
// Pause between two scroll steps, in seconds.
let scrollTimeout: Double = 0.25
let webView = SaveWebView(frame: CGRect(x: 0, y: 0, width: 900, height: frameHeight))

/// Keeps the current run loop running for `seconds`, so pending callbacks are
/// still serviced while waiting.
func wait(seconds: Double) {
    let until = Date().addingTimeInterval(seconds)
    repeat {
        RunLoop.current.run(mode: .default, before: until)
    } while Date() < until
}

webView.loadUrlWithSyncedCookies(url: url) { error in
    if let error = error {
        print("Failed to load the page: \(error.localizedDescription)")
        exitStatus = 1
    } else {
        // Wait a fixed 5 seconds after the load before scrolling.
        wait(seconds: 5.0)
        webView.scrollPage(Int(scrollTimeout * 1000))
        // One scroll step per frame height of content, plus 10% slack.
        let scrollSteps = ceil(Double(webView.contentHeight) / Double(frameHeight))
        wait(seconds: scrollSteps * scrollTimeout * 1.1)
        webView.saveAsWebArchive(savePath: savePath, contentHeight: webView.contentHeight)
    }
    keepRunning = false
}

while keepRunning {
    RunLoop.current.run(mode: .default, before: .distantFuture)
}
exit(exitStatus)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment