Skip to content

Instantly share code, notes, and snippets.

@mdbraber
Last active January 22, 2025 20:33
Save Safari website as PDF (with syncing cookies)
#!/usr/bin/swift
@preconcurrency import WebKit
@preconcurrency import Foundation
@preconcurrency import Darwin
// Discard everything written to stderr for the rest of the process by
// reopening the stream on /dev/null.
// NOTE(review): presumably this hides framework log noise — confirm. It also
// swallows anything this script itself writes to stderr after this point, so
// user-facing messages have to go to stdout to be visible.
freopen("/dev/null", "w", stderr)
/// A single cookie, independent of the source it was read from.
///
/// The memberwise initializer takes the fields in declaration order:
/// `domain`, `path`, `secure`, `expires`, `name`, `value`.
struct Cookie {
    /// Domain the cookie applies to.
    var domain: String

    /// URL path the cookie applies to.
    var path: String

    /// Whether the cookie is flagged as secure.
    var secure: Bool

    /// Expiry date, or `nil` when no expiry is known.
    var expires: Date?

    /// Cookie name.
    var name: String

    /// Cookie value.
    var value: String
}
/// Bit masks for the flags word of a binary cookie record.
struct CookieFlags {
    /// Bit 0: the cookie is marked secure.
    static let secure: Int = 1 << 0

    /// Bit 2: the cookie is marked HTTP-only.
    static let httpOnly: Int = 1 << 2
}
/// Bounds-checked helpers for pulling fields out of a binary buffer.
///
/// All offsets are counted in bytes from the first byte of this `Data`
/// value. None of these helpers trap on out-of-range input; they return an
/// empty `Data` instead.
extension Data {
    /// Returns up to `length` bytes starting at `location`.
    ///
    /// - Parameters:
    ///   - location: Offset of the first byte to return.
    ///   - length: Maximum number of bytes to return.
    /// - Returns: The requested bytes, clamped to the end of the data. Empty
    ///   when `location` is negative or past the end, or `length <= 0`.
    func read(location: Int, length: Int) -> Data {
        guard location >= 0, location < count, length > 0 else {
            return Data()
        }
        // Clamp by subtraction so `location + length` can never overflow, and
        // so a request running past the end stops at the last byte instead of
        // producing an invalid range.
        let available = count - location
        let start = self.startIndex + location
        return subdata(in: start..<(start + Swift.min(length, available)))
    }

    /// Decodes the whole buffer as a string in the given encoding.
    ///
    /// - Returns: The decoded string, or `nil` when the bytes are not valid
    ///   in `encoding`.
    func string(encoding: String.Encoding) -> String? {
        return String(data: self, encoding: encoding)
    }

    /// Returns the bytes from `offset` up to, but not including, the next
    /// zero byte.
    ///
    /// - Parameter offset: Offset of the first byte to return.
    /// - Returns: The bytes before the terminator; the rest of the buffer
    ///   when no zero byte follows; empty when `offset` is out of range.
    func readUntilZero(from offset: Int) -> Data {
        guard offset >= 0, offset < count else {
            return Data()
        }
        let tail = self[(self.startIndex + offset)...]
        let end = tail.firstIndex(of: 0) ?? tail.endIndex
        // Always hand back an independent, zero-based copy rather than a
        // slice on one path and a copy on the other.
        return subdata(in: tail.startIndex..<end)
    }
}
/// Reader for Safari's `Cookies.binarycookies` store.
///
/// Layout as consumed by this parser:
///
/// - File: 4-byte magic `cook`, a 4-byte big-endian page count, one 4-byte
///   big-endian size per page, then the pages back to back.
/// - Page: 4-byte header `0x00000100`, a 4-byte little-endian cookie count,
///   then one 4-byte little-endian record offset (from the page start) per
///   cookie.
/// - Record (offsets from the record start, little-endian): flags at 8,
///   domain/name/path/value string offsets at 16/20/24/28, and the expiry as
///   an 8-byte double at 40. Strings are zero-terminated.
///
/// Every count and offset comes from the file, so each one is bounds-checked
/// before use; malformed records are skipped rather than trapping.
class BinaryCookiesReader {
    /// Safari's sandboxed cookie store, relative to the home directory.
    private static let safariCookiesPath =
        "/Library/Containers/com.apple.Safari/Data/Library/Cookies/Cookies.binarycookies"

    /// Reads every cookie from the current user's Safari cookie store.
    ///
    /// - Returns: The parsed cookies, or `[]` when the file is unreadable.
    static func getCookies() -> [Cookie] {
        return readCookies(path: NSHomeDirectory().appending(safariCookiesPath))
    }

    /// Reads the Safari cookies whose domain contains `domain`.
    ///
    /// - Parameter domain: Substring looked up (case-sensitively) in each
    ///   cookie's domain.
    static func getCookies(like domain: String) -> [Cookie] {
        return getCookies().filter { $0.domain.contains(domain) }
    }

    /// Parses the binary cookie file at `path`.
    ///
    /// - Returns: The parsed cookies, or `[]` when the file cannot be read.
    static func readCookies(path: String) -> [Cookie] {
        guard let data = try? Data(contentsOf: URL(fileURLWithPath: path)) else {
            return []
        }
        return readCookies(data: data)
    }

    /// Parses the contents of a binary cookie file.
    ///
    /// - Parameter data: The complete file contents.
    /// - Returns: All cookies from all well-formed pages. Parsing stops at
    ///   the first page that does not fit inside `data`.
    static func readCookies(data: Data) -> [Cookie] {
        guard let magic = bytes(in: data, at: 0, length: 4),
              String(data: magic, encoding: .utf8) == "cook" else {
            debugPrint("Not a Cookies.binarycookie file")
            return []
        }

        let sizeTableStart = 8
        // The page count is untrusted: make sure its size table fits in the
        // file before looping over it.
        guard let pageCount = readUInt32(in: data, at: 4, bigEndian: true),
              pageCount > 0,
              pageCount <= (data.count - sizeTableStart) / 4 else {
            return []
        }

        let pageSizes = (0..<pageCount).compactMap {
            readUInt32(in: data, at: sizeTableStart + 4 * $0, bigEndian: true)
        }

        var cookies: [Cookie] = []
        var pageStart = sizeTableStart + 4 * pageCount
        for size in pageSizes {
            guard let page = bytes(in: data, at: pageStart, length: size) else {
                // Truncated file: keep what was parsed so far.
                break
            }
            cookies.append(contentsOf: handleCookieData(data: page))
            pageStart += size
        }
        return cookies
    }

    /// Parses one page of the cookie file.
    ///
    /// - Parameter data: The bytes of a single page.
    /// - Returns: The cookies of the page; `[]` when the page header is wrong
    ///   or the offset table does not fit in the page.
    static func handleCookieData(data: Data) -> [Cookie] {
        guard readUInt32(in: data, at: 0, bigEndian: true) == 0x00000100 else {
            debugPrint("page header is error, not 0x00000100!")
            return []
        }

        let offsetTableStart = 8
        guard let cookieCount = readUInt32(in: data, at: 4, bigEndian: false),
              cookieCount > 0,
              cookieCount <= (data.count - offsetTableStart) / 4 else {
            return []
        }

        var cookies: [Cookie] = []
        for index in 0..<cookieCount {
            guard let offset = readUInt32(in: data, at: offsetTableStart + 4 * index, bigEndian: false),
                  let cookie = parseCookie(in: data, at: offset) else {
                continue
            }
            cookies.append(cookie)
        }
        return cookies
    }

    /// Decodes the first four bytes of `data` as an unsigned 32-bit integer.
    ///
    /// - Parameters:
    ///   - data: Buffer holding at least four bytes; extra bytes are ignored.
    ///   - isBe: `true` for big-endian, `false` for little-endian.
    /// - Returns: The decoded value, or `0` when fewer than four bytes exist.
    static func bytesToInt(data: Data, isBe: Bool) -> Int {
        guard data.count >= 4 else {
            return 0
        }
        // Only touch the four bytes that matter; callers may pass a whole page.
        let word = data.prefix(4)
        let mostSignificantFirst = isBe ? Array(word) : Array(word.reversed())
        var value = 0
        for byte in mostSignificantFirst {
            value = (value << 8) | Int(byte)
        }
        return value
    }

    // MARK: - Private helpers

    /// Returns exactly `length` bytes at `offset`, or `nil` when that range
    /// is not entirely inside `data`.
    private static func bytes(in data: Data, at offset: Int, length: Int) -> Data? {
        guard offset >= 0, length >= 0, length <= data.count - offset else {
            return nil
        }
        let start = data.startIndex + offset
        return data.subdata(in: start..<(start + length))
    }

    /// Reads a 32-bit unsigned integer at `offset`, or `nil` when out of range.
    private static func readUInt32(in data: Data, at offset: Int, bigEndian: Bool) -> Int? {
        guard let word = bytes(in: data, at: offset, length: 4) else {
            return nil
        }
        return bytesToInt(data: word, isBe: bigEndian)
    }

    /// Reads the zero-terminated UTF-8 string at `offset`; `""` when the
    /// offset is out of range or the bytes are not valid UTF-8.
    private static func cString(in data: Data, at offset: Int) -> String {
        guard offset >= 0, offset < data.count else {
            return ""
        }
        let tail = data[(data.startIndex + offset)...]
        let end = tail.firstIndex(of: 0) ?? tail.endIndex
        return String(data: data.subdata(in: tail.startIndex..<end), encoding: .utf8) ?? ""
    }

    /// Parses the cookie record starting at `offset` inside `page`, or
    /// returns `nil` when the fixed-size part of the record is truncated.
    private static func parseCookie(in page: Data, at offset: Int) -> Cookie? {
        // The record is taken to run to the end of the page; string reads
        // stop at their zero terminator.
        guard let record = bytes(in: page, at: offset, length: page.count - offset),
              let flags = readUInt32(in: record, at: 8, bigEndian: false),
              let domainOffset = readUInt32(in: record, at: 16, bigEndian: false),
              let nameOffset = readUInt32(in: record, at: 20, bigEndian: false),
              let pathOffset = readUInt32(in: record, at: 24, bigEndian: false),
              let valueOffset = readUInt32(in: record, at: 28, bigEndian: false),
              let expiryBytes = bytes(in: record, at: 40, length: 8) else {
            return nil
        }

        // Assemble the little-endian IEEE-754 double byte by byte; this never
        // reads past the buffer, unlike a raw 8-byte memory copy.
        var expiryBits: UInt64 = 0
        for byte in expiryBytes.reversed() {
            expiryBits = (expiryBits << 8) | UInt64(byte)
        }
        // The stored value counts seconds from 2001-01-01 (978307200 s after
        // the Unix epoch), which is Foundation's reference date. Converting
        // directly avoids the integer conversion that traps on NaN/infinity.
        let secondsSince2001 = Double(bitPattern: expiryBits)
        let expires: Date? = secondsSince2001.isFinite
            ? Date(timeIntervalSinceReferenceDate: secondsSince2001)
            : nil

        return Cookie(
            domain: cString(in: record, at: domainOffset),
            path: cString(in: record, at: pathOffset),
            secure: (flags & CookieFlags.secure) != 0,
            expires: expires,
            name: cString(in: record, at: nameOffset),
            value: cString(in: record, at: valueOffset)
        )
    }
}
/// A `WKWebView` that is its own navigation delegate and can seed its cookie
/// store, load a URL with a completion callback, scroll through the page and
/// write the rendered page to a PDF file.
class SaveWebView: WKWebView, WKNavigationDelegate {
    /// Height of the document. Starts as the frame height and is replaced by
    /// `document.documentElement.scrollHeight` once a navigation finishes.
    var contentHeight: CGFloat

    /// Callback of the load currently in flight; cleared when it is invoked.
    private var loadCompletion: ((Error?) -> Void)?

    override init(frame: CGRect, configuration: WKWebViewConfiguration) {
        // WKWebView copies the configuration inside its initializer, so these
        // settings only take effect when applied before `super.init`.
        configuration.websiteDataStore = .default()
        configuration.processPool = WKProcessPool()
        self.contentHeight = frame.height
        super.init(frame: frame, configuration: configuration)
        self.navigationDelegate = self
    }

    required init?(coder: NSCoder) {
        // Initialize for storyboard/xib usage
        self.contentHeight = 0
        super.init(coder: coder)
        self.navigationDelegate = self
    }

    /// Starts loading `url`.
    ///
    /// - Parameters:
    ///   - url: The page to load.
    ///   - completion: Called once with `nil` after the navigation finished
    ///     and `contentHeight` was measured, or with the navigation error.
    func load(url: URL, completion: @escaping (Error?) -> Void) {
        self.loadCompletion = completion
        self.load(URLRequest(url: url))
    }

    /// Parses a tab-separated Netscape-format cookie file.
    ///
    /// Columns used: 0 domain, 2 path, 3 secure (`TRUE`), 4 expiry as Unix
    /// seconds, 5 name, 6 value. Blank lines and `#` comments are skipped,
    /// except `#HttpOnly_` lines, which are cookies with a marker prefix.
    ///
    /// - Parameter path: Location of the cookie file.
    /// - Returns: The parsed cookies; `[]` when the file cannot be read.
    func loadCookiesFromFile(path: String) -> [Cookie] {
        guard let content = try? String(contentsOfFile: path, encoding: .utf8) else {
            print("Could not read cookie file")
            return []
        }

        let httpOnlyPrefix = "#HttpOnly_"
        var cookies: [Cookie] = []
        for rawLine in content.components(separatedBy: .newlines) {
            var line = rawLine
            if line.hasPrefix(httpOnlyPrefix) {
                // Not a comment: the remainder is an ordinary cookie line.
                line.removeFirst(httpOnlyPrefix.count)
            } else if line.isEmpty || line.hasPrefix("#") {
                continue
            }

            let fields = line.components(separatedBy: "\t")
            guard fields.count >= 7 else {
                print("Invalid cookie format: \(rawLine)")
                continue
            }

            // An expiry of 0 denotes a session cookie; mapping it to 1970
            // would hand WebKit a cookie that has already expired.
            let expires = Double(fields[4]).flatMap { seconds in
                seconds > 0 ? Date(timeIntervalSince1970: seconds) : nil
            }
            cookies.append(
                Cookie(
                    domain: fields[0],
                    path: fields[2],
                    secure: fields[3] == "TRUE",
                    expires: expires,
                    name: fields[5],
                    value: fields[6]
                )
            )
        }
        return cookies
    }

    /// Copies `cookies` into this web view's cookie store.
    ///
    /// - Parameters:
    ///   - cookies: Cookies to install; entries that `HTTPCookie` rejects are
    ///     skipped.
    ///   - completion: Called on the main queue after every cookie was stored.
    func syncCookiesToWebView(cookies: [Cookie], completion: @escaping () -> Void) {
        let cookieStore = configuration.websiteDataStore.httpCookieStore
        let group = DispatchGroup()

        for cookieData in cookies {
            var properties: [HTTPCookiePropertyKey: Any] = [
                .domain: cookieData.domain,
                .path: cookieData.path,
                .name: cookieData.name,
                .value: cookieData.value
            ]
            // `.secure` is documented as marking the cookie secure whenever
            // any value is supplied, so only add the key for secure cookies.
            if cookieData.secure {
                properties[.secure] = "TRUE"
            }
            if let expires = cookieData.expires {
                properties[.expires] = expires
            }

            guard let cookie = HTTPCookie(properties: properties) else {
                continue
            }
            group.enter()
            cookieStore.setCookie(cookie) {
                group.leave()
            }
        }

        group.notify(queue: .main) {
            completion()
        }
    }

    /// Installs the cookies from a Netscape-format file, then loads `url`.
    func loadUrlWithFileCookies(url: URL, cookieFilePath: String, completion: @escaping (Error?) -> Void) {
        let cookies = loadCookiesFromFile(path: cookieFilePath)
        syncCookiesToWebView(cookies: cookies) {
            self.load(url: url, completion: completion)
        }
    }

    /// Installs the matching cookies from Safari's cookie store, then loads
    /// `url`. A URL without a host is loaded without any cookies.
    func loadUrlWithSyncedCookies(url: URL, completion: @escaping (Error?) -> Void) {
        let cookies = url.host.map { SaveWebView.safariCookies(forHost: $0) } ?? []
        syncCookiesToWebView(cookies: cookies) {
            self.load(url: url, completion: completion)
        }
    }

    /// Scrolls the page one viewport at a time until the bottom is reached.
    ///
    /// - Parameter timeout: Pause between two scroll steps, in milliseconds.
    func scrollPage(_ timeout: Int) {
        // The trailing `true;` makes the script evaluate to a plain value
        // instead of the promise returned by the async function.
        let javascript = """
        (async function () {
        const viewportHeight = window.innerHeight;
        const totalHeight = document.documentElement.scrollHeight;
        while (window.scrollY < totalHeight - viewportHeight) {
        window.scrollBy({
        top: viewportHeight,
        behavior: 'smooth'
        });
        await new Promise(resolve => setTimeout(resolve, \(timeout)));
        }
        })();
        true;
        """
        self.evaluateJavaScript(javascript) { (result, error) in
            if let error = error {
                print("Error preloading images: \(error.localizedDescription)")
            }
        }
    }

    /// Renders the web view's contents to a PDF file.
    ///
    /// Despite its name this writes a PDF, not a webarchive. The method spins
    /// the main run loop until the file is written and terminates the process
    /// with status 1 when rendering or writing fails (including when a file
    /// already exists at `savePath`).
    ///
    /// - Parameters:
    ///   - savePath: Destination file; an existing file is not overwritten.
    ///   - contentHeight: Height of the area to capture.
    func saveAsWebArchive(savePath: URL, contentHeight: CGFloat) {
        var isSaving = true

        let config = WKPDFConfiguration()
        // Capture the full document height, not just the visible frame.
        config.rect = CGRect(x: 0, y: 0, width: 900, height: contentHeight)

        self.createPDF(configuration: config) { result in
            do {
                try result.get().write(to: savePath, options: [.withoutOverwriting])
                isSaving = false
            } catch {
                // Reported on stdout like every other message of this class;
                // this script redirects stderr to /dev/null at startup.
                print("Unable to save PDF file: \(error.localizedDescription)")
                exit(1)
            }
        }

        while isSaving {
            RunLoop.main.run(until: Date(timeIntervalSinceNow: 0.1))
        }
    }

    // MARK: - WKNavigationDelegate

    func webView(_ webView: WKWebView, didFinish navigation: WKNavigation!) {
        self.evaluateJavaScript("document.documentElement.scrollHeight") { (result, error) in
            if let error = error {
                print("Error getting scrollHeight: \(error.localizedDescription)")
            } else if let height = result as? NSNumber {
                // Keep the previous height when the script yields no number.
                self.contentHeight = CGFloat(height.doubleValue)
            }
            // Report completion from a fresh main-queue block, not from
            // inside WebKit's own callback.
            DispatchQueue.main.async {
                self.finishLoad(with: nil)
            }
        }
    }

    func webView(_ webView: WKWebView, didFail navigation: WKNavigation!, withError error: Error) {
        print("Navigation failed: \(error.localizedDescription)")
        finishLoad(with: error)
    }

    func webView(_ webView: WKWebView, didFailProvisionalNavigation navigation: WKNavigation!, withError error: Error) {
        print("didFailProvisionalNavigation")
        finishLoad(with: error)
    }

    // MARK: - Private helpers

    /// Invokes the pending load callback at most once. The callback is
    /// cleared first so a delegate call arriving while it runs cannot fire
    /// it a second time.
    private func finishLoad(with error: Error?) {
        let completion = loadCompletion
        loadCompletion = nil
        completion?(error)
    }

    /// Returns the Safari cookies relevant for `host`.
    ///
    /// Keeps every cookie whose domain contains the host, and additionally
    /// cookies set on a parent domain (for example `.example.com` for
    /// `www.example.com`). WebKit still applies its own matching when it
    /// sends cookies, so over-selecting here is harmless.
    private static func safariCookies(forHost host: String) -> [Cookie] {
        let wantedHost = host.lowercased()
        return BinaryCookiesReader.getCookies().filter { cookie in
            let domain = cookie.domain.lowercased()
            if domain.contains(wantedHost) {
                return true
            }
            let bareDomain = domain.hasPrefix(".") ? String(domain.dropFirst()) : domain
            return !bareDomain.isEmpty && wantedHost.hasSuffix("." + bareDomain)
        }
    }
}
// Entry point: load <URL> with Safari's cookies, scroll through the page so
// lazily loaded content appears, then write the page to <OUTPUT_PATH> as PDF.
//
// Messages go to stdout because stderr is redirected to /dev/null at the top
// of this script.
guard CommandLine.arguments.count == 3 else {
    print("Usage: \(CommandLine.arguments[0]) <URL> <OUTPUT_PATH>")
    exit(1)
}
guard let url = URL(string: CommandLine.arguments[1]) else {
    print("Invalid URL: \(CommandLine.arguments[1])")
    exit(1)
}
let savePath = URL(fileURLWithPath: CommandLine.arguments[2])

var keepRunning = true
// Process exit status; set to 1 when the page could not be loaded.
var exitStatus: Int32 = 0
let frameHeight: Int = 1200
// Pause between two scroll steps, in seconds.
let scrollTimeout: Double = 0.25
let webView = SaveWebView(frame: CGRect(x: 0, y: 0, width: 900, height: frameHeight))

/// Keeps servicing the current run loop for `seconds`, so WebKit callbacks
/// continue to be delivered while the script is waiting.
func wait(seconds: Double) {
    let until = Date().addingTimeInterval(seconds)
    repeat {
        RunLoop.current.run(mode: .default, before: until)
    } while Date() < until
}

webView.loadUrlWithSyncedCookies(url: url) { error in
    if let error = error {
        print("Failed to load the page: \(error.localizedDescription)")
        exitStatus = 1
    } else {
        // Give the page time to settle after the navigation finished.
        wait(seconds: 5.0)
        webView.scrollPage(Int(scrollTimeout * 1000))
        // One scroll step per frame height, plus 10% headroom.
        wait(seconds: ceil(Double(webView.contentHeight) / Double(frameHeight)) * scrollTimeout * 1.1)
        webView.saveAsWebArchive(savePath: savePath, contentHeight: webView.contentHeight)
    }
    keepRunning = false
}

while keepRunning {
    RunLoop.current.run(mode: .default, before: .distantFuture)
}
exit(exitStatus)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment