From 6f27d5c73ee74f24637f05857697b4152dcc9485 Mon Sep 17 00:00:00 2001 From: Bipin <87369440+bipinkrish@users.noreply.github.com> Date: Thu, 23 Feb 2023 12:39:53 +0530 Subject: [PATCH] Add files via upload --- decrypt/decodeEPUB.py | 314 ++++++ decrypt/decodePDF.py | 2309 ++++++++++++++++++++++++++++++++++++++ decrypt/zeroedzipinfo.py | 30 + 3 files changed, 2653 insertions(+) create mode 100644 decrypt/decodeEPUB.py create mode 100644 decrypt/decodePDF.py create mode 100644 decrypt/zeroedzipinfo.py diff --git a/decrypt/decodeEPUB.py b/decrypt/decodeEPUB.py new file mode 100644 index 0000000..048034a --- /dev/null +++ b/decrypt/decodeEPUB.py @@ -0,0 +1,314 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# ineptepub.py +# Copyright © 2009-2022 by i♥cabbages, Apprentice Harper et al. + +# Released under the terms of the GNU General Public Licence, version 3 +# + +""" +Decrypt Adobe Digital Editions encrypted ePub books. +""" + +KEYPATH = "adobekey.der" +__license__ = 'GPL v3' +__version__ = "8.0" + +import sys +import os +import traceback +import base64 +import zlib +import zipfile +from zipfile import ZipInfo, ZipFile, ZIP_STORED, ZIP_DEFLATED +from decrypt.zeroedzipinfo import ZeroedZipInfo +from contextlib import closing +from lxml import etree +from uuid import UUID +import hashlib + +try: + from Cryptodome.Cipher import AES, PKCS1_v1_5 + from Cryptodome.PublicKey import RSA +except ImportError: + from Crypto.Cipher import AES, PKCS1_v1_5 + from Crypto.PublicKey import RSA + + +def unpad(data, padding=16): + if sys.version_info[0] == 2: + pad_len = ord(data[-1]) + else: + pad_len = data[-1] + + return data[:-pad_len] + + +class ADEPTError(Exception): + pass + +class ADEPTNewVersionError(Exception): + pass + +META_NAMES = ('mimetype', 'META-INF/rights.xml') +NSMAP = {'adept': 'http://ns.adobe.com/adept', + 'enc': 'http://www.w3.org/2001/04/xmlenc#'} + +class Decryptor(object): + def __init__(self, bookkey, encryption): + enc = lambda tag: '{%s}%s' % (NSMAP['enc'], tag) + self._aes = AES.new(bookkey, AES.MODE_CBC, b'\x00'*16) + self._encryption = etree.fromstring(encryption) + self._encrypted = encrypted = set() + self._encryptedForceNoDecomp = encryptedForceNoDecomp = set() + self._otherData = otherData = set() + + self._json_elements_to_remove = json_elements_to_remove = set() + self._has_remaining_xml = False + expr = './%s/%s/%s' % (enc('EncryptedData'), enc('CipherData'), + enc('CipherReference')) + for elem in self._encryption.findall(expr): + path = elem.get('URI', None) + encryption_type_url = (elem.getparent().getparent().find("./%s" % (enc('EncryptionMethod'))).get('Algorithm', None)) + if path is not None: + if (encryption_type_url == "http://www.w3.org/2001/04/xmlenc#aes128-cbc"): + # Adobe + path = path.encode('utf-8') + encrypted.add(path) + json_elements_to_remove.add(elem.getparent().getparent()) + elif (encryption_type_url == "http://ns.adobe.com/adept/xmlenc#aes128-cbc-uncompressed"): + # Adobe uncompressed, for stuff like video files + path = path.encode('utf-8') + encryptedForceNoDecomp.add(path) + json_elements_to_remove.add(elem.getparent().getparent()) + else: + path = path.encode('utf-8') + otherData.add(path) + self._has_remaining_xml = True + + for elem in json_elements_to_remove: + elem.getparent().remove(elem) + + def check_if_remaining(self): + return self._has_remaining_xml + + def get_xml(self): + return "\n" + etree.tostring(self._encryption, encoding="utf-8", pretty_print=True, xml_declaration=False).decode("utf-8") + + + def decompress(self, bytes): 
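+        # This is a raw deflate stream (wbits = -15, no zlib header), which is how
+        # deflated members are stored inside the ePub zip container.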
+ dc = zlib.decompressobj(-15) + try: + decompressed_bytes = dc.decompress(bytes) + ex = dc.decompress(b'Z') + dc.flush() + if ex: + decompressed_bytes = decompressed_bytes + ex + except: + # possibly not compressed by zip - just return bytes + return bytes + return decompressed_bytes + + def decrypt(self, path, data): + if path.encode('utf-8') in self._encrypted or path.encode('utf-8') in self._encryptedForceNoDecomp: + data = self._aes.decrypt(data)[16:] + if type(data[-1]) != int: + place = ord(data[-1]) + else: + place = data[-1] + data = data[:-place] + if not path.encode('utf-8') in self._encryptedForceNoDecomp: + data = self.decompress(data) + return data + +# check file to make check whether it's probably an Adobe Adept encrypted ePub +def adeptBook(inpath): + with closing(ZipFile(open(inpath, 'rb'))) as inf: + namelist = set(inf.namelist()) + if 'META-INF/rights.xml' not in namelist or \ + 'META-INF/encryption.xml' not in namelist: + return False + try: + rights = etree.fromstring(inf.read('META-INF/rights.xml')) + adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag) + expr = './/%s' % (adept('encryptedKey'),) + bookkey = ''.join(rights.findtext(expr)) + if len(bookkey) in [192, 172, 64]: + return True + except: + # if we couldn't check, assume it is + return True + return False + +def isPassHashBook(inpath): + # If this is an Adobe book, check if it's a PassHash-encrypted book (B&N) + with closing(ZipFile(open(inpath, 'rb'))) as inf: + namelist = set(inf.namelist()) + if 'META-INF/rights.xml' not in namelist or \ + 'META-INF/encryption.xml' not in namelist: + return False + try: + rights = etree.fromstring(inf.read('META-INF/rights.xml')) + adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag) + expr = './/%s' % (adept('encryptedKey'),) + bookkey = ''.join(rights.findtext(expr)) + if len(bookkey) == 64: + return True + except: + pass + + return False + +# Checks the license file and returns the UUID the book is licensed for. +# This is used so that the Calibre plugin can pick the correct decryption key +# first try without having to loop through all possible keys. 
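+# Returns the UUID string with the "urn:uuid:" prefix stripped, or None if the
+# rights.xml is missing, malformed, or carries no <adept:user> element.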
+def adeptGetUserUUID(inpath): + with closing(ZipFile(open(inpath, 'rb'))) as inf: + try: + rights = etree.fromstring(inf.read('META-INF/rights.xml')) + adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag) + expr = './/%s' % (adept('user'),) + user_uuid = ''.join(rights.findtext(expr)) + if user_uuid[:9] != "urn:uuid:": + return None + return user_uuid[9:] + except: + return None + +def removeHardening(rights, keytype, keydata): + adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag) + textGetter = lambda name: ''.join(rights.findtext('.//%s' % (adept(name),))) + + # Gather what we need, and generate the IV + resourceuuid = UUID(textGetter("resource")) + deviceuuid = UUID(textGetter("device")) + fullfillmentuuid = UUID(textGetter("fulfillment")[:36]) + kekiv = UUID(int=resourceuuid.int ^ deviceuuid.int ^ fullfillmentuuid.int).bytes + + # Derive kek from just "keytype" + rem = int(keytype, 10) % 16 + H = hashlib.sha256(keytype.encode("ascii")).digest() + kek = H[2*rem : 16 + rem] + H[rem : 2*rem] + + return unpad(AES.new(kek, AES.MODE_CBC, kekiv).decrypt(keydata), 16) # PKCS#7 + +def decryptBook(userkey, inpath, outpath): + with closing(ZipFile(open(inpath, 'rb'))) as inf: + namelist = inf.namelist() + if 'META-INF/rights.xml' not in namelist or \ + 'META-INF/encryption.xml' not in namelist: + print("{0:s} is DRM-free.".format(os.path.basename(inpath))) + return 1 + for name in META_NAMES: + namelist.remove(name) + try: + rights = etree.fromstring(inf.read('META-INF/rights.xml')) + adept = lambda tag: '{%s}%s' % (NSMAP['adept'], tag) + expr = './/%s' % (adept('encryptedKey'),) + bookkeyelem = rights.find(expr) + bookkey = bookkeyelem.text + keytype = bookkeyelem.attrib.get('keyType', '0') + if len(bookkey) >= 172 and int(keytype, 10) > 2: + print("{0:s} is a secure Adobe Adept ePub with hardening.".format(os.path.basename(inpath))) + elif len(bookkey) == 172: + print("{0:s} is a secure Adobe Adept ePub.".format(os.path.basename(inpath))) + elif len(bookkey) == 64: + print("{0:s} is a secure Adobe PassHash (B&N) ePub.".format(os.path.basename(inpath))) + else: + print("{0:s} is not an Adobe-protected ePub!".format(os.path.basename(inpath))) + return 1 + + if len(bookkey) != 64: + # Normal or "hardened" Adobe ADEPT + rsakey = RSA.importKey(userkey) # parses the ASN1 structure + bookkey = base64.b64decode(bookkey) + if int(keytype, 10) > 2: + bookkey = removeHardening(rights, keytype, bookkey) + try: + bookkey = PKCS1_v1_5.new(rsakey).decrypt(bookkey, None) # automatically unpads + except ValueError: + bookkey = None + + if bookkey is None: + print("Could not decrypt {0:s}. 
Wrong key".format(os.path.basename(inpath))) + return 2 + else: + # Adobe PassHash / B&N + key = base64.b64decode(userkey)[:16] + bookkey = base64.b64decode(bookkey) + bookkey = unpad(AES.new(key, AES.MODE_CBC, b'\x00'*16).decrypt(bookkey), 16) # PKCS#7 + + if len(bookkey) > 16: + bookkey = bookkey[-16:] + + encryption = inf.read('META-INF/encryption.xml') + decryptor = Decryptor(bookkey, encryption) + kwds = dict(compression=ZIP_DEFLATED, allowZip64=False) + with closing(ZipFile(open(outpath, 'wb'), 'w', **kwds)) as outf: + + for path in (["mimetype"] + namelist): + data = inf.read(path) + zi = ZipInfo(path) + zi.compress_type=ZIP_DEFLATED + + if path == "mimetype": + zi.compress_type = ZIP_STORED + + elif path == "META-INF/encryption.xml": + # Check if there's still something in there + if (decryptor.check_if_remaining()): + data = decryptor.get_xml() + print("Adding encryption.xml for the remaining embedded files.") + # We removed DRM, but there's still stuff like obfuscated fonts. + else: + continue + + + try: + # get the file info, including time-stamp + oldzi = inf.getinfo(path) + # copy across useful fields + zi.date_time = oldzi.date_time + zi.comment = oldzi.comment + zi.extra = oldzi.extra + zi.internal_attr = oldzi.internal_attr + # external attributes are dependent on the create system, so copy both. + zi.external_attr = oldzi.external_attr + + zi.volume = oldzi.volume + zi.create_system = oldzi.create_system + zi.create_version = oldzi.create_version + + if any(ord(c) >= 128 for c in path) or any(ord(c) >= 128 for c in zi.comment): + # If the file name or the comment contains any non-ASCII char, set the UTF8-flag + zi.flag_bits |= 0x800 + except: + pass + + # Python 3 has a bug where the external_attr is reset to `0o600 << 16` + # if it's NULL, so we need a workaround: + if zi.external_attr == 0: + zi = ZeroedZipInfo(zi) + + + if path == "META-INF/encryption.xml": + outf.writestr(zi, data) + else: + outf.writestr(zi, decryptor.decrypt(path, data)) + except: + print("Could not decrypt {0:s} because of an exception:\n{1:s}".format(os.path.basename(inpath), traceback.format_exc())) + return 2 + return 0 + + +def decryptEPUB(inpath): + keypath = KEYPATH + outpath = os.path.basename(inpath).removesuffix(".epub") + "_decrypted.epub" + userkey = open(keypath,'rb').read() + result = decryptBook(userkey, inpath, outpath) + if result == 0: + print("Successfully decrypted") + return outpath + else: + print("Decryption failed") + return None diff --git a/decrypt/decodePDF.py b/decrypt/decodePDF.py new file mode 100644 index 0000000..d958877 --- /dev/null +++ b/decrypt/decodePDF.py @@ -0,0 +1,2309 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + +# ineptpdf.py +# Copyright © 2009-2020 by i♥cabbages, Apprentice Harper et al. +# Copyright © 2021-2022 by noDRM et al. + +# Released under the terms of the GNU General Public Licence, version 3 +# + + +""" +Decrypts Adobe ADEPT-encrypted PDF files. 
+""" + +KEYPATH = "adobekey.der" +__license__ = 'GPL v3' +__version__ = "10.0.4" + +import codecs +import hashlib +import sys +import os +import re +import zlib +import struct +import binascii +import base64 +from io import BytesIO +from decimal import Decimal +import itertools +import xml.etree.ElementTree as etree +import traceback +from uuid import UUID + +try: + from Cryptodome.Cipher import AES, ARC4, PKCS1_v1_5 + from Cryptodome.PublicKey import RSA +except ImportError: + from Crypto.Cipher import AES, ARC4, PKCS1_v1_5 + from Crypto.PublicKey import RSA + + +def unpad(data, padding=16): + if sys.version_info[0] == 2: + pad_len = ord(data[-1]) + else: + pad_len = data[-1] + + return data[:-pad_len] + + +iswindows = sys.platform.startswith('win') +isosx = sys.platform.startswith('darwin') + +class ADEPTError(Exception): + pass + +class ADEPTInvalidPasswordError(Exception): + pass + +class ADEPTNewVersionError(Exception): + pass + +def SHA256(message): + return hashlib.sha256(message).digest() + +# Do we generate cross reference streams on output? +# 0 = never +# 1 = only if present in input +# 2 = always + +GEN_XREF_STM = 1 + +# This is the value for the current document +gen_xref_stm = False # will be set in PDFSerializer + +# PDF parsing routines from pdfminer, with changes for EBX_HANDLER + +# Utilities + +def choplist(n, seq): + '''Groups every n elements of the list.''' + r = [] + for x in seq: + r.append(x) + if len(r) == n: + yield tuple(r) + r = [] + return + +def nunpack(s, default=0): + '''Unpacks up to 4 bytes big endian.''' + l = len(s) + if not l: + return default + elif l == 1: + return ord(s) + elif l == 2: + return struct.unpack('>H', s)[0] + elif l == 3: + if sys.version_info[0] == 2: + return struct.unpack('>L', '\x00'+s)[0] + else: + return struct.unpack('>L', bytes([0]) + s)[0] + elif l == 4: + return struct.unpack('>L', s)[0] + else: + return TypeError('invalid length: %d' % l) + + +STRICT = 0 + + +# PS Exceptions + +class PSException(Exception): pass +class PSEOF(PSException): pass +class PSSyntaxError(PSException): pass +class PSTypeError(PSException): pass +class PSValueError(PSException): pass + + +# Basic PostScript Types + + +# PSLiteral +class PSObject(object): pass + +class PSLiteral(PSObject): + ''' + PS literals (e.g. "/Name"). + Caution: Never create these objects directly. + Use PSLiteralTable.intern() instead. + ''' + def __init__(self, name): + self.name = name.decode('utf-8') + return + + def __repr__(self): + name = [] + for char in self.name: + if not char.isalnum(): + char = '#%02x' % ord(char) + name.append(char) + return '/%s' % ''.join(name) + +# PSKeyword +class PSKeyword(PSObject): + ''' + PS keywords (e.g. "showpage"). + Caution: Never create these objects directly. + Use PSKeywordTable.intern() instead. + ''' + def __init__(self, name): + self.name = name.decode('utf-8') + return + + def __repr__(self): + return self.name + +# PSSymbolTable +class PSSymbolTable(object): + + ''' + Symbol table that stores PSLiteral or PSKeyword. 
+ ''' + + def __init__(self, classe): + self.dic = {} + self.classe = classe + return + + def intern(self, name): + if name in self.dic: + lit = self.dic[name] + else: + lit = self.classe(name) + self.dic[name] = lit + return lit + +PSLiteralTable = PSSymbolTable(PSLiteral) +PSKeywordTable = PSSymbolTable(PSKeyword) +LIT = PSLiteralTable.intern +KWD = PSKeywordTable.intern +KEYWORD_BRACE_BEGIN = KWD(b'{') +KEYWORD_BRACE_END = KWD(b'}') +KEYWORD_ARRAY_BEGIN = KWD(b'[') +KEYWORD_ARRAY_END = KWD(b']') +KEYWORD_DICT_BEGIN = KWD(b'<<') +KEYWORD_DICT_END = KWD(b'>>') + + +def literal_name(x): + if not isinstance(x, PSLiteral): + if STRICT: + raise PSTypeError('Literal required: %r' % x) + else: + return str(x) + return x.name + +def keyword_name(x): + if not isinstance(x, PSKeyword): + if STRICT: + raise PSTypeError('Keyword required: %r' % x) + else: + return str(x) + return x.name + + +## PSBaseParser +## +EOL = re.compile(br'[\r\n]') +SPC = re.compile(br'\s') +NONSPC = re.compile(br'\S') +HEX = re.compile(br'[0-9a-fA-F]') +END_LITERAL = re.compile(br'[#/%\[\]()<>{}\s]') +END_HEX_STRING = re.compile(br'[^\s0-9a-fA-F]') +HEX_PAIR = re.compile(br'[0-9a-fA-F]{2}|.') +END_NUMBER = re.compile(br'[^0-9]') +END_KEYWORD = re.compile(br'[#/%\[\]()<>{}\s]') +END_STRING = re.compile(br'[()\\]') +OCT_STRING = re.compile(br'[0-7]') +ESC_STRING = { b'b':8, b't':9, b'n':10, b'f':12, b'r':13, b'(':40, b')':41, b'\\':92 } + +class EmptyArrayValue(object): + def __str__(self): + return "<>" + + +class PSBaseParser(object): + + ''' + Most basic PostScript parser that performs only basic tokenization. + ''' + BUFSIZ = 4096 + + def __init__(self, fp): + self.fp = fp + self.seek(0) + return + + def __repr__(self): + return '' % (self.fp, self.bufpos) + + def flush(self): + return + + def close(self): + self.flush() + return + + def tell(self): + return self.bufpos+self.charpos + + def poll(self, pos=None, n=80): + pos0 = self.fp.tell() + if not pos: + pos = self.bufpos+self.charpos + self.fp.seek(pos) + self.fp.seek(pos0) + return + + def seek(self, pos): + ''' + Seeks the parser to the given position. + ''' + self.fp.seek(pos) + # reset the status for nextline() + self.bufpos = pos + self.buf = b'' + self.charpos = 0 + # reset the status for nexttoken() + self.parse1 = self.parse_main + self.tokens = [] + return + + def fillbuf(self): + if self.charpos < len(self.buf): return + # fetch next chunk. 
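+        # bufpos records the absolute file offset of this buffer, so token
+        # positions can be reported as bufpos + charpos (see tell()).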
+ self.bufpos = self.fp.tell() + self.buf = self.fp.read(self.BUFSIZ) + if not self.buf: + raise PSEOF('Unexpected EOF') + self.charpos = 0 + return + + def parse_main(self, s, i): + m = NONSPC.search(s, i) + if not m: + return (self.parse_main, len(s)) + j = m.start(0) + if isinstance(s[j], str): + # Python 2 + c = s[j] + else: + # Python 3 + c = bytes([s[j]]) + self.tokenstart = self.bufpos+j + if c == b'%': + self.token = c + return (self.parse_comment, j+1) + if c == b'/': + self.token = b'' + return (self.parse_literal, j+1) + if c in b'-+' or c.isdigit(): + self.token = c + return (self.parse_number, j+1) + if c == b'.': + self.token = c + return (self.parse_decimal, j+1) + if c.isalpha(): + self.token = c + return (self.parse_keyword, j+1) + if c == b'(': + self.token = b'' + self.paren = 1 + return (self.parse_string, j+1) + if c == b'<': + self.token = b'' + return (self.parse_wopen, j+1) + if c == b'>': + self.token = b'' + return (self.parse_wclose, j+1) + self.add_token(KWD(c)) + return (self.parse_main, j+1) + + def add_token(self, obj): + self.tokens.append((self.tokenstart, obj)) + return + + def parse_comment(self, s, i): + m = EOL.search(s, i) + if not m: + self.token += s[i:] + return (self.parse_comment, len(s)) + j = m.start(0) + self.token += s[i:j] + # We ignore comments. + #self.tokens.append(self.token) + return (self.parse_main, j) + + def parse_literal(self, s, i): + m = END_LITERAL.search(s, i) + if not m: + self.token += s[i:] + return (self.parse_literal, len(s)) + j = m.start(0) + self.token += s[i:j] + if isinstance(s[j], str): + c = s[j] + else: + c = bytes([s[j]]) + if c == b'#': + self.hex = b'' + return (self.parse_literal_hex, j+1) + self.add_token(LIT(self.token)) + return (self.parse_main, j) + + def parse_literal_hex(self, s, i): + if isinstance(s[i], str): + c = s[i] + else: + c = bytes([s[i]]) + if HEX.match(c) and len(self.hex) < 2: + self.hex += c + return (self.parse_literal_hex, i+1) + if self.hex: + if sys.version_info[0] == 2: + self.token += chr(int(self.hex, 16)) + else: + self.token += bytes([int(self.hex, 16)]) + return (self.parse_literal, i) + + def parse_number(self, s, i): + m = END_NUMBER.search(s, i) + if not m: + self.token += s[i:] + return (self.parse_number, len(s)) + j = m.start(0) + self.token += s[i:j] + if isinstance(s[j], str): + c = s[j] + else: + c = bytes([s[j]]) + if c == b'.': + self.token += c + return (self.parse_decimal, j+1) + try: + self.add_token(int(self.token)) + except ValueError: + pass + return (self.parse_main, j) + + def parse_decimal(self, s, i): + m = END_NUMBER.search(s, i) + if not m: + self.token += s[i:] + return (self.parse_decimal, len(s)) + j = m.start(0) + self.token += s[i:j] + self.add_token(Decimal(self.token.decode('utf-8'))) + return (self.parse_main, j) + + def parse_keyword(self, s, i): + m = END_KEYWORD.search(s, i) + if not m: + self.token += s[i:] + return (self.parse_keyword, len(s)) + j = m.start(0) + self.token += s[i:j] + if self.token == 'true': + token = True + elif self.token == 'false': + token = False + else: + token = KWD(self.token) + self.add_token(token) + return (self.parse_main, j) + + def parse_string(self, s, i): + m = END_STRING.search(s, i) + if not m: + self.token += s[i:] + return (self.parse_string, len(s)) + j = m.start(0) + self.token += s[i:j] + if isinstance(s[j], str): + c = s[j] + else: + c = bytes([s[j]]) + if c == b'\\': + self.oct = '' + return (self.parse_string_1, j+1) + if c == b'(': + self.paren += 1 + self.token += c + return (self.parse_string, j+1) + 
if c == b')': + self.paren -= 1 + if self.paren: + self.token += c + return (self.parse_string, j+1) + self.add_token(self.token) + return (self.parse_main, j+1) + + def parse_string_1(self, s, i): + if isinstance(s[i], str): + c = s[i] + else: + c = bytes([s[i]]) + if OCT_STRING.match(c) and len(self.oct) < 3: + self.oct += c + return (self.parse_string_1, i+1) + if self.oct: + if sys.version_info[0] == 2: + self.token += chr(int(self.oct, 8)) + else: + self.token += bytes([int(self.oct, 8)]) + return (self.parse_string, i) + if c in ESC_STRING: + + if sys.version_info[0] == 2: + self.token += chr(ESC_STRING[c]) + else: + self.token += bytes([ESC_STRING[c]]) + + return (self.parse_string, i+1) + + def parse_wopen(self, s, i): + if isinstance(s[i], str): + c = s[i] + else: + c = bytes([s[i]]) + if c.isspace() or HEX.match(c): + return (self.parse_hexstring, i) + if c == b'<': + self.add_token(KEYWORD_DICT_BEGIN) + i += 1 + if c == b'>': + # Empty array without any contents. Why though? + # We need to add some dummy python object that will serialize to + # nothing, otherwise the code removes the whole array. + self.add_token(EmptyArrayValue()) + i += 1 + + return (self.parse_main, i) + + def parse_wclose(self, s, i): + if isinstance(s[i], str): + c = s[i] + else: + c = bytes([s[i]]) + if c == b'>': + self.add_token(KEYWORD_DICT_END) + i += 1 + return (self.parse_main, i) + + def parse_hexstring(self, s, i): + m = END_HEX_STRING.search(s, i) + if not m: + self.token += s[i:] + return (self.parse_hexstring, len(s)) + j = m.start(0) + self.token += s[i:j] + if sys.version_info[0] == 2: + token = HEX_PAIR.sub(lambda m: chr(int(m.group(0), 16)), + SPC.sub('', self.token)) + else: + token = HEX_PAIR.sub(lambda m: bytes([int(m.group(0), 16)]), + SPC.sub(b'', self.token)) + self.add_token(token) + return (self.parse_main, j) + + def nexttoken(self): + while not self.tokens: + self.fillbuf() + (self.parse1, self.charpos) = self.parse1(self.buf, self.charpos) + token = self.tokens.pop(0) + return token + + def nextline(self): + ''' + Fetches a next line that ends either with \\r or \\n. + ''' + linebuf = b'' + linepos = self.bufpos + self.charpos + eol = False + while 1: + self.fillbuf() + if eol: + if sys.version_info[0] == 2: + c = self.buf[self.charpos] + else: + c = bytes([self.buf[self.charpos]]) + + # handle '\r\n' + if c == b'\n': + linebuf += c + self.charpos += 1 + break + m = EOL.search(self.buf, self.charpos) + if m: + linebuf += self.buf[self.charpos:m.end(0)] + self.charpos = m.end(0) + if sys.version_info[0] == 2: + if linebuf[-1] == b'\r': + eol = True + else: + break + else: + if bytes([linebuf[-1]]) == b'\r': + eol = True + else: + break + + else: + linebuf += self.buf[self.charpos:] + self.charpos = len(self.buf) + return (linepos, linebuf) + + def revreadlines(self): + ''' + Fetches a next line backword. This is used to locate + the trailers at the end of a file. 
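+        Yields newline-delimited chunks in reverse order, starting from the end
+        of the file.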
+ ''' + self.fp.seek(0, 2) + pos = self.fp.tell() + buf = b'' + while 0 < pos: + prevpos = pos + pos = max(0, pos-self.BUFSIZ) + self.fp.seek(pos) + s = self.fp.read(prevpos-pos) + if not s: break + while 1: + n = max(s.rfind(b'\r'), s.rfind(b'\n')) + if n == -1: + buf = s + buf + break + yield s[n:]+buf + s = s[:n] + buf = b'' + return + + +## PSStackParser +## +class PSStackParser(PSBaseParser): + + def __init__(self, fp): + PSBaseParser.__init__(self, fp) + self.reset() + return + + def reset(self): + self.context = [] + self.curtype = None + self.curstack = [] + self.results = [] + return + + def seek(self, pos): + PSBaseParser.seek(self, pos) + self.reset() + return + + def push(self, *objs): + self.curstack.extend(objs) + return + def pop(self, n): + objs = self.curstack[-n:] + self.curstack[-n:] = [] + return objs + def popall(self): + objs = self.curstack + self.curstack = [] + return objs + def add_results(self, *objs): + self.results.extend(objs) + return + + def start_type(self, pos, type): + self.context.append((pos, self.curtype, self.curstack)) + (self.curtype, self.curstack) = (type, []) + return + def end_type(self, type): + if self.curtype != type: + raise PSTypeError('Type mismatch: %r != %r' % (self.curtype, type)) + objs = [ obj for (_,obj) in self.curstack ] + (pos, self.curtype, self.curstack) = self.context.pop() + return (pos, objs) + + def do_keyword(self, pos, token): + return + + def nextobject(self, direct=False): + ''' + Yields a list of objects: keywords, literals, strings (byte arrays), + numbers, arrays and dictionaries. Arrays and dictionaries + are represented as Python sequence and dictionaries. + ''' + while not self.results: + (pos, token) = self.nexttoken() + if (isinstance(token, int) or + isinstance(token, Decimal) or + isinstance(token, bool) or + isinstance(token, bytearray) or + isinstance(token, bytes) or + isinstance(token, str) or + isinstance(token, PSLiteral)): + # normal token + self.push((pos, token)) + elif token == KEYWORD_ARRAY_BEGIN: + # begin array + self.start_type(pos, 'a') + elif token == KEYWORD_ARRAY_END: + # end array + try: + self.push(self.end_type('a')) + except PSTypeError: + if STRICT: raise + elif token == KEYWORD_DICT_BEGIN: + # begin dictionary + self.start_type(pos, 'd') + elif token == KEYWORD_DICT_END: + # end dictionary + try: + (pos, objs) = self.end_type('d') + if len(objs) % 2 != 0: + print("Incomplete dictionary construct") + objs.append("") # this isn't necessary. + # temporary fix. is this due to rental books? 
+ # raise PSSyntaxError( + # 'Invalid dictionary construct: %r' % objs) + d = dict((literal_name(k), v) \ + for (k,v) in choplist(2, objs)) + self.push((pos, d)) + except PSTypeError: + if STRICT: raise + else: + self.do_keyword(pos, token) + if self.context: + continue + else: + if direct: + return self.pop(1)[0] + self.flush() + obj = self.results.pop(0) + return obj + + +LITERAL_CRYPT = LIT(b'Crypt') +LITERALS_FLATE_DECODE = (LIT(b'FlateDecode'), LIT(b'Fl')) +LITERALS_LZW_DECODE = (LIT(b'LZWDecode'), LIT(b'LZW')) +LITERALS_ASCII85_DECODE = (LIT(b'ASCII85Decode'), LIT(b'A85')) + + +## PDF Objects +## +class PDFObject(PSObject): pass + +class PDFException(PSException): pass +class PDFTypeError(PDFException): pass +class PDFValueError(PDFException): pass +class PDFNotImplementedError(PSException): pass + + +## PDFObjRef +## +class PDFObjRef(PDFObject): + + def __init__(self, doc, objid, genno): + if objid == 0: + if STRICT: + raise PDFValueError('PDF object id cannot be 0.') + self.doc = doc + self.objid = objid + self.genno = genno + return + + def __repr__(self): + return '' % (self.objid, self.genno) + + def resolve(self): + return self.doc.getobj(self.objid) + + +# resolve +def resolve1(x): + ''' + Resolve an object. If this is an array or dictionary, + it may still contains some indirect objects inside. + ''' + while isinstance(x, PDFObjRef): + x = x.resolve() + return x + +def resolve_all(x): + ''' + Recursively resolve X and all the internals. + Make sure there is no indirect reference within the nested object. + This procedure might be slow. + ''' + while isinstance(x, PDFObjRef): + x = x.resolve() + if isinstance(x, list): + x = [ resolve_all(v) for v in x ] + elif isinstance(x, dict): + for (k,v) in iter(x.items()): + x[k] = resolve_all(v) + return x + +def decipher_all(decipher, objid, genno, x): + ''' + Recursively decipher X. + ''' + if isinstance(x, bytearray) or isinstance(x,bytes) or isinstance(x,str): + return decipher(objid, genno, x) + decf = lambda v: decipher_all(decipher, objid, genno, v) + if isinstance(x, list): + x = [decf(v) for v in x] + elif isinstance(x, dict): + x = dict((k, decf(v)) for (k, v) in iter(x.items())) + return x + + +# Type cheking +def int_value(x): + x = resolve1(x) + if not isinstance(x, int): + if STRICT: + raise PDFTypeError('Integer required: %r' % x) + return 0 + return x + +def decimal_value(x): + x = resolve1(x) + if not isinstance(x, Decimal): + if STRICT: + raise PDFTypeError('Decimal required: %r' % x) + return 0.0 + return x + +def num_value(x): + x = resolve1(x) + if not (isinstance(x, int) or isinstance(x, Decimal)): + if STRICT: + raise PDFTypeError('Int or Float required: %r' % x) + return 0 + return x + +def str_value(x): + x = resolve1(x) + if not (isinstance(x, bytearray) or isinstance(x, bytes) or isinstance(x, str)): + if STRICT: + raise PDFTypeError('String required: %r' % x) + return '' + return x + +def list_value(x): + x = resolve1(x) + if not (isinstance(x, list) or isinstance(x, tuple)): + if STRICT: + raise PDFTypeError('List required: %r' % x) + return [] + return x + +def dict_value(x): + x = resolve1(x) + if not isinstance(x, dict): + if STRICT: + raise PDFTypeError('Dict required: %r' % x) + return {} + return x + +def stream_value(x): + x = resolve1(x) + if not isinstance(x, PDFStream): + if STRICT: + raise PDFTypeError('PDFStream required: %r' % x) + return PDFStream({}, '') + return x + +# ascii85decode(data) +def ascii85decode(data): + n = b = 0 + out = b'' + for c in data: + if b'!' 
<= c and c <= b'u': + n += 1 + b = b*85+(c-33) + if n == 5: + out += struct.pack('>L',b) + n = b = 0 + elif c == b'z': + assert n == 0 + out += b'\0\0\0\0' + elif c == b'~': + if n: + for _ in range(5-n): + b = b*85+84 + out += struct.pack('>L',b)[:n-1] + break + return out + + +## PDFStream type +class PDFStream(PDFObject): + def __init__(self, dic, rawdata, decipher=None): + length = int_value(dic.get('Length', 0)) + eol = rawdata[length:] + # quick and dirty fix for false length attribute, + # might not work if the pdf stream parser has a problem + if decipher != None and decipher.__name__ == 'decrypt_aes': + if (len(rawdata) % 16) != 0: + cutdiv = len(rawdata) // 16 + rawdata = rawdata[:16*cutdiv] + else: + if eol in (b'\r', b'\n', b'\r\n'): + rawdata = rawdata[:length] + + self.dic = dic + self.rawdata = rawdata + self.decipher = decipher + self.data = None + self.decdata = None + self.objid = None + self.genno = None + return + + def set_objid(self, objid, genno): + self.objid = objid + self.genno = genno + return + + def __repr__(self): + if self.rawdata: + return '' % \ + (self.objid, len(self.rawdata), self.dic) + else: + return '' % \ + (self.objid, len(self.data), self.dic) + + def decode(self): + assert self.data is None and self.rawdata is not None + data = self.rawdata + if self.decipher: + # Handle encryption + data = self.decipher(self.objid, self.genno, data) + if gen_xref_stm: + self.decdata = data # keep decrypted data + if 'Filter' not in self.dic: + self.data = data + self.rawdata = None + return + filters = self.dic['Filter'] + if not isinstance(filters, list): + filters = [ filters ] + for f in filters: + if f in LITERALS_FLATE_DECODE: + # will get errors if the document is encrypted. + data = zlib.decompress(data) + # elif f in LITERALS_LZW_DECODE: + # data = b''.join(LZWDecoder(BytesIO(data)).run()) + elif f in LITERALS_ASCII85_DECODE: + data = ascii85decode(data) + elif f == LITERAL_CRYPT: + raise PDFNotImplementedError('/Crypt filter is unsupported') + else: + raise PDFNotImplementedError('Unsupported filter: %r' % f) + # apply predictors + if 'DP' in self.dic: + params = self.dic['DP'] + else: + params = self.dic.get('DecodeParms', {}) + if 'Predictor' in params: + pred = int_value(params['Predictor']) + if pred: + if pred != 12: + raise PDFNotImplementedError( + 'Unsupported predictor: %r' % pred) + if 'Columns' not in params: + raise PDFValueError( + 'Columns undefined for predictor=12') + columns = int_value(params['Columns']) + buf = b'' + ent0 = b'\x00' * columns + for i in range(0, len(data), columns+1): + pred = data[i] + ent1 = data[i+1:i+1+columns] + if sys.version_info[0] == 2: + if pred == '\x02': + ent1 = ''.join(chr((ord(a)+ord(b)) & 255) \ + for (a,b) in zip(ent0,ent1)) + else: + if pred == 2: + ent1 = b''.join(bytes([(a+b) & 255]) \ + for (a,b) in zip(ent0,ent1)) + buf += ent1 + ent0 = ent1 + data = buf + self.data = data + self.rawdata = None + return + + def get_data(self): + if self.data is None: + self.decode() + return self.data + + def get_rawdata(self): + return self.rawdata + + def get_decdata(self): + if self.decdata is not None: + return self.decdata + data = self.rawdata + if self.decipher and data: + # Handle encryption + data = self.decipher(self.objid, self.genno, data) + return data + + +## PDF Exceptions +## +class PDFSyntaxError(PDFException): pass +class PDFNoValidXRef(PDFSyntaxError): pass +class PDFEncryptionError(PDFException): pass +class PDFPasswordIncorrect(PDFEncryptionError): pass + +# some predefined literals and 
keywords. +LITERAL_OBJSTM = LIT(b'ObjStm') +LITERAL_XREF = LIT(b'XRef') +LITERAL_PAGE = LIT(b'Page') +LITERAL_PAGES = LIT(b'Pages') +LITERAL_CATALOG = LIT(b'Catalog') + + +## XRefs +## + +## PDFXRef +## +class PDFXRef(object): + + def __init__(self): + self.offsets = None + return + + def __repr__(self): + return '' % len(self.offsets) + + def objids(self): + return iter(self.offsets.keys()) + + def load(self, parser): + self.offsets = {} + while 1: + try: + (pos, line) = parser.nextline() + except PSEOF: + raise PDFNoValidXRef('Unexpected EOF - file corrupted?') + if not line: + raise PDFNoValidXRef('Premature eof: %r' % parser) + if line.startswith(b'trailer'): + parser.seek(pos) + break + f = line.strip().split(b' ') + if len(f) != 2: + raise PDFNoValidXRef('Trailer not found: %r: line=%r' % (parser, line)) + try: + (start, nobjs) = map(int, f) + except ValueError: + raise PDFNoValidXRef('Invalid line: %r: line=%r' % (parser, line)) + for objid in range(start, start+nobjs): + try: + (_, line) = parser.nextline() + except PSEOF: + raise PDFNoValidXRef('Unexpected EOF - file corrupted?') + f = line.strip().split(b' ') + if len(f) != 3: + raise PDFNoValidXRef('Invalid XRef format: %r, line=%r' % (parser, line)) + (pos, genno, use) = f + if use != b'n': + continue + self.offsets[objid] = (int(genno.decode('utf-8')), int(pos.decode('utf-8'))) + self.load_trailer(parser) + return + + KEYWORD_TRAILER = KWD(b'trailer') + def load_trailer(self, parser): + try: + (_,kwd) = parser.nexttoken() + assert kwd is self.KEYWORD_TRAILER + (_,dic) = parser.nextobject(direct=True) + except PSEOF: + x = parser.pop(1) + if not x: + raise PDFNoValidXRef('Unexpected EOF - file corrupted') + (_,dic) = x[0] + self.trailer = dict_value(dic) + return + + def getpos(self, objid): + try: + (genno, pos) = self.offsets[objid] + except KeyError: + raise + return (None, pos) + + +## PDFXRefStream +## +class PDFXRefStream(object): + + def __init__(self): + self.index = None + self.data = None + self.entlen = None + self.fl1 = self.fl2 = self.fl3 = None + return + + def __repr__(self): + return '' % self.index + + def objids(self): + for first, size in self.index: + for objid in range(first, first + size): + yield objid + + def load(self, parser, debug=0): + (_,objid) = parser.nexttoken() # ignored + (_,genno) = parser.nexttoken() # ignored + (_,kwd) = parser.nexttoken() + (_,stream) = parser.nextobject() + if not isinstance(stream, PDFStream) or \ + stream.dic['Type'] is not LITERAL_XREF: + raise PDFNoValidXRef('Invalid PDF stream spec.') + size = stream.dic['Size'] + index = stream.dic.get('Index', (0,size)) + self.index = list(zip(itertools.islice(index, 0, None, 2), + itertools.islice(index, 1, None, 2))) + (self.fl1, self.fl2, self.fl3) = stream.dic['W'] + self.data = stream.get_data() + self.entlen = self.fl1+self.fl2+self.fl3 + self.trailer = stream.dic + return + + def getpos(self, objid): + offset = 0 + for first, size in self.index: + if first <= objid and objid < (first + size): + break + offset += size + else: + raise KeyError(objid) + i = self.entlen * ((objid - first) + offset) + ent = self.data[i:i+self.entlen] + f1 = nunpack(ent[:self.fl1], 1) + if f1 == 1: + pos = nunpack(ent[self.fl1:self.fl1+self.fl2]) + genno = nunpack(ent[self.fl1+self.fl2:]) + return (None, pos) + elif f1 == 2: + objid = nunpack(ent[self.fl1:self.fl1+self.fl2]) + index = nunpack(ent[self.fl1+self.fl2:]) + return (objid, index) + # this is a free object + raise KeyError(objid) + + +## PDFDocument +## +## A PDFDocument object represents 
a PDF document. +## Since a PDF file is usually pretty big, normally it is not loaded +## at once. Rather it is parsed dynamically as processing goes. +## A PDF parser is associated with the document. +## +class PDFDocument(object): + + def __init__(self): + self.xrefs = [] + self.objs = {} + self.parsed_objs = {} + self.root = None + self.catalog = None + self.parser = None + self.encryption = None + self.decipher = None + return + + # set_parser(parser) + # Associates the document with an (already initialized) parser object. + def set_parser(self, parser): + if self.parser: + return + self.parser = parser + # The document is set to be temporarily ready during collecting + # all the basic information about the document, e.g. + # the header, the encryption information, and the access rights + # for the document. + self.ready = True + # Retrieve the information of each header that was appended + # (maybe multiple times) at the end of the document. + self.xrefs = parser.read_xref() + for xref in self.xrefs: + trailer = xref.trailer + if not trailer: continue + # If there's an encryption info, remember it. + if 'Encrypt' in trailer: + #assert not self.encryption + try: + self.encryption = (list_value(trailer['ID']), + dict_value(trailer['Encrypt'])) + # fix for bad files + except: + self.encryption = (b'ffffffffffffffffffffffffffffffffffff', + dict_value(trailer['Encrypt'])) + if 'Root' in trailer: + self.set_root(dict_value(trailer['Root'])) + break + else: + raise PDFSyntaxError('No /Root object! - Is this really a PDF?') + # The document is set to be non-ready again, until all the + # proper initialization (asking the password key and + # verifying the access permission, so on) is finished. + self.ready = False + return + + # set_root(root) + # Set the Root dictionary of the document. + # Each PDF file must have exactly one /Root dictionary. + def set_root(self, root): + self.root = root + self.catalog = dict_value(self.root) + if self.catalog.get('Type') is not LITERAL_CATALOG: + if STRICT: + raise PDFSyntaxError('Catalog not found!') + return + # initialize(password='') + # Perform the initialization with a given password. + # This step is mandatory even if there's no password associated + # with the document. 
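+    # Dispatches on the /Filter entry of the encryption dictionary:
+    # Adobe.APS, Standard password security, or Adobe's EBX_HANDLER (ADEPT / PassHash).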
+ def initialize(self, password=b'', inept=True): + if not self.encryption: + self.is_printable = self.is_modifiable = self.is_extractable = True + self.ready = True + raise PDFEncryptionError('Document is not encrypted.') + return + (docid, param) = self.encryption + type = literal_name(param['Filter']) + if type == 'Adobe.APS': + return self.initialize_adobe_ps(password, docid, param) + if type == 'Standard': + return self.initialize_standard(password, docid, param) + if type == 'EBX_HANDLER' and inept is True: + return self.initialize_ebx_inept(password, docid, param) + if type == 'EBX_HANDLER' and inept is False: + return self.initialize_ebx_ignoble(password, docid, param) + + raise PDFEncryptionError('Unknown filter: param=%r' % param) + + def initialize_and_return_filter(self): + if not self.encryption: + self.is_printable = self.is_modifiable = self.is_extractable = True + self.ready = True + return None + + (docid, param) = self.encryption + type = literal_name(param['Filter']) + return type + + def initialize_adobe_ps(self, password, docid, param): + global KEYFILEPATH + self.decrypt_key = self.genkey_adobe_ps(param) + self.genkey = self.genkey_v4 + self.decipher = self.decrypt_aes + self.ready = True + return + + def genkey_adobe_ps(self, param): + # nice little offline principal keys dictionary + # global static principal key for German Onleihe / Bibliothek Digital + principalkeys = { b'bibliothek-digital.de': codecs.decode(b'rRwGv2tbpKov1krvv7PO0ws9S436/lArPlfipz5Pqhw=','base64')} + self.is_printable = self.is_modifiable = self.is_extractable = True + length = int_value(param.get('Length', 0)) // 8 + edcdata = str_value(param.get('EDCData')).decode('base64') + pdrllic = str_value(param.get('PDRLLic')).decode('base64') + pdrlpol = str_value(param.get('PDRLPol')).decode('base64') + edclist = [] + for pair in edcdata.split(b'\n'): + edclist.append(pair) + # principal key request + for key in principalkeys: + if key in pdrllic: + principalkey = principalkeys[key] + else: + raise ADEPTError('Cannot find principal key for this pdf') + shakey = SHA256(principalkey) + ivector = bytes(16) # 16 zero bytes + plaintext = AES.new(shakey,AES.MODE_CBC,ivector).decrypt(edclist[9].decode('base64')) + if plaintext[-16:] != bytearray(b'\0x10')*16: + raise ADEPTError('Offlinekey cannot be decrypted, aborting ...') + pdrlpol = AES.new(plaintext[16:32],AES.MODE_CBC,edclist[2].decode('base64')).decrypt(pdrlpol) + if pdrlpol[-1] < 1 or pdrlpol[-1] > 16: + raise ADEPTError('Could not decrypt PDRLPol, aborting ...') + else: + cutter = -1 * pdrlpol[-1] + pdrlpol = pdrlpol[:cutter] + return plaintext[:16] + + PASSWORD_PADDING = b'(\xbfN^Nu\x8aAd\x00NV\xff\xfa\x01\x08..' 
\ + b'\x00\xb6\xd0h>\x80/\x0c\xa9\xfedSiz' + # experimental aes pw support + + def check_user_password(self, password, docid, param): + V = int_value(param.get('V', 0)) + if V < 5: + return self.check_user_password_V4(password, docid, param) + else: + return self.check_user_password_V5(password, param) + + def check_owner_password(self, password, docid, param): + V = int_value(param.get('V', 0)) + if V < 5: + return self.check_owner_password_V4(password, docid, param) + else: + return self.check_owner_password_V5(password, param) + + def check_user_password_V5(self, password, param): + U = str_value(param['U']) + userdata = U[:32] + salt = U[32:32+8] + # Truncate password: + password = password[:min(127, len(password))] + if self.hash_V5(password, salt, b"", param) == userdata: + return True + return None + + def check_owner_password_V5(self, password, param): + U = str_value(param['U']) + O = str_value(param['O']) + userdata = U[:48] + ownerdata = O[:32] + salt = O[32:32+8] + # Truncate password: + password = password[:min(127, len(password))] + if self.hash_V5(password, salt, userdata, param) == ownerdata: + return True + return None + + def recover_encryption_key_with_password(self, password, docid, param): + # Truncate password: + key_password = password[:min(127, len(password))] + + if self.check_owner_password_V5(key_password, param): + O = str_value(param['O']) + U = str_value(param['U']) + OE = str_value(param['OE']) + key_salt = O[40:40+8] + user_data = U[:48] + encrypted_file_key = OE[:32] + elif self.check_user_password_V5(key_password, param): + U = str_value(param['U']) + UE = str_value(param['UE']) + key_salt = U[40:40+8] + user_data = b"" + encrypted_file_key = UE[:32] + else: + raise Exception("Trying to recover key, but neither user nor owner pass is correct.") + + intermediate_key = self.hash_V5(key_password, key_salt, user_data, param) + + file_key = self.process_with_aes(intermediate_key, False, encrypted_file_key) + + return file_key + + + def process_with_aes(self, key, encrypt, data, repetitions = 1, iv = None): + if iv is None: + keylen = len(key) + iv = bytes([0x00]*keylen) + + if not encrypt: + plaintext = AES.new(key,AES.MODE_CBC,iv, True).decrypt(data) + return plaintext + else: + aes = AES.new(key, AES.MODE_CBC, iv, False) + new_data = bytes(data * repetitions) + crypt = aes.encrypt(new_data) + return crypt + + + def hash_V5(self, password, salt, userdata, param): + R = int_value(param['R']) + K = SHA256(password + salt + userdata) + if R < 6: + return K + elif R == 6: + round_number = 0 + done = False + while (not done): + round_number = round_number + 1 + K1 = password + K + userdata + if len(K1) < 32: + raise Exception("K1 < 32 ...") + #def process_with_aes(self, key: bytes, encrypt: bool, data: bytes, repetitions: int = 1, iv: bytes = None): + E = self.process_with_aes(K[:16], True, K1, 64, K[16:32]) + K = (hashlib.sha256, hashlib.sha384, hashlib.sha512)[sum(E) % 3](E).digest() + + if round_number >= 64: + ch = int.from_bytes(E[-1:], "big", signed=False) + if ch <= round_number - 32: + done = True + + result = K[0:32] + return result + else: + raise NotImplementedError("Revision > 6 not supported.") + + + def check_owner_password_V4(self, password, docid, param): + + # compute_O_rc4_key: + V = int_value(param.get('V', 0)) + if V >= 5: + raise Exception("compute_O_rc4_key not possible with V>= 5") + + R = int_value(param.get('R', 0)) + + length = int_value(param.get('Length', 40)) # Key length (bits) + password = (password+self.PASSWORD_PADDING)[:32] + hash 
= hashlib.md5(password) + if R >= 3: + for _ in range(50): + hash = hashlib.md5(hash.digest()[:length//8]) + hash = hash.digest()[:length//8] + + # "hash" is the return value of compute_O_rc4_key + + Odata = str_value(param.get('O')) + # now call iterate_rc4 ... + x = ARC4.new(hash).decrypt(Odata) # 4 + if R >= 3: + for i in range(1,19+1): + if sys.version_info[0] == 2: + k = b''.join(chr(ord(c) ^ i) for c in hash ) + else: + k = b''.join(bytes([c ^ i]) for c in hash ) + x = ARC4.new(k).decrypt(x) + + + # "x" is now the padded user password. + + # If we wanted to recover / extract the user password, + # we'd need to trim off the padding string from the end. + # As we just want to get access to the encryption key, + # we can just hand the password into the check_user_password + # as it is, as that function would be adding padding anyways. + # This trick only works with V4 and lower. + + enc_key = self.check_user_password(x, docid, param) + if enc_key is not None: + return enc_key + + return False + + + + + def check_user_password_V4(self, password, docid, param): + + V = int_value(param.get('V', 0)) + length = int_value(param.get('Length', 40)) # Key length (bits) + O = str_value(param['O']) + R = int_value(param['R']) # Revision + U = str_value(param['U']) + P = int_value(param['P']) + + # Algorithm 3.2 + password = (password+self.PASSWORD_PADDING)[:32] # 1 + hash = hashlib.md5(password) # 2 + hash.update(O) # 3 + hash.update(struct.pack('= 4: + hash.update(codecs.decode(b'ffffffff','hex')) + if R >= 3: + # 8 + for _ in range(50): + hash = hashlib.md5(hash.digest()[:length//8]) + key = hash.digest()[:length//8] + if R == 2: + # Algorithm 3.4 + u1 = ARC4.new(key).decrypt(password) + elif R >= 3: + # Algorithm 3.5 + hash = hashlib.md5(self.PASSWORD_PADDING) # 2 + hash.update(docid[0]) # 3 + x = ARC4.new(key).decrypt(hash.digest()[:16]) # 4 + for i in range(1,19+1): + if sys.version_info[0] == 2: + k = b''.join(chr(ord(c) ^ i) for c in key ) + else: + k = b''.join(bytes([c ^ i]) for c in key ) + x = ARC4.new(k).decrypt(x) + u1 = x+x # 32bytes total + if R == 2: + is_authenticated = (u1 == U) + else: + is_authenticated = (u1[:16] == U[:16]) + + if is_authenticated: + return key + + return None + + def initialize_standard(self, password, docid, param): + + self.decrypt_key = None + + + # copy from a global variable + V = int_value(param.get('V', 0)) + if (V <=0 or V > 5): + raise PDFEncryptionError('Unknown algorithm: %r' % V) + R = int_value(param['R']) # Revision + if R >= 7: + raise PDFEncryptionError('Unknown revision: %r' % R) + + # check owner pass: + retval = self.check_owner_password(password, docid, param) + if retval is True or (retval is not False and retval is not None): + #print("Owner pass is valid") + if retval is True: + self.decrypt_key = self.recover_encryption_key_with_password(password, docid, param) + else: + self.decrypt_key = retval + + if self.decrypt_key is None or self.decrypt_key is True or self.decrypt_key is False: + # That's not the owner password. Check if it's the user password. 
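+            # For V4 and older, check_user_password returns the derived file key
+            # directly; for V5/V6 it returns True and the key is recovered via
+            # recover_encryption_key_with_password below.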
+ retval = self.check_user_password(password, docid, param) + if retval is True or (retval is not False and retval is not None): + #print("User pass is valid") + if retval is True: + self.decrypt_key = self.recover_encryption_key_with_password(password, docid, param) + else: + self.decrypt_key = retval + + if self.decrypt_key is None or self.decrypt_key is True or self.decrypt_key is False: + raise ADEPTInvalidPasswordError("Password invalid.") + + + P = int_value(param['P']) + + self.is_printable = bool(P & 4) + self.is_modifiable = bool(P & 8) + self.is_extractable = bool(P & 16) + self.is_annotationable = bool(P & 32) + self.is_formsenabled = bool(P & 256) + self.is_textextractable = bool(P & 512) + self.is_assemblable = bool(P & 1024) + self.is_formprintable = bool(P & 2048) + + + # genkey method + if V == 1 or V == 2 or V == 4: + self.genkey = self.genkey_v2 + elif V == 3: + self.genkey = self.genkey_v3 + elif V >= 5: + self.genkey = self.genkey_v5 + + set_decipher = False + + if V >= 4: + # Check if we need new genkey_v4 - only if we're using AES. + try: + for key in param['CF']: + algo = str(param["CF"][key]["CFM"]) + if algo == "/AESV2": + if V == 4: + self.genkey = self.genkey_v4 + set_decipher = True + self.decipher = self.decrypt_aes + elif algo == "/AESV3": + if V == 4: + self.genkey = self.genkey_v4 + set_decipher = True + self.decipher = self.decrypt_aes + elif algo == "/V2": + set_decipher = True + self.decipher = self.decrypt_rc4 + except: + pass + + # rc4 + if V < 4: + self.decipher = self.decrypt_rc4 # XXX may be AES + # aes + if not set_decipher: + # This should usually already be set by now. + # If it's not, assume that V4 and newer are using AES + if V >= 4: + self.decipher = self.decrypt_aes + self.ready = True + return + + + def initialize_ebx_ignoble(self, keyb64, docid, param): + self.is_printable = self.is_modifiable = self.is_extractable = True + + try: + key = keyb64.decode('base64')[:16] + # This will probably always error, but I'm not 100% sure, so lets leave the old code in. 
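+            # On Python 3 a str key has no .decode() method, so this likely raises
+            # AttributeError and we fall back to codecs.decode below.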
+ except AttributeError: + key = codecs.decode(keyb64.encode("ascii"), 'base64')[:16] + + + length = int_value(param.get('Length', 0)) / 8 + rights = codecs.decode(str_value(param.get('ADEPT_LICENSE')), "base64") + rights = zlib.decompress(rights, -15) + rights = etree.fromstring(rights) + expr = './/{http://ns.adobe.com/adept}encryptedKey' + bookkey = ''.join(rights.findtext(expr)) + bookkey = base64.b64decode(bookkey) + bookkey = AES.new(key, AES.MODE_CBC, b'\x00'*16).decrypt(bookkey) + bookkey = unpad(bookkey, 16) # PKCS#7 + if len(bookkey) > 16: + bookkey = bookkey[-16:] + ebx_V = int_value(param.get('V', 4)) + ebx_type = int_value(param.get('EBX_ENCRYPTIONTYPE', 6)) + # added because of improper booktype / decryption book session key errors + if length > 0: + if len(bookkey) == length: + if ebx_V == 3: + V = 3 + else: + V = 2 + elif len(bookkey) == length + 1: + V = bookkey[0] + bookkey = bookkey[1:] + else: + print("ebx_V is %d and ebx_type is %d" % (ebx_V, ebx_type)) + print("length is %d and len(bookkey) is %d" % (length, len(bookkey))) + if len(bookkey) > 0: + print("bookkey[0] is %d" % bookkey[0]) + raise ADEPTError('error decrypting book session key - mismatched length') + else: + # proper length unknown try with whatever you have + print("ebx_V is %d and ebx_type is %d" % (ebx_V, ebx_type)) + print("length is %d and len(bookkey) is %d" % (length, len(bookkey))) + if len(bookkey) > 0: + print("bookkey[0] is %d" % ord(bookkey[0])) + if ebx_V == 3: + V = 3 + else: + V = 2 + self.decrypt_key = bookkey + self.genkey = self.genkey_v3 if V == 3 else self.genkey_v2 + self.decipher = self.decrypt_rc4 + self.ready = True + return + + @staticmethod + def removeHardening(rights, keytype, keydata): + adept = lambda tag: '{%s}%s' % ('http://ns.adobe.com/adept', tag) + textGetter = lambda name: ''.join(rights.findtext('.//%s' % (adept(name),))) + + # Gather what we need, and generate the IV + resourceuuid = UUID(textGetter("resource")) + deviceuuid = UUID(textGetter("device")) + fullfillmentuuid = UUID(textGetter("fulfillment")[:36]) + kekiv = UUID(int=resourceuuid.int ^ deviceuuid.int ^ fullfillmentuuid.int).bytes + + # Derive kek from just "keytype" + rem = int(keytype, 10) % 16 + H = SHA256(keytype.encode("ascii")) + kek = H[2*rem : 16 + rem] + H[rem : 2*rem] + + return unpad(AES.new(kek, AES.MODE_CBC, kekiv).decrypt(keydata), 16) + + def initialize_ebx_inept(self, password, docid, param): + self.is_printable = self.is_modifiable = self.is_extractable = True + rsakey = RSA.importKey(password) # parses the ASN1 structure + length = int_value(param.get('Length', 0)) // 8 + rights = codecs.decode(param.get('ADEPT_LICENSE'), 'base64') + rights = zlib.decompress(rights, -15) + rights = etree.fromstring(rights) + expr = './/{http://ns.adobe.com/adept}encryptedKey' + bookkeyelem = rights.find(expr) + bookkey = codecs.decode(bookkeyelem.text.encode('utf-8'),'base64') + keytype = bookkeyelem.attrib.get('keyType', '0') + + if int(keytype, 10) > 2: + bookkey = PDFDocument.removeHardening(rights, keytype, bookkey) + try: + bookkey = PKCS1_v1_5.new(rsakey).decrypt(bookkey, None) # automatically unpads + except ValueError: + bookkey = None + + if bookkey is None: + raise ADEPTError('error decrypting book session key') + + ebx_V = int_value(param.get('V', 4)) + ebx_type = int_value(param.get('EBX_ENCRYPTIONTYPE', 6)) + # added because of improper booktype / decryption book session key errors + if length > 0: + if len(bookkey) == length: + if ebx_V == 3: + V = 3 + else: + V = 2 + elif len(bookkey) == 
length + 1: + V = bookkey[0] + bookkey = bookkey[1:] + else: + print("ebx_V is %d and ebx_type is %d" % (ebx_V, ebx_type)) + print("length is %d and len(bookkey) is %d" % (length, len(bookkey))) + if len(bookkey) > 0: + print("bookkey[0] is %d" % bookkey[0]) + raise ADEPTError('error decrypting book session key - mismatched length') + else: + # proper length unknown try with whatever you have + print("ebx_V is %d and ebx_type is %d" % (ebx_V, ebx_type)) + print("length is %d and len(bookkey) is %d" % (length, len(bookkey))) + if len(bookkey) > 0: + print("bookkey[0] is %d" % bookkey[0]) + if ebx_V == 3: + V = 3 + else: + V = 2 + self.decrypt_key = bookkey + self.genkey = self.genkey_v3 if V == 3 else self.genkey_v2 + self.decipher = self.decrypt_rc4 + self.ready = True + return + + # genkey functions + def genkey_v2(self, objid, genno): + objid = struct.pack(' PDFObjStmRef.maxindex: + PDFObjStmRef.maxindex = index + + +## PDFParser +## +class PDFParser(PSStackParser): + + def __init__(self, doc, fp): + PSStackParser.__init__(self, fp) + self.doc = doc + self.doc.set_parser(self) + return + + def __repr__(self): + return '' + + KEYWORD_R = KWD(b'R') + KEYWORD_ENDOBJ = KWD(b'endobj') + KEYWORD_STREAM = KWD(b'stream') + KEYWORD_XREF = KWD(b'xref') + KEYWORD_STARTXREF = KWD(b'startxref') + def do_keyword(self, pos, token): + if token in (self.KEYWORD_XREF, self.KEYWORD_STARTXREF): + self.add_results(*self.pop(1)) + return + if token is self.KEYWORD_ENDOBJ: + self.add_results(*self.pop(4)) + return + + if token is self.KEYWORD_R: + # reference to indirect object + try: + ((_,objid), (_,genno)) = self.pop(2) + (objid, genno) = (int(objid), int(genno)) + obj = PDFObjRef(self.doc, objid, genno) + self.push((pos, obj)) + except PSSyntaxError: + pass + return + + if token is self.KEYWORD_STREAM: + # stream object + ((_,dic),) = self.pop(1) + dic = dict_value(dic) + try: + objlen = int_value(dic['Length']) + except KeyError: + if STRICT: + raise PDFSyntaxError('/Length is undefined: %r' % dic) + objlen = 0 + self.seek(pos) + try: + (_, line) = self.nextline() # 'stream' + except PSEOF: + if STRICT: + raise PDFSyntaxError('Unexpected EOF') + return + pos += len(line) + self.fp.seek(pos) + data = self.fp.read(objlen) + self.seek(pos+objlen) + while 1: + try: + (linepos, line) = self.nextline() + except PSEOF: + if STRICT: + raise PDFSyntaxError('Unexpected EOF') + break + if b'endstream' in line: + i = line.index(b'endstream') + objlen += i + data += line[:i] + break + objlen += len(line) + data += line + self.seek(pos+objlen) + obj = PDFStream(dic, data, self.doc.decipher) + self.push((pos, obj)) + return + + # others + self.push((pos, token)) + return + + def find_xref(self): + # search the last xref table by scanning the file backwards. 
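+        # revreadlines() walks the file from the end; the line read just before
+        # hitting the 'startxref' keyword (i.e. the line that follows it in the
+        # file) holds the byte offset of the last xref section.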
+ prev = None + for line in self.revreadlines(): + line = line.strip() + if line == b'startxref': break + if line: + prev = line + else: + raise PDFNoValidXRef('Unexpected EOF') + return int(prev) + + # read xref table + def read_xref_from(self, start, xrefs): + self.seek(start) + self.reset() + try: + (pos, token) = self.nexttoken() + except PSEOF: + raise PDFNoValidXRef('Unexpected EOF') + if isinstance(token, int): + # XRefStream: PDF-1.5 + if GEN_XREF_STM == 1: + global gen_xref_stm + gen_xref_stm = True + self.seek(pos) + self.reset() + xref = PDFXRefStream() + xref.load(self) + else: + if token is not self.KEYWORD_XREF: + raise PDFNoValidXRef('xref not found: pos=%d, token=%r' % + (pos, token)) + self.nextline() + xref = PDFXRef() + xref.load(self) + xrefs.append(xref) + trailer = xref.trailer + if 'XRefStm' in trailer: + pos = int_value(trailer['XRefStm']) + self.read_xref_from(pos, xrefs) + if 'Prev' in trailer: + # find previous xref + pos = int_value(trailer['Prev']) + self.read_xref_from(pos, xrefs) + return + + # read xref tables and trailers + def read_xref(self): + xrefs = [] + trailerpos = None + try: + pos = self.find_xref() + self.read_xref_from(pos, xrefs) + except PDFNoValidXRef: + # fallback + self.seek(0) + pat = re.compile(b'^(\\d+)\\s+(\\d+)\\s+obj\\b') + offsets = {} + xref = PDFXRef() + while 1: + try: + (pos, line) = self.nextline() + except PSEOF: + break + if line.startswith(b'trailer'): + trailerpos = pos # remember last trailer + m = pat.match(line) + if not m: continue + (objid, genno) = m.groups() + offsets[int(objid)] = (0, pos) + if not offsets: raise + xref.offsets = offsets + if trailerpos: + self.seek(trailerpos) + xref.load_trailer(self) + xrefs.append(xref) + return xrefs + +## PDFObjStrmParser +## +class PDFObjStrmParser(PDFParser): + + def __init__(self, data, doc): + PSStackParser.__init__(self, BytesIO(data)) + self.doc = doc + return + + def flush(self): + self.add_results(*self.popall()) + return + + KEYWORD_R = KWD(b'R') + def do_keyword(self, pos, token): + if token is self.KEYWORD_R: + # reference to indirect object + try: + ((_,objid), (_,genno)) = self.pop(2) + (objid, genno) = (int(objid), int(genno)) + obj = PDFObjRef(self.doc, objid, genno) + self.push((pos, obj)) + except PSSyntaxError: + pass + return + # others + self.push((pos, token)) + return + + +# Takes a PDF file name as input, and if this is an ADE-protected PDF, +# returns the UUID of the user that's licensed to open this file. +def adeptGetUserUUID(inf): + try: + doc = PDFDocument() + inf = open(inf, 'rb') + pars = PDFParser(doc, inf) + + (docid, param) = doc.encryption + type = literal_name(param['Filter']) + if type != 'EBX_HANDLER': + # No EBX_HANDLER, no idea which user key can decrypt this. 
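+            # Only EBX_HANDLER (ADEPT) PDFs embed an ADEPT_LICENSE blob that
+            # names the licensed user, so there is no UUID to report here.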
+ inf.close() + return None + + rights = codecs.decode(param.get('ADEPT_LICENSE'), 'base64') + inf.close() + + rights = zlib.decompress(rights, -15) + rights = etree.fromstring(rights) + expr = './/{http://ns.adobe.com/adept}user' + user_uuid = ''.join(rights.findtext(expr)) + if user_uuid[:9] != "urn:uuid:": + return None + return user_uuid[9:] + + except: + return None + +### +### My own code, for which there is none else to blame + +class PDFSerializer(object): + def __init__(self, inf, userkey, inept=True): + global GEN_XREF_STM, gen_xref_stm + gen_xref_stm = GEN_XREF_STM > 1 + self.version = inf.read(8) + inf.seek(0) + self.doc = doc = PDFDocument() + parser = PDFParser(doc, inf) + doc.initialize(userkey, inept) + self.objids = objids = set() + for xref in reversed(doc.xrefs): + trailer = xref.trailer + for objid in xref.objids(): + objids.add(objid) + trailer = dict(trailer) + trailer.pop('Prev', None) + trailer.pop('XRefStm', None) + if 'Encrypt' in trailer: + objids.remove(trailer.pop('Encrypt').objid) + self.trailer = trailer + + def dump(self, outf): + self.outf = outf + self.write(self.version) + self.write(b'\n%\xe2\xe3\xcf\xd3\n') + doc = self.doc + objids = self.objids + xrefs = {} + maxobj = max(objids) + trailer = dict(self.trailer) + trailer['Size'] = maxobj + 1 + for objid in objids: + obj = doc.getobj(objid) + if isinstance(obj, PDFObjStmRef): + xrefs[objid] = obj + continue + if obj is not None: + try: + genno = obj.genno + except AttributeError: + genno = 0 + xrefs[objid] = (self.tell(), genno) + self.serialize_indirect(objid, obj) + startxref = self.tell() + + if not gen_xref_stm: + self.write(b'xref\n') + self.write(b'0 %d\n' % (maxobj + 1,)) + for objid in range(0, maxobj + 1): + if objid in xrefs: + # force the genno to be 0 + self.write(b"%010d 00000 n \n" % xrefs[objid][0]) + else: + self.write(b"%010d %05d f \n" % (0, 65535)) + + self.write(b'trailer\n') + self.serialize_object(trailer) + self.write(b'\nstartxref\n%d\n%%%%EOF' % startxref) + + else: # Generate crossref stream. 
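+            # A PDF 1.5+ cross-reference stream encodes each entry as a
+            # fixed-width binary record: field 1 is the entry type, field 2
+            # the byte offset (or the id of the containing object stream),
+            # field 3 the generation number (or the index inside that
+            # stream).  The chosen field widths go into /W and the
+            # (first, count) pairs of contiguous object ids into /Index.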
+ + # Calculate size of entries + maxoffset = max(startxref, maxobj) + maxindex = PDFObjStmRef.maxindex + fl2 = 2 + power = 65536 + while maxoffset >= power: + fl2 += 1 + power *= 256 + fl3 = 1 + power = 256 + while maxindex >= power: + fl3 += 1 + power *= 256 + + index = [] + first = None + prev = None + data = [] + # Put the xrefstream's reference in itself + startxref = self.tell() + maxobj += 1 + xrefs[maxobj] = (startxref, 0) + for objid in sorted(xrefs): + if first is None: + first = objid + elif objid != prev + 1: + index.extend((first, prev - first + 1)) + first = objid + prev = objid + objref = xrefs[objid] + if isinstance(objref, PDFObjStmRef): + f1 = 2 + f2 = objref.stmid + f3 = objref.index + else: + f1 = 1 + f2 = objref[0] + # we force all generation numbers to be 0 + # f3 = objref[1] + f3 = 0 + + data.append(struct.pack('>B', f1)) + data.append(struct.pack('>L', f2)[-fl2:]) + data.append(struct.pack('>L', f3)[-fl3:]) + index.extend((first, prev - first + 1)) + data = zlib.compress(b''.join(data)) + dic = {'Type': LITERAL_XREF, 'Size': prev + 1, 'Index': index, + 'W': [1, fl2, fl3], 'Length': len(data), + 'Filter': LITERALS_FLATE_DECODE[0], + 'Root': trailer['Root'],} + if 'Info' in trailer: + dic['Info'] = trailer['Info'] + xrefstm = PDFStream(dic, data) + self.serialize_indirect(maxobj, xrefstm) + self.write(b'startxref\n%d\n%%%%EOF' % startxref) + def write(self, data): + self.outf.write(data) + self.last = data[-1:] + + def tell(self): + return self.outf.tell() + + def escape_string(self, string): + string = string.replace(b'\\', b'\\\\') + string = string.replace(b'\n', b'\\n') + string = string.replace(b'(', b'\\(') + string = string.replace(b')', b'\\)') + return string + + def serialize_object(self, obj): + if isinstance(obj, dict): + # Correct malformed Mac OS resource forks for Stanza + if 'ResFork' in obj and 'Type' in obj and 'Subtype' not in obj \ + and isinstance(obj['Type'], int): + obj['Subtype'] = obj['Type'] + del obj['Type'] + # end - hope this doesn't have bad effects + self.write(b'<<') + for key, val in obj.items(): + self.write(str(LIT(key.encode('utf-8'))).encode('utf-8')) + self.serialize_object(val) + self.write(b'>>') + elif isinstance(obj, list): + self.write(b'[') + for val in obj: + self.serialize_object(val) + self.write(b']') + elif isinstance(obj, bytearray): + self.write(b'(%s)' % self.escape_string(obj)) + elif isinstance(obj, bytes): + self.write(b'<%s>' % binascii.hexlify(obj).upper()) + elif isinstance(obj, str): + self.write(b'(%s)' % self.escape_string(obj.encode('utf-8'))) + elif isinstance(obj, bool): + if self.last.isalnum(): + self.write(b' ') + self.write(str(obj).lower().encode('utf-8')) + elif isinstance(obj, int): + if self.last.isalnum(): + self.write(b' ') + self.write(str(obj).encode('utf-8')) + elif isinstance(obj, Decimal): + if self.last.isalnum(): + self.write(b' ') + self.write(str(obj).encode('utf-8')) + elif isinstance(obj, PDFObjRef): + if self.last.isalnum(): + self.write(b' ') + self.write(b'%d %d R' % (obj.objid, 0)) + elif isinstance(obj, PDFStream): + ### If we don't generate cross ref streams the object streams + ### are no longer useful, as we have extracted all objects from + ### them. Therefore leave them out from the output. + if obj.dic.get('Type') == LITERAL_OBJSTM and not gen_xref_stm: + self.write(b'(deleted)') + else: + data = obj.get_decdata() + + # Fix length: + # We've decompressed and then recompressed the PDF stream. 
+ # Depending on the algorithm, the implementation, and the compression level, + # the resulting recompressed stream is unlikely to have the same length as the original. + # So we need to update the PDF object to contain the new proper length. + + # Without this change, all PDFs exported by this plugin are slightly corrupted - + # even though most if not all PDF readers can correct that on-the-fly. + + if 'Length' in obj.dic: + obj.dic['Length'] = len(data) + + + self.serialize_object(obj.dic) + self.write(b'stream\n') + self.write(data) + self.write(b'\nendstream') + else: + data = str(obj).encode('utf-8') + if bytes([data[0]]).isalnum() and self.last.isalnum(): + self.write(b' ') + self.write(data) + + def serialize_indirect(self, objid, obj): + self.write(b'%d 0 obj' % (objid,)) + self.serialize_object(obj) + if self.last.isalnum(): + self.write(b'\n') + self.write(b'endobj\n') + + + + +def decryptBook(userkey, inpath, outpath, inept=True): + with open(inpath, 'rb') as inf: + serializer = PDFSerializer(inf, userkey, inept) + with open(outpath, 'wb') as outf: + # help construct to make sure the method runs to the end + try: + serializer.dump(outf) + except Exception as e: + print("error writing pdf: {0}".format(e)) + traceback.print_exc() + return 2 + return 0 + + +def getPDFencryptionType(inpath): + with open(inpath, 'rb') as inf: + doc = doc = PDFDocument() + parser = PDFParser(doc, inf) + filter = doc.initialize_and_return_filter() + return filter + + +def decryptPDF(inpath): + keypath = KEYPATH + outpath = os.path.basename(inpath).removesuffix(".pdf") + "_decrypted.pdf" + userkey = open(keypath,'rb').read() + result = decryptBook(userkey, inpath, outpath) + if result == 0: + print("Successfully decrypted") + return outpath + else: + print("Decryption failed") + return None + + diff --git a/decrypt/zeroedzipinfo.py b/decrypt/zeroedzipinfo.py new file mode 100644 index 0000000..08c65d0 --- /dev/null +++ b/decrypt/zeroedzipinfo.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- + + +""" +Python 3's "zipfile" has an annoying bug where the `external_attr` field +of a ZIP file cannot be set to 0. However, if the original DRMed ZIP has +that set to 0 then we want the DRM-free ZIP to have that as 0, too. +See https://github.com/python/cpython/issues/87713 + +We cannot just set the "external_attr" to 0 as the code to save the ZIP +resets that variable. + +So, here's a class that inherits from ZipInfo and ensures that EVERY +read access to that variable will return a 0 ... + +""" + +import zipfile + +class ZeroedZipInfo(zipfile.ZipInfo): + def __init__(self, zinfo): + for k in self.__slots__: + if hasattr(zinfo, k): + setattr(self, k, getattr(zinfo, k)) + + def __getattribute__(self, name): + if name == "external_attr": + return 0 + return object.__getattribute__(self, name)
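+
+# Illustrative usage sketch only -- `inpath` and `outpath` are assumed
+# names, not part of this module.  Wrap any ZipInfo whose external_attr
+# is 0 before writing it out so the zeroed value survives the round trip:
+#
+#     with zipfile.ZipFile(inpath) as inzip, \
+#          zipfile.ZipFile(outpath, "w") as outzip:
+#         for zinfo in inzip.infolist():
+#             data = inzip.read(zinfo.filename)
+#             if zinfo.external_attr == 0:
+#                 zinfo = ZeroedZipInfo(zinfo)
+#             outzip.writestr(zinfo, data)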