init
This commit is contained in:
25
venv/lib/python3.12/site-packages/pyxlsb/__init__.py
Normal file
25
venv/lib/python3.12/site-packages/pyxlsb/__init__.py
Normal file
@@ -0,0 +1,25 @@
|
||||
from .handlers import Handler
|
||||
from .reader import BIFF12Reader
|
||||
from .workbook import Workbook
|
||||
from .worksheet import Worksheet
|
||||
|
||||
# Version string of this pyxlsb package.
__version__ = '1.0.10'
|
||||
|
||||
def open_workbook(name, debug=False):
    """Open an xlsb archive and return a ``Workbook`` built on top of it.

    :param name: Passed unchanged to ``zipfile.ZipFile`` (opened in ``'r'`` mode).
    :param debug: Forwarded to ``Workbook``.
    :return: The ``Workbook`` wrapping the opened archive; closing the
        workbook closes the archive.
    :raises: Whatever ``ZipFile`` or the ``Workbook`` constructor raises.
    """
    from zipfile import ZipFile
    zf = ZipFile(name, 'r')
    try:
        return Workbook(fp=zf, debug=debug)
    except Exception:
        # The Workbook constructor parses the archive eagerly. If that fails the
        # caller never receives an object it could close, so release the archive here.
        zf.close()
        raise
|
||||
|
||||
def convert_date(date):
    """Convert a spreadsheet serial date number to a ``datetime``.

    :param date: Serial number; the integer part counts days and the
        fractional part is the time of day.
    :return: A naive ``datetime``, or ``None`` when ``date`` is not an
        ``int``/``float`` or is a non-finite float (NaN, +/-inf).
    """
    if not isinstance(date, (int, float)):
        return None

    import math
    from datetime import datetime, timedelta

    # NaN and infinities pass the type check above but cannot be truncated
    # with int(); treat them like any other value that is not a date.
    if isinstance(date, float) and (math.isnan(date) or math.isinf(date)):
        return None

    whole_days = int(date)
    if whole_days == 0:
        return datetime(1900, 1, 1, 0, 0, 0) + timedelta(seconds=round(date * 24 * 60 * 60))

    day_seconds = round((date % 1) * 24 * 60 * 60)
    if whole_days >= 61:
        # According to Lotus 1-2-3, Feb 29th 1900 is a real thing, therefore
        # we have to remove one day after that date
        whole_days -= 1
    # Otherwise: Feb 29th 1900 (serial 60) will show up as Mar 1st 1900
    # because Python won't handle that date
    return datetime(1899, 12, 31, 0, 0, 0) + timedelta(days=whole_days, seconds=day_seconds)
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
151
venv/lib/python3.12/site-packages/pyxlsb/biff12.py
Normal file
151
venv/lib/python3.12/site-packages/pyxlsb/biff12.py
Normal file
@@ -0,0 +1,151 @@
|
||||
# Workbook records
# Record ids in the form BIFF12Reader.read_id() returns them: the raw id bytes
# combined little-endian, with the continuation bit (0x80) left in place.
# Workbook._parse() looks for SHEET and SHEETS_END among these.
DEFINEDNAME = 0x0027
FILEVERSION = 0x0180
WORKBOOK = 0x0183
WORKBOOK_END = 0x0184
BOOKVIEWS = 0x0187
BOOKVIEWS_END = 0x0188
SHEETS = 0x018F
SHEETS_END = 0x0190
WORKBOOKPR = 0x0199
SHEET = 0x019C
CALCPR = 0x019D
WORKBOOKVIEW = 0x019E
EXTERNALREFERENCES = 0x02E1
EXTERNALREFERENCES_END = 0x02E2
EXTERNALREFERENCE = 0x02E3
WEBPUBLISHING = 0x04A9
|
||||
|
||||
# Worksheet records
# BLANK .. FORMULA_BOOLERR are the cell record ids. Worksheet.rows() selects
# cells with a numeric range test over exactly that span, so these values must
# stay ordered (note that 0x0006 is not assigned a name here).
ROW = 0x0000
BLANK = 0x0001
NUM = 0x0002
BOOLERR = 0x0003
BOOL = 0x0004
FLOAT = 0x0005
STRING = 0x0007
FORMULA_STRING = 0x0008
FORMULA_FLOAT = 0x0009
FORMULA_BOOL = 0x000A
FORMULA_BOOLERR = 0x000B
COL = 0x003C
WORKSHEET = 0x0181
WORKSHEET_END = 0x0182
SHEETVIEWS = 0x0185
SHEETVIEWS_END = 0x0186
SHEETVIEW = 0x0189
SHEETVIEW_END = 0x018A
SHEETDATA = 0x0191
SHEETDATA_END = 0x0192
SHEETPR = 0x0193
DIMENSION = 0x0194
SELECTION = 0x0198
COLS = 0x0386
COLS_END = 0x0387
CONDITIONALFORMATTING = 0x03CD
CONDITIONALFORMATTING_END = 0x03CE
CFRULE = 0x03CF
CFRULE_END = 0x03D0
ICONSET = 0x03D1
ICONSET_END = 0x03D2
DATABAR = 0x03D3
DATABAR_END = 0x03D4
COLORSCALE = 0x03D5
COLORSCALE_END = 0x03D6
CFVO = 0x03D7
PAGEMARGINS = 0x03DC
PRINTOPTIONS = 0x03DD
PAGESETUP = 0x03DE
HEADERFOOTER = 0x03DF
SHEETFORMATPR = 0x03E5
HYPERLINK = 0x03EE
DRAWING = 0x04A6
LEGACYDRAWING = 0x04A7
COLOR = 0x04B4
OLEOBJECTS = 0x04FE
OLEOBJECT = 0x04FF
OLEOBJECTS_END = 0x0580
TABLEPARTS = 0x0594
TABLEPART = 0x0595
TABLEPARTS_END = 0x0596
|
||||
|
||||
# SharedStrings records
# StringTable._parse() collects every SI record and stops at SST_END.
SI = 0x0013
SST = 0x019F
SST_END = 0x01A0
|
||||
|
||||
# Styles records
# No handler is registered for these ids in BIFF12Reader.handlers by default.
FONT = 0x002B
FILL = 0x002D
BORDER = 0x002E
XF = 0x002F
CELLSTYLE = 0x0030
STYLESHEET = 0x0296
STYLESHEET_END = 0x0297
COLORS = 0x03D9
COLORS_END = 0x03DA
DXFS = 0x03F9
DXFS_END = 0x03FA
TABLESTYLES = 0x03FC
TABLESTYLES_END = 0x03FD
FILLS = 0x04DB
FILLS_END = 0x04DC
FONTS = 0x04E3
FONTS_END = 0x04E4
BORDERS = 0x04E5
BORDERS_END = 0x04E6
CELLXFS = 0x04E9
CELLXFS_END = 0x04EA
CELLSTYLES = 0x04EB
CELLSTYLES_END = 0x04EC
CELLSTYLEXFS = 0x04F2
CELLSTYLEXFS_END = 0x04F3
|
||||
|
||||
# Comment records
# No handler is registered for these ids in BIFF12Reader.handlers by default.
COMMENTS = 0x04F4
COMMENTS_END = 0x04F5
AUTHORS = 0x04F6
AUTHORS_END = 0x04F7
AUTHOR = 0x04F8
COMMENTLIST = 0x04F9
COMMENTLIST_END = 0x04FA
COMMENT = 0x04FB
COMMENT_END = 0x04FC
TEXT = 0x04FD
|
||||
|
||||
# Table records
# No handler is registered for these ids in BIFF12Reader.handlers by default.
AUTOFILTER = 0x01A1
AUTOFILTER_END = 0x01A2
FILTERCOLUMN = 0x01A3
FILTERCOLUMN_END = 0x01A4
FILTERS = 0x01A5
FILTERS_END = 0x01A6
FILTER = 0x01A7
TABLE = 0x02D7
TABLE_END = 0x02D8
TABLECOLUMNS = 0x02D9
TABLECOLUMNS_END = 0x02DA
TABLECOLUMN = 0x02DB
TABLECOLUMN_END = 0x02DC
TABLESTYLEINFO = 0x0481
SORTSTATE = 0x0492
SORTCONDITION = 0x0494
SORTSTATE_END = 0x0495
|
||||
|
||||
# QueryTable records
# No handler is registered for these ids in BIFF12Reader.handlers by default.
QUERYTABLE = 0x03BF
QUERYTABLE_END = 0x03C0
QUERYTABLEREFRESH = 0x03C1
QUERYTABLEREFRESH_END = 0x03C2
QUERYTABLEFIELDS = 0x03C7
QUERYTABLEFIELDS_END = 0x03C8
QUERYTABLEFIELD = 0x03C9
QUERYTABLEFIELD_END = 0x03CA
|
||||
|
||||
# Connection records
# No handler is registered for these ids in BIFF12Reader.handlers by default.
CONNECTIONS = 0x03AD
CONNECTIONS_END = 0x03AE
CONNECTION = 0x01C9
CONNECTION_END = 0x01CA
DBPR = 0x01CB
DBPR_END = 0x01CC
|
||||
143
venv/lib/python3.12/site-packages/pyxlsb/handlers.py
Normal file
143
venv/lib/python3.12/site-packages/pyxlsb/handlers.py
Normal file
@@ -0,0 +1,143 @@
|
||||
from . import biff12
|
||||
from collections import namedtuple
|
||||
|
||||
class Handler(object):
    """Base record handler: discards the record payload and yields nothing."""

    def __init__(self):
        super(Handler, self).__init__()

    def read(self, reader, recid, reclen):
        """Skip ``reclen`` bytes on ``reader`` when the record has a payload.

        :param reader: Object exposing ``skip(size)``.
        :param recid: Record id (unused here).
        :param reclen: Payload length in bytes.
        :return: ``None``.
        """
        if reclen <= 0:
            return
        reader.skip(reclen)
|
||||
|
||||
|
||||
class BasicHandler(Handler):
    """Handler that discards the payload and reports a fixed label."""

    def __init__(self, name=None):
        super(BasicHandler, self).__init__()
        # Label returned by read() for every record routed to this handler.
        self.name = name

    def read(self, reader, recid, reclen):
        """Skip the payload via the base class, then return ``self.name``."""
        label = self.name
        super(BasicHandler, self).read(reader, recid, reclen)
        return label
|
||||
|
||||
|
||||
class StringTableHandler(Handler):
    """Parses a record made of two consecutive ints into an ``sst`` tuple."""

    cls = namedtuple('sst', ['count', 'uniqueCount'])

    def read(self, reader, recid, reclen):
        """Read ``count`` then ``uniqueCount`` with ``reader.read_int()``."""
        total = reader.read_int()
        distinct = reader.read_int()
        return self.cls(total, distinct)
|
||||
|
||||
|
||||
class StringInstanceHandler(Handler):
    """Parses a string-instance record into an ``si`` tuple with field ``t``."""

    cls = namedtuple('si', ['t'])

    def read(self, reader, recid, reclen):
        """Skip one leading byte, then read the string that follows it."""
        reader.skip(1)
        return self.cls(reader.read_string())
|
||||
|
||||
|
||||
class SheetHandler(Handler):
    """Parses a sheet record into a ``sheet(sheetId, rId, name)`` tuple."""

    cls = namedtuple('sheet', ['sheetId', 'rId', 'name'])

    def read(self, reader, recid, reclen):
        """Skip 4 bytes, then read an int and two strings, in that order."""
        reader.skip(4)
        sheet_id = reader.read_int()
        rel_id = reader.read_string()
        title = reader.read_string()
        return self.cls(sheet_id, rel_id, title)
|
||||
|
||||
|
||||
class DimensionHandler(Handler):
    """Parses a dimension record into ``dimension(r, c, h, w)``."""

    cls = namedtuple('dimension', ['r', 'c', 'h', 'w'])

    def read(self, reader, recid, reclen):
        """Read first/last row and first/last column; return origin and extent.

        ``h`` and ``w`` are inclusive spans (``last - first + 1``).
        """
        first_row = reader.read_int()
        last_row = reader.read_int()
        first_col = reader.read_int()
        last_col = reader.read_int()
        height = last_row - first_row + 1
        width = last_col - first_col + 1
        return self.cls(first_row, first_col, height, width)
|
||||
|
||||
|
||||
class ColumnHandler(Handler):
    """Parses a column record into ``col(c1, c2, width, style)``."""

    cls = namedtuple('col', ['c1', 'c2', 'width', 'style'])

    def read(self, reader, recid, reclen):
        """Read four ints; the third is divided by 256 to give ``width``."""
        first = reader.read_int()
        last = reader.read_int()
        # Scale immediately after reading, before the style field is consumed.
        scaled_width = reader.read_int() / 256
        style_ref = reader.read_int()
        return self.cls(first, last, scaled_width, style_ref)
|
||||
|
||||
|
||||
class RowHandler(Handler):
    """Parses a row record into a ``row`` tuple holding the row index ``r``."""

    cls = namedtuple('row', ['r'])

    def read(self, reader, recid, reclen):
        """Read one int from ``reader`` and wrap it."""
        return self.cls(reader.read_int())
|
||||
|
||||
|
||||
class CellHandler(Handler):
    """Parses any cell record into ``c(c, v, f, style)``.

    ``f`` is always ``None``; formula expressions are not decoded here.
    """

    cls = namedtuple('c', ['c', 'v', 'f', 'style'])

    def read(self, reader, recid, reclen):
        """Read column and style ints, then a value whose form depends on ``recid``.

        Record ids not listed below (e.g. ``biff12.BLANK``) yield ``v = None``.
        """
        column = reader.read_int()
        style_ref = reader.read_int()

        if recid == biff12.NUM:
            value = reader.read_float()
        elif recid in (biff12.BOOLERR, biff12.FORMULA_BOOLERR):
            # Error cells are surfaced as the hex string of the single code byte.
            value = hex(reader.read_byte())
        elif recid in (biff12.BOOL, biff12.FORMULA_BOOL):
            value = reader.read_byte() != 0
        elif recid in (biff12.FLOAT, biff12.FORMULA_FLOAT):
            value = reader.read_double()
        elif recid == biff12.STRING:
            # An int; Worksheet.rows() uses it as an index into the string table.
            value = reader.read_int()
        elif recid == biff12.FORMULA_STRING:
            value = reader.read_string()
        else:
            value = None

        return self.cls(column, value, None, style_ref)
|
||||
|
||||
|
||||
class HyperlinkHandler(Handler):
    """Parses a hyperlink record into ``hyperlink(r, c, h, w, rId)``."""

    cls = namedtuple('hyperlink', ['r', 'c', 'h', 'w', 'rId'])

    def read(self, reader, recid, reclen):
        """Read the covered cell range (four ints) and the relationship id string.

        ``h`` and ``w`` are inclusive spans (``last - first + 1``).
        """
        first_row = reader.read_int()
        last_row = reader.read_int()
        first_col = reader.read_int()
        last_col = reader.read_int()
        rel_id = reader.read_string()
        height = last_row - first_row + 1
        width = last_col - first_col + 1
        return self.cls(first_row, first_col, height, width, rel_id)
|
||||
187
venv/lib/python3.12/site-packages/pyxlsb/reader.py
Normal file
187
venv/lib/python3.12/site-packages/pyxlsb/reader.py
Normal file
@@ -0,0 +1,187 @@
|
||||
import io
|
||||
import os
|
||||
import struct
|
||||
from . import biff12
|
||||
from .handlers import *
|
||||
|
||||
# Pre-compiled little-endian formats shared by the readers in this module.
uint8_t = struct.Struct('<B')
uint16_t = struct.Struct('<H')
int32_t = struct.Struct('<i')
uint32_t = struct.Struct('<I')
double_t = struct.Struct('<d')


class RecordReader(object):
    """Typed reader over the payload bytes of a single record.

    Every ``read_*`` method returns ``None`` instead of raising when fewer
    bytes remain than the value needs.

    :param buf: Payload bytes.
    :param enc: Codec used by :meth:`read_string`. The default is explicit
        little-endian UTF-16: the generic ``'utf-16'`` codec sniffs a byte
        order mark and otherwise falls back to the host byte order, which
        drops a leading U+FEFF and can decode BOM-less data with the wrong
        endianness.
    """

    def __init__(self, buf, enc='utf-16-le'):
        self._fp = io.BytesIO(buf)
        self._enc = enc

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self._fp.close()

    def _unpack(self, fmt):
        """Read ``fmt.size`` bytes and unpack one value; ``None`` on short read."""
        raw = self._fp.read(fmt.size)
        if len(raw) < fmt.size:
            return None
        return fmt.unpack(raw)[0]

    def tell(self):
        """Return the current offset inside the payload."""
        return self._fp.tell()

    def seek(self, offset, whence=os.SEEK_SET):
        """Move the read position; same arguments as ``io.BytesIO.seek``."""
        self._fp.seek(offset, whence)

    def skip(self, size):
        """Advance the read position by ``size`` bytes."""
        self._fp.seek(size, os.SEEK_CUR)

    def read(self, size):
        """Return up to ``size`` raw bytes."""
        return self._fp.read(size)

    def read_int(self):
        """Read an unsigned 32-bit little-endian integer."""
        return self._unpack(uint32_t)

    def read_short(self):
        """Read an unsigned 16-bit little-endian integer."""
        return self._unpack(uint16_t)

    def read_byte(self):
        """Read one unsigned byte."""
        return self._unpack(uint8_t)

    def read_float(self):
        """Read a 4-byte packed number and return it as a ``float``.

        Bit 1 set: the upper 30 bits are a signed integer. Bit 1 clear: the
        upper 30 bits are the most significant bits of an IEEE double whose
        remaining bits are zero. Bit 0 set: the result is divided by 100.
        """
        packed = self._unpack(int32_t)
        if packed is None:
            return None
        if packed & 0x02:
            # Arithmetic shift on the signed value keeps the sign.
            v = float(packed >> 2)
        else:
            v = double_t.unpack(b'\x00\x00\x00\x00' + uint32_t.pack(packed & 0xFFFFFFFC))[0]
        if packed & 0x01:
            v /= 100
        return v

    def read_double(self):
        """Read a 64-bit little-endian IEEE double."""
        return self._unpack(double_t)

    def read_string(self):
        """Read a length-prefixed string.

        The prefix is a 32-bit count of 2-byte code units; undecodable
        sequences are replaced rather than raising.
        """
        l = self.read_int()
        if l is None:
            return None
        nbytes = l * 2
        raw = self.read(nbytes)
        if len(raw) < nbytes:
            return None
        return raw.decode(self._enc, errors='replace')
|
||||
|
||||
|
||||
class BIFF12Reader(object):
    """Iterator over the records of a BIFF12 stream.

    Each step reads a record id, a record length and the payload, hands the
    payload to the handler registered for that id and yields
    ``(recid, result)``. Records whose handler returns ``None`` (including
    every id without a registered handler) are consumed silently.

    :param fp: Binary file object positioned at the start of a record.
    :param debug: When true, print one line per record read.
    """

    # NOTE(review): this dict lives on the class, and register_handler()
    # mutates it in place, so a registration made through one reader is seen
    # by every reader. Confirm that this is intended.
    handlers = {
        # Workbook part handlers
        biff12.WORKBOOK: BasicHandler('workbook'),
        biff12.SHEETS: BasicHandler('sheets'),
        biff12.SHEETS_END: BasicHandler('/sheets'),
        biff12.SHEET: SheetHandler(),

        # SharedStrings part handlers
        biff12.SST: StringTableHandler(),
        biff12.SST_END: BasicHandler('/sst'),
        biff12.SI: StringInstanceHandler(),

        # Worksheet part handlers
        biff12.WORKSHEET: BasicHandler('worksheet'),
        biff12.WORKSHEET_END: BasicHandler('/worksheet'),
        biff12.DIMENSION: DimensionHandler(),
        biff12.SHEETDATA: BasicHandler('sheetData'),
        biff12.SHEETDATA_END: BasicHandler('/sheetData'),
        biff12.COLS: BasicHandler('cols'),
        biff12.COLS_END: BasicHandler('/cols'),
        biff12.COL: ColumnHandler(),
        biff12.ROW: RowHandler(),
        biff12.BLANK: CellHandler(),
        biff12.NUM: CellHandler(),
        biff12.BOOLERR: CellHandler(),
        biff12.BOOL: CellHandler(),
        biff12.FLOAT: CellHandler(),
        biff12.STRING: CellHandler(),
        biff12.FORMULA_STRING: CellHandler(),
        biff12.FORMULA_FLOAT: CellHandler(),
        biff12.FORMULA_BOOL: CellHandler(),
        biff12.FORMULA_BOOLERR: CellHandler(),
        biff12.HYPERLINK: HyperlinkHandler()
    }

    def __init__(self, fp, debug=False):
        super(BIFF12Reader, self).__init__()
        self._debug = debug
        self._fp = fp

    def __iter__(self):
        return self

    def __next__(self):
        # Python 3 iterator protocol; the work is done by next().
        return self.next()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def tell(self):
        """Return the current offset of the underlying file object."""
        return self._fp.tell()

    def seek(self, offset, whence=os.SEEK_SET):
        """Reposition the underlying file object."""
        self._fp.seek(offset, whence)

    def read_id(self):
        """Read a record id of one to four bytes.

        Bytes are combined little-endian *without* masking the continuation
        bit (0x80), which is the form the ``biff12`` constants use. Returns
        ``None`` when the stream ends before the id is complete.
        """
        value = 0
        for shift in (0, 8, 16, 24):
            raw = self._fp.read(1)
            if not raw:
                return None
            octet = uint8_t.unpack(raw)[0]
            value += octet << shift
            if not octet & 0x80:
                break
        return value

    def read_len(self):
        """Read a record length stored as one to four 7-bit groups.

        The low seven bits of each byte are payload, and the high bit marks
        that another byte follows. Returns ``None`` on a premature end of
        stream.
        """
        value = 0
        for shift in (0, 7, 14, 21):
            raw = self._fp.read(1)
            if not raw:
                return None
            octet = uint8_t.unpack(raw)[0]
            value += (octet & 0x7F) << shift
            if not octet & 0x80:
                break
        return value

    def register_handler(self, recid, handler):
        """Route records with id ``recid`` to ``handler``."""
        self.handlers[recid] = handler

    def next(self):
        """Return the next ``(recid, result)`` pair whose result is not ``None``.

        :raises StopIteration: when the id or the length cannot be read.
        """
        while True:
            if self._debug:
                pos = self._fp.tell()
            recid = self.read_id()
            # The length is read even if the id was missing, as before.
            reclen = self.read_len()
            if recid is None or reclen is None:
                raise StopIteration
            payload = self._fp.read(reclen)
            with RecordReader(payload) as reader:
                handler = self.handlers.get(recid) or Handler()
                result = handler.read(reader, recid, reclen)
            if self._debug:
                print('{:08X} {:04X} {:<6} {} {}'.format(pos, recid, reclen, ' '.join('{:02X}'.format(b) for b in payload), result))
            if result is not None:
                return (recid, result)

    def close(self):
        """Close the underlying file object."""
        self._fp.close()
|
||||
31
venv/lib/python3.12/site-packages/pyxlsb/stringtable.py
Normal file
31
venv/lib/python3.12/site-packages/pyxlsb/stringtable.py
Normal file
@@ -0,0 +1,31 @@
|
||||
from . import biff12
|
||||
from .reader import BIFF12Reader
|
||||
|
||||
class StringTable(object):
    """In-memory list of the strings found in a shared-strings stream.

    The whole stream is parsed in the constructor; afterwards strings are
    available by position through indexing or :meth:`get_string`.

    :param fp: Binary file object handed to ``BIFF12Reader``.
    """

    def __init__(self, fp):
        super(StringTable, self).__init__()
        self._reader = BIFF12Reader(fp=fp)
        self._strings = []
        self._parse()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def __getitem__(self, key):
        return self._strings[key]

    def _parse(self):
        """Collect the text of every SI record until SST_END is seen."""
        for record in self._reader:
            recid = record[0]
            if recid == biff12.SST_END:
                break
            if recid == biff12.SI:
                self._strings.append(record[1].t)

    def get_string(self, idx):
        """Return the string stored at position ``idx``."""
        return self._strings[idx]

    def close(self):
        """Close the underlying reader."""
        self._reader.close()
|
||||
88
venv/lib/python3.12/site-packages/pyxlsb/workbook.py
Normal file
88
venv/lib/python3.12/site-packages/pyxlsb/workbook.py
Normal file
@@ -0,0 +1,88 @@
|
||||
import os
|
||||
import sys
|
||||
import xml.etree.ElementTree as ET
|
||||
from . import biff12
|
||||
from .reader import BIFF12Reader
|
||||
from .stringtable import StringTable
|
||||
from .worksheet import Worksheet
|
||||
from tempfile import TemporaryFile
|
||||
|
||||
if sys.version_info > (3,):
    # Python 3 has no ``basestring``; get_sheet() uses this name to recognise
    # a sheet given by name.
    basestring = (str, bytes)


class Workbook(object):
    """Read-only view of an xlsb archive.

    The constructor parses the workbook relationships, the sheet list and,
    when present, the shared string table.

    :param fp: Opened archive exposing ``open(name, mode)`` and ``close()``
        (``open_workbook`` passes a ``zipfile.ZipFile``).
    :param debug: Forwarded to the record readers.
    """

    def __init__(self, fp, debug=False):
        super(Workbook, self).__init__()
        self._zf = fp
        self._debug = debug
        # List of (sheet name, relationship target) in workbook order.
        self._sheets = []
        self.stringtable = None
        self._parse()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    @property
    def sheets(self):
        """Sheet names in workbook order."""
        return [v[0] for v in self._sheets]

    def _parse(self):
        """Load relationships, the sheet list and the optional string table."""
        rels = {}
        with self._zf.open('xl/_rels/workbook.bin.rels', 'r') as zf:
            for el in ET.parse(zf).getroot():
                rels[el.attrib['Id']] = el.attrib['Target']

        with TemporaryFile() as temp:
            with self._zf.open('xl/workbook.bin', 'r') as zf:
                temp.write(zf.read())
                temp.seek(0, os.SEEK_SET)
            reader = BIFF12Reader(fp=temp, debug=self._debug)
            for item in reader:
                if item[0] == biff12.SHEET:
                    self._sheets.append((item[1].name, rels[item[1].rId]))
                elif item[0] == biff12.SHEETS_END:
                    break

        try:
            temp = TemporaryFile()
            with self._zf.open('xl/sharedStrings.bin', 'r') as zf:
                temp.write(zf.read())
                temp.seek(0, os.SEEK_SET)
            # On success the string table owns ``temp`` and closes it in close().
            self.stringtable = StringTable(fp=temp)
        except KeyError:
            # No shared strings part in the archive: leave stringtable as None.
            temp.close()
        except Exception:
            temp.close()
            raise

    def get_sheet(self, idx, rels=False):
        """Return the ``Worksheet`` for a 1-based index or a sheet name.

        :param idx: 1-based position, or a name matched case-insensitively.
        :param rels: When true, also load the sheet's relationships part.
        :raises IndexError: if a numeric ``idx`` is out of range.
        :raises ValueError: if a name does not match any sheet.
        :raises KeyError: if a required part is missing from the archive.
        """
        if isinstance(idx, basestring):
            idx = [s.lower() for s, _ in self._sheets].index(idx.lower()) + 1
        if idx < 1 or idx > len(self._sheets):
            raise IndexError('sheet index out of range')

        name = self._sheets[idx - 1][0]
        target = self._sheets[idx - 1][1].split('/')

        temp = TemporaryFile()
        rels_temp = None
        try:
            with self._zf.open('xl/{}/{}'.format(target[0], target[-1]), 'r') as zf:
                temp.write(zf.read())
                temp.seek(0, os.SEEK_SET)

            if rels:
                rels_temp = TemporaryFile()
                with self._zf.open('xl/{}/_rels/{}.rels'.format(target[0], target[-1]), 'r') as zf:
                    rels_temp.write(zf.read())
                    rels_temp.seek(0, os.SEEK_SET)

            return Worksheet(name=name, fp=temp, rels_fp=rels_temp, stringtable=self.stringtable, debug=self._debug)
        except Exception:
            # The worksheet takes ownership of both files only once it has been
            # built; on any failure before that, release them here instead of
            # leaking the descriptors.
            temp.close()
            if rels_temp is not None:
                rels_temp.close()
            raise

    def close(self):
        """Close the archive and, if loaded, the shared string table."""
        self._zf.close()
        if self.stringtable is not None:
            self.stringtable.close()
|
||||
83
venv/lib/python3.12/site-packages/pyxlsb/worksheet.py
Normal file
83
venv/lib/python3.12/site-packages/pyxlsb/worksheet.py
Normal file
@@ -0,0 +1,83 @@
|
||||
import os
|
||||
import sys
|
||||
import xml.etree.ElementTree as ET
|
||||
from . import biff12
|
||||
from .reader import BIFF12Reader
|
||||
from collections import namedtuple
|
||||
|
||||
if sys.version_info > (3,):
    # Python 3 has no ``xrange``; alias it so the code below can use one name.
    xrange = range

# One cell as yielded by Worksheet.rows(): row index, column index and value.
Cell = namedtuple('Cell', ['r', 'c', 'v'])
|
||||
|
||||
class Worksheet(object):
    """A single sheet: header information plus a row iterator.

    The constructor reads the records that precede the sheet data (dimension
    and column definitions). When a relationships file is supplied it reads
    on through the whole part to collect hyperlinks.

    :param name: Sheet name.
    :param fp: Binary file object holding the worksheet part.
    :param rels_fp: Optional file object holding the relationships XML.
    :param stringtable: Optional object indexable by shared-string id.
    :param debug: Forwarded to ``BIFF12Reader``.
    """

    def __init__(self, name, fp, rels_fp=None, stringtable=None, debug=False):
        super(Worksheet, self).__init__()
        self.name = name
        self._reader = BIFF12Reader(fp=fp, debug=debug)
        self._rels_fp = rels_fp
        self._rels = None if rels_fp is None else ET.parse(rels_fp).getroot()
        self._stringtable = stringtable
        # Stream offset just after the SHEETDATA record; rows() rewinds to it.
        self._data_offset = 0
        self.dimension = None
        self.cols = []
        self.rels = {}
        self.hyperlinks = {}
        self._parse()

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        self.close()

    def __iter__(self):
        return self.rows()

    def _parse(self):
        """Fill ``rels``, ``dimension``, ``cols``, ``hyperlinks`` and the data offset."""
        if self._rels is not None:
            for el in self._rels:
                self.rels[el.attrib['Id']] = el.attrib['Target']

        for record in self._reader:
            recid, rec = record[0], record[1]
            if recid == biff12.DIMENSION:
                self.dimension = rec
            elif recid == biff12.COL:
                self.cols.append(rec)
            elif recid == biff12.SHEETDATA:
                self._data_offset = self._reader.tell()
                # Hyperlink records are only wanted when relationships were
                # supplied; otherwise there is nothing left to collect.
                if self._rels is None:
                    break
            elif recid == biff12.HYPERLINK and self._rels is not None:
                # Map every cell covered by the link's range to its relationship id.
                for dr in xrange(rec.h):
                    for dc in xrange(rec.w):
                        self.hyperlinks[rec.r + dr, rec.c + dc] = rec.rId

    def _empty_row(self, row_num):
        """Return a list of value-less cells spanning the declared width."""
        return [Cell(row_num, i, None) for i in xrange(self.dimension.c + self.dimension.w)]

    def rows(self, sparse=False):
        """Yield the sheet's rows as lists of ``Cell``.

        :param sparse: When false, row numbers that have no record are
            yielded as rows of empty cells so that positions stay aligned;
            when true, they are skipped.

        NOTE(review): every row is sized ``dimension.c + dimension.w``; a cell
        whose column index is not below that raises IndexError, and a missing
        DIMENSION record leaves ``dimension`` as ``None``.
        """
        self._reader.seek(self._data_offset, os.SEEK_SET)
        row_num = -1
        row = None
        for record in self._reader:
            recid, rec = record[0], record[1]
            if recid == biff12.ROW and rec.r != row_num:
                if row is not None:
                    yield row
                if not sparse:
                    while row_num < rec.r - 1:
                        row_num += 1
                        yield self._empty_row(row_num)
                row_num = rec.r
                row = self._empty_row(row_num)
            elif biff12.BLANK <= recid <= biff12.FORMULA_BOOLERR:
                value = rec.v
                if recid == biff12.STRING and self._stringtable is not None:
                    # String cells carry an index into the shared string table.
                    value = self._stringtable[rec.v]
                row[rec.c] = Cell(row_num, rec.c, value)
            elif recid == biff12.SHEETDATA_END:
                if row is not None:
                    yield row
                break

    def close(self):
        """Close the record reader and, if present, the relationships file."""
        self._reader.close()
        if self._rels_fp is not None:
            self._rels_fp.close()
|
||||
Reference in New Issue
Block a user