Compare commits

..

No commits in common. "master" and "v0.0.9" have entirely different histories.

22 changed files with 303 additions and 2801 deletions

View File

@ -1,22 +0,0 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"
commit-message:
prefix: 💚 ci
include: "scope"
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "daily"
commit-message:
prefix: ⬆️ dep-bump
include: "scope"

View File

@ -1,47 +0,0 @@
name: Lint and test
on:
push:
branches: ["**"]
pull_request:
branches: [master]
jobs:
test:
name: Test on ${{ matrix.python_version }}
runs-on: ubuntu-latest
strategy:
matrix:
python_version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
fail-fast: false
steps:
- uses: actions/checkout@v6
- name: Set up Python ${{ matrix.python_version }}
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python_version }}
- name: Install poetry
uses: abatilo/actions-poetry@v4
with:
poetry-version: "2.1.2"
- name: Configure poetry
run: |
poetry config virtualenvs.create true --local
poetry config virtualenvs.in-project true --local
- uses: actions/cache@v4
name: Define a cache for the virtual environment based on the dependencies lock file
with:
path: ./.venv
key: ${{ runner.os }}-python-${{ env.pythonLocation }}-${{ hashFiles('poetry.lock') }}-venv
- name: Install build dependencies
run: sudo apt install -y libxml2-dev libxslt-dev
- name: Install Dependencies
run: poetry install
- name: Lint with black
run: |
# stop the build if there are Python syntax errors or undefined names
poetry run black --check --diff pandas_ods_reader/ tests/
- name: Test with pytest
run: |
poetry run pytest tests/

12
.gitignore vendored
View File

@ -27,15 +27,3 @@ venv/
# vim config # vim config
.vim/ .vim/
# Coverage
.coverage
# mypy
.mypy_cache/
# pyenv
.python-version
# mise
mise.local.toml

View File

@ -1,5 +0,0 @@
repos:
- repo: https://github.com/commitizen-tools/commitizen
rev: v2.38.0
hooks:
- id: commitizen

3
MANIFEST.in Normal file
View File

@ -0,0 +1,3 @@
include LICENSE.txt
include README.md
include pandas_ods_reader/VERSION

View File

@ -1,13 +1,15 @@
pandas-ods-reader pandas_ods_reader
=== ===
Provides a function to read in a **.ods** or **.fods** file and returns a pandas DataFrame. Provides a function to read in an ODS file and returns a pandas DataFrame.
It uses `ezodf` to read in **.ods** files. Since **.fods** files are essentially xml, `lxml` is used to read them. The correct parser is automatically chosen based on the file's extension. It uses `ezodf` to read in the ods file. If a range is specified in the sheet
to be imported, it seems that `ezodf` imports empty cells as well. Therefore,
completely empty rows and columns are dropped from the DataFrame, before it is
returned. Only trailing empty rows and columns are dropped.
If a range is specified in the sheet to be imported, it seems that `ezodf` imports empty cells as well. Therefore, completely empty rows and columns are dropped from the DataFrame, before it is returned. Only trailing empty rows and columns are dropped. If the ODS file contains duplicated column names, they will be numbered and the
number is appended to the column name in the resulting DataFrame.
If the ODS file contains duplicated column names, they will be numbered and the number is appended to the column name in the resulting DataFrame.
Dependencies Dependencies
--- ---
@ -19,7 +21,7 @@ Dependencies
Installation Installation
--- ---
`pip install pandas-ods-reader` `pip install pandas_ods_reader`
Usage Usage
--- ---
@ -33,7 +35,7 @@ path = "path/to/file.ods"
df = read_ods(path) df = read_ods(path)
# load a sheet based on its index (1 based) # load a sheet based on its index (1 based)
sheet_idx = 2 sheet_idx = 1
df = read_ods(path, sheet_idx) df = read_ods(path, sheet_idx)
# load a sheet based on its name # load a sheet based on its name

View File

@ -0,0 +1 @@
0.0.9

View File

@ -1,11 +1,6 @@
import sys import pkg_resources
from .main import read_ods from .parser import read_ods
if sys.version_info >= (3, 8):
from importlib.metadata import version
else:
from importlib_metadata import version
__version__ = version("pandas-ods-reader") __version__ = pkg_resources.get_distribution("pandas_ods_reader").version

View File

@ -1,99 +0,0 @@
from collections import OrderedDict
from pathlib import Path
from types import ModuleType
from typing import Any, Dict, Iterator, List, Union
import pandas as pd
from .utils import sanitize_df
def get_columns_from_headers(backend: ModuleType, row: Any) -> List[str]:
repeat_until = -1
repeat_value = None
# columns as lists in a dictionary
columns = []
# parse the first row as column names
for k, cell in enumerate(row):
value, n_repeated = backend.get_value(cell)
if n_repeated > 0:
repeat_value = value
repeat_until = n_repeated + k
if not value and k <= repeat_until:
value = repeat_value
if k == repeat_until:
# reset to allow for more than one repeated column
repeat_until = -1
if value and value not in columns:
columns.append(value)
else:
column_name = value if value else "unnamed"
# add count to column name
idx = 1
while f"{column_name}.{idx}" in columns:
idx += 1
columns.append(f"{column_name}.{idx}")
return columns
def get_generic_columns(row: Any) -> List[str]:
return [f"column.{j}" for j in range(len(row))]
def get_columns(backend: ModuleType, row: Any, headers: bool) -> List[str]:
if headers:
return get_columns_from_headers(backend, row)
return get_generic_columns(row)
def parse_data(
backend: ModuleType,
rows: Iterator[List[Any]],
headers: bool,
columns: List[str],
skiprows: int,
) -> pd.DataFrame:
df_dict: OrderedDict[str, Any] = OrderedDict()
col_index: Dict[int, str] = {}
for _ in range(skiprows):
next(rows)
for i, row in enumerate(rows):
# row is a list of cells
if i == 0:
columns = columns or get_columns(backend, row, headers)
df_dict = OrderedDict((column, []) for column in columns)
# create index for the column headers
col_index = {j: column for j, column in enumerate(columns)}
if headers:
continue
for j, cell in enumerate(row):
if j < len(col_index):
value, _ = backend.get_value(cell, parsed=True)
# use header instead of column index
df_dict[col_index[j]].append(value)
# make sure all columns are of the same length
max_col_length = max(len(df_dict[col]) for col in df_dict)
for col in df_dict:
col_length = len(df_dict[col])
if col_length < max_col_length:
df_dict[col] += [None] * (max_col_length - col_length)
return pd.DataFrame(df_dict)
def read_data(
backend: ModuleType,
file_or_path: Path,
sheet_id: Union[str, int],
headers: bool,
columns: List[str],
skiprows: int,
) -> pd.DataFrame:
doc = backend.get_doc(file_or_path)
rows = backend.get_rows(doc, sheet_id)
df = parse_data(backend, rows, headers=headers, columns=columns, skiprows=skiprows)
return sanitize_df(df)

View File

@ -1,49 +0,0 @@
"""Imports an ods or fods file into a DataFrame object"""
from pathlib import Path
from typing import Optional, List, Union
import pandas as pd
from .parsers import fods, ods
from . import algo
EXT_MAP = {".ods": ods, ".fods": fods}
def read_ods(
file_or_path: Union[str, Path],
sheet: Union[str, int] = 1,
headers: bool = True,
columns: Optional[List[str]] = None,
skiprows: int = 0,
) -> pd.DataFrame:
"""
Read in the provided ods or .ods file and convert it to `pandas.DataFrame`.
Will detect the filetype based on the file's extension or fall back to
ods.
Args:
file_or_path: The path to the .ods or .fods file.
sheet: If `int`, the 1 based index of the sheet to be read. If `str`, the
name of the sheet to be read.
headers: If `True`, then the first row is treated as the list of column names.
columns: A list of column names to be used as headers.
skiprows: The number of rows to skip before starting to read data.
Returns:
The content of the specified sheet as a DataFrame.
"""
path = file_or_path if isinstance(file_or_path, Path) else Path(file_or_path)
if not path.is_file():
raise FileNotFoundError(f"file {path} does not exist")
backend = EXT_MAP.get(path.suffix, ods)
return algo.read_data(
backend,
path,
sheet,
headers=headers,
columns=columns or [],
skiprows=skiprows,
)

View File

@ -0,0 +1,14 @@
"""Imports an ods or fods file into a DataFrame object"""
from pathlib import Path
from .parsers import fods, ods
EXT_MAP = {".ods": ods, ".fods": fods}
def read_ods(file_or_path, sheet=1, headers=True, columns=None):
loader = EXT_MAP.get(Path(file_or_path).suffix)
if not loader:
raise ValueError("Unknown filetype.")
return loader.read(file_or_path, sheet, headers=headers, columns=columns)

View File

@ -1,7 +1,9 @@
from pathlib import Path from collections import defaultdict
from typing import Iterator, Optional, Tuple, Union
from lxml import etree from lxml import etree
import pandas as pd
from ..tools import sanitize_df
BODY_TAG = "office:body" BODY_TAG = "office:body"
@ -16,16 +18,11 @@ TABLE_CELL_REPEATED_ATTRIB = "number-columns-repeated"
VALUE_TYPE_ATTRIB = "value-type" VALUE_TYPE_ATTRIB = "value-type"
def get_doc(file_or_path: Path) -> etree._ElementTree: def get_sheet(spreadsheet, sheet_id):
return etree.parse(str(file_or_path))
def get_sheet(spreadsheet: etree._Element, sheet_id: Union[str, int]) -> etree._Element:
namespaces = spreadsheet.nsmap namespaces = spreadsheet.nsmap
if isinstance(sheet_id, str): if isinstance(sheet_id, str):
sheet = spreadsheet.find( sheet = spreadsheet.find(
f"{TABLE_TAG}[@table:name='{sheet_id}']", f"{TABLE_TAG}[@table:name='{sheet_id}']", namespaces=namespaces
namespaces=namespaces,
) )
if sheet is None: if sheet is None:
raise KeyError(f"There is no sheet named {sheet_id}.") raise KeyError(f"There is no sheet named {sheet_id}.")
@ -36,39 +33,83 @@ def get_sheet(spreadsheet: etree._Element, sheet_id: Union[str, int]) -> etree._
return tables[sheet_id - 1] return tables[sheet_id - 1]
def get_rows( def parse_columns(cells, headers=True, columns=None):
doc: etree._ElementTree, orig_columns = cells.pop(0) if headers else None
sheet_id: Union[str, int], if columns is None:
) -> Iterator[etree._Element]: if orig_columns:
repeated_val = None
columns = []
repeated_dict = defaultdict(lambda: 0)
for i, col in enumerate(orig_columns):
text = col.find(TABLE_CELL_TEXT_TAG, namespaces=col.nsmap)
if text is not None:
value = text.text
elif text is None and repeated_val:
value = repeated_val
else:
value = "unnamed"
idx = 1
while "{}.{}".format(value, idx) in columns:
idx += 1
value = f"{value}.{idx}"
repeated = col.attrib.get(
f"{{{col.nsmap[TABLE_KEY]}}}{TABLE_CELL_REPEATED_ATTRIB}"
)
if repeated:
repeated_dict[value] += 1
repeated_val = f"{value}.{repeated_dict[value]}"
column = value if value not in columns else f"{value}.{i}"
columns.append(column)
else:
columns = [f"column.{i}" for i in range(len(cells[0]))]
return columns, cells
def parse_value(cell):
text = cell.find(TABLE_CELL_TEXT_TAG, namespaces=cell.nsmap)
is_float = (
cell.attrib.get(f"{{{cell.nsmap[OFFICE_KEY]}}}{VALUE_TYPE_ATTRIB}") == "float"
)
if text is None:
return None
value = text.text
if is_float:
return float(value)
return value
def load_fods(doc, sheet_id, headers=True, columns=None):
if not isinstance(sheet_id, (str, int)): if not isinstance(sheet_id, (str, int)):
raise ValueError("Sheet id has to be either `str` or `int`") raise ValueError("Sheet id has to be either `str` or `int`")
root = doc.getroot() root = doc.getroot()
namespaces = root.nsmap namespaces = root.nsmap
spreadsheet = doc.find(BODY_TAG, namespaces=namespaces).find( # type: ignore spreadsheet = doc.find(BODY_TAG, namespaces=namespaces).find(
SPREADSHEET_TAG, namespaces=namespaces SPREADSHEET_TAG, namespaces=namespaces
) )
sheet = get_sheet(spreadsheet, sheet_id) sheet = get_sheet(spreadsheet, sheet_id)
return sheet.iterfind(TABLE_ROW_TAG, namespaces=namespaces) rows = sheet.findall(TABLE_ROW_TAG, namespaces=namespaces)
allcells = []
for row in rows:
cells = row.findall(TABLE_CELL_TAG, namespaces=namespaces)
allcells.append(cells)
columns, values = parse_columns(allcells, headers, columns)
data = []
for row in values:
rowvalues = [parse_value(cell) for cell in row]
data.append(rowvalues)
final_rows = []
for row in data:
final_row = []
for i in range(len(columns)):
if i < len(row):
final_row.append(row[i])
else:
final_row.append(None)
final_rows.append(final_row)
return pd.DataFrame(final_rows, columns=columns)
def is_float(cell: etree._Element) -> bool: def read(file_or_path, sheet=1, headers=True, columns=None):
return ( doc = etree.parse(str(file_or_path))
cell.attrib.get(f"{{{cell.nsmap[OFFICE_KEY]}}}{VALUE_TYPE_ATTRIB}") == "float" df = load_fods(doc, sheet, headers=headers, columns=columns)
) return sanitize_df(df)
def get_value(
cell: etree._Element,
parsed: bool = False,
) -> Tuple[Optional[Union[str, float]], int]:
text = cell.find(TABLE_CELL_TEXT_TAG, namespaces=cell.nsmap)
if text is None:
return None, 0
value: Union[str, float] = "".join(text.itertext())
if parsed and is_float(cell):
value = float(value)
_n_repeated = cell.attrib.get(
f"{{{cell.nsmap[TABLE_KEY]}}}{TABLE_CELL_REPEATED_ATTRIB}"
)
n_repeated = int(_n_repeated) if _n_repeated is not None else 0
return value, n_repeated

View File

@ -1,28 +1,79 @@
from pathlib import Path from collections import OrderedDict
from typing import Any, Iterator, List, Tuple, Union
import ezodf # type: ignore[import] import ezodf
from ezodf.document import FlatXMLDocument, PackagedDocument # type: ignore[import] import pandas as pd
from ..tools import sanitize_df
def get_doc(file_or_path: Path) -> Union[FlatXMLDocument, PackagedDocument]: def load_ods(doc, sheet_id, headers=True, columns=None):
return ezodf.opendoc(file_or_path) # convert the sheet to a pandas.DataFrame
def get_rows(
doc: Union[FlatXMLDocument, PackagedDocument],
sheet_id: Union[str, int],
) -> Iterator[List[ezodf.Cell]]:
if not isinstance(sheet_id, (int, str)): if not isinstance(sheet_id, (int, str)):
raise ValueError("Sheet id has to be either `str` or `int`") raise ValueError("Sheet id has to be either `str` or `int`")
if isinstance(sheet_id, str): if isinstance(sheet_id, str):
sheets: List[str] = [sheet.name for sheet in doc.sheets] sheets = [sheet.name for sheet in doc.sheets]
if sheet_id not in sheets: if sheet_id not in sheets:
raise KeyError("There is no sheet named {}".format(sheet_id)) raise KeyError("There is no sheet named {}".format(sheet_id))
sheet_id = sheets.index(sheet_id) + 1 sheet_id = sheets.index(sheet_id) + 1
sheet: ezodf.Sheet = doc.sheets[sheet_id - 1] sheet = doc.sheets[sheet_id - 1]
return sheet.rows() df_dict = OrderedDict()
col_index = {}
for i, row in enumerate(sheet.rows()):
# row is a list of cells
if headers and i == 0 and not columns:
# columns as lists in a dictionary
columns = []
for cell in row:
if cell.value and cell.value not in columns:
columns.append(cell.value)
else:
column_name = cell.value if cell.value else "unnamed"
# add count to column name
idx = 1
while "{}.{}".format(column_name, idx) in columns:
idx += 1
columns.append("{}.{}".format(column_name, idx))
df_dict = OrderedDict((column, []) for column in columns)
# create index for the column headers
col_index = {j: column for j, column in enumerate(columns)}
continue
elif i == 0:
columns = columns if columns else ([f"column.{j}" for j in range(len(row))])
# columns as lists in a dictionary
df_dict = OrderedDict((column, []) for column in columns)
# create index for the column headers
col_index = {j: column for j, column in enumerate(columns)}
if headers:
continue
for j, cell in enumerate(row):
if j < len(col_index):
# use header instead of column index
df_dict[col_index[j]].append(cell.value)
else:
continue
df = pd.DataFrame(df_dict)
return df
def get_value(cell: ezodf.Cell, parsed: bool = False) -> Tuple[Any, int]: def read(file_or_path, sheet=1, headers=True, columns=None):
return cell.value, 0 """
This function reads in the provided ods file and converts it to a
dictionary. The dictionary is converted to a DataFrame. Trailing empty rows
and columns are dropped from the DataFrame, before it is returned.
:param file_or_path: str
the path to the ODS file
:param sheet: int or str, default 1
if int, the 1 based index of the sheet to be read in. If str, the name of
the sheet to be read in
:param header: bool, default True
if True, the first row is read in as headers
:param columns: list, default None
a list of column names to be used as headers
:returns: pandas.DataFrame
the ODS file as a pandas DataFrame
"""
doc = ezodf.opendoc(file_or_path)
df = load_ods(doc, sheet, headers, columns)
return sanitize_df(df)

View File

@ -1,7 +1,5 @@
"""Provides utility functions for the parser""" """Provides utility functions for the parser"""
import pandas as pd
def ods_info(doc): def ods_info(doc):
"""Print the number of sheets, their names, and number of rows and columns""" """Print the number of sheets, their names, and number of rows and columns"""
@ -16,8 +14,8 @@ def ods_info(doc):
) )
def sanitize_df(df: pd.DataFrame) -> pd.DataFrame: def sanitize_df(df):
"""Drop empty rows and columns from the DataFrame and return it.""" """Drop empty rows and columns from the DataFrame and returns it"""
# Delete empty rows # Delete empty rows
for i in df.index.tolist()[-1::-1]: for i in df.index.tolist()[-1::-1]:
if df.iloc[i].isna().all(): if df.iloc[i].isna().all():

1909
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,42 +0,0 @@
[tool.poetry]
name = "pandas-ods-reader"
version = "1.0.2"
description = "Read in .ods and .fods files and return a pandas.DataFrame."
authors = ["iuvbio <iuvbio@users.noreply.github.com>"]
license = "MIT"
readme = "README.md"
repository = "https://github.com/iuvbio/pandas_ods_reader"
keywords = ["data", "io", "pandas", "ods"]
classifiers = [
"Development Status :: 5 - Production/Stable",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3",
"Topic :: Utilities",
]
[tool.poetry.dependencies]
python = ">=3.9,<4"
ezodf = ">=0.3.2"
lxml = ">=4.9.2"
pandas = ">=2.2.3"
[tool.poetry.group.dev.dependencies]
black = ">=22.10.0"
pytest = ">=7.1.3"
pytest-cov = ">=4.0.0"
mypy = ">=0.991"
flake8 = ">=6.0.0"
pandas-stubs = ">=1.5.2.221213"
types-lxml = ">=2022.11.8"
commitizen = ">=2.38.0"
pre-commit = ">=3.7.1"
[tool.commitizen]
name = "cz_conventional_commits"
tag_format = "v$version"
version_provider = "poetry"
version_files = ["pyproject.toml:version"]
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

31
setup.cfg Normal file
View File

@ -0,0 +1,31 @@
[metadata]
name = pandas_ods_reader
version = file: pandas_ods_reader/VERSION
description = Read in an ODS file and return it as a pandas.DataFrame
long_description = file: README.md, LICENSE.txt
long_description_content_type = text/markdown
classifiers =
Development Status :: 2 - Beta
License :: OSI Approved :: MIT License
Programming Language :: Python :: 3
Topic :: Utilities
keywords = data, io, pandas, ods
url = "http://github.com/iuvbio/pandas_ods_reader"
author = iuvbio
author_email = cryptodemigod@protonmail.com
license = MIT
[options]
zip_safe = False
packages = find:
include_package_data = True
install_requires =
ezodf
pandas
lxml
[options.extras_require]
test = pytest
[aliases]
test = pytest

4
setup.py Normal file
View File

@ -0,0 +1,4 @@
from setuptools import setup
setup()

View File

@ -1,423 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<office:document xmlns:presentation="urn:oasis:names:tc:opendocument:xmlns:presentation:1.0" xmlns:css3t="http://www.w3.org/TR/css3-text/" xmlns:grddl="http://www.w3.org/2003/g/data-view#" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0" xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:rpt="http://openoffice.org/2005/report" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" xmlns:config="urn:oasis:names:tc:opendocument:xmlns:config:1.0" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:of="urn:oasis:names:tc:opendocument:xmlns:of:1.2" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:ooo="http://openoffice.org/2004/office" xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" xmlns:formx="urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0" xmlns:calcext="urn:org:documentfoundation:names:experimental:calc:xmlns:calcext:1.0" xmlns:drawooo="http://openoffice.org/2010/draw" xmlns:field="urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0" xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0" xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" xmlns:ooow="http://openoffice.org/2004/writer" xmlns:oooc="http://openoffice.org/2004/calc" xmlns:tableooo="http://openoffice.org/2009/table" xmlns:loext="urn:org:documentfoundation:names:experimental:office:xmlns:loext:1.0" xmlns:math="http://www.w3.org/1998/Math/MathML" xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0" xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0" xmlns:dom="http://www.w3.org/2001/xml-events" xmlns:xforms="http://www.w3.org/2002/xforms" office:version="1.2" office:mimetype="application/vnd.oasis.opendocument.spreadsheet">
<office:meta><meta:initial-creator>Lukas Jansen</meta:initial-creator><meta:creation-date>2019-01-27T03:31:08.931482632</meta:creation-date><dc:date>2022-10-25T08:45:33.990049580</dc:date><meta:editing-duration>PT2M33S</meta:editing-duration><meta:editing-cycles>2</meta:editing-cycles><meta:generator>LibreOffice/6.4.7.2$Linux_X86_64 LibreOffice_project/40$Build-2</meta:generator><meta:document-statistic meta:table-count="1" meta:cell-count="55" meta:object-count="0"/></office:meta>
<office:settings>
<config:config-item-set config:name="ooo:view-settings">
<config:config-item config:name="VisibleAreaTop" config:type="int">0</config:config-item>
<config:config-item config:name="VisibleAreaLeft" config:type="int">0</config:config-item>
<config:config-item config:name="VisibleAreaWidth" config:type="int">11288</config:config-item>
<config:config-item config:name="VisibleAreaHeight" config:type="int">4967</config:config-item>
<config:config-item-map-indexed config:name="Views">
<config:config-item-map-entry>
<config:config-item config:name="ViewId" config:type="string">view1</config:config-item>
<config:config-item-map-named config:name="Tables">
<config:config-item-map-entry config:name="Sheet1">
<config:config-item config:name="CursorPositionX" config:type="int">5</config:config-item>
<config:config-item config:name="CursorPositionY" config:type="int">2</config:config-item>
<config:config-item config:name="HorizontalSplitMode" config:type="short">0</config:config-item>
<config:config-item config:name="VerticalSplitMode" config:type="short">0</config:config-item>
<config:config-item config:name="HorizontalSplitPosition" config:type="int">0</config:config-item>
<config:config-item config:name="VerticalSplitPosition" config:type="int">0</config:config-item>
<config:config-item config:name="ActiveSplitRange" config:type="short">2</config:config-item>
<config:config-item config:name="PositionLeft" config:type="int">0</config:config-item>
<config:config-item config:name="PositionRight" config:type="int">0</config:config-item>
<config:config-item config:name="PositionTop" config:type="int">0</config:config-item>
<config:config-item config:name="PositionBottom" config:type="int">0</config:config-item>
<config:config-item config:name="ZoomType" config:type="short">0</config:config-item>
<config:config-item config:name="ZoomValue" config:type="int">100</config:config-item>
<config:config-item config:name="PageViewZoomValue" config:type="int">60</config:config-item>
<config:config-item config:name="ShowGrid" config:type="boolean">true</config:config-item>
<config:config-item config:name="AnchoredTextOverflowLegacy" config:type="boolean">false</config:config-item>
</config:config-item-map-entry>
</config:config-item-map-named>
<config:config-item config:name="ActiveTable" config:type="string">Sheet1</config:config-item>
<config:config-item config:name="HorizontalScrollbarWidth" config:type="int">1861</config:config-item>
<config:config-item config:name="ZoomType" config:type="short">0</config:config-item>
<config:config-item config:name="ZoomValue" config:type="int">100</config:config-item>
<config:config-item config:name="PageViewZoomValue" config:type="int">60</config:config-item>
<config:config-item config:name="ShowPageBreakPreview" config:type="boolean">false</config:config-item>
<config:config-item config:name="ShowZeroValues" config:type="boolean">true</config:config-item>
<config:config-item config:name="ShowNotes" config:type="boolean">true</config:config-item>
<config:config-item config:name="ShowGrid" config:type="boolean">true</config:config-item>
<config:config-item config:name="GridColor" config:type="int">12632256</config:config-item>
<config:config-item config:name="ShowPageBreaks" config:type="boolean">true</config:config-item>
<config:config-item config:name="HasColumnRowHeaders" config:type="boolean">true</config:config-item>
<config:config-item config:name="HasSheetTabs" config:type="boolean">true</config:config-item>
<config:config-item config:name="IsOutlineSymbolsSet" config:type="boolean">true</config:config-item>
<config:config-item config:name="IsValueHighlightingEnabled" config:type="boolean">false</config:config-item>
<config:config-item config:name="IsSnapToRaster" config:type="boolean">false</config:config-item>
<config:config-item config:name="RasterIsVisible" config:type="boolean">false</config:config-item>
<config:config-item config:name="RasterResolutionX" config:type="int">1000</config:config-item>
<config:config-item config:name="RasterResolutionY" config:type="int">1000</config:config-item>
<config:config-item config:name="RasterSubdivisionX" config:type="int">1</config:config-item>
<config:config-item config:name="RasterSubdivisionY" config:type="int">1</config:config-item>
<config:config-item config:name="IsRasterAxisSynchronized" config:type="boolean">true</config:config-item>
<config:config-item config:name="AnchoredTextOverflowLegacy" config:type="boolean">false</config:config-item>
</config:config-item-map-entry>
</config:config-item-map-indexed>
</config:config-item-set>
<config:config-item-set config:name="ooo:configuration-settings">
<config:config-item config:name="EmbedComplexScriptFonts" config:type="boolean">true</config:config-item>
<config:config-item config:name="EmbedAsianScriptFonts" config:type="boolean">true</config:config-item>
<config:config-item config:name="EmbedLatinScriptFonts" config:type="boolean">true</config:config-item>
<config:config-item config:name="EmbedOnlyUsedFonts" config:type="boolean">false</config:config-item>
<config:config-item config:name="RasterResolutionY" config:type="int">1000</config:config-item>
<config:config-item config:name="IsOutlineSymbolsSet" config:type="boolean">true</config:config-item>
<config:config-item config:name="RasterSubdivisionY" config:type="int">1</config:config-item>
<config:config-item config:name="GridColor" config:type="int">12632256</config:config-item>
<config:config-item config:name="HasColumnRowHeaders" config:type="boolean">true</config:config-item>
<config:config-item config:name="ShowNotes" config:type="boolean">true</config:config-item>
<config:config-item config:name="HasSheetTabs" config:type="boolean">true</config:config-item>
<config:config-item config:name="PrinterSetup" config:type="base64Binary">kwH+/0dlbmVyaWMgUHJpbnRlcgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAU0dFTlBSVAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAWAAMAtAAAAAAAAAAEAAhSAAAEdAAASm9iRGF0YSAxCnByaW50ZXI9R2VuZXJpYyBQcmludGVyCm9yaWVudGF0aW9uPVBvcnRyYWl0CmNvcGllcz0xCmNvbGxhdGU9ZmFsc2UKbWFyZ2luZGFqdXN0bWVudD0wLDAsMCwwCmNvbG9yZGVwdGg9MjQKcHNsZXZlbD0wCnBkZmRldmljZT0xCmNvbG9yZGV2aWNlPTAKUFBEQ29udGV4RGF0YQpQYWdlU2l6ZTpBNAAAEgBDT01QQVRfRFVQTEVYX01PREUPAER1cGxleE1vZGU6Ok9mZg==</config:config-item>
<config:config-item config:name="RasterResolutionX" config:type="int">1000</config:config-item>
<config:config-item config:name="SyntaxStringRef" config:type="short">7</config:config-item>
<config:config-item config:name="RasterIsVisible" config:type="boolean">false</config:config-item>
<config:config-item config:name="ShowZeroValues" config:type="boolean">true</config:config-item>
<config:config-item config:name="ApplyUserData" config:type="boolean">true</config:config-item>
<config:config-item config:name="RasterSubdivisionX" config:type="int">1</config:config-item>
<config:config-item config:name="IsRasterAxisSynchronized" config:type="boolean">true</config:config-item>
<config:config-item config:name="LoadReadonly" config:type="boolean">false</config:config-item>
<config:config-item config:name="AutoCalculate" config:type="boolean">true</config:config-item>
<config:config-item config:name="EmbedFonts" config:type="boolean">false</config:config-item>
<config:config-item config:name="SaveThumbnail" config:type="boolean">true</config:config-item>
<config:config-item config:name="ShowGrid" config:type="boolean">true</config:config-item>
<config:config-item config:name="PrinterName" config:type="string">Generic Printer</config:config-item>
<config:config-item config:name="PrinterPaperFromSetup" config:type="boolean">false</config:config-item>
<config:config-item config:name="CharacterCompressionType" config:type="short">0</config:config-item>
<config:config-item config:name="LinkUpdateMode" config:type="short">3</config:config-item>
<config:config-item config:name="ShowPageBreaks" config:type="boolean">true</config:config-item>
<config:config-item config:name="SaveVersionOnClose" config:type="boolean">false</config:config-item>
<config:config-item config:name="IsSnapToRaster" config:type="boolean">false</config:config-item>
<config:config-item config:name="IsKernAsianPunctuation" config:type="boolean">false</config:config-item>
<config:config-item config:name="UpdateFromTemplate" config:type="boolean">true</config:config-item>
<config:config-item config:name="IsDocumentShared" config:type="boolean">false</config:config-item>
<config:config-item config:name="AllowPrintJobCancel" config:type="boolean">true</config:config-item>
</config:config-item-set>
</office:settings>
<office:scripts>
<office:script script:language="ooo:Basic">
<ooo:libraries xmlns:ooo="http://openoffice.org/2004/office" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</office:script>
</office:scripts>
<office:font-face-decls>
<style:font-face style:name="Liberation Sans" svg:font-family="&apos;Liberation Sans&apos;" style:font-family-generic="swiss" style:font-pitch="variable"/>
<style:font-face style:name="Lohit Devanagari" svg:font-family="&apos;Lohit Devanagari&apos;" style:font-family-generic="system" style:font-pitch="variable"/>
<style:font-face style:name="Noto Sans CJK SC" svg:font-family="&apos;Noto Sans CJK SC&apos;" style:font-family-generic="system" style:font-pitch="variable"/>
<style:font-face style:name="Noto Sans CJK SC Regular" svg:font-family="&apos;Noto Sans CJK SC Regular&apos;" style:font-family-generic="system" style:font-pitch="variable"/>
</office:font-face-decls>
<office:styles>
<style:default-style style:family="table-cell">
<style:paragraph-properties style:tab-stop-distance="1.25cm"/>
<style:text-properties style:font-name="Liberation Sans" fo:language="en" fo:country="NZ" style:font-name-asian="Noto Sans CJK SC Regular" style:language-asian="zh" style:country-asian="CN" style:font-name-complex="Lohit Devanagari" style:language-complex="hi" style:country-complex="IN"/>
</style:default-style>
<number:number-style style:name="N0">
<number:number number:min-integer-digits="1"/>
</number:number-style>
<style:style style:name="Default" style:family="table-cell"/>
<style:style style:name="Heading" style:family="table-cell" style:parent-style-name="Default">
<style:text-properties fo:color="#000000" fo:font-size="24pt" fo:font-style="normal" fo:font-weight="bold"/>
</style:style>
<style:style style:name="Heading_20_1" style:display-name="Heading 1" style:family="table-cell" style:parent-style-name="Heading">
<style:text-properties fo:color="#000000" fo:font-size="18pt" fo:font-style="normal" fo:font-weight="normal"/>
</style:style>
<style:style style:name="Heading_20_2" style:display-name="Heading 2" style:family="table-cell" style:parent-style-name="Heading">
<style:text-properties fo:color="#000000" fo:font-size="12pt" fo:font-style="normal" fo:font-weight="normal"/>
</style:style>
<style:style style:name="Text" style:family="table-cell" style:parent-style-name="Default"/>
<style:style style:name="Note" style:family="table-cell" style:parent-style-name="Text">
<style:table-cell-properties fo:background-color="#ffffcc" style:diagonal-bl-tr="none" style:diagonal-tl-br="none" fo:border="0.74pt solid #808080"/>
<style:text-properties fo:color="#333333" fo:font-size="10pt" fo:font-style="normal" fo:font-weight="normal"/>
</style:style>
<style:style style:name="Footnote" style:family="table-cell" style:parent-style-name="Text">
<style:text-properties fo:color="#808080" fo:font-size="10pt" fo:font-style="italic" fo:font-weight="normal"/>
</style:style>
<style:style style:name="Hyperlink" style:family="table-cell" style:parent-style-name="Text">
<style:text-properties fo:color="#0000ee" fo:font-size="10pt" fo:font-style="normal" style:text-underline-style="solid" style:text-underline-width="auto" style:text-underline-color="#0000ee" fo:font-weight="normal"/>
</style:style>
<style:style style:name="Status" style:family="table-cell" style:parent-style-name="Default"/>
<style:style style:name="Good" style:family="table-cell" style:parent-style-name="Status">
<style:table-cell-properties fo:background-color="#ccffcc"/>
<style:text-properties fo:color="#006600" fo:font-size="10pt" fo:font-style="normal" fo:font-weight="normal"/>
</style:style>
<style:style style:name="Neutral" style:family="table-cell" style:parent-style-name="Status">
<style:table-cell-properties fo:background-color="#ffffcc"/>
<style:text-properties fo:color="#996600" fo:font-size="10pt" fo:font-style="normal" fo:font-weight="normal"/>
</style:style>
<style:style style:name="Bad" style:family="table-cell" style:parent-style-name="Status">
<style:table-cell-properties fo:background-color="#ffcccc"/>
<style:text-properties fo:color="#cc0000" fo:font-size="10pt" fo:font-style="normal" fo:font-weight="normal"/>
</style:style>
<style:style style:name="Warning" style:family="table-cell" style:parent-style-name="Status">
<style:text-properties fo:color="#cc0000" fo:font-size="10pt" fo:font-style="normal" fo:font-weight="normal"/>
</style:style>
<style:style style:name="Error" style:family="table-cell" style:parent-style-name="Status">
<style:table-cell-properties fo:background-color="#cc0000"/>
<style:text-properties fo:color="#ffffff" fo:font-size="10pt" fo:font-style="normal" fo:font-weight="bold"/>
</style:style>
<style:style style:name="Accent" style:family="table-cell" style:parent-style-name="Default">
<style:text-properties fo:color="#000000" fo:font-size="10pt" fo:font-style="normal" fo:font-weight="bold"/>
</style:style>
<style:style style:name="Accent_20_1" style:display-name="Accent 1" style:family="table-cell" style:parent-style-name="Accent">
<style:table-cell-properties fo:background-color="#000000"/>
<style:text-properties fo:color="#ffffff" fo:font-size="10pt" fo:font-style="normal" fo:font-weight="normal"/>
</style:style>
<style:style style:name="Accent_20_2" style:display-name="Accent 2" style:family="table-cell" style:parent-style-name="Accent">
<style:table-cell-properties fo:background-color="#808080"/>
<style:text-properties fo:color="#ffffff" fo:font-size="10pt" fo:font-style="normal" fo:font-weight="normal"/>
</style:style>
<style:style style:name="Accent_20_3" style:display-name="Accent 3" style:family="table-cell" style:parent-style-name="Accent">
<style:table-cell-properties fo:background-color="#dddddd"/>
</style:style>
<style:style style:name="Result" style:family="table-cell" style:parent-style-name="Default">
<style:text-properties fo:color="#000000" fo:font-size="10pt" fo:font-style="italic" style:text-underline-style="solid" style:text-underline-width="auto" style:text-underline-color="#000000" fo:font-weight="bold"/>
</style:style>
</office:styles>
<office:automatic-styles>
<style:style style:name="co1" style:family="table-column">
<style:table-column-properties fo:break-before="auto" style:column-width="2.258cm"/>
</style:style>
<style:style style:name="ro1" style:family="table-row">
<style:table-row-properties style:row-height="0.452cm" fo:break-before="auto" style:use-optimal-row-height="true"/>
</style:style>
<style:style style:name="ta1" style:family="table" style:master-page-name="Default">
<style:table-properties table:display="true" style:writing-mode="lr-tb"/>
</style:style>
<number:number-style style:name="N2">
<number:number number:decimal-places="2" loext:min-decimal-places="2" number:min-integer-digits="1"/>
</number:number-style>
<style:page-layout style:name="pm1">
<style:page-layout-properties style:writing-mode="lr-tb"/>
<style:header-style>
<style:header-footer-properties fo:min-height="0.75cm" fo:margin-left="0cm" fo:margin-right="0cm" fo:margin-bottom="0.25cm"/>
</style:header-style>
<style:footer-style>
<style:header-footer-properties fo:min-height="0.75cm" fo:margin-left="0cm" fo:margin-right="0cm" fo:margin-top="0.25cm"/>
</style:footer-style>
</style:page-layout>
<style:page-layout style:name="pm2">
<style:page-layout-properties style:writing-mode="lr-tb"/>
<style:header-style>
<style:header-footer-properties fo:min-height="0.75cm" fo:margin-left="0cm" fo:margin-right="0cm" fo:margin-bottom="0.25cm" fo:border="2.49pt solid #000000" fo:padding="0.018cm" fo:background-color="#c0c0c0">
<style:background-image/>
</style:header-footer-properties>
</style:header-style>
<style:footer-style>
<style:header-footer-properties fo:min-height="0.75cm" fo:margin-left="0cm" fo:margin-right="0cm" fo:margin-top="0.25cm" fo:border="2.49pt solid #000000" fo:padding="0.018cm" fo:background-color="#c0c0c0">
<style:background-image/>
</style:header-footer-properties>
</style:footer-style>
</style:page-layout>
</office:automatic-styles>
<office:master-styles>
<style:master-page style:name="Default" style:page-layout-name="pm1">
<style:header>
<text:p><text:sheet-name>???</text:sheet-name></text:p>
</style:header>
<style:header-left style:display="false"/>
<style:footer>
<text:p>Page <text:page-number>1</text:page-number></text:p>
</style:footer>
<style:footer-left style:display="false"/>
</style:master-page>
<style:master-page style:name="Report" style:page-layout-name="pm2">
<style:header>
<style:region-left>
<text:p><text:sheet-name>???</text:sheet-name><text:s/>(<text:title>???</text:title>)</text:p>
</style:region-left>
<style:region-right>
<text:p><text:date style:data-style-name="N2" text:date-value="2022-10-25">00/00/0000</text:date>, <text:time style:data-style-name="N2" text:time-value="08:45:14.557684313">00:00:00</text:time></text:p>
</style:region-right>
</style:header>
<style:header-left style:display="false"/>
<style:footer>
<text:p>Page <text:page-number>1</text:page-number><text:s/>/ <text:page-count>99</text:page-count></text:p>
</style:footer>
<style:footer-left style:display="false"/>
</style:master-page>
</office:master-styles>
<office:body>
<office:spreadsheet>
<table:calculation-settings table:automatic-find-labels="false" table:use-regular-expressions="false" table:use-wildcards="true"/>
<table:table table:name="Sheet1" table:style-name="ta1">
<table:table-column table:style-name="co1" table:number-columns-repeated="5" table:default-cell-style-name="Default"/>
<table:table-row table:style-name="ro1">
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>A</text:p>
</table:table-cell>
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>B</text:p>
</table:table-cell>
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>C</text:p>
</table:table-cell>
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>D</text:p>
</table:table-cell>
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>E</text:p>
</table:table-cell>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:number-columns-repeated="5" office:value-type="string" calcext:value-type="string">
<text:p>skip this</text:p>
</table:table-cell>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>a</text:p>
</table:table-cell>
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>b</text:p>
</table:table-cell>
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>c</text:p>
</table:table-cell>
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>d</text:p>
</table:table-cell>
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>e</text:p>
</table:table-cell>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="10" calcext:value-type="float">
<text:p>10</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="0" calcext:value-type="float">
<text:p>0</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="58" calcext:value-type="float">
<text:p>58</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="29" calcext:value-type="float">
<text:p>29</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="77" calcext:value-type="float">
<text:p>77</text:p>
</table:table-cell>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="5" calcext:value-type="float">
<text:p>5</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="47" calcext:value-type="float">
<text:p>47</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="50" calcext:value-type="float">
<text:p>50</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="99" calcext:value-type="float">
<text:p>99</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="79" calcext:value-type="float">
<text:p>79</text:p>
</table:table-cell>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="75" calcext:value-type="float">
<text:p>75</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="25" calcext:value-type="float">
<text:p>25</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="86" calcext:value-type="float">
<text:p>86</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="47" calcext:value-type="float">
<text:p>47</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="65" calcext:value-type="float">
<text:p>65</text:p>
</table:table-cell>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="82" calcext:value-type="float">
<text:p>82</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="45" calcext:value-type="float">
<text:p>45</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="88" calcext:value-type="float">
<text:p>88</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="48" calcext:value-type="float">
<text:p>48</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="74" calcext:value-type="float">
<text:p>74</text:p>
</table:table-cell>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="72" calcext:value-type="float">
<text:p>72</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="47" calcext:value-type="float">
<text:p>47</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="57" calcext:value-type="float">
<text:p>57</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="82" calcext:value-type="float">
<text:p>82</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="46" calcext:value-type="float">
<text:p>46</text:p>
</table:table-cell>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="40" calcext:value-type="float">
<text:p>40</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="54" calcext:value-type="float">
<text:p>54</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="26" calcext:value-type="float">
<text:p>26</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="97" calcext:value-type="float">
<text:p>97</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="1" calcext:value-type="float">
<text:p>1</text:p>
</table:table-cell>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="54" calcext:value-type="float">
<text:p>54</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="26" calcext:value-type="float">
<text:p>26</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="99" calcext:value-type="float">
<text:p>99</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="63" calcext:value-type="float">
<text:p>63</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="49" calcext:value-type="float">
<text:p>49</text:p>
</table:table-cell>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="87" calcext:value-type="float">
<text:p>87</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="24" calcext:value-type="float">
<text:p>24</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="47" calcext:value-type="float">
<text:p>47</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="87" calcext:value-type="float">
<text:p>87</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="15" calcext:value-type="float">
<text:p>15</text:p>
</table:table-cell>
</table:table-row>
</table:table>
<table:named-expressions/>
</office:spreadsheet>
</office:body>
</office:document>

Binary file not shown.

View File

@ -1,5 +1,4 @@
"""Tests for core read_ods function with different files""" """Tests for core read_ods function with different files"""
from pathlib import Path from pathlib import Path
import pandas as pd import pandas as pd
@ -17,161 +16,132 @@ duplicated_column_names_file = "example_duplicated_column_names.ods"
col_len_file = "example_col_lengths.ods" col_len_file = "example_col_lengths.ods"
missing_header_file = "example_missing_header.ods" missing_header_file = "example_missing_header.ods"
mixed_dtypes_file = "mixed_dtypes.ods" mixed_dtypes_file = "mixed_dtypes.ods"
skiprows_file = "example_skiprows.ods"
@pytest.mark.parametrize("suffix", [".ods", ".fods"]) class TestOdsReader:
def test_header_file_simple(suffix: str) -> None: @pytest.mark.parametrize("suffix", [".ods", ".fods"])
"""Test a simple file with headers.""" def test_header_file_simple(self, suffix):
path = rsc / header_file
df = read_ods(path.with_suffix(suffix))
assert isinstance(df, pd.DataFrame) path = rsc / header_file
assert len(df) == 10 df = read_ods(path.with_suffix(suffix))
assert len(df.columns) == 5
assert isinstance(df, pd.DataFrame)
assert len(df) == 10
assert len(df.columns) == 5
@pytest.mark.parametrize("suffix", [".ods", ".fods"]) @pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_header_file_with_int(suffix: str) -> None: def test_header_file_with_int(self, suffix):
"""Test referencing a sheet by index."""
path = rsc / header_file
df = read_ods(path.with_suffix(suffix), 1)
assert isinstance(df, pd.DataFrame) path = rsc / header_file
assert len(df) == 10 df = read_ods(path.with_suffix(suffix), 1)
assert len(df.columns) == 5
assert isinstance(df, pd.DataFrame)
assert len(df) == 10
assert len(df.columns) == 5
@pytest.mark.parametrize("suffix", [".ods", ".fods"]) @pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_header_file_with_str(suffix: str) -> None: def test_header_file_with_str(self, suffix):
"""Test referencing a sheet by name."""
path = rsc / header_file
df = read_ods(path.with_suffix(suffix), "Sheet1")
assert isinstance(df, pd.DataFrame) path = rsc / header_file
assert len(df) == 10 df = read_ods(path.with_suffix(suffix), "Sheet1")
assert len(df.columns) == 5
assert isinstance(df, pd.DataFrame)
assert len(df) == 10
assert len(df.columns) == 5
@pytest.mark.parametrize("suffix", [".ods", ".fods"]) @pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_header_file_with_cols(suffix: str) -> None: def test_header_file_with_cols(self, suffix):
"""Test overwriting haders with column names."""
path = rsc / header_file
columns = ["One", "Two", "Three", "Four", "Five"]
df = read_ods(path.with_suffix(suffix), "Sheet1", columns=columns)
assert list(df.columns) == columns path = rsc / header_file
assert len(df) == 10 columns = ["One", "Two", "Three", "Four", "Five"]
assert len(df.columns) == 5 df = read_ods(path.with_suffix(suffix), "Sheet1", columns=columns)
assert list(df.columns) == columns
assert len(df) == 10
assert len(df.columns) == 5
@pytest.mark.parametrize("suffix", [".ods", ".fods"]) @pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_no_header_file_no_cols(suffix: str) -> None: def test_no_header_file_no_cols(self, suffix):
"""Test autogeneration of headers with no headers and not column names."""
path = rsc / no_header_file
df = read_ods(path.with_suffix(suffix), 1, headers=False)
assert list(df.columns) == [f"column.{i}" for i in range(len(df.columns))] path = rsc / no_header_file
assert len(df) == 10 df = read_ods(path.with_suffix(suffix), 1, headers=False)
assert len(df.columns) == 5
assert list(df.columns) == [f"column.{i}" for i in range(len(df.columns))]
assert len(df) == 10
assert len(df.columns) == 5
@pytest.mark.parametrize("suffix", [".ods", ".fods"]) @pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_no_header_file_with_cols(suffix: str) -> None: def test_no_header_file_with_cols(self, suffix):
"""Test reading a file with no headers and passing column names."""
path = rsc / no_header_file
columns = ["A", "B", "C", "D", "E"]
df = read_ods(path.with_suffix(suffix), 1, headers=False, columns=columns)
assert list(df.columns) == columns path = rsc / no_header_file
assert len(df) == 10 columns = ["A", "B", "C", "D", "E"]
df = read_ods(path.with_suffix(suffix), 1, headers=False, columns=columns)
assert list(df.columns) == columns
assert len(df) == 10
@pytest.mark.parametrize("suffix", [".ods", ".fods"]) @pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_duplicated_column_names(suffix: str) -> None: def test_duplicated_column_names(self, suffix):
"""Test numbering duplicate column names."""
path = rsc / duplicated_column_names_file
df = read_ods(path.with_suffix(suffix), 1)
assert isinstance(df, pd.DataFrame) path = rsc / duplicated_column_names_file
assert len(df.columns) == 4 df = read_ods(path.with_suffix(suffix), 1)
assert "website.1" in df.columns
assert isinstance(df, pd.DataFrame)
assert len(df.columns) == 4
assert "website.1" in df.columns
@pytest.mark.parametrize("suffix", [".ods", ".fods"]) @pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_header_file_col_len(suffix: str) -> None: def test_header_file_col_len(self, suffix):
"""Test the correct number of columns is read."""
path = rsc / col_len_file
df = read_ods(path.with_suffix(suffix), 1)
assert isinstance(df, pd.DataFrame) path = rsc / col_len_file
assert len(df) == 10 df = read_ods(path.with_suffix(suffix), 1)
assert len(df.columns) == 5
assert isinstance(df, pd.DataFrame)
assert len(df) == 10
assert len(df.columns) == 5
@pytest.mark.parametrize("suffix", [".ods", ".fods"]) @pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_wrong_id_type(suffix: str) -> None: def test_wrong_id_type(self, suffix):
"""Verify passing a wrong id type raises an error."""
path = rsc / header_file
with pytest.raises(ValueError) as e_info: path = rsc / header_file
read_ods(path.with_suffix(suffix), 1.0) # type: ignore[arg-type]
assert e_info.match("Sheet id has to be either `str` or `int`")
with pytest.raises(ValueError) as e_info:
read_ods(path.with_suffix(suffix), 1.0)
assert e_info.match("Sheet id has to be either `str` or `int`")
@pytest.mark.parametrize("suffix", [".ods", ".fods"]) @pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_non_existent_sheet(suffix: str) -> None: def test_non_existent_sheet(self, suffix):
"""Verify referencing a non-existent sheet raises an error."""
path = rsc / header_file
sheet_name = "No_Sheet"
with pytest.raises(KeyError) as e_info: path = rsc / header_file
read_ods(path.with_suffix(suffix), sheet_name) sheet_name = "No_Sheet"
assert e_info.match(f"There is no sheet named {sheet_name}")
with pytest.raises(KeyError) as e_info:
read_ods(path.with_suffix(suffix), sheet_name)
assert e_info.match(f"There is no sheet named {sheet_name}")
@pytest.mark.parametrize("suffix", [".ods", ".fods"]) @pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_missing_header(suffix: str) -> None: def test_missing_header(self, suffix):
"""Verify that a missing header is named."""
path = rsc / missing_header_file
df = read_ods(path.with_suffix(suffix), 1)
assert isinstance(df, pd.DataFrame) path = rsc / missing_header_file
assert len(df) == 10 df = read_ods(path.with_suffix(suffix), 1)
assert len(df.columns) == 5
assert df.columns[2] == "unnamed.1" assert isinstance(df, pd.DataFrame)
assert len(df) == 10
assert len(df.columns) == 5
assert df.columns[2] == "unnamed.1"
@pytest.mark.parametrize("suffix", [".ods", ".fods"]) @pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_mixed_dtypes(suffix: str) -> None: def test_mixed_dtypes(self, suffix):
"""Verify loading a df with mixed types."""
path = rsc / mixed_dtypes_file
df = read_ods(path.with_suffix(suffix), 1)
assert isinstance(df, pd.DataFrame) path = rsc / mixed_dtypes_file
assert len(df) == 10 df = read_ods(path.with_suffix(suffix), 1)
assert len(df.columns) == 5
type_list = [float, object, float, float, object] assert isinstance(df, pd.DataFrame)
assert df.dtypes.tolist() == type_list assert len(df) == 10
col_b_types = [type(v) for v in df.B.values] assert len(df.columns) == 5
assert str in col_b_types and float in col_b_types
type_list = [float, object, float, float, object]
@pytest.mark.parametrize("suffix", [".ods", ".fods"]) assert df.dtypes.tolist() == type_list
def test_skiprows(suffix: str) -> None: col_b_types = [type(v) for v in df.B.values]
"""Verify skipping rows works correctly.""" assert str in col_b_types and float in col_b_types
path = rsc / skiprows_file
df = read_ods(path.with_suffix(suffix), skiprows=2)
assert isinstance(df, pd.DataFrame)
assert len(df) == 8
assert len(df.columns) == 5
assert df.columns.tolist() == ["a", "b", "c", "d", "e"]
def test_invalid_path() -> None:
"""Verify passing an invalid path raises an error."""
path = rsc / "does-not-exist.ods"
with pytest.raises(FileNotFoundError, match="does not exist"):
read_ods(path)