Compare commits

..

No commits in common. "master" and "v0.1.1" have entirely different histories.

20 changed files with 230 additions and 2715 deletions

View File

@ -1,22 +0,0 @@
# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
version: 2
updates:
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "daily"
commit-message:
prefix: 💚 ci
include: "scope"
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "daily"
commit-message:
prefix: ⬆️ dep-bump
include: "scope"

View File

@ -2,46 +2,30 @@ name: Lint and test
on:
push:
branches: ["**"]
branches: [ master ]
pull_request:
branches: [master]
branches: [ master ]
jobs:
test:
name: Test on ${{ matrix.python_version }}
build:
runs-on: ubuntu-latest
strategy:
matrix:
python_version: ["3.9", "3.10", "3.11", "3.12", "3.13"]
fail-fast: false
steps:
- uses: actions/checkout@v6
- name: Set up Python ${{ matrix.python_version }}
uses: actions/setup-python@v6
with:
python-version: ${{ matrix.python_version }}
- name: Install poetry
uses: abatilo/actions-poetry@v4
with:
poetry-version: "2.1.2"
- name: Configure poetry
run: |
poetry config virtualenvs.create true --local
poetry config virtualenvs.in-project true --local
- uses: actions/cache@v4
name: Define a cache for the virtual environment based on the dependencies lock file
with:
path: ./.venv
key: ${{ runner.os }}-python-${{ env.pythonLocation }}-${{ hashFiles('poetry.lock') }}-venv
- name: Install build dependencies
run: sudo apt install -y libxml2-dev libxslt-dev
- name: Install Dependencies
run: poetry install
- name: Lint with black
run: |
# stop the build if there are Python syntax errors or undefined names
poetry run black --check --diff pandas_ods_reader/ tests/
- name: Test with pytest
run: |
poetry run pytest tests/
- uses: actions/checkout@v2
- name: Set up Python 3.9
uses: actions/setup-python@v2
with:
python-version: 3.9
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install black pytest
pip install .
- name: Lint with black
run: |
# stop the build if there are Python syntax errors or undefined names
black --check --diff pandas_ods_reader/ tests/
- name: Test with pytest
run: |
python -m pytest tests/

12
.gitignore vendored
View File

@ -27,15 +27,3 @@ venv/
# vim config
.vim/
# Coverage
.coverage
# mypy
.mypy_cache/
# pyenv
.python-version
# mise
mise.local.toml

View File

@ -1,5 +0,0 @@
repos:
- repo: https://github.com/commitizen-tools/commitizen
rev: v2.38.0
hooks:
- id: commitizen

3
MANIFEST.in Normal file
View File

@ -0,0 +1,3 @@
include LICENSE.txt
include README.md
include pandas_ods_reader/VERSION

View File

@ -0,0 +1 @@
0.1.1

View File

@ -1,11 +1,6 @@
import sys
import importlib.metadata
from .main import read_ods
if sys.version_info >= (3, 8):
from importlib.metadata import version
else:
from importlib_metadata import version
__version__ = version("pandas-ods-reader")
__version__ = importlib.metadata.version("pandas-ods-reader")

View File

@ -1,99 +1,66 @@
from collections import OrderedDict
from pathlib import Path
from types import ModuleType
from typing import Any, Dict, Iterator, List, Union
import pandas as pd
from .utils import sanitize_df
def get_columns_from_headers(backend: ModuleType, row: Any) -> List[str]:
repeat_until = -1
repeat_value = None
# columns as lists in a dictionary
columns = []
# parse the first row as column names
for k, cell in enumerate(row):
value, n_repeated = backend.get_value(cell)
if n_repeated > 0:
repeat_value = value
repeat_until = n_repeated + k
if not value and k <= repeat_until:
value = repeat_value
if k == repeat_until:
# reset to allow for more than one repeated column
repeat_until = -1
if value and value not in columns:
columns.append(value)
else:
column_name = value if value else "unnamed"
# add count to column name
idx = 1
while f"{column_name}.{idx}" in columns:
idx += 1
columns.append(f"{column_name}.{idx}")
return columns
def get_generic_columns(row: Any) -> List[str]:
return [f"column.{j}" for j in range(len(row))]
def get_columns(backend: ModuleType, row: Any, headers: bool) -> List[str]:
if headers:
return get_columns_from_headers(backend, row)
return get_generic_columns(row)
def parse_data(
backend: ModuleType,
rows: Iterator[List[Any]],
headers: bool,
columns: List[str],
skiprows: int,
) -> pd.DataFrame:
df_dict: OrderedDict[str, Any] = OrderedDict()
col_index: Dict[int, str] = {}
for _ in range(skiprows):
next(rows)
def parse_data(backend, rows, headers=True, columns=None):
df_dict = OrderedDict()
col_index = {}
for i, row in enumerate(rows):
# row is a list of cells
if headers and i == 0 and not columns:
repeat_until = -1
repeat_value = None
# columns as lists in a dictionary
columns = []
# parse the first row as column names
for k, cell in enumerate(row):
value, n_repeated = backend.get_value(cell)
if n_repeated > 0:
repeat_value = value
repeat_until = n_repeated + k
if not value and k <= repeat_until:
value = repeat_value
if k == repeat_until:
# reset to allow for more than one repeated column
repeat_until = -1
if value and value not in columns:
columns.append(value)
else:
column_name = value if value else "unnamed"
# add count to column name
idx = 1
while f"{column_name}.{idx}" in columns:
idx += 1
columns.append(f"{column_name}.{idx}")
elif i == 0:
# without headers, assign generic numbered column names
columns = columns if columns else [f"column.{j}" for j in range(len(row))]
if i == 0:
columns = columns or get_columns(backend, row, headers)
df_dict = OrderedDict((column, []) for column in columns)
# create index for the column headers
col_index = {j: column for j, column in enumerate(columns)}
if headers:
continue
for j, cell in enumerate(row):
if j < len(col_index):
value, _ = backend.get_value(cell, parsed=True)
# use header instead of column index
df_dict[col_index[j]].append(value)
# make sure all columns are of the same length
max_col_length = max(len(df_dict[col]) for col in df_dict)
for col in df_dict:
col_length = len(df_dict[col])
if col_length < max_col_length:
df_dict[col] += [None] * (max_col_length - col_length)
return pd.DataFrame(df_dict)
df = pd.DataFrame(df_dict)
return df
def read_data(
backend: ModuleType,
file_or_path: Path,
sheet_id: Union[str, int],
headers: bool,
columns: List[str],
skiprows: int,
) -> pd.DataFrame:
def read_data(backend, file_or_path, sheet_id, headers=True, columns=None):
doc = backend.get_doc(file_or_path)
rows = backend.get_rows(doc, sheet_id)
df = parse_data(backend, rows, headers=headers, columns=columns, skiprows=skiprows)
df = parse_data(backend, rows, headers=headers, columns=columns)
return sanitize_df(df)

View File

@ -1,9 +1,5 @@
"""Imports an ods or fods file into a DataFrame object"""
from pathlib import Path
from typing import Optional, List, Union
import pandas as pd
from .parsers import fods, ods
from . import algo
@ -12,38 +8,30 @@ from . import algo
EXT_MAP = {".ods": ods, ".fods": fods}
def read_ods(
file_or_path: Union[str, Path],
sheet: Union[str, int] = 1,
headers: bool = True,
columns: Optional[List[str]] = None,
skiprows: int = 0,
) -> pd.DataFrame:
def read_ods(file_or_path, sheet=1, headers=True, columns=None):
"""
Read in the provided ods or .ods file and convert it to `pandas.DataFrame`.
Will detect the filetype based on the file's extension or fall back to
ods.
Read in the provided ods file and convert it to `pandas.DataFrame`.
Args:
file_or_path: The path to the .ods or .fods file.
sheet: If `int`, the 1 based index of the sheet to be read. If `str`, the
name of the sheet to be read.
headers: If `True`, then the first row is treated as the list of column names.
columns: A list of column names to be used as headers.
skiprows: The number of rows to skip before starting to read data.
Parameters
----------
file_or_path : str
The path to the .ods or .fods file.
sheet : int or str, default 1
If `int`, the 1 based index of the sheet to be read. If `str`, the
name of the sheet to be read.
header : bool, default True
If `True`, then the first row is treated as the list of column names.
columns : list or None, optional
A list of column names to be used as headers.
Returns:
Returns
-------
pandas.DataFrame
The content of the specified sheet as a DataFrame.
"""
path = file_or_path if isinstance(file_or_path, Path) else Path(file_or_path)
if not path.is_file():
raise FileNotFoundError(f"file {path} does not exist")
backend = EXT_MAP.get(path.suffix, ods)
backend = EXT_MAP.get(Path(file_or_path).suffix)
if not backend:
raise ValueError("Unknown filetype.")
return algo.read_data(
backend,
path,
sheet,
headers=headers,
columns=columns or [],
skiprows=skiprows,
backend, file_or_path, sheet, headers=headers, columns=columns
)

View File

@ -1,6 +1,3 @@
from pathlib import Path
from typing import Iterator, Optional, Tuple, Union
from lxml import etree
@ -16,16 +13,15 @@ TABLE_CELL_REPEATED_ATTRIB = "number-columns-repeated"
VALUE_TYPE_ATTRIB = "value-type"
def get_doc(file_or_path: Path) -> etree._ElementTree:
def get_doc(file_or_path):
return etree.parse(str(file_or_path))
def get_sheet(spreadsheet: etree._Element, sheet_id: Union[str, int]) -> etree._Element:
def get_sheet(spreadsheet, sheet_id):
namespaces = spreadsheet.nsmap
if isinstance(sheet_id, str):
sheet = spreadsheet.find(
f"{TABLE_TAG}[@table:name='{sheet_id}']",
namespaces=namespaces,
f"{TABLE_TAG}[@table:name='{sheet_id}']", namespaces=namespaces
)
if sheet is None:
raise KeyError(f"There is no sheet named {sheet_id}.")
@ -36,39 +32,34 @@ def get_sheet(spreadsheet: etree._Element, sheet_id: Union[str, int]) -> etree._
return tables[sheet_id - 1]
def get_rows(
doc: etree._ElementTree,
sheet_id: Union[str, int],
) -> Iterator[etree._Element]:
def get_rows(doc, sheet_id):
if not isinstance(sheet_id, (str, int)):
raise ValueError("Sheet id has to be either `str` or `int`")
root = doc.getroot()
namespaces = root.nsmap
spreadsheet = doc.find(BODY_TAG, namespaces=namespaces).find( # type: ignore
spreadsheet = doc.find(BODY_TAG, namespaces=namespaces).find(
SPREADSHEET_TAG, namespaces=namespaces
)
sheet = get_sheet(spreadsheet, sheet_id)
return sheet.iterfind(TABLE_ROW_TAG, namespaces=namespaces)
rows = sheet.findall(TABLE_ROW_TAG, namespaces=namespaces)
return rows
def is_float(cell: etree._Element) -> bool:
def is_float(cell):
return (
cell.attrib.get(f"{{{cell.nsmap[OFFICE_KEY]}}}{VALUE_TYPE_ATTRIB}") == "float"
)
def get_value(
cell: etree._Element,
parsed: bool = False,
) -> Tuple[Optional[Union[str, float]], int]:
def get_value(cell, parsed=False):
text = cell.find(TABLE_CELL_TEXT_TAG, namespaces=cell.nsmap)
if text is None:
return None, 0
value: Union[str, float] = "".join(text.itertext())
value = text.text
if parsed and is_float(cell):
value = float(value)
_n_repeated = cell.attrib.get(
n_repeated = cell.attrib.get(
f"{{{cell.nsmap[TABLE_KEY]}}}{TABLE_CELL_REPEATED_ATTRIB}"
)
n_repeated = int(_n_repeated) if _n_repeated is not None else 0
n_repeated = int(n_repeated) if n_repeated is not None else 0
return value, n_repeated

View File

@ -1,28 +1,21 @@
from pathlib import Path
from typing import Any, Iterator, List, Tuple, Union
import ezodf # type: ignore[import]
from ezodf.document import FlatXMLDocument, PackagedDocument # type: ignore[import]
import ezodf
def get_doc(file_or_path: Path) -> Union[FlatXMLDocument, PackagedDocument]:
def get_doc(file_or_path):
return ezodf.opendoc(file_or_path)
def get_rows(
doc: Union[FlatXMLDocument, PackagedDocument],
sheet_id: Union[str, int],
) -> Iterator[List[ezodf.Cell]]:
def get_rows(doc, sheet_id):
if not isinstance(sheet_id, (int, str)):
raise ValueError("Sheet id has to be either `str` or `int`")
if isinstance(sheet_id, str):
sheets: List[str] = [sheet.name for sheet in doc.sheets]
sheets = [sheet.name for sheet in doc.sheets]
if sheet_id not in sheets:
raise KeyError("There is no sheet named {}".format(sheet_id))
sheet_id = sheets.index(sheet_id) + 1
sheet: ezodf.Sheet = doc.sheets[sheet_id - 1]
sheet = doc.sheets[sheet_id - 1]
return sheet.rows()
def get_value(cell: ezodf.Cell, parsed: bool = False) -> Tuple[Any, int]:
def get_value(cell, parsed=False):
return cell.value, 0

View File

@ -1,7 +1,5 @@
"""Provides utility functions for the parser"""
import pandas as pd
def ods_info(doc):
"""Print the number of sheets, their names, and number of rows and columns"""
@ -16,8 +14,8 @@ def ods_info(doc):
)
def sanitize_df(df: pd.DataFrame) -> pd.DataFrame:
"""Drop empty rows and columns from the DataFrame and return it."""
def sanitize_df(df):
"""Drop empty rows and columns from the DataFrame and returns it"""
# Delete empty rows
for i in df.index.tolist()[-1::-1]:
if df.iloc[i].isna().all():

1909
poetry.lock generated

File diff suppressed because it is too large Load Diff

View File

@ -1,42 +0,0 @@
[tool.poetry]
name = "pandas-ods-reader"
version = "1.0.2"
description = "Read in .ods and .fods files and return a pandas.DataFrame."
authors = ["iuvbio <iuvbio@users.noreply.github.com>"]
license = "MIT"
readme = "README.md"
repository = "https://github.com/iuvbio/pandas_ods_reader"
keywords = ["data", "io", "pandas", "ods"]
classifiers = [
"Development Status :: 5 - Production/Stable",
"License :: OSI Approved :: MIT License",
"Programming Language :: Python :: 3",
"Topic :: Utilities",
]
[tool.poetry.dependencies]
python = ">=3.9,<4"
ezodf = ">=0.3.2"
lxml = ">=4.9.2"
pandas = ">=2.2.3"
[tool.poetry.group.dev.dependencies]
black = ">=22.10.0"
pytest = ">=7.1.3"
pytest-cov = ">=4.0.0"
mypy = ">=0.991"
flake8 = ">=6.0.0"
pandas-stubs = ">=1.5.2.221213"
types-lxml = ">=2022.11.8"
commitizen = ">=2.38.0"
pre-commit = ">=3.7.1"
[tool.commitizen]
name = "cz_conventional_commits"
tag_format = "v$version"
version_provider = "poetry"
version_files = ["pyproject.toml:version"]
[build-system]
requires = ["poetry-core>=1.0.0"]
build-backend = "poetry.core.masonry.api"

31
setup.cfg Normal file
View File

@ -0,0 +1,31 @@
[metadata]
name = pandas_ods_reader
version = file: pandas_ods_reader/VERSION
description = Read in an ODS file and return it as a pandas.DataFrame
long_description = file: README.md, LICENSE.txt
long_description_content_type = text/markdown
classifiers =
Development Status :: 5 - Production/Stable
License :: OSI Approved :: MIT License
Programming Language :: Python :: 3
Topic :: Utilities
keywords = data, io, pandas, ods
url = https://github.com/iuvbio/pandas_ods_reader
author = iuvbio
author_email = cryptodemigod@protonmail.com
license = MIT
[options]
zip_safe = False
packages = find:
include_package_data = True
install_requires =
ezodf
lxml
pandas
[options.extras_require]
test = pytest
[aliases]
test = pytest

7
setup.py Normal file
View File

@ -0,0 +1,7 @@
from setuptools import setup
if __name__ == "__main__":
setup(
# see setup.cfg
)

View File

@ -1,423 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<office:document xmlns:presentation="urn:oasis:names:tc:opendocument:xmlns:presentation:1.0" xmlns:css3t="http://www.w3.org/TR/css3-text/" xmlns:grddl="http://www.w3.org/2003/g/data-view#" xmlns:xhtml="http://www.w3.org/1999/xhtml" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsd="http://www.w3.org/2001/XMLSchema" xmlns:chart="urn:oasis:names:tc:opendocument:xmlns:chart:1.0" xmlns:svg="urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" xmlns:draw="urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" xmlns:rpt="http://openoffice.org/2005/report" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:meta="urn:oasis:names:tc:opendocument:xmlns:meta:1.0" xmlns:config="urn:oasis:names:tc:opendocument:xmlns:config:1.0" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:of="urn:oasis:names:tc:opendocument:xmlns:of:1.2" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:ooo="http://openoffice.org/2004/office" xmlns:dr3d="urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" xmlns:formx="urn:openoffice:names:experimental:ooxml-odf-interop:xmlns:form:1.0" xmlns:calcext="urn:org:documentfoundation:names:experimental:calc:xmlns:calcext:1.0" xmlns:drawooo="http://openoffice.org/2010/draw" xmlns:field="urn:openoffice:names:experimental:ooo-ms-interop:xmlns:field:1.0" xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0" xmlns:number="urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" xmlns:ooow="http://openoffice.org/2004/writer" xmlns:oooc="http://openoffice.org/2004/calc" xmlns:tableooo="http://openoffice.org/2009/table" xmlns:loext="urn:org:documentfoundation:names:experimental:office:xmlns:loext:1.0" xmlns:math="http://www.w3.org/1998/Math/MathML" xmlns:form="urn:oasis:names:tc:opendocument:xmlns:form:1.0" xmlns:script="urn:oasis:names:tc:opendocument:xmlns:script:1.0" xmlns:dom="http://www.w3.org/2001/xml-events" xmlns:xforms="http://www.w3.org/2002/xforms" office:version="1.2" office:mimetype="application/vnd.oasis.opendocument.spreadsheet">
<office:meta><meta:initial-creator>Lukas Jansen</meta:initial-creator><meta:creation-date>2019-01-27T03:31:08.931482632</meta:creation-date><dc:date>2022-10-25T08:45:33.990049580</dc:date><meta:editing-duration>PT2M33S</meta:editing-duration><meta:editing-cycles>2</meta:editing-cycles><meta:generator>LibreOffice/6.4.7.2$Linux_X86_64 LibreOffice_project/40$Build-2</meta:generator><meta:document-statistic meta:table-count="1" meta:cell-count="55" meta:object-count="0"/></office:meta>
<office:settings>
<config:config-item-set config:name="ooo:view-settings">
<config:config-item config:name="VisibleAreaTop" config:type="int">0</config:config-item>
<config:config-item config:name="VisibleAreaLeft" config:type="int">0</config:config-item>
<config:config-item config:name="VisibleAreaWidth" config:type="int">11288</config:config-item>
<config:config-item config:name="VisibleAreaHeight" config:type="int">4967</config:config-item>
<config:config-item-map-indexed config:name="Views">
<config:config-item-map-entry>
<config:config-item config:name="ViewId" config:type="string">view1</config:config-item>
<config:config-item-map-named config:name="Tables">
<config:config-item-map-entry config:name="Sheet1">
<config:config-item config:name="CursorPositionX" config:type="int">5</config:config-item>
<config:config-item config:name="CursorPositionY" config:type="int">2</config:config-item>
<config:config-item config:name="HorizontalSplitMode" config:type="short">0</config:config-item>
<config:config-item config:name="VerticalSplitMode" config:type="short">0</config:config-item>
<config:config-item config:name="HorizontalSplitPosition" config:type="int">0</config:config-item>
<config:config-item config:name="VerticalSplitPosition" config:type="int">0</config:config-item>
<config:config-item config:name="ActiveSplitRange" config:type="short">2</config:config-item>
<config:config-item config:name="PositionLeft" config:type="int">0</config:config-item>
<config:config-item config:name="PositionRight" config:type="int">0</config:config-item>
<config:config-item config:name="PositionTop" config:type="int">0</config:config-item>
<config:config-item config:name="PositionBottom" config:type="int">0</config:config-item>
<config:config-item config:name="ZoomType" config:type="short">0</config:config-item>
<config:config-item config:name="ZoomValue" config:type="int">100</config:config-item>
<config:config-item config:name="PageViewZoomValue" config:type="int">60</config:config-item>
<config:config-item config:name="ShowGrid" config:type="boolean">true</config:config-item>
<config:config-item config:name="AnchoredTextOverflowLegacy" config:type="boolean">false</config:config-item>
</config:config-item-map-entry>
</config:config-item-map-named>
<config:config-item config:name="ActiveTable" config:type="string">Sheet1</config:config-item>
<config:config-item config:name="HorizontalScrollbarWidth" config:type="int">1861</config:config-item>
<config:config-item config:name="ZoomType" config:type="short">0</config:config-item>
<config:config-item config:name="ZoomValue" config:type="int">100</config:config-item>
<config:config-item config:name="PageViewZoomValue" config:type="int">60</config:config-item>
<config:config-item config:name="ShowPageBreakPreview" config:type="boolean">false</config:config-item>
<config:config-item config:name="ShowZeroValues" config:type="boolean">true</config:config-item>
<config:config-item config:name="ShowNotes" config:type="boolean">true</config:config-item>
<config:config-item config:name="ShowGrid" config:type="boolean">true</config:config-item>
<config:config-item config:name="GridColor" config:type="int">12632256</config:config-item>
<config:config-item config:name="ShowPageBreaks" config:type="boolean">true</config:config-item>
<config:config-item config:name="HasColumnRowHeaders" config:type="boolean">true</config:config-item>
<config:config-item config:name="HasSheetTabs" config:type="boolean">true</config:config-item>
<config:config-item config:name="IsOutlineSymbolsSet" config:type="boolean">true</config:config-item>
<config:config-item config:name="IsValueHighlightingEnabled" config:type="boolean">false</config:config-item>
<config:config-item config:name="IsSnapToRaster" config:type="boolean">false</config:config-item>
<config:config-item config:name="RasterIsVisible" config:type="boolean">false</config:config-item>
<config:config-item config:name="RasterResolutionX" config:type="int">1000</config:config-item>
<config:config-item config:name="RasterResolutionY" config:type="int">1000</config:config-item>
<config:config-item config:name="RasterSubdivisionX" config:type="int">1</config:config-item>
<config:config-item config:name="RasterSubdivisionY" config:type="int">1</config:config-item>
<config:config-item config:name="IsRasterAxisSynchronized" config:type="boolean">true</config:config-item>
<config:config-item config:name="AnchoredTextOverflowLegacy" config:type="boolean">false</config:config-item>
</config:config-item-map-entry>
</config:config-item-map-indexed>
</config:config-item-set>
<config:config-item-set config:name="ooo:configuration-settings">
<config:config-item config:name="EmbedComplexScriptFonts" config:type="boolean">true</config:config-item>
<config:config-item config:name="EmbedAsianScriptFonts" config:type="boolean">true</config:config-item>
<config:config-item config:name="EmbedLatinScriptFonts" config:type="boolean">true</config:config-item>
<config:config-item config:name="EmbedOnlyUsedFonts" config:type="boolean">false</config:config-item>
<config:config-item config:name="RasterResolutionY" config:type="int">1000</config:config-item>
<config:config-item config:name="IsOutlineSymbolsSet" config:type="boolean">true</config:config-item>
<config:config-item config:name="RasterSubdivisionY" config:type="int">1</config:config-item>
<config:config-item config:name="GridColor" config:type="int">12632256</config:config-item>
<config:config-item config:name="HasColumnRowHeaders" config:type="boolean">true</config:config-item>
<config:config-item config:name="ShowNotes" config:type="boolean">true</config:config-item>
<config:config-item config:name="HasSheetTabs" config:type="boolean">true</config:config-item>
<config:config-item config:name="PrinterSetup" config:type="base64Binary">kwH+/0dlbmVyaWMgUHJpbnRlcgAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAU0dFTlBSVAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAWAAMAtAAAAAAAAAAEAAhSAAAEdAAASm9iRGF0YSAxCnByaW50ZXI9R2VuZXJpYyBQcmludGVyCm9yaWVudGF0aW9uPVBvcnRyYWl0CmNvcGllcz0xCmNvbGxhdGU9ZmFsc2UKbWFyZ2luZGFqdXN0bWVudD0wLDAsMCwwCmNvbG9yZGVwdGg9MjQKcHNsZXZlbD0wCnBkZmRldmljZT0xCmNvbG9yZGV2aWNlPTAKUFBEQ29udGV4RGF0YQpQYWdlU2l6ZTpBNAAAEgBDT01QQVRfRFVQTEVYX01PREUPAER1cGxleE1vZGU6Ok9mZg==</config:config-item>
<config:config-item config:name="RasterResolutionX" config:type="int">1000</config:config-item>
<config:config-item config:name="SyntaxStringRef" config:type="short">7</config:config-item>
<config:config-item config:name="RasterIsVisible" config:type="boolean">false</config:config-item>
<config:config-item config:name="ShowZeroValues" config:type="boolean">true</config:config-item>
<config:config-item config:name="ApplyUserData" config:type="boolean">true</config:config-item>
<config:config-item config:name="RasterSubdivisionX" config:type="int">1</config:config-item>
<config:config-item config:name="IsRasterAxisSynchronized" config:type="boolean">true</config:config-item>
<config:config-item config:name="LoadReadonly" config:type="boolean">false</config:config-item>
<config:config-item config:name="AutoCalculate" config:type="boolean">true</config:config-item>
<config:config-item config:name="EmbedFonts" config:type="boolean">false</config:config-item>
<config:config-item config:name="SaveThumbnail" config:type="boolean">true</config:config-item>
<config:config-item config:name="ShowGrid" config:type="boolean">true</config:config-item>
<config:config-item config:name="PrinterName" config:type="string">Generic Printer</config:config-item>
<config:config-item config:name="PrinterPaperFromSetup" config:type="boolean">false</config:config-item>
<config:config-item config:name="CharacterCompressionType" config:type="short">0</config:config-item>
<config:config-item config:name="LinkUpdateMode" config:type="short">3</config:config-item>
<config:config-item config:name="ShowPageBreaks" config:type="boolean">true</config:config-item>
<config:config-item config:name="SaveVersionOnClose" config:type="boolean">false</config:config-item>
<config:config-item config:name="IsSnapToRaster" config:type="boolean">false</config:config-item>
<config:config-item config:name="IsKernAsianPunctuation" config:type="boolean">false</config:config-item>
<config:config-item config:name="UpdateFromTemplate" config:type="boolean">true</config:config-item>
<config:config-item config:name="IsDocumentShared" config:type="boolean">false</config:config-item>
<config:config-item config:name="AllowPrintJobCancel" config:type="boolean">true</config:config-item>
</config:config-item-set>
</office:settings>
<office:scripts>
<office:script script:language="ooo:Basic">
<ooo:libraries xmlns:ooo="http://openoffice.org/2004/office" xmlns:xlink="http://www.w3.org/1999/xlink"/>
</office:script>
</office:scripts>
<office:font-face-decls>
<style:font-face style:name="Liberation Sans" svg:font-family="&apos;Liberation Sans&apos;" style:font-family-generic="swiss" style:font-pitch="variable"/>
<style:font-face style:name="Lohit Devanagari" svg:font-family="&apos;Lohit Devanagari&apos;" style:font-family-generic="system" style:font-pitch="variable"/>
<style:font-face style:name="Noto Sans CJK SC" svg:font-family="&apos;Noto Sans CJK SC&apos;" style:font-family-generic="system" style:font-pitch="variable"/>
<style:font-face style:name="Noto Sans CJK SC Regular" svg:font-family="&apos;Noto Sans CJK SC Regular&apos;" style:font-family-generic="system" style:font-pitch="variable"/>
</office:font-face-decls>
<office:styles>
<style:default-style style:family="table-cell">
<style:paragraph-properties style:tab-stop-distance="1.25cm"/>
<style:text-properties style:font-name="Liberation Sans" fo:language="en" fo:country="NZ" style:font-name-asian="Noto Sans CJK SC Regular" style:language-asian="zh" style:country-asian="CN" style:font-name-complex="Lohit Devanagari" style:language-complex="hi" style:country-complex="IN"/>
</style:default-style>
<number:number-style style:name="N0">
<number:number number:min-integer-digits="1"/>
</number:number-style>
<style:style style:name="Default" style:family="table-cell"/>
<style:style style:name="Heading" style:family="table-cell" style:parent-style-name="Default">
<style:text-properties fo:color="#000000" fo:font-size="24pt" fo:font-style="normal" fo:font-weight="bold"/>
</style:style>
<style:style style:name="Heading_20_1" style:display-name="Heading 1" style:family="table-cell" style:parent-style-name="Heading">
<style:text-properties fo:color="#000000" fo:font-size="18pt" fo:font-style="normal" fo:font-weight="normal"/>
</style:style>
<style:style style:name="Heading_20_2" style:display-name="Heading 2" style:family="table-cell" style:parent-style-name="Heading">
<style:text-properties fo:color="#000000" fo:font-size="12pt" fo:font-style="normal" fo:font-weight="normal"/>
</style:style>
<style:style style:name="Text" style:family="table-cell" style:parent-style-name="Default"/>
<style:style style:name="Note" style:family="table-cell" style:parent-style-name="Text">
<style:table-cell-properties fo:background-color="#ffffcc" style:diagonal-bl-tr="none" style:diagonal-tl-br="none" fo:border="0.74pt solid #808080"/>
<style:text-properties fo:color="#333333" fo:font-size="10pt" fo:font-style="normal" fo:font-weight="normal"/>
</style:style>
<style:style style:name="Footnote" style:family="table-cell" style:parent-style-name="Text">
<style:text-properties fo:color="#808080" fo:font-size="10pt" fo:font-style="italic" fo:font-weight="normal"/>
</style:style>
<style:style style:name="Hyperlink" style:family="table-cell" style:parent-style-name="Text">
<style:text-properties fo:color="#0000ee" fo:font-size="10pt" fo:font-style="normal" style:text-underline-style="solid" style:text-underline-width="auto" style:text-underline-color="#0000ee" fo:font-weight="normal"/>
</style:style>
<style:style style:name="Status" style:family="table-cell" style:parent-style-name="Default"/>
<style:style style:name="Good" style:family="table-cell" style:parent-style-name="Status">
<style:table-cell-properties fo:background-color="#ccffcc"/>
<style:text-properties fo:color="#006600" fo:font-size="10pt" fo:font-style="normal" fo:font-weight="normal"/>
</style:style>
<style:style style:name="Neutral" style:family="table-cell" style:parent-style-name="Status">
<style:table-cell-properties fo:background-color="#ffffcc"/>
<style:text-properties fo:color="#996600" fo:font-size="10pt" fo:font-style="normal" fo:font-weight="normal"/>
</style:style>
<style:style style:name="Bad" style:family="table-cell" style:parent-style-name="Status">
<style:table-cell-properties fo:background-color="#ffcccc"/>
<style:text-properties fo:color="#cc0000" fo:font-size="10pt" fo:font-style="normal" fo:font-weight="normal"/>
</style:style>
<style:style style:name="Warning" style:family="table-cell" style:parent-style-name="Status">
<style:text-properties fo:color="#cc0000" fo:font-size="10pt" fo:font-style="normal" fo:font-weight="normal"/>
</style:style>
<style:style style:name="Error" style:family="table-cell" style:parent-style-name="Status">
<style:table-cell-properties fo:background-color="#cc0000"/>
<style:text-properties fo:color="#ffffff" fo:font-size="10pt" fo:font-style="normal" fo:font-weight="bold"/>
</style:style>
<style:style style:name="Accent" style:family="table-cell" style:parent-style-name="Default">
<style:text-properties fo:color="#000000" fo:font-size="10pt" fo:font-style="normal" fo:font-weight="bold"/>
</style:style>
<style:style style:name="Accent_20_1" style:display-name="Accent 1" style:family="table-cell" style:parent-style-name="Accent">
<style:table-cell-properties fo:background-color="#000000"/>
<style:text-properties fo:color="#ffffff" fo:font-size="10pt" fo:font-style="normal" fo:font-weight="normal"/>
</style:style>
<style:style style:name="Accent_20_2" style:display-name="Accent 2" style:family="table-cell" style:parent-style-name="Accent">
<style:table-cell-properties fo:background-color="#808080"/>
<style:text-properties fo:color="#ffffff" fo:font-size="10pt" fo:font-style="normal" fo:font-weight="normal"/>
</style:style>
<style:style style:name="Accent_20_3" style:display-name="Accent 3" style:family="table-cell" style:parent-style-name="Accent">
<style:table-cell-properties fo:background-color="#dddddd"/>
</style:style>
<style:style style:name="Result" style:family="table-cell" style:parent-style-name="Default">
<style:text-properties fo:color="#000000" fo:font-size="10pt" fo:font-style="italic" style:text-underline-style="solid" style:text-underline-width="auto" style:text-underline-color="#000000" fo:font-weight="bold"/>
</style:style>
</office:styles>
<office:automatic-styles>
<style:style style:name="co1" style:family="table-column">
<style:table-column-properties fo:break-before="auto" style:column-width="2.258cm"/>
</style:style>
<style:style style:name="ro1" style:family="table-row">
<style:table-row-properties style:row-height="0.452cm" fo:break-before="auto" style:use-optimal-row-height="true"/>
</style:style>
<style:style style:name="ta1" style:family="table" style:master-page-name="Default">
<style:table-properties table:display="true" style:writing-mode="lr-tb"/>
</style:style>
<number:number-style style:name="N2">
<number:number number:decimal-places="2" loext:min-decimal-places="2" number:min-integer-digits="1"/>
</number:number-style>
<style:page-layout style:name="pm1">
<style:page-layout-properties style:writing-mode="lr-tb"/>
<style:header-style>
<style:header-footer-properties fo:min-height="0.75cm" fo:margin-left="0cm" fo:margin-right="0cm" fo:margin-bottom="0.25cm"/>
</style:header-style>
<style:footer-style>
<style:header-footer-properties fo:min-height="0.75cm" fo:margin-left="0cm" fo:margin-right="0cm" fo:margin-top="0.25cm"/>
</style:footer-style>
</style:page-layout>
<style:page-layout style:name="pm2">
<style:page-layout-properties style:writing-mode="lr-tb"/>
<style:header-style>
<style:header-footer-properties fo:min-height="0.75cm" fo:margin-left="0cm" fo:margin-right="0cm" fo:margin-bottom="0.25cm" fo:border="2.49pt solid #000000" fo:padding="0.018cm" fo:background-color="#c0c0c0">
<style:background-image/>
</style:header-footer-properties>
</style:header-style>
<style:footer-style>
<style:header-footer-properties fo:min-height="0.75cm" fo:margin-left="0cm" fo:margin-right="0cm" fo:margin-top="0.25cm" fo:border="2.49pt solid #000000" fo:padding="0.018cm" fo:background-color="#c0c0c0">
<style:background-image/>
</style:header-footer-properties>
</style:footer-style>
</style:page-layout>
</office:automatic-styles>
<office:master-styles>
<style:master-page style:name="Default" style:page-layout-name="pm1">
<style:header>
<text:p><text:sheet-name>???</text:sheet-name></text:p>
</style:header>
<style:header-left style:display="false"/>
<style:footer>
<text:p>Page <text:page-number>1</text:page-number></text:p>
</style:footer>
<style:footer-left style:display="false"/>
</style:master-page>
<style:master-page style:name="Report" style:page-layout-name="pm2">
<style:header>
<style:region-left>
<text:p><text:sheet-name>???</text:sheet-name><text:s/>(<text:title>???</text:title>)</text:p>
</style:region-left>
<style:region-right>
<text:p><text:date style:data-style-name="N2" text:date-value="2022-10-25">00/00/0000</text:date>, <text:time style:data-style-name="N2" text:time-value="08:45:14.557684313">00:00:00</text:time></text:p>
</style:region-right>
</style:header>
<style:header-left style:display="false"/>
<style:footer>
<text:p>Page <text:page-number>1</text:page-number><text:s/>/ <text:page-count>99</text:page-count></text:p>
</style:footer>
<style:footer-left style:display="false"/>
</style:master-page>
</office:master-styles>
<office:body>
<office:spreadsheet>
<table:calculation-settings table:automatic-find-labels="false" table:use-regular-expressions="false" table:use-wildcards="true"/>
<table:table table:name="Sheet1" table:style-name="ta1">
<table:table-column table:style-name="co1" table:number-columns-repeated="5" table:default-cell-style-name="Default"/>
<table:table-row table:style-name="ro1">
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>A</text:p>
</table:table-cell>
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>B</text:p>
</table:table-cell>
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>C</text:p>
</table:table-cell>
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>D</text:p>
</table:table-cell>
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>E</text:p>
</table:table-cell>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:number-columns-repeated="5" office:value-type="string" calcext:value-type="string">
<text:p>skip this</text:p>
</table:table-cell>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>a</text:p>
</table:table-cell>
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>b</text:p>
</table:table-cell>
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>c</text:p>
</table:table-cell>
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>d</text:p>
</table:table-cell>
<table:table-cell office:value-type="string" calcext:value-type="string">
<text:p>e</text:p>
</table:table-cell>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="10" calcext:value-type="float">
<text:p>10</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="0" calcext:value-type="float">
<text:p>0</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="58" calcext:value-type="float">
<text:p>58</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="29" calcext:value-type="float">
<text:p>29</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="77" calcext:value-type="float">
<text:p>77</text:p>
</table:table-cell>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="5" calcext:value-type="float">
<text:p>5</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="47" calcext:value-type="float">
<text:p>47</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="50" calcext:value-type="float">
<text:p>50</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="99" calcext:value-type="float">
<text:p>99</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="79" calcext:value-type="float">
<text:p>79</text:p>
</table:table-cell>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="75" calcext:value-type="float">
<text:p>75</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="25" calcext:value-type="float">
<text:p>25</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="86" calcext:value-type="float">
<text:p>86</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="47" calcext:value-type="float">
<text:p>47</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="65" calcext:value-type="float">
<text:p>65</text:p>
</table:table-cell>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="82" calcext:value-type="float">
<text:p>82</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="45" calcext:value-type="float">
<text:p>45</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="88" calcext:value-type="float">
<text:p>88</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="48" calcext:value-type="float">
<text:p>48</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="74" calcext:value-type="float">
<text:p>74</text:p>
</table:table-cell>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="72" calcext:value-type="float">
<text:p>72</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="47" calcext:value-type="float">
<text:p>47</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="57" calcext:value-type="float">
<text:p>57</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="82" calcext:value-type="float">
<text:p>82</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="46" calcext:value-type="float">
<text:p>46</text:p>
</table:table-cell>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="40" calcext:value-type="float">
<text:p>40</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="54" calcext:value-type="float">
<text:p>54</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="26" calcext:value-type="float">
<text:p>26</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="97" calcext:value-type="float">
<text:p>97</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="1" calcext:value-type="float">
<text:p>1</text:p>
</table:table-cell>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="54" calcext:value-type="float">
<text:p>54</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="26" calcext:value-type="float">
<text:p>26</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="99" calcext:value-type="float">
<text:p>99</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="63" calcext:value-type="float">
<text:p>63</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="49" calcext:value-type="float">
<text:p>49</text:p>
</table:table-cell>
</table:table-row>
<table:table-row table:style-name="ro1">
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="87" calcext:value-type="float">
<text:p>87</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="24" calcext:value-type="float">
<text:p>24</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="47" calcext:value-type="float">
<text:p>47</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="87" calcext:value-type="float">
<text:p>87</text:p>
</table:table-cell>
<table:table-cell table:formula="of:=RANDBETWEEN(0; 100)" office:value-type="float" office:value="15" calcext:value-type="float">
<text:p>15</text:p>
</table:table-cell>
</table:table-row>
</table:table>
<table:named-expressions/>
</office:spreadsheet>
</office:body>
</office:document>

Binary file not shown.

View File

@ -1,5 +1,4 @@
"""Tests for core read_ods function with different files"""
from pathlib import Path
import pandas as pd
@ -17,161 +16,132 @@ duplicated_column_names_file = "example_duplicated_column_names.ods"
col_len_file = "example_col_lengths.ods"
missing_header_file = "example_missing_header.ods"
mixed_dtypes_file = "mixed_dtypes.ods"
skiprows_file = "example_skiprows.ods"
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_header_file_simple(suffix: str) -> None:
"""Test a simple file with headers."""
path = rsc / header_file
df = read_ods(path.with_suffix(suffix))
class TestOdsReader:
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_header_file_simple(self, suffix):
assert isinstance(df, pd.DataFrame)
assert len(df) == 10
assert len(df.columns) == 5
path = rsc / header_file
df = read_ods(path.with_suffix(suffix))
assert isinstance(df, pd.DataFrame)
assert len(df) == 10
assert len(df.columns) == 5
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_header_file_with_int(suffix: str) -> None:
"""Test referencing a sheet by index."""
path = rsc / header_file
df = read_ods(path.with_suffix(suffix), 1)
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_header_file_with_int(self, suffix):
assert isinstance(df, pd.DataFrame)
assert len(df) == 10
assert len(df.columns) == 5
path = rsc / header_file
df = read_ods(path.with_suffix(suffix), 1)
assert isinstance(df, pd.DataFrame)
assert len(df) == 10
assert len(df.columns) == 5
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_header_file_with_str(suffix: str) -> None:
"""Test referencing a sheet by name."""
path = rsc / header_file
df = read_ods(path.with_suffix(suffix), "Sheet1")
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_header_file_with_str(self, suffix):
assert isinstance(df, pd.DataFrame)
assert len(df) == 10
assert len(df.columns) == 5
path = rsc / header_file
df = read_ods(path.with_suffix(suffix), "Sheet1")
assert isinstance(df, pd.DataFrame)
assert len(df) == 10
assert len(df.columns) == 5
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_header_file_with_cols(suffix: str) -> None:
"""Test overwriting haders with column names."""
path = rsc / header_file
columns = ["One", "Two", "Three", "Four", "Five"]
df = read_ods(path.with_suffix(suffix), "Sheet1", columns=columns)
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_header_file_with_cols(self, suffix):
assert list(df.columns) == columns
assert len(df) == 10
assert len(df.columns) == 5
path = rsc / header_file
columns = ["One", "Two", "Three", "Four", "Five"]
df = read_ods(path.with_suffix(suffix), "Sheet1", columns=columns)
assert list(df.columns) == columns
assert len(df) == 10
assert len(df.columns) == 5
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_no_header_file_no_cols(suffix: str) -> None:
"""Test autogeneration of headers with no headers and not column names."""
path = rsc / no_header_file
df = read_ods(path.with_suffix(suffix), 1, headers=False)
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_no_header_file_no_cols(self, suffix):
assert list(df.columns) == [f"column.{i}" for i in range(len(df.columns))]
assert len(df) == 10
assert len(df.columns) == 5
path = rsc / no_header_file
df = read_ods(path.with_suffix(suffix), 1, headers=False)
assert list(df.columns) == [f"column.{i}" for i in range(len(df.columns))]
assert len(df) == 10
assert len(df.columns) == 5
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_no_header_file_with_cols(suffix: str) -> None:
"""Test reading a file with no headers and passing column names."""
path = rsc / no_header_file
columns = ["A", "B", "C", "D", "E"]
df = read_ods(path.with_suffix(suffix), 1, headers=False, columns=columns)
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_no_header_file_with_cols(self, suffix):
assert list(df.columns) == columns
assert len(df) == 10
path = rsc / no_header_file
columns = ["A", "B", "C", "D", "E"]
df = read_ods(path.with_suffix(suffix), 1, headers=False, columns=columns)
assert list(df.columns) == columns
assert len(df) == 10
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_duplicated_column_names(suffix: str) -> None:
"""Test numbering duplicate column names."""
path = rsc / duplicated_column_names_file
df = read_ods(path.with_suffix(suffix), 1)
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_duplicated_column_names(self, suffix):
assert isinstance(df, pd.DataFrame)
assert len(df.columns) == 4
assert "website.1" in df.columns
path = rsc / duplicated_column_names_file
df = read_ods(path.with_suffix(suffix), 1)
assert isinstance(df, pd.DataFrame)
assert len(df.columns) == 4
assert "website.1" in df.columns
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_header_file_col_len(suffix: str) -> None:
"""Test the correct number of columns is read."""
path = rsc / col_len_file
df = read_ods(path.with_suffix(suffix), 1)
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_header_file_col_len(self, suffix):
assert isinstance(df, pd.DataFrame)
assert len(df) == 10
assert len(df.columns) == 5
path = rsc / col_len_file
df = read_ods(path.with_suffix(suffix), 1)
assert isinstance(df, pd.DataFrame)
assert len(df) == 10
assert len(df.columns) == 5
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_wrong_id_type(suffix: str) -> None:
"""Verify passing a wrong id type raises an error."""
path = rsc / header_file
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_wrong_id_type(self, suffix):
with pytest.raises(ValueError) as e_info:
read_ods(path.with_suffix(suffix), 1.0) # type: ignore[arg-type]
assert e_info.match("Sheet id has to be either `str` or `int`")
path = rsc / header_file
with pytest.raises(ValueError) as e_info:
read_ods(path.with_suffix(suffix), 1.0)
assert e_info.match("Sheet id has to be either `str` or `int`")
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_non_existent_sheet(suffix: str) -> None:
"""Verify referencing a non-existent sheet raises an error."""
path = rsc / header_file
sheet_name = "No_Sheet"
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_non_existent_sheet(self, suffix):
with pytest.raises(KeyError) as e_info:
read_ods(path.with_suffix(suffix), sheet_name)
assert e_info.match(f"There is no sheet named {sheet_name}")
path = rsc / header_file
sheet_name = "No_Sheet"
with pytest.raises(KeyError) as e_info:
read_ods(path.with_suffix(suffix), sheet_name)
assert e_info.match(f"There is no sheet named {sheet_name}")
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_missing_header(suffix: str) -> None:
"""Verify that a missing header is named."""
path = rsc / missing_header_file
df = read_ods(path.with_suffix(suffix), 1)
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_missing_header(self, suffix):
assert isinstance(df, pd.DataFrame)
assert len(df) == 10
assert len(df.columns) == 5
path = rsc / missing_header_file
df = read_ods(path.with_suffix(suffix), 1)
assert df.columns[2] == "unnamed.1"
assert isinstance(df, pd.DataFrame)
assert len(df) == 10
assert len(df.columns) == 5
assert df.columns[2] == "unnamed.1"
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_mixed_dtypes(suffix: str) -> None:
"""Verify loading a df with mixed types."""
path = rsc / mixed_dtypes_file
df = read_ods(path.with_suffix(suffix), 1)
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_mixed_dtypes(self, suffix):
assert isinstance(df, pd.DataFrame)
assert len(df) == 10
assert len(df.columns) == 5
path = rsc / mixed_dtypes_file
df = read_ods(path.with_suffix(suffix), 1)
type_list = [float, object, float, float, object]
assert df.dtypes.tolist() == type_list
col_b_types = [type(v) for v in df.B.values]
assert str in col_b_types and float in col_b_types
assert isinstance(df, pd.DataFrame)
assert len(df) == 10
assert len(df.columns) == 5
@pytest.mark.parametrize("suffix", [".ods", ".fods"])
def test_skiprows(suffix: str) -> None:
"""Verify skipping rows works correctly."""
path = rsc / skiprows_file
df = read_ods(path.with_suffix(suffix), skiprows=2)
assert isinstance(df, pd.DataFrame)
assert len(df) == 8
assert len(df.columns) == 5
assert df.columns.tolist() == ["a", "b", "c", "d", "e"]
def test_invalid_path() -> None:
"""Verify passing an invalid path raises an error."""
path = rsc / "does-not-exist.ods"
with pytest.raises(FileNotFoundError, match="does not exist"):
read_ods(path)
type_list = [float, object, float, float, object]
assert df.dtypes.tolist() == type_list
col_b_types = [type(v) for v in df.B.values]
assert str in col_b_types and float in col_b_types