reorganize structure

This commit is contained in:
iuvbio 2021-08-18 23:30:13 +02:00
parent 255698a1e2
commit 3ddf3a19cf
5 changed files with 87 additions and 91 deletions

View File

@ -0,0 +1 @@
0.0.8

View File

@ -1,65 +1,10 @@
"""Imports an ods file into a DataFrame object"""
from collections import OrderedDict
import ezodf
import pandas as pd
from .parsers import ods
from .tools import sanitize_df
def load_ods(doc, sheet_id, headers=True, columns=None):
    """Build a pandas.DataFrame from a single sheet of an opened document.

    :param doc: document object; ``doc.sheets`` is iterated (for the sheet
        names) and indexed by position
    :param sheet_id: sheet name (``str``) or 1-based sheet position (``int``)
    :param headers: when true, the first row is not treated as data; unless
        ``columns`` is given it supplies the column names
    :param columns: optional column names; when truthy they take precedence
        over any names found in the first row
    :returns: DataFrame with one column per name, filled row by row
    :raises ValueError: if ``sheet_id`` is neither ``str`` nor ``int``
    :raises KeyError: if no sheet is named ``sheet_id``
    """
    if not isinstance(sheet_id, (int, str)):
        raise ValueError("Sheet id has to be either `str` or `int`")

    # Resolve the sheet to a 0-based position.
    if isinstance(sheet_id, str):
        names = [entry.name for entry in doc.sheets]
        if sheet_id not in names:
            raise KeyError("There is no sheet named {}".format(sheet_id))
        position = names.index(sheet_id)
    else:
        position = sheet_id - 1
    sheet = doc.sheets[position]

    data = OrderedDict()
    labels = []  # column name for each cell position
    for row_number, cells in enumerate(sheet.rows()):
        if row_number == 0:
            if headers and not columns:
                # Derive the names from the first row.  Empty or repeated
                # names get a numeric suffix so every name is distinct.
                columns = []
                for cell in cells:
                    label = cell.value
                    if not label or label in columns:
                        base = label if label else "unnamed"
                        suffix = 1
                        while "{}.{}".format(base, suffix) in columns:
                            suffix += 1
                        label = "{}.{}".format(base, suffix)
                    columns.append(label)
            elif not columns:
                # No header row and no explicit names: number the columns.
                columns = [f"column.{k}" for k in range(len(cells))]
            data = OrderedDict((name, []) for name in columns)
            labels = list(columns)
            if headers:
                # The first row held names (used or overridden), not data.
                continue
        # Cells beyond the last known column are dropped.
        for name, cell in zip(labels, cells):
            data[name].append(cell.value)
    return pd.DataFrame(data)
def read_ods(file_or_path, sheet=1, headers=True, columns=None):
"""
This function reads in the provided ods file and converts it to a
@ -79,5 +24,5 @@ def read_ods(file_or_path, sheet=1, headers=True, columns=None):
the ODS file as a pandas DataFrame
"""
doc = ezodf.opendoc(file_or_path)
df = load_ods(doc, sheet, headers, columns)
df = ods.load_ods(doc, sheet, headers, columns)
return sanitize_df(df)

View File

@ -0,0 +1,53 @@
from collections import OrderedDict
import pandas as pd
def load_ods(doc, sheet_id, headers=True, columns=None):
    """Convert one sheet of an ODS document into a pandas.DataFrame.

    :param doc: document object exposing ``sheets``; the collection is
        iterated to look up sheet names and indexed by position
    :param sheet_id: name of the sheet (``str``) or its 1-based position
        (``int``)
    :param headers: if true, the first row is a header row and is not
        returned as data; its cells name the columns unless ``columns``
        is given
    :param columns: optional list of column names; if truthy it overrides
        the names from the header row
    :returns: a DataFrame with one column per name; cells missing from a
        row (row shorter than the list of names) are filled with ``None``
    :raises ValueError: if ``sheet_id`` is neither ``str`` nor ``int``
    :raises KeyError: if ``sheet_id`` is a name no sheet carries
    """
    if not isinstance(sheet_id, (int, str)):
        raise ValueError("Sheet id has to be either `str` or `int`")
    if isinstance(sheet_id, str):
        sheets = [sheet.name for sheet in doc.sheets]
        if sheet_id not in sheets:
            raise KeyError("There is no sheet named {}".format(sheet_id))
        sheet_id = sheets.index(sheet_id) + 1
    sheet = doc.sheets[sheet_id - 1]

    df_dict = OrderedDict()
    col_names = []  # column name for each cell position
    for i, row in enumerate(sheet.rows()):
        # row is a list of cells
        if headers and i == 0 and not columns:
            # take the column names from the first row
            columns = []
            for cell in row:
                name = cell.value
                if not name or name in columns:
                    # empty or repeated name: add a count so it is unique
                    base = name if name else "unnamed"
                    idx = 1
                    while f"{base}.{idx}" in columns:
                        idx += 1
                    name = f"{base}.{idx}"
                columns.append(name)
            df_dict = OrderedDict((column, []) for column in columns)
            col_names = list(columns)
            continue
        elif i == 0:
            if not columns:
                columns = [f"column.{j}" for j in range(len(row))]
            df_dict = OrderedDict((column, []) for column in columns)
            col_names = list(columns)
            if headers:
                # the header row is overridden by `columns`, skip it
                continue
        values = [cell.value for cell in row]
        # Pad short rows so every column receives exactly one value per
        # row; otherwise the lists differ in length and pandas refuses to
        # build the frame.  Surplus cells are dropped by zip().
        values.extend([None] * (len(col_names) - len(values)))
        for column, value in zip(col_names, values):
            df_dict[column].append(value)
    return pd.DataFrame(df_dict)

View File

@ -1,2 +1,30 @@
[metadata]
name = pandas_ods_reader
version = file: pandas_ods_reader/VERSION
description = Read in an ODS file and return it as a pandas.DataFrame
long_description = file: README.md, LICENSE.txt
long_description_content_type = text/markdown
classifiers =
Development Status :: 4 - Beta
License :: OSI Approved :: MIT License
Programming Language :: Python :: 3
Topic :: Utilities
keywords = data, io, pandas, ods
url = http://github.com/iuvbio/pandas_ods_reader
author = iuvbio
author_email = cryptodemigod@protonmail.com
license = MIT
[options]
zip_safe = False
packages = find:
install_requires =
ezodf
pandas
lxml
[options.extras_require]
test = pytest
[aliases]
test = pytest

View File

@ -1,35 +1,4 @@
from setuptools import setup, find_packages
from setuptools import setup
# Read the package version from the ``__version__`` assignment in the
# package's __init__.py; it stays None when no such line exists.
version = None
with open('pandas_ods_reader/__init__.py') as init_file:
    for text_line in init_file:
        if text_line.startswith('__version__'):
            # Right-hand side of "__version__ = '...'" without its quotes.
            version = text_line.split(' = ')[1].strip()[1:-1]

# The README is used as the long description of the distribution.
with open("README.md", "r") as readme:
    long_description = readme.read()

metadata = {
    "name": "pandas_ods_reader",
    "version": version,
    "description": "Read in an ODS file and return it as a pandas.DataFrame",
    "long_description": long_description,
    "long_description_content_type": "text/markdown",
    "classifiers": [
        "Development Status :: 2 - Beta",
        "License :: OSI Approved :: MIT License",
        "Programming Language :: Python :: 3",
        "Topic :: Utilities",
    ],
    "keywords": "data io pandas ods",
    "url": "http://github.com/iuvbio/pandas_ods_reader",
    "author": "iuvbio",
    "author_email": "cryptodemigod@protonmail.com",
    "license": "MIT",
    "packages": find_packages(),
    "zip_safe": False,
    "install_requires": ["ezodf", "pandas", "lxml"],
    "tests_require": ["pytest"],
}
setup(**metadata)
setup()