From 3ddf3a19cfa1c3123a32eba8c5cf714f43727e9f Mon Sep 17 00:00:00 2001 From: iuvbio Date: Wed, 18 Aug 2021 23:30:13 +0200 Subject: [PATCH] reorganize structure --- pandas_ods_reader/VERSION | 1 + pandas_ods_reader/parser.py | 59 ++------------------------------ pandas_ods_reader/parsers/ods.py | 53 ++++++++++++++++++++++++++++ setup.cfg | 30 +++++++++++++++- setup.py | 35 ++----------------- 5 files changed, 87 insertions(+), 91 deletions(-) create mode 100644 pandas_ods_reader/VERSION create mode 100644 pandas_ods_reader/parsers/ods.py diff --git a/pandas_ods_reader/VERSION b/pandas_ods_reader/VERSION new file mode 100644 index 0000000..d169b2f --- /dev/null +++ b/pandas_ods_reader/VERSION @@ -0,0 +1 @@ +0.0.8 diff --git a/pandas_ods_reader/parser.py b/pandas_ods_reader/parser.py index 13522ff..b9d4404 100644 --- a/pandas_ods_reader/parser.py +++ b/pandas_ods_reader/parser.py @@ -1,65 +1,10 @@ """Imports an ods file into a DataFrame object""" -from collections import OrderedDict - import ezodf -import pandas as pd +from .parsers import ods from .tools import sanitize_df -def load_ods(doc, sheet_id, headers=True, columns=None): - # convert the sheet to a pandas.DataFrame - if not isinstance(sheet_id, (int, str)): - raise ValueError("Sheet id has to be either `str` or `int`") - if isinstance(sheet_id, str): - sheets = [sheet.name for sheet in doc.sheets] - if sheet_id not in sheets: - raise KeyError("There is no sheet named {}".format(sheet_id)) - sheet_id = sheets.index(sheet_id) + 1 - sheet = doc.sheets[sheet_id - 1] - df_dict = OrderedDict() - col_index = {} - for i, row in enumerate(sheet.rows()): - # row is a list of cells - if headers and i == 0 and not columns: - # columns as lists in a dictionary - columns = [] - for cell in row: - if cell.value and cell.value not in columns: - columns.append(cell.value) - else: - column_name = cell.value if cell.value else "unnamed" - # add count to column name - idx = 1 - while "{}.{}".format(column_name, idx) in columns: - idx += 1 - columns.append("{}.{}".format(column_name, idx)) - - df_dict = OrderedDict((column, []) for column in columns) - # create index for the column headers - col_index = { - j: column for j, column in enumerate(columns) - } - continue - elif i == 0: - columns = columns if columns else ( - [f"column.{j}" for j in range(len(row))]) - # columns as lists in a dictionary - df_dict = OrderedDict((column, []) for column in columns) - # create index for the column headers - col_index = {j: column for j, column in enumerate(columns)} - if headers: - continue - for j, cell in enumerate(row): - if j < len(col_index): - # use header instead of column index - df_dict[col_index[j]].append(cell.value) - else: - continue - df = pd.DataFrame(df_dict) - return df - - def read_ods(file_or_path, sheet=1, headers=True, columns=None): """ This function reads in the provided ods file and converts it to a @@ -79,5 +24,5 @@ def read_ods(file_or_path, sheet=1, headers=True, columns=None): the ODS file as a pandas DataFrame """ doc = ezodf.opendoc(file_or_path) - df = load_ods(doc, sheet, headers, columns) + df = ods.load_ods(doc, sheet, headers, columns) return sanitize_df(df) diff --git a/pandas_ods_reader/parsers/ods.py b/pandas_ods_reader/parsers/ods.py new file mode 100644 index 0000000..d92ce79 --- /dev/null +++ b/pandas_ods_reader/parsers/ods.py @@ -0,0 +1,53 @@ +from collections import OrderedDict + +import pandas as pd + + +def load_ods(doc, sheet_id, headers=True, columns=None): + # convert the sheet to a pandas.DataFrame + if not isinstance(sheet_id, (int, str)): + raise ValueError("Sheet id has to be either `str` or `int`") + if isinstance(sheet_id, str): + sheets = [sheet.name for sheet in doc.sheets] + if sheet_id not in sheets: + raise KeyError("There is no sheet named {}".format(sheet_id)) + sheet_id = sheets.index(sheet_id) + 1 + sheet = doc.sheets[sheet_id - 1] + df_dict = OrderedDict() + col_index = {} + for i, row in enumerate(sheet.rows()): + # row is a list of cells + if headers and i == 0 and not columns: + # columns as lists in a dictionary + columns = [] + for cell in row: + if cell.value and cell.value not in columns: + columns.append(cell.value) + else: + column_name = cell.value if cell.value else "unnamed" + # add count to column name + idx = 1 + while "{}.{}".format(column_name, idx) in columns: + idx += 1 + columns.append("{}.{}".format(column_name, idx)) + + df_dict = OrderedDict((column, []) for column in columns) + # create index for the column headers + col_index = {j: column for j, column in enumerate(columns)} + continue + elif i == 0: + columns = columns if columns else ([f"column.{j}" for j in range(len(row))]) + # columns as lists in a dictionary + df_dict = OrderedDict((column, []) for column in columns) + # create index for the column headers + col_index = {j: column for j, column in enumerate(columns)} + if headers: + continue + for j, cell in enumerate(row): + if j < len(col_index): + # use header instead of column index + df_dict[col_index[j]].append(cell.value) + else: + continue + df = pd.DataFrame(df_dict) + return df diff --git a/setup.cfg b/setup.cfg index b7e4789..3fadbdc 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,2 +1,30 @@ +[metadata] +name = pandas_ods_reader +version = file: pandas_ods_reader/VERSION +description = Read in an ODS file and return it as a pandas.DataFrame +long_description = file: README.md, LICENSE.txt +long_description_content_type = text/markdown +classifiers = + Development Status :: 2 - Beta + License :: OSI Approved :: MIT License + Programming Language :: Python :: 3 + Topic :: Utilities +keywords = data, io, pandas, ods +url = "http://github.com/iuvbio/pandas_ods_reader" +author = iuvbio +author_email = cryptodemigod@protonmail.com +license = MIT + +[options] +zip_safe = False +packages = find: +install_requires = + ezodf + pandas + lxml + +[options.extras_require] +test = pytest + [aliases] -test=pytest +test = pytest diff --git a/setup.py b/setup.py index 96798f5..b024da8 100644 --- a/setup.py +++ b/setup.py @@ -1,35 +1,4 @@ -from setuptools import setup, find_packages +from setuptools import setup -version = None -with open('pandas_ods_reader/__init__.py') as f: - for line in f.readlines(): - if not line.startswith('__version__'): - continue - version = line.split(' = ')[1].strip()[1:-1] - -with open("README.md", "r") as fh: - long_description = fh.read() - -setup( - name="pandas_ods_reader", - version=version, - description="Read in an ODS file and return it as a pandas.DataFrame", - long_description=long_description, - long_description_content_type="text/markdown", - classifiers=[ - "Development Status :: 2 - Beta", - "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 3", - "Topic :: Utilities", - ], - keywords="data io pandas ods", - url="http://github.com/iuvbio/pandas_ods_reader", - author="iuvbio", - author_email="cryptodemigod@protonmail.com", - license="MIT", - packages=find_packages(), - zip_safe=False, - install_requires=["ezodf", "pandas", "lxml"], - tests_require=["pytest"] -) +setup()