reorganize structure

2021-08-18 23:30:13 +02:00 · 2021-08-18 23:30:13 +02:00 · 3ddf3a19cf
parent 255698a1e2
commit 3ddf3a19cf
5 changed files with 87 additions and 91 deletions
--- a/pandas_ods_reader/VERSION
+++ b/pandas_ods_reader/VERSION
@ -0,0 +1 @@
 0.0.8
--- a/pandas_ods_reader/parser.py
+++ b/pandas_ods_reader/parser.py
@ -1,65 +1,10 @@
 """Imports an ods file into a DataFrame object"""
 from collections import OrderedDict
 import ezodf
 import pandas as pd
 from .parsers import ods
 from .tools import sanitize_df
 def load_ods(doc, sheet_id, headers=True, columns=None):
    """Convert one sheet of an opened document into a pandas.DataFrame.

    Args:
        doc: Opened document object (e.g. the result of ``ezodf.opendoc``).
            Only its ``sheets`` collection is used; every sheet has to
            provide ``name`` and ``rows()``, every cell a ``value``.
        sheet_id: 1-based position of the sheet (``int``) or its name
            (``str``).
        headers: If true, the first row is not treated as data. Without
            explicit ``columns`` it supplies the column names; empty or
            repeated names get a numeric suffix (``unnamed.1``, ``a.1``).
        columns: Optional column names. A non-empty value takes precedence
            over names read from the sheet. Without ``headers`` and
            ``columns`` the names are ``column.0``, ``column.1``, ...

    Returns:
        pandas.DataFrame with one column per name. Cells beyond the last
        named column are ignored; named columns without a cell in a row
        receive ``None``. A sheet without rows yields an empty frame that
        still carries the explicitly requested ``columns``.

    Raises:
        ValueError: If ``sheet_id`` is neither ``int`` nor ``str``.
        KeyError: If no sheet is named ``sheet_id``.
        IndexError: If an integer ``sheet_id`` is smaller than 1 (a
            position past the last sheet is left to ``doc.sheets``).
    """
    if not isinstance(sheet_id, (int, str)):
        raise ValueError("Sheet id has to be either `str` or `int`")
    if isinstance(sheet_id, str):
        sheet_names = [sheet.name for sheet in doc.sheets]
        if sheet_id not in sheet_names:
            raise KeyError(f"There is no sheet named {sheet_id}")
        sheet_id = sheet_names.index(sheet_id) + 1
    if sheet_id < 1:
        # sheet ids are 1-based; 0 or a negative id would otherwise become a
        # negative index and silently select a sheet counted from the end
        raise IndexError(f"Sheet index has to be 1 or greater, got {sheet_id}")
    sheet = doc.sheets[sheet_id - 1]

    # explicit names win over anything found in the sheet
    names = list(columns) if columns else None
    # prepared before the loop so that a sheet without any row still yields
    # the requested columns
    df_dict = OrderedDict((name, []) for name in names or ())
    for i, row in enumerate(sheet.rows()):
        # row is a list of cells
        if i == 0:
            if names is None:
                if headers:
                    # the first row holds the column names
                    names = []
                    taken = set()
                    for cell in row:
                        name = cell.value
                        if not name or name in taken:
                            # empty or repeated header: add a count to it
                            base = name if name else "unnamed"
                            idx = 1
                            while f"{base}.{idx}" in taken:
                                idx += 1
                            name = f"{base}.{idx}"
                        names.append(name)
                        taken.add(name)
                else:
                    names = [f"column.{j}" for j in range(len(row))]
                df_dict = OrderedDict((name, []) for name in names)
            if headers:
                # the header row itself never contributes data
                continue
        filled = 0
        # zip stops at the last named column, surplus cells are dropped
        for name, cell in zip(names, row):
            df_dict[name].append(cell.value)
            filled += 1
        # keep all columns the same length when the row has fewer cells
        # than there are names, otherwise pd.DataFrame refuses the dict
        for name in names[filled:]:
            df_dict[name].append(None)
    return pd.DataFrame(df_dict)
 def read_ods(file_or_path, sheet=1, headers=True, columns=None):
    """
    This function reads in the provided ods file and converts it to a
@ -79,5 +24,5 @@ def read_ods(file_or_path, sheet=1, headers=True, columns=None):
    the ODS file as a pandas DataFrame
    """
    doc = ezodf.opendoc(file_or_path)
-    df = load_ods(doc, sheet, headers, columns)
+    df = ods.load_ods(doc, sheet, headers, columns)
    return sanitize_df(df)
--- a/pandas_ods_reader/parsers/ods.py
+++ b/pandas_ods_reader/parsers/ods.py
@ -0,0 +1,53 @@
 from collections import OrderedDict
 import pandas as pd
 def load_ods(doc, sheet_id, headers=True, columns=None):
    """Convert one sheet of an opened document into a pandas.DataFrame.

    Args:
        doc: Opened document object. Only its ``sheets`` collection is
            used; every sheet has to provide ``name`` and ``rows()``,
            every cell a ``value``.
        sheet_id: 1-based position of the sheet (``int``) or its name
            (``str``).
        headers: If true, the first row is not treated as data. Without
            explicit ``columns`` it supplies the column names; empty or
            repeated names get a numeric suffix (``unnamed.1``, ``a.1``).
        columns: Optional column names. A non-empty value takes precedence
            over names read from the sheet. Without ``headers`` and
            ``columns`` the names are ``column.0``, ``column.1``, ...

    Returns:
        pandas.DataFrame with one column per name. Cells beyond the last
        named column are ignored; named columns without a cell in a row
        receive ``None``. A sheet without rows yields an empty frame that
        still carries the explicitly requested ``columns``.

    Raises:
        ValueError: If ``sheet_id`` is neither ``int`` nor ``str``.
        KeyError: If no sheet is named ``sheet_id``.
        IndexError: If an integer ``sheet_id`` is smaller than 1 (a
            position past the last sheet is left to ``doc.sheets``).
    """
    if not isinstance(sheet_id, (int, str)):
        raise ValueError("Sheet id has to be either `str` or `int`")
    if isinstance(sheet_id, str):
        sheet_names = [sheet.name for sheet in doc.sheets]
        if sheet_id not in sheet_names:
            raise KeyError(f"There is no sheet named {sheet_id}")
        sheet_id = sheet_names.index(sheet_id) + 1
    if sheet_id < 1:
        # sheet ids are 1-based; 0 or a negative id would otherwise become a
        # negative index and silently select a sheet counted from the end
        raise IndexError(f"Sheet index has to be 1 or greater, got {sheet_id}")
    sheet = doc.sheets[sheet_id - 1]

    # explicit names win over anything found in the sheet
    names = list(columns) if columns else None
    # prepared before the loop so that a sheet without any row still yields
    # the requested columns
    df_dict = OrderedDict((name, []) for name in names or ())
    for i, row in enumerate(sheet.rows()):
        # row is a list of cells
        if i == 0:
            if names is None:
                if headers:
                    # the first row holds the column names
                    names = []
                    taken = set()
                    for cell in row:
                        name = cell.value
                        if not name or name in taken:
                            # empty or repeated header: add a count to it
                            base = name if name else "unnamed"
                            idx = 1
                            while f"{base}.{idx}" in taken:
                                idx += 1
                            name = f"{base}.{idx}"
                        names.append(name)
                        taken.add(name)
                else:
                    names = [f"column.{j}" for j in range(len(row))]
                df_dict = OrderedDict((name, []) for name in names)
            if headers:
                # the header row itself never contributes data
                continue
        filled = 0
        # zip stops at the last named column, surplus cells are dropped
        for name, cell in zip(names, row):
            df_dict[name].append(cell.value)
            filled += 1
        # keep all columns the same length when the row has fewer cells
        # than there are names, otherwise pd.DataFrame refuses the dict
        for name in names[filled:]:
            df_dict[name].append(None)
    return pd.DataFrame(df_dict)
--- a/setup.cfg
+++ b/setup.cfg
@ -1,2 +1,30 @@
 [metadata]
 name = pandas_ods_reader
 version = file: pandas_ods_reader/VERSION
 description = Read in an ODS file and return it as a pandas.DataFrame
 long_description = file: README.md, LICENSE.txt
 long_description_content_type = text/markdown
 classifiers = 
    Development Status :: 4 - Beta
    License :: OSI Approved :: MIT License
    Programming Language :: Python :: 3
    Topic :: Utilities
 keywords = data, io, pandas, ods
 url = http://github.com/iuvbio/pandas_ods_reader
 author = iuvbio
 author_email = cryptodemigod@protonmail.com
 license = MIT
 [options]
 zip_safe = False
 packages = find:
 install_requires =
    ezodf
    pandas
    lxml
 [options.extras_require]
 test = pytest
 [aliases]
-test=pytest
+test = pytest
--- a/setup.py
+++ b/setup.py
@ -1,35 +1,4 @@
-from setuptools import setup, find_packages
+from setuptools import setup
-version = None
+setup()
 with open('pandas_ods_reader/__init__.py') as f:
    for line in f.readlines():
        if not line.startswith('__version__'):
            continue
        version = line.split(' = ')[1].strip()[1:-1]
 with open("README.md", "r") as fh:
    long_description = fh.read()
 setup(
  name="pandas_ods_reader",
  version=version,
  description="Read in an ODS file and return it as a pandas.DataFrame",
  long_description=long_description,
  long_description_content_type="text/markdown",
  classifiers=[
    "Development Status :: 2 - Beta",
    "License :: OSI Approved :: MIT License",
    "Programming Language :: Python :: 3",
    "Topic :: Utilities",
  ],
  keywords="data io pandas ods",
  url="http://github.com/iuvbio/pandas_ods_reader",
  author="iuvbio",
  author_email="cryptodemigod@protonmail.com",
  license="MIT",
  packages=find_packages(),
  zip_safe=False,
  install_requires=["ezodf", "pandas", "lxml"],
  tests_require=["pytest"]
 )