reorganize structure

2021-08-18 23:30:13 +02:00 · 2021-08-18 23:30:13 +02:00 · 3ddf3a19cf
parent 255698a1e2
commit 3ddf3a19cf
5 changed files with 87 additions and 91 deletions
--- a/pandas_ods_reader/VERSION
+++ b/pandas_ods_reader/VERSION
@@ -0,0 +1 @@
+0.0.8
--- a/pandas_ods_reader/parser.py
+++ b/pandas_ods_reader/parser.py
@@ -1,65 +1,10 @@
 """Imports an ods file into a DataFrame object"""
-from collections import OrderedDict
-
 import ezodf
-import pandas as pd

+from .parsers import ods
 from .tools import sanitize_df


-def load_ods(doc, sheet_id, headers=True, columns=None):
-    # convert the sheet to a pandas.DataFrame
-    if not isinstance(sheet_id, (int, str)):
-        raise ValueError("Sheet id has to be either `str` or `int`")
-    if isinstance(sheet_id, str):
-        sheets = [sheet.name for sheet in doc.sheets]
-        if sheet_id not in sheets:
-            raise KeyError("There is no sheet named {}".format(sheet_id))
-        sheet_id = sheets.index(sheet_id) + 1
-    sheet = doc.sheets[sheet_id - 1]
-    df_dict = OrderedDict()
-    col_index = {}
-    for i, row in enumerate(sheet.rows()):
-        # row is a list of cells
-        if headers and i == 0 and not columns:
-            # columns as lists in a dictionary
-            columns = []
-            for cell in row:
-                if cell.value and cell.value not in columns:
-                    columns.append(cell.value)
-                else:
-                    column_name = cell.value if cell.value else "unnamed"
-                    # add count to column name
-                    idx = 1
-                    while "{}.{}".format(column_name, idx) in columns:
-                        idx += 1
-                    columns.append("{}.{}".format(column_name, idx))
-
-            df_dict = OrderedDict((column, []) for column in columns)
-            # create index for the column headers
-            col_index = {
-                j: column for j, column in enumerate(columns)
-            }
-            continue
-        elif i == 0:
-            columns = columns if columns else (
-                [f"column.{j}" for j in range(len(row))])
-            # columns as lists in a dictionary
-            df_dict = OrderedDict((column, []) for column in columns)
-            # create index for the column headers
-            col_index = {j: column for j, column in enumerate(columns)}
-            if headers:
-                continue
-        for j, cell in enumerate(row):
-            if j < len(col_index):
-                # use header instead of column index
-                df_dict[col_index[j]].append(cell.value)
-            else:
-                continue
-    df = pd.DataFrame(df_dict)
-    return df
-
-
 def read_ods(file_or_path, sheet=1, headers=True, columns=None):
    """
    This function reads in the provided ods file and converts it to a
@@ -79,5 +24,5 @@ def read_ods(file_or_path, sheet=1, headers=True, columns=None):
    the ODS file as a pandas DataFrame
    """
    doc = ezodf.opendoc(file_or_path)
-    df = load_ods(doc, sheet, headers, columns)
+    df = ods.load_ods(doc, sheet, headers, columns)
    return sanitize_df(df)
--- a/pandas_ods_reader/parsers/ods.py
+++ b/pandas_ods_reader/parsers/ods.py
@@ -0,0 +1,53 @@
+from collections import OrderedDict
+
+import pandas as pd
+
+
+def load_ods(doc, sheet_id, headers=True, columns=None):
+    # Convert one sheet of `doc` into a pandas.DataFrame; `sheet_id` is a 1-based index or a sheet name
+    if not isinstance(sheet_id, (int, str)):
+        raise ValueError("Sheet id has to be either `str` or `int`")
+    if isinstance(sheet_id, str):
+        sheets = [sheet.name for sheet in doc.sheets]
+        if sheet_id not in sheets:
+            raise KeyError("There is no sheet named {}".format(sheet_id))
+        sheet_id = sheets.index(sheet_id) + 1
+    sheet = doc.sheets[sheet_id - 1]
+    df_dict = OrderedDict()
+    col_index = {}
+    for i, row in enumerate(sheet.rows()):
+        # each row is iterated cell by cell; the first row (i == 0) sets up the columns
+        if headers and i == 0 and not columns:
+            # derive the column names from the header row's cell values
+            columns = []
+            for cell in row:
+                if cell.value and cell.value not in columns:
+                    columns.append(cell.value)
+                else:
+                    column_name = cell.value if cell.value else "unnamed"
+                    # empty or duplicate header: append the first unused ".<n>" suffix
+                    idx = 1
+                    while "{}.{}".format(column_name, idx) in columns:
+                        idx += 1
+                    columns.append("{}.{}".format(column_name, idx))
+
+            df_dict = OrderedDict((column, []) for column in columns)
+            # map each cell position in a row to its column name
+            col_index = {j: column for j, column in enumerate(columns)}
+            continue
+        elif i == 0:
+            columns = columns if columns else ([f"column.{j}" for j in range(len(row))])
+            # one empty value list per column, kept in column order
+            df_dict = OrderedDict((column, []) for column in columns)
+            # map each cell position in a row to its column name
+            col_index = {j: column for j, column in enumerate(columns)}
+            if headers:
+                continue
+        for j, cell in enumerate(row):
+            if j < len(col_index):
+                # store the value under its column name; cells beyond the known columns are skipped
+                df_dict[col_index[j]].append(cell.value)
+            else:
+                continue
+    df = pd.DataFrame(df_dict)
+    return df
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,2 +1,30 @@
+[metadata]
+name = pandas_ods_reader
+version = file: pandas_ods_reader/VERSION
+description = Read in an ODS file and return it as a pandas.DataFrame
+long_description = file: README.md, LICENSE.txt
+long_description_content_type = text/markdown
+classifiers = 
+    Development Status :: 4 - Beta
+    License :: OSI Approved :: MIT License
+    Programming Language :: Python :: 3
+    Topic :: Utilities
+keywords = data, io, pandas, ods
+url = http://github.com/iuvbio/pandas_ods_reader
+author = iuvbio
+author_email = cryptodemigod@protonmail.com
+license = MIT
+
+[options]
+zip_safe = False
+packages = find:
+install_requires =
+    ezodf
+    pandas
+    lxml
+
+[options.extras_require]
+test = pytest
+
 [aliases]
 test = pytest
--- a/setup.py
+++ b/setup.py
@@ -1,35 +1,4 @@
-from setuptools import setup, find_packages
+from setuptools import setup


-version = None
-with open('pandas_ods_reader/__init__.py') as f:
-    for line in f.readlines():
-        if not line.startswith('__version__'):
-            continue
-        version = line.split(' = ')[1].strip()[1:-1]
-
-with open("README.md", "r") as fh:
-    long_description = fh.read()
-
-setup(
-  name="pandas_ods_reader",
-  version=version,
-  description="Read in an ODS file and return it as a pandas.DataFrame",
-  long_description=long_description,
-  long_description_content_type="text/markdown",
-  classifiers=[
-    "Development Status :: 2 - Beta",
-    "License :: OSI Approved :: MIT License",
-    "Programming Language :: Python :: 3",
-    "Topic :: Utilities",
-  ],
-  keywords="data io pandas ods",
-  url="http://github.com/iuvbio/pandas_ods_reader",
-  author="iuvbio",
-  author_email="cryptodemigod@protonmail.com",
-  license="MIT",
-  packages=find_packages(),
-  zip_safe=False,
-  install_requires=["ezodf", "pandas", "lxml"],
-  tests_require=["pytest"]
-)
+setup()