diff --git a/pandas_ods_reader/__init__.py b/pandas_ods_reader/__init__.py index b682708..89634ee 100644 --- a/pandas_ods_reader/__init__.py +++ b/pandas_ods_reader/__init__.py @@ -1 +1 @@ -from .read_ods import read_ods +from .parser import read_ods diff --git a/pandas_ods_reader/read_ods.py b/pandas_ods_reader/parser.py similarity index 78% rename from pandas_ods_reader/read_ods.py rename to pandas_ods_reader/parser.py index d51e73d..b2f7f27 100644 --- a/pandas_ods_reader/read_ods.py +++ b/pandas_ods_reader/parser.py @@ -2,14 +2,7 @@ import ezodf import pandas as pd - -def ods_info(doc): - print("Spreadsheet contains %d sheet(s)." % len(doc.sheets)) - for sheet in doc.sheets: - print("-"*40) - print(" Sheet name : '%s'" % sheet.name) - print("Size of Sheet : (rows=%d, cols=%d)" % ( - sheet.nrows(), sheet.ncols())) +from .tools import sanitize_df def load_ods(doc, sheet, headers=True, columns=None): @@ -52,25 +45,6 @@ def load_ods(doc, sheet, headers=True, columns=None): return df -def sanitize_df(df): - # Delete empty rows - rows = len(df) - 1 - for i in range(rows): - row = df.iloc[-1] - if row.isnull().all(): - df = df.iloc[:-2] - else: - break - # Delete empty columns - cols = [] - for column in df: - if not df[column].isnull().all(): - cols.append(column) - df = df[cols] - len(df.columns) - return df - - def read_ods(file_or_path, sheet, headers=True, columns=None): """ This function reads in the provided ods file and converts it to a diff --git a/pandas_ods_reader/tools.py b/pandas_ods_reader/tools.py new file mode 100644 index 0000000..ca98694 --- /dev/null +++ b/pandas_ods_reader/tools.py @@ -0,0 +1,31 @@ +"""Provides utility functions for the parser""" + + +def ods_info(doc): + """Prints the number of sheets, their names, and number of rows and columns""" + print("Spreadsheet contains %d sheet(s)." % len(doc.sheets)) + for sheet in doc.sheets: + print("-"*40) + print(" Sheet name : '%s'" % sheet.name) + print("Size of Sheet : (rows=%d, cols=%d)" % ( + sheet.nrows(), sheet.ncols())) + + +def sanitize_df(df): + """Drops empty rows and columns from the DataFrame and returns it""" + # Delete empty rows + rows = len(df) - 1 + for i in range(rows): + row = df.iloc[-1] + if row.isnull().all(): + df = df.iloc[:-2] + else: + break + # Delete empty columns + cols = [] + for column in df: + if not df[column].isnull().all(): + cols.append(column) + df = df[cols] + len(df.columns) + return df diff --git a/setup.py b/setup.py index 26c7798..430dfa0 100644 --- a/setup.py +++ b/setup.py @@ -1,7 +1,7 @@ from setuptools import setup, find_packages -VERSION = "0.0.2" +VERSION = "0.0.3" setup(name="pandas_ods_reader", version=VERSION, @@ -9,7 +9,7 @@ setup(name="pandas_ods_reader", classifiers=[ 'Development Status :: 3 - Alpha', 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', 'Topic :: Utilities', ], keywords='data io pandas ods', @@ -19,5 +19,5 @@ setup(name="pandas_ods_reader", license="MIT", packages=find_packages(), zip_safe=False, - install_requires=["ezodf", "pandas"] + install_requires=["ezodf", "pandas", "lxml"] )