reorganize structure
This commit is contained in:
parent
255698a1e2
commit
3ddf3a19cf
|
|
@ -0,0 +1 @@
|
|||
0.0.8
|
||||
|
|
@ -1,65 +1,10 @@
|
|||
"""Imports an ods file into a DataFrame object"""
|
||||
from collections import OrderedDict
|
||||
|
||||
import ezodf
|
||||
import pandas as pd
|
||||
|
||||
from .parsers import ods
|
||||
from .tools import sanitize_df
|
||||
|
||||
|
||||
def _unique_column_names(header_row):
    """Derive one distinct column name per cell of the header row.

    Empty header cells are named ``unnamed.N`` and repeated headers get
    ``<name>.N``, where ``N`` is the first positive integer that does not
    collide with a name already taken.
    """
    names = []
    for cell in header_row:
        value = cell.value
        if value and value not in names:
            names.append(value)
            continue
        # Empty or repeated header: append the first free numeric suffix.
        base = value if value else "unnamed"
        suffix = 1
        while f"{base}.{suffix}" in names:
            suffix += 1
        names.append(f"{base}.{suffix}")
    return names


def load_ods(doc, sheet_id, headers=True, columns=None):
    """Convert one sheet of an opened ODS document into a pandas.DataFrame.

    Args:
        doc: Document object exposing ``sheets``; each sheet provides
            ``name`` and ``rows()``, and every cell provides ``value``.
        sheet_id: 1-based sheet position (``int``) or sheet name (``str``).
        headers: When true, the first row is consumed as the header row
            and never appears in the data.
        columns: Optional column names. When given, they take precedence
            over names read from the header row.

    Returns:
        A pandas.DataFrame with one column per column name and one row
        per data row of the sheet.

    Raises:
        ValueError: If ``sheet_id`` is neither ``int`` nor ``str``.
        KeyError: If ``sheet_id`` is a name that matches no sheet.
    """
    if not isinstance(sheet_id, (int, str)):
        raise ValueError("Sheet id has to be either `str` or `int`")
    if isinstance(sheet_id, str):
        sheet_names = [sheet.name for sheet in doc.sheets]
        if sheet_id not in sheet_names:
            raise KeyError("There is no sheet named {}".format(sheet_id))
        sheet_id = sheet_names.index(sheet_id) + 1
    # Sheet positions are 1-based for callers, 0-based in ``doc.sheets``.
    sheet = doc.sheets[sheet_id - 1]

    # Explicit column names are known up front, so even a sheet without
    # any rows yields a frame that carries them.
    names = list(columns) if columns else []
    df_dict = OrderedDict((name, []) for name in names)

    for i, row in enumerate(sheet.rows()):
        if i == 0:
            if not names:
                if headers:
                    names = _unique_column_names(row)
                else:
                    names = [f"column.{j}" for j in range(len(row))]
                df_dict = OrderedDict((name, []) for name in names)
            if headers:
                # The header row is never part of the data.
                continue
        values = [cell.value for cell in row]
        # Pad rows that are shorter than the column list so every column
        # ends up with the same length; zip() drops any surplus cells.
        values.extend([None] * (len(names) - len(values)))
        for name, value in zip(names, values):
            df_dict[name].append(value)

    return pd.DataFrame(df_dict)
|
||||
|
||||
|
||||
def read_ods(file_or_path, sheet=1, headers=True, columns=None):
|
||||
"""
|
||||
This function reads in the provided ods file and converts it to a
|
||||
|
|
@ -79,5 +24,5 @@ def read_ods(file_or_path, sheet=1, headers=True, columns=None):
|
|||
the ODS file as a pandas DataFrame
|
||||
"""
|
||||
doc = ezodf.opendoc(file_or_path)
|
||||
df = load_ods(doc, sheet, headers, columns)
|
||||
df = ods.load_ods(doc, sheet, headers, columns)
|
||||
return sanitize_df(df)
|
||||
|
|
|
|||
|
|
@ -0,0 +1,53 @@
|
|||
from collections import OrderedDict
|
||||
|
||||
import pandas as pd
|
||||
|
||||
|
||||
def _unique_column_names(header_row):
    """Derive one distinct column name per cell of the header row.

    Empty header cells are named ``unnamed.N`` and repeated headers get
    ``<name>.N``, where ``N`` is the first positive integer that does not
    collide with a name already taken.
    """
    names = []
    for cell in header_row:
        value = cell.value
        if value and value not in names:
            names.append(value)
            continue
        # Empty or repeated header: append the first free numeric suffix.
        base = value if value else "unnamed"
        suffix = 1
        while f"{base}.{suffix}" in names:
            suffix += 1
        names.append(f"{base}.{suffix}")
    return names


def load_ods(doc, sheet_id, headers=True, columns=None):
    """Convert one sheet of an opened ODS document into a pandas.DataFrame.

    Args:
        doc: Document object exposing ``sheets``; each sheet provides
            ``name`` and ``rows()``, and every cell provides ``value``.
        sheet_id: 1-based sheet position (``int``) or sheet name (``str``).
        headers: When true, the first row is consumed as the header row
            and never appears in the data.
        columns: Optional column names. When given, they take precedence
            over names read from the header row.

    Returns:
        A pandas.DataFrame with one column per column name and one row
        per data row of the sheet.

    Raises:
        ValueError: If ``sheet_id`` is neither ``int`` nor ``str``.
        KeyError: If ``sheet_id`` is a name that matches no sheet.
    """
    if not isinstance(sheet_id, (int, str)):
        raise ValueError("Sheet id has to be either `str` or `int`")
    if isinstance(sheet_id, str):
        sheet_names = [sheet.name for sheet in doc.sheets]
        if sheet_id not in sheet_names:
            raise KeyError("There is no sheet named {}".format(sheet_id))
        sheet_id = sheet_names.index(sheet_id) + 1
    # Sheet positions are 1-based for callers, 0-based in ``doc.sheets``.
    sheet = doc.sheets[sheet_id - 1]

    # Explicit column names are known up front, so even a sheet without
    # any rows yields a frame that carries them.
    names = list(columns) if columns else []
    df_dict = OrderedDict((name, []) for name in names)

    for i, row in enumerate(sheet.rows()):
        if i == 0:
            if not names:
                if headers:
                    names = _unique_column_names(row)
                else:
                    names = [f"column.{j}" for j in range(len(row))]
                df_dict = OrderedDict((name, []) for name in names)
            if headers:
                # The header row is never part of the data.
                continue
        values = [cell.value for cell in row]
        # Pad rows that are shorter than the column list so every column
        # ends up with the same length; zip() drops any surplus cells.
        values.extend([None] * (len(names) - len(values)))
        for name, value in zip(names, values):
            df_dict[name].append(value)

    return pd.DataFrame(df_dict)
|
||||
30
setup.cfg
30
setup.cfg
|
|
@ -1,2 +1,30 @@
|
|||
[metadata]
|
||||
name = pandas_ods_reader
|
||||
version = file: pandas_ods_reader/VERSION
|
||||
description = Read in an ODS file and return it as a pandas.DataFrame
|
||||
long_description = file: README.md, LICENSE.txt
|
||||
long_description_content_type = text/markdown
|
||||
classifiers =
|
||||
Development Status :: 4 - Beta
|
||||
License :: OSI Approved :: MIT License
|
||||
Programming Language :: Python :: 3
|
||||
Topic :: Utilities
|
||||
keywords = data, io, pandas, ods
|
||||
url = http://github.com/iuvbio/pandas_ods_reader
|
||||
author = iuvbio
|
||||
author_email = cryptodemigod@protonmail.com
|
||||
license = MIT
|
||||
|
||||
[options]
|
||||
zip_safe = False
|
||||
packages = find:
|
||||
install_requires =
|
||||
ezodf
|
||||
pandas
|
||||
lxml
|
||||
|
||||
[options.extras_require]
|
||||
test = pytest
|
||||
|
||||
[aliases]
|
||||
test=pytest
|
||||
test = pytest
|
||||
|
|
|
|||
35
setup.py
35
setup.py
|
|
@ -1,35 +1,4 @@
|
|||
from setuptools import setup, find_packages
|
||||
from setuptools import setup
|
||||
|
||||
|
||||
version = None
|
||||
with open('pandas_ods_reader/__init__.py') as f:
|
||||
for line in f.readlines():
|
||||
if not line.startswith('__version__'):
|
||||
continue
|
||||
version = line.split(' = ')[1].strip()[1:-1]
|
||||
|
||||
with open("README.md", "r") as fh:
|
||||
long_description = fh.read()
|
||||
|
||||
setup(
|
||||
name="pandas_ods_reader",
|
||||
version=version,
|
||||
description="Read in an ODS file and return it as a pandas.DataFrame",
|
||||
long_description=long_description,
|
||||
long_description_content_type="text/markdown",
|
||||
classifiers=[
|
||||
"Development Status :: 2 - Beta",
|
||||
"License :: OSI Approved :: MIT License",
|
||||
"Programming Language :: Python :: 3",
|
||||
"Topic :: Utilities",
|
||||
],
|
||||
keywords="data io pandas ods",
|
||||
url="http://github.com/iuvbio/pandas_ods_reader",
|
||||
author="iuvbio",
|
||||
author_email="cryptodemigod@protonmail.com",
|
||||
license="MIT",
|
||||
packages=find_packages(),
|
||||
zip_safe=False,
|
||||
install_requires=["ezodf", "pandas", "lxml"],
|
||||
tests_require=["pytest"]
|
||||
)
|
||||
setup()
|
||||
|
|
|
|||
Loading…
Reference in New Issue