reorganize structure
This commit is contained in:
parent
255698a1e2
commit
3ddf3a19cf
|
|
@ -0,0 +1 @@
|
||||||
|
0.0.8
|
||||||
|
|
@ -1,65 +1,10 @@
|
||||||
"""Imports an ods file into a DataFrame object"""
|
"""Imports an ods file into a DataFrame object"""
|
||||||
from collections import OrderedDict
|
|
||||||
|
|
||||||
import ezodf
|
import ezodf
|
||||||
import pandas as pd
|
|
||||||
|
|
||||||
|
from .parsers import ods
|
||||||
from .tools import sanitize_df
|
from .tools import sanitize_df
|
||||||
|
|
||||||
|
|
||||||
def _header_column_names(row):
    """Derive unique column names from the cells of a header row.

    Empty header cells are named ``unnamed.<n>`` and repeated header values
    get a ``.<n>`` suffix, where ``<n>`` is the first counter (starting at 1)
    that does not collide with a name already collected.
    """
    names = []
    for cell in row:
        value = cell.value
        if value and value not in names:
            names.append(value)
            continue
        # Empty or repeated header: append the first free numeric suffix.
        base = value if value else "unnamed"
        idx = 1
        while f"{base}.{idx}" in names:
            idx += 1
        names.append(f"{base}.{idx}")
    return names


def load_ods(doc, sheet_id, headers=True, columns=None):
    """Convert one sheet of an opened document into a pandas.DataFrame.

    Args:
        doc: Opened document. Its ``sheets`` attribute is iterated to
            collect sheet names and indexed by position; each sheet must
            provide ``name`` and ``rows()``, and each cell a ``value``.
        sheet_id: 1-based position of the sheet (``int``) or its name
            (``str``).
        headers: If true, the first row is not loaded as data. It supplies
            the column names unless ``columns`` is given.
        columns: Optional column names. When given (and non-empty) they are
            used instead of names taken from the sheet.

    Returns:
        A DataFrame with one column per name. Cells beyond the last column
        name are ignored; columns that have no cell in a row are filled
        with ``None`` for that row.

    Raises:
        ValueError: If ``sheet_id`` is neither ``int`` nor ``str``.
        KeyError: If ``sheet_id`` is a name that matches no sheet.
    """
    if not isinstance(sheet_id, (int, str)):
        raise ValueError("Sheet id has to be either `str` or `int`")
    if isinstance(sheet_id, str):
        sheet_names = [sheet.name for sheet in doc.sheets]
        if sheet_id not in sheet_names:
            raise KeyError(f"There is no sheet named {sheet_id}")
        sheet_id = sheet_names.index(sheet_id) + 1
    sheet = doc.sheets[sheet_id - 1]

    rows = iter(sheet.rows())
    first_row = next(rows, None)
    if first_row is None:
        # Empty sheet: still honour explicitly requested column names.
        return pd.DataFrame(OrderedDict((name, []) for name in columns or ()))
    first_row = list(first_row)

    if columns:
        names = list(columns)
    elif headers:
        names = _header_column_names(first_row)
    else:
        names = [f"column.{j}" for j in range(len(first_row))]

    # Columns are collected as lists keyed by column name, in sheet order.
    df_dict = OrderedDict((name, []) for name in names)

    def append_row(row):
        cells = list(row)
        for j, name in enumerate(names):
            # Pad short rows so every column keeps the same length;
            # otherwise the DataFrame constructor rejects the data.
            df_dict[name].append(cells[j].value if j < len(cells) else None)

    if not headers:
        # Without a header row the first row is ordinary data.
        append_row(first_row)
    for row in rows:
        append_row(row)

    return pd.DataFrame(df_dict)
|
|
||||||
|
|
||||||
|
|
||||||
def read_ods(file_or_path, sheet=1, headers=True, columns=None):
|
def read_ods(file_or_path, sheet=1, headers=True, columns=None):
|
||||||
"""
|
"""
|
||||||
This function reads in the provided ods file and converts it to a
|
This function reads in the provided ods file and converts it to a
|
||||||
|
|
@ -79,5 +24,5 @@ def read_ods(file_or_path, sheet=1, headers=True, columns=None):
|
||||||
the ODS file as a pandas DataFrame
|
the ODS file as a pandas DataFrame
|
||||||
"""
|
"""
|
||||||
doc = ezodf.opendoc(file_or_path)
|
doc = ezodf.opendoc(file_or_path)
|
||||||
df = load_ods(doc, sheet, headers, columns)
|
df = ods.load_ods(doc, sheet, headers, columns)
|
||||||
return sanitize_df(df)
|
return sanitize_df(df)
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,53 @@
|
||||||
|
from collections import OrderedDict
|
||||||
|
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
def _header_column_names(row):
    """Derive unique column names from the cells of a header row.

    Empty header cells are named ``unnamed.<n>`` and repeated header values
    get a ``.<n>`` suffix, where ``<n>`` is the first counter (starting at 1)
    that does not collide with a name already collected.
    """
    names = []
    for cell in row:
        value = cell.value
        if value and value not in names:
            names.append(value)
            continue
        # Empty or repeated header: append the first free numeric suffix.
        base = value if value else "unnamed"
        idx = 1
        while f"{base}.{idx}" in names:
            idx += 1
        names.append(f"{base}.{idx}")
    return names


def load_ods(doc, sheet_id, headers=True, columns=None):
    """Convert one sheet of an opened document into a pandas.DataFrame.

    Args:
        doc: Opened document. Its ``sheets`` attribute is iterated to
            collect sheet names and indexed by position; each sheet must
            provide ``name`` and ``rows()``, and each cell a ``value``.
        sheet_id: 1-based position of the sheet (``int``) or its name
            (``str``).
        headers: If true, the first row is not loaded as data. It supplies
            the column names unless ``columns`` is given.
        columns: Optional column names. When given (and non-empty) they are
            used instead of names taken from the sheet.

    Returns:
        A DataFrame with one column per name. Cells beyond the last column
        name are ignored; columns that have no cell in a row are filled
        with ``None`` for that row.

    Raises:
        ValueError: If ``sheet_id`` is neither ``int`` nor ``str``.
        KeyError: If ``sheet_id`` is a name that matches no sheet.
    """
    if not isinstance(sheet_id, (int, str)):
        raise ValueError("Sheet id has to be either `str` or `int`")
    if isinstance(sheet_id, str):
        sheet_names = [sheet.name for sheet in doc.sheets]
        if sheet_id not in sheet_names:
            raise KeyError(f"There is no sheet named {sheet_id}")
        sheet_id = sheet_names.index(sheet_id) + 1
    sheet = doc.sheets[sheet_id - 1]

    rows = iter(sheet.rows())
    first_row = next(rows, None)
    if first_row is None:
        # Empty sheet: still honour explicitly requested column names.
        return pd.DataFrame(OrderedDict((name, []) for name in columns or ()))
    first_row = list(first_row)

    if columns:
        names = list(columns)
    elif headers:
        names = _header_column_names(first_row)
    else:
        names = [f"column.{j}" for j in range(len(first_row))]

    # Columns are collected as lists keyed by column name, in sheet order.
    df_dict = OrderedDict((name, []) for name in names)

    def append_row(row):
        cells = list(row)
        for j, name in enumerate(names):
            # Pad short rows so every column keeps the same length;
            # otherwise the DataFrame constructor rejects the data.
            df_dict[name].append(cells[j].value if j < len(cells) else None)

    if not headers:
        # Without a header row the first row is ordinary data.
        append_row(first_row)
    for row in rows:
        append_row(row)

    return pd.DataFrame(df_dict)
|
||||||
30
setup.cfg
30
setup.cfg
|
|
@ -1,2 +1,30 @@
|
||||||
|
[metadata]
|
||||||
|
name = pandas_ods_reader
|
||||||
|
version = file: pandas_ods_reader/VERSION
|
||||||
|
description = Read in an ODS file and return it as a pandas.DataFrame
|
||||||
|
long_description = file: README.md, LICENSE.txt
|
||||||
|
long_description_content_type = text/markdown
|
||||||
|
classifiers =
|
||||||
|
Development Status :: 4 - Beta
|
||||||
|
License :: OSI Approved :: MIT License
|
||||||
|
Programming Language :: Python :: 3
|
||||||
|
Topic :: Utilities
|
||||||
|
keywords = data, io, pandas, ods
|
||||||
|
url = http://github.com/iuvbio/pandas_ods_reader
|
||||||
|
author = iuvbio
|
||||||
|
author_email = cryptodemigod@protonmail.com
|
||||||
|
license = MIT
|
||||||
|
|
||||||
|
[options]
|
||||||
|
zip_safe = False
|
||||||
|
packages = find:
|
||||||
|
install_requires =
|
||||||
|
ezodf
|
||||||
|
pandas
|
||||||
|
lxml
|
||||||
|
|
||||||
|
[options.extras_require]
|
||||||
|
test = pytest
|
||||||
|
|
||||||
[aliases]
|
[aliases]
|
||||||
test=pytest
|
test = pytest
|
||||||
|
|
|
||||||
35
setup.py
35
setup.py
|
|
@ -1,35 +1,4 @@
|
||||||
from setuptools import setup, find_packages
|
from setuptools import setup
|
||||||
|
|
||||||
|
|
||||||
version = None
|
setup()
|
||||||
with open('pandas_ods_reader/__init__.py') as f:
|
|
||||||
for line in f.readlines():
|
|
||||||
if not line.startswith('__version__'):
|
|
||||||
continue
|
|
||||||
version = line.split(' = ')[1].strip()[1:-1]
|
|
||||||
|
|
||||||
with open("README.md", "r") as fh:
|
|
||||||
long_description = fh.read()
|
|
||||||
|
|
||||||
setup(
|
|
||||||
name="pandas_ods_reader",
|
|
||||||
version=version,
|
|
||||||
description="Read in an ODS file and return it as a pandas.DataFrame",
|
|
||||||
long_description=long_description,
|
|
||||||
long_description_content_type="text/markdown",
|
|
||||||
classifiers=[
|
|
||||||
"Development Status :: 2 - Beta",
|
|
||||||
"License :: OSI Approved :: MIT License",
|
|
||||||
"Programming Language :: Python :: 3",
|
|
||||||
"Topic :: Utilities",
|
|
||||||
],
|
|
||||||
keywords="data io pandas ods",
|
|
||||||
url="http://github.com/iuvbio/pandas_ods_reader",
|
|
||||||
author="iuvbio",
|
|
||||||
author_email="cryptodemigod@protonmail.com",
|
|
||||||
license="MIT",
|
|
||||||
packages=find_packages(),
|
|
||||||
zip_safe=False,
|
|
||||||
install_requires=["ezodf", "pandas", "lxml"],
|
|
||||||
tests_require=["pytest"]
|
|
||||||
)
|
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue