Merge branch 'master' of https://github.com/iuvbio/pandas_ods_reader
This commit is contained in:
commit
d8d71f610d
|
|
@ -6,16 +6,16 @@ from collections import OrderedDict
|
|||
from .tools import sanitize_df
|
||||
|
||||
|
||||
def load_ods(doc, sheet, headers=True, columns=None):
|
||||
def load_ods(doc, sheet_id, headers=True, columns=None):
|
||||
# convert the sheet to a pandas.DataFrame
|
||||
if isinstance(sheet, int):
|
||||
sheet = doc.sheets[sheet - 1]
|
||||
elif isinstance(sheet, str):
|
||||
if not isinstance(sheet_id, (int, str)):
|
||||
raise ValueError("Sheet id has to be either `str` or `int`")
|
||||
if isinstance(sheet_id, str):
|
||||
sheets = [sheet.name for sheet in doc.sheets]
|
||||
if sheet not in sheets:
|
||||
raise ValueError("There is no sheet named {}".format(sheet))
|
||||
sheet_idx = sheets.index(sheet)
|
||||
sheet = doc.sheets[sheet_idx]
|
||||
if sheet_id not in sheets:
|
||||
raise ValueError("There is no sheet named {}".format(sheet_id))
|
||||
sheet_id = sheets.index(sheet_id) + 1
|
||||
sheet = doc.sheets[sheet_id - 1]
|
||||
df_dict = OrderedDict()
|
||||
col_index = {}
|
||||
for i, row in enumerate(sheet.rows()):
|
||||
|
|
@ -42,7 +42,7 @@ def load_ods(doc, sheet, headers=True, columns=None):
|
|||
continue
|
||||
elif i == 0:
|
||||
columns = columns if columns else (
|
||||
["column_%s" % j for j in range(len(row))])
|
||||
[f"column_{j}" for j in range(len(row))])
|
||||
# columns as lists in a dictionary
|
||||
df_dict = OrderedDict((column, []) for column in columns)
|
||||
# create index for the column headers
|
||||
|
|
@ -62,8 +62,8 @@ def load_ods(doc, sheet, headers=True, columns=None):
|
|||
def read_ods(file_or_path, sheet, headers=True, columns=None):
|
||||
"""
|
||||
This function reads in the provided ods file and converts it to a
|
||||
dictionary. The dictionary is converted to a DataFrame. Empty rows and
|
||||
columns are dropped from the DataFrame, before it is returned.
|
||||
dictionary. The dictionary is converted to a DataFrame. Trailing empty rows
|
||||
and columns are dropped from the DataFrame, before it is returned.
|
||||
|
||||
:param file_or_path: str
|
||||
the path to the ODS file
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -11,6 +11,7 @@ rsc = os.path.join(root, "rsc")
|
|||
header_file = "example_headers.ods"
|
||||
no_header_file = "example_no_headers.ods"
|
||||
duplicated_column_names_file = "example_duplicated_column_names.ods"
|
||||
col_len_file = "example_col_lengths.ods"
|
||||
|
||||
|
||||
class TestOdsReader(object):
|
||||
|
|
@ -19,12 +20,14 @@ class TestOdsReader(object):
|
|||
df = read_ods(path, 1)
|
||||
assert isinstance(df, pd.DataFrame)
|
||||
assert len(df) == 10
|
||||
assert (len(df.columns) == 5)
|
||||
|
||||
def test_header_file_with_str(self):
|
||||
path = os.path.join(rsc, header_file)
|
||||
df = read_ods(path, "Sheet1")
|
||||
assert isinstance(df, pd.DataFrame)
|
||||
assert len(df) == 10
|
||||
assert (len(df.columns) == 5)
|
||||
|
||||
def test_header_file_with_cols(self):
|
||||
path = os.path.join(rsc, header_file)
|
||||
|
|
@ -32,13 +35,15 @@ class TestOdsReader(object):
|
|||
df = read_ods(path, "Sheet1", columns=columns)
|
||||
assert list(df.columns) == columns
|
||||
assert len(df) == 10
|
||||
assert (len(df.columns) == 5)
|
||||
|
||||
def test_no_header_file_no_cols(self):
|
||||
path = os.path.join(rsc, no_header_file)
|
||||
df = read_ods(path, 1, headers=False)
|
||||
assert list(df.columns) == [
|
||||
"column_%s" % i for i in range(len(df.columns))]
|
||||
f"column_{i}" for i in range(len(df.columns))]
|
||||
assert len(df) == 10
|
||||
assert (len(df.columns) == 5)
|
||||
|
||||
def test_no_header_file_with_cols(self):
|
||||
path = os.path.join(rsc, no_header_file)
|
||||
|
|
@ -53,3 +58,10 @@ class TestOdsReader(object):
|
|||
assert isinstance(df, pd.DataFrame)
|
||||
assert len(df.columns) == 4
|
||||
assert "website.1" in df.columns
|
||||
|
||||
def test_header_file_col_len(self):
|
||||
path = os.path.join(rsc, col_len_file)
|
||||
df = read_ods(path, 1)
|
||||
assert isinstance(df, pd.DataFrame)
|
||||
assert len(df) == 10
|
||||
assert (len(df.columns) == 5)
|
||||
|
|
|
|||
Loading…
Reference in New Issue