Merge branch 'master' of https://github.com/iuvbio/pandas_ods_reader
This commit is contained in:
commit
d8d71f610d
|
|
@ -6,16 +6,16 @@ from collections import OrderedDict
|
||||||
from .tools import sanitize_df
|
from .tools import sanitize_df
|
||||||
|
|
||||||
|
|
||||||
def load_ods(doc, sheet, headers=True, columns=None):
|
def load_ods(doc, sheet_id, headers=True, columns=None):
|
||||||
# convert the sheet to a pandas.DataFrame
|
# convert the sheet to a pandas.DataFrame
|
||||||
if isinstance(sheet, int):
|
if not isinstance(sheet_id, (int, str)):
|
||||||
sheet = doc.sheets[sheet - 1]
|
raise ValueError("Sheet id has to be either `str` or `int`")
|
||||||
elif isinstance(sheet, str):
|
if isinstance(sheet_id, str):
|
||||||
sheets = [sheet.name for sheet in doc.sheets]
|
sheets = [sheet.name for sheet in doc.sheets]
|
||||||
if sheet not in sheets:
|
if sheet_id not in sheets:
|
||||||
raise ValueError("There is no sheet named {}".format(sheet))
|
raise ValueError("There is no sheet named {}".format(sheet_id))
|
||||||
sheet_idx = sheets.index(sheet)
|
sheet_id = sheets.index(sheet_id) + 1
|
||||||
sheet = doc.sheets[sheet_idx]
|
sheet = doc.sheets[sheet_id - 1]
|
||||||
df_dict = OrderedDict()
|
df_dict = OrderedDict()
|
||||||
col_index = {}
|
col_index = {}
|
||||||
for i, row in enumerate(sheet.rows()):
|
for i, row in enumerate(sheet.rows()):
|
||||||
|
|
@ -42,7 +42,7 @@ def load_ods(doc, sheet, headers=True, columns=None):
|
||||||
continue
|
continue
|
||||||
elif i == 0:
|
elif i == 0:
|
||||||
columns = columns if columns else (
|
columns = columns if columns else (
|
||||||
["column_%s" % j for j in range(len(row))])
|
[f"column_{j}" for j in range(len(row))])
|
||||||
# columns as lists in a dictionary
|
# columns as lists in a dictionary
|
||||||
df_dict = OrderedDict((column, []) for column in columns)
|
df_dict = OrderedDict((column, []) for column in columns)
|
||||||
# create index for the column headers
|
# create index for the column headers
|
||||||
|
|
@ -62,8 +62,8 @@ def load_ods(doc, sheet, headers=True, columns=None):
|
||||||
def read_ods(file_or_path, sheet, headers=True, columns=None):
|
def read_ods(file_or_path, sheet, headers=True, columns=None):
|
||||||
"""
|
"""
|
||||||
This function reads in the provided ods file and converts it to a
|
This function reads in the provided ods file and converts it to a
|
||||||
dictionary. The dictionary is converted to a DataFrame. Empty rows and
|
dictionary. The dictionary is converted to a DataFrame. Trailing empty rows
|
||||||
columns are dropped from the DataFrame, before it is returned.
|
and columns are dropped from the DataFrame, before it is returned.
|
||||||
|
|
||||||
:param file_or_path: str
|
:param file_or_path: str
|
||||||
the path to the ODS file
|
the path to the ODS file
|
||||||
|
|
|
||||||
Binary file not shown.
|
|
@ -11,6 +11,7 @@ rsc = os.path.join(root, "rsc")
|
||||||
header_file = "example_headers.ods"
|
header_file = "example_headers.ods"
|
||||||
no_header_file = "example_no_headers.ods"
|
no_header_file = "example_no_headers.ods"
|
||||||
duplicated_column_names_file = "example_duplicated_column_names.ods"
|
duplicated_column_names_file = "example_duplicated_column_names.ods"
|
||||||
|
col_len_file = "example_col_lengths.ods"
|
||||||
|
|
||||||
|
|
||||||
class TestOdsReader(object):
|
class TestOdsReader(object):
|
||||||
|
|
@ -19,12 +20,14 @@ class TestOdsReader(object):
|
||||||
df = read_ods(path, 1)
|
df = read_ods(path, 1)
|
||||||
assert isinstance(df, pd.DataFrame)
|
assert isinstance(df, pd.DataFrame)
|
||||||
assert len(df) == 10
|
assert len(df) == 10
|
||||||
|
assert (len(df.columns) == 5)
|
||||||
|
|
||||||
def test_header_file_with_str(self):
|
def test_header_file_with_str(self):
|
||||||
path = os.path.join(rsc, header_file)
|
path = os.path.join(rsc, header_file)
|
||||||
df = read_ods(path, "Sheet1")
|
df = read_ods(path, "Sheet1")
|
||||||
assert isinstance(df, pd.DataFrame)
|
assert isinstance(df, pd.DataFrame)
|
||||||
assert len(df) == 10
|
assert len(df) == 10
|
||||||
|
assert (len(df.columns) == 5)
|
||||||
|
|
||||||
def test_header_file_with_cols(self):
|
def test_header_file_with_cols(self):
|
||||||
path = os.path.join(rsc, header_file)
|
path = os.path.join(rsc, header_file)
|
||||||
|
|
@ -32,13 +35,15 @@ class TestOdsReader(object):
|
||||||
df = read_ods(path, "Sheet1", columns=columns)
|
df = read_ods(path, "Sheet1", columns=columns)
|
||||||
assert list(df.columns) == columns
|
assert list(df.columns) == columns
|
||||||
assert len(df) == 10
|
assert len(df) == 10
|
||||||
|
assert (len(df.columns) == 5)
|
||||||
|
|
||||||
def test_no_header_file_no_cols(self):
|
def test_no_header_file_no_cols(self):
|
||||||
path = os.path.join(rsc, no_header_file)
|
path = os.path.join(rsc, no_header_file)
|
||||||
df = read_ods(path, 1, headers=False)
|
df = read_ods(path, 1, headers=False)
|
||||||
assert list(df.columns) == [
|
assert list(df.columns) == [
|
||||||
"column_%s" % i for i in range(len(df.columns))]
|
f"column_{i}" for i in range(len(df.columns))]
|
||||||
assert len(df) == 10
|
assert len(df) == 10
|
||||||
|
assert (len(df.columns) == 5)
|
||||||
|
|
||||||
def test_no_header_file_with_cols(self):
|
def test_no_header_file_with_cols(self):
|
||||||
path = os.path.join(rsc, no_header_file)
|
path = os.path.join(rsc, no_header_file)
|
||||||
|
|
@ -53,3 +58,10 @@ class TestOdsReader(object):
|
||||||
assert isinstance(df, pd.DataFrame)
|
assert isinstance(df, pd.DataFrame)
|
||||||
assert len(df.columns) == 4
|
assert len(df.columns) == 4
|
||||||
assert "website.1" in df.columns
|
assert "website.1" in df.columns
|
||||||
|
|
||||||
|
def test_header_file_col_len(self):
|
||||||
|
path = os.path.join(rsc, col_len_file)
|
||||||
|
df = read_ods(path, 1)
|
||||||
|
assert isinstance(df, pd.DataFrame)
|
||||||
|
assert len(df) == 10
|
||||||
|
assert (len(df.columns) == 5)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue