diff --git a/.gitignore b/.gitignore index e342915..167d464 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,3 @@ venv/ # vim config .vim/ - -*.ods -.py39/ diff --git a/example_skiprows.ods b/example_skiprows.ods new file mode 100644 index 0000000..ef5b079 Binary files /dev/null and b/example_skiprows.ods differ diff --git a/pandas_ods_reader/algo.py b/pandas_ods_reader/algo.py index 6d391a9..cbf2c31 100644 --- a/pandas_ods_reader/algo.py +++ b/pandas_ods_reader/algo.py @@ -1,5 +1,4 @@ from collections import OrderedDict -from unittest import skip import pandas as pd @@ -67,13 +66,8 @@ def parse_data(backend, rows, headers=True, columns=None, skiprows=None): return df -def read_data( - backend, file_or_path, sheet_id, - headers=True, columns=None, skiprows=0 -): +def read_data(backend, file_or_path, sheet_id, headers=True, columns=None, skiprows=0): doc = backend.get_doc(file_or_path) rows = backend.get_rows(doc, sheet_id) - df = parse_data( - backend, rows, headers=headers, columns=columns, skiprows=skiprows - ) + df = parse_data(backend, rows, headers=headers, columns=columns, skiprows=skiprows) return sanitize_df(df) diff --git a/pandas_ods_reader/main.py b/pandas_ods_reader/main.py index 8de5548..63f0370 100644 --- a/pandas_ods_reader/main.py +++ b/pandas_ods_reader/main.py @@ -36,5 +36,7 @@ def read_ods(file_or_path, sheet=1, headers=True, columns=None, skiprows=0): backend, file_or_path, sheet, - headers=headers, columns=columns, skiprows=skiprows + headers=headers, + columns=columns, + skiprows=skiprows, ) diff --git a/pandas_ods_reader/parsers/fods.py b/pandas_ods_reader/parsers/fods.py index d950bc8..a5e2c05 100644 --- a/pandas_ods_reader/parsers/fods.py +++ b/pandas_ods_reader/parsers/fods.py @@ -42,7 +42,8 @@ def get_rows(doc, sheet_id): ) sheet = get_sheet(spreadsheet, sheet_id) rows = sheet.findall(TABLE_ROW_TAG, namespaces=namespaces) - return rows + for row in rows: + yield row def is_float(cell): diff --git a/poetry.lock b/poetry.lock index 029afd8..a15505a 100644 --- a/poetry.lock +++ b/poetry.lock @@ -15,10 +15,10 @@ optional = false python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" [package.extras] -dev = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface", "furo", "sphinx", "sphinx-notfound-page", "pre-commit"] -docs = ["furo", "sphinx", "zope.interface", "sphinx-notfound-page"] -tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins", "zope.interface"] -tests_no_zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "pympler", "pytest (>=4.3.0)", "six", "mypy", "pytest-mypy-plugins"] +dev = ["coverage[toml] (>=5.0.2)", "furo", "hypothesis", "mypy", "pre-commit", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six", "sphinx", "sphinx-notfound-page", "zope.interface"] +docs = ["furo", "sphinx", "sphinx-notfound-page", "zope.interface"] +tests = ["coverage[toml] (>=5.0.2)", "hypothesis", "mypy", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six", "zope.interface"] +tests-no-zope = ["coverage[toml] (>=5.0.2)", "hypothesis", "mypy", "pympler", "pytest (>=4.3.0)", "pytest-mypy-plugins", "six"] [[package]] name = "black" @@ -38,7 +38,7 @@ tomli = ">=0.2.6,<2.0.0" typed-ast = {version = ">=1.4.2", markers = "python_version < \"3.8\""} typing-extensions = [ {version = ">=3.10.0.0", markers = "python_version < \"3.10\""}, - {version = "!=3.10.0.1", markers = "python_version >= \"3.10\""}, + {version = ">=3.10.0.0,<3.10.0.1 || >3.10.0.1", markers = "python_version >= \"3.10\""}, ] [package.extras] @@ -89,9 +89,9 @@ typing-extensions = {version = ">=3.6.4", markers = "python_version < \"3.8\""} zipp = ">=0.5" [package.extras] -docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"] +docs = ["jaraco.packaging (>=8.2)", "rst.linker (>=1.9)", "sphinx"] perf = ["ipython"] -testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "packaging", "pep517", "pyfakefs", "flufl.flake8", "pytest-perf (>=0.9.2)", "pytest-black (>=0.3.7)", "pytest-mypy", "importlib-resources (>=1.3)"] +testing = ["flufl.flake8", "importlib-resources (>=1.3)", "packaging", "pep517", "pyfakefs", "pytest (>=4.6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-flake8", "pytest-mypy", "pytest-perf (>=0.9.2)"] [[package]] name = "iniconfig" @@ -112,7 +112,7 @@ python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, != 3.4.*" [package.extras] cssselect = ["cssselect (>=0.7)"] html5 = ["html5lib"] -htmlsoup = ["beautifulsoup4"] +htmlsoup = ["BeautifulSoup4"] source = ["Cython (>=0.29.7)"] [[package]] @@ -156,7 +156,7 @@ python-dateutil = ">=2.7.3" pytz = ">=2017.2" [package.extras] -test = ["pytest (>=4.0.2)", "pytest-xdist", "hypothesis (>=3.58)"] +test = ["hypothesis (>=3.58)", "pytest (>=4.0.2)", "pytest-xdist"] [[package]] name = "pathspec" @@ -307,13 +307,13 @@ optional = false python-versions = ">=3.6" [package.extras] -docs = ["sphinx", "jaraco.packaging (>=8.2)", "rst.linker (>=1.9)"] -testing = ["pytest (>=4.6)", "pytest-checkdocs (>=2.4)", "pytest-flake8", "pytest-cov", "pytest-enabler (>=1.0.1)", "jaraco.itertools", "func-timeout", "pytest-black (>=0.3.7)", "pytest-mypy"] +docs = ["jaraco.packaging (>=8.2)", "rst.linker (>=1.9)", "sphinx"] +testing = ["func-timeout", "jaraco.itertools", "pytest (>=4.6)", "pytest-black (>=0.3.7)", "pytest-checkdocs (>=2.4)", "pytest-cov", "pytest-enabler (>=1.0.1)", "pytest-flake8", "pytest-mypy"] [metadata] lock-version = "1.1" python-versions = "^3.7" -content-hash = "d9c435fd7f0ded3ef3a28aa6a93b4b47ea1ccbd9cda9c0133bd33c405fe53706" +content-hash = "f0f7573338f20f81f960b8c0f670e525b77b081975a7f8918b11d3e7f65cec57" [metadata.files] atomicwrites = [ diff --git a/test.ipynb b/test.ipynb deleted file mode 100644 index 0c02ddb..0000000 --- a/test.ipynb +++ /dev/null @@ -1,176 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [], - "source": [ - "%load_ext autoreload\n", - "%autoreload 2" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "from pandas_ods_reader import read_ods" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [], - "source": [ - "file_path = \"Dicionário_Microdados_Enem_2021.ods\"" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
| \n", - " | column.0 | \n", - "column.1 | \n", - "column.2 | \n", - "column.3 | \n", - "column.4 | \n", - "column.5 | \n", - "
|---|---|---|---|---|---|---|
| 0 | \n", - "NU_INSCRICAO | \n", - "Número de inscrição1 | \n", - "None | \n", - "None | \n", - "12.0 | \n", - "Numérica | \n", - "
| 1 | \n", - "NU_ANO | \n", - "Ano do Enem | \n", - "None | \n", - "None | \n", - "4.0 | \n", - "Numérica | \n", - "
| 2 | \n", - "TP_FAIXA_ETARIA | \n", - "Faixa etária2 | \n", - "1 | \n", - "Menor de 17 anos | \n", - "2.0 | \n", - "Numérica | \n", - "
| 3 | \n", - "None | \n", - "None | \n", - "2 | \n", - "17 anos | \n", - "NaN | \n", - "None | \n", - "
| 4 | \n", - "None | \n", - "None | \n", - "3 | \n", - "18 anos | \n", - "NaN | \n", - "None | \n", - "