# -*-coding: utf-8-*-
import logging
# Module-level logger, registered under the name "sdata".
logger = logging.getLogger("sdata")
import collections
import pandas as pd
import numpy as np
from sdata.timestamp import TimeStamp
import sdata
from sdata.data import Data
from sdata.metadata import Metadata, Attribute
import json
import os
import hashlib
import uuid
class Workbook(Data):
    """Workbook with Sheets

    A ``Workbook`` is a ``Data`` object that additionally holds an ordered,
    name-keyed collection of ``Data`` sheets.

    .. warning::
        highly experimental
    """

    def __init__(self, **kwargs):
        """Workbook with Sheets

        :param kwargs: keyword arguments forwarded unchanged to ``Data.__init__``
        """
        Data.__init__(self, **kwargs)
        # insertion-ordered mapping: sheet name -> sheet
        self._sheets = collections.OrderedDict()
        # cursor used by __next__; initialised here so that next(workbook)
        # works even if iter(workbook) was never called
        self._isheet = 0

    def create_sheet(self, name):
        """Create an empty sheet and register it in the Workbook.

        A sheet already stored under ``name`` is replaced.

        :param name: name of the new sheet; has to be a ``str``
        :return: the new ``Data`` sheet, or ``None`` if ``name`` is not a string
        """
        if not isinstance(name, str):
            logger.error("{}.create_sheet: sheet name has to be a string!".format(self.__class__.__name__))
            return None
        data = Data(name=name)
        self._sheets[name] = data
        return data

    def add_sheet(self, data):
        """Add an existing ``Data`` object as a sheet, keyed by ``data.name``.

        A sheet already stored under the same name is replaced. Objects that
        are not ``Data`` instances are rejected with an error log entry.

        :param data: sheet to add
        :return: None
        """
        if isinstance(data, (Data, sdata.Data)):
            self._sheets[data.name] = data
        else:
            logger.error("{}.add_sheet: need sdata.Data, got {}".format(self.__class__.__name__, type(data)))

    @property
    def sheetnames(self):
        """return sheet names of the workbook"""
        return list(self._sheets.keys())

    def get_sheet(self, name, default=None):
        """Get a sheet from the Workbook.

        :param name: sheet name
        :param default: value returned if no sheet is stored under ``name``
        :return: sheet data or ``default``
        """
        return self._sheets.get(name, default)

    @property
    def sheets(self):
        """all sheets of the workbook"""
        return list(self._sheets.values())

    def __getitem__(self, name):
        """Return the sheet stored under ``name``.

        Delegates to :meth:`get`, so an unknown name yields ``None`` instead
        of raising ``KeyError``.
        """
        return self.get(name)

    def __iter__(self):
        """Iterate over the sheets in insertion order.

        :return: an independent iterator over a snapshot of the sheets, so
            nested or interleaved loops over the same workbook do not share state
        """
        # Reset the legacy cursor as well, so code that calls iter(workbook)
        # and then next(workbook) still starts at the first sheet.
        self._isheet = 0
        return iter(self.sheets)

    def __next__(self):
        """Return the next sheet according to the workbook's internal cursor.

        Kept for callers that use ``next(workbook)`` directly; ``for`` loops
        use the iterator returned by :meth:`__iter__` instead.

        :return: next sheet
        :raises StopIteration: when the cursor is past the last sheet
        """
        sheets = self.sheets
        if self._isheet >= len(sheets):
            raise StopIteration
        sheet = sheets[self._isheet]
        self._isheet += 1
        return sheet

    def get(self, name, default=None):
        """Return the sheet stored under ``name``.

        :param name: sheet name
        :param default: value returned if there is no such sheet
        :return: sheet data or ``default``
        """
        sheet = self.get_sheet(name)
        return default if sheet is None else sheet

    def keys(self):
        """
        :return: list of sheet names
        """
        return list(self._sheets.keys())

    def values(self):
        """
        :return: list of sheets
        """
        return list(self._sheets.values())

    def items(self):
        """
        :return: list of (sheet name, sheet) tuples
        """
        return list(self._sheets.items())

    def to_hdf5(self, filepath, **kwargs):
        """Export the Workbook and all of its sheets to an hdf5 file.

        The workbook's own metadata, table and description are stored under
        ``metadata``, ``table`` and ``description``; every sheet is stored
        below ``/sheets/uuid_<sheet.uuid>/`` with the same three keys.
        The file is always opened with ``mode="w"``.

        :param filepath: path of the hdf5 file to write
        :param complib: default='zlib' ['zlib', 'lzo', 'bzip2', 'blosc', 'blosc:blosclz', 'blosc:lz4', 'blosc:lz4hc', 'blosc:snappy', 'blosc:zlib', 'blosc:zstd']
        :param complevel: default=9 [0-9]
        :param kwargs: further keyword arguments passed to ``pandas.HDFStore``
        :return: None
        """
        # a workbook without a table is stored with an empty frame
        if not isinstance(self.df, pd.DataFrame):
            df = pd.DataFrame()
        else:
            df = self.df

        kwargs["mode"] = "w"
        # an explicit None is treated like a missing argument
        if kwargs.get("complib") is None:
            kwargs["complib"] = "zlib"
        if kwargs.get("complevel") is None:
            kwargs["complevel"] = 9

        with pd.HDFStore(filepath, **kwargs) as hdf:
            hdf.put('metadata', self.metadata.df, format='fixed', data_columns=True)
            hdf.put('table', df, format='fixed', data_columns=True)
            hdf.put('description', self.description_to_df(), format='fixed', data_columns=True)

            for sheet in self.sheets:
                if not isinstance(sheet.df, pd.DataFrame):
                    logger.warning("no df in sheet!")
                    sheet_df = pd.DataFrame()
                else:
                    sheet_df = sheet.df
                node = f'/sheets/uuid_{sheet.uuid}'
                hdf.put(f'{node}/metadata', sheet.metadata.df, format='fixed', data_columns=True)
                hdf.put(f'{node}/table', sheet_df, format='fixed', data_columns=True)
                hdf.put(f'{node}/description', sheet.description_to_df(), format='fixed',
                        data_columns=True)

    @staticmethod
    def _read_data(hdf, nodepath, factory):
        """Rebuild one object from the frames stored below ``nodepath``.

        :param hdf: open ``pandas.HDFStore``
        :param nodepath: group path without trailing slash ('' for the file root)
        :param factory: callable accepting ``metadata=`` and ``table=`` (``Data`` or a ``Workbook`` class)
        :return: the object created by ``factory``
        """
        df_metadata = hdf.get(f"{nodepath}/metadata")
        df_table = hdf.get(f"{nodepath}/table")
        df_description = hdf.get(f"{nodepath}/description")
        metadata = Metadata.from_dataframe(df_metadata)
        data = factory(metadata=metadata, table=df_table)
        data.metadata = metadata
        data.description_from_df(df_description)
        return data

    @classmethod
    def from_hdf5(cls, filepath, **kwargs):
        """Import a Workbook, including its sheets, from an hdf5 file.

        :param filepath: path of an hdf5 file as written by :meth:`to_hdf5`
        :param kwargs: accepted for interface compatibility; currently unused
        :return: the Workbook, or ``None`` (after logging an error) if
            ``filepath`` does not exist
        """
        if not os.path.exists(filepath):
            logger.error("hdf5 file '{}' not available".format(filepath))
            return None

        # the store is only read, so read-only mode is sufficient and also
        # works for files without write permission
        with pd.HDFStore(filepath, mode="r") as hdf:
            wb = cls._read_data(hdf, "", cls)

            # get_node returns None if the group does not exist, which is the
            # case for a workbook that was saved without any sheets
            sheets_node = hdf.get_node("/sheets")
            if sheets_node is not None:
                for group_name in sheets_node._v_groups:
                    wb.add_sheet(cls._read_data(hdf, f"/sheets/{group_name}", Data))
        return wb