sdata api

class sdata.Blob(**kwargs)[source]

Bases: Data

Binary Large Object as reference

Warning

highly experimental

ATTR_NAMES = []

SDATA_ATTRIBUTES = ['!sdata_version', '!sdata_name', '!sdata_uuid', '!sdata_class', '!sdata_parent', '!sdata_project', '!sdata_ctime', '!sdata_mtime']

SDATA_CLASS = '!sdata_class'

SDATA_CTIME = '!sdata_ctime'

SDATA_MTIME = '!sdata_mtime'

SDATA_NAME = '!sdata_name'

SDATA_PARENT = '!sdata_parent'

SDATA_PROJECT = '!sdata_project'

SDATA_UUID = '!sdata_uuid'

SDATA_VERSION = '!sdata_version'

VAULT_TYPES = ['filesystem', 'hdf5', 'db', 'www']

add_data(data): add data, if data.name is unique

property asciiname

static clear_folder(path)

delete subfolder in export folder

Parameters: path – path
Returns: None

clear_group(): clear group dict

copy(**kwargs)

create a copy of the Data object

data = sdata.Data(name="data", uuid="38b26864e7794f5182d38459bab85842", description="this is remarkable")
datac = data.copy()
print("data  {0.uuid}".format(data))
print("datac {0.uuid}".format(datac))
print("datac.metadata['!sdata_parent'] {0.value}".format(datac.metadata["!sdata_parent"]))

data  38b26864e7794f5182d38459bab85842
datac 2c4eb15900af435d8cd9c8573ca777e2
datac.metadata['!sdata_parent'] 38b26864e7794f5182d38459bab85842

Returns: Data

describe()

Generate descriptive info of the data

df = pd.DataFrame([1,2,3])
data = sdata.Data(name='my name',
            uuid='38b26864e7794f5182d38459bab85842',
            table=df,
            description="A remarkable description")
data.describe()

                0
metadata        3
table_rows      3
table_columns   1
description    24

Returns: pd.DataFrame

property description: description of the object

description_from_df(df)

set description from DataFrame of lines

Returns

description_to_df()

get description as DataFrame

Returns: DataFrame of description lines

property df: table object(pandas.DataFrame)

dir()

returns a nested list of all child objects

Returns: list of sdata.Data objects

exists(vault='filesystem')[source]

Test whether an object exists at blob.url.

Parameters: vault –
Returns

property filename

classmethod from_csv(s=None, filepath=None, sep=';')

import sdata.Data from csv

Parameters

s – csv str
filepath –
sep – separator (default=”;”)

Returns

sdata.Data

classmethod from_folder(path)

create an sdata object instance from a folder

Parameters: path –
Returns

classmethod from_hdf5(filepath, **kwargs)

import sdata.Data from hdf5

Parameters: filepath –
Returns: sdata.Data

classmethod from_json(s=None, filepath=None)

create Data from json str or file

Parameters

s – json str
filepath –

Returns

sdata.Data

classmethod from_sqlite(filepath, **kwargs)

import sdata.Data from sqlite

Parameters

filepath –
kwargs –

Returns

sdata.Data

classmethod from_url(url=None, stype=None)

create Data from url

Parameters

url – url
stype – “json” (“xlsx”, “csv”)

Returns

sdata.Data

classmethod from_xlsx(filepath)

import sdata.Data from xlsx

Parameters: filepath –
Returns

gen_uuid()

generate new uuid string

Returns: str, e.g. ‘5fa04a3738e4431dbc34eccea5e795c4’

gen_uuid_from_state()

generate the same uuid for the same data

Returns: uuid

get_data_by_name(name): return the data object with the given name

get_data_by_uuid(uid): get data by uuid

get_download_link(): generate a link allowing the data in a given pandas DataFrame to be downloaded (in: DataFrame, out: href string)

get_group()

property group: get group

items()

get all child objects

Returns: [(child uuid, child objects), ]

keys()

get all child objects uuids

Returns: list of uuid’s

property md5

calculate the md5 hash of the blob

Returns: md5

classmethod metadata_from_hdf5(filepath, **kwargs)

import sdata.Data.Metadata from hdf5

Parameters: filepath –
Returns: sdata.Data

property name: name of the object

property osname

Returns: os compatible name (ascii?)

property prefix: prefix of the object name

property project: name of the project

refactor(fix_columns=True, add_table_metadata=True)

helper function

to clean up dataframe column names
to define Attributes for all dataframe columns

property sha1

calculate the sha1 hash of the blob

Returns: sha1

property sha3_256

Return a SHA3 hash of the sData object with a digest length of 32 bytes (256 bits).

sdata.Data(name="1", uuid=sdata.uuid_from_str("1")).sha3_256

'c468e659891eb5dea6eb6baf73f51ca0688792bf9ad723209dc22730903f6efa'

Returns: hashlib.sha3_256.hexdigest()

property sha3_256_table

Return a SHA3 hash of the sData.table object with a digest length of 32 bytes (256 bits).

sdata.Data(name="1", uuid=sdata.uuid_from_str("1")).sha3_256_table

'c468e659891eb5dea6eb6baf73f51ca0688792bf9ad723209dc22730903f6efa'

Returns: hashlib.sha3_256.hexdigest()

property table: table object(pandas.DataFrame)

to_csv(filepath=None)

export sdata.Data to csv

Parameters: filepath –
Returns

to_folder(path, dtype='csv')

export data to folder

Parameters

path –
dtype –

Returns

to_hdf5(filepath, **kwargs)

export sdata.Data to hdf5

Parameters

filepath –
complib – default=’zlib’ [‘zlib’, ‘lzo’, ‘bzip2’, ‘blosc’, ‘blosc:blosclz’, ‘blosc:lz4’, ‘blosc:lz4hc’, ‘blosc:snappy’, ‘blosc:zlib’, ‘blosc:zstd’]
complevel – default=9 [0-9]

Returns

to_html(filepath, xlsx=True, style=None)

export Data to html

Parameters

filepath –
xlsx –
style –

Returns

to_json(filepath=None)

export Data in json format

Parameters: filepath – export file path (default:None)
Returns: json str

to_sqlite(filepath, **kwargs)

export sdata.Data to sqlite

Parameters

filepath –
kwargs –

Returns

to_xlsx(filepath=None)

export attributes and data to excel

Parameters: filepath –
Returns

to_xlsx_base64()

get xlsx as byteio base64 encoded

Returns: base64

to_xlsx_byteio()

get xlsx as byteio

Returns: BytesIO

tree_folder(dir, padding=' ', print_files=True, hidden_files=False, last=True): print tree folder structure

update_hash(fh, hashobject, buffer_size=65536)[source]

A hash represents the object used to calculate a checksum of a string of information.

hashobject = hashlib.md5()
df = pd.DataFrame([1,2,3])
url = "/tmp/blob.csv"
df.to_csv(url)
blob = sdata.Blob(url=url)
fh = open(url, "rb")
blob.update_hash(fh, hashobject)
hashobject.hexdigest()

Parameters

fh – file handle
hashobject – hash object, e.g. hashlib.sha1()
buffer_size – buffer size (default buffer_size=65536)

Returns

hashobject

update_mtime()

update modification time

Returns

property url: url of the blob

property uuid: uuid of the object

values()

get all child objects

Returns: list of child objects

verify_attributes(): check mandatory attributes

class sdata.Data(**kwargs)[source]

Bases: object

Base sdata object

ATTR_NAMES = []

SDATA_ATTRIBUTES = ['!sdata_version', '!sdata_name', '!sdata_uuid', '!sdata_class', '!sdata_parent', '!sdata_project', '!sdata_ctime', '!sdata_mtime']

SDATA_CLASS = '!sdata_class'

SDATA_CTIME = '!sdata_ctime'

SDATA_MTIME = '!sdata_mtime'

SDATA_NAME = '!sdata_name'

SDATA_PARENT = '!sdata_parent'

SDATA_PROJECT = '!sdata_project'

SDATA_UUID = '!sdata_uuid'

SDATA_VERSION = '!sdata_version'

add_data(data)[source]: add data, if data.name is unique

property asciiname

static clear_folder(path)[source]

delete subfolder in export folder

Parameters: path – path
Returns: None

clear_group()[source]: clear group dict

copy(**kwargs)[source]

create a copy of the Data object

data = sdata.Data(name="data", uuid="38b26864e7794f5182d38459bab85842", description="this is remarkable")
datac = data.copy()
print("data  {0.uuid}".format(data))
print("datac {0.uuid}".format(datac))
print("datac.metadata['!sdata_parent'] {0.value}".format(datac.metadata["!sdata_parent"]))

data  38b26864e7794f5182d38459bab85842
datac 2c4eb15900af435d8cd9c8573ca777e2
datac.metadata['!sdata_parent'] 38b26864e7794f5182d38459bab85842

Returns: Data

describe()[source]

Generate descriptive info of the data

df = pd.DataFrame([1,2,3])
data = sdata.Data(name='my name',
            uuid='38b26864e7794f5182d38459bab85842',
            table=df,
            description="A remarkable description")
data.describe()

                0
metadata        3
table_rows      3
table_columns   1
description    24

Returns: pd.DataFrame

property description: description of the object

description_from_df(df)[source]

set description from DataFrame of lines

Returns

description_to_df()[source]

get description as DataFrame

Returns: DataFrame of description lines

property df: table object(pandas.DataFrame)

dir()[source]

returns a nested list of all child objects

Returns: list of sdata.Data objects

property filename

classmethod from_csv(s=None, filepath=None, sep=';')[source]

import sdata.Data from csv

Parameters

s – csv str
filepath –
sep – separator (default=”;”)

Returns

sdata.Data

classmethod from_folder(path)[source]

create an sdata object instance from a folder

Parameters: path –
Returns

classmethod from_hdf5(filepath, **kwargs)[source]

import sdata.Data from hdf5

Parameters: filepath –
Returns: sdata.Data

classmethod from_json(s=None, filepath=None)[source]

create Data from json str or file

Parameters

s – json str
filepath –

Returns

sdata.Data

classmethod from_sqlite(filepath, **kwargs)[source]

import sdata.Data from sqlite

Parameters

filepath –
kwargs –

Returns

sdata.Data

classmethod from_url(url=None, stype=None)[source]

create Data from url

Parameters

url – url
stype – “json” (“xlsx”, “csv”)

Returns

sdata.Data

classmethod from_xlsx(filepath)[source]

import sdata.Data from xlsx

Parameters: filepath –
Returns

gen_uuid()[source]

generate new uuid string

Returns: str, e.g. ‘5fa04a3738e4431dbc34eccea5e795c4’

gen_uuid_from_state()[source]

generate the same uuid for the same data

Returns: uuid

get_data_by_name(name)[source]: return the data object with the given name

get_data_by_uuid(uid)[source]: get data by uuid

get_download_link()[source]: generate a link allowing the data in a given pandas DataFrame to be downloaded (in: DataFrame, out: href string)

get_group()[source]

property group: get group

items()[source]

get all child objects

Returns: [(child uuid, child objects), ]

keys()[source]

get all child objects uuids

Returns: list of uuid’s

classmethod metadata_from_hdf5(filepath, **kwargs)[source]

import sdata.Data.Metadata from hdf5

Parameters: filepath –
Returns: sdata.Data

property name: name of the object

property osname

Returns: os compatible name (ascii?)

property prefix: prefix of the object name

property project: name of the project

refactor(fix_columns=True, add_table_metadata=True)[source]

helper function

to clean up dataframe column names
to define Attributes for all dataframe columns

property sha3_256

Return a SHA3 hash of the sData object with a digest length of 32 bytes (256 bits).

sdata.Data(name="1", uuid=sdata.uuid_from_str("1")).sha3_256

'c468e659891eb5dea6eb6baf73f51ca0688792bf9ad723209dc22730903f6efa'

Returns: hashlib.sha3_256.hexdigest()

property sha3_256_table

Return a SHA3 hash of the sData.table object with a digest length of 32 bytes (256 bits).

sdata.Data(name="1", uuid=sdata.uuid_from_str("1")).sha3_256_table

'c468e659891eb5dea6eb6baf73f51ca0688792bf9ad723209dc22730903f6efa'

Returns: hashlib.sha3_256.hexdigest()

property table: table object(pandas.DataFrame)

to_csv(filepath=None)[source]

export sdata.Data to csv

Parameters: filepath –
Returns

to_folder(path, dtype='csv')[source]

export data to folder

Parameters

path –
dtype –

Returns

to_hdf5(filepath, **kwargs)[source]

export sdata.Data to hdf5

Parameters

filepath –
complib – default=’zlib’ [‘zlib’, ‘lzo’, ‘bzip2’, ‘blosc’, ‘blosc:blosclz’, ‘blosc:lz4’, ‘blosc:lz4hc’, ‘blosc:snappy’, ‘blosc:zlib’, ‘blosc:zstd’]
complevel – default=9 [0-9]

Returns

to_html(filepath, xlsx=True, style=None)[source]

export Data to html

Parameters

filepath –
xlsx –
style –

Returns

to_json(filepath=None)[source]

export Data in json format

Parameters: filepath – export file path (default:None)
Returns: json str

to_sqlite(filepath, **kwargs)[source]

export sdata.Data to sqlite

Parameters

filepath –
kwargs –

Returns

to_xlsx(filepath=None)[source]

export attributes and data to excel

Parameters: filepath –
Returns

to_xlsx_base64()[source]

get xlsx as byteio base64 encoded

Returns: base64

to_xlsx_byteio()[source]

get xlsx as byteio

Returns: BytesIO

tree_folder(dir, padding=' ', print_files=True, hidden_files=False, last=True)[source]: print tree folder structure

update_hash(hashobject)[source]

A hash represents the object used to calculate a checksum of a string of information.

data = sdata.Data()

md5 = hashlib.md5()
data.update_hash(md5)
md5.hexdigest()
'bbf323bdcb0bf961803b5504a8a60d69'

sha1 = hashlib.sha1()
data.update_hash(sha1)
sha1.hexdigest()
'3c59368c7735c1ecaf03ebd4c595bb6e73e90f0c'

hashobject = hashlib.sha3_256()
data.update_hash(hashobject).hexdigest()
'c468e659891eb5dea6eb6baf73f51ca0688792bf9ad723209dc22730903f6efa'

data.update_hash(hashobject).digest()
b'M8...'

Parameters: hashobject – hash object, e.g. hashlib.sha1()
Returns: hashobject

update_mtime()[source]

update modification time

Returns

property uuid: uuid of the object

values()[source]

get all child objects

Returns: list of child objects

verify_attributes()[source]: check mandatory attributes

class sdata.metadata.Attribute(name, value, **kwargs)[source]

Bases: object

Attribute class

DTYPES = {'bool': <class 'bool'>, 'float': <class 'float'>, 'int': <class 'int'>, 'str': <class 'str'>, 'timestamp': <class 'sdata.timestamp.TimeStamp'>}

property description: Attribute description

property dtype: Attribute type str

static guess_dtype(value)[source]

returns dtype class

Parameters: value –
Returns: __class__

property label: Attribute label

property name: Attribute name

property required: Attribute required

to_csv(prefix='', sep=',', quote=None)[source]

export Attribute to csv

Parameters

prefix –
sep –
quote –

Returns

to_dict()[source]: returns dict of attribute items

to_list()[source]

property unit: Attribute unit

property value: Attribute value

class sdata.metadata.Metadata(**kwargs)[source]

Bases: object

Metadata container class

each Metadata entry has a

name (256)
value
unit
description
type (int, str, float, bool, timestamp)

ATTRIBUTEKEYS = ['name', 'value', 'dtype', 'unit', 'description', 'label', 'required']

add(name, value=None, **kwargs)[source]

add Attribute

Parameters

name –
value –
kwargs –

Returns

property attributes: returns Attributes

copy()[source]: returns a deep copy

property df: create dataframe

classmethod from_csv(filepath)[source]: create metadata from csv file

classmethod from_dataframe(df)[source]: create metadata from dataframe

classmethod from_dict(d)[source]: setup metadata from dict

classmethod from_json(jsonstr=None, filepath=None)[source]

create metadata from json file

Parameters

jsonstr – json str
filepath – filepath to json file

Returns

Metadata

classmethod from_list(mlist)[source]

create metadata from a list of Attribute values

[['force_x', 1.2, 'float', 'kN', 'force in x-direction'],
 ['force_y', 3.1, 'float', 'N', 'force in y-direction', 'label', True]]

get(name, default=None)[source]

get_attr(name)[source]: get Attribute by name

get_sdict()[source]: get sdata attribute as dict

get_udict()[source]: get user attribute as dict

static guess_dtype_from_value(value)[source]

guess dtype from value, e.g. ‘1.23’ -> ‘float’ ‘otto1.23’ -> ‘str’ 1 -> ‘int’ False -> ‘bool’

Parameters: value –
Returns: dtype(value), dtype [‘int’, ‘float’, ‘bool’, ‘str’]

guess_value_dtype()[source]

try to cast the Attribute values, e.g. str -> float

Returns

is_complete()[source]: check all required attributes

items()[source]

Returns: list of Attribute items (keys, values)

keys()[source]

Returns: list of Attribute names

property name: Name of the Metadata

relabel(name, newname)[source]

relabel Attribute

Parameters

name – old attribute name
newname – new attribute name

Returns

None

property required_attributes

property sdata_attributes

property sdf: create dataframe for sdata attributes

property sdft: create transposed dataframe for sdata attributes

set_attr(name='N.N.', value=None, **kwargs)[source]: set Attribute

set_unit_from_name(add_description=True, fix_name=True)[source]

try to extract unit from attribute name

Returns

property sha3_256

Return a SHA3 hash of the Metadata with a digest length of 32 bytes (256 bits).

Returns: hashlib.sha3_256.hexdigest()

property size: return number of Attributes

to_csv(filepath=None, sep=',', header=False)[source]: serialize to csv

to_csv_header(prefix='#', sep=',', filepath=None)[source]: serialize to csv

to_dataframe()[source]: create dataframe

to_dict()[source]: serialize attributes to dict

to_json(filepath=None)[source]

create a json

Parameters: filepath – default None
Returns: json str

to_list()[source]

create a nested list of Attribute values

Returns: list

property udf: create dataframe for user attributes

update_from_dict(d)[source]

set attributes from dict

Parameters: d – dict
Returns

update_from_usermetadata(metadata)[source]: update user metadata from metadata

update_hash(hashobject)[source]

A hash represents the object used to calculate a checksum of a string of information.

hashobject = hashlib.sha3_256()
metadata = Metadata()
metadata.update_hash(hashobject)
hashobject.hexdigest()

Parameters: hashobject – hash object, e.g. hashlib.sha3_256()
Returns: hash_function().hexdigest()

property user_attributes

values()[source]

Returns: list of Attribute values

sdata.metadata.extract_name_unit(value)[source]

extract name and unit from a combined string

value: 'Target Strain Rate (1/s) '
name : 'Target Strain Rate'
unit : '1/s'

value: 'Gauge Length [mm] monkey '
name : 'Gauge Length'
unit : 'mm'

value: 'Gauge Length <mm> whatever '
name : 'Gauge Length'
unit : 'mm'

Parameters: value – string, e.g. ‘Length <mm> whatever’
Returns: name, unit