#
# Copyright (c) 2016-2024 Deephaven Data Labs and Patent Pending
#
""" This module implements the Column class and functions that work with Columns. """
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Optional, Sequence

import jpy

import deephaven.dtypes as dtypes
from deephaven import DHError
from deephaven.dtypes import DType
from deephaven.dtypes import _instant_array
# Java classes looked up once at import time through jpy, by fully-qualified class name.
# Used by Column.j_column_header to build a header from a column name and a QST type.
_JColumnHeader = jpy.get_type("io.deephaven.qst.column.header.ColumnHeader")
# NOTE(review): not referenced in the visible part of this module — presumably used further down; confirm.
_JColumn = jpy.get_type("io.deephaven.qst.column.Column")
# Used by Column.j_column_definition (via fromGenericType) to build the engine-side column definition.
_JColumnDefinition = jpy.get_type("io.deephaven.engine.table.ColumnDefinition")
# Nested Java enum whose Normal / Partitioning constants back the ColumnType members.
_JColumnDefinitionType = jpy.get_type("io.deephaven.engine.table.ColumnDefinition$ColumnType")
# NOTE(review): not referenced in the visible part of this module — presumably used further down; confirm.
_JPrimitiveArrayConversionUtility = jpy.get_type("io.deephaven.integrations.common.PrimitiveArrayConversionUtility")
[docs]
class ColumnType(Enum):
    """ The kinds of column a column definition can declare; each member wraps the matching Java enum constant. """

    NORMAL = _JColumnDefinitionType.Normal
    """ A regular column. """

    PARTITIONING = _JColumnDefinitionType.Partitioning
    """ A partitioning column. """

    def __repr__(self) -> str:
        # Show just the member name (e.g. NORMAL) instead of Enum's default <ColumnType.NORMAL: ...> form.
        return self.name
[docs]
@dataclass
class Column:
    """ A Column object represents a column definition in a Deephaven Table.

    Attributes:
        name (str): the column name
        data_type (DType): the data type of the column
        component_type (Optional[DType]): the component type handed to the Java column definition, None when
            there is none
        column_type (ColumnType): the kind of column, default is ColumnType.NORMAL
    """
    name: str
    data_type: DType
    # None is a legitimate value here, so the annotation is spelled as an explicit Optional.
    component_type: Optional[DType] = None
    column_type: ColumnType = ColumnType.NORMAL

    @property
    def j_column_header(self):
        """ The Java ColumnHeader built from the column name and the QST type of its data type. """
        return _JColumnHeader.of(self.name, self.data_type.qst_type)

    @property
    def j_column_definition(self):
        """ The Java ColumnDefinition built from the column name, data type, component type, and column type. """
        # Use the class exposed by the jpy type when it has one; otherwise ask the QST type for its class.
        if hasattr(self.data_type.j_type, 'jclass'):
            j_data_type = self.data_type.j_type.jclass
        else:
            j_data_type = self.data_type.qst_type.clazz()

        # A missing component type is passed to Java as None.
        j_component_type = self.component_type.qst_type.clazz() if self.component_type else None
        j_column_type = self.column_type.value
        return _JColumnDefinition.fromGenericType(self.name, j_data_type, j_component_type, j_column_type)
[docs]
def bool_col(name: str, data: Sequence) -> InputColumn:
    """ Builds an input column of Boolean values.

    Args:
        name (str): the name of the column
        data (Sequence): the column values as a sequence of compatible data, e.g. list, tuple, numpy array,
            Pandas series, etc.

    Returns:
        a new InputColumn whose data type is dtypes.bool_
    """
    return InputColumn(
        name=name,
        data_type=dtypes.bool_,
        input_data=data,
    )
[docs]
def byte_col(name: str, data: Sequence) -> InputColumn:
    """ Builds an input column of primitive byte values.

    Args:
        name (str): the name of the column
        data (Sequence): the column values as a sequence of compatible data, e.g. list, tuple, numpy array,
            Pandas series, etc.

    Returns:
        a new InputColumn whose data type is dtypes.byte
    """
    return InputColumn(
        name=name,
        data_type=dtypes.byte,
        input_data=data,
    )
[docs]
def char_col(name: str, data: Sequence) -> InputColumn:
    """ Builds an input column of primitive char values.

    Args:
        name (str): the name of the column
        data (Sequence): the column values as a sequence of compatible data, e.g. list, tuple, numpy array,
            Pandas series, etc.

    Returns:
        a new InputColumn whose data type is dtypes.char
    """
    return InputColumn(
        name=name,
        data_type=dtypes.char,
        input_data=data,
    )
[docs]
def short_col(name: str, data: Sequence) -> InputColumn:
    """ Builds an input column of primitive short values.

    Args:
        name (str): the name of the column
        data (Sequence): the column values as a sequence of compatible data, e.g. list, tuple, numpy array,
            Pandas series, etc.

    Returns:
        a new InputColumn whose data type is dtypes.short
    """
    return InputColumn(
        name=name,
        data_type=dtypes.short,
        input_data=data,
    )
[docs]
def int_col(name: str, data: Sequence) -> InputColumn:
    """ Builds an input column of primitive int values.

    Args:
        name (str): the name of the column
        data (Sequence): the column values as a sequence of compatible data, e.g. list, tuple, numpy array,
            Pandas series, etc.

    Returns:
        a new InputColumn whose data type is dtypes.int32
    """
    return InputColumn(
        name=name,
        data_type=dtypes.int32,
        input_data=data,
    )
[docs]
def long_col(name: str, data: Sequence) -> InputColumn:
    """ Builds an input column of primitive long values.

    Args:
        name (str): the name of the column
        data (Sequence): the column values as a python sequence of compatible data, could be numpy array or
            Pandas series

    Returns:
        a new InputColumn whose data type is dtypes.long
    """
    return InputColumn(
        name=name,
        data_type=dtypes.long,
        input_data=data,
    )
[docs]
def float_col(name: str, data: Sequence) -> InputColumn:
    """ Builds an input column of primitive float values.

    Args:
        name (str): the name of the column
        data (Sequence): the column values as a sequence of compatible data, e.g. list, tuple, numpy array,
            Pandas series, etc.

    Returns:
        a new InputColumn whose data type is dtypes.float32
    """
    return InputColumn(
        name=name,
        data_type=dtypes.float32,
        input_data=data,
    )
[docs]
def double_col(name: str, data: Sequence) -> InputColumn:
    """ Builds an input column of primitive double values.

    Args:
        name (str): the name of the column
        data (Sequence): the column values as a sequence of compatible data, e.g. list, tuple, numpy array,
            Pandas series, etc.

    Returns:
        a new InputColumn whose data type is dtypes.double
    """
    return InputColumn(
        name=name,
        data_type=dtypes.double,
        input_data=data,
    )
[docs]
def string_col(name: str, data: Sequence) -> InputColumn:
    """ Builds an input column of string values.

    Args:
        name (str): the name of the column
        data (Sequence): the column values as a sequence of compatible data, e.g. list, tuple, numpy array,
            Pandas series, etc.

    Returns:
        a new InputColumn whose data type is dtypes.string
    """
    return InputColumn(
        name=name,
        data_type=dtypes.string,
        input_data=data,
    )
[docs]
def datetime_col(name: str, data: Sequence) -> InputColumn:
    """ Builds an input column of Instant values.

    The input is passed through dtypes._instant_array before the column is created.

    Args:
        name (str): the name of the column
        data (Sequence): a sequence of date-time instances or values that can be converted to them
            (e.g. Instant, int nanoseconds since the Epoch, str, datetime.datetime, numpy.datetime64,
            pandas.Timestamp)

    Returns:
        a new InputColumn whose data type is dtypes.Instant
    """
    return InputColumn(
        name=name,
        data_type=dtypes.Instant,
        input_data=_instant_array(data),
    )
[docs]
def pyobj_col(name: str, data: Sequence) -> InputColumn:
    """ Builds an input column of complex, non-primitive-like Python objects.

    Args:
        name (str): the name of the column
        data (Sequence): the column values as a sequence of Python objects

    Returns:
        a new InputColumn whose data type is dtypes.PyObject
    """
    return InputColumn(
        name=name,
        data_type=dtypes.PyObject,
        input_data=data,
    )
[docs]
def jobj_col(name: str, data: Sequence) -> InputColumn:
    """ Builds an input column of Java objects.

    Args:
        name (str): the name of the column
        data (Sequence): the column values as a sequence of Java objects

    Returns:
        a new InputColumn whose data type is dtypes.JObject
    """
    return InputColumn(
        name=name,
        data_type=dtypes.JObject,
        input_data=data,
    )