Module csv_lua.sym
Sym module deals with symbolic columns of a CSV
Expand source code
"""Sym module deals with symbolic columns of a CSV"""
import math
class Sym:
    """Summarize a stream of symbols from one symbolic CSV column.

    Keeps a running count of every symbol seen so that the most common
    symbol (``mid``) and the Shannon entropy (``div``) can be reported at
    any time. The placeholder ``"?"`` is treated as a missing value and is
    not counted.
    """

    size: int
    """Number of symbols held"""
    col_pos: int
    """Position of symbolic column"""
    name: str
    """Name of the symbolic column"""
    _has: dict
    """Counts of occurrence for each symbol"""

    def __init__(self, col_pos=0, name="") -> None:
        """Create an empty summary.

        Args:
            col_pos: Position of the column; coerced with ``int()``.
            name: Name of the column.
        """
        self.size = 0
        self.col_pos = int(col_pos)
        self.name = name
        self._has = {}

    def add(self, symbol: str) -> None:
        """Add one symbol to the stream; ``"?"`` is ignored.

        Args:
            symbol: The symbol to count.
        """
        if symbol != "?":
            self.size += 1
            self._has[symbol] = self._has.get(symbol, 0) + 1

    def mid(self) -> str:
        """Return the mode, i.e. the most frequently seen symbol.

        Ties go to the symbol that was seen first. Returns ``None`` when no
        symbol has been added yet.
        """
        # max() yields the first maximal key in insertion order, which is
        # the same tie-break as a strict ``count > most`` scan.
        return max(self._has, key=self._has.get, default=None)

    def div(self) -> float:
        """Return the Shannon entropy of the stream, in bits.

        Returns ``0.0`` for an empty stream.
        """
        # Start from a float so an empty stream yields 0.0 rather than the
        # int 0, matching the declared return type.
        entropy = 0.0
        for count in self._has.values():
            if count > 0:
                probability = count / self.size
                entropy -= probability * math.log2(probability)
        return entropy
Classes
class Sym (col_pos=0, name='')-
Sym summarizes a stream of symbols.
Expand source code
class Sym: """ Sym summarizes a stream of symbols. """ size: int """Number of symbols held""" col_pos: int """Position of symbolic column""" name: str """Name of the symbolic column""" _has: dict """Counts of occurrence for each symbol""" def __init__(self, col_pos=0, name="") -> None: self.size = 0 self.col_pos = int(col_pos) self.name = name self._has = {} def add(self, symbol: str) -> None: """Add method adds a symbol to stream.""" if symbol != "?": self.size += 1 self._has[symbol] = self._has.get(symbol, 0) + 1 def mid(self) -> str: """Mid method calculates the middle of the symbol stream.""" most = -1 mode = None for key, count in self._has.items(): if count > most: mode = key most = count return mode def div(self) -> float: """Div method calculates the diversity of the symbol stream.""" entropy = 0 for _, count in self._has.items(): if count > 0: probability = count / self.size entropy -= probability * math.log2(probability) return entropy
Class variables
var col_pos : int-
Position of symbolic column
var name : str-
Name of the symbolic column
var size : int-
Number of symbols held
Methods
def add(self, symbol: str) ‑> None-
Add method adds a symbol to stream.
Expand source code
def add(self, symbol: str) -> None: """Add method adds a symbol to stream.""" if symbol != "?": self.size += 1 self._has[symbol] = self._has.get(symbol, 0) + 1
def div(self) ‑> float-
Div method calculates the diversity of the symbol stream, measured as its Shannon entropy in bits.
Expand source code
def div(self) -> float: """Div method calculates the diversity of the symbol stream.""" entropy = 0 for _, count in self._has.items(): if count > 0: probability = count / self.size entropy -= probability * math.log2(probability) return entropy
def mid(self) ‑> str-
Mid method calculates the middle of the symbol stream, i.e. its mode (the most frequent symbol).
Expand source code
def mid(self) -> str: """Mid method calculates the middle of the symbol stream.""" most = -1 mode = None for key, count in self._has.items(): if count > most: mode = key most = count return mode