Module csv_lua.num
Num module deals with numeric columns in a CSV
Expand source code
"""Num module deals with numeric columns in a CSV"""
import random
from csv_lua.settings import settings
from csv_lua.util import percentile
class Num:
"""
Num summarizes a stream of numbers.
"""
size: int
"""Number of numbers held"""
col_pos: int
"""Position of the numeric column"""
name: str
"""Name of the numeric column"""
_has: list[float]
"""List of numbers held"""
low: int | float
"""Lowest number held"""
high: int | float
"""Highest number held"""
is_sorted: bool
"""Whether the stream is sorted"""
w: int
"""Whether the numeric column is increasing (1) or decreasing (-1)"""
def __init__(self, col_pos=0, name=""):
"""Create a numeric column"""
self.size = 0
self.col_pos = int(col_pos)
self.name = name
self._has = []
self.low = float("inf")
self.high = float("-inf")
self.is_sorted = True
self.w = 1
def __str__(self):
return str(
{
"at": self.col_pos,
"hi": self.high,
"lo": self.low,
"isSorted": self.is_sorted,
"n": self.size,
"name": self.name,
"w": self.w,
}
)
def nums(self):
"""Nums method returns the numbers within the stream."""
if not self.is_sorted:
self.is_sorted = True
self._has.sort()
return self._has
def add(self, value: str, nums: int = 512):
"""Add method adds a number to the stream."""
if value != "?":
value = int(value)
settings["nums"] = nums
self.size += 1
self.low = min(value, self.low)
self.high = max(value, self.high)
if len(self._has) < settings["nums"]:
self._has.append(value)
elif random.random() < settings["nums"] / self.size:
pos = random.randint(0, len(self._has) - 1)
self._has[pos] = value
self.is_sorted = False
def div(self):
"""Div method returns the diversity of the stream."""
items = self.nums()
return (percentile(items, 0.9) - percentile(items, 0.1)) / 2.56
def mid(self):
"""Mid method returns the middle of the stream."""
return percentile(self.nums(), 0.5)
Classes
class Num (col_pos=0, name='')-
Num summarizes a stream of numbers.
Create a numeric column
Expand source code
class Num: """ Num summarizes a stream of numbers. """ size: int """Number of numbers held""" col_pos: int """Position of the numeric column""" name: str """Name of the numeric column""" _has: list[float] """List of numbers held""" low: int | float """Lowest number held""" high: int | float """Highest number held""" is_sorted: bool """Whether the stream is sorted""" w: int """Whether the numeric column is increasing (1) or decreasing (-1)""" def __init__(self, col_pos=0, name=""): """Create a numeric column""" self.size = 0 self.col_pos = int(col_pos) self.name = name self._has = [] self.low = float("inf") self.high = float("-inf") self.is_sorted = True self.w = 1 def __str__(self): return str( { "at": self.col_pos, "hi": self.high, "lo": self.low, "isSorted": self.is_sorted, "n": self.size, "name": self.name, "w": self.w, } ) def nums(self): """Nums method returns the numbers within the stream.""" if not self.is_sorted: self.is_sorted = True self._has.sort() return self._has def add(self, value: str, nums: int = 512): """Add method adds a number to the stream.""" if value != "?": value = int(value) settings["nums"] = nums self.size += 1 self.low = min(value, self.low) self.high = max(value, self.high) if len(self._has) < settings["nums"]: self._has.append(value) elif random.random() < settings["nums"] / self.size: pos = random.randint(0, len(self._has) - 1) self._has[pos] = value self.is_sorted = False def div(self): """Div method returns the diversity of the stream.""" items = self.nums() return (percentile(items, 0.9) - percentile(items, 0.1)) / 2.56 def mid(self): """Mid method returns the middle of the stream.""" return percentile(self.nums(), 0.5)
Class variables
var col_pos : int-
Position of the numeric column
var high : int | float-
Highest number held
var is_sorted : bool-
Whether the stream is sorted
var low : int | float-
Lowest number held
var name : str-
Name of the numeric column
var size : int-
Number of numbers held
var w : int-
Direction of the numeric column: 1 if increasing, -1 if decreasing
Methods
def add(self, value: str, nums: int = 512)-
Add method adds a number to the stream.
Expand source code
def add(self, value: str, nums: int = 512): """Add method adds a number to the stream.""" if value != "?": value = int(value) settings["nums"] = nums self.size += 1 self.low = min(value, self.low) self.high = max(value, self.high) if len(self._has) < settings["nums"]: self._has.append(value) elif random.random() < settings["nums"] / self.size: pos = random.randint(0, len(self._has) - 1) self._has[pos] = value self.is_sorted = False
def div(self)-
Div method returns the diversity of the stream.
Expand source code
def div(self): """Div method returns the diversity of the stream.""" items = self.nums() return (percentile(items, 0.9) - percentile(items, 0.1)) / 2.56
def mid(self)-
Mid method returns the middle of the stream.
Expand source code
def mid(self): """Mid method returns the middle of the stream.""" return percentile(self.nums(), 0.5)
def nums(self)-
Nums method returns the numbers within the stream.
Expand source code
def nums(self): """Nums method returns the numbers within the stream.""" if not self.is_sorted: self.is_sorted = True self._has.sort() return self._has