Module csv_lua.num

Num module deals with numeric columns in a CSV

Expand source code
"""Num module deals with numeric columns in a CSV"""

import random
from csv_lua.settings import settings
from csv_lua.util import percentile


class Num:
    """
    Num summarizes a stream of numbers.
    """

    size: int
    """Number of numbers held"""
    col_pos: int
    """Position of the numeric column"""
    name: str
    """Name of the numeric column"""
    _has: list[float]
    """List of numbers held"""
    low: int | float
    """Lowest number held"""
    high: int | float
    """Highest number held"""
    is_sorted: bool
    """Whether the stream is sorted"""
    w: int
    """Whether the numeric column is increasing (1) or decreasing (-1)"""

    def __init__(self, col_pos=0, name=""):
        """Create a numeric column"""
        self.size = 0
        self.col_pos = int(col_pos)
        self.name = name
        self._has = []
        self.low = float("inf")
        self.high = float("-inf")
        self.is_sorted = True
        self.w = 1

    def __str__(self):
        return str(
            {
                "at": self.col_pos,
                "hi": self.high,
                "lo": self.low,
                "isSorted": self.is_sorted,
                "n": self.size,
                "name": self.name,
                "w": self.w,
            }
        )

    def nums(self):
        """Nums method returns the numbers within the stream."""
        if not self.is_sorted:
            self.is_sorted = True
            self._has.sort()
        return self._has

    def add(self, value: str, nums: int = 512):
        """Add method adds a number to the stream."""

        if value != "?":
            value = int(value)
            settings["nums"] = nums
            self.size += 1
            self.low = min(value, self.low)
            self.high = max(value, self.high)
            if len(self._has) < settings["nums"]:
                self._has.append(value)
            elif random.random() < settings["nums"] / self.size:
                pos = random.randint(0, len(self._has) - 1)
                self._has[pos] = value
            self.is_sorted = False

    def div(self):
        """Div method returns the diversity of the stream."""
        items = self.nums()
        return (percentile(items, 0.9) - percentile(items, 0.1)) / 2.56

    def mid(self):
        """Mid method returns the middle of the stream."""
        return percentile(self.nums(), 0.5)

Classes

class Num (col_pos=0, name='')

Num summarizes a stream of numbers.

Create a numeric column

Expand source code
class Num:
    """
    Num summarizes a stream of numbers.
    """

    size: int
    """Number of numbers held"""
    col_pos: int
    """Position of the numeric column"""
    name: str
    """Name of the numeric column"""
    _has: list[float]
    """List of numbers held"""
    low: int | float
    """Lowest number held"""
    high: int | float
    """Highest number held"""
    is_sorted: bool
    """Whether the stream is sorted"""
    w: int
    """Whether the numeric column is increasing (1) or decreasing (-1)"""

    def __init__(self, col_pos=0, name=""):
        """Create a numeric column"""
        self.size = 0
        self.col_pos = int(col_pos)
        self.name = name
        self._has = []
        self.low = float("inf")
        self.high = float("-inf")
        self.is_sorted = True
        self.w = 1

    def __str__(self):
        return str(
            {
                "at": self.col_pos,
                "hi": self.high,
                "lo": self.low,
                "isSorted": self.is_sorted,
                "n": self.size,
                "name": self.name,
                "w": self.w,
            }
        )

    def nums(self):
        """Nums method returns the numbers within the stream."""
        if not self.is_sorted:
            self.is_sorted = True
            self._has.sort()
        return self._has

    def add(self, value: str, nums: int = 512):
        """Add method adds a number to the stream."""

        if value != "?":
            value = int(value)
            settings["nums"] = nums
            self.size += 1
            self.low = min(value, self.low)
            self.high = max(value, self.high)
            if len(self._has) < settings["nums"]:
                self._has.append(value)
            elif random.random() < settings["nums"] / self.size:
                pos = random.randint(0, len(self._has) - 1)
                self._has[pos] = value
            self.is_sorted = False

    def div(self):
        """Div method returns the diversity of the stream."""
        items = self.nums()
        return (percentile(items, 0.9) - percentile(items, 0.1)) / 2.56

    def mid(self):
        """Mid method returns the middle of the stream."""
        return percentile(self.nums(), 0.5)

Class variables

var col_pos : int

Position of the numeric column

var high : int | float

Highest number added so far (tracked for every value, including ones no longer retained)

var is_sorted : bool

Whether the stream is sorted

var low : int | float

Lowest number added so far (tracked for every value, including ones no longer retained)

var name : str

Name of the numeric column

var size : int

Number of numbers added so far (can exceed the number retained in the held list)

var w : int

Whether the numeric column is increasing (1) or decreasing (-1)

Methods

def add(self, value: str, nums: int = 512)

Add method adds a number to the stream.

Expand source code
def add(self, value: str, nums: int = 512):
    """
    Add method adds a number to the stream.

    Args:
        value: The cell to add. The string "?" marks a missing value and is
            ignored. Anything else is converted to `int` when it is an
            integer literal and to `float` otherwise, so fractional cells
            such as "3.14" are accepted rather than rejected.
        nums: Maximum number of values to retain in the held list.

    Raises:
        ValueError: If `value` is not "?" and is not a finite number.
    """
    if value == "?":
        return

    if isinstance(value, float):
        number = value
    else:
        try:
            # Try int first so integer cells stay exact integers.
            number = int(value)
        except ValueError:
            number = float(value)
    # NaN (the only value unequal to itself) and infinities would corrupt
    # low/high and the sort order, so reject them before touching state.
    if isinstance(number, float) and (
        number != number or abs(number) == float("inf")
    ):
        raise ValueError(f"not a finite number: {value!r}")

    # NOTE(review): this overwrites the shared setting on every call; kept
    # because other code may read it - confirm whether intended.
    settings["nums"] = nums
    self.size += 1
    self.low = min(number, self.low)
    self.high = max(number, self.high)
    if len(self._has) < nums:
        self._has.append(number)
    elif random.random() < nums / self.size:
        # Held list is full: overwrite a random slot with probability
        # nums / size.
        pos = random.randint(0, len(self._has) - 1)
        self._has[pos] = number
    self.is_sorted = False
def div(self)

Div method returns the diversity of the stream.

Expand source code
def div(self):
    """
    Div method returns the diversity of the stream.

    Computed as the difference between `percentile(..., 0.9)` and
    `percentile(..., 0.1)` over the held numbers, divided by 2.56.
    """
    ordered = self.nums()
    upper = percentile(ordered, 0.9)
    lower = percentile(ordered, 0.1)
    spread = upper - lower
    return spread / 2.56
def mid(self)

Mid method returns the middle of the stream.

Expand source code
def mid(self):
    """Mid method returns `percentile(..., 0.5)` of the held numbers."""
    ordered = self.nums()
    return percentile(ordered, 0.5)
def nums(self)

Nums method returns the numbers within the stream.

Expand source code
def nums(self):
    """
    Nums method returns the numbers within the stream.

    Sorts the held list in place if it is flagged as unsorted, then returns
    that same list object.
    """
    held = self._has
    if self.is_sorted:
        return held
    # Flag first, then sort - same order as before.
    self.is_sorted = True
    held.sort()
    return held