BDS 761: Data Science and Machine Learning I

Topic 4: Precision and Norms
This topic:¶
- Numerical precision (on modern desktop computers)
- Norms, distances, and basic stats
- k-means clustering
Readings:
Numerical Precision¶
Binary numbers¶

https://mikkegoes.com/computer-science-binary-code-explained/
The basis for all modern computing.
What set of numbers can be represented with the above binary scheme and 8 bits?
What is the largest number possible with 8 bits? with 32 bits?
How many decimal digits do these amount to roughly?
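A quick check of these questions (a minimal sketch; the loop variable n_bits is just for illustration):
# unsigned n-bit integers span 0 .. 2**n - 1
for n_bits in (8, 32):
    largest = 2**n_bits - 1
    print(f"{n_bits:2d} bits: 0 .. {largest}  (~{len(str(largest))} decimal digits)")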
bin(216) # gives binary representation of integer
'0b11011000'
import numpy as np
np.base_repr(216, base=2, padding=0)
'11011000'
Hexadecimal¶
Base-16 system. Effectively groups four binary digits into each hex digit.
for k in range(0,32):
    print(f'{k:4d} {bin(k):>8s} {hex(k):>5s}.')
   0      0b0   0x0.
   1      0b1   0x1.
   2     0b10   0x2.
   3     0b11   0x3.
   4    0b100   0x4.
   5    0b101   0x5.
   6    0b110   0x6.
   7    0b111   0x7.
   8   0b1000   0x8.
   9   0b1001   0x9.
  10   0b1010   0xa.
  11   0b1011   0xb.
  12   0b1100   0xc.
  13   0b1101   0xd.
  14   0b1110   0xe.
  15   0b1111   0xf.
  16  0b10000  0x10.
  17  0b10001  0x11.
  18  0b10010  0x12.
  19  0b10011  0x13.
  20  0b10100  0x14.
  21  0b10101  0x15.
  22  0b10110  0x16.
  23  0b10111  0x17.
  24  0b11000  0x18.
  25  0b11001  0x19.
  26  0b11010  0x1a.
  27  0b11011  0x1b.
  28  0b11100  0x1c.
  29  0b11101  0x1d.
  30  0b11110  0x1e.
  31  0b11111  0x1f.
Floating point numbers¶
IEEE 754 standard for representing a real number with a 32-bit (or 64-bit) word.

$$x = (-1)^{\text{sign}} \times \text{fraction} \times 2^\text{exponent}$$
A similar approach achieves higher precision with 64-bit words.
Note that two versions of zero are possible, +0 and -0, depending on the sign bit.
$\pm$ Infinity = exponents all 1's, fraction all 0's
Not-a-number (NaN) = exponents all 1's, fraction not all 0's.
https://learn.microsoft.com/en-us/cpp/build/ieee-floating-point-representation?view=msvc-170
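We can inspect the IEEE 754 parameters directly; a minimal sketch using np.finfo:
import numpy as np
# report the key IEEE 754 parameters for each float type
for t in (np.float32, np.float64):
    info = np.finfo(t)
    print(t.__name__, '| bits:', info.bits, '| eps:', info.eps,
          '| max:', info.max, '| smallest normal:', info.tiny)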
1./0.
---------------------------------------------------------------------------
ZeroDivisionError                         Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_26264\832528835.py in <module>
----> 1 1./0.

ZeroDivisionError: float division by zero
import numpy as np
1/np.array([0.]), -1./np.array([0.])
C:\Users\micro\AppData\Local\Temp\ipykernel_26264\3923875365.py:3: RuntimeWarning: divide by zero encountered in true_divide
  1/np.array([0.]), -1./np.array([0.])
(array([inf]), array([-inf]))
0.0/0.0
---------------------------------------------------------------------------
ZeroDivisionError                         Traceback (most recent call last)
~\AppData\Local\Temp\ipykernel_26264\1949058463.py in <module>
----> 1 0.0/0.0

ZeroDivisionError: float division by zero
0.0/np.array([0.0])
C:\Users\micro\AppData\Local\Temp\ipykernel_26264\844779733.py:1: RuntimeWarning: invalid value encountered in true_divide
  0.0/np.array([0.0])
array([nan])
0*np.nan
nan
np.inf*np.nan
nan
Commonly used number types¶
| Common names | bits |
|---|---|
| character (char) | 8 |
| integers (int) | 32 |
| unsigned integers (uint) | 32 |
| floating-point numbers (float, single) | 32, 64 |
| double precision floating point numbers (double) | 64 |
| short integers (short) | 16 |
| long integers (long) | 32 |
| double long integers (longlong) | 64 |
| unsigned long integers (ulong) | 64 |
Note that the number of bits used varies depending on hardware, compiler, etc.
The precision (i.e., number of bits) is often appended to the type name, e.g. float32.
Working with limited precision¶
The number of decimal digits of precision follows from the number of binary fraction (significand) bits:
- 32-bit floats have about 7 decimal digits of precision
- 64-bit floats have about 16 decimal digits of precision
Keep in mind that floating-point representations of decimal numbers are generally approximate.
1 + 1e-12
1.000000000001
1 + 1e-25
1.0
1 + 1e-25 == 1
True
1e-25 + 1e-12
1.0000000000001e-12
A = np.random.randint(0, 10, size=(3, 3))
A_inv = np.linalg.inv(A)
print(A@A_inv) # matrix times its inverse should be identity
[[ 1.00000000e+00  0.00000000e+00 -2.22044605e-16]
 [-3.70074342e-18  1.00000000e+00  5.55111512e-17]
 [ 0.00000000e+00  0.00000000e+00  1.00000000e+00]]
Floating-Point Arithmetic¶
Floating point computations are not exact
$$ a+b \approx c$$
The result will be the nearest floating point number that can be represented in the binary system used.
Hard zeros ~ the special case when "0.0" really is all zeros (in the fraction bits) in binary.
Logic, however, is still exact:
0.1 + 0.1 + 0.1 == 0.3
False
0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 + 0.1 == 1.0
False
round(0.1 + 0.1 + 0.1, 5) == round(0.3, 5) # round to 5 digits precision
True
import math
math.isclose(0.1 + 0.1 + 0.1, 0.3)
True
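numpy provides array versions of the same idea (a sketch; the tolerances are numpy's defaults):
import numpy as np
a = np.array([0.1 + 0.1 + 0.1, 1.0 + 1e-25])
b = np.array([0.3, 1.0])
print(np.isclose(a, b))   # elementwise comparison with default tolerances
print(np.allclose(a, b))  # single boolean for the whole array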
Overflow and Underflow¶
import numpy as np
print("\n--- Integer Overflow (int8) ---")
a = np.int8(120)
b = np.int8(10)
overflow_result = a + b
print("a =", a)
print("b =", b)
print("a + b =", overflow_result) # Wraps around to a negative number
print("Expected (120 + 10) = 130, but stored as int8:", np.int8(130))
print("\n--- Integer Overflow (int16) ---")
a16 = np.int16(32760)
b16 = np.int16(10)
print("a16 + b16 =", a16 + b16) # Correct
print("int16 max =", np.iinfo(np.int16).max)
# Show overflow explicitly
overflow16 = np.int16(np.iinfo(np.int16).max + 1)
print("Overflow: int16(max + 1) =", overflow16)
print("\n--- Integer Underflow (int8) ---")
x = np.int8(-120)
y = np.int8(-10)
underflow_result = x + y
print("x =", x)
print("y =", y)
print("x + y =", underflow_result) # Wraps around to a positive number
print("Expected (-120 - 10) = -130, but stored as int8:", np.int8(-130))
--- Integer Overflow (int8) ---
a = 120
b = 10
a + b = -126
C:\Users\keith\AppData\Local\Temp\ipykernel_12860\4256458424.py:6: RuntimeWarning: overflow encountered in scalar add
  overflow_result = a + b
---------------------------------------------------------------------------
OverflowError                             Traceback (most recent call last)
Cell In[15], line 10
      8 print("b =", b)
      9 print("a + b =", overflow_result) # Wraps around to a negative number
---> 10 print("Expected (120 + 10) = 130, but stored as int8:", np.int8(130))
     12 print("\n--- Integer Overflow (int16) ---")
     13 a16 = np.int16(32760)

OverflowError: Python integer 130 out of bounds for int8
import numpy as np
print("\n--- Floating-Point Overflow ---")
# This will overflow for float32 but not for float64
a = np.float32(1e38)
b = np.float32(10)
overflow_result = a * b
print("a =", a)
print("b =", b)
print("a * b =", overflow_result) # Will be inf
print("Is inf?", np.isinf(overflow_result))
print("\n--- Floating-Point Underflow ---")
# This will underflow to zero in float32
x = np.float32(1e-38)
y = np.float32(1e-10)
underflow_result = x * y
print("x =", x)
print("y =", y)
print("x * y =", underflow_result) # Will be 0.0
print("Is zero?", underflow_result == 0.0)
print("\n--- Float64 Safe Example ---")
# Compare with float64
a64 = np.float64(1e308)
b64 = np.float64(10)
print("a64 * b64 =", a64 * b64) # Will overflow
x64 = np.float64(1e-308)
y64 = np.float64(1e-10)
print("x64 * y64 =", x64 * y64) # Will underflow to subnormal or 0
--- Floating-Point Overflow ---
a = 1e+38
b = 10.0
a * b = inf
Is inf? True

--- Floating-Point Underflow ---
x = 1e-38
y = 1e-10
x * y = 0.0
Is zero? True

--- Float64 Safe Example ---
a64 * b64 = inf
x64 * y64 = 1e-318
C:\Users\keith\AppData\Local\Temp\ipykernel_12860\1827175405.py:7: RuntimeWarning: overflow encountered in scalar multiply
  overflow_result = a * b
C:\Users\keith\AppData\Local\Temp\ipykernel_12860\1827175405.py:27: RuntimeWarning: overflow encountered in scalar multiply
  print("a64 * b64 =", a64 * b64) # Will overflow
import numpy as np
print("\n--- Preventing Overflow with Type Promotion ---")
# Start with int8
a = np.int8(120)
b = np.int8(10)
# Promote to int16 before addition
safe_result = np.int16(a) + np.int16(b)
print("a (int8) =", a)
print("b (int8) =", b)
print("a + b with int8 =", np.int8(a + b)) # Overflows
print("a + b with int16 =", safe_result) # Correct result 130
print("\n--- Detecting Overflow with Range Checking ---")
# Define limits
int8_max = np.iinfo(np.int8).max
int8_min = np.iinfo(np.int8).min
sum_ = int(a) + int(b)
if sum_ > int8_max or sum_ < int8_min:
print("Overflow would occur in int8: sum =", sum_)
else:
print("Safe to store in int8: sum =", sum_)
print("\n--- Use Larger Types or Unsigned Types ---")
# Promote to int32 or use uint8 if values are always positive
a_uint = np.uint8(250)
b_uint = np.uint8(10)
result_uint = a_uint + b_uint # Wraps at 255
print("uint8(250) + uint8(10) =", result_uint)
print("Expected result =", np.uint8(250 + 10)) # Will wrap to 4
# Use uint16 instead
safe_uint_result = np.uint16(a_uint) + np.uint16(b_uint)
print("Safe uint16 result =", safe_uint_result)
--- Preventing Overflow with Type Promotion ---
a (int8) = 120
b (int8) = 10
a + b with int8 = -126
a + b with int16 = 130

--- Detecting Overflow with Range Checking ---
Overflow would occur in int8: sum = 130

--- Use Larger Types or Unsigned Types ---
uint8(250) + uint8(10) = 4
C:\Users\keith\AppData\Local\Temp\ipykernel_12860\2154227939.py:12: RuntimeWarning: overflow encountered in scalar add
  print("a + b with int8 =", np.int8(a + b)) # Overflows
C:\Users\keith\AppData\Local\Temp\ipykernel_12860\2154227939.py:30: RuntimeWarning: overflow encountered in scalar add
  result_uint = a_uint + b_uint # Wraps at 255
---------------------------------------------------------------------------
OverflowError                             Traceback (most recent call last)
Cell In[16], line 32
     30 result_uint = a_uint + b_uint # Wraps at 255
     31 print("uint8(250) + uint8(10) =", result_uint)
---> 32 print("Expected result =", np.uint8(250 + 10)) # Will wrap to 4
     34 # Use uint16 instead
     35 safe_uint_result = np.uint16(a_uint) + np.uint16(b_uint)

OverflowError: Python integer 260 out of bounds for uint8
Precision Errors in Dot Product¶
import numpy as np
import time
# Vector size
N = 1_000_000 # Adjust based on system memory/performance
# Generate base float64 vectors from uniform [0, 255]
A64 = (np.random.rand(N) * 255).astype(np.float64)
B64 = (np.random.rand(N) * 255).astype(np.float64)
print("\n--- float64 ---")
size_bytes = A64.nbytes + B64.nbytes
print(f"Vector size: {size_bytes / 1e6:.2f} MB")
start_time = time.time()
C64 = np.dot(A64, B64)
end_time = time.time()
print(f"Time taken: {end_time - start_time:.6f} seconds")
print("\n--- float32 ---")
A32 = A64.astype(np.float32)
B32 = B64.astype(np.float32)
size_bytes = A32.nbytes + B32.nbytes
print(f"Vector size: {size_bytes / 1e6:.2f} MB")
start_time = time.time()
C32 = np.dot(A32, B32)
end_time = time.time()
print(f"Time taken: {end_time - start_time:.6f} seconds")
print("\n--- float16 ---")
A16 = A64.astype(np.float16)
B16 = B64.astype(np.float16)
size_bytes = A16.nbytes + B16.nbytes
print(f"Vector size: {size_bytes / 1e6:.2f} MB")
start_time = time.time()
C16 = np.dot(A16, B16)
end_time = time.time()
print(f"Time taken: {end_time - start_time:.6f} seconds")
print("\n--- int32 ---")
Aint32 = A64.astype(np.int32)
Bint32 = B64.astype(np.int32)
size_bytes = Aint32.nbytes + Bint32.nbytes
print(f"Vector size: {size_bytes / 1e6:.2f} MB")
start_time = time.time()
Cint32 = np.dot(Aint32, Bint32)
end_time = time.time()
print(f"Time taken: {end_time - start_time:.6f} seconds")
print("\n--- int64 ---")
Aint64 = A64.astype(np.int64)
Bint64 = B64.astype(np.int64)
size_bytes = Aint64.nbytes + Bint64.nbytes
print(f"Vector size: {size_bytes / 1e6:.2f} MB")
start_time = time.time()
Cint64 = np.dot(Aint64, Bint64)
end_time = time.time()
print(f"Time taken: {end_time - start_time:.6f} seconds")
# Convert results to float64 for fair comparison
C32_up = np.float64(C32)
C16_up = np.float64(C16)
Cint32_up = np.float64(Cint32)
Cint64_up = np.float64(Cint64)
# Compute absolute and relative errors
print("\n--- Dot Product Absolute and Relative Errors ---")
def print_dot_error(label, C_exact, C_approx):
    abs_err = np.abs(C_exact - C_approx)
    rel_err = abs_err / np.abs(C_exact)
    print(f"{label}: abs = {abs_err:.6e}, rel = {rel_err:.6e}")
print_dot_error("C32 vs C64", C64, C32_up)
print_dot_error("C16 vs C64", C64, C16_up)
print_dot_error("C16 vs C32", C32_up, C16_up)
print_dot_error("int32 vs C64", C64, Cint32_up)
print_dot_error("int64 vs C64", C64, Cint64_up)
--- float64 ---
Vector size: 16.00 MB
Time taken: 0.000976 seconds

--- float32 ---
Vector size: 8.00 MB
Time taken: 0.000493 seconds

--- float16 ---
Vector size: 4.00 MB
Time taken: 0.007169 seconds

--- int32 ---
Vector size: 8.00 MB
Time taken: 0.001110 seconds

--- int64 ---
Vector size: 16.00 MB
Time taken: 0.002229 seconds

--- Dot Product Absolute and Relative Errors ---
C32 vs C64: abs = 5.740566e+02, rel = 3.533729e-08
C16 vs C64: abs = inf, rel = inf
C16 vs C32: abs = inf, rel = inf
int32 vs C64: abs = 1.730707e+10, rel = 1.065374e+00
int64 vs C64: abs = 1.271973e+08, rel = 7.829905e-03
C:\Users\keith\AppData\Local\Temp\ipykernel_12860\3833883234.py:35: RuntimeWarning: overflow encountered in dot
  C16 = np.dot(A16, B16)
Precision Errors in Matrix Multiplication¶
import numpy as np
import time
# Matrix size
N = 1000 # Adjust based on system memory/performance
# Generate base float64 matrices from uniform [0, 255]
A64 = (np.random.rand(N, N) * 255).astype(np.float64)
B64 = (np.random.rand(N, N) * 255).astype(np.float64)
print("\n--- float64 ---")
size_bytes = A64.nbytes + B64.nbytes
print(f"Matrix size: {size_bytes / 1e6:.2f} MB")
start_time = time.time()
C64 = A64 @ B64
end_time = time.time()
print(f"Time taken: {end_time - start_time:.4f} seconds")
print("\n--- float32 ---")
A32 = A64.astype(np.float32)
B32 = B64.astype(np.float32)
size_bytes = A32.nbytes + B32.nbytes
print(f"Matrix size: {size_bytes / 1e6:.2f} MB")
start_time = time.time()
C32 = A32 @ B32
end_time = time.time()
print(f"Time taken: {end_time - start_time:.4f} seconds")
print("\n--- float16 ---")
A16 = A64.astype(np.float16)
B16 = B64.astype(np.float16)
size_bytes = A16.nbytes + B16.nbytes
print(f"Matrix size: {size_bytes / 1e6:.2f} MB")
start_time = time.time()
C16 = A16 @ B16
end_time = time.time()
print(f"Time taken: {end_time - start_time:.4f} seconds")
print("\n--- int32 ---")
Aint32 = A64.astype(np.int32)
Bint32 = B64.astype(np.int32)
size_bytes = Aint32.nbytes + Bint32.nbytes
print(f"Matrix size: {size_bytes / 1e6:.2f} MB")
start_time = time.time()
Cint32 = Aint32 @ Bint32
end_time = time.time()
print(f"Time taken: {end_time - start_time:.4f} seconds")
print("\n--- int64 ---")
Aint64 = A64.astype(np.int64)
Bint64 = B64.astype(np.int64)
size_bytes = Aint64.nbytes + Bint64.nbytes
print(f"Matrix size: {size_bytes / 1e6:.2f} MB")
start_time = time.time()
Cint64 = Aint64 @ Bint64
end_time = time.time()
print(f"Time taken: {end_time - start_time:.4f} seconds")
# Upsample results to float64 for fair comparison
C16_up = C16.astype(np.float64)
C32_up = C32.astype(np.float64)
Cint32_up = Cint32.astype(np.float64)
Cint64_up = Cint64.astype(np.float64)
# Compute maximum absolute differences
print("\n--- Maximum Absolute Differences ---")
print(f"max|C64 - C32| = {np.max(np.abs(C64 - C32_up)):.6e}")
print(f"max|C64 - C16| = {np.max(np.abs(C64 - C16_up)):.6e}")
print(f"max|C32 - C16| = {np.max(np.abs(C32_up - C16_up)):.6e}")
print(f"max|C64 - int32| = {np.max(np.abs(C64 - Cint32_up)):.6e}")
print(f"max|C64 - int64| = {np.max(np.abs(C64 - Cint64_up)):.6e}")
--- float64 ---
Matrix size: 16.00 MB
Time taken: 0.0224 seconds

--- float32 ---
Matrix size: 8.00 MB
Time taken: 0.0077 seconds

--- float16 ---
Matrix size: 4.00 MB
C:\Users\keith\AppData\Local\Temp\ipykernel_12860\449388697.py:35: RuntimeWarning: overflow encountered in matmul
  C16 = A16 @ B16
Time taken: 7.8111 seconds

--- int32 ---
Matrix size: 8.00 MB
Time taken: 2.1030 seconds

--- int64 ---
Matrix size: 16.00 MB
Time taken: 2.6975 seconds

--- Maximum Absolute Differences ---
max|C64 - C32| = 7.997219e+00
max|C64 - C16| = inf
max|C32 - C16| = inf
max|C64 - int32| = 1.394282e+05
max|C64 - int64| = 1.394282e+05
Norms & Distances¶
Metric¶
The basic idea: boil multiple numbers (vectors, matrices, time series, ...) or entire functions down to a single number.
Examples
- GDP of economy
- BMI (body-mass index)
- A statistic (mean, variance, ...)
- Vector norms...
Any such metric can be criticized as discarding important information, but we need to use something.
Euclidean Norm¶
$$ \|x\| = \sqrt{x_1^2+x_2^2+\dotsb+x_n^2} $$
The "length" of a vector (as opposed to the length of the data structure, i.e. the number of dimensions)
Also known as $\|x\|_2$, the "two-norm"
Example¶
$$\left\Vert \begin{pmatrix}2 \\ -1\\2 \end{pmatrix}\right\Vert = ?$$
Also use a built-in function to do this.
Exercise: give Euclidean norm using dot products
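A sketch of both the example and the exercise (the norm of the example vector should be 3):
import numpy as np
x = np.array([2., -1., 2.])
print(np.sqrt(x @ x))      # Euclidean norm from a dot product: sqrt(x.x)
print(np.linalg.norm(x))   # built-in two-norm, should agree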
x = np.array([1,2,3,4])
y = np.array([5,6,7,8])
x = x.reshape(4,1) # enforce as column vectors
y = y.reshape(4,1)
print(x.T@y)            # inner product x^T y (a 1x1 matrix)
print(np.multiply(x,y)) # elementwise product
print(x@y.T)            # outer product x y^T
print(y@x.T)            # outer product y x^T
print(x*y.T)            # broadcasting: also the outer product x y^T
print(y*x.T)            # broadcasting: also the outer product y x^T
# 1. inner product x^T y ~ gives a scalar
# 2. "x*y" = np.multiply(x,y) ~ elementwise multiply
# 3. outer products x y^T, y x^T ~ give rank-1 matrices
[[70]]
[[ 5]
 [12]
 [21]
 [32]]
[[ 5  6  7  8]
 [10 12 14 16]
 [15 18 21 24]
 [20 24 28 32]]
[[ 5 10 15 20]
 [ 6 12 18 24]
 [ 7 14 21 28]
 [ 8 16 24 32]]
[[ 5  6  7  8]
 [10 12 14 16]
 [15 18 21 24]
 [20 24 28 32]]
[[ 5 10 15 20]
 [ 6 12 18 24]
 [ 7 14 21 28]
 [ 8 16 24 32]]
x_sparse1 = {6:12, 11:5, 99:3}  # dictionary as a sparse vector: keys are indices, values are entries
for key in x_sparse1:
    print(5 in x_sparse1)       # membership test on the keys
False
False
False
Root-mean-square value (RMS)¶
$$ rms(x) = \sqrt{\frac{x_1^2+x_2^2+\dotsb+x_n^2}{n}} = \frac{1}{\sqrt{n}}\|x\| $$
i.e., root(mean(square(x)))
Exercise: give the Euclidean norm of the sum of vectors $x+y$ in terms of dot products
(use the result of the previous exercise and work only with vectors)
$$\|x+y\|_2 = \text{dot}(?,?) + \text{dot}(?,?) + ???$$
$$ \sqrt{(x_1+y_1)(x_1+y_1) + (x_2+y_2)(x_2+y_2) + \dots } = $$
Chebyshev Inequality¶
If $x$ is a length-$n$ vector with $k$ entries satisfying $|x_i|\ge a$ for some constant $a>0$, then
$$\frac{k}{n} \le \left( \frac{rms(x)}{a}\right)^2$$
This bounds the fraction of elements that can deviate far from the rms value.
Derive by noting that $\Vert x\Vert^2 \ge ka^2$
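A quick numerical check of the inequality (a sketch; the threshold a = 2 is arbitrary):
import numpy as np
x = np.random.randn(1000)
a = 2.0
rms = np.sqrt(np.mean(x**2))
print(np.mean(np.abs(x) >= a), '<=', (rms/a)**2)  # fraction k/n vs. the bound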
Motivation for other types of norms¶
Consider a list of data covering very different "dimensions" (features) for one person, product, etc.

Does it make sense to compute Euclidean distance to tell us how "big" the vector is?
Norms¶
A norm is a vector "length". Often denoted generally as $\Vert \mathbf x \Vert$.
Properties
- Non-negativity: $\Vert \mathbf x \Vert \geq 0$
- Zero upon equality: $\Vert \mathbf x \Vert = 0 \iff \mathbf x = \mathbf 0$
- Absolute scalability: $\Vert \alpha \mathbf x \Vert = |\alpha| \Vert \mathbf x \Vert$ for scalar $\alpha$
- Triangle Inequality: $\Vert \mathbf x + \mathbf z\Vert \leq \Vert \mathbf x\Vert + \Vert \mathbf z\Vert$
$p$-Norms¶
For any $n$-dimensional real or complex vector. i.e. $x \in \mathbb{R}^n \text{ or } \mathbb{C}^n$
$$ \|x\|_p = \left(|x_1|^p+|x_2|^p+\dotsb+|x_n|^p\right)^{\frac{1}{p}} = \left(\sum_{i=1}^n{|x_i|^p} \right)^{\frac{1}{p}} $$
Consider the norms we have looked at. What is $p$?
Famous "norms"¶
- $\ell_2$ norm
- $\ell_1$ norm
- $\ell_\infty$ norm
- $\ell_p$ norm
- "$\ell_0$" norm
Note we often lazily write these as e.g. "L2" norm
Exercise: Write out the $p$-norms for a vector $x$ for $p$ = 1, 2, 0, $\infty$, using $ \|x\|_p = \left(\sum_{i=1}^n{|x_i|^p} \right)^{\frac{1}{p}} $
Exercise: test the conditions for each
import numpy as np
v = [1,3,1,4]
for p in range(1,10):
    print(p, np.power(sum(np.power(np.abs(np.array(v)), p)), 1/p))
1 9.0
2 5.196152422706632
3 4.530654896083492
4 4.290915128445443
5 4.175344598847825
6 4.110988070009078
7 4.0723049678331895
8 4.048006070825583
9 4.032310478684122
Exercise¶
What are the norms of $\vec{a} = \begin{bmatrix}1\\3\\1\\-4\end{bmatrix}$ and $\vec{b} = \begin{bmatrix}2\\0\\1\\-2\end{bmatrix}$?
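One way to check your answers (a sketch using the built-in norm):
import numpy as np
a = np.array([1, 3, 1, -4])
b = np.array([2, 0, 1, -2])
for p in (1, 2, np.inf):
    print(f'p={p}: ||a|| = {np.linalg.norm(a, p)}, ||b|| = {np.linalg.norm(b, p)}')
print('nonzeros:', np.count_nonzero(a), np.count_nonzero(b))  # the "0-norm" counts nonzero entries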
"Norm balls"¶
The set $\{x \mid \Vert x \Vert_p \le 1 \}$, where $ \|x\|_p = \left(\sum_{i=1}^n{|x_i|^p} \right)^{\frac{1}{p}} $

Note how $\Vert x \Vert_a \le \Vert x \Vert_b$ if $a\ge b$
Norms, Inner products, and angles¶
Inner product = length squared
$$v \cdot v = v^T v = \| v \|_2^2 = \| v \|_2\| v \|_2$$
Angle $\theta$ between two vectors:
$$v \cdot w = v^T w = \| v \|_2\| w \|_2 \cos\theta$$
Cauchy-Schwarz Inequality $|v^T w| \le \| v \|\| w \|$¶
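A sketch computing the angle from this identity and checking Cauchy-Schwarz (the example vectors are arbitrary):
import numpy as np
v = np.array([1., 2., 3.])
w = np.array([4., 5., 6.])
cos_theta = (v @ w) / (np.linalg.norm(v) * np.linalg.norm(w))
print('theta (degrees):', np.degrees(np.arccos(cos_theta)))
print('|v.w| <= ||v|| ||w||:', np.abs(v @ w) <= np.linalg.norm(v) * np.linalg.norm(w))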
Triangle Inequality¶
$$\| v + w \| \le \| v \|+\| w \|$$
Derive by noting that $\| v + w \|^2 = \|v\|^2 + 2v^Tw + \|w\|^2$, then use Cauchy-Schwarz and complete the square.
$S$-norm¶
For any symmetric positive definite matrix $S$
$$\| v \|_S^2 = v^T S v$$
$S$ inner product¶
$$\langle v,w\rangle_S = v^T S w$$
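A minimal sketch with an arbitrary symmetric positive definite $S$:
import numpy as np
S = np.array([[2., 0.5],
              [0.5, 1.]])       # symmetric positive definite (both eigenvalues > 0)
v = np.array([1., -1.])
print('||v||_S^2 =', v @ S @ v)  # v^T S v, positive for any nonzero v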
Minkowski metric¶
Proposed by Lorentz for 4-dimensional spacetime, $v = (x,y,z,t)^T$, where $c$ is the speed of light
$$\| v\|^2_M = x^2+y^2+z^2-c^2t^2$$
Is it a true norm?
II. Distances¶
Euclidean distance between two vectors $a$ and $b$ in $\mathbb{R}^{n}$:¶
$$d(\mathbf a,\mathbf b) = \sqrt{\sum_{i=1}^{n}(b_i-a_i)^2} = \Vert \mathbf b - \mathbf a \Vert_2$$
Again this may not make sense for various vectors. Many alternatives...
Norm versus Distance¶
What is the relationship?
Distance properties¶
A distance metric $d(\mathbf x,\mathbf y)$ must satisfy four conditions to be considered a metric:
- Non-negativity: $d(\mathbf x,\mathbf y) \geq 0$
- Zero upon equality: $d(\mathbf x,\mathbf y) = 0 \iff \mathbf x = \mathbf y$
- Commutativity of arguments: $d(\mathbf x,\mathbf y) = d(\mathbf y,\mathbf x)$
- Triangle Inequality: $d(\mathbf x,\mathbf z) \leq d(\mathbf x,\mathbf y) + d(\mathbf y,\mathbf z)$
Exercise¶
Write the Euclidean distance between two points entirely in terms of dot products.
What does this tell you about using dot products to compare vector similarity?
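Expanding the square gives $\Vert \mathbf a - \mathbf b\Vert_2^2 = \mathbf a \cdot \mathbf a - 2\,\mathbf a \cdot \mathbf b + \mathbf b \cdot \mathbf b$, so dot products alone determine Euclidean distances; a quick check (a sketch):
import numpy as np
a = np.array([1., 2., 3., 4.])
b = np.array([5., 6., 7., 8.])
d2 = a @ a - 2*(a @ b) + b @ b             # squared distance from dot products only
print(np.sqrt(d2), np.linalg.norm(a - b))  # both give 8.0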
Triangle Inequality¶
$\Vert \mathbf x + \mathbf z\Vert \leq \Vert \mathbf x\Vert + \Vert \mathbf z\Vert$
$d(\mathbf a,\mathbf c) \leq d(\mathbf a,\mathbf b) + d(\mathbf b,\mathbf c)$

Application: feature distances¶
Recommender system based on matching the most similar customers to movie properties

Question: what are the units of the distance here?
Application: rms prediction error¶
Time series prediction (stocks or temperature) compared to truth in retrospect
Manhattan or "Taxicab" Distance, also "Rectilinear distance"¶
Measures distance along axis-aligned (right-angle) paths: we sum the absolute values of the differences in the vector coordinates.
This metric is sensitive to rotation.
$$d_{M}(a,b) = \sum_{i=1}^{n}|b_i-a_i|$$
...consider:
- Non-negativity: $d(\mathbf x,\mathbf y) \geq 0$
- Zero upon equality: $d(\mathbf x,\mathbf y) = 0 \iff \mathbf x = \mathbf y$
- Commutativity of arguments: $d(\mathbf x,\mathbf y) = d(\mathbf y,\mathbf x)$
- Triangle Inequality: $d(\mathbf x,\mathbf z) \leq d(\mathbf x,\mathbf y) + d(\mathbf y,\mathbf z)$
Exercise¶
Does it fulfill the 4 conditions?
Chebyshev Distance¶
The Chebyshev distance, sometimes called the $L^{\infty}$ metric, between two vectors is simply the greatest of their differences along any coordinate dimension:
$$d_{\infty}(\mathbf a,\mathbf b) = \max_{i}{|b_i-a_i|}$$
...consider:
- Non-negativity: $d(\mathbf x,\mathbf y) \geq 0$
- Zero upon equality: $d(\mathbf x,\mathbf y) = 0 \iff \mathbf x = \mathbf y$
- Commutativity of arguments: $d(\mathbf x,\mathbf y) = d(\mathbf y,\mathbf x)$
- Triangle Inequality: $d(\mathbf x,\mathbf z) \leq d(\mathbf x,\mathbf y) + d(\mathbf y,\mathbf z)$
Cosine Distance¶
High school geometry: $\mathbf a \cdot \mathbf b = \|\mathbf a\|_2\|\mathbf b\|_2 \cos\theta$
Only depends on angle between the vectors
$$d_{\cos}(\mathbf a,\mathbf b) = 1-\frac{\mathbf a \cdot \mathbf b}{\|\mathbf a\|\|\mathbf b\|} = 1 - \cos\theta$$
Not a true distance metric. Which property fails to hold? (easy to guess based on the geometry)
Exercise¶
Implement the metrics manually and compute distances between:
$ \begin{bmatrix} 1 \\ 2 \\ 3 \\ 4 \end{bmatrix}$ and $ \begin{bmatrix} 5 \\ 6 \\ 7 \\ 8 \end{bmatrix}$
import numpy as np
a = np.array([1,2,3,4])
b = np.array([5,6,7,8])
np.linalg.norm(a-b)
8.0
np.linalg.norm(a-b,np.inf)
4.0
np.sum(np.abs(a-b)) # 1 norm
16
max(np.abs(a-b)) # infinity norm
4
dcos = 1 - np.dot(a,b)/np.dot(a,a)**0.5/np.dot(b,b)**0.5
dcos
0.031136068373033732
III. Matrix Norms¶
A = np.array([[3., 4.], [3., 4.]])  # definition assumed from the outputs below: a numerically rank-one matrix
u,s,vT = np.linalg.svd(A)
print(s)
[7.07106781e+00 2.07775413e-16]
A = np.array([[3,4],[0,0]])
print(A)
#print(np.linalg.inv(A))
print(np.linalg.cond(A))
[[3 4]
 [0 0]]
inf
u,s,vT = np.linalg.svd(A)
print(s)
[5. 0.]
Matrix norms: motivation¶
Same goal as with vector norms: determining if matrices are small or large, or if two matrices are similar or different
Numerical mathematics example: you use 32-bit precision numbers to compute the inverse of a matrix. How different from the true inverse is your numerical calculation? Suppose you used 64-bit precision instead? (FYI the errors can be much larger than $2^{-32}$ or $2^{-64}$)
# https://www.mathworks.com/help/matlab/ref/cond.html
A1 = [[4.1, 2.8],
[9.7, 6.6]]
print(np.linalg.inv(A1))
A2 = [[4.1, 2.8 ],
[9.671, 6.608]]
print(np.linalg.inv(A2))
[[-66.  28.]
 [ 97. -41.]]
[[ 472.         -200.        ]
 [-690.78571429  292.85714286]]
A = np.array([[3., 4.], [3., 4.]])  # the numerically rank-one matrix again (assumed from the output)
np.linalg.cond(A)
3.4032264466391812e+16
u,s,vT = np.linalg.svd(A) # A = U@S@V.T
s[0]/s[1]
3.4032264466391812e+16
np.linalg.cond([[1,0],[0,1]])
1.0
Matrix norms¶
The set of matrices can be viewed as a vector space (contains origin, closed under addition and scalar multiplication), hence we can define a norm analogously for lengths and distances
Conditions of a norm
- $\|A\| \ge 0$, with equality only if $A$ is the zero matrix
- $\|cA\| = |c| \|A\|$
- $\|A+B\| \le \|A\| + \|B\|$ (triangle inequality)
- $\|AB\| \le \|A\|\|B\|$ ...a new rule for matrix norms (submultiplicativity)
Matrix norms: Frobenius Norm¶
- Frobenius Norm $\Vert A \Vert_F = \sqrt{\sigma_1^2+..+\sigma_r^2} = \sqrt{\sum_{ij}A_{ij}^2}$
Exercise: Compute $\|AB\|_F$
Orthogonal matrix¶
An orthogonal matrix $Q$ doesn't change the L2 norm of a vector or the Frobenius norm of a matrix:
$$\Vert Qx\Vert_2 = \Vert x\Vert_2$$
$$\Vert QB\Vert_F = \Vert B\Vert_F$$
Matrix norms from vector norms via vectorization¶
Just "vectorize" the matrix then apply a vector norm.
What do you get if you use the L2 vector norm for this?
Matrix norms from vector norms via matrix-vector product¶
Given a vector norm $\|v\|_\psi$, we can always make a matrix norm as follows
\begin{align} \|A\|_\psi &= \max_{x \ne 0}\dfrac{\Vert Ax \Vert_\psi}{\Vert x\Vert_\psi} \\ &= \max_{\Vert x\Vert_\psi = 1}\Vert Ax \Vert_\psi \end{align}
What does that give?¶
- $\ell_2$ $\rightarrow$ $\|A\|_2 = \sigma_1$, largest singular value of $A$
- $\ell_1$ $\rightarrow$ $\|A\|_1 = \max_{column_i}\|column_i\|_1 $, largest absolute column sum
- $\ell_\infty$ $\rightarrow$ $\|A\|_\infty = \max_{row_i}\|row_i\|_1 $, largest absolute row sum
Connections¶
$$\|A\|_\infty = \|A^T\|_1$$
$$\|A\|_2^2 \le \|A\|_1 \|A\|_\infty$$
Spectral and related norms¶
- Spectral Norm $\Vert A \Vert_2 = \max \dfrac{\Vert Ax \Vert}{\Vert x\Vert} = \sigma_1$, a.k.a. $\ell_2$ norm
- Frobenius Norm $\Vert A \Vert_F = \sqrt{\sigma_1^2+..+\sigma_r^2} = \sqrt{\sum_{ij}A_{ij}^2}$
- Nuclear norm $\Vert A \Vert_N = \sigma_1+..+\sigma_r$, a.k.a. trace norm
Exercise: compute norms for $I$
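A sketch checking these for the identity (expect spectral = 1, Frobenius = $\sqrt{n}$, nuclear = $n$):
import numpy as np
n = 4
I = np.eye(n)
s = np.linalg.svd(I, compute_uv=False)   # all singular values equal 1
print('spectral:', s.max(), ' Frobenius:', np.linalg.norm(I, 'fro'), ' nuclear:', s.sum())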
Spectral Radius¶
$$|\lambda|_{\max} = \max_i |\lambda_i|$$
A useful metric that does not fulfill the properties of a norm.
For every possible (true) norm, we have $\|A\| \ge |\lambda|_{\max}$
Useful fact: if $|\lambda|_{\max} <1$, $\|A^k\| \rightarrow 0$ for large $k$
Can you explain why this happens?
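Hint: in an eigenbasis, each component of $A^k v$ scales as $\lambda_i^k$. A small numerical check (the matrix is an arbitrary example with spectral radius below 1):
import numpy as np
A = np.array([[0.5, 0.4],
              [0.1, 0.3]])
print('spectral radius:', np.max(np.abs(np.linalg.eigvals(A))))
for k in (1, 5, 20):
    print(k, np.linalg.norm(np.linalg.matrix_power(A, k), 'fro'))  # shrinks toward 0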
"Medical norm"¶
$$|||A|||_\infty = \max_{i,j}|A_{i,j}|$$
Not a true matrix norm unless scaled properly (without a scaling it fails the submultiplicative rule $\|AB\| \le \|A\|\|B\|$).
See Strang problem I.11 #12.
Condition number¶
Quantifies how much output changes relative to a change in input.
Low condition number - good - "well conditioned"
- a small change in inputs due to noise or numerical precision only causes a small change in output
Huge condition number - bad - "ill conditioned"
- even a tiny change in inputs can cause a drastic change in outputs; the result is unreliable
Condition number for solving linear system $Ax=b$¶
- View the function as $x = A^{-1}b$ where input = $b$ and output = $x$.
- Relative fractional error: compare the output's relative error to the input's, $\dfrac{\|A^{-1}\delta\|/\|A^{-1}b\|}{\|\delta\|/\|b\|}$, where $\delta$ is a small perturbation of $b$
Taking the max over $b$ and $\delta$ gives
\begin{align} C &= \max_{b,\delta} \dfrac{\|A^{-1}\delta\|}{\|A^{-1}b\|}\dfrac{\|b\|}{\|\delta\|} \\ &= \left(\max_{\delta}\dfrac{\|A^{-1}\delta\|}{\|\delta\|}\right) \left(\max_{b}\dfrac{\|b\|}{\|A^{-1}b\|} \right) \\ &= \|A^{-1}\|\|A\| \end{align}First term is just definition of matrix norm. For second term, plug in $b = Ax$ and get norm definition also, noting that max will be the same optimizing over $x$ or over $Ax$ since $A$ is full-rank.
"Matrix Condition number"¶
Usually means condition number with L2 norm
Recall $\|A\|_2 = \sigma_\max(A)$
$$C \equiv \|A^{-1}\|_2\|A\|_2 = \dfrac{\sigma_\max(A)}{\sigma_\min(A)}$$
Suppose a matrix is singular (what does this mean?)
- what are its singular values?
- what is its condition number?
# https://www.mathworks.com/help/matlab/ref/cond.html
A1 = [[4.1, 2.8],
[9.7, 6.6]]
print(np.linalg.inv(A1))
A2 = [[4.1, 2.8 ],
[9.671, 6.608]]
print(np.linalg.inv(A2))
[[-66.  28.]
 [ 97. -41.]]
[[ 472.         -200.        ]
 [-690.78571429  292.85714286]]
print(np.linalg.cond(A1),np.linalg.cond(A2))
1622.9993838565106 11560.27884206701
v1 = np.array([1.,2.])
A = np.array([v1,2.*v1])
print(A)
print('rank =',np.linalg.matrix_rank(A))
[[1. 2.]
 [2. 4.]]
rank = 1
An = A + 1e-10*np.random.rand(2,2)
print('rank =',np.linalg.matrix_rank(An))
print('cond =',np.linalg.cond(An))
rank = 2
cond = 282960980020.97266
Exercise¶
Test the prior matrices when solving a linear system $Ax=b$ and adding a small perturbation to $b$
Change the matrix to make it poorly conditioned and repeat.
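A sketch of this experiment (the nearly singular matrix is an arbitrary choice):
import numpy as np
rng = np.random.default_rng(0)
for A in (np.array([[4.1, 2.8], [9.7, 6.6]]),       # moderately conditioned
          np.array([[1., 2.], [2., 4.000001]])):    # nearly singular
    b = np.array([1., 1.])
    x = np.linalg.solve(A, b)
    x_pert = np.linalg.solve(A, b + 1e-6*rng.standard_normal(2))  # tiny perturbation of b
    print('cond =', np.linalg.cond(A),
          ' relative change in x =', np.linalg.norm(x_pert - x)/np.linalg.norm(x))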
# Compute maximum absolute differences
print("\n--- Maximum Absolute Differences ---")
print(f"max|C64 - C32| = {np.max(np.abs(C64 - C32_up)):.6e}")
print(f"max|C64 - C16| = {np.max(np.abs(C64 - C16_up)):.6e}")
print(f"max|C32 - C16| = {np.max(np.abs(C32_up - C16_up)):.6e}")
print(f"max|C64 - int32| = {np.max(np.abs(C64 - Cint32_up)):.6e}")
print(f"max|C64 - int64| = {np.max(np.abs(C64 - Cint64_up)):.6e}")
# Compute Frobenius norm absolute and relative errors
print("\n--- Frobenius Norm Errors ---")
def print_frobenius_error(label, C_exact, C_approx):
    abs_err = np.linalg.norm(C_exact - C_approx, ord='fro')
    rel_err = abs_err / np.linalg.norm(C_exact, ord='fro')
    print(f"{label}: abs = {abs_err:.6e}, rel = {rel_err:.6e}")
print_frobenius_error("C32 vs C64", C64, C32_up)
print_frobenius_error("C16 vs C64", C64, C16_up)
print_frobenius_error("C16 vs C32", C32_up, C16_up)
print_frobenius_error("int32 vs C64", C64, Cint32_up)
print_frobenius_error("int64 vs C64", C64, Cint64_up)
--- Maximum Absolute Differences ---
max|C64 - C32| = 7.997219e+00
max|C64 - C16| = inf
max|C32 - C16| = inf
max|C64 - int32| = 1.394282e+05
max|C64 - int64| = 1.394282e+05

--- Frobenius Norm Errors ---
C32 vs C64: abs = 1.599064e+03, rel = 9.823474e-08
C16 vs C64: abs = inf, rel = inf
C16 vs C32: abs = inf, rel = inf
int32 vs C64: abs = 1.273148e+08, rel = 7.821287e-03
int64 vs C64: abs = 1.273148e+08, rel = 7.821287e-03
IV. Statistics with Python¶
Random sampling¶
- rand() - uniform random numbers in [0,1]
- randn() - standard normal random numbers (zero mean, unit variance)
from numpy import random
printcols(dir(random))  # printcols: a course helper that prints a list in columns
BitGenerator __package__ default_rng noncentral_chisquare set_state Generator __path__ dirichlet noncentral_f shuffle MT19937 __spec__ exponential normal standard_cauchy PCG64 _bounded_integers f pareto standard_exponential PCG64DXSM _common gamma permutation standard_gamma Philox _generator geometric poisson standard_normal RandomState _mt19937 get_state power standard_t SFC64 _pcg64 gumbel rand test SeedSequence _philox hypergeometric randint triangular __RandomState_ctor _pickle laplace randn uniform __all__ _sfc64 logistic random vonmises __builtins__ beta lognormal random_integers wald __cached__ binomial logseries random_sample weibull __doc__ bit_generator mtrand ranf zipf __file__ bytes multinomial rayleigh __loader__ chisquare multivariate_normal sample __name__ choice negative_binomial seed
The Normal Distribution¶
- Also known as Gaussian distribution
import numpy as np
from matplotlib import pyplot as plt
def univariate_normal(x, mean, var):
    return ((1. / np.sqrt(2 * np.pi * var)) * np.exp(-(x - mean)**2 / (2 * var)))
x = np.linspace(-5,5,1000)
plt.plot(x,univariate_normal(x,1,2));
plt.show()
Exercise¶
Generate normal random samples with mean of 1 and variance of 1
Plot histogram versus the theoretical distribution
Try different numbers of samples
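A sketch of this exercise, reusing the univariate_normal defined above (the sample size n is something to vary):
import numpy as np
from matplotlib import pyplot as plt
n = 1000
samples = np.random.normal(loc=1, scale=1, size=n)   # mean 1, variance 1
xg = np.linspace(-4, 6, 500)
plt.hist(samples, bins=30, density=True, alpha=0.5)  # empirical histogram
plt.plot(xg, univariate_normal(xg, 1, 1))            # theoretical pdf
plt.show()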
Statistics¶
Consider the relation between norms and simple statistical quantities
\begin{align} \text{Population mean} &= \mu = \frac{\sum_{i=1}^N x_i}{N} \\ \text{Sample mean} &= \bar{x} = \frac{\sum_{i=1}^n x_i}{n} \\ \text{Population variance} &= \sigma^2 = \frac{\sum_{i=1}^N (x_i - \mu)^2}{N} \\ \text{Sample variance} &= s^2 = \frac{\sum_{i=1}^n (x_i - \bar{x})^2}{n - 1} = \frac{\sum_{i=1}^n x_i^2 - \frac{1}{n}(\sum_{i=1}^n x_i)^2}{n - 1} \\ \text{Standard deviation} &= \sqrt{\text{Variance}} \end{align}
Exercise¶
Load a dataset from sklearn and compute mean and variance of a column
Standardize the column
Now compute mean and variance of the result
The Standard Normal Distribution $Z$¶
- If $X$ is a normal r.v. with $E(X) = \mu$ and $V(X) = \sigma^2$
- $Z$ is a normal r.v. with $E(Z) = 0$ and $V(Z) = 1$

Standardizing data has two tasks¶
$$ Z = \frac{X-\mu}{\sigma} $$
- Remove the mean
- Scale by the standard deviation
Lab: Standardizing data¶
Standardize the columns of the Iris dataset using linear algebra.
Test it worked by computing the mean and norm of each column.
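A sketch of the lab (assumes scikit-learn's load_iris; standardizing with the population standard deviation makes each column norm $\sqrt{n}$):
import numpy as np
from sklearn.datasets import load_iris
X = load_iris().data                       # 150 x 4 data matrix
Z = (X - X.mean(axis=0)) / X.std(axis=0)   # remove mean, scale by std, per column
print('column means ~ 0:', Z.mean(axis=0).round(12))
print('column norms:', np.linalg.norm(Z, axis=0), ' sqrt(n) =', np.sqrt(X.shape[0]))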
Multivariate Gaussian (for $n$ dimensions)¶
$$ f(\mathbf x) = \frac{1}{ \sqrt{(2 \pi)^n |\boldsymbol\Sigma|}} \exp \left(- \frac{1}{2} (\mathbf x - \boldsymbol \mu)^T \boldsymbol\Sigma^{-1} (\mathbf x - \boldsymbol \mu) \right) \text{, for } \mathbf x \in \mathbb{R}^n $$
- Mean vector as centroid of distribution
- Covariance matrix describes spread - correlations between variables $\Sigma_{ij} = S_{\mathbf x_i \mathbf x_j}$
def multivariate_normal(x, n, mean, cov):
    return (1./(np.sqrt((2*np.pi)**n * np.linalg.det(cov))) * np.exp(-1/2*(x - mean).T@np.linalg.inv(cov)@(x - mean)))
mean = np.array([35,70])
cov = 100*np.array([[1,.5],[.5,1]])
pic = np.zeros((100,100))
for x1 in np.arange(0,100):
    for x2 in np.arange(0,100):
        x = [x1,x2]
        pic[x1,x2] = multivariate_normal(x, 2, mean, cov)
plt.contour(pic);
Tricky Exercise¶
Consider a $m \times 3$ matrix $\mathbf A$ with columns $\mathbf x, \mathbf y$, and $\mathbf z$ (each a vector of data).
How would you efficiently standardize the three columns to make $\bar{\mathbf A}$?
What are the elements of $\bar{\mathbf A}^T \bar{\mathbf A}$?
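A sketch (the mixing matrix is an arbitrary way to correlate columns): with population-std standardization, $\frac{1}{m}\bar{\mathbf A}^T \bar{\mathbf A}$ is the correlation matrix of the columns.
import numpy as np
m = 200
rng = np.random.default_rng(1)
A = rng.standard_normal((m, 3)) @ np.array([[1., .5, 0.], [0., 1., .5], [0., 0., 1.]])
A_bar = (A - A.mean(axis=0)) / A.std(axis=0)   # standardize each column
print((A_bar.T @ A_bar / m).round(3))          # ~ correlation matrix of the columns
print(np.corrcoef(A, rowvar=False).round(3))   # agrees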
Exercise:¶
Assume you have a vector containing samples. Write the following in terms of norms and dot products:
- mean
- variance
- Correlation coefficient
- Covariance
So what does this tell you about comparing things using distances versus dot products versus statistics?
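A sketch writing these with dot products and norms (random data just for illustration):
import numpy as np
x, y = np.random.randn(100), np.random.randn(100)
n = len(x)
ones = np.ones(n)
mean_x = (ones @ x) / n                      # mean = <1, x>/n
xc, yc = x - mean_x, y - (ones @ y)/n        # centered vectors
var_x = (xc @ xc) / (n - 1)                  # sample variance = ||xc||^2/(n-1)
cov_xy = (xc @ yc) / (n - 1)                 # covariance = <xc, yc>/(n-1)
corr = (xc @ yc) / (np.linalg.norm(xc) * np.linalg.norm(yc))  # cosine of centered vectors
print(mean_x, var_x, cov_xy, corr)
So the correlation coefficient is exactly the cosine similarity of the mean-removed vectors.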
V. Clustering¶
Dimensionality Reduction¶
Describing data with less information

How do PCA and clustering lead to less information? Consider a dataset.
What are the benefits?
Clustering References¶
https://en.wikipedia.org/wiki/K-means_clustering
https://en.wikipedia.org/wiki/K-means%2B%2B - kmeans++
https://www.youtube.com/watch?v=IuRb3y8qKX4 - video with visualization of training progress
https://www.youtube.com/watch?v=cWSnFaSjgBU - more on visualization
http://scikit-learn.org/stable/modules/generated/sklearn.cluster.KMeans.html
Marketing Motivation¶
You want to make a certain number of products for a large population of customers. We know a number of features describing each of the customers, and are able to target a product to a certain customer profile.
E.g.: different customers prefer cars which are fast, or get good gas mileage, or are cheap, or are luxurious and prestigious (i.e., expensive), or are big, or are little, etc. And in various combinations.
The customers vary widely, so we wish to make $k$ products that come as close as possible to what as many customers as possible would like.
The better you can do this, the better your products will sell.
K-means clustering¶
- K clusters, centers are the means of cluster members.
- Each sample belongs to the cluster whose mean is nearest.
- Iteratively recompute membership and means (greedy optimization)

K-means algorithm¶
- Assign a number from 1 to K to each of the N data points randomly
- While cluster assignments keep changing:
  - For each of the K clusters: calculate the cluster centroid
  - For each of the N points: assign the point to the centroid it is closest to
Consider the possible ways to vary the details of this method. How many clusters? How is distance calculated? (A minimal sketch of the loop follows below.)
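A minimal from-scratch sketch of this loop (assumes Euclidean distance and that no cluster empties out during iteration):
import numpy as np

def kmeans(X, k, n_iter=100, seed=0):
    rng = np.random.default_rng(seed)
    centroids = X[rng.choice(len(X), k, replace=False)]  # random initial centers
    for _ in range(n_iter):
        # assign each point to its nearest centroid
        d = np.linalg.norm(X[:, None, :] - centroids[None, :, :], axis=2)
        labels = d.argmin(axis=1)
        # recompute each centroid as the mean of its members
        new = np.array([X[labels == j].mean(axis=0) for j in range(k)])
        if np.allclose(new, centroids):
            break
        centroids = new
    return labels, centroids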
k-Means as Greedy Optimization¶
The sum of all distances between members of a cluster gives a total length (a squared length can be viewed as a measure of "energy").
Within-Cluster-Variation: $WCV(C_k) = \dfrac{1}{|C_k|}\sum_{i, j \in C_k}d(x_{i},x_{j})$, where $d(x_{i},x_{j})$ is a distance metric of your choice.
k-means tries to minimize the net lengths over all clusters.
If you plotted this for each iteration, what would the plot look like?
Lab¶
Implement the k-means clustering algorithm using scikit-learn on the Iris and MNIST datasets.
Compute the net WCV for various options.
Look at result and WCV with different initializations.
Try varying the metric used in the clustering.
Vary k and plot WCV versus k.
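A sketch of the lab on Iris using scikit-learn (inertia_ is sklearn's within-cluster sum of squared distances, one common WCV-style objective):
import numpy as np
from sklearn.cluster import KMeans
from sklearn.datasets import load_iris
X = load_iris().data
for k in range(1, 8):
    km = KMeans(n_clusters=k, n_init=10, random_state=0).fit(X)
    print(k, km.inertia_)   # plot this versus k and look for an "elbow"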