A simple Python package for one-dimensional data clustering, implementing both traditional and novel algorithms.
Install the package using pip:
pip install usmerge
This package provides multiple one-dimensional clustering methods: equal-width binning, equal-frequency binning, K-means, SOM-K, fuzzy C-means (FCM), kernel density, Jenks natural breaks, quantile-based clustering, 1-D DBSCAN, and hierarchical density clustering.
The package accepts various one-dimensional input formats.
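For example, the same call works on a plain Python list or a NumPy array. A minimal sketch, assuming the clustering functions accept any one-dimensional sequence of numbers:

import numpy as np
from usmerge import equal_wid_merge

# The same data as a plain list and as a NumPy array.
data_list = [1.2, 1.4, 5.1, 5.3, 9.8, 10.1]
data_array = np.array(data_list)

# Both calls should return the same labels and edges.
labels_from_list, edges_from_list = equal_wid_merge(data_list, n=3)
labels_from_array, edges_from_array = equal_wid_merge(data_array, n=3)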
Equal-width binning:
from usmerge import equal_wid_merge
labels, edges = equal_wid_merge(data, n=3)

Equal-frequency binning:
from usmerge import equal_fre_merge
labels, edges = equal_fre_merge(data, n=3)
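Equal-width binning splits the value range into n equally wide intervals, while equal-frequency binning puts roughly the same number of points into each group, which matters on skewed data. A minimal sketch of the difference (the sample values are made up for illustration):

from usmerge import equal_wid_merge, equal_fre_merge

# Skewed sample: six values near 1 and three large outliers.
skewed = [1.0, 1.1, 1.2, 1.3, 1.4, 1.5, 8.0, 9.0, 10.0]

# Equal-width should place all six small values in the first bin,
# since the range [1, 10] is cut into three intervals of width 3.
wid_labels, wid_edges = equal_wid_merge(skewed, n=3)

# Equal-frequency should put roughly three values in each bin instead.
fre_labels, fre_edges = equal_fre_merge(skewed, n=3)

print(wid_labels, wid_edges)
print(fre_labels, fre_edges)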
K-means:
from usmerge import kmeans_merge
labels, edges = kmeans_merge(data, n=3, max_iter=100)

SOM-K:
from usmerge import som_k_merge
labels, edges = som_k_merge(data, n=3, sigma=0.5, learning_rate=0.5, epochs=1000)

Fuzzy C-means (FCM):
from usmerge import fcm_merge
labels, edges = fcm_merge(data, n=3, m=2.0, max_iter=100, epsilon=1e-6)

Kernel density:
from usmerge import kernel_density_merge
labels, edges = kernel_density_merge(data, n=3, bandwidth=None)

Jenks natural breaks:
from usmerge import jenks_breaks_merge
labels, edges = jenks_breaks_merge(data, n=3)

Quantile:
from usmerge import quantile_merge
labels, edges = quantile_merge(data, n=3)

1-D DBSCAN:
from usmerge import dbscan_1d_merge
labels, edges = dbscan_1d_merge(data, n=3, min_samples=3)
All clustering methods return two values: labels, the cluster assignment for each input point (in input order), and edges, the boundary positions of the resulting clusters.
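A minimal sketch of working with the two return values, assuming labels gives one cluster label per input point in input order and edges gives the boundary positions:

from usmerge import kmeans_merge

data = [1.2, 1.4, 5.1, 5.3, 9.8, 10.1]
labels, edges = kmeans_merge(data, n=3)

# Group the original values by their cluster label.
clusters = {}
for value, label in zip(data, labels):
    clusters.setdefault(label, []).append(value)

print(clusters)  # mapping from label to the values assigned to it
print(edges)     # cluster boundary positions

The example below compares several methods on synthetic data and plots the resulting labels and boundaries.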
import numpy as np
import matplotlib.pyplot as plt
from usmerge import som_k_merge, fcm_merge, kmeans_merge, hierarchical_density_merge, dbscan_1d_merge
# Generate synthetic data with three clear clusters
np.random.seed(42)
data = np.concatenate([
    np.random.normal(0, 0.3, 50),   # First cluster
    np.random.normal(5, 0.4, 50),   # Second cluster
    np.random.normal(10, 0.3, 50)   # Third cluster
])

# Compare different clustering methods
methods = {
    'SOM-K': som_k_merge(data, n=3, sigma=0.5, learning_rate=0.5, epochs=1000),
    'FCM': fcm_merge(data, n=3, m=2.0, max_iter=100),
    'K-means': kmeans_merge(data, n=3),
    'DBSCAN': dbscan_1d_merge(data, n=3, min_samples=3),
    'Hierarchical Density': hierarchical_density_merge(data, n=3)
}

# Visualize results
plt.figure(figsize=(15, 5))
for i, (name, (labels, edges)) in enumerate(methods.items(), 1):
    plt.subplot(1, 5, i)
    plt.scatter(data, np.zeros_like(data), c=labels, cmap='viridis')
    plt.title(f'{name} Clustering')
    # Plot cluster boundaries
    for edge in edges:
        plt.axvline(x=edge, color='r', linestyle='--', alpha=0.5)
    plt.ylim(-0.5, 0.5)
plt.tight_layout()
plt.show()
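For a non-graphical comparison, the boundaries each method produced can also be printed directly; a small follow-on sketch reusing the methods dictionary from the example above:

# Print the cluster boundaries found by each method.
for name, (labels, edges) in methods.items():
    print(f'{name}: {sorted(edges)}')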
Each clustering method has its own set of parameters:
SOM-K: sigma (neighborhood size), learning_rate (learning rate), epochs (iterations)
FCM: m (fuzziness), max_iter (maximum iterations), epsilon (convergence threshold)
Kernel density: bandwidth (kernel width)
alpha (compression-accuracy trade-off)
K-means: max_iter (maximum iterations), epsilon (convergence threshold)
Hierarchical density: min_cluster_size (minimum points per cluster)
DBSCAN: n (target number of clusters), eps (optional neighborhood size), min_samples (minimum points in cluster), max_iter (maximum iterations for eps adjustment)
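As a hedged sketch of parameter tuning (treating eps as an optional keyword argument of dbscan_1d_merge and epsilon as the FCM convergence threshold, as listed above):

from usmerge import fcm_merge, dbscan_1d_merge

data = [1.2, 1.4, 5.1, 5.3, 9.8, 10.1]

# Tighter convergence threshold and more iterations for fuzzy C-means.
labels, edges = fcm_merge(data, n=3, m=2.0, max_iter=200, epsilon=1e-8)

# Fix DBSCAN's neighborhood size explicitly instead of letting it be adjusted.
labels, edges = dbscan_1d_merge(data, n=3, eps=0.5, min_samples=2)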
Feel free to contribute to this project by submitting issues or pull requests.
MIT License