Types of Machine Learning: Supervised, Unsupervised, and Reinforcement Learning
Deep dive into the three main types of machine learning - supervised, unsupervised, and reinforcement learning. Understand when to use each approach with practical examples.
Machine Learning approaches can be categorized into three main types based on how they learn from data and the nature of the feedback they receive. Understanding these categories is crucial for choosing the right approach for your specific problem.
Overview of Machine Learning Types
graph TD
A[Machine Learning] --> B[Supervised Learning]
A --> C[Unsupervised Learning]
A --> D[Reinforcement Learning]
B --> E[Classification]
B --> F[Regression]
C --> G[Clustering]
C --> H[Dimensionality Reduction]
C --> I[Anomaly Detection]
C --> J[Association Rules]
D --> K[Model-Based]
D --> L[Model-Free]
Supervised Learning: Learning with a Teacher
Supervised learning is like learning with a teacher who provides correct answers. The algorithm learns from labeled training data, where each example has an input and the desired output.
How It Works
# Conceptual framework of supervised learning
class SupervisedLearning:
def __init__(self):
self.model = None
def train(self, X_train, y_train):
"""Learn the mapping from inputs (X) to outputs (y)"""
# The algorithm finds patterns that connect X to y
self.model = self.find_patterns(X_train, y_train)
def predict(self, X_new):
"""Apply learned patterns to new data"""
return self.model.apply_patterns(X_new)
Classification: Predicting Categories
Classification involves predicting which category or class an input belongs to.
Example 1: Image Classification
import numpy as np
from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler
class ImageClassifier:
def __init__(self):
self.classifier = MLPClassifier(
hidden_layer_sizes=(100, 50),
max_iter=1000,
random_state=42
)
self.scaler = StandardScaler()
def preprocess_image(self, image):
"""Convert image to feature vector"""
# In practice, use CNN features
return image.flatten()
def train(self, images, labels):
"""Train classifier on labeled images"""
# Extract features
features = np.array([self.preprocess_image(img) for img in images])
features = self.scaler.fit_transform(features)
# Train model
self.classifier.fit(features, labels)
print(f"Training accuracy: {self.classifier.score(features, labels):.2%}")
def predict(self, image):
"""Predict class of new image"""
features = self.preprocess_image(image).reshape(1, -1)
features = self.scaler.transform(features)
prediction = self.classifier.predict(features)[0]
probabilities = self.classifier.predict_proba(features)[0]
return {
'class': prediction,
'confidence': max(probabilities),
'all_probabilities': dict(zip(self.classifier.classes_, probabilities))
}
# Usage example
classifier = ImageClassifier()
# Simulated data
train_images = np.random.rand(1000, 28, 28) # 1000 28x28 images
train_labels = np.random.choice(['cat', 'dog', 'bird'], 1000)
classifier.train(train_images, train_labels)
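To close the loop, here is how a prediction on a new (here, simulated) image might look, using the dictionary returned by predict above:
# Predict the class of a new simulated 28x28 image
new_image = np.random.rand(28, 28)
result = classifier.predict(new_image)
print(f"Predicted: {result['class']} (confidence: {result['confidence']:.2%})")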
Example 2: Sentiment Analysis
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
from sklearn.pipeline import Pipeline
class SentimentAnalyzer:
def __init__(self):
self.pipeline = Pipeline([
('tfidf', TfidfVectorizer(max_features=5000)),
('classifier', SVC(kernel='linear', probability=True))
])
def train(self, texts, sentiments):
"""Train on labeled text data"""
self.pipeline.fit(texts, sentiments)
def analyze(self, text):
"""Analyze sentiment of new text"""
prediction = self.pipeline.predict([text])[0]
proba = self.pipeline.predict_proba([text])[0]
sentiment_scores = dict(zip(self.pipeline.classes_, proba))
return {
'sentiment': prediction,
'scores': sentiment_scores,
'confidence': max(proba)
}
# Example usage
analyzer = SentimentAnalyzer()
training_texts = [
"This product is amazing! Best purchase ever!",
"Terrible experience, would not recommend.",
"It's okay, nothing special but does the job.",
"Absolutely love it! Exceeded expectations!"
]
training_sentiments = ['positive', 'negative', 'neutral', 'positive']
analyzer.train(training_texts, training_sentiments)
result = analyzer.analyze("This is fantastic, really impressed!")
print(f"Sentiment: {result['sentiment']} (confidence: {result['confidence']:.2%})")
Regression: Predicting Continuous Values
Regression predicts numerical values rather than categories.
Example: House Price Prediction
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import numpy as np
import pandas as pd
class HousePricePredictor:
def __init__(self):
self.model = RandomForestRegressor(
n_estimators=100,
max_depth=10,
random_state=42
)
self.feature_importance = None
def engineer_features(self, df):
"""Create additional features"""
df = df.copy()
df['total_area'] = df['living_area'] + df['garage_area'] + df['basement_area']
df['age'] = 2025 - df['year_built']
df['renovated'] = (df['year_renovated'] > 0).astype(int)
# Neighborhood average price per square foot (note: derived from the target
# 'price', so in practice compute it from training data only to avoid leakage)
df['price_per_sqft_neighborhood'] = (df.groupby('neighborhood')['price'].transform('mean') /
df.groupby('neighborhood')['living_area'].transform('mean'))
return df
def train(self, data):
"""Train the price prediction model"""
# Prepare features
data = self.engineer_features(data)
feature_cols = ['bedrooms', 'bathrooms', 'living_area', 'lot_size',
'age', 'total_area', 'garage_cars', 'neighborhood_encoded']
X = data[feature_cols]
y = data['price']
# Train model
self.model.fit(X, y)
# Store feature importance
self.feature_importance = pd.DataFrame({
'feature': feature_cols,
'importance': self.model.feature_importances_
}).sort_values('importance', ascending=False)
# Evaluate
predictions = self.model.predict(X)
mae = mean_absolute_error(y, predictions)
r2 = r2_score(y, predictions)
print(f"Training MAE: ${mae:,.2f}")
print(f"Training R²: {r2:.4f}")
print(f"\nTop features:")
print(self.feature_importance.head())
def predict_price(self, house_features):
"""Predict price for a new house"""
prediction = self.model.predict([house_features])[0]
# Get prediction interval using trees
tree_predictions = np.array([
tree.predict([house_features])[0]
for tree in self.model.estimators_
])
return {
'predicted_price': prediction,
'confidence_interval': (
np.percentile(tree_predictions, 5),
np.percentile(tree_predictions, 95)
),
'uncertainty': np.std(tree_predictions)
}
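A quick usage sketch follows, assuming a DataFrame named housing_data exists with the columns referenced above ('price', 'living_area', 'garage_area', 'basement_area', 'year_built', 'year_renovated', 'neighborhood', plus everything in feature_cols); the feature values for the new house are illustrative:
predictor = HousePricePredictor()
predictor.train(housing_data)  # housing_data: assumed DataFrame with the columns above
# Feature order must match feature_cols:
# bedrooms, bathrooms, living_area, lot_size, age, total_area, garage_cars, neighborhood_encoded
new_house = [3, 2, 1800, 6000, 20, 2400, 2, 5]
estimate = predictor.predict_price(new_house)
print(f"Predicted price: ${estimate['predicted_price']:,.0f}")
print(f"90% interval: ${estimate['confidence_interval'][0]:,.0f} to ${estimate['confidence_interval'][1]:,.0f}")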
Unsupervised Learning: Finding Hidden Patterns
Unsupervised learning discovers patterns in data without labeled examples. It's like exploring data to find natural groupings or structures.
Clustering: Grouping Similar Items
Example: Customer Segmentation
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
class CustomerSegmentation:
def __init__(self, n_segments=5):
self.n_segments = n_segments
self.kmeans = KMeans(n_clusters=n_segments, random_state=42)
self.scaler = StandardScaler()
self.segment_profiles = None
def analyze_segments(self, data, features):
"""Perform segmentation and analyze results"""
# Scale features
X_scaled = self.scaler.fit_transform(data[features])
# Perform clustering
segments = self.kmeans.fit_predict(X_scaled)
data['segment'] = segments
# Analyze each segment
self.segment_profiles = []
for i in range(self.n_segments):
segment_data = data[data['segment'] == i]
profile = {
'segment_id': i,
'size': len(segment_data),
'percentage': len(segment_data) / len(data) * 100,
'avg_purchase_value': segment_data['total_purchases'].mean(),
'avg_frequency': segment_data['purchase_frequency'].mean(),
'avg_recency': segment_data['days_since_last_purchase'].mean(),
'characteristics': self.describe_segment(segment_data)
}
self.segment_profiles.append(profile)
return segments
def describe_segment(self, segment_data):
"""Generate description for segment"""
if segment_data['total_purchases'].mean() > 1000:
value = "High-value"
elif segment_data['total_purchases'].mean() > 500:
value = "Medium-value"
else:
value = "Low-value"
if segment_data['purchase_frequency'].mean() > 10:
frequency = "frequent"
else:
frequency = "occasional"
return f"{value} {frequency} customers"
def visualize_segments(self, data, features):
"""Create visualization of segments"""
# Reduce to 2D for visualization
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
X_scaled = self.scaler.transform(data[features])
X_pca = pca.fit_transform(X_scaled)
plt.figure(figsize=(10, 8))
scatter = plt.scatter(X_pca[:, 0], X_pca[:, 1],
c=data['segment'], cmap='viridis', alpha=0.6)
plt.colorbar(scatter, label='Segment')
plt.xlabel(f'PC1 ({pca.explained_variance_ratio_[0]:.2%} variance)')
plt.ylabel(f'PC2 ({pca.explained_variance_ratio_[1]:.2%} variance)')
plt.title('Customer Segments Visualization')
# Plot centroids
centroids_pca = pca.transform(self.kmeans.cluster_centers_)
plt.scatter(centroids_pca[:, 0], centroids_pca[:, 1],
c='red', marker='x', s=200, linewidths=3)
plt.show()
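To see the segmentation end to end, here is a minimal sketch on simulated customer data; the column names (total_purchases, purchase_frequency, days_since_last_purchase) are the ones the profiling code above expects:
import numpy as np
import pandas as pd

rng = np.random.default_rng(42)
customers = pd.DataFrame({
    'total_purchases': rng.gamma(2, 300, 500),
    'purchase_frequency': rng.poisson(6, 500),
    'days_since_last_purchase': rng.integers(1, 365, 500)
})
features = ['total_purchases', 'purchase_frequency', 'days_since_last_purchase']

segmentation = CustomerSegmentation(n_segments=4)
segmentation.analyze_segments(customers, features)
for profile in segmentation.segment_profiles:
    print(f"Segment {profile['segment_id']}: {profile['characteristics']} "
          f"({profile['percentage']:.1f}% of customers)")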
Dimensionality Reduction: Simplifying Complex Data
Example: Feature Extraction with PCA
from sklearn.decomposition import PCA
import numpy as np
import matplotlib.pyplot as plt
class DimensionalityReducer:
def __init__(self, target_variance=0.95):
self.target_variance = target_variance
self.pca = None
self.n_components = None
def fit_transform(self, data):
"""Reduce dimensionality while preserving variance"""
# Determine number of components needed
pca_full = PCA()
pca_full.fit(data)
cumsum_var = np.cumsum(pca_full.explained_variance_ratio_)
self.n_components = np.argmax(cumsum_var >= self.target_variance) + 1
# Fit PCA with optimal components
self.pca = PCA(n_components=self.n_components)
transformed_data = self.pca.fit_transform(data)
print(f"Reduced from {data.shape[1]} to {self.n_components} dimensions")
print(f"Preserved variance: {self.pca.explained_variance_ratio_.sum():.2%}")
return transformed_data
def visualize_variance(self):
"""Plot explained variance by component"""
plt.figure(figsize=(10, 6))
# Individual explained variance
plt.subplot(1, 2, 1)
plt.bar(range(1, len(self.pca.explained_variance_ratio_) + 1),
self.pca.explained_variance_ratio_)
plt.xlabel('Principal Component')
plt.ylabel('Explained Variance Ratio')
plt.title('Variance Explained by Each Component')
# Cumulative explained variance
plt.subplot(1, 2, 2)
plt.plot(range(1, len(self.pca.explained_variance_ratio_) + 1),
np.cumsum(self.pca.explained_variance_ratio_))
plt.axhline(y=self.target_variance, color='r', linestyle='--',
label=f'Target: {self.target_variance:.0%}')
plt.xlabel('Number of Components')
plt.ylabel('Cumulative Explained Variance')
plt.title('Cumulative Variance Explained')
plt.legend()
plt.tight_layout()
plt.show()
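A short sketch on simulated data with built-in redundancy (100 observed features driven by 10 latent factors), so the reducer has structure to exploit:
# Simulated data: 100 correlated features generated from 10 latent factors
latent = np.random.rand(500, 10)
mixing = np.random.rand(10, 100)
high_dim_data = latent @ mixing + 0.01 * np.random.rand(500, 100)

reducer = DimensionalityReducer(target_variance=0.95)
reduced = reducer.fit_transform(high_dim_data)
print(reduced.shape)  # far fewer than 100 dimensions remain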
Anomaly Detection: Finding Outliers
import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
class AnomalyDetector:
def __init__(self, contamination=0.1):
self.detector = IsolationForest(
contamination=contamination,
random_state=42
)
self.scaler = StandardScaler()
def fit_detect(self, data):
"""Detect anomalies in data"""
# Scale data
data_scaled = self.scaler.fit_transform(data)
# Fit and predict
predictions = self.detector.fit_predict(data_scaled)
# Convert to binary (1: anomaly, 0: normal)
is_anomaly = (predictions == -1).astype(int)
# Calculate anomaly scores
scores = self.detector.score_samples(data_scaled)
return {
'anomalies': is_anomaly,
'scores': scores,
'n_anomalies': is_anomaly.sum(),
'anomaly_rate': is_anomaly.mean()
}
def explain_anomaly(self, data_point, feature_names):
"""Explain why a point is anomalous"""
data_point = np.asarray(data_point)
data_point_scaled = self.scaler.transform([data_point])
# decision_function is negative for anomalies, positive for normal points
score = self.detector.decision_function(data_point_scaled)[0]
# Find which features deviate most from normal
mean = self.scaler.mean_
std = self.scaler.scale_
z_scores = np.abs((data_point - mean) / std)
unusual_features = []
for i, (feature, z_score) in enumerate(zip(feature_names, z_scores)):
if z_score > 2: # More than 2 std deviations
unusual_features.append({
'feature': feature,
'value': data_point[i],
'z_score': z_score,
'normal_range': (mean[i] - 2*std[i], mean[i] + 2*std[i])
})
return {
'anomaly_score': score,
'is_anomaly': score < 0,
'unusual_features': sorted(unusual_features,
key=lambda x: x['z_score'],
reverse=True)
}
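A minimal sketch with simulated transaction data (the feature names are assumed for illustration):
feature_names = ['amount', 'items', 'hour_of_day']
normal = np.random.normal(loc=[50, 3, 14], scale=[20, 2, 4], size=(500, 3))
outliers = np.array([[900, 40, 3]])  # an obviously unusual transaction
transactions = np.vstack([normal, outliers])

detector = AnomalyDetector(contamination=0.01)
results = detector.fit_detect(transactions)
print(f"Flagged {results['n_anomalies']} of {len(transactions)} transactions")

explanation = detector.explain_anomaly(transactions[-1], feature_names)
for item in explanation['unusual_features']:
    print(f"{item['feature']}: {item['value']:.1f} (z-score {item['z_score']:.1f})")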
Reinforcement Learning: Learning from Interaction
Reinforcement Learning (RL) is about learning optimal behaviors through trial-and-error interactions with an environment: an agent takes actions, observes the resulting state, and receives rewards that guide future decisions.
Core Concepts
graph LR
A[Agent] -->|Action| B[Environment]
B -->|State| A
B -->|Reward| A
A -->|Policy| A
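The tabular Q-learning agent below implements the classic update rule: after taking action a in state s and observing reward r and next state s', it nudges its estimate toward a bootstrapped target, Q(s, a) ← Q(s, a) + α [r + γ max_a' Q(s', a') - Q(s, a)], where α is the learning rate and γ the discount factor. Exploration is handled with an epsilon-greedy policy.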
Example: Training a Game AI
import numpy as np
from collections import defaultdict
class QLearningAgent:
def __init__(self, n_states, n_actions, learning_rate=0.1,
discount_factor=0.95, exploration_rate=0.1):
self.n_states = n_states
self.n_actions = n_actions
self.lr = learning_rate
self.gamma = discount_factor
self.epsilon = exploration_rate
# Initialize Q-table
self.q_table = defaultdict(lambda: np.zeros(n_actions))
self.training_history = []
def choose_action(self, state, training=True):
"""Epsilon-greedy action selection"""
if training and np.random.random() < self.epsilon:
# Explore: random action
return np.random.randint(self.n_actions)
else:
# Exploit: best known action
return np.argmax(self.q_table[state])
def learn(self, state, action, reward, next_state, done):
"""Update Q-values based on experience"""
current_q = self.q_table[state][action]
if done:
target_q = reward
else:
# Bellman equation
target_q = reward + self.gamma * np.max(self.q_table[next_state])
# Update Q-value
self.q_table[state][action] = current_q + self.lr * (target_q - current_q)
# Track learning progress
self.training_history.append({
'state': state,
'action': action,
'reward': reward,
'q_value': self.q_table[state][action]
})
def train_episode(self, env):
"""Train for one episode"""
state = env.reset()
total_reward = 0
steps = 0
while True:
# Choose action
action = self.choose_action(state, training=True)
# Take action
next_state, reward, done = env.step(action)
# Learn from experience
self.learn(state, action, reward, next_state, done)
total_reward += reward
steps += 1
state = next_state
if done:
break
return total_reward, steps
# Example: Grid World Environment
class GridWorld:
def __init__(self, size=5):
self.size = size
self.goal = (size-1, size-1)
self.state = None
def reset(self):
self.state = (0, 0)
return self.state
def step(self, action):
# Actions: 0=up, 1=right, 2=down, 3=left
x, y = self.state
if action == 0 and y > 0:
y -= 1
elif action == 1 and x < self.size - 1:
x += 1
elif action == 2 and y < self.size - 1:
y += 1
elif action == 3 and x > 0:
x -= 1
self.state = (x, y)
# Reward structure
if self.state == self.goal:
reward = 100
done = True
else:
reward = -1 # Small penalty for each step
done = False
return self.state, reward, done
def render(self, agent=None):
"""Visualize the grid world"""
grid = [['.' for _ in range(self.size)] for _ in range(self.size)]
# Mark current position
x, y = self.state
grid[y][x] = 'A'
# Mark goal
gx, gy = self.goal
grid[gy][gx] = 'G' if self.state != self.goal else 'W'
# Show grid
print("\nGrid World:")
for row in grid:
print(' '.join(row))
# Show Q-values if agent provided
if agent and self.state in agent.q_table:
print(f"\nQ-values at {self.state}:")
actions = ['Up', 'Right', 'Down', 'Left']
for i, (action, q_val) in enumerate(zip(actions, agent.q_table[self.state])):
print(f" {action}: {q_val:.2f}")
Deep Reinforcement Learning
When the state space is too large for a table, deep reinforcement learning replaces the Q-table with a neural network that approximates Q-values. The DQN sketch below uses two standard stabilization tricks: an experience replay buffer and a separate target network.
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
class DeepQNetwork(nn.Module):
def __init__(self, state_size, action_size, hidden_size=128):
super(DeepQNetwork, self).__init__()
self.fc1 = nn.Linear(state_size, hidden_size)
self.fc2 = nn.Linear(hidden_size, hidden_size)
self.fc3 = nn.Linear(hidden_size, action_size)
def forward(self, x):
x = torch.relu(self.fc1(x))
x = torch.relu(self.fc2(x))
return self.fc3(x)
class DQNAgent:
def __init__(self, state_size, action_size, learning_rate=0.001):
self.state_size = state_size
self.action_size = action_size
self.memory = []
self.epsilon = 1.0
self.epsilon_decay = 0.995
self.epsilon_min = 0.01
# Neural network
self.q_network = DeepQNetwork(state_size, action_size)
self.target_network = DeepQNetwork(state_size, action_size)
self.optimizer = optim.Adam(self.q_network.parameters(), lr=learning_rate)
# Update target network
self.update_target_network()
def update_target_network(self):
"""Copy weights from main network to target network"""
self.target_network.load_state_dict(self.q_network.state_dict())
def remember(self, state, action, reward, next_state, done):
"""Store experience in replay memory"""
self.memory.append((state, action, reward, next_state, done))
if len(self.memory) > 10000:
self.memory.pop(0)
def act(self, state):
"""Choose action using epsilon-greedy policy"""
if np.random.random() <= self.epsilon:
return np.random.randint(self.action_size)
state_tensor = torch.FloatTensor(state).unsqueeze(0)
q_values = self.q_network(state_tensor)
return np.argmax(q_values.detach().numpy())
def replay(self, batch_size=32):
"""Train the model on a batch of experiences"""
if len(self.memory) < batch_size:
return
batch = random.sample(self.memory, batch_size)
for state, action, reward, next_state, done in batch:
target = reward
if not done:
next_state_tensor = torch.FloatTensor(next_state).unsqueeze(0)
target = reward + 0.95 * torch.max(
self.target_network(next_state_tensor)
).item()
state_tensor = torch.FloatTensor(state).unsqueeze(0)
# Build the target vector without tracking gradients, then overwrite the taken action
target_f = self.q_network(state_tensor).detach().clone()
target_f[0][action] = target
loss = nn.MSELoss()(self.q_network(state_tensor), target_f)
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
if self.epsilon > self.epsilon_min:
self.epsilon *= self.epsilon_decay
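As a rough training sketch, the GridWorld defined above can be reused by encoding its (x, y) state as a 2-element float vector; that encoding, and the episode counts, are assumptions made purely for illustration:
env = GridWorld(size=5)
agent = DQNAgent(state_size=2, action_size=4)

for episode in range(200):
    state = np.array(env.reset(), dtype=np.float32)
    done = False
    while not done:
        action = agent.act(state)
        next_state, reward, done = env.step(action)
        next_state = np.array(next_state, dtype=np.float32)
        agent.remember(state, action, reward, next_state, done)
        state = next_state
    agent.replay(batch_size=32)        # learn from a batch of stored experiences
    if (episode + 1) % 20 == 0:
        agent.update_target_network()  # periodically sync the target network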
Comparing Learning Types
| Aspect | Supervised | Unsupervised | Reinforcement |
|--------|------------|--------------|---------------|
| Training Data | Labeled examples | Unlabeled data | Environment interactions |
| Goal | Predict labels | Find patterns | Maximize reward |
| Feedback | Immediate | None | Delayed reward |
| Applications | Classification, Regression | Clustering, Compression | Games, Robotics, Control |
| Examples | Spam detection, Price prediction | Customer segmentation, Anomaly detection | Game AI, Robot control |
Choosing the Right Approach
Use Supervised Learning When:
- You have labeled training data
- The task is to predict specific outputs
- You need interpretable predictions
- Historical examples exist
Use Unsupervised Learning When:
- You don't have labeled data
- You want to explore data structure
- You need to reduce data dimensions
- You're looking for hidden patterns
Use Reinforcement Learning When:
- The problem involves sequential decisions
- You can simulate the environment
- Rewards can be defined but optimal actions are unknown
- The agent needs to learn through interaction
Semi-Supervised and Self-Supervised Learning
Semi-Supervised Learning
Combines small amounts of labeled data with large amounts of unlabeled data.
import numpy as np
from sklearn.semi_supervised import LabelPropagation
class SemiSupervisedClassifier:
def __init__(self):
self.model = LabelPropagation()
def train(self, X_labeled, y_labeled, X_unlabeled):
"""Train with both labeled and unlabeled data"""
# Combine data
X_combined = np.vstack([X_labeled, X_unlabeled])
# Create labels (-1 for unlabeled)
y_combined = np.concatenate([
y_labeled,
np.full(len(X_unlabeled), -1)
])
# Train model
self.model.fit(X_combined, y_combined)
# Return predictions for unlabeled data
return self.model.transduction_[len(y_labeled):]
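A small sketch with two well-separated clusters, where only four points are labeled and the rest are inferred:
rng = np.random.default_rng(0)
X_labeled = np.array([[0.0, 0.0], [0.2, 0.1], [5.0, 5.0], [5.2, 4.9]])
y_labeled = np.array([0, 0, 1, 1])
X_unlabeled = np.vstack([
    rng.normal(0, 0.3, (50, 2)),   # points near class 0
    rng.normal(5, 0.3, (50, 2))    # points near class 1
])

semi = SemiSupervisedClassifier()
inferred = semi.train(X_labeled, y_labeled, X_unlabeled)
print(inferred[:5], inferred[-5:])  # labels propagated to the unlabeled points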
Self-Supervised Learning
Creates supervised tasks from unlabeled data.
import numpy as np
class SelfSupervisedLearning:
def rotate_image(self, image, angle):
"""Rotate a square image array by a multiple of 90 degrees (simple helper)"""
return np.rot90(image, k=angle // 90)
def create_pretext_task(self, data):
"""Create a supervised task from unlabeled data"""
# Example: Predict rotation angle of images
rotations = [0, 90, 180, 270]
X_augmented = []
y_rotations = []
for image in data:
for rotation in rotations:
rotated = self.rotate_image(image, rotation)
X_augmented.append(rotated)
y_rotations.append(rotation)
return np.array(X_augmented), np.array(y_rotations)
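With the rotate_image helper above, the pretext task can be built directly from unlabeled images:
unlabeled_images = np.random.rand(100, 28, 28)
ssl = SelfSupervisedLearning()
X_pretext, y_pretext = ssl.create_pretext_task(unlabeled_images)
print(X_pretext.shape, y_pretext.shape)  # (400, 28, 28) (400,)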
Conclusion
Understanding the three main types of machine learning - supervised, unsupervised, and reinforcement learning - is fundamental to applying ML effectively. Each approach has its strengths and ideal use cases:
- Supervised Learning excels when you have clear examples of what you want to predict
- Unsupervised Learning reveals hidden structures and patterns in your data
- Reinforcement Learning optimizes sequential decision-making through experience
As you progress in your ML journey, you'll often find that real-world problems benefit from combining these approaches, leading to more powerful and flexible solutions.
Next Steps
Ready to explore specific applications? Check out our article on Key Applications of AI to see how these ML types are applied in healthcare, finance, autonomous vehicles, and more.