```python
from sklearn.svm import SVC
from sklearn.metrics import classification_report
from sklearn.model_selection import GridSearchCV

# Baseline RBF-kernel SVM on the prepared training split
model = SVC(kernel='rbf', C=1, gamma='scale')
model.fit(X_train, y_train)

# Evaluate on the held-out test set
y_pred = model.predict(X_test)
print(classification_report(y_test, y_pred))

# Tune C and gamma with a grid search; refit=True retrains the best estimator
param_grid = {'C': [0.1, 1, 10],
              'gamma': [1, 0.1, 0.01], 'kernel': ['rbf']}
grid = GridSearchCV(SVC(), param_grid, refit=True, verbose=2)
grid.fit(X_train, y_train)
print(grid.best_params_)
print(grid.best_estimator_)
```
```mermaid
flowchart LR
subgraph Prediction["Production Deployment"]
direction TB
I[New Email] --> J["Preprocessing"]
J --> K["Feature Extraction"]
K --> L["SVM Prediction"]
L --> M{Classification}
M -->|"Score > 0.9"| N["High Confidence Spam"]
M -->|"0.5 < Score < 0.9"| O["Potential Spam"]
M -->|"Score < 0.5"| P["Not Spam"]
N --> Q["Spam Folder"]
O --> R["Quarantine for Review"]
P --> S["Inbox"]
end
subgraph Training["Training Phase"]
direction TB
A[Email Training Dataset] --> AA["Data Preprocessing"]
subgraph preprocess["Preprocessing Steps"]
direction TB
AA --> |Clean Data| AA1["• Remove HTML tags
• Convert to lowercase
• Remove special characters
• Handle missing values
• Remove duplicate emails"]
AA1 --> AA2["Text Normalization
• Tokenization
• Stop word removal
• Lemmatization
• Spell checking"]
end
subgraph features["Feature Engineering"]
direction TB
B["Feature Extraction"] --> C["Content-Based Features"]
B --> D["Metadata Features"]
B --> E["Behavioral Features"]
C --> |Extract| C1["• Word frequency (TF-IDF)
• Keywords presence
• Character n-grams
• Word embeddings
• Spam word ratio"]
D --> |Extract| D1["• Email headers
• Sender information
• Time sent
• Number of recipients
• Mail client used"]
E --> |Extract| E1["• Link frequency
• Image ratio
• Email size
• Text-to-HTML ratio
• Recipient patterns"]
end
subgraph model["Model Training"]
direction TB
F["SVM Configuration"] --> F1["Kernel Selection
• Linear for high-dimensional data
• RBF for complex patterns
• Polynomial for non-linear data"]
F1 --> F2["Hyperparameter Tuning
• C: Control overfitting
• Gamma: Kernel coefficient
• Degree: Polynomial kernel
• Cross-validation splits"]
F2 --> G["Model Training"]
G --> H["Model Evaluation"]
H --> H1["Performance Metrics
• Accuracy
• Precision
• Recall
• F1-Score
• ROC-AUC"]
end
end
AA2 --> B
C1 & D1 & E1 --> G
H --> L
style Training fill:#e1f5fe,stroke:#01579b
style Prediction fill:#f3e5f5,stroke:#4a148c
style preprocess fill:#fff,stroke:#666
style features fill:#fff,stroke:#666
style model fill:#fff,stroke:#666
classDef processNode fill:#fff,stroke:#333,stroke-width:2px
classDef dataNode fill:#e3f2fd,stroke:#1565c0,stroke-width:2px
classDef decisionNode fill:#f5f5f5,stroke:#424242,stroke-width:2px
classDef metricsNode fill:#ffebee,stroke:#c62828,stroke-width:2px
class A,I dataNode
class F,G processNode
class M decisionNode
class H1 metricsNode
```
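The training phase in the flowchart maps naturally onto a scikit-learn pipeline. The sketch below is a minimal version of that flow, assuming the corpus lives in hypothetical `raw_emails` / `labels` variables: the cleanup step covers only the HTML-stripping, lowercasing, and special-character rules from the preprocessing box (tokenization and stop-word removal are delegated to `TfidfVectorizer`), and the metadata and behavioral feature groups are omitted.

```python
import re

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

def clean_email(text):
    """Minimal cleanup mirroring the preprocessing stage of the flowchart."""
    text = re.sub(r'<[^>]+>', ' ', text)        # remove HTML tags
    text = text.lower()                         # convert to lowercase
    text = re.sub(r'[^a-z0-9\s]', ' ', text)    # remove special characters
    return re.sub(r'\s+', ' ', text).strip()    # collapse whitespace

# `raw_emails` (list of strings) and `labels` (0 = ham, 1 = spam) are
# placeholders for whatever corpus is being used.
X = [clean_email(e) for e in raw_emails]
X_train, X_test, y_train, y_test = train_test_split(
    X, labels, test_size=0.2, stratify=labels, random_state=42)

# TF-IDF word/bigram frequencies (the content-based features) feed the RBF SVM.
pipeline = Pipeline([
    ('tfidf', TfidfVectorizer(stop_words='english', ngram_range=(1, 2))),
    ('svm', SVC(kernel='rbf', C=1, gamma='scale')),
])
pipeline.fit(X_train, y_train)
print(pipeline.score(X_test, y_test))   # test-set accuracy
```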
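The deployment branch routes each incoming email on a confidence score. `SVC` does not produce probabilities out of the box, so the sketch below assumes the pipeline above is refit with `probability=True` (Platt scaling) to obtain a spam probability; `route_email` and the folder names are hypothetical, and the thresholds mirror the ones in the flowchart.

```python
from sklearn.svm import SVC

# Continuing the sketch above: refit the same pipeline with probability
# estimates enabled so each prediction comes with a spam score.
pipeline.set_params(svm=SVC(kernel='rbf', C=1, gamma='scale', probability=True))
pipeline.fit(X_train, y_train)

def route_email(model, email_text):
    """Route an email to a folder using the flowchart's confidence thresholds."""
    # Column 1 of predict_proba is P(spam) when labels are encoded as 0/1.
    spam_prob = model.predict_proba([clean_email(email_text)])[0, 1]
    if spam_prob >= 0.9:
        return 'spam_folder'   # high-confidence spam
    if spam_prob >= 0.5:
        return 'quarantine'    # potential spam, held for review
    return 'inbox'             # not spam

print(route_email(pipeline, "You have WON a free prize!!! Click the link now"))
```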