Data Sanitization
Sockeon provides comprehensive sanitization utilities through the Sanitizer
class, offering standalone data cleaning and normalization functions for various data types.
Overview
The Sanitizer
class provides:
- Standalone sanitization - Independent of validation
- Type-specific cleaning - Specialized functions for different data types
- Security-focused - XSS protection and input normalization
- Flexible options - Configurable sanitization behavior
- Comprehensive coverage - Support for strings, numbers, arrays, and more
Basic Usage
use Sockeon\Sockeon\Validation\Sanitizer;
// Basic sanitization
$cleanString = Sanitizer::string($userInput, true, true);
$cleanEmail = Sanitizer::email($userInput);
$cleanInteger = Sanitizer::integer($userInput, 0);
$cleanBoolean = Sanitizer::boolean($userInput);
$cleanArray = Sanitizer::array($userInput);
String Sanitization
Basic String Cleaning
// Basic string sanitization
$clean = Sanitizer::string(' Hello World '); // "Hello World"
$clean = Sanitizer::string('<script>alert("xss")</script>Hello'); // "Hello"
// With options
$clean = Sanitizer::string($input, $trim = true, $stripTags = true);
Email Sanitization
// Email normalization
$email = Sanitizer::email(' USER@EXAMPLE.COM '); // "user@example.com"
$email = Sanitizer::email('user@example.com'); // "user@example.com"
$email = Sanitizer::email(''); // ""
URL Sanitization
// URL normalization
$url = Sanitizer::url('example.com'); // "http://example.com"
$url = Sanitizer::url('https://example.com'); // "https://example.com"
$url = Sanitizer::url(''); // ""
HTML Sanitization
// HTML content sanitization
$html = '<p>Hello <script>alert("xss")</script> <strong>World</strong></p>';
// Remove all HTML tags
$clean = Sanitizer::html($html); // "Hello World"
// Allow specific tags
$clean = Sanitizer::html($html, ['p', 'strong']); // "<p>Hello <strong>World</strong></p>"
Filename Sanitization
// Safe filename creation
$filename = Sanitizer::filename('../../dangerous/path/file.txt'); // "file.txt"
$filename = Sanitizer::filename('My File (2024).pdf'); // "MyFile2024.pdf"
$filename = Sanitizer::filename(''); // ""
Phone Number Sanitization
// Phone number cleaning
$phone = Sanitizer::phone('+1 (555) 123-4567'); // "+1 (555) 123-4567"
$phone = Sanitizer::phone('555.123.4567'); // "555 123 4567"
$phone = Sanitizer::phone('5551234567'); // "5551234567"
Credit Card Sanitization
// Credit card number cleaning
$card = Sanitizer::creditCard('1234-5678-9012-3456'); // "1234567890123456"
$card = Sanitizer::creditCard('1234 5678 9012 3456'); // "1234567890123456"
$card = Sanitizer::creditCard(''); // ""
Password Sanitization
// Basic password sanitization
$password = Sanitizer::password(' MyPassword123 '); // "MyPassword123"
$password = Sanitizer::password('<script>alert("xss")</script>pass'); // "pass"
Numeric Sanitization
Integer Sanitization
// Integer conversion with fallback
$int = Sanitizer::integer('25'); // 25
$int = Sanitizer::integer('25.5'); // 25
$int = Sanitizer::integer('invalid', 0); // 0
$int = Sanitizer::integer('', 18); // 18
Float Sanitization
// Float conversion with fallback
$float = Sanitizer::float('25.5'); // 25.5
$float = Sanitizer::float('25'); // 25.0
$float = Sanitizer::float('invalid', 0.0); // 0.0
$float = Sanitizer::float('', 10.5); // 10.5
Boolean Sanitization
// Boolean conversion
$bool = Sanitizer::boolean('true'); // true
$bool = Sanitizer::boolean('1'); // true
$bool = Sanitizer::boolean('yes'); // true
$bool = Sanitizer::boolean('on'); // true
$bool = Sanitizer::boolean('false'); // false
$bool = Sanitizer::boolean('0'); // false
$bool = Sanitizer::boolean('no'); // false
$bool = Sanitizer::boolean('off'); // false
Array Sanitization
// Array conversion
$array = Sanitizer::array(['item1', 'item2']); // ['item1', 'item2']
$array = Sanitizer::array('["item1", "item2"]'); // ['item1', 'item2'] (JSON)
$array = Sanitizer::array('not an array'); // []
$array = Sanitizer::array(null); // []
JSON Sanitization
// JSON parsing and sanitization
$json = Sanitizer::json('{"name": "John", "age": 30}'); // ['name' => 'John', 'age' => 30]
$json = Sanitizer::json('invalid json'); // 'invalid json' (unchanged)
$json = Sanitizer::json(null); // null
Date and Time Sanitization
Date Sanitization
// Date formatting
$date = Sanitizer::date('2024-01-15'); // "2024-01-15"
$date = Sanitizer::date('01/15/2024', 'Y-m-d'); // "2024-01-15"
$date = Sanitizer::date('invalid date'); // ""
$date = Sanitizer::date('', 'Y-m-d'); // ""
Time Sanitization
// Time formatting
$time = Sanitizer::time('14:30:00'); // "14:30:00"
$time = Sanitizer::time('2:30 PM', 'H:i:s'); // "14:30:00"
$time = Sanitizer::time('invalid time'); // ""
DateTime Sanitization
// DateTime formatting
$datetime = Sanitizer::datetime('2024-01-15 14:30:00'); // "2024-01-15 14:30:00"
$datetime = Sanitizer::datetime('01/15/2024 2:30 PM', 'Y-m-d H:i:s'); // "2024-01-15 14:30:00"
$datetime = Sanitizer::datetime('invalid datetime'); // ""
Specialized Sanitization
IP Address Sanitization
// IP address validation
$ip = Sanitizer::ipAddress('192.168.1.1'); // "192.168.1.1"
$ip = Sanitizer::ipAddress('invalid ip'); // ""
$ip = Sanitizer::ipAddress(''); // ""
Color Sanitization
// Color value validation
$color = Sanitizer::color('#FF0000'); // "#FF0000"
$color = Sanitizer::color('rgb(255, 0, 0)'); // "rgb(255, 0, 0)"
$color = Sanitizer::color('rgba(255, 0, 0, 0.5)'); // "rgba(255, 0, 0, 0.5)"
$color = Sanitizer::color('invalid color'); // ""
CSS Class Sanitization
// CSS class name cleaning
$class = Sanitizer::cssClass('my-class-name'); // "my-class-name"
$class = Sanitizer::cssClass('my class name'); // "myclassname"
$class = Sanitizer::cssClass('my-class@name'); // "my-classname"
$class = Sanitizer::cssClass(''); // ""
ID Sanitization
// HTML ID attribute cleaning
$id = Sanitizer::id('my-element-id'); // "my-element-id"
$id = Sanitizer::id('123element'); // "id_123element" (prefixed if starts with number)
$id = Sanitizer::id('my element id'); // "myelementid"
$id = Sanitizer::id(''); // ""
Advanced Usage
Custom Sanitization Chains
// Chain multiple sanitization steps
$userInput = ' <script>alert("xss")</script>John Doe ';
// Step 1: Basic string sanitization
$clean = Sanitizer::string($userInput, true, true); // "John Doe"
// Step 2: Additional HTML encoding if needed
$safe = htmlspecialchars($clean, ENT_QUOTES, 'UTF-8'); // "John Doe"
Batch Sanitization
// Sanitize multiple fields at once
$data = [
'name' => ' John Doe ',
'email' => ' USER@EXAMPLE.COM ',
'age' => '25',
'website' => 'example.com',
'phone' => '+1 (555) 123-4567'
];
$sanitized = [
'name' => Sanitizer::string($data['name']),
'email' => Sanitizer::email($data['email']),
'age' => Sanitizer::integer($data['age']),
'website' => Sanitizer::url($data['website']),
'phone' => Sanitizer::phone($data['phone'])
];
// Result:
// [
// 'name' => 'John Doe',
// 'email' => 'user@example.com',
// 'age' => 25,
// 'website' => 'http://example.com',
// 'phone' => '+1 (555) 123-4567'
// ]
Conditional Sanitization
// Sanitize based on conditions
function sanitizeUserData(array $data): array
{
$sanitized = [];
// Always sanitize name
$sanitized['name'] = Sanitizer::string($data['name'] ?? '');
// Sanitize email only if provided
if (!empty($data['email'])) {
$sanitized['email'] = Sanitizer::email($data['email']);
}
// Sanitize age with fallback
$sanitized['age'] = Sanitizer::integer($data['age'] ?? '', 18);
// Sanitize website only if valid
if (!empty($data['website'])) {
$website = Sanitizer::url($data['website']);
if (!empty($website)) {
$sanitized['website'] = $website;
}
}
return $sanitized;
}
Security Considerations
XSS Protection
// Always sanitize user input to prevent XSS
$userInput = '<script>alert("xss")</script>Hello World';
// Remove HTML tags
$safe = Sanitizer::string($userInput, true, true); // "Hello World"
// Or allow specific tags
$safe = Sanitizer::html($userInput, ['p', 'strong']); // "Hello World"
SQL Injection Prevention
// Sanitize data before database operations
$userId = Sanitizer::integer($_GET['id'] ?? '', 0);
$username = Sanitizer::string($_POST['username'] ?? '', true, true);
// Use prepared statements with sanitized data
$stmt = $pdo->prepare("SELECT * FROM users WHERE id = ? AND username = ?");
$stmt->execute([$userId, $username]);
File Upload Security
// Sanitize filenames
$originalName = $_FILES['file']['name'];
$safeName = Sanitizer::filename($originalName);
// Validate file type and size
if ($_FILES['file']['size'] > 5000000) { // 5MB limit
throw new Exception('File too large');
}
// Move to safe location
move_uploaded_file($_FILES['file']['tmp_name'], "/uploads/{$safeName}");
Best Practices
1. Always Sanitize User Input
// Good: Sanitize all user input
$name = Sanitizer::string($_POST['name'] ?? '');
$email = Sanitizer::email($_POST['email'] ?? '');
$age = Sanitizer::integer($_POST['age'] ?? '', 0);
// Bad: Using raw input
$name = $_POST['name'] ?? '';
$email = $_POST['email'] ?? '';
$age = $_POST['age'] ?? 0;
2. Use Appropriate Sanitization Methods
// Use type-specific sanitization
$email = Sanitizer::email($input); // For emails
$phone = Sanitizer::phone($input); // For phone numbers
$url = Sanitizer::url($input); // For URLs
$filename = Sanitizer::filename($input); // For filenames
3. Provide Fallback Values
// Always provide sensible defaults
$age = Sanitizer::integer($input, 18); // Default age
$isActive = Sanitizer::boolean($input, false); // Default status
$tags = Sanitizer::array($input); // Default empty array
4. Chain with Validation
// Sanitize before validation
$cleanInput = [
'name' => Sanitizer::string($rawInput['name'] ?? ''),
'email' => Sanitizer::email($rawInput['email'] ?? ''),
'age' => Sanitizer::integer($rawInput['age'] ?? '', 0)
];
// Then validate
$validator->validate($cleanInput, [
'name' => 'required|string|min:2',
'email' => 'required|email',
'age' => 'integer|min:18'
]);
5. Handle Empty Values
// Check for empty values after sanitization
$name = Sanitizer::string($input);
if (empty($name)) {
throw new Exception('Name is required');
}
// Or provide defaults
$name = Sanitizer::string($input) ?: 'Anonymous';
Performance Considerations
1. Reuse Sanitized Data
// Sanitize once and reuse
$sanitizedData = [
'name' => Sanitizer::string($input['name']),
'email' => Sanitizer::email($input['email'])
];
// Use sanitized data multiple times
$user = User::create($sanitizedData);
$log->info('User created', $sanitizedData);
2. Batch Processing
// Sanitize multiple items efficiently
$names = [' John ', ' Jane ', ' Bob '];
$cleanNames = array_map(fn($name) => Sanitizer::string($name), $names);
// Result: ['John', 'Jane', 'Bob']
This comprehensive sanitization system ensures your data is clean, secure, and properly formatted for safe processing in your Sockeon applications.