SELECT
SELECT *
FROM employees;
SELECT first_name, last_name, salary
FROM employees;
SELECT *
FROM employees
WHERE department = 'IT';
SELECT first_name, salary
FROM employees
ORDER BY salary DESC;
SELECT *
FROM employees
LIMIT 5;
SELECT DISTINCT department
FROM employees;
SELECT department, COUNT(*)
FROM employees
GROUP BY department;
SELECT department, COUNT(*)
FROM employees
GROUP BY department
HAVING COUNT(*) > 10;
SELECT employees.first_name, departments.department_name
FROM employees
JOIN departments ON employees.department_id = departments.id;
SELECT first_name, salary
FROM employees
WHERE salary > (SELECT AVG(salary) FROM employees);
SELECT first_name, salary,
CASE
WHEN salary > 50000 THEN 'High Salary'
ELSE 'Low Salary'
END AS salary_category
FROM employees;
INSERT
INSERT INTO employees (first_name, last_name, department, salary)
VALUES ('John', 'Doe', 'IT', 60000);
INSERT INTO employees (first_name, last_name, department, salary)
VALUES
('Alice', 'Smith', 'HR', 50000),
('Bob', 'Johnson', 'Finance', 55000),
('Charlie', 'Brown', 'IT', 62000);
INSERT INTO employees
VALUES (101, 'Emma', 'Wilson', 'Marketing', 70000);
INSERT INTO employees_backup (first_name, last_name, department, salary)
SELECT first_name, last_name, department, salary FROM employees;
INSERT INTO employees (first_name, last_name, department)
VALUES ('David', 'Miller', 'Sales');
INSERT INTO orders (customer_name, total_amount, created_at)
VALUES ('John Doe', 150.75, NOW());
INSERT INTO employees (first_name, last_name, department, salary)
VALUES ('Sophia', 'Anderson', NULL, 52000);
INSERT INTO employees (first_name, last_name, department, salary)
VALUES ('Michael', 'Clark', 'IT', 58000);
UPDATE
UPDATE employees
SET salary = 65000
WHERE employee_id = 101;
UPDATE employees
SET salary = 70000, department = 'Finance'
WHERE employee_id = 102;
UPDATE employees
SET salary = salary + 5000
WHERE department = 'IT';
UPDATE employees
SET salary = 50000;
UPDATE employees
SET salary =
CASE
WHEN department = 'IT' THEN salary + 7000
WHEN department = 'HR' THEN salary + 5000
ELSE salary + 3000
END;
UPDATE employees
SET salary = (SELECT AVG(salary) FROM employees)
WHERE department = 'Sales';
UPDATE employees
JOIN departments ON employees.department_id = departments.id
SET employees.salary = employees.salary * 1.1
WHERE departments.department_name = 'Marketing';
UPDATE employees
SET salary = 60000
WHERE department = 'HR'
LIMIT 5;
START TRANSACTION;
UPDATE employees
SET salary = 80000
WHERE department = 'Finance';
ROLLBACK; -- This undoes the update
COMMIT; -- This saves the update permanently
DELETE
DELETE FROM employees
WHERE employee_id = 101;
DELETE FROM employees
WHERE department = 'HR';
DELETE FROM employees;
DELETE employees
FROM employees
JOIN departments ON employees.department_id = departments.id
WHERE departments.department_name = 'Sales';
DELETE FROM employees
WHERE department_id IN (SELECT id FROM departments WHERE department_name = 'HR');
DELETE FROM employees
WHERE department = 'IT'
LIMIT 5;
START TRANSACTION;
DELETE FROM employees WHERE department = 'Finance';
ROLLBACK; -- Cancels the deletion
COMMIT; -- Saves the deletion permanently
CREATE
CREATE DATABASE company_db;
USE company_db;
CREATE TABLE employees (
employee_id INT PRIMARY KEY AUTO_INCREMENT,
first_name VARCHAR(50) NOT NULL,
last_name VARCHAR(50) NOT NULL,
department VARCHAR(50),
salary DECIMAL(10,2),
hire_date DATE DEFAULT CURRENT_DATE
);
- employee_id INT PRIMARY KEY AUTO_INCREMENT - Unique ID that auto-increments.
- VARCHAR(50) NOT NULL - Stores text with a max of 50 characters, required.
- DECIMAL(10,2) - Stores salary with up to 10 digits and 2 decimal places.
- DEFAULT CURRENT_DATE - Sets the default hire date as today.
CREATE TABLE departments (
department_id INT PRIMARY KEY AUTO_INCREMENT,
department_name VARCHAR(100) UNIQUE NOT NULL,
manager_id INT,
FOREIGN KEY (manager_id) REFERENCES employees(employee_id) ON DELETE SET NULL
);
- PRIMARY KEY - Ensures unique records.
- UNIQUE - Ensures values in a column are unique.
- FOREIGN KEY - Links to another table (employees), setting manager_id to NULL if the referenced record is deleted.
CREATE TABLE employees_backup AS
SELECT * FROM employees;
CREATE INDEX idx_department ON employees(department);
CREATE VIEW high_salary_employees AS
SELECT first_name, last_name, salary FROM employees WHERE salary > 70000;
-- You can now query the view like a table:
SELECT * FROM high_salary_employees;
CREATE PROCEDURE GetEmployeesByDepartment(IN dept_name VARCHAR(50))
BEGIN
SELECT * FROM employees WHERE department = dept_name;
END;
-- Execute the procedure:
CALL GetEmployeesByDepartment('IT');
CREATE DATABASE IF NOT EXISTS company_db;
DROP
DROP DATABASE company_db;
DROP DATABASE IF EXISTS company_db;
DROP TABLE employees;
DROP TABLE IF EXISTS employees;
ALTER TABLE employees DROP COLUMN salary;
DROP INDEX idx_department ON employees;
DROP VIEW high_salary_employees;
DROP PROCEDURE GetEmployeesByDepartment;
ALTER
ALTER TABLE employees
ADD email VARCHAR(100);
ALTER TABLE employees
ADD phone_number VARCHAR(15),
ADD address VARCHAR(255);
ALTER TABLE employees
DROP COLUMN salary;
ALTER TABLE employees
DROP COLUMN IF EXISTS salary;
ALTER TABLE employees
MODIFY COLUMN salary DECIMAL(12,2);
ALTER TABLE employees
ALTER COLUMN hire_date SET DEFAULT CURRENT_DATE;
ALTER TABLE employees
CHANGE COLUMN old_column_name new_column_name VARCHAR(100);
ALTER TABLE employees
RENAME COLUMN old_column_name TO new_column_name;
ALTER TABLE employees
RENAME TO staff;
ALTER TABLE employees
ADD PRIMARY KEY (employee_id);
ALTER TABLE employees
ADD CONSTRAINT fk_department
FOREIGN KEY (department_id) REFERENCES departments(department_id);
ALTER TABLE employees
DROP CONSTRAINT fk_department;
ALTER TABLE employees
ADD INDEX idx_department (department);
ALTER TABLE employees
DROP INDEX idx_department;
TRUNCATE
TRUNCATE TABLE employees;
TRUNCATE TABLE employees;
SET FOREIGN_KEY_CHECKS = 0;
TRUNCATE TABLE employees;
SET FOREIGN_KEY_CHECKS = 1;
START TRANSACTION;
TRUNCATE TABLE employees;
ROLLBACK; -- (This works in PostgreSQL, but not MySQL)
PRIMARY KEY
- Uniqueness - No two rows can have the same primary key value.
- Not NULL - A primary key column cannot have NULL values.
- One per Table - A table can have only one primary key.
- Automatic Indexing - Most databases automatically create an index on the primary key column for faster searches.
CREATE TABLE employees (
employee_id INT PRIMARY KEY,
first_name VARCHAR(50),
last_name VARCHAR(50)
);
CREATE TABLE employees (
employee_id INT PRIMARY KEY AUTO_INCREMENT,
first_name VARCHAR(50),
last_name VARCHAR(50)
);
CREATE TABLE project_assignments (
employee_id INT,
project_id INT,
PRIMARY KEY (employee_id, project_id)
);
ALTER TABLE employees
ADD PRIMARY KEY (employee_id);
ALTER TABLE employees
DROP PRIMARY KEY;
CREATE TABLE departments (
department_id INT PRIMARY KEY,
department_name VARCHAR(100)
);
CREATE TABLE employees (
employee_id INT PRIMARY KEY AUTO_INCREMENT,
first_name VARCHAR(50),
department_id INT,
FOREIGN KEY (department_id) REFERENCES departments(department_id)
);
FOREIGN KEY
- References a Primary Key - The foreign key column(s) must match values from a primary key in another table.
- Ensures Data Integrity - Prevents inserting invalid or orphaned records.
- Can Be NULL - If a foreign key is optional, it can be NULL.
- Can Have Multiple Foreign Keys - A table can have multiple foreign keys referencing different tables.
It's good to use indexes on foreign key columns for better performance.
Be careful to avoid circular dependencies where tables reference each other.
CREATE TABLE departments (
department_id INT PRIMARY KEY,
department_name VARCHAR(100) NOT NULL
);
CREATE TABLE employees (
employee_id INT PRIMARY KEY AUTO_INCREMENT,
first_name VARCHAR(50) NOT NULL,
department_id INT,
FOREIGN KEY (department_id) REFERENCES departments(department_id)
);
ALTER TABLE employees
ADD CONSTRAINT fk_department
FOREIGN KEY (department_id) REFERENCES departments(department_id);
ALTER TABLE employees
ADD CONSTRAINT fk_department
FOREIGN KEY (department_id) REFERENCES departments(department_id);
ALTER TABLE employees
DROP FOREIGN KEY fk_department;
CREATE TABLE employees (
employee_id INT PRIMARY KEY AUTO_INCREMENT,
first_name VARCHAR(50) NOT NULL,
department_id INT,
FOREIGN KEY (department_id) REFERENCES departments(department_id)
ON DELETE CASCADE
ON UPDATE CASCADE
);
- ON DELETE CASCADE - Deletes related rows in the child table when the parent row is deleted.
- ON DELETE SET NULL - Sets the foreign key column to NULL if the parent row is deleted.
- ON DELETE RESTRICT - Prevents deletion of a parent row if related rows exist.
- ON UPDATE CASCADE - Updates foreign key values in child rows if the primary key in the parent table changes.
CREATE TABLE customers (
customer_id INT PRIMARY KEY,
customer_name VARCHAR(100) NOT NULL
);
CREATE TABLE employees (
employee_id INT PRIMARY KEY AUTO_INCREMENT,
first_name VARCHAR(50) NOT NULL
);
CREATE TABLE orders (
order_id INT PRIMARY KEY AUTO_INCREMENT,
customer_id INT,
employee_id INT,
FOREIGN KEY (customer_id) REFERENCES customers(customer_id) ON DELETE CASCADE,
FOREIGN KEY (employee_id) REFERENCES employees(employee_id) ON DELETE SET NULL
);
UNIQUE
CREATE TABLE employees (
employee_id INT PRIMARY KEY AUTO_INCREMENT,
email VARCHAR(100) UNIQUE,
phone_number VARCHAR(15) UNIQUE
);
CREATE TABLE employees (
employee_id INT PRIMARY KEY AUTO_INCREMENT,
first_name VARCHAR(50),
last_name VARCHAR(50),
department VARCHAR(50),
UNIQUE (first_name, last_name, department)
);
ALTER TABLE employees
ADD CONSTRAINT unique_email UNIQUE (email);
ALTER TABLE employees
DROP CONSTRAINT unique_email;
-- If you're using MySQL, use:
ALTER TABLE employees
DROP INDEX unique_email;
INSERT IGNORE INTO employees (email, phone_number)
VALUES ('john@example.com', '1234567890');
INSERT INTO employees (email, phone_number)
VALUES ('john@example.com', '1234567890')
ON DUPLICATE KEY UPDATE phone_number = '0987654321';
NOT NULL
CREATE TABLE employees (
employee_id INT PRIMARY KEY AUTO_INCREMENT,
first_name VARCHAR(50) NOT NULL,
last_name VARCHAR(50) NOT NULL,
email VARCHAR(100) UNIQUE NOT NULL
);
ALTER TABLE employees
MODIFY COLUMN first_name VARCHAR(50) NOT NULL;
ALTER TABLE employees
MODIFY COLUMN first_name VARCHAR(50) NULL;
-- This works because all NOT NULL columns have values:
INSERT INTO employees (first_name, last_name, email)
VALUES ('John', 'Doe', 'john@example.com');
-- This will fail:
INSERT INTO employees (first_name, last_name)
VALUES ('John', 'Doe');
CREATE TABLE employees (
employee_id INT PRIMARY KEY AUTO_INCREMENT,
first_name VARCHAR(50) NOT NULL,
salary DECIMAL(10,2) NOT NULL DEFAULT 50000.00
);
CHECK
CREATE TABLE employees (
employee_id INT PRIMARY KEY AUTO_INCREMENT,
first_name VARCHAR(50) NOT NULL,
age INT CHECK (age >= 18),
salary DECIMAL(10,2) CHECK (salary > 0)
);
- age must be at least 18
- salary must be greater than 0
- if an attempt is made to insert an employee under 18 or with a salary of 0 or less, the database rejects it
ALTER TABLE employees
ADD CONSTRAINT check_salary CHECK (salary > 0);
ALTER TABLE employees
DROP CONSTRAINT check_salary;
-- In MySQL, use:
ALTER TABLE employees
DROP CHECK check_salary;
CREATE TABLE employees (
employee_id INT PRIMARY KEY AUTO_INCREMENT,
first_name VARCHAR(50) NOT NULL,
age INT,
salary DECIMAL(10,2),
CHECK (age >= 18 AND salary > 30000)
);
CREATE TABLE orders (
order_id INT PRIMARY KEY AUTO_INCREMENT,
quantity INT NOT NULL,
price DECIMAL(10,2) NOT NULL,
CHECK (quantity * price > 100)
);
DEFAULT
CREATE TABLE employees (
employee_id INT PRIMARY KEY AUTO_INCREMENT,
first_name VARCHAR(50) NOT NULL,
salary DECIMAL(10,2) DEFAULT 50000.00,
hire_date DATE DEFAULT CURRENT_DATE
);
-- This works because salary and hire_date have default values:
INSERT INTO employees (first_name)
VALUES ('John Doe');
-- To explicitly override the default, provide a value:
INSERT INTO employees (first_name, salary, hire_date)
VALUES ('Jane Smith', 60000, '2025-01-01');
ALTER TABLE employees
ALTER COLUMN salary SET DEFAULT 45000.00;
ALTER TABLE employees
ALTER COLUMN salary DROP DEFAULT;
-- Numeric default:
CREATE TABLE products (
product_id INT PRIMARY KEY AUTO_INCREMENT,
price DECIMAL(10,2) DEFAULT 9.99
);
-- Text default:
CREATE TABLE users (
user_id INT PRIMARY KEY AUTO_INCREMENT,
role VARCHAR(20) DEFAULT 'customer'
);
-- Boolean default (SQL Server, PostgreSQL):
CREATE TABLE accounts (
account_id INT PRIMARY KEY AUTO_INCREMENT,
is_active BOOLEAN DEFAULT TRUE
);
-- Date/Time default:
CREATE TABLE orders (
order_id INT PRIMARY KEY AUTO_INCREMENT,
order_date TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
INNER JOIN
SELECT column_names
FROM table1
INNER JOIN table2
ON table1.common_column = table2.common_column;
SELECT employees.name, departments.department_name, locations.location_name
FROM employees
INNER JOIN departments ON employees.department_id = departments.department_id
INNER JOIN locations ON departments.location_id = locations.location_id;
You can join more than two tables using INNER JOIN.
This query retrieves the employee name, their department, and the location of the department.
LEFT JOIN
SELECT column_names
FROM table1
LEFT JOIN table2
ON table1.common_column = table2.common_column;
SELECT employees.name, departments.department_name, locations.location_name
FROM employees
LEFT JOIN departments ON employees.department_id = departments.department_id
LEFT JOIN locations ON departments.location_id = locations.location_id;
RIGHT JOIN
SELECT column_names
FROM table1
RIGHT JOIN table2
ON table1.common_column = table2.common_column;
SELECT employees.name, departments.department_name, locations.location_name
FROM employees
RIGHT JOIN departments ON employees.department_id = departments.department_id
RIGHT JOIN locations ON departments.location_id = locations.location_id;
FULL OUTER JOIN
SELECT column_names
FROM table1
FULL OUTER JOIN table2
ON table1.common_column = table2.common_column;
SELECT employees.name, departments.department_name, locations.location_name
FROM employees
FULL OUTER JOIN departments ON employees.department_id = departments.department_id
FULL OUTER JOIN locations ON departments.location_id = locations.location_id;
SELECT employees.employee_id, employees.name, departments.department_name
FROM employees
LEFT JOIN departments ON employees.department_id = departments.department_id
UNION
SELECT employees.employee_id, employees.name, departments.department_name
FROM employees
RIGHT JOIN departments ON employees.department_id = departments.department_id;
CROSS JOIN
- If both tables have 1000 rows each, the result will have 1,000,000 rows (1000 x 1000).
- Be cautious when using CROSS JOIN on large tables, as it can slow down queries.
SELECT column_names
FROM table1
CROSS JOIN table2;
-- OR (in some databases, you can use this alternative syntax):
SELECT column_names
FROM table1, table2;
SELECT employees.name, departments.department_name
FROM employees, departments;
SELF JOIN
SELECT A.column_name, B.column_name
FROM table_name A
JOIN table_name B
ON A.common_column = B.common_column;
UNION
SELECT column_names FROM table1
UNION
SELECT column_names FROM table2;
SELECT employee_id, name, department FROM Employees_2023
UNION ALL
SELECT employee_id, name, department FROM Employees_2024;
SELECT name, department FROM Employees_2023 WHERE department = 'IT'
UNION
SELECT name, department FROM Employees_2024 WHERE department = 'IT';
SELECT employee_id, name FROM Employees_2023
UNION
SELECT employee_id, name FROM Employees_2024
ORDER BY name ASC;
COUNT()
SELECT COUNT(*)
FROM table_name;
SELECT COUNT(column_name)
FROM table_name;
SELECT COUNT(DISTINCT department)
FROM Employees;
SELECT department, COUNT(*) AS employee_count
FROM Employees
GROUP BY department;
SELECT COUNT(*)
FROM Employees
WHERE department = 'IT';
SELECT department, COUNT(*) AS employee_count
FROM Employees
GROUP BY department
HAVING COUNT(*) > 1;
SUM()
SELECT SUM(column_name)
FROM table_name;
SELECT customer, SUM(amount) AS total_spent
FROM Sales
GROUP BY customer;
SELECT SUM(amount)
FROM Sales WHERE customer = 'Alice';
SELECT customer, SUM(amount) AS total_spent
FROM Sales
GROUP BY customer
HAVING SUM(amount) > 100;
SELECT SUM(DISTINCT amount)
FROM Sales;
SELECT SUM(COALESCE(amount, 0))
FROM Sales;
AVG()
SELECT AVG(column_name)
FROM table_name;
SELECT customer, AVG(amount) AS avg_spent
FROM Sales
GROUP BY customer;
SELECT AVG(amount)
FROM Sales
WHERE customer = 'Alice';
SELECT customer, AVG(amount) AS avg_spent
FROM Sales
GROUP BY customer
HAVING AVG(amount) > 100;
SELECT AVG(DISTINCT amount)
FROM Sales;
SELECT AVG(COALESCE(amount, 0))
FROM Sales;
MIN()
SELECT MIN(column_name)
FROM table_name;
SELECT category, MIN(price) AS cheapest_item
FROM Products
GROUP BY category;
SELECT MIN(price)
FROM Products
WHERE category = 'Electronics';
SELECT category, MIN(price) AS cheapest_item
FROM Products
GROUP BY category
HAVING MIN(price) > 500;
SELECT MIN(order_date) AS first_order
FROM Orders;
SELECT MIN(COALESCE(price, 0))
FROM Products;
MAX()
SELECT MAX(column_name)
FROM table_name;
SELECT category, MAX(price) AS most_expensive
FROM Products
GROUP BY category;
SELECT MAX(price)
FROM Products
WHERE category = 'Electronics';
SELECT category, MAX(price) AS most_expensive
FROM Products
GROUP BY category
HAVING MAX(price) > 800;
SELECT MAX(order_date) AS latest_order
FROM Orders;
SELECT MAX(COALESCE(price, 0))
FROM Products;
GROUP BY
SELECT column_name, AGGREGATE_FUNCTION(column_name)
FROM table_name
GROUP BY column_name;
SELECT customer, SUM(amount) AS total_spent
FROM Sales
GROUP BY customer;
SELECT customer, category, SUM(amount) AS total_spent
FROM Sales
GROUP BY customer, category;
SELECT customer, COUNT(order_id) AS total_orders
FROM Sales
GROUP BY customer;
SELECT customer, SUM(amount) AS total_spent
FROM Sales
GROUP BY customer
HAVING SUM(amount) > 250;
SELECT customer, SUM(amount) AS total_spent
FROM Sales
GROUP BY customer
ORDER BY total_spent DESC;
HAVING
SELECT column_name, AGGREGATE_FUNCTION(column_name)
FROM table_name
GROUP BY column_name
HAVING condition;
SELECT customer, SUM(amount) AS total_spent
FROM Sales
GROUP BY customer
HAVING SUM(amount) > 200;
-- Using WHERE (filters before grouping)
SELECT customer, SUM(amount) AS total_spent
FROM Sales
WHERE amount > 100
GROUP BY customer;
-- This filters out rows where amount <= 100 before grouping
-- Using HAVING (filters after aggregation)
SELECT customer, SUM(amount) AS total_spent
FROM Sales
GROUP BY customer
HAVING SUM(amount) > 200;
-- This calculates total sales first, then filters customers with total sales > 200.
SELECT category, SUM(amount) AS total_sales
FROM Sales
GROUP BY category
HAVING SUM(amount) > 100 AND COUNT(order_id) > 1;
SELECT customer, SUM(amount) AS total_spent
FROM Sales
GROUP BY customer
HAVING SUM(amount) > 100
ORDER BY total_spent DESC;
COALESCE
SELECT COALESCE(expression1, expression2, ..., default_value);
SELECT name, COALESCE(phone, 'No Phone Available') AS contact_info
FROM Customers;
SELECT COALESCE(NULL, NULL, 'First Non-Null', 'Second Non-Null') AS result;
SELECT name, amount, COALESCE(amount, 0) * 1.1 AS updated_amount
FROM Payments;
SELECT department, COALESCE(SUM(salary), 0) AS total_salary
FROM Employees
GROUP BY department;
-- Returns 'Hello'
SELECT COALESCE(NULL, NULL, 'Hello', 'World');
-- Returns 'Fallback'
SELECT IFNULL(NULL, 'Fallback');
Subqueries and nested queries
- Single-row subquery - Returns one value.
- Multi-row subquery - Returns multiple values.
- Correlated subquery - Depends on the outer query and runs once for each row in the outer query.
- Nested subquery - A subquery inside another subquery.
SELECT employee_name, salary,
(SELECT MAX(salary) FROM Employees) AS highest_salary
FROM Employees;
SELECT employee_name, salary
FROM Employees
WHERE salary > (SELECT AVG(salary) FROM Employees);
SELECT *
FROM (SELECT employee_name, salary FROM Employees WHERE salary > 50000) AS HighEarners;
SELECT employee_name, department_id, salary
FROM Employees e1
WHERE salary = (
SELECT MAX(salary)
FROM Employees e2
WHERE e1.department_id = e2.department_id
);
SELECT employee_name, salary, department_id
FROM Employees
WHERE salary = (
SELECT MIN(salary)
FROM Employees
WHERE department_id = (
SELECT department_id
FROM Employees
GROUP BY department_id
ORDER BY AVG(salary) DESC
LIMIT 1
)
);
- Improves readability by breaking down complex queries.
- Reduces joins when filtering data.
- Can be used in SELECT, FROM, and WHERE clauses.
Use JOINS instead of subqueries when possible because:
- Joins are faster in many cases.
- Subqueries can be inefficient, especially correlated subqueries.
Indexes
CREATE TABLE Employees (
employee_id INT PRIMARY KEY, -- This automatically creates an index
name VARCHAR(100),
salary DECIMAL(10,2)
);
CREATE UNIQUE INDEX idx_employee_email
ON Employees(email);
CREATE INDEX idx_employee_name_salary
ON Employees(name, salary);
-- Efficient for queries like:
SELECT * FROM Employees WHERE name = 'Alice' AND salary > 50000;
CREATE FULLTEXT INDEX idx_article_content
ON Articles(content);
CREATE CLUSTERED INDEX idx_employee_salary
ON Employees(salary);
CREATE NONCLUSTERED INDEX idx_employee_name
ON Employees(name);
-- Efficient for queries like:
SELECT * FROM Employees WHERE name = 'Alice';
-- MySQL:
SHOW INDEX
FROM Employees;
-- In PostgreSQL:
SELECT *
FROM pg_indexes
WHERE tablename = 'employees';
-- In PostgreSQL:
DROP INDEX idx_employee_name;
-- In MySQL:
ALTER TABLE Employees
DROP INDEX idx_employee_name;
Stored procedures and functions
- Reusable - Write once, execute multiple times.
- Can accept parameters - Dynamic operations.
- Can execute multiple SQL statements - SELECT, INSERT, UPDATE, DELETE.
- Faster execution - Precompiled and optimized.
- Enhances security - Restricts direct table access.
-- Creating a stored procedure:
DELIMITER $$ -- Used in MySQL to change the delimiter
CREATE PROCEDURE GetEmployeeDetails(IN emp_id INT)
BEGIN
SELECT * FROM Employees WHERE employee_id = emp_id;
END $$
DELIMITER ; -- Reset delimiter
-- Executing the stored procedure:
CALL GetEmployeeDetails(101);
-- Creating a stored procedure:
DELIMITER $$
CREATE PROCEDURE IncreaseSalary(IN emp_id INT, IN increment DECIMAL(10,2))
BEGIN
UPDATE Employees
SET salary = salary + increment
WHERE employee_id = emp_id;
END $$
DELIMITER ;
-- Executing the stored procedure:
CALL IncreaseSalary(101, 5000);
- Must return a single value.
- Cannot modify data (no INSERT, UPDATE, DELETE).
- Can be used in SELECT statements.
-- Creating a function:
DELIMITER $$
CREATE FUNCTION GetEmployeeSalary(emp_id INT) RETURNS DECIMAL(10,2)
DETERMINISTIC
BEGIN
DECLARE salary DECIMAL(10,2);
SELECT salary INTO salary FROM Employees WHERE employee_id = emp_id;
RETURN salary;
END $$
DELIMITER ;
-- Using the function in a query:
SELECT GetEmployeeSalary(101) AS EmployeeSalary;
-- Dropping a stored procedure
DROP PROCEDURE IF EXISTS GetEmployeeDetails;
-- Dropping a function
DROP FUNCTION IF EXISTS GetEmployeeSalary;
- Perform multiple operations (e.g., Insert + Update).
- Execute a task that doesn't return a value.
- Improve security by restricting direct table access.
Use a function when you need to:
- Compute and return a single value.
- Use it inside a SELECT statement.
Views
- Simplifies Complex Queries - Save frequently used queries as a view.
- Enhances Security - Restrict access to specific columns/rows.
- Provides Abstraction - Users see a virtual table without accessing raw data.
- Improves Maintainability - Change the view definition instead of rewriting queries.
Disadvantages:
- Performance Overhead - Each time a view is accessed, the query runs.
- Cannot Modify Some Views - Views with multiple tables or aggregations cannot be updated.
- Storage Usage (Materialized Views) - Some databases store physical copies (not standard views).
-- Creating a view:
CREATE VIEW EmployeeView AS
SELECT employee_id, name, department, salary
FROM Employees
WHERE salary > 50000;
-- Retrieveing data:
SELECT * FROM EmployeeView;
CREATE OR REPLACE VIEW EmployeeView AS
SELECT employee_id, name, department, salary
FROM Employees
WHERE department = 'IT';
DROP VIEW EmployeeView;
CREATE VIEW EmployeeDepartmentView AS
SELECT e.employee_id, e.name, d.department_name, e.salary
FROM Employees e
JOIN Departments d ON e.department_id = d.department_id;
UPDATE EmployeeView
SET salary = 60000
WHERE employee_id = 101;
- The view is based on a single table.
- The view does not use aggregations (SUM, COUNT, AVG, etc.).
- The view does not use DISTINCT, GROUP BY, or JOINs.
You CANNOT update a view if:
- It contains JOINS, GROUP BY, or aggregate functions.
- It has DISTINCT, UNION, or calculated columns.
Transactions and concurrency control
- Atomicity - All or nothing; either the transaction is fully completed, or none of it happens.
- Consistency - The database remains in a valid state before and after the transaction.
- Isolation - Multiple transactions can run at the same time without interfering with each other.
- Durability - Once a transaction is committed, the changes are permanent even if the system crashes.
START TRANSACTION;
COMMIT;
ROLLBACK;
SAVEPOINT save1;
ROLLBACK TO save1;
-- Without a transaction (risky):
UPDATE Accounts SET balance = balance - 500 WHERE name = 'Alice';
UPDATE Accounts SET balance = balance + 500 WHERE name = 'Bob';
-- Using a transaction (safe):
START TRANSACTION;
UPDATE Accounts SET balance = balance - 500 WHERE name = 'Alice';
UPDATE Accounts SET balance = balance + 500 WHERE name = 'Bob';
COMMIT; -- Ensures both updates are saved together
- Dirty Read - One transaction reads uncommitted changes from another.
- Lost Update - Two transactions update the same data, causing one change to be lost.
- Non-Repeatable Read - A value changes between two reads in the same transaction.
- Phantom Read - A new row appears/disappears when a transaction is re-run.
SET TRANSACTION ISOLATION LEVEL SERIALIZABLE;
Isolation levels prevent concurrency issues by controlling how transactions interact.
Isolation level |
Prevents dirty reads? |
Prevents non-repeatable reads? |
Prevents phantom reads? |
Read uncommitted
|
No |
No |
No |
Read committed |
Yes |
No |
No |
Repeatable read
|
Yes |
Yes |
No |
Serializable |
Yes |
Yes |
Yes |
LOCK TABLE Accounts IN EXCLUSIVE MODE;
- Shared lock (read lock) - Allows multiple reads, but prevents writes.
- Exclusive lock (write lock) - Prevents other transactions from reading or writing.
Normalization and database design
- Eliminates data redundancy - Saves storage space.
- Ensures data consistency - Avoids data anomalies.
- Improves data integrity - Enforces relationships properly.
- Easier to maintain and update - Fewer errors in the database.
- Use Primary & Foreign Keys - Maintain relationships properly.
- Normalize to at least 3NF - Avoid redundancy & anomalies.
- Use Indexing - Speed up query performance.
- Plan for Scalability - Design for future data growth.
- Consider Denormalization for Performance - Only when needed.
Boyce-Codd Normal Form (BCNF)
Denormalization
- Denormalization is the process of reintroducing redundancy to improve performance.
- This is useful for read-heavy applications like data warehouses.
- Faster query performance (avoiding multiple joins).
- Reporting & analytics (precomputed summaries).
- Cache data for quick retrieval.
SQL performance optimization
- Use indexes on columns that are frequently searched or filtered (e.g., WHERE, JOIN, ORDER BY).
- Avoid indexing small tables (full scans are often faster).
- Use composite indexes for queries filtering multiple columns.
SELECT id, name FROM Employees; -- Faster
SELECT * FROM Employees; -- Slower
SELECT department, COUNT(*)
FROM Employees
WHERE department = 'IT'
GROUP BY department;
SELECT * FROM Orders LIMIT 1000;
CREATE INDEX idx_employee_department_id ON Employees(department_id);
CREATE INDEX idx_department_id ON Departments(department_id);
SELECT e.name, d.department_name
FROM Employees e
JOIN Departments d ON e.department_id = d.department_id;
- INNER JOIN - Faster than OUTER JOINs (fetches only matching rows).
- LEFT JOIN - If needed, filter unnecessary NULLs using WHERE column IS NOT NULL.
-- Bad (using IN with subquery - slower):
SELECT * FROM Employees
WHERE department_id IN (SELECT department_id FROM Departments WHERE active = 1);
-- Good (using EXISTS - faster):
SELECT * FROM Employees e
WHERE EXISTS (SELECT 1 FROM Departments d WHERE e.department_id = d.department_id AND d.active = 1);
-- Example of bad data types:
CREATE TABLE Orders (
order_id VARCHAR(255), -- Bad, should be INT
amount FLOAT -- Bad, should be DECIMAL(10,2)
);
-- Optimized data types:
CREATE TABLE Orders (
order_id INT PRIMARY KEY, -- Faster
amount DECIMAL(10,2) -- Accurate
);
CREATE TABLE OrderSummary (
order_id INT PRIMARY KEY,
customer_name VARCHAR(100),
total_price DECIMAL(10,2) -- Pre-calculated total instead of joining multiple tables
);
- Enable query cache - If the same queries run frequently.
- Use materialized views - Store results of complex calculations for re-use.
- Cache results in application layer - Instead of hitting the database every time.
CREATE TABLE Orders (
order_id INT,
order_date DATE
) PARTITION BY RANGE (YEAR(order_date)) (
PARTITION p2019 VALUES LESS THAN (2020),
PARTITION p2020 VALUES LESS THAN (2021),
PARTITION p2021 VALUES LESS THAN (2022)
);
-- Bad (nested subquery - slower):
SELECT * FROM Employees
WHERE department_id = (SELECT department_id FROM Departments WHERE department_name = 'IT');
-- Good (using JOIN - faster):
SELECT e.* FROM Employees e
JOIN Departments d ON e.department_id = d.department_id
WHERE d.department_name = 'IT';
EXPLAIN ANALYZE
SELECT *
FROM Employees
WHERE name = 'John Doe';
- Indexes used or missing
- Query execution time
- Full table scans (bad!)
INSERT INTO Orders (order_id, amount)
VALUES (101, 200), (102, 300), (103, 150);
START TRANSACTION;
UPDATE Orders
SET amount = amount * 1.1
WHERE customer_id = 5;
COMMIT;
OPTIMIZE TABLE Employees;
ANALYZE TABLE Orders;
Common Table Expressions (CTE) and recursive queries
- Improves Readability - Breaks complex queries into simpler parts.
- Reusable - You can reference a CTE multiple times in the same query.
- Better Performance - Helps with optimization by avoiding repeated calculations.
- Supports Recursion - Essential for hierarchical data (e.g., organizational trees).
WITH cte_name AS (
-- CTE Query
SELECT column1, column2 FROM SomeTable WHERE condition
)
SELECT * FROM cte_name;
-- Without CTE (messy query):
SELECT e.name, d.department_name, COUNT(*) AS total_employees
FROM Employees e
JOIN Departments d ON e.department_id = d.department_id
WHERE d.active = 1
GROUP BY e.name, d.department_name
HAVING COUNT(*) > 5;
-- Using a CTE (better structure):
WITH ActiveDepartments AS (
SELECT department_id, department_name FROM Departments WHERE active = 1
)
SELECT e.name, d.department_name, COUNT(*) AS total_employees
FROM Employees e
JOIN ActiveDepartments d ON e.department_id = d.department_id
GROUP BY e.name, d.department_name
HAVING COUNT(*) > 5;
WITH RecursiveCTE AS (
-- Base Case (Starting Point)
SELECT column1, column2 FROM Table WHERE condition
UNION ALL
-- Recursive Case (Refers to Itself)
SELECT column1, column2 FROM Table
JOIN RecursiveCTE ON Table.some_id = RecursiveCTE.some_id
)
SELECT * FROM RecursiveCTE;
- Employee-manager relationships (Org Charts)
- Category-subcategory structures
- Family trees
- Graph traversal
WITH RECURSIVE EmployeeHierarchy AS (
SELECT employee_id, name, manager_id, 1 AS level
FROM Employees
WHERE employee_id = 1
UNION ALL
SELECT e.employee_id, e.name, e.manager_id, eh.level + 1
FROM Employees e
JOIN EmployeeHierarchy eh ON e.manager_id = eh.employee_id
WHERE eh.level < 5 -- Stop at 5 levels deep
)
SELECT * FROM EmployeeHierarchy;
Window functions
FUNCTION() OVER (
-- Divides the dataset into groups (optional):
PARTITION BY column_name
-- Defines the order for calculations (optional):
ORDER BY column_name
)
Category |
Function |
Use Case |
Ranking |
ROW_NUMBER() |
Assigns unique row numbers |
|
RANK() |
Ranks with gaps for ties |
|
DENSE_RANK() |
Ranks without gaps |
Aggregation |
SUM() |
Running totals |
|
AVG() |
Moving averages |
|
COUNT() |
Counts rows per partition |
Offset |
LAG() |
Previous row's value |
|
LEAD() |
Next row's value |
Cumulative |
FIRST_VALUE() |
First row's value in partition |
|
LAST_VALUE() |
Last row's value in partition |
|
NTILE(n) |
Divides data into
n buckets
|
SELECT name, department, salary,
ROW_NUMBER() OVER (PARTITION BY department ORDER BY salary DESC) AS row_num
FROM Employees;
name |
department |
salary |
row_num |
Alice |
IT |
7000 |
1 |
Bob |
IT |
6000 |
2 |
Carol |
IT |
6000 |
3 |
Dave |
HR |
5000 |
1 |
Eve |
HR |
4000 |
2 |
SELECT name, department, salary,
RANK() OVER (PARTITION BY department ORDER BY salary DESC) AS rank_num
FROM Employees;
name |
department |
salary |
rank_num |
Alice |
IT |
7000 |
1 |
Bob |
IT |
6000 |
2 |
Carol |
IT |
6000 |
2 |
Dave |
HR |
5000 |
1 |
Eve |
HR |
4000 |
2 |
SELECT
Region,
Salesperson,
SaleAmount,
SUM(SaleAmount) OVER (
PARTITION BY Region
) AS TotalPerRegion
FROM Sales;
Region |
Salesperson |
SaleAmount |
TotalPerRegion |
East |
Alice |
100 |
450 |
East |
Bob |
150 |
450 |
East |
Carol |
200 |
450 |
West |
Dave |
300 |
550 |
West |
Eve |
250 |
550 |
JSON and XML handling
CREATE TABLE Employees (
id INT PRIMARY KEY,
name VARCHAR(50),
details JSON -- Stores structured JSON data
);
INSERT INTO Employees (id, name, details)
VALUES (1, 'Alice', '{"age": 30, "position": "Developer", "skills": ["SQL", "Python"]}');
-- Method 1
SELECT name, details->>'$.skills[0]' AS first_skill FROM Employees;
-- Method 2
SELECT name, JSON_EXTRACT(details, '$.skills[0]') AS first_skill FROM Employees;
name |
first_skill |
Alice |
SQL |
UPDATE Employees
SET details = JSON_SET(details, '$.position', 'Senior Developer')
WHERE id = 1;
SELECT * FROM Employees
WHERE JSON_CONTAINS(details, '"Python"', '$.skills');
CREATE TABLE Products (
id INT PRIMARY KEY,
name VARCHAR(50),
specs XML -- Stores structured XML data
);
INSERT INTO Products (id, name, specs)
VALUES (1, 'Laptop', 'Dell1000');
SELECT name, specs.value('(/product/brand)[1]', 'VARCHAR(50)') AS brand
FROM Products;
UPDATE Products
SET specs.modify('replace value of (/product/price)[1] with 1200')
WHERE id = 1;
SELECT * FROM Products
WHERE specs.exist('/product[brand="Dell"]') = 1;
CASE
CASE expression
WHEN value1 THEN result1
WHEN value2 THEN result2
...
ELSE default_result
END
CASE
WHEN condition1 THEN result1
WHEN condition2 THEN result2
...
ELSE default_result
END
SELECT name, score,
CASE
WHEN score >= 90 THEN 'A'
WHEN score >= 80 THEN 'B'
WHEN score >= 70 THEN 'C'
WHEN score >= 60 THEN 'D'
ELSE 'F'
END AS grade
FROM Students;
name |
score |
grade |
Alice |
95 |
A |
Bob |
82 |
B |
Carol |
68 |
D |
Dave |
55 |
F |
SELECT name, score
FROM Students
ORDER BY
CASE
WHEN score < 60 THEN 1
ELSE 2
END;
UPDATE Students
SET grade =
CASE
WHEN score >= 90 THEN 'A'
WHEN score >= 80 THEN 'B'
WHEN score >= 70 THEN 'C'
WHEN score >= 60 THEN 'D'
ELSE 'F'
END;
Operators
Operator |
Description |
Example |
= |
Equal to |
salary = 5000 |
!= or
<>
|
Not equal to |
name <> 'Bob' |
> |
Greater than |
age > 30 |
< |
Less than |
price < 100 |
>= |
Greater than or equal |
score >= 80 |
<= |
Less than or equal |
rank <= 5 |
Operator |
Description |
Example |
AND
|
All conditions must be true |
age > 25 AND city = 'Paris'
|
OR
|
At least one condition is true |
age < 18 OR age > 60
|
NOT
|
Negates a condition |
NOT (status = 'inactive')
|
Operator |
Description |
Example |
+
|
Addition |
price + tax
|
-
|
Subtraction |
salary - bonus
|
*
|
Multiplication |
quantity * rate
|
/
|
Division |
total / count
|
%
|
Modulus (remainder) |
score % 2
|
Operator |
Description |
Example |
BETWEEN |
Value within a range |
age BETWEEN 18 AND 30
|
IN |
Value matches any value in a list |
country IN ('US', 'UK', 'IN')
|
LIKE |
Pattern matching using
% and
_
|
name LIKE 'A%' (starts
with A)
|
IS NULL |
Checks if value is null |
email IS NULL |
IS NOT NULL |
Checks if value is not null |
email IS NOT NULL |
Operator |
Description |
& |
Bitwise AND |
` |
` |
^ |
Bitwise XOR |
~ |
Bitwise NOT |
SELECT name, salary
FROM Employees
WHERE department = 'Sales'
AND salary BETWEEN 4000 AND 6000
AND name LIKE 'A%';
LIKE
- % - Matches zero or more characters
- _ - Matches exactly one character
- MySQL - Not case-sensitive by default (unless using a binary collation).
- PostgreSQL - Case-sensitive unless you use ILIKE (case-insensitive LIKE).
- SQL Server - Depends on the collation settings.
SELECT column1, column2, ...
FROM table_name
WHERE column_name LIKE pattern;
SELECT * FROM Customers
WHERE name LIKE 'A%';
SELECT * FROM Customers
WHERE name LIKE '%son';
SELECT * FROM Products
WHERE description LIKE '%organic%';
SELECT * FROM Users
WHERE username LIKE 'm_ke';
SELECT * FROM Books
WHERE title NOT LIKE 'The%';
IN
SELECT column1, column2, ...
FROM table_name
WHERE column_name IN (value1, value2, value3, ...);
SELECT name, department
FROM Employees
WHERE department IN ('HR', 'IT', 'Marketing');
-- It's the same as:
WHERE department = 'HR' OR department = 'IT' OR department = 'Marketing'
SELECT name
FROM Students
WHERE class_id IN (
SELECT id
FROM Classes
WHERE teacher = 'Ms. Smith'
);
SELECT * FROM Products
WHERE category NOT IN ('Clothing', 'Electronics');
Aliases
-- Method 1
SELECT column_name AS alias_name
FROM table_name;
-- Method 2
SELECT column_name alias_name
FROM table_name;
SELECT t.column_name
FROM table_name AS t;
SELECT salary AS "Monthly Salary"
FROM Employees;
SELECT price * quantity AS total_cost
FROM Orders;
Triggers
They can be executed:
- BEFORE the event happens (to validate or modify)
- AFTER the event happens (to log, audit, or react)
CREATE TRIGGER log_insert
AFTER INSERT ON Employees
FOR EACH ROW
BEGIN
INSERT INTO LogTable (event_type, employee_id, action_time)
VALUES ('INSERT', NEW.id, CURRENT_TIMESTAMP);
END;
CREATE TRIGGER validate_salary
BEFORE UPDATE ON Employees
FOR EACH ROW
BEGIN
IF NEW.salary < 0 THEN
SIGNAL SQLSTATE '45000'
SET MESSAGE_TEXT = 'Salary cannot be negative';
END IF;
END;
- NEW: Refers to the new row values (for INSERT or UPDATE)
- OLD: Refers to the old row values (for UPDATE or DELETE)
N+1 problem
-- Step 1: Get all authors
SELECT * FROM Authors;
-- Step 2: For each author, get their books
SELECT * FROM Books WHERE AuthorID = 1;
SELECT * FROM Books WHERE AuthorID = 2;
-- And so on...
SELECT Authors.Name, Books.Title
FROM Authors
JOIN Books ON Authors.AuthorID = Books.AuthorID;
License:
GNU General Public License v3.0 or later