SQL – Basic SQL on one table

SQL BASICs

Order of SQL Operations

  1. FROM –> table selectopn
  2. WHERE –> row(s) selection
  3. GROUP BY –> rows grouping by column
  4. Aggregations on columns –> in SELECT  or ORDER BY
  5. HAVING –> in case you have to select on an aggregated value
  6. WINDOW functions –> operations on a selection of rows
  7. SELECT –> column selection and aggregation on selected columns
  8. DISTINCT –> on a selected column
  9. ORDER BY –> on columns or aggregated column
  10. LIMIT/FETCH/TOP : return a limited set

Selecting Columns

Select – Basic

/* Display Value */
SELECT 'Hello World' AS result;

/* SELECT one column from table */
SELECT title FROM films;

/* Select all columns from table */
SELECT * FROM films;

/* Select multiple columns */
SELECT title, release_year FROM films;

 

Select – DISTINCT

/* Select unique values from a table */
SELECT DISTINCT(roles) from roles;

 

Select – COUNT

/* Count number of rows */
SELECT COUNT(*) FROM people;
> 1000

/* Count number of non-missing values in a column of a table */
SELECT COUNT(jobs) FROM people;
> 600

/* Count number of unique values in a column of a table */
SELECT COUNT(DISTINCT(jobs)) FROM people;
> 20

 

FILTERING ROWS

Filtering – numeric values

/* Filter & show all rows & all columns */
SELECT * FROM films WHERE release_year > 2000;

/* Filter & Count values in a column  */
SELECT count(title) FROM films WHERE release_year > 2000;

/* Filter & show multiple column  */
SELECT title, release_year FROM films WHERE release_year > 2000;

 

Filtering – text values

/* Simple filtering on text */
SELECT * FROM films WHERE language = 'French' ;

 

Filtering – multiple selections – AND

/* Multiple conditions */
SELECT 
  title, release_year 
FROM
  films
WHERE
  language = 'Spanish' AND release_year < 2000

/* Multiple conditions - Spanish movies after 2000 and before 2010 */
SELECT 
  title, release_year 
FROM
  films
WHERE
  language = 'Spanish' AND AND release_year > 2000 AND release_year < 2010

 

Filtering – multiple selections – OR + AND

/* French or Spanish, released after 1989 & before 2000, grossprofit +2 mio */
SELECT 
  title, release_year 
FROM
  films 
WHERE 
  release_year > 1989 AND release_year < 2000 
  AND (language = 'French' OR language = 'Spanish' ) 
  AND gross > 2000000

 

Filtering – BETWEEN values

/* BETWEEN low value and high value inclusive */ 
SELECT 
  title, release_year 
FROM 
  films 
WHERE 
  (release_year BETWEEN 1990 AND 2000) 
  AND budget > 100000000 
  AND (language = 'Spanish' OR language = 'French')

 

Filtering – certain values – WHERE IN

SELECT 
  title, certification 
FROM
  films
WHERE
  (language IN ('English', 'Spanish', 'French'))

 

Filtering – Missing Data

/* Count the rows with a Missing values in Language */
select count(*) from films where language IS NULL

/* Count the rows with values in Language */
select count(*) from films where language IS NOT NULL

/* Previous Statement is equal as this */
select count(language) from films where

Filtering – Patterns text values

/* Pattern : name starts with a B */
SELECT name FROM people WHERE name LIKE 'B%'

/* Pattern : 2nd character must be an r */
SELECT name FROM people WHERE name LIKE '_r%'

/* Pattern : 1st character may not start with A */
SELECT name FROM people WHERE name NOT LIKE  'A%'

 

AGGREGATION Functions

Aggregation – basic

/* Sum */
SELECT SUM(duration) FROM films

/* Average */
SELECT AVG(duration) FROM films

/* Shortest /- Minimum */
SELECT MIN(duration) FROM films

/* Longest - Maximum */
SELECT MAX(duration) FROM films
  • A single aggregate function in the SELECT clause (in the absence of GROUP BY) will force aggregation into a single row

Aggregation – with filtering

/* Amount of the worst performer in 1994 */
select min(gross) from films where release_year = 1994

/* Amount of the best performer between 2000-2012 inc */
select max(gross) from films where release_year between 2000 AND 2012

 

Aggregation – Aliasing

/* Deathrate calculation --> if deathdate IS NOT NULL = counted  */
/* Division by 100.0 --> decimal figure */
SELECT 
  (COUNT(deathdate)* 100.0)/COUNT(*) AS percentage_dead 
from 
  people
> 9.37

/* Calculation of number of decades -- here division by integer 10 */
select 
  (max(release_year) - min(release_year))/10 as number_of_decades 
from 
  films

 

ORDER BY – sorting rows

Sorting – single column

/* Sorting by ascending order */ 
select title, duration
from films
order by duration

/* Sorting by descending order */ 
select title, duration
from films
order by duration DESC

 

Sorting – multiple columns

/* Multiple columns - separate by a comma */
select name, birthdate
from people
order by name, birthdate
  • when using DISTINCT in the SELECT statement, an ORDER BY on a non-selected column can lead to errors
    • eg SELECT DISTINCT name, last_name FROM actors ORDER BY actor_id

 

GROUP BY – grouping rows with columns

/* Largest budget per release year */
select release_year, max(budget)
from films 
group by release_year
order by release_year

/* IMDB SCores counting */
select imdb_score, count(*)
from reviews 
group by imdb_score
order by imdb_score

/* County, release year and lowest Profit */
select country, release_year, min(gross)
from films
group by country, release_year
order by country, release_year
  • A GROUP BY must be committed on all selected columns to have the aggregated field to be calculated
    • Error : SELECT release_year,  language , AVG(budget) FROM films  GROUP BY release_year 
    • Will cause an error because language wasn’t in the ‘group by’

 

HAVING – Filtering with aggregate functions

/* Filter on the year that have + average budget of 60 mio  */
select release_year, avg(budget) as avg_budget, avg(gross) as avg_gross
from films
where release_year > 1990
group by release_year
having avg(budget) > 60000000
order by avg(gross) DESC
  • Using a GROUP BY clause, only expressions built from GROUP BY expressions , or aggregate functions can be used in HAVING, SELECT, and ORDER BY clauses.