SQL
CRUD Operations
INSERT INTO tasks ('title', 'status')
VALUES ('Buy groceries', 'incomplete'), ('Gas car', 'incomplete')
DELETE from tasks
WHERE status='complete'
UPDATE tasks SET status='incomplete'
WHERE title='Buy groceries'
SELECT
Basics
AND
test two or more conditions.
SELECT name , continent FROM world
WHERE area < 2000
AND gdp > 5000000000
IN
check if item is in a list.
SELECT name, population FROM world
WHERE name IN ('Denmark', 'Finland', 'Norway', 'Sweden')
LIKE
allows pattern matching. %
is wildcard.
SELECT name FROM world
WHERE name LIKE 'G%'
BETWEEN
allows range checking (always inclusive).
SELECT name, area/1000 FROM world
WHERE area BETWEEN 207600 AND 244820
SELECT
Within SELECT
(Subqueries)
e.g. List each country where the population is larger than Russia’s:
SELECT name FROM world
WHERE population >
(SELECT population FROM world WHERE name='Russia')
ALL
check a condition over an entire list.
SELECT name FROM world
WHERE gdp >
ALL (SELECT gdp FROM world WHERE continent = 'Europe')
Using WITH
Provides a means to write subqueries for use in a larger SELECT
query.
The subqueries, which are often referred to as Common Table Expressions or CTEs, can be thought of as defining temporary tables that exist just for this query.
Check out: http://www.postgresql.org/docs/9.3/static/queries-with.html
WITH regional_sales AS (
/* subquery for total regional sales */
SELECT region, SUM(amount) AS total_sales
FROM orders
GROUP BY region
),
top_regions AS (
/* subquery for regions with most sales */
SELECT region
FROM regional_sales
WHERE total_sales > (SELECT SUM(total_sales)/10 FROM regional_sales)
)
/* display sales figures per product in top regions only */
SELECT
region,
product,
SUM(quantity) AS product_units,
SUM(amount) AS product_sales
FROM
orders
WHERE
region IN (
SELECT
region
FROM
top_regions)
GROUP BY
region,
product;
Aggregate Functions
SUM
returns the sum of values!
SELECT SUM(population)
FROM world
COUNT
returns the total number of a given value
SELECT COUNT(name) FROM world
WHERE area >= 1000000
DISTINCT
returns only one instance of each value
SELECT DISTINCT continent FROM world
GROUP BY
applies SUM
and COUNT
to groups of values
SELECT continent, SUM(population) FROM world
GROUP BY continent
HAVING
allows you to filter groups displayed
SELECT continent FROM world
GROUP BY continent
HAVING SUM(population)>500000000
ORDER BY
sort results by a given value (lowest to highest)
SELECT id, title, yr
FROM movie
WHERE title LIKE '%Star Trek%'
ORDER BY yr
AVG() /* Returns the average value */
FIRST() /* Returns the first value */
LAST() /* Returns the last value */
MAX() /* Returns the largest value */
MIN() /* Returns the smallest value */
Joining Tables
JOIN
combine databases by linking specific columns
SELECT player
FROM game JOIN goal ON id=matchid
LEFT JOIN include rows from the left table even when the linking value is null.
RIGHT JOIN include rows from the right table even when the linking value is null.
Miscellaneous
NULL
null values. cannot use ‘=‘ or ‘!=‘, but rather IS and IS NOT.
SELECT name
FROM teacher
WHERE dept IS NULL
COALESCE
takes any number of arguments and returns the first value that is not null.
SELECT name, COALESCE(mobile, home, ‘no number’)
FROM addresses
CASE
allows you to test conditions.
SELECT name, population,
CASE WHEN population<1000000
THEN 'small'
WHEN population<10000000
THEN 'medium'
ELSE 'large'
END
FROM world
CKU: SQL & BigQuery
Outline
- Syntax, concepts
- Access to CK Data
- Lab
SELECT
state,
is_male,
AVG(weight_pounds) AS avg_weight,
FROM
[publicdata:samples.natality]
WHERE
is_male= FALSE
AND plurality = 1
AND state = 'CA'
AND mother_age > father_age
GROUP BY
state,
is_male
Same as
SELECT
state,
is_male,
AVG(weight_pounds) AS avg_weight,
FROM
[publicdata:samples.natality]
WHERE
is_male= FALSE
AND plurality = 1
AND state = 'CA'
AND mother_age > father_age
GROUP BY
1,
2
SELECT
year,
is_male,
AVG(weight_pounds) AS avg_weight,
FROM
[publicdata:samples.natality]
WHERE
plurality = 1
GROUP BY
1,
2
ORDER BY
year,
is_male
WHERE
only effects each rowHAVING
only effects each group
SELECT
CASE WHEN weight_pounds > 8 THEN 'Heave' ELSE 'Light' END AS weight_group,
COUNT(*) AS cnt
FROM
[publicdata:samples.natality]
WHERE
plurality = 1
GROUP BY
1
- Filtering goes faster than joining. perhaps one day we'll just have one giant table.
SELECT
*
FROM
[publicdata:samples.shakespeare]
WHERE
word_count > 100
AND corpus_date > 1600
AND SUBSTR(word,1,2) = 'wi'
AND LENGTH(word) = 4
ORDER BY
word_count DESC