\c univ --1A without join select count(*), course_id from takes where course_id = any( select course_id from course where dept_name='Comp. Sci.') group by course_id limit 6; -- this query include courses with 0 students -- the other queries do not. select title, (select count(*) from takes where takes.course_id = course.course_id) as num from course where dept_name = 'Comp. Sci.' order by title; SELECT c.title, count(id) FROM takes t, course c WHERE t.course_id = c.course_id and c.dept_name = 'Comp. Sci.' GROUP BY c.title LIMIT 6; WITH CScourse AS ( SELECT title, course_id AS id FROM course WHERE dept_name = 'Comp. Sci.' ) SELECT CScourse.title, COUNT(takes.course_id) FROM takes, CScourse WHERE CScourse.id = takes.course_id GROUP BY CScourse.title; --1B using join select count(*), takes.course_id from takes join course on takes.course_id=course.course_id where dept_name='Comp. Sci.' group by takes.course_id; SELECT c.title, count(id) FROM takes t JOIN course c ON t.course_id = c.course_id and c.dept_name = 'Comp. Sci.' GROUP BY c.title LIMIT 6; --not actually q2 using sort and limit // this gives ONE course with the max count!! select count(*) as count, course_id, sec_id from takes where year=2009 group by course_id, sec_id order by count desc limit 1; -- this is not actually q2 either -- v2 this gives all courses that have the max count with app(count, course, section) as (select count(*) as count, course_id, sec_id from takes group by course_id, sec_id) select * from app where count=(select max(count) from app); --Q2 -- Not quite Q2 -- what course had the most total students across all sections in all years with app(count, course) as (select count(*) as count, course_id from takes group by course_id) select * from app where count=(select max(count) from app); --Q2 -- What section of what course had the most students in each semester? with app(count, course, section,year,semester) as (select count(*) as count, course_id, sec_id,year,semester from takes group by course_id, sec_id,year, semester), mapp(maxx,semester, year) as (select max(count), semester, year from app group by semester,year) select app.course, app.section, app.count, app.semester, app.year from app inner join mapp on app.count=mapp.maxx and app.semester=mapp.semester and app.year=mapp.year order by app.year, app.semester; WITH d AS (SELECT DISTINCT ON(t.year, t.semester) t.year, t.semester, t.course_id, t.sec_id, count(t.id) AS num_students FROM takes t GROUP BY t.year, t.semester, t.course_id, t.sec_id ORDER BY t.year, t.semester, num_students DESC) SELECT d.year, d.semester, d.course_id, c.title, d.sec_id, d.num_students FROM d JOIN course AS c ON d.course_id = c.course_id; WITH EnrollmentCounts AS ( SELECT course.title, section.sec_id, section.semester, section.year, COUNT(*) AS student_num, RANK() OVER (PARTITION BY section.semester, section.year ORDER BY COUNT(*) DESC) AS rank FROM section JOIN takes ON section.course_id = takes.course_id AND section.sec_id = takes.sec_id AND section.semester = takes.semester AND section.year = takes.year JOIN course ON section.course_id = course.course_id GROUP BY course.title, section.sec_id, section.semester, section.year ) SELECT title, sec_id, semester, year, student_num FROM EnrollmentCounts WHERE rank = 1 ORDER BY year, semester, sec_id; SELECT s.course_id, s.sec_id, s.semester, (SELECT COUNT(t.ID) FROM takes t WHERE t.course_id = s.course_id AND t.sec_id = s.sec_id) AS num_students FROM section s WHERE (SELECT COUNT(t.ID) FROM takes t WHERE t.course_id = s.course_id AND t.sec_id = s.sec_id) = ( SELECT MAX(student_count) FROM ( SELECT s.course_id, s.sec_id, s.semester, (SELECT COUNT(t.ID) FROM takes t WHERE t.course_id = s.course_id AND t.sec_id = s.sec_id) AS student_count FROM section s ) AS subquery Lei Lei WHERE subquery.course_id = s.course_id AND subquery.sec_id = s.sec_id AND subquery.semester = s.semester ) ORDER BY s.semester, num_students DESC; select * from ( select count(*) as num_students, semester, year, title, course_id, sec_id from course natural join takes group by year, semester, course_id, sec_id) as a where num_students = ( select count(*) as num_students2 from course natural join takes where semester = a.semester and year = a.year group by year, semester, course order by num_students2 desc limit 1) order by year, semester; WITH RankedCourses AS ( SELECT course.title AS course_title, section.sec_id, takes.semester, takes.year, COUNT(takes.id) AS num_students, ROW_NUMBER() OVER (PARTITION BY takes.semester, takes.year ORDER BY COUNT(takes.id) DESC) AS ranking FROM takes JOIN section ON takes.course_id = section.course_id AND takes.sec_id = section.sec_id JOIN course ON takes.course_id = course.course_id GROUP BY course.title, section.sec_id, takes.semester, takes.year ) -- find courses with ranking of 1 in each semester SELECT course_title, sec_id, semester, year, num_students FROM RankedCourses WHERE ranking = 1 ORDER BY year, semester; --Q3 WITH d as (SELECT t.course_id, count(t.course_id) AS count FROM takes t GROUP BY t.course_id) SELECT * FROM d WHERE count = (SELECT MAX(count) FROM d); WITH CourseRegis AS ( SELECT course.title, course.course_id, COUNT(*) AS student_num FROM course JOIN takes ON course.course_id = takes.course_id GROUP BY course.title, course.course_id ), MaxCourse AS ( SELECT MAX(student_num) AS max_students FROM CourseRegis ) SELECT title, student_num FROM CourseRegis JOIN MaxCourse ON student_num = max_students; SELECT course_id, total_students FROM ( SELECT course_id, SUM(student_count) AS total_students FROM ( SELECT section.course_id, COUNT(takes.ID) AS student_count FROM section JOIN takes ON section.course_id = takes.course_id AND section.sec_id = takes.sec_id GROUP BY section.course_id, section.sec_id ) AS subquery GROUP BY course_id ) AS total_students_per_course WHERE total_students = ( SELECT MAX(total_students) FROM ( SELECT course_id, SUM(student_count) AS total_students FROM ( SELECT section.course_id, COUNT(takes.ID) AS student_count FROM section JOIN takes ON section.course_id = takes.course_id AND section.sec_id = takes.sec_id GROUP BY section.course_id, section.sec_id ) AS subquery GROUP BY course_id ) AS max_total_students_per_course ); --Q4 -- 3 lowest GPAs with weighting by credits! with taks(course, credits, id, grade) as (select takes.course_id, credits, id, grade from course inner join takes on takes.course_id=course.course_id), ress(id, co, pc, tot_cred) as (select id, count(taks.grade), sum(points*credits) as pc, sum(credits) as tot_cred from taks inner join grade_points on taks.grade=grade_points.grade group by id) select id, pc/tot_cred as GPA from ress order by GPA limit 3; WITH studentGrades AS ( SELECT takes.id AS std_id, grade_points.points, course.credits FROM takes JOIN grade_points ON grade_points.grade = takes.grade JOIN course ON course.course_id = takes.course_id ) SELECT student.name AS student_name, std_id AS id, CAST(SUM(points * credits) / SUM(credits) AS DECIMAL(10, 2)) AS GPA FROM studentGrades JOIN student ON student.id = studentGrades.std_id GROUP BY student.name, std_id ORDER BY GPA ASC LIMIT 3; SELECT s.name, s.id, (SUM(g.points * c.credits) / SUM(c.credits)) AS gpa FROM student s JOIN takes t ON s.id = t.id JOIN course c ON t.course_id = c.course_id JOIN grade_points g ON t.grade = g.grade GROUP BY s.name, s.id ORDER BY gpa LIMIT 3; --Q5 -- the 3 highest and the 3 lowest GPAs in a single table! with taks(course, credits, id, grade) as (select takes.course_id, credits, id, grade from course inner join takes on takes.course_id=course.course_id), ress(id, co, pc, tot_cred) as (select id, count(taks.grade), sum(points*credits) as pc, sum(credits) as tot_cred from taks inner join grade_points on taks.grade=grade_points.grade group by id), bst as (select id, pc/tot_cred as GPA from ress order by GPA limit 3), wst as (select id, pc/tot_cred as GPA from ress order by GPA DESC limit 3) select * from bst UNION select * from wst; WITH GPAs AS (SELECT s.name, s.id, (SUM(g.points * c.credits) / SUM(c.credits)) AS gpa FROM student s JOIN takes t ON s.id = t.id JOIN course c ON t.course_id = c.course_id JOIN grade_points g ON t.grade = g.grade GROUP BY s.name, s.id ORDER BY gpa), bookends AS ((SELECT * FROM GPAs AS g2 ORDER BY g2.gpa DESC LIMIT 3) UNION (SELECT * FROM GPAs AS g2 ORDER BY g2.gpa ASC LIMIT 3)) SELECT * FROM bookends ORDER BY name; WITH StudentGPAs AS ( SELECT t.id, s.name, SUM(gp.points * c.credits) / SUM(c.credits) AS gpa FROM takes t JOIN grade_points gp ON t.grade = gp.grade JOIN course c ON t.course_id = c.course_id JOIN student s ON t.id = s.id GROUP BY t.id, s.name ), LowRankedGPAs AS ( SELECT id, name, gpa, RANK() OVER (ORDER BY gpa ASC) AS low_gpa_rank FROM StudentGPAs ), HighRankedGPAs AS( SELECT id, name, gpa, RANK() OVER (ORDER BY gpa DESC) AS high_gpa_rank FROM StudentGPAs ) SELECT lrg.id, lrg.name, lrg.gpa FROM LowRankedGPAs lrg JOIN HighRankedGPAs hrg ON lrg.id = hrg.id AND lrg.name = hrg.name AND lrg.gpa = hrg.gpa WHERE low_gpa_rank <= 3 OR high_gpa_rank <= 3 ORDER BY lrg.name; --Q6 select distinct id from takes where year > 2005 and not id in (select distinct id from takes where year < 2005) order by id limit 5; SELECT DISTINCT takes.id FROM takes LEFT JOIN takes t2 ON takes.id = t2.id AND t2.year < 2005 LEFT JOIN takes t3 ON takes.id = t3.id AND t3.year > 2005 ORDER BY takes.id LIMIT 5; select distinct id from takes where id not in (select id from takes where year < 2005) and year > 2005 order by id limit 5; WITH After2005Students AS ( SELECT DISTINCT t.id FROM takes t WHERE t.year > 2005 ), Before2005Students AS ( SELECT DISTINCT t.id FROM takes t WHERE t.year < 2005 ) SELECT a.id FROM After2005Students a WHERE a.id NOT IN (SELECT id FROM Before2005Students) ORDER BY a.id LIMIT 5; --Q7 with app(dept_name, maxx) as (select dept_name, max(salary) from instructor group by dept_name) select name, app.dept_name, salary from instructor join app on app.dept_name=instructor.dept_name and app.maxx=instructor.salary where app.dept_name like 'G%' order by salary; select salary, name from instructor where dept_name like 'G%' order by salary desc limit 1; -- this query would show 2 in the case of ties!!! SELECT instructor.name, instructor.salary, instructor.dept_name FROM instructor WHERE -- find departments starting with G instructor.dept_name LIKE 'G%' AND (instructor.salary, instructor.dept_name) IN ( -- find the maximum salary for each 'G' department SELECT MAX(instructor.salary), instructor.dept_name FROM instructor GROUP BY instructor.dept_name ); --Q8 -- this works (almost) but is cheesy -- OTOH it is the way I think ... this is slower because it requires a sort and it does not account for ties!! with app(dept_name, maxx) as (select dept_name, max(salary) from instructor group by dept_name) select name, app.dept_name, salary from instructor join app on app.dept_name=instructor.dept_name and app.maxx=instructor.salary order by salary limit 1; -- this version uses the rank() function with does admit ties. BUT still sorts. with app(dept_name, maxx) as (select dept_name, max(salary) from instructor group by dept_name), app2(name, deppartent, salary, rank) as (select name, app.dept_name, salary, rank() over (order by instructor.salary asc) rank from instructor join app on app.dept_name=instructor.dept_name and app.maxx=instructor.salary) select * from app2 where rank=1; -- this is a better way -- this is more SQL-ish -- OTOH real SQL people do not use "with" very often also faster (no sorting) with app(dept_name, maxx) as (select dept_name, max(salary) from instructor group by dept_name) select name, app.dept_name, salary from instructor join app on app.dept_name=instructor.dept_name and app.maxx=instructor.salary where instructor.salary=(select min(maxx) from app); WITH highestSalary AS ( SELECT dept_name, MAX(salary) AS max_salary FROM instructor GROUP BY dept_name ), minHighest AS ( SELECT MIN(max_salary) AS min_salary FROM highestSalary ) SELECT instructor.name, instructor.dept_name, min_salary FROM minHighest JOIN instructor ON instructor.salary = min_salary; \c rocket --find all launches that were higher than any launch in 1957 -- both of these quieries work and return the same answer -- the first is 15x slower than second!!! -- lesson, set operations are time consuming select apogee, date from launch where apogee > all ( select apogee from launch where date_part('year', date)=1957); select apogee, date from launch where apogee > ( select max(apogee) from launch where date_part('year', date)=1957); -- sakila --12 SELECT actor.first_name, actor.last_name, count(film_id) AS film_num FROM actor JOIN film_actor ON actor.actor_id = film_actor.actor_id GROUP BY actor.actor_id ORDER BY film_num DESC LIMIT 1; SELECT a.first_name, a.last_name, ( SELECT COUNT(film_id) FROM film_actor WHERE actor_id = a.actor_id ) AS movie_num FROM actor a ORDER BY movie_num DESC LIMIT 1; --13 select film.title, aa.first_name, aa.last_name, ab.first_name, ab.last_name from film, film_actor as fa, actor as aa, actor as ab, film_actor as fb where film.film_id=fa.film_id and film.film_id=fb.film_id and fa.film_id=fb.film_id and fa.actor_id=aa.actor_id and fb.actor_id=ab.actor_id and aa.last_name='GUINESS' and ab.last_name='DAVIS'; -- exact same result, but using subqueries before the cartesian product. select film.title, aa.first_name, aa.last_name, ab.first_name, ab.last_name from film, (select * from actor where last_name='GUINESS') as aa, (select * from actor where last_name='DAVIS') as ab, film_actor as fa, film_actor as fb where film.film_id=fa.film_id and film.film_id=fb.film_id and fa.film_id=fb.film_id and fa.actor_id=aa.actor_id and fb.actor_id=ab.actor_id; WITH GuinessFilm AS ( SELECT film_actor.film_id FROM film_actor JOIN actor ON actor.actor_id = film_actor.actor_id WHERE actor.last_name = 'GUINESS' ), DavisFilm AS ( SELECT film_actor.film_id FROM film_actor JOIN actor ON actor.actor_id = film_actor.actor_id WHERE actor.last_name = 'DAVIS' ) SELECT film.film_id, film.title FROM film JOIN DavisFilm ON film.film_id = DavisFilm.film_id JOIN GuinessFilm ON film.film_id = GuinessFilm.film_id; --14 WITH ActorPairs AS ( SELECT actorA.actor_id AS actor_id1, actorB.actor_id AS actor_id2, COUNT(actorA.film_id) AS shared_films FROM film_actor AS actorA JOIN film_actor AS actorB ON actorA.film_id = actorB.film_id AND actorA.actor_id < actorB.actor_id GROUP BY actorA.actor_id, actorB.actor_id ORDER BY shared_films DESC LIMIT 1 ) SELECT a1.first_name AS actor1_first, a1.last_name AS actor1_last, a2.first_name AS actor2_first, a2.last_name AS actor2_last, ActorPairs.shared_films FROM ActorPairs JOIN actor AS a1 ON a1.actor_id = ActorPairs.actor_id1 JOIN actor AS a2 ON a2.actor_id = ActorPairs.actor_id2; --15 with app(a,b,c,d) as (select count(*), f1.actor_id, f2.actor_id, f3.actor_id from film_actor f1 join film_actor f2 on f1.film_id=f2.film_id and f1.actor_id2 order by a; SELECT fa1.actor_id, fa2.actor_id, fa3.actor_id FROM film_actor fa1 JOIN film_actor fa2 ON fa1.film_id = fa2.film_id AND fa1.actor_id < fa2.actor_id JOIN film_actor fa3 ON fa1.film_id = fa3.film_id AND fa2.actor_id < fa3.actor_id GROUP BY fa1.actor_id, fa2.actor_id, fa3.actor_id HAVING COUNT(*) >= 3; --16 SELECT DISTINCT film.title FROM ( SELECT film_id FROM film_actor WHERE actor_id = 22 ) AS actorA JOIN ( SELECT film_id FROM film_actor WHERE actor_id = 102 ) AS actorB ON actorA.film_id = actorB.film_id JOIN ( SELECT film_id FROM film_actor WHERE actor_id = 159 ) AS actorC ON actorA.film_id = actorC.film_id JOIN film ON actorA.film_id = film.film_id; SELECT title FROM film WHERE film_id IN ( ((SELECT film_id FROM film_actor WHERE actor_id = (22) INTERSECT SELECT film_id FROM film_actor WHERE actor_id = (102)) INTERSECT SELECT film_id FROM film_actor WHERE actor_id = (159))); --17 SELECT customer.first_name, customer.last_name FROM customer JOIN rental ON customer.customer_id = rental.customer_id JOIN inventory ON rental.inventory_id = inventory.inventory_id JOIN film ON inventory.film_id = film.film_id WHERE film.title = 'ACE GOLDFINGER'; -- Query when set operations make sense!! -- find all launches in which the apogee is the same as one in 1957 and the apogee is > 100 (above the Karman line) select count(*) from launch where date_part('year', date)<>1957 and apogee = any ( select distinct apogee from launch where apogee>100 and date_part('year', date)=1957); select apogee, date from launch where apogee = any ( select apogee from launch where date_part('year', date)=1957) limit 10; -- same query using a "with" table -- NOTE using with does not really make sense here, but you can with app(apo) as (select apogee from launch where date_part('year', date)=1957) select count(*) from launch where date_part('year', date)<>1957 and apogee = any (select * from app) limit 10; -- same query, but limit to top 10% -- idea is to do the entire query and hold it in a "with table", then print the table limiting to 10% of the length of the table. with app(apogee,date) as (select apogee, date from launch where date_part('year', date)<>1957 and apogee = any ( select distinct apogee from launch where apogee>100 and date_part('year', date)=1957)) select * from app order by apogee limit (select count(*)/100 from app); -- find all launches after 1990 whch used a vehicle that flew in 1957 select count(*) from launch where date_part('year',date)>1990 and vehicle = any ( select distinct vehicle from launch where date_part('year', date)=1957); select * from launch where date_part('year',date)>1990 and vehicle = any ( select distinct vehicle from launch where date_part('year', date)=1957); -- find last flight date of all vehicles that flew in 1957 select max(date) as lastflight, vehicle from launch where date_part('year',date)>1957 and vehicle = any ( select distinct vehicle from launch where date_part('year', date)=1957) group by vehicle order by lastflight; -- launchmass of vehicles that launched in 1957 -- without a join!! with veh(v) as (select distinct vehicle from launch where date_part('year', date)=1957), vw(v, lm) as (select vehicle.name, vehicle.launchmass from vehicle where name=any(select * from veh)) select * from vw where lm > (select min(lm)*100 from vw where lm>0);