-
Notifications
You must be signed in to change notification settings - Fork 3
SQL questions
https://www.interviewquery.com/questions/comments-histogram
CREATE TABLE TESTS.USERS (ID INTEGER PRIMARY KEY NOT NULL, NAME VARCHAR(100), CREATED_AT DATE, NEIGHBORHOOD_ID INTEGER, MAIL VARCHAR(100));
CREATE TABLE TESTS.COMMENTS(USER_ID INTEGER, BODY VARCHAR(16000), CREATED_AT DATE );
INSERT INTO TESTS.USERS VALUES(1,'John',NULL,NULL,NULL);
INSERT INTO TESTS.USERS VALUES(2,'Bob',NULL,NULL,NULL);
INSERT INTO TESTS.COMMENTS VALUES(1,NULL,'2020-01-10');
INSERT INTO TESTS.COMMENTS VALUES(2,NULL,'2020-01-02');
INSERT INTO TESTS.COMMENTS VALUES(2,NULL,'2020-01-01');
INSERT INTO TESTS.COMMENTS VALUES(2,NULL,'2020-02-01');
Solution
SELECT ID, COUNT(USER_ID) FROM TESTS.USERS,TESTS.COMMENTS AS C WHERE ID = C.USER_ID AND C.CREATED_AT BETWEEN '2020-01-01'AND '2020-01-31' GROUP BY ID
https://www.interviewquery.com/questions/post-success
CREATE TABLE TESTS.EVENTS (ID INTEGER, USER_ID INTEGER, CREATED_AT DATE, ACTION VARCHAR(100),URL VARCHAR(1000), PLATFORM VARCHAR(100));
INSERT INTO TESTS.EVENTS VALUES(1,NULL,'2020-01-02','post_enter',NULL,NULL);
INSERT INTO TESTS.EVENTS VALUES(1,NULL,'2020-01-02','post_submit',NULL,NULL);
INSERT INTO TESTS.EVENTS VALUES(2,NULL,'2020-01-02','post_enter',NULL,NULL);
INSERT INTO TESTS.EVENTS VALUES(2,NULL,'2020-01-02','post_cancel',NULL,NULL);
INSERT INTO TESTS.EVENTS VALUES(3,NULL,'2020-01-03','post_submit',NULL,NULL);
INSERT INTO TESTS.EVENTS VALUES(3,NULL,'2020-01-03','post_enter',NULL,NULL);
INSERT INTO TESTS.EVENTS VALUES(4,NULL,'2020-01-03','post_submit',NULL,NULL),(4,NULL,'2020-01-03','post_enter',NULL,NULL);
Solution 1 (complicated). Identity ID successful (post_enter->post_commit) using INTERSECT and join with all entered.
SELECT A.DAY,SUCCESS * 100.0 /FLOAT(ALL) FROM
(SELECT DAY(CREATED_AT) AS DAY,COUNT(ID) AS SUCCESS FROM (
SELECT ID,CREATED_AT FROM TESTS.EVENTS WHERE CREATED_AT BETWEEN '2020-01-01' AND '2020-01-31' AND ACTION = 'post_enter'
INTERSECT
SELECT ID,CREATED_AT FROM TESTS.EVENTS WHERE CREATED_AT BETWEEN '2020-01-01' AND '2020-01-31' AND ACTION = 'post_submit' )
GROUP BY DAY(CREATED_AT)) AS S,
(SELECT DAY(CREATED_AT) AS DAY,COUNT(ID) AS ALL FROM TESTS.EVENTS WHERE CREATED_AT BETWEEN '2020-01-01' AND '2020-01-31' AND ACTION = 'post_enter' GROUP BY DAY(CREATED_AT)) AS A
WHERE S.DAY = A.DAY
Solution2 (simple), simply divide number of 'post_submit' by 'post_enter'
SELECT DAY(CREATED_AT) AS DAY,SUM(CASE WHEN ACTION='post_submit' THEN 1 ELSE 0 END) * 100 / SUM(CASE WHEN ACTION='post_enter' THEN 1 ELSE 0 END) AS ALL FROM TESTS.EVENTS WHERE CREATED_AT BETWEEN '2020-01-01' AND '2020-01-31' GROUP BY DAY(CREATED_AT)
https://www.interviewquery.com/questions/liked-pages
CREATE TABLE FRIENDS (USER_ID INTEGER, FRIEND_ID INTEGER);
CREATE TABLE PAGE_LIKES(USER_ID INTEGER,PAGE_ID INTEGER);
INSERT INTO FRIENDS VALUES (1,10),(1,11);
INSERT INTO PAGE_LIKES VALUES(10,1000),(10,1001),(11,1001),(11,1002);
INSERT INTO FRIENDS VALUES (2,10),(2,12),(2,13);
INSERT INTO PAGE_LIKES VALUES(12,1000),(12,1003),(13,1000);
Select all pages and the number of likes attached.
SELECT F.USER_ID,PAGE_ID,COUNT(PAGE_ID) AS COUNT FROM FRIENDS AS F,PAGE_LIKES AS P WHERE F.FRIEND_ID = P.USER_ID GROUP BY F.USER_ID,PAGE_ID ORDER BY USER_ID,COUNT(PAGE_ID) DESC
Select using CTE and join.
WITH P(USER_ID,PAGE_ID,C) AS
(SELECT F.USER_ID,PAGE_ID,COUNT(PAGE_ID) AS COUNT FROM FRIENDS AS F,PAGE_LIKES AS P WHERE F.FRIEND_ID = P.USER_ID GROUP BY F.USER_ID,PAGE_ID ORDER BY USER_ID,COUNT(PAGE_ID))
SELECT USER_ID,PAGE_ID,C FROM P WHERE C = (SELECT MAX(C) FROM P AS PP WHERE P.USER_ID = PP.USER_ID)
https://www.interviewquery.com/questions/average-order-value
CREATE TABLE TRANSACTIONS (ID INT, USER_ID INT, CREATED_AT DATE, PRODUCT_ID INT, QUANTITY INT);
CREATE TABLE USERS (ID INT, NAME VARCHAR(100), SEX CHAR(1));
CREATE TABLE PRODUCTS (ID INT, NAME VARCHAR(100), PRICE FLOAT);
INSERT INTO USERS VALUES(100, 'Jane','F');
INSERT INTO USERS VALUES(101, 'Bob','M');
INSERT INTO TRANSACTIONS VALUES(1,100,NULL,1000,2);
INSERT INTO TRANSACTIONS VALUES(1,100,NULL,1001,5);
INSERT INTO TRANSACTIONS VALUES(2,100,NULL,1000,1);
INSERT INTO TRANSACTIONS VALUES(2,101,NULL,1000,1);
INSERT INTO TRANSACTIONS VALUES(3,101,NULL,1001,1);
INSERT INTO PRODUCTS VALUES(1000,'Mobile',200.23);
INSERT INTO PRODUCTS VALUES(1001,'Battery',0.5);
Solution
SELECT SEX, ROUND(AVG(VALUE),2) FROM (
SELECT SEX, T.ID, SUM(T.QUANTITY * P.PRICE) AS VALUE FROM TRANSACTIONS T ,USERS U ,PRODUCTS P WHERE U.ID = T.USER_ID AND T.PRODUCT_ID = P.ID GROUP BY SEX,T.ID
) GROUP BY SEX
The table contains the numbers. Subtract the greatest number from the second greatest number.
create table numbers (X INT);
insert into numbers values(1),(2),(3),(10),(7);
Solution. Create a CTE table containing only the greatest (LT), another CTE table with the second greatest number (PT), join and subtract.
with PT as (select * from (select X,row_number() over (order by x desc) as N from numbers) as T where n = 2),
LT as (select * from (select X,row_number() over (order by x desc) as N from numbers) as T where n = 1)
select LT.X - PT.X from LT,PT;
Another solution. Use nth_value.
select (m-p) from (select x as m,nth_value (x,2) over (order by x desc rows between current row and 1 following ) as p from numbers order by x desc limit 1) T
https://app.codility.com/programmers/trainings/6/sql_events_delta/
create table events (
event_type integer not null,
value integer not null,
time timestamp not null,
unique(event_type, time)
);
insert into events values( 2, 5,' 2015-05-09 12:42:00'),( 4 , -42, '2015-05-09 13:19:57'),( 2, 2,' 2015-05-09 14:48:30'),(2,7,'2015-05-09 12:54:39'),(3, 16,' 2015-05-09 13:19:57'),( 3, 20,' 2015-05-09 15:01:09');
Solution. Create LT table containing event_type and the latest timestamp, ST containing the second latest timestamp, join by event_type and subtract.
with LT as (
select event_type,value, N from (select event_type,value,time,row_number() over (partition by event_type order by time desc) as N from events) as GT where N = 1),
ST as (select event_type,value, N from (select event_type,value,time,row_number() over (partition by event_type order by time desc) as N from events) as GT where N = 2)
select LT.event_type,LT.value - ST.value from LT,ST where LT.event_type = ST.event_type;
https://app.codility.com/programmers/trainings/6/sql_world_cup/
create table teams (
team_id integer not null,
team_name varchar(30) not null,
unique(team_id)
);
create table matches (
match_id integer not null,
host_team integer not null,
guest_team integer not null,
host_goals integer not null,
guest_goals integer not null,
unique(match_id)
);
insert into matches values(1,30,20,1,0),(2,10,20, 1, 2),(3,20,50, 2, 2),(4,10,30,1,0),(5,30,50,0,1);
insert into teams values(10,'Give'),(20,'Never'),(30,'You'),(40,'Up'),( 50,' Gonna');
Solution: union (all), first table contains the evaluation of host_team, second the guest_team, right outer join with teams to include team without any match, group by and sum, order by num_points desc and team_id (asc)
with T as (
select host_team as team_id,case when host_goals > guest_goals then 3 when host_goals = guest_goals then 1 else 0 end as num_points from matches
union all
select guest_team as team_id,case when host_goals > guest_goals then 0 when host_goals = guest_goals then 1 else 3 end as num_points from matches
)
select team_id,team_name,case when num_points is null then 0 else num_points end as num_points from (select TE.team_id,team_name,sum(num_points) as num_points from T right join teams as TE on T.team_id = TE.team_id group by TE.team_id,team_name) S order by num_points desc,team_id
Table 'transactions' has the following structure:
create table transactions (
sent_from varchar not null,
receiver varchar not null,
date date not null,
usd_value integer not null);
Write a query that returns a list of receivers that have received at least 1024 USD in at most 3 transactions. There can be more than 3 transfers to that account, as long as some 3 or fewer transactions usd_value total to at least 1024 USD. The table should be ordered by name (in ascending order).
Example, given data:
sent_from | receiver | date | usd_value |
---|---|---|---|
Jonas | Willhelm | 2000-01-01 | 200 |
Jonas | Timpson | 2002-09-27 | 1024 |
Jonas | Bjorn | 2001-03-16 | 512 |
Willhelm | Bjorn | 2010-12-17 | 100 |
Willhelm | Bjorn | 2004-03-22 | 10 |
Brown | Bjorn | 2013_03_20 | 500 |
Bjorn | Willhelm | 2007-06-02 | 400 |
Bjorn | Willhelm | 2001-03-16 | 400 |
Bjorn | Willhelm | 2001-03-16 | 200 |
The query should return the following rowset:
account_name
--------------
Bjorn
Timpson
nsert into transactions values('Jonas','Willhelm','2000-01-01',200),('Jonas','Timpson','2002-09-27',1024),('Jonas','Bjorn','2001-03-16',512),('Willhelm','Bjorn','2010-12-17',100),('Willhelm ','Bjorn','2004-03-22',10);
insert into transactions values('Brown','Bjorn','2013_03_20',500),('Bjorn','Willhelm','2007-06-02',400),('Bjorn','Willhelm','2001-03-16',400),('Bjorn','Willhelm','2001-03-16',200);
Solution.
with T as (
select receiver,usd_value,row_number() over (partition by receiver order by usd_value desc) as N from transactions
) select receiver,sum(usd_value) from T where N <= 3 group by 1 having sum(usd_value) >= 1024 order by receiver;
https://towardsdatascience.com/top-sql-interview-test-questions-techniques-part-1-971bdccc745c
Task: Extract the names and classes for the top 2 test scores for each class in the school, ordered by class and then by score. If there are any tie breakers, use the lower student_id to break the tie.
create table scores (student_id int primary key, scores int not null);
create table classes(student_id int not null, class character(1));
create table names(student_id int primary key, name varchar(100));
insert into scores values(1,90),(2,83),(3,79),(4,97),(5,90),(6,92),(7,94);
insert into classes values(1,'A'),(2,'B'),(3,'A'),(4,'B'),(5,'D'),(6,'B'),(7,'A');
insert into names values(1,'James'),(2,'Michael'),(3,'Valarie'),(4,'Jessie'),(5,'Douglas'),(6,'Shin'),(7,'Joanne');
Expected result:
class | name | score |
---|---|---|
A | Joanne | 94 |
A | James | 90 |
B | Jessie | 97 |
B | Shin | 92 |
Solution
with T as (
select S.student_id,S.scores,C.class,row_number() over (partition by class order by scores desc,S.student_id) as n from scores S,classes C where S.student_id = C.student_id order by C.class,S.scores desc
) select class,name,scores from T,names N where N <=2 and T.student_id = N.student_id
https://towardsdatascience.com/top-sql-interview-test-questions-techniques-part-1-971bdccc745c
Task: Display the rows which contain 3 or more consecutive days of more than 100 cases in those days.
create table cases (id int primary key,date date,new int);
insert into cases values(1,'2021-01-01',20),(2,'2021-01-02',149),(3,'2021-01-03',150),(4,'2021-01-04',99),(5,'2021-01-05',145),(6,'2021-01-06',200),(7,'2021-01-07',199),(8,'2021-01-08',188),(9,'2021-01-09',88);
Expected result:
id | date | new |
---|---|---|
5 | 2021-01-05 | 145 |
6 | 2021-01-06 | 200 |
7 | 2021-01-07 | 199 |
8 | 2021-01-08 | 188 |
Solution 1. Semi cross-join. Distinct necessary
with OH as
(select * from cases where new >= 100)
select distinct F.* from OH as F,OH as S,OH as T where F.id+1 = S.id and F.id+2=T.id or F.id = S.id-1 and F.id = T.id+1 or F.id =S.id+1 and F.id = T.id + 2
order by F.id
Solution 2. Use windows functions LAG and LEAD. Distinct not necessary.
with CTE as
(select *,
LAG(new,+1) over (order by id) as B1,
LAG(new,+2) over (order by id) as B2,
LEAD(new,+1) over (order by id) as A1,
LEAD(new,+2) over (order by id) as A2 from cases)
select CTE.id,CTE.date,CTE.new from CTE where new >= 100 and ((B2 >= 100 and B1>=100) or (B1 >= 100 and A1 >= 100) or (A1 >= 100 and A2 >= 100));