Skip to content

SQL questions

stanislawbartkowski edited this page Nov 27, 2021 · 20 revisions

Q1

https://www.interviewquery.com/questions/comments-histogram

CREATE TABLE TESTS.USERS (ID INTEGER PRIMARY KEY NOT NULL, NAME VARCHAR(100), CREATED_AT DATE, NEIGHBORHOOD_ID INTEGER, MAIL VARCHAR(100));

CREATE TABLE TESTS.COMMENTS(USER_ID INTEGER, BODY VARCHAR(16000), CREATED_AT DATE );
INSERT INTO TESTS.USERS VALUES(1,'John',NULL,NULL,NULL);
INSERT INTO TESTS.USERS VALUES(2,'Bob',NULL,NULL,NULL);

INSERT INTO TESTS.COMMENTS VALUES(1,NULL,'2020-01-10');
INSERT INTO TESTS.COMMENTS VALUES(2,NULL,'2020-01-02');
INSERT INTO TESTS.COMMENTS VALUES(2,NULL,'2020-01-01');
INSERT INTO TESTS.COMMENTS VALUES(2,NULL,'2020-02-01');

Solution

SELECT ID, COUNT(USER_ID) FROM TESTS.USERS,TESTS.COMMENTS AS C WHERE ID = C.USER_ID AND C.CREATED_AT BETWEEN '2020-01-01'AND '2020-01-31' GROUP BY ID

Q2

https://www.interviewquery.com/questions/post-success

CREATE TABLE TESTS.EVENTS (ID INTEGER, USER_ID INTEGER, CREATED_AT DATE, ACTION VARCHAR(100),URL VARCHAR(1000), PLATFORM VARCHAR(100));
INSERT INTO TESTS.EVENTS VALUES(1,NULL,'2020-01-02','post_enter',NULL,NULL);
INSERT INTO TESTS.EVENTS VALUES(1,NULL,'2020-01-02','post_submit',NULL,NULL);

INSERT INTO TESTS.EVENTS VALUES(2,NULL,'2020-01-02','post_enter',NULL,NULL);
INSERT INTO TESTS.EVENTS VALUES(2,NULL,'2020-01-02','post_cancel',NULL,NULL);

INSERT INTO TESTS.EVENTS VALUES(3,NULL,'2020-01-03','post_submit',NULL,NULL);
INSERT INTO TESTS.EVENTS VALUES(3,NULL,'2020-01-03','post_enter',NULL,NULL);

INSERT INTO TESTS.EVENTS VALUES(4,NULL,'2020-01-03','post_submit',NULL,NULL),(4,NULL,'2020-01-03','post_enter',NULL,NULL);

Solution 1 (complicated). Identity ID successful (post_enter->post_commit) using INTERSECT and join with all entered.

SELECT A.DAY,SUCCESS * 100.0 /FLOAT(ALL) FROM 
(SELECT DAY(CREATED_AT) AS DAY,COUNT(ID) AS SUCCESS FROM (
SELECT ID,CREATED_AT FROM TESTS.EVENTS WHERE CREATED_AT BETWEEN '2020-01-01' AND '2020-01-31' AND ACTION = 'post_enter'
INTERSECT 
SELECT ID,CREATED_AT FROM TESTS.EVENTS WHERE CREATED_AT BETWEEN '2020-01-01' AND '2020-01-31' AND ACTION = 'post_submit' )
GROUP BY DAY(CREATED_AT)) AS S,
(SELECT DAY(CREATED_AT) AS DAY,COUNT(ID) AS ALL FROM TESTS.EVENTS WHERE CREATED_AT BETWEEN '2020-01-01' AND '2020-01-31' AND ACTION = 'post_enter' GROUP BY DAY(CREATED_AT)) AS A
WHERE S.DAY = A.DAY

Solution2 (simple), simply divide number of 'post_submit' by 'post_enter'

SELECT DAY(CREATED_AT) AS DAY,SUM(CASE WHEN ACTION='post_submit' THEN 1 ELSE 0 END) * 100 / SUM(CASE WHEN ACTION='post_enter' THEN 1 ELSE 0 END)  AS ALL FROM TESTS.EVENTS WHERE CREATED_AT BETWEEN '2020-01-01' AND '2020-01-31' GROUP BY DAY(CREATED_AT)

Q3

https://www.interviewquery.com/questions/liked-pages

CREATE TABLE FRIENDS (USER_ID INTEGER, FRIEND_ID INTEGER);
CREATE TABLE PAGE_LIKES(USER_ID INTEGER,PAGE_ID INTEGER);
INSERT INTO FRIENDS VALUES (1,10),(1,11);

INSERT INTO PAGE_LIKES VALUES(10,1000),(10,1001),(11,1001),(11,1002);

INSERT INTO FRIENDS VALUES (2,10),(2,12),(2,13);
INSERT INTO PAGE_LIKES VALUES(12,1000),(12,1003),(13,1000);

Select all pages and the number of likes attached.

SELECT F.USER_ID,PAGE_ID,COUNT(PAGE_ID) AS COUNT FROM FRIENDS AS F,PAGE_LIKES AS P WHERE F.FRIEND_ID = P.USER_ID GROUP BY F.USER_ID,PAGE_ID ORDER BY USER_ID,COUNT(PAGE_ID) DESC

Select using CTE and join.

WITH P(USER_ID,PAGE_ID,C) AS 
(SELECT F.USER_ID,PAGE_ID,COUNT(PAGE_ID) AS COUNT FROM FRIENDS AS F,PAGE_LIKES AS P WHERE F.FRIEND_ID = P.USER_ID GROUP BY F.USER_ID,PAGE_ID ORDER BY USER_ID,COUNT(PAGE_ID))
SELECT USER_ID,PAGE_ID,C FROM P WHERE C = (SELECT MAX(C) FROM P AS PP WHERE P.USER_ID = PP.USER_ID)

Q4

https://www.interviewquery.com/questions/average-order-value

CREATE TABLE TRANSACTIONS (ID INT, USER_ID INT, CREATED_AT DATE, PRODUCT_ID INT, QUANTITY INT);
CREATE TABLE USERS (ID INT, NAME VARCHAR(100), SEX CHAR(1));
CREATE TABLE PRODUCTS (ID INT, NAME VARCHAR(100), PRICE FLOAT);
INSERT INTO USERS VALUES(100, 'Jane','F');
INSERT INTO USERS VALUES(101, 'Bob','M');

INSERT INTO TRANSACTIONS VALUES(1,100,NULL,1000,2);
INSERT INTO TRANSACTIONS VALUES(1,100,NULL,1001,5);
INSERT INTO TRANSACTIONS VALUES(2,100,NULL,1000,1);

INSERT INTO TRANSACTIONS VALUES(2,101,NULL,1000,1);
INSERT INTO TRANSACTIONS VALUES(3,101,NULL,1001,1);

INSERT INTO PRODUCTS VALUES(1000,'Mobile',200.23);
INSERT INTO PRODUCTS VALUES(1001,'Battery',0.5);

Solution

SELECT SEX, ROUND(AVG(VALUE),2) FROM (
SELECT SEX, T.ID, SUM(T.QUANTITY * P.PRICE) AS VALUE FROM TRANSACTIONS T ,USERS U ,PRODUCTS P WHERE U.ID = T.USER_ID AND T.PRODUCT_ID = P.ID GROUP BY SEX,T.ID
) GROUP BY SEX

Q5

The table contains the numbers. Subtract the greatest number from the second greatest number.

create table numbers (X INT);
insert into numbers values(1),(2),(3),(10),(7);

Solution. Create a CTE table containing only the greatest (LT), another CTE table with the second greatest number (PT), join and subtract.

with PT as (select * from (select X,row_number() over (order by x desc) as N from numbers) as T where n = 2),
LT as (select * from (select X,row_number() over (order by x desc) as N from numbers) as T where n = 1)
select LT.X - PT.X from LT,PT;

Another solution. Use nth_value.

select (m-p) from (select x as m,nth_value (x,2) over (order by x desc rows between current row and 1 following ) as p from numbers order by x desc limit 1) T

Q6

https://app.codility.com/programmers/trainings/6/sql_events_delta/

 create table events (
      event_type integer not null,
      value integer not null,
      time timestamp not null,
      unique(event_type, time)
  );
insert into events values( 2, 5,' 2015-05-09 12:42:00'),( 4 , -42, '2015-05-09 13:19:57'),( 2, 2,' 2015-05-09 14:48:30'),(2,7,'2015-05-09 12:54:39'),(3, 16,' 2015-05-09 13:19:57'),( 3, 20,' 2015-05-09 15:01:09');

Solution. Create LT table containing event_type and the latest timestamp, ST containing the second latest timestamp, join by event_type and subtract.

with LT as (
select event_type,value, N from (select event_type,value,time,row_number() over (partition by event_type order by time desc) as N from events) as GT where N = 1),
ST as (select event_type,value, N from (select event_type,value,time,row_number() over (partition by event_type order by time desc) as N from events) as GT where N = 2)
select LT.event_type,LT.value - ST.value from LT,ST where LT.event_type = ST.event_type;

Q7

https://app.codility.com/programmers/trainings/6/sql_world_cup/

create table teams (
      team_id integer not null,
      team_name varchar(30) not null,
      unique(team_id)
  );

  create table matches (
      match_id integer not null,
      host_team integer not null,
      guest_team integer not null,
      host_goals integer not null,
      guest_goals integer not null,
      unique(match_id)
  );
insert into matches values(1,30,20,1,0),(2,10,20, 1, 2),(3,20,50, 2, 2),(4,10,30,1,0),(5,30,50,0,1);
  
insert into teams values(10,'Give'),(20,'Never'),(30,'You'),(40,'Up'),( 50,' Gonna');

Solution: union (all), first table contains the evaluation of host_team, second the guest_team, right outer join with teams to include team without any match, group by and sum, order by num_points desc and team_id (asc)

with T as (
  select host_team as team_id,case when host_goals > guest_goals then 3 when host_goals = guest_goals then 1 else 0 end as num_points from matches
  union all
  select guest_team as team_id,case when host_goals > guest_goals then 0 when host_goals = guest_goals then 1 else 3 end as num_points from matches
  ) 
  select team_id,team_name,case when num_points is null then 0 else num_points end as num_points from (select TE.team_id,team_name,sum(num_points) as num_points from T right join teams as TE on T.team_id = TE.team_id group by TE.team_id,team_name) S order by num_points desc,team_id

Q8

Table 'transactions' has the following structure:

create table transactions (
        sent_from varchar not null,
        receiver varchar not null,
        date date not null,
        usd_value integer not null);

Write a query that returns a list of receivers that have received at least 1024 USD in at most 3 transactions. There can be more than 3 transfers to that account, as long as some 3 or fewer transactions usd_value total to at least 1024 USD. The table should be ordered by name (in ascending order).

Example, given data:

sent_from receiver date usd_value
Jonas Willhelm 2000-01-01 200
Jonas Timpson 2002-09-27 1024
Jonas Bjorn 2001-03-16 512
Willhelm Bjorn 2010-12-17 100
Willhelm Bjorn 2004-03-22 10
Brown Bjorn 2013_03_20 500
Bjorn Willhelm 2007-06-02 400
Bjorn Willhelm 2001-03-16 400
Bjorn Willhelm 2001-03-16 200

The query should return the following rowset:
  account_name
 --------------
  Bjorn
  Timpson
nsert into transactions values('Jonas','Willhelm','2000-01-01',200),('Jonas','Timpson','2002-09-27',1024),('Jonas','Bjorn','2001-03-16',512),('Willhelm','Bjorn','2010-12-17',100),('Willhelm ','Bjorn','2004-03-22',10);
insert into transactions values('Brown','Bjorn','2013_03_20',500),('Bjorn','Willhelm','2007-06-02',400),('Bjorn','Willhelm','2001-03-16',400),('Bjorn','Willhelm','2001-03-16',200);        

Solution.

with T as (
select receiver,usd_value,row_number() over (partition by receiver order by usd_value desc) as N from transactions
)  select receiver,sum(usd_value) from T where N <= 3 group by 1 having sum(usd_value) >= 1024 order by receiver;

Q9

https://towardsdatascience.com/top-sql-interview-test-questions-techniques-part-1-971bdccc745c
Task: Extract the names and classes for the top 2 test scores for each class in the school, ordered by class and then by score. If there are any tie breakers, use the lower student_id to break the tie.

create table scores (student_id int primary key, scores int not null);
create table classes(student_id int not null, class character(1));
create table names(student_id int primary key, name varchar(100));
insert into scores values(1,90),(2,83),(3,79),(4,97),(5,90),(6,92),(7,94);
insert into classes values(1,'A'),(2,'B'),(3,'A'),(4,'B'),(5,'D'),(6,'B'),(7,'A');
insert into names values(1,'James'),(2,'Michael'),(3,'Valarie'),(4,'Jessie'),(5,'Douglas'),(6,'Shin'),(7,'Joanne');

Expected result:

class name score
A Joanne 94
A James 90
B Jessie 97
B Shin 92

Solution

with T as (
select S.student_id,S.scores,C.class,row_number() over (partition by class order by scores desc,S.student_id) as n from scores S,classes C where S.student_id = C.student_id order by C.class,S.scores desc
) select class,name,scores from T,names N where N <=2 and T.student_id = N.student_id

Q10

https://towardsdatascience.com/top-sql-interview-test-questions-techniques-part-1-971bdccc745c

Task: Display the rows which contain 3 or more consecutive days of more than 100 cases in those days.

create table cases (id int primary key,date date,new int);
insert into cases values(1,'2021-01-01',20),(2,'2021-01-02',149),(3,'2021-01-03',150),(4,'2021-01-04',99),(5,'2021-01-05',145),(6,'2021-01-06',200),(7,'2021-01-07',199),(8,'2021-01-08',188),(9,'2021-01-09',88);

Expected result:

id date new
5 2021-01-05 145
6 2021-01-06 200
7 2021-01-07 199
8 2021-01-08 188

Solution 1. Semi cross-join. Distinct necessary

with OH as 
(select * from cases where new >= 100)
select distinct F.* from OH as F,OH as S,OH as T where F.id+1 = S.id and F.id+2=T.id or F.id = S.id-1 and F.id = T.id+1 or F.id =S.id+1 and F.id = T.id + 2
order by F.id

Solution 2. Use windows functions LAG and LEAD. Distinct not necessary.

with CTE as
(select *,
   LAG(new,+1) over (order by id) as B1,
   LAG(new,+2) over (order by id) as B2,
   LEAD(new,+1) over (order by id) as A1,
   LEAD(new,+2) over (order by id) as A2 from cases)
select CTE.id,CTE.date,CTE.new from CTE where new >= 100 and ((B2 >= 100 and B1>=100) or (B1 >= 100 and A1 >= 100) or (A1 >= 100 and A2 >= 100));
Clone this wiki locally