Leetccode 50 QNA_comprehensive

Q1: Rising Temperature

Note: A very good question to understand self-join by merging 2 tables on consecutive dates.

Method 1:

SELECT w1.id
FROM Weather w1, Weather w2
WHERE w1.recordDate = DATE_ADD(w2.recordDate, INTERVAL 1 DAY)
AND w1.temperature > w2.temperature;

# Output with all columns:

| id | recordDate | temperature | id | recordDate | temperature |
| -- | ---------- | ----------- | -- | ---------- | ----------- |
| 2  | 2015-01-02 | 25          | 1  | 2015-01-01 | 10          |
| 3  | 2015-01-03 | 20          | 2  | 2015-01-02 | 25          |
| 4  | 2015-01-04 | 30          | 3  | 2015-01-03 | 20          |

-- # Output of final table:
| id | recordDate | temperature | id | recordDate | temperature |
| -- | ---------- | ----------- | -- | ---------- | ----------- |
| 2  | 2015-01-02 | 25          | 1  | 2015-01-01 | 10          |
| 4  | 2015-01-04 | 30          | 3  | 2015-01-03 | 20          |

Method 2:

SELECT w1.id
FROM Weather w1, Weather w2
WHERE DATEDIFF(w1.recordDate, w2.recordDate) = 1
 AND w1.temperature > w2.temperature;

Method 3:

SELECT w1.id
FROM Weather w1
JOIN Weather w2 ON w1.recordDate = DATE_ADD(w2.recordDate, INTERVAL 1 DAY)
WHERE w1.temperature > w2.temperature

-- # Output with all columns:
| id | recordDate | temperature | id | recordDate | temperature |
| -- | ---------- | ----------- | -- | ---------- | ----------- |
| 2  | 2015-01-02 | 25          | 1  | 2015-01-01 | 10          |
| 4  | 2015-01-04 | 30          | 3  | 2015-01-03 | 20          |

Q2: Students and Examinations

SELECT s.student_id, s.student_name, sub.subject_name, COUNT(e.student_id) AS attended_exams
FROM Students s

CROSS JOIN Subjects sub
LEFT JOIN Examinations e ON s.student_id = e.student_id AND sub.subject_name = e.subject_name
GROUP BY s.student_id, s.student_name, sub.subject_name
ORDER BY s.student_id, sub.subject_name;

Q3: Biggest Single Number

Method 1:

SELECT

MAX(num) AS num

FROM (
    SELECT num
    FROM MyNumbers
    GROUP BY num
    HAVING COUNT(num) = 1
) AS unique_numbers;

Method 2:

SELECT MAX(IF(cnt = 1, num, NULL)) AS num
FROM (
    SELECT num, COUNT(*) AS cnt
    FROM MyNumbers
    GROUP BY num
) AS t;

Method 3:

WITH CTE AS(    
    SELECT num
    FROM MyNumbers
    GROUP BY num
    HAVING COUNT(num) = 1
)
SELECT max(num) as num
FROM CTE

Q4: Second Highest Salary

Method 1

select 
CASE WHEN COUNT(*) = 0 THEN NULL ELSE B.salary end as SecondHighestSalary
from(
    Select 
    id,salary,
    DENSE_RANK() OVER(order by salary desc) as Ranking
    from Employee
) B
Where B.Ranking=2
;

Method 2

select 

if(COUNT(*) = 0 , NULL, B.salary) as SecondHighestSalary

from(
    Select 
    id,salary,
    DENSE_RANK() OVER(order by salary desc) as Ranking
    from Employee
) B
Where B.Ranking=2
;

Method 2.1

WITH CTE AS (
    Select 
    id,salary,
    DENSE_RANK() OVER(order by salary desc) as Ranking
    from Employee
) 

SELECT 
if(COUNT(*) = 0 , NULL, salary) as SecondHighestSalary
from CTE
Where Ranking=2
;

Q5: Immediate Food Delivery II

Method 1: Step 1: Write subquery to find first order of all customers- earliest orders Step 2: Fetch records from table from above step, where order_date= customer_pref_delivery_date i.e. immediate order Step 3: Calculate immediate_percentage

Step 2 & 3

select 
round(sum(if (B.order_date=B.customer_pref_delivery_date, 1, 0 ))*100/count(*),2) as immediate_percentage
from( 
    # Step 1: find first order of all customers- earliest orders
    select *,
    DENSE_RANK() over(Partition by customer_id order by order_date) as Ranking
    from Delivery
) B
Where B.Ranking =1

Method 1.1:

WITH CTE AS (
SELECT *,
DENSE_RANK() OVER( PARTITION BY customer_id  ORDER BY order_date ) as rnk
FROM Delivery
)

SELECT 
round(sum(if (B.order_date=v.customer_pref_delivery_date, 1, 0 ))*100/count(*),2) as immediate_percentage

FROM CTE B
WHERE rnk=1

Q6: Game Play Analysis IV

Method 1:

WITH first_login AS (
    -- Get each player's first login date.
    SELECT 
        player_id, 
        MIN(event_date) AS first_date
    FROM Activity
    GROUP BY player_id
),
consecutive AS (
    -- Find players who logged in on the day after their first login.
    SELECT 
        f.player_id
    FROM first_login f
    JOIN Activity a 
      ON f.player_id = a.player_id 
     AND a.event_date = DATE_ADD(f.first_date, INTERVAL 1 DAY)
)
SELECT ROUND(
    (SELECT COUNT(*) FROM consecutive) / (SELECT COUNT(DISTINCT player_id) FROM Activity),
    2
) AS fraction;

Method 2:

# Write your MySQL query statement below
SELECT
  ROUND(COUNT(DISTINCT player_id) / (SELECT COUNT(DISTINCT player_id) FROM Activity), 2) AS fraction
FROM
  Activity
WHERE
  (player_id, DATE_SUB(event_date, INTERVAL 1 DAY))
  IN (
    SELECT player_id, MIN(event_date) AS first_login FROM Activity GROUP BY player_id
  )

Method 3:

select ROUND((count(*) + 0.00)/ (select count(distinct player_id) from activity), 2) as fraction
from (
    select player_id, count(*) consecutive_days
    from (
        select *, 
            datediff(day, min(event_date)over(partition by player_id order by player_id, event_date asc), event_date) +1 
            -row_number()over(partition by player_id order by player_id asc) rw
        from activity
    ) c
    where rw = 0
    group by player_id
) a
where consecutive_days >= 2

Q7: Customer Who Visited but Did Not Make Any Transactions

SELECT v.customer_id  , 

COUNT(v.visit_id ) as count_no_trans 
FROM Visits v LEFT JOIN Transactions  t on v.visit_id=t.visit_id
WHERE t.transaction_id  IS NULL

GROUP BY v.customer_id

Q8: Employees Whose Manager Left the Company

Method 1:

Select employee_id
from Employees
where manager_id not in ( --Step 1: Find managers whose manager_id isn't present in table as employee_id


    Select employee_id
    from Employees
) AND salary < 30000  -- Step 2: Find employees who've salaries < 30000 
order by employee_id asc

Method 2:

WITH CTE AS (
    SELECT *
    FROM Employees
    WHERE salary < 30000 
      AND manager_id IS NOT NULL
)
SELECT c.employee_id
FROM CTE c
LEFT JOIN Employees m ON c.manager_id = m.employee_id
WHERE m.employee_id IS NULL
ORDER BY c.employee_id;

Note: Why can't you above CTE with itself, and why a few of test cases might fail?

For example, consider this scenario:

Employee A has salary <30000 and manager_id = M.
Manager M is still in the company, but because M’s own manager_id is null, M is not included in the CTE.
The LEFT JOIN of CTE A to CTE B on A.manager_id = B.employee_id will not find a match for M (since M is missing from the CTE), and so B.employee_id will be NULL.
Thus, Employee A will be incorrectly selected as if their manager had left.

Recommended Approach

To correctly check whether an employee’s manager has left, you should use the full Employees table for the manager lookup rather than a CTE that filters out managers with null manager_id.

Q9: Confirmation Rate

Method 1:

select s.user_id, 
round(avg(if(c.action="confirmed",1,0)),2) as confirmation_rate

from #s as s 
left join Confirmations as c on s.user_id= c.user_id 
group by user_id;

Method 2:

WITH CTE AS (
    SELECT
        s.user_id,
        SUM(CASE WHEN c.action = 'confirmed' THEN 1 ELSE 0 END) AS success,
        SUM(CASE WHEN c.action = 'timeout' THEN 1 ELSE 0 END) AS failure
    FROM #s s
    LEFT JOIN Confirmations c ON s.user_id = c.user_id
    GROUP BY s.user_id
)
SELECT 
    user_id,
    ROUND(
        IF(success + failure = 0, 0, success / (success + failure)),
    2) AS confirmation_rate
FROM CTE
ORDER BY user_id;

Method 3:

SELECT
    user_id,
    ROUND(
        IF((success + failure) = 0,
           0,
           CAST(success AS DECIMAL) / CAST((success + failure) AS DECIMAL)
        ),
        2
    ) AS confirmation_rate
FROM (
    SELECT
        s.user_id,
        SUM(CASE WHEN c.action = 'confirmed' THEN 1 ELSE 0 END) AS success,
        SUM(CASE WHEN c.action = 'timeout' THEN 1 ELSE 0 END) AS failure
    FROM #s s
    LEFT JOIN Confirmations c ON s.user_id = c.user_id
    GROUP BY s.user_id
) AS count_action;

Method 4:

WITH count_action AS (
    SELECT
        s.user_id,
        SUM(CASE WHEN c.action = 'confirmed' THEN 1 ELSE 0 END) AS success,
        SUM(CASE WHEN c.action = 'timeout' THEN 1 ELSE 0 END) AS failure
    FROM #s s
    LEFT JOIN Confirmations c ON s.user_id = c.user_id
    GROUP BY s.user_id
)
SELECT
    user_id,
    ROUND(
        IF((success + failure) = 0, 0, CAST(success AS DECIMAL) / CAST((success + failure) AS DECIMAL)),
        2
    ) AS confirmation_rate
FROM count_action;

Method 5: Using Full OUTER JOIN (Although not required; redundant)

SELECT 
    user_id,
    ROUND(AVG(IF(action = 'confirmed', 1, 0)), 2) AS confirmation_rate
FROM (
    -- All rows from #s (with matching confirmations if any)
    SELECT s.user_id, c.action
    FROM #s s
    LEFT JOIN Confirmations c ON s.user_id = c.user_id

    UNION ALL

    -- Rows from Confirmations that have no matching #
    SELECT c.user_id, c.action
   FROM #s s
   RIGHT JOIN Confirmations c ON s.user_id = c.user_id
    WHERE s.user_id IS NULL
) t
GROUP BY user_id;

Q10:User Activity for the Past 30 Days I

Method 1:

SELECT activity_date AS day, COUNT(DISTINCT user_id) AS active_users
FROM Activity
WHERE (activity_date > "2019-06-27" AND activity_date <= "2019-07-27")
GROUP BY activity_date;

Note: Suppose specific activity types were the criteria to consider as an active user then-

WITH CTE AS (
    SELECT
        activity_date as "day",
        CASE 
            WHEN activity_type IN ('scroll_down', 'send_message') THEN user_id
        END AS activeusers
    FROM Activity
    WHERE (activity_date > "2019-06-27" AND activity_date <= "2019-07-27")

)

SELECT activity_date, COUNT(DISTINCT activeusers) AS active_users
FROM CTE 
GROUP BY activity_date;

Q11: Monthly Transactions I

Method 1:

SELECT
    DATE_FORMAT(trans_date, '%Y-%m') AS month,
    country,
    COUNT(id) AS trans_count,
    SUM(IF(state='approved', 1, 0)) AS approved_count,
    SUM(amount) AS trans_total_amount,
    SUM(IF(state='approved', amount, 0)) AS approved_total_amount
FROM     Transactions A
GROUP BY  month, country;

Method 2:

SELECT  
SUBSTR(trans_date,1,7) as month, 
country, count(id) as trans_count, 
SUM(CASE WHEN state = 'approved' then 1 else 0 END) as approved_count, 
SUM(amount) as trans_total_amount, 
SUM(CASE WHEN state = 'approved' then amount else 0 END) as approved_total_amount

FROM Transactions
GROUP BY month, country

Q12: Triangle Judgement

Select
x,y,z,
IF(x+y >z AND x+z >y AND y+z>x, "Yes", "No") as triangle

from Triangle

Q12:Last Person to Fit in the Bus

** Method 1:**

WITH CTE AS (
    SELECT *,
           SUM(weight) OVER (ORDER BY turn ASC) AS `Total Weight`
    FROM Queue 
)

SELECT person_name 
FROM CTE
WHERE `Total Weight` <= 1000
ORDER BY `Total Weight` DESC
LIMIT 1

Method 2:

SELECT person_name
FROM (
    SELECT
        person_name,
        turn,
        SUM(weight) OVER (ORDER BY turn ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) AS total_weight
    FROM Queue
) AS ranked
WHERE total_weight <= 1000
ORDER BY turn DESC
LIMIT 1;

Method 3:

WITH CTE AS (
    SELECT 
        turn, person_name, weight,
        SUM(weight) OVER(ORDER BY turn ASC) AS tot_weight 
    FROM Queue
    ORDER BY turn
)
SELECT person_name
FROM Queue q
WHERE q.turn = (SELECT MAX(turn) FROM CTE WHERE tot_weight <= 1000);

Method 4:

SELECT 
    q1.person_name
FROM Queue q1 
JOIN Queue q2 ON q1.turn >= q2.turn
GROUP BY q1.turn
HAVING SUM(q2.weight) <= 1000
ORDER BY SUM(q2.weight) DESC
LIMIT 1

Q13: Delete Duplicate Emails

delete p1 from person p1,person p2 
where p1.email=p2.email and p1.id>=p2.id;

Q13.1: Why p1.id<>=p2.id wouldn't work in above query?

For below input data-

| id | email            |
|----|------------------|
| 1  | john@example.com |
| 2  | bob@example.com  |
| 3  | john@example.com |

Using the query:

SELECT 
    P1.id AS P1_id,
    P1.email AS P1_email,
    P2.id AS P2_id,
    P2.email AS P2_email
FROM Person P1, Person P2
WHERE P1.email = P2.email 
  AND P1.id <> P2.id;

The intermediate result would be:

| P1_id | P1_email         | P2_id | P2_email         |
|-------|------------------|-------|------------------|
| 1     | john@example.com | 3     | john@example.com |
| 3     | john@example.com | 1     | john@example.com |

Explanation:

For john@example.com, there are two rows (id = 1 and id = 3).
The join condition P1.email = P2.email finds that these two rows share the same email.
The condition P1.id <> P2.id ensures that a row isn't paired with itself.
As a result, you get both combinations: one pairing where P1 is the row with id = 1 and P2 is the row with id = 3, and another pairing where P1 is the row with id = 3 and P2 is the row with id = 1.

When you use P1.id > P2.id, only one of these pairings is returned (the one with the higher id as P1), which prevents deleting both copies. Without that condition, the intermediate join produces duplicate pairs, and if used in a DELETE statement, it may lead to unintended results.

Q14: List the Products Ordered in a Period

    SELECT 
        A.product_name,
        SUM(B.unit) as unit
    FROM Products A
    INNER JOIN Orders B ON A.product_id = B.product_id
    WHERE DATE_FORMAT(B.order_date, '%M %Y') = 'February 2020'
    group by A.product_name 
    HAVING unit>=100

Q15: Customers Who Bought All Products

SELECT  customer_id 
FROM Customer 
GROUP BY customer_id
HAVING COUNT(distinct product_key) = (SELECT COUNT(product_key) FROM Product)

Q16: Group Sold Products By The Date

SELECT 
    sell_date,
    COUNT(DISTINCT product) as num_sold,
    GROUP_CONCAT(DISTINCT product ORDER BY product) as products
FROM Activities
GROUP BY sell_date
ORDER BY sell_date;

Q17: Count Salary Categories

WITH flat_category AS (
    SELECT 
        SUM(CASE WHEN income < 20000 THEN 1 ELSE 0 END) AS `Low Salary`, 
        SUM(CASE WHEN income >= 20000 AND income <= 50000 THEN 1 ELSE 0 END) AS `Average Salary`,
        SUM(CASE WHEN income > 50000 THEN 1 ELSE 0 END) AS `High Salary`
    FROM Accounts
)

SELECT 'Low Salary' AS category, `Low Salary` AS accounts_count FROM flat_category
UNION ALL
SELECT 'Average Salary', `Average Salary` FROM flat_category
UNION ALL
SELECT 'High Salary', `High Salary` FROM flat_category;

Q18: Product Price at a Given Date

Method 1

# Write your MySQL query statement below
SELECT 
product_id, 
new_price AS price 

FROM Products 
WHERE (product_id, change_date) IN (
                                    SELECT 
                                    product_id, max(change_date) 
                                    FROM Products 
                                    WHERE change_date <= '2019-08-16' 
                                    GROUP BY product_id)

UNION

SELECT product_id, 10 AS price 
FROM Products 
WHERE product_id NOT IN (SELECT product_id 
                        FROM Products 
                        WHERE change_date <= '2019-08-16');

Method 2

select 
distinct product_id, 10 as price 
from Products 
where product_id not in(
                        select distinct product_id 
                        from Products 
                        where change_date <='2019-08-16' )
union 
select 
product_id, new_price as price 
from Products 
where (product_id,change_date) in (
                        select product_id , max(change_date) as date 
                        from Products 
                        where change_date <='2019-08-16' 
                        group by product_id)

Q19: User Activity for the Past 30 Days I

SELECT activity_date AS day, COUNT(DISTINCT user_id) AS active_users
FROM Activity
WHERE (activity_date > "2019-06-27" AND activity_date <= "2019-07-27")
GROUP BY activity_date;

Q20: Article Views I

select  distinct author_id as id 
from Views 
where author_id = viewer_id 
order by id  asc;

Q21: Average Selling Price

# Write your MySQL query statement below
SELECT p.product_id, 
IFNULL(ROUND(SUM(units*price)/SUM(units),2),0) AS average_price

FROM Prices p 
LEFT JOIN UnitsSold u ON p.product_id = u.product_id AND u.purchase_date BETWEEN start_date AND end_date

group by product_id

Q 21.a : Why is the below query wrong for the above problem but the above query is correct?

The first query is more correct.

Explanation:

First Query:
```
SELECT p.product_id, 
       IFNULL(ROUND(SUM(u.units * p.price) / SUM(u.units), 2), 0) AS average_price
FROM Prices p 
LEFT JOIN UnitsSold u 
  ON p.product_id = u.product_id 
 AND u.purchase_date BETWEEN p.start_date AND p.end_date
GROUP BY p.product_id;
```
- The condition u.purchase_date BETWEEN p.start_date AND p.end_date is placed in the ON clause.
- This ensures that all products from the Prices table are included (even those with no sales) because the LEFT JOIN is preserved.
- For products with no matching sales (i.e., no rows in UnitsSold), u.units will be NULL, and IFNULL(..., 0) ensures the average_price is set to 0.

Second Query:

SELECT p.product_id, 
       IFNULL(ROUND(SUM(u.units * p.price) / SUM(u.units), 2), 0) AS average_price
FROM Prices p 
LEFT JOIN UnitsSold u 
  ON p.product_id = u.product_id 
WHERE u.purchase_date BETWEEN p.start_date AND p.end_date
GROUP BY p.product_id;

The date condition is placed in the WHERE clause, which filters out rows where u.purchase_date is NULL.
This effectively turns the LEFT JOIN into an INNER JOIN, thereby excluding products with no sales from the result.

Since the problem requires that products with no sold units should return an average selling price of 0, the first query is the correct approach.

Q22: Percentage of Users Attended a Contest

select 
contest_id, 
round(count(distinct user_id) * 100 /(select count(user_id) from Users) ,2) as percentage
from  Register
group by contest_id
order by percentage desc,contest_id

Q23: Invalid Tweets

Select tweet_id
from Tweets
where length(content) >15

Q24: Find Followers Count

select user_id , count(distinct follower_id) as followers_count
from followers
group by user_id
order by user_id asc , followers_count asc;

Q25: Employee Bonus

Select A.name, B.bonus
from Employee A
LEFT JOIN Bonus B on A.empId=B.empId
where B.Bonus <1000 or B.Bonus is NULL

Q26: Queries Quality and Percentage

Method 1

select
query_name,
round(avg(cast(rating as decimal) / position), 2) as quality,
round(sum(case when rating < 3 then 1 else 0 end) * 100 / count(*), 2) as poor_query_percentage
from queries
WHERE query_name IS NOT NULL
group by query_name

Method 2

SELECT query_name, 
ROUND(AVG(rating/position),2) AS quality, 
ROUND(AVG(IF(rating < 3, 1, 0))*100,2) 
AS poor_query_percentage 
FROM Queries 
WHERE query_name IS NOT NULL 
GROUP BY query_name;

Q27: Restaurant Growth

WITH temp AS (
    SELECT visited_on, SUM(amount) AS amount
    FROM Customer
    GROUP BY visited_on
)

SELECT 
    visited_on, 
    SUM(amount) OVER (ORDER BY visited_on ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS amount, 
    ROUND(AVG(amount*1.00) OVER (ORDER BY visited_on ROWS BETWEEN 6 PRECEDING AND CURRENT ROW), 2) AS average_amount
FROM temp
ORDER BY visited_on
LIMIT 18446744073709551615 OFFSET 6;

Q27.1: What would happen if you just use outer query something like below?

| visited_on | amount | average_amount |
|------------|--------|----------------|
| 2019-01-07 | 860    | 122.86         |
| 2019-01-08 | 840    | 120            |
| 2019-01-09 | 840    | 120            |
| 2019-01-10 | 850    | 121.43         |

Let's compare the two approaches step by step to see why the wrong query produces different (and incorrect) results:

Wrong Query:

SELECT 
    visited_on,
    ROUND(SUM(amount) OVER (ORDER BY visited_on ROWS BETWEEN 6 PRECEDING AND CURRENT ROW), 2) AS amount,
    ROUND(AVG(amount) OVER (ORDER BY visited_on ROWS BETWEEN 6 PRECEDING AND CURRENT ROW), 2) AS average_amount
FROM Customer
GROUP BY visited_on
LIMIT 18446744073709551615 OFFSET 6;

What Happens Internally:

GROUP BY visited_on:
- The query groups raw Customer rows by each day.
- For each day, an aggregated value is computed.
- However, because window functions are used in the same query that performs the grouping, MySQL applies the window functions on the grouped result.
- If the grouping isn’t isolated properly, MySQL might not compute the aggregated daily totals as expected.
Window Functions on Grouped Results:
- The window functions use the column amount (which ideally should be the daily total) to compute a 7-day rolling sum and average.
- In our wrong query, the daily total for 2019-01-10 comes out as 850 rather than the correct 1000.
- Consequently, the moving average computed over the window becomes 121.43.

Wrong Query Output:

visited_on	amount	average_amount
2019-01-07	860	122.86
2019-01-08	840	120
2019-01-09	840	120
2019-01-10	850	121.43

Note: The daily total on 2019-01-10 is wrong (850 instead of 1000) because the grouping and window calculations are mixed in one query without isolating the aggregation step. This in turn affects the moving average.

Right Query:

WITH temp AS (
    SELECT visited_on, SUM(amount) AS amount
    FROM Customer
    GROUP BY visited_on
)
SELECT 
    visited_on, 
    SUM(amount) OVER (ORDER BY visited_on ROWS BETWEEN 6 PRECEDING AND CURRENT ROW) AS amount, 
    ROUND(AVG(amount * 1.00) OVER (ORDER BY visited_on ROWS BETWEEN 6 PRECEDING AND CURRENT ROW), 2) AS average_amount
FROM temp
ORDER BY visited_on
LIMIT 18446744073709551615 OFFSET 6;

What Happens Internally:

CTE (temp) – Pre-Aggregation:
- The CTE (temp) first groups the Customer table by visited_on and calculates the correct daily total for amount.
- Now, each day appears exactly once with its proper aggregated total.
- For example, on 2019-01-10, the correct daily total is computed as 1000.
Window Functions on Pre-Aggregated Data:
- In the outer query, the window functions are applied to the pre-aggregated daily totals.
- The 7-day rolling sum and average are now computed over the correct daily totals.
- Thus, for 2019-01-10, the moving average is correctly computed as 142.86.

Right (Expected) Output:

visited_on	amount	average_amount
2019-01-07	860	122.86
2019-01-08	840	120
2019-01-09	840	120
2019-01-10	1000	142.86

Key Differences:

Isolation of Aggregation:
- Wrong Query: Combines grouping and window functions in one step, which leads to an incorrect daily total (850 for 2019-01-10) because the window functions are applied on an intermediate result that hasn’t been cleanly aggregated.
- Right Query: Uses a CTE to first pre-aggregate daily totals. This ensures that each day's amount is correct (1000 for 2019-01-10).
Effect on Moving Average:
- Since the moving average is computed using the daily total from each row, an incorrect daily total (850) leads to an incorrect moving average (121.43) in the wrong query.
- The correct daily total (1000) in the right query yields the expected moving average (142.86).

Conclusion:

The right query separates the daily aggregation (summing amounts per visited_on) from the moving window calculation. This isolation ensures that each day’s aggregated amount is accurate, which in turn produces the correct rolling average. In contrast, mixing the GROUP BY and window functions in one query (as in the wrong query) can lead to miscalculations in the daily totals and, consequently, the moving average.

This analysis explains why the wrong query returns 850 and 121.43 for 2019-01-10, while the right query (and expected output) returns 1000 and 142.86.

Q28: Average Selling Price

SELECT p.product_id, 
IFNULL(ROUND(SUM(units*price)/SUM(units),2),0) AS average_price

FROM Prices p 
LEFT JOIN UnitsSold u ON p.product_id = u.product_id AND u.purchase_date BETWEEN start_date AND end_date

group by product_id

Q29: Monthly Transactions I

Method 1:

SELECT
    DATE_FORMAT(trans_date, '%Y-%m') AS month,
    country,
    COUNT(id) AS trans_count,
    SUM(IF(state='approved', 1, 0)) AS approved_count,
    SUM(amount) AS trans_total_amount,
    SUM(IF(state='approved', amount, 0)) AS approved_total_amount
FROM     Transactions A
GROUP BY  month, country;

Method 2:

SELECT  

SUBSTR(trans_date,1,7) as month, 
country, count(id) as trans_count, 
SUM(CASE WHEN state = 'approved' then 1 else 0 END) as approved_count, 
SUM(amount) as trans_total_amount, 
SUM(CASE WHEN state = 'approved' then amount else 0 END) as approved_total_amount

FROM Transactions
GROUP BY month, country

Q30: Product Sales Analysis III

Method 1:

SELECT C.product_id, C.year as first_year, C.quantity, C.price
FROM (
    SELECT A.year, B.product_id, A.quantity, A.price,
           DENSE_RANK() OVER (PARTITION BY B.product_id ORDER BY A.year ASC) AS Ranking
    FROM Sales A
    INNER JOIN Product B ON A.product_id = B.product_id
) AS C
WHERE C.Ranking = 1;

Method 2:

WITH CTE AS (    
    SELECT A.product_id, A.year, A.quantity, A.price,
           DENSE_RANK() OVER (PARTITION BY B.product_id ORDER BY A.year ASC) AS Ranking
    FROM Sales A
    INNER JOIN Product B ON A.product_id = B.product_id

)

SELECT product_id, year as first_year, quantity, price
FROM CTE
WHERE Ranking =1
ORDER BY 4

Q31: Rising Temperature

Method 1

SELECT w1.id
FROM Weather w1, Weather w2
WHERE w1.recordDate = DATE_ADD(w2.recordDate, INTERVAL 1 DAY)
AND w1.temperature > w2.temperature;

# Output with all columns:
 | id | recordDate | temperature | id | recordDate | temperature |
 | -- | ---------- | ----------- | -- | ---------- | ----------- |
 | 2  | 2015-01-02 | 25          | 1  | 2015-01-01 | 10          |
 | 4  | 2015-01-04 | 30          | 3  | 2015-01-03 | 20          |

Method 2

SELECT w1.id
FROM Weather w1, Weather w2
WHERE DATEDIFF(w1.recordDate, w2.recordDate) = 1
 AND w1.temperature > w2.temperature;

** Method 3 **

SELECT w1.id
FROM Weather w1
JOIN Weather w2 ON w1.recordDate = DATE_ADD(w2.recordDate, INTERVAL 1 DAY)
WHERE w1.temperature > w2.temperature

# Output with all columns:
| id | recordDate | temperature | id | recordDate | temperature |
| -- | ---------- | ----------- | -- | ---------- | ----------- |
| 2  | 2015-01-02 | 25          | 1  | 2015-01-01 | 10          |
| 4  | 2015-01-04 | 30          | 3  | 2015-01-03 | 20          |

Q 32: Game Play Analysis IV

# Write your MySQL query statement below
SELECT
  ROUND(COUNT(DISTINCT player_id) / (SELECT COUNT(DISTINCT player_id) FROM Activity), 2) AS fraction
FROM
  Activity
WHERE
  (player_id, DATE_SUB(event_date, INTERVAL 1 DAY))
  IN (
    SELECT player_id, MIN(event_date) AS first_login FROM Activity GROUP BY player_id
  )

** Method 2** Review below query for error once again

select ROUND((count(*) + 0.00)/ (select count(distinct player_id) from activity), 2) as fraction
from (
    select player_id, count(*) consecutive_days
    from (
        select *, 
            datediff(day, min(event_date)over(partition by player_id order by player_id, event_date asc), event_date) +1 
            -row_number()over(partition by player_id order by player_id asc) rw
        from activity
    ) c
    where rw = 0
    group by player_id
) a
where consecutive_days >= 2

Q33: Consecutive Numbers

** Method 1**

with cte as (
 select num,
    lead(num,1) over() num1,
    lead(num,2) over() num2
    from logs
)

select distinct num ConsecutiveNums 
from 
cte where 
(num=num1) and (num=num2)

** Method 2**

 Select 
 distinct B.num ConsecutiveNums 

 from(
    select num,
    lead(num,1) over() num1,
    lead(num,2) over() num2
    from logs A
 )  B
 where (B.num=B.num1) and (B.num=B.num2)

Q34: Exchange Seats

** Method 1**

SELECT
    IF(id % 2 = 1 AND id < (SELECT MAX(id) FROM Seat), id + 1, IF(id % 2 = 0, id - 1, id)) AS id,
    student
FROM Seat
ORDER BY id;

** Method 2**

SELECT
    CASE
        WHEN
            id = (SELECT MAX(id) FROM Seat) AND MOD(id, 2) = 1 
            THEN id
        WHEN
            MOD(id, 2) = 1
            THEN id + 1
        ELSE
            id - 1
    END AS id, student 
FROM Seat 
ORDER BY id;

##Revise below ones again.

Q35: Students and Examinations

# Write your MySQL query statement below
SELECT s.student_id, s.student_name, sub.subject_name, COUNT(e.student_id) AS attended_exams
FROM Students s

CROSS JOIN Subjects sub
LEFT JOIN Examinations e ON s.student_id = e.student_id AND sub.subject_name = e.subject_name
GROUP BY s.student_id, s.student_name, sub.subject_name
ORDER BY s.student_id, sub.subject_name;

Q36: Average Time of Process per Machine

** Method 1**

select a1.machine_id, round(avg(a2.timestamp-a1.timestamp), 3) as processing_time 
from Activity a1
join Activity a2 
on a1.machine_id=a2.machine_id and a1.process_id=a2.process_id
and a1.activity_type='start' and a2.activity_type='end'
group by a1.machine_id;

** Method 2**

select 
A.machine_id,
round(
      (select avg(a1.timestamp) from Activity A where A.activity_type = 'end' and A.machine_id = B.machine_id) - 
      (select avg(a1.timestamp) from Activity A where A.activity_type = 'start' and A.machine_id = B.machine_id)
,3) as processing_time
from Activity B
group by B.machine_id

Q37: Product Sales Analysis III

SELECT C.product_id, C.year as first_year, C.quantity, C.price
FROM (
    SELECT A.year, B.product_id, A.quantity, A.price,
           DENSE_RANK() OVER (PARTITION BY B.product_id ORDER BY A.year ASC) AS Ranking
    FROM Sales A
    INNER JOIN Product B ON A.product_id = B.product_id
) AS C
WHERE C.Ranking = 1;

Leetccode 50 QNA_comprehensive

Q1: Rising Temperature

Q2: Students and Examinations

Q3: Biggest Single Number

Q4: Second Highest Salary

Q5: Immediate Food Delivery II

Q6: Game Play Analysis IV

Q7: Customer Who Visited but Did Not Make Any Transactions

Q8: Employees Whose Manager Left the Company

Recommended Approach

Q9: Confirmation Rate

Q10:User Activity for the Past 30 Days I

Q11: Monthly Transactions I

Q12: Triangle Judgement

Q12:Last Person to Fit in the Bus

Q13: Delete Duplicate Emails

Q13.1: Why p1.id<>=p2.id wouldn't work in above query?

Q14: List the Products Ordered in a Period

Q15: Customers Who Bought All Products

Q16: Group Sold Products By The Date

Q17: Count Salary Categories

Q18: Product Price at a Given Date

Q19: User Activity for the Past 30 Days I

Q20: Article Views I

Q21: Average Selling Price

Q 21.a : Why is the below query wrong for the above problem but the above query is correct?

Explanation:

Q22: Percentage of Users Attended a Contest

Q23: Invalid Tweets

Q24: Find Followers Count

Q25: Employee Bonus

Q26: Queries Quality and Percentage

Q27: Restaurant Growth

Q27.1: What would happen if you just use outer query something like below?

Wrong Query:

Right Query:

Key Differences:

Conclusion:

Q28: Average Selling Price

Q29: Monthly Transactions I

Q30: Product Sales Analysis III

Q31: Rising Temperature

Q 32: Game Play Analysis IV

Q33: Consecutive Numbers

Q34: Exchange Seats

Q35: Students and Examinations

Q36: Average Time of Process per Machine

Q37: Product Sales Analysis III

Uh oh!

Uh oh!

Uh oh!

Clone this wiki locally