Commit e6fee97
Author: neenza
Initial commit of LeetCode Scraper project (0 parents)

File tree: 7 files changed, +475 −0 lines changed

.github/workflows/python-app.yml

Lines changed: 29 additions & 0 deletions

```yaml
name: Python Application

on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
    - uses: actions/checkout@v2
    - name: Set up Python
      uses: actions/setup-python@v2
      with:
        python-version: '3.9'
    - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
    - name: Lint with flake8
      run: |
        pip install flake8
        # stop the build if there are Python syntax errors or undefined names
        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
        # exit-zero treats all errors as warnings
        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
```
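A common extension of a single-version workflow like this one is testing across several Python versions with a build matrix. A minimal sketch (not part of this commit; the version list is illustrative):

```yaml
jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        # Illustrative version list; pick versions the project actually supports
        python-version: ['3.9', '3.10', '3.11']
    steps:
    - uses: actions/checkout@v2
    - uses: actions/setup-python@v2
      with:
        python-version: ${{ matrix.python-version }}
```

Each matrix entry runs the job once with that interpreter, so a lint or test failure on any supported version fails the build.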
.gitignore

Lines changed: 35 additions & 0 deletions

```
# Python
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
env/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
*.egg-info/
.installed.cfg
*.egg

# Virtual Environment
venv/
ENV/
env/

# IDE
.idea/
.vscode/
*.swp
*.swo

# Project specific
problems/
```

LICENSE

Lines changed: 21 additions & 0 deletions

MIT License

Copyright (c) 2025 neenza

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

README.md

Lines changed: 86 additions & 0 deletions

# LeetCode Scraper

A Python tool to scrape problem details from LeetCode and save them in JSON format.

## Features

- Scrape LeetCode problems by slug (URL name)
- Extract problem title, description, examples, and constraints
- Extract hints, follow-ups, and solutions when available
- Save data in structured JSON format
- Get a list of available problems with filtering options

## Installation

1. Clone this repository
2. Install the required dependencies:

```bash
pip install -r requirements.txt
```

## Usage

### Scrape a Specific Problem

```python
from leetcode_scraper import LeetCodeScraper

scraper = LeetCodeScraper()
problem_data = scraper.scrape_problem("two-sum")
print(problem_data)
```

### Scrape Multiple Problems

```python
import time

from leetcode_scraper import LeetCodeScraper

scraper = LeetCodeScraper()
problem_list = scraper.scrape_problem_list(limit=5)  # Get 5 problems

for problem in problem_list:
    print(f"Scraping: {problem['title']}")
    scraper.scrape_problem(problem['slug'])
    time.sleep(2)  # Add delay between requests
```

## Output Format

The scraper saves each problem as a JSON file with the following structure:

```json
{
  "title": "Two Sum",
  "problem_id": "1",
  "frontend_id": "1",
  "difficulty": "Easy",
  "problem_slug": "two-sum",
  "topics": ["Array", "Hash Table"],
  "description": "Given an array of integers nums and an integer target...",
  "examples": [
    {
      "example_num": 1,
      "example_text": "Input: nums = [2,7,11,15], target = 9\nOutput: [0,1]"
    }
  ],
  "constraints": [
    "2 <= nums.length <= 10^4",
    "-10^9 <= nums[i] <= 10^9",
    "-10^9 <= target <= 10^9"
  ],
  "follow_ups": [
    "Follow-up: Can you come up with an algorithm that is less than O(n²) time complexity?"
  ],
  "hints": [
    "A really brute force way would be to search for all possible pairs of numbers but that would be too slow.",
    "Try to use the fact that the array is sorted and use two pointers to speed up the search."
  ],
  "code_snippets": {
    "python": "class Solution:\n    def twoSum(self, nums: List[int], target: int) -> List[int]:\n        "
  }
}
```
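A record with the structure above round-trips cleanly through Python's `json` module; a minimal sketch (the `problem` dict is a hypothetical excerpt of the documented fields, not output from the scraper itself):

```python
import json

# Hypothetical record matching the documented output structure
problem = {
    "title": "Two Sum",
    "difficulty": "Easy",
    "topics": ["Array", "Hash Table"],
}

# Serialize and parse back, as consumers of the saved JSON files would
serialized = json.dumps(problem)
loaded = json.loads(serialized)
```

In practice you would read a saved file (e.g. from the `problems/` directory listed in `.gitignore`) with `json.load` instead of parsing an in-memory string.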
## Notes

- Be respectful of LeetCode's servers and avoid making too many requests in a short period.
- The tool adds a delay between requests to avoid being rate-limited.
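The rate-limiting note above can be sketched as a small throttle helper that enforces a minimum gap between requests (illustrative only; the `Throttle` class is not part of this commit and the scraper's own delay logic is not shown here):

```python
import time


class Throttle:
    """Enforce a minimum interval between successive calls (sketch)."""

    def __init__(self, min_interval=2.0):
        self.min_interval = min_interval
        self._last = 0.0  # monotonic timestamp of the previous call

    def wait(self):
        # Sleep only for whatever remains of the minimum interval
        elapsed = time.monotonic() - self._last
        if elapsed < self.min_interval:
            time.sleep(self.min_interval - elapsed)
        self._last = time.monotonic()


# Demo with a short interval: the first call is free, later calls are spaced out
throttle = Throttle(min_interval=0.1)
start = time.monotonic()
for _ in range(3):
    throttle.wait()  # here you would call scraper.scrape_problem(...)
total = time.monotonic() - start
```

Calling `throttle.wait()` before each request keeps the request rate bounded without hard-coding `time.sleep` at every call site.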

example_usage.py

Lines changed: 72 additions & 0 deletions

```python
from leetcode_scraper import LeetCodeScraper
import json
import time


def print_problem_details(problem_data):
    """Print formatted problem details"""
    if not problem_data:
        print("No problem data available")
        return

    print("=" * 80)
    print(f"TITLE: {problem_data.get('title')}")
    print(f"DIFFICULTY: {problem_data.get('difficulty')}")
    print("-" * 80)
    print("DESCRIPTION:")
    print(problem_data.get('description', 'No description available'))
    print("-" * 80)

    # Print examples
    print("EXAMPLES:")
    for example in problem_data.get('examples', []):
        print(f"Example {example.get('example_num')}:")
        print(example.get('example_text'))
        print()

    # Print constraints
    print("CONSTRAINTS:")
    for constraint in problem_data.get('constraints', []):
        print(f"- {constraint}")

    # Print follow-ups if available
    follow_ups = problem_data.get('follow_ups', [])
    if follow_ups:
        print("-" * 80)
        print("FOLLOW-UPS:")
        for follow_up in follow_ups:
            print(f"- {follow_up}")

    # Print hints if available
    hints = problem_data.get('hints', [])
    if hints:
        print("-" * 80)
        print("HINTS:")
        for i, hint in enumerate(hints, 1):
            print(f"Hint {i}: {hint}")

    print("=" * 80)


if __name__ == "__main__":
    scraper = LeetCodeScraper()

    # Example 1: Scrape a single problem
    print("Scraping 'set-matrix-zeroes' problem...")
    problem_data = scraper.scrape_problem("set-matrix-zeroes")
    print_problem_details(problem_data)

    # Example 2: Get a list of problems and scrape the first 3
    print("\nGetting list of problems...")
    problem_list = scraper.scrape_problem_list(limit=3)

    print(f"Found {len(problem_list)} problems:")
    for i, problem in enumerate(problem_list, 1):
        print(f"{i}. {problem['title']} (Difficulty: {'Easy' if problem['difficulty'] == 1 else 'Medium' if problem['difficulty'] == 2 else 'Hard'})")

    # Uncomment to scrape all problems in the list
    """
    print("\nScraping all problems in the list...")
    for problem in problem_list:
        print(f"Scraping {problem['title']}...")
        scraper.scrape_problem(problem['slug'])
        time.sleep(2)  # Add delay between requests
    """
```
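The nested conditional that maps numeric difficulty codes to labels in the listing loop above is easier to read as a lookup table. A minimal sketch (the codes 1–3 are as used in `example_usage.py`; the helper name is illustrative):

```python
# Map LeetCode's numeric difficulty codes to labels (codes as used in the example)
DIFFICULTY_LABELS = {1: "Easy", 2: "Medium", 3: "Hard"}


def difficulty_label(code):
    """Return a human-readable difficulty label, falling back to 'Unknown'."""
    return DIFFICULTY_LABELS.get(code, "Unknown")


label = difficulty_label(2)
```

The loop body then becomes `print(f"{i}. {problem['title']} (Difficulty: {difficulty_label(problem['difficulty'])})")`, and unexpected codes degrade gracefully instead of being mislabeled "Hard".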
