Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

An error occurred: Expecting value: line 1 column 1 (char 0) #485

Open
khuluqilkarim opened this issue Jan 30, 2025 · 0 comments
Open

An error occurred: Expecting value: line 1 column 1 (char 0) #485

khuluqilkarim opened this issue Jan 30, 2025 · 0 comments

Comments

@khuluqilkarim
Copy link

khuluqilkarim commented Jan 30, 2025

import json
from linkedin_api import Linkedin
import re

class LinkedinProfileExtractor:
    """Log in to LinkedIn via ``linkedin_api`` and condense profile data.

    Credentials are read from a JSON file holding ``"username"`` and
    ``"password"`` keys. When they cannot be loaded, ``self.api`` stays
    ``None`` and :meth:`get_profile_data` reports that instead of raising.
    """

    # Compiled once and shared by every instance.
    _EMAIL_RE = re.compile(r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}')
    # The optional area-code group is NON-capturing on purpose: with a
    # capturing group ``findall`` returns only that group's text instead of
    # the whole phone number. The pattern is deliberately permissive, so any
    # run of three or more digits (e.g. a year) is reported as well.
    _PHONE_RE = re.compile(
        r'\+?\d{0,3}?[-.\s]?(?:\(?\d{1,4}?\)?[-.\s]?)?'
        r'\d{1,4}[-.\s]?\d{1,4}[-.\s]?\d{1,9}'
    )

    def __init__(self, credentials_file=r"......"):
        """Load credentials and, if they are usable, create the API client.

        Args:
            credentials_file: Path to the JSON credentials file.
        """
        self.credentials = self.load_credentials(credentials_file)
        self.api = None
        if not self.credentials:
            return

        # Pull the fields out before calling Linkedin() so that only a
        # malformed credentials file is handled here, not client errors.
        try:
            username = self.credentials["username"]
            password = self.credentials["password"]
        except (KeyError, TypeError):
            print("Credentials file must contain 'username' and 'password'.")
            return
        self.api = Linkedin(username, password)

    def load_credentials(self, filename):
        """Return the parsed JSON content of *filename*, or ``None``.

        ``None`` is returned (after printing a message) when the file does
        not exist or does not contain valid JSON. An empty file falls in the
        second category: ``json.load`` rejects it with
        "Expecting value: line 1 column 1 (char 0)".
        """
        try:
            with open(filename, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            print("Credentials file not found.")
        except json.JSONDecodeError as e:
            print("Credentials file is not valid JSON:", str(e))
        return None

    def regex_information(self, text):
        """Find e-mail addresses and phone-like numbers in *text*.

        Args:
            text: Free text to scan; ``None`` or ``""`` yields empty lists.

        Returns:
            ``{"emails": [...], "phones": [...]}`` where every entry is the
            full matched substring.
        """
        text = text or ""
        return {
            "emails": self._EMAIL_RE.findall(text),
            "phones": self._PHONE_RE.findall(text),
        }

    def extract_profile_summary(self, profile):
        """Reduce a raw profile dict to the fields this tool reports.

        Args:
            profile: Dict as returned by ``self.api.get_profile``. Missing
                keys fall back to empty defaults; ``None`` values for
                ``summary``, ``experience``, ``timePeriod`` and ``company``
                are treated like missing ones.

        Returns:
            A dict with name, headline, summary, contact details found in
            the summary text, industry, location, student flag and a list
            of experience entries.
        """
        profile_summary = {
            "full_name": f"{profile.get('firstName', '')} {profile.get('lastName', '')}",
            "headline": profile.get('headline', ''),
            "summary": profile.get('summary', ''),
            # NOTE: the key is misspelled ("aditional"); it is kept as-is
            # because existing consumers of this dict may rely on it.
            "aditional_info": self.regex_information(profile.get('summary') or ''),
            "industry": profile.get('industryName', ''),
            "location": profile.get('locationName', ''),
            "geo_location": profile.get('geoLocationName', ''),
            "is_student": profile.get('student', False),
            "experience": []
        }

        for exp in profile.get('experience') or []:
            time_period = exp.get('timePeriod') or {}
            company = exp.get('company') or {}
            profile_summary["experience"].append({
                "title": exp.get('title', ''),
                "company": exp.get('companyName', ''),
                "location": exp.get('locationName', ''),
                "time_period": {
                    "start": time_period.get('startDate', ''),
                    # No end date is reported as an ongoing position.
                    "end": time_period.get('endDate', 'Present')
                },
                "industries": company.get('industries', [])
            })

        return profile_summary

    def get_profile_data(self, profile_id):
        """Fetch the profile summary and contact info for *profile_id*.

        Returns:
            ``{"profile_summary": ..., "contact_info": ...}`` on success, or
            ``None`` when the API client is not initialised or any call
            fails (the error is printed, not raised).
        """
        if not self.api:
            print("API not initialized. Please check your credentials.")
            return None

        # Best-effort boundary: any failure from the client is reported and
        # turned into a None result rather than crashing the script.
        try:
            profile = self.api.get_profile(profile_id)
            profile_summary = self.extract_profile_summary(profile)
            contact_info = self.api.get_profile_contact_info(profile_id)
        except Exception as e:
            print("An error occurred:", str(e))
            if 'CHALLENGE' in str(e).upper():
                print("LinkedIn requires CAPTCHA or manual login to authenticate.")
            return None

        return {
            "profile_summary": profile_summary,
            "contact_info": contact_info
        }


if __name__ == "__main__":
    # Script entry point: build the extractor with its default credentials
    # path, look up one profile and dump whatever comes back.
    target_profile = "......"
    extractor = LinkedinProfileExtractor()
    print(extractor.get_profile_data(target_profile))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant