# company_profile_grabber.py
# -------------------------
# Purpose: Fetch a short company profile for each ticker (name, sector, industry, description)
# Input:  tickers.xlsx (one ticker per line, with or without header "Ticker")
# Output: company_profiles.xlsx (same folder)
#
# Requires: pandas, yfinance, openpyxl
# Install with:
#   pip install pandas yfinance openpyxl

import os
import time
import pandas as pd
import yfinance as yf

# === PATHS ===
BASE_DIR = r"C:\Users\jeanl\Documents\01. Finance in London\16. Python\company_description"
INPUT_FILE = os.path.join(BASE_DIR, "tickers.xlsx")
OUTPUT_FILE = os.path.join(BASE_DIR, "company_profiles.xlsx")

def load_tickers(file_path: str) -> list[str]:
    """Load tickers from Excel (one per line, with or without header 'Ticker')."""
    df = pd.read_excel(file_path, dtype=str)
    if df.shape[1] == 1:
        tickers = df.iloc[:, 0].tolist()
    elif "Ticker" in df.columns:
        tickers = df["Ticker"].tolist()
    else:
        raise ValueError("Excel must have one column or a column named 'Ticker'.")
    tickers = [t.strip() for t in tickers if pd.notna(t) and t.strip()]
    tickers = list(dict.fromkeys(tickers))  # remove duplicates, keep order
    return tickers

def get_company_info(ticker: str) -> dict:
    """Fetch company info from Yahoo Finance."""
    try:
        t = yf.Ticker(ticker)
        try:
            info = t.get_info()
        except Exception:
            info = getattr(t, "info", {}) or {}
    except Exception:
        info = {}
    return {
        "Ticker": ticker,
        "Company": info.get("longName") or info.get("shortName"),
        "Sector": info.get("sector"),
        "Industry": info.get("industry"),
        "Description": info.get("longBusinessSummary")
    }

def main():
    if not os.path.exists(INPUT_FILE):
        raise FileNotFoundError(f"Input file not found: {INPUT_FILE}")
    print(f"Reading tickers from: {INPUT_FILE}")

    tickers = load_tickers(INPUT_FILE)
    print(f"Loaded {len(tickers)} tickers")

    rows = []
    for i, sym in enumerate(tickers, 1):
        print(f"[{i}/{len(tickers)}] Fetching {sym} ...", end="", flush=True)
        data = get_company_info(sym)
        rows.append(data)
        print(" done.")
        time.sleep(0.2)  # politeness delay

    df = pd.DataFrame(rows, columns=["Ticker", "Company", "Sector", "Industry", "Description"])
    df = df.sort_values(by="Ticker").reset_index(drop=True)

    df.to_excel(OUTPUT_FILE, index=False)
    print(f"Saved company profiles to: {OUTPUT_FILE}")

if __name__ == "__main__":
    main()