# company_profile_grabber.py # ------------------------- # Purpose: Fetch a short company profile for each ticker (name, sector, industry, description) # Input: tickers.xlsx (one ticker per line, with or without header "Ticker") # Output: company_profiles.xlsx (same folder) # # Requires: pandas, yfinance, openpyxl # Install with: # pip install pandas yfinance openpyxl import os import time import pandas as pd import yfinance as yf # === PATHS === BASE_DIR = r"C:\Users\jeanl\Documents\01. Finance in London\16. Python\company_description" INPUT_FILE = os.path.join(BASE_DIR, "tickers.xlsx") OUTPUT_FILE = os.path.join(BASE_DIR, "company_profiles.xlsx") def load_tickers(file_path: str) -> list[str]: """Load tickers from Excel (one per line, with or without header 'Ticker').""" df = pd.read_excel(file_path, dtype=str) if df.shape[1] == 1: tickers = df.iloc[:, 0].tolist() elif "Ticker" in df.columns: tickers = df["Ticker"].tolist() else: raise ValueError("Excel must have one column or a column named 'Ticker'.") tickers = [t.strip() for t in tickers if pd.notna(t) and t.strip()] tickers = list(dict.fromkeys(tickers)) # remove duplicates, keep order return tickers def get_company_info(ticker: str) -> dict: """Fetch company info from Yahoo Finance.""" try: t = yf.Ticker(ticker) try: info = t.get_info() except Exception: info = getattr(t, "info", {}) or {} except Exception: info = {} return { "Ticker": ticker, "Company": info.get("longName") or info.get("shortName"), "Sector": info.get("sector"), "Industry": info.get("industry"), "Description": info.get("longBusinessSummary") } def main(): if not os.path.exists(INPUT_FILE): raise FileNotFoundError(f"Input file not found: {INPUT_FILE}") print(f"Reading tickers from: {INPUT_FILE}") tickers = load_tickers(INPUT_FILE) print(f"Loaded {len(tickers)} tickers") rows = [] for i, sym in enumerate(tickers, 1): print(f"[{i}/{len(tickers)}] Fetching {sym} ...", end="", flush=True) data = get_company_info(sym) rows.append(data) print(" done.") time.sleep(0.2) # politeness delay df = pd.DataFrame(rows, columns=["Ticker", "Company", "Sector", "Industry", "Description"]) df = df.sort_values(by="Ticker").reset_index(drop=True) df.to_excel(OUTPUT_FILE, index=False) print(f"Saved company profiles to: {OUTPUT_FILE}") if __name__ == "__main__": main()