fix: date parsing, continent grouping, dynamic analytics, and sector scraping
All checks were successful
Deployment / deploy-docker (push) Successful in 5s

This commit is contained in:
Melchior Reimers
2026-01-23 18:17:02 +01:00
parent 1086c4aa1d
commit dbd4fbfb47
3 changed files with 312 additions and 253 deletions

View File

@@ -35,6 +35,8 @@ def get_processed_isins():
return []
return []
from bs4 import BeautifulSoup
def fetch_metadata(isin):
logger.info(f"Fetching metadata for ISIN: {isin}")
metadata = {
@@ -45,10 +47,15 @@ def fetch_metadata(isin):
'sector': 'Unknown'
}
# Common headers to avoid blocks
headers = {
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
}
# 1. GLEIF API for Name and Country
try:
gleif_url = f"https://api.gleif.org/api/v1/lei-records?filter[isin]={isin}"
res = requests.get(gleif_url, timeout=10)
res = requests.get(gleif_url, headers=headers, timeout=10)
if res.status_code == 200:
data = res.json().get('data', [])
if data:
@@ -58,11 +65,25 @@ def fetch_metadata(isin):
except Exception as e:
logger.error(f"GLEIF error for {isin}: {e}")
# 2. Continent mapping from Country Code
# 2. Yahoo Finance for Sector
try:
# Query Yahoo Finance's lookup page, passing the ISIN as the search term
yahoo_url = f"https://finance.yahoo.com/lookup/?s={isin}"
res = requests.get(yahoo_url, headers=headers, timeout=10)
if res.status_code == 200:
soup = BeautifulSoup(res.text, 'html.parser')
# Look for the sector link in the results table
sector_link = soup.find('a', href=lambda x: x and '/sector/' in x)
if sector_link:
metadata['sector'] = sector_link.text.strip()
except Exception as e:
logger.error(f"Yahoo sector error for {isin}: {e}")
# 3. Continent mapping from Country Code
if metadata['country'] != 'Unknown':
try:
country_url = f"https://restcountries.com/v3.1/alpha/{metadata['country']}"
res = requests.get(country_url, timeout=10)
res = requests.get(country_url, headers=headers, timeout=10)
if res.status_code == 200:
data = res.json()
if data and isinstance(data, list):