fix: date parsing, continent grouping, dynamic analytics, and sector scraping
All checks were successful
Deployment / deploy-docker (push) Successful in 5s
All checks were successful
Deployment / deploy-docker (push) Successful in 5s
This commit is contained in:
@@ -35,6 +35,8 @@ def get_processed_isins():
|
||||
return []
|
||||
return []
|
||||
|
||||
from bs4 import BeautifulSoup
|
||||
|
||||
def fetch_metadata(isin):
|
||||
logger.info(f"Fetching metadata for ISIN: {isin}")
|
||||
metadata = {
|
||||
@@ -45,10 +47,15 @@ def fetch_metadata(isin):
|
||||
'sector': 'Unknown'
|
||||
}
|
||||
|
||||
# Browser-like request headers, shared by the HTTP calls below, to avoid being blocked
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'
|
||||
}
|
||||
|
||||
# 1. GLEIF API for Name and Country
|
||||
try:
|
||||
gleif_url = f"https://api.gleif.org/api/v1/lei-records?filter[isin]={isin}"
|
||||
res = requests.get(gleif_url, timeout=10)
|
||||
res = requests.get(gleif_url, headers=headers, timeout=10)
|
||||
if res.status_code == 200:
|
||||
data = res.json().get('data', [])
|
||||
if data:
|
||||
@@ -58,11 +65,25 @@ def fetch_metadata(isin):
|
||||
except Exception as e:
|
||||
logger.error(f"GLEIF error for {isin}: {e}")
|
||||
|
||||
# 2. Continent mapping from Country Code
|
||||
# 2. Yahoo Finance for Sector
|
||||
try:
|
||||
# Query Yahoo Finance's lookup page, passing the ISIN as the search term
|
||||
yahoo_url = f"https://finance.yahoo.com/lookup/?s={isin}"
|
||||
res = requests.get(yahoo_url, headers=headers, timeout=10)
|
||||
if res.status_code == 200:
|
||||
soup = BeautifulSoup(res.text, 'html.parser')
|
||||
# Look for the sector link in the results table
|
||||
sector_link = soup.find('a', href=lambda x: x and '/sector/' in x)
|
||||
if sector_link:
|
||||
metadata['sector'] = sector_link.text.strip()
|
||||
except Exception as e:
|
||||
logger.error(f"Yahoo sector error for {isin}: {e}")
|
||||
|
||||
# 3. Continent mapping from Country Code
|
||||
if metadata['country'] != 'Unknown':
|
||||
try:
|
||||
country_url = f"https://restcountries.com/v3.1/alpha/{metadata['country']}"
|
||||
res = requests.get(country_url, timeout=10)
|
||||
res = requests.get(country_url, headers=headers, timeout=10)
|
||||
if res.status_code == 200:
|
||||
data = res.json()
|
||||
if data and isinstance(data, list):
|
||||
|
||||
Reference in New Issue
Block a user