# BE-LightningReport/src/reporting/gemini_commentary.py
#
# NOTE: The repository viewer flags this file as containing Unicode characters
# that might be confused with other (ASCII) characters, e.g. dashes inside the
# Turkish report strings. Review such characters carefully when editing.

from __future__ import annotations
import os
from typing import Any
from src.reporting.strings import ReportLanguage, get_report_language, get_strings
from src.utils import get_risk_definition_by_fixed_intervals
def _format_ring_label(ring_name: str, turkish: bool) -> str:
label = str(ring_name).strip().rstrip(":")
if turkish:
label = label.replace("km", " km").replace("-", "").replace(".", ",")
return label
def _format_ring_display(ring: Any, s, *, for_distribution: bool = False) -> str:
try:
ring_name, total, cg_count, ic_count = ring
except Exception:
return "N/A"
label = _format_ring_label(ring_name, s.gemini_write_turkish)
if s.gemini_write_turkish:
if for_distribution:
return (
f"{label} halkasında "
f"({cg_count} {s.lightning_type_cg}, {ic_count} {s.lightning_type_ic})"
)
return (
f"{label} halkasında toplam {total} olay "
f"({cg_count} {s.lightning_type_cg}, {ic_count} {s.lightning_type_ic})"
)
return f"{label} (total={total}, {s.lightning_type_cg}={cg_count}, {s.lightning_type_ic}={ic_count})"
def _ring_lines(context: dict[str, Any], s) -> list[str]:
top_rings = context.get("top_rings", [])
lines: list[str] = []
for ring in top_rings[:3]:
lines.append(f"- {_format_ring_display(ring, s)}")
return lines
def _build_ring_distribution_sentence_tr(best_ring: Any, outer_ring: Any, s) -> str:
    """Compose the Turkish sentence describing where events concentrate.

    Args:
        best_ring: Ring record presented as the main concentration.
        outer_ring: A second ring record to mention, or ``None``.
        s: Report strings object forwarded to ``_format_ring_display``.

    Returns:
        A two-ring sentence when ``outer_ring`` differs from ``best_ring`` and
        its total is positive; otherwise a sentence about ``best_ring`` only.
    """
    best_txt = _format_ring_display(best_ring, s, for_distribution=True)
    single_ring_sentence = f"Yıldırım olayları {best_txt} yoğunlaşmıştır."

    if outer_ring is None or outer_ring == best_ring:
        return single_ring_sentence

    # A record that cannot be unpacked counts as having no events.
    try:
        _, outer_total, _, _ = outer_ring
    except Exception:
        outer_total = 0

    if not outer_total > 0:
        return single_ring_sentence

    outer_txt = _format_ring_display(outer_ring, s, for_distribution=True)
    return f"Olayların büyük bölümü {best_txt}; {outer_txt} de kayıt bulunmaktadır."
def _build_ring_distribution_sentence_en(best_ring: Any, outer_ring: Any, s) -> str:
    """Compose the English sentence describing where events concentrate.

    Args:
        best_ring: Ring record presented as the main concentration.
        outer_ring: A second ring record to mention, or ``None``.
        s: Report strings object forwarded to ``_format_ring_display``.

    Returns:
        A two-ring sentence when ``outer_ring`` differs from ``best_ring`` and
        its total is positive; otherwise a sentence about ``best_ring`` only.
    """
    best_txt = _format_ring_display(best_ring, s)
    single_ring_sentence = f"Lightning activity is concentrated in {best_txt}."

    if outer_ring is None or outer_ring == best_ring:
        return single_ring_sentence

    # A record that cannot be unpacked counts as having no events.
    try:
        _, outer_total, _, _ = outer_ring
    except Exception:
        outer_total = 0

    if not outer_total > 0:
        return single_ring_sentence

    outer_txt = _format_ring_display(outer_ring, s)
    return f"Most events occur in {best_txt}, with additional activity in {outer_txt}."
def _storm_lines(context: dict[str, Any]) -> list[str]:
storm_summary = context.get("storm_summary")
lines: list[str] = []
if isinstance(storm_summary, dict) and storm_summary:
total_cells = storm_summary.get("total_cells", 0)
severity_counts = storm_summary.get("severity_counts", {}) or {}
lines.append(f"- total_cells={total_cells}")
for severity, count in severity_counts.items():
lines.append(f"- {severity}_cells={count}")
return lines
def build_gemini_prompt(context: dict[str, Any], language: ReportLanguage | None = None) -> str:
    """Build the Gemini prompt requesting a single commentary paragraph.

    The prompt consists of language-specific instructions wrapped around a
    shared "context" section that lists the values read from ``context``.

    Args:
        context: Report context. Missing keys are rendered as ``None``, except
            ``analysis_period`` and ``top_turbine_name`` (``"N/A"``) and
            ``storm_over_threshold_km`` (``1.0``).
        language: Report language; when falsy, ``get_report_language()`` is
            used.

    Returns:
        The Turkish prompt when the selected strings have
        ``gemini_write_turkish`` set, otherwise the English prompt.
    """
    lang = language or get_report_language()
    s = get_strings(lang)

    ring_lines = _ring_lines(context, s)
    storm_lines = _storm_lines(context)

    def _ctx_line(key: str, default: Any = None) -> str:
        # Every context bullet is labelled with its own key.
        return f"- {key}: {context.get(key, default)}\n"

    top_rings_text = "\n".join(ring_lines) if ring_lines else "- N/A"
    if storm_lines:
        storm_section = "\n- storm_summary:\n" + "\n".join(storm_lines)
    else:
        storm_section = "\n- storm_summary: not available"

    # The context section is identical for both languages, so build it once.
    context_block = (
        _ctx_line("analysis_period", "N/A")
        + _ctx_line("analysis_radius_km")
        + _ctx_line("total_events")
        + _ctx_line("total_lightning_per_km2")
        + _ctx_line("turbine_count")
        + _ctx_line("is_single_turbine_report")
        + f"- top_rings:\n{top_rings_text}\n"
        + _ctx_line("max_risk_log")
        + _ctx_line("max_risk_definition")
        + _ctx_line("top_turbine_name", "N/A")
        + _ctx_line("top_turbine_risk_log")
        + _ctx_line("storm_over_turbine")
        + _ctx_line("storm_near_turbine_count")
        + _ctx_line("storm_closest_distance_km")
        + _ctx_line("storm_over_threshold_km", 1.0)
        + _ctx_line("storm_detected")
        + _ctx_line("storm_closest_to_centroid_km")
        + storm_section
        + "\n\n"
    )

    if s.gemini_write_turkish:
        # NOTE: "\u2013" is an en dash, written as an escape so the range
        # separators in the examples cannot be lost to tools that strip
        # look-alike Unicode characters (the model copies the example style).
        return (
            "Yıldırım aktivite raporu için tek bir tarafsız, olgusal yorum paragrafı yazıyorsun.\n"
            "Tam olarak 3-4 cümle yaz. Akıcı ve doğal Türkçe kullan.\n"
            "Sayı uydurma. Yalnızca verilen değerleri kullan.\n"
            "\n"
            "Dil ve üslup:\n"
            "- Çeviri kokan, bürokratik veya yapay ifadelerden kaçın.\n"
            "- Şunları kullanma: \"içermekte olup\", \"görülmektedir\", \"yoğunlaşmış olup\", \"mevcut değildir\", \"bulut-yere\", \"bulut içi\".\n"
            "- Yıldırım türleri için \"yıldırım\" (yere indirme) ve \"şimşek\" (bulut içi) terimlerini kullan.\n"
            "- Mesafe halkalarını \"X\u2013Y km halkası\" biçiminde ifade et.\n"
            "- Olay sayısı sıfır olan halkalardan bahsetme.\n"
            "\n"
            "Risk açıklama gereksinimleri (paragrafta kısa ve anlaşılır biçimde yansıtılmalı):\n"
            "- Türbin riski, yere indirme yıldırımlarında akım büyüklüğü arttıkça artar.\n"
            "- Türbin riski, türbine olan mesafe arttıkça üstel olarak azalır.\n"
            "- Türbin risk skoru, darbe başına katkıların toplamıdır (görselleştirme için log dönüşümü uygulanır).\n"
            "\n"
            "Bağlam:\n"
            + context_block
            + "Paragraf gereksinimleri:\n"
            "- İlk cümlede analiz dönemini \"{başlangıç} - {bitiş} arasında\" biçiminde ver; \"döneminde\" kullanma.\n"
            "- İlk cümlede toplam olay sayısını (\"toplam X olay\"), yarıçapı (\"X km yarıçaplı alanda\") ve yıldırım-şimşek yoğunluğunu (olay/km²) tek cümlede birleştir; \"yıldırım olayı\" veya \"analiz alanında\" deme.\n"
            "- Mesafe halkası dağılımından bir önemli çıkarım ekle; olayların hangi halkada yoğunlaştığını açıkça söyle.\n"
            "- is_single_turbine_report true ise: \"{top_turbine_name} türbini için log-risk skoru ... sınıfındadır\" gibi doğal bir ifade kullan.\n"
            "- is_single_turbine_report false ise: analiz alanında en yüksek riskli türbini ve risk sınıfını belirt.\n"
            "- top_turbine_name adını aynen kullan ve max_risk_definition ile ilişkilendir.\n"
            "- Fırtına hücresi etkileşimini yalnızca veri varken belirt.\n"
            "- Fırtına tespit edildiyse (storm_detected true) ancak hiçbir türbinin üzerine gelmediyse, en yakın fırtına hücresinin merkeze uzaklığını (storm_closest_to_centroid_km) belirt.\n"
            "- storm_detected false ise fırtınadan hiç bahsetme.\n"
            "- Sayıları yuvarla: yoğunluk 3 ondalık, log risk 2 ondalık, mesafeler 1 ondalık km, sayılar tam sayı.\n"
            "- Ton analitik, net ve alarmist olmayan olsun.\n"
            "\n"
            "Örnek üslup (yalnızca stil rehberi; sayıları kopyalama):\n"
            # The example period follows the "{başlangıç} - {bitiş} arasında"
            # format demanded above.
            "\"03-05-2026 10:43 - 03-05-2026 10:43 arasında 9,0 km yarıçaplı alanda toplam 2 olay kaydedilmiştir ve yıldırım-şimşek yoğunluğu 0,008 olay/km² olarak hesaplanmıştır. "
            "Yıldırım olayları 1,0\u20133,0 km halkasında (2 yıldırım, 0 şimşek) yoğunlaşmıştır. "
            "T5 türbini için log-risk skoru Düşük Risk sınıfındadır. Bu raporda fırtına hücresi verisi bulunmamaktadır.\"\n"
            "\n"
            "Çıktı:\n"
            "Yalnızca bir paragraf (madde işareti veya başlık yok)."
        )

    return (
        "You are generating a single neutral, factual commentary paragraph for a lightning activity report.\n"
        "Write exactly 3-4 sentences.\n"
        "Do not invent any numbers. Only use the values provided.\n"
        "\n"
        "Risk explanation requirements (must be reflected in the paragraph):\n"
        "- Turbine risk increases for cloud-to-ground strikes with larger current magnitude.\n"
        "- Turbine risk decays exponentially with increasing distance from the turbine.\n"
        "- The turbine risk score is the sum of per-strike contributions (then log-transformed for visualization/heatmaps/tables).\n"
        "\n"
        "Context:\n"
        + context_block
        + "Requirements for the paragraph:\n"
        "- Mention one key takeaway from the ring distribution (e.g., where totals are highest).\n"
        "- Mention the overall lightning density (events/km²).\n"
        "- If is_single_turbine_report is true: start the sentence mentioning the highest-risk turbine as \"For {top_turbine_name}, ...\"; avoid wording like \"Within the analyzed area\" and avoid verbs like \"was identified\".\n"
        "- If is_single_turbine_report is false: you may use wording like \"Within the analyzed area, {top_turbine_name} was identified ...\".\n"
        "- Mention the specific turbine name with the highest risk score (top_turbine_name) verbatim.\n"
        "- Mention the risk category for that turbine using max_risk_definition.\n"
        "- Do not refer only to the category; always associate the risk with top_turbine_name.\n"
        "- Mention storm-cell interaction with the turbine when storm information is available:\n"
        " - If storm_over_turbine is true: say that storm cells were very close to/over the turbine (based on centroid distance <= storm_over_threshold_km).\n"
        " - If storm_over_turbine is false and storm_closest_distance_km is provided: say the closest storm cell centroid came within storm_closest_distance_km km of the turbine.\n"
        " - If storms were detected (storm_detected true) but did not pass over any turbine, mention the closest storm cell's distance to the centroid (storm_closest_to_centroid_km).\n"
        " - If storm_detected is false, do not mention storms at all.\n"
        "- If storm_summary is available, mention total storm cells and at least one severity count.\n"
        "- Round numeric values as follows (use the rounded values you are given, avoid long decimals):\n"
        " - lightning density to 3 decimals (events/km²)\n"
        " - log-transformed risk score(s) to 2 decimals\n"
        " - distances (analysis_radius_km, storm_closest_distance_km) to 1 decimal (km)\n"
        " - counts to integers\n"
        "- Keep tone analytic and non-alarmist.\n"
        "\n"
        "Output:\n"
        "One paragraph only (no bullet points, no headings)."
    )
def _fallback_storm_severity_sentence(storm_summary: Any, s) -> str:
    """Return the sentence summarising storm-cell counts, or ``""`` without data."""
    if not (isinstance(storm_summary, dict) and storm_summary):
        return ""

    total_cells = storm_summary.get("total_cells", 0)
    severity_counts = storm_summary.get("severity_counts", {}) or {}
    if not severity_counts:
        if s.gemini_write_turkish:
            return f"Toplam {total_cells} fırtına hücresi kaydedilmiştir."
        return f"Storm data indicates {total_cells} storm cells."

    # Report the severity class with the largest cell count.
    severity, count = max(severity_counts.items(), key=lambda kv: kv[1])
    severity_label = s.storm_severity_names.get(
        str(severity).strip().lower(), str(severity).title()
    )
    if s.gemini_write_turkish:
        return (
            f"Toplam {total_cells} fırtına hücresi kaydedilmiş; "
            f"en fazla {count} hücre {severity_label} şiddetindedir."
        )
    return (
        f"Storm data indicates {total_cells} storm cells, "
        f"with the highest share in {severity_label} ({count} cells)."
    )


def _fallback_storm_interaction_sentence(context: dict[str, Any], turkish: bool) -> str:
    """Return the sentence on storm-cell proximity, or ``""`` without data."""
    near_count = context.get("storm_near_turbine_count")
    closest_km = context.get("storm_closest_distance_km")
    centroid_km = context.get("storm_closest_to_centroid_km")
    threshold_km = context.get("storm_over_threshold_km", 1.0)
    # The key may be present with None (or another non-number); formatting
    # that with ":.1f" would raise, so fall back to the documented default.
    if not isinstance(threshold_km, (int, float)):
        threshold_km = 1.0

    if context.get("storm_over_turbine"):
        if turkish:
            return (
                f"Fırtına hücrelerinden {near_count} tanesinin merkezi türbine "
                f"{threshold_km:.1f} km'den yakın konumlanmıştır."
            )
        return (
            f"Storm interaction: storm-cell centroids came within "
            f"{threshold_km:.1f} km of the turbine (count={near_count})."
        )

    if isinstance(centroid_km, (int, float)):
        if turkish:
            return (
                "Tespit edilen fırtına hücreleri herhangi bir türbinin üzerine denk gelmemiş; "
                f"en yakın hücre merkeze {centroid_km:.1f} km mesafeye kadar yaklaşmıştır."
            )
        return (
            "Detected storm cells did not pass directly over any turbine; "
            f"the nearest cell came within {centroid_km:.1f} km of the centroid."
        )

    if isinstance(closest_km, (int, float)):
        if turkish:
            return f"En yakın fırtına hücresi merkezi türbine {closest_km:.1f} km mesafededir."
        return (
            "Storm interaction: the closest storm-cell centroid came within "
            f"{closest_km:.1f} km of the turbine."
        )

    return ""


def fallback_commentary(context: dict[str, Any], language: ReportLanguage | None = None) -> str:
    """Build the commentary paragraph locally, without calling Gemini.

    The paragraph is assembled from up to six parts: intro (period, radius,
    event total, density), ring distribution, highest-risk turbine, risk-model
    explanation, storm interaction and storm severity. Parts with no backing
    data are omitted.

    Args:
        context: Report context dictionary.
        language: Report language; when falsy, ``get_report_language()`` is
            used.

    Returns:
        The paragraph in Turkish when the selected strings have
        ``gemini_write_turkish`` set, otherwise in English.
    """
    lang = language or get_report_language()
    s = get_strings(lang)
    turkish = s.gemini_write_turkish

    analysis_period = context.get("analysis_period", "N/A")
    analysis_radius_km = context.get("analysis_radius_km", None)
    total_events = context.get("total_events", None)
    total_lightning_per_km2 = context.get("total_lightning_per_km2", None)
    is_single_turbine_report = context.get("is_single_turbine_report", None)
    top_rings = context.get("top_rings", [])
    max_risk_definition = context.get("max_risk_definition", "N/A")
    top_turbine_name = context.get("top_turbine_name", "N/A")

    best_ring = top_rings[0] if top_rings else None
    outermost_ring = top_rings[-1] if top_rings else None
    has_radius = isinstance(analysis_radius_km, (int, float))

    if isinstance(total_lightning_per_km2, (int, float)):
        density_txt = f"{total_lightning_per_km2:.3f} {s.events_per_km2}"
    elif total_lightning_per_km2 is None:
        # Avoid printing a literal "None" in the report text.
        density_txt = "N/A"
    else:
        density_txt = str(total_lightning_per_km2)

    if turkish:
        radius_txt = f"{analysis_radius_km:.1f} km yarıçaplı alanda" if has_radius else "alanda"
        events_txt = f"toplam {total_events} olay" if total_events is not None else "olay"
        intro_sentence = (
            f"{analysis_period} arasında {radius_txt} {events_txt} kaydedilmiştir ve "
            f"yıldırım-şimşek yoğunluğu {density_txt} olarak hesaplanmıştır."
        )
        # Without any ring there is nothing to describe; skipping the sentence
        # avoids "... N/A yoğunlaşmıştır."
        ring_sentence = (
            _build_ring_distribution_sentence_tr(best_ring, outermost_ring, s)
            if best_ring is not None
            else ""
        )
        if is_single_turbine_report:
            turbine_sentence = (
                f"{top_turbine_name} türbini için log-risk skoru {max_risk_definition} sınıfındadır."
            )
        else:
            turbine_sentence = (
                f"Analiz alanında en yüksek log-risk skoruna sahip türbin {top_turbine_name} olup {max_risk_definition} sınıfındadır."
            )
        method_sentence = (
            "Risk skoru, türbine yakınlık ve akım büyüklüğünün birleşimini yansıtır; "
            "modele göre her yere indirme darbesinin katkısı mesafe arttıkça üstel olarak azalır."
        )
    else:
        radius_txt = f"within {analysis_radius_km:.1f} km" if has_radius else ""
        events_txt = f"{total_events} total lightning events" if total_events is not None else "N/A"
        # Strip so a missing radius does not leave a space before the comma.
        scope_txt = f"{events_txt} {radius_txt}".strip()
        intro_sentence = (
            f"For {analysis_period}, the dataset contains {scope_txt}, "
            f"corresponding to an overall lightning density of {density_txt}."
        )
        # Without any ring there is nothing to describe; skipping the sentence
        # avoids "... concentrated in N/A."
        ring_sentence = (
            _build_ring_distribution_sentence_en(best_ring, outermost_ring, s)
            if best_ring is not None
            else ""
        )
        if is_single_turbine_report:
            turbine_sentence = (
                f"For {top_turbine_name}, the log-transformed risk score is the highest in this report and falls in the {max_risk_definition} category."
            )
        else:
            turbine_sentence = (
                f"Within the analyzed area, the turbine with the highest log-transformed risk score is {top_turbine_name}, which falls in the {max_risk_definition} category."
            )
        method_sentence = (
            "This indicates the turbine was exposed to a combination of closer cloud-to-ground strikes and stronger current magnitudes. "
            "In the risk model, each cloud-to-ground strike contributes more when it is near the turbine and when |I| is larger, and contributions decrease exponentially with distance; the turbine risk score is the sum over all included strikes (with a log transform used for visualization)."
        )

    sentences = (
        intro_sentence,
        ring_sentence,
        turbine_sentence,
        method_sentence,
        _fallback_storm_interaction_sentence(context, turkish),
        _fallback_storm_severity_sentence(context.get("storm_summary") or {}, s),
    )
    # Join only the parts that exist so omitted sentences leave no double spaces.
    return " ".join(sentence for sentence in sentences if sentence).strip()
def generate_gemini_paragraph(context: dict[str, Any], api_key: str | None = None) -> str:
    """Return the commentary paragraph, generated by Gemini when possible.

    Falls back to ``fallback_commentary`` when no API key is configured, when
    the Gemini client cannot be imported or the request fails, or when the
    response contains no text.

    Args:
        context: Report context passed to the prompt builder and the fallback.
        api_key: Gemini API key; when falsy, the ``GEMINI_API_KEY`` environment
            variable is used.

    Returns:
        The stripped model output, or the locally built fallback paragraph.
    """
    import logging

    lang = get_report_language()
    api_key_final = api_key or os.getenv("GEMINI_API_KEY")
    if not api_key_final:
        return fallback_commentary(context, lang)

    # "or" (not a getenv default) so an empty GEMINI_MODEL also uses the default.
    model_name = os.getenv("GEMINI_MODEL") or "gemini-1.5-flash"
    prompt = build_gemini_prompt(context, lang)

    try:
        # Imported lazily so the module stays usable without the Gemini client.
        import google.generativeai as genai

        genai.configure(api_key=api_key_final)
        model = genai.GenerativeModel(model_name)
        resp = model.generate_content(
            prompt,
            generation_config={
                "temperature": 0.2,
                "max_output_tokens": 220,
            },
        )
        text = str(getattr(resp, "text", None) or "").strip()
    except Exception:
        # Best-effort by design: the report must still be produced. Log the
        # failure so it is not silently hidden behind the fallback text.
        logging.getLogger(__name__).warning(
            "Gemini commentary generation failed; using fallback commentary.",
            exc_info=True,
        )
        return fallback_commentary(context, lang)

    return text or fallback_commentary(context, lang)