383 lines
19 KiB
Python
383 lines
19 KiB
Python
from __future__ import annotations
|
||
|
||
import os
|
||
from typing import Any
|
||
|
||
from src.reporting.strings import ReportLanguage, get_report_language, get_strings
|
||
from src.utils import get_risk_definition_by_fixed_intervals
|
||
|
||
|
||
def _format_ring_label(ring_name: str, turkish: bool) -> str:
|
||
label = str(ring_name).strip().rstrip(":")
|
||
if turkish:
|
||
label = label.replace("km", " km").replace("-", "–").replace(".", ",")
|
||
return label
|
||
|
||
|
||
def _format_ring_display(ring: Any, s, *, for_distribution: bool = False) -> str:
|
||
try:
|
||
ring_name, total, cg_count, ic_count = ring
|
||
except Exception:
|
||
return "N/A"
|
||
|
||
label = _format_ring_label(ring_name, s.gemini_write_turkish)
|
||
if s.gemini_write_turkish:
|
||
if for_distribution:
|
||
return (
|
||
f"{label} halkasında "
|
||
f"({cg_count} {s.lightning_type_cg}, {ic_count} {s.lightning_type_ic})"
|
||
)
|
||
return (
|
||
f"{label} halkasında toplam {total} olay "
|
||
f"({cg_count} {s.lightning_type_cg}, {ic_count} {s.lightning_type_ic})"
|
||
)
|
||
return f"{label} (total={total}, {s.lightning_type_cg}={cg_count}, {s.lightning_type_ic}={ic_count})"
|
||
|
||
|
||
def _ring_lines(context: dict[str, Any], s) -> list[str]:
|
||
top_rings = context.get("top_rings", [])
|
||
lines: list[str] = []
|
||
for ring in top_rings[:3]:
|
||
lines.append(f"- {_format_ring_display(ring, s)}")
|
||
return lines
|
||
|
||
|
||
def _build_ring_distribution_sentence_tr(best_ring: Any, outer_ring: Any, s) -> str:
    """Build the Turkish sentence describing where events are concentrated.

    Args:
        best_ring: Ring record for the primary ring (formatted with
            ``_format_ring_display(..., for_distribution=True)``).
        outer_ring: A second ring record, or ``None``.  It is mentioned only
            when it differs from ``best_ring`` and its total is above zero.
        s: Strings object forwarded to ``_format_ring_display``.

    Returns:
        A single Turkish sentence.
    """
    best_txt = _format_ring_display(best_ring, s, for_distribution=True)
    concentrated = f"Yıldırım olayları {best_txt} yoğunlaşmıştır."

    if outer_ring is None or outer_ring == best_ring:
        return concentrated

    try:
        _, outer_total, _, _ = outer_ring
        # Kept inside the guarded section: a None/str total would otherwise
        # raise TypeError on comparison and break the best-effort contract.
        has_outer_events = outer_total > 0
    except Exception:
        has_outer_events = False

    if has_outer_events:
        outer_txt = _format_ring_display(outer_ring, s, for_distribution=True)
        return f"Olayların büyük bölümü {best_txt}; {outer_txt} de kayıt bulunmaktadır."
    return concentrated
|
||
|
||
|
||
def _build_ring_distribution_sentence_en(best_ring: Any, outer_ring: Any, s) -> str:
    """Build the English sentence describing where events are concentrated.

    Args:
        best_ring: Ring record for the primary ring (formatted with
            ``_format_ring_display``).
        outer_ring: A second ring record, or ``None``.  It is mentioned only
            when it differs from ``best_ring`` and its total is above zero.
        s: Strings object forwarded to ``_format_ring_display``.

    Returns:
        A single English sentence.
    """
    best_txt = _format_ring_display(best_ring, s)
    concentrated = f"Lightning activity is concentrated in {best_txt}."

    if outer_ring is None or outer_ring == best_ring:
        return concentrated

    try:
        _, outer_total, _, _ = outer_ring
        # Kept inside the guarded section: a None/str total would otherwise
        # raise TypeError on comparison and break the best-effort contract.
        has_outer_events = outer_total > 0
    except Exception:
        has_outer_events = False

    if has_outer_events:
        outer_txt = _format_ring_display(outer_ring, s)
        return f"Most events occur in {best_txt}, with additional activity in {outer_txt}."
    return concentrated
|
||
|
||
|
||
def _storm_lines(context: dict[str, Any]) -> list[str]:
|
||
storm_summary = context.get("storm_summary")
|
||
lines: list[str] = []
|
||
if isinstance(storm_summary, dict) and storm_summary:
|
||
total_cells = storm_summary.get("total_cells", 0)
|
||
severity_counts = storm_summary.get("severity_counts", {}) or {}
|
||
lines.append(f"- total_cells={total_cells}")
|
||
for severity, count in severity_counts.items():
|
||
lines.append(f"- {severity}_cells={count}")
|
||
return lines
|
||
|
||
|
||
def build_gemini_prompt(context: dict[str, Any], language: ReportLanguage | None = None) -> str:
    """Assemble the text prompt asking the model for one commentary paragraph.

    The prompt has three parts: writing instructions, a "context" listing of
    the values found in ``context``, and the paragraph requirements.  The
    instructions and requirements are Turkish when the resolved strings object
    has ``gemini_write_turkish`` set, English otherwise; the context listing
    is the same in both.

    Args:
        context: Report values.  Missing keys are rendered as ``None`` except
            ``analysis_period`` / ``top_turbine_name`` (``"N/A"``) and
            ``storm_over_threshold_km`` (``1.0``).
        language: Report language; when falsy, ``get_report_language()`` is
            used.

    Returns:
        The complete prompt string.
    """
    s = get_strings(language or get_report_language())

    rings = _ring_lines(context, s)
    storms = _storm_lines(context)

    def _line(key: str, default: Any = None) -> str:
        # One "- key: value" row of the context listing.
        return f"- {key}: {context.get(key, default)}\n"

    rings_txt = chr(10).join(rings) if rings else "- N/A"
    if storms:
        storm_txt = f"\n- storm_summary:\n{chr(10).join(storms)}"
    else:
        storm_txt = "\n- storm_summary: not available"

    # Language-independent listing, including the blank line that separates it
    # from the requirements section that follows.
    context_block = "".join(
        (
            _line("analysis_period", "N/A"),
            _line("analysis_radius_km"),
            _line("total_events"),
            _line("total_lightning_per_km2"),
            _line("turbine_count"),
            _line("is_single_turbine_report"),
            f"- top_rings:\n{rings_txt}\n",
            _line("max_risk_log"),
            _line("max_risk_definition"),
            _line("top_turbine_name", "N/A"),
            _line("top_turbine_risk_log"),
            _line("storm_over_turbine"),
            _line("storm_near_turbine_count"),
            _line("storm_closest_distance_km"),
            _line("storm_over_threshold_km", 1.0),
            _line("storm_detected"),
            _line("storm_closest_to_centroid_km"),
            storm_txt,
            "\n\n",
        )
    )

    if s.gemini_write_turkish:
        tr_instructions = (
            "Yıldırım aktivite raporu için tek bir tarafsız, olgusal yorum paragrafı yazıyorsun.\n"
            "Tam olarak 3-4 cümle yaz. Akıcı ve doğal Türkçe kullan.\n"
            "Sayı uydurma. Yalnızca verilen değerleri kullan.\n"
            "\n"
            "Dil ve üslup:\n"
            "- Çeviri kokan, bürokratik veya yapay ifadelerden kaçın.\n"
            "- Şunları kullanma: \"içermekte olup\", \"görülmektedir\", \"yoğunlaşmış olup\", \"mevcut değildir\", \"bulut-yere\", \"bulut içi\".\n"
            "- Yıldırım türleri için \"yıldırım\" (yere indirme) ve \"şimşek\" (bulut içi) terimlerini kullan.\n"
            "- Mesafe halkalarını \"X–Y km halkası\" biçiminde ifade et.\n"
            "- Olay sayısı sıfır olan halkalardan bahsetme.\n"
            "\n"
            "Risk açıklama gereksinimleri (paragrafta kısa ve anlaşılır biçimde yansıtılmalı):\n"
            "- Türbin riski, yere indirme yıldırımlarında akım büyüklüğü arttıkça artar.\n"
            "- Türbin riski, türbine olan mesafe arttıkça üstel olarak azalır.\n"
            "- Türbin risk skoru, darbe başına katkıların toplamıdır (görselleştirme için log dönüşümü uygulanır).\n"
            "\n"
            "Bağlam:\n"
        )
        # Plain (non-f) strings: the braces below are literal prompt text.
        tr_requirements = (
            "Paragraf gereksinimleri:\n"
            "- İlk cümlede analiz dönemini \"{başlangıç} - {bitiş} arasında\" biçiminde ver; \"döneminde\" kullanma.\n"
            "- İlk cümlede toplam olay sayısını (\"toplam X olay\"), yarıçapı (\"X km yarıçaplı alanda\") ve yıldırım-şimşek yoğunluğunu (olay/km²) tek cümlede birleştir; \"yıldırım olayı\" veya \"analiz alanında\" deme.\n"
            "- Mesafe halkası dağılımından bir önemli çıkarım ekle; olayların hangi halkada yoğunlaştığını açıkça söyle.\n"
            "- is_single_turbine_report true ise: \"{top_turbine_name} türbini için log-risk skoru ... sınıfındadır\" gibi doğal bir ifade kullan.\n"
            "- is_single_turbine_report false ise: analiz alanında en yüksek riskli türbini ve risk sınıfını belirt.\n"
            "- top_turbine_name adını aynen kullan ve max_risk_definition ile ilişkilendir.\n"
            "- Fırtına hücresi etkileşimini yalnızca veri varken belirt.\n"
            "- Fırtına tespit edildiyse (storm_detected true) ancak hiçbir türbinin üzerine gelmediyse, en yakın fırtına hücresinin merkeze uzaklığını (storm_closest_to_centroid_km) belirt.\n"
            "- storm_detected false ise fırtınadan hiç bahsetme.\n"
            "- Sayıları yuvarla: yoğunluk 3 ondalık, log risk 2 ondalık, mesafeler 1 ondalık km, sayılar tam sayı.\n"
            "- Ton analitik, net ve alarmist olmayan olsun.\n"
            "\n"
            "Örnek üslup (yalnızca stil rehberi; sayıları kopyalama):\n"
            "\"03-05-2026 10:43–03-05-2026 10:43 arasında 9,0 km yarıçaplı alanda toplam 2 olay kaydedilmiştir ve yıldırım-şimşek yoğunluğu 0,008 olay/km² olarak hesaplanmıştır. "
            "Yıldırım olayları 1,0–3,0 km halkasında (2 yıldırım, 0 şimşek) yoğunlaşmıştır. "
            "T5 türbini için log-risk skoru Düşük Risk sınıfındadır. Bu raporda fırtına hücresi verisi bulunmamaktadır.\"\n"
            "\n"
            "Çıktı:\n"
            "Yalnızca bir paragraf (madde işareti veya başlık yok)."
        )
        return tr_instructions + context_block + tr_requirements

    en_instructions = (
        "You are generating a single neutral, factual commentary paragraph for a lightning activity report.\n"
        "Write exactly 3-4 sentences.\n"
        "Do not invent any numbers. Only use the values provided.\n"
        "\n"
        "Risk explanation requirements (must be reflected in the paragraph):\n"
        "- Turbine risk increases for cloud-to-ground strikes with larger current magnitude.\n"
        "- Turbine risk decays exponentially with increasing distance from the turbine.\n"
        "- The turbine risk score is the sum of per-strike contributions (then log-transformed for visualization/heatmaps/tables).\n"
        "\n"
        "Context:\n"
    )
    # Plain (non-f) strings: the braces below are literal prompt text.
    en_requirements = (
        "Requirements for the paragraph:\n"
        "- Mention one key takeaway from the ring distribution (e.g., where totals are highest).\n"
        "- Mention the overall lightning density (events/km²).\n"
        "- If is_single_turbine_report is true: start the sentence mentioning the highest-risk turbine as \"For {top_turbine_name}, ...\"; avoid wording like \"Within the analyzed area\" and avoid verbs like \"was identified\".\n"
        "- If is_single_turbine_report is false: you may use wording like \"Within the analyzed area, {top_turbine_name} was identified ...\".\n"
        "- Mention the specific turbine name with the highest risk score (top_turbine_name) verbatim.\n"
        "- Mention the risk category for that turbine using max_risk_definition.\n"
        "- Do not refer only to the category; always associate the risk with top_turbine_name.\n"
        "- Mention storm-cell interaction with the turbine when storm information is available:\n"
        " - If storm_over_turbine is true: say that storm cells were very close to/over the turbine (based on centroid distance <= storm_over_threshold_km).\n"
        " - If storm_over_turbine is false and storm_closest_distance_km is provided: say the closest storm cell centroid came within storm_closest_distance_km km of the turbine.\n"
        " - If storms were detected (storm_detected true) but did not pass over any turbine, mention the closest storm cell's distance to the centroid (storm_closest_to_centroid_km).\n"
        " - If storm_detected is false, do not mention storms at all.\n"
        "- If storm_summary is available, mention total storm cells and at least one severity count.\n"
        "- Round numeric values as follows (use the rounded values you are given, avoid long decimals):\n"
        " - lightning density to 3 decimals (events/km²)\n"
        " - log-transformed risk score(s) to 2 decimals\n"
        " - distances (analysis_radius_km, storm_closest_distance_km) to 1 decimal (km)\n"
        " - counts to integers\n"
        "- Keep tone analytic and non-alarmist.\n"
        "\n"
        "Output:\n"
        "One paragraph only (no bullet points, no headings)."
    )
    return en_instructions + context_block + en_requirements
|
||
|
||
|
||
def fallback_commentary(context: dict[str, Any], language: ReportLanguage | None = None) -> str:
    """Compose the commentary paragraph locally, without calling any model.

    The paragraph is the concatenation of: an intro (period, radius, event
    count, density, ring distribution), a sentence naming the top turbine and
    its risk class, a fixed sentence on how the risk score behaves, an
    optional storm-interaction sentence and an optional storm-summary
    sentence.  Text is Turkish when the resolved strings object has
    ``gemini_write_turkish`` set, English otherwise.

    Args:
        context: Report values; every key is optional.
        language: Report language; when falsy, ``get_report_language()`` is
            used.

    Returns:
        The assembled paragraph with surrounding whitespace stripped.
    """
    lang = language or get_report_language()
    s = get_strings(lang)

    analysis_period = context.get("analysis_period", "N/A")
    analysis_radius_km = context.get("analysis_radius_km", None)
    total_events = context.get("total_events", None)
    total_lightning_per_km2 = context.get("total_lightning_per_km2", None)
    is_single_turbine_report = context.get("is_single_turbine_report", None)
    top_rings = context.get("top_rings", [])
    max_risk_definition = context.get("max_risk_definition", "N/A")
    top_turbine_name = context.get("top_turbine_name", "N/A")
    storm_over_turbine = context.get("storm_over_turbine")
    storm_closest_distance_km = context.get("storm_closest_distance_km")
    storm_near_turbine_count = context.get("storm_near_turbine_count")
    storm_closest_to_centroid_km = context.get("storm_closest_to_centroid_km")

    # The threshold is rendered with ":.1f" below.  An explicit None (or any
    # non-numeric value) in the context would make that formatting raise, so
    # coerce it here and fall back to the same 1.0 default used for a missing
    # key.
    try:
        storm_over_threshold_km = float(context.get("storm_over_threshold_km", 1.0))
    except (TypeError, ValueError):
        storm_over_threshold_km = 1.0

    outermost_ring = top_rings[-1] if top_rings else None
    best_ring = top_rings[0] if top_rings else None

    # Storm summary sentence (empty when no summary dict is present).
    storm_summary = context.get("storm_summary") or {}
    storm_line = ""
    if isinstance(storm_summary, dict) and storm_summary:
        total_cells = storm_summary.get("total_cells", 0)
        severity_counts = storm_summary.get("severity_counts", {}) or {}
        if severity_counts:
            # Report the severity with the largest count.
            severity = max(severity_counts.items(), key=lambda kv: kv[1])[0]
            count = severity_counts.get(severity, 0)
            severity_label = s.storm_severity_names.get(str(severity).strip().lower(), str(severity).title())
            if s.gemini_write_turkish:
                storm_line = (
                    f"Toplam {total_cells} fırtına hücresi kaydedilmiş; en fazla {count} hücre {severity_label} şiddetindedir."
                )
            else:
                storm_line = (
                    f"Storm data indicates {total_cells} storm cells, with the highest share in {severity_label} ({count} cells)."
                )
        elif s.gemini_write_turkish:
            storm_line = f"Toplam {total_cells} fırtına hücresi kaydedilmiştir."
        else:
            storm_line = f"Storm data indicates {total_cells} storm cells."

    density_txt = (
        f"{total_lightning_per_km2:.3f} {s.events_per_km2}"
        if isinstance(total_lightning_per_km2, (int, float))
        else str(total_lightning_per_km2)
    )

    if s.gemini_write_turkish:
        radius_txt = (
            f"{analysis_radius_km:.1f} km yarıçaplı alanda"
            if isinstance(analysis_radius_km, (int, float))
            else "alanda"
        )
        events_txt = f"toplam {total_events} olay" if total_events is not None else "olay"
        ring_sentence = _build_ring_distribution_sentence_tr(best_ring, outermost_ring, s)
        paragraph_intro = (
            f"{analysis_period} arasında {radius_txt} {events_txt} kaydedilmiştir ve "
            f"yıldırım-şimşek yoğunluğu {density_txt} olarak hesaplanmıştır. "
            f"{ring_sentence} "
        )
        turbine_sentence = (
            f"{top_turbine_name} türbini için log-risk skoru {max_risk_definition} sınıfındadır. "
            if is_single_turbine_report
            else f"Analiz alanında en yüksek log-risk skoruna sahip türbin {top_turbine_name} olup {max_risk_definition} sınıfındadır. "
        )
        method_sentence = (
            "Risk skoru, türbine yakınlık ve akım büyüklüğünün birleşimini yansıtır; "
            "modele göre her yere indirme darbesinin katkısı mesafe arttıkça üstel olarak azalır. "
        )
        if storm_over_turbine:
            storm_interaction_sentence = (
                f"Fırtına hücrelerinden {storm_near_turbine_count} tanesinin merkezi türbine "
                f"{storm_over_threshold_km:.1f} km'den yakın konumlanmıştır. "
            )
        elif isinstance(storm_closest_to_centroid_km, (int, float)):
            storm_interaction_sentence = (
                "Tespit edilen fırtına hücreleri herhangi bir türbinin üzerine denk gelmemiş; "
                f"en yakın hücre merkeze {storm_closest_to_centroid_km:.1f} km mesafeye kadar yaklaşmıştır. "
            )
        elif isinstance(storm_closest_distance_km, (int, float)):
            storm_interaction_sentence = (
                f"En yakın fırtına hücresi merkezi türbine {storm_closest_distance_km:.1f} km mesafededir. "
            )
        else:
            storm_interaction_sentence = ""
    else:
        # The leading space lives inside radius_txt so that an unknown radius
        # does not leave a stray space before the comma in the intro.
        radius_txt = f" within {analysis_radius_km:.1f} km" if isinstance(analysis_radius_km, (int, float)) else ""
        events_txt = f"{total_events} total lightning events" if total_events is not None else "N/A"
        ring_sentence = _build_ring_distribution_sentence_en(best_ring, outermost_ring, s)
        paragraph_intro = (
            f"For {analysis_period}, the dataset contains {events_txt}{radius_txt}, corresponding to an overall lightning density of {density_txt}. "
            f"{ring_sentence} "
        )
        turbine_sentence = (
            f"For {top_turbine_name}, the log-transformed risk score is the highest in this report and falls in the {max_risk_definition} category. "
            if is_single_turbine_report
            else f"Within the analyzed area, the turbine with the highest log-transformed risk score is {top_turbine_name}, which falls in the {max_risk_definition} category. "
        )
        method_sentence = (
            "This indicates the turbine was exposed to a combination of closer cloud-to-ground strikes and stronger current magnitudes. "
            "In the risk model, each cloud-to-ground strike contributes more when it is near the turbine and when |I| is larger, and contributions decrease exponentially with distance; the turbine risk score is the sum over all included strikes (with a log transform used for visualization). "
        )
        if storm_over_turbine:
            storm_interaction_sentence = (
                f"Storm interaction: storm-cell centroids came within {storm_over_threshold_km:.1f} km of the turbine (count={storm_near_turbine_count}). "
            )
        elif isinstance(storm_closest_to_centroid_km, (int, float)):
            storm_interaction_sentence = (
                f"Detected storm cells did not pass directly over any turbine; the nearest cell came within {storm_closest_to_centroid_km:.1f} km of the centroid. "
            )
        elif isinstance(storm_closest_distance_km, (int, float)):
            storm_interaction_sentence = (
                f"Storm interaction: the closest storm-cell centroid came within {storm_closest_distance_km:.1f} km of the turbine. "
            )
        else:
            storm_interaction_sentence = ""

    return (paragraph_intro + turbine_sentence + method_sentence + storm_interaction_sentence + storm_line).strip()
|
||
|
||
|
||
def generate_gemini_paragraph(context: dict[str, Any], api_key: str | None = None) -> str:
    """Return the commentary paragraph, using Gemini when possible.

    The API key is taken from ``api_key`` or the ``GEMINI_API_KEY``
    environment variable; the model name from ``GEMINI_MODEL`` (default
    ``"gemini-1.5-flash"``).  ``fallback_commentary`` is used instead when no
    key is available, when building the prompt or calling the model raises,
    or when the model returns no text.

    Args:
        context: Report values passed to the prompt builder and the fallback.
        api_key: Optional explicit API key.

    Returns:
        The model's stripped text, or the locally generated fallback.
    """
    import logging  # local import keeps this block self-contained

    lang = get_report_language()
    api_key_final = api_key or os.getenv("GEMINI_API_KEY")
    if not api_key_final:
        return fallback_commentary(context, lang)

    # `or` (rather than a getenv default) also covers GEMINI_MODEL="".
    model_name = os.getenv("GEMINI_MODEL") or "gemini-1.5-flash"

    try:
        # Built inside the try so that a prompt-construction error degrades to
        # the fallback text like any other failure on this path.
        prompt = build_gemini_prompt(context, lang)

        import google.generativeai as genai

        genai.configure(api_key=api_key_final)
        model = genai.GenerativeModel(model_name)

        resp = model.generate_content(
            prompt,
            generation_config={
                "temperature": 0.2,
                "max_output_tokens": 220,
            },
        )
        text = str(getattr(resp, "text", None) or "").strip()
    except Exception:
        # Best effort by design: any failure (missing package, network error,
        # response without text, ...) yields the fallback, but leave a trace
        # so the degradation is diagnosable.
        logging.getLogger(__name__).warning(
            "Gemini commentary generation failed; using fallback commentary.",
            exc_info=True,
        )
        return fallback_commentary(context, lang)

    return text or fallback_commentary(context, lang)
|