ucsd-hep-ex · xoqhdgh1002 · Oct 9, 2024 · Oct 22, 2024 · Nov 4, 2024 · Nov 4, 2024
diff --git a/.gitignore b/.gitignore
@@ -1,2 +1,4 @@
 *~
-*.pyc
+*.pyc
+inputs/
+results/
diff --git a/DataGenerator.py b/DataGenerator.py
@@ -2,8 +2,6 @@
 import tensorflow
 import tensorflow.keras as keras
 import numpy as np
-import uproot
-import awkward as ak
 from utils import convertXY2PtPhi, preProcessing, to_np_array
 import h5py
 import os
@@ -14,10 +12,11 @@ class DataGenerator(tensorflow.keras.utils.Sequence):
     'Generates data for Keras'
 
     def __init__(self, list_files, batch_size=1024, n_dim=100, maxNPF=100, compute_ef=0,
-                 max_entry=100000000, edge_list=[]):
+                 max_entry=100000000, edge_list=[], n_features_pf_cat=2, feature_mode='full'):
         'Initialization'
         self.n_features_pf = 6
-        self.n_features_pf_cat = 2
+        self.n_features_pf_cat = n_features_pf_cat
+        self.feature_mode = feature_mode
         self.normFac = 1.
         self.batch_size = batch_size
         self.n_dim = n_dim
@@ -105,6 +104,12 @@ def mass2_calc(self, pi, pj):
         m2 = pij[:, :, 0]**2 - pij[:, :, 1]**2 - pij[:, :, 2]**2 - pij[:, :, 3]**2
         return m2
 
+    def __getstate__(self):
+        """Return the state to pickle (e.g. for multiprocessing workers): a shallow copy of __dict__ whose 'open_files' is replaced by None placeholders; no handle on this instance is closed here."""
+        state = self.__dict__.copy()  # shallow copy: rebinding a key below does not modify self
+        state['open_files'] = [None] * len(self.h5files)  # one empty slot per entry in self.h5files
+        return state
+
     def __data_generation(self, unique_files, starts, stops):
         'Generates data containing batch_size samples'
         # X : (n_samples, n_dim, n_channels)
@@ -114,18 +119,21 @@ def __data_generation(self, unique_files, starts, stops):
 
         # Generate data
         for ifile, start, stop in zip(unique_files, starts, stops):
-            self.X, self.y = self.__get_features_labels(ifile, start, stop)
-            Xs.append(self.X)
-            ys.append(self.y)
+            X, y = self.__get_features_labels(ifile, start, stop)
+            Xs.append(X)
+            ys.append(y)
 
         # Stack data if going over multiple files
         if len(unique_files) > 1:
-            self.X = np.concatenate(Xs, axis=0)
-            self.y = np.concatenate(ys, axis=0)
+            X = np.concatenate(Xs, axis=0)
+            y = np.concatenate(ys, axis=0)
+        else:
+            X = Xs[0]
+            y = ys[0]
 
         # process inputs
-        Y = self.y / (self.normFac)#(-self.normFac)
-        Xi, Xp, Xc1, Xc2 = preProcessing(self.X, self.normFac)
+        Y = y / self.normFac
+        Xi, Xp, Xc1, Xc2 = preProcessing(X, self.normFac, feature_mode=self.feature_mode)
 
         N = self.maxNPF
         Nr = N*(N-1)
@@ -166,7 +174,7 @@ def __data_generation(self, unique_files, starts, stops):
                 edge_stack.append(m2)
             ef = np.stack(edge_stack, axis=-1)
 
-            Xc = [Xc1, Xc2]
+            Xc = [Xc1, Xc2][:self.n_features_pf_cat]
             # dimension parameter for keras model
             self.emb_input_dim = {i: int(np.max(Xc[i][0:1000])) + 1 for i in range(self.n_features_pf_cat)}
 
@@ -177,7 +185,7 @@ def __data_generation(self, unique_files, starts, stops):
 
         #TODO: modify for new features
         else:
-            Xc = [Xc1, Xc2]
+            Xc = [Xc1, Xc2][:self.n_features_pf_cat]
             # dimension parameter for keras model
             self.emb_input_dim = {i: int(np.max(Xc[i][0:1000])) + 1 for i in range(self.n_features_pf_cat)}
 
@@ -200,9 +208,7 @@ def __get_features_labels(self, ifile, entry_start, entry_stop):
 
         if self.maxNPF < 100:
             order = X[:, :, 0].argsort(axis=1)[:, ::-1]
-            shape = np.shape(X)
-            for x in range(shape[0]):
-                X[x, :, :] = X[x, order[x], :]
+            X = X[np.arange(X.shape[0])[:, None], order, :]
             X = X[:, 0:self.maxNPF, :]
 
         return X, y
diff --git a/PROJECT_GUIDE.md b/PROJECT_GUIDE.md
@@ -0,0 +1,72 @@
+# 🛰️ FPGA-Native L1 Trigger MET ML Guide
+
+이 가이드는 CMS 실험의 **L1 트리거** 환경에서 동작할 **MET Reconstruction 머신러닝 모델**의 학습 파이프라인과 구조를 설명합니다. 이 프로젝트는 단순한 AI 모델을 넘어, 나노초(ns) 단위의 초저지연 하드웨어(FPGA) 탑재를 목표로 설계되었습니다.
+
+---
+
+## 🌳 1. 프로젝트 아키텍처 (숲 보기: 데이터 흐름)
+
+이 프로젝트는 대용량 입자 데이터를 읽어 물리 법칙을 따르는 모델을 학습시키는 **'지능형 공정 라인'**과 같습니다.
+
+### [데이터 파이프라인 모식도]
+```text
+[ inputs/ ] (HDF5 원재료)
+     │
+     ▼
+[ DataGenerator.py ] (데이터 셔틀) ──▶ [ utils.py ] (좌표 변환 & 가공)
+     │                                      (pt, phi -> px, py)
+     ▼
+[ train.py ] (공장장: 전체 조율) ◀───▶ [ configs/*.yaml ] (레시피)
+     │
+     ├──▶ [ models.py ] (설계도: 신경망 구조 빌드)
+     │
+     ├──▶ [ loss/loss.py ] (검수관: 물리적 오차 채점)
+     │
+     └──▶ [ results/ ] (최종 출하: 성능 보고서 & 그래프)
+```
+
+- **상호작용 핵심**: `train.py`가 공장장으로서 레시피(`yaml`)를 읽고, 셔틀(`DataGenerator`)로부터 재료를 받아 설계도(`models.py`)대로 모델을 찍어내어 검수(`loss`)하는 구조입니다.
+
+---
+
+## 🌿 2. 모델의 핵심 구조 (나무 보기: `graph_embedding`)
+
+가장 고도화된 모델인 **그래프 신경망(GNN)**은 입자들을 서로 연결된 '사회적 관계'로 파악하여 에너지를 계산합니다.
+
+### [연산 단계별 직관적 이해]
+
+1.  **입력 (Input)**: 입자 클라우드 (Particle Cloud)
+    *   **개별 정보**: 위치($\eta$), 신뢰도(PuppiWeight).
+    *   **정체성**: PDG ID(입자 종류) → **Embedding** (추상적 특징 추출).
+    *   **물리적 베이스**: 입자의 원래 운동량($p_x, p_y$).
+
+2.  **중간 연산 (Message Passing)**: "주변 탐색"
+    *   입자 A가 주변 입자 B, C에게 정보를 묻습니다. ("너는 에너지가 얼마나 되니?")
+    *   인접 행렬(Adjacency Matrix)을 통해 모든 입자가 서로 정보를 교환하며 자신의 상태를 업데이트합니다.
+
+3.  **최종 출력 (Output)**: 보정 가중치($w$)
+    *   각 입자별로 **"이 입자의 운동량을 얼마나 믿고 반영할 것인가"**에 대한 0~1 사이의 가중치를 계산합니다.
+    *   **물리 결합**: $\sum_i w_i \cdot (p_{x,i},\, p_{y,i})$, 즉 가중치와 원본 운동량의 곱을 모든 입자에 대해 합산하는 과정을 통해 최종 MET 벡터를 도출합니다.
+
+---
+
+## ⚙️ 3. 학습 파이프라인 (동작 원리 및 하드웨어 제약)
+
+FPGA는 자원이 한정적이고 부동소수점 연산에 약합니다. 이를 극복하기 위해 다음의 특수 전략이 적용됩니다.
+
+### ① 물리적 제약 기반 가중치 학습 (`t_mode=1`)
+*   **원리**: 모델이 MET 값을 처음부터 직접 예측하는 것이 아니라, 각 입자의 원본 운동량에 **보정 가중치**를 곱한 뒤 합산하는($\sum_i w_i \vec{p}_i$) 방식입니다.
+*   **목적**: 학습 속도를 높이고, 하드웨어에서 동작할 때 물리적으로 '말도 안 되는' 결과가 나오지 않게 안전장치를 거는 것입니다.
+
+### ② 양자화 전략 (Quantization with QKeras)
+*   **문제**: FPGA는 32비트 부동소수점 연산이 매우 무겁습니다.
+*   **해결**: `QKeras`를 사용하여 학습 단계에서부터 데이터를 **7비트 고정소수점(Fixed-point, 예: 총 7비트 중 정수부 2비트)** 등으로 깎아서 학습합니다.
+*   **효과**: 정확도는 유지하면서 FPGA 자원 소모량을 1/4 이하로 줄이고 연산 속도를 극대화합니다.
+
+### ③ 고속 연산 최적화 (Memory & LR)
+*   **Memory Caching**: 270GB+ 여유 RAM에 전처리된 데이터를 통째로 올려 IO 병목을 제거했습니다. (에폭당 3분 → 수 초로 단축)
+*   **Cyclical LR**: 학습률을 파도처럼 변화시켜 모델이 더 정밀한 최솟값을 찾도록 유도합니다. (가벼운 모델의 한계 극복)
+
+---
+
+**결론**: 이 레포지토리는 **물리적 상식**을 신경망에 주입하고, 이를 **FPGA가 처리 가능한 초경량 형태**로 변환하기 위한 최적의 학습 환경을 제공합니다.
diff --git a/PROJECT_OVERVIEW.md b/PROJECT_OVERVIEW.md
@@ -0,0 +1,102 @@
+# 🚀 L1METML Project Deep Dive
+
+이 프로젝트는 CMS 실험의 **L1 트리거(Level-1 Trigger)** 환경에서 머신러닝을 활용해 **결손 가로 에너지(MET)**를 초고속·고정밀로 재구성하는 시스템입니다.
+
+---
+
+## 🏗️ 1. 시스템 아키텍처 (System Architecture)
+
+모델의 내부 구조와 데이터 흐름을 시각화한 모식도입니다.
+
+```mermaid
+graph TD
+    subgraph Input_Layer [입력 데이터]
+        A[Continuous Features<br/>eta, puppiWeight] --> D[Concatenate]
+        B[Categorical Features<br/>pdgId] --> C[Embedding Layer]
+        C --> D
+        P[Raw Momentum<br/>px, py]
+    end
+
+    subgraph Feature_Extraction [특징 추출 - Dense Blocks]
+        D --> E[Dense Layer 1: 64]
+        E --> F[Batch Norm + Tanh]
+        F --> G[Dense Layer 2: 32]
+        G --> H[Batch Norm + Tanh]
+        H --> I[Dense Layer 3: 8]
+    end
+
+    subgraph Physics_Logic [물리 로직 - Weighting]
+        I --> J[met_weight Layer]
+        J --> K[BN: met_weight_minus_one]
+        K --> L{Multiply}
+        P --> L
+    end
+
+    subgraph Output [최종 결과]
+        L --> M[Global Sum / Pooling]
+        M --> N[Predicted MET_x, MET_y]
+    end
+
+    style K fill:#f9f,stroke:#333,stroke-width:2px
+    style L fill:#bbf,stroke:#333,stroke-width:2px
+```
+
+---
+
+## 📊 2. 데이터 구조 상세 (Data Specification)
+
+모델이 입자를 인식하는 데 쓰는 입력은 아래 표와 같이 크게 세 가지로 나뉩니다.
+
+| 분류 | 특징 (Features) | 처리 방식 | 역할 |
+| :--- | :--- | :--- | :--- |
+| **연속형 (Cont.)** | $\eta$, PuppiWeight | Scaling & Concatenation | 입자의 기하학적 위치와 신뢰도 제공 |
+| **범주형 (Cat.)** | PDG ID (입자 종류) | **Embedding (8-dim)** | 입자의 고유 특성(질량, 전하 등) 추상화 |
+| **원시 물리량** | $p_x, p_y$ | **Direct Pass-through** | 최종 에너지 합산의 베이스라인 |
+
+---
+
+## ⚙️ 3. 핵심 알고리즘 설명
+
+### ① DeepMET 방식의 가중치 학습
+이 모델은 MET 값을 직접 예측하지 않습니다. 대신 **"각 입자의 운동량을 얼마나 믿을 것인가($w$)"**를 학습합니다.
+*   **수식**: $\vec{MET}_{pred} = \sum_{i=1}^{N} w_i \cdot \vec{p}_{i, raw}$
+*   **이점**: $w=1$이면 기존의 물리적 합산과 동일해집니다. 모델은 여기서부터 미세한 보정값만 찾으면 되므로 학습이 매우 빠르고 물리적으로 안정적입니다.
+
+### ② 하드웨어 지향 설계 (FPGA/L1 Trigger)
+*   **Low Latency**: 복잡한 RNN이나 Transformer 대신 가벼운 Dense 레이어를 사용하여 수십 나노초 내에 연산이 가능하도록 설계되었습니다.
+*   **Quantization (Qkeras)**: 향후 `qkeras`를 통해 비트 수를 줄여(예: 8-bit) 하드웨어 자원 소모를 최소화할 수 있는 구조를 갖추고 있습니다.
+
+---
+
+## 🔄 4. 전체 워크플로우 (End-to-End Workflow)
+
+1.  **Data Prep**: `convertNanoToHDF5.py`를 통해 ROOT 파일을 H5 포맷으로 변환.
+2.  **Config**: `configs/eta_puppi_pdgid.yaml`에서 실험 파라미터 설정.
+3.  **Generator**: `DataGenerator.py`가 데이터를 CPU에서 읽고 전처리(`preProcessing`).
+4.  **Acceleration**: `train.py`에서 `.cache()`를 이용해 전처리된 데이터를 RAM에 상주시켜 IO 병목 제거.
+5.  **Training**: `custom_loss.py`의 물리적 손실 함수를 기반으로 가중치 최적화.
+6.  **Evaluation**: `utils.py`의 `MakePlots`를 호출하여 해상도(Resolution) 및 반응성(Response) 그래프 생성.
+
+---
+
+## 📉 5. 손실 함수 (Loss Function)의 물리적 의미
+
+단순한 MSE(Mean Squared Error) 외에 물리적 특성을 고려한 **Custom Loss**를 사용합니다.
+*   **MSE/MAE Weight**: 예측값과 실제값 사이의 거리 최소화.
+*   **Symmetry Penalty**: $MET_x$와 $MET_y$의 예측 편향이 생기지 않도록 대칭성 유지.
+*   **Response Correction**: 에너지 측정값이 한쪽으로 치우치지 않도록 보정 유도.
+
+---
+
+## 🚀 6. 프로젝트 실행 가이드
+
+```bash
+# 1. 환경 설정 (Conda/L1METML 환경)
+./conda_setup.sh
+
+# 2. 학습 실행 (메모리 캐싱 적용 버전)
+python train.py --config configs/eta_puppi_pdgid.yaml
+
+# 3. 결과 확인
+# results/eta_puppi_pdgid/ 폴더 내 png 파일들 확인
+```
diff --git a/configs/eta_puppi_pdgid.yaml b/configs/eta_puppi_pdgid.yaml
@@ -0,0 +1,52 @@
+callbacks:
+  learning_rate:
+    type: cyclical
+  cyclical_lr:
+    base_lr: 0.0003
+    max_lr: 0.001
+    mode: triangular2
+  early_stopping:
+    monitor: val_loss
+    patience: 10
+data:
+  compute_edge_feat: 0
+  edge_features: []
+  feature_mode: eta_puppi_pdgid
+  maxNPF: 128
+  n_features_pf: 4        # 2 continuous (eta, puppi) + 2 for pxpy = 4 total
+  n_features_pf_cat: 1    # only pdgId
+  normFac: 100
+  preprocessed: true
+loss:
+  baseline_loss: true
+  mse_weight: 1.0
+  mae_weight: 1.0
+  add_respcorr: false
+  use_symmetry: false
+  symmetry_weight: 1.0
+model:
+  activation: tanh
+  emb_out_dim: 8
+  type: dense_embedding
+  units:
+  - 64
+  - 32
+  - 8
+  with_bias: false
+optimizer:
+  clipnorm: 1.0
+  learning_rate: 1.0
+  type: adam
+paths:
+  input: /cms/ldap_home/taebh/ml_workspace/projects/L1METML/inputs
+  output: /cms/ldap_home/taebh/ml_workspace/projects/L1METML/results/eta_puppi_pdgid
+quantization:
+  enabled: false
+  int_bits: 2
+  total_bits: 7
+training:
+  batch_size: 16384
+  epochs: 200
+  mode: 1
+  normFac: 100
+  workflow_type: dataGenerator
diff --git a/configs/eta_puppi_pdgid_v2.yaml b/configs/eta_puppi_pdgid_v2.yaml
@@ -0,0 +1,53 @@
+callbacks:
+  learning_rate:
+    type: cyclical
+  cyclical_lr:
+    base_lr: 0.0003
+    max_lr: 0.001
+    mode: triangular2
+  early_stopping:
+    monitor: val_loss
+    patience: 20
+data:
+  compute_edge_feat: 0
+  edge_features: []
+  feature_mode: full
+  maxNPF: 128
+  n_features_pf: 6        # full: pt, eta, phi, puppi + 2 for pxpy = 6 total
+  n_features_pf_cat: 2    # pdgId + charge
+  normFac: 100
+  preprocessed: true
+loss:
+  baseline_loss: true
+  mse_weight: 1.0
+  mae_weight: 1.0
+  add_respcorr: true
+  respcorr_factor: 1000
+  use_symmetry: false
+  symmetry_weight: 1.0
+model:
+  activation: tanh
+  emb_out_dim: 16
+  type: dense_embedding
+  units:
+  - 128
+  - 64
+  - 32
+  with_bias: true
+optimizer:
+  clipnorm: 1.0
+  learning_rate: 1.0
+  type: adam
+paths:
+  input: /cms/ldap_home/taebh/ml_workspace/projects/L1METML/inputs
+  output: /cms/ldap_home/taebh/ml_workspace/projects/L1METML/results/eta_puppi_pdgid_v2
+quantization:
+  enabled: false
+  int_bits: 2
+  total_bits: 7
+training:
+  batch_size: 16384
+  epochs: 200
+  mode: 1
+  normFac: 100
+  workflow_type: dataGenerator