Add machine type and gcsfuse version to the BigQuery schema

vipnydav · vipnydav · commit edb99bc49b5d · 2025-05-29T11:43:15.000Z
diff --git a/perf-benchmarking-for-releases/requirements.txt b/perf-benchmarking-for-releases/requirements.txt
@@ -1 +1,2 @@
 google-cloud-bigquery
+requests
diff --git a/perf-benchmarking-for-releases/run-benchmarks.sh b/perf-benchmarking-for-releases/run-benchmarks.sh
@@ -93,15 +93,15 @@ cleanup() {
     # Delete VM if it exists
     if gcloud compute instances describe "${VM_NAME}" --zone="${VM_ZONE}" --project="${PROJECT_ID}" >/dev/null 2>&1; then
         echo "Deleting VM: ${VM_NAME}"
-        gcloud compute instances delete "${VM_NAME}" --zone="${VM_ZONE}" --project="${PROJECT_ID}" --delete-disks=all -q >/dev/null
+        gcloud compute instances delete "${VM_NAME}" --zone="${VM_ZONE}" --project="${PROJECT_ID}" --delete-disks=all -q >/dev/null 2>&1
     else
         echo "VM '${VM_NAME}' not found; skipping deletion."
     fi
 
     # Delete GCS bucket with test data if it exists
     if gcloud storage buckets list --project="${PROJECT_ID}" --filter="name:(${GCS_BUCKET_WITH_FIO_TEST_DATA})" --format="value(name)" | grep -q "^${GCS_BUCKET_WITH_FIO_TEST_DATA}$"; then
         echo "Deleting GCS bucket: ${GCS_BUCKET_WITH_FIO_TEST_DATA}"
-        gcloud storage rm -r "gs://${GCS_BUCKET_WITH_FIO_TEST_DATA}" -q >/dev/null
+        gcloud storage rm -r "gs://${GCS_BUCKET_WITH_FIO_TEST_DATA}" -q >/dev/null 2>&1
     else
         echo "Bucket '${GCS_BUCKET_WITH_FIO_TEST_DATA}' not found; skipping deletion."
     fi
@@ -166,7 +166,7 @@ gcloud compute instances create "${VM_NAME}" \
     --network-interface=network-tier=PREMIUM,nic-type=GVNIC \
     --scopes=https://www.googleapis.com/auth/cloud-platform,https://www.googleapis.com/auth/devstorage.read_write \
     --network-performance-configs=total-egress-bandwidth-tier=TIER_1 \
-    --metadata GCSFUSE_VERSION="${GCSFUSE_VERSION}",GCS_BUCKET_WITH_FIO_TEST_DATA="${GCS_BUCKET_WITH_FIO_TEST_DATA}",RESULTS_BUCKET_NAME="${RESULTS_BUCKET_NAME}",LSSD_ENABLED="${LSSD_ENABLED}" \
+    --metadata GCSFUSE_VERSION="${GCSFUSE_VERSION}",GCS_BUCKET_WITH_FIO_TEST_DATA="${GCS_BUCKET_WITH_FIO_TEST_DATA}",RESULTS_BUCKET_NAME="${RESULTS_BUCKET_NAME}",LSSD_ENABLED="${LSSD_ENABLED}",MACHINE_TYPE="${MACHINE_TYPE}" \
     --metadata-from-file=startup-script=starter-script.sh \
     ${VM_LOCAL_SSD_ARGS}
 echo "VM created. Benchmarks will run on the VM."
diff --git a/perf-benchmarking-for-releases/starter-script.sh b/perf-benchmarking-for-releases/starter-script.sh
@@ -58,11 +58,6 @@ BENCHMARK_LOG_FILE="/tmp/benchmark_run.log"
 exec > >(tee -a "$BENCHMARK_LOG_FILE") 2>&1
 
 cleanup() {
-    # Unmount GCSFuse mount point
-    if mount | grep -q "$MNT"; then
-        sudo umount "$MNT" || echo "Failed to unmount $MNT"
-    fi
-
     # Upload logs and details
     if [[ -f details.txt ]]; then
         gcloud storage cp details.txt "$RESULT_PATH" || echo "Failed to upload details.txt"
@@ -159,10 +154,6 @@ if [[ "$LSSD_ENABLED" == "true" ]]; then
     }
 fi
 
-# Mount GCS bucket using gcsfuse
-mkdir -p "$MNT"
-"$GCSFUSE_BIN" --implicit-dirs "$GCS_BUCKET_WITH_FIO_TEST_DATA" "$MNT"
-
 # Clone the tools repo for uploading fio results to bigquery
 git clone --single-branch --branch fio-to-bigquery https://github.com/GoogleCloudPlatform/gcsfuse-tools.git
 
@@ -177,10 +168,19 @@ for fio_job_file in "$FIO_JOB_DIR"/*.fio; do
 
     sudo sh -c "echo 3 > /proc/sys/vm/drop_caches"
 
-    RESULT_FILE="gcsfuse-${job_name}-benchmark-$(date +%Y%m%d%H%M%S).json"
+    # Mount GCS bucket
+    mkdir -p "$MNT"
+    "$GCSFUSE_BIN" --implicit-dirs "$GCS_BUCKET_WITH_FIO_TEST_DATA" "$MNT"
+
+    RESULT_FILE="gcsfuse-${job_name}-benchmark.json"
 
     DIR="$MNT" fio "$fio_job_file" --output-format=json --output="$RESULT_FILE"
 
+    # Unmount the bucket after each run
+    if mount | grep -q "$MNT"; then
+        sudo umount "$MNT" || echo "Failed to unmount $MNT"
+    fi
+
     # Upload result to BigQuery (default project/dataset/table are built into the script)
     python3 gcsfuse-tools/perf-benchmarking-for-releases/upload-fio-output-to-bigquery.py \
       --result-file "$RESULT_FILE"
diff --git a/perf-benchmarking-for-releases/upload-fio-output-to-bigquery.py b/perf-benchmarking-for-releases/upload-fio-output-to-bigquery.py
@@ -12,6 +12,22 @@
 
 args = parser.parse_args()
 
+import requests
+
+def fetch_metadata(attribute):
+    """Return a GCE instance metadata attribute as text, or "unknown" on any failure."""
+    endpoint = f"http://metadata.google.internal/computeMetadata/v1/instance/attributes/{attribute}"
+    try:
+        resp = requests.get(endpoint, headers={"Metadata-Flavor": "Google"}, timeout=5)
+        resp.raise_for_status()
+        return resp.text
+    except Exception as err:
+        print(f"Failed to fetch metadata attribute '{attribute}': {err}")
+        return "unknown"
+
+machine_type = fetch_metadata("MACHINE_TYPE")
+gcsfuse_version = fetch_metadata("GCSFUSE_VERSION")
+
 # Load the results file
 with open(args.result_file) as f:
     try:
@@ -36,14 +52,16 @@
 # Create table if it doesn't exist
 schema = [
     bigquery.SchemaField("job_name", "STRING"),
+    bigquery.SchemaField("gcsfuse_version", "STRING"),
+    bigquery.SchemaField("machine_type", "STRING"),
     bigquery.SchemaField("start_time", "TIMESTAMP"),
     bigquery.SchemaField("file_size", "STRING"),
     bigquery.SchemaField("block_size", "STRING"),
     bigquery.SchemaField("nrfiles", "INTEGER"),
     bigquery.SchemaField("read_bandwidth_MiBps", "FLOAT"),
     bigquery.SchemaField("write_bandwidth_MiBps", "FLOAT"),
     bigquery.SchemaField("IOPS", "FLOAT"),
-    bigquery.SchemaField("duration_seconds", "FLOAT"),
+    bigquery.SchemaField("avg_latency_ms", "FLOAT"),
 ]
 
 try:
@@ -59,31 +77,45 @@
 rows = []
 for job in data.get("jobs", []):
     jobname = job.get("jobname")
-    # Correctly access job options using .get() for nested keys
     job_options = job.get("job options", {})
 
-    # Use get with a default value for each option and handle string conversion
-    file_size = job_options.get("filesize", data.get("global options",{}).get("filesize", "unknown"))
-    block_size = job_options.get("bs", data.get("global options",{}).get("bs", "unknown"))
-    
-    # Convert nrfiles to int, handle missing values and potential string values
-    nrfiles_str = job_options.get("nrfiles", data.get("global options",{}).get("nrfiles"))
+    file_size = job_options.get("filesize", data.get("global options", {}).get("filesize", "unknown"))
+    block_size = job_options.get("bs", data.get("global options", {}).get("bs", "unknown"))
+
+    nrfiles_str = job_options.get("nrfiles", data.get("global options", {}).get("nrfiles"))
     nrfiles = int(nrfiles_str) if nrfiles_str and isinstance(nrfiles_str, str) and nrfiles_str.isdigit() else 0
 
-    read_bw = job.get("read", {}).get("bw_bytes", 0) / (1024 * 1024)
-    write_bw = job.get("write", {}).get("bw_bytes", 0) / (1024 * 1024)
-    iops = job.get("read", {}).get("iops", 0.0) + job.get("write", {}).get("iops", 0.0)
+    read = job.get("read", {})
+    write = job.get("write", {})
+
+    read_bw = read.get("bw_bytes", 0) / (1024 * 1024)
+    write_bw = write.get("bw_bytes", 0) / (1024 * 1024)
+    iops = read.get("iops", 0.0) + write.get("iops", 0.0)
+
+    read_lat_ns = read.get("lat_ns", {}).get("mean")
+    write_lat_ns = write.get("lat_ns", {}).get("mean")
+
+    # fio reports mean == 0 for a direction that did no I/O (e.g. "write" in a
+    # read-only job); averaging that in would halve the real latency.
+    active_lat_ns = [lat for lat in (read_lat_ns, write_lat_ns) if lat]
+
+    if active_lat_ns:
+        avg_latency_ms = sum(active_lat_ns) / len(active_lat_ns) / 1_000_000
+    else:
+        avg_latency_ms = 0.0
 
     rows.append({
         "job_name": jobname,
+        "gcsfuse_version": gcsfuse_version,
+        "machine_type": machine_type,
         "start_time": start_time,
         "file_size": file_size,
         "block_size": block_size,
         "nrfiles": nrfiles,
         "read_bandwidth_MiBps": read_bw,
         "write_bandwidth_MiBps": write_bw,
         "IOPS": iops,
-        "duration_seconds": job.get("job_runtime", 0) / 1000,
+        "avg_latency_ms": avg_latency_ms,
     })
 
 # Insert rows