# Water-Quality-Inference-Non-Linear-Regression/p90_arcpy_script.py at main · ecorey/Water-Quality-Inference-Non-Linear-Regression · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
"""
The goal of this script is to identify stations with significant differences between actual and predicted P90 values.

Workflow overview:
1. Reads the .csv file of P90 predictions with pandas and saves a working copy of it.
2. Creates a table view from the .csv file.
3. Converts the table view to a point feature class.
4. Performs a spatial join between the actual and predicted P90 values.
5. Calculates the difference between actual and predicted P90 values.
6. Selects stations with significant differences and creates a new feature class.
7. Exports the selected stations to a new .csv file.
8. Confirms the completion of the process.
"""

import arcpy
import os
import pandas as pd


###################
##### PARAMS ######
###################

# --- environment ---
# let geoprocessing tools overwrite existing outputs so the script can be re-run
arcpy.env.overwriteOutput = True

# --- inputs ---
# project geodatabase that holds the input and output feature classes
project_gdb = r"C:\Users\bengs\OneDrive\Documents\ArcGIS\Projects\P90\P90.gdb"
# .csv of predictions written by the nn run
nn_inference_file = r"C:\Users\bengs\Downloads\P90\p90_gm_predictions_2023_using_data_through_2022.csv"
# point feature class with the actual values (full path inside the geodatabase)
actual_p90_points_fc = os.path.join(project_gdb, "c2023_P90_Scores_XYTableToPoint")

# --- intermediate datasets (names only; joined onto project_gdb where they are used) ---
# point feature class built from the prediction .csv
inferred_p90_points_fc = "P90_Prediction_Points_From_CSV"
# result of spatially joining the actual and the inferred points
spatial_join_inferred_and_actual = "Spatial_Join_Actual_and_Prediction"

# --- outputs ---
# point feature class holding the stations selected as inaccurate
innacurate_stations_point_fc = os.path.join(project_gdb, "Innacurate_Station_Points")
# .csv export of those selected stations
innacurate_stations_csv = r"C:\Users\bengs\Downloads\P90\Selected_Station_Query.csv"


###################
##### STEP 1 ######
###################


# read the prediction .csv with pandas
print("Reading the .csv file...")
df = pd.read_csv(nn_inference_file)


# Re-save the table (without the pandas index) as a working copy; this copy is
# the file STEP 2 opens as a table view.
# The path is derived from nn_inference_file rather than hard-coded, so the
# working copy always lands next to whatever input is configured in PARAMS.
# NOTE(review): no rows or columns are changed between the read and the write;
# presumably the round trip only normalises the file for ArcGIS - confirm.
nn_inference_file_modified = os.path.join(os.path.dirname(nn_inference_file),
                                          "modified_p90_predictions.csv")
df.to_csv(nn_inference_file_modified, index=False)


###################
##### STEP 2 ######
###################


# create a table view from the working-copy .csv
print("Creating table view from the .csv file...")
# name of the intermediate table view; only needed until STEP 3 has run
csv_table_view = "csv_view"
arcpy.management.MakeTableView(nn_inference_file_modified, csv_table_view)


###################
##### STEP 3 ######
###################


# create a point feature class from the table view, using the Long_DD / Lat_DD
# columns as x / y in the spatial reference with factory code 4326
print("Creating point feature class for inferred P90 values...")
try:
    arcpy.management.XYTableToPoint(csv_table_view,
                                    os.path.join(project_gdb, inferred_p90_points_fc),
                                    "Long_DD", "Lat_DD",
                                    coordinate_system=arcpy.SpatialReference(4326))
finally:
    # Release the intermediate view whether or not the conversion succeeded:
    # nothing after this step refers to it, and leaving it behind keeps a
    # reference to the working-copy .csv alive for the rest of the session.
    if arcpy.Exists(csv_table_view):
        arcpy.management.Delete(csv_table_view)


###################
##### STEP 4 ######
###################


# Spatial join: the actual-value points are the target features and the
# inferred-value points built in STEP 3 are the join features.  The result is
# written to its own feature class inside the project geodatabase.
# NOTE(review): CLOSEST is used without a search radius, so stations are paired
# purely by proximity - confirm this pairs each station with its own prediction.
print("Performing spatial join...")
spatial_join_output = os.path.join(project_gdb, spatial_join_inferred_and_actual)
arcpy.analysis.SpatialJoin(
    target_features=actual_p90_points_fc,
    join_features=os.path.join(project_gdb, inferred_p90_points_fc),
    out_feature_class=spatial_join_output,
    join_type="KEEP_COMMON",
    match_option="CLOSEST",
)


###################
##### STEP 5 ######
###################


# Add a DOUBLE field named P90_DIFF to the spatial-join output and fill it with
# the value of the P90 field minus the value of the Predicted_P90 field.
print("Creating and calculating the P90_DIFF field...")
p90_diff_field_name = "P90_DIFF"
arcpy.management.AddField(
    in_table=spatial_join_output,
    field_name=p90_diff_field_name,
    field_type="DOUBLE",
)
arcpy.management.CalculateField(
    in_table=spatial_join_output,
    field=p90_diff_field_name,
    expression="!P90! - !Predicted_P90!",
    expression_type="PYTHON3",
)


###################
##### STEP 6 ######
###################


# select the inaccurate stations with a query and create a new feature class
print("Selecting the stations that are considered inaccurate...")

# A station counts as inaccurate when its difference value lies outside
# +/- this threshold.  Keeping the value in one place means the upper and
# lower bounds of the query below cannot drift apart when it is tuned.
p90_diff_threshold = 13.9

# query stations that are considered inaccurate: difference outside the
# threshold band, restricted to rows where P90_Model_Accuracy is populated
query = (
    f"({p90_diff_field_name} > {p90_diff_threshold} "
    f"OR {p90_diff_field_name} < -{p90_diff_threshold}) "
    "AND P90_Model_Accuracy IS NOT NULL"
)

# copy the rows matching the query into a new feature class
arcpy.analysis.Select(spatial_join_output, innacurate_stations_point_fc, query)
print(f"Inaccurate stations point feature class created: {innacurate_stations_point_fc}")

# count the selected stations (GetCount returns a result whose first output is the row count)
selected_stations_count = int(arcpy.management.GetCount(innacurate_stations_point_fc)[0])
print(f"Number of stations matching query: {selected_stations_count}")


###################
##### STEP 7 ######
###################


# export the selected stations to a new .csv file
print("Creating selected stations .csv file...")

# os.path.split returns (folder, file name), which are passed on as the
# output location and output name of the exported table.
# NOTE(review): recent ArcGIS Pro documentation marks Table To Table as
# deprecated in favour of Export Table - consider migrating once the minimum
# supported ArcGIS Pro version allows it.
arcpy.conversion.TableToTable(innacurate_stations_point_fc,
                              *os.path.split(innacurate_stations_csv))
print(f"Inaccurate stations query results exported to: {innacurate_stations_csv}")


###################
##### STEP 8 ######
###################


# report that every step ran through
print("Process Completed Successfully!")