Need Help and Opinions Regarding a School Research Study We Conducted

# Synthetic photovoltaic (PV) fault-detection experiment.
#
# Generates a random data set of PV operating points, labels each point as
# faulty or healthy with a fixed rule, trains a small feed-forward neural
# network on it, and reports accuracy, a confusion matrix, a precision-recall
# curve and the training history.

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.metrics import (
    ConfusionMatrixDisplay,
    accuracy_score,
    classification_report,
    confusion_matrix,
    precision_recall_curve,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.models import Sequential

SEED = 42
N_SAMPLES = 1000

# Seed both NumPy (data generation) and TensorFlow (weight initialisation,
# dropout) so that repeated runs are comparable.
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Generate synthetic data.
# Each column is drawn independently from a uniform distribution; the draw
# order below determines the random values, so keep it unchanged.
data = {
    "Temperature (°C)": np.random.uniform(15, 45, N_SAMPLES),  # Ambient temperature
    "Irradiance (W/m²)": np.random.uniform(100, 1200, N_SAMPLES),  # Solar irradiance
    "Voltage (V)": np.random.uniform(280, 400, N_SAMPLES),  # Voltage output
    "Current (A)": np.random.uniform(4, 12, N_SAMPLES),  # Current output
}

# Create DataFrame
df = pd.DataFrame(data)
df["Power (W)"] = df["Voltage (V)"] * df["Current (A)"]

# Fault criteria: a sample is faulty (1) when power is below 2000 W or voltage
# is below 320 V.
# NOTE: the label is a deterministic function of voltage and current only;
# temperature and irradiance are sampled independently and never influence it.
df["Fault"] = np.where(
    (df["Power (W)"] < 2000) | (df["Voltage (V)"] < 320), 1, 0
)

# Preprocess data
features = ["Temperature (°C)", "Irradiance (W/m²)", "Voltage (V)", "Current (A)"]
target = "Fault"
X = df[features]
y = df[target]

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED
)

# Scale features. The scaler is fitted on the training split only, so no
# information from the test split leaks into preprocessing.
scaler = MinMaxScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Build ANN model
model = Sequential([
    Dense(128, input_dim=X_train_scaled.shape[1], activation="relu"),
    Dropout(0.3),
    Dense(64, activation="relu"),
    Dense(1, activation="sigmoid"),  # Sigmoid for binary classification
])
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

# Early stopping: stop once validation loss has not improved for 5 epochs and
# roll back to the best weights seen.
early_stopping = EarlyStopping(
    monitor="val_loss", patience=5, restore_best_weights=True
)

# Train ANN model
history = model.fit(
    X_train_scaled,
    y_train,
    epochs=50,
    batch_size=32,
    validation_split=0.2,
    verbose=1,
    callbacks=[early_stopping],
)

# Evaluate model.
# Predict once and reuse the probabilities for both the thresholded labels
# and the precision-recall curve.
y_scores = model.predict(X_test_scaled).ravel()
y_pred = (y_scores > 0.5).astype("int32")

print("ANN Accuracy:", accuracy_score(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Confusion Matrix
cm = confusion_matrix(y_test, y_pred)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=[0, 1])
disp.plot(cmap="Blues")
plt.title("Confusion Matrix (ANN)")
plt.show()

# Precision-Recall Curve
precision, recall, _ = precision_recall_curve(y_test, y_scores)
plt.plot(recall, precision, marker=".", label="ANN")
plt.title("Precision-Recall Curve")
plt.xlabel("Recall")
plt.ylabel("Precision")
plt.legend()
plt.show()

# Plot training history
plt.plot(history.history["accuracy"], label="Train Accuracy")
plt.plot(history.history["val_accuracy"], label="Validation Accuracy")
plt.title("Training and Validation Accuracy (ANN)")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.legend()
plt.show()

# ---------------------------------------------------------------------------
# Question from the original post:
#
# Does the synthetic data generated in this code, particularly the ranges for
# temperature, irradiance, voltage, and current, as well as the fault
# definition criteria, realistically reflect the operational parameters and
# fault conditions of photovoltaic systems? Could someone with expertise in
# photovoltaic system analysis validate whether this data and fault
# classification logic are appropriate and credible for use in a school
# research project? (Our research is about studying the effectiveness of
# machine learning-based photovoltaic systems for predictive maintenance).
# I tried implementing real-world data with this research, however with
# limited time and resources, I think using synthetic data would be the best
# option for this research.
# ---------------------------------------------------------------------------


[link] [comments]

Leave a Reply

Your email address will not be published. Required fields are marked *