-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathprocess_data.py
More file actions
193 lines (153 loc) · 8.46 KB
/
process_data.py
File metadata and controls
193 lines (153 loc) · 8.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
import json
import pandas as pd
import matplotlib.pyplot as plt
def read_json_file(file_path):
    """
    Reads a JSON file and returns its content.

    The file is decoded explicitly as UTF-8 instead of the platform's locale
    encoding, so the same file parses identically on every operating system.

    :param file_path: Path to the JSON file.
    :return: Data contained in the JSON file, or None if the file could not be
        opened or parsed (the error is printed, not raised).
    """
    try:
        # Keep the guarded region down to the I/O and the parse
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and hand back None
        # so the caller can decide whether to continue without this file.
        print(f"Error reading {file_path}: {e}")
        return None
    print(f"Successfully read JSON file: {file_path}")
    return data
def process_block_data(block_data):
    """
    Processes block data into a DataFrame with a datetime index.

    :param block_data: List of block dictionaries. Each must provide the keys
        'height', 'size', 'txcount', 'time', 'mediantime' and 'difficulty';
        'time' is parsed as a number of seconds since the Unix epoch.
    :return: DataFrame with the kept columns (without 'time'), indexed by the
        parsed 'time' values and sorted in ascending time order.
    :raises KeyError: If a required column is missing (this includes empty input).
    """
    columns_to_keep_blocks = ['height', 'size', 'txcount', 'time', 'mediantime', 'difficulty']

    # Convert block data to DataFrame
    df_blocks = pd.DataFrame(block_data)

    # Work on a copy of the selection to avoid SettingWithCopyWarning
    df_filtered_blocks = df_blocks[columns_to_keep_blocks].copy()

    # Replace the column outright. Assigning through `.loc[:, 'time']` writes into
    # the existing column on pandas >= 2.0, so it does not reliably switch the
    # dtype from integer seconds to datetime64.
    df_filtered_blocks['time'] = pd.to_datetime(df_filtered_blocks['time'], unit='s')

    # Index by time and order chronologically
    return df_filtered_blocks.set_index('time').sort_index(ascending=True)
def process_transaction_data(transaction_data):
    """
    Processes transaction data into a DataFrame with a datetime index.

    :param transaction_data: List of transaction dictionaries. Each must provide
        the keys 'size', 'locktime', 'spends', 'sends', 'fee', 'blockindex',
        'blocktime', 'time' and 'confirmations'; 'time' is parsed as a number of
        seconds since the Unix epoch.
    :return: DataFrame with the kept columns (without 'time'), indexed by the
        parsed 'time' values and sorted in ascending time order.
    :raises KeyError: If a required column is missing (this includes empty input).
    """
    columns_to_keep_transactions = [
        'size', 'locktime', 'spends', 'sends', 'fee',
        'blockindex', 'blocktime', 'time', 'confirmations',
    ]

    # Convert the list of transaction dictionaries into a DataFrame
    df_transactions = pd.DataFrame(transaction_data)

    # Work on a copy of the selection to avoid SettingWithCopyWarning
    df_filtered_transactions = df_transactions[columns_to_keep_transactions].copy()

    # Replace the column outright. Assigning through `.loc[:, 'time']` writes into
    # the existing column on pandas >= 2.0, so it does not reliably switch the
    # dtype from integer seconds to datetime64.
    df_filtered_transactions['time'] = pd.to_datetime(df_filtered_transactions['time'], unit='s')

    # Index by time and order chronologically
    return df_filtered_transactions.set_index('time').sort_index(ascending=True)
def calculate_hourly_volume_and_fees(df_filtered_transactions):
    """
    Calculates hourly volume and fees aggregation based on the 'sends' and 'fee' columns.

    The input frame is left untouched: numeric coercion happens on a private copy
    of the two columns that are needed.

    :param df_filtered_transactions: DataFrame with transaction data and datetime index.
    :return: DataFrame indexed by hour with columns 'hourly_volume' (sum of 'sends')
        and 'hourly_fees' (sum of 'fee'). Values that cannot be parsed as numbers
        are ignored by the sums, and hours without transactions sum to 0.
    """
    # Copy only the columns being aggregated so the caller's frame is not modified
    numeric = df_filtered_transactions[['sends', 'fee']].copy()

    # Ensure 'sends' and 'fee' are numeric; unparseable entries become NaN
    numeric['sends'] = pd.to_numeric(numeric['sends'], errors='coerce')
    numeric['fee'] = pd.to_numeric(numeric['fee'], errors='coerce')

    # 'h' is the hourly alias; the upper-case 'H' spelling is deprecated in pandas 2.2
    df_hourly_aggregation = numeric.resample('h').sum()

    # Rename columns for clarity
    return df_hourly_aggregation.rename(columns={'sends': 'hourly_volume', 'fee': 'hourly_fees'})
def calculate_transactions_per_hour(df_filtered_transactions):
    """
    Calculates the number of transactions per hour.

    :param df_filtered_transactions: DataFrame with transaction data and datetime index.
    :return: DataFrame indexed by hour with a single 'transactions_per_hour' column
        holding the number of rows that fall in each hour (0 for empty hours).
    """
    # 'h' is the hourly alias; the upper-case 'H' spelling is deprecated in pandas 2.2
    hourly_counts = df_filtered_transactions.resample('h').size()

    # Convert the series to a DataFrame for easier handling
    return hourly_counts.to_frame(name='transactions_per_hour')
def calculate_hourly_closing_difficulty(df_filtered_blocks):
    """
    Calculates the closing difficulty level for each hour.

    :param df_filtered_blocks: DataFrame with block data and datetime index.
    :return: DataFrame indexed by hour with a single 'closing_difficulty' column
        holding the last 'difficulty' value seen in each hour. Hours that contain
        no block are NaN.
    """
    # 'h' is the hourly alias; the upper-case 'H' spelling is deprecated in pandas 2.2
    closing = df_filtered_blocks['difficulty'].resample('h').last()

    # Convert the series to a DataFrame for easier handling
    return closing.to_frame(name='closing_difficulty')
def plot_time_series(df_hourly_aggregation, df_transactions_per_hour, df_hourly_closing_difficulty):
    """
    Draws a three-panel figure (volume and fees, transaction counts, closing
    difficulty) and displays it.

    :param df_hourly_aggregation: DataFrame with 'hourly_volume' and 'hourly_fees' columns.
    :param df_transactions_per_hour: DataFrame with a 'transactions_per_hour' column.
    :param df_hourly_closing_difficulty: DataFrame with a 'closing_difficulty' column.
    """
    def _decorate_current_panel(title, y_label):
        # Title, axis labels, legend and grid for whichever subplot is active
        plt.title(title)
        plt.xlabel('Time')
        plt.ylabel(y_label)
        plt.legend()
        plt.grid(True)

    plt.figure(figsize=(15, 10))

    # Top panel: volume and fees drawn against the same hourly axis
    plt.subplot(3, 1, 1)
    aggregation_hours = df_hourly_aggregation.index
    plt.plot(aggregation_hours, df_hourly_aggregation['hourly_volume'], label='Hourly Volume', color='blue')
    plt.plot(aggregation_hours, df_hourly_aggregation['hourly_fees'], label='Hourly Fees', color='green')
    _decorate_current_panel('Hourly Transaction Volume and Fees', 'Value')

    # Middle panel: transaction counts as bars
    plt.subplot(3, 1, 2)
    plt.bar(
        df_transactions_per_hour.index,
        df_transactions_per_hour['transactions_per_hour'],
        width=0.03,
        color='orange',
        label='Transactions per Hour',
    )
    _decorate_current_panel('Number of Transactions per Hour', 'Number of Transactions')

    # Bottom panel: closing difficulty
    plt.subplot(3, 1, 3)
    plt.plot(
        df_hourly_closing_difficulty.index,
        df_hourly_closing_difficulty['closing_difficulty'],
        label='Closing Difficulty',
        color='red',
    )
    _decorate_current_panel('Hourly Closing Difficulty', 'Difficulty')

    # Tidy the spacing between panels, then display
    plt.tight_layout()
    plt.show()
if __name__ == "__main__":
    # File paths to the JSON files
    block_data_file_path = "nexa_last_blocks.json"  # Replace with your actual file path if different
    transaction_data_file_path = "nexa_transactions.json"  # Example filename; update with your actual timestamped file name

    # Start every plot input as "not available" so that a file which fails to
    # load cannot leave a name unbound when the plotting step is reached.
    df_hourly_closing_difficulty = None
    df_hourly_aggregation = None
    df_transactions_per_hour = None

    # Read and process block data
    block_data = read_json_file(block_data_file_path)
    if block_data:
        df_filtered_blocks = process_block_data(block_data)
        print("\nFiltered Block DataFrame with Datetime Index (Sorted in Ascending Order):")
        print(df_filtered_blocks)

        # Calculate hourly closing difficulty
        df_hourly_closing_difficulty = calculate_hourly_closing_difficulty(df_filtered_blocks)
        print("\nHourly Closing Difficulty DataFrame:")
        print(df_hourly_closing_difficulty)

    # Read and process transaction data
    transaction_data = read_json_file(transaction_data_file_path)
    if transaction_data:
        df_filtered_transactions = process_transaction_data(transaction_data)
        print("\nFiltered Transaction DataFrame with Datetime Index (Sorted in Ascending Order):")
        print(df_filtered_transactions)

        # Calculate hourly volume and fees aggregation
        df_hourly_aggregation = calculate_hourly_volume_and_fees(df_filtered_transactions)
        print("\nHourly Volume and Fees Aggregation DataFrame:")
        print(df_hourly_aggregation)

        # Calculate the number of transactions per hour
        df_transactions_per_hour = calculate_transactions_per_hour(df_filtered_transactions)
        print("\nNumber of Transactions per Hour DataFrame:")
        print(df_transactions_per_hour)

    # Plot the time series data only when both inputs produced their frames
    plot_inputs = (df_hourly_aggregation, df_transactions_per_hour, df_hourly_closing_difficulty)
    if all(frame is not None for frame in plot_inputs):
        plot_time_series(*plot_inputs)
    else:
        print("\nSkipping plots: block and/or transaction data is missing or empty.")