I would like to parse this json response:
我想解析这个json响应:
{
"count":2,
"next":null,
"previous":null,
"results":[
{
"id":123,
"type_vname":"Suspicious Remote Desktop",
"category":"LATERAL MOVEMENT",
"src_ip":"192.168.1.1",
"state":"fixed",
"description":null,
"t_score":70,
"c_score":70,
"first_timestamp":"2017-12-13T18:51:22Z",
"last_timestamp":"2017-12-13T18:51:22Z",
"detection_detail_set":[
{
"id":1234567,
"description":"Suspicious Remote Desktop",
"dst_host_id":1234,
"dst_ip":"192.168.1.1",
"count":null,
"count_pos":null,
"dst_dns":null,
"dst_port":80,
"dst_geo":null,
"proto":null,
"first_timestamp":"2017-12-13T18:51:22Z",
"last_timestamp":"2017-12-13T18:51:22Z",
"total_bytes_sent":null,
"total_bytes_rcvd":null,
"url":"https://192.168.1.2/api/detection_details"
},
{
"id":89123456,
"description":"Suspicious Remote Desktop",
"dst_host_id":5678,
"dst_ip":"192.168.1.1",
"count":null,
"count_pos":null,
"dst_dns":null,
"dst_port":80,
"dst_geo":null,
"proto":null,
"first_timestamp":"2017-12-13T18:50:18Z",
"last_timestamp":"2017-12-13T18:50:18Z",
"total_bytes_sent":null,
"total_bytes_rcvd":null,
"url":"https://192.168.1.2/api/detection_details"
}
],
"dns_set":[
],
"relayed_comm_set":[
],
"sensor_luid":"abc1pdj",
"summary":{
"internal_targets":1,
"anomalous_events":2,
"probable_owner":"user"
},
"host":"https://192.168.1.2/api/detection_details",
"url":"https://192.168.1.2/api/detection_details",
"tags":[
],
"targets_key_asset":false,
"triage_rule_id":null
},
{
"id":1235,
"type_vname":"Suspicious Remote Desktop",
"category":"LATERAL MOVEMENT",
"src_ip":"192.168.1.2",
"state":"fixed",
"description":null,
"t_score":70,
"c_score":70,
"first_timestamp":"2017-12-11T19:11:46Z",
"last_timestamp":"2017-12-11T19:11:46Z",
"detection_detail_set":[
{
"id":123445,
"description":"Suspicious Remote Desktop",
"dst_host_id":4958,
"dst_ip":"192.168.1.2",
"count":null,
"count_pos":null,
"dst_dns":null,
"dst_port":80,
"dst_geo":null,
"proto":null,
"first_timestamp":"2017-12-11T19:11:46Z",
"last_timestamp":"2017-12-11T19:11:46Z",
"total_bytes_sent":null,
"total_bytes_rcvd":null,
"url":"https://192.168.1.2/api/detection_details"
},
{
"id":1274857,
"description":"Suspicious Remote Desktop",
"dst_host_id":15423,
"dst_ip":"192.168.1.2",
"count":null,
"count_pos":null,
"dst_dns":null,
"dst_port":80,
"dst_geo":null,
"proto":null,
"first_timestamp":"2017-12-11T19:11:46Z",
"last_timestamp":"2017-12-11T19:11:46Z",
"total_bytes_sent":null,
"total_bytes_rcvd":null,
"url":"https://192.168.1.2/api/detection_details"
},
{
"id":137847,
"description":"Suspicious Remote Desktop",
"dst_host_id":93238,
"dst_ip":"192.168.1.2",
"count":null,
"count_pos":null,
"dst_dns":null,
"dst_port":80,
"dst_geo":null,
"proto":null,
"first_timestamp":"2017-12-11T19:10:53Z",
"last_timestamp":"2017-12-11T19:10:53Z",
"total_bytes_sent":null,
"total_bytes_rcvd":null,
"url":"https://192.168.1.2/api/detection_details"
},
{
"id":2376849874,
"description":"Suspicious Remote Desktop",
"dst_host_id":15423,
"dst_ip":"192.168.1.2",
"count":null,
"count_pos":null,
"dst_dns":null,
"dst_port":80,
"dst_geo":null,
"proto":null,
"first_timestamp":"2017-12-11T19:10:53Z",
"last_timestamp":"2017-12-11T19:10:53Z",
"total_bytes_sent":null,
"total_bytes_rcvd":null,
"url":"https://192.168.1.2/api/detection_details"
}
],
"dns_set":[
],
"relayed_comm_set":[
],
"sensor_luid":"abcery",
"summary":{
"internal_targets":1,
"anomalous_events":4,
"probable_owner":"user"
},
"host":"https://192.168.1.2/api/detection_details",
"url":"https://192.168.1.2/api/detection_details",
"tags":[
],
"targets_key_asset":false,
"triage_rule_id":null
}
]
}
into a DataFrame, so that I can write it out with to_csv to a .csv file that has the following headers for the json data:
解析成一个DataFrame,这样我就可以用to_csv把它写入.csv文件,json数据对应的列标题如下:
count
next
previous
results_id
results_type_vname
results_category
results_src_ip
results_state
results_description
results_t_score
results_c_score
results_first_timestamp
results_last_timestamp
results_dns_set
results_relayed_comm_set
results_sensor_luid
results_host
results_url
results_tags
results_targets_key_asset
results_triage_rule_id
summary_internal_targets
summary_anomalous_events
summary_probable_owner
detection_id
detection_description
detection_dst_host_id
detection_dst_ip
detection_count
detection_count_pos
detection_dst_dns
detection_dst_port
detection_dst_geo
detection_proto
detection_first_timestamp
detection_last_timestamp
detection_total_bytes_sent
detection_total_bytes_rcvd
detection_url
I have searched SO and wrote some of my own code here (json response is in 'data'):
我已经搜索了,并在这里写了一些我自己的代码(json响应在“data”中):
import pandas as pd
from pandas.io.json import json_normalize
# Wrap the decoded JSON response (`data`) in a DataFrame so that the "results"
# entries can be addressed as the column df['results'].
df = pd.DataFrame(data)
# Expand each result's "detection_detail_set" list into one row per detection.
# The detection fields get the "results_" prefix, and only the parent's
# "category" and "id" are carried along as meta columns -- no other
# results-level field (nor the nested "summary") is requested here, which is
# why the headers listed below are incomplete.
df = json_normalize(data=df['results'], record_path='detection_detail_set',
meta=['category', 'id'], record_prefix='results_', errors='ignore')
# NOTE(review): head() keeps only the first rows (five by default), so the CSV
# written below is truncated -- presumably a debugging leftover; confirm.
df = df.head()
# Write the flattened rows without the DataFrame index column.
df.to_csv('Output.csv', index=False)
I get the following headers (with data) in the response:
我在回复中得到了如下的标题(带数据):
results_count
results_count_pos
results_description
results_dst_dns
results_dst_geo
results_dst_host_id
results_dst_ip
results_dst_port
results_first_timestamp
results_id
results_last_timestamp
results_proto
results_total_bytes_rcvd
results_total_bytes_sent
results_url
category
id
I feel like I am halfway there. I have tried several combinations and advice from other SO posts to get the remaining data. Nothing has worked thus far. I know the issue I am encountering is due to the nesting, just need to find a way to get the desired result. I appreciate your help!
我觉得我已经完成一半了。我尝试了几种组合,也参考了其他SO帖子里的建议来获取剩余的数据,但到目前为止都没有奏效。我知道我遇到的问题是由嵌套引起的,只需要找到一种方法来得到想要的结果。感谢你的帮助!
1 个解决方案
#1
0
That seems like the right idea; you just need to merge the `results` layer with the unpacked `detection` layer:
这个思路看起来是对的,只需要把 `results` 层与展开后的 `detection` 层合并:
# Build one wide table: every results-level field repeated on each of its
# detection rows. Relies on `pd` and on `df` (the DataFrame built from the
# JSON response) from the question's code.

# Plain list of result dicts; json_normalize accepts this on every pandas
# version, whereas Series input is handled differently across releases.
records = df["results"].tolist()

# Results layer: one row per result. Nested dicts such as "summary" are
# flattened into dotted column names ("summary.internal_targets", ...).
# pd.json_normalize (pandas >= 1.0) replaces the deprecated
# pandas.io.json.json_normalize.
results = (pd.json_normalize(records)
           # Keyword form: recent pandas no longer accepts the positional
           # axis argument used by `.drop(name, 1)`.
           .drop(columns="detection_detail_set")
           .add_prefix("results_"))
# Collapse "results_summary.<field>" to "results_<field>". regex=True is
# required explicitly: since pandas 2.0 str.replace treats the pattern as a
# literal string by default, so the escaped dot would never match.
results.columns = results.columns.str.replace("results_summary\\.", "results_",
                                              regex=True)

# Detection layer: one row per entry of "detection_detail_set", keeping the
# parent's "category" and "id" as (unprefixed) meta columns to join on.
detection = pd.json_normalize(records, meta=['category', 'id'],
                              record_path='detection_detail_set',
                              record_prefix="detection_", errors='ignore')

# Left join so that a result with no detections still yields a row.
# The join keys "category" and "id" remain as duplicate trailing columns;
# drop them afterwards if they are not wanted in the CSV.
master = results.merge(detection, how="left",
                       left_on=["results_id", "results_category"],
                       right_on=["id", "category"])
master.columns
Index(['results_c_score', 'results_category', 'results_description',
'results_dns_set', 'results_first_timestamp', 'results_host',
'results_id', 'results_last_timestamp', 'results_relayed_comm_set',
'results_sensor_luid', 'results_src_ip', 'results_state',
'results_anomalous_events', 'results_internal_targets',
'results_probable_owner', 'results_t_score', 'results_tags',
'results_targets_key_asset', 'results_triage_rule_id',
'results_type_vname', 'results_url', 'detection_count',
'detection_count_pos', 'detection_description', 'detection_dst_dns',
'detection_dst_geo', 'detection_dst_host_id', 'detection_dst_ip',
'detection_dst_port', 'detection_first_timestamp', 'detection_id',
'detection_last_timestamp', 'detection_proto',
'detection_total_bytes_rcvd', 'detection_total_bytes_sent',
'detection_url', 'category', 'id'],
dtype='object')
#1
0
That seems like the right idea; you just need to merge the `results` layer with the unpacked `detection` layer:
这个思路看起来是对的,只需要把 `results` 层与展开后的 `detection` 层合并:
# Build one wide table: every results-level field repeated on each of its
# detection rows. Relies on `pd` and on `df` (the DataFrame built from the
# JSON response) from the question's code.

# Plain list of result dicts; json_normalize accepts this on every pandas
# version, whereas Series input is handled differently across releases.
records = df["results"].tolist()

# Results layer: one row per result. Nested dicts such as "summary" are
# flattened into dotted column names ("summary.internal_targets", ...).
# pd.json_normalize (pandas >= 1.0) replaces the deprecated
# pandas.io.json.json_normalize.
results = (pd.json_normalize(records)
           # Keyword form: recent pandas no longer accepts the positional
           # axis argument used by `.drop(name, 1)`.
           .drop(columns="detection_detail_set")
           .add_prefix("results_"))
# Collapse "results_summary.<field>" to "results_<field>". regex=True is
# required explicitly: since pandas 2.0 str.replace treats the pattern as a
# literal string by default, so the escaped dot would never match.
results.columns = results.columns.str.replace("results_summary\\.", "results_",
                                              regex=True)

# Detection layer: one row per entry of "detection_detail_set", keeping the
# parent's "category" and "id" as (unprefixed) meta columns to join on.
detection = pd.json_normalize(records, meta=['category', 'id'],
                              record_path='detection_detail_set',
                              record_prefix="detection_", errors='ignore')

# Left join so that a result with no detections still yields a row.
# The join keys "category" and "id" remain as duplicate trailing columns;
# drop them afterwards if they are not wanted in the CSV.
master = results.merge(detection, how="left",
                       left_on=["results_id", "results_category"],
                       right_on=["id", "category"])
master.columns
Index(['results_c_score', 'results_category', 'results_description',
'results_dns_set', 'results_first_timestamp', 'results_host',
'results_id', 'results_last_timestamp', 'results_relayed_comm_set',
'results_sensor_luid', 'results_src_ip', 'results_state',
'results_anomalous_events', 'results_internal_targets',
'results_probable_owner', 'results_t_score', 'results_tags',
'results_targets_key_asset', 'results_triage_rule_id',
'results_type_vname', 'results_url', 'detection_count',
'detection_count_pos', 'detection_description', 'detection_dst_dns',
'detection_dst_geo', 'detection_dst_host_id', 'detection_dst_ip',
'detection_dst_port', 'detection_first_timestamp', 'detection_id',
'detection_last_timestamp', 'detection_proto',
'detection_total_bytes_rcvd', 'detection_total_bytes_sent',
'detection_url', 'category', 'id'],
dtype='object')