pacman::p_load(jsonlite, tidygraph, ggraph, visNetwork, graphlayouts, ggforce, tidyr, dplyr,skimr, tidytext, tidyverse, scales, wordcloud, tm, treemap,lubridate,ggplot2,visNetwork,igraph,reshape2)
# Untitled
# Load the MC3 graph from JSON and split it into edge and node tibbles.
# NOTE(review): `$` on a plain list partial-matches element names, so
# `mc3_data$link` also resolves an element named e.g. `links` -- confirm the
# actual key in mc3.json.
mc3_data <- fromJSON("mc3.json")
mc3_edges <- as_tibble(mc3_data$link)
mc3_nodes <- as_tibble(mc3_data$nodes)

# Keep only the node type and id, under the names used by the rest of the
# script.
mc3_nodes_selected <- mc3_nodes %>%
  select(node_type = type, node_id = id)

# Build the nodes_Person table: every node typed "Entity.Person".
nodes_Person <- mc3_nodes_selected %>%
  filter(node_type == "Entity.Person") %>%
  mutate(style = "Person")

# Build the nodes_Company table: every node whose type is not
# "Entity.Person" is labelled "Company".
nodes_Company <- mc3_nodes_selected %>%
  filter(node_type != "Entity.Person") %>%
  mutate(style = "Company")

library(dplyr)

# Stack both tables; `style` records which table each row came from.
node_file <- bind_rows(nodes_Person, nodes_Company)

# Inspect the combined table (recorded console output kept below as comments).
print(node_file)
#> # A tibble: 60,520 × 3
#> node_type node_id style
#> <chr> <chr> <chr>
#> 1 Entity.Person Laura Newman Person
#> 2 Entity.Person Jillian Morales Person
#> 3 Entity.Person Anna Bailey Person
#> 4 Entity.Person Dawn King Person
#> 5 Entity.Person Elizabeth Bell Person
#> 6 Entity.Person Rose Whitney Person
#> 7 Entity.Person Craig Holloway Person
#> 8 Entity.Person Daniel Marquez Person
#> 9 Entity.Person Kayla Boone Person
#> 10 Entity.Person Eric Montes Person
#> # ℹ 60,510 more rows
library(dplyr)
# Tag each node table with its origin in the `style` column.
nodes_Person <- nodes_Person %>%
  mutate(style = "Person")
nodes_Company <- nodes_Company %>%
  mutate(style = "Company")

# Combine the two node tables.
node_file <- bind_rows(nodes_Person, nodes_Company)

# Rebuild the edge table with parsed dates. A missing end_date is replaced by
# the placeholder date 2035-12-12, and edges whose start_date cannot be parsed
# are dropped.
mc3_edges <- as_tibble(mc3_data$link)
mc3_edges_add <- mc3_edges %>%
  mutate(
    start_date = as.Date(start_date, format = "%Y-%m-%d"),
    end_date = as.Date(
      ifelse(is.na(end_date), "2035-12-12", as.character(end_date)),
      format = "%Y-%m-%d"
    )
  ) %>%
  filter(!is.na(start_date))

mc3_edges_filt <- mc3_edges_add %>%
  select(type, source, target, start_date, end_date)

# Create the mc3_source_target table (edge endpoints only).
mc3_source_target <- mc3_edges_filt %>%
  select(source, target)

# Left-join node attributes onto each edge by its source, so `node_type` and
# `style` in the result describe the SOURCE node of the edge.
result <- mc3_source_target %>%
  left_join(node_file, by = c("source" = "node_id"))

# Drop every row that contains an NA in any column.
result <- result %>%
  drop_na()

# Inspect the result (recorded console output kept below as comments).
print(result)
#> # A tibble: 75,727 × 4
#> source target node_type style
#> <chr> <chr> <chr> <chr>
#> 1 Avery Inc Allen, Nichols and Thompson Entity.Or… Comp…
#> 2 Berger-Hayes Jensen, Morris and Downs Entity.Or… Comp…
#> 3 Bowers Group Barnett Inc Entity.Or… Comp…
#> 4 Bowman-Howe Bennett Ltd Entity.Or… Comp…
#> 5 Boyd and Sons Armstrong, Bennett and Deleon Entity.Or… Comp…
#> 6 Brown-Johnson Alexander-Wagner Entity.Or… Comp…
#> 7 Burke, Hernandez and Alexander Becker-Mckinney Entity.Or… Comp…
#> 8 Cain, Rodriguez and Gonzales Beck-Green Entity.Or… Comp…
#> 9 Choi, Ortiz and Goodwin Barton, King and Hall Entity.Or… Comp…
#> 10 Cochran Ltd Crosby Ltd Entity.Or… Comp…
#> # ℹ 75,717 more rows
# Start again from the raw edge list.
mc3_edges <- as_tibble(mc3_data$link)

# TRUE when every edge either has no end_date or has one in the year 2035.
# This flag is not used by any later statement in this section.
all_end_dates_valid <- mc3_edges %>%
  mutate(end_date_year = year(as_date(end_date))) %>%
  summarise(all_valid = all(is.na(end_date) | end_date_year == 2035)) %>%
  pull(all_valid)

# Turn the literal string "NA" into a real missing value, then derive
# `operation`: "0" when end_date is missing, "1" when it is present.
mc3_edges_add <- mc3_edges %>%
  mutate(end_date = na_if(end_date, "NA")) %>%
  mutate(operation = as.character(ifelse(is.na(end_date), 0, 1)))

mc3_edges_filt <- mc3_edges_add %>%
  select(type, source, target, start_date, end_date, operation)

# Build a new table holding only the source and target columns.
mc3_source_target <- mc3_edges_filt %>%
  select(source, target)

# Inspect the new table (recorded console output kept below as comments).
print(mc3_source_target)
#> # A tibble: 75,817 × 2
#> source target
#> <chr> <chr>
#> 1 Avery Inc Allen, Nichols and Thompson
#> 2 Berger-Hayes Jensen, Morris and Downs
#> 3 Bowers Group Barnett Inc
#> 4 Bowman-Howe Bennett Ltd
#> 5 Boyd and Sons Armstrong, Bennett and Deleon
#> 6 Brown-Johnson Alexander-Wagner
#> 7 Burke, Hernandez and Alexander Becker-Mckinney
#> 8 Cain, Rodriguez and Gonzales Beck-Green
#> 9 Choi, Ortiz and Goodwin Barton, King and Hall
#> 10 Cochran Ltd Crosby Ltd
#> # ℹ 75,807 more rows
library(dplyr)
# Tag each node table with its origin in the `style` column.
nodes_Person <- nodes_Person %>%
  mutate(style = "Person")
nodes_Company <- nodes_Company %>%
  mutate(style = "Company")

# Combine the two node tables.
node_file <- bind_rows(nodes_Person, nodes_Company)

# Create the mc3_source_target table (edge endpoints only).
mc3_source_target <- mc3_edges_filt %>%
  select(source, target)

# Left-join node attributes onto each edge by its source, so `node_type` and
# `style` in the result describe the SOURCE node of the edge.
result <- mc3_source_target %>%
  left_join(node_file, by = c("source" = "node_id"))

# Drop every row that contains an NA in any column.
result <- result %>%
  drop_na()

# Inspect the result (recorded console output kept below as comments).
print(result)
#> # A tibble: 75,817 × 4
#> source target node_type style
#> <chr> <chr> <chr> <chr>
#> 1 Avery Inc Allen, Nichols and Thompson Entity.Or… Comp…
#> 2 Berger-Hayes Jensen, Morris and Downs Entity.Or… Comp…
#> 3 Bowers Group Barnett Inc Entity.Or… Comp…
#> 4 Bowman-Howe Bennett Ltd Entity.Or… Comp…
#> 5 Boyd and Sons Armstrong, Bennett and Deleon Entity.Or… Comp…
#> 6 Brown-Johnson Alexander-Wagner Entity.Or… Comp…
#> 7 Burke, Hernandez and Alexander Becker-Mckinney Entity.Or… Comp…
#> 8 Cain, Rodriguez and Gonzales Beck-Green Entity.Or… Comp…
#> 9 Choi, Ortiz and Goodwin Barton, King and Hall Entity.Or… Comp…
#> 10 Cochran Ltd Crosby Ltd Entity.Or… Comp…
#> # ℹ 75,807 more rows
library(dplyr)
library(tidyr)
library(visNetwork)
# Collect every node that reaches `target_name` by following edges backwards
# (target -> source), directly or through intermediate nodes.
#
# The search is iterative, not recursive: each pass looks up the sources of
# the nodes discovered in the previous pass and stops once a pass discovers
# nothing new, so cycles in the edge list cannot make it loop forever.
#
# Arguments:
#   target_name  Node id(s) to start from.
#   result_df    Data frame with one row per edge and columns `source` and
#                `target`.
# Returns:
#   A vector beginning with `target_name`, followed by the upstream nodes in
#   the order they were discovered.
find_connections <- function(target_name, result_df) {
  stopifnot(
    is.data.frame(result_df),
    all(c("source", "target") %in% names(result_df))
  )

  # `[[` matches column names exactly (no partial matching).
  edge_source <- result_df[["source"]]
  edge_target <- result_df[["target"]]

  connections <- c(target_name)
  frontier <- target_name

  # An empty frontier can match no edge, so stop without scanning the table
  # one more time.
  while (length(frontier) > 0) {
    upstream <- edge_source[edge_target %in% frontier]
    if (length(upstream) == 0) {
      break
    }
    # Only nodes not seen before are expanded on the next pass.
    frontier <- setdiff(upstream, connections)
    connections <- unique(c(connections, frontier))
  }

  connections
}
# Find the relationship network of SouthSeafood Express Corp.
initial_target <- "SouthSeafood Express Corp"
connections <- find_connections(initial_target, result)

# Keep every edge that touches a connected node on either end.
network_df <- result %>%
  filter(target %in% connections | source %in% connections)

# Build the node and edge tables.
# `style` in `result` was joined on `source`, so it describes the SOURCE of
# each edge. Source nodes can take it directly; target nodes must get their
# own style from `node_file`, otherwise a target that never appears as a
# source would be coloured with its owner's type.
node_styles <- node_file %>%
  distinct(node_id, .keep_all = TRUE) %>% # one row per id: join cannot fan out
  select(id = node_id, group = style)

source_nodes <- network_df %>%
  select(id = source, group = style) %>%
  distinct()

target_nodes <- network_df %>%
  select(id = target) %>%
  distinct() %>%
  left_join(node_styles, by = "id") # ids missing from node_file get NA group

# Make node ids unique; source rows come first, so they win over target rows.
nodes <- bind_rows(source_nodes, target_nodes) %>%
  group_by(id) %>%
  slice(1) %>%
  ungroup() %>%
  mutate(label = id) %>%
  select(id, label, group)

edges <- network_df %>%
  select(from = source, to = target)

# Create and display the network graph.
visNetwork(nodes, edges) %>%
  visNodes(shape = "dot", size = 10) %>%
  visGroups(
    groupname = "Person",
    color = list(background = "blue", border = "darkblue")
  ) %>%
  visGroups(
    groupname = "Company",
    color = list(background = "red", border = "darkred")
  ) %>%
  visEdges(arrows = "to") %>%
  visLayout(randomSeed = 42)

library(dplyr)
library(tidyr)
library(visNetwork)
library(htmlwidgets)
#> Warning: package 'htmlwidgets' was built under R version 4.3.3
# Collect every node that reaches `target_name` by following edges backwards
# (target -> source), directly or through intermediate nodes.
#
# The search is iterative, not recursive: each pass looks up the sources of
# the nodes discovered in the previous pass and stops once a pass discovers
# nothing new, so cycles in the edge list cannot make it loop forever.
#
# Arguments:
#   target_name  Node id(s) to start from.
#   result_df    Data frame with one row per edge and columns `source` and
#                `target`.
# Returns:
#   A vector beginning with `target_name`, followed by the upstream nodes in
#   the order they were discovered.
find_connections <- function(target_name, result_df) {
  stopifnot(
    is.data.frame(result_df),
    all(c("source", "target") %in% names(result_df))
  )

  # `[[` matches column names exactly (no partial matching).
  edge_source <- result_df[["source"]]
  edge_target <- result_df[["target"]]

  connections <- c(target_name)
  frontier <- target_name

  # An empty frontier can match no edge, so stop without scanning the table
  # one more time.
  while (length(frontier) > 0) {
    upstream <- edge_source[edge_target %in% frontier]
    if (length(upstream) == 0) {
      break
    }
    # Only nodes not seen before are expanded on the next pass.
    frontier <- setdiff(upstream, connections)
    connections <- unique(c(connections, frontier))
  }

  connections
}
# Find the relationship network of SouthSeafood Express Corp.
initial_target <- "SouthSeafood Express Corp"
connections <- find_connections(initial_target, result)

# Keep every edge that touches a connected node on either end.
network_df <- result %>%
  filter(target %in% connections | source %in% connections)

# Build the node and edge tables.
# `style` in `result` was joined on `source`, so it describes the SOURCE of
# each edge. Source nodes can take it directly; target nodes must get their
# own style from `node_file`, otherwise a target that never appears as a
# source would be coloured with its owner's type.
node_styles <- node_file %>%
  distinct(node_id, .keep_all = TRUE) %>% # one row per id: join cannot fan out
  select(id = node_id, group = style)

source_nodes <- network_df %>%
  select(id = source, group = style) %>%
  distinct()

target_nodes <- network_df %>%
  select(id = target) %>%
  distinct() %>%
  left_join(node_styles, by = "id") # ids missing from node_file get NA group

# Make node ids unique; source rows come first, so they win over target rows.
nodes <- bind_rows(source_nodes, target_nodes) %>%
  group_by(id) %>%
  slice(1) %>%
  ungroup() %>%
  mutate(label = id) %>%
  select(id, label, group)

edges <- network_df %>%
  select(from = source, to = target)

# Create the network graph with a manual legend whose colours mirror the two
# visGroups() definitions.
network_plot <- visNetwork(nodes, edges) %>%
  visNodes(shape = "dot", size = 10) %>%
  visGroups(
    groupname = "Person",
    color = list(background = "yellow", border = "orange")
  ) %>%
  visGroups(
    groupname = "Company",
    color = list(background = "lightblue", border = "blue")
  ) %>%
  visEdges(arrows = "to") %>%
  visLayout(randomSeed = 42) %>%
  visLegend(
    addNodes = list(
      list(
        label = "Person",
        shape = "dot",
        color = list(background = "yellow", border = "orange")
      ),
      list(
        label = "Company",
        shape = "dot",
        color = list(background = "lightblue", border = "blue")
      )
    ),
    useGroups = FALSE
  ) %>%
  visOptions(highlightNearest = TRUE)

# Add a title above the widget.
network_plot <- prependContent(
  network_plot,
  htmltools::tags$h2("SouthSeafood Express Corp Relationship Network")
)

# Display the network graph (rendered title kept below as a comment).
network_plot
#> SouthSeafood Express Corp Relationship Network

saveWidget(network_plot, file = "SouthSeafood_Express_Corp_Relationship_Network.html")