Untitled

pacman::p_load(jsonlite, tidygraph, ggraph, visNetwork, graphlayouts, ggforce, tidyr, dplyr,skimr, tidytext, tidyverse, scales, wordcloud, tm, treemap,lubridate,ggplot2,visNetwork,igraph,reshape2)
# Read the MC3 graph from disk and expose its node and edge tables as tibbles.
mc3_data <- fromJSON(txt = "mc3.json")
mc3_nodes <- mc3_data$nodes %>% as_tibble()
# NOTE(review): `$` on a list matches names partially, so `$link` would also
# resolve a key named `links` -- confirm the exact key used in mc3.json.
mc3_edges <- mc3_data$link %>% as_tibble()

# Reduce the node table to its type and identifier, renamed for later joins.
mc3_nodes_selected <- select(mc3_nodes, node_type = type, node_id = id)

# Build the nodes_Person table: rows typed "Entity.Person", tagged "Person".
nodes_Person <- mutate(
  filter(mc3_nodes_selected, node_type == "Entity.Person"),
  style = "Person"
)

# Build the nodes_Company table: every other node type, tagged "Company".
nodes_Company <- mutate(
  filter(mc3_nodes_selected, node_type != "Entity.Person"),
  style = "Company"
)
library(dplyr)
# Rebuild the trimmed node table (type and identifier only).
mc3_nodes_selected <- select(mc3_nodes, node_type = type, node_id = id)

# Tag each table with its origin in the `style` column.
nodes_Person <- mutate(nodes_Person, style = "Person")
nodes_Company <- mutate(nodes_Company, style = "Company")

# Stack both tables into a single node table.
node_file <- nodes_Person %>%
  bind_rows(nodes_Company)

# Show the combined table.
print(node_file)
# A tibble: 60,520 × 3
   node_type     node_id         style 
   <chr>         <chr>           <chr> 
 1 Entity.Person Laura Newman    Person
 2 Entity.Person Jillian Morales Person
 3 Entity.Person Anna Bailey     Person
 4 Entity.Person Dawn King       Person
 5 Entity.Person Elizabeth Bell  Person
 6 Entity.Person Rose Whitney    Person
 7 Entity.Person Craig Holloway  Person
 8 Entity.Person Daniel Marquez  Person
 9 Entity.Person Kayla Boone     Person
10 Entity.Person Eric Montes     Person
# ℹ 60,510 more rows
library(dplyr)

# Tag each node table with its origin in the `style` column.
nodes_Person <- mutate(nodes_Person, style = "Person")
nodes_Company <- mutate(nodes_Company, style = "Company")

# Stack both node tables into one.
node_file <- nodes_Person %>%
  bind_rows(nodes_Company)

# Build the mc3_source_target table.

mc3_edges <- mc3_data$link %>% as_tibble()
# Parse both dates. A missing end date is replaced by the placeholder
# 2035-12-12, and edges whose start date does not parse are discarded.
mc3_edges_add <- mc3_edges %>%
  mutate(start_date = as.Date(start_date, format = "%Y-%m-%d")) %>%
  mutate(
    end_date = as.Date(
      coalesce(as.character(end_date), "2035-12-12"),
      format = "%Y-%m-%d"
    )
  ) %>%
  filter(!is.na(start_date))
mc3_edges_filt <- select(
  mc3_edges_add,
  type, source, target, start_date, end_date
)
mc3_source_target <- select(mc3_edges_filt, source, target)

# Attach the node attributes of each edge's source (left join on source),
# then drop every row that still contains an NA in any column.
result <- mc3_source_target %>%
  left_join(node_file, by = c(source = "node_id")) %>%
  drop_na()

# Show the resulting table.
print(result)
# A tibble: 75,727 × 4
   source                         target                        node_type  style
   <chr>                          <chr>                         <chr>      <chr>
 1 Avery Inc                      Allen, Nichols and Thompson   Entity.Or… Comp…
 2 Berger-Hayes                   Jensen, Morris and Downs      Entity.Or… Comp…
 3 Bowers Group                   Barnett Inc                   Entity.Or… Comp…
 4 Bowman-Howe                    Bennett Ltd                   Entity.Or… Comp…
 5 Boyd and Sons                  Armstrong, Bennett and Deleon Entity.Or… Comp…
 6 Brown-Johnson                  Alexander-Wagner              Entity.Or… Comp…
 7 Burke, Hernandez and Alexander Becker-Mckinney               Entity.Or… Comp…
 8 Cain, Rodriguez and Gonzales   Beck-Green                    Entity.Or… Comp…
 9 Choi, Ortiz and Goodwin        Barton, King and Hall         Entity.Or… Comp…
10 Cochran Ltd                    Crosby Ltd                    Entity.Or… Comp…
# ℹ 75,717 more rows
mc3_edges <- mc3_data$link %>% as_tibble()
# TRUE when every end date is either missing or falls in the year 2035.
all_end_dates_valid <- with(
  mc3_edges,
  all(is.na(end_date) | year(as_date(end_date)) == 2035)
)
# Turn the literal string "NA" into a real NA, then flag each edge:
# operation is "0" when the end date is missing and "1" otherwise.
mc3_edges_add <- mc3_edges %>%
  mutate(
    end_date = na_if(end_date, "NA"),
    operation = if_else(is.na(end_date), "0", "1")
  )
mc3_edges_filt <- select(
  mc3_edges_add,
  type, source, target, start_date, end_date, operation
)
# Derive a further table holding only the source and target columns.
mc3_source_target <- select(mc3_edges_filt, source, target)

# Show the new table.
print(mc3_source_target)
# A tibble: 75,817 × 2
   source                         target                       
   <chr>                          <chr>                        
 1 Avery Inc                      Allen, Nichols and Thompson  
 2 Berger-Hayes                   Jensen, Morris and Downs     
 3 Bowers Group                   Barnett Inc                  
 4 Bowman-Howe                    Bennett Ltd                  
 5 Boyd and Sons                  Armstrong, Bennett and Deleon
 6 Brown-Johnson                  Alexander-Wagner             
 7 Burke, Hernandez and Alexander Becker-Mckinney              
 8 Cain, Rodriguez and Gonzales   Beck-Green                   
 9 Choi, Ortiz and Goodwin        Barton, King and Hall        
10 Cochran Ltd                    Crosby Ltd                   
# ℹ 75,807 more rows
library(dplyr)

# Tag each node table with its origin in the `style` column.
nodes_Person <- mutate(nodes_Person, style = "Person")
nodes_Company <- mutate(nodes_Company, style = "Company")

# Stack both node tables into one.
node_file <- nodes_Person %>%
  bind_rows(nodes_Company)

# Build the mc3_source_target table.
mc3_source_target <- select(mc3_edges_filt, source, target)

# Attach the node attributes of each edge's source (left join on source),
# then drop every row that still contains an NA in any column.
result <- mc3_source_target %>%
  left_join(node_file, by = c(source = "node_id")) %>%
  drop_na()

# Show the resulting table.
print(result)
# A tibble: 75,817 × 4
   source                         target                        node_type  style
   <chr>                          <chr>                         <chr>      <chr>
 1 Avery Inc                      Allen, Nichols and Thompson   Entity.Or… Comp…
 2 Berger-Hayes                   Jensen, Morris and Downs      Entity.Or… Comp…
 3 Bowers Group                   Barnett Inc                   Entity.Or… Comp…
 4 Bowman-Howe                    Bennett Ltd                   Entity.Or… Comp…
 5 Boyd and Sons                  Armstrong, Bennett and Deleon Entity.Or… Comp…
 6 Brown-Johnson                  Alexander-Wagner              Entity.Or… Comp…
 7 Burke, Hernandez and Alexander Becker-Mckinney               Entity.Or… Comp…
 8 Cain, Rodriguez and Gonzales   Beck-Green                    Entity.Or… Comp…
 9 Choi, Ortiz and Goodwin        Barton, King and Hall         Entity.Or… Comp…
10 Cochran Ltd                    Crosby Ltd                    Entity.Or… Comp…
# ℹ 75,807 more rows
library(dplyr)
library(tidyr)
library(visNetwork)

# Collect every entity that sits upstream of `target_name`.
#
# Starting from `target_name`, repeatedly looks up the `source` of every edge
# whose `target` is in the current frontier, until a pass finds no unseen
# source. The walk is iterative (not recursive) and only follows edges from
# target back to source.
#
# Args:
#   target_name: character vector with the starting node id(s).
#   result_df:   data frame with `source` and `target` columns, one row per
#                edge.
#
# Returns:
#   Character vector: `target_name` first, followed by each newly reached
#   source in discovery order.
find_connections <- function(target_name, result_df) {
  # Fail early with a clear message rather than letting a missing column be
  # resolved against a same-named variable outside the data frame.
  missing_cols <- setdiff(c("source", "target"), names(result_df))
  if (length(missing_cols) > 0) {
    stop(
      "`result_df` is missing column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  connections <- c(target_name)
  new_sources <- target_name
  # Stop as soon as a pass discovers nothing new; already-seen ids are never
  # re-queued, so cyclic edges cannot keep the loop alive.
  while (length(new_sources) > 0) {
    new_targets <- result_df$source[result_df$target %in% new_sources]
    new_sources <- setdiff(new_targets, connections)
    connections <- unique(c(connections, new_sources))
  }
  connections
}

# Find the relationship network of SouthSeafood Express Corp.
initial_target <- "SouthSeafood Express Corp"
connections <- find_connections(initial_target, result)

# Keep every edge that has a connected entity on either end.
network_df <- result %>%
  filter(target %in% connections | source %in% connections)

# Build the node table.
# `style` in `network_df` was joined on `source`, so it describes the source
# end of an edge only. Reusing it for target ids gives a target-only node the
# group of whichever node points to it. Each node's group is therefore looked
# up by its own id in `node_file`; ids absent from `node_file` keep an NA
# group. One row per id, ordered by id.
nodes <- tibble(id = unique(c(network_df$source, network_df$target))) %>%
  left_join(
    node_file %>%
      distinct(node_id, .keep_all = TRUE) %>%
      select(id = node_id, group = style),
    by = "id"
  ) %>%
  mutate(label = id) %>%
  select(id, label, group) %>%
  arrange(id)

# Build the edge table in the from/to shape visNetwork expects.
edges <- network_df %>%
  select(from = source, to = target)

# Create and display the network graph.
visNetwork(nodes, edges) %>%
  visNodes(shape = "dot", size = 10) %>%
  visGroups(groupname = "Person", color = list(background = "blue", border = "darkblue")) %>%
  visGroups(groupname = "Company", color = list(background = "red", border = "darkred")) %>%
  visEdges(arrows = "to") %>%
  visLayout(randomSeed = 42)
library(dplyr)
library(tidyr)
library(visNetwork)
library(htmlwidgets)
Warning: package 'htmlwidgets' was built under R version 4.3.3
# Collect every entity that sits upstream of `target_name`.
#
# Starting from `target_name`, repeatedly looks up the `source` of every edge
# whose `target` is in the current frontier, until a pass finds no unseen
# source. The walk is iterative (not recursive) and only follows edges from
# target back to source.
#
# Args:
#   target_name: character vector with the starting node id(s).
#   result_df:   data frame with `source` and `target` columns, one row per
#                edge.
#
# Returns:
#   Character vector: `target_name` first, followed by each newly reached
#   source in discovery order.
find_connections <- function(target_name, result_df) {
  # Fail early with a clear message rather than letting a missing column be
  # resolved against a same-named variable outside the data frame.
  missing_cols <- setdiff(c("source", "target"), names(result_df))
  if (length(missing_cols) > 0) {
    stop(
      "`result_df` is missing column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  connections <- c(target_name)
  new_sources <- target_name
  # Stop as soon as a pass discovers nothing new; already-seen ids are never
  # re-queued, so cyclic edges cannot keep the loop alive.
  while (length(new_sources) > 0) {
    new_targets <- result_df$source[result_df$target %in% new_sources]
    new_sources <- setdiff(new_targets, connections)
    connections <- unique(c(connections, new_sources))
  }
  connections
}

# Find the relationship network of SouthSeafood Express Corp.
initial_target <- "SouthSeafood Express Corp"
connections <- find_connections(initial_target, result)

# Keep every edge that has a connected entity on either end.
network_df <- result %>%
  filter(target %in% connections | source %in% connections)

# Build the node table.
# `style` in `network_df` was joined on `source`, so it describes the source
# end of an edge only. Reusing it for target ids gives a target-only node the
# group of whichever node points to it. Each node's group is therefore looked
# up by its own id in `node_file`; ids absent from `node_file` keep an NA
# group. One row per id, ordered by id.
nodes <- tibble(id = unique(c(network_df$source, network_df$target))) %>%
  left_join(
    node_file %>%
      distinct(node_id, .keep_all = TRUE) %>%
      select(id = node_id, group = style),
    by = "id"
  ) %>%
  mutate(label = id) %>%
  select(id, label, group) %>%
  arrange(id)

# Build the edge table in the from/to shape visNetwork expects.
edges <- network_df %>%
  select(from = source, to = target)

# Create the network graph with a hand-built legend that mirrors the group
# colours (useGroups = FALSE, so the legend entries come from addNodes only).
network_plot <- visNetwork(nodes, edges) %>%
  visNodes(shape = "dot", size = 10) %>%
  visGroups(groupname = "Person", color = list(background = "yellow", border = "orange")) %>%
  visGroups(groupname = "Company", color = list(background = "lightblue", border = "blue")) %>%
  visEdges(arrows = "to") %>%
  visLayout(randomSeed = 42) %>%
  visLegend(addNodes = list(
    list(label = "Person", shape = "dot", color = list(background = "yellow", border = "orange")),
    list(label = "Company", shape = "dot", color = list(background = "lightblue", border = "blue"))
  ), useGroups = FALSE) %>%
  visOptions(highlightNearest = TRUE)

# Add a title above the widget.
network_plot <- prependContent(network_plot, htmltools::tags$h2("SouthSeafood Express Corp Relationship Network"))

# Display the network graph.
network_plot

SouthSeafood Express Corp Relationship Network

# Write the titled network widget to an HTML file in the working directory.
saveWidget(
  widget = network_plot,
  file = "SouthSeafood_Express_Corp_Relationship_Network.html"
)