Skip to content

Commit be326e7

Browse files
committed
started revising post scripts
1 parent 765fa72 commit be326e7

8 files changed

Lines changed: 78 additions & 36 deletions

File tree

src/post/description/_all_post.R

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212

1313

1414
source("src/post/description/avgdist_evol.R")
15-
#source("src/post/description/centr_clusters.R")
16-
#source("src/post/description/centr_vs_centr.R")
15+
source("src/post/description/centr_clusters.R")
16+
source("src/post/description/centr_vs_centr.R")
1717
#source("src/post/description/centr_vs_occ.R")
1818
#source("src/post/description/char_distr.R")
1919
#source("src/post/description/char_sim.R")

src/post/description/avgdist_evol.R

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ start.rec.log(text="AvgDistEvol")
1616

1717
###############################################################################
1818
# publication order vs. story order
19-
pub.order <- TRUE
19+
pub.order <- TRUE # TRUE=publication order, FALSE=story order
2020

2121
tlog(0, "Evolution of the average distance over scenes: pub.order=",pub.order)
2222

@@ -26,19 +26,20 @@ tlog(0, "Evolution of the average distance over scenes: pub.order=",pub.order)
2626
###############################################################################
2727
# plots unfiltered and filtered figures as separate files
2828

29-
# load corpus stats
30-
tlog(0, "Read corpus stats")
31-
data <- read.corpus.data()
32-
33-
# get filtered characters
34-
filt.names <- data$char.stats[data$char.stats[,COL_FILTER]=="Discard",COL_NAME]
35-
if(length(filt.names)==0) stop("Empty list of filtered characters")
36-
29+
## load corpus stats
30+
#tlog(0, "Read corpus stats")
31+
#data <- read.corpus.data()
32+
#
33+
## get filtered characters
34+
#filt.names <- data$char.stats[data$char.stats[,COL_FILTER]=="Discard",COL_NAME]
35+
#if(length(filt.names)==0) stop("Empty list of filtered characters")
36+
#
3737
# compute the sequence of scene-based graphs (possibly one for each scene)
3838
tlog(0, "Extract graph sequence")
3939
gs <- extract.static.graph.scenes(
4040
inter.df=data$inter.df,
4141
char.stats=data$char.stats,
42+
scene.stats=data$scene.stats, scene.chars=data$scene.chars,
4243
volume.stats=data$volume.stats,
4344
ret.seq=TRUE, pub.order=pub.order
4445
)
@@ -49,17 +50,19 @@ dist.vals <- list()
4950
order.txt <- if(pub.order) "publication" else "story"
5051

5152
# compute average distance for each graph in the sequence
53+
tlog(0, "Compute average distances for unfiltered graphs")
5254
#print(any(sapply(gs, function(g) is_connected(g, mode="weak")))) # check that each graph is connected
5355
g.orders[[1]] <- future_sapply(gs, gorder)
5456
dist.vals[[1]] <- future_sapply(gs, function(g) mean_distance(graph=g, directed=FALSE, unconnected=TRUE))
5557

5658
# same for filtered graphs
59+
tlog(0, "Compute average distances for filtered graphs")
5760
gs.filt <- future_lapply(gs, function(g) delete_vertices(g, v=intersect(filt.names,V(g)$name)))
5861
g.orders[[2]] <- future_sapply(gs.filt, gorder)
5962
dist.vals[[2]] <- future_sapply(gs.filt, function(g) mean_distance(graph=g, directed=FALSE, unconnected=TRUE))
6063

6164
# loop over unfiltered/filtered
62-
tlog(0, "Loop over unfiltered/filtered")
65+
tlog(0, "Loop over unfiltered/filtered graphs")
6366
natures <- c("unfiltered", "filtered")
6467
pal <- ATT_COLORS_FILT
6568
for(i in 1:2)
@@ -93,7 +96,7 @@ for(i in 1:2)
9396
print(summary(fit))
9497

9598
# plot distance as a function of graph order
96-
plot.file <- get.path.stats.topo(net.type="static", mode="scenes", meas.name=paste0(MEAS_DISTANCE,SFX_AVG), filtered=filt.txt, suf=paste0("evolution_",order.txt,"_lines"))
99+
plot.file <- get.path.stats.topo(net.type="static", mode="scenes", meas.name=paste0(MEAS_DISTANCE,SFX_AVG), weights="none", filtered=filt.txt, suf=paste0("evolution_",order.txt,"_lines"))
97100
tlog(2, "Plotting in file ",plot.file)
98101
for(fformat in PLOT_FORMAT)
99102
{ if(fformat==PLOT_FORMAT_PDF)
@@ -123,7 +126,7 @@ tlog(0, "Unfiltered/filtered loop complete")
123126

124127
###############################################################################
125128
# same thing, but plots both unfiltered and filtered figures in the same file
126-
plot.file <- get.path.stats.topo(net.type="static", mode="scenes", meas.name=paste0(MEAS_DISTANCE,SFX_AVG), filtered="both", suf=paste0("evolution_",order.txt,"_lines"))
129+
plot.file <- get.path.stats.topo(net.type="static", mode="scenes", meas.name=paste0(MEAS_DISTANCE,SFX_AVG), weights="none", filtered="both", suf=paste0("evolution_",order.txt,"_lines"))
127130
tlog(0, "Plotting both unfiltered and filtered results in file ",plot.file)
128131

129132
# process all formats

src/post/description/centr_clusters.R

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,13 @@ kept <- which(data$char.stats[,COL_FILTER]=="Keep")
2828
centr.names <- c(MEAS_DEGREE, MEAS_BETWEENNESS, MEAS_CLOSENESS, MEAS_EIGENCNTR)
2929

3030
# get centrality values
31-
tlog(0, "Read centrality values")
31+
tlog(0, "Retrieve centrality values")
3232
vals.unf <- matrix(NA, nrow=nrow(data$char.stats), ncol=length(centr.names), dimnames=list(data$char.stats[,COL_NAME],centr.names))
3333
vals.flt <- matrix(NA, nrow=length(kept), ncol=length(centr.names), dimnames=list(data$char.stats[kept,COL_FILTER]=="Keep",centr.names))
3434
for(centr.name in centr.names)
35-
{ vals.unf[,centr.name] <- load.static.nodelink.stats.scenes(object="nodes", weights="occurrences", measure=centr.name, filtered=FALSE)
36-
vals.flt[,centr.name] <- load.static.nodelink.stats.scenes(object="nodes", weights="occurrences", measure=centr.name, filtered=TRUE)
35+
{ tlog(2, "Centrality ",centr.name)
36+
vals.unf[,centr.name] <- load.static.nodelink.stats.scenes(object="nodes", weights="none", measure=centr.name, filtered="unfiltered")
37+
vals.flt[,centr.name] <- load.static.nodelink.stats.scenes(object="nodes", weights="none", measure=centr.name, filtered="filtered")
3738
}
3839

3940
# filter NA values

src/post/description/centr_vs_centr.R

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -55,12 +55,12 @@ for(centr1.name in centr.names)
5555
{ tlog(4, "Processing second centrality measure ",centr2.name)
5656

5757
# get centrality 1 values
58-
centr1.vals.unf <- load.static.nodelink.stats.scenes(object="nodes", weights="occurrences", measure=centr1.name, filtered=FALSE)
59-
centr1.vals.flt <- load.static.nodelink.stats.scenes(object="nodes", weights="occurrences", measure=centr1.name, filtered=TRUE)
58+
centr1.vals.unf <- load.static.nodelink.stats.scenes(object="nodes", weights="none", measure=centr1.name, filtered="unfiltered")
59+
centr1.vals.flt <- load.static.nodelink.stats.scenes(object="nodes", weights="none", measure=centr1.name, filtered="filtered")
6060

6161
# get centrality 2 values
62-
centr2.vals.unf <- load.static.nodelink.stats.scenes(object="nodes", weights="occurrences", measure=centr2.name, filtered=FALSE)
63-
centr2.vals.flt <- load.static.nodelink.stats.scenes(object="nodes", weights="occurrences", measure=centr2.name, filtered=TRUE)
62+
centr2.vals.unf <- load.static.nodelink.stats.scenes(object="nodes", weights="none", measure=centr2.name, filtered="unfiltered")
63+
centr2.vals.flt <- load.static.nodelink.stats.scenes(object="nodes", weights="none", measure=centr2.name, filtered="filtered")
6464

6565
#### handle unfiltered data
6666
tlog(6,"Dealing with the unfiltered data")
@@ -71,7 +71,7 @@ for(centr1.name in centr.names)
7171
centr1.vals.unf <- centr1.vals.unf[idx]
7272
centr2.vals.unf <- centr2.vals.unf[idx]
7373
corr.mat.unf.clean[centr1.name,centr2.name] <- cor(centr1.vals.unf, centr2.vals.unf, method="spearman")
74-
tlog(8,"Spearman correlation before cleaning: ", corr.mat.unf.clean[centr1.name,centr2.name])
74+
tlog(8,"Spearman correlation after cleaning: ", corr.mat.unf.clean[centr1.name,centr2.name])
7575
avg.centr2.vals.unf <- sapply(1:max(centr2.vals.unf), function(d) mean(centr1.vals.unf[centr2.vals.unf==d]))
7676

7777
# # keep tail
@@ -101,7 +101,7 @@ for(centr1.name in centr.names)
101101
col.sec <- combine.colors(col, "WHITE", transparency=20)
102102
xlab <- NODE_MEASURES[[centr2.name]]$cname
103103
ylab <- NODE_MEASURES[[centr1.name]]$cname
104-
plot.file <- get.path.stats.topo(net.type="static", mode="scenes", meas.name=MEAS_MULTI_NODES, filtered="both", suf=paste0("centr_",centr2.name,"_vs_",centr1.name))
104+
plot.file <- get.path.stats.topo(net.type="static", mode="scenes", meas.name=MEAS_MULTI_NODES, weights="none", filtered="both", suf=paste0("centr_",centr2.name,"_vs_",centr1.name))
105105
tlog(8, "Plotting in file ",plot.file)
106106
pdf(file=paste0(plot.file,PLOT_FORMAT_PDF), bg="white")
107107
par(

src/post/description/char_sim.R

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,8 @@ tlog(0,"Load data and network")
3232
tlog(2,"Reading previously computed corpus stats")
3333
data <- read.corpus.data()
3434
char.stats <- data$char.stats
35+
scene.stats <- data$scene.stats
36+
scene.chars <- data$scene.chars
3537
volume.stats <- data$volume.stats
3638
scene.stats <- data$scene.stats
3739

@@ -61,6 +63,7 @@ if(narr.smooth)
6163
inter.df=data$inter.df,
6264
char.stats=char.stats,
6365
volume.stats=volume.stats,
66+
scene.stats=scene.stats, scene.chars=scene.chars,
6467
ret.seq=TRUE, pub.order=pub.order
6568
)
6669
# possibly set weights

src/post/description/partial_extr.R

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ idx1 <- 1:(idx-1)
4343
gs[[1]] <- extract.static.graph.scenes(
4444
inter.df=data$inter.df[idx1,],
4545
char.stats=data$char.stats,
46+
scene.stats=data$scene.stats, scene.chars=data$scene.chars,
4647
volume.stats=data$volume.stats
4748
)
4849

@@ -52,6 +53,7 @@ idx2 <- idx:nrow(data$inter.df)
5253
gs[[2]] <- extract.static.graph.scenes(
5354
inter.df=data$inter.df[idx2,],
5455
char.stats=data$char.stats,
56+
scene.stats=data$scene.stats, scene.chars=data$scene.chars,
5557
volume.stats=data$volume.stats
5658
)
5759

src/post/description/pref_attach.R

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,6 +68,7 @@ data <- read.corpus.data()
6868
gs.unf <- extract.static.graph.scenes(
6969
inter.df=data$inter.df,
7070
char.stats=data$char.stats,
71+
scene.stats=data$scene.stats, scene.chars=data$scene.chars,
7172
volume.stats=data$volume.stats,
7273
ret.seq=TRUE, pub.order=TRUE # TODO check that this parm is used in the rest of the graph (should be a main script param)
7374
)

src/static/extract_scene.R

Lines changed: 45 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,8 @@
1919
#
2020
# inter.df: dataframe containing the pairwise interactions.
2121
# char.stats: table describing all the characters occurring in the BD series.
22+
# scene.stats: table describing all the scenes constituting the BD series.
23+
# scene.chars: list of characters occurring in each scene.
2224
# volume.stats: table describing all the volumes constituting the BD series.
2325
# vol: volume of interest (optional, and ignored if arc is specififed or if ret.seq is TRUE).
2426
# arc: narrative arc of interest (optional, and ignored if ret.seq is TRUE).
@@ -30,7 +32,7 @@
3032
# - Duration: total duration (in number of panels).
3133
# If ret.set==TRUE, then the function returns a list of graphs.
3234
###############################################################################
33-
extract.static.graph.scenes <- function(inter.df, char.stats, volume.stats, vol=NA, arc=NA, ret.seq=FALSE, pub.order=TRUE)
35+
extract.static.graph.scenes <- function(inter.df, char.stats, scene.stats, scene.chars, volume.stats, vol=NA, arc=NA, ret.seq=FALSE, pub.order=TRUE)
3436
{ tlog(2,"Extracting the scene-based static graph")
3537
res <- list()
3638
vname <- NA
@@ -48,28 +50,50 @@ extract.static.graph.scenes <- function(inter.df, char.stats, volume.stats, vol=
4850
# possibly filter interactions
4951
if(!is.na(arc))
5052
{ is <- which(inter.df[,COL_ARC_ID]==arc)
53+
scenes.ord <- which(scene.stats[,COL_ARC_ID]==arc)
5154
}
5255
else if(!is.na(vol))
5356
{ vname <- paste0(vol,"_",volume.stats[vol,COL_VOLUME])
5457
is <- which(inter.df[,COL_VOLUME_ID]==vol)
58+
scenes.ord <- which(scene.stats[,COL_VOLUME_ID]==vol)
5559
}
5660
else
57-
{ # order interactions by publication order
58-
if(pub.order)
59-
is <- 1:nrow(inter.df)
60-
# or by story order
61-
else
62-
{ vol.ranks <- volume.stats[inter.df[,COL_VOLUME_ID],COL_RANK]
63-
scene.ranks <- inter.df[,COL_SCENE_ID]
64-
is <- order(vol.ranks, scene.ranks)
61+
{ is <- 1:nrow(inter.df)
62+
scenes.ord <- 1:nrow(scene.stats)
63+
}
64+
65+
# possibly sort scenes and interactions by story order
66+
if(!pub.order)
67+
{ is <- is[order(inter.df[is,COL_RANK])]
68+
scenes.ord <- scenes.ord[order(scene.stats[scenes.ord,COL_RANK])]
69+
}
70+
71+
# possibly init the list with empty graphs or isolates
72+
if(ret.seq)
73+
{ #tlog(2,"Initializing the graph list")
74+
s <- 1
75+
while(scenes.ord[s]!=inter.df[is[1],COL_SCENE_ID])
76+
{ tlog(4,"Processing s=",s," (scenes.ord[s]=",scenes.ord[s]," and inter.df[is[1],COL_SCENE_ID]=",inter.df[is[1],COL_SCENE_ID],") -- (length(scene.chars[[s]]=",length(scene.chars[[s]]),")")
77+
if(length(scene.chars[[scenes.ord[s]]])==0)
78+
g <- make_empty_graph(n=0, directed=FALSE)
79+
else if(length(scene.chars[[scenes.ord[s]]])==1)
80+
{ idx <- which(char.stats[,COL_NAME]==scene.chars[[scenes.ord[s]]])
81+
g <- graph_from_data_frame(d=static.df, directed=FALSE, vertices=char.stats[idx,])
82+
}
83+
res[[s]] <- g
84+
s <- s + 1
6585
}
6686
}
6787

6888
# build the edgelist by considering each line (i.e. interaction) in the dataframe
6989
prev.scene <- NA
90+
prev.scene.idx <- NA
91+
tlog(2,"Building the edge list")
7092
for(i in is)
7193
{ # get the current scene id
7294
cur.scene <- inter.df[i,COL_SCENE_ID]
95+
cur.scene.idx <- which(scenes.ord==cur.scene)
96+
#tlog(4,"Processing scene ",cur.scene," (",cur.scene.idx,"/",length(scenes.ord),")")
7397

7498
# get the characters
7599
from.char <- inter.df[i,COL_CHAR_FROM]
@@ -98,9 +122,11 @@ extract.static.graph.scenes <- function(inter.df, char.stats, volume.stats, vol=
98122
if(ret.seq)
99123
{ # possibly copy previous graph
100124
if(!is.na(prev.scene) && cur.scene!=prev.scene)
101-
{ for(s in (prev.scene+1):(cur.scene-1))
102-
{ g <- res[[s-1]]
103-
g <- set_graph_attr(graph=g, name="Scene", value=s)
125+
{ # possibly several times, to represent interaction-less scenes
126+
for(s in (prev.scene.idx+1):(cur.scene.idx-1))
127+
{ #tlog(6,"s=",s," scenes.ord[s-1]=",scenes.ord[s-1]," length(res)=",length(res))
128+
g <- res[[s-1]]
129+
g <- set_graph_attr(graph=g, name="SceneId", value=scene.stats[scenes.ord[s],COL_SCENE_ID])
104130
res[[s]] <- g
105131
}
106132
}
@@ -109,10 +135,11 @@ extract.static.graph.scenes <- function(inter.df, char.stats, volume.stats, vol=
109135
idx <- which(char.stats[,COL_NAME] %in% c(cbind(static.df[,COL_CHAR_FROM],static.df[,COL_CHAR_TO])))
110136
g <- graph_from_data_frame(d=static.df, directed=FALSE, vertices=char.stats[idx,])
111137
g$Scene <- cur.scene
112-
res[[cur.scene]] <- g
138+
res[[cur.scene.idx]] <- g
113139
}
114140

115141
prev.scene <- cur.scene
142+
prev.scene.idx <- cur.scene.idx
116143
}
117144

118145
# set up result variable
@@ -236,12 +263,15 @@ extract.static.graphs.base <- function(data)
236263
inter.df <- data$inter.df
237264
page.stats <- data$page.stats
238265
char.stats <- data$char.stats
266+
scene.stats <- data$scene.stats
267+
scene.chars <- data$scene.chars
239268
volume.stats <- data$volume.stats
240269

241270
# extract the full scene-based static graph
242271
g <- extract.static.graph.scenes(
243272
inter.df=inter.df,
244273
char.stats=char.stats,
274+
scene.stats=scene.stats, scene.chars=scene.chars,
245275
volume.stats=volume.stats
246276
)
247277
# record to file
@@ -270,6 +300,7 @@ extract.static.graphs.base <- function(data)
270300
g <- extract.static.graph.scenes(
271301
inter.df=inter.df,
272302
char.stats=char.stats,
303+
scene.stats=scene.stats, scene.chars=scene.chars,
273304
volume.stats=volume.stats,
274305
arc=a
275306
)
@@ -297,6 +328,7 @@ extract.static.graphs.base <- function(data)
297328
g <- extract.static.graph.scenes(
298329
inter.df=data$inter.df,
299330
char.stats=char.stats,
331+
scene.stats=scene.stats, scene.chars=scene.chars,
300332
volume.stats=volume.stats,
301333
vol=v
302334
)

0 commit comments

Comments
 (0)