Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Menu
Open sidebar
Benoit Barbot
ProcessMiningCosmos
Commits
26f4b587
Commit
26f4b587
authored
Jan 27, 2022
by
Pierre Cry
Browse files
all
parent
6f8ed9aa
Changes
131
Expand all
Hide whitespace changes
Inline
Side-by-side
Algorithms/ActiTraC/cluster_id.R
0 → 100644
View file @
26f4b587
cluster_id
<-
function
(
x
){
#Fonction qui permet simplement de recuperer l'indice de la colonne des clusters dans un eventlog passe par actitrac
if
(
!
"eventlog"
%in%
class
(
x
)){
stop
(
'Please provide object of class "eventlog".'
)
}
which
(
colnames
(
x
)
==
"clusters"
)
}
Algorithms/ActiTraC/pm_clusterwise.R
0 → 100644
View file @
26f4b587
pm_clusterwise
<-
function
(
x
,
clus_index
,
discovery
=
"heuristics"
,
...
){
#Fonction qui prend un resultat d'actitrac en entree et donne en sortie les modeles de process mining par cluster sous forme de liste.
if
(
!
"eventlog"
%in%
class
(
x
)){
stop
(
'This function only accepts inputs of class "eventlog".'
)
}
colnames
(
x
)[
clus_index
]
<-
"clusters"
if
(
min
(
x
$
clusters
)
==
0
){
x
<-
filter
(
x
,
clusters
>
0
)
}
n
<-
length
(
unique
(
x
$
clusters
))
model
<-
list
(
NA
)
for
(
i
in
1
:
n
){
clus
<-
filter
(
x
,
clusters
==
i
)
model
[[
i
]]
<-
switch
(
discovery
,
# "heuristics"=as.petrinet(causal_net(clus,dependencies=dependency_matrix(clus,dependency_type=dependency_type_fhm(all_connected="TRUE")))),
"heuristics"
=
causal_net
(
clus
,
...
),
"inductive"
=
discovery_inductive
(
clus
),
"alpha"
=
discovery_alpha
(
clus
))
names
(
model
)[
i
]
<-
paste0
(
"model"
,
i
)
}
model
$
discovery
<-
discovery
model
}
\ No newline at end of file
Algorithms/ActiTraC/py_actitrac_MRA.R
0 → 100644
View file @
26f4b587
py_actitrac_MRA
<-
function
(
L
,
nb_clus
=
2
,
tf
=
.97
,
mcs
=
5
,
w
=
25
,
N
=
TRUE
,
verbose
=
TRUE
,
only_dist
=
FALSE
){
#
#Fonction qui execute l'algorithme de clustering ActiTraC sur les traces d'un eventlog.
#Cette version utilise une version de pm4py importee de python.
#Liste des parametres :
#- L : log (data frame) ou eventlog genere par bupaR
#- nb_clus : numerique / nombre de clusters a trouver
#- tf : numerique / adequation minimum des modeles generes par actitrac pour decider de l'inclusion d'une trace a un cluster
#- mcs : numerique / taille minimum des clusters
#- w : numerique / proportion (en pourcent) de traces considerees pour chaque ajout a un cluster
#- N : booleen / TRUE : les traces ignorees par l'algorithme forment un groupe a part. FALSE : Les traces ignorees sont affectees au cluster
# donnant l'adequation la plus elevee
#- verbose : booleen / TRUE : l'algorithme imprime un suivi de chaque etape
#- only_dist : booleen / TRUE : l'algorithme ne calcule que la matrice des distances euclidiennes MRA. Renvoie la matrice des distances "dist"
# et le tableau des repetitions "MRA"
#
#
#Installation / chargement des packages necessaires
using
<-
function
(
...
){
#fonction qui prend un vecteur de caracteres et verifie s'ils sont installes, les installe si non, puis les charge
libs
<-
unlist
(
list
(
...
))
req
<-
unlist
(
lapply
(
libs
,
require
,
character.only
=
TRUE
))
need
<-
libs
[
req
==
FALSE
]
if
(
length
(
need
)
>
0
){
install.packages
(
need
)
lapply
(
need
,
require
,
character.only
=
TRUE
)
}
}
using
(
"magrittr"
,
"heuristicsmineR"
,
"dplyr"
,
"pm4py"
,
"data.table"
,
"cluster"
,
"pbapply"
,
"bupaR"
,
"processmapR"
,
"reticulate"
)
reticulate
::
import
(
"pm4py"
)
if
(
!
"eventlog"
%in%
class
(
L
)){
id
<-
select.list
(
colnames
(
L
),
graphics
=
TRUE
,
title
=
"Case identifier variable:"
)
activity
<-
select.list
(
colnames
(
L
)[
-
which
(
colnames
(
L
)
==
id
)],
graphics
=
TRUE
,
title
=
"Activity variable:"
)
timestamp
<-
select.list
(
colnames
(
L
)[
-
which
(
colnames
(
L
)
%in%
c
(
id
,
activity
))],
graphics
=
TRUE
,
title
=
"Timestamp variable:"
)
if
(
verbose
)
cat
(
"\n"
,
"Conversion to eventlog..."
,
"\n"
)
l
<-
simple_eventlog
(
L
,
id
,
activity
,
timestamp
)
}
else
{
id
<-
case_id
(
L
)
activity
<-
activity_id
(
L
)
timestamp
<-
timestamp
(
L
)
l
<-
L
}
color_sup
<-
function
(){
rstudio_with_ansi_support
<-
function
()
{
if
(
Sys.getenv
(
"RSTUDIO"
,
""
)
==
""
)
return
(
FALSE
)
## This is set *before* the rstudio initialization, in 1.1 and above
if
((
cols
<-
Sys.getenv
(
"RSTUDIO_CONSOLE_COLOR"
,
""
))
!=
""
&&
!
is.na
(
as.numeric
(
cols
)))
{
return
(
TRUE
)
}
## This only works if the initialization is complete
requireNamespace
(
"rstudioapi"
,
quietly
=
TRUE
)
&&
rstudioapi
::
isAvailable
()
&&
rstudioapi
::
hasFun
(
"getConsoleHasColor"
)
}
enabled
<-
getOption
(
"crayon.enabled"
)
if
(
!
is.null
(
enabled
))
{
return
(
isTRUE
(
enabled
))
}
if
(
rstudio_with_ansi_support
()
&&
sink.number
()
==
0
)
{
return
(
TRUE
)
}
if
(
!
isatty
(
stdout
()))
{
return
(
FALSE
)
}
if
(
os_type
()
==
"windows"
)
{
if
(
Sys.getenv
(
"ConEmuANSI"
)
==
"ON"
)
{
return
(
TRUE
)
}
if
(
Sys.getenv
(
"CMDER_ROOT"
)
!=
""
)
{
return
(
TRUE
)
}
return
(
FALSE
)
}
if
(
inside_emacs
()
&&
!
is.na
(
emacs_version
()[
1
])
&&
emacs_version
()[
1
]
>=
23
)
{
return
(
TRUE
)
}
if
(
"COLORTERM"
%in%
names
(
Sys.getenv
()))
{
return
(
TRUE
)
}
if
(
Sys.getenv
(
"TERM"
)
==
"dumb"
)
{
return
(
FALSE
)
}
grepl
(
"^screen|^xterm|^vt100|color|ansi|cygwin|linux"
,
Sys.getenv
(
"TERM"
),
ignore.case
=
TRUE
,
perl
=
TRUE
)
}
#Detecte si les couleurs sont supportees par la console
if
(
color_sup
())
using
(
"crayon"
)
crayon_is_loaded
<-
"crayon"
%in%
installed.packages
()[,
"Package"
]
###### Bloc de fonctions utilisees pour le clustering
suffixes
<-
function
(
x
,
sep
=
","
){
#Retourne les suffixes d'une s?quence (apr?s d?composition par seqdecomp()) dans l'ordre lexicographique
x
<-
rev
(
x
)
concat
<-
paste
(
x
,
collapse
=
sep
)
suffixes
<-
substring
(
concat
,
1
,
cumsum
(
c
(
nchar
(
x
[[
1
]]),
nchar
(
x
[
-1
])
+
nchar
(
sep
))))
%>%
strsplit
(
split
=
","
)
%>%
lapply
(
rev
)
%>%
sapply
(
paste
,
collapse
=
","
)
suffixes
}
#Cherche l'ensemble des suffixes dans une trace
LCP
<-
function
(
w
,
sep
=
","
){
#Fonction qui prend une s?quence en entr?e et donne en sortie :
#Les suffixes "suffixes" dans l'ordre lexicographique,
#la permutation "r" ayant permis de les mettre dans l'ordre lexicographique,
#la permutation "p" permettant de revenir ? l'ordre initial
#un data.frame contenant la taille de lcp[i] pour tout i et le pr?fixe commun
#w est une sequence seqdecomp issue d'une trace
w
<-
paste0
(
c
(
w
,
"$"
),
collapse
=
","
)
#On rajoute un caract?ee hors de l'alphabet pour signifier la fin,
#Permet de compter l'etape finale reelle dans les comparaisons a droite et a gauche
#Si l'on s'en tient a l'article, r doit representer l'ordre des symboles avec lesquels chaque suffixe commence dans w, une fois ces suffixes
#dans l'ordre lexicographique, pas l'ordre des suffixes eux-memes une fois classes. On peut donc ecrire, pour avoir la bonne permutation :
suf
<-
suffixes
(
w
,
sep
=
sep
)[
length
(
w
)
:
1
]
#On inverse pour avoir les indices depuis le debut de w au lieu de la fin
r
<-
order
(
suf
,
method
=
"radix"
)
#Permutation pour avoir l'ordre lexicographique
suf
<-
suf
[
r
]
p
<-
order
(
r
)
#Permutation inverse, qui permet de revenir aux suffixes dans l'ordre d'origine
#LCP (Longest common prefix) pour chaque paire de suffixes consecutifs :
if
(
length
(
suf
)
==
1
){
lcpArray
<-
data.frame
(
n
=
1
,
prefix
=
w
)
r
=
1
p
=
1
list
(
suffixes
=
suf
,
r
=
r
,
p
=
p
,
lcpArray
)
}
else
{
lsuf
<-
strsplit
(
suf
,
split
=
sep
)
#On separe les mots dans chaque suffixe
#Pour le suffixe i : le prefixe commun est de taille k si les symboles 1:k de ce suffixe sont aussi les k premiers symboles du suffixe i+1,
#dans le bon ordre.
#On utilise pmatch, qui renvoie les indices ou les deux suffixes ont des symboles en commun. Il faut ensuite des indices consecutifs
#a partir de 1 pour avoir un prefixe commun.
lcp
<-
sapply
(
1
:
(
length
(
suf
)
-1
),
function
(
x
){
if
(
lsuf
[[
x
]][
1
]
==
lsuf
[[
x
+1
]][
1
]){
(((
pmatch
(
lsuf
[[
x
]],
lsuf
[[
x
+1
]])
==
1
:
length
(
lsuf
[[
x
]]))
*
1
)
%>%
as.character
%>%
na.exclude
%>%
paste0
(
collapse
=
""
)
%>%
strsplit
(
split
=
"0"
))[[
1
]][
1
]
%>%
nchar
}
else
0
})
lcpLab
<-
c
(
NA
,
length
(
lcp
))
for
(
i
in
1
:
length
(
lcp
)){
if
(
lcp
[
i
]
!=
0
){
lcpLab
[
i
]
<-
paste
(
lsuf
[[
i
]][
1
:
(
lcp
[
i
])],
collapse
=
sep
)
}
else
{
lcpLab
[
i
]
<-
""
}
}
list
(
suffixes
=
suf
,
r
=
r
,
p
=
p
,
lcpArray
=
data.frame
(
n
=
lcp
,
prefix
=
lcpLab
))
}
}
#Cherche les plus longs prefixes communs de chaque suffixe d'une trace
findmaxr1
<-
function
(
w
,
ml
=
1
,
sep
=
","
){
#Fonction qui prend une trace w, un nombre minimum de repetitions ml et un separateur sep en entree
#Donne en sortie l'ensemble des repetitions maximales de la sequence, desquelles on a retire les repetitions de taille 1 (cf article)
maxr
<-
list
()
count
<-
numeric
(
0
)
lcpComp
<-
LCP
(
w
,
sep
=
sep
)
if
(
sum
(
lcpComp
$
lcpArray
$
n
)
==
0
){
maxr
}
else
{
w
<-
paste0
(
c
(
w
,
"$"
),
collapse
=
sep
)
#On rajoute un caractere hors de l'alphabet pour signifier la fin pour inclure le caractere final ds comparaisons
w
<-
seqdecomp
(
w
,
sep
=
sep
)
n
<-
length
(
w
)
r
<-
lcpComp
$
r
p
<-
lcpComp
$
p
lcp
<-
lcpComp
$
lcpArray
$
n
S
<-
c
(
which
(
lcp
<
ml
),
n
)
I
<-
c
(
order
(
lcp
))
if
(
max
(
lcp
)
<
ml
)
maxr
else
{
initial
<-
min
(
which
(
lcp
[
I
]
>=
ml
))
cpt
<
-1
for
(
t
in
initial
:
(
n
-1
)){
i
<-
I
[
t
]
p_i
<-
max
(
S
[
S
<
i
])
+1
n_i
<-
min
(
S
[
S
>
i
])
S
<-
union
(
S
,
i
)
if
((
p_i
==
1
|
lcp
[
p_i
-1
]
!=
lcp
[
i
])
&
(
n_i
==
n
|
lcp
[
n_i
]
!=
lcp
[
i
])){
#Si repetition maximale a droite
#Tests de maximalite a gauche. On les separe car certains sont mutuellement exclusifs, les mettre dans le meme "if" mene a une erreur
if
(
r
[
p_i
]
==
1
)
{
maxr
[[
cpt
]]
<-
w
[
r
[
i
]
:
(
r
[
i
]
+
lcp
[
i
]
-1
)]
count
[
cpt
]
<-
n_i
-
p_i
+1
cpt
<-
cpt
+1
}
else
{
if
(
r
[
n_i
]
==
1
){
maxr
[[
cpt
]]
<-
w
[
r
[
i
]
:
(
r
[
i
]
+
lcp
[
i
]
-1
)]
count
[
cpt
]
<-
n_i
-
p_i
+1
cpt
<-
cpt
+1
}
else
{
if
(
w
[
r
[
p_i
]
-1
]
!=
w
[
r
[
n_i
]
-1
]
|
(
p
[
r
[
n_i
]
-1
]
-
p
[
r
[
p_i
]
-1
]
!=
n_i
-
p_i
)){
maxr
[[
cpt
]]
<-
w
[
r
[
i
]
:
(
r
[
i
]
+
lcp
[
i
]
-1
)]
count
[
cpt
]
<-
n_i
-
p_i
+1
cpt
<-
cpt
+1
}
}
}
}
}
w
<-
paste
(
w
[
-
length
(
w
)],
collapse
=
sep
)
n
<-
count
maxr
<-
sapply
(
maxr
,
paste
,
collapse
=
sep
)
if
(
length
(
maxr
)
==
0
)
list
()
else
{
data.frame
(
trace
=
w
,
repetitions
=
maxr
,
n
)
}
}
}
}
#Trouve les repetitions maximales dans une trace
findmaxr
<-
function
(
dat
,
ml
=
2
,
sep
=
","
){
#Fonction qui prend en entree un log au format eventlog de bupaR
#Donne en sortie l'ensemble des repetitions maximales de taille >1 pour chaque trace de l'eventlog
x
<-
traces
(
dat
)
$
trace
lmax
<-
pblapply
(
x
,
findmaxr1
,
sep
=
sep
,
ml
=
ml
)
#lmax<-lmax[which(sapply(lmax,function(x) length(x)>1))]
total
<-
data.table
::
rbindlist
(
lmax
)
total
$
trace
<-
as.character
(
total
$
trace
)
total
}
#Trouve les repetitions maximales de chaque trace d'un log
###### Fin du bloc de fonctions
###### Initialisation
if
(
verbose
)
cat
(
"\n"
,
"INITIALIZATION"
,
"\n"
)
if
(
verbose
)
cat
(
"\n"
,
"Calculating traces frequency and ranks..."
,
"\n"
)
GL
<-
traces
(
l
)
GL
$
rank_trace
<
-1
:
nrow
(
GL
)
cl
<-
case_list
(
l
)
GL
$
`case:concept:name`
<-
cl
$
`case:concept:name`
[
!
duplicated
(
cl
$
trace
)]
GL
<-
GL
[,
c
(
5
,
1
:
4
)]
GL
<-
as.data.frame
(
GL
)
CS
<-
NULL
R
<-
GL
#dpi restants, pour l'instant aucun n'est traite
if
(
verbose
)
cat
(
"\n"
,
"Finding maximal repetitions in eventlog..."
,
"\n"
)
result
<-
findmaxr
(
l
)
m
<-
dcast
(
result
,
trace
~
repetitions
,
value.var
=
"n"
)
m
<-
as.data.frame
(
m
)
#Ajout
m
<-
m
[
match
(
GL
$
trace
,
m
$
trace
),]
m
$
trace
<-
GL
[,
id
]
m
[
is.na
(
m
)]
<
-0
colnames
(
m
)[
which
(
colnames
(
m
)
==
"trace"
)]
<-
id
rownames
(
m
)
<-
NULL
if
(
verbose
)
cat
(
"\n"
,
"Constructing distance matrix..."
,
"\n"
)
d
<-
m
[,
-1
]
%>%
daisy
(
metric
=
"euclidean"
,
warnType
=
FALSE
)
%>%
as.matrix
dimnames
(
d
)
<-
list
(
m
[,
id
],
m
[,
id
])
if
(
only_dist
){
return
(
list
(
MRA
=
m
,
dist
=
d
))
}
cpt
<
-1
#Numero de cluster
###### Boucle while
if
(
verbose
)
cat
(
"\n"
,
"Starting clustering phase."
,
"\n"
,
"\n"
)
while
(
cpt
<=
nb_clus
&
nrow
(
R
)
!=
0
){
C
<-
NULL
#Ensemble des dpi dans le cluster
I
<-
NULL
#Ensemble des dpi ignores
if
(
verbose
)
cat
(
"###############################################################################################################"
,
"\n"
)
if
(
verbose
)
cat
(
"\n"
,
"CLUSTER"
,
cpt
,
"\n"
)
##### Phase 1 : Selection
if
(
verbose
)
cat
(
"\n"
,
"___________________________________________________________________________"
,
"\n"
,
"\n"
,
" "
,
"Selection"
,
" "
,
"\n"
,
"___________________________________________________________________________"
,
"\n"
)
repeat
{
if
(
is.null
(
I
))
RI
<-
R
else
RI
<-
R
[
-
which
(
R
[,
id
]
%in%
I
[,
id
]),]
# if(verbose) cat("\n","Traces left for cluster",cpt,":",nrow(RI)," | ")
W
<-
unique
(
rbind
(
RI
[
1
:
(
nrow
(
RI
)
*
w
/
100
),],
RI
[
which.max
(
RI
$
relative_frequency
),]))
if
(
verbose
)
cat
(
"\n"
,
"Closest trace out of"
,
nrow
(
W
),
"|"
,
nrow
(
RI
),
"remaining | "
)
if
(
is.null
(
C
)
|
nrow
(
W
)
==
1
){
cur_dpi
<-
RI
[
1
,]
#dpi considere s'il n'y a aucun dpi dans le cluster et qu'il n'y a qu'un dpi dans le top w% de RI
}
else
{
#Sinon, on prend le dpi dont la distance euclidienne MRA moyenne au cluster est minimale
if
(
nrow
(
C
)
==
1
){
cur_dpi
<-
W
[
which.min
(
d
[
as.character
(
rownames
(
d
))
%in%
W
[,
id
],
as.character
(
colnames
(
d
))
%in%
C
[,
id
]]),]
}
else
{
cur_dpi
<-
W
[
which.min
(
rowMeans
(
d
[
as.character
(
rownames
(
d
))
%in%
W
[,
id
],
as.character
(
colnames
(
d
))
%in%
C
[,
id
]])),]
}
}
pn
<-
pm4py
$
algo
$
discovery
$
heuristics
$
factory
$
apply
(
l
[
as.data.frame
(
l
)[,
id
]
%in%
union
(
C
[,
id
],
cur_dpi
[,
id
]),])
if
(
is.null
(
C
))
ev
<
-1
else
ev
<-
evaluation_fitness
(
l
[
as.data.frame
(
l
)[,
id
]
%in%
union
(
C
[,
id
],
cur_dpi
[,
id
]),],
pn
[[
1
]],
pn
[[
2
]],
pn
[[
3
]])
$
log_fitness
if
(
verbose
)
cat
(
"fitness :"
,
format
(
round
(
ev
,
4
),
nsmall
=
4
))
if
(
ev
>=
tf
){
C
<-
rbind
(
C
,
cur_dpi
)
R
<-
R
[
-
which
(
R
[,
id
]
==
cur_dpi
[,
id
]),]
}
else
{
if
(
verbose
){
if
(
crayon_is_loaded
)
cat
(
red
(
" -> Unfit"
))
else
cat
(
" -> Unfit"
)
}
if
(
nrow
(
C
)
>=
mcs
){
if
(
verbose
)
cat
(
"\n"
,
"\n"
,
"Cluster"
,
cpt
,
"reached at least required cluster size and current trace doesn't fit"
)
if
(
verbose
)
cat
(
"\n"
,
"___________________________________________________________________________"
,
"\n"
,
"\n"
,
" "
,
"Look ahead"
,
" "
,
"\n"
,
"___________________________________________________________________________"
,
"\n"
)
if
(
verbose
)
cat
(
"\n"
,
"Adding remaining fitting traces to cluster"
,
cpt
,
"..."
)
pn
<-
pm4py
$
algo
$
discovery
$
heuristics
$
factory
$
apply
(
l
[
as.data.frame
(
l
)[,
id
]
%in%
C
[,
id
],])
##### Phase 2 : Look ahead
#La taille min de cluster a ete atteinte
#On rajoute a C les traces dont l'adequation vaut 100%, le reste est ignore et sera teste pour le cluster suivant
evbool
<-
pbsapply
(
1
:
nrow
(
R
),
function
(
x
)
evaluation_fitness
(
l
[
as.data.frame
(
l
)[,
id
]
%in%
R
[
x
,
id
],],
pn
[[
1
]],
pn
[[
2
]],
pn
[[
3
]])
$
perc_fit_traces
)
==
100
if
(
verbose
)
cat
(
"\n"
,
sum
(
evbool
),
"traces added to cluster"
,
cpt
,
"\n"
)
C
<-
rbind
(
C
,
R
[
evbool
,])
R
<-
R
[
evbool
==
FALSE
,]
break
}
else
{
if
(
verbose
){
if
(
crayon_is_loaded
)
cat
(
red
(
" -> Ignored"
))
else
cat
(
" -> Ignored"
)
}
I
<-
rbind
(
I
,
cur_dpi
)
}
}
if
(
nrow
(
R
)
==
0
|
sum
(
R
[,
id
]
%in%
I
[,
id
])
==
nrow
(
R
)){
if
(
verbose
)
cat
(
"\n"
,
"cluster"
,
cpt
,
"done"
,
"\n"
,
"\n"
)
break
}
}
C
$
clus
<-
cpt
CS
<-
rbind
(
CS
,
C
)
if
(
verbose
)
cat
(
"\n"
,
"Cluster"
,
cpt
,
"done"
,
"\n"
,
"\n"
)
cpt
<-
cpt
+1
}
##### Phase 3 : Residual traces resolution
if
(
verbose
)
cat
(
"###############################################################################################################"
,
"\n"
)
if
(
verbose
)
cat
(
"\n"
,
"___________________________________________________________________________"
,
"\n"
,
"\n"
,
" "
,
"Residual trace resolution"
,
" "
,
"\n"
,
"___________________________________________________________________________"
,
"\n"
)
if
(
N
){
if
(
verbose
)
cat
(
"\n"
,
nrow
(
R
),
"ignored traces form a separate cluster of index 0"
,
"\n"
)
if
(
nrow
(
R
)
!=
0
){
R
$
clus
<
-0
CS
<-
rbind
(
CS
,
R
)
}
}
else
{
if
(
nrow
(
R
)
!=
0
){
if
(
verbose
)
cat
(
"\n"
,
nrow
(
R
),
"Ignored traces are affected to best fitting cluster..."
,
"\n"
,
"\n"
)
mods
<-
lapply
(
sort
(
unique
(
CS
$
clus
)),
function
(
x
)
pm4py
$
algo
$
discovery
$
heuristics
$
factory
$
apply
(
l
[
as.data.frame
(
l
)[,
id
]
%in%
CS
[
CS
$
clus
==
x
,
id
],]))
ignored
<-
R
[,
id
]
fits
<-
sapply
(
1
:
length
(
mods
),
function
(
y
)
pbsapply
(
ignored
,
evaluation_fitness
(
l
[
as.data.frame
(
l
)[,
id
]
==
x
,],
mods
[[
y
]][[
1
]],
mods
[[
y
]][[
2
]],
mods
[[
y
]][[
3
]])
$
average_trace_fitness
))
%>%
data.frame
%>%
setNames
(
1
:
length
(
mods
))
R
$
clus
<-
apply
(
fits
,
1
,
which.max
)
CS
<-
rbind
(
CS
,
R
)
}
else
{
cat
(
"\n"
,
"No traces left ignored."
)
}
}
if
(
verbose
)
cat
(
"\n"
,
"Expanding clustering to individual cases..."
)
dat_traces
<-
case_list
(
l
)
dat_traces
$
clus
<-
CS
$
clus
[
match
(
dat_traces
$
trace
,
CS
$
trace
)]
colnames
(
dat_traces
)[
1
]
<-
id
#
vars
<-
c
(
case_id
(
l
),
activity_id
(
l
),
timestamp
(
l
))
#assigner le clustering a l'eventlog
l
$
clusters
<-
dat_traces
$
clus
[
match
(
as.data.frame
(
l
)[,
vars
[
1
]],
as.data.frame
(
dat_traces
)[,
vars
[
1
]])]
#
final
<-
list
(
MRA
=
m
,
dist
=
d
,
clustered_log
=
l
)
class
(
final
)
<-
"act.res"
if
(
verbose
)
cat
(
"Done."
,
"\n"
)
final
}
Algorithms/ActiTraC/py_actitrac_freq.R
0 → 100644
View file @
26f4b587
py_actitrac_freq
<-
function
(
L
,
nb_clus
=
2
,
tf
=
.97
,
mcs
=
5
,
N
=
TRUE
,
verbose
=
TRUE
,
only_dist
=
FALSE
){
library
(
pm4py
)
#
#Fonction qui execute l'algorithme de clustering ActiTraC sur les traces d'un eventlog.
#Cette version utilise une version de pm4py importee de python
#Liste des parametres :
#- L : log (data frame) ou eventlog genere par bupaR
#- nb_clus : numerique / nombre de clusters a trouver
#- tf : numerique / adequation minimum des modeles generes par actitrac pour decider de l'inclusion d'une trace a un cluster
#- mcs : numerique / taille minimum des clusters
#- N : booleen / TRUE : les traces ignorees par l'algorithme forment un groupe a part. FALSE : Les traces ignorees sont affectees au cluster
# donnant l'adequation la plus elevee
#- verbose : booleen / TRUE : l'algorithme imprime un suivi de chaque etape
#
#
#Installation / chargement des packages necessaires
using
<-
function
(
...
){
#fonction qui prend un vecteur de caracteres et verifie s'ils sont installes, les installe si non, puis les charge
libs
<-
unlist
(
list
(
...
))
req
<-
unlist
(
lapply
(
libs
,
require
,
character.only
=
TRUE
))
need
<-
libs
[
req
==
FALSE
]
if
(
length
(
need
)
>
0
){
install.packages
(
need
)
lapply
(
need
,
require
,
character.only
=
TRUE
)
}
}
using
(
"magrittr"
,
"heuristicsmineR"
,
"dplyr"
,
"TraMineR"
,
"pm4py"
,
"data.table"
,
"cluster"
,
"pbapply"
,
"bupaR"
,
"processmapR"
)
if
(
!
"eventlog"
%in%
class
(
L
)){
id
<-
select.list
(
colnames
(
L
),
graphics
=
TRUE
,
title
=
"Case identifier variable:"
)
activity
<-
select.list
(
colnames
(
L
)[
-
which
(
colnames
(
L
)
==
id
)],
graphics
=
TRUE
,
title
=
"Activity variable:"
)
timestamp
<-
select.list
(
colnames
(
L
)[
-
which
(
colnames
(
L
)
%in%
c
(
id
,
activity
))],
graphics
=
TRUE
,
title
=
"Timestamp variable:"
)
if
(
verbose
)
cat
(
"\n"
,
"Conversion to eventlog..."
,
"\n"
)
l
<-
simple_eventlog
(
L
,
id
,
activity
,
timestamp
)
}
else
{
id
<-
case_id
(
L
)
activity
<-
activity_id
(
L
)
timestamp
<-
timestamp
(
L
)
l
<-
L
}
color_sup
<-
function
(){
rstudio_with_ansi_support
<-
function
()
{
if
(
Sys.getenv
(
"RSTUDIO"
,
""
)
==
""
)
return
(
FALSE
)
## This is set *before* the rstudio initialization, in 1.1 and above
if
((
cols
<-
Sys.getenv
(
"RSTUDIO_CONSOLE_COLOR"
,
""
))
!=
""
&&
!
is.na
(
as.numeric
(
cols
)))
{
return
(
TRUE
)
}
## This only works if the initialization is complete
requireNamespace
(
"rstudioapi"
,
quietly
=
TRUE
)
&&
rstudioapi
::
isAvailable
()
&&
rstudioapi
::
hasFun
(
"getConsoleHasColor"
)
}
enabled
<-
getOption
(
"crayon.enabled"
)
if
(
!
is.null
(
enabled
))
{
return
(
isTRUE
(
enabled
))
}
if
(
rstudio_with_ansi_support
()
&&
sink.number
()
==
0
)
{
return
(
TRUE
)
}