-
Notifications
You must be signed in to change notification settings - Fork 2
Expand file tree
/
Copy pathDeleteConsecutiveAttributes.sql
More file actions
66 lines (56 loc) · 3.51 KB
/
DeleteConsecutiveAttributes.sql
File metadata and controls
66 lines (56 loc) · 3.51 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
--------------------------------------------------------------------------------------------------------------------------------
-- DELETE CONSECUTIVE ATTRIBUTES
--------------------------------------------------------------------------------------------------------------------------------
-- Current versions of the OnTopic Library evaluate whether or not an attribute value has changed since the previous version,
-- and doesn't create a new attribute version if it has. This wasn't true in previous versions, however. As a result, there are
-- some cases, and especially in older databases, where unnecessary duplicates occur for attribute values. This script will
-- detect concurrent duplicates and remove them from the database. This reduces the size of the database, without interfering
-- with the data integrity. If attribute values are not consecutive, the duplicates aren't deleted; e.g., if the value of
-- <c>Title</c> gets changed, then gets reverted, all three versions will be retained in the database.
--------------------------------------------------------------------------------------------------------------------------------
CREATE PROCEDURE [Utilities].[DeleteConsecutiveAttributes]
AS
SET NOCOUNT ON;
--------------------------------------------------------------------------------------------------------------------------------
-- CHECK INITIAL VALUES
--------------------------------------------------------------------------------------------------------------------------------
DECLARE @Count INT
SELECT @Count = COUNT(TopicID)
FROM Attributes
Print('Initial Count: ' + CAST(@Count AS VARCHAR) + ' Attributes in the database.');
--------------------------------------------------------------------------------------------------------------------------------
-- IDENTIFY GROUPS OF CONCURRENT DUPLICATES
--------------------------------------------------------------------------------------------------------------------------------
WITH GroupedValues AS (
SELECT TopicID,
AttributeKey,
AttributeValue,
Version,
ValueGroup = ROW_NUMBER() OVER(PARTITION BY TopicID, AttributeKey ORDER BY TopicID, AttributeKey, Version)
- ROW_NUMBER() OVER(PARTITION BY TopicID, AttributeKey, AttributeValue ORDER BY TopicID, AttributeKey, Version)
FROM Attributes
),
--------------------------------------------------------------------------------------------------------------------------------
-- RANK DUPLICATES BY DATE
--------------------------------------------------------------------------------------------------------------------------------
RankedValues AS (
SELECT TopicID,
AttributeKey,
AttributeValue,
Version,
ValueGroup,
ValueRank = ROW_NUMBER() OVER(PARTITION BY ValueGroup, TopicID, AttributeKey, AttributeValue ORDER BY TopicID, AttributeKey, Version)
FROM GroupedValues
)
--------------------------------------------------------------------------------------------------------------------------------
-- DELETE NEWER DUPLICATES
--------------------------------------------------------------------------------------------------------------------------------
DELETE
FROM RankedValues
WHERE ValueRank > 1;
--------------------------------------------------------------------------------------------------------------------------------
-- CHECK FINAL VALUES
--------------------------------------------------------------------------------------------------------------------------------
SELECT @Count = @Count - Count(TopicID)
FROM Attributes
Print('Final Count: ' + CAST(@Count AS VARCHAR) + ' Attributes were identified and deleted.')