-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsomething.sql
More file actions
206 lines (195 loc) · 12.3 KB
/
something.sql
File metadata and controls
206 lines (195 loc) · 12.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
CREATE PROCEDURE CREATE_INVOICE_KEY_VALUE_EXPENSE_LINES_CHILD_OF_LINE(invoiceID INT)
BEGIN
INSERT INTO invoice_key_value (
InvoiceID,
InvoiceKeyValueID,
sort_id,
page_id,
data_hash,
key_value,
form_field_value,
geom,
type,
sub_type,
item_group_order_id,
MandatoryFieldID
) SELECT
invoiceID,
DocumentBlockId AS InvoiceKeyValueID,
ITEM_ORDER_ID AS sort_id,
PageNumber AS page_id,
DataHash,
BlockText AS key_value,
'' AS form_field_value,
ST_AsGeoJSON (BlockGeom) as geom,
'EXPENSE_ROW_ITEM' AS type,
BlockType,
ITEM_GROUP_ORDER_ID as item_group_order_id,
(SELECT MandatoryFieldID FROM mandatory_field mf
INNER JOIN base_key bk ON mf.BaseKeyID=bk.BaseKeyID
INNER JOIN Invoice I ON I.ProjectID = mf.ProjectID
WHERE bkey = BlockType AND I.InvoiceID=invoiceID LIMIT 1) t7
FROM (
WITH RECURSIVE tree (RootID, DocumentBlockId, SortID, ColumnIndex, RowIndex, RootGeom,
BlockGeom, AnalyzeDocumentResultID, AnalyzeExpenseResultID, PageNumber,
BlockText, BlockType, EntityType, IDRelation, Generation, DataHash)
AS (SELECT AnalyzeDocumentBlockID,
document_block_id,
0 as SortID,
column_index,
row_index,
block_geom,
block_geom,
adb.AnalyzeDocumentResultID,
ap.AnalyzeExpenseResultID,
ap.page_number,
block_text,
block_type,
'PARENT',
AnalyzeDocumentBlockID,
0,
UUID() AS data_hash
FROM analyze_document_block adb
LEFT JOIN analyze_document_block_tree on AnalyzeDocumentBlockID = childID
INNER JOIN attachment_page ap
on adb.AnalyzeDocumentResultID = ap.AnalyzeDocumentResultID
INNER JOIN attachment a on ap.AttachmentID = a.AttachmentID
INNER JOIN Invoice I on a.EMailID = I.EMailID
WHERE I.InvoiceID = invoiceID
AND block_type = 'LINE'
UNION ALL
SELECT T.RootId,
B.document_block_id,
(C.childID - (SELECT MIN(AnalyzeDocumentBlockID)
FROM analyze_document_block adb2
INNER JOIN analyze_document_block_tree adbt2
ON adb2.AnalyzeDocumentBlockID = adbt2.childID
WHERE C.parentID = adbt2.parentID LIMIT 1) + 1 ) as SortID,
T.ColumnIndex,
T.RowIndex,
T.RootGeom,
B.block_geom,
T.AnalyzeDocumentResultID,
T.AnalyzeExpenseResultID,
ap.page_number,
B.block_text,
B.block_type,
C.entity_type,
C.childID,
T.Generation + 1,
T.DataHash
FROM tree AS T
INNER JOIN analyze_document_block_tree AS C ON C.parentID = T.IDRelation
INNER JOIN analyze_document_block B ON C.childID = B.AnalyzeDocumentBlockID
INNER JOIN attachment_page ap
on B.AnalyzeDocumentResultID = ap.AnalyzeDocumentResultID
WHERE T.Generation >= 0)
SELECT RootID,
DocumentBlockId,
BlockText,
PageNumber,
DataHash,
BlockGeom,
(SELECT ITEM_ORDER_ID FROM
(
SELECT
aelie.AnalyzeExpenseLineItemExpenseID as aelieid,
ITEM_ORDER_ID,
SUBSTR(UNHEX(MD5(ST_AsGeoJSON(adb.block_geom))),0,25) AS data_hash,
adb.document_block_id dbid,
adb.block_text btext,
adb.block_geom AS BlockGeom,
aelie.type AS BlockType,
POSITION(adb.block_text IN aelie.value_text) as strpos,
ap.page_number AS PageNumber
FROM
analyze_document_block adb
INNER JOIN attachment_page ap ON adb.AnalyzeDocumentResultID = ap.AnalyzeDocumentResultID
INNER JOIN attachment a on ap.AttachmentID = a.AttachmentID
INNER JOIN Invoice I on a.EMailID = I.EMailID
LEFT JOIN analyze_expense_line_item_expense aelie ON ap.AnalyzeExpenseResultID = aelie.AnalyzeExpenseResultID
INNER JOIN (
SELECT
adb2.document_block_id,
DENSE_RANK() OVER (x) AS ITEM_ORDER_ID
FROM
analyze_document_block adb2
INNER JOIN attachment_page ap2 ON adb2.AnalyzeDocumentResultID = ap2.AnalyzeDocumentResultID
INNER JOIN attachment a2 on ap2.AttachmentID = a2.AttachmentID
INNER JOIN Invoice I2 on a2.EMailID = I2.EMailID
LEFT JOIN analyze_expense_line_item_expense aelie2 ON ap2.AnalyzeExpenseResultID = aelie2.AnalyzeExpenseResultID
WHERE (aelie2.type = 'EXPENSE_ROW')
AND I2.InvoiceID = invoiceID
AND ST_Intersects(adb2.block_geom, (aelie2.value_geom)) = 1
AND block_type = 'WORD'
WINDOW x AS (ORDER BY aelie2.AnalyzeExpenseLineItemExpenseID)
ORDER BY aelie2.AnalyzeExpenseLineItemExpenseID
) t2 ON t2.document_block_id = adb.document_block_id
WHERE I.InvoiceID = invoiceID
AND ST_Intersects(adb.block_geom, (SELECT aelie.value_geom FROM analyze_expense_line_item_expense aelie2 WHERE aelie.AnalyzeExpenseLineItemExpenseID = aelie2.AnalyzeExpenseLineItemExpenseID)) = 1
AND block_type = 'WORD'
AND type <> 'EXPENSE_ROW'
AND POSITION(adb.block_text IN aelie.value_text) <> 0
GROUP BY adb.document_block_id, aelieid
ORDER BY ITEM_ORDER_ID, aelieid, strpos
) t77 WHERE t77.dbid = DocumentBlockId LIMIT 1) as ITEM_GROUP_ORDER_ID,
(SELECT BlockType FROM
(
SELECT
aelie.AnalyzeExpenseLineItemExpenseID as aelieid,
ITEM_ORDER_ID,
SUBSTR(UNHEX(MD5(ST_AsGeoJSON(adb.block_geom))),0,25) AS data_hash,
adb.document_block_id dbid,
adb.block_text btext,
adb.block_geom AS BlockGeom,
aelie.type AS BlockType,
POSITION(adb.block_text IN aelie.value_text) as strpos,
ap.page_number AS PageNumber
FROM
analyze_document_block adb
INNER JOIN attachment_page ap ON adb.AnalyzeDocumentResultID = ap.AnalyzeDocumentResultID
INNER JOIN attachment a on ap.AttachmentID = a.AttachmentID
INNER JOIN Invoice I on a.EMailID = I.EMailID
LEFT JOIN analyze_expense_line_item_expense aelie ON ap.AnalyzeExpenseResultID = aelie.AnalyzeExpenseResultID
INNER JOIN (
SELECT
adb2.document_block_id,
DENSE_RANK() OVER (x) AS ITEM_ORDER_ID
FROM
analyze_document_block adb2
INNER JOIN attachment_page ap2 ON adb2.AnalyzeDocumentResultID = ap2.AnalyzeDocumentResultID
INNER JOIN attachment a2 on ap2.AttachmentID = a2.AttachmentID
INNER JOIN Invoice I2 on a2.EMailID = I2.EMailID
LEFT JOIN analyze_expense_line_item_expense aelie2 ON ap2.AnalyzeExpenseResultID = aelie2.AnalyzeExpenseResultID
WHERE (aelie2.type = 'EXPENSE_ROW')
AND I2.InvoiceID = invoiceID
AND ST_Intersects(adb2.block_geom, (aelie2.value_geom)) = 1
AND block_type = 'WORD'
WINDOW x AS (ORDER BY aelie2.AnalyzeExpenseLineItemExpenseID)
ORDER BY aelie2.AnalyzeExpenseLineItemExpenseID
) t2 ON t2.document_block_id = adb.document_block_id
WHERE I.InvoiceID = invoiceID
AND ST_Intersects(adb.block_geom, (SELECT aelie.value_geom FROM analyze_expense_line_item_expense aelie2 WHERE aelie.AnalyzeExpenseLineItemExpenseID = aelie2.AnalyzeExpenseLineItemExpenseID LIMIT 1)) = 1
AND block_type = 'WORD'
AND type <> 'EXPENSE_ROW'
AND POSITION(adb.block_text IN aelie.value_text) <> 0
GROUP BY adb.document_block_id, aelieid
ORDER BY ITEM_ORDER_ID, aelieid, strpos
) t77 WHERE t77.dbid = DocumentBlockId LIMIT 1) as BlockType,
DENSE_RANK() OVER (x) AS ITEM_ORDER_ID
FROM tree
INNER JOIN (SELECT DISTINCT adb2.document_block_id as docblockid
FROM analyze_document_block adb2
INNER JOIN attachment_page ap2
ON adb2.AnalyzeDocumentResultID = ap2.AnalyzeDocumentResultID
INNER JOIN analyze_expense_line_item_expense aelie2
ON ap2.AnalyzeExpenseResultID = aelie2.AnalyzeExpenseResultID
WHERE ST_Intersects(adb2.block_geom,(aelie2.value_geom)) = 1
AND block_type = 'WORD'
AND aelie2.type != 'EXPENSE_ROW') t2 ON tree.DocumentBlockId = t2.docblockid
LEFT JOIN invoice_key_value ikv ON InvoiceKeyValueID = tree.DocumentBlockId
WHERE ikv.InvoiceKeyValueID IS NULL
GROUP BY docblockid
WINDOW x AS (PARTITION BY RootID ORDER BY SortID)
) t11;
END