www.williamhiggsbarrister.com.au

   
     Call Clerk on (02) 9336 5399
Tap To Call

VBA macro for extracting defined terms

‘Document assembly’ is the process by which an operator creates an entire document from a variety of component parts and then personalizes that document to meet the needs of the intended recipient.
Included within the scope of the term ‘document assembly’ are how source clauses are:

· created
· neutered, and
· assembled.

One of the first steps in automatically assembling legal documents is identifying terms in square brackets. This is the way that some automated document assembly systems work like Pathagoras. To create a legal document for automation and assembly begins with surrounding key terms such as party names and so on with square brackets eg [William Higgs] or [Higgs Limited]. You get the idea. The idea is that the square bracketed terms will often be repeated throughout a legal document, so it is efficient to define them.

The [Initial Unitholders] have paid the [Initial Sum] to the [Trustee] to establish a trust on the terms of the[Trust Deed].
The terms  Initial Unitholders, Initial Sum, Trustee and Trust Deed are all obviously variables and they would be repeated throughout a contract. A 'variable' is a place holder for personal data. You should strategically place variables within your source clauses where you want that data to appear. Consequently, those variables will also appear in the first draft of any newly assembled document.

At some stage in the assembly we will need to prompt the user to enter those terms.
So lets start by producing a Word Macro that searches for terms in [ ] and produce them in a new Word Document. Once we have the Terms we can then prompt our assembly program to complete the legal contract with those specific terms. This is called an Interview. An Interview is a series of questions asked at the beginning of the final assembly process regarding what you want included/discarded in the final legal document. It consists of questions that are presented in a menu format.
The interview is asked in a single window, with multiple questions being asked. More about the Interview in later blogs.

Here is some code you can enter into a macro in Microsoft Word. Obviously make sure your Word Document has some [ ] terms in it.

Option Explicit

' ===========
' ENTRY POINT
' ===========
Sub ExtractBracketedText_ToTable()
    Dim doc As Document: Set doc = ActiveDocument
    Dim bodyRng As Range: Set bodyRng = doc.StoryRanges(wdMainTextStory) ' main text only

    ' Regex to capture [ ... ] (non-nested), allowing across line breaks
    Dim rx As Object: Set rx = CreateObject("VBScript.RegExp")
    rx.Global = True
    rx.IgnoreCase = False
    rx.MultiLine = True
    ' Group 1 captures the inner text (without the brackets)
    rx.Pattern = "\[([\s\S]*?)\]"

    If Not rx.Test(bodyRng.Text) Then
        MsgBox "No bracketed text found in the main story.", vbInformation
        Exit Sub
    End If

    Dim matches As Object, m As Object
    Set matches = rx.Execute(bodyRng.Text)

    ' Prepare a simple record type via arrays
    Dim n As Long: n = matches.Count
    Dim arrText() As String, arrPage() As Long, arrCtx() As String, arrStart() As Long, arrLen() As Long
    ReDim arrText(1 To n)
    ReDim arrPage(1 To n)
    ReDim arrCtx(1 To n)
    ReDim arrStart(1 To n)
    ReDim arrLen(1 To n)

    Dim i As Long, startInDoc As Long, lengthInDoc As Long
    For i = 1 To n
        Set m = matches(i - 1)
        ' Map back to document coordinates
        startInDoc = bodyRng.Start + m.FirstIndex
        lengthInDoc = m.Length

        arrStart(i) = startInDoc
        arrLen(i) = lengthInDoc
        arrText(i) = CleanOneLine(m.SubMatches(0)) ' inner text only

        ' Page number (guard errors just in case)
        Dim hitRng As Range
        Set hitRng = doc.Range(startInDoc, startInDoc + lengthInDoc)
        On Error Resume Next
        arrPage(i) = hitRng.Information(wdActiveEndAdjustedPageNumber)
        If Err.Number <> 0 Then
            Err.Clear: arrPage(i) = 0
        End If
        On Error GoTo 0

        ' Context snippet around the entire [ ... ] region
        arrCtx(i) = GetContextSnippet(doc, startInDoc, lengthInDoc, 45)
    Next i

    RenderResultsTable arrText, arrPage, arrCtx
    MsgBox "Bracketed-text report created.", vbInformation
End Sub

' =================
' RENDER TO NEW DOC
' =================
Private Sub RenderResultsTable(ByRef arrText() As String, ByRef arrPage() As Long, ByRef arrCtx() As String)
    Dim outDoc As Document: Set outDoc = Documents.Add
    outDoc.Activate

    Selection.Style = wdStyleHeading1
    Selection.TypeText "Bracketed Text Report"
    Selection.TypeParagraph
    Selection.Style = wdStyleNormal
    Selection.TypeText "Generated: " & Format(Now, "yyyy-mm-dd hh:nn")
    Selection.TypeParagraph: Selection.TypeParagraph

    Dim n As Long: n = UBound(arrText) - LBound(arrText) + 1
    If n <= 0 Then
        Selection.TypeText "No results."
        Exit Sub
    End If

    Dim tbl As Table
    Set tbl = outDoc.Tables.Add(Selection.Range, n + 1, 4)
    With tbl
        .Style = "Table Grid"
        .Cell(1, 1).Range.Text = "#"
        .Cell(1, 2).Range.Text = "Extracted Text"
        .Cell(1, 3).Range.Text = "Page"
        .Cell(1, 4).Range.Text = "Context"
        .Rows(1).Range.Bold = True
    End With

    Dim i As Long, row As Long
    row = 2
    For i = LBound(arrText) To UBound(arrText)
        tbl.Cell(row, 1).Range.Text = CStr(i - LBound(arrText) + 1)
        tbl.Cell(row, 2).Range.Text = arrText(i)
        If arrPage(i) > 0 Then
            tbl.Cell(row, 3).Range.Text = CStr(arrPage(i))
        Else
            tbl.Cell(row, 3).Range.Text = "-"
        End If
        tbl.Cell(row, 4).Range.Text = arrCtx(i)
        row = row + 1
    Next i

    ' Nice column widths (percent)
    tbl.Columns(1).PreferredWidthType = wdPreferredWidthPercent: tbl.Columns(1).PreferredWidth = 6
    tbl.Columns(2).PreferredWidthType = wdPreferredWidthPercent: tbl.Columns(2).PreferredWidth = 39
    tbl.Columns(3).PreferredWidthType = wdPreferredWidthPercent: tbl.Columns(3).PreferredWidth = 8
    tbl.Columns(4).PreferredWidthType = wdPreferredWidthPercent: tbl.Columns(4).PreferredWidth = 47
End Sub

' ========================
' CONTEXT & SMALL HELPERS
' ========================
Private Function GetContextSnippet(doc As Document, ByVal startInDoc As Long, ByVal lengthInDoc As Long, ByVal wing As Long) As String
    Dim L As Long, R As Long
    L = MaxLng(0, startInDoc - wing)
    R = MinLng(doc.Content.End, startInDoc + lengthInDoc + wing)

    Dim leftR As Range, rightR As Range, hitR As Range
    Set hitR = doc.Range(startInDoc, startInDoc + lengthInDoc)
    Set leftR = doc.Range(L, startInDoc)
    Set rightR = doc.Range(startInDoc + lengthInDoc, R)

    GetContextSnippet = "…" & CleanOneLine(leftR.Text) & "[" & CleanOneLine(hitR.Text) & "]" & CleanOneLine(rightR.Text) & "…"
End Function

Private Function CleanOneLine(ByVal s As String) As String
    s = Replace$(s, vbCr, " ")
    s = Replace$(s, vbLf, " ")
    s = Replace$(s, Chr$(160), " ")
    Do While InStr(s, "  ") > 0
        s = Replace$(s, "  ", " ")
    Loop
    CleanOneLine = Trim$(s)
End Function

Private Function MinLng(a As Long, b As Long) As Long
    If a < b Then MinLng = a Else MinLng = b
End Function

Private Function MaxLng(a As Long, b As Long) As Long
    If a > b Then MaxLng = a Else MaxLng = b
End Function

Good luck