Skip to content

✨ [RUM-10415] Add support for action name allowlist masking #3648

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 16 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/core/src/domain/configuration/configuration.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ export const DefaultPrivacyLevel = {
ALLOW: 'allow',
MASK: 'mask',
MASK_USER_INPUT: 'mask-user-input',
MASK_UNLESS_ALLOWLISTED: 'mask-unless-allowlisted',
} as const
export type DefaultPrivacyLevel = (typeof DefaultPrivacyLevel)[keyof typeof DefaultPrivacyLevel]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import { createHooks } from '../hooks'
import type { RumMutationRecord } from '../../browser/domMutationObservable'
import type { ActionContexts } from './actionCollection'
import { startActionCollection } from './actionCollection'
import { ActionNameSource } from './getActionNameFromElement'

describe('actionCollection', () => {
const lifeCycle = new LifeCycle()
Expand Down Expand Up @@ -50,7 +51,7 @@ describe('actionCollection', () => {
duration: 100 as Duration,
id: 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
name: 'foo',
nameSource: 'text_content',
nameSource: ActionNameSource.TEXT_CONTENT,
startClocks: { relative: 1234 as RelativeTime, timeStamp: 123456789 as TimeStamp },
type: ActionType.CLICK,
event,
Expand Down Expand Up @@ -142,7 +143,7 @@ describe('actionCollection', () => {
frustrationTypes: [],
id: 'aaaaaaaa-bbbb-cccc-dddd-eeeeeeeeeeee',
name: 'foo',
nameSource: 'text_content',
nameSource: ActionNameSource.TEXT_CONTENT,
startClocks: { relative: 0 as RelativeTime, timeStamp: 0 as TimeStamp },
type: ActionType.CLICK,
})
Expand Down
20 changes: 16 additions & 4 deletions packages/rum-core/src/domain/action/actionCollection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import type { DefaultRumEventAttributes, Hooks } from '../hooks'
import type { RumMutationRecord } from '../../browser/domMutationObservable'
import type { ActionContexts, ClickAction } from './trackClickActions'
import { trackClickActions } from './trackClickActions'
import { createActionAllowList, actionNameDictionary } from './privacy/allowedDictionary'

export type { ActionContexts }

Expand All @@ -31,8 +32,13 @@ export function startActionCollection(
windowOpenObservable: Observable<void>,
configuration: RumConfiguration
) {
lifeCycle.subscribe(LifeCycleEventType.AUTO_ACTION_COMPLETED, (action) =>
lifeCycle.notify(LifeCycleEventType.RAW_RUM_EVENT_COLLECTED, processAction(action))
createActionAllowList()

const { unsubscribe: unsubscribeAutoActionCompleted } = lifeCycle.subscribe(
LifeCycleEventType.AUTO_ACTION_COMPLETED,
(action) => {
lifeCycle.notify(LifeCycleEventType.RAW_RUM_EVENT_COLLECTED, processAction(action))
}
)

hooks.register(HookNames.Assemble, ({ startTime, eventType }): DefaultRumEventAttributes | SKIPPED => {
Expand Down Expand Up @@ -63,7 +69,8 @@ export function startActionCollection(
lifeCycle,
domMutationObservable,
windowOpenObservable,
configuration
configuration,
actionNameDictionary
))
}

Expand All @@ -72,7 +79,11 @@ export function startActionCollection(
lifeCycle.notify(LifeCycleEventType.RAW_RUM_EVENT_COLLECTED, processAction(action))
},
actionContexts,
stop,
stop: () => {
actionNameDictionary?.clear()
unsubscribeAutoActionCompleted()
stop()
},
}
}

Expand Down Expand Up @@ -104,6 +115,7 @@ function processAction(action: AutoAction | CustomAction): RawRumEventCollectedD
},
}
: undefined

const actionEvent: RawRumActionEvent = combine(
{
action: { id: generateUUID(), target: { name: action.name }, type: action.type },
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { appendElement, mockRumConfiguration } from '../../../test'
import { NodePrivacyLevel } from '../privacy'
import { NodePrivacyLevel } from '../privacyConstants'
import { ActionNameSource, getActionNameFromElement } from './getActionNameFromElement'

const defaultConfiguration = mockRumConfiguration()
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import { safeTruncate } from '@datadog/browser-core'
import { NodePrivacyLevel, getPrivacySelector } from '../privacy'
import { NodePrivacyLevel, getPrivacySelector } from '../privacyConstants'
import type { RumConfiguration } from '../configuration'

/**
Expand All @@ -14,8 +14,9 @@ export const enum ActionNameSource {
TEXT_CONTENT = 'text_content',
STANDARD_ATTRIBUTE = 'standard_attribute',
BLANK = 'blank',
MASK_DISALLOWED = 'mask_disallowed',
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💬 suggestion: ‏I find this source name confusing. As mentioned in my other comment, I think we should ensure consistent behavior across our masking strategies. That way, we can keep using MASK_PLACEHOLDER as the source. If we need to identify which masking strategy was applied, we could include it as a separate event attribute. Wdyt?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideally, yes we would like to distinguish the two strategies. We did add mask_disallowed as a name source in rum-event-format as a separate attribute. But I'm open to change it to another name.

}
type ActionName = {
export type ActionName = {
name: string
nameSource: ActionNameSource
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,257 @@
import { ActionType } from '../../../rawRumEvent.types'
import { NodePrivacyLevel } from '../../privacyConstants'
import { ActionNameSource, ACTION_NAME_PLACEHOLDER } from '../getActionNameFromElement'
import type { ClickActionBase } from '../trackClickActions'
import {
createActionAllowList,
processRawAllowList,
maskActionName,
tokenize,
isBrowserSupported,
} from './allowedDictionary'
import type { AllowedDictionary } from './allowedDictionary'

const TEST_STRINGS = {
COMPLEX_MIXED: 'test-user-name:💥$$$, test-user-id:hello>=42@world?',
PARAGRAPH_MIXED: "This isn't a sentence, it's RUM's test: 💥, $$$ = 1 + 2 + 3, and more.",
}

const LANGUAGES_TEST_STRINGS = {
FRENCH_MIXED_SENTENCE: "C'est pas un test, c'est RUM's test: 💥, $$$ = 1 + 2 + 3, et plus.",
SPANISH_MIXED_SENTENCE: "Este no es un test, es RUM's test: 💥, $$$ = 1 + 2 + 3, y más.",
GERMAN_MIXED_SENTENCE: "Das ist kein Test, das ist RUM's Test: 💥, $$$ = 1 + 2 + 3, und mehr.",
ITALIAN_MIXED_SENTENCE: "Questo non è un test, questo è RUM's test: 💥, $$$ = 1 + 2 + 3, e altro.",
PORTUGUESE_MIXED_SENTENCE: "Este não é um teste, este é RUM's test: 💥, $$$ = 1 + 2 + 3, e mais.",
}
if (isBrowserSupported()) {
describe('Test tokenize', () => {
it('should handle emojis when Browser supports unicode regex', () => {
const paragraphMixedTokens = tokenize(TEST_STRINGS.PARAGRAPH_MIXED)
expect(paragraphMixedTokens).toContain('💥')
expect(paragraphMixedTokens).not.toContain('$$$')
expect(paragraphMixedTokens).not.toContain('1')
expect(paragraphMixedTokens).not.toContain('2')
expect(paragraphMixedTokens).not.toContain('3')
})

it('should return empty array for whitespace-only strings', () => {
expect(tokenize(' ')).toEqual([])
expect(tokenize(' ')).toEqual([])
expect(tokenize('\t')).toEqual([])
expect(tokenize('\n')).toEqual([])
expect(tokenize('\r')).toEqual([])
expect(tokenize(' \t\n\r ')).toEqual([])
})

/**
* This test is to ensure that the match regex is working as expected in all browsers.
* With unicode regex, we can support symbols and emojis OOTB.
* But in older versions of browsers, we need to use a minimal fallback regex which does
* not support many symbols, to avoid bloating the bundle size.
*
* Only European languages (Except Russian) are tested here.
* We can't test Russian because it's not supported by the fallback regex.
* Asian languages are not supported by our current tokenizer strategy.
*/
it('Tokenized results matches words and symbols in TEST_STRINGS', () => {
const expectedParagraphMixed = [
'This',
"isn't",
'a',
'sentence',
"it's",
"RUM's",
'test',
'💥',
'=',
'+',
'+',
'and',
'more',
]
expect(tokenize(TEST_STRINGS.PARAGRAPH_MIXED).sort()).toEqual(expectedParagraphMixed.sort())

const expectedFrench = ["C'est", 'pas', 'un', 'test', "c'est", "RUM's", 'test', '💥', '=', '+', '+', 'et', 'plus']
expect(tokenize(LANGUAGES_TEST_STRINGS.FRENCH_MIXED_SENTENCE).sort()).toEqual(expectedFrench.sort())

const expectedSpanish = ['Este', 'no', 'es', 'un', 'test', 'es', "RUM's", 'test', '💥', '=', '+', '+', 'y', 'más']
expect(tokenize(LANGUAGES_TEST_STRINGS.SPANISH_MIXED_SENTENCE).sort()).toEqual(expectedSpanish.sort())

const expectedGerman = [
'Das',
'ist',
'kein',
'Test',
'das',
'ist',
"RUM's",
'Test',
'💥',
'=',
'+',
'+',
'und',
'mehr',
]
expect(tokenize(LANGUAGES_TEST_STRINGS.GERMAN_MIXED_SENTENCE).sort()).toEqual(expectedGerman.sort())

const expectedPortuguese = [
'Este',
'não',
'é',
'um',
'teste',
'este',
'é',
"RUM's",
'test',
'💥',
'=',
'+',
'+',
'e',
'mais',
]
expect(tokenize(LANGUAGES_TEST_STRINGS.PORTUGUESE_MIXED_SENTENCE).sort()).toEqual(expectedPortuguese.sort())
})
})
}

describe('createActionAllowList', () => {
beforeAll(() => {
window.$DD_ALLOW = new Set([TEST_STRINGS.COMPLEX_MIXED, TEST_STRINGS.PARAGRAPH_MIXED])
})

afterAll(() => {
window.$DD_ALLOW = undefined
})

it('should create an action name dictionary and clear it', () => {
const actionNameDictionary = createActionAllowList()
if (!isBrowserSupported()) {
expect(actionNameDictionary.allowlist.size).toBe(0)
expect(actionNameDictionary.rawStringIterator).toBeDefined()
return
}
expect(actionNameDictionary.allowlist.size).toBeGreaterThan(0)
expect(actionNameDictionary.rawStringIterator).toBeDefined()
actionNameDictionary.clear()
expect(actionNameDictionary.allowlist.size).toBe(0)
expect(actionNameDictionary.rawStringIterator).toBeUndefined()
})

it('should handle when $DD_ALLOW is undefined and redefined later', () => {
window.$DD_ALLOW = undefined
const actionNameDictionary = createActionAllowList()
expect(actionNameDictionary.rawStringIterator).toBeUndefined()

window.$DD_ALLOW = new Set([TEST_STRINGS.COMPLEX_MIXED, TEST_STRINGS.PARAGRAPH_MIXED])
// Trigger the observer manually
window.$DD_ALLOW_OBSERVERS?.forEach((observer) => observer())
expect(actionNameDictionary.rawStringIterator).toBeDefined()
actionNameDictionary.clear()
})
})

if (isBrowserSupported()) {
describe('actionNameDictionary processing', () => {
let actionNameDictionary: AllowedDictionary
let clearActionNameDictionary: () => void

beforeEach(() => {
window.$DD_ALLOW = new Set([TEST_STRINGS.COMPLEX_MIXED, TEST_STRINGS.PARAGRAPH_MIXED])
actionNameDictionary = createActionAllowList()
clearActionNameDictionary = actionNameDictionary.clear
})

afterEach(() => {
window.$DD_ALLOW = undefined
clearActionNameDictionary()
})

it('initializes allowlist with normalized words from $DD_ALLOW', () => {
expect(actionNameDictionary.allowlist.has('test')).toBeTrue()
expect(actionNameDictionary.allowlist.has('hello')).toBeTrue()
expect(actionNameDictionary.allowlist.has('world')).toBeTrue()
})

it('updates dictionary when $DD_ALLOW changes', () => {
const initialAllowlistSize = actionNameDictionary.allowlist.size

// Simulate a change in $DD_ALLOW
window.$DD_ALLOW?.add('new-Word')
window.$DD_ALLOW?.add('another-Word')
// Trigger the observer manually
processRawAllowList(window.$DD_ALLOW, actionNameDictionary)

// Verify dictionary is updated with new words
expect(actionNameDictionary.allowlist.has('word')).toBeTrue()
expect(actionNameDictionary.allowlist.has('new')).toBeTrue()
expect(actionNameDictionary.allowlist.has('another')).toBeTrue()
// Old words should still be present
expect(actionNameDictionary.allowlist.size).toBe(initialAllowlistSize + 3)
})
})
}

describe('createActionNameDictionary and maskActionName', () => {
let actionNameDictionary: AllowedDictionary
let clearActionNameDictionary: () => void
const clickActionBase: ClickActionBase = {
type: ActionType.CLICK,
name: 'test-💥-xxxxxx-xxx',
nameSource: ActionNameSource.MASK_DISALLOWED,
target: {
selector: 'button',
width: 100,
height: 100,
},
position: { x: 0, y: 0 },
}

beforeEach(() => {
window.$DD_ALLOW = new Set([TEST_STRINGS.PARAGRAPH_MIXED])
actionNameDictionary = createActionAllowList()
clearActionNameDictionary = actionNameDictionary.clear
})

afterEach(() => {
window.$DD_ALLOW = undefined
clearActionNameDictionary()
})

it('should not run if $DD_ALLOW is not defined', () => {
window.$DD_ALLOW = undefined as any
clickActionBase.name = 'mask-feature-off'
const testString = maskActionName(clickActionBase, NodePrivacyLevel.ALLOW, actionNameDictionary.allowlist)
expect(testString.name).toBe('mask-feature-off')
expect(testString.nameSource).toBe(ActionNameSource.MASK_DISALLOWED)
})

it('masks words not in allowlist (with dictionary from $DD_ALLOW)', () => {
clickActionBase.name = "test this: if 💥 isn't pii"
let expected = "test this: xxx 💥 isn't xxx"
if (!isBrowserSupported()) {
expected = ACTION_NAME_PLACEHOLDER
}
const testString1 = maskActionName(clickActionBase, NodePrivacyLevel.MASK, actionNameDictionary.allowlist)
expect(testString1.name).toBe(expected)
expect(testString1.nameSource).toBe(ActionNameSource.MASK_DISALLOWED)

clickActionBase.name = 'test-💥+123*hello wild'
expected = 'test-💥+xxx xxx'
if (!isBrowserSupported()) {
expected = ACTION_NAME_PLACEHOLDER
}
const testString2 = maskActionName(clickActionBase, NodePrivacyLevel.MASK, actionNameDictionary.allowlist)
expect(testString2.name).toBe(expected)
expect(testString2.nameSource).toBe(ActionNameSource.MASK_DISALLOWED)
})

it('handles empty string', () => {
clickActionBase.name = ''
const result = maskActionName(clickActionBase, NodePrivacyLevel.ALLOW, actionNameDictionary.allowlist)
expect(result.name).toBe('')
expect(result.nameSource).toBe(ActionNameSource.MASK_DISALLOWED)
})
})
Loading